Author: tmckay Date: 2011-11-11 19:28:53 +0000 (Fri, 11 Nov 2011) New Revision: 5134
Added: trunk/cumin/python/cumin/errors.py Modified: trunk/cumin/bin/cumin trunk/cumin/bin/cumin-data trunk/cumin/bin/cumin-web Log: Additional error checking, log general message to syslog on master error.
Modified: trunk/cumin/bin/cumin =================================================================== --- trunk/cumin/bin/cumin 2011-11-11 15:08:34 UTC (rev 5133) +++ trunk/cumin/bin/cumin 2011-11-11 19:28:53 UTC (rev 5134) @@ -7,6 +7,8 @@ import signal import traceback from datetime import datetime, timedelta +import syslog +from cumin.errors import CuminErrors
home = os.environ.get("CUMIN_HOME", os.path.normpath("/usr/share/cumin")) sys.path.append(os.path.join(home, "python")) @@ -25,6 +27,7 @@ def log_parse_errors(r): msg = os.fdopen(r, "r").readlines() if len(msg) > 0: + _syslog.log() log.error("".join(msg)) return 1 return 0 @@ -40,6 +43,14 @@ prog_string = "".join([" "+x for x in args]) return args, prog_string
+class _syslog(object): + enabled = False + @classmethod + def log(cls): + if _syslog.enabled: + home = os.environ.get("CUMIN_HOME", os.path.normpath("/usr/share/cumin")) + syslog.syslog("cumin: master script exited with errors, see %s/master.log" % home) + def main():
# tuple indices, for clarity @@ -72,15 +83,18 @@ parser.add_option("--console", dest="console", action="store_true", default=False, help="Log to stderr rather than master.log, no IO redirection for children.")
- parser.add_option("--data_options", dest="data_options", default="", type=str, + parser.add_option("--data-options", dest="data_options", default="", type=str, help="Additional options string to pass to data instances."\ "\nEnclose in quotes, options must be --option form, splits on spaces."\ '\nExample: data_options="--print-events=5 --print-stats"')
- parser.add_option("--web_options", dest="web_options", default="", type=str, + parser.add_option("--web-options", dest="web_options", default="", type=str, help="Additional options string to pass to web instances."\ "\nEnclose in quotes, options must be --option form, splits on spaces."\ '\nExample: web_options="--debug --port=12345"') + parser.add_option("--syslog", dest="syslog", action="store_true", default=False, + help="Log general error notfications to syslog. Intended for systemd") + (options, args) = parser.parse_args() except SystemExit: options = args = None @@ -91,19 +105,26 @@ sys.stderr = sys.__stderr__
# Parse may have failed, in which case make a quick check for options ourselves - console = (options and options.console) or "--console" in sys.argv[1:] + if options: + _syslog.enabled = options.syslog + console = options.console + else: + _syslog.enabled = "--syslog" in sys.argv[1:] + console = "--console" in sys.argv[1:] if console: log_dest = sys.stderr else: log_dest = os.path.join(home, "log", "master.log") enable_logging("cumin.master", logging.INFO, log_dest)
+ # Parser exited, either on --help or with errors if not options: return log_parse_errors(r)
if len(args) != 0: log.error("Extra arguments:" + "".join([" "+arg for arg in args])) + _syslog.log() return 1
# Get our list of cumin-web and data instances @@ -153,19 +174,23 @@ # been started, do not start the remaining. # Note, signals that cause termination will result in # a negative error code, we treat those as "normal" + err = CuminErrors.translate(poll) if poll > 0 and poll & 1: - log.error("Subprocess failed init checks (pid %s), "\ - "status %s, check subprocess logs for details" %\ - (app[PROCESS].pid, str(poll))) + log.error("Subprocess (%s) failed init checks "\ + "with status %s (%s), %s"\ + % (app[PROCESS].pid, poll, err[0], err[1])) + log.info("Subprocess logs may contain more details.") log.info("Stopping cumin") + _syslog.log() app[PROCESS] = None return_code = 2 complete = len(apps) break else: - log.warn("Subprocess exited (pid %s), status %s, "\ - "check subprocess logs for details" %\ - (app[PROCESS].pid, str(poll))) + log.warn("Subprocess (%s) exited with status %s (%s), %s"\ + % (app[PROCESS].pid, poll, err[0], err[1])) + if poll != 0: + log.info("Subprocess logs may contain more details.") if options.init_only: app[PROCESS] = None complete += 1
Modified: trunk/cumin/bin/cumin-data =================================================================== --- trunk/cumin/bin/cumin-data 2011-11-11 15:08:34 UTC (rev 5133) +++ trunk/cumin/bin/cumin-data 2011-11-11 19:28:53 UTC (rev 5134) @@ -12,6 +12,7 @@ from parsley.loggingex import PipeLogThread from psycopg2 import OperationalError from cumin.admin import SchemaVersion, SchemaMissing +from cumin.errors import CuminErrors
def restore_IO(): sys.stderr = sys.__stderr__ @@ -19,7 +20,7 @@
def process_classes(mint, values, section_name, on_empty=None):
- return_code = 0 + return_code = CuminErrors.NO_ERROR pkgs = set() if values and len(values) > 0:
@@ -32,7 +33,7 @@ log.error("Configuration section '%s',"\ " class name '%s' is badly formed"\ % (section_name, cls_str.strip())) - return_code = 1 + return_code = CuminErrors.PARSE_ERROR break
try: @@ -58,7 +59,7 @@
def process_agents(values, section_name):
- return_code = 0 + return_code = CuminErrors.NO_ERROR agents = set() if values and len(values) > 0:
@@ -75,7 +76,7 @@ log.error("Configuration section '%s',"\ " agent name '%s' is badly formed"\ % (section_name, agent_str)) - return_code = 1 + return_code = CuminErrors.PARSE_ERROR break if len(agent) >= 1: vendor = agent[0] @@ -96,7 +97,7 @@
def main(): passed_init = 1 - return_code = 0 + return_code = CuminErrors.NO_ERROR
# Do our own simple option check so we can redirect IO early # without worrying about other options or the behavior of optParse @@ -129,7 +130,7 @@ print_exc() pipeThread = None
- class EarlyReturn(Exception): + class ArgError(Exception): pass
try: @@ -166,7 +167,7 @@
if len(args) != 0: log.error("Extra arguments:" + "".join([" "+arg for arg in args])) - raise EarlyReturn + raise ArgError
model_dir = os.path.join(config.home, "model")
@@ -233,27 +234,30 @@ except KeyboardInterrupt: log.info("Received SIGINT")
- except (SystemExit, EarlyReturn): + except SystemExit: if "--help" not in sys.argv: - log.error("Error in options, arguments, or config values") - return_code = 1 + log.error("Error in options") + return_code = CuminErrors.PARSE_ERROR
+ except ArgError: + return_code = CuminErrors.PARSE_ERROR + except OperationalError: # Failed to talk to the database on check() log.info("Run 'cumin-database check' as root for more information.") - return_code = 3 + return_code = CuminErrors.DATABASE_ERROR
except SchemaMissing: log.info("Run 'cumin-admin create-schema' as root") - return_code = 4 + return_code = CuminErrors.SCHEMA_ERROR
except SchemaVersion: log.info("Run 'cumin-admin upgrade-schema' as root") - return_code = 5 + return_code = CuminErrors.SCHEMA_VER_ERROR
except: print_exc() - return_code = 2 + return_code = CuminErrors.UNHANDLED_ERROR
if mint: mint.stop()
Modified: trunk/cumin/bin/cumin-web =================================================================== --- trunk/cumin/bin/cumin-web 2011-11-11 15:08:34 UTC (rev 5133) +++ trunk/cumin/bin/cumin-web 2011-11-11 19:28:53 UTC (rev 5134) @@ -11,6 +11,7 @@ from parsley.loggingex import PipeLogThread from psycopg2 import OperationalError from cumin.admin import SchemaVersion, SchemaMissing +from cumin.errors import CuminErrors
def restore_IO(): sys.stderr = sys.__stderr__ @@ -50,7 +51,7 @@
def main(): passed_init = 1 - return_code = 0 + return_code = CuminErrors.NO_ERROR
# Do our own simple option check so we can redirect IO early # without worrying about other options or the behavior of optParse @@ -84,8 +85,8 @@ print_exc() pipeThread = None
- # Use EarlyReturn to jump to the finally block and exit - class EarlyReturn(Exception): + # Use ArgError to jump to the finally block and exit + class ArgError(Exception): pass
try: @@ -121,7 +122,7 @@
if len(args) != 0: log.error("Extra arguments:" + "".join([" "+arg for arg in args])) - raise EarlyReturn + raise ArgError
broker_uris = [x.strip() for x in opts.brokers.split(",")] authmech = [x.strip() for x in values.auth.split(";")] @@ -159,37 +160,38 @@ sleep(1) if not cumin.server_alive(): print "web server has stopped, exiting..." - return_code = 1 + log.error("Web server process has stopped") + return_code = CuminErrors.WEB_SERVER_ERROR break
except KeyboardInterrupt: log.info("Received SIGINT") pass
- except (SystemExit, EarlyReturn): + except SystemExit: if "--help" not in sys.argv: - log.error("Error in options, arguments, or config values") - return_code = 1 + log.error("Error in options") + return_code = CuminErrors.PARSE_ERROR
- except EarlyReturn: - return_code = 1 + except ArgError: + return_code = CuminErrors.PARSE_ERROR
except OperationalError: # Failed to talk to the database on check() log.info("Run 'cumin-database check' as root for more information.") - return_code = 3 + return_code = CuminErrors.DATABASE_ERROR
except SchemaMissing: log.info("Run 'cumin-admin create-schema' as root") - return_code = 4 + return_code = CuminErrors.SCHEMA_ERROR
except SchemaVersion: log.info("Run 'cumin-admin upgrade-schema' as root") - return_code = 5 + return_code = CuminErrors.SCHEMA_VER_ERROR
except: print_exc() - return_code = 2 + return_code = CuminErrors.UNHANDLED_ERROR
if cumin: cumin.stop()
Added: trunk/cumin/python/cumin/errors.py =================================================================== --- trunk/cumin/python/cumin/errors.py (rev 0) +++ trunk/cumin/python/cumin/errors.py 2011-11-11 19:28:53 UTC (rev 5134) @@ -0,0 +1,46 @@ +# Intended for use by top level scripts like cumin, cumin-web and cumin-data +# for reporting errors during startup. +class CuminErrors(object): + NO_ERROR = 0 + PARSE_ERROR = 1 + UNHANDLED_ERROR = 2 + DATABASE_ERROR = 3 + SCHEMA_ERROR = 4 + SCHEMA_VER_ERROR = 5 + WEB_SERVER_ERROR = 6 + + errors = \ + {NO_ERROR: ("no error", "normal operation"), + PARSE_ERROR: \ + ("parse error", + "error in options, arguments, or config values"), + UNHANDLED_ERROR: \ + ("unhandled error", + "*.stderr or *.stdout logs may contain details"), + DATABASE_ERROR: \ + ("database error", + "run 'cumin-database check' as root for more information"), + SCHEMA_ERROR: \ + ("schema error", + "run 'cumin-admin create-schema' as root"), + SCHEMA_VER_ERROR: \ + ("schema version error", + "run 'cumin-admin upgrade-schema' as root"), + WEB_SERVER_ERROR: \ + ("web server error", + "web server process has exited") + } + + @classmethod + def translate(cls, status): + s = status + # low bit indicates whether error was during init or not. + # shift right for value (if it's not a signal) + if s > 0: + s = s >> 1 + if s in CuminErrors.errors: + return CuminErrors.errors[s] + elif s < 0: + return ("received a signal", "unhandled") + else: + return ("unrecognized error", "check logs for details")