diff --git a/NEWS b/NEWS index f8e17da85..09a76063f 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,8 @@ 1.5.6.dev0 + * #292: Allow before_backup and similar hooks to exit with a soft failure without altering the + monitoring status on Healthchecks or other providers. Support this by waiting to ping monitoring + services with a "start" status until after before_* hooks finish. Failures in before_* hooks + still trigger a monitoring "fail" status. * #316: Fix hang when a stale database dump named pipe from an aborted borgmatic run remains on disk. * Tweak comment indentation in generated configuration file for clarity. diff --git a/borgmatic/commands/borgmatic.py b/borgmatic/commands/borgmatic.py index b94437b20..6d21d4da8 100644 --- a/borgmatic/commands/borgmatic.py +++ b/borgmatic/commands/borgmatic.py @@ -59,11 +59,10 @@ def run_configuration(config_filename, config, arguments): try: if prune_create_or_check: dispatch.call_hooks( - 'ping_monitor', + 'initialize_monitor', hooks, config_filename, monitor.MONITOR_HOOK_NAMES, - monitor.State.START, monitoring_log_level, global_arguments.dry_run, ) @@ -91,6 +90,16 @@ def run_configuration(config_filename, config, arguments): 'pre-check', global_arguments.dry_run, ) + if prune_create_or_check: + dispatch.call_hooks( + 'ping_monitor', + hooks, + config_filename, + monitor.MONITOR_HOOK_NAMES, + monitor.State.START, + monitoring_log_level, + global_arguments.dry_run, + ) except (OSError, CalledProcessError) as error: if command.considered_soft_failure(config_filename, error): return @@ -123,6 +132,16 @@ def run_configuration(config_filename, config, arguments): if not encountered_error: try: + if prune_create_or_check: + dispatch.call_hooks( + 'ping_monitor', + hooks, + config_filename, + monitor.MONITOR_HOOK_NAMES, + monitor.State.FINISH, + monitoring_log_level, + global_arguments.dry_run, + ) if 'prune' in arguments: command.execute_hook( hooks.get('after_prune'), @@ -155,16 +174,6 @@ def run_configuration(config_filename, 
config, arguments): 'post-check', global_arguments.dry_run, ) - if {'prune', 'create', 'check'}.intersection(arguments): - dispatch.call_hooks( - 'ping_monitor', - hooks, - config_filename, - monitor.MONITOR_HOOK_NAMES, - monitor.State.FINISH, - monitoring_log_level, - global_arguments.dry_run, - ) except (OSError, CalledProcessError) as error: if command.considered_soft_failure(config_filename, error): return @@ -176,6 +185,15 @@ def run_configuration(config_filename, config, arguments): if encountered_error and prune_create_or_check: try: + dispatch.call_hooks( + 'ping_monitor', + hooks, + config_filename, + monitor.MONITOR_HOOK_NAMES, + monitor.State.FAIL, + monitoring_log_level, + global_arguments.dry_run, + ) command.execute_hook( hooks.get('on_error'), hooks.get('umask'), @@ -186,15 +204,6 @@ def run_configuration(config_filename, config, arguments): error=encountered_error, output=getattr(encountered_error, 'output', ''), ) - dispatch.call_hooks( - 'ping_monitor', - hooks, - config_filename, - monitor.MONITOR_HOOK_NAMES, - monitor.State.FAIL, - monitoring_log_level, - global_arguments.dry_run, - ) except (OSError, CalledProcessError) as error: if command.considered_soft_failure(config_filename, error): return diff --git a/borgmatic/hooks/cronhub.py b/borgmatic/hooks/cronhub.py index f95abe622..3b95d3e4c 100644 --- a/borgmatic/hooks/cronhub.py +++ b/borgmatic/hooks/cronhub.py @@ -13,6 +13,13 @@ MONITOR_STATE_TO_CRONHUB = { } +def initialize_monitor(ping_url, config_filename, monitoring_log_level, dry_run): + ''' + No initialization is necessary for this monitor. + ''' + pass + + def ping_monitor(ping_url, config_filename, state, monitoring_log_level, dry_run): ''' Ping the given Cronhub URL, modified with the monitor.State. 
Use the given configuration diff --git a/borgmatic/hooks/cronitor.py b/borgmatic/hooks/cronitor.py index e4bc7c4b8..c1687708c 100644 --- a/borgmatic/hooks/cronitor.py +++ b/borgmatic/hooks/cronitor.py @@ -13,6 +13,13 @@ MONITOR_STATE_TO_CRONITOR = { } +def initialize_monitor(ping_url, config_filename, monitoring_log_level, dry_run): + ''' + No initialization is necessary for this monitor. + ''' + pass + + def ping_monitor(ping_url, config_filename, state, monitoring_log_level, dry_run): ''' Ping the given Cronitor URL, modified with the monitor.State. Use the given configuration diff --git a/borgmatic/hooks/healthchecks.py b/borgmatic/hooks/healthchecks.py index e16dfc7e7..72ea943d1 100644 --- a/borgmatic/hooks/healthchecks.py +++ b/borgmatic/hooks/healthchecks.py @@ -65,20 +65,22 @@ def format_buffered_logs_for_payload(): return payload +def initialize_monitor(ping_url_or_uuid, config_filename, monitoring_log_level, dry_run): + ''' + Add a handler to the root logger that stores in memory the most recent logs emitted. That + way, we can send them all to Healthchecks upon a finish or failure state. + ''' + logging.getLogger().addHandler( + Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES, monitoring_log_level) + ) + + def ping_monitor(ping_url_or_uuid, config_filename, state, monitoring_log_level, dry_run): ''' Ping the given Healthchecks URL or UUID, modified with the monitor.State. Use the given configuration filename in any log entries, and log to Healthchecks with the giving log level. If this is a dry run, then don't actually ping anything. ''' - if state is monitor.State.START: - # Add a handler to the root logger that stores in memory the most recent logs emitted. That - # way, we can send them all to Healthchecks upon a finish or failure state. 
- logging.getLogger().addHandler( - Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES, monitoring_log_level) - ) - payload = '' - ping_url = ( ping_url_or_uuid if ping_url_or_uuid.startswith('http') @@ -97,6 +99,8 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, monitoring_log_level, if state in (monitor.State.FINISH, monitor.State.FAIL): payload = format_buffered_logs_for_payload() + else: + payload = '' if not dry_run: logging.getLogger('urllib3').setLevel(logging.ERROR) diff --git a/borgmatic/hooks/pagerduty.py b/borgmatic/hooks/pagerduty.py index 0e613cc56..01033c625 100644 --- a/borgmatic/hooks/pagerduty.py +++ b/borgmatic/hooks/pagerduty.py @@ -12,6 +12,13 @@ logger = logging.getLogger(__name__) EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue' +def initialize_monitor(integration_key, config_filename, monitoring_log_level, dry_run): + ''' + No initialization is necessary for this monitor. + ''' + pass + + def ping_monitor(integration_key, config_filename, state, monitoring_log_level, dry_run): ''' If this is an error state, create a PagerDuty event with the given integration key. Use the diff --git a/docs/how-to/monitor-your-backups.md b/docs/how-to/monitor-your-backups.md index af9c9d0b5..1f4b93e8d 100644 --- a/docs/how-to/monitor-your-backups.md +++ b/docs/how-to/monitor-your-backups.md @@ -117,21 +117,21 @@ hooks: ``` With this hook in place, borgmatic pings your Healthchecks project when a -backup begins, ends, or errors. Specifically, before the `before_backup` hooks run, borgmatic lets Healthchecks know that it has started if any of the `prune`, `create`, or `check` actions are run. Then, if the actions complete successfully, borgmatic notifies Healthchecks of -the success after the `after_backup` hooks run, and includes borgmatic logs in +the success before the `after_backup` hooks run, and includes borgmatic logs in the payload data sent to Healthchecks. 
This means that borgmatic logs show up in the Healthchecks UI, although be aware that Healthchecks currently has a 10-kilobyte limit for the logs in each ping. -If an error occurs during any action, borgmatic notifies Healthchecks after -the `on_error` hooks run, also tacking on logs including the error itself. But -the logs are only included for errors that occur when a `prune`, `create`, or -`check` action is run. +If an error occurs during any action or hook, borgmatic notifies Healthchecks +before the `on_error` hooks run, also tacking on logs including the error +itself. But the logs are only included for errors that occur when a `prune`, +`create`, or `check` action is run. You can customize the verbosity of the logs that are sent to Healthchecks with borgmatic's `--monitoring-verbosity` flag. The `--files` and `--stats` flags @@ -157,13 +157,13 @@ hooks: ``` With this hook in place, borgmatic pings your Cronitor monitor when a backup -begins, ends, or errors. Specifically, before the `before_backup` hooks run, borgmatic lets Cronitor know that it has started if any of the `prune`, `create`, or `check` actions are run. Then, if the actions complete -successfully, borgmatic notifies Cronitor of the success after the -`after_backup` hooks run. And if an error occurs during any action, borgmatic -notifies Cronitor after the `on_error` hooks run. +successfully, borgmatic notifies Cronitor of the success before the +`after_backup` hooks run. And if an error occurs during any action or hook, +borgmatic notifies Cronitor before the `on_error` hooks run. You can configure Cronitor to notify you by a [variety of mechanisms](https://cronitor.io/docs/cron-job-notifications) when backups fail @@ -185,13 +185,13 @@ hooks: ``` With this hook in place, borgmatic pings your Cronhub monitor when a backup -begins, ends, or errors. 
Specifically, before the `before_backup` hooks run, borgmatic lets Cronhub know that it has started if any of the `prune`, `create`, or `check` actions are run. Then, if the actions complete -successfully, borgmatic notifies Cronhub of the success after the -`after_backup` hooks run. And if an error occurs during any action, borgmatic -notifies Cronhub after the `on_error` hooks run. +successfully, borgmatic notifies Cronhub of the success before the +`after_backup` hooks run. And if an error occurs during any action or hook, +borgmatic notifies Cronhub before the `on_error` hooks run. Note that even though you configure borgmatic with the "start" variant of the ping URL, borgmatic substitutes the correct state into the URL when pinging @@ -228,7 +228,7 @@ hooks: With this hook in place, borgmatic creates a PagerDuty event for your service whenever backups fail. Specifically, if an error occurs during a `create`, -`prune`, or `check` action, borgmatic sends an event to PagerDuty after the +`prune`, or `check` action, borgmatic sends an event to PagerDuty before the `on_error` hooks run. Note that borgmatic does not contact PagerDuty when a backup starts or ends without error.