From 54933ebef5a2689585cab545793bf126933f044d Mon Sep 17 00:00:00 2001 From: Dan Helfman Date: Tue, 24 May 2022 15:50:04 -0700 Subject: [PATCH] Change connection failures for monitoring hooks to be warnings instead of errors (#439). --- NEWS | 3 +++ borgmatic/hooks/command.py | 2 +- borgmatic/hooks/cronhub.py | 5 ++++- borgmatic/hooks/cronitor.py | 5 ++++- borgmatic/hooks/healthchecks.py | 5 ++++- borgmatic/hooks/pagerduty.py | 5 ++++- docs/how-to/monitor-your-backups.md | 5 ++++- tests/unit/hooks/test_cronhub.py | 15 +++++++++++++++ tests/unit/hooks/test_cronitor.py | 15 +++++++++++++++ tests/unit/hooks/test_healthchecks.py | 17 +++++++++++++++++ tests/unit/hooks/test_pagerduty.py | 15 +++++++++++++++ 11 files changed, 86 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index e66c87f..c1f05aa 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,9 @@ logs to send to the Healthchecks server. * #402: Remove the error when "archive_name_format" is specified but a retention prefix isn't. * #420: Warn when an unsupported variable is used in a hook command. + * #439: Change connection failures for monitoring hooks (Healthchecks, Cronitor, PagerDuty, and + Cronhub) to be warnings instead of errors. This way, the monitoring system failing does not block + backups. * #460: Add Healthchecks monitoring hook "send_logs" option to enable/disable sending borgmatic logs to the Healthchecks server. * #525: Add Healthchecks monitoring hook "states" option to only enable pinging for particular diff --git a/borgmatic/hooks/command.py b/borgmatic/hooks/command.py index e1c581e..756f877 100644 --- a/borgmatic/hooks/command.py +++ b/borgmatic/hooks/command.py @@ -19,7 +19,7 @@ def interpolate_context(config_filename, hook_description, command, context): command = command.replace('{%s}' % name, str(value)) for unsupported_variable in re.findall(r'{\w+}', command): - logger.warn( + logger.warning( f"{config_filename}: Variable '{unsupported_variable}' is not supported in {hook_description} hook" ) diff --git a/borgmatic/hooks/cronhub.py b/borgmatic/hooks/cronhub.py index 68ef4fd..4832347 100644 --- a/borgmatic/hooks/cronhub.py +++ b/borgmatic/hooks/cronhub.py @@ -42,7 +42,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_ if not dry_run: logging.getLogger('urllib3').setLevel(logging.ERROR) - requests.get(ping_url) + try: + requests.get(ping_url) + except requests.exceptions.RequestException as error: + logger.warning(f'{config_filename}: Cronhub error: {error}') def destroy_monitor( diff --git a/borgmatic/hooks/cronitor.py b/borgmatic/hooks/cronitor.py index a9acec3..43f1779 100644 --- a/borgmatic/hooks/cronitor.py +++ b/borgmatic/hooks/cronitor.py @@ -37,7 +37,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_ if not dry_run: logging.getLogger('urllib3').setLevel(logging.ERROR) - requests.get(ping_url) + try: + requests.get(ping_url) + except requests.exceptions.RequestException as error: + logger.warning(f'{config_filename}: Cronitor error: {error}') def destroy_monitor( diff --git a/borgmatic/hooks/healthchecks.py b/borgmatic/hooks/healthchecks.py index 56b1e07..7ac04e2 100644 --- a/borgmatic/hooks/healthchecks.py +++ b/borgmatic/hooks/healthchecks.py @@ -124,7 +124,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_ if not dry_run: logging.getLogger('urllib3').setLevel(logging.ERROR) - requests.post(ping_url, data=payload.encode('utf-8')) + try: + requests.post(ping_url, data=payload.encode('utf-8')) + except requests.exceptions.RequestException as error: + logger.warning(f'{config_filename}: Healthchecks error: {error}') def destroy_monitor(hook_config, config_filename, monitoring_log_level, dry_run): diff --git a/borgmatic/hooks/pagerduty.py b/borgmatic/hooks/pagerduty.py index 8d96f56..20fc771 100644 --- a/borgmatic/hooks/pagerduty.py +++ b/borgmatic/hooks/pagerduty.py @@ -68,7 +68,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_ logger.debug('{}: Using PagerDuty payload: {}'.format(config_filename, payload)) logging.getLogger('urllib3').setLevel(logging.ERROR) - requests.post(EVENTS_API_URL, data=payload.encode('utf-8')) + try: + requests.post(EVENTS_API_URL, data=payload.encode('utf-8')) + except requests.exceptions.RequestException as error: + logger.warning(f'{config_filename}: PagerDuty error: {error}') def destroy_monitor( diff --git a/docs/how-to/monitor-your-backups.md b/docs/how-to/monitor-your-backups.md index 70a3e0c..725e5d1 100644 --- a/docs/how-to/monitor-your-backups.md +++ b/docs/how-to/monitor-your-backups.md @@ -159,7 +159,10 @@ itself. But the logs are only included for errors that occur when a `prune`, You can customize the verbosity of the logs that are sent to Healthchecks with borgmatic's `--monitoring-verbosity` flag. The `--files` and `--stats` flags -may also be of use. See `borgmatic --help` for more information. +may also be of use. See `borgmatic --help` for more information. Additionally, +see the [borgmatic configuration +file](https://torsion.org/borgmatic/docs/reference/configuration/) for +additional Healthchecks options. You can configure Healthchecks to notify you by a [variety of mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail diff --git a/tests/unit/hooks/test_cronhub.py b/tests/unit/hooks/test_cronhub.py index 0a5abff..54bd3db 100644 --- a/tests/unit/hooks/test_cronhub.py +++ b/tests/unit/hooks/test_cronhub.py @@ -58,3 +58,18 @@ def test_ping_monitor_dry_run_does_not_hit_ping_url(): module.ping_monitor( hook_config, 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=True ) + + +def test_ping_monitor_with_connection_error_does_not_raise(): + hook_config = {'ping_url': 'https://example.com/start/abcdef'} + flexmock(module.requests).should_receive('get').and_raise( + module.requests.exceptions.ConnectionError + ) + + module.ping_monitor( + hook_config, + 'config.yaml', + module.monitor.State.START, + monitoring_log_level=1, + dry_run=False, + ) diff --git a/tests/unit/hooks/test_cronitor.py b/tests/unit/hooks/test_cronitor.py index d080424..5c258ae 100644 --- a/tests/unit/hooks/test_cronitor.py +++ b/tests/unit/hooks/test_cronitor.py @@ -45,3 +45,18 @@ def test_ping_monitor_dry_run_does_not_hit_ping_url(): module.ping_monitor( hook_config, 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=True ) + + +def test_ping_monitor_with_connection_error_does_not_raise(): + hook_config = {'ping_url': 'https://example.com'} + flexmock(module.requests).should_receive('get').and_raise( + module.requests.exceptions.ConnectionError + ) + + module.ping_monitor( + hook_config, + 'config.yaml', + module.monitor.State.START, + monitoring_log_level=1, + dry_run=False, + ) diff --git a/tests/unit/hooks/test_healthchecks.py b/tests/unit/hooks/test_healthchecks.py index 3a27cd6..48a6a21 100644 --- a/tests/unit/hooks/test_healthchecks.py +++ b/tests/unit/hooks/test_healthchecks.py @@ -231,3 +231,20 @@ def test_ping_monitor_hits_ping_url_when_states_matching(): monitoring_log_level=1, dry_run=False, ) + + +def test_ping_monitor_with_connection_error_does_not_raise(): + flexmock(module).should_receive('Forgetful_buffering_handler') + flexmock(module.logger).should_receive('warning') + hook_config = {'ping_url': 'https://example.com'} + flexmock(module.requests).should_receive('post').with_args( + 'https://example.com/start', data=''.encode('utf-8') + ).and_raise(module.requests.exceptions.ConnectionError) + + module.ping_monitor( + hook_config, + 'config.yaml', + state=module.monitor.State.START, + monitoring_log_level=1, + dry_run=False, + ) diff --git a/tests/unit/hooks/test_pagerduty.py b/tests/unit/hooks/test_pagerduty.py index 4f425de..3d8589f 100644 --- a/tests/unit/hooks/test_pagerduty.py +++ b/tests/unit/hooks/test_pagerduty.py @@ -49,3 +49,18 @@ def test_ping_monitor_dry_run_does_not_call_api(): monitoring_log_level=1, dry_run=True, ) + + +def test_ping_monitor_with_connection_error_does_not_raise(): + flexmock(module.requests).should_receive('post').and_raise( + module.requests.exceptions.ConnectionError + ) + flexmock(module.logger).should_receive('warning') + + module.ping_monitor( + {'integration_key': 'abc123'}, + 'config.yaml', + module.monitor.State.FAIL, + monitoring_log_level=1, + dry_run=False, + )