From 6b7653484ba4273eca76305857907938dfb801e7 Mon Sep 17 00:00:00 2001 From: Andrea Ghensi Date: Sun, 26 Dec 2021 01:00:58 +0100 Subject: [PATCH] Add mongodb dump hook --- borgmatic/config/schema.yaml | 65 +++++++ borgmatic/hooks/dispatch.py | 3 +- borgmatic/hooks/dump.py | 2 +- borgmatic/hooks/mongodb.py | 158 ++++++++++++++++ tests/end-to-end/test_database.py | 14 +- tests/unit/hooks/test_mongodb.py | 290 ++++++++++++++++++++++++++++++ 6 files changed, 527 insertions(+), 5 deletions(-) create mode 100644 borgmatic/hooks/mongodb.py create mode 100644 tests/unit/hooks/test_mongodb.py diff --git a/borgmatic/config/schema.yaml b/borgmatic/config/schema.yaml index 24273123c..64254a16c 100644 --- a/borgmatic/config/schema.yaml +++ b/borgmatic/config/schema.yaml @@ -773,6 +773,71 @@ properties: mysqldump/mysql commands (from either MySQL or MariaDB). See https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html or https://mariadb.com/kb/en/library/mysqldump/ for details. + mongodb_databases: + type: array + items: + type: object + required: ['name'] + additionalProperties: false + properties: + name: + type: string + description: | + Database name (required if using this hook). Or + "all" to dump all databases on the host. Note + that using this database hook implicitly enables + both read_special and one_file_system (see + above) to support dump and restore streaming. + example: users + hostname: + type: string + description: | + Database hostname to connect to. Defaults to + connecting to localhost. + example: database.example.org + port: + type: integer + description: Port to connect to. Defaults to 27017. + example: 27017 + username: + type: string + description: | + Username with which to connect to the database. + Skip it if no authentication is needed. + example: dbuser + password: + type: string + description: | + Password with which to connect to the database. + Skip it if no authentication is needed. 
+ example: trustsome1 + format: + type: string + enum: ['archive', 'directory'] + description: | + Database dump output format. One of "archive" + or "directory". Defaults to "archive" (unlike + raw mongodump). See mongodump documentation for + details. Note that format is ignored when the + database name is "all". + example: directory + options: + type: string + description: | + Additional mongodump options to pass + directly to the dump command, without performing + any validation on them. See mongodump + documentation for details. + example: --gzip + description: | + List of one or more MongoDB databases to dump before + creating a backup, run once per configuration file. The + database dumps are added to your source directories at + runtime, backed up, and removed afterwards. Requires + mongodump/mongorestore commands. See + https://docs.mongodb.com/database-tools/mongodump/ and + https://docs.mongodb.com/database-tools/mongorestore/ for + details. healthchecks: type: string description: | diff --git a/borgmatic/hooks/dispatch.py b/borgmatic/hooks/dispatch.py index 6fb2c8086..a689e7033 100644 --- a/borgmatic/hooks/dispatch.py +++ b/borgmatic/hooks/dispatch.py @@ -1,6 +1,6 @@ import logging -from borgmatic.hooks import cronhub, cronitor, healthchecks, mysql, pagerduty, postgresql +from borgmatic.hooks import cronhub, cronitor, healthchecks, mongodb, mysql, pagerduty, postgresql logger = logging.getLogger(__name__) @@ -11,6 +11,7 @@ HOOK_NAME_TO_MODULE = { 'pagerduty': pagerduty, 'postgresql_databases': postgresql, 'mysql_databases': mysql, + 'mongodb_databases': mongodb, } diff --git a/borgmatic/hooks/dump.py b/borgmatic/hooks/dump.py index 8bc9fcb89..f905d4925 100644 --- a/borgmatic/hooks/dump.py +++ b/borgmatic/hooks/dump.py @@ -6,7 +6,7 @@ from borgmatic.borg.create import DEFAULT_BORGMATIC_SOURCE_DIRECTORY logger = logging.getLogger(__name__) -DATABASE_HOOK_NAMES = 
def make_dump_path(location_config):  # pragma: no cover
    '''
    Make the dump path from the given location configuration and the name of this hook.
    '''
    return dump.make_database_dump_path(
        location_config.get('borgmatic_source_directory'), 'mongodb_databases'
    )


def dump_databases(databases, log_prefix, location_config, dry_run):
    '''
    Dump the given MongoDB databases, each either to a named pipe (the default "archive" format)
    or to a directory on disk (the "directory" format). The databases are supplied as a sequence of
    dicts, one dict describing each database as per the configuration schema. Use the given log
    prefix in any log entries. Use the given location configuration dict to construct the
    destination path.

    Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
    pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
    '''
    dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''

    logger.info('{}: Dumping MongoDB databases{}'.format(log_prefix, dry_run_label))

    # The dump path depends only on the location configuration, so compute it once.
    dump_path = make_dump_path(location_config)
    processes = []

    for database in databases:
        name = database['name']
        dump_filename = dump.make_database_dump_filename(
            dump_path, name, database.get('hostname')
        )
        # "archive" is the default documented in the configuration schema.
        dump_format = database.get('format', 'archive')

        logger.debug(
            '{}: Dumping MongoDB database {} to {}{}'.format(
                log_prefix, name, dump_filename, dry_run_label
            )
        )
        if dry_run:
            continue

        if dump_format == 'directory':
            dump.create_parent_directory_for_dump(dump_filename)
        else:
            dump.create_named_pipe_for_dump(dump_filename)

        # The command relies on a shell redirect ("> pipe") for the archive format, so it has to
        # run through a shell; build_dump_command() quotes the values it interpolates.
        command = build_dump_command(database, dump_filename, dump_format)
        processes.append(execute_command(command, shell=True, run_to_completion=False))

    return processes


def build_dump_command(database, dump_filename, dump_format):
    '''
    Return the mongodump command, as a list of shell words, for a single database configuration
    dict, the path to dump to, and the dump format ("directory" or anything else for an archive).

    For the directory format the dump is written with "--out <path>". Otherwise the dump is
    streamed with "--archive" to stdout and redirected into the given path. Unless the database
    name is "all", the dump is limited to that database with "--db".

    The resulting words are meant to be joined and run through a shell, so every configured value
    is shell-quoted. The "options" value is deliberately passed through as-is.
    '''
    # Local import so this function carries its own dependency.
    import shlex

    command = ['mongodump']

    # mongodump rejects "--archive" combined with "--out", and a bare positional path is not a
    # valid way to select the output directory.
    if dump_format == 'directory':
        command.extend(('--out', shlex.quote(dump_filename)))
    else:
        command.append('--archive')

    if 'hostname' in database:
        command.extend(('--host', shlex.quote(database['hostname'])))
    if 'port' in database:
        command.extend(('--port', shlex.quote(str(database['port']))))
    if 'username' in database:
        command.extend(('--username', shlex.quote(database['username'])))
    if 'password' in database:
        command.extend(('--password', shlex.quote(database['password'])))
    if database['name'] != 'all':
        command.extend(('--db', shlex.quote(database['name'])))
    if 'options' in database:
        command.extend(database['options'].split(' '))
    if dump_format != 'directory':
        command.extend(('>', shlex.quote(dump_filename)))

    return command


def remove_database_dumps(databases, log_prefix, location_config, dry_run):  # pragma: no cover
    '''
    Remove all database dump files for this hook regardless of the given databases. Use the log
    prefix in any log entries. Use the given location configuration dict to construct the
    destination path. If this is a dry run, then don't actually remove anything.
    '''
    dump.remove_database_dumps(make_dump_path(location_config), 'MongoDB', log_prefix, dry_run)


def make_database_dump_pattern(
    databases, log_prefix, location_config, name=None
):  # pragma: no cover
    '''
    Given a sequence of configurations dicts, a prefix to log with, a location configuration dict,
    and a database name to match, return the corresponding glob pattern to match the database dump
    in an archive. The hostname component of the pattern is a wildcard.
    '''
    return dump.make_database_dump_filename(make_dump_path(location_config), name, hostname='*')


def restore_database_dump(database_config, log_prefix, location_config, dry_run, extract_process):
    '''
    Restore the given MongoDB database from an extract stream. The database is supplied as a
    one-element sequence containing a dict describing the database, as per the configuration schema.
    Use the given log prefix in any log entries. If this is a dry run, then don't actually restore
    anything. Trigger the given active extract process (an instance of subprocess.Popen) to produce
    output to consume.

    If the extract process is None, then restore the dump from the filesystem rather than from an
    extract stream.

    Raise ValueError if the database configuration does not contain exactly one database.
    '''
    dry_run_label = ' (dry run; not actually restoring anything)' if dry_run else ''

    if len(database_config) != 1:
        raise ValueError('The database configuration value is invalid')

    database = database_config[0]
    dump_filename = dump.make_database_dump_filename(
        make_dump_path(location_config), database['name'], database.get('hostname')
    )
    restore_command = build_restore_command(extract_process, database, dump_filename)

    logger.debug(
        '{}: Restoring MongoDB database {}{}'.format(log_prefix, database['name'], dry_run_label)
    )
    if dry_run:
        return

    execute_command_with_processes(
        restore_command,
        [extract_process] if extract_process else [],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout if extract_process else None,
        borg_local_path=location_config.get('local_path', 'borg'),
    )


def build_restore_command(extract_process, database, dump_filename):
    '''
    Return the mongorestore command, as a list of arguments, for a single database configuration
    dict.

    When an extract process is given, the dump is read as an archive from stdin ("--archive").
    Otherwise the dump is read from the given path on disk ("--dir <path>"). Unless the database
    name is "all", the restore is limited to that database and drops existing collections first.

    The arguments are not shell-quoted: the caller runs them without a shell.
    '''
    command = ['mongorestore']

    # "--archive" with no value means "read from stdin", which is only correct while streaming;
    # it must not be combined with a directory to restore from.
    if extract_process:
        command.append('--archive')
    else:
        command.extend(('--dir', dump_filename))

    if database['name'] != 'all':
        command.extend(('--drop', '--db', database['name']))
    if 'hostname' in database:
        command.extend(('--host', database['hostname']))
    if 'port' in database:
        command.extend(('--port', str(database['port'])))
    if 'username' in database:
        command.extend(('--username', database['username']))
    if 'password' in database:
        command.extend(('--password', database['password']))

    return command
def test_dump_databases_runs_mongodump_for_each_database():
    # Each configured database gets its own mongodump process streaming to its own named pipe.
    databases = [{'name': 'foo'}, {'name': 'bar'}]
    processes = [flexmock(), flexmock()]
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    ).and_return('databases/localhost/bar')
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')

    for name, process in zip(('foo', 'bar'), processes):
        flexmock(module).should_receive('execute_command').with_args(
            ['mongodump', '--archive', '--db', name, '>', 'databases/localhost/{}'.format(name)],
            shell=True,
            run_to_completion=False,
        ).and_return(process).once()

    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == processes


def test_dump_databases_with_dry_run_skips_mongodump():
    # A dry run must neither create pipes nor launch any process.
    databases = [{'name': 'foo'}, {'name': 'bar'}]
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    ).and_return('databases/localhost/bar')
    flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
    flexmock(module).should_receive('execute_command').never()

    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=True) == []


def test_dump_databases_runs_mongodump_with_hostname_and_port():
    databases = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
    process = flexmock()
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/database.example.org/foo'
    )
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')

    flexmock(module).should_receive('execute_command').with_args(
        [
            'mongodump',
            '--archive',
            '--host',
            'database.example.org',
            '--port',
            '5433',
            '--db',
            'foo',
            '>',
            'databases/database.example.org/foo',
        ],
        shell=True,
        run_to_completion=False,
    ).and_return(process).once()

    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]


def test_dump_databases_runs_mongodump_with_username_and_password():
    databases = [{'name': 'foo', 'username': 'mongo', 'password': 'trustsome1'}]
    process = flexmock()
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    )
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')

    flexmock(module).should_receive('execute_command').with_args(
        [
            'mongodump',
            '--archive',
            '--username',
            'mongo',
            '--password',
            'trustsome1',
            '--db',
            'foo',
            '>',
            'databases/localhost/foo',
        ],
        shell=True,
        run_to_completion=False,
    ).and_return(process).once()

    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]


def test_dump_databases_runs_mongodump_with_directory_format():
    databases = [{'name': 'foo', 'format': 'directory'}]
    process = flexmock()
    calls = []

    def record_call(command, **kwargs):
        calls.append((command, kwargs))
        return process

    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    )
    flexmock(module.dump).should_receive('create_parent_directory_for_dump')
    flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
    flexmock(module).should_receive('execute_command').replace_with(record_call).once()

    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]

    # Check what matters for a directory dump (the tool, the target directory, the database
    # selection and the absence of a shell redirect) rather than the exact flag spelling.
    ((command, kwargs),) = calls
    assert command[0] == 'mongodump'
    assert 'databases/localhost/foo' in command
    assert command[-2:] == ['--db', 'foo']
    assert '>' not in command
    assert kwargs == {'shell': True, 'run_to_completion': False}


def test_dump_databases_runs_mongodump_with_options():
    databases = [{'name': 'foo', 'options': '--stuff=such'}]
    process = flexmock()
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/foo'
    )
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')

    flexmock(module).should_receive('execute_command').with_args(
        ['mongodump', '--archive', '--db', 'foo', '--stuff=such', '>', 'databases/localhost/foo'],
        shell=True,
        run_to_completion=False,
    ).and_return(process).once()

    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]


def test_dump_databases_runs_mongodump_without_db_for_all_databases():
    # The special name "all" means no "--db" filter is passed to mongodump.
    databases = [{'name': 'all'}]
    process = flexmock()
    flexmock(module).should_receive('make_dump_path').and_return('')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
        'databases/localhost/all'
    )
    flexmock(module.dump).should_receive('create_named_pipe_for_dump')

    flexmock(module).should_receive('execute_command').with_args(
        ['mongodump', '--archive', '>', 'databases/localhost/all'],
        shell=True,
        run_to_completion=False,
    ).and_return(process).once()

    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]


def test_restore_database_dump_runs_mongorestore():
    database_config = [{'name': 'foo'}]
    extract_process = flexmock(stdout=flexmock())

    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        ['mongorestore', '--archive', '--drop', '--db', 'foo'],
        processes=[extract_process],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        database_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
    )


def test_restore_database_dump_errors_on_multiple_database_config():
    # Exactly one database must be supplied per restore call.
    database_config = [{'name': 'foo'}, {'name': 'bar'}]

    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').never()
    flexmock(module).should_receive('execute_command').never()

    with pytest.raises(ValueError):
        module.restore_database_dump(
            database_config, 'test.yaml', {}, dry_run=False, extract_process=flexmock()
        )


def test_restore_database_dump_runs_mongorestore_with_hostname_and_port():
    database_config = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
    extract_process = flexmock(stdout=flexmock())

    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        [
            'mongorestore',
            '--archive',
            '--drop',
            '--db',
            'foo',
            '--host',
            'database.example.org',
            '--port',
            '5433',
        ],
        processes=[extract_process],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        database_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
    )


def test_restore_database_dump_runs_mongorestore_with_username_and_password():
    database_config = [{'name': 'foo', 'username': 'mongo', 'password': 'trustsome1'}]
    extract_process = flexmock(stdout=flexmock())

    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        [
            'mongorestore',
            '--archive',
            '--drop',
            '--db',
            'foo',
            '--username',
            'mongo',
            '--password',
            'trustsome1',
        ],
        processes=[extract_process],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        database_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
    )


def test_restore_database_dump_runs_mongorestore_without_db_for_all_databases():
    # Restoring an "all" dump must not pass "--drop" / "--db".
    database_config = [{'name': 'all'}]
    extract_process = flexmock(stdout=flexmock())

    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').with_args(
        ['mongorestore', '--archive'],
        processes=[extract_process],
        output_log_level=logging.DEBUG,
        input_file=extract_process.stdout,
        borg_local_path='borg',
    ).once()

    module.restore_database_dump(
        database_config, 'test.yaml', {}, dry_run=False, extract_process=extract_process
    )


def test_restore_database_dump_with_dry_run_skips_restore():
    database_config = [{'name': 'foo'}]

    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename')
    flexmock(module).should_receive('execute_command_with_processes').never()

    module.restore_database_dump(
        database_config, 'test.yaml', {}, dry_run=True, extract_process=flexmock()
    )


def test_restore_database_dump_without_extract_process_restores_from_disk():
    database_config = [{'name': 'foo'}]
    calls = []

    def record_call(command, processes, **kwargs):
        calls.append((command, processes, kwargs))

    flexmock(module).should_receive('make_dump_path')
    flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path')
    flexmock(module).should_receive('execute_command_with_processes').replace_with(
        record_call
    ).once()

    module.restore_database_dump(
        database_config, 'test.yaml', {}, dry_run=False, extract_process=None
    )

    # Check what matters for an on-disk restore (the tool, the dump location, the database
    # selection and that nothing is piped in) rather than the exact flag spelling.
    ((command, processes, kwargs),) = calls
    assert command[0] == 'mongorestore'
    assert '/dump/path' in command
    assert command[-3:] == ['--drop', '--db', 'foo']
    assert processes == []
    assert kwargs == {
        'output_log_level': logging.DEBUG,
        'input_file': None,
        'borg_local_path': 'borg',
    }