diff --git a/NEWS b/NEWS index 0ddc500d3..48adc0f32 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,7 @@ 1.7.6.dev0 + * #438, #560: Optionally dump "all" PostgreSQL databases to separate files instead of one combined + dump file, allowing more convenient restores of individual databases. You can enable this by + specifying the database dump "format" option when the database is named "all". * #602: Fix logs that interfere with JSON output by making warnings go to stderr instead of stdout. 1.7.5 diff --git a/borgmatic/config/schema.yaml b/borgmatic/config/schema.yaml index b0f0223af..62c16c69a 100644 --- a/borgmatic/config/schema.yaml +++ b/borgmatic/config/schema.yaml @@ -691,10 +691,13 @@ properties: type: string description: | Database name (required if using this hook). Or - "all" to dump all databases on the host. Note - that using this database hook implicitly enables - both read_special and one_file_system (see - above) to support dump and restore streaming. + "all" to dump all databases on the host. (Also + set the "format" to dump each database to a + separate file instead of one combined file.) + Note that using this database hook implicitly + enables both read_special and one_file_system + (see above) to support dump and restore + streaming. example: users hostname: type: string @@ -729,9 +732,14 @@ properties: description: | Database dump output format. One of "plain", "custom", "directory", or "tar". Defaults to - "custom" (unlike raw pg_dump). See pg_dump - documentation for details. Note that format is - ignored when the database name is "all". + "custom" (unlike raw pg_dump) for a single + database. Or, when database name is "all" and + format is blank, dumps all databases to a single + file. But if a format is specified with an "all" + database name, dumps each database to a separate + file of that format, allowing more convenient + restores of individual databases. See the + pg_dump documentation for more about formats. 
example: directory ssl_mode: type: string diff --git a/borgmatic/hooks/mysql.py b/borgmatic/hooks/mysql.py index c2d91e65b..c7c92c91e 100644 --- a/borgmatic/hooks/mysql.py +++ b/borgmatic/hooks/mysql.py @@ -24,7 +24,7 @@ SYSTEM_DATABASE_NAMES = ('information_schema', 'mysql', 'performance_schema', 's def database_names_to_dump(database, extra_environment, log_prefix, dry_run_label): ''' - Given a requested database name, return the corresponding sequence of database names to dump. + Given a requested database config, return the corresponding sequence of database names to dump. In the case of "all", query for the names of databases on the configured host and return them, excluding any system databases that will cause problems during restore. ''' diff --git a/borgmatic/hooks/postgresql.py b/borgmatic/hooks/postgresql.py index 6a0ac690c..c184774d5 100644 --- a/borgmatic/hooks/postgresql.py +++ b/borgmatic/hooks/postgresql.py @@ -1,6 +1,11 @@ +import csv import logging -from borgmatic.execute import execute_command, execute_command_with_processes +from borgmatic.execute import ( + execute_command, + execute_command_and_capture_output, + execute_command_with_processes, +) from borgmatic.hooks import dump logger = logging.getLogger(__name__) @@ -34,6 +39,44 @@ def make_extra_environment(database): return extra +EXCLUDED_DATABASE_NAMES = ('template0', 'template1') + + +def database_names_to_dump(database, extra_environment, log_prefix, dry_run_label): + ''' + Given a requested database config, return the corresponding sequence of database names to dump. + In the case of "all" when a database format is given, query for the names of databases on the + configured host and return them. For "all" without a database format, just return a sequence + containing "all". 
+ ''' + requested_name = database['name'] + + if requested_name != 'all': + return (requested_name,) + if not database.get('format'): + return ('all',) + + list_command = ( + ('psql', '--list', '--no-password', '--csv', '--tuples-only') + + (('--host', database['hostname']) if 'hostname' in database else ()) + + (('--port', str(database['port'])) if 'port' in database else ()) + + (('--username', database['username']) if 'username' in database else ()) + + (tuple(database['options'].split(' ')) if 'options' in database else ()) + ) + logger.debug( + '{}: Querying for "all" PostgreSQL databases to dump{}'.format(log_prefix, dry_run_label) + ) + list_output = execute_command_and_capture_output( + list_command, extra_environment=extra_environment + ) + + return tuple( + row[0] + for row in csv.reader(list_output.splitlines(), delimiter=',', quotechar='"') + if row[0] not in EXCLUDED_DATABASE_NAMES + ) + + def dump_databases(databases, log_prefix, location_config, dry_run): ''' Dump the given PostgreSQL databases to a named pipe. The databases are supplied as a sequence of @@ -43,6 +86,8 @@ def dump_databases(databases, log_prefix, location_config, dry_run): Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence. + + Raise ValueError if the databases to dump cannot be determined. 
''' dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else '' processes = [] @@ -50,48 +95,59 @@ def dump_databases(databases, log_prefix, location_config, dry_run): logger.info('{}: Dumping PostgreSQL databases{}'.format(log_prefix, dry_run_label)) for database in databases: - name = database['name'] - dump_filename = dump.make_database_dump_filename( - make_dump_path(location_config), name, database.get('hostname') - ) - all_databases = bool(name == 'all') - dump_format = database.get('format', 'custom') - default_dump_command = 'pg_dumpall' if all_databases else 'pg_dump' - dump_command = database.get('pg_dump_command') or default_dump_command - command = ( - (dump_command, '--no-password', '--clean', '--if-exists',) - + (('--host', database['hostname']) if 'hostname' in database else ()) - + (('--port', str(database['port'])) if 'port' in database else ()) - + (('--username', database['username']) if 'username' in database else ()) - + (() if all_databases else ('--format', dump_format)) - + (('--file', dump_filename) if dump_format == 'directory' else ()) - + (tuple(database['options'].split(' ')) if 'options' in database else ()) - + (() if all_databases else (name,)) - # Use shell redirection rather than the --file flag to sidestep synchronization issues - # when pg_dump/pg_dumpall tries to write to a named pipe. But for the directory dump - # format in a particular, a named destination is required, and redirection doesn't work. 
- + (('>', dump_filename) if dump_format != 'directory' else ()) - ) extra_environment = make_extra_environment(database) - - logger.debug( - '{}: Dumping PostgreSQL database {} to {}{}'.format( - log_prefix, name, dump_filename, dry_run_label - ) + dump_path = make_dump_path(location_config) + dump_database_names = database_names_to_dump( + database, extra_environment, log_prefix, dry_run_label ) - if dry_run: - continue - if dump_format == 'directory': - dump.create_parent_directory_for_dump(dump_filename) - else: - dump.create_named_pipe_for_dump(dump_filename) + if not dump_database_names: + raise ValueError('Cannot find any PostgreSQL databases to dump.') - processes.append( - execute_command( - command, shell=True, extra_environment=extra_environment, run_to_completion=False + for database_name in dump_database_names: + dump_format = database.get('format', None if database_name == 'all' else 'custom') + default_dump_command = 'pg_dumpall' if database_name == 'all' else 'pg_dump' + dump_command = database.get('pg_dump_command') or default_dump_command + dump_filename = dump.make_database_dump_filename( + dump_path, database_name, database.get('hostname') + ) + + command = ( + (dump_command, '--no-password', '--clean', '--if-exists',) + + (('--host', database['hostname']) if 'hostname' in database else ()) + + (('--port', str(database['port'])) if 'port' in database else ()) + + (('--username', database['username']) if 'username' in database else ()) + + (('--format', dump_format) if dump_format else ()) + + (('--file', dump_filename) if dump_format == 'directory' else ()) + + (tuple(database['options'].split(' ')) if 'options' in database else ()) + + (() if database_name == 'all' else (database_name,)) + # Use shell redirection rather than the --file flag to sidestep synchronization issues + # when pg_dump/pg_dumpall tries to write to a named pipe. 
But for the directory dump + # format in particular, a named destination is required, and redirection doesn't work. + + (('>', dump_filename) if dump_format != 'directory' else ()) + ) + + logger.debug( + '{}: Dumping PostgreSQL database "{}" to {}{}'.format( + log_prefix, database_name, dump_filename, dry_run_label + ) + ) + if dry_run: + continue + + if dump_format == 'directory': + dump.create_parent_directory_for_dump(dump_filename) + else: + dump.create_named_pipe_for_dump(dump_filename) + + processes.append( + execute_command( + command, + shell=True, + extra_environment=extra_environment, + run_to_completion=False, + ) ) - ) return processes diff --git a/docs/how-to/backup-your-databases.md b/docs/how-to/backup-your-databases.md index 1386a1428..5884ccf96 100644 --- a/docs/how-to/backup-your-databases.md +++ b/docs/how-to/backup-your-databases.md @@ -76,6 +76,9 @@ hooks: options: "--ssl" ``` + +### All databases + If you want to dump all databases on a host, use `all` for the database name: ```yaml @@ -91,9 +94,30 @@ hooks: Note that you may need to use a `username` of the `postgres` superuser for this to work with PostgreSQL. -If you would like to backup databases only and not source directories, you can -specify an empty `source_directories` value (as it is a mandatory field prior -to borgmatic 1.7.1): +New in version 1.7.6 With +PostgreSQL and MySQL, you can optionally dump "all" databases to separate +files instead of one combined dump file, allowing more convenient restores of +individual databases. Enable this by specifying your desired database dump +`format`: + +```yaml +hooks: + postgresql_databases: + - name: all + format: custom + mysql_databases: + - name: all + format: sql +``` + +### No source directories + +New in version 1.7.1 If you +would like to backup databases only and not source directories, you can omit +`source_directories` entirely. 
+ +In older versions of borgmatic, instead specify an empty `source_directories` +value, as it is a mandatory option prior to version 1.7.1: ```yaml location: @@ -103,8 +127,7 @@ hooks: - name: all ``` -New in version 1.7.1 You can -omit `source_directories` entirely. + ### External passwords diff --git a/tests/unit/hooks/test_postgresql.py b/tests/unit/hooks/test_postgresql.py index aeb0d4b07..de5ac04bf 100644 --- a/tests/unit/hooks/test_postgresql.py +++ b/tests/unit/hooks/test_postgresql.py @@ -6,15 +6,57 @@ from flexmock import flexmock from borgmatic.hooks import postgresql as module +def test_database_names_to_dump_passes_through_individual_database_name(): + database = {'name': 'foo'} + + assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ('foo',) + + +def test_database_names_to_dump_passes_through_individual_database_name_with_format(): + database = {'name': 'foo', 'format': 'custom'} + + assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ('foo',) + + +def test_database_names_to_dump_passes_through_all_without_format(): + database = {'name': 'all'} + + assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ('all',) + + +def test_database_names_to_dump_with_all_and_format_lists_databases(): + database = {'name': 'all', 'format': 'custom'} + flexmock(module).should_receive('execute_command_and_capture_output').and_return( + 'foo,test,\nbar,test,"stuff and such"' + ) + + assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ( + 'foo', + 'bar', + ) + + +def test_database_names_to_dump_with_all_and_format_excludes_particular_databases(): + database = {'name': 'all', 'format': 'custom'} + flexmock(module).should_receive('execute_command_and_capture_output').and_return( + 'foo,test,\ntemplate0,test,blah' + ) + + assert module.database_names_to_dump(database, flexmock(), flexmock(), flexmock()) == ('foo',) + + def 
test_dump_databases_runs_pg_dump_for_each_database(): databases = [{'name': 'foo'}, {'name': 'bar'}] processes = [flexmock(), flexmock()] + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)).and_return( + ('bar',) + ) flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/localhost/foo' ).and_return('databases/localhost/bar') flexmock(module.dump).should_receive('create_named_pipe_for_dump') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) for name, process in zip(('foo', 'bar'), processes): flexmock(module).should_receive('execute_command').with_args( @@ -37,14 +79,27 @@ def test_dump_databases_runs_pg_dump_for_each_database(): assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == processes +def test_dump_databases_runs_raises_when_no_database_names_to_dump(): + databases = [{'name': 'foo'}, {'name': 'bar'}] + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) + flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(()) + + with pytest.raises(ValueError): + module.dump_databases(databases, 'test.yaml', {}, dry_run=False) + + def test_dump_databases_with_dry_run_skips_pg_dump(): databases = [{'name': 'foo'}, {'name': 'bar'}] + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)).and_return( + ('bar',) + ) flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/localhost/foo' ).and_return('databases/localhost/bar') 
flexmock(module.dump).should_receive('create_named_pipe_for_dump').never() - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command').never() assert module.dump_databases(databases, 'test.yaml', {}, dry_run=True) == [] @@ -53,12 +108,13 @@ def test_dump_databases_with_dry_run_skips_pg_dump(): def test_dump_databases_runs_pg_dump_with_hostname_and_port(): databases = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}] process = flexmock() + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)) flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/database.example.org/foo' ) flexmock(module.dump).should_receive('create_named_pipe_for_dump') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command').with_args( ( @@ -87,14 +143,15 @@ def test_dump_databases_runs_pg_dump_with_hostname_and_port(): def test_dump_databases_runs_pg_dump_with_username_and_password(): databases = [{'name': 'foo', 'username': 'postgres', 'password': 'trustsome1'}] process = flexmock() + flexmock(module).should_receive('make_extra_environment').and_return( + {'PGPASSWORD': 'trustsome1', 'PGSSLMODE': 'disable'} + ) flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)) flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/localhost/foo' ) flexmock(module.dump).should_receive('create_named_pipe_for_dump') - flexmock(module).should_receive('make_extra_environment').and_return( - {'PGPASSWORD': 'trustsome1', 'PGSSLMODE': 'disable'} - ) 
flexmock(module).should_receive('execute_command').with_args( ( @@ -144,13 +201,14 @@ def test_make_extra_environment_maps_options_to_environment(): def test_dump_databases_runs_pg_dump_with_directory_format(): databases = [{'name': 'foo', 'format': 'directory'}] process = flexmock() + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)) flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/localhost/foo' ) flexmock(module.dump).should_receive('create_parent_directory_for_dump') flexmock(module.dump).should_receive('create_named_pipe_for_dump').never() - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command').with_args( ( @@ -175,12 +233,13 @@ def test_dump_databases_runs_pg_dump_with_directory_format(): def test_dump_databases_runs_pg_dump_with_options(): databases = [{'name': 'foo', 'options': '--stuff=such'}] process = flexmock() + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)) flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/localhost/foo' ) flexmock(module.dump).should_receive('create_named_pipe_for_dump') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command').with_args( ( @@ -206,12 +265,13 @@ def test_dump_databases_runs_pg_dump_with_options(): def test_dump_databases_runs_pg_dumpall_for_all_databases(): databases = [{'name': 'all'}] process = flexmock() + 
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(('all',)) flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/localhost/all' ) flexmock(module.dump).should_receive('create_named_pipe_for_dump') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command').with_args( ('pg_dumpall', '--no-password', '--clean', '--if-exists', '>', 'databases/localhost/all'), @@ -226,12 +286,13 @@ def test_dump_databases_runs_pg_dumpall_for_all_databases(): def test_dump_databases_runs_non_default_pg_dump(): databases = [{'name': 'foo', 'pg_dump_command': 'special_pg_dump'}] process = flexmock() + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path').and_return('') + flexmock(module).should_receive('database_names_to_dump').and_return(('foo',)) flexmock(module.dump).should_receive('make_database_dump_filename').and_return( 'databases/localhost/foo' ) flexmock(module.dump).should_receive('create_named_pipe_for_dump') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command').with_args( ( @@ -257,9 +318,9 @@ def test_restore_database_dump_runs_pg_restore(): database_config = [{'name': 'foo'}] extract_process = flexmock(stdout=flexmock()) + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path') flexmock(module.dump).should_receive('make_database_dump_filename') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) 
flexmock(module).should_receive('execute_command_with_processes').with_args( ( 'pg_restore', @@ -288,9 +349,9 @@ def test_restore_database_dump_runs_pg_restore(): def test_restore_database_dump_errors_on_multiple_database_config(): database_config = [{'name': 'foo'}, {'name': 'bar'}] + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path') flexmock(module.dump).should_receive('make_database_dump_filename') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command_with_processes').never() flexmock(module).should_receive('execute_command').never() @@ -304,9 +365,9 @@ def test_restore_database_dump_runs_pg_restore_with_hostname_and_port(): database_config = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}] extract_process = flexmock(stdout=flexmock()) + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path') flexmock(module.dump).should_receive('make_database_dump_filename') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command_with_processes').with_args( ( 'pg_restore', @@ -352,11 +413,11 @@ def test_restore_database_dump_runs_pg_restore_with_username_and_password(): database_config = [{'name': 'foo', 'username': 'postgres', 'password': 'trustsome1'}] extract_process = flexmock(stdout=flexmock()) - flexmock(module).should_receive('make_dump_path') - flexmock(module.dump).should_receive('make_database_dump_filename') flexmock(module).should_receive('make_extra_environment').and_return( {'PGPASSWORD': 'trustsome1', 'PGSSLMODE': 'disable'} ) + flexmock(module).should_receive('make_dump_path') + flexmock(module.dump).should_receive('make_database_dump_filename') 
flexmock(module).should_receive('execute_command_with_processes').with_args( ( 'pg_restore', @@ -398,9 +459,9 @@ def test_restore_database_dump_runs_psql_for_all_database_dump(): database_config = [{'name': 'all'}] extract_process = flexmock(stdout=flexmock()) + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path') flexmock(module.dump).should_receive('make_database_dump_filename') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command_with_processes').with_args( ('psql', '--no-password'), processes=[extract_process], @@ -424,9 +485,9 @@ def test_restore_database_dump_runs_non_default_pg_restore_and_psql(): ] extract_process = flexmock(stdout=flexmock()) + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path') flexmock(module.dump).should_receive('make_database_dump_filename') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command_with_processes').with_args( ( 'special_pg_restore', @@ -455,9 +516,9 @@ def test_restore_database_dump_runs_non_default_pg_restore_and_psql(): def test_restore_database_dump_with_dry_run_skips_restore(): database_config = [{'name': 'foo'}] + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path') flexmock(module.dump).should_receive('make_database_dump_filename') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command_with_processes').never() module.restore_database_dump( @@ -468,9 +529,9 @@ def test_restore_database_dump_with_dry_run_skips_restore(): def 
test_restore_database_dump_without_extract_process_restores_from_disk(): database_config = [{'name': 'foo'}] + flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('make_dump_path') flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path') - flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'}) flexmock(module).should_receive('execute_command_with_processes').with_args( ( 'pg_restore',