Fix for potential data loss (data not getting backed up) when dumping large "directory" format PostgreSQL/MongoDB databases (#643).

This commit is contained in:
Dan Helfman 2023-02-20 15:18:51 -08:00
parent 37ac542b31
commit f5a448c7c2
7 changed files with 46 additions and 34 deletions

9
NEWS
View File

@ -1,3 +1,12 @@
1.7.7
* #643: Fix for potential data loss (data not getting backed up) when dumping large "directory"
format PostgreSQL/MongoDB databases. Prior to the fix, these dumps would not finish writing to
disk before Borg consumed them. Now, the dumping process completes before Borg starts. This only
applies to "directory" format databases; other formats still stream to Borg without using
temporary disk space.
* Fix MongoDB "directory" format to work with mongodump/mongorestore without error. Prior to this
fix, only the "archive" format worked.
1.7.6
* #393, #438, #560: Optionally dump "all" PostgreSQL/MySQL databases to separate files instead of
one combined dump file, allowing more convenient restores of individual databases. You can enable

View File

@ -45,13 +45,14 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
if dry_run:
continue
command = build_dump_command(database, dump_filename, dump_format)
if dump_format == 'directory':
dump.create_parent_directory_for_dump(dump_filename)
execute_command(command, shell=True)
else:
dump.create_named_pipe_for_dump(dump_filename)
command = build_dump_command(database, dump_filename, dump_format)
processes.append(execute_command(command, shell=True, run_to_completion=False))
processes.append(execute_command(command, shell=True, run_to_completion=False))
return processes
@ -61,9 +62,9 @@ def build_dump_command(database, dump_filename, dump_format):
Return the mongodump command from a single database configuration.
'''
all_databases = database['name'] == 'all'
command = ['mongodump', '--archive']
command = ['mongodump']
if dump_format == 'directory':
command.append(dump_filename)
command.extend(('--out', dump_filename))
if 'hostname' in database:
command.extend(('--host', database['hostname']))
if 'port' in database:
@ -79,7 +80,7 @@ def build_dump_command(database, dump_filename, dump_format):
if 'options' in database:
command.extend(database['options'].split(' '))
if dump_format != 'directory':
command.extend(('>', dump_filename))
command.extend(('--archive', '>', dump_filename))
return command
@ -145,9 +146,11 @@ def build_restore_command(extract_process, database, dump_filename):
'''
Return the mongorestore command from a single database configuration.
'''
command = ['mongorestore', '--archive']
if not extract_process:
command.append(dump_filename)
command = ['mongorestore']
if extract_process:
command.append('--archive')
else:
command.extend(('--dir', dump_filename))
if database['name'] != 'all':
command.extend(('--drop', '--db', database['name']))
if 'hostname' in database:

View File

@ -141,17 +141,19 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
if dump_format == 'directory':
dump.create_parent_directory_for_dump(dump_filename)
execute_command(
command, shell=True, extra_environment=extra_environment,
)
else:
dump.create_named_pipe_for_dump(dump_filename)
processes.append(
execute_command(
command,
shell=True,
extra_environment=extra_environment,
run_to_completion=False,
processes.append(
execute_command(
command,
shell=True,
extra_environment=extra_environment,
run_to_completion=False,
)
)
)
return processes

View File

@ -1,6 +1,6 @@
from setuptools import find_packages, setup
VERSION = '1.7.6'
VERSION = '1.7.7'
setup(

View File

@ -14,6 +14,7 @@ def write_configuration(
repository_path,
borgmatic_source_directory,
postgresql_dump_format='custom',
mongodb_dump_format='archive',
):
'''
Write out borgmatic configuration into a file at the config path. Set the options so as to work
@ -67,6 +68,7 @@ hooks:
username: root
password: test
authentication_database: admin
format: {mongodb_dump_format}
- name: all
hostname: mongodb
username: root
@ -136,6 +138,7 @@ def test_database_dump_and_restore_with_directory_format():
repository_path,
borgmatic_source_directory,
postgresql_dump_format='directory',
mongodb_dump_format='directory',
)
subprocess.check_call(

View File

@ -17,7 +17,7 @@ def test_dump_databases_runs_mongodump_for_each_database():
for name, process in zip(('foo', 'bar'), processes):
flexmock(module).should_receive('execute_command').with_args(
['mongodump', '--archive', '--db', name, '>', 'databases/localhost/{}'.format(name)],
['mongodump', '--db', name, '--archive', '>', 'databases/localhost/{}'.format(name)],
shell=True,
run_to_completion=False,
).and_return(process).once()
@ -49,13 +49,13 @@ def test_dump_databases_runs_mongodump_with_hostname_and_port():
flexmock(module).should_receive('execute_command').with_args(
[
'mongodump',
'--archive',
'--host',
'database.example.org',
'--port',
'5433',
'--db',
'foo',
'--archive',
'>',
'databases/database.example.org/foo',
],
@ -85,7 +85,6 @@ def test_dump_databases_runs_mongodump_with_username_and_password():
flexmock(module).should_receive('execute_command').with_args(
[
'mongodump',
'--archive',
'--username',
'mongo',
'--password',
@ -94,6 +93,7 @@ def test_dump_databases_runs_mongodump_with_username_and_password():
'admin',
'--db',
'foo',
'--archive',
'>',
'databases/localhost/foo',
],
@ -106,7 +106,6 @@ def test_dump_databases_runs_mongodump_with_username_and_password():
def test_dump_databases_runs_mongodump_with_directory_format():
databases = [{'name': 'foo', 'format': 'directory'}]
process = flexmock()
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
'databases/localhost/foo'
@ -115,12 +114,10 @@ def test_dump_databases_runs_mongodump_with_directory_format():
flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
flexmock(module).should_receive('execute_command').with_args(
['mongodump', '--archive', 'databases/localhost/foo', '--db', 'foo'],
shell=True,
run_to_completion=False,
).and_return(process).once()
['mongodump', '--out', 'databases/localhost/foo', '--db', 'foo'], shell=True,
).and_return(flexmock()).once()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == []
def test_dump_databases_runs_mongodump_with_options():
@ -133,7 +130,7 @@ def test_dump_databases_runs_mongodump_with_options():
flexmock(module.dump).should_receive('create_named_pipe_for_dump')
flexmock(module).should_receive('execute_command').with_args(
['mongodump', '--archive', '--db', 'foo', '--stuff=such', '>', 'databases/localhost/foo'],
['mongodump', '--db', 'foo', '--stuff=such', '--archive', '>', 'databases/localhost/foo'],
shell=True,
run_to_completion=False,
).and_return(process).once()
@ -305,12 +302,12 @@ def test_restore_database_dump_with_dry_run_skips_restore():
def test_restore_database_dump_without_extract_process_restores_from_disk():
database_config = [{'name': 'foo'}]
database_config = [{'name': 'foo', 'format': 'directory'}]
flexmock(module).should_receive('make_dump_path')
flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path')
flexmock(module).should_receive('execute_command_with_processes').with_args(
['mongorestore', '--archive', '/dump/path', '--drop', '--db', 'foo'],
['mongorestore', '--dir', '/dump/path', '--drop', '--db', 'foo'],
processes=[],
output_log_level=logging.DEBUG,
input_file=None,

View File

@ -270,7 +270,6 @@ def test_make_extra_environment_maps_options_to_environment():
def test_dump_databases_runs_pg_dump_with_directory_format():
databases = [{'name': 'foo', 'format': 'directory'}]
process = flexmock()
flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
@ -295,10 +294,9 @@ def test_dump_databases_runs_pg_dump_with_directory_format():
),
shell=True,
extra_environment={'PGSSLMODE': 'disable'},
run_to_completion=False,
).and_return(process).once()
).and_return(flexmock()).once()
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == []
def test_dump_databases_runs_pg_dump_with_options():