Split out (most of) command construction from create_archive() in preparation for reuse in spot check (#656).

This commit is contained in:
Dan Helfman 2024-04-04 14:23:56 -07:00
parent 57eb93760f
commit 6680aece5a
6 changed files with 959 additions and 1832 deletions

View File

@ -2,16 +2,15 @@ import datetime
import hashlib
import itertools
import logging
import pathlib
import os
import pathlib
import borgmatic.borg.extract
import borgmatic.borg.check
import borgmatic.borg.extract
import borgmatic.borg.state
import borgmatic.config.validate
import borgmatic.hooks.command
DEFAULT_CHECKS = (
{'name': 'repository', 'frequency': '1 month'},
{'name': 'archives', 'frequency': '1 month'},
@ -176,7 +175,9 @@ def make_check_time_path(config, borg_repository_id, check_type, archives_check_
that check's time (the time of that check last occurring).
'''
borgmatic_source_directory = os.path.expanduser(
config.get('borgmatic_source_directory', borgmatic.borg.state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY)
config.get(
'borgmatic_source_directory', borgmatic.borg.state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY
)
)
if check_type in ('archives', 'data'):
@ -354,9 +355,7 @@ def run_check(
remote_path=remote_path,
)
for check in borg_specific_checks:
write_check_time(
make_check_time_path(config, repository_id, check, archives_check_id)
)
write_check_time(make_check_time_path(config, repository_id, check, archives_check_id))
if 'extract' in checks:
borgmatic.borg.extract.extract_last_archive_dry_run(
@ -370,14 +369,14 @@ def run_check(
)
write_check_time(make_check_time_path(config, repository_id, 'extract'))
#if 'spot' in checks:
# TODO:
# count the number of files in source directories
# in a loop until the sample percentage (of the total source files) is met:
# pick a random file from source directories and calculate its sha256 sum
# extract the file from the latest archive (to stdout) and calculate its sha256 sum
# if the two checksums are equal, increment the matching files count
# if the percentage of matching files (of the total source files) < tolerance percentage, error
# if 'spot' in checks:
# TODO:
# count the number of files in source directories, but need to take patterns and stuff into account...
# in a loop until the sample percentage (of the total source files) is met:
# pick a random file from source directories and calculate its sha256 sum
# extract the file from the latest archive (to stdout) and calculate its sha256 sum
# if the two checksums are equal, increment the matching files count
# if the percentage of matching files (of the total source files) < tolerance percentage, error
borgmatic.hooks.command.execute_hook(
config.get('after_check'),

View File

@ -1,12 +1,10 @@
import argparse
import json
import logging
import os
from borgmatic.borg import environment, feature, flags, rinfo
from borgmatic.execute import DO_NOT_CAPTURE, execute_command
logger = logging.getLogger(__name__)
@ -85,7 +83,9 @@ def make_check_flags(checks, archive_filter_flags):
)
def get_repository_id(repository_path, config, local_borg_version, global_arguments, local_path, remote_path):
def get_repository_id(
repository_path, config, local_borg_version, global_arguments, local_path, remote_path
):
'''
Given a local or remote repository path, a configuration dict, the local Borg version, global
arguments, and local/remote commands to run, return the corresponding Borg repository ID.

View File

@ -320,35 +320,31 @@ def check_all_source_directories_exist(source_directories):
raise ValueError(f"Source directories do not exist: {', '.join(missing_directories)}")
def create_archive(
def make_base_create_command(
dry_run,
repository_path,
config,
config_paths,
local_borg_version,
global_arguments,
borgmatic_source_directories,
local_path='borg',
remote_path=None,
progress=False,
stats=False,
json=False,
list_files=False,
stream_processes=None,
):
'''
Given verbosity/dry-run flags, a local or remote repository path, a configuration dict, a
sequence of loaded configuration paths, the local Borg version, and global arguments as an
argparse.Namespace instance, create a Borg archive and return Borg's JSON output (if any).
If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
create command while also triggering the given processes to produce output.
sequence of loaded configuration paths, the local Borg version, global arguments as an
argparse.Namespace instance, and a sequence of borgmatic source directories, return a tuple of
(base Borg create command flags, Borg create command positional arguments, open pattern file
handle, open exclude file handle).
'''
borgmatic.logger.add_custom_log_levels()
borgmatic_source_directories = expand_directories(
collect_borgmatic_source_directories(config.get('borgmatic_source_directory'))
)
if config.get('source_directories_must_exist', False):
check_all_source_directories_exist(config.get('source_directories'))
sources = deduplicate_directories(
map_directories_to_devices(
expand_directories(
@ -364,11 +360,6 @@ def create_archive(
ensure_files_readable(config.get('patterns_from'), config.get('exclude_from'))
try:
working_directory = os.path.expanduser(config.get('working_directory'))
except TypeError:
working_directory = None
pattern_file = (
write_pattern_file(config.get('patterns'), sources)
if config.get('patterns') or config.get('patterns_from')
@ -451,6 +442,55 @@ def create_archive(
repository_path, archive_name_format, local_borg_version
) + (sources if not pattern_file else ())
return (create_flags, create_positional_arguments, pattern_file, exclude_file)
def create_archive(
dry_run,
repository_path,
config,
config_paths,
local_borg_version,
global_arguments,
local_path='borg',
remote_path=None,
progress=False,
stats=False,
json=False,
list_files=False,
stream_processes=None,
):
'''
Given verbosity/dry-run flags, a local or remote repository path, a configuration dict, a
sequence of loaded configuration paths, the local Borg version, and global arguments as an
argparse.Namespace instance, create a Borg archive and return Borg's JSON output (if any).
If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
create command while also triggering the given processes to produce output.
'''
borgmatic.logger.add_custom_log_levels()
borgmatic_source_directories = expand_directories(
collect_borgmatic_source_directories(config.get('borgmatic_source_directory'))
)
(create_flags, create_positional_arguments, pattern_file, exclude_file) = (
make_base_create_command(
dry_run,
repository_path,
config,
config_paths,
local_borg_version,
global_arguments,
borgmatic_source_directories,
local_path,
remote_path,
progress,
json,
list_files,
stream_processes,
)
)
if json:
output_log_level = None
elif list_files or (stats and not dry_run):
@ -462,6 +502,11 @@ def create_archive(
# the terminal directly.
output_file = DO_NOT_CAPTURE if progress else None
try:
working_directory = os.path.expanduser(config.get('working_directory'))
except TypeError:
working_directory = None
borg_environment = environment.make_environment(config)
# If database hooks are enabled (as indicated by streaming processes), exclude files that might

View File

@ -1,5 +1,5 @@
from flexmock import flexmock
import pytest
from flexmock import flexmock
from borgmatic.actions import check as module
@ -417,7 +417,9 @@ def test_run_check_checks_archives_for_configured_repository():
flexmock(module).should_receive('parse_checks')
flexmock(module.borgmatic.borg.check).should_receive('make_archive_filter_flags').and_return(())
flexmock(module).should_receive('make_archives_check_id').and_return(None)
flexmock(module).should_receive('filter_checks_on_frequency').and_return({'repository', 'archives'})
flexmock(module).should_receive('filter_checks_on_frequency').and_return(
{'repository', 'archives'}
)
flexmock(module.borgmatic.borg.check).should_receive('check_archives').once()
flexmock(module).should_receive('make_check_time_path')
flexmock(module).should_receive('write_check_time')
@ -527,7 +529,9 @@ def test_run_check_checks_archives_in_selected_repository():
flexmock(module).should_receive('parse_checks')
flexmock(module.borgmatic.borg.check).should_receive('make_archive_filter_flags').and_return(())
flexmock(module).should_receive('make_archives_check_id').and_return(None)
flexmock(module).should_receive('filter_checks_on_frequency').and_return({'repository', 'archives'})
flexmock(module).should_receive('filter_checks_on_frequency').and_return(
{'repository', 'archives'}
)
flexmock(module.borgmatic.borg.check).should_receive('check_archives').once()
flexmock(module).should_receive('make_check_time_path')
flexmock(module).should_receive('write_check_time')

View File

@ -69,7 +69,10 @@ def test_make_archive_filter_flags_with_archives_check_and_last_includes_last_fl
flexmock(module.flags).should_receive('make_match_archives_flags').and_return(())
flags = module.make_archive_filter_flags(
'1.2.3', {'check_last': 3}, ('archives',), check_arguments=flexmock(match_archives=None),
'1.2.3',
{'check_last': 3},
('archives',),
check_arguments=flexmock(match_archives=None),
)
assert flags == ('--last', '3')
@ -80,7 +83,10 @@ def test_make_archive_filter_flags_with_data_check_and_last_includes_last_flag()
flexmock(module.flags).should_receive('make_match_archives_flags').and_return(())
flags = module.make_archive_filter_flags(
'1.2.3', {'check_last': 3}, ('data',), check_arguments=flexmock(match_archives=None),
'1.2.3',
{'check_last': 3},
('data',),
check_arguments=flexmock(match_archives=None),
)
assert flags == ('--last', '3')
@ -91,7 +97,10 @@ def test_make_archive_filter_flags_with_repository_check_and_last_omits_last_fla
flexmock(module.flags).should_receive('make_match_archives_flags').and_return(())
flags = module.make_archive_filter_flags(
'1.2.3', {'check_last': 3}, ('repository',), check_arguments=flexmock(match_archives=None),
'1.2.3',
{'check_last': 3},
('repository',),
check_arguments=flexmock(match_archives=None),
)
assert flags == ()
@ -116,7 +125,10 @@ def test_make_archive_filter_flags_with_archives_check_and_prefix_includes_match
flexmock(module.flags).should_receive('make_match_archives_flags').and_return(())
flags = module.make_archive_filter_flags(
'1.2.3', {'prefix': 'foo-'}, ('archives',), check_arguments=flexmock(match_archives=None),
'1.2.3',
{'prefix': 'foo-'},
('archives',),
check_arguments=flexmock(match_archives=None),
)
assert flags == ('--match-archives', 'sh:foo-*')
@ -127,7 +139,10 @@ def test_make_archive_filter_flags_with_data_check_and_prefix_includes_match_arc
flexmock(module.flags).should_receive('make_match_archives_flags').and_return(())
flags = module.make_archive_filter_flags(
'1.2.3', {'prefix': 'foo-'}, ('data',), check_arguments=flexmock(match_archives=None),
'1.2.3',
{'prefix': 'foo-'},
('data',),
check_arguments=flexmock(match_archives=None),
)
assert flags == ('--match-archives', 'sh:foo-*')
@ -170,7 +185,10 @@ def test_make_archive_filter_flags_with_archives_check_and_none_prefix_omits_mat
flexmock(module.flags).should_receive('make_match_archives_flags').and_return(())
flags = module.make_archive_filter_flags(
'1.2.3', {}, ('archives',), check_arguments=flexmock(match_archives=None),
'1.2.3',
{},
('archives',),
check_arguments=flexmock(match_archives=None),
)
assert flags == ()
@ -181,7 +199,10 @@ def test_make_archive_filter_flags_with_repository_check_and_prefix_omits_match_
flexmock(module.flags).should_receive('make_match_archives_flags').and_return(())
flags = module.make_archive_filter_flags(
'1.2.3', {'prefix': 'foo-'}, ('repository',), check_arguments=flexmock(match_archives=None),
'1.2.3',
{'prefix': 'foo-'},
('repository',),
check_arguments=flexmock(match_archives=None),
)
assert flags == ()

File diff suppressed because it is too large Load Diff