# Forked from borgmatic-collective/borgmatic.
import copy
|
|
import logging
|
|
import os
|
|
|
|
import borgmatic.borg.extract
|
|
import borgmatic.borg.list
|
|
import borgmatic.borg.mount
|
|
import borgmatic.borg.rlist
|
|
import borgmatic.borg.state
|
|
import borgmatic.config.validate
|
|
import borgmatic.hooks.dispatch
|
|
import borgmatic.hooks.dump
|
|
|
|
logger = logging.getLogger(__name__)


# Sentinel meaning "no particular hook was specified"; tells get_configured_data_source() to
# search every data source hook for the requested data source name.
UNSPECIFIED_HOOK = object()


def get_configured_data_source(
    config,
    archive_data_source_names,
    hook_name,
    data_source_name,
    configuration_data_source_name=None,
):
    '''
    Find the first data source with the given hook name and data source name in the configuration
    dict and the given archive data source names dict (from hook name to data source names contained
    in a particular backup archive). If UNSPECIFIED_HOOK is given as the hook name, search all data
    source hooks for the named data source. If a configuration data source name is given, use that
    instead of the data source name to look up the data source in the given hooks configuration.

    Return the found data source as a tuple of (found hook name, data source configuration dict) or
    (None, None) if not found.
    '''
    # By default, look up the data source in the configuration under its own name.
    configuration_data_source_name = configuration_data_source_name or data_source_name

    if hook_name is UNSPECIFIED_HOOK:
        # Search every configured data source hook.
        hooks_to_search = {
            name: data_sources
            for (name, data_sources) in config.items()
            if name in borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES
        }
    elif hook_name in config:
        hooks_to_search = {hook_name: config[hook_name]}
    else:
        return (None, None)

    # A match requires both: the configured data source entry has the requested configuration
    # name, and the archive actually contains a dump for the requested data source name.
    for (name, hook_data_sources) in hooks_to_search.items():
        for hook_data_source in hook_data_sources:
            if hook_data_source['name'] == configuration_data_source_name and (
                data_source_name in archive_data_source_names.get(name, [])
            ):
                return (name, hook_data_source)

    return (None, None)
|
|
|
|
|
|
def restore_single_data_source(
    repository,
    config,
    local_borg_version,
    global_arguments,
    local_path,
    remote_path,
    archive_name,
    hook_name,
    data_source,
    connection_params,
):  # pragma: no cover
    '''
    Given (among other things) an archive name, a data source hook name, the hostname, port,
    username/password as connection params, and a configured data source configuration dict, restore
    that data source from the archive.
    '''
    logger.info(
        f'{repository.get("label", repository["path"])}: Restoring data source {data_source["name"]}'
    )

    # Ask the relevant hook for the glob pattern matching this data source's dump within the
    # archive.
    dump_pattern = borgmatic.hooks.dispatch.call_hooks(
        'make_data_source_dump_pattern',
        config,
        repository['path'],
        borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES,
        data_source['name'],
    )[hook_name]

    # A directory format dump isn't a single file, and therefore can't extract to stdout. In that
    # case, the extract_process return value is None.
    extract_to_stdout = data_source.get('format') != 'directory'

    # Kick off a single data source extract, to stdout when possible.
    extract_process = borgmatic.borg.extract.extract_archive(
        dry_run=global_arguments.dry_run,
        repository=repository['path'],
        archive=archive_name,
        paths=borgmatic.hooks.dump.convert_glob_patterns_to_borg_patterns([dump_pattern]),
        config=config,
        local_borg_version=local_borg_version,
        global_arguments=global_arguments,
        local_path=local_path,
        remote_path=remote_path,
        destination_path='/',
        extract_to_stdout=extract_to_stdout,
    )

    # Run a single data source restore, consuming the extract stdout (if any).
    borgmatic.hooks.dispatch.call_hooks(
        function_name='restore_data_source_dump',
        config=config,
        log_prefix=repository['path'],
        hook_names=[hook_name],
        data_source=data_source,
        dry_run=global_arguments.dry_run,
        extract_process=extract_process,
        connection_params=connection_params,
    )
|
|
|
|
|
|
def collect_archive_data_source_names(
    repository,
    archive,
    config,
    local_borg_version,
    global_arguments,
    local_path,
    remote_path,
):
    '''
    Given a local or remote repository path, a resolved archive name, a configuration dict, the
    local Borg version, global_arguments an argparse.Namespace, and local and remote Borg paths,
    query the archive for the names of data sources it contains as dumps and return them as a dict
    from hook name to a sequence of data source names.
    '''
    # Normalize the configured source directory to a path relative to the archive root.
    borgmatic_source_directory = os.path.expanduser(
        config.get(
            'borgmatic_source_directory', borgmatic.borg.state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY
        )
    ).lstrip('/')

    # List the dump files stored within the archive, matching paths like
    # "<source directory>/<hook>_databases/<hostname>/<data source name>".
    dump_paths = borgmatic.borg.list.capture_archive_listing(
        repository,
        archive,
        config,
        local_borg_version,
        global_arguments,
        list_paths=[
            os.path.expanduser(
                borgmatic.hooks.dump.make_data_source_dump_path(borgmatic_source_directory, pattern)
            )
            for pattern in ('*_databases/*/*',)
        ],
        local_path=local_path,
        remote_path=remote_path,
    )

    # Determine the data source names corresponding to the dumps found in the archive.
    archive_data_source_names = {}

    for dump_path in dump_paths:
        try:
            # Strip everything up through the source directory, then take hook name and data
            # source name from the remaining "<hook>/<host>/<name>" path components.
            relative_dump_path = dump_path.split(borgmatic_source_directory + os.path.sep, 1)[1]
            (hook_name, _, data_source_name) = relative_dump_path.split(os.path.sep)[0:3]
        except (ValueError, IndexError):
            logger.warning(
                f'{repository}: Ignoring invalid data source dump path "{dump_path}" in archive {archive}'
            )
            continue

        hook_data_source_names = archive_data_source_names.setdefault(hook_name, [])

        # De-duplicate, since multiple dump files can belong to one data source.
        if data_source_name not in hook_data_source_names:
            hook_data_source_names.append(data_source_name)

    return archive_data_source_names
|
|
|
|
|
|
def find_data_sources_to_restore(requested_data_source_names, archive_data_source_names):
    '''
    Given a sequence of requested data source names to restore and a dict of hook name to the names
    of data sources found in an archive, return an expanded dict from hook name (or
    UNSPECIFIED_HOOK) to a sequence of data source names to restore, replacing "all" with actual
    data source names as appropriate.

    Raise ValueError if any of the requested data source names cannot be found in the archive.
    '''
    # A map from data source hook name to the data source names to restore for that hook. Copy the
    # requested names into a fresh list so that the "all" expansion below doesn't mutate the
    # caller's sequence (e.g. the argparse-parsed restore_arguments.data_sources list).
    restore_names = (
        {UNSPECIFIED_HOOK: list(requested_data_source_names)}
        if requested_data_source_names
        else {UNSPECIFIED_HOOK: ['all']}
    )

    # If "all" is in restore_names, then replace it with the names of dumps found within the
    # archive.
    if 'all' in restore_names[UNSPECIFIED_HOOK]:
        restore_names[UNSPECIFIED_HOOK].remove('all')

        for hook_name, data_source_names in archive_data_source_names.items():
            restore_names.setdefault(hook_name, []).extend(data_source_names)

            # If a data source is to be restored as part of "all", then remove it from restore
            # names so it doesn't get restored twice.
            for data_source_name in data_source_names:
                if data_source_name in restore_names[UNSPECIFIED_HOOK]:
                    restore_names[UNSPECIFIED_HOOK].remove(data_source_name)

        if not restore_names[UNSPECIFIED_HOOK]:
            restore_names.pop(UNSPECIFIED_HOOK)

    # Flatten both sides to sets of names so any explicitly requested name missing from the
    # archive can be reported.
    combined_restore_names = {
        name for data_source_names in restore_names.values() for name in data_source_names
    }
    combined_archive_data_source_names = {
        name
        for data_source_names in archive_data_source_names.values()
        for name in data_source_names
    }

    missing_names = sorted(combined_restore_names - combined_archive_data_source_names)
    if missing_names:
        joined_names = ', '.join(f'"{name}"' for name in missing_names)
        raise ValueError(
            f"Cannot restore data source{'s' if len(missing_names) > 1 else ''} {joined_names} missing from archive"
        )

    return restore_names
|
|
|
|
|
|
def ensure_data_sources_found(restore_names, remaining_restore_names, found_names):
    '''
    Given a dict from hook name to data source names to restore, a dict from hook name to remaining
    data source names to restore, and a sequence of found (actually restored) data source names,
    raise ValueError if any requested data sources to restore were missing from the archive and/or
    configuration.
    '''
    # Flatten both restore-name dicts into a single set of requested names.
    requested_names = {
        name
        for data_source_names in (
            tuple(restore_names.values()) + tuple(remaining_restore_names.values())
        )
        for name in data_source_names
    }

    if not requested_names and not found_names:
        raise ValueError('No data sources were found to restore')

    missing_names = sorted(requested_names - set(found_names))

    if missing_names:
        joined_names = ', '.join(f'"{name}"' for name in missing_names)
        raise ValueError(
            f"Cannot restore data source{'s' if len(missing_names) > 1 else ''} {joined_names} missing from borgmatic's configuration"
        )
|
|
|
|
|
|
def run_restore(
    repository,
    config,
    local_borg_version,
    restore_arguments,
    global_arguments,
    local_path,
    remote_path,
):
    '''
    Run the "restore" action for the given repository, but only if the repository matches the
    requested repository in restore arguments.

    This queries the archive for the data sources it contains, matches them against the
    configuration in two passes (exact name matches first, then "all" fallback entries), and
    restores each match via its hook.

    Raise ValueError if a configured data source could not be found to restore.
    '''
    # Skip this repository entirely if the user requested restore from a different one.
    if restore_arguments.repository and not borgmatic.config.validate.repositories_match(
        repository, restore_arguments.repository
    ):
        return

    logger.info(
        f'{repository.get("label", repository["path"])}: Restoring data sources from archive {restore_arguments.archive}'
    )

    # Clear out any dump files left over from a previous (possibly failed) run before extracting
    # fresh ones.
    borgmatic.hooks.dispatch.call_hooks_even_if_unconfigured(
        'remove_data_source_dumps',
        config,
        repository['path'],
        borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES,
        global_arguments.dry_run,
    )

    # Resolve archive shortcuts like "latest" to a concrete archive name.
    archive_name = borgmatic.borg.rlist.resolve_archive_name(
        repository['path'],
        restore_arguments.archive,
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
    )
    # Dict from hook name to the data source names dumped within the archive.
    archive_data_source_names = collect_archive_data_source_names(
        repository['path'],
        archive_name,
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
    )
    restore_names = find_data_sources_to_restore(
        restore_arguments.data_sources, archive_data_source_names
    )
    # Names actually restored, used at the end to detect anything requested but never restored.
    found_names = set()
    # Names with no exact configuration match, retried below against "all" entries.
    remaining_restore_names = {}
    connection_params = {
        'hostname': restore_arguments.hostname,
        'port': restore_arguments.port,
        'username': restore_arguments.username,
        'password': restore_arguments.password,
        'restore_path': restore_arguments.restore_path,
    }

    # First pass: restore data sources that have an exact name match in the configuration.
    for hook_name, data_source_names in restore_names.items():
        for data_source_name in data_source_names:
            found_hook_name, found_data_source = get_configured_data_source(
                config, archive_data_source_names, hook_name, data_source_name
            )

            if not found_data_source:
                remaining_restore_names.setdefault(found_hook_name or hook_name, []).append(
                    data_source_name
                )
                continue

            found_names.add(data_source_name)
            restore_single_data_source(
                repository,
                config,
                local_borg_version,
                global_arguments,
                local_path,
                remote_path,
                archive_name,
                found_hook_name or hook_name,
                # Overlay any requested schemas onto the configured data source.
                dict(found_data_source, **{'schemas': restore_arguments.schemas}),
                connection_params,
            )

    # For any data sources that weren't found via exact matches in the configuration, try to
    # fallback to "all" entries.
    for hook_name, data_source_names in remaining_restore_names.items():
        for data_source_name in data_source_names:
            found_hook_name, found_data_source = get_configured_data_source(
                config, archive_data_source_names, hook_name, data_source_name, 'all'
            )

            if not found_data_source:
                continue

            found_names.add(data_source_name)
            # Shallow-copy the "all" entry so renaming it below doesn't alter the configuration.
            data_source = copy.copy(found_data_source)
            data_source['name'] = data_source_name

            restore_single_data_source(
                repository,
                config,
                local_borg_version,
                global_arguments,
                local_path,
                remote_path,
                archive_name,
                found_hook_name or hook_name,
                dict(data_source, **{'schemas': restore_arguments.schemas}),
                connection_params,
            )

    # Clean up the dump files extracted during this run.
    borgmatic.hooks.dispatch.call_hooks_even_if_unconfigured(
        'remove_data_source_dumps',
        config,
        repository['path'],
        borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES,
        global_arguments.dry_run,
    )

    # Raise if anything requested was never actually restored.
    ensure_data_sources_found(restore_names, remaining_restore_names, found_names)
|