ZFS snapshots (#261).

Reviewed-on: #944
commit 8de7094691
Dan Helfman, 2024-11-24 04:42:19 +00:00
39 changed files with 1737 additions and 787 deletions

NEWS

@ -1,4 +1,6 @@
1.9.3.dev0
* #261 (beta): Add a ZFS hook for snapshotting and backing up ZFS datasets. See the documentation
for more information: https://torsion.org/borgmatic/docs/how-to/snapshot-your-filesystems/
* Add a "--deleted" flag to the "repo-list" action for listing deleted archives that haven't
yet been compacted (Borg 2 only).


@ -61,6 +61,7 @@ borgmatic is powered by [Borg Backup](https://www.borgbackup.org/).
<a href="https://mariadb.com/"><img src="docs/static/mariadb.png" alt="MariaDB" height="60px" style="margin-bottom:20px; margin-right:20px;"></a>
<a href="https://www.mongodb.com/"><img src="docs/static/mongodb.png" alt="MongoDB" height="60px" style="margin-bottom:20px; margin-right:20px;"></a>
<a href="https://sqlite.org/"><img src="docs/static/sqlite.png" alt="SQLite" height="60px" style="margin-bottom:20px; margin-right:20px;"></a>
<a href="https://openzfs.org/"><img src="docs/static/openzfs.png" alt="OpenZFS" height="60px" style="margin-bottom:20px; margin-right:20px;"></a>
<a href="https://healthchecks.io/"><img src="docs/static/healthchecks.png" alt="Healthchecks" height="60px" style="margin-bottom:20px; margin-right:20px;"></a>
<a href="https://uptime.kuma.pet/"><img src="docs/static/uptimekuma.png" alt="Uptime Kuma" height="60px" style="margin-bottom:20px; margin-right:20px;"></a>
<a href="https://cronitor.io/"><img src="docs/static/cronitor.png" alt="Cronitor" height="60px" style="margin-bottom:20px; margin-right:20px;"></a>


@ -8,6 +8,7 @@ import pathlib
import random
import shutil
import borgmatic.actions.create
import borgmatic.borg.check
import borgmatic.borg.create
import borgmatic.borg.environment
@ -345,7 +346,13 @@ def upgrade_check_times(config, borg_repository_id):
def collect_spot_check_source_paths(
repository, config, local_borg_version, global_arguments, local_path, remote_path
repository,
config,
local_borg_version,
global_arguments,
local_path,
remote_path,
borgmatic_runtime_directory,
):
'''
Given a repository configuration dict, a configuration dict, the local Borg version, global
@ -366,10 +373,12 @@ def collect_spot_check_source_paths(
dry_run=True,
repository_path=repository['path'],
config=config,
config_paths=(),
source_directories=borgmatic.actions.create.process_source_directories(
config, config_paths=()
),
local_borg_version=local_borg_version,
global_arguments=global_arguments,
borgmatic_runtime_directories=(),
borgmatic_runtime_directory=borgmatic_runtime_directory,
local_path=local_path,
remote_path=remote_path,
list_files=True,
@ -585,6 +594,7 @@ def spot_check(
global_arguments,
local_path,
remote_path,
borgmatic_runtime_directory,
)
logger.debug(f'{log_prefix}: {len(source_paths)} total source paths for spot check')


@ -1,7 +1,10 @@
import glob
import importlib.metadata
import itertools
import json
import logging
import os
import pathlib
import borgmatic.actions.json
import borgmatic.borg.create
@ -40,6 +43,138 @@ def create_borgmatic_manifest(config, config_paths, borgmatic_runtime_directory,
)
def expand_directory(directory, working_directory):
'''
Given a directory path, expand any tilde (representing a user's home directory) and any globs
therein. Return a list of one or more resulting paths.
'''
expanded_directory = os.path.join(working_directory or '', os.path.expanduser(directory))
return glob.glob(expanded_directory) or [expanded_directory]
def expand_directories(directories, working_directory=None):
'''
Given a sequence of directory paths and an optional working directory, expand tildes and globs
in each one. Return all the resulting directories as a single flattened tuple.
'''
if directories is None:
return ()
return tuple(
itertools.chain.from_iterable(
expand_directory(directory, working_directory) for directory in directories
)
)
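A minimal standalone sketch of the expansion behavior these two helpers provide, using only the standard library (the example paths are hypothetical):

import glob
import os

def expand(directory, working_directory=None):
    # Prefix with the working directory (if any), expand "~", then glob.
    expanded = os.path.join(working_directory or '', os.path.expanduser(directory))
    return glob.glob(expanded) or [expanded]

# A glob that matches nothing falls back to the literal expanded path.
print(expand('~/photos/*'))
print(expand('data', working_directory='/srv'))  # ['/srv/data']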
def map_directories_to_devices(directories, working_directory=None):
'''
Given a sequence of directories and an optional working directory, return a map from directory
to an identifier for the device on which that directory resides or None if the path doesn't
exist.
This is handy for determining whether two different directories are on the same filesystem (have
the same device identifier).
'''
return {
directory: os.stat(full_directory).st_dev if os.path.exists(full_directory) else None
for directory in directories
for full_directory in (os.path.join(working_directory or '', directory),)
}
def deduplicate_directories(directory_devices, additional_directory_devices):
'''
Given a map from directory to the identifier for the device on which that directory resides,
return the directories as a sorted sequence with all duplicate child directories removed. For
instance, if paths is ['/foo', '/foo/bar'], return just: ['/foo']
The one exception to this rule is if two paths are on different filesystems (devices). In that
case, they won't get de-duplicated in case they both need to be passed to Borg (e.g. the
location.one_file_system option is true).
The idea is that if Borg is given a parent directory, then it doesn't also need to be given
child directories, because it will naturally spider the contents of the parent directory. And
there are cases where Borg coming across the same file twice will result in duplicate reads and
even hangs, e.g. when a database hook is using a named pipe for streaming database dumps to
Borg.
If any additional directory devices are given, also deduplicate against them, but don't include
them in the returned directories.
'''
deduplicated = set()
directories = sorted(directory_devices.keys())
additional_directories = sorted(additional_directory_devices.keys())
all_devices = {**directory_devices, **additional_directory_devices}
for directory in directories:
deduplicated.add(directory)
parents = pathlib.PurePath(directory).parents
# If another directory in the given list (or the additional list) is a parent of current
# directory (even n levels up) and both are on the same filesystem, then the current
# directory is a duplicate.
for other_directory in directories + additional_directories:
for parent in parents:
if (
pathlib.PurePath(other_directory) == parent
and all_devices[directory] is not None
and all_devices[other_directory] == all_devices[directory]
):
if directory in deduplicated:
deduplicated.remove(directory)
break
return sorted(deduplicated)
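As a worked example of the deduplication rule, with a hand-built device map standing in for map_directories_to_devices() output (hypothetical paths and device numbers; assumes a borgmatic version where these helpers live in borgmatic.actions.create, as the check action's import above suggests):

import borgmatic.actions.create as create

# /home and /home/user share a device, so the child collapses into the parent;
# /mnt/media sits on a different device and survives.
directory_devices = {'/home': 64769, '/home/user': 64769, '/mnt/media': 64770}
print(create.deduplicate_directories(directory_devices, additional_directory_devices={}))
# ['/home', '/mnt/media']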
ROOT_PATTERN_PREFIX = 'R '
def pattern_root_directories(patterns=None):
'''
Given a sequence of patterns, parse out and return just the root directories.
'''
if not patterns:
return []
return [
pattern.split(ROOT_PATTERN_PREFIX, maxsplit=1)[1]
for pattern in patterns
if pattern.startswith(ROOT_PATTERN_PREFIX)
]
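For instance, only "R "-prefixed (root) patterns contribute root directories, mirroring the parsing above (hypothetical pattern list):

patterns = ['R /srv', '+ sh:/srv/www/**', '- /srv/tmp']
roots = [
    pattern.split('R ', maxsplit=1)[1]
    for pattern in patterns
    if pattern.startswith('R ')
]
print(roots)  # ['/srv']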
def process_source_directories(config, config_paths, source_directories=None):
'''
Given a sequence of source directories (either in the source_directories argument or, lacking
that, from config) and a sequence of config paths to append, expand and deduplicate the source
directories, returning the result.
'''
working_directory = borgmatic.config.paths.get_working_directory(config)
if source_directories is None:
source_directories = tuple(config.get('source_directories', ())) + (
tuple(config_paths) if config.get('store_config_files', True) else ()
)
return deduplicate_directories(
map_directories_to_devices(
expand_directories(
tuple(source_directories),
working_directory=working_directory,
)
),
additional_directory_devices=map_directories_to_devices(
expand_directories(
pattern_root_directories(config.get('patterns')),
working_directory=working_directory,
)
),
)
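A usage sketch of the whole pipeline, assuming a borgmatic install and a hypothetical configuration dict (the exact result depends on which of these paths exist and on which devices):

import borgmatic.actions.create as create

# Config file paths are appended because store_config_files defaults to true,
# then everything is tilde/glob-expanded and deduplicated against "R" pattern roots.
config = {'source_directories': ['/home', '/home/user/.config'], 'patterns': ['R /srv']}
print(create.process_source_directories(config, config_paths=('/etc/borgmatic/config.yaml',)))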
def run_create(
config_filename,
repository,
@ -86,14 +221,21 @@ def run_create(
borgmatic_runtime_directory,
global_arguments.dry_run,
)
source_directories = process_source_directories(config, config_paths)
active_dumps = borgmatic.hooks.dispatch.call_hooks(
'dump_data_sources',
config,
repository['path'],
borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES,
borgmatic_runtime_directory,
source_directories,
global_arguments.dry_run,
)
# Process source directories again in case any data source hooks updated them. Without this
# step, we could end up with duplicate paths that cause Borg to hang when it tries to read
# from the same named pipe twice.
source_directories = process_source_directories(config, config_paths, source_directories)
stream_processes = [process for processes in active_dumps.values() for process in processes]
if config.get('store_config_files', True):
@ -103,12 +245,14 @@ def run_create(
borgmatic_runtime_directory,
global_arguments.dry_run,
)
if not global_arguments.dry_run:
source_directories.append(os.path.join(borgmatic_runtime_directory, 'bootstrap'))
json_output = borgmatic.borg.create.create_archive(
global_arguments.dry_run,
repository['path'],
config,
config_paths,
source_directories,
local_borg_version,
global_arguments,
borgmatic_runtime_directory,


@ -1,4 +1,3 @@
import glob
import itertools
import logging
import os
@ -20,31 +19,6 @@ from borgmatic.execute import (
logger = logging.getLogger(__name__)
def expand_directory(directory, working_directory):
'''
Given a directory path, expand any tilde (representing a user's home directory) and any globs
therein. Return a list of one or more resulting paths.
'''
expanded_directory = os.path.join(working_directory or '', os.path.expanduser(directory))
return glob.glob(expanded_directory) or [expanded_directory]
def expand_directories(directories, working_directory=None):
'''
Given a sequence of directory paths and an optional working directory, expand tildes and globs
in each one. Return all the resulting directories as a single flattened tuple.
'''
if directories is None:
return ()
return tuple(
itertools.chain.from_iterable(
expand_directory(directory, working_directory) for directory in directories
)
)
def expand_home_directories(directories):
'''
Given a sequence of directory paths, expand tildes in each one. Do not perform any globbing.
@ -56,67 +30,6 @@ def expand_home_directories(directories):
return tuple(os.path.expanduser(directory) for directory in directories)
def map_directories_to_devices(directories, working_directory=None):
'''
Given a sequence of directories and an optional working directory, return a map from directory
to an identifier for the device on which that directory resides or None if the path doesn't
exist.
This is handy for determining whether two different directories are on the same filesystem (have
the same device identifier).
'''
return {
directory: os.stat(full_directory).st_dev if os.path.exists(full_directory) else None
for directory in directories
for full_directory in (os.path.join(working_directory or '', directory),)
}
def deduplicate_directories(directory_devices, additional_directory_devices):
'''
Given a map from directory to the identifier for the device on which that directory resides,
return the directories as a sorted tuple with all duplicate child directories removed. For
instance, if paths is ('/foo', '/foo/bar'), return just: ('/foo',)
The one exception to this rule is if two paths are on different filesystems (devices). In that
case, they won't get de-duplicated in case they both need to be passed to Borg (e.g. the
location.one_file_system option is true).
The idea is that if Borg is given a parent directory, then it doesn't also need to be given
child directories, because it will naturally spider the contents of the parent directory. And
there are cases where Borg coming across the same file twice will result in duplicate reads and
even hangs, e.g. when a database hook is using a named pipe for streaming database dumps to
Borg.
If any additional directory devices are given, also deduplicate against them, but don't include
them in the returned directories.
'''
deduplicated = set()
directories = sorted(directory_devices.keys())
additional_directories = sorted(additional_directory_devices.keys())
all_devices = {**directory_devices, **additional_directory_devices}
for directory in directories:
deduplicated.add(directory)
parents = pathlib.PurePath(directory).parents
# If another directory in the given list (or the additional list) is a parent of current
# directory (even n levels up) and both are on the same filesystem, then the current
# directory is a duplicate.
for other_directory in directories + additional_directories:
for parent in parents:
if (
pathlib.PurePath(other_directory) == parent
and all_devices[directory] is not None
and all_devices[other_directory] == all_devices[directory]
):
if directory in deduplicated:
deduplicated.remove(directory)
break
return tuple(sorted(deduplicated))
def write_pattern_file(patterns=None, sources=None, pattern_file=None):
'''
Given a sequence of patterns and an optional sequence of source directories, write them to a
@ -221,32 +134,6 @@ def make_list_filter_flags(local_borg_version, dry_run):
return f'{base_flags}-'
def collect_borgmatic_runtime_directories(borgmatic_runtime_directory):
'''
Return a list of borgmatic-specific runtime directories used for temporary runtime data like
streaming database dumps and bootstrap metadata. If no such directories exist, return an empty
list.
'''
return [borgmatic_runtime_directory] if os.path.exists(borgmatic_runtime_directory) else []
ROOT_PATTERN_PREFIX = 'R '
def pattern_root_directories(patterns=None):
'''
Given a sequence of patterns, parse out and return just the root directories.
'''
if not patterns:
return []
return [
pattern.split(ROOT_PATTERN_PREFIX, maxsplit=1)[1]
for pattern in patterns
if pattern.startswith(ROOT_PATTERN_PREFIX)
]
def special_file(path):
'''
Return whether the given path is a special file (character device, block device, or named pipe
@ -307,21 +194,15 @@ def collect_special_file_paths(
)
def check_all_source_directories_exist(source_directories, working_directory=None):
def check_all_source_directories_exist(source_directories):
'''
Given a sequence of source directories and an optional working directory to serve as a prefix
for each (if it's a relative directory), check that the source directories all exist. If any do
Given a sequence of source directories, check that the source directories all exist. If any do
not, raise an exception.
'''
missing_directories = [
source_directory
for source_directory in source_directories
if not all(
[
os.path.exists(os.path.join(working_directory or '', directory))
for directory in expand_directory(source_directory, working_directory)
]
)
if not os.path.exists(source_directory)
]
if missing_directories:
raise ValueError(f"Source directories do not exist: {', '.join(missing_directories)}")
@ -334,10 +215,10 @@ def make_base_create_command(
dry_run,
repository_path,
config,
config_paths,
source_directories,
local_borg_version,
global_arguments,
borgmatic_runtime_directories,
borgmatic_runtime_directory,
local_path='borg',
remote_path=None,
progress=False,
@ -352,34 +233,13 @@ def make_base_create_command(
(base Borg create command flags, Borg create command positional arguments, open pattern file
handle, open exclude file handle).
'''
working_directory = borgmatic.config.paths.get_working_directory(config)
if config.get('source_directories_must_exist', False):
check_all_source_directories_exist(
config.get('source_directories'), working_directory=working_directory
)
sources = deduplicate_directories(
map_directories_to_devices(
expand_directories(
tuple(config.get('source_directories', ()))
+ borgmatic_runtime_directories
+ tuple(config_paths if config.get('store_config_files', True) else ()),
working_directory=working_directory,
)
),
additional_directory_devices=map_directories_to_devices(
expand_directories(
pattern_root_directories(config.get('patterns')),
working_directory=working_directory,
)
),
)
check_all_source_directories_exist(source_directories)
ensure_files_readable(config.get('patterns_from'), config.get('exclude_from'))
pattern_file = (
write_pattern_file(config.get('patterns'), sources)
write_pattern_file(config.get('patterns'), source_directories)
if config.get('patterns') or config.get('patterns_from')
else None
)
@ -457,7 +317,7 @@ def make_base_create_command(
create_positional_arguments = flags.make_repository_archive_flags(
repository_path, archive_name_format, local_borg_version
) + (sources if not pattern_file else ())
) + (tuple(source_directories) if not pattern_file else ())
# If database hooks are enabled (as indicated by streaming processes), exclude files that might
# cause Borg to hang. But skip this if the user has explicitly set the "read_special" to True.
@ -466,6 +326,7 @@ def make_base_create_command(
f'{repository_path}: Ignoring configured "read_special" value of false, as true is needed for database hooks.'
)
borg_environment = environment.make_environment(config)
working_directory = borgmatic.config.paths.get_working_directory(config)
logger.debug(f'{repository_path}: Collecting special file paths')
special_file_paths = collect_special_file_paths(
@ -474,7 +335,9 @@ def make_base_create_command(
local_path,
working_directory,
borg_environment,
skip_directories=borgmatic_runtime_directories,
skip_directories=(
[borgmatic_runtime_directory] if os.path.exists(borgmatic_runtime_directory) else []
),
)
if special_file_paths:
@ -501,7 +364,7 @@ def create_archive(
dry_run,
repository_path,
config,
config_paths,
source_directories,
local_borg_version,
global_arguments,
borgmatic_runtime_directory,
@ -524,20 +387,16 @@ def create_archive(
borgmatic.logger.add_custom_log_levels()
working_directory = borgmatic.config.paths.get_working_directory(config)
borgmatic_runtime_directories = expand_directories(
collect_borgmatic_runtime_directories(borgmatic_runtime_directory),
working_directory=working_directory,
)
(create_flags, create_positional_arguments, pattern_file, exclude_file) = (
make_base_create_command(
dry_run,
repository_path,
config,
config_paths,
source_directories,
local_borg_version,
global_arguments,
borgmatic_runtime_directories,
borgmatic_runtime_directory,
local_path,
remote_path,
progress,


@ -44,12 +44,12 @@ def schema_to_sample_configuration(schema, level=0, parent_is_sequence=False):
if example is not None:
return example
if schema_type == 'array':
if schema_type == 'array' or (isinstance(schema_type, list) and 'array' in schema_type):
config = ruamel.yaml.comments.CommentedSeq(
[schema_to_sample_configuration(schema['items'], level, parent_is_sequence=True)]
)
add_comments_to_configuration_sequence(config, schema, indent=(level * INDENT))
elif schema_type == 'object':
elif schema_type == 'object' or (isinstance(schema_type, list) and 'object' in schema_type):
config = ruamel.yaml.comments.CommentedMap(
[
(field_name, schema_to_sample_configuration(sub_schema, level + 1))


@ -33,6 +33,32 @@ def get_borgmatic_source_directory(config):
TEMPORARY_DIRECTORY_PREFIX = 'borgmatic-'
def replace_temporary_subdirectory_with_glob(path):
'''
Given an absolute temporary directory path, look for a subdirectory within it starting with the
temporary directory prefix and replace it with an appropriate glob. For instance, given:
/tmp/borgmatic-aet8kn93/borgmatic
... replace it with:
/tmp/borgmatic-*/borgmatic
This is useful for finding previous temporary directories from prior borgmatic runs.
'''
return os.path.join(
'/',
*(
(
f'{TEMPORARY_DIRECTORY_PREFIX}*'
if subdirectory.startswith(TEMPORARY_DIRECTORY_PREFIX)
else subdirectory
)
for subdirectory in path.split(os.path.sep)
),
)
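For instance, assuming a borgmatic version that exposes this helper in borgmatic.config.paths (the ZFS hook below relies on it):

import borgmatic.config.paths

print(
    borgmatic.config.paths.replace_temporary_subdirectory_with_glob(
        '/tmp/borgmatic-aet8kn93/borgmatic'
    )
)
# /tmp/borgmatic-*/borgmatic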
class Runtime_directory:
'''
A Python context manager for creating and cleaning up the borgmatic runtime directory used for
@ -84,7 +110,9 @@ class Runtime_directory:
self.runtime_path = expand_user_in_path(
os.path.join(
base_path if final_directory == 'borgmatic' else runtime_directory, '.', 'borgmatic'
base_path if final_directory == 'borgmatic' else runtime_directory,
'.', # Borg 1.4+ "slashdot" hack.
'borgmatic',
)
)
os.makedirs(self.runtime_path, mode=0o700, exist_ok=True)
@ -102,7 +130,13 @@ class Runtime_directory:
Delete any temporary directory that was created as part of initialization.
'''
if self.temporary_directory:
self.temporary_directory.cleanup()
try:
self.temporary_directory.cleanup()
# The cleanup() call errors if, for instance, there's still a
# mounted filesystem within the temporary directory. There's
# nothing we can do about that here, so swallow the error.
except OSError:
pass
def make_runtime_directory_glob(borgmatic_runtime_directory):


@ -2255,7 +2255,29 @@ properties:
config: "__config"
hostname: "__hostname"
description: |
Configuration for a monitoring integration with Grafana loki. You
Configuration for a monitoring integration with Grafana Loki. You
can send the logs to a self-hosted instance or create an account at
https://grafana.com/auth/sign-up/create-user. See borgmatic
monitoring documentation for details.
zfs:
type: ["object", "null"]
additionalProperties: false
properties:
zfs_command:
type: string
description: |
Command to use instead of "zfs".
example: /usr/local/bin/zfs
mount_command:
type: string
description: |
Command to use instead of "mount".
example: /usr/local/bin/mount
umount_command:
type: string
description: |
Command to use instead of "umount".
example: /usr/local/bin/umount
description: |
Configuration for integration with the ZFS filesystem.
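Once set, these options reach the hook functions as a plain dict; a hypothetical hook_config matching the examples above:

hook_config = {
    'zfs_command': '/usr/local/bin/zfs',
    'mount_command': '/usr/local/bin/mount',
    'umount_command': '/usr/local/bin/umount',
}
# hook_config.get('zfs_command', 'zfs') and friends pick these up, falling back
# to the bare command names when the options are omitted.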


@ -16,6 +16,7 @@ from borgmatic.hooks import (
sqlite,
uptimekuma,
zabbix,
zfs,
)
logger = logging.getLogger(__name__)
@ -36,6 +37,7 @@ HOOK_NAME_TO_MODULE = {
'sqlite_databases': sqlite,
'uptime_kuma': uptimekuma,
'zabbix': zabbix,
'zfs': zfs,
}
@ -49,7 +51,7 @@ def call_hook(function_name, config, log_prefix, hook_name, *args, **kwargs):
Raise AttributeError if the function name is not found in the module.
Raise anything else that the called function raises.
'''
hook_config = config.get(hook_name, {})
hook_config = config.get(hook_name) or {}
try:
module = HOOK_NAME_TO_MODULE[hook_name]
@ -77,7 +79,7 @@ def call_hooks(function_name, config, log_prefix, hook_names, *args, **kwargs):
return {
hook_name: call_hook(function_name, config, log_prefix, hook_name, *args, **kwargs)
for hook_name in hook_names
if config.get(hook_name)
if hook_name in config
}
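These two changes matter for hooks like "zfs:" that may be declared with no options at all: the section is present in the configuration but its value is None. A quick sketch of the difference:

config = {'zfs': None}  # "zfs:" declared in the configuration with no options

print('zfs' in config)          # True: call_hooks() now dispatches the hook
print(bool(config.get('zfs')))  # False: the old truthiness test would have skipped it
print(config.get('zfs') or {})  # {}: call_hook() substitutes a safe empty dict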


@ -11,6 +11,7 @@ DATA_SOURCE_HOOK_NAMES = (
'mongodb_databases',
'postgresql_databases',
'sqlite_databases',
'zfs',
)


@ -122,7 +122,14 @@ def use_streaming(databases, config, log_prefix):
return any(databases)
def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory, dry_run):
def dump_data_sources(
databases,
config,
log_prefix,
borgmatic_runtime_directory,
source_directories,
dry_run,
):
'''
Dump the given MariaDB databases to a named pipe. The databases are supplied as a sequence of
dicts, one dict describing each database as per the configuration schema. Use the given
@ -131,6 +138,7 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
Also append the parent directory of the database dumps to the given source directories.
'''
dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
processes = []
@ -178,6 +186,9 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
)
)
if not dry_run:
source_directories.append(os.path.join(borgmatic_runtime_directory, 'mariadb_databases'))
return [process for process in processes if process]
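Each data source hook now receives the shared source_directories list and appends its dump location in place, instead of borgmatic hard-coding the runtime directory as a source path; a minimal sketch of that contract with hypothetical paths:

import os

borgmatic_runtime_directory = '/run/user/1000/borgmatic'  # hypothetical
source_directories = ['/home', '/etc']

# On a non-dry run, the dump parent directory rides along into the subsequent
# "borg create" without needing a separate return channel.
source_directories.append(os.path.join(borgmatic_runtime_directory, 'mariadb_databases'))
print(source_directories)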


@ -1,4 +1,5 @@
import logging
import os
import shlex
import borgmatic.config.paths
@ -23,7 +24,14 @@ def use_streaming(databases, config, log_prefix):
return any(database.get('format') != 'directory' for database in databases)
def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory, dry_run):
def dump_data_sources(
databases,
config,
log_prefix,
borgmatic_runtime_directory,
source_directories,
dry_run,
):
'''
Dump the given MongoDB databases to a named pipe. The databases are supplied as a sequence of
dicts, one dict describing each database as per the configuration schema. Use the borgmatic
@ -32,6 +40,7 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
Also append the parent directory of the database dumps to the given source directories.
'''
dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
@ -60,6 +69,9 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
dump.create_named_pipe_for_dump(dump_filename)
processes.append(execute_command(command, shell=True, run_to_completion=False))
if not dry_run:
source_directories.append(os.path.join(borgmatic_runtime_directory, 'mongodb_databases'))
return processes


@ -121,7 +121,14 @@ def use_streaming(databases, config, log_prefix):
return any(databases)
def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory, dry_run):
def dump_data_sources(
databases,
config,
log_prefix,
borgmatic_runtime_directory,
source_directories,
dry_run,
):
'''
Dump the given MySQL/MariaDB databases to a named pipe. The databases are supplied as a sequence
of dicts, one dict describing each database as per the configuration schema. Use the given
@ -130,6 +137,7 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
Also append the parent directory of the database dumps to the given source directories.
'''
dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
processes = []
@ -177,6 +185,9 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
)
)
if not dry_run:
source_directories.append(os.path.join(borgmatic_runtime_directory, 'mysql_databases'))
return [process for process in processes if process]


@ -104,7 +104,14 @@ def use_streaming(databases, config, log_prefix):
return any(database.get('format') != 'directory' for database in databases)
def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory, dry_run):
def dump_data_sources(
databases,
config,
log_prefix,
borgmatic_runtime_directory,
source_directories,
dry_run,
):
'''
Dump the given PostgreSQL databases to a named pipe. The databases are supplied as a sequence of
dicts, one dict describing each database as per the configuration schema. Use the given
@ -113,6 +120,7 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
Also append the parent directory of the database dumps to the given source directories.
Raise ValueError if the databases to dump cannot be determined.
'''
@ -203,6 +211,9 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
)
)
if not dry_run:
source_directories.append(os.path.join(borgmatic_runtime_directory, 'postgresql_databases'))
return processes


@ -24,7 +24,14 @@ def use_streaming(databases, config, log_prefix):
return any(databases)
def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory, dry_run):
def dump_data_sources(
databases,
config,
log_prefix,
borgmatic_runtime_directory,
source_directories,
dry_run,
):
'''
Dump the given SQLite databases to a named pipe. The databases are supplied as a sequence of
configuration dicts, as per the configuration schema. Use the given borgmatic runtime directory
@ -32,6 +39,7 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
Also append the parent directory of the database dumps to the given source directories.
'''
dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
processes = []
@ -73,6 +81,9 @@ def dump_data_sources(databases, config, log_prefix, borgmatic_runtime_directory
dump.create_named_pipe_for_dump(dump_filename)
processes.append(execute_command(command, shell=True, run_to_completion=False))
if not dry_run:
source_directories.append(os.path.join(borgmatic_runtime_directory, 'sqlite_databases'))
return processes

borgmatic/hooks/zfs.py (new file, 324 lines added)

@ -0,0 +1,324 @@
import glob
import logging
import os
import shutil
import subprocess
import borgmatic.config.paths
import borgmatic.execute
logger = logging.getLogger(__name__)
def use_streaming(hook_config, config, log_prefix): # pragma: no cover
'''
Return whether dump streaming is used for this hook. (Spoiler: It isn't.)
'''
return False
BORGMATIC_SNAPSHOT_PREFIX = 'borgmatic-'
BORGMATIC_USER_PROPERTY = 'org.torsion.borgmatic:backup'
def get_datasets_to_backup(zfs_command, source_directories):
'''
Given a ZFS command to run and a sequence of configured source directories, find the
intersection between the current ZFS dataset mount points and the configured borgmatic source
directories. The idea is that these are the requested datasets to snapshot. But also include any
datasets tagged with a borgmatic-specific user property, whether or not they appear in source
directories.
Return the result as a sequence of (dataset name, mount point) pairs.
'''
list_output = borgmatic.execute.execute_command_and_capture_output(
(
zfs_command,
'list',
'-H',
'-t',
'filesystem',
'-o',
f'name,mountpoint,{BORGMATIC_USER_PROPERTY}',
)
)
source_directories_set = set(source_directories)
try:
return tuple(
(dataset_name, mount_point)
for line in list_output.splitlines()
for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),)
if mount_point in source_directories_set or user_property_value == 'auto'
)
except ValueError:
raise ValueError(f'Invalid {zfs_command} list output')
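A standalone sketch of the intersection logic above, fed hypothetical tab-separated "zfs list -H" output:

list_output = 'pool/data\t/srv/data\t-\npool/home\t/home\tauto\npool/scratch\t/scratch\t-\n'
source_directories_set = {'/srv/data'}

datasets = tuple(
    (dataset_name, mount_point)
    for line in list_output.splitlines()
    for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),)
    if mount_point in source_directories_set or user_property_value == 'auto'
)
print(datasets)  # (('pool/data', '/srv/data'), ('pool/home', '/home'))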
def get_all_datasets(zfs_command):
'''
Given a ZFS command to run, return all ZFS datasets as a sequence of (dataset name, mount point)
pairs.
'''
list_output = borgmatic.execute.execute_command_and_capture_output(
(
zfs_command,
'list',
'-H',
'-t',
'filesystem',
'-o',
'name,mountpoint',
)
)
try:
return tuple(
(dataset_name, mount_point)
for line in list_output.splitlines()
for (dataset_name, mount_point) in (line.rstrip().split('\t'),)
)
except ValueError:
raise ValueError(f'Invalid {zfs_command} list output')
def snapshot_dataset(zfs_command, full_snapshot_name): # pragma: no cover
'''
Given a ZFS command to run and a snapshot name of the form "dataset@snapshot", create a new ZFS
snapshot.
'''
borgmatic.execute.execute_command(
(
zfs_command,
'snapshot',
'-r',
full_snapshot_name,
),
output_log_level=logging.DEBUG,
)
def mount_snapshot(mount_command, full_snapshot_name, snapshot_mount_path): # pragma: no cover
'''
Given a mount command to run, an existing snapshot name of the form "dataset@snapshot", and the
path where the snapshot should be mounted, mount the snapshot (making any necessary directories
first).
'''
os.makedirs(snapshot_mount_path, mode=0o700, exist_ok=True)
borgmatic.execute.execute_command(
(
mount_command,
'-t',
'zfs',
full_snapshot_name,
snapshot_mount_path,
),
output_log_level=logging.DEBUG,
)
def dump_data_sources(
hook_config,
config,
log_prefix,
borgmatic_runtime_directory,
source_directories,
dry_run,
):
'''
Given a ZFS configuration dict, a configuration dict, a log prefix, the borgmatic runtime
directory, the configured source directories, and whether this is a dry run, auto-detect and
snapshot any ZFS dataset mount points listed in the given source directories and any dataset
with a borgmatic-specific user property. Also update those source directories, replacing dataset
mount points with corresponding snapshot directories so they get stored in the Borg archive
instead of the dataset mount points. Use the log prefix in any log entries.
Return an empty sequence, since there are no ongoing dump processes from this hook.
If this is a dry run, then don't actually snapshot anything.
'''
dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
logger.info(f'{log_prefix}: Snapshotting ZFS datasets{dry_run_label}')
# List ZFS datasets to get their mount points.
zfs_command = hook_config.get('zfs_command', 'zfs')
requested_datasets = get_datasets_to_backup(zfs_command, source_directories)
# Snapshot each dataset, rewriting source directories to use the snapshot paths.
snapshot_name = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
for dataset_name, mount_point in requested_datasets:
full_snapshot_name = f'{dataset_name}@{snapshot_name}'
logger.debug(f'{log_prefix}: Creating ZFS snapshot {full_snapshot_name}{dry_run_label}')
if not dry_run:
snapshot_dataset(zfs_command, full_snapshot_name)
# Mount the snapshot into a particular named temporary directory so that the snapshot ends
# up in the Borg archive at the "original" dataset mount point path.
snapshot_mount_path_for_borg = os.path.join(
os.path.normpath(borgmatic_runtime_directory),
'zfs_snapshots',
'.', # Borg 1.4+ "slashdot" hack.
mount_point.lstrip(os.path.sep),
)
snapshot_mount_path = os.path.normpath(snapshot_mount_path_for_borg)
logger.debug(
f'{log_prefix}: Mounting ZFS snapshot {full_snapshot_name} at {snapshot_mount_path}{dry_run_label}'
)
if not dry_run:
mount_snapshot(
hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path
)
if mount_point in source_directories:
source_directories.remove(mount_point)
source_directories.append(snapshot_mount_path_for_borg)
return []
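The mount-path construction above is the subtle part: with a hypothetical runtime directory and dataset mount point, the "/./" component (the Borg 1.4+ "slashdot" hack) makes Borg store the snapshot contents at the original mount point path while the snapshot is actually mounted inside the runtime directory:

import os

borgmatic_runtime_directory = '/run/user/1000/borgmatic'  # hypothetical
mount_point = '/srv/data'

snapshot_mount_path_for_borg = os.path.join(
    os.path.normpath(borgmatic_runtime_directory),
    'zfs_snapshots',
    '.',  # Everything before this component is stripped from archive paths.
    mount_point.lstrip(os.path.sep),
)
print(snapshot_mount_path_for_borg)
# /run/user/1000/borgmatic/zfs_snapshots/./srv/data  (path passed to Borg)
print(os.path.normpath(snapshot_mount_path_for_borg))
# /run/user/1000/borgmatic/zfs_snapshots/srv/data    (where the snapshot is mounted)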
def unmount_snapshot(umount_command, snapshot_mount_path): # pragma: no cover
'''
Given a umount command to run and the mount path of a snapshot, unmount it.
'''
borgmatic.execute.execute_command(
(
umount_command,
snapshot_mount_path,
),
output_log_level=logging.DEBUG,
)
def destroy_snapshot(zfs_command, full_snapshot_name): # pragma: no cover
'''
Given a ZFS command to run and the name of a snapshot in the form "dataset@snapshot", destroy
it.
'''
borgmatic.execute.execute_command(
(
zfs_command,
'destroy',
'-r',
full_snapshot_name,
),
output_log_level=logging.DEBUG,
)
def get_all_snapshots(zfs_command):
'''
Given a ZFS command to run, return all ZFS snapshots as a sequence of full snapshot names of the
form "dataset@snapshot".
'''
list_output = borgmatic.execute.execute_command_and_capture_output(
(
zfs_command,
'list',
'-H',
'-t',
'snapshot',
'-o',
'name',
)
)
return tuple(line.rstrip() for line in list_output.splitlines())
def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_directory, dry_run):
'''
Given a ZFS configuration dict, a configuration dict, a log prefix, the borgmatic runtime
directory, and whether this is a dry run, unmount and destroy any ZFS snapshots created by
borgmatic. Use the log prefix in any log entries. If this is a dry run, then don't actually
remove anything.
'''
dry_run_label = ' (dry run; not actually removing anything)' if dry_run else ''
# Unmount snapshots.
zfs_command = hook_config.get('zfs_command', 'zfs')
try:
datasets = get_all_datasets(zfs_command)
except FileNotFoundError:
logger.debug(f'{log_prefix}: Could not find "{zfs_command}" command')
return
except subprocess.CalledProcessError as error:
logger.debug(f'{log_prefix}: {error}')
return
snapshots_glob = os.path.join(
borgmatic.config.paths.replace_temporary_subdirectory_with_glob(
os.path.normpath(borgmatic_runtime_directory),
),
'zfs_snapshots',
)
logger.debug(
f'{log_prefix}: Looking for snapshots to remove in {snapshots_glob}{dry_run_label}'
)
umount_command = hook_config.get('umount_command', 'umount')
for snapshots_directory in glob.glob(snapshots_glob):
if not os.path.isdir(snapshots_directory):
continue
# This might fail if the directory is already mounted, but we swallow errors here since
# we'll try again below. The point of doing it here is that we don't want to try to unmount
# a non-mounted directory (which *will* fail), and probing for whether a directory is
# mounted is tough to do in a cross-platform way.
if not dry_run:
shutil.rmtree(snapshots_directory, ignore_errors=True)
for _, mount_point in datasets:
snapshot_mount_path = os.path.join(snapshots_directory, mount_point.lstrip(os.path.sep))
if not os.path.isdir(snapshot_mount_path):
continue
logger.debug(
f'{log_prefix}: Unmounting ZFS snapshot at {snapshot_mount_path}{dry_run_label}'
)
if not dry_run:
try:
unmount_snapshot(umount_command, snapshot_mount_path)
except FileNotFoundError:
logger.debug(f'{log_prefix}: Could not find "{umount_command}" command')
return
except subprocess.CalledProcessError as error:
logger.debug(f'{log_prefix}: {error}')
return
if not dry_run:
shutil.rmtree(snapshots_directory)
# Destroy snapshots.
full_snapshot_names = get_all_snapshots(zfs_command)
for full_snapshot_name in full_snapshot_names:
# Only destroy snapshots that borgmatic actually created!
if not full_snapshot_name.split('@')[-1].startswith(BORGMATIC_SNAPSHOT_PREFIX):
continue
logger.debug(f'{log_prefix}: Destroying ZFS snapshot {full_snapshot_name}{dry_run_label}')
if not dry_run:
destroy_snapshot(zfs_command, full_snapshot_name)
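The prefix check above is what keeps borgmatic from destroying snapshots it didn't create; on hypothetical names:

BORGMATIC_SNAPSHOT_PREFIX = 'borgmatic-'

for full_snapshot_name in ('pool/data@borgmatic-123456', 'pool/data@nightly'):
    snapshot_part = full_snapshot_name.split('@')[-1]
    print(full_snapshot_name, snapshot_part.startswith(BORGMATIC_SNAPSHOT_PREFIX))
# pool/data@borgmatic-123456 True   (destroyed)
# pool/data@nightly False           (left alone)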
def make_data_source_dump_patterns(hook_config, config, log_prefix, name=None): # pragma: no cover
'''
Restores aren't implemented, because stored files can be extracted directly with "extract".
'''
raise NotImplementedError()
def restore_data_source_dump(
hook_config, config, log_prefix, data_source, dry_run, extract_process, connection_params
): # pragma: no cover
'''
Restores aren't implemented, because stored files can be extracted directly with "extract".
'''
raise NotImplementedError()


@ -2,7 +2,7 @@ FROM docker.io/alpine:3.20.1 AS borgmatic
COPY . /app
RUN apk add --no-cache py3-pip py3-ruamel.yaml py3-ruamel.yaml.clib
RUN pip install --break-system-packages --no-cache /app && generate-borgmatic-config && chmod +r /etc/borgmatic/config.yaml
RUN pip install --break-system-packages --no-cache /app && borgmatic config generate && chmod +r /etc/borgmatic/config.yaml
RUN borgmatic --help > /command-line.txt \
&& for action in repo-create transfer create prune compact check delete extract config "config bootstrap" "config generate" "config validate" export-tar mount umount repo-delete restore repo-list list repo-info info break-lock "key export" "key change-passphrase" borg; do \
echo -e "\n--------------------------------------------------------------------------------\n" >> /command-line.txt \


@ -3,7 +3,7 @@ title: How to add preparation and cleanup steps to backups
eleventyNavigation:
key: 🧹 Add preparation and cleanup steps
parent: How-to guides
order: 9
order: 10
---
## Preparation and cleanup hooks


@ -3,7 +3,7 @@ title: How to backup to a removable drive or an intermittent server
eleventyNavigation:
key: 💾 Backup to a removable drive/server
parent: How-to guides
order: 10
order: 11
---
## Occasional backups


@ -3,7 +3,7 @@ title: How to customize warnings and errors
eleventyNavigation:
key: 💥 Customize warnings/errors
parent: How-to guides
order: 12
order: 13
---
## When things go wrong


@ -3,7 +3,7 @@ title: How to develop on borgmatic
eleventyNavigation:
key: 🏗️ Develop on borgmatic
parent: How-to guides
order: 14
order: 15
---
## Source code


@ -119,10 +119,10 @@ archive, regardless of the user who performs the backup. (Note that Borg
doesn't store the leading `/`.)
<span class="minilink minilink-addedin">With Borg version 1.2 and
earlier</span>Database dump files are stored at a path dependent on the
[runtime
earlier</span>Database dump files are stored at a path dependent on the [runtime
directory](https://torsion.org/borgmatic/docs/how-to/backup-your-databases/#runtime-directory)
in use at the time the archive was created.
in use at the time the archive was created, as Borg 1.2 and earlier do not
support path rewriting.
<span class="minilink minilink-addedin">Prior to borgmatic version
1.9.0</span>Database dump files were instead stored at `~/.borgmatic` within