diff --git a/borgmatic/hooks/data_source/lvm.py b/borgmatic/hooks/data_source/lvm.py index 8ee1d03c..ffdf9e14 100644 --- a/borgmatic/hooks/data_source/lvm.py +++ b/borgmatic/hooks/data_source/lvm.py @@ -50,6 +50,9 @@ def get_logical_volumes(lsblk_command, source_directories=None): except json.JSONDecodeError as error: raise ValueError('Invalid {lsblk_command} JSON output: {error}') + + candidate_source_directories = set(source_directories or ()) + try: return tuple( (device['name'], device['path'], device['mountpoint'], contained_source_directories) @@ -57,7 +60,7 @@ def get_logical_volumes(lsblk_command, source_directories=None): if device['mountpoint'] and device['type'] == 'lvm' for contained_source_directories in ( borgmatic.hooks.data_source.snapshot.get_contained_directories( - device['mountpoint'], source_directories + device['mountpoint'], candidate_source_directories ), ) if not source_directories or contained_source_directories diff --git a/borgmatic/hooks/data_source/snapshot.py b/borgmatic/hooks/data_source/snapshot.py index 097b51ea..b03b01d9 100644 --- a/borgmatic/hooks/data_source/snapshot.py +++ b/borgmatic/hooks/data_source/snapshot.py @@ -1,3 +1,4 @@ +import itertools import pathlib @@ -6,18 +7,24 @@ IS_A_HOOK = False def get_contained_directories(parent_directory, candidate_contained_directories): ''' - Given a parent directory and a sequence of candiate directories potentially inside it, get the - subset of contained directories for which the parent directory is actually the parent, a - grandparent, the very same directory, etc. The idea is if, say, /var/log and /var/lib are - candidate contained directories, but there's a parent directory (logical volume, dataset, - subvolume, etc.) at /var, then /var is what we want to snapshot. 
+ Given a parent directory and a set of candidate directories potentially inside it, get the subset + of contained directories for which the parent directory is actually the parent, a grandparent, + the very same directory, etc. The idea is if, say, /var/log and /var/lib are candidate contained + directories, but there's a parent directory (logical volume, dataset, subvolume, etc.) at /var, + then /var is what we want to snapshot. + + Also mutate the given set of candidate contained directories to remove any actually contained + directories from it. ''' if not candidate_contained_directories: return () - return tuple( + contained = tuple( candidate for candidate in candidate_contained_directories if parent_directory == candidate or pathlib.PurePosixPath(parent_directory) in pathlib.PurePath(candidate).parents ) + candidate_contained_directories -= set(contained) + + return contained diff --git a/borgmatic/hooks/data_source/zfs.py b/borgmatic/hooks/data_source/zfs.py index 5f011a1c..a56489e0 100644 --- a/borgmatic/hooks/data_source/zfs.py +++ b/borgmatic/hooks/data_source/zfs.py @@ -1,3 +1,4 @@ +import collections import glob import logging import os @@ -5,6 +6,7 @@ import shutil import subprocess import borgmatic.config.paths +import borgmatic.hooks.data_source.snapshot import borgmatic.execute logger = logging.getLogger(__name__) @@ -21,6 +23,9 @@ BORGMATIC_SNAPSHOT_PREFIX = 'borgmatic-' BORGMATIC_USER_PROPERTY = 'org.torsion.borgmatic:backup' +Dataset = collections.namedtuple('Dataset', ('name', 'mount_point', 'user_property_value', 'contained_source_directories')) + + def get_datasets_to_backup(zfs_command, source_directories): ''' Given a ZFS command to run and a sequence of configured source directories, find the @@ -29,7 +34,7 @@ def get_datasets_to_backup(zfs_command, source_directories): datasets tagged with a borgmatic-specific user property, whether or not they appear in source directories. 
- Return the result as a sequence of (dataset name, mount point) pairs. + Return the result as a sequence of Dataset instances, sorted by mount point. ''' list_output = borgmatic.execute.execute_command_and_capture_output( ( @@ -42,23 +47,44 @@ def get_datasets_to_backup(zfs_command, source_directories): f'name,mountpoint,{BORGMATIC_USER_PROPERTY}', ) ) - source_directories_set = set(source_directories) try: - return tuple( - (dataset_name, mount_point) - for line in list_output.splitlines() - for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),) - if mount_point in source_directories_set or user_property_value == 'auto' + # Sort from longest to shortest mount points, so longer mount points get a whack at the + # candidate source directory piñata before their parents do. (Source directories are + # consumed during the second loop below, so no two datasets get the same contained source + # directories.) + datasets = sorted( + ( + Dataset(dataset_name, mount_point, user_property_value, ()) + for line in list_output.splitlines() + for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),) + ), + key=lambda dataset: dataset.mount_point, + reverse=True, ) except ValueError: raise ValueError('Invalid {zfs_command} list output') + candidate_source_directories = set(source_directories) -def get_all_datasets(zfs_command): + return sorted( + tuple( + Dataset(dataset.name, dataset.mount_point, dataset.user_property_value, contained_source_directories) + for dataset in datasets + for contained_source_directories in ( + borgmatic.hooks.data_source.snapshot.get_contained_directories( + dataset.mount_point, candidate_source_directories + ), + ) + if contained_source_directories or dataset.user_property_value == 'auto' + ), + key=lambda dataset: dataset.mount_point, + ) + + +def get_all_dataset_mount_points(zfs_command): ''' - Given a ZFS command to run, return all ZFS datasets as a sequence of (dataset name, mount point) - 
pairs. + Given a ZFS command to run, return all ZFS datasets as a sequence of sorted mount points. ''' list_output = borgmatic.execute.execute_command_and_capture_output( ( @@ -68,15 +94,13 @@ def get_all_datasets(zfs_command): '-t', 'filesystem', '-o', - 'name,mountpoint', + 'mountpoint', ) ) try: return tuple( - (dataset_name, mount_point) - for line in list_output.splitlines() - for (dataset_name, mount_point) in (line.rstrip().split('\t'),) + sorted(line.rstrip() for line in list_output.splitlines()) ) except ValueError: raise ValueError('Invalid {zfs_command} list output') @@ -147,40 +171,52 @@ def dump_data_sources( # Snapshot each dataset, rewriting source directories to use the snapshot paths. snapshot_name = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}' + normalized_runtime_directory = os.path.normpath(borgmatic_runtime_directory) if not requested_datasets: logger.warning(f'{log_prefix}: No ZFS datasets found to snapshot{dry_run_label}') - for dataset_name, mount_point in requested_datasets: - full_snapshot_name = f'{dataset_name}@{snapshot_name}' - logger.debug(f'{log_prefix}: Creating ZFS snapshot {full_snapshot_name}{dry_run_label}') + for dataset in requested_datasets: + full_snapshot_name = f'{dataset.name}@{snapshot_name}' + logger.debug(f'{log_prefix}: Creating ZFS snapshot {full_snapshot_name} of {dataset.mount_point}{dry_run_label}') if not dry_run: snapshot_dataset(zfs_command, full_snapshot_name) # Mount the snapshot into a particular named temporary directory so that the snapshot ends # up in the Borg archive at the "original" dataset mount point path. - snapshot_mount_path_for_borg = os.path.join( - os.path.normpath(borgmatic_runtime_directory), + snapshot_mount_path = os.path.join( + normalized_runtime_directory, 'zfs_snapshots', - '.', # Borg 1.4+ "slashdot" hack. 
- mount_point.lstrip(os.path.sep), + dataset.mount_point.lstrip(os.path.sep), ) - snapshot_mount_path = os.path.normpath(snapshot_mount_path_for_borg) + logger.debug( f'{log_prefix}: Mounting ZFS snapshot {full_snapshot_name} at {snapshot_mount_path}{dry_run_label}' ) - if not dry_run: - mount_snapshot( - hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path + if dry_run: + continue + + mount_snapshot( + hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path + ) + + for source_directory in dataset.contained_source_directories: + try: + source_directories.remove(source_directory) + except ValueError: + pass + + source_directories.append( + os.path.join( + normalized_runtime_directory, + 'zfs_snapshots', + '.', # Borg 1.4+ "slashdot" hack. + source_directory.lstrip(os.path.sep), + ) ) - if mount_point in source_directories: - source_directories.remove(mount_point) - - source_directories.append(snapshot_mount_path_for_borg) - return [] @@ -245,7 +281,7 @@ def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_ zfs_command = hook_config.get('zfs_command', 'zfs') try: - datasets = get_all_datasets(zfs_command) + dataset_mount_points = get_all_dataset_mount_points(zfs_command) except FileNotFoundError: logger.debug(f'{log_prefix}: Could not find "{zfs_command}" command') return @@ -275,7 +311,9 @@ def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_ if not dry_run: shutil.rmtree(snapshots_directory, ignore_errors=True) - for _, mount_point in datasets: + # Reversing the sorted datasets ensures that we unmount the longer mount point paths of + # child datasets before the shorter mount point paths of parent datasets. 
+ for mount_point in reversed(dataset_mount_points): snapshot_mount_path = os.path.join(snapshots_directory, mount_point.lstrip(os.path.sep)) if not os.path.isdir(snapshot_mount_path): continue diff --git a/docs/how-to/snapshot-your-filesystems.md b/docs/how-to/snapshot-your-filesystems.md index c572b067..6f7680e5 100644 --- a/docs/how-to/snapshot-your-filesystems.md +++ b/docs/how-to/snapshot-your-filesystems.md @@ -76,6 +76,14 @@ in an archive at `/var/dataset` as well—even if borgmatic has to mount the snapshot somewhere in `/run/user/1000/borgmatic/zfs_snapshots/` to perform the backup. +New in version 1.9.4 borgmatic +is smart enough to look at the parent (and grandparent, etc.) directories of +each of your `source_directories` to discover any datasets. For instance, +let's say you add `/var/log` and `/var/lib` to your source directories, but +`/var` is a dataset. borgmatic will discover that and snapshot `/var` +accordingly. This also works even with nested datasets; borgmatic selects +the dataset that's the "closest" parent to your source directories. + With Borg version 1.2 and earlierSnapshotted files are instead stored at a path dependent on the [runtime