diff --git a/borgmatic/hooks/data_source/lvm.py b/borgmatic/hooks/data_source/lvm.py
index 8ee1d03c..ffdf9e14 100644
--- a/borgmatic/hooks/data_source/lvm.py
+++ b/borgmatic/hooks/data_source/lvm.py
@@ -50,6 +50,9 @@ def get_logical_volumes(lsblk_command, source_directories=None):
except json.JSONDecodeError as error:
raise ValueError('Invalid {lsblk_command} JSON output: {error}')
+
+ candidate_source_directories = set(source_directories or ())
+
try:
return tuple(
(device['name'], device['path'], device['mountpoint'], contained_source_directories)
@@ -57,7 +60,7 @@ def get_logical_volumes(lsblk_command, source_directories=None):
if device['mountpoint'] and device['type'] == 'lvm'
for contained_source_directories in (
borgmatic.hooks.data_source.snapshot.get_contained_directories(
- device['mountpoint'], source_directories
+ device['mountpoint'], candidate_source_directories
),
)
if not source_directories or contained_source_directories
diff --git a/borgmatic/hooks/data_source/snapshot.py b/borgmatic/hooks/data_source/snapshot.py
index 097b51ea..b03b01d9 100644
--- a/borgmatic/hooks/data_source/snapshot.py
+++ b/borgmatic/hooks/data_source/snapshot.py
@@ -1,3 +1,4 @@
+import itertools
import pathlib
@@ -6,18 +7,24 @@ IS_A_HOOK = False
def get_contained_directories(parent_directory, candidate_contained_directories):
'''
- Given a parent directory and a sequence of candiate directories potentially inside it, get the
- subset of contained directories for which the parent directory is actually the parent, a
- grandparent, the very same directory, etc. The idea is if, say, /var/log and /var/lib are
- candidate contained directories, but there's a parent directory (logical volume, dataset,
- subvolume, etc.) at /var, then /var is what we want to snapshot.
+    Given a parent directory and a set of candidate directories potentially inside it, get the subset
+ of contained directories for which the parent directory is actually the parent, a grandparent,
+ the very same directory, etc. The idea is if, say, /var/log and /var/lib are candidate contained
+ directories, but there's a parent directory (logical volume, dataset, subvolume, etc.) at /var,
+ then /var is what we want to snapshot.
+
+ Also mutate the given set of candidate contained directories to remove any actually contained
+ directories from it.
'''
if not candidate_contained_directories:
return ()
- return tuple(
+ contained = tuple(
candidate
for candidate in candidate_contained_directories
if parent_directory == candidate
or pathlib.PurePosixPath(parent_directory) in pathlib.PurePath(candidate).parents
)
+ candidate_contained_directories -= set(contained)
+
+ return contained
diff --git a/borgmatic/hooks/data_source/zfs.py b/borgmatic/hooks/data_source/zfs.py
index 5f011a1c..a56489e0 100644
--- a/borgmatic/hooks/data_source/zfs.py
+++ b/borgmatic/hooks/data_source/zfs.py
@@ -1,3 +1,4 @@
+import collections
import glob
import logging
import os
@@ -5,6 +6,7 @@ import shutil
import subprocess
import borgmatic.config.paths
+import borgmatic.hooks.data_source.snapshot
import borgmatic.execute
logger = logging.getLogger(__name__)
@@ -21,6 +23,9 @@ BORGMATIC_SNAPSHOT_PREFIX = 'borgmatic-'
BORGMATIC_USER_PROPERTY = 'org.torsion.borgmatic:backup'
+Dataset = collections.namedtuple('Dataset', ('name', 'mount_point', 'user_property_value', 'contained_source_directories'))
+
+
def get_datasets_to_backup(zfs_command, source_directories):
'''
Given a ZFS command to run and a sequence of configured source directories, find the
@@ -29,7 +34,7 @@ def get_datasets_to_backup(zfs_command, source_directories):
datasets tagged with a borgmatic-specific user property, whether or not they appear in source
directories.
- Return the result as a sequence of (dataset name, mount point) pairs.
+ Return the result as a sequence of Dataset instances, sorted by mount point.
'''
list_output = borgmatic.execute.execute_command_and_capture_output(
(
@@ -42,23 +47,44 @@ def get_datasets_to_backup(zfs_command, source_directories):
f'name,mountpoint,{BORGMATIC_USER_PROPERTY}',
)
)
- source_directories_set = set(source_directories)
try:
- return tuple(
- (dataset_name, mount_point)
- for line in list_output.splitlines()
- for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),)
- if mount_point in source_directories_set or user_property_value == 'auto'
+ # Sort from longest to shortest mount points, so longer mount points get a whack at the
+ # candidate source directory piñata before their parents do. (Source directories are
+ # consumed during the second loop below, so no two datasets get the same contained source
+ # directories.)
+ datasets = sorted(
+ (
+ Dataset(dataset_name, mount_point, user_property_value, ())
+ for line in list_output.splitlines()
+ for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),)
+ ),
+ key=lambda dataset: dataset.mount_point,
+ reverse=True,
)
except ValueError:
raise ValueError('Invalid {zfs_command} list output')
+ candidate_source_directories = set(source_directories)
-def get_all_datasets(zfs_command):
+ return sorted(
+ tuple(
+ Dataset(dataset.name, dataset.mount_point, dataset.user_property_value, contained_source_directories)
+ for dataset in datasets
+ for contained_source_directories in (
+ borgmatic.hooks.data_source.snapshot.get_contained_directories(
+ dataset.mount_point, candidate_source_directories
+ ),
+ )
+ if contained_source_directories or dataset.user_property_value == 'auto'
+ ),
+ key=lambda dataset: dataset.mount_point,
+ )
+
+
+def get_all_dataset_mount_points(zfs_command):
'''
- Given a ZFS command to run, return all ZFS datasets as a sequence of (dataset name, mount point)
- pairs.
+    Given a ZFS command to run, return the mount points of all ZFS datasets as a sorted sequence.
'''
list_output = borgmatic.execute.execute_command_and_capture_output(
(
@@ -68,15 +94,13 @@ def get_all_datasets(zfs_command):
'-t',
'filesystem',
'-o',
- 'name,mountpoint',
+ 'mountpoint',
)
)
try:
return tuple(
- (dataset_name, mount_point)
- for line in list_output.splitlines()
- for (dataset_name, mount_point) in (line.rstrip().split('\t'),)
+ sorted(line.rstrip() for line in list_output.splitlines())
)
except ValueError:
raise ValueError('Invalid {zfs_command} list output')
@@ -147,40 +171,52 @@ def dump_data_sources(
# Snapshot each dataset, rewriting source directories to use the snapshot paths.
snapshot_name = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
+ normalized_runtime_directory = os.path.normpath(borgmatic_runtime_directory)
if not requested_datasets:
logger.warning(f'{log_prefix}: No ZFS datasets found to snapshot{dry_run_label}')
- for dataset_name, mount_point in requested_datasets:
- full_snapshot_name = f'{dataset_name}@{snapshot_name}'
- logger.debug(f'{log_prefix}: Creating ZFS snapshot {full_snapshot_name}{dry_run_label}')
+ for dataset in requested_datasets:
+ full_snapshot_name = f'{dataset.name}@{snapshot_name}'
+ logger.debug(f'{log_prefix}: Creating ZFS snapshot {full_snapshot_name} of {dataset.mount_point}{dry_run_label}')
if not dry_run:
snapshot_dataset(zfs_command, full_snapshot_name)
# Mount the snapshot into a particular named temporary directory so that the snapshot ends
# up in the Borg archive at the "original" dataset mount point path.
- snapshot_mount_path_for_borg = os.path.join(
- os.path.normpath(borgmatic_runtime_directory),
+ snapshot_mount_path = os.path.join(
+ normalized_runtime_directory,
'zfs_snapshots',
- '.', # Borg 1.4+ "slashdot" hack.
- mount_point.lstrip(os.path.sep),
+ dataset.mount_point.lstrip(os.path.sep),
)
- snapshot_mount_path = os.path.normpath(snapshot_mount_path_for_borg)
+
logger.debug(
f'{log_prefix}: Mounting ZFS snapshot {full_snapshot_name} at {snapshot_mount_path}{dry_run_label}'
)
- if not dry_run:
- mount_snapshot(
- hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path
+ if dry_run:
+ continue
+
+ mount_snapshot(
+ hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path
+ )
+
+ for source_directory in dataset.contained_source_directories:
+ try:
+ source_directories.remove(source_directory)
+ except ValueError:
+ pass
+
+ source_directories.append(
+ os.path.join(
+ normalized_runtime_directory,
+ 'zfs_snapshots',
+ '.', # Borg 1.4+ "slashdot" hack.
+ source_directory.lstrip(os.path.sep),
+ )
)
- if mount_point in source_directories:
- source_directories.remove(mount_point)
-
- source_directories.append(snapshot_mount_path_for_borg)
-
return []
@@ -245,7 +281,7 @@ def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_
zfs_command = hook_config.get('zfs_command', 'zfs')
try:
- datasets = get_all_datasets(zfs_command)
+ dataset_mount_points = get_all_dataset_mount_points(zfs_command)
except FileNotFoundError:
logger.debug(f'{log_prefix}: Could not find "{zfs_command}" command')
return
@@ -275,7 +311,9 @@ def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_
if not dry_run:
shutil.rmtree(snapshots_directory, ignore_errors=True)
- for _, mount_point in datasets:
+ # Reversing the sorted datasets ensures that we unmount the longer mount point paths of
+ # child datasets before the shorter mount point paths of parent datasets.
+ for mount_point in reversed(dataset_mount_points):
snapshot_mount_path = os.path.join(snapshots_directory, mount_point.lstrip(os.path.sep))
if not os.path.isdir(snapshot_mount_path):
continue
diff --git a/docs/how-to/snapshot-your-filesystems.md b/docs/how-to/snapshot-your-filesystems.md
index c572b067..6f7680e5 100644
--- a/docs/how-to/snapshot-your-filesystems.md
+++ b/docs/how-to/snapshot-your-filesystems.md
@@ -76,6 +76,14 @@ in an archive at `/var/dataset` as well—even if borgmatic has to mount the
snapshot somewhere in `/run/user/1000/borgmatic/zfs_snapshots/` to perform the
backup.
+<span class="minilink minilink-addedin">New in version 1.9.4</span> borgmatic
+is smart enough to look at the parent (and grandparent, etc.) directories of
+each of your `source_directories` to discover any datasets. For instance,
+let's say you add `/var/log` and `/var/lib` to your source directories, but
+`/var` is a dataset. borgmatic will discover that and snapshot `/var`
+accordingly. This also works even with nested datasets; borgmatic selects
+the dataset that's the "closest" parent to your source directories.
+
With Borg version 1.2 and
earlierSnapshotted files are instead stored at a path dependent on the
[runtime