From 1ea4433aa98c6a15b6ba223f2835b900e54b5c55 Mon Sep 17 00:00:00 2001 From: Dan Helfman Date: Tue, 11 Apr 2023 21:49:10 -0700 Subject: [PATCH] Selectively shallow merge certain mappings or sequences when including configuration files (#672). --- NEWS | 3 + borgmatic/config/load.py | 79 +++-- docs/how-to/make-per-application-backups.md | 59 ++++ tests/integration/config/test_load.py | 303 +++++++++++++++++--- 4 files changed, 374 insertions(+), 70 deletions(-) diff --git a/NEWS b/NEWS index c9869b90..6d4f5448 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,9 @@ "match_archives" configuration option for the "transfer", "list", "rlist", and "info" actions. * #668: Fix error when running the "prune" action with both "archive_name_format" and "prefix" options set. + * #672: Selectively shallow merge certain mappings or sequences when including configuration files. + See the documentation for more information: + https://torsion.org/borgmatic/docs/how-to/make-per-application-backups/#shallow-merge * #673: View the results of configuration file merging via "validate-borgmatic-config --show" flag. See the documentation for more information: https://torsion.org/borgmatic/docs/how-to/make-per-application-backups/#debugging-includes diff --git a/borgmatic/config/load.py b/borgmatic/config/load.py index f3c45d52..e9caeb4c 100644 --- a/borgmatic/config/load.py +++ b/borgmatic/config/load.py @@ -38,6 +38,24 @@ def include_configuration(loader, filename_node, include_directory): return load_configuration(include_filename) +def retain_node_error(loader, node): + ''' + Given a ruamel.yaml.loader.Loader and a YAML node, raise an error. + + Raise ValueError if a mapping or sequence node is given, as that indicates that "!retain" was + used in a configuration file without a merge. In configuration files with a merge, mapping and + sequence nodes with "!retain" tags are handled by deep_merge_nodes() below. + + Also raise ValueError if a scalar node is given, as "!retain" is not supported on scalar nodes. + ''' + if isinstance(node, (ruamel.yaml.nodes.MappingNode, ruamel.yaml.nodes.SequenceNode)): + raise ValueError( + 'The !retain tag may only be used within a configuration file containing a merged !include tag.' + ) + + raise ValueError('The !retain tag may only be used on a YAML mapping or sequence.') + + class Include_constructor(ruamel.yaml.SafeConstructor): ''' A YAML "constructor" (a ruamel.yaml concept) that supports a custom "!include" tag for including @@ -50,6 +68,7 @@ class Include_constructor(ruamel.yaml.SafeConstructor): '!include', functools.partial(include_configuration, include_directory=include_directory), ) + self.add_constructor('!retain', retain_node_error) def flatten_mapping(self, node): ''' @@ -176,6 +195,8 @@ def deep_merge_nodes(nodes): ), ] + If a mapping or sequence node has a YAML "!retain" tag, then that node is not merged. + The purpose of deep merging like this is to support, for instance, merging one borgmatic configuration file into another for reuse, such that a configuration section ("retention", etc.) does not completely replace the corresponding section in a merged file. @@ -198,32 +219,42 @@ def deep_merge_nodes(nodes): # If we're dealing with MappingNodes, recurse and merge its values as well. if isinstance(b_value, ruamel.yaml.nodes.MappingNode): - replaced_nodes[(b_key, b_value)] = ( - b_key, - ruamel.yaml.nodes.MappingNode( - tag=b_value.tag, - value=deep_merge_nodes(a_value.value + b_value.value), - start_mark=b_value.start_mark, - end_mark=b_value.end_mark, - flow_style=b_value.flow_style, - comment=b_value.comment, - anchor=b_value.anchor, - ), - ) + # A "!retain" tag says to skip deep merging for this node. Replace the tag so + # downstream schema validation doesn't break on our application-specific tag. + if b_value.tag == '!retain': + b_value.tag = 'tag:yaml.org,2002:map' + else: + replaced_nodes[(b_key, b_value)] = ( + b_key, + ruamel.yaml.nodes.MappingNode( + tag=b_value.tag, + value=deep_merge_nodes(a_value.value + b_value.value), + start_mark=b_value.start_mark, + end_mark=b_value.end_mark, + flow_style=b_value.flow_style, + comment=b_value.comment, + anchor=b_value.anchor, + ), + ) # If we're dealing with SequenceNodes, merge by appending one sequence to the other. elif isinstance(b_value, ruamel.yaml.nodes.SequenceNode): - replaced_nodes[(b_key, b_value)] = ( - b_key, - ruamel.yaml.nodes.SequenceNode( - tag=b_value.tag, - value=a_value.value + b_value.value, - start_mark=b_value.start_mark, - end_mark=b_value.end_mark, - flow_style=b_value.flow_style, - comment=b_value.comment, - anchor=b_value.anchor, - ), - ) + # A "!retain" tag says to skip deep merging for this node. Replace the tag so + # downstream schema validation doesn't break on our application-specific tag. + if b_value.tag == '!retain': + b_value.tag = 'tag:yaml.org,2002:seq' + else: + replaced_nodes[(b_key, b_value)] = ( + b_key, + ruamel.yaml.nodes.SequenceNode( + tag=b_value.tag, + value=a_value.value + b_value.value, + start_mark=b_value.start_mark, + end_mark=b_value.end_mark, + flow_style=b_value.flow_style, + comment=b_value.comment, + anchor=b_value.anchor, + ), + ) return [ replaced_nodes.get(node, node) for node in nodes if replaced_nodes.get(node) != DELETED_NODE diff --git a/docs/how-to/make-per-application-backups.md b/docs/how-to/make-per-application-backups.md index ab293472..ddaa37d2 100644 --- a/docs/how-to/make-per-application-backups.md +++ b/docs/how-to/make-per-application-backups.md @@ -276,6 +276,65 @@ include, the local file's option takes precedence. list values are appended together. +### Shallow merge + +Even though deep merging is generally pretty handy for included files, +sometimes you want specific sections in the local file to take precedence over +included sections—without any merging occuring for them. + +New in version 1.7.12 That's +where the `!retain` tag comes in. Whenever you're merging an included file +into your configuration file, you can optionally add the `!retain` tag to +particular local mappings or sequences to retain the local values and ignore +included values. + +For instance, start with this configuration file containing the `!retain` tag +on the `retention` mapping: + +```yaml +<<: !include /etc/borgmatic/common.yaml + +location: + repositories: + - repo.borg + +retention: !retain + keep_daily: 5 +``` + +And `common.yaml` like this: + +```yaml +location: + repositories: + - common.borg + +retention: + keep_hourly: 24 + keep_daily: 7 +``` + +Once this include gets merged in, the resulting configuration will have a +`keep_daily` value of `5` and nothing else in the `retention` section. That's +because the `!retain` tag says to retain the local version of `retention` and +ignore any values coming in from the include. But because the `repositories` +sequence doesn't have a `!retain` tag, that sequence still gets merged +together to contain both `common.borg` and `repo.borg`. + +The `!retain` tag can only be placed on mapping and sequence nodes, and it +goes right after the name of the option (and its colon) on the same line. The +effects of `!retain` are recursive, meaning that if you place a `!retain` tag +on a top-level mapping, even deeply nested values within it will not be +merged. + +Additionally, the `!retain` tag only works in a configuration file that also +performs a merge include with `<<: !include`. It doesn't make sense within, +for instance, an included configuration file itself (unless it in turn +performs its own merge include). That's because `!retain` only applies to the +file doing the include; it doesn't work in reverse or propagate through +includes. + + ## Debugging includes New in version 1.7.12 If you'd diff --git a/tests/integration/config/test_load.py b/tests/integration/config/test_load.py index 3382d734..ef9be0f8 100644 --- a/tests/integration/config/test_load.py +++ b/tests/integration/config/test_load.py @@ -2,7 +2,6 @@ import io import sys import pytest -import ruamel.yaml from flexmock import flexmock from borgmatic.config import load as module @@ -150,6 +149,99 @@ def test_load_configuration_merges_include(): assert module.load_configuration('config.yaml') == {'foo': 'override', 'baz': 'quux'} +def test_load_configuration_with_retain_tag_merges_include_but_keeps_local_values(): + builtins = flexmock(sys.modules['builtins']) + flexmock(module.os).should_receive('getcwd').and_return('/tmp') + flexmock(module.os.path).should_receive('isabs').and_return(False) + flexmock(module.os.path).should_receive('exists').and_return(True) + include_file = io.StringIO( + ''' + stuff: + foo: bar + baz: quux + + other: + a: b + c: d + ''' + ) + include_file.name = 'include.yaml' + builtins.should_receive('open').with_args('/tmp/include.yaml').and_return(include_file) + config_file = io.StringIO( + ''' + stuff: !retain + foo: override + + other: + a: override + <<: !include include.yaml + ''' + ) + config_file.name = 'config.yaml' + builtins.should_receive('open').with_args('config.yaml').and_return(config_file) + + assert module.load_configuration('config.yaml') == { + 'stuff': {'foo': 'override'}, + 'other': {'a': 'override', 'c': 'd'}, + } + + +def test_load_configuration_with_retain_tag_but_without_merge_include_raises(): + builtins = flexmock(sys.modules['builtins']) + flexmock(module.os).should_receive('getcwd').and_return('/tmp') + flexmock(module.os.path).should_receive('isabs').and_return(False) + flexmock(module.os.path).should_receive('exists').and_return(True) + include_file = io.StringIO( + ''' + stuff: !retain + foo: bar + baz: quux + ''' + ) + include_file.name = 'include.yaml' + builtins.should_receive('open').with_args('/tmp/include.yaml').and_return(include_file) + config_file = io.StringIO( + ''' + stuff: + foo: override + <<: !include include.yaml + ''' + ) + config_file.name = 'config.yaml' + builtins.should_receive('open').with_args('config.yaml').and_return(config_file) + + with pytest.raises(ValueError): + assert module.load_configuration('config.yaml') + + +def test_load_configuration_with_retain_tag_on_scalar_raises(): + builtins = flexmock(sys.modules['builtins']) + flexmock(module.os).should_receive('getcwd').and_return('/tmp') + flexmock(module.os.path).should_receive('isabs').and_return(False) + flexmock(module.os.path).should_receive('exists').and_return(True) + include_file = io.StringIO( + ''' + stuff: + foo: bar + baz: quux + ''' + ) + include_file.name = 'include.yaml' + builtins.should_receive('open').with_args('/tmp/include.yaml').and_return(include_file) + config_file = io.StringIO( + ''' + stuff: + foo: !retain override + <<: !include include.yaml + ''' + ) + config_file.name = 'config.yaml' + builtins.should_receive('open').with_args('config.yaml').and_return(config_file) + + with pytest.raises(ValueError): + assert module.load_configuration('config.yaml') + + def test_load_configuration_does_not_merge_include_list(): builtins = flexmock(sys.modules['builtins']) flexmock(module.os).should_receive('getcwd').and_return('/tmp') @@ -173,42 +265,59 @@ def test_load_configuration_does_not_merge_include_list(): config_file.name = 'config.yaml' builtins.should_receive('open').with_args('config.yaml').and_return(config_file) - with pytest.raises(ruamel.yaml.error.YAMLError): + with pytest.raises(module.ruamel.yaml.error.YAMLError): assert module.load_configuration('config.yaml') +@pytest.mark.parametrize( + 'node_class', + ( + module.ruamel.yaml.nodes.MappingNode, + module.ruamel.yaml.nodes.SequenceNode, + module.ruamel.yaml.nodes.ScalarNode, + ), +) +def test_retain_node_error_raises(node_class): + with pytest.raises(ValueError): + module.retain_node_error( + loader=flexmock(), node=node_class(tag=flexmock(), value=flexmock()) + ) + + def test_deep_merge_nodes_replaces_colliding_scalar_values(): node_values = [ ( - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='keep_hourly' ), - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='24'), + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:int', value='24' + ), ), ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='keep_daily' ), - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='7'), + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='7'), ), ], ), ), ( - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='keep_daily' ), - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='5'), + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='5'), ), ], ), @@ -230,35 +339,39 @@ def test_deep_merge_nodes_replaces_colliding_scalar_values(): def test_deep_merge_nodes_keeps_non_colliding_scalar_values(): node_values = [ ( - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='keep_hourly' ), - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='24'), + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:int', value='24' + ), ), ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='keep_daily' ), - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='7'), + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='7'), ), ], ), ), ( - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='keep_minutely' ), - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='10'), + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:int', value='10' + ), ), ], ), @@ -282,28 +395,28 @@ def test_deep_merge_nodes_keeps_non_colliding_scalar_values(): def test_deep_merge_nodes_keeps_deeply_nested_values(): node_values = [ ( - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='storage'), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='storage'), + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='lock_wait' ), - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='5'), + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='5'), ), ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='extra_borg_options' ), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='init' ), - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='--init-option' ), ), @@ -314,22 +427,22 @@ def test_deep_merge_nodes_keeps_deeply_nested_values(): ), ), ( - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='storage'), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='storage'), + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='extra_borg_options' ), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='prune' ), - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='--prune-option' ), ), @@ -361,32 +474,32 @@ def test_deep_merge_nodes_keeps_deeply_nested_values(): def test_deep_merge_nodes_appends_colliding_sequence_values(): node_values = [ ( - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='hooks'), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='hooks'), + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='before_backup' ), - ruamel.yaml.nodes.SequenceNode( - tag='tag:yaml.org,2002:int', value=['echo 1', 'echo 2'] + module.ruamel.yaml.nodes.SequenceNode( + tag='tag:yaml.org,2002:seq', value=['echo 1', 'echo 2'] ), ), ], ), ), ( - ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='hooks'), - ruamel.yaml.nodes.MappingNode( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='hooks'), + module.ruamel.yaml.nodes.MappingNode( tag='tag:yaml.org,2002:map', value=[ ( - ruamel.yaml.nodes.ScalarNode( + module.ruamel.yaml.nodes.ScalarNode( tag='tag:yaml.org,2002:str', value='before_backup' ), - ruamel.yaml.nodes.SequenceNode( - tag='tag:yaml.org,2002:int', value=['echo 3', 'echo 4'] + module.ruamel.yaml.nodes.SequenceNode( + tag='tag:yaml.org,2002:seq', value=['echo 3', 'echo 4'] ), ), ], @@ -402,3 +515,101 @@ def test_deep_merge_nodes_appends_colliding_sequence_values(): assert len(options) == 1 assert options[0][0].value == 'before_backup' assert options[0][1].value == ['echo 1', 'echo 2', 'echo 3', 'echo 4'] + + +def test_deep_merge_nodes_keeps_mapping_values_tagged_with_retain(): + node_values = [ + ( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), + module.ruamel.yaml.nodes.MappingNode( + tag='tag:yaml.org,2002:map', + value=[ + ( + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:str', value='keep_hourly' + ), + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:int', value='24' + ), + ), + ( + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:str', value='keep_daily' + ), + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='7'), + ), + ], + ), + ), + ( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='retention'), + module.ruamel.yaml.nodes.MappingNode( + tag='!retain', + value=[ + ( + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:str', value='keep_daily' + ), + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:int', value='5'), + ), + ], + ), + ), + ] + + result = module.deep_merge_nodes(node_values) + assert len(result) == 1 + (section_key, section_value) = result[0] + assert section_key.value == 'retention' + assert section_value.tag == 'tag:yaml.org,2002:map' + options = section_value.value + assert len(options) == 1 + assert options[0][0].value == 'keep_daily' + assert options[0][1].value == '5' + + +def test_deep_merge_nodes_keeps_sequence_values_tagged_with_retain(): + node_values = [ + ( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='hooks'), + module.ruamel.yaml.nodes.MappingNode( + tag='tag:yaml.org,2002:map', + value=[ + ( + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:str', value='before_backup' + ), + module.ruamel.yaml.nodes.SequenceNode( + tag='tag:yaml.org,2002:seq', value=['echo 1', 'echo 2'] + ), + ), + ], + ), + ), + ( + module.ruamel.yaml.nodes.ScalarNode(tag='tag:yaml.org,2002:str', value='hooks'), + module.ruamel.yaml.nodes.MappingNode( + tag='tag:yaml.org,2002:map', + value=[ + ( + module.ruamel.yaml.nodes.ScalarNode( + tag='tag:yaml.org,2002:str', value='before_backup' + ), + module.ruamel.yaml.nodes.SequenceNode( + tag='!retain', value=['echo 3', 'echo 4'] + ), + ), + ], + ), + ), + ] + + result = module.deep_merge_nodes(node_values) + assert len(result) == 1 + (section_key, section_value) = result[0] + assert section_key.value == 'hooks' + options = section_value.value + assert len(options) == 1 + assert options[0][0].value == 'before_backup' + assert options[0][1].tag == 'tag:yaml.org,2002:seq' + assert options[0][1].value == ['echo 3', 'echo 4']