2022-05-28 21:42:19 +00:00
import argparse
import datetime
import json
2017-11-03 05:22:40 +00:00
import logging
2022-05-28 21:42:19 +00:00
import os
import pathlib
2017-08-05 23:21:39 +00:00
2022-10-04 05:50:37 +00:00
from borgmatic . borg import environment , extract , feature , flags , rinfo , state
2020-05-10 04:53:16 +00:00
from borgmatic . execute import DO_NOT_CAPTURE , execute_command
2017-08-05 23:21:39 +00:00
2022-05-28 21:42:19 +00:00
# Checks (and how often to run them) used when the configuration does not
# specify any "checks" of its own.
DEFAULT_CHECKS = (
    {'name': 'repository', 'frequency': '1 month'},
    {'name': 'archives', 'frequency': '1 month'},
)

# Default archive name prefix used to match archives during "archives" checks.
DEFAULT_PREFIX = '{hostname}-'


logger = logging.getLogger(__name__)
2017-11-03 05:22:40 +00:00
2022-05-28 21:42:19 +00:00
def parse_checks(consistency_config, only_checks=None):
    '''
    Given a consistency config with a "checks" sequence of dicts and an optional list of override
    checks, return a tuple of named checks to run.

    For example, given a retention config of:

        {'checks': ({'name': 'repository'}, {'name': 'archives'})}

    This will be returned as:

        ('repository', 'archives')

    If no "checks" option is present in the config, return the DEFAULT_CHECKS. If a checks value
    has a name of "disabled", return an empty tuple, meaning that no checks should be run.
    '''
    # Override checks take precedence over configured ones; fall back to defaults when the
    # config has no (or an empty) "checks" value.
    checks = only_checks or tuple(
        check_config['name']
        for check_config in (consistency_config.get('checks', None) or DEFAULT_CHECKS)
    )
    checks = tuple(check.lower() for check in checks)

    if 'disabled' in checks:
        # "disabled" wins over any other configured checks, so warn if it's mixed with others.
        if len(checks) > 1:
            logger.warning(
                'Multiple checks are configured, but one of them is "disabled"; not running any checks'
            )
        return ()

    return checks
def parse_frequency(frequency):
    '''
    Given a frequency string with a number and a unit of time, return a corresponding
    datetime.timedelta instance or None if the frequency is None or "always".

    For instance, given "3 weeks", return datetime.timedelta(weeks=3)

    Raise ValueError if the given frequency cannot be parsed.
    '''
    if not frequency:
        return None

    frequency = frequency.strip().lower()

    if frequency == 'always':
        return None

    try:
        number, time_unit = frequency.split(' ')
        number = int(number)
    except ValueError:
        raise ValueError(f"Could not parse consistency check frequency '{frequency}'")

    # Accept both singular and plural units; timedelta only knows the plural keyword forms.
    if not time_unit.endswith('s'):
        time_unit += 's'

    # timedelta has no month/year units, so approximate them in days.
    if time_unit == 'months':
        number *= 30
        time_unit = 'days'
    elif time_unit == 'years':
        number *= 365
        time_unit = 'days'

    try:
        return datetime.timedelta(**{time_unit: number})
    except TypeError:
        # An unrecognized unit produces an unexpected keyword argument.
        raise ValueError(f"Could not parse consistency check frequency '{frequency}'")
2022-05-29 02:29:33 +00:00
def filter_checks_on_frequency(
    location_config, consistency_config, borg_repository_id, checks, force
):
    '''
    Given a location config, a consistency config with a "checks" sequence of dicts, a Borg
    repository ID, a sequence of checks, and whether to force checks to run, filter down those
    checks based on the configured "frequency" for each check as compared to its check time file.

    In other words, a check whose check time file's timestamp is too new (based on the configured
    frequency) will get cut from the returned sequence of checks. Example:

        consistency_config = {
            'checks': [
                {
                    'name': 'archives',
                    'frequency': '2 weeks',
                },
            ]
        }

    When this function is called with that consistency_config and "archives" in checks, "archives"
    will get filtered out of the returned result if its check time file is newer than 2 weeks old,
    indicating that it's not yet time to run that check again.

    Raise ValueError if a frequency cannot be parsed.
    '''
    filtered_checks = list(checks)

    # Forcing makes every requested check due immediately, regardless of frequency.
    if force:
        return tuple(filtered_checks)

    for check_config in consistency_config.get('checks', DEFAULT_CHECKS):
        check = check_config['name']
        if checks and check not in checks:
            continue

        frequency_delta = parse_frequency(check_config.get('frequency'))
        if not frequency_delta:
            continue

        check_time = read_check_time(
            make_check_time_path(location_config, borg_repository_id, check)
        )
        # No recorded check time means the check has never run, so it's due now.
        if not check_time:
            logger.debug(f'Running {check} check due to never having run before')
            continue

        # If we've not yet reached the time when the frequency dictates we're ready for another
        # check, skip this check.
        if datetime.datetime.now() < check_time + frequency_delta:
            remaining = check_time + frequency_delta - datetime.datetime.now()
            logger.info(
                f'Skipping {check} check due to configured frequency; {remaining} until next check'
            )
            filtered_checks.remove(check)
        else:
            due_at = check_time + frequency_delta
            logger.debug(
                f"Running {check} check due to configured frequency; the last check was at {check_time} with a frequency of {check_config.get('frequency')}, so the check was due at {due_at} ({datetime.datetime.now() - due_at} ago)"
            )

    return tuple(filtered_checks)
2017-08-05 23:21:39 +00:00
2022-10-04 05:50:37 +00:00
def make_check_flags(local_borg_version, checks, check_last=None, prefix=None):
    '''
    Given the local Borg version and a parsed sequence of checks, transform the checks into tuple of
    command-line flags.

    For example, given parsed checks of:

        ('repository',)

    This will be returned as:

        ('--repository-only',)

    However, if both "repository" and "archives" are in checks, then omit them from the returned
    flags because Borg does both checks by default. If "data" is in checks, that implies "archives".

    Additionally, if a check_last value is given and "archives" is in checks, then include a
    "--last" flag. And if a prefix value is given and "archives" is in checks, then include a
    "--match-archives" flag.
    '''
    if 'data' in checks:
        data_flags = ('--verify-data',)
        # Borg's --verify-data implies an archives check, so treat "data" as including "archives".
        checks += ('archives',)
    else:
        data_flags = ()

    if 'archives' in checks:
        last_flags = ('--last', str(check_last)) if check_last else ()
        # Newer Borg versions renamed --glob-archives to --match-archives (with a "sh:" pattern
        # style selector).
        if feature.available(feature.Feature.MATCH_ARCHIVES, local_borg_version):
            match_archives_flags = ('--match-archives', f'sh:{prefix}*') if prefix else ()
        else:
            match_archives_flags = ('--glob-archives', f'{prefix}*') if prefix else ()
    else:
        last_flags = ()
        match_archives_flags = ()
        # These options only apply to archive-level checks, so warn when they'd be silently unused.
        if check_last:
            logger.warning(
                'Ignoring check_last option, as "archives" or "data" are not in consistency checks'
            )
        if prefix:
            logger.warning(
                'Ignoring consistency prefix option, as "archives" or "data" are not in consistency checks'
            )

    common_flags = last_flags + match_archives_flags + data_flags

    # Running both repository and archives checks is Borg's default, so no "--*-only" flag needed.
    if {'repository', 'archives'}.issubset(set(checks)):
        return common_flags

    return (
        tuple('--{}-only'.format(check) for check in checks if check in ('repository', 'archives'))
        + common_flags
    )
2017-08-05 23:21:39 +00:00
2022-05-28 21:42:19 +00:00
def make_check_time_path(location_config, borg_repository_id, check_type):
    '''
    Given a location configuration dict, a Borg repository ID, and the name of a check type
    ("repository", "archives", etc.), return a path for recording that check's time (the time of
    that check last occurring).
    '''
    return os.path.join(
        os.path.expanduser(
            location_config.get(
                'borgmatic_source_directory', state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY
            )
        ),
        'checks',
        borg_repository_id,
        check_type,
    )
def write_check_time(path):  # pragma: no cover
    '''
    Record a check time of now as the modification time of the given path.
    '''
    logger.debug(f'Writing check time at {path}')
    os.makedirs(os.path.dirname(path), mode=0o700, exist_ok=True)
    # The mode must be passed to touch(), not the Path constructor: Path() has no "mode"
    # parameter (it was silently ignored on Python <= 3.9 and errors on newer versions).
    pathlib.Path(path).touch(mode=0o600)
def read_check_time(path):
    '''
    Return the check time based on the modification time of the given path. Return None if the path
    doesn't exist.
    '''
    logger.debug(f'Reading check time from {path}')

    try:
        # The check time is stored as the file's modification timestamp rather than as contents.
        return datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
    except FileNotFoundError:
        return None
2018-09-30 05:45:00 +00:00
def check_archives(
    repository,
    location_config,
    storage_config,
    consistency_config,
    local_borg_version,
    local_path='borg',
    remote_path=None,
    progress=None,
    repair=None,
    only_checks=None,
    force=None,
):
    '''
    Given a local or remote repository path, a location config dict, a storage config dict, a
    consistency config dict, the local Borg version, local/remote commands to run, whether to
    include progress information, whether to attempt a repair, an optional list of checks to use
    instead of configured checks, and whether to force checks to run, check the contained Borg
    archives for consistency.

    If there are no consistency checks to run, skip running them.

    Raise ValueError if the Borg repository ID cannot be determined.
    '''
    # The repository ID keys each check's "last run" time file, so checks are tracked
    # per-repository even if the repository path changes.
    try:
        borg_repository_id = json.loads(
            rinfo.display_repository_info(
                repository,
                storage_config,
                local_borg_version,
                argparse.Namespace(json=True),
                local_path,
                remote_path,
            )
        )['repository']['id']
    except (json.JSONDecodeError, KeyError):
        raise ValueError(f'Cannot determine Borg repository ID for {repository}')

    checks = filter_checks_on_frequency(
        location_config,
        consistency_config,
        borg_repository_id,
        parse_checks(consistency_config, only_checks),
        force,
    )
    check_last = consistency_config.get('check_last', None)
    lock_wait = None
    extra_borg_options = storage_config.get('extra_borg_options', {}).get('check', '')

    # Only these check types are implemented by "borg check" itself; "extract" is handled
    # separately below.
    if set(checks).intersection({'repository', 'archives', 'data'}):
        lock_wait = storage_config.get('lock_wait', None)

        verbosity_flags = ()
        if logger.isEnabledFor(logging.INFO):
            verbosity_flags = ('--info',)
        if logger.isEnabledFor(logging.DEBUG):
            verbosity_flags = ('--debug', '--show-rc')

        prefix = consistency_config.get('prefix', DEFAULT_PREFIX)

        full_command = (
            (local_path, 'check')
            + (('--repair',) if repair else ())
            + make_check_flags(local_borg_version, checks, check_last, prefix)
            + (('--remote-path', remote_path) if remote_path else ())
            + (('--lock-wait', str(lock_wait)) if lock_wait else ())
            + verbosity_flags
            + (('--progress',) if progress else ())
            + (tuple(extra_borg_options.split(' ')) if extra_borg_options else ())
            + flags.make_repository_flags(repository, local_borg_version)
        )

        borg_environment = environment.make_environment(storage_config)

        # The Borg repair option triggers an interactive prompt, which won't work when output is
        # captured. And progress messes with the terminal directly.
        if repair or progress:
            execute_command(
                full_command, output_file=DO_NOT_CAPTURE, extra_environment=borg_environment
            )
        else:
            execute_command(full_command, extra_environment=borg_environment)

        # Record successful completion so frequency-based filtering can skip these next time.
        for check in checks:
            write_check_time(make_check_time_path(location_config, borg_repository_id, check))

    if 'extract' in checks:
        extract.extract_last_archive_dry_run(
            storage_config, local_borg_version, repository, lock_wait, local_path, remote_path
        )
        write_check_time(make_check_time_path(location_config, borg_repository_id, 'extract'))