Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.d/development.lock.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pytest-xdist==3.8.0
coverage[toml]==7.13.5
pytest-cov==7.0.0
pytest-benchmark==5.2.3
freezegun==1.5.5
Cython==3.2.4
pre-commit==4.5.1
types-PyYAML==6.0.12.20250915
1 change: 1 addition & 0 deletions requirements.d/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pytest-xdist
coverage[toml]
pytest-cov
pytest-benchmark
freezegun
Cython
pre-commit
bandit[toml]
Expand Down
169 changes: 99 additions & 70 deletions src/borg/archiver/prune_cmd.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from collections import OrderedDict
from datetime import datetime, timezone, timedelta
from datetime import datetime, timezone
import logging
from operator import attrgetter
import os
import itertools

from ._common import with_repository, Highlander
from ..constants import * # NOQA
from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error
from ..helpers import archivename_validator
from ..helpers import ArchiveFormatter, ProgressIndicatorPercent, CommandError, Error
from ..helpers import archivename_validator, interval, int_or_interval, sig_int
from ..helpers import json_print, basic_json_data
from ..helpers.argparsing import ArgumentParser
from ..manifest import Manifest
Expand All @@ -17,27 +18,31 @@
logger = create_logger()


def prune_within(archives, seconds, kept_because):
target = datetime.now(timezone.utc) - timedelta(seconds=seconds)
kept_counter = 0
result = []
for a in archives:
if a.ts > target:
kept_counter += 1
kept_because[a.id] = ("within", kept_counter)
result.append(a)
return result
# The *_period_func functions create period grouping keys that group together archives falling within a certain
# period. Among the archives in each of these groups, only the latest (by creation timestamp) is kept.


def default_period_func(pattern):
def unique_period_func():
    """Build a period function that hands out a fresh key on every call.

    The returned callable ignores its argument and yields 0, 1, 2, ... from a
    counter private to this factory invocation, so no two archives ever share
    a period key.
    """
    sequence = itertools.count()

    def unique_values(_a):
        """Group archives by an incrementing counter, practically making each archive a group of 1"""
        key = next(sequence)
        return key

    return unique_values


def pattern_period_func(pattern):
    """Build a period function that formats an archive's timestamp with *pattern*.

    The returned callable reads ``a.ts``, converts it to the local time zone
    and returns the result of ``strftime(pattern)`` as the period key.
    """

    def inner(a):
        """Group archives by extracting given strftime-pattern from their creation timestamp"""
        # the period key is derived from local time, not from the stored time zone
        local_ts = a.ts.astimezone()
        return local_ts.strftime(pattern)

    return inner


def quarterly_13weekly_period_func(a):
"""Group archives by extracting the ISO-8601 13-week quarter from their creation timestamp"""
(year, week, _) = a.ts.astimezone().isocalendar() # local time
if week <= 13:
# Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7)
Expand All @@ -59,6 +64,7 @@ def quarterly_13weekly_period_func(a):


def quarterly_3monthly_period_func(a):
"""Group archives by extracting the 3-month quarter from their creation timestamp"""
lt = a.ts.astimezone() # local time
if lt.month <= 3:
# 1-1 to 3-31
Expand All @@ -76,51 +82,78 @@ def quarterly_3monthly_period_func(a):

PRUNING_PATTERNS = OrderedDict(
[
("secondly", default_period_func("%Y-%m-%d %H:%M:%S")),
("minutely", default_period_func("%Y-%m-%d %H:%M")),
("hourly", default_period_func("%Y-%m-%d %H")),
("daily", default_period_func("%Y-%m-%d")),
("weekly", default_period_func("%G-%V")),
("monthly", default_period_func("%Y-%m")),
# Each archive is considered for keeping
("within", unique_period_func()),
("last", unique_period_func()),
("keep", unique_period_func()),
# Last archive (by creation timestamp) within period group is considered for keeping
("secondly", pattern_period_func("%Y-%m-%d %H:%M:%S")),
("minutely", pattern_period_func("%Y-%m-%d %H:%M")),
("hourly", pattern_period_func("%Y-%m-%d %H")),
("daily", pattern_period_func("%Y-%m-%d")),
("weekly", pattern_period_func("%G-%V")),
("monthly", pattern_period_func("%Y-%m")),
("quarterly_13weekly", quarterly_13weekly_period_func),
("quarterly_3monthly", quarterly_3monthly_period_func),
("yearly", default_period_func("%Y")),
("yearly", pattern_period_func("%Y")),
]
)


def prune_split(archives, rule, n, kept_because=None):
last = None
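# Datetime cannot represent times before datetime.min; this is datetime.min made timezone-aware (UTC).
# NOTE(review): no extra day is actually added below to allow for time zone offsets - confirm whether one is needed.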
DATETIME_MIN_WITH_ZONE = datetime.min.replace(tzinfo=timezone.utc)


def prune_split(archives, rule, n_or_interval, base_timestamp, kept_because=None):
    """Select the archives that one pruning rule wants to keep.

    :param archives: archive objects exposing ``.ts`` (creation timestamp) and ``.id``.
    :param rule: key into PRUNING_PATTERNS; selects the period grouping function.
    :param n_or_interval: an int limits the number of archives kept by this rule; any
        other value (presumably a timedelta) is subtracted from *base_timestamp* and only
        archives with a timestamp strictly later than the result can be kept.
    :param base_timestamp: reference point for the interval form; unused for an int.
    :param kept_because: dict mapping archive id to ``(rule, count)``. It is updated in
        place and archives already present in it are not kept a second time. A new dict
        is created per call when omitted.
    :return: list of archives newly kept by this rule, newest first.
    """
    # A mutable default ({}) would be shared between calls and leak "already kept"
    # state from one call into the next, so use None as the sentinel.
    if kept_because is None:
        kept_because = {}

    if isinstance(n_or_interval, int):
        n, earliest_timestamp = n_or_interval, None
    else:
        n, earliest_timestamp = None, base_timestamp - n_or_interval

    def can_retain(a, keep):
        # count mode: room left below the limit; interval mode: archive is newer than the cutoff
        if n is not None:
            return len(keep) < n
        return a.ts > earliest_timestamp

    keep = []
    if n == 0 or len(archives) == 0:
        return keep

    last = None
    period_func = PRUNING_PATTERNS[rule]
    sorted_archives = sorted(archives, key=attrgetter("ts"), reverse=True)
    for a in sorted_archives:
        if not can_retain(a, keep):
            break
        period = period_func(a)
        if period != last:
            # first (= newest) archive seen for this period
            last = period
            if a.id not in kept_because:
                keep.append(a)
                kept_because[a.id] = (rule, len(keep))

    # Keep oldest archive if we didn't reach the target retention count
    oldest = sorted_archives[-1]
    if oldest.id not in kept_because and can_retain(oldest, keep):
        keep.append(oldest)
        kept_because[oldest.id] = (rule + "[oldest]", len(keep))

    return keep


class PruneMixIn:
@with_repository(compatibility=(Manifest.Operation.DELETE,))
def do_prune(self, args, repository, manifest):
"""Prune archives according to specified rules."""
if not any(
(
if all(
# Needs explicit None-check to cover Falsey timedelta(0)
e is None
for e in (
args.keep,
args.within,
args.last,
args.secondly,
args.minutely,
args.hourly,
Expand All @@ -130,11 +163,10 @@ def do_prune(self, args, repository, manifest):
args.quarterly_13weekly,
args.quarterly_3monthly,
args.yearly,
args.within,
)
):
raise CommandError(
'At least one of the "keep-within", "keep-last", '
'At least one of the "keep", "keep-within", "keep-last", '
'"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
'"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
'or "keep-yearly" settings must be specified.'
Expand All @@ -158,15 +190,12 @@ def do_prune(self, args, repository, manifest):
# (<rulename>, <how many archives were kept by this rule so far >)
kept_because = {}

# find archives which need to be kept because of the keep-within rule
if args.within:
keep += prune_within(archives, args.within, kept_because)

base_timestamp = datetime.now().astimezone()
# find archives which need to be kept because of the various time period rules
for rule in PRUNING_PATTERNS.keys():
num = getattr(args, rule, None)
if num is not None:
keep += prune_split(archives, rule, num, kept_because)
num_or_interval = getattr(args, rule, None)
if num_or_interval is not None:
keep += prune_split(archives, rule, num_or_interval, base_timestamp, kept_because)

to_delete = set(archives) - set(keep)
if not args.json:
Expand Down Expand Up @@ -337,81 +366,81 @@ def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
help="keep all archives within this time interval",
)
subparser.add_argument(
"--keep-last",
"--keep-last", dest="last", type=int, action=Highlander, help="number of archives to keep"
)
subparser.add_argument(
"--keep",
dest="keep",
type=int_or_interval,
action=Highlander,
help="number or time interval of archives to keep",
)
subparser.add_argument(
"--keep-secondly",
dest="secondly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of secondly archives to keep",
help="number or time interval of secondly archives to keep",
)
subparser.add_argument(
"--keep-minutely",
dest="minutely",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of minutely archives to keep",
help="number or time interval of minutely archives to keep",
)
subparser.add_argument(
"-H",
"--keep-hourly",
dest="hourly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of hourly archives to keep",
help="number or time interval of hourly archives to keep",
)
subparser.add_argument(
"-d",
"--keep-daily",
dest="daily",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of daily archives to keep",
help="number or time interval of daily archives to keep",
)
subparser.add_argument(
"-w",
"--keep-weekly",
dest="weekly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of weekly archives to keep",
help="number or time interval of weekly archives to keep",
)
subparser.add_argument(
"-m",
"--keep-monthly",
dest="monthly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of monthly archives to keep",
help="number or time interval of monthly archives to keep",
)
quarterly_group = subparser.add_mutually_exclusive_group()
quarterly_group.add_argument(
"--keep-13weekly",
dest="quarterly_13weekly",
type=int,
default=0,
help="number of quarterly archives to keep (13 week strategy)",
type=int_or_interval,
help="number or time interval of quarterly archives to keep (13 week strategy)",
)
quarterly_group.add_argument(
"--keep-3monthly",
dest="quarterly_3monthly",
type=int,
default=0,
help="number of quarterly archives to keep (3 month strategy)",
type=int_or_interval,
help="number or time interval of quarterly archives to keep (3 month strategy)",
)
subparser.add_argument(
"-y",
"--keep-yearly",
dest="yearly",
type=int,
default=0,
type=int_or_interval,
action=Highlander,
help="number of yearly archives to keep",
help="number or time interval of yearly archives to keep",
)
define_archive_filters_group(subparser, sort_by=False, first_last=False)
subparser.add_argument(
Expand Down
2 changes: 2 additions & 0 deletions src/borg/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@
EXIT_SIGNAL_BASE = 128 # terminated due to signal, rc = 128 + sig_no

ISO_FORMAT_NO_USECS = "%Y-%m-%dT%H:%M:%S"
ISO_FORMAT_NO_USECS_ZONE = ISO_FORMAT_NO_USECS + "%z"
ISO_FORMAT = ISO_FORMAT_NO_USECS + ".%f"
ISO_FORMAT_ZONE = ISO_FORMAT + "%z"

DASHES = "-" * 78

Expand Down
2 changes: 1 addition & 1 deletion src/borg/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper
from .parseformat import octal_int, bin_to_hex, hex_to_bin, safe_encode, safe_decode
from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd
from .parseformat import eval_escapes, decode_dict, interval
from .parseformat import eval_escapes, decode_dict, interval, int_or_interval
from .parseformat import (
PathSpec,
FilesystemPathSpec,
Expand Down
21 changes: 18 additions & 3 deletions src/borg/helpers/parseformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from pathlib import Path
from typing import ClassVar, Any, TYPE_CHECKING, Literal
from collections import OrderedDict
from datetime import datetime, timezone
from datetime import datetime, timezone, timedelta
from functools import partial
from hashlib import sha256
from string import Formatter
Expand Down Expand Up @@ -159,12 +159,27 @@ def interval(s):
except ValueError:
seconds = -1

if seconds <= 0:
raise ArgumentTypeError(f'Invalid number "{number}": expected positive integer')
if seconds < 0:
raise ArgumentTypeError(f'Invalid number "{number}": expected nonnegative integer')

return seconds


def int_or_interval(s):
    """Convert *s* to either an int or a timedelta.

    A value that already is an int or a timedelta is returned unchanged.
    Anything else is first tried as ``int(s)``; if that raises ValueError, the
    value is passed to ``interval()`` and the number of seconds it returns is
    wrapped in a timedelta.

    :raises ArgumentTypeError: if ``interval()`` rejects the value as well.
    """
    if isinstance(s, (int, timedelta)):
        return s

    try:
        return int(s)
    except ValueError:
        pass

    try:
        return timedelta(seconds=interval(s))
    except ArgumentTypeError as e:
        # chain explicitly so the underlying interval parse error stays attached as the cause
        raise ArgumentTypeError(f"Value is neither an integer nor an interval: {e}") from e


class CompressionSpec:
def __init__(self, s):
if isinstance(s, CompressionSpec):
Expand Down
Loading
Loading