diff --git a/Makefile b/Makefile index 9f54783..63f4904 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,8 @@ APPS = mergerfs.fsck \ mergerfs.dedup \ mergerfs.ctl \ mergerfs.balance \ - mergerfs.consolidate + mergerfs.consolidate \ + mergerfs.consolidate-dirs install: @for APP in $(APPS); \ diff --git a/README.md b/README.md index 74f9199..65e3125 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,26 @@ optional arguments: -h, --help Print this help. ``` + +### mergerfs.consolidate-dirs + +Consolidate directories so that each of them only exists on one a single drive, recursively. The approach is that the tool loops through given directories, looks up the source drives, checks the space used per source directory, and moves the data from the smaller ones into the largest one. Ending with a single directory. + +Requires `rsync` to be installed. + +``` +usage: mergerfs.consolidate-dirs [] ... + +positional arguments: + dir directory to consolidate, can be repeated + +optional arguments: + -v, --verbose Verbose printing + -e, --execute Execute `rsync` commands as well as print them. + -h, --help Print this help. +``` + + ## SUPPORT #### Contact / Issue submission diff --git a/src/mergerfs.consolidate-dirs b/src/mergerfs.consolidate-dirs new file mode 100755 index 0000000..a24fbc4 --- /dev/null +++ b/src/mergerfs.consolidate-dirs @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2016, Antonio SJ Musumeci +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +import argparse +import ctypes +import errno +import fnmatch +import io +import os +import shlex +import stat +import subprocess +import sys + + +_libc = ctypes.CDLL("libc.so.6",use_errno=True) +_lgetxattr = _libc.lgetxattr +_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t] +def lgetxattr(path,name): + if type(path) == str: + path = path.encode(errors='backslashreplace') + if type(name) == str: + name = name.encode(errors='backslashreplace') + length = 64 + while True: + buf = ctypes.create_string_buffer(length) + res = _lgetxattr(path,name,buf,ctypes.c_size_t(length)) + if res >= 0: + return buf.raw[0:res] + else: + err = ctypes.get_errno() + if err == errno.ERANGE: + length *= 2 + elif err == errno.ENODATA: + return None + else: + raise IOError(err,os.strerror(err),path) + + +def ismergerfs(path): + try: + lgetxattr(path,'user.mergerfs.version') + return True + except IOError as e: + return False + + +def mergerfs_control_file(basedir): + if basedir == '/': + return None + ctrlfile = os.path.join(basedir,'.mergerfs') + if os.path.exists(ctrlfile): + return ctrlfile + else: + dirname = os.path.dirname(basedir) + return mergerfs_control_file(dirname) + + +def mergerfs_srcmounts(ctrlfile): + srcmounts = lgetxattr(ctrlfile,'user.mergerfs.srcmounts') + srcmounts = srcmounts.decode(errors='backslashreplace').split(':') + return srcmounts + + +def execute_cmd(args): + return subprocess.call(args) + + +def execute_cmds(args): + if isinstance(args[0], list): + for args2 in args: + execute_cmd(args2) + else: + return execute_cmd(args) + + +def print_margs(margs): + for args in margs[:-1]: + quoted = [shlex.quote(arg) for arg in args] + print(' '.join(quoted), end = ' && ') + + args = margs[-1] + quoted = [shlex.quote(arg) for arg in args] + print(' '.join(quoted)) + + +def build_move_file(src,tgt): + return ['rsync', + '-avHAXWES', + '--numeric-ids', + '--progress', + '--remove-source-files', + src, + tgt] + + +def print_help(): + help = \ +''' +usage: mergerfs.consolidate-dirs [] + +Consolidate listed directories into a single drive by moving the smaller ones into the largest. +Automatically skips directories that are not spread between multiple source drives. + +Usage example: +user@host:/srv/mergerfs/movies$ mergerfs.consolidate-dirs * + +positional arguments: + dirs Directories to consolidate + +optional arguments: + -v, --verbose Verbose + -e, --execute Execute `rsync` commands as well as print them. + -h, --help Print this help. +''' + print(help) + + +def buildargparser(): + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument('dirs', + type=str, + nargs='*', + default=None) + parser.add_argument('-v','--verbose', + dest='verbose', + action='store_true') + parser.add_argument('-e','--execute', + dest='execute', + action='store_true') + parser.add_argument('-h','--help', + action='store_true') + + return parser + + +def mergerfs_sourcepaths(fullpath): + attr = xattr_allpaths(fullpath) + if not attr: + dirname = os.path.dirname(fullpath) + basename = os.path.basename(fullpath) + attr = xattr_allpaths(dirname) + attr = attr.split('\0') + attr = [os.path.join(path,basename) + for path in attr + if os.path.lexists(os.path.join(path,basename))] + else: + attr = attr.decode('utf-8').split('\0') + return [x.rstrip('/') for x in attr] + +def xattr_allpaths(fullpath): + return lgetxattr(fullpath,'user.mergerfs.allpaths') + +def dir_size(path): + total_size = 0 + for dirpath, dirnames, filenames in os.walk(path): + for f in filenames: + fp = os.path.join(dirpath, f) + # skip if it is symbolic link + if not os.path.islink(fp): + total_size += os.path.getsize(fp) + + return total_size + +def bytes_to_human(num): + for unit in ["", "K", "M", "G", "T"]: + if abs(num) < 1024.0 or unit == 'T': + return "%3.1f%s" % (num, unit) + num /= 1024.0 + return None + + + + +def main(): + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, + encoding='utf8', + errors='backslashreplace', + line_buffering=True) + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, + encoding='utf8', + errors='backslashreplace', + line_buffering=True) + + parser = buildargparser() + args = parser.parse_args() + + if args.help or not args.dirs: + print_help() + sys.exit(0) + + execute = args.execute + verbose = args.verbose + + + real_dirs = [] + for dir in args.dirs: + real_dir = os.path.realpath(dir) + + if not os.path.isdir(real_dir): + if(verbose): + print("%s is not a directory, skipping" % real_dir) + continue + + ctrlfile = mergerfs_control_file(real_dir) + if not ismergerfs(ctrlfile): + print("%s is not a mergerfs mount" % real_dir) + sys.exit(1) + + real_dirs.append( real_dir ) + + + try: + for dir in real_dirs: + if(verbose): + print("Considering %s" % dir) + sourcepaths = mergerfs_sourcepaths(dir) + if(len(sourcepaths) == 1): + if(verbose): + print(" Already consolidated, skipping") + continue + + sizes = {} + for sourcepath in sourcepaths: + sizes[sourcepath] = dir_size(sourcepath) + + sizes = sorted(sizes.items(), key=lambda item: item[1]) + + if(verbose): + for path,size in sizes: + print(" %s %s" % (bytes_to_human(size), path)) + + tgt = os.path.dirname(sizes[-1][0]).rstrip('/') + '/' + for src,size in sizes[:-1]: + margs = [ build_move_file(src,tgt), [ 'find', src, '-type', 'd', '-empty', '-delete' ] ] + print_margs(margs) + if execute: + if(verbose): + print("Consolidating %s" % dir) + execute_cmds(margs) + + except (KeyboardInterrupt,BrokenPipeError): + pass + + sys.exit(0) + + +if __name__ == "__main__": + main()