Add a -skip option to rdfind.

"-skip firstbytes" and "-skip lastbytes" clear Options::usefirstbytes and
Options::uselastbytes. main() then leaves the matching "first bytes" or
"last bytes" entry out of the list of comparison modes it builds. Both flags
default to true and the entries are appended in the same order as before, so
behaviour without -skip is unchanged. Any other value after -skip prints an
error to stderr and exits with EXIT_FAILURE.

Touched files:
  .gitignore                  ignore .vs/slnx.sqlite
  rdfind.1                    document -skip
  rdfind.cc                   usage text, Options flags, parsing, mode list
  testcases/skip_options.sh   new test: the duplicate is still deleted with
                              either step skipped

NOTE(review): this patch was rebuilt from a copy whose line breaks were
collapsed. Runs of spaces inside context lines and string literals may have
been lost, and the template arguments of "modes" were reconstructed from its
initialisers (second type assumed to be const char*). Regenerate the hunks
against the real tree before applying.

diff --git a/.gitignore b/.gitignore
index 366cd1f..f7ab85b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@ test-driver
 nettle32bit/
 *.log
 rdfind-*.tar.gz
+.vs/slnx.sqlite
diff --git a/rdfind.1 b/rdfind.1
index e263f1b..e7da168 100644
--- a/rdfind.1
+++ b/rdfind.1
@@ -80,6 +80,10 @@ is true.
 What type of checksum to be used: md5, sha1 or sha256. The default is
 sha1 since version 1.4.0.
 .TP
+.BR \-skip " " \fIfirstbytes\fR|\fIlastbytes\fR
+Skip comparing the first bytes or the last bytes of the files when pruning
+candidates. This argument can be repeated to skip both.
+.TP
 .BR \-deterministic " " \fItrue\fR|\fIfalse\fR
 If set (the default), sort files of equal rank in an unspecified but
 deterministic order. This makes the behaviour independent of in which
diff --git a/rdfind.cc b/rdfind.cc
index fbd6cb8..0454395 100644
--- a/rdfind.cc
+++ b/rdfind.cc
@@ -62,6 +62,8 @@ usage()
        "device and inode\n"
     << " -checksum md5 |(sha1)| sha256\n"
     << " checksum type\n"
+    << " -skip firstbytes | lastbytes\n"
+    << " skip some check\n"
     << " -deterministic (true)| false makes results independent of order\n"
     << " from listing the filesystem\n"
     << " -makesymlinks true |(false) replace duplicate files with "
@@ -102,6 +104,8 @@ struct Options
   bool followsymlinks = false; // follow symlinks
   bool dryrun = false; // only dryrun, don't destroy anything
   bool remove_identical_inode = true; // remove files with identical inodes
+  bool usefirstbytes = true; // use first bytes to check for differences
+  bool uselastbytes = true; // use last bytes to check for differences
   bool usemd5 = false; // use md5 checksum to check for similarity
   bool usesha1 = false; // use sha1 checksum to check for similarity
   bool usesha256 = false; // use sha256 checksum to check for similarity
@@ -178,6 +182,16 @@ parseOptions(Parser& parser)
                   << parser.get_parsed_string() << "\"\n";
         std::exit(EXIT_FAILURE);
       }
+    } else if (parser.try_parse_string("-skip")) {
+      if (parser.parsed_string_is("firstbytes")) {
+        o.usefirstbytes = false;
+      } else if (parser.parsed_string_is("lastbytes")) {
+        o.uselastbytes = false;
+      } else {
+        std::cerr << "expected firstbytes/lastbytes, not \""
+                  << parser.get_parsed_string() << "\"\n";
+        std::exit(EXIT_FAILURE);
+      }
     } else if (parser.try_parse_string("-sleep")) {
       const auto nextarg = std::string(parser.get_parsed_string());
       if (nextarg == "1ms") {
@@ -352,9 +366,15 @@ main(int narg, const char* argv[])
   // candidates. start looking at the contents.
   std::vector<std::pair<Fileinfo::readtobuffermode, const char*>> modes{
     { Fileinfo::readtobuffermode::NOT_DEFINED, "" },
-    { Fileinfo::readtobuffermode::READ_FIRST_BYTES, "first bytes" },
-    { Fileinfo::readtobuffermode::READ_LAST_BYTES, "last bytes" },
   };
+  if (o.usefirstbytes) {
+    modes.emplace_back(Fileinfo::readtobuffermode::READ_FIRST_BYTES,
+                       "first bytes");
+  }
+  if (o.uselastbytes) {
+    modes.emplace_back(Fileinfo::readtobuffermode::READ_LAST_BYTES,
+                       "last bytes");
+  }
   if (o.usemd5) {
     modes.emplace_back(Fileinfo::readtobuffermode::CREATE_MD5_CHECKSUM,
                        "md5 checksum");
diff --git a/testcases/skip_options.sh b/testcases/skip_options.sh
new file mode 100644
index 0000000..eec848a
--- /dev/null
+++ b/testcases/skip_options.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Test that selection of skip works as expected.
+
+
+set -e
+. "$(dirname "$0")/common_funcs.sh"
+
+
+
+
+
+for skiptype in firstbytes lastbytes; do
+  reset_teststate
+  dbgecho "trying skip $skiptype"
+  # a and b get identical content, so b must go away as a's duplicate
+  echo "$skiptype" >a
+  echo "$skiptype" >b
+  $rdfind -skip $skiptype -deleteduplicates true a b
+  [ -e a ]
+  [ ! -e b ]
+done
+
+dbgecho "all is good in this test!"