From 697ecd63679eeb4a5b8ea6ff1166efa4d6d40305 Mon Sep 17 00:00:00 2001 From: Niklas Beisert Date: Sun, 17 Mar 2024 20:37:39 +0100 Subject: [PATCH] operate on all detected hardlinks --- Fileinfo.cc | 79 +++++++++++++++++++++++++++++++++++------------------ Fileinfo.hh | 10 +++++++ Rdutil.cc | 15 ++++++---- 3 files changed, 72 insertions(+), 32 deletions(-) diff --git a/Fileinfo.cc b/Fileinfo.cc index 46d05e9..c4077c1 100644 --- a/Fileinfo.cc +++ b/Fileinfo.cc @@ -166,11 +166,19 @@ Fileinfo::Fileinfostat::Fileinfostat() int Fileinfo::deletefile() { - const int ret = unlink(name().c_str()); - if (ret) { - std::cerr << "Failed deleting file " << name() << '\n'; + std::vector<std::string> allfiles; + allfiles.push_back(m_filename); + allfiles.insert(allfiles.end(), m_aliases.begin(), m_aliases.end()); + + for (const std::string& n : allfiles) { + const int ret = unlink(n.c_str()); + + if (ret) { + std::cerr << "Failed deleting file " << n << '\n'; + return ret; + } } - return ret; + return 0; } namespace { @@ -267,39 +275,56 @@ transactional_operation(const std::string& filename, const Func& f) int Fileinfo::makesymlink(const Fileinfo& A) { - const int retval = - transactional_operation(name(), [&](const std::string& filename) { - // The tricky thing is that the path must be correct, as seen from - // the directory where *this is. Making the path absolute solves this - // problem. Doing string manipulations to find how to make the path - // relative is prone to error because directories can be symlinks. 
- std::string target = A.name(); - makeAbsolute(target); - // clean up the path, so it does not contain sequences "/./" or "//" - simplifyPath(target); - return symlink(target.c_str(), filename.c_str()); - }); - - if (retval) { - std::cerr << "Failed to make symlink " << name() << " to " << A.name() - << '\n'; + std::vector<std::string> allfiles; + allfiles.push_back(m_filename); + allfiles.insert(allfiles.end(), m_aliases.begin(), m_aliases.end()); + + // The tricky thing is that the path must be correct, as seen from + // the directory where *this is. Making the path absolute solves this + // problem. Doing string manipulations to find how to make the path + // relative is prone to error because directories can be symlinks. + std::string target = A.name(); + makeAbsolute(target); + // clean up the path, so it does not contain sequences "/./" or "//" + simplifyPath(target); + + for (const std::string& n : allfiles) { + const int retval = + transactional_operation(n, [&target](const std::string& filename) { + return symlink(target.c_str(), filename.c_str()); + }); + + if (retval) { + std::cerr << "Failed to make symlink " << n << " to " << A.name() + << '\n'; + return retval; + } } - return retval; + return 0; } // makes a hard link that points to A int Fileinfo::makehardlink(const Fileinfo& A) { - return transactional_operation(name(), [&](const std::string& filename) { - // make a hardlink. - const int retval = link(A.name().c_str(), filename.c_str()); + std::vector<std::string> allfiles; + allfiles.push_back(m_filename); + allfiles.insert(allfiles.end(), m_aliases.begin(), m_aliases.end()); + + for (const std::string& n : allfiles) { + const int retval = + transactional_operation(n, [&A](const std::string& filename) { + // make a hardlink. 
+ return link(A.name().c_str(), filename.c_str()); + }); + if (retval) { - std::cerr << "Failed to make hardlink " << filename << " to " << A.name() + std::cerr << "Failed to make hardlink " << n << " to " << A.name() << '\n'; + return retval; } - return retval; - }); + } + return 0; } int diff --git a/Fileinfo.hh b/Fileinfo.hh index 73c89f7..f56e615 100644 --- a/Fileinfo.hh +++ b/Fileinfo.hh @@ -10,6 +10,7 @@ #include #include #include +#include <vector> // os specific headers #include //for off_t and others. @@ -127,6 +128,12 @@ public: // gets the filename const std::string& name() const { return m_filename; } + // gets the filename aliases + const std::vector<std::string>& aliases() const { return m_aliases; } + + // adds a filename alias + void add_alias(std::string n) { m_aliases.push_back(n); } + // gets the command line index this item was found at int get_cmdline_index() const { return m_cmdline_index; } @@ -174,6 +181,9 @@ private: // to be deleted or not bool m_delete; + // list of hardlinks + std::vector<std::string> m_aliases; + duptype m_duptype; // If two files are found to be identical, the one with highest ranking is diff --git a/Rdutil.cc b/Rdutil.cc index f1f2ed7..929de42 100644 --- a/Rdutil.cc +++ b/Rdutil.cc @@ -42,14 +42,19 @@ Rdutil::printtofile(const std::string& filename) const // This uses "priority" instead of "cmdlineindex". Change this the day // a change in output format is allowed (for backwards compatibility). 
output << "# Automatically generated\n"; - output << "# duptype id depth size device inode priority name\n"; + output << "# duptype id depth size device inode priority name(s)\n"; std::vector<Fileinfo>::iterator it; for (it = m_list.begin(); it != m_list.end(); ++it) { output << Fileinfo::getduptypestring(*it) << " " << it->getidentity() << " " << it->depth() << " " << it->size() << " " << it->device() << " " - << it->inode() << " " << it->get_cmdline_index() << " " << it->name() - << '\n'; + << it->inode() << " " << it->get_cmdline_index() << " " << it->name(); + if (!it->aliases().empty()) { + output << " aliases:"; + for (const std::string& n : it->aliases()) + output << " " << n; + } + output << '\n'; } output << "# end of file\n"; f1.close(); @@ -313,9 +318,9 @@ Rdutil::removeIdenticalInodes() // let the highest-ranking element not be deleted. do this in order, to be // cache friendly. auto best = std::min_element(first, last, cmpRank); - std::for_each(first, best, [](Fileinfo& f) { f.setdeleteflag(true); }); + std::for_each(first, last, [](Fileinfo& f) { f.setdeleteflag(true); }); best->setdeleteflag(false); - std::for_each(best + 1, last, [](Fileinfo& f) { f.setdeleteflag(true); }); + std::for_each(first, last, [&best](Fileinfo& f) { if (f.deleteflag()) best->add_alias(f.name()); }); }); return cleanup(); }