From cacc388562b9f00bad78636ffb1e87c8af0cada9 Mon Sep 17 00:00:00 2001 From: Isaac Olson Date: Wed, 22 Oct 2025 16:29:35 -0400 Subject: [PATCH 1/3] zcm log repair now can split up logs with large time jumps --- tools/cpp/log-repair/main.cpp | 264 +++++++++++++++++++++++----------- 1 file changed, 184 insertions(+), 80 deletions(-) diff --git a/tools/cpp/log-repair/main.cpp b/tools/cpp/log-repair/main.cpp index bd5113f7..f12d870f 100644 --- a/tools/cpp/log-repair/main.cpp +++ b/tools/cpp/log-repair/main.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -23,36 +24,45 @@ static void sighandler(int signal) struct Args { - string infile = ""; - string outfile = ""; - bool verify = false; + string infile = ""; + string outfile = ""; + int64_t splitUs = numeric_limits::max(); + bool outputSplit = false; + bool verify = false; bool init(int argc, char *argv[]) { struct option long_opts[] = { - { "help", no_argument, 0, 'h' }, - { "output", required_argument, 0, 'o' }, - { "verify", no_argument, 0, 'v' }, + { "help", no_argument, 0, 'h' }, + { "input", required_argument, 0, 'i' }, + { "output", required_argument, 0, 'o' }, + { "split", required_argument, 0, 's' }, + { "output-split", no_argument, 0, 'S' }, + { "verify", no_argument, 0, 'v' }, { 0, 0, 0, 0 } }; int c; - while ((c = getopt_long(argc, argv, "ho:v", long_opts, 0)) >= 0) { + while ((c = getopt_long(argc, argv, "hi:o:s:Sv", long_opts, 0)) >= 0) { switch (c) { - case 'o': outfile = string(optarg); break; - case 'v': verify = true; break; + case 'i': infile = string(optarg); break; + case 'o': outfile = string(optarg); break; + case 's': splitUs = atol(optarg); break; + case 'S': outputSplit = true; break; + case 'v': verify = true; break; case 'h': default: usage(); return false; }; } - if (optind != argc - 1) { - cerr << "Please specify a logfile" << endl; - usage(); - return false; + if (infile.empty()) { + if (optind != argc - 1) { + cerr << "Please specify a logfile" << endl; + usage(); + return false; + } + infile = string(argv[optind]); } - infile = string(argv[optind]); - if (outfile.empty() && !verify) { cerr << "Must specify output file or verify mode" << endl; return false; @@ -74,7 +84,14 @@ struct Args << "Options:" << endl << "" << endl << " -h, --help Shows this help text and exits" << endl + << " -i, --input=filename specify input file (alteratively can be a positional argument)" << endl << " -o, --output=filename specify output file" << endl + << endl + << " -s, --split=delta_us split output logs if there is a timestamp jump of >= provided magnitude" << endl + << " only the largest section will be output" << endl + << " -S, --output-split also output all the other files resulting from the split" << endl + << " file names will be .[num]" << endl + << endl << " -v, --verify verify input log is monotonic in timestamp" << endl << endl; } @@ -83,102 +100,134 @@ struct Args struct LogRepair { Args args; - zcm::LogFile* logIn = nullptr; - zcm::LogFile* logOut = nullptr; + unique_ptr logIn; + unique_ptr logOut; + // These variables are reused between functions const zcm::LogEvent* event; off_t length; off_t offset; - vector> timestamps; size_t progress; + vector>> timestamps; + LogRepair() { } - ~LogRepair() - { - if (logIn) delete logIn; - if (logOut) delete logOut; - } + ~LogRepair() { } bool init(int argc, char *argv[]) { if (!args.init(argc, argv)) return false; - logIn = new zcm::LogFile(args.infile, "r"); + logIn.reset(new zcm::LogFile(args.infile, "r")); if (!logIn->good()) { cerr << "Error: Failed to open '" << args.infile << "'" << endl; return false; } if (!args.verify) { - logOut = new zcm::LogFile(args.outfile, "w"); + logOut.reset(new zcm::LogFile(args.outfile, "w")); if (!logOut->good()) { cerr << "Error: Failed to create output log" << endl; return false; } } - fseeko(logIn->getFilePtr(), 0, SEEK_END); - length = ftello(logIn->getFilePtr()); - fseeko(logIn->getFilePtr(), 0, SEEK_SET); - // somewhat arbitrary, but starting with a high capacity helps speed up the read-in - timestamps.reserve(1e6); + timestamps.emplace_back(); + timestamps.back().reserve(1e6); return true; } - int run() + int verifyLog(string name, const vector>* expectedTimestamps = nullptr) { - // XXX: Note we are reading ALL of the event timestamps into memory, so - // this will use something like 16 * num_events memory. In some use - // cases that won't be ideal, so if people are running into that, we - // can try a different approach. - cout << "Reading log" << endl; - progress = 0; + cout << "Verifying " << name << endl; + unique_ptr log(new zcm::LogFile(name, "r")); + if (!log->good()) { + cerr << "Error: Failed to open log for verification" << endl; + return 1; + } + + fseeko(log->getFilePtr(), 0, SEEK_END); + length = ftello(log->getFilePtr()); + fseeko(log->getFilePtr(), 0, SEEK_SET); + + int64_t lastTimestamp = 0; + size_t i = 0; + progress = 0; cout << progress << "%" << flush; while (true) { if (done) return 1; - offset = ftello(logIn->getFilePtr()); - event = logIn->readNextEvent(); - if (!event) break; + offset = ftello(log->getFilePtr()); + event = log->readNextEvent(); - if (args.verify && !timestamps.empty() && - event->timestamp < timestamps.back().first) { - cerr << endl << "Detected nonmonotonic timestamp at event " - << timestamps.size() << endl; - return 1; + size_t p = (100 * offset) / length; + if (p != progress) { + progress = p; + cout << "\r" << progress << "%" << flush; } - timestamps.emplace_back(event->timestamp, offset); + if (!event) break; - if (timestamps.size() == timestamps.capacity()) { - timestamps.reserve(timestamps.capacity() * 2); + if (event->timestamp < lastTimestamp) { + cerr << endl << "Error: " << name + << " event timestamp nonmonotonic at event " << i << endl; + return 1; } + lastTimestamp = event->timestamp; - size_t p = (size_t)((100 * offset) / length); - if (p != progress) { - progress = p; - cout << "\r" << progress << "%" << flush; + if (expectedTimestamps && i < expectedTimestamps->size() && + event->timestamp != (*expectedTimestamps)[i].first) { + cerr << endl << "Error: output log timestamp mismatch" << endl; + cerr << "Expected " << (*expectedTimestamps)[i].first << " got " + << event->timestamp << " (idx " << i << ")" << endl; + return 1; } + + ++i; } - cout << endl << "Read " << timestamps.size() << " events" << endl; + cout << endl; - if (args.verify) return 0; + if (expectedTimestamps && i != expectedTimestamps->size()) { + cerr << endl << "Error: " << name << " has wrong number of events. " + << "Expected " << expectedTimestamps->size() << " found " << i << endl; + } + + return 0; + } + + int writeLog(string name, unique_ptr& input, + const vector>& sectionTimestamps) + { + cout << "Writing " << sectionTimestamps.size() << " events to " << name << endl; - sort(timestamps.begin(), timestamps.end()); + logOut.reset(new zcm::LogFile(name, "w")); + if (!logOut->good()) { + cerr << "Error: Failed to create output log" << endl; + return 1; + } - cout << "Writing new log" << endl; progress = 0; cout << progress << "%" << flush; - for (size_t i = 0; i < timestamps.size(); ++i) { + for (size_t i = 0; i < sectionTimestamps.size(); ++i) { if (done) return 1; - logOut->writeEvent(logIn->readEventAtOffset(timestamps[i].second)); + auto* event = input->readEventAtOffset(sectionTimestamps[i].second); + if (!event) { + fseeko(input->getFilePtr(), 0, SEEK_END); + length = ftello(input->getFilePtr()); + + cerr << endl << "Failed to read event from input log at offset " + << sectionTimestamps[i].second << " / " << length << endl; - size_t p = (100 * i) / timestamps.size(); + return 1; + } + logOut->writeEvent(event); + + size_t p = (100 * (i + 1)) / sectionTimestamps.size(); if (p != progress) { progress = p; cout << "\r" << progress << "%" << flush; @@ -186,42 +235,97 @@ struct LogRepair } cout << endl << "Flushing to disk" << endl; - delete logOut; + logOut.reset(); - cout << "Verifying output" << endl; - logOut = new zcm::LogFile(args.verify ? args.infile : args.outfile, "r"); - if (!logOut->good()) { - cerr << "Error: Failed to open log for verification" << endl; - return 1; - } + return 0; + } + + int run() + { + if (args.verify) return verifyLog(args.infile); - size_t i = 0; + fseeko(logIn->getFilePtr(), 0, SEEK_END); + length = ftello(logIn->getFilePtr()); + fseeko(logIn->getFilePtr(), 0, SEEK_SET); + + // XXX: Note we are reading ALL of the event timestamps into memory, so + // this will use something like 16 * num_events memory. In some use + // cases that won't be ideal, so if people are running into that, we + // can try a different approach. + cout << "Reading log with length " << length << endl; progress = 0; cout << progress << "%" << flush; while (true) { if (done) return 1; - event = logOut->readNextEvent(); - if (!event) break; - if (event->timestamp != timestamps[i++].first) { - cerr << endl << "Error: output log timestamp mismatch" << endl; - cerr << "Expected " << timestamps[i].first << " got " - << event->timestamp << " (idx " << i << ")" << endl; - return 1; - } + offset = ftello(logIn->getFilePtr()); + event = logIn->readNextEvent(); - size_t p = (100 * i) / timestamps.size(); + size_t p = (size_t)((100 * offset) / length); if (p != progress) { progress = p; cout << "\r" << progress << "%" << flush; } + + if (!event) break; + + if (timestamps.back().empty()) { + timestamps.back().emplace_back(event->timestamp, offset); + continue; + } + + int64_t lastTimestamp = timestamps.back().back().first; + int64_t delta = event->timestamp > lastTimestamp + ? event->timestamp - lastTimestamp + : lastTimestamp - event->timestamp; + + if (delta > args.splitUs) { + timestamps.emplace_back(); + timestamps.back().reserve(1e6); + } + + timestamps.back().emplace_back(event->timestamp, offset); + if (timestamps.back().size() == timestamps.back().capacity()) { + timestamps.back().reserve(timestamps.back().capacity() * 2); + } } - if (i < timestamps.size()) { - cerr << endl << "Error: output log was missing " - << timestamps.size() - i << " events" << endl; + + size_t largestIdx = 0; + size_t largestSize = 0; + cout << endl << "Read {"; + for (size_t i = 0; i < timestamps.size(); ++i) { + auto& t = timestamps[i]; + if (t.size() > largestSize) { + largestIdx = i; + largestSize = t.size(); + } + cout << t.size() << ", "; } + cout << "\b\b} events" << endl; - return 0; + if (args.verify) return 0; + + auto process = [&](size_t idx, const string& name) { + auto& t = timestamps[idx]; + + sort(t.begin(), t.end()); + int ret = writeLog(name, logIn, t); + if (ret != 0) return ret; + + ret = verifyLog(name, &t); + if (ret != 0) return ret; + + return ret; + }; + + if (!args.outputSplit) return process(largestIdx, args.outfile); + + int ret = 0; + for (size_t i = 0; i < timestamps.size(); ++i) { + ret = process(i, args.outfile + "." + to_string(i)); + if (ret != 0) return ret; + } + return ret; } }; From 22d31008d3410106d2ee1be45f35df7c7cab1cf5 Mon Sep 17 00:00:00 2001 From: Jakob Hoellerbauer Date: Mon, 3 Nov 2025 11:21:57 -0500 Subject: [PATCH 2/3] Review --- tools/cpp/log-repair/main.cpp | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/tools/cpp/log-repair/main.cpp b/tools/cpp/log-repair/main.cpp index f12d870f..f873d7c8 100644 --- a/tools/cpp/log-repair/main.cpp +++ b/tools/cpp/log-repair/main.cpp @@ -36,7 +36,7 @@ struct Args { "help", no_argument, 0, 'h' }, { "input", required_argument, 0, 'i' }, { "output", required_argument, 0, 'o' }, - { "split", required_argument, 0, 's' }, + { "split-us", required_argument, 0, 's' }, { "output-split", no_argument, 0, 'S' }, { "verify", no_argument, 0, 'v' }, { 0, 0, 0, 0 } @@ -101,7 +101,6 @@ struct LogRepair { Args args; unique_ptr logIn; - unique_ptr logOut; // These variables are reused between functions const zcm::LogEvent* event; @@ -126,13 +125,6 @@ struct LogRepair return false; } - if (!args.verify) { - logOut.reset(new zcm::LogFile(args.outfile, "w")); - if (!logOut->good()) { - cerr << "Error: Failed to create output log" << endl; - return false; - } - } // somewhat arbitrary, but starting with a high capacity helps speed up the read-in timestamps.emplace_back(); @@ -204,7 +196,7 @@ struct LogRepair { cout << "Writing " << sectionTimestamps.size() << " events to " << name << endl; - logOut.reset(new zcm::LogFile(name, "w")); + unique_ptr logOut(new zcm::LogFile(name, "w")); if (!logOut->good()) { cerr << "Error: Failed to create output log" << endl; return 1; @@ -235,7 +227,6 @@ struct LogRepair } cout << endl << "Flushing to disk" << endl; - logOut.reset(); return 0; } @@ -303,8 +294,6 @@ struct LogRepair } cout << "\b\b} events" << endl; - if (args.verify) return 0; - auto process = [&](size_t idx, const string& name) { auto& t = timestamps[idx]; From 631712bc9f5f20b8d76825d7949896e1f77f76d0 Mon Sep 17 00:00:00 2001 From: Jakob Hoellerbauer Date: Mon, 3 Nov 2025 11:24:10 -0500 Subject: [PATCH 3/3] Update arg in usage --- tools/cpp/log-repair/main.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/cpp/log-repair/main.cpp b/tools/cpp/log-repair/main.cpp index f873d7c8..fcb0e2d4 100644 --- a/tools/cpp/log-repair/main.cpp +++ b/tools/cpp/log-repair/main.cpp @@ -83,16 +83,16 @@ struct Args << "" << endl << "Options:" << endl << "" << endl - << " -h, --help Shows this help text and exits" << endl - << " -i, --input=filename specify input file (alteratively can be a positional argument)" << endl - << " -o, --output=filename specify output file" << endl + << " -h, --help Shows this help text and exits" << endl + << " -i, --input=filename specify input file (alteratively can be a positional argument)" << endl + << " -o, --output=filename specify output file" << endl << endl - << " -s, --split=delta_us split output logs if there is a timestamp jump of >= provided magnitude" << endl - << " only the largest section will be output" << endl - << " -S, --output-split also output all the other files resulting from the split" << endl - << " file names will be .[num]" << endl + << " -s, --split-us=delta_us split output logs if there is a timestamp jump of >= provided magnitude" << endl + << " only the largest section will be output" << endl + << " -S, --output-split also output all the other files resulting from the split" << endl + << " file names will be .[num]" << endl << endl - << " -v, --verify verify input log is monotonic in timestamp" << endl + << " -v, --verify verify input log is monotonic in timestamp" << endl << endl; } };