diff --git a/lib/bitcask.rb b/lib/bitcask.rb
index 33f540f..b363bc2 100644
--- a/lib/bitcask.rb
+++ b/lib/bitcask.rb
@@ -44,11 +44,29 @@ def data_files
     end
   end
 
+  # Iterates over all keys in keydir. Yields key.
+  #
+  # When skip_tombstones is true, each key's entry is looked up in its data
+  # file with read_key, and keys flagged as tombstones there are not
+  # yielded.
+  def each_key(skip_tombstones=false)
+    @keydir.each do |key, index|
+      if skip_tombstones
+        # Keep the keydir key: read_key returns nil when no header could be
+        # read, and destructuring that result into `key` would yield nil.
+        _stored_key, is_tombstone = @keydir.data_files[index.file_id].read_key(index.value_pos)
+        next if is_tombstone
+      end
+      yield key
+    end
+  end
+
   # Iterates over all keys in keydir. Yields key, value pairs.
-  def each
+  # When skip_tombstones is true, pairs whose value equals TOMBSTONE are skipped.
+  def each(skip_tombstones=false)
     @keydir.each do |key, index|
       entry = @keydir.data_files[index.file_id][index.value_pos, index.value_sz]
-      yield [entry.key, entry.value]
+      yield [entry.key, entry.value] unless skip_tombstones && entry.value == TOMBSTONE
     end
   end
 
diff --git a/lib/bitcask/data_file.rb b/lib/bitcask/data_file.rb
index 2496bce..970f907 100644
--- a/lib/bitcask/data_file.rb
+++ b/lib/bitcask/data_file.rb
@@ -63,6 +63,39 @@ def pos
   end
   alias tell pos
 
+  # Reads the key of the entry at offset and reports whether its value is the
+  # tombstone marker.
+  #
+  # Returns [key, is_tombstone], or nil when a complete 14-byte header cannot
+  # be read at offset. Raises Bitcask::ChecksumError when a tombstone-sized
+  # value is truncated or fails its CRC check.
+  def read_key(offset)
+    seek offset
+
+    f = @file
+
+    # Parse header: crc (32 bit), tstamp (32 bit), key size (16 bit) and
+    # value size (32 bit), all big-endian. A short read means there is no
+    # complete entry here.
+    header = f.read(14)
+    return unless header && header.bytesize == 14
+    crc, _tstamp, ksz, value_sz = header.unpack "NNnN"
+
+    # Read key
+    key = f.read ksz
+
+    # Only a value exactly as long as the tombstone marker can be one; any
+    # other size is answered without reading or checksumming the value.
+    return [key, false] unless value_sz == Bitcask::TOMBSTONE.size
+
+    value = f.read value_sz
+    unless key && value && crc == Zlib.crc32(header[4..-1] + key + value)
+      raise Bitcask::ChecksumError
+    end
+
+    [key, value == Bitcask::TOMBSTONE]
+  end
+
   # Returns a single Entry read from the current offset, and advances to the
   # next.
   #