From 6858e18675086feb7cf0261494f738719d44818b Mon Sep 17 00:00:00 2001 From: grourk Date: Fri, 17 Jun 2011 01:35:52 -0700 Subject: [PATCH 1/2] Allow skipping tombstones when iterating over entries. Also, adding method to iterate over keys with more efficient checking for tombstone. --- lib/bitcask.rb | 16 ++++++++++++++-- lib/bitcask/data_file.rb | 27 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/lib/bitcask.rb b/lib/bitcask.rb index 33f540f..c18b8b1 100644 --- a/lib/bitcask.rb +++ b/lib/bitcask.rb @@ -44,11 +44,23 @@ def data_files end end + # Iterates over all keys in keydir. Yields key. + def each_key(skip_tombstones=false) + @keydir.each do |key, index| + if skip_tombstones + key, is_tombstone = @keydir.data_files[index.file_id].read_key(index.value_pos, index.value_sz) + yield key unless is_tombstone + else + yield key + end + end + end + # Iterates over all keys in keydir. Yields key, value pairs. - def each + def each(skip_tombstones=false) @keydir.each do |key, index| entry = @keydir.data_files[index.file_id][index.value_pos, index.value_sz] - yield [entry.key, entry.value] + yield [entry.key, entry.value] unless skip_tombstones and entry.value == TOMBSTONE end end diff --git a/lib/bitcask/data_file.rb b/lib/bitcask/data_file.rb index 2496bce..b6140bd 100644 --- a/lib/bitcask/data_file.rb +++ b/lib/bitcask/data_file.rb @@ -63,6 +63,33 @@ def pos end alias tell pos + def read_key(offset, size = nil) + seek offset + + if size + f = StringIO.new @file.read(size) + else + f = @file + end + + # Parse header + header = f.read(14) or return + crc, tstamp, ksz, value_sz = header.unpack "NNnN" + + # Read key + key = f.read ksz + + if value_sz != Bitcask::TOMBSTONE.size + is_tombstone = false + else + value = f.read value_sz + raise Bitcask::ChecksumError unless crc == Zlib.crc32(header[4..-1] + key + value) + is_tombstone = value == Bitcask::TOMBSTONE + end + + return key, is_tombstone + end + # Returns a single Entry read from the current offset, and advances to the # next. # From 68844499deb1164814a99c1c21a5c592cadfd0c2 Mon Sep 17 00:00:00 2001 From: grourk Date: Fri, 17 Jun 2011 02:16:22 -0700 Subject: [PATCH 2/2] Woops, don't read value from file in read_key if we don't have to. --- lib/bitcask.rb | 2 +- lib/bitcask/data_file.rb | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/bitcask.rb b/lib/bitcask.rb index c18b8b1..b363bc2 100644 --- a/lib/bitcask.rb +++ b/lib/bitcask.rb @@ -48,7 +48,7 @@ def data_files def each_key(skip_tombstones=false) @keydir.each do |key, index| if skip_tombstones - key, is_tombstone = @keydir.data_files[index.file_id].read_key(index.value_pos, index.value_sz) + key, is_tombstone = @keydir.data_files[index.file_id].read_key(index.value_pos) yield key unless is_tombstone else yield key diff --git a/lib/bitcask/data_file.rb b/lib/bitcask/data_file.rb index b6140bd..970f907 100644 --- a/lib/bitcask/data_file.rb +++ b/lib/bitcask/data_file.rb @@ -63,14 +63,10 @@ def pos end alias tell pos - def read_key(offset, size = nil) + def read_key(offset) seek offset - if size - f = StringIO.new @file.read(size) - else - f = @file - end + f = @file # Parse header header = f.read(14) or return