Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/query-id/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Build the `query-id` example executable from its single source file.
add_executable(query-id main.cpp)
# Link the example against the `geodesk` target (PRIVATE: the dependency is
# used to build query-id itself and is not part of its link interface).
target_link_libraries(query-id PRIVATE geodesk)
72 changes: 72 additions & 0 deletions examples/query-id/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright (c) 2024 Clarisma / GeoDesk contributors
// SPDX-License-Identifier: LGPL-3.0-only

#include <cctype>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <geodesk/geodesk.h>

using namespace geodesk;

/// @brief Looks up one feature by ID in `collection`, times the lookup, and
///        prints the outcome to std::cout.
///
/// On a hit, prints a header line (`<typeName> <id> (<n> us):`) followed by one
/// line per tag; on a miss, prints `<typeName> <id> not found (<n> us)`.
/// Only the `byId()` call itself is timed, not the printing.
///
/// @tparam Collection  Type offering a const `byId(uint64_t)` whose result is
///                     testable as a boolean and dereferences (`->`) to an
///                     object with `tags()`; every tag must provide streamable
///                     `key()` and `value()`.
/// @param collection   Collection to query (borrowed, not copied).
/// @param id           ID to look up.
/// @param typeName     Label printed in front of the ID (e.g. "Node").
///
/// Exceptions thrown by `byId()` are not caught here and propagate to the caller.
template<typename Collection>
void queryAndDisplay(const Collection& collection, uint64_t id, const char* typeName)
{
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by wall-clock adjustments (high_resolution_clock gives no such guarantee).
    using Clock = std::chrono::steady_clock;

    const auto start = Clock::now();
    auto feature = collection.byId(id);
    const auto elapsed = Clock::now() - start;
    const auto us = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();

    if (!feature)
    {
        std::cout << typeName << " " << id << " not found (" << us << " us)" << std::endl;
        return;
    }

    std::cout << typeName << " " << id << " (" << us << " us):\n";
    for (const auto& tag : feature->tags())
    {
        // '\n' instead of std::endl: avoid flushing the stream once per tag
        std::cout << " " << tag.key() << " = " << tag.value() << '\n';
    }
    std::cout << std::flush;
}

/// @brief Example entry point: opens a GOL and optionally looks up one feature by ID.
///
/// Usage: `query-id <gol-file> [n|w|r<id>]`, e.g. `query-id planet.gol w12345`.
/// The optional second argument is a type letter (n, w or r; case-insensitive)
/// immediately followed by a decimal ID.
///
/// @return 0 on success; 1 if the GOL argument is missing, the type letter is
///         unknown, the ID is not a plain decimal number, or the lookup throws
///         a QueryException.
int main(int argc, char* argv[])
{
    if (argc < 2)
    {
        std::cerr << "Usage: " << argv[0] << " <gol-file> [n|w|r<id>]" << std::endl;
        std::cerr << " Example: " << argv[0] << " planet.gol w12345" << std::endl;
        return 1;
    }

    Features features(argv[1]);
    std::cout << "Loaded " << argv[1] << std::endl;

    if (argc >= 3)
    {
        const char* arg = argv[2];
        const char typeChar = static_cast<char>(
            std::tolower(static_cast<unsigned char>(arg[0])));

        // Validate the type letter first. This also rejects an empty argument
        // (arg[0] == '\0') before anything reads at arg + 1.
        if (typeChar != 'n' && typeChar != 'w' && typeChar != 'r')
        {
            std::cerr << "Unknown type '" << arg[0] << "'. Use n, w, or r." << std::endl;
            return 1;
        }

        // The ID must consist of decimal digits only. strtoull on its own would
        // accept leading whitespace or a sign, and would quietly yield 0 for
        // input such as "w" or "wabc".
        const char* digits = arg + 1;
        char* end = nullptr;
        uint64_t id = 0;
        const bool startsWithDigit =
            std::isdigit(static_cast<unsigned char>(digits[0])) != 0;
        if (startsWithDigit)
        {
            id = std::strtoull(digits, &end, 10);
        }
        if (!startsWithDigit || *end != '\0')
        {
            std::cerr << "Invalid ID '" << digits
                << "'. Expected a decimal number, e.g. w12345." << std::endl;
            return 1;
        }

        try
        {
            switch (typeChar)
            {
            case 'n': queryAndDisplay(features.nodes(), id, "Node"); break;
            case 'w': queryAndDisplay(features.ways(), id, "Way"); break;
            case 'r': queryAndDisplay(features.relations(), id, "Relation"); break;
            }
        }
        catch (const QueryException& e)
        {
            std::cerr << "Error: " << e.what() << std::endl;
            return 1;
        }
    }

    return 0;
}
9 changes: 9 additions & 0 deletions include/geodesk/feature/FeatureStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#pragma once

#include <memory>
#include <mutex>
#include <span>
#include <unordered_map>
#ifdef GEODESK_PYTHON
Expand All @@ -27,6 +29,7 @@ class PyFeatures; // not namespaced for now

namespace geodesk {

class IdIndex;
class MatcherHolder;

// Possible threadpool alternatives:
Expand Down Expand Up @@ -171,6 +174,10 @@ class GEODESK_API FeatureStore final : public clarisma::FreeStore
TilePtr fetchTile(Tip tip) const;
static bool isTileValid(const byte* p);

/// @brief Returns the ID index if available, or nullptr if not.
/// ID index files are created by `gol build -i`.
/// The IdIndex object is created on the first call (guarded by
/// std::call_once); a non-null pointer is returned only when
/// IdIndex::isAvailable() reports true.
IdIndex* idIndex();

struct Metadata;
class Transaction;

Expand Down Expand Up @@ -216,6 +223,8 @@ class GEODESK_API FeatureStore final : public clarisma::FreeStore
#endif
clarisma::ThreadPool<TileQueryTask> executor_;
ZoomLevels zoomLevels_;
std::unique_ptr<IdIndex> idIndex_;
mutable std::once_flag idIndexInitFlag_;

friend class Transaction;
};
Expand Down
44 changes: 44 additions & 0 deletions include/geodesk/feature/FeaturesBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#pragma once

#include <optional>
#include <geodesk/feature/IdIndex.h>
#include <geodesk/filter/Filters.h>
#include <geodesk/feature/FeatureUtils.h>
#include <geodesk/feature/QueryException.h>
Expand Down Expand Up @@ -102,6 +103,49 @@ class FeaturesBase
[[nodiscard]] std::optional<T> first() const;
[[nodiscard]] T one() const;

/// @brief Looks up a feature by its OSM ID.
///
/// Requires ID index files created by `gol build -i`.
///
/// @param id The OSM ID to look up
/// @return The feature if found, or `std::nullopt` if not found
/// @throws QueryException if ID indexes are not available
///
[[nodiscard]] std::optional<T> byId(uint64_t id) const
{
IdIndex* idx = store()->idIndex();
if (!idx)
{
throw QueryException("ID indexes not available (build GOL with -i flag)");
}

// Determine the feature type from the template parameter T
// T is Node, Way, Relation, or Feature (FeatureBase<XxxPtr>)
FeatureType type;
if constexpr (std::is_same_v<T, Node>)
{
type = FeatureType::NODE;
}
else if constexpr (std::is_same_v<T, Way>)
{
type = FeatureType::WAY;
}
else if constexpr (std::is_same_v<T, Relation>)
{
type = FeatureType::RELATION;
}
else
{
static_assert(
std::is_same_v<T, Node> || std::is_same_v<T, Way> || std::is_same_v<T, Relation>,
"byId() is only available on Nodes, Ways, or Relations collections");
}

FeaturePtr ptr = idx->findById(id, type);
if (ptr.isNull()) return std::nullopt;
return T(store(), ptr);
}

// NOLINTNEXTLINE(google-explicit-constructor)
[[nodiscard]] operator std::vector<T>() const;

Expand Down
84 changes: 84 additions & 0 deletions include/geodesk/feature/IdIndex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright (c) 2024 Clarisma / GeoDesk contributors
// SPDX-License-Identifier: LGPL-3.0-only

#pragma once

#include <memory>
#include <clarisma/store/IndexFile.h>
#include <geodesk/feature/FeaturePtr.h>
#include <geodesk/feature/Tip.h>
#include <geodesk/feature/TilePtr.h>
#include <geodesk/feature/types.h>

namespace geodesk {

class FeatureStore;

/// @brief Provides lookup of features by their OSM ID.
///
/// This class manages the optional ID index files created by `gol build -i`.
/// The index files map OSM IDs to pile numbers, which are then converted
/// to tile index positions (tips) for feature retrieval.
///
/// Lookup chain: OSM_ID -> pile -> tip -> tile -> linear scan
///
/// Note that the last step is a scan of the target tile, so the cost of a
/// lookup is not constant: it also depends on the contents of that tile.
///
class IdIndex
{
public:
/// Constructs an IdIndex for the given FeatureStore.
/// If index files don't exist, isAvailable() returns false.
/// @param store The FeatureStore this index belongs to
explicit IdIndex(FeatureStore* store);

~IdIndex() = default;

// Non-copyable, non-movable (owns file handles)
IdIndex(const IdIndex&) = delete;
IdIndex& operator=(const IdIndex&) = delete;
IdIndex(IdIndex&&) = delete;
IdIndex& operator=(IdIndex&&) = delete;

/// Returns true if ID index files are available and loaded.
/// Callers should check this before relying on findById().
bool isAvailable() const noexcept { return available_; }

/// Finds a feature by its OSM ID.
/// @param id The OSM ID to look up
/// @param type The feature type (NODE, WAY, or RELATION)
/// @return The FeaturePtr if found, or an empty FeaturePtr if not found
///         (callers test the result with FeaturePtr::isNull())
FeaturePtr findById(uint64_t id, FeatureType type);

private:
/// Extra bits for ways and relations to encode tile pair flags.
static constexpr int PILEPAIR_EXTRA_BITS = 2;

/// Calculates bit width needed to store tile count values.
/// Uses same formula as gol-tool: 32 - countLeadingZeros(tileCount)
/// @param tileCount Number of tiles the width must be able to represent
static int calculateBaseBitWidth(uint32_t tileCount);

/// Builds the pile-to-tip mapping by walking the tile index.
void buildPileToTip();

/// Scans a tile to find a feature by ID.
FeaturePtr scanTileForId(TilePtr tile, uint64_t id, FeatureType type) const;

/// Scans a single index (NODE, WAY, AREA, or RELATION) for a feature.
FeaturePtr scanIndexForId(DataPtr pIndex, uint64_t id, FeatureType type) const;

/// Recursively scans R-tree branches for a feature.
/// NOTE(review): `isNode` presumably selects the node-leaf scan over the
/// way/relation-leaf scan -- confirm against the implementation.
FeaturePtr scanBranchForId(DataPtr p, uint64_t id, FeatureType type, bool isNode) const;

/// Scans a leaf node for the feature.
FeaturePtr scanNodeLeafForId(DataPtr p, uint64_t id) const;

/// Scans a leaf (way/relation) for the feature.
FeaturePtr scanLeafForId(DataPtr p, uint64_t id, FeatureType type) const;

// Store this index serves (presumably the pointer passed to the constructor)
FeatureStore* store_;
// Value reported by isAvailable(); false until the index is loaded
bool available_ = false;
// NOTE(review): presumably the highest pile number covered by pileToTip_ -- confirm
uint32_t maxPile_ = 0;
// Pile-to-tip mapping (see buildPileToTip())
std::unique_ptr<Tip[]> pileToTip_;
// One index file per feature type
clarisma::IndexFile nodeIndex_;
clarisma::IndexFile wayIndex_;
clarisma::IndexFile relationIndex_;
};

} // namespace geodesk
9 changes: 5 additions & 4 deletions src/clarisma/store/IndexFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@
// SPDX-License-Identifier: LGPL-3.0-only

#include <clarisma/store/IndexFile.h>
#include <limits>

namespace clarisma {

/// Creates an IndexFile that is not yet open.
///
/// All sizing fields start at zero; open() assigns their real values.
/// maxEntryCount_ in particular must start at 0 (not at the int64_t maximum):
/// get() bounds-checks keys against it, and the note on get() documents that
/// calling it on an unopened file is legal precisely because maxEntryCount_==0.
IndexFile::IndexFile() :
    slotsPerSegment_(0),
    maxEntryCount_(0),
    valueWidth_(0)
{
}
Expand All @@ -19,6 +18,8 @@ void IndexFile::open(const char* filename, OpenMode mode, int valueWidth)
assert(valueWidth > 0 && valueWidth <= 32);
valueWidth_ = valueWidth;
slotsPerSegment_ = static_cast<int64_t>(SEGMENT_LENGTH) * 8 / valueWidth_;
// Calculate max entry count based on actual file size
maxEntryCount_ = static_cast<int64_t>(mainMappingSize()) * 8 / valueWidth_;
}

IndexFile::CellRef IndexFile::getCell(int64_t key)
Expand All @@ -35,8 +36,8 @@ IndexFile::CellRef IndexFile::getCell(int64_t key)
return ref;
}

// It is legal to call get() if IndexFile is not open;
// it will always return 0 because maxEntryCount_==0
uint32_t IndexFile::get(uint64_t key)
{
if (key >= maxEntryCount_) [[unlikely]]
Expand Down
9 changes: 9 additions & 0 deletions src/feature/FeatureStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: LGPL-3.0-only

#include <geodesk/feature/FeatureStore.h>
#include <geodesk/feature/IdIndex.h>
#include <geodesk/feature/TileIndexEntry.h>
#include <filesystem>
#include <clarisma/io/FilePath.h>
Expand Down Expand Up @@ -132,6 +133,14 @@ TilePtr FeatureStore::fetchTile(Tip tip) const
return TilePtr(pagePointer(entry.page()));
}

/// Returns the store's ID index, creating the IdIndex object on first use.
///
/// Construction is guarded by std::call_once on idIndexInitFlag_, so it runs
/// at most once to completion. The object is kept even when the index is
/// unavailable; in that case callers simply receive nullptr.
///
/// @return Pointer to the IdIndex if it reports isAvailable(), otherwise nullptr.
IdIndex* FeatureStore::idIndex()
{
    const auto createIndex = [this]()
    {
        idIndex_ = std::make_unique<IdIndex>(this);
    };
    std::call_once(idIndexInitFlag_, createIndex);

    if (!idIndex_->isAvailable())
    {
        return nullptr;
    }
    return idIndex_.get();
}


void FeatureStore::readIndexSchema(DataPtr p)
Expand Down
Loading