Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 155 additions & 28 deletions server/src/instant/db/datalog.clj
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,15 @@
(uuid-util/coerce s)
(uuid-util/coerce (str s (subs all-fs-uuid (count s))))))

(defn op->sql-op
  "Translates a query comparator keyword (:$gt, :$gte, :$lt, :$lte, :$like,
  :$ilike) into the keyword of the matching SQL operator.

  Throws IllegalArgumentException when `op` is not one of the supported
  comparators."
  [op]
  (let [sql-ops {:$gt    :>
                 :$gte   :>=
                 :$lt    :<
                 :$lte   :<=
                 :$like  :like
                 :$ilike :ilike}]
    ;; No value in the table is nil, so a nil lookup means `op` is unknown.
    (if-some [sql-op (get sql-ops op)]
      sql-op
      (throw (IllegalArgumentException. (str "No matching clause: " op))))))

(defn- value-function-clauses [app-id triples-alias idx [v-tag v-value]]
(case v-tag
:function (let [[func val] (first v-value)]
Expand Down Expand Up @@ -739,13 +748,7 @@
[[:not= (kw alias :.value) [:cast (->json nil) :jsonb]]]))})]]
:$comparator (let [{:keys [op value data-type]} val]
[(data-type-comparison data-type
(case op
:$gt :>
:$gte :>=
:$lt :<
:$lte :<=
:$like :like
:$ilike :ilike)
(op->sql-op op)
:value
value)])
:$entityIdStartsWith
Expand Down Expand Up @@ -1264,7 +1267,7 @@
next-cost)))
1
(:path costs))
join-cost (reduce + 1 (vals (:join-remaining costs)))
join-cost (reduce * 1 (vals (:join-remaining costs)))
filter-cost (reduce + 0 (vals (select-keys (:known-remaining costs)
(:filter-components costs))))]
(* 1.0
Expand All @@ -1278,27 +1281,142 @@
nested loop."
[index]
(let [costs (:index-costs index)
unique-cols (or (:unique-cols index) #{})
unique-hit? (some #(contains? unique-cols (:col %)) (:path costs))
index-lookup-cost (reduce (fn [acc {:keys [cost col]}]
(let [next-cost (* acc cost)]
(if (contains? (:unique-cols index) col)
(if (contains? unique-cols col)
(reduced next-cost)
next-cost)))
1
(:path costs))
unbounded-filter-cost (reduce + 0 (vals (select-keys (:known-remaining costs)
(:filter-components costs))))
filtered-row-cost (if unique-hit?
(min index-lookup-cost unbounded-filter-cost)
unbounded-filter-cost)
join-cost (reduce * 1 (vals (:join-remaining costs)))
filter-cost (reduce + 0 (vals (select-keys (:known-remaining costs)
(:filter-components costs))))]
filter-cost filtered-row-cost]
(* 1.0
(+ index-lookup-cost
(* 2 filter-cost))
(max 1 (* 1.1 join-cost)))))

(defn use-new-index-cost?
  "Returns the result of checking the :new-index-cost flag via
  `flags/toggled?`.

  NOTE(review): the trailing `true` is presumably the value used when the
  flag has not been set -- confirm against `flags/toggled?`."
  []
  (flags/toggled? :new-index-cost true))

(defn path-cost-with-joins
  "Scores `index` with whichever cost model is currently active.

  Dispatches on `use-new-index-cost?` so this function reads the
  :new-index-cost flag the same way as every other caller of that helper:
  `path-cost-with-joins-new` when it is on, `path-cost-with-joins-old`
  otherwise."
  [index]
  (if (use-new-index-cost?)
    (path-cost-with-joins-new index)
    (path-cost-with-joins-old index)))

(defn- value-access-mode
  "Classifies how the value of `named-p` is compared in the generated SQL.

  Returns :typed-extract when `extract-value-fn` yields an extractor for the
  pattern's data type and operator (e.g. [:triples-extract-number-value]),
  and :raw when the value is compared as json. Patterns without a data type,
  and value patterns that are neither a constant, a :$comparator nor a :$not,
  are always :raw."
  [named-p]
  (let [value-pattern (:v named-p)
        data-type (idx-data-type (:idx named-p))
        ;; :typed-extract when an extractor exists for `sql-op`, else :raw.
        mode-for (fn [sql-op]
                   (if (extract-value-fn data-type sql-op)
                     :typed-extract
                     :raw))]
    (if (nil? data-type)
      :raw
      (case (first value-pattern)
        :constant (mode-for :=)
        :function (let [[func {:keys [op]}] (first (second value-pattern))]
                    (case func
                      :$comparator (mode-for (op->sql-op op))
                      :$not (mode-for :is-distinct-from)
                      :raw))
        :raw))))

(defn- can-match-on-index-column?
  "Returns true if the index column `component` can be used for this query.

  Every column other than :v is always usable. The :v column is usable only
  when the index's data type fits the way the value is accessed: a
  typed-extract comparison needs an index with the same data type as the
  pattern, and a raw json comparison needs an index with no data type."
  [named-p idx-config component]
  (or (not= :v component)
      (let [index-type (:data-type idx-config)]
        (case (value-access-mode named-p)
          :typed-extract (= index-type (idx-data-type (:idx named-p)))
          :raw (nil? index-type)))))

(defn- debug-realistic-prefix
  "Walks the index's columns in order and collects the leading columns that
  are usable given the SQL shape we emit, not just the abstract pattern.

  Returns {:path [col ...] :blocked-on info}. `info` is nil when every column
  was usable; otherwise it describes the first column that stopped the walk
  and why (:unknown-component or :not-indexable-for-sql-shape)."
  [named-p idx-config known-components]
  (reduce
   (fn [result col]
     (cond
       (not (contains? known-components col))
       (reduced (assoc result :blocked-on {:col col
                                           :reason :unknown-component}))

       (not (can-match-on-index-column? named-p idx-config col))
       (reduced (assoc result
                       :blocked-on
                       {:col col
                        :reason :not-indexable-for-sql-shape
                        :value-access-mode (value-access-mode named-p)
                        :idx-data-type (:data-type idx-config)
                        :pattern-data-type (idx-data-type (:idx named-p))}))

       :else
       (update result :path conj col)))
   {:path []
    :blocked-on nil}
   ;; Stop at the first falsey column, as the walk has nothing left to inspect.
   (take-while identity (:cols idx-config))))

(defn- debug-cost-breakdown
  "Builds a map that explains how an index candidate was scored.

  `costs` is the cost map accumulated for the candidate (:path,
  :known-remaining, :join-remaining, :filter-remaining, :filter-components,
  :known-components) and `path-cost-with-joins` is the score that was already
  computed for it; it is echoed back as :score.

  :filter-cost follows the cost model selected by `use-new-index-cost?`, so it
  always agrees with the model that produced :score. Both variants are also
  reported, as :filter-cost-old and :filter-cost-new, so the two models can be
  compared side by side regardless of the flag."
  [named-p idx-config costs path-cost-with-joins]
  (let [unique-cols (or (:unique-cols idx-config) #{})
        unique-hit? (some #(contains? unique-cols (:col %)) (:path costs))
        ;; Multiply the per-column costs along the path, stopping at the first
        ;; unique column.
        index-lookup-cost (reduce (fn [acc {:keys [cost col]}]
                                    (let [next-cost (* acc cost)]
                                      (if (contains? unique-cols col)
                                        (reduced next-cost)
                                        next-cost)))
                                  1
                                  (:path costs))
        join-cost-old (reduce + 1 (vals (:join-remaining costs)))
        join-cost-new (reduce * 1 (vals (:join-remaining costs)))
        unbounded-filter-cost (reduce + 0 (vals (select-keys (:known-remaining costs)
                                                             (:filter-components costs))))
        ;; The old model charges the full filter cost; only the new model caps
        ;; it at the lookup cost when the path hits a unique column.
        filter-cost-old unbounded-filter-cost
        filter-cost-new (if unique-hit?
                          (min index-lookup-cost unbounded-filter-cost)
                          unbounded-filter-cost)
        filter-cost (if (use-new-index-cost?)
                      filter-cost-new
                      filter-cost-old)
        realistic-prefix (debug-realistic-prefix named-p idx-config
                                                 (:known-components costs))]
    {:lookup-cost index-lookup-cost
     :filter-cost filter-cost
     :filter-cost-old filter-cost-old
     :filter-cost-new filter-cost-new
     :unbounded-filter-cost unbounded-filter-cost
     :unique-hit? unique-hit?
     :join-cost-old join-cost-old
     :join-cost-new join-cost-new
     :score path-cost-with-joins
     :value-access-mode (value-access-mode named-p)
     :optimistic-path (mapv :col (:path costs))
     :realistic-prefix (:path realistic-prefix)
     :blocked-on (:blocked-on realistic-prefix)
     :known-remaining (:known-remaining costs)
     :join-remaining (:join-remaining costs)
     :filter-remaining (:filter-remaining costs)}))
Comment on lines +1385 to +1418
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Make the debug filter cost reflect the active model.

When :new-index-cost is false, :score comes from path-cost-with-joins-old, but filter-cost here is still capped with unique-hit?. The breakdown can therefore disagree with the score in the flag-off path you're trying to compare. Either branch this field on use-new-index-cost? or emit separate :filter-cost-old / :filter-cost-new values.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@server/src/instant/db/datalog.clj` around lines 1385 - 1418, In
debug-cost-breakdown, make the reported filter cost align with the active
indexing-mode: either branch the computed :filter-cost on use-new-index-cost?
(so when the flag is false, compute :filter-cost the same way as when :score
uses path-cost-with-joins-old) or emit both :filter-cost-old and
:filter-cost-new fields; update the calculation that currently uses unique-hit?
to produce the alternate value for the old code path (referencing unique-hit?,
index-lookup-cost, unbounded-filter-cost, path-cost-with-joins and
path-cost-with-joins-old) and ensure :score and the chosen filter-cost reflect
the same use-new-index-cost? branch.


(defn index-compare
"Compares the indexes pairwise to try to pick the best one.
A scoring system might be better, but this is a easier to debug."
Expand Down Expand Up @@ -1471,15 +1589,18 @@
indexes-with-costs
(map (fn [idx-config]
(let [costs (reduce (fn [acc col]
(if-let [cost (get (:known-remaining acc) col)]
(-> acc
(update :known-remaining dissoc col)
(update :join-remaining dissoc col)
(update :filter-remaining disj col)
(update :path conj {:cost cost
:col col
:type :index-lookup}))
(reduced acc)))
(if (and (use-new-index-cost?)
(not (can-match-on-index-column? named-p idx-config col)))
(reduced acc)
(if-let [cost (get (:known-remaining acc) col)]
(-> acc
(update :known-remaining dissoc col)
(update :join-remaining dissoc col)
(update :filter-remaining disj col)
(update :path conj {:cost cost
:col col
:type :index-lookup}))
(reduced acc))))
{:known-remaining known-components
:known-components known-components
:join-components join-components
Expand All @@ -1495,8 +1616,15 @@
:matching-idx-key? (= (:idx-key idx-config)
(idx-key (:idx named-p)))
:matching-data-type? (= (:data-type idx-config)
(idx-data-type (:idx named-p))))]
(assoc cfg :path-cost-with-joins (path-cost-with-joins cfg))))
(idx-data-type (:idx named-p))))
path-cost (path-cost-with-joins cfg)
cfg (assoc cfg :path-cost-with-joins path-cost)]
(cond-> cfg
*debug* (assoc :debug-costs
(debug-cost-breakdown named-p
idx-config
costs
path-cost)))))
index-candidates)

sorted-indexes (sort index-compare indexes-with-costs)
Expand Down Expand Up @@ -1696,8 +1824,7 @@
(into acc (map (fn [path]
[ctype (:ctype path)])
paths))
acc)
)
acc))
#{}
(variable-components named-p))]
(if (or (= #{[:e :e]} join-ctypes)
Expand Down Expand Up @@ -2834,9 +2961,9 @@
(get "exists")))
(:page-info group) (assoc-in [:page-info :has-previous-page?]
(let [has-prev (-> sql-res
(get (name (has-prev-tbl table)))
first
(get "exists"))
(get (name (has-prev-tbl table)))
first
(get "exists"))
offset (get-in group [:page-info :offset])
after-cursor (get-in group [:page-info :after])]
;; If the page is empty but we have an offset > 0 or an after cursor,
Expand Down
27 changes: 26 additions & 1 deletion server/test/instant/db/datalog_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
[honey.sql :as hsql]
[instant.jdbc.aurora :as aurora]
[instant.config :as config]
[instant.db.attr-sketch :as cms]
[instant.db.datalog :as d]
[instant.db.model.attr :as attr-model]
[instant.db.transaction :as tx]
[instant.data.resolvers :as resolvers]
[instant.jdbc.sql :as sql]
[instant.fixtures :refer [with-movies-app with-zeneca-app with-zeneca-checked-data-app]]
[instant.util.test :refer [in-memory-sketches-for-app]]))
[instant.util.test :refer [in-memory-sketches-for-app]])
(:import
(java.time Instant)))

(defn make-ctx [app]
{:db {:conn-pool (aurora/conn-pool :read)}
Expand Down Expand Up @@ -43,6 +46,28 @@
(is (= '([:pattern {:idx [:keyword :vae], :e [:any _], :a [:any _], :v [:any _] :created-at [:any _]}])
(d/->named-patterns '[[:vae]])))))

;; Checks which index `d/best-index` picks for an :ave pattern with a :date
;; data type whose value is a `$gt` comparator; the expected winner is the
;; index named :triples_pkey.
(deftest best-index-ignores-index-columns-it-cant-use
  (let [app-id (random-uuid)
        attr-id (random-uuid)
        date-value (Instant/parse "2026-01-24T05:00:00Z")
        ;; Sketch seeded with 200 :date values, one per second from the epoch.
        sketch (reduce (fn [acc i]
                         (cms/add acc :date (Instant/ofEpochMilli (* 1000 i))))
                       (cms/make-sketch)
                       (range 200))
        ;; The context carries only the app id and the sketch, keyed by
        ;; app-id/attr-id; no database connection is supplied.
        ctx {:app-id app-id
             :sketches {{:app-id app-id
                         :attr-id attr-id}
                        {:sketch sketch}}}
        named-p {:idx [:map {:idx-key :ave, :data-type :date}]
                 :e [:variable '?item]
                 :a [:constant #{attr-id}]
                 :v [:function {:$comparator {:op :$gt
                                              :value date-value
                                              :data-type :date}}]
                 :created-at [:any '_]}
        ;; NOTE(review): the third argument presumably maps join symbols to a
        ;; cost/row estimate -- confirm against `d/best-index`.
        best-index (d/best-index ctx named-p {'?item 2})]
    (is (= :triples_pkey (:name best-index)))))

(deftest pats->coarse-topics
(is (= '[[#{:eav} _ _ _ _]]
(d/pats->coarse-topics '[[:eav ?e]])))
Expand Down
Loading