From 1ed59af12246b11644b2296fa608c6d40bd942e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 00:07:05 +0200 Subject: [PATCH 01/11] ScopedValues will be useful for defining local context (WIP) But they seem immutable. Maybe a chain of with(s => new_value) blocks? 186/371 pass --- Project.toml | 1 + src/Kezdi.jl | 4 +++- src/With.jl | 12 +++--------- src/codegen.jl | 4 ++-- src/consts.jl | 4 ++-- src/functions.jl | 9 +++++---- src/structs.jl | 7 +++++++ 7 files changed, 23 insertions(+), 18 deletions(-) diff --git a/Project.toml b/Project.toml index be404c1..d63e0e5 100644 --- a/Project.toml +++ b/Project.toml @@ -16,6 +16,7 @@ Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" ReadStatTables = "52522f7a-9570-4e34-8ac6-c005c74d4b84" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +ScopedValues = "7e506255-f358-4e82-b7e4-beb19740aa63" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/src/Kezdi.jl b/src/Kezdi.jl index 14eaf60..9387994 100644 --- a/src/Kezdi.jl +++ b/src/Kezdi.jl @@ -4,12 +4,13 @@ Kezdi.jl is a Julia package for data manipulation and analysis. It is inspired b module Kezdi export @generate, @replace, @egen, @collapse, @keep, @drop, @summarize, @regress, use, @use, @tabulate, @count, @sort, @order, getdf, setdf, @list, @head, @tail, @names, @rename -export display_and_return, keep_only_values, rowcount, distinct +export display_and_return, keep_only_values, rowcount, distinct, _Kezdi_local_context using Reexport using Logging using InteractiveUtils using ReadStatTables +import ScopedValues @reexport using FreqTables: freqtable @reexport using FixedEffectModels @@ -31,5 +32,6 @@ include("side_effects.jl") include("With.jl") @reexport using .With: @with, @with! +context = ScopedValues.ScopedValue(Context()) end # module diff --git a/src/With.jl b/src/With.jl index 5ed420e..e8400fa 100644 --- a/src/With.jl +++ b/src/With.jl @@ -51,31 +51,25 @@ function rewrite_with_block(block) reconvert_docstrings!(block_expressions) - # save current dataframe - previous_df = gensym() rewritten_exprs = [] did_first = false + local df for expr in block_expressions # could be an expression first or a LineNumberNode, so a bit convoluted # we just do the firstvar transformation for the first non LineNumberNode # we encounter if !(did_first || expr isa LineNumberNode) did_first = true - push!(rewritten_exprs, :(local $previous_df = getdf())) - push!(rewritten_exprs, :(setdf($expr))) + df = expr continue end push!(rewritten_exprs, expr) end - teardown = :(x -> begin - setdf($previous_df) - x - end) result = Expr(:block, rewritten_exprs...) - :($(esc(result)) |> $(esc(teardown))) + :($(esc(:(Kezdi.ScopedValues.@with Kezdi.context => setdf($df) $result)) )) end # if a line in a with is a string, it can be parsed as a docstring diff --git a/src/codegen.jl b/src/codegen.jl index f09ba83..5ef6bf7 100644 --- a/src/codegen.jl +++ b/src/codegen.jl @@ -28,8 +28,8 @@ function generate_command(command::Command; options=[], allowed=[]) (opt in allowed) || ArgumentError("Invalid option \"$opt\" for this command: @$(command.command)") |> throw end - push!(setup, :(getdf() isa AbstractDataFrame || error("Kezdi.jl commands can only operate on a global DataFrame set by setdf()"))) - push!(setup, :(local $df2 = copy(getdf()))) + push!(setup, :(Kezdi.context[].df isa AbstractDataFrame || error("Kezdi.jl commands can only operate on a global DataFrame set by setdf()"))) + push!(setup, :(local $df2 = copy(Kezdi.context[].df))) variables_condition = (:ifable in options) ? vcat(extract_variable_references(command.condition)...) : Symbol[] variables_RHS = (:variables in options) ? vcat(extract_variable_references.(command.arguments)...) : Symbol[] variables = vcat(variables_condition, variables_RHS) diff --git a/src/consts.jl b/src/consts.jl index ed62404..03183ba 100644 --- a/src/consts.jl +++ b/src/consts.jl @@ -116,11 +116,11 @@ const OPTIONS = ( :variables ) +const DEFAULT_FLAGS = Set{Symbol}() + const SYNTACTIC_OPERATORS = tuple([Symbol(x) for x in split(raw"&& || += -= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻=")]...) const OPERATORS = tuple( vcat( [Symbol(x) for x in split(raw"= += -= −= *= /= //= \= ^= ÷= %= <<= >>= >>>= |= &= ⊻= ≔ ⩴ ≕ ← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⥷ ⭄ ⥺ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ← → ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ ~ --> <-- <--> > < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ ⫪ ⫫ <: >: + - − ¦ | ⊕ ⊖ ⊞ ⊟ ++ ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⟇ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣ * / ⌿ ÷ % & · · ⋅ ∘ × \ ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟ // ^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ↑ ↓ << >> >>>")], SYNTACTIC_OPERATORS...)...) -# not really a const, but anyway -global _global_dataframe::Union{AbstractDataFrame, Nothing} = nothing diff --git a/src/functions.jl b/src/functions.jl index 2e92790..6864b04 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -3,16 +3,17 @@ use(fname::AbstractString) = readstat(fname) |> DataFrame |> setdf """ getdf() -> AbstractDataFrame -Return the global data frame. +Return the data frame set in the current scope. """ -getdf() = _global_dataframe +getdf() = Kezdi.context[].df """ setdf(df::Union{AbstractDataFrame, Nothing}) -Set the global data frame. +Return a Kezdi.Context with the DataFrame set. """ -setdf(df::Union{AbstractDataFrame, Nothing}) = global _global_dataframe = df +setdf(df::Union{AbstractDataFrame, Nothing}) = Context(df, Kezdi.context[].scalars, Kezdi.context[].flags) + display_and_return(x) = (display(x); x) """ diff --git a/src/structs.jl b/src/structs.jl index 27860d1..5968a5a 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -19,6 +19,13 @@ struct GeneratedCommand options::Vector{Any} end +struct Context + df::Any + scalars::Vector{Symbol} + flags::Set{Symbol} +end +Context() = Context(nothing, Symbol[], DEFAULT_FLAGS) + using DataFrames using Statistics using StatsBase From f35a19aa70a3311d7427aa84ab4feb2d14454254 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 00:31:57 +0200 Subject: [PATCH 02/11] Proof of concept about chain of ScopedValues By creating a chain of scopes, we can mutate the scope for the following function calls. Written with help from Claude 3.5 --- scope.jl | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 scope.jl diff --git a/scope.jl b/scope.jl new file mode 100644 index 0000000..a3ee34e --- /dev/null +++ b/scope.jl @@ -0,0 +1,41 @@ +import ScopedValues + +const MyValue = ScopedValues.ScopedValue{Int}() + +function step1() + return MyValue[] + 1 +end + +function step2() + return MyValue[] * 2 +end + +function step3() + return MyValue[] - 3 +end + +function process_steps(steps) + if isempty(steps) + return MyValue[] + else + current_step = first(steps) + remaining_steps = steps[2:end] + new_value = current_step() + println("After $(current_step): ", new_value) + + ScopedValues.@with MyValue => new_value begin + process_steps(remaining_steps) + end + end +end + +function process_chain(initial_value, steps) + ScopedValues.@with MyValue => initial_value begin + println("Initial: ", MyValue[]) + process_steps(steps) + end +end + +steps = [step1, step2, step3] +result = process_chain(0, steps) +println("Final result: ", result) \ No newline at end of file From 8b8e6efda1110110f39833d56f2d210ba977bbd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 10:12:31 +0200 Subject: [PATCH 03/11] Revert back to 45c5ddb8d35449ac406ccbb838102c8f234c962f version of With.jl --- src/With.jl | 260 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 240 insertions(+), 20 deletions(-) diff --git a/src/With.jl b/src/With.jl index e8400fa..ef75eec 100644 --- a/src/With.jl +++ b/src/With.jl @@ -1,30 +1,246 @@ module With -using ..Kezdi export @with, @with! +using ..Kezdi -""" - @with df begin - # do something with df +is_aside(x) = false +function is_aside(x::Expr)::Bool + if x.head == :(=) + return is_aside(x.args[2]) end + return x.head == :macrocall && x.args[1] in Kezdi.SIDE_EFFECTS +end -The `@with` macro is a convenience macro that allows you to set the current data frame and perform operations on it in a single block. The first argument is the data frame to set as the current data frame, and the second argument is a block of code to execute. The data frame is set as the current data frame for the duration of the block, and then restored to its previous value after the block is executed. -The macro returns the value of the last expression in the block. -""" -macro with(initial_value, args...) - block = flatten_to_single_block(initial_value, args...) +insert_first_arg(symbol::Symbol, firstarg; assignment = false) = Expr(:call, symbol, firstarg) +insert_first_arg(any, firstarg; assignment = false) = insertionerror(any) + +function insertionerror(expr) + error( + """Can't insert a first argument into: + $expr. + + First argument insertion works with expressions like these, where [Module.SubModule.] is optional: + + [Module.SubModule.]func + [Module.SubModule.]func(args...) + [Module.SubModule.]func(args...; kwargs...) + [Module.SubModule.]@macro + [Module.SubModule.]@macro(args...) + @. [Module.SubModule.]func + """ + ) +end + +is_moduled_symbol(x) = false +function is_moduled_symbol(e::Expr) + e.head == :. && + length(e.args) == 2 && + (e.args[1] isa Symbol || is_moduled_symbol(e.args[1])) && + e.args[2] isa QuoteNode && + e.args[2].value isa Symbol +end + +function insert_first_arg(e::Expr, firstarg; assignment = false) + head = e.head + args = e.args + # variable = ... + # set assignment = true and rerun with right hand side + if !assignment && head == :(=) && length(args) == 2 + if !(args[1] isa Symbol) + error("You can only use assignment syntax with a Symbol as a variable name, not $(args[1]).") + end + variable = args[1] + righthandside = insert_first_arg(args[2], firstarg; assignment = true) + :($variable = $righthandside) + # Module.SubModule.symbol + elseif is_moduled_symbol(e) + Expr(:call, e, firstarg) + + # f(args...) --> f(firstarg, args...) + elseif head == :call && length(args) > 0 + if length(args) ≥ 2 && Meta.isexpr(args[2], :parameters) + Expr(head, args[1:2]..., firstarg, args[3:end]...) + elseif args[1] in [:env, :scalars] + # does not have to insert first argument into $e + Expr(head, args...) + else + Expr(head, args[1], firstarg, args[2:end]...) + end + + # f.(args...) --> f.(firstarg, args...) + elseif head == :. && + length(args) > 1 && + args[1] isa Symbol && + args[2] isa Expr && + args[2].head == :tuple + + Expr(head, args[1], Expr(args[2].head, firstarg, args[2].args...)) + + # @. [Module.SubModule.]somesymbol --> somesymbol.(firstarg) + elseif head == :macrocall && + length(args) == 3 && + args[1] == Symbol("@__dot__") && + args[2] isa LineNumberNode && + (is_moduled_symbol(args[3]) || args[3] isa Symbol) + + Expr(:., args[3], Expr(:tuple, firstarg)) + + # @macro(args...) --> @macro(firstarg, args...) + elseif head == :macrocall && + (is_moduled_symbol(args[1]) || args[1] isa Symbol) && + args[2] isa LineNumberNode + if args[1] == Symbol("@__dot__") + error("You can only use the @. macro and automatic first argument insertion if what follows is of the form `[Module.SubModule.]func`") + end + + if length(args) >= 3 && args[3] isa Expr && args[3].head == :parameters + # macros can have keyword arguments after ; as well + Expr(head, args[1], args[2], args[3], firstarg, args[4:end]...) + else + Expr(head, args[1], args[2], firstarg, args[3:end]...) + end + + else + insertionerror(e) + end +end + +function rewrite(expr, replacement) + aside = is_aside(expr) + new_expr = insert_first_arg(expr, replacement) + if !aside + replacement = gensym() + new_expr = :(local $replacement = $new_expr) + else + new_expr = :(display($new_expr)) + end + + (new_expr, replacement) +end + +rewrite(l::LineNumberNode, replacement) = (l, replacement) + +function rewrite_with_block(firstpart, block) + pushfirst!(block.args, firstpart) rewrite_with_block(block) end """ - @with! df begin - # do something with df - end + @with(expr, exprs...) -The `@with!` macro is a convenience macro that allows you to set the current data frame and perform operations on it in a single block. The first argument is the data frame to set as the current data frame, and the second argument is a block of code to execute. The data frame is set as the current data frame for the duration of the block, and then restored to its previous value after the block is executed. +Rewrites a series of expressions into a with, where the result of one expression +is inserted into the next expression following certain rules. + +**Rule 1** + +Any `expr` that is a `begin ... end` block is flattened. +For example, these two pseudocodes are equivalent: + +```julia +@with a b c d e f + +@with a begin + b + c + d +end e f +``` + +**Rule 2** + +Any expression but the first (in the flattened representation) will have the preceding result +inserted as its first argument, unless at least one underscore `_` is present. +In that case, all underscores will be replaced with the preceding result. + +If the expression is a symbol, the symbol is treated equivalently to a function call. + +For example, the following code block + +```julia +@with begin + x + f() + @g() + h + @i + j(123, _) + k(_, 123, _) +end +``` + +is equivalent to + +```julia +begin + local temp1 = f(x) + local temp2 = @g(temp1) + local temp3 = h(temp2) + local temp4 = @i(temp3) + local temp5 = j(123, temp4) + local temp6 = k(temp5, 123, temp5) +end +``` + +**Rule 3** + +An expression that begins with `@aside` does not pass its result on to the following expression. +Instead, the result of the previous expression will be passed on. +This is meant for inspecting the state of the with. +The expression within `@aside` will not get the previous result auto-inserted, you can use +underscores to reference it. + +```julia +@with begin + [1, 2, 3] + filter(isodd, _) + @aside @info "There are \$(length(_)) elements after filtering" + sum +end +``` + +**Rule 4** + +It is allowed to start an expression with a variable assignment. +In this case, the usual insertion rules apply to the right-hand side of that assignment. +This can be used to store intermediate results. + +```julia +@with begin + [1, 2, 3] + filtered = filter(isodd, _) + sum +end + +filtered == [1, 3] +``` + +**Rule 5** + +The `@.` macro may be used with a symbol to broadcast that function over the preceding result. + +```julia +@with begin + [1, 2, 3] + @. sqrt +end +``` + +is equivalent to + +```julia +@with begin + [1, 2, 3] + sqrt.(_) +end +``` -The macro does not have a return value, it overwrites the data frame directly. """ +macro with(initial_value, args...) + block = flatten_to_single_block(initial_value, args...) + rewrite_with_block(block) +end + + macro with!(initial_value, args...) block = flatten_to_single_block(initial_value, args...) result = rewrite_with_block(block) @@ -51,25 +267,29 @@ function rewrite_with_block(block) reconvert_docstrings!(block_expressions) + # assign first line to first gensym variable + firstvar = gensym() rewritten_exprs = [] + replacement = firstvar did_first = false - local df for expr in block_expressions # could be an expression first or a LineNumberNode, so a bit convoluted # we just do the firstvar transformation for the first non LineNumberNode # we encounter if !(did_first || expr isa LineNumberNode) + expr = :(local $firstvar = $expr) did_first = true - df = expr + push!(rewritten_exprs, expr) continue end - push!(rewritten_exprs, expr) + rewritten, replacement = rewrite(expr, replacement) + push!(rewritten_exprs, rewritten) end - result = Expr(:block, rewritten_exprs...) + result = Expr(:block, rewritten_exprs..., replacement) - :($(esc(:(Kezdi.ScopedValues.@with Kezdi.context => setdf($df) $result)) )) + :($(esc(result))) end # if a line in a with is a string, it can be parsed as a docstring @@ -92,4 +312,4 @@ function reconvert_docstrings!(args::Vector) args end -end +end \ No newline at end of file From 4eb24378dc0025eece8508504c0b52c24de32940 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 10:19:43 +0200 Subject: [PATCH 04/11] List side effects with simple symbols --- src/With.jl | 2 +- src/consts.jl | 28 ++++++++-------------------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/src/With.jl b/src/With.jl index ef75eec..5c014f1 100644 --- a/src/With.jl +++ b/src/With.jl @@ -7,7 +7,7 @@ function is_aside(x::Expr)::Bool if x.head == :(=) return is_aside(x.args[2]) end - return x.head == :macrocall && x.args[1] in Kezdi.SIDE_EFFECTS + return x.head == :macrocall && Symbol(String(x.args[1])[2:end]) in Kezdi.SIDE_EFFECTS end diff --git a/src/consts.jl b/src/consts.jl index 03183ba..3a1feed 100644 --- a/src/consts.jl +++ b/src/consts.jl @@ -72,27 +72,15 @@ const TYPES = ( :Vector ) -const COMMANDS = ( - :keep, - :drop, - :generate, - :replace, - :egen, - :collapse, - :tabulate, - :summarize, - :regress -) - const SIDE_EFFECTS = ( - Symbol("@tabulate"), - Symbol("@summarize"), - Symbol("@regress"), - Symbol("@list"), - Symbol("@head"), - Symbol("@tail"), - Symbol("@names"), - Symbol("@count") + :tabulate, + :summarize, + :regress, + :list, + :head, + :tail, + :names, + :count ) const DO_NOT_VECTORIZE = ( From a51286287ede6203e3b0efc092ba8326ac78e94c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 11:33:42 +0200 Subject: [PATCH 05/11] Setting df through ScopedValues --- src/With.jl | 84 ++++++------------------------------------------ src/codegen.jl | 7 ++-- src/commands.jl | 20 ++++++------ src/functions.jl | 7 ---- 4 files changed, 24 insertions(+), 94 deletions(-) diff --git a/src/With.jl b/src/With.jl index 5c014f1..993a63c 100644 --- a/src/With.jl +++ b/src/With.jl @@ -11,8 +11,9 @@ function is_aside(x::Expr)::Bool end -insert_first_arg(symbol::Symbol, firstarg; assignment = false) = Expr(:call, symbol, firstarg) -insert_first_arg(any, firstarg; assignment = false) = insertionerror(any) +function call_with_context(e::Expr, firstarg) + :(Kezdi.ScopedValues.@with Kezdi.context => $firstarg $e) +end function insertionerror(expr) error( @@ -40,77 +41,12 @@ function is_moduled_symbol(e::Expr) e.args[2].value isa Symbol end -function insert_first_arg(e::Expr, firstarg; assignment = false) - head = e.head - args = e.args - # variable = ... - # set assignment = true and rerun with right hand side - if !assignment && head == :(=) && length(args) == 2 - if !(args[1] isa Symbol) - error("You can only use assignment syntax with a Symbol as a variable name, not $(args[1]).") - end - variable = args[1] - righthandside = insert_first_arg(args[2], firstarg; assignment = true) - :($variable = $righthandside) - # Module.SubModule.symbol - elseif is_moduled_symbol(e) - Expr(:call, e, firstarg) - - # f(args...) --> f(firstarg, args...) - elseif head == :call && length(args) > 0 - if length(args) ≥ 2 && Meta.isexpr(args[2], :parameters) - Expr(head, args[1:2]..., firstarg, args[3:end]...) - elseif args[1] in [:env, :scalars] - # does not have to insert first argument into $e - Expr(head, args...) - else - Expr(head, args[1], firstarg, args[2:end]...) - end - - # f.(args...) --> f.(firstarg, args...) - elseif head == :. && - length(args) > 1 && - args[1] isa Symbol && - args[2] isa Expr && - args[2].head == :tuple - - Expr(head, args[1], Expr(args[2].head, firstarg, args[2].args...)) - - # @. [Module.SubModule.]somesymbol --> somesymbol.(firstarg) - elseif head == :macrocall && - length(args) == 3 && - args[1] == Symbol("@__dot__") && - args[2] isa LineNumberNode && - (is_moduled_symbol(args[3]) || args[3] isa Symbol) - - Expr(:., args[3], Expr(:tuple, firstarg)) - - # @macro(args...) --> @macro(firstarg, args...) - elseif head == :macrocall && - (is_moduled_symbol(args[1]) || args[1] isa Symbol) && - args[2] isa LineNumberNode - if args[1] == Symbol("@__dot__") - error("You can only use the @. macro and automatic first argument insertion if what follows is of the form `[Module.SubModule.]func`") - end - - if length(args) >= 3 && args[3] isa Expr && args[3].head == :parameters - # macros can have keyword arguments after ; as well - Expr(head, args[1], args[2], args[3], firstarg, args[4:end]...) - else - Expr(head, args[1], args[2], firstarg, args[3:end]...) - end - - else - insertionerror(e) - end -end - function rewrite(expr, replacement) aside = is_aside(expr) - new_expr = insert_first_arg(expr, replacement) + new_expr = call_with_context(expr, replacement) if !aside replacement = gensym() - new_expr = :(local $replacement = $new_expr) + new_expr = :(local $replacement = Kezdi.Context($new_expr, Symbol[], Set{Symbol}())) else new_expr = :(display($new_expr)) end @@ -267,18 +203,17 @@ function rewrite_with_block(block) reconvert_docstrings!(block_expressions) - # assign first line to first gensym variable - firstvar = gensym() + local_context = gensym() + replacement = local_context rewritten_exprs = [] - replacement = firstvar did_first = false for expr in block_expressions # could be an expression first or a LineNumberNode, so a bit convoluted - # we just do the firstvar transformation for the first non LineNumberNode + # we just do the local_context transformation for the first non LineNumberNode # we encounter if !(did_first || expr isa LineNumberNode) - expr = :(local $firstvar = $expr) + expr = :(local $local_context = Kezdi.Context($expr, Symbol[], Set{Symbol}())) did_first = true push!(rewritten_exprs, expr) continue @@ -287,6 +222,7 @@ function rewrite_with_block(block) rewritten, replacement = rewrite(expr, replacement) push!(rewritten_exprs, rewritten) end + result = Expr(:block, rewritten_exprs..., replacement) :($(esc(result))) diff --git a/src/codegen.jl b/src/codegen.jl index 5ef6bf7..d631d34 100644 --- a/src/codegen.jl +++ b/src/codegen.jl @@ -1,4 +1,6 @@ function generate_command(command::Command; options=[], allowed=[]) + df = :(Kezdi.context[].df) + df2 = gensym() sdf = gensym() gdf = gensym() @@ -28,8 +30,8 @@ function generate_command(command::Command; options=[], allowed=[]) (opt in allowed) || ArgumentError("Invalid option \"$opt\" for this command: @$(command.command)") |> throw end - push!(setup, :(Kezdi.context[].df isa AbstractDataFrame || error("Kezdi.jl commands can only operate on a global DataFrame set by setdf()"))) - push!(setup, :(local $df2 = copy(Kezdi.context[].df))) + push!(setup, :($df isa AbstractDataFrame || error("Kezdi.jl commands can only operate on a DataFrame"))) + push!(setup, :(local $df2 = copy($df))) variables_condition = (:ifable in options) ? vcat(extract_variable_references(command.condition)...) : Symbol[] variables_RHS = (:variables in options) ? vcat(extract_variable_references.(command.arguments)...) : Symbol[] variables = vcat(variables_condition, variables_RHS) @@ -89,7 +91,6 @@ function get_option(command::Command, key::Symbol) end end - function get_top_symbol(expr::Any) if expr isa Expr return get_top_symbol(expr.args[1]) diff --git a/src/commands.jl b/src/commands.jl index 6eef5a2..83b5d5c 100644 --- a/src/commands.jl +++ b/src/commands.jl @@ -9,7 +9,7 @@ function rewrite(::Val{:rename}, command::Command) ArgumentError("Syntax is @rename oldname newname") |> throw else $setup - rename!($local_copy, $arguments[1] => $arguments[2]) |> $teardown |> setdf + rename!($local_copy, $arguments[1] => $arguments[2]) |> $teardown end end |> esc end @@ -26,7 +26,7 @@ function rewrite(::Val{:generate}, command::Command) $setup $local_copy[!, $target_column] .= missing $target_df[!, $target_column] .= $RHS - $local_copy |> $teardown |> setdf + $local_copy |> $teardown end end |> esc end @@ -53,7 +53,7 @@ function rewrite(::Val{:replace}, command::Command) else $target_df[!, $target_column] .= $RHS end - $local_copy |> $teardown |> setdf + $local_copy |> $teardown end end |> esc end @@ -63,7 +63,7 @@ function rewrite(::Val{:keep}, command::Command) (; local_copy, target_df, setup, teardown, arguments, options) = gc quote $setup - $target_df[!, isempty($(command.arguments)) ? eval(:(:)) : collect($command.arguments)] |> $teardown |> setdf + $target_df[!, isempty($(command.arguments)) ? eval(:(:)) : collect($command.arguments)] |> $teardown end |> esc end @@ -73,13 +73,13 @@ function rewrite(::Val{:drop}, command::Command) if isnothing(command.condition) return quote $setup - select($local_copy, Not(collect($(command.arguments)))) |> $teardown |> setdf + select($local_copy, Not(collect($(command.arguments)))) |> $teardown end |> esc end bitmask = build_bitmask(local_copy, command.condition) return quote $setup - $local_copy[.!($bitmask), :] |> $teardown |> setdf + $local_copy[.!($bitmask), :] |> $teardown end |> esc end @@ -89,7 +89,7 @@ function rewrite(::Val{:collapse}, command::Command) combine_epxression = Expr(:call, :combine, target_df, build_assignment_formula.(command.arguments)...) quote $setup - $combine_epxression |> $teardown |> setdf + $combine_epxression |> $teardown end |> esc end @@ -104,7 +104,7 @@ function rewrite(::Val{:egen}, command::Command) else $setup $transform_expression - $local_copy |> $teardown |> setdf + $local_copy |> $teardown end end |> esc end @@ -116,7 +116,7 @@ function rewrite(::Val{:sort}, command::Command) desc = :desc in get_top_symbol.(options) ? true : false quote $setup - sort($target_df, $columns, rev=$desc) |> $teardown |> setdf + sort($target_df, $columns, rev=$desc) |> $teardown end |> esc end @@ -180,7 +180,7 @@ function rewrite(::Val{:order}, command::Command) cols = pushfirst!(cols, target_cols...) end - $target_df[!,cols]|> $teardown |> setdf + $target_df[!,cols]|> $teardown end |> esc end diff --git a/src/functions.jl b/src/functions.jl index 6864b04..2b82327 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -7,13 +7,6 @@ Return the data frame set in the current scope. """ getdf() = Kezdi.context[].df -""" - setdf(df::Union{AbstractDataFrame, Nothing}) - -Return a Kezdi.Context with the DataFrame set. -""" -setdf(df::Union{AbstractDataFrame, Nothing}) = Context(df, Kezdi.context[].scalars, Kezdi.context[].flags) - display_and_return(x) = (display(x); x) """ From 0c93feede09d507047c604469e8ca46121385440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 15:35:35 +0200 Subject: [PATCH 06/11] Separate runtime and compile time context --- src/Kezdi.jl | 5 +++-- src/With.jl | 6 +++--- src/codegen.jl | 2 +- src/functions.jl | 2 +- src/structs.jl | 9 ++++++--- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/Kezdi.jl b/src/Kezdi.jl index 9387994..5a62998 100644 --- a/src/Kezdi.jl +++ b/src/Kezdi.jl @@ -4,7 +4,7 @@ Kezdi.jl is a Julia package for data manipulation and analysis. It is inspired b module Kezdi export @generate, @replace, @egen, @collapse, @keep, @drop, @summarize, @regress, use, @use, @tabulate, @count, @sort, @order, getdf, setdf, @list, @head, @tail, @names, @rename -export display_and_return, keep_only_values, rowcount, distinct, _Kezdi_local_context +export display_and_return, keep_only_values, rowcount, distinct using Reexport using Logging @@ -32,6 +32,7 @@ include("side_effects.jl") include("With.jl") @reexport using .With: @with, @with! -context = ScopedValues.ScopedValue(Context()) +runtime_context = ScopedValues.ScopedValue(RuntimeContext(nothing)) +compile_context = ScopedValues.ScopedValue(CompileContext()) end # module diff --git a/src/With.jl b/src/With.jl index 993a63c..1b81bd8 100644 --- a/src/With.jl +++ b/src/With.jl @@ -12,7 +12,7 @@ end function call_with_context(e::Expr, firstarg) - :(Kezdi.ScopedValues.@with Kezdi.context => $firstarg $e) + :(Kezdi.ScopedValues.@with Kezdi.runtime_context => $firstarg $e) end function insertionerror(expr) @@ -46,7 +46,7 @@ function rewrite(expr, replacement) new_expr = call_with_context(expr, replacement) if !aside replacement = gensym() - new_expr = :(local $replacement = Kezdi.Context($new_expr, Symbol[], Set{Symbol}())) + new_expr = :(local $replacement = Kezdi.RuntimeContext($new_expr)) else new_expr = :(display($new_expr)) end @@ -213,7 +213,7 @@ function rewrite_with_block(block) # we just do the local_context transformation for the first non LineNumberNode # we encounter if !(did_first || expr isa LineNumberNode) - expr = :(local $local_context = Kezdi.Context($expr, Symbol[], Set{Symbol}())) + expr = :(local $local_context = Kezdi.RuntimeContext($expr)) did_first = true push!(rewritten_exprs, expr) continue diff --git a/src/codegen.jl b/src/codegen.jl index d631d34..1ca34a0 100644 --- a/src/codegen.jl +++ b/src/codegen.jl @@ -1,5 +1,5 @@ function generate_command(command::Command; options=[], allowed=[]) - df = :(Kezdi.context[].df) + df = :(Kezdi.runtime_context[].df) df2 = gensym() sdf = gensym() diff --git a/src/functions.jl b/src/functions.jl index 2b82327..53dac2d 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -5,7 +5,7 @@ use(fname::AbstractString) = readstat(fname) |> DataFrame |> setdf Return the data frame set in the current scope. """ -getdf() = Kezdi.context[].df +getdf() = Kezdi.runtime_context[].df display_and_return(x) = (display(x); x) diff --git a/src/structs.jl b/src/structs.jl index 5968a5a..3e9645d 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -19,12 +19,15 @@ struct GeneratedCommand options::Vector{Any} end -struct Context - df::Any +struct CompileContext scalars::Vector{Symbol} flags::Set{Symbol} end -Context() = Context(nothing, Symbol[], DEFAULT_FLAGS) +CompileContext() = CompileContext(Symbol[], DEFAULT_FLAGS) + +struct RuntimeContext + df::Any +end using DataFrames using Statistics From 29fc11238047f91f55d0ebd0a0872c6e231f6a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 16:57:40 +0200 Subject: [PATCH 07/11] refactor: Pass df as ScopedValue called Kezdi.runtime_context 373/374 tests pass Global df not implemented yet --- src/With.jl | 67 +++++++++++++++++++----------------------------- src/functions.jl | 1 + 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/src/With.jl b/src/With.jl index 1b81bd8..7ec2775 100644 --- a/src/With.jl +++ b/src/With.jl @@ -11,50 +11,31 @@ function is_aside(x::Expr)::Bool end -function call_with_context(e::Expr, firstarg) - :(Kezdi.ScopedValues.@with Kezdi.runtime_context => $firstarg $e) -end - -function insertionerror(expr) - error( - """Can't insert a first argument into: - $expr. - - First argument insertion works with expressions like these, where [Module.SubModule.] is optional: - - [Module.SubModule.]func - [Module.SubModule.]func(args...) - [Module.SubModule.]func(args...; kwargs...) - [Module.SubModule.]@macro - [Module.SubModule.]@macro(args...) - @. [Module.SubModule.]func - """ - ) -end - -is_moduled_symbol(x) = false -function is_moduled_symbol(e::Expr) - e.head == :. && - length(e.args) == 2 && - (e.args[1] isa Symbol || is_moduled_symbol(e.args[1])) && - e.args[2] isa QuoteNode && - e.args[2].value isa Symbol +function call_with_context(e::Expr, firstarg; assignment = false) + head = e.head + args = e.args + # set assignment = true and rerun with right hand side + if !assignment && head == :(=) && length(args) == 2 + if !(args[1] isa Symbol) + error("You can only use assignment syntax with a Symbol as a variable name, not $(args[1]).") + end + variable = args[1] + righthandside = call_with_context(args[2], firstarg; assignment = true) + return :($variable = $righthandside) + end + :(Kezdi.ScopedValues.@with Kezdi.runtime_context => Kezdi.RuntimeContext($firstarg) $e) end function rewrite(expr, replacement) aside = is_aside(expr) new_expr = call_with_context(expr, replacement) - if !aside - replacement = gensym() - new_expr = :(local $replacement = Kezdi.RuntimeContext($new_expr)) - else - new_expr = :(display($new_expr)) - end + replacement = gensym() + new_expr = :(local $replacement = $new_expr) - (new_expr, replacement) + (new_expr, replacement, aside) end -rewrite(l::LineNumberNode, replacement) = (l, replacement) +rewrite(l::LineNumberNode, replacement) = (l, replacement, true) function rewrite_with_block(firstpart, block) pushfirst!(block.args, firstpart) @@ -203,8 +184,9 @@ function rewrite_with_block(block) reconvert_docstrings!(block_expressions) - local_context = gensym() - replacement = local_context + local_value = gensym() + replaced_value = local_value + current_df = local_value rewritten_exprs = [] did_first = false @@ -213,17 +195,20 @@ function rewrite_with_block(block) # we just do the local_context transformation for the first non LineNumberNode # we encounter if !(did_first || expr isa LineNumberNode) - expr = :(local $local_context = Kezdi.RuntimeContext($expr)) + expr = :(local $local_value = $expr) did_first = true push!(rewritten_exprs, expr) continue end - rewritten, replacement = rewrite(expr, replacement) + rewritten, replaced_value, aside = rewrite(expr, current_df) push!(rewritten_exprs, rewritten) + if !aside + push!(rewritten_exprs, :(local $current_df = $replaced_value)) + end end - result = Expr(:block, rewritten_exprs..., replacement) + result = Expr(:block, rewritten_exprs..., replaced_value) :($(esc(result))) end diff --git a/src/functions.jl b/src/functions.jl index 53dac2d..dd343f4 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -6,6 +6,7 @@ use(fname::AbstractString) = readstat(fname) |> DataFrame |> setdf Return the data frame set in the current scope. """ getdf() = Kezdi.runtime_context[].df +setdf(df::Union{AbstractDataFrame, Nothing}) = Kezdi.runtime_context[].df = df display_and_return(x) = (display(x); x) From dbd4fbfb26feed9e947de25ebe21b4417c03138d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 18:03:18 +0200 Subject: [PATCH 08/11] Introduce a global_runtime_context, as a fallback if runtime_context does not exist (WIP) TODO: this also needs to be modified --- src/Kezdi.jl | 1 + src/codegen.jl | 2 +- src/functions.jl | 10 ++++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Kezdi.jl b/src/Kezdi.jl index 5a62998..1961b93 100644 --- a/src/Kezdi.jl +++ b/src/Kezdi.jl @@ -34,5 +34,6 @@ include("With.jl") @reexport using .With: @with, @with! runtime_context = ScopedValues.ScopedValue(RuntimeContext(nothing)) compile_context = ScopedValues.ScopedValue(CompileContext()) +global_runtime_context = RuntimeContext(nothing) end # module diff --git a/src/codegen.jl b/src/codegen.jl index 1ca34a0..bab6053 100644 --- a/src/codegen.jl +++ b/src/codegen.jl @@ -1,5 +1,5 @@ function generate_command(command::Command; options=[], allowed=[]) - df = :(Kezdi.runtime_context[].df) + df = :(getdf()) df2 = gensym() sdf = gensym() diff --git a/src/functions.jl b/src/functions.jl index dd343f4..603fa5b 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -5,8 +5,14 @@ use(fname::AbstractString) = readstat(fname) |> DataFrame |> setdf Return the data frame set in the current scope. """ -getdf() = Kezdi.runtime_context[].df -setdf(df::Union{AbstractDataFrame, Nothing}) = Kezdi.runtime_context[].df = df +getdf() = Kezdi.runtime_context[].df isa Nothing ? Kezdi.global_runtime_context.df : Kezdi.runtime_context[].df + +""" + setdf(df::AbstractDataFrame) + +Set the data frame in the global scope. +""" +setdf(df::Union{AbstractDataFrame, Nothing}) = Kezdi.global_runtime_context = RuntimeContext(df) display_and_return(x) = (display(x); x) From 1d478dd509534c67c719bb641c9c5923ba1bac57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Sun, 14 Jul 2024 21:07:23 +0200 Subject: [PATCH 09/11] bugfix: if global context is used, save global df --- src/Kezdi.jl | 4 ++-- src/codegen.jl | 12 ++++++++---- src/functions.jl | 9 +++++---- src/structs.jl | 2 ++ test/commands.jl | 2 +- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/Kezdi.jl b/src/Kezdi.jl index 1961b93..30d2df9 100644 --- a/src/Kezdi.jl +++ b/src/Kezdi.jl @@ -32,8 +32,8 @@ include("side_effects.jl") include("With.jl") @reexport using .With: @with, @with! -runtime_context = ScopedValues.ScopedValue(RuntimeContext(nothing)) +runtime_context = ScopedValues.ScopedValue(RuntimeContext(nothing, true)) compile_context = ScopedValues.ScopedValue(CompileContext()) -global_runtime_context = RuntimeContext(nothing) +global_runtime_context = RuntimeContext(nothing, true) end # module diff --git a/src/codegen.jl b/src/codegen.jl index bab6053..4d9f805 100644 --- a/src/codegen.jl +++ b/src/codegen.jl @@ -1,9 +1,8 @@ function generate_command(command::Command; options=[], allowed=[]) - df = :(getdf()) - df2 = gensym() sdf = gensym() gdf = gensym() + context = gensym() setup = Expr[] teardown = Expr[] process = (x -> x) @@ -30,8 +29,11 @@ function generate_command(command::Command; options=[], allowed=[]) (opt in allowed) || ArgumentError("Invalid option \"$opt\" for this command: @$(command.command)") |> throw end - push!(setup, :($df isa AbstractDataFrame || error("Kezdi.jl commands can only operate on a DataFrame"))) - push!(setup, :(local $df2 = copy($df))) + push!(setup, quote + local $context = Kezdi.get_runtime_context() + $context.df isa AbstractDataFrame || error("Kezdi.jl commands can only operate on a DataFrame") + local $df2 = copy($context.df) + end) variables_condition = (:ifable in options) ? vcat(extract_variable_references(command.condition)...) : Symbol[] variables_RHS = (:variables in options) ? vcat(extract_variable_references.(command.arguments)...) : Symbol[] variables = vcat(variables_condition, variables_RHS) @@ -73,6 +75,8 @@ function generate_command(command::Command; options=[], allowed=[]) end push!(setup, quote function $tdfunction(x) + # add global dataframe save here + $context.inplace && setdf($target_df) $(Expr(:block, teardown...)) x end diff --git a/src/functions.jl b/src/functions.jl index 603fa5b..2bdff0b 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -1,18 +1,19 @@ use(fname::AbstractString) = readstat(fname) |> DataFrame |> setdf """ - getdf() -> AbstractDataFrame + get_runtime_context() -> RuntimeContext -Return the data frame set in the current scope. +Return the current runtime context. This can be passed on as a ScopedValue or set as a global. """ -getdf() = Kezdi.runtime_context[].df isa Nothing ? Kezdi.global_runtime_context.df : Kezdi.runtime_context[].df +get_runtime_context() = Kezdi.runtime_context[].df isa Nothing ? Kezdi.global_runtime_context : Kezdi.runtime_context[] +getdf() = get_runtime_context().df """ setdf(df::AbstractDataFrame) Set the data frame in the global scope. """ -setdf(df::Union{AbstractDataFrame, Nothing}) = Kezdi.global_runtime_context = RuntimeContext(df) +setdf(df::Union{AbstractDataFrame, Nothing}) = Kezdi.global_runtime_context = RuntimeContext(df, true) display_and_return(x) = (display(x); x) diff --git a/src/structs.jl b/src/structs.jl index 3e9645d..1c439c2 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -27,7 +27,9 @@ CompileContext() = CompileContext(Symbol[], DEFAULT_FLAGS) struct RuntimeContext df::Any + inplace::Bool end +RuntimeContext(df) = RuntimeContext(df, false) using DataFrames using Statistics diff --git a/test/commands.jl b/test/commands.jl index 991c3a8..329f0a1 100644 --- a/test/commands.jl +++ b/test/commands.jl @@ -704,5 +704,5 @@ end @test nrow(getdf()) == 3 @drop @if x == 1 - @test nrow(getdf()) == 2 + @test nrow(getdf()) == 1 end \ No newline at end of file From baf5c26697870fe6583c83526e9b06ce14c2926d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Mon, 15 Jul 2024 16:54:40 +0200 Subject: [PATCH 10/11] Add compile time context 375/375 --- src/With.jl | 10 ++++++++-- src/functions.jl | 1 + src/structs.jl | 4 +++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/With.jl b/src/With.jl index 7ec2775..13923b9 100644 --- a/src/With.jl +++ b/src/With.jl @@ -177,6 +177,8 @@ function flatten_to_single_block(args...) end function rewrite_with_block(block) + current_context = Kezdi.get_compile_context() + local line_number = current_context.line_number block_expressions = block.args isempty(block_expressions) || (length(block_expressions) == 1 && block_expressions[] isa LineNumberNode) && @@ -200,8 +202,12 @@ function rewrite_with_block(block) push!(rewritten_exprs, expr) continue end - - rewritten, replaced_value, aside = rewrite(expr, current_df) + + if expr isa LineNumberNode + line_number = expr.line + end + + rewritten, replaced_value, aside = Kezdi.ScopedValues.@with Kezdi.compile_context => Kezdi.CompileContext(current_context.scalars, current_context.flags, true, line_number) rewrite(expr, current_df) push!(rewritten_exprs, rewritten) if !aside push!(rewritten_exprs, :(local $current_df = $replaced_value)) diff --git a/src/functions.jl b/src/functions.jl index 2bdff0b..b5bfa15 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -7,6 +7,7 @@ Return the current runtime context. This can be passed on as a ScopedValue or se """ get_runtime_context() = Kezdi.runtime_context[].df isa Nothing ? Kezdi.global_runtime_context : Kezdi.runtime_context[] getdf() = get_runtime_context().df +get_compile_context() = Kezdi.compile_context[] """ setdf(df::AbstractDataFrame) diff --git a/src/structs.jl b/src/structs.jl index 1c439c2..421b6c1 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -22,8 +22,10 @@ end struct CompileContext scalars::Vector{Symbol} flags::Set{Symbol} + with_block::Bool + line_number::Int end -CompileContext() = CompileContext(Symbol[], DEFAULT_FLAGS) +CompileContext() = CompileContext(Symbol[], DEFAULT_FLAGS, false, 0) struct RuntimeContext df::Any From 86cb7325183989192bfb299c592f43c936ded6b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikl=C3=B3s=20Koren?= Date: Mon, 15 Jul 2024 17:17:19 +0200 Subject: [PATCH 11/11] Add another ScopedValue to macro expansion of @with to save compile-time context This does not seem to pass on to macros --- src/With.jl | 2 +- src/codegen.jl | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/With.jl b/src/With.jl index 13923b9..2d1a45f 100644 --- a/src/With.jl +++ b/src/With.jl @@ -23,7 +23,7 @@ function call_with_context(e::Expr, firstarg; assignment = false) righthandside = call_with_context(args[2], firstarg; assignment = true) return :($variable = $righthandside) end - :(Kezdi.ScopedValues.@with Kezdi.runtime_context => Kezdi.RuntimeContext($firstarg) $e) + :(Kezdi.ScopedValues.@with Kezdi.compile_context => $(Kezdi.get_compile_context()) Kezdi.runtime_context => Kezdi.RuntimeContext($firstarg) $e) end function rewrite(expr, replacement) diff --git a/src/codegen.jl b/src/codegen.jl index 4d9f805..e85b675 100644 --- a/src/codegen.jl +++ b/src/codegen.jl @@ -11,6 +11,9 @@ function generate_command(command::Command; options=[], allowed=[]) target_df = df2 given_options = get_top_symbol.(command.options) + current_context = Kezdi.get_compile_context() + @warn current_context + current_context.with_block && @warn "I am in a with block, line number is $(current_context.line_number)" # check for syntax if !(:ifable in options) && !isnothing(command.condition)