@@ -64,7 +64,9 @@ enum TokenType {
6464 NOT = "NOT" ,
6565 DOT = "DOT" , // .
6666 ARROW = "ARROW" , // → or ->
67+ ASSIGN = "ASSIGN" , // ← or <-
6768 SEMICOLON = "SEMICOLON" ,
69+ NEWLINE = "NEWLINE" , // line break (statement separator)
6870 EOF = "EOF" ,
6971}
7072
@@ -96,6 +98,7 @@ const UNICODE_OPS: Record<string, TokenType> = {
9698 "⋉" : TokenType . LEFTSEMIJOIN ,
9799 "⋊" : TokenType . RIGHTSEMIJOIN ,
98100 "▷" : TokenType . ANTIJOIN ,
101+ "←" : TokenType . ASSIGN ,
99102} ;
100103
101104// Keyword → TokenType map (case-insensitive)
@@ -134,7 +137,25 @@ function tokenize(input: string): Token[] {
134137 let i = 0 ;
135138
136139 while ( i < input . length ) {
137- // Skip whitespace
140+ // Skip comments (lines starting with --)
141+ if ( input [ i ] === "-" && i + 1 < input . length && input [ i + 1 ] === "-" ) {
142+ while ( i < input . length && input [ i ] !== "\n" ) i ++ ;
143+ continue ;
144+ }
145+
146+ // Emit newlines as statement separators, skip other whitespace
147+ if ( input [ i ] === "\n" || input [ i ] === "\r" ) {
148+ // Skip consecutive newlines and emit at most one NEWLINE token
149+ while ( i < input . length && ( input [ i ] === "\n" || input [ i ] === "\r" ) ) i ++ ;
150+ // Only emit if there are tokens before this (not leading newlines)
151+ // and the last token isn't already a newline/semicolon
152+ if ( tokens . length > 0 &&
153+ tokens [ tokens . length - 1 ] . type !== TokenType . NEWLINE &&
154+ tokens [ tokens . length - 1 ] . type !== TokenType . SEMICOLON ) {
155+ tokens . push ( { type : TokenType . NEWLINE , value : "\\n" , pos : i } ) ;
156+ }
157+ continue ;
158+ }
138159 if ( / \s / . test ( input [ i ] ) ) {
139160 i ++ ;
140161 continue ;
@@ -168,6 +189,11 @@ function tokenize(input: string): Token[] {
168189 continue ;
169190 }
170191
192+ // Assignment: <- (must be checked before comparison operators)
193+ if ( ch === "<" && i + 1 < input . length && input [ i + 1 ] === "-" ) {
194+ tokens . push ( { type : TokenType . ASSIGN , value : "<-" , pos } ) ; i += 2 ; continue ;
195+ }
196+
171197 // Comparison operators
172198 if ( ch === "=" ) { tokens . push ( { type : TokenType . EQ , value : "=" , pos } ) ; i ++ ; continue ; }
173199 if ( ch === "<" && i + 1 < input . length && input [ i + 1 ] === ">" ) {
@@ -324,6 +350,11 @@ export class RAError extends Error {
324350
325351// ─── Parser ─────────────────────────────────────────────────────────────────
326352
/**
 * A parsed relational-algebra program: zero or more named assignments
 * (`name ← expr` / `name <- expr`) followed by a final result expression.
 */
interface RAProgram {
  // Named intermediate relations, in source order; compiled to SQL CTEs.
  assignments: { name: string; expr: RANode }[];
  // The expression whose value is the program's overall result.
  result: RANode;
}
357+
327358class Parser {
328359 private tokens : Token [ ] ;
329360 private pos : number ;
@@ -367,15 +398,44 @@ class Parser {
367398 // unary_expr = (σ[cond] | π[cols] | ρ[mappings] | γ[...] | τ[...] | δ) unary_expr | primary
368399 // primary = IDENTIFIER | '(' expr ')'
369400
370- parse ( ) : RANode {
371- const node = this . parseUnionExpr ( ) ;
372- if ( this . peek ( ) . type === TokenType . SEMICOLON ) {
373- this . advance ( ) ;
374- }
375- if ( this . peek ( ) . type !== TokenType . EOF ) {
376- throw new RAError ( `Unexpected token '${ this . peek ( ) . value } ' at position ${ this . peek ( ) . pos } ` ) ;
401+ parse ( ) : RAProgram {
402+ const assignments : { name : string ; expr : RANode } [ ] = [ ] ;
403+
404+ // Skip leading newlines
405+ while ( this . peek ( ) . type === TokenType . NEWLINE ) this . advance ( ) ;
406+
407+ while ( this . peek ( ) . type !== TokenType . EOF ) {
408+ // Try to detect assignment: IDENTIFIER (← | <-) expr
409+ if ( this . peek ( ) . type === TokenType . IDENTIFIER ) {
410+ const saved = this . pos ;
411+ const name = this . advance ( ) . value ;
412+ if ( this . peek ( ) . type === TokenType . ASSIGN ) {
413+ this . advance ( ) ; // consume ← / <-
414+ const expr = this . parseUnionExpr ( ) ;
415+ assignments . push ( { name, expr } ) ;
416+ // Consume statement separator (newline, semicolon, or EOF)
417+ while ( this . peek ( ) . type === TokenType . SEMICOLON || this . peek ( ) . type === TokenType . NEWLINE ) {
418+ this . advance ( ) ;
419+ }
420+ continue ;
421+ }
422+ // Not an assignment — backtrack and parse as expression
423+ this . pos = saved ;
424+ }
425+
426+ // Parse the final result expression
427+ const result = this . parseUnionExpr ( ) ;
428+ // Consume trailing separators
429+ while ( this . peek ( ) . type === TokenType . SEMICOLON || this . peek ( ) . type === TokenType . NEWLINE ) {
430+ this . advance ( ) ;
431+ }
432+ if ( this . peek ( ) . type !== TokenType . EOF ) {
433+ throw new RAError ( `Unexpected token '${ this . peek ( ) . value } ' at position ${ this . peek ( ) . pos } ` ) ;
434+ }
435+ return { assignments, result } ;
377436 }
378- return node ;
437+
438+ throw new RAError ( "Empty expression" ) ;
379439 }
380440
381441 private parseUnionExpr ( ) : RANode {
@@ -977,6 +1037,26 @@ function nodeToSQL(node: RANode, db?: DatabaseHandle): string {
9771037 }
9781038}
9791039
1040+ function programToSQL ( program : RAProgram , db ?: DatabaseHandle ) : string {
1041+ if ( program . assignments . length === 0 ) {
1042+ return nodeToSQL ( program . result , db ) ;
1043+ }
1044+
1045+ // Use CTEs (WITH clauses) for assignments
1046+ const ctes = program . assignments . map ( a => {
1047+ const sql = nodeToSQL ( a . expr , db ) ;
1048+ // Wrap non-table expressions in SELECT * FROM (...) for CTE compatibility
1049+ const wrappedSQL = / ^ \w + $ / . test ( sql ) ? `SELECT * FROM ${ sql } ` : sql ;
1050+ return `${ a . name } AS (${ wrappedSQL } )` ;
1051+ } ) ;
1052+
1053+ const resultSQL = nodeToSQL ( program . result , db ) ;
1054+ // Wrap bare table reference in SELECT for the final expression
1055+ const wrappedResult = / ^ \w + $ / . test ( resultSQL ) ? `SELECT * FROM ${ resultSQL } ` : resultSQL ;
1056+
1057+ return `WITH ${ ctes . join ( ", " ) } ${ wrappedResult } ` ;
1058+ }
1059+
9801060// ─── Public API ─────────────────────────────────────────────────────────────
9811061
9821062/**
@@ -1018,8 +1098,8 @@ export function raToSQL(input: string, db?: DatabaseHandle): string {
  // Reset the module-level counter (presumably used to generate unique
  // subquery aliases — verify at its use sites) so repeated calls are
  // deterministic and independent.
  subqueryCounter = 0;
  const tokens = tokenize(input);
  const parser = new Parser(tokens);
  // parse() returns an RAProgram: named assignments plus a result expression.
  const program = parser.parse();
  // Assignments are emitted as SQL CTEs; the result becomes the main query.
  return programToSQL(program, db);
}
10241104
10251105// Re-export for potential future use (e.g., AST visualization)
0 commit comments