diff --git a/.gitignore b/.gitignore index 2934b7fe..0aa3bf2a 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ instruments*.trace *.cpu *.object *.dSYM +.vscode/ +vendor/ \ No newline at end of file diff --git a/dev.yml b/dev.yml new file mode 100644 index 00000000..7f2f8443 --- /dev/null +++ b/dev.yml @@ -0,0 +1,20 @@ +name: liquid-c + +type: ruby + +up: + - ruby: 3.1.1 + - bundler + +commands: + console: + desc: 'start a console' + run: bin/console + run: + desc: 'start the application' + run: bin/run + test: + syntax: + argument: file + optional: args... + run: bin/testunit diff --git a/ext/liquid_c/block.c b/ext/liquid_c/block.c index 5bd90070..aa69cd9c 100644 --- a/ext/liquid_c/block.c +++ b/ext/liquid_c/block.c @@ -1,12 +1,12 @@ #include "liquid.h" #include "block.h" #include "intutil.h" -#include "tokenizer.h" #include "stringutil.h" #include "vm.h" #include "variable.h" #include "context.h" #include "parse_context.h" +#include "expression.h" #include "vm_assembler.h" #include @@ -15,6 +15,7 @@ static ID intern_raise_missing_tag_terminator, intern_is_blank, intern_parse, + intern_new, intern_square_brackets, intern_unknown_tag_in_liquid_tag, intern_ivar_nodelist; @@ -22,17 +23,6 @@ static ID static VALUE tag_registry; static VALUE variable_placeholder = Qnil; -typedef struct tag_markup { - VALUE name; - VALUE markup; -} tag_markup_t; - -typedef struct parse_context { - tokenizer_t *tokenizer; - VALUE tokenizer_obj; - VALUE ruby_obj; -} parse_context_t; - static void ensure_body_compiled(const block_body_t *body) { if (!body->compiled) { @@ -192,7 +182,6 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_ case TOKEN_TAG: { const char *start = token.str_trimmed, *end = token.str_trimmed + token.len_trimmed; - // Imitate \s*(\w+)\s*(.*)? 
regex
                 const char *name_start = read_while(start, end, rb_isspace);
                 const char *name_end = read_while(name_start, end, is_id);
 
@@ -222,11 +211,28 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_
                     break;
                 }
 
-                VALUE tag_name = rb_enc_str_new(name_start, name_end - name_start, utf8_encoding);
-                VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name);
-
                 const char *markup_start = read_while(name_end, end, rb_isspace);
                 VALUE markup = rb_enc_str_new(markup_start, end - markup_start, utf8_encoding);
+                VALUE tag_name = rb_enc_str_new(name_start, name_end - name_start, utf8_encoding);
+
+                if (name_len == 2 && strncmp(name_start, "if", 2) == 0) {
+                    unknown_tag = parse_if_tag(markup, body, parse_context);
+                    if (unknown_tag.name != Qnil) {
+                        goto loop_break;
+                    }
+                    render_score_increment += 1;
+                    body->as.intermediate.blank = false;
+                    break;
+                } else if (
+                    (name_len == 5 && strncmp(name_start, "elsif", 5) == 0)
+                    || (name_len == 4 && strncmp(name_start, "else", 4) == 0)
+                    || (name_len == 5 && strncmp(name_start, "endif", 5) == 0)
+                ) {
+                    unknown_tag = (tag_markup_t) { tag_name, markup };
+                    goto loop_break;
+                }
+
+                VALUE tag_class = rb_funcall(tag_registry, intern_square_brackets, 1, tag_name);
 
                 if (tag_class == Qnil) {
                     unknown_tag = (tag_markup_t) { tag_name, markup };
@@ -262,6 +268,155 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_
     return unknown_tag;
 }
 
+/*
+ * Builds a Liquid::Condition from `markup`, which must be either a bare
+ * expression (`a`) or a single binary comparison (`a <op> b`).
+ *
+ * Returns Qnil when markup is nil. Raises Liquid::SyntaxError when tokens
+ * remain after the comparison (for example an `and`/`or` chain, which is not
+ * compiled here) instead of silently dropping them.
+ */
+static VALUE parse_single_binary_comparison(VALUE markup)
+{
+    if (NIL_P(markup)) {
+        return Qnil;
+    }
+
+    StringValue(markup);
+    char *start = RSTRING_PTR(markup);
+
+    parser_t p;
+    init_parser(&p, start, start + RSTRING_LEN(markup));
+
+    VALUE left = internal_expression_parse(&p);
+    lexer_token_t op = parser_consume(&p, TOKEN_COMPARISON);
+    VALUE op_str = Qnil;
+    VALUE right = Qnil;
+
+    if (op.type) {
+        op_str = rb_enc_str_new(op.val, op.val_end - op.val, utf8_encoding);
+        right = internal_expression_parse(&p);
+    }
+
+    if (p.cur.type != TOKEN_EOS) {
+        rb_enc_raise(utf8_encoding, cLiquidSyntaxError,
+                     "Unexpected tokens after condition in \"%" PRIsVALUE "\"", markup);
+    }
+
+    // The parser reads directly from markup's buffer, so keep the string alive up to here.
+    RB_GC_GUARD(markup);
+
+    if (op.type) {
+        return rb_funcall(cLiquidCondition, intern_new, 3, left, op_str, right);
+    }
+    return rb_funcall(cLiquidCondition, intern_new, 1, left);
+}
+
+// Index, from the start of the instruction buffer, at which the next instruction will be written.
+static ptrdiff_t if_tag_code_offset(const vm_assembler_t *code)
+{
+    return (ptrdiff_t)(code->instructions.data_end - code->instructions.data);
+}
+
+// Makes the placeholder branch at index `branch` jump to index `target`.
+// Raises Liquid::SyntaxError when the distance does not fit the 16-bit operand.
+static void if_tag_resolve_branch(vm_assembler_t *code, ptrdiff_t branch, ptrdiff_t target)
+{
+    // The offset is measured from the byte after the opcode (index branch + 1).
+    ptrdiff_t jump = target - branch - 1;
+
+    if (jump < 0 || jump > UINT16_MAX) {
+        rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "'if' tag body is too large to compile");
+    }
+    vm_assembler_close_branch(code, branch, (uint16_t)jump);
+}
+
+/*
+ * Compiles an `if` tag, through its matching `endif`, into body's bytecode:
+ *
+ *   1. parse the markup into a condition object and emit OP_EVAL_CONDITION
+ *   2. emit OP_BRANCH_UNLESS with a placeholder offset
+ *   3. parse the nested body until a tag stops the parser
+ *   4. on else/elsif, emit OP_BRANCH with a placeholder offset so the arm that
+ *      just ended skips to endif, then point the pending OP_BRANCH_UNLESS here
+ *   5. on endif, point every pending OP_BRANCH/OP_BRANCH_UNLESS here
+ *
+ * Returns { Qnil, Qnil } after endif, or the tag that stopped parsing when it
+ * is not else/elsif/endif. Raises Liquid::SyntaxError when parsing stops
+ * without reporting a tag before endif was seen.
+ */
+tag_markup_t parse_if_tag(VALUE markup, block_body_t *body, parse_context_t *parse_context)
+{
+    vm_assembler_t *code = body->as.intermediate.code;
+
+    VALUE condition = parse_single_binary_comparison(markup);
+    // NOTE(review): OP_EVAL_CONDITION pushes one value at render time; confirm the
+    // assembler's stack-size accounting (not visible here) covers that slot.
+    vm_assembler_add_op_with_constant(code, condition, OP_EVAL_CONDITION);
+
+    // Pending OP_BRANCH_UNLESS of the current if/elsif arm; -1 while inside an else arm.
+    ptrdiff_t open_branch = vm_assembler_open_branch(code, OP_BRANCH_UNLESS);
+
+    // Indices of the OP_BRANCH closing each finished arm. A Ruby array keeps the number
+    // of arms unbounded and leaks nothing if nested parsing raises.
+    VALUE exit_branches = Qnil;
+
+    while (true) {
+        tag_markup_t unknown_tag = internal_block_body_parse(body, parse_context);
+
+        if (unknown_tag.name == Qnil) {
+            // No tag was reported (presumably end of input): retrying could loop forever.
+            rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "'if' tag was never closed");
+        }
+
+        StringValue(unknown_tag.name);
+        const char *name = RSTRING_PTR(unknown_tag.name);
+        long name_len = RSTRING_LEN(unknown_tag.name);
+
+        bool is_else = name_len == 4 && strncmp(name, "else", 4) == 0;
+        bool is_elsif = name_len == 5 && strncmp(name, "elsif", 5) == 0;
+        bool is_endif = name_len == 5 && strncmp(name, "endif", 5) == 0;
+
+        if (is_endif) {
+            ptrdiff_t end = if_tag_code_offset(code);
+
+            if (open_branch != -1) {
+                if_tag_resolve_branch(code, open_branch, end);
+            }
+            if (!NIL_P(exit_branches)) {
+                for (long i = 0; i < RARRAY_LEN(exit_branches); i++) {
+                    if_tag_resolve_branch(code, NUM2SSIZET(rb_ary_entry(exit_branches, i)), end);
+                }
+            }
+            RB_GC_GUARD(exit_branches);
+            return (tag_markup_t) { Qnil, Qnil };
+        }
+
+        if (!is_else && !is_elsif) {
+            return unknown_tag;
+        }
+
+        // The arm that just ended jumps over all remaining arms.
+        if (NIL_P(exit_branches)) {
+            exit_branches = rb_ary_new();
+        }
+        rb_ary_push(exit_branches, SSIZET2NUM(vm_assembler_open_branch(code, OP_BRANCH)));
+
+        // A failed condition resumes here. Nothing is pending after an else arm, so a
+        // repeated else/elsif must not patch index -1.
+        if (open_branch != -1) {
+            if_tag_resolve_branch(code, open_branch, if_tag_code_offset(code));
+            open_branch = -1;
+        }
+
+        if (is_elsif) {
+            condition = parse_single_binary_comparison(unknown_tag.markup);
+            vm_assembler_add_op_with_constant(code, condition, OP_EVAL_CONDITION);
+            open_branch = vm_assembler_open_branch(code, OP_BRANCH_UNLESS);
+        }
+    }
+}
+
 static void ensure_intermediate(block_body_t *body)
 {
     if (body->compiled) {
@@ -537,6 +692,7 @@ void liquid_define_block_body(void)
     intern_raise_missing_tag_terminator = rb_intern("raise_missing_tag_terminator");
     intern_is_blank = rb_intern("blank?");
     intern_parse = rb_intern("parse");
+    intern_new = rb_intern("new");
     intern_square_brackets = rb_intern("[]");
     intern_unknown_tag_in_liquid_tag = rb_intern("unknown_tag_in_liquid_tag");
     intern_ivar_nodelist = rb_intern("@nodelist");
diff --git a/ext/liquid_c/block.h b/ext/liquid_c/block.h
index b85cbf8b..b326a5b0 100644
--- a/ext/liquid_c/block.h
+++ b/ext/liquid_c/block.h
@@ -1,6 +1,7 @@
 #if !defined(LIQUID_BLOCK_H)
 #define LIQUID_BLOCK_H
 
+#include "tokenizer.h"
 #include "document_body.h"
 #include "vm_assembler_pool.h"
 
@@ -23,7 +24,19 @@ typedef struct block_body {
     } as;
 } block_body_t;
 
+typedef struct parse_context {
+    tokenizer_t *tokenizer;
+    VALUE tokenizer_obj;
+    VALUE ruby_obj;
+} parse_context_t;
+
+typedef struct tag_markup {
+    VALUE name;
+    VALUE markup;
+} tag_markup_t;
+
 void liquid_define_block_body(void);
+tag_markup_t parse_if_tag(VALUE markup, block_body_t *body, parse_context_t *parse_context);
 
 static inline uint8_t *block_body_instructions_ptr(block_body_header_t *body)
 {
diff --git a/ext/liquid_c/expression.c b/ext/liquid_c/expression.c
index db7f99b7..5b4ac1b9 100644
--- a/ext/liquid_c/expression.c
+++ b/ext/liquid_c/expression.c
@@ -40,7 +40,7 @@ VALUE expression_new(VALUE klass, expression_t **expression_ptr)
     return obj;
 }
 
-static VALUE internal_expression_parse(parser_t *p)
+VALUE internal_expression_parse(parser_t *p)
 {
     if (p->cur.type == TOKEN_EOS)
         return Qnil;
diff --git a/ext/liquid_c/expression.h b/ext/liquid_c/expression.h
index 77a4521d..a9da133d
100644 --- a/ext/liquid_c/expression.h +++ b/ext/liquid_c/expression.h @@ -16,6 +16,7 @@ extern const rb_data_type_t expression_data_type; void liquid_define_expression(void); +VALUE internal_expression_parse(parser_t *p); VALUE expression_new(VALUE klass, expression_t **expression_ptr); VALUE expression_evaluate(VALUE self, VALUE context); VALUE internal_expression_evaluate(expression_t *expression, VALUE context); diff --git a/ext/liquid_c/extconf.rb b/ext/liquid_c/extconf.rb index 14e33265..cfef7ddb 100755 --- a/ext/liquid_c/extconf.rb +++ b/ext/liquid_c/extconf.rb @@ -1,9 +1,10 @@ # frozen_string_literal: true require "mkmf" -$CFLAGS << " -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers" +$CFLAGS << " -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -g" append_cflags("-fvisibility=hidden") # In Ruby 2.6 and earlier, the Ruby headers did not have struct timespec defined +RbConfig::MAKEFILE_CONFIG['CC'] = ENV['CC'] if ENV['CC'] valid_headers = RbConfig::CONFIG["host_os"] !~ /linux/ || Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7") pedantic = !ENV["LIQUID_C_PEDANTIC"].to_s.empty? 
if pedantic && valid_headers
diff --git a/ext/liquid_c/liquid.c b/ext/liquid_c/liquid.c
index b8d505a8..e6e6f8cd 100644
--- a/ext/liquid_c/liquid.c
+++ b/ext/liquid_c/liquid.c
@@ -22,7 +22,7 @@ ID id_call;
 ID id_compile_evaluate;
 ID id_ivar_line_number;
 
-VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
+VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody, cLiquidCondition;
 VALUE cLiquidVariableLookup, cLiquidRangeLookup;
 VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;
 
@@ -70,6 +70,9 @@ RUBY_FUNC_EXPORTED void Init_liquid_c(void)
     cLiquidBlockBody = rb_const_get(mLiquid, rb_intern("BlockBody"));
     rb_global_variable(&cLiquidBlockBody);
 
+    cLiquidCondition = rb_const_get(mLiquid, rb_intern("Condition"));
+    rb_global_variable(&cLiquidCondition);
+
     cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
     rb_global_variable(&cLiquidVariableLookup);
 
diff --git a/ext/liquid_c/liquid.h b/ext/liquid_c/liquid.h
index 8c9f223b..45962c2b 100644
--- a/ext/liquid_c/liquid.h
+++ b/ext/liquid_c/liquid.h
@@ -12,7 +12,7 @@ extern ID id_call;
 extern ID id_compile_evaluate;
 extern ID id_ivar_line_number;
 
-extern VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
+extern VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody, cLiquidCondition;
 extern VALUE cLiquidVariableLookup, cLiquidRangeLookup;
 extern VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;
 extern rb_encoding *utf8_encoding;
diff --git a/ext/liquid_c/vm.c b/ext/liquid_c/vm.c
index 744fdb85..c42543e7 100644
--- a/ext/liquid_c/vm.c
+++ b/ext/liquid_c/vm.c
@@ -362,6 +362,34 @@ static VALUE vm_render_until_error(VALUE uncast_args)
                 resource_limits_increment_write_score(vm->context.resource_limits, output);
                 break;
             }
+
+            case OP_EVAL_CONDITION:
+            {
+                constant_index = (ip[0] << 8) | ip[1];
+                constant = constants[constant_index];
+                ip += 2;
+                VALUE condition_eval = rb_funcall(constant, id_evaluate, 1, vm->context.self);
+                // Push a real Ruby boolean rather than a raw C int reinterpreted as a VALUE.
+                vm_stack_push(vm, RTEST(condition_eval) ? Qtrue : Qfalse);
+                break;
+            }
+
+            case OP_BRANCH_UNLESS:
+            {
+                // The 16-bit big-endian operand is a forward offset counted from the operand itself.
+                uint16_t branch_offset = (uint16_t)((ip[0] << 8) | ip[1]);
+                // Condition false: take the branch. Otherwise step over the two operand bytes.
+                ip += RTEST(vm_stack_pop(vm)) ? 2 : branch_offset;
+                break;
+            }
+
+            case OP_BRANCH:
+            {
+                uint16_t branch_offset = (uint16_t)((ip[0] << 8) | ip[1]);
+                ip += branch_offset;
+                break;
+            }
+
             case OP_JUMP_FWD_W:
             {
                 size_t size = bytes_to_uint24(ip);
@@ -465,6 +493,9 @@ void liquid_vm_next_instruction(const uint8_t **ip_ptr)
             ip++;
             break;
 
+        case OP_BRANCH:
+        case OP_BRANCH_UNLESS:
+        case OP_EVAL_CONDITION:
         case OP_BUILTIN_FILTER:
         case OP_PUSH_INT16:
         case OP_PUSH_CONST:
diff --git a/ext/liquid_c/vm_assembler.c b/ext/liquid_c/vm_assembler.c
index 12d66f1a..d942f3d1 100644
--- a/ext/liquid_c/vm_assembler.c
+++ b/ext/liquid_c/vm_assembler.c
@@ -141,6 +141,24 @@ VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, c
                 rb_str_catf(output, "lookup_key\n");
                 break;
 
+            case OP_EVAL_CONDITION:
+                rb_str_catf(output, "eval_condition\n");
+                break;
+
+            case OP_BRANCH_UNLESS:
+            {
+                int num = (ip[1] << 8) | ip[2];
+                rb_str_catf(output, "branch_unless %04x\n", num);
+                break;
+            }
+
+            case OP_BRANCH:
+            {
+                int num = (ip[1] << 8) | ip[2];
+                rb_str_catf(output, "branch %04x\n", num);
+                break;
+            }
+
             case OP_NEW_INT_RANGE:
                 rb_str_catf(output, "new_int_range\n");
                 break;
@@ -466,8 +484,21 @@ void vm_assembler_add_filter_from_ruby(vm_assembler_t *code, VALUE filter_name,
     vm_assembler_add_filter(code, filter_name, arg_count);
 }
 
+// Appends `op` followed by a zeroed 16-bit operand and returns the index of the opcode
+// within the instruction buffer, so the operand can be filled in later.
+ptrdiff_t vm_assembler_open_branch(vm_assembler_t *code, enum opcode op)
+{
+    ptrdiff_t index = (ptrdiff_t)(code->instructions.data_end - code->instructions.data);
+    uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 3);
+    instructions[0] = op;
+    instructions[1] = 0;
+    instructions[2] = 0;
+    return index;
+}
+
 bool vm_assembler_opcode_has_constant(uint8_t ip) {
     if (
+        ip == OP_EVAL_CONDITION ||
         ip == OP_PUSH_CONST ||
         ip == 
OP_WRITE_NODE ||
         ip == OP_FIND_STATIC_VAR ||
diff --git a/ext/liquid_c/vm_assembler.h b/ext/liquid_c/vm_assembler.h
index 638f7f8c..72c16621 100644
--- a/ext/liquid_c/vm_assembler.h
+++ b/ext/liquid_c/vm_assembler.h
@@ -31,6 +31,9 @@ enum opcode {
     OP_WRITE_RAW,
     OP_JUMP_FWD_W,
     OP_JUMP_FWD,
+    OP_EVAL_CONDITION,
+    OP_BRANCH_UNLESS,
+    OP_BRANCH,
 };
 
 typedef struct {
@@ -72,6 +75,7 @@ void vm_assembler_add_lookup_key_from_ruby(vm_assembler_t *code, VALUE code_obj,
 void vm_assembler_add_new_int_range_from_ruby(vm_assembler_t *code);
 void vm_assembler_add_hash_new_from_ruby(vm_assembler_t *code, VALUE hash_size_obj);
 void vm_assembler_add_filter_from_ruby(vm_assembler_t *code, VALUE filter_name, VALUE arg_count_obj);
+ptrdiff_t vm_assembler_open_branch(vm_assembler_t *code, enum opcode op);
 
 bool vm_assembler_opcode_has_constant(uint8_t ip);
 
@@ -237,4 +241,28 @@ static inline void vm_assembler_add_render_variable_rescue(vm_assembler_t *code,
     uint24_to_bytes((unsigned int)node_line_number, &instructions[1]);
 }
 
+/*
+ * Writes `jump` into the 16-bit big-endian operand of the branch instruction
+ * whose opcode sits at `index` (as returned by vm_assembler_open_branch).
+ *
+ * Raises RangeError when the instruction does not lie inside the emitted code
+ * or when `jump` does not fit in 16 bits, instead of writing out of bounds or
+ * silently truncating the offset.
+ */
+static inline void vm_assembler_close_branch(vm_assembler_t *code, ptrdiff_t index, ptrdiff_t jump)
+{
+    ptrdiff_t code_size = (ptrdiff_t)(code->instructions.data_end - code->instructions.data);
+
+    if (index < 0 || index > code_size - 3) {
+        rb_raise(rb_eRangeError, "branch instruction index out of bounds");
+    }
+    if (jump < 0 || jump > UINT16_MAX) {
+        rb_raise(rb_eRangeError, "branch offset does not fit in 16 bits");
+    }
+
+    uint8_t *branch_instruction = code->instructions.data + index;
+    branch_instruction[1] = (uint8_t)(jump >> 8);
+    branch_instruction[2] = (uint8_t)jump;
+}
+
 #endif
diff --git a/test.rb b/test.rb
new file mode 100644
index 00000000..a22f8684
--- /dev/null
+++ b/test.rb
@@ -0,0 +1,27 @@
+require "bundler/setup"
+require "liquid/c"
+
+@template = Liquid::Template.parse(
+"
+normal raw text
+
+{% if name == 'match' %}
+ text matched from if
+ {% if name == 'frank' %}
+ 1 text matched from nested if
+ {% elsif name == 'match' %}
+ 1 text matched from nested elsif
+ {% endif -%}
+{% else %}
+ 1 text matched from else
+{% endif -%}
+{% if name == 'frank' %}
+ 2 text matched from if
+{% else %}
+ 2 text matched from else
+{%- endif %}
+
+normal raw text
+", line_numbers: false)
+puts @template.render({'name' => 'match'})
+puts @template.root.body.disassemble