diff --git a/src/parser/commands_data.jl b/src/parser/commands_data.jl
index a03364f..16e88d3 100644
--- a/src/parser/commands_data.jl
+++ b/src/parser/commands_data.jl
@@ -107,7 +107,43 @@ combining_accents = [
 ]
 
 punctuation_symbols = split(raw", ; . !")
-delimiter_symbols = split(raw"| / ( ) [ ] < >")
+delimiter_symbols = split(raw"| / \ ( ) [ ] ⟨ ⟩ ‖ ⌈ ⌉ ⌊ ⌋ ⌜ ⌝ ⌞ ⌟")
+    ## NOTE `<` and `>` not included in `delimiter_symbols` because
+    ## they should not be used as such. In math mode, they are relation symbols and
+    ## we don't want to change that by overwriting the `symbol_to_canonical` entries.
+    ## **However**, `\left<` and `\right>` should work to produce `\left\langle` and
+    ## `\right\rangle`, respectively, due to intercepting `delimiter(...)`.
+    ## NOTE `{` and `}` not included because they are group delimiters that the tokenizer
+    ## should recognize as `lcurly` and `rcurly`.
+    ## These symbols have to be typed via `\{`, `\}`, `\lbrace` or `\rbrace`.
+
+## some delimiter symbols can also be typed with commands;
+### instead of relying on `get_symbol_char` (like with `space_commands` in `commands_registration.jl`)
+### we define them explicitly, because `latex_symbols` misses some
+delimiter_commands = Dict(
+    raw"\vert" => '|',
+    raw"\slash" => '/', # NOTE seems to work with LaTeX, but not MathJax
+    raw"\backslash" => '\\',
+    raw"\lbrack" => '[',
+    raw"\rbrack" => ']',
+    raw"\langle" => '⟨',
+    raw"\rangle" => '⟩',
+    raw"\|" => '‖',
+    raw"\Vert" => '‖',
+    raw"\lceil" => '⌈',
+    raw"\rceil" => '⌉',
+    raw"\lfloor" => '⌊',
+    raw"\rfloor" => '⌋',
+    raw"\ulcorner" => '⌜',
+    raw"\urcorner" => '⌝',
+    raw"\llcorner" => '⌞',
+    raw"\lrcorner" => '⌟',
+    raw"\{" => '{',
+    raw"\}" => '}',
+    raw"\lbrace" => '{',
+    raw"\rbrace" => '}',
+)
+
 font_names = split(raw"rm cal it tt sf bf default bb frak scr regular")
 
 # TODO Add to the parser what come below, if needed
diff --git a/src/parser/commands_registration.jl b/src/parser/commands_registration.jl
index dea53cf..78bd571 100644
--- a/src/parser/commands_registration.jl
+++ b/src/parser/commands_registration.jl
@@ -58,8 +58,6 @@ const command_definitions = Dict(
     raw"\frac" => (TeXExpr(:frac), 2),
     raw"\sqrt" => (TeXExpr(:sqrt), 1),
     raw"\overline" => (TeXExpr(:overline), 1),
-    raw"\{" => (TeXExpr(:delimiter, '{'), 0),
-    raw"\}" => (TeXExpr(:delimiter, '}'), 0),
     raw"\_" => (TeXExpr(:symbol, '_'), 0),
     raw"\%" => (TeXExpr(:symbol, '%'), 0),
     raw"\$" => (TeXExpr(:symbol, '$'), 0),
@@ -150,11 +148,20 @@ for symbol in punctuation_symbols
     symbol_to_canonical[symbol] = TeXExpr(:punctuation, symbol)
 end
 
+# Delimiters
 for symbol in delimiter_symbols
     symbol = first(symbol)
     symbol_to_canonical[symbol] = TeXExpr(:delimiter, symbol)
 end
 
+for (com_str, symbol) in pairs(delimiter_commands)
+    delim_expr = TeXExpr(:delimiter, symbol)
+    if !haskey(symbol_to_canonical, symbol)
+        symbol_to_canonical[symbol] = delim_expr
+    end
+    command_definitions[com_str] = (delim_expr, 0)
+end
+
 ##
 ## Default behavior
 ##
diff --git a/src/parser/parser.jl b/src/parser/parser.jl
index f2194aa..0759323 100644
--- a/src/parser/parser.jl
+++ b/src/parser/parser.jl
@@ -106,7 +106,13 @@ end
 function delimiter(com_str, str)
     str = str[length(com_str)+1:end]
     if length(str) == 1
-        return TeXExpr(:delimiter, only(str))
+        char = only(str)
+        if char == '<'
+            char = '⟨'
+        elseif char == '>'
+            char = '⟩'
+        end
+        return TeXExpr(:delimiter, char)
     else
         return only(texparse(str).args)
     end
diff --git a/src/parser/tokenizer.jl b/src/parser/tokenizer.jl
index 9619622..8029e1a 100644
--- a/src/parser/tokenizer.jl
+++ b/src/parser/tokenizer.jl
@@ -1,3 +1,4 @@
+const token_command = re"\\[a-zA-Z]+" | re"\\."
 tex_tokens = [
     :char => re".",
     :primes => re"'+",
@@ -5,9 +6,9 @@ tex_tokens = [
     :underscore => re"_",
     :rcurly => re"}",
     :lcurly => re"{",
-    :command => re"\\[a-zA-Z]+" | re"\\.",
-    :right => re"\\right.",
-    :left => re"\\left.",
+    :command => token_command,
+    :right => re"\\right." | re"\\right" * token_command,
+    :left => re"\\left." | re"\\left" * token_command,
     :newline => (re"\\" * re"\\") | re"\\n",
     :dollar => re"$"
 ]
diff --git a/test/parser.jl b/test/parser.jl
index 19c59a0..d3a2d59 100644
--- a/test/parser.jl
+++ b/test/parser.jl
@@ -55,6 +55,65 @@ end
 
     @test_throws TeXParseError texparse(raw"\left( x")
     @test_throws TeXParseError texparse(raw"x \right)")
+
+    ## all `delimiter_symbols`
+    test_parse(raw"|", (:delimiter, '|'))
+    test_parse(raw"/", (:delimiter, '/'))
+    test_parse(raw"\\", (:delimiter, '\\'))
+    test_parse(raw"(", (:delimiter, '('))
+    test_parse(raw")", (:delimiter, ')'))
+    test_parse(raw"[", (:delimiter, '['))
+    test_parse(raw"]", (:delimiter, ']'))
+    test_parse(raw"⟨", (:delimiter, '⟨'))
+    test_parse(raw"⟩", (:delimiter, '⟩'))
+    test_parse(raw"‖", (:delimiter, '‖'))
+    test_parse(raw"⌈", (:delimiter, '⌈'))
+    test_parse(raw"⌉", (:delimiter, '⌉'))
+    test_parse(raw"⌊", (:delimiter, '⌊'))
+    test_parse(raw"⌋", (:delimiter, '⌋'))
+    test_parse(raw"⌜", (:delimiter, '⌜'))
+    test_parse(raw"⌝", (:delimiter, '⌝'))
+    test_parse(raw"⌞", (:delimiter, '⌞'))
+    test_parse(raw"⌟", (:delimiter, '⌟'))
+
+    test_parse(raw"<", (:space, '<')) # formerly, this was a delimiter
+    test_parse(raw">", (:space, '>'))
+
+    ## all `delimiter_commands`
+    test_parse(raw"\vert", (:delimiter, '|'))
+    test_parse(raw"\slash", (:delimiter, '/'))
+    test_parse(raw"\backslash", (:delimiter, '\\'))
+    test_parse(raw"\lbrack", (:delimiter, '['))
+    test_parse(raw"\rbrack", (:delimiter, ']'))
+    test_parse(raw"\langle", (:delimiter, '⟨'))
+    test_parse(raw"\rangle", (:delimiter, '⟩'))
+    test_parse(raw"\|", (:delimiter, '‖'))
+    test_parse(raw"\Vert", (:delimiter, '‖'))
+    test_parse(raw"\lceil", (:delimiter, '⌈'))
+    test_parse(raw"\rceil", (:delimiter, '⌉'))
+    test_parse(raw"\lfloor", (:delimiter, '⌊'))
+    test_parse(raw"\rfloor", (:delimiter, '⌋'))
+    test_parse(raw"\ulcorner", (:delimiter, '⌜'))
+    test_parse(raw"\urcorner", (:delimiter, '⌝'))
+    test_parse(raw"\llcorner", (:delimiter, '⌞'))
+    test_parse(raw"\lrcorner", (:delimiter, '⌟'))
+    test_parse(raw"\{", (:delimiter, '{'))
+    test_parse(raw"\}", (:delimiter, '}'))
+    test_parse(raw"\lbrace", (:delimiter, '{'))
+    test_parse(raw"\rbrace", (:delimiter, '}'))
+
+    ### test commands as arguments to a delimited group
+    for (cmd_str, delim_symb) in pairs(MathTeXEngine.delimiter_commands)
+        ## NOTE this does not check for "correct" left right pairs like `\lbrack` and `\rbrack`
+        test_parse(
+            "\\left$(cmd_str)\\right$(cmd_str)",
+            (:delimited,
+                (:delimiter, delim_symb),
+                (:group,),
+                (:delimiter, delim_symb)
+            )
+        )
+    end
 end
 
 @testset "Fonts" begin