From 61304d3fb6c89af53321ac1111905d645e31e183 Mon Sep 17 00:00:00 2001
From: sdasda7777 <17746796+sdasda7777@users.noreply.github.com>
Date: Thu, 19 Oct 2023 19:10:33 +0200
Subject: [PATCH 1/3] First attempt at multiline comments

---
 build-jar.sh                                  |  10 +-
 src/rars/assembler/Assembler.java             |  48 +++---
 src/rars/assembler/Tokenizer.java             | 150 +++++++++++++-----
 src/rars/riscv/Instruction.java               |   2 +-
 .../tokenmarker/RISCVTokenMarker.java         |  53 +++++++
 5 files changed, 196 insertions(+), 67 deletions(-)
diff --git a/build-jar.sh b/build-jar.sh
index e072edfb..ee874232 100755
--- a/build-jar.sh
+++ b/build-jar.sh
@@ -1,7 +1,14 @@
 #!/bin/bash
-if git submodule status | grep \( > /dev/null ; then 
+if git submodule status | grep \( > /dev/null ; then
+    # Create build folder
     mkdir -p build
+    # Compile src/*.java files
     find src -name "*.java" | xargs javac -d build
+    # Exit if compilation error happened
+    if [ $? -ne 0 ]; then
+        exit 1
+    fi
+    # Copy every non-.java file (resources) found under src/ into build/
     if [[ "$OSTYPE" == "darwin"* ]]; then
         find src -type f -not -name "*.java" -exec rsync -R {} build \;
     else
@@ -11,6 +18,7 @@ if git submodule status | grep \( > /dev/null ; then
     rm -r build/src
     cp README.md License.txt build
     cd build
+    # Create .jar file from build/
     jar cfm ../rars.jar ./META-INF/MANIFEST.MF *
 else
     echo "It looks like JSoftFloat is not cloned. Consider running \"git submodule update --init\""
diff --git a/src/rars/assembler/Assembler.java b/src/rars/assembler/Assembler.java
index e4dc2164..db6c0a11 100644
--- a/src/rars/assembler/Assembler.java
+++ b/src/rars/assembler/Assembler.java
@@ -229,8 +229,9 @@ public ArrayList<ProgramStatement> assemble(ArrayList<RISCVprogram> tokenizedPro
                     ExtendedInstruction inst = (ExtendedInstruction) statement.getInstruction();
                     String basicAssembly = statement.getBasicAssemblyStatement();
                     int sourceLine = statement.getSourceLine();
-                    TokenList theTokenList = new Tokenizer().tokenizeLine(sourceLine,
-                            basicAssembly, errors, false);
+                    Tokenizer.TokenizationResult tr = new Tokenizer().tokenizeLine(
+                                    sourceLine, basicAssembly, errors, false, false);
+                    boolean inMultilineComment = tr.unterminatedMultilineComment;
 
                     // ////////////////////////////////////////////////////////////////////////////
                     // If we are using compact memory config and there is a compact expansion, use it
@@ -245,7 +246,7 @@ public ArrayList<ProgramStatement> assemble(ArrayList<RISCVprogram> tokenizedPro
                     for (int instrNumber = 0; instrNumber < templateList.size(); instrNumber++) {
                         String instruction = ExtendedInstruction.makeTemplateSubstitutions(
                                 this.fileCurrentlyBeingAssembled,
-                                templateList.get(instrNumber), theTokenList, PC);
+                                templateList.get(instrNumber), tr.tokenList, PC);
 
                         // All substitutions have been made so we have generated
                         // a valid basic instruction!
@@ -253,19 +254,21 @@ public ArrayList<ProgramStatement> assemble(ArrayList<RISCVprogram> tokenizedPro
                             System.out.println("PSEUDO generated: " + instruction);
                         // For generated instruction: tokenize, build program
                         // statement, add to list.
-                        TokenList newTokenList = new Tokenizer().tokenizeLine(sourceLine,
-                                instruction, errors, false);
-                        ArrayList<Instruction> instrMatches = this.matchInstruction(newTokenList.get(0));
-                        Instruction instr = OperandFormat.bestOperandMatch(newTokenList,
+                        Tokenizer.TokenizationResult newTR = new Tokenizer().tokenizeLine(sourceLine, instruction,
+                                                                                errors, false, inMultilineComment);
+
+                        ArrayList<Instruction> instrMatches = this.matchInstruction(newTR.tokenList.get(0));
+                        Instruction instr = OperandFormat.bestOperandMatch(newTR.tokenList,
                                 instrMatches);
                         // Only first generated instruction is linked to original source
                         ProgramStatement ps = new ProgramStatement(
                                 this.fileCurrentlyBeingAssembled,
-                                (instrNumber == 0) ? statement.getSource() : "", newTokenList,
-                                newTokenList, instr, textAddress.get(), statement.getSourceLine());
+                                (instrNumber == 0) ? statement.getSource() : "", newTR.tokenList,
+                                newTR.tokenList, instr, textAddress.get(), statement.getSourceLine());
                         textAddress.increment(Instruction.INSTRUCTION_LENGTH);
                         ps.buildBasicStatementFromBasicInstruction(errors);
                         machineList.add(ps);
+                        inMultilineComment = newTR.unterminatedMultilineComment;
                     } // end of FOR loop, repeated for each template in list.
                 } // end of ELSE part for extended instruction.
 
@@ -352,7 +355,7 @@ private ArrayList<ProgramStatement> parseLine(TokenList tokenList, String source
         ArrayList<ProgramStatement> ret = new ArrayList<>();
 
         ProgramStatement programStatement;
-        TokenList tokens = this.stripComment(tokenList);
+        TokenList tokens = this.stripComments(tokenList);
 
         // Labels should not be processed in macro definition segment.
         MacroPool macroPool = fileCurrentlyBeingAssembled.getLocalMacroPool();
@@ -413,16 +416,17 @@ private ArrayList<ProgramStatement> parseLine(TokenList tokenList, String source
                 for (int i = macro.getFromLine() + 1; i < macro.getToLine(); i++) {
 
                     String substituted = macro.getSubstitutedLine(i, tokens, counter, errors);
-                    TokenList tokenList2 = fileCurrentlyBeingAssembled.getTokenizer().tokenizeLine(
-                            i, substituted, errors);
+                    Tokenizer.TokenizationResult tokenizationResult2 = fileCurrentlyBeingAssembled.getTokenizer().tokenizeLine(
+                                                                        i, substituted, errors);
 
                     // If token list getProcessedLine() is not empty, then .eqv was performed and it contains the modified source.
                     // Put it into the line to be parsed, so it will be displayed properly in text segment display. DPS 23 Jan 2013
-                    if (tokenList2.getProcessedLine().length() > 0)
-                        substituted = tokenList2.getProcessedLine();
+                    if (tokenizationResult2.tokenList.getProcessedLine().length() > 0)
+                        substituted = tokenizationResult2.tokenList.getProcessedLine();
 
                     // recursively parse lines of expanded macro
-                    ArrayList<ProgramStatement> statements = parseLine(tokenList2, "<" + (i - macro.getFromLine() + macro.getOriginalFromLine()) + "> "
+                    ArrayList<ProgramStatement> statements = parseLine(tokenizationResult2.tokenList,
+                            "<" + (i - macro.getFromLine() + macro.getOriginalFromLine()) + "> "
                             + substituted.trim(), sourceLineNumber, extendedAssemblerEnabled);
                     if (statements != null)
                         ret.addAll(statements);
@@ -511,17 +515,19 @@ private void detectLabels(TokenList tokens, Macro current) {
     // Pre-process the token list for a statement by stripping off any comment.
     // NOTE: the ArrayList parameter is not modified; a new one is cloned and
     // returned.
-    private TokenList stripComment(TokenList tokenList) {
+    private TokenList stripComments(TokenList tokenList) {
         if (tokenList.isEmpty())
             return tokenList;
         TokenList tokens = (TokenList) tokenList.clone();
-        // If there is a comment, strip it off.
-        int last = tokens.size() - 1;
-        if (tokens.get(last).getType() == TokenTypes.COMMENT) {
-            tokens.remove(last);
+        // Drop every COMMENT token, walking backwards so a removal never shifts an index still to be visited.
+        int idx = tokens.size();
+        while (--idx >= 0) {
+            if (tokens.get(idx).getType() == TokenTypes.COMMENT) {
+                tokens.remove(idx);
+            }
         }
         return tokens;
-    } // stripComment()
+    } // stripComments()
 
     /**
      * Pre-process the token list for a statement by stripping off any label, if
diff --git a/src/rars/assembler/Tokenizer.java b/src/rars/assembler/Tokenizer.java
index d6ffd30f..6c4b125f 100644
--- a/src/rars/assembler/Tokenizer.java
+++ b/src/rars/assembler/Tokenizer.java
@@ -52,6 +52,14 @@ a copy of this software and associated documentation files (the
  **/
 
 public class Tokenizer {
+    public static class TokenizationResult { // per-line result; static so it does not pin the Tokenizer that produced it
+        public TokenList tokenList; // tokens found on the line
+        public boolean unterminatedMultilineComment; // true when a block comment is still open at the end of the line
+        public TokenizationResult(TokenList tokenList, boolean unterminatedMultilineComment) {
+            this.tokenList = tokenList;
+            this.unterminatedMultilineComment = unterminatedMultilineComment;
+        }
+    }
 
     private ErrorList errors;
     private RISCVprogram sourceRISCVprogram;
@@ -95,19 +103,22 @@ public ArrayList<TokenList> tokenize(RISCVprogram p) throws AssemblyException {
         //ArrayList source = p.getSourceList();
         ArrayList<SourceLine> source = processIncludes(p, new HashMap<>()); // DPS 9-Jan-2013
         p.setSourceLineList(source);
-        TokenList currentLineTokens;
+        TokenizationResult currentLineResult = new TokenizationResult(null, false);
         String sourceLine;
         for (int i = 0; i < source.size(); i++) {
             sourceLine = source.get(i).getSource();
-            currentLineTokens = this.tokenizeLine(i + 1, sourceLine);
-            tokenList.add(currentLineTokens);
+            currentLineResult = this.tokenizeLine(
+                                    sourceRISCVprogram, i + 1, sourceLine, true,
+                                    currentLineResult.unterminatedMultilineComment);
+            tokenList.add(currentLineResult.tokenList);
             // DPS 03-Jan-2013. Related to 11-July-2012. If source code substitution was made
             // based on .eqv directive during tokenizing, the processed line, a String, is
             // not the same object as the original line.  Thus I can use != instead of !equals()
             // This IF statement will replace original source with source modified by .eqv substitution.
             // Not needed by assembler, but looks better in the Text Segment Display.
-            if (sourceLine.length() > 0 && sourceLine != currentLineTokens.getProcessedLine()) {
-                source.set(i, new SourceLine(currentLineTokens.getProcessedLine(), source.get(i).getRISCVprogram(), source.get(i).getLineNumber()));
+            if (sourceLine.length() > 0 && sourceLine != currentLineResult.tokenList.getProcessedLine()) {
+                source.set(i, new SourceLine(currentLineResult.tokenList.getProcessedLine(),
+                                             source.get(i).getRISCVprogram(), source.get(i).getLineNumber()));
             }
         }
         if (errors.errorsOccurred()) {
@@ -127,9 +138,11 @@ public ArrayList<TokenList> tokenize(RISCVprogram p) throws AssemblyException {
     private ArrayList<SourceLine> processIncludes(RISCVprogram program, Map<String, String> inclFiles) throws AssemblyException {
         ArrayList<String> source = program.getSourceList();
         ArrayList<SourceLine> result = new ArrayList<>(source.size());
+        boolean inMultilineComment = false;
         for (int i = 0; i < source.size(); i++) {
             String line = source.get(i);
-            TokenList tl = tokenizeLine(program, i + 1, line, false);
+            TokenizationResult tr = tokenizeLine(program, i + 1, line, false, inMultilineComment);
+            TokenList tl = tr.tokenList;
             boolean hasInclude = false;
             for (int ii = 0; ii < tl.size(); ii++) {
                 if (tl.get(ii).getValue().equalsIgnoreCase(Directives.INCLUDE.getName())
@@ -167,6 +180,7 @@ private ArrayList<SourceLine> processIncludes(RISCVprogram program, Map<String,
             if (!hasInclude) {
                 result.add(new SourceLine(line, program, i + 1));//line);
             }
+            inMultilineComment = tr.unterminatedMultilineComment;
         }
         return result;
     }
@@ -182,8 +196,8 @@ private ArrayList<SourceLine> processIncludes(RISCVprogram program, Map<String,
      *                           contains one or more lexical (i.e. token) errors.
      **/
 
-    public TokenList tokenizeExampleInstruction(String example) throws AssemblyException {
-        TokenList result = tokenizeLine(sourceRISCVprogram, 0, example, false);
+    public TokenizationResult tokenizeExampleInstruction(String example) throws AssemblyException {
+        TokenizationResult result = tokenizeLine(sourceRISCVprogram, 0, example, false, false);
         if (errors.errorsOccurred()) {
             throw new AssemblyException(errors);
         }
@@ -218,8 +232,8 @@ public TokenList tokenizeExampleInstruction(String example) throws AssemblyExcep
     */
 
     // Modified for release 4.3, to preserve existing API.
-    public TokenList tokenizeLine(int lineNum, String theLine) {
-        return tokenizeLine(sourceRISCVprogram, lineNum, theLine, true);
+    public TokenizationResult tokenizeLine(int lineNum, String theLine) {
+        return tokenizeLine(sourceRISCVprogram, lineNum, theLine, true, false); // .eqv substitution on; line starts outside any block comment
     }
 
     /**
@@ -232,12 +246,12 @@ public TokenList tokenizeLine(int lineNum, String theLine) {
      * @param callerErrorList errors will go into this list instead of tokenizer's list.
      * @return the generated token list for that line
      **/
-    public TokenList tokenizeLine(int lineNum, String theLine, ErrorList callerErrorList) {
+    public TokenizationResult tokenizeLine(int lineNum, String theLine, ErrorList callerErrorList) {
         ErrorList saveList = this.errors;
         this.errors = callerErrorList;
-        TokenList tokens = this.tokenizeLine(lineNum, theLine);
+        TokenizationResult tr = this.tokenizeLine(lineNum, theLine); // delegates to the two-argument overload, so no block-comment state is carried in
         this.errors = saveList;
-        return tokens;
+        return tr;
     }
 
 
@@ -252,12 +266,13 @@ public TokenList tokenizeLine(int lineNum, String theLine, ErrorList callerError
      * @param doEqvSubstitutes boolean param set true to perform .eqv substitutions, else false
      * @return the generated token list for that line
      **/
-    public TokenList tokenizeLine(int lineNum, String theLine, ErrorList callerErrorList, boolean doEqvSubstitutes) {
+    public TokenizationResult tokenizeLine(int lineNum, String theLine, ErrorList callerErrorList,
+                                           boolean doEqvSubstitutes, boolean startsAsMultilineComment) { // startsAsMultilineComment: the line begins inside a block comment left open earlier
         ErrorList saveList = this.errors;
         this.errors = callerErrorList;
-        TokenList tokens = this.tokenizeLine(sourceRISCVprogram, lineNum, theLine, doEqvSubstitutes);
+        TokenizationResult tr = this.tokenizeLine(sourceRISCVprogram, lineNum, theLine, doEqvSubstitutes, startsAsMultilineComment);
         this.errors = saveList;
-        return tokens;
+        return tr;
     }
 
     /**
@@ -271,11 +286,13 @@ public TokenList tokenizeLine(int lineNum, String theLine, ErrorList callerError
      * @param doEqvSubstitutes boolean param set true to perform .eqv substitutions, else false
      * @return the generated token list for that line
      **/
-    public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine, boolean doEqvSubstitutes) {
+    public TokenizationResult tokenizeLine(RISCVprogram program, int lineNum, String theLine,
+                                           boolean doEqvSubstitutes, boolean startsAsMultilineComment) {
         TokenTypes tokenType;
-        TokenList result = new TokenList();
+        TokenList resultList = new TokenList();
+        boolean currentlyInsideMultilineComment = startsAsMultilineComment;
         if (theLine.length() == 0)
-            return result;
+            return new TokenizationResult(resultList, currentlyInsideMultilineComment);
         // will be faster to work with char arrays instead of strings
         char c;
         char[] line = theLine.toCharArray();
@@ -289,31 +306,73 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
         // Each iteration of this loop processes one character in the source line.
         while (linePos < line.length) {
             c = line[linePos];
+
             if (insideQuotedString) { // everything goes into token
                 token[tokenPos++] = c;
                 if (c == '"' && token[tokenPos - 2] != '\\') { // If quote not preceded by backslash, this is end
-                    this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                    this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                     tokenPos = 0;
                     insideQuotedString = false;
                 }
+            } else if (currentlyInsideMultilineComment) {
+                // Still inside a block comment opened on an earlier line: look for the closing "*/",
+                // otherwise the whole rest of the line is comment text.
+                tokenStartPos = linePos;
+                tokenPos = line.length - linePos;
+                int last = line.length;
+                // Search from linePos itself: the terminator may be the very first thing on the line.
+                for (int ii = linePos; ii + 1 < line.length; ++ii) {
+                    if (line[ii] == '*' && line[ii+1] == '/') {
+                        last = ii + 1; // index of the closing '/'
+                        tokenPos = ii - linePos; // comment text is everything before the "*/"
+                        currentlyInsideMultilineComment = false;
+                        break;
+                    }
+                }
+                resultList.add(new Token(TokenTypes.COMMENT, new String(line, tokenStartPos, tokenPos), program, lineNum, linePos));
+                linePos = last;
+                tokenPos = 0;
             } else { // not inside a quoted string, so be sensitive to delimiters
                 switch (c) {
                     case '#':  // # denotes comment that takes remainder of line
                         if (tokenPos > 0) {
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                         }
                         tokenStartPos = linePos + 1;
                         tokenPos = line.length - linePos;
                         System.arraycopy(line, linePos, token, 0, tokenPos);
-                        this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                        this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                         linePos = line.length;
                         tokenPos = 0;
                         break;
+                    case '/':  // '/' might be a start of block comment (/* comment */)
+                        if (line.length > linePos + 1 && line[linePos + 1] == '*') {
+                            if (tokenPos > 0) { // flush a token that runs straight into the comment, e.g. "t2/* c */"
+                                this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
+                            }
+                            tokenStartPos = linePos + 2; // first character after the opening "/*"
+                            tokenPos = line.length - linePos - 2; // length if the comment is not closed on this line
+                            currentlyInsideMultilineComment = true;
+                            // Try finding closing "*/"
+                            int last = line.length;
+                            for (int ii = linePos + 2; ii + 1 < line.length; ++ii) {
+                                if (line[ii] == '*' && line[ii+1] == '/') {
+                                    last = ii + 1;
+                                    tokenPos = last - linePos - 3;
+                                    currentlyInsideMultilineComment = false;
+                                    break;
+                                }
+                            }
+                            resultList.add(new Token(TokenTypes.COMMENT, new String(line, tokenStartPos, tokenPos), program, lineNum, linePos));
+                            linePos = last;
+                            tokenPos = 0;
+                            break;
+                        } // no '*' follows: falls through, so a lone '/' acts as a delimiter like space, tab or comma
                     case ' ':
                     case '\t':
                     case ',': // space, tab or comma is delimiter
                         if (tokenPos > 0) {
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                             tokenPos = 0;
                         }
                         break;
@@ -332,22 +391,22 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
                         }
                         // End of REAL hack.
                         if (tokenPos > 0) {
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                             tokenPos = 0;
                         }
 
                         tokenStartPos = linePos + 1;
                         token[tokenPos++] = c;
                         if (line.length > linePos + 3 && line[linePos + 1] == 'I' && line[linePos + 2] == 'n' && line[linePos + 3] == 'f') {
-                            result.add(new Token(TokenTypes.REAL_NUMBER, "-Inf", program, lineNum, tokenStartPos));
+                            resultList.add(new Token(TokenTypes.REAL_NUMBER, "-Inf", program, lineNum, tokenStartPos));
                             linePos += 3;
                             tokenPos = 0;
                             break;
                         }
-                        if (!((result.isEmpty() || result.get(result.size() - 1).getType() != TokenTypes.IDENTIFIER) &&
+                        if (!((resultList.isEmpty() || resultList.get(resultList.size() - 1).getType() != TokenTypes.IDENTIFIER) &&
                                 (line.length >= linePos + 2 && Character.isDigit(line[linePos + 1])))) {
                             // treat it as binary.....
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                             tokenPos = 0;
                         }
                         break;
@@ -356,17 +415,17 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
                     case '(':
                     case ')':
                         if (tokenPos > 0) {
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                             tokenPos = 0;
                         }
                         tokenStartPos = linePos + 1;
                         token[tokenPos++] = c;
-                        this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                        this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                         tokenPos = 0;
                         break;
                     case '"': // we're not inside a quoted string, so start a new token...
                         if (tokenPos > 0) {
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                             tokenPos = 0;
                         }
                         tokenStartPos = linePos + 1;
@@ -375,7 +434,7 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
                         break;
                     case '\'': // start of character constant (single quote).
                         if (tokenPos > 0) {
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                             tokenPos = 0;
                         }
                         // Our strategy is to process the whole thing right now...
@@ -393,7 +452,7 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
                         token[tokenPos++] = c; // grab third character, put it in token[2]
                         // Process if we've either reached second, non-escaped, quote or end of line.
                         if (c == '\'' && token[1] != '\\' || lookaheadChars == 2) {
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                             tokenPos = 0;
                             tokenStartPos = linePos + 1;
                             break;
@@ -405,7 +464,7 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
                         token[tokenPos++] = c; // grab fourth character, put it in token[3]
                         // Process, if this is ending quote for escaped character or if at end of line
                         if (c == '\'' || lookaheadChars == 3) {
-                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                             tokenPos = 0;
                             tokenStartPos = linePos + 1;
                             break;
@@ -422,7 +481,7 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
                             }
                         }
                         // process no matter what...we either have a valid character by now or not
-                        this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+                        this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
                         tokenPos = 0;
                         tokenStartPos = linePos + 1;
                         break;
@@ -440,13 +499,14 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
                 errors.add(new ErrorMessage(program, lineNum, tokenStartPos,
                         "String is not terminated."));
             }
-            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, result);
+            this.processCandidateToken(token, program, lineNum, theLine, tokenPos, tokenStartPos, resultList);
             tokenPos = 0;
         }
         if (doEqvSubstitutes) {
-            result = processEqv(program, lineNum, theLine, result); // DPS 11-July-2012
+            // DPS 11-July-2012
+            return processEqv(program, lineNum, theLine, new TokenizationResult(resultList, currentlyInsideMultilineComment), startsAsMultilineComment);
         }
-        return result;
+        return new TokenizationResult(resultList, currentlyInsideMultilineComment);
     }
 
     // Process the .eqv directive, which needs to be applied prior to tokenizing of subsequent statements.
@@ -455,10 +515,12 @@ public TokenList tokenizeLine(RISCVprogram program, int lineNum, String theLine,
     // contains a symbol that was previously defined in an .eqv directive, in which case
     // the substitution needs to be made.
     // DPS 11-July-2012
-    private TokenList processEqv(RISCVprogram program, int lineNum, String theLine, TokenList tokens) {
+    private TokenizationResult processEqv(RISCVprogram program, int lineNum, String theLine, TokenizationResult resultSoFar,
+                                            boolean startsAsMultilineComment) {
         // See if it is .eqv directive.  If so, record it...
         // Have to assure it is a well-formed statement right now (can't wait for assembler).
 
+        TokenList tokens = resultSoFar.tokenList;
         if (tokens.size() > 2 && (tokens.get(0).getType() == TokenTypes.DIRECTIVE || tokens.get(2).getType() == TokenTypes.DIRECTIVE)) {
             // There should not be a label but if there is, the directive is in token position 2 (ident, colon, directive).
             int dirPos = (tokens.get(0).getType() == TokenTypes.DIRECTIVE) ? 0 : 2;
@@ -469,13 +531,13 @@ private TokenList processEqv(RISCVprogram program, int lineNum, String theLine,
                 if (tokenPosLastOperand < dirPos + 2) {
                     errors.add(new ErrorMessage(program, lineNum, tokens.get(dirPos).getStartPos(),
                             "Too few operands for " + Directives.EQV.getName() + " directive"));
-                    return tokens;
+                    return resultSoFar;
                 }
                 // Token following the directive has to be IDENTIFIER
                 if (tokens.get(dirPos + 1).getType() != TokenTypes.IDENTIFIER) {
                     errors.add(new ErrorMessage(program, lineNum, tokens.get(dirPos).getStartPos(),
                             "Malformed " + Directives.EQV.getName() + " directive"));
-                    return tokens;
+                    return resultSoFar;
                 }
                 String symbol = tokens.get(dirPos + 1).getValue();
                 // Make sure the symbol is not contained in the expression.  Not likely to occur but if left
@@ -484,7 +546,7 @@ private TokenList processEqv(RISCVprogram program, int lineNum, String theLine,
                     if (tokens.get(i).getValue().equals(symbol)) {
                         errors.add(new ErrorMessage(program, lineNum, tokens.get(dirPos).getStartPos(),
                                 "Cannot substitute " + symbol + " for itself in " + Directives.EQV.getName() + " directive"));
-                        return tokens;
+                        return resultSoFar;
                     }
                 }
                 // Expected syntax is symbol, expression.  I'm allowing the expression to comprise
@@ -497,10 +559,10 @@ private TokenList processEqv(RISCVprogram program, int lineNum, String theLine,
                 if (equivalents.containsKey(symbol) && !equivalents.get(symbol).equals(expression)) {
                     errors.add(new ErrorMessage(program, lineNum, tokens.get(dirPos + 1).getStartPos(),
                             "\"" + symbol + "\" is already defined"));
-                    return tokens;
+                    return resultSoFar;
                 }
                 equivalents.put(symbol, expression);
-                return tokens;
+                return resultSoFar;
             }
         }
         // Check if a substitution from defined .eqv is to be made.  If so, make one.
@@ -518,7 +580,7 @@ private TokenList processEqv(RISCVprogram program, int lineNum, String theLine,
         }
         tokens.setProcessedLine(theLine); // DPS 03-Jan-2013. Related to changes of 11-July-2012.
 
-        return (substitutionMade) ? tokenizeLine(lineNum, theLine) : tokens;
+        return (substitutionMade) ? tokenizeLine(sourceRISCVprogram, lineNum, theLine, true, startsAsMultilineComment) : resultSoFar;
     }
 
 
diff --git a/src/rars/riscv/Instruction.java b/src/rars/riscv/Instruction.java
index caf9ac7c..72b0e3f1 100644
--- a/src/rars/riscv/Instruction.java
+++ b/src/rars/riscv/Instruction.java
@@ -145,7 +145,7 @@ protected String extractOperator(String example) {
      **/
     protected void createExampleTokenList() {
         try {
-            tokenList = ((new Tokenizer()).tokenizeExampleInstruction(exampleFormat));
+            tokenList = ((new Tokenizer()).tokenizeExampleInstruction(exampleFormat)).tokenList;
         } catch (AssemblyException pe) {
             System.out.println("CONFIGURATION ERROR: Instruction example \"" + exampleFormat + "\" contains invalid token(s).");
         }
diff --git a/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java b/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
index 2602c6de..6849c181 100644
--- a/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
+++ b/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
@@ -141,6 +141,33 @@ public byte markTokensImpl(byte token, Segment line, int lineIndex) {
                                 break loop;
                             }
                             break;
+                        case '/': // '/' might be a start of block comment (/* comment */)
+                            if (length > i + 1 && array[i + 1] == '*') {
+                                backslash = false;
+                                doKeyword(line, i, c);
+
+                                if (length - i >= 1) {
+                                    // Try finding the end of this line's highlight
+                                    for (int jj = i + 2; jj < length; ++jj) {
+                                        if (jj + 1 < length && array[jj] == '*' && array[jj+1] == '/') {
+                                            addToken(i - lastOffset, token);
+                                            addToken(jj - i + 2, Token.COMMENT2);
+                                            lastOffset = lastKeyword = jj + 2;
+                                            token = Token.NULL;
+                                            break loop;
+                                        }
+                                        if (array[jj] == '\n' || jj + 1 >= length) {
+                                            addToken(i - lastOffset, token);
+                                            addToken(length - i, Token.COMMENT2);
+                                            lastOffset = lastKeyword = length;
+                                            token = Token.COMMENT2;
+                                            break loop;
+                                        }
+                                    }
+                                    break loop;
+                                }
+                                break;
+                            }
                         default:
                             backslash = false;
                             // . and $ added 4/6/10 DPS; % added 12/12 M.Sekhavat
@@ -150,6 +177,32 @@ public byte markTokensImpl(byte token, Segment line, int lineIndex) {
                             break;
                     }
                     break;
+                case Token.COMMENT2:
+                    // This means the previous line ended with an unterminated block comment
+                    backslash = false;
+                    doKeyword(line, i, c);
+
+                    if (length - i >= 1) {
+                        // Try finding the end of this line's highlight
+                        for (int jj = i + 2; jj < length; ++jj) {
+                            if (jj + 1 < length && array[jj] == '*' && array[jj+1] == '/') {
+                                addToken(i - lastOffset, token);
+                                addToken(jj - i + 2, Token.COMMENT2);
+                                lastOffset = lastKeyword = jj + 2;
+                                token = Token.NULL;
+                                break loop;
+                            }
+                            if (array[jj] == '\n' || jj + 1 >= length) {
+                                addToken(i - lastOffset, token);
+                                addToken(length - i, Token.COMMENT2);
+                                lastOffset = lastKeyword = length;
+                                token = Token.COMMENT2;
+                                break loop;
+                            }
+                        }
+                        break loop;
+                    }
+                    break;
                 case Token.LITERAL1:
                     if (backslash)
                         backslash = false;

From 1e4d627a210b1d413139ed9bbfef5fc1afc4c258 Mon Sep 17 00:00:00 2001
From: sdasda7777 <17746796+sdasda7777@users.noreply.github.com>
Date: Thu, 19 Oct 2023 22:16:50 +0200
Subject: [PATCH 2/3] Fixed bugs in the highlighting, added block comment color
 options to settings

---
 .../tokenmarker/RISCVTokenMarker.java         | 71 +++++++++----------
 1 file changed, 32 insertions(+), 39 deletions(-)

diff --git a/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java b/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
index 6849c181..2732d615 100644
--- a/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
+++ b/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
@@ -43,6 +43,7 @@ public static String[] getRISCVTokenLabels() {
         if (tokenLabels == null) {
             tokenLabels = new String[Token.ID_COUNT];
             tokenLabels[Token.COMMENT1] = "Comment";
+            tokenLabels[Token.COMMENT2] = "Block comment";
             tokenLabels[Token.LITERAL1] = "String literal";
             tokenLabels[Token.LITERAL2] = "Character literal";
             tokenLabels[Token.LABEL] = "Label";
@@ -59,6 +60,7 @@ public static String[] getRISCVTokenExamples() {
         if (tokenExamples == null) {
             tokenExamples = new String[Token.ID_COUNT];
             tokenExamples[Token.COMMENT1] = "# Load";
+            tokenExamples[Token.COMMENT2] = "/* Save */";
             tokenExamples[Token.LITERAL1] = "\"First\"";
             tokenExamples[Token.LITERAL2] = "'\\n'";
             tokenExamples[Token.LABEL] = "main:";
@@ -142,31 +144,26 @@ public byte markTokensImpl(byte token, Segment line, int lineIndex) {
                             }
                             break;
                         case '/': // '/' might be a start of block comment (/* comment */)
-                            if (length > i + 1 && array[i + 1] == '*') {
+                            if (length >= i + 1 && array[i + 1] == '*') {
                                 backslash = false;
                                 doKeyword(line, i, c);
 
-                                if (length - i >= 1) {
-                                    // Try finding the end of this line's highlight
-                                    for (int jj = i + 2; jj < length; ++jj) {
-                                        if (jj + 1 < length && array[jj] == '*' && array[jj+1] == '/') {
-                                            addToken(i - lastOffset, token);
-                                            addToken(jj - i + 2, Token.COMMENT2);
-                                            lastOffset = lastKeyword = jj + 2;
-                                            token = Token.NULL;
-                                            break loop;
-                                        }
-                                        if (array[jj] == '\n' || jj + 1 >= length) {
-                                            addToken(i - lastOffset, token);
-                                            addToken(length - i, Token.COMMENT2);
-                                            lastOffset = lastKeyword = length;
-                                            token = Token.COMMENT2;
-                                            break loop;
-                                        }
+                                // Try finding the end of the comment
+                                for (int jj = i + 2; jj < length; ++jj) {
+                                    if (jj + 1 < length && array[jj] == '*' && array[jj+1] == '/') {
+                                        addToken(i - lastOffset, token);
+                                        addToken(jj - i + 2, Token.COMMENT2);
+                                        lastOffset = lastKeyword = jj + 2;
+                                        i = jj + 1;
+                                        continue loop;
                                     }
-                                    break loop;
                                 }
-                                break;
+                                // No "*/" on this line: highlight the rest of it and stay in COMMENT2 for the next line
+                                addToken(i - lastOffset, token);
+                                addToken(length - i, Token.COMMENT2);
+                                i = lastOffset = lastKeyword = length;
+                                token = Token.COMMENT2;
+                                break loop;
                             }
                         default:
                             backslash = false;
@@ -182,27 +179,23 @@ public byte markTokensImpl(byte token, Segment line, int lineIndex) {
                     backslash = false;
                     doKeyword(line, i, c);
 
-                    if (length - i >= 1) {
-                        // Try finding the end of this line's highlight
-                        for (int jj = i + 2; jj < length; ++jj) {
-                            if (jj + 1 < length && array[jj] == '*' && array[jj+1] == '/') {
-                                addToken(i - lastOffset, token);
-                                addToken(jj - i + 2, Token.COMMENT2);
-                                lastOffset = lastKeyword = jj + 2;
-                                token = Token.NULL;
-                                break loop;
-                            }
-                            if (array[jj] == '\n' || jj + 1 >= length) {
-                                addToken(i - lastOffset, token);
-                                addToken(length - i, Token.COMMENT2);
-                                lastOffset = lastKeyword = length;
-                                token = Token.COMMENT2;
-                                break loop;
-                            }
+                    // Try finding the end of the comment
+                    for (int jj = i + 2; jj < length; ++jj) {
+                        if (jj + 1 < length && array[jj] == '*' && array[jj+1] == '/') {
+                            addToken(i - lastOffset, token);
+                            addToken(jj - i + 2, Token.COMMENT2);
+                            lastOffset = lastKeyword = jj + 2;
+                            i = jj + 1;
+                            token = Token.NULL;
+                            continue loop;
                         }
-                        break loop;
                     }
-                    break;
+                    // No "*/" on this line either: highlight the rest of it and remain in COMMENT2
+                    addToken(i - lastOffset, token);
+                    addToken(length - i, Token.COMMENT2);
+                    i = lastOffset = lastKeyword = length;
+                    token = Token.COMMENT2;
+                    break loop;
                 case Token.LITERAL1:
                     if (backslash)
                         backslash = false;

From 769220bbe177212311a219e60fb7ae0f42d97d00 Mon Sep 17 00:00:00 2001
From: sdasda7777 <17746796+sdasda7777@users.noreply.github.com>
Date: Wed, 25 Oct 2023 14:09:56 +0200
Subject: [PATCH 3/3] Fixed a bug with multiline comments termination check

---
 src/rars/assembler/Tokenizer.java                             | 4 ++--
 .../editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/rars/assembler/Tokenizer.java b/src/rars/assembler/Tokenizer.java
index 6c4b125f..031061ab 100644
--- a/src/rars/assembler/Tokenizer.java
+++ b/src/rars/assembler/Tokenizer.java
@@ -320,10 +320,10 @@ public TokenizationResult tokenizeLine(RISCVprogram program, int lineNum, String
                 tokenPos = line.length - linePos;
 
                 int last = line.length;
-                for (int ii = linePos + 2; ii + 1 < line.length; ++ii) {
+                for (int ii = linePos; ii + 1 < line.length; ++ii) {
                     if (line[ii] == '*' && line[ii+1] == '/') {
                         last = ii + 1;
-                        tokenPos = last - linePos - 2;
+                        tokenPos = last - linePos;
                         currentlyInsideMultilineComment = false;
                         break;
                     }
diff --git a/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java b/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
index 2732d615..3482e787 100644
--- a/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
+++ b/src/rars/venus/editors/jeditsyntax/tokenmarker/RISCVTokenMarker.java
@@ -180,7 +180,7 @@ public byte markTokensImpl(byte token, Segment line, int lineIndex) {
                     doKeyword(line, i, c);
 
                     // Try finding the end of the comment
-                    for (int jj = i + 2; jj < length; ++jj) {
+                    for (int jj = i; jj < length; ++jj) {
                         if (jj + 1 < length && array[jj] == '*' && array[jj+1] == '/') {
                             addToken(i - lastOffset, token);
                             addToken(jj - i + 2, Token.COMMENT2);