GoelandProver · jrosain · Jul 20, 2025 · Feb 7, 2026 · Feb 7, 2026 · Jul 20, 2025
diff --git a/.github/workflows/full-ci.yml b/.github/workflows/full-ci.yml
@@ -86,9 +86,19 @@ jobs:
       - uses: actions/checkout@v5
         with:
           ref: ${{ github.event.pull_request.head.sha }}
+      - uses: cachix/install-nix-action@v31
+        with:
+          nix_path: nixpkgs=channel:nixos-unstable
       - uses: actions/setup-go@v3
         with:
           go-version: ${{ env.goVersion }}
       - run: go install golang.org/x/tools/cmd/goyacc@latest
       - run: cd src && make
-      - run: cd devtools && make test-suite
+      - uses: rrbutani/use-nix-shell-action@v1
+        with:
+          file: shell.nix
+          script: |
+            cd devtools
+            make test-suite
+
+
diff --git a/devtools/run-test-suite.py b/devtools/run-test-suite.py
@@ -5,13 +5,15 @@
 import sys
 import re
 import difflib
+import shutil
 from subprocess import PIPE, run
 
 class Parser:
     ARGS = "% args: "
     RES = "% result: "
     ENV = "% env: "
     EXIT_CODE = "% exit: "
+    no_rocq_check = False
 
     def __init__(self, filename):
         self.filename = filename
@@ -20,6 +22,10 @@ def __init__(self, filename):
         self.parseEnv()
         self.parseExitCode()
 
+        args_avoid_chk = ["-proof", "-otptp", "-osctptp"]
+        if "no_chk" in filename or self.expectedExitCode != "" or any(arg in self.arguments for arg in args_avoid_chk) or self.expectedResult == "NOT VALID":
+            self.no_rocq_check = True
+
     def parseGen(self, pat):
         with open(self.filename) as f:
             for line in f.readlines():
@@ -43,23 +49,103 @@ def parseExitCode(self):
         self.expectedExitCode = self.parseGen(self.EXIT_CODE).strip()
 
     def getCommandLine(self):
-        return self.env + " ../src/_build/goeland " + self.arguments + " " + self.filename
+        arguments = self.arguments
+        if not self.no_rocq_check:
+            arguments += " -context -orocq"
+        return self.env + " ../src/_build/goeland " + arguments + " " + self.filename
+
+    def getArgsForPrinting(self):
+        rocq_chk_str = ""
+        if self.no_rocq_check:
+            rocq_chk_str = " (no Rocq check)"
+        return self.arguments + rocq_chk_str
 
 def sanitize(s):
     return s.encode('utf-8', errors='ignore').decode(errors='ignore')
 
-def runProver(command):
-    print(f"Launching: {command}")
+def runProver(f, command):
+    print(f"{f}\t{parser.getArgsForPrinting()}")
     result = run(command, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True, encoding='utf-8')
     return (sanitize(result.stdout), sanitize(result.stderr), result.returncode)
 
+def getRelevantOutput(output):
+    relevantLines = []
+    ok = -1
+    for line in output.split("\n"):
+        if re.search("% SZS output start", line):
+            ok = 1
+        elif re.search("% SZS output end", line):
+            ok = -1
+        elif ok == 0:
+            if len(line.strip()) > 0:
+                relevantLines.append(line.strip())
+        else:
+            ok -= 1
+
+    return relevantLines
+
+def isExecutable(prog) :
+    return shutil.which(prog) is not None
+
+def getRocqCompiler() :
+    if isExecutable("rocq"):
+        return "rocq compile"
+    elif isExecutable("coqc"):
+        return "coqc"
+    else:
+        raise Exception("No Rocq executable found on the system")
+
+def makeRocqCheck(f, output):
+    rocq = getRocqCompiler()
+    rocq_lines = getRelevantOutput(output)
+    compile_success = False
+    filename = os.getcwd() + "/" + os.path.basename(f)[:-2].replace("-", "_")
+    with open(f"{filename}.v", "w") as tmp:
+        tmp.write("\n".join(rocq_lines))
+
+    result = run(f"{rocq} {filename}.v", stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True, encoding='utf-8')
+    compile_success = result.returncode == 0
+
+    try:
+        os.remove(f"{filename}.glob")
+    except FileNotFoundError:
+        pass
+
+    if not compile_success:
+        print(f"ROCQ compile has failed.")
+        print(f"Reason: {result.stderr}")
+        exit(1)
+    else:
+        try:
+            os.remove(f"{filename}.v")
+            os.remove(f"{filename}.vo")
+            os.remove(f"{filename}.vok")
+            os.remove(f"{filename}.vos")
+        except FileNotFoundError:
+            pass
+
 def runWithExpected(f, parser):
-    output, err, exit_code = runProver(parser.getCommandLine())
+    """
+    Runs Goéland on [f] using the parsed command line, then checks if the output corresponds to the expected one.
+    This function manages:
+    - error codes (e.g., it can detect whether Goéland exits with error code i
+    - results (e.g., VALID, NOT VALID).
+
+    If Goéland runs into an unexpected error, we report it. Moreover, if the kind of expected return is a VALID
+    result, we run Rocq to check that the proof is indeed valid (except for files in the no-chk folder).
+    """
+    output, err, exit_code = runProver(f, parser.getCommandLine())
 
     if err != "":
         print(f"Runtime error: {err}")
         exit(1)
 
+    if parser.expectedExitCode != '':
+        if int(parser.expectedExitCode) != exit_code:
+            print(f"Error: expected exit code '{parser.expectedExitCode}', got: '{exit_code}'")
+            exit(1)
+        else: return
+
     search = re.compile(".*% RES : (.*)$")
     for line in output.split("\n"):
         res = search.match(line)
@@ -69,37 +155,21 @@ def runWithExpected(f, parser):
                 print(f"Error: expected '{parser.expectedResult}', got: '{actual}'")
                 exit(1)
             else:
+                if parser.no_rocq_check: return
+                makeRocqCheck(f, output)
                 return
 
-    if parser.expectedExitCode != '':
-        if int(parser.expectedExitCode) != exit_code:
-            print(f"Error: expected exit code '{parser.expectedExitCode}', got: '{exit_code}'")
-            exit(1)
-        else: return
-
     print(f"Unknown error: got\n{output}")
     exit(1)
 
 def compareOutputs(f, parser):
-    output, err, _ = runProver(parser.getCommandLine())
+    output, err, exit_code = runProver(f, parser.getCommandLine())
 
-    if err != "":
+    if err != "" or exit_code != 0:
         print(f"Runtime error: {err}")
         exit(1)
 
-    relevantLines = []
-    ok = -1
-    for line in output.split("\n"):
-        if re.search("% SZS output start", line):
-            ok = 1
-        elif re.search("% SZS output end", line):
-            ok = -1
-        elif ok == 0:
-            if len(line.strip()) > 0:
-                relevantLines.append(line.strip())
-        else:
-            ok -= 1
-
+    relevantLines = getRelevantOutput(output)
     relevantExpectedLines = []
 
     with open(os.path.splitext(f)[0] + ".out") as f:
@@ -119,7 +189,7 @@ def compareOutputs(f, parser):
 
 if __name__ == "__main__":
     outputTest = ["-proof", "-otptp", "-osctptp"]
-    for f in glob.glob("test-suite/**/*.p"):
+    for f in sorted(glob.glob("test-suite/**/*.p")):
         parser = Parser(f)
 
         if any(out in parser.arguments for out in outputTest) :

diff --git a/devtools/test-suite/basic/test-typing-elab-1.p b/devtools/test-suite/basic/test-typing-elab-1.p
@@ -0,0 +1,7 @@
+% should fail because $lesseq is a defined function for $int, $rat and $real but is not polymorphic
+% for any type
+% exit: 1
+
+tff(ty_type, type, ty : $tType).
+
+tff(conj, conjecture, ! [A : ty] : $lesseq(A, A)).
diff --git a/devtools/test-suite/basic/test-typing-elab-2.p b/devtools/test-suite/basic/test-typing-elab-2.p
@@ -0,0 +1,7 @@
+% should fail because $quotient is a defined function for $int, $rat and $real but is not polymorphic
+% for any type
+% exit: 1
+
+tff(ty_type, type, ty : $tType).
+
+tff(conj, conjecture, ! [A : ty] : $lesseq($quotient(A, A), 1)).
diff --git a/devtools/test-suite/basic/test-typing-elab-3.p b/devtools/test-suite/basic/test-typing-elab-3.p
@@ -0,0 +1,4 @@
+% quotient is defined specifically for integers
+% result: NOT VALID
+
+tff(conj, conjecture, $lesseq($quotient(2, 2), 1)).
diff --git a/devtools/test-suite/basic/tf1_syntax_chk.p b/devtools/test-suite/basic/tf1_syntax_chk.p
@@ -1,5 +1,5 @@
-% TODO: args -one_step and result NOT_VALID once typing is working again (this is satisfiable)
-% exit: 3
+% args: -one_step
+% result: NOT VALID
 
 tff(beverage_type,type,
     beverage: $tType ).

diff --git a/devtools/test-suite/basic/tfa_syntax_chk.p b/devtools/test-suite/basic/tfa_syntax_chk.p
@@ -1,5 +1,5 @@
 % As arithmetic is not handled by Goeland, we simply check that parsing is OK
-% args: -ari -one_step -l 1
+% args: -one_step -l 1
 % result: NOT VALID
 
 tff(p_int_type,type,

diff --git a/devtools/test-suite/bugs/bug_52-1.p b/devtools/test-suite/bugs/bug_52-1.p
@@ -0,0 +1,3 @@
+% result: VALID
+
+fof(rocq_output_test,conjecture,! [X]: (p(X) | ~p(X))).
diff --git a/devtools/test-suite/bugs/bug_52-3.p b/devtools/test-suite/bugs/bug_52-3.p
@@ -0,0 +1,4 @@
+% tests for the presence of metavariables
+% result: VALID
+
+fof(rocq_output_test,conjecture,? [X]: (p(X) | ~p(X))).
diff --git a/devtools/test-suite/bugs/bug_53-1.p b/devtools/test-suite/bugs/bug_53-1.p
@@ -0,0 +1,33 @@
+% args: -one_step
+% exit: 1
+
+tff(list_type,type,
+	list: $tType > $tType ).
+
+tff(maybe_type,type,
+	maybe: $tType > $tType ).
+
+%----Polymorphic symbols
+tff(nil_type,type,
+	nil:
+	  !>[A: $tType] : list(A) ).
+
+tff(cons_type,type,
+	cons:
+	  !>[A: $tType] : ( ( A * list(A) ) > list(A) ) ).
+
+tff(none_type,type,
+	none:
+	  !>[A: $tType] : maybe(A) ).
+
+tff(some_type,type,
+	some:
+	  !>[A: $tType] : ( A > maybe(A) ) ).
+
+tff(head_type,type,
+	head:
+	  !>[A: $tType] : ( list(A) > maybe(A) ) ).
+
+%----This cannot be well typed, the last cons is not well formed
+tff(solve_this,conjecture,
+	  head($int,cons($int,1,cons($int,2,cons($int,3)))) = some($int,1) ).
diff --git a/devtools/test-suite/bugs_no_chk/bug_52-2.p b/devtools/test-suite/bugs_no_chk/bug_52-2.p
@@ -0,0 +1,13 @@
+% TODO: move this file to bugs once we enable typed GS3 proofs
+% result: VALID
+
+tff(nat_type,type,nat : $tType).
+tff(zero_nat,type,zero : nat).
+tff(succ_nat,type,succ : nat > nat).
+tff(list_type,type,list : $tType > $tType).
+tff(nil_list,type,nil : !> [A]: list(A)).
+tff(cons_list,type,cons : !> [A]: ((A * list(A)) > list(A))).
+tff(p_type,type,p : !> [A : $tType]: (list(A) > $o)).
+
+tff(rocq_output_test,conjecture,! [A: $tType, X : A]:
+  ((succ(zero) = succ(zero)) | (p(A,cons(A,X,nil(A))) | ~p(A,cons(A,X,nil(A)))))).
diff --git a/devtools/test-suite/basic/Axioms/SYN000+0.ax → ...ools/test-suite/no_chk/Axioms/SYN000+0.ax b/devtools/test-suite/basic/Axioms/SYN000+0.ax → ...ools/test-suite/no_chk/Axioms/SYN000+0.ax
diff --git a/devtools/test-suite/basic/Axioms/SYN000_0.ax → ...ools/test-suite/no_chk/Axioms/SYN000_0.ax b/devtools/test-suite/basic/Axioms/SYN000_0.ax → ...ools/test-suite/no_chk/Axioms/SYN000_0.ax
diff --git a/devtools/test-suite/basic/fol_syntax_chk.p → devtools/test-suite/no_chk/fol_syntax_chk.p b/devtools/test-suite/basic/fol_syntax_chk.p → devtools/test-suite/no_chk/fol_syntax_chk.p
diff --git a/devtools/test-suite/basic/tf0_syntax_chk.p → devtools/test-suite/no_chk/tf0_syntax_chk.p b/devtools/test-suite/basic/tf0_syntax_chk.p → devtools/test-suite/no_chk/tf0_syntax_chk.p
diff --git a/devtools/test-suite/proofs/example_sctptp.out b/devtools/test-suite/proofs/example_sctptp.out
@@ -1,29 +1,29 @@
-fof(c_example_sctptp_p, conjecture, ((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))).
+fof(c_example_sctptp_p, conjecture, (! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))).
 
-fof(f8, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((! [X4] : (p(X4)))), ~((? [Y6] : (~(p(Y6))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4))))), ~(p(Sko_0)), ~(~(p(Sko_0))), p(Sko_0)] --> [], inference(leftHyp, [status(thm), 6], [])).
+fof(f8, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(! [X4]: p(X4)), ~(? [Y6]: ~p(Y6)), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4))), ~p(Sko_0), ~~p(Sko_0), p(Sko_0)] --> [], inference(leftHyp, [status(thm), 6], [])).
 
-fof(f7, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((! [X4] : (p(X4)))), ~((? [Y6] : (~(p(Y6))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4))))), ~(p(Sko_0)), ~(~(p(Sko_0)))] --> [], inference(leftNotNot, [status(thm), 5], [f8])).
+fof(f7, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(! [X4]: p(X4)), ~(? [Y6]: ~p(Y6)), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4))), ~p(Sko_0), ~~p(Sko_0)] --> [], inference(leftNotNot, [status(thm), 5], [f8])).
 
-fof(f6, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((! [X4] : (p(X4)))), ~((? [Y6] : (~(p(Y6))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4))))), ~(p(Sko_0))] --> [], inference(leftNotEx, [status(thm), 2, $fot(Sko_0)], [f7])).
+fof(f6, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(! [X4]: p(X4)), ~(? [Y6]: ~p(Y6)), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4))), ~p(Sko_0)] --> [], inference(leftNotEx, [status(thm), 2, $fot(Sko_0)], [f7])).
 
-fof(f4, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((! [X4] : (p(X4)))), ~((? [Y6] : (~(p(Y6))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4)))))] --> [], inference(leftNotAll, [status(thm), 1, 'Sko_0'], [f6])).
+fof(f4, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(! [X4]: p(X4)), ~(? [Y6]: ~p(Y6)), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4)))] --> [], inference(leftNotAll, [status(thm), 1, 'Sko_0'], [f6])).
 
-fof(f11, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), (! [X4] : (p(X4))), ~(~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6))))))), (? [Y6] : (~(p(Y6)))), ~(p(Sko_1)), p(Sko_1)] --> [], inference(leftHyp, [status(thm), 6], [])).
+fof(f11, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ! [X4]: p(X4), ~~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6))), ? [Y6]: ~p(Y6), ~p(Sko_1), p(Sko_1)] --> [], inference(leftHyp, [status(thm), 6], [])).
 
-fof(f10, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), (! [X4] : (p(X4))), ~(~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6))))))), (? [Y6] : (~(p(Y6)))), ~(p(Sko_1))] --> [], inference(leftForall, [status(thm), 1, $fot(Sko_1)], [f11])).
+fof(f10, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ! [X4]: p(X4), ~~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6))), ? [Y6]: ~p(Y6), ~p(Sko_1)] --> [], inference(leftForall, [status(thm), 1, $fot(Sko_1)], [f11])).
 
-fof(f9, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), (! [X4] : (p(X4))), ~(~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6))))))), (? [Y6] : (~(p(Y6))))] --> [], inference(leftExists, [status(thm), 4, 'Sko_1'], [f10])).
+fof(f9, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ! [X4]: p(X4), ~~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6))), ? [Y6]: ~p(Y6)] --> [], inference(leftExists, [status(thm), 4, 'Sko_1'], [f10])).
 
-fof(f5, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), (! [X4] : (p(X4))), ~(~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6)))))))] --> [], inference(leftNotNot, [status(thm), 2], [f9])).
+fof(f5, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ! [X4]: p(X4), ~~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6)))] --> [], inference(leftNotNot, [status(thm), 2], [f9])).
 
-fof(f3ext2, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4)))))] --> [], inference(leftNotImplies, [status(thm), 1], [f4])).
+fof(f3ext2, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4)))] --> [], inference(leftNotImplies, [status(thm), 1], [f4])).
 
-fof(f3ext1, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6)))))))] --> [], inference(leftNotImplies, [status(thm), 1], [f5])).
+fof(f3ext1, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6)))] --> [], inference(leftNotImplies, [status(thm), 1], [f5])).
 
-fof(f3, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6)))))))] --> [], inference(leftNotIff, [status(thm), 0], [f3ext1, f3ext2])).
+fof(f3, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6)))] --> [], inference(leftNotIff, [status(thm), 0], [f3ext1, f3ext2])).
 
-fof(f2, plain, [((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))] --> [((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))], inference(hyp, [status(thm), 0], [])).
+fof(f2, plain, [(! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))] --> [(! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))], inference(hyp, [status(thm), 0], [])).
 
-fof(f1, plain, [] --> [((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6)))))))], inference(rightNot, [status(thm), 1], [f2])).
+fof(f1, plain, [] --> [(! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6)))], inference(rightNot, [status(thm), 1], [f2])).
 
-fof(f0, plain, [] --> [((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))], inference(cut, [status(thm), 1], [f1, f3])).
+fof(f0, plain, [] --> [(! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))], inference(cut, [status(thm), 1], [f1, f3])).
diff --git a/devtools/test-suite/proofs/example_sctptp.p b/devtools/test-suite/proofs/example_sctptp.p
@@ -1,7 +1,5 @@
-% args: -no_id -osctptp
+% args: -osctptp
 % result: VALID
 
 fof(test_sctptp, conjecture, 
    (! [X] : p(X)) <=> (~(? [Y] : ~p(Y)))).
-
-
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		% result: VALID

		fof(rocq_output_test,conjecture,! [X]: (p(X) \| ~p(X))).