Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .github/workflows/full-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,19 @@ jobs:
- uses: actions/checkout@v5
with:
ref: ${{ github.event.pull_request.head.sha }}
- uses: cachix/install-nix-action@v31
with:
nix_path: nixpkgs=channel:nixos-unstable
- uses: actions/setup-go@v3
with:
go-version: ${{ env.goVersion }}
- run: go install golang.org/x/tools/cmd/goyacc@latest
- run: cd src && make
- run: cd devtools && make test-suite
- uses: rrbutani/use-nix-shell-action@v1
with:
file: shell.nix
script: |
cd devtools
make test-suite


122 changes: 96 additions & 26 deletions devtools/run-test-suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
import sys
import re
import difflib
import shutil
from subprocess import PIPE, run

class Parser:
ARGS = "% args: "
RES = "% result: "
ENV = "% env: "
EXIT_CODE = "% exit: "
no_rocq_check = False

def __init__(self, filename):
self.filename = filename
Expand All @@ -20,6 +22,10 @@ def __init__(self, filename):
self.parseEnv()
self.parseExitCode()

args_avoid_chk = ["-proof", "-otptp", "-osctptp"]
if "no_chk" in filename or self.expectedExitCode != "" or any(arg in self.arguments for arg in args_avoid_chk) or self.expectedResult == "NOT VALID":
self.no_rocq_check = True

def parseGen(self, pat):
with open(self.filename) as f:
for line in f.readlines():
Expand All @@ -43,23 +49,103 @@ def parseExitCode(self):
self.expectedExitCode = self.parseGen(self.EXIT_CODE).strip()

def getCommandLine(self):
return self.env + " ../src/_build/goeland " + self.arguments + " " + self.filename
arguments = self.arguments
if not self.no_rocq_check:
arguments += " -context -orocq"
return self.env + " ../src/_build/goeland " + arguments + " " + self.filename

def getArgsForPrinting(self):
rocq_chk_str = ""
if self.no_rocq_check:
rocq_chk_str = " (no Rocq check)"
return self.arguments + rocq_chk_str

def sanitize(s):
return s.encode('utf-8', errors='ignore').decode(errors='ignore')

def runProver(command):
print(f"Launching: {command}")
def runProver(f, command):
print(f"{f}\t{parser.getArgsForPrinting()}")
result = run(command, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True, encoding='utf-8')
return (sanitize(result.stdout), sanitize(result.stderr), result.returncode)

def getRelevantOutput(output):
relevantLines = []
ok = -1
for line in output.split("\n"):
if re.search("% SZS output start", line):
ok = 1
elif re.search("% SZS output end", line):
ok = -1
elif ok == 0:
if len(line.strip()) > 0:
relevantLines.append(line.strip())
else:
ok -= 1

return relevantLines

def isExecutable(prog) :
return shutil.which(prog) is not None

def getRocqCompiler() :
if isExecutable("rocq"):
return "rocq compile"
elif isExecutable("coqc"):
return "coqc"
else:
raise Exception("No Rocq executable found on the system")

def makeRocqCheck(f, output):
rocq = getRocqCompiler()
rocq_lines = getRelevantOutput(output)
compile_success = False
filename = os.getcwd() + "/" + os.path.basename(f)[:-2].replace("-", "_")
with open(f"{filename}.v", "w") as tmp:
tmp.write("\n".join(rocq_lines))

result = run(f"{rocq} {filename}.v", stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True, encoding='utf-8')
compile_success = result.returncode == 0

try:
os.remove(f"{filename}.glob")
except FileNotFoundError:
pass

if not compile_success:
print(f"ROCQ compile has failed.")
print(f"Reason: {result.stderr}")
exit(1)
else:
try:
os.remove(f"{filename}.v")
os.remove(f"{filename}.vo")
os.remove(f"{filename}.vok")
os.remove(f"{filename}.vos")
except FileNotFoundError:
pass

def runWithExpected(f, parser):
output, err, exit_code = runProver(parser.getCommandLine())
"""
Runs Goéland on [f] using the parsed command line, then checks if the output corresponds to the expected one.
This function manages:
- error codes (e.g., it can detect whether Goéland exits with error code i
- results (e.g., VALID, NOT VALID).

If Goéland runs into an unexpected error, we report it. Moreover, if the kind of expected return is a VALID
result, we run Rocq to check that the proof is indeed valid (except for files in the no-chk folder).
"""
output, err, exit_code = runProver(f, parser.getCommandLine())

if err != "":
print(f"Runtime error: {err}")
exit(1)

if parser.expectedExitCode != '':
if int(parser.expectedExitCode) != exit_code:
print(f"Error: expected exit code '{parser.expectedExitCode}', got: '{exit_code}'")
exit(1)
else: return

search = re.compile(".*% RES : (.*)$")
for line in output.split("\n"):
res = search.match(line)
Expand All @@ -69,37 +155,21 @@ def runWithExpected(f, parser):
print(f"Error: expected '{parser.expectedResult}', got: '{actual}'")
exit(1)
else:
if parser.no_rocq_check: return
makeRocqCheck(f, output)
return

if parser.expectedExitCode != '':
if int(parser.expectedExitCode) != exit_code:
print(f"Error: expected exit code '{parser.expectedExitCode}', got: '{exit_code}'")
exit(1)
else: return

print(f"Unknown error: got\n{output}")
exit(1)

def compareOutputs(f, parser):
output, err, _ = runProver(parser.getCommandLine())
output, err, exit_code = runProver(f, parser.getCommandLine())

if err != "":
if err != "" or exit_code != 0:
print(f"Runtime error: {err}")
exit(1)

relevantLines = []
ok = -1
for line in output.split("\n"):
if re.search("% SZS output start", line):
ok = 1
elif re.search("% SZS output end", line):
ok = -1
elif ok == 0:
if len(line.strip()) > 0:
relevantLines.append(line.strip())
else:
ok -= 1

relevantLines = getRelevantOutput(output)
relevantExpectedLines = []

with open(os.path.splitext(f)[0] + ".out") as f:
Expand All @@ -119,7 +189,7 @@ def compareOutputs(f, parser):

if __name__ == "__main__":
outputTest = ["-proof", "-otptp", "-osctptp"]
for f in glob.glob("test-suite/**/*.p"):
for f in sorted(glob.glob("test-suite/**/*.p")):
parser = Parser(f)

if any(out in parser.arguments for out in outputTest) :
Expand Down
7 changes: 7 additions & 0 deletions devtools/test-suite/basic/test-typing-elab-1.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
% should fail because $lesseq is a defined function for $int, $rat and $real but is not polymorphic
% for any type
% exit: 1

tff(ty_type, type, ty : $tType).

tff(conj, conjecture, ! [A : ty] : $lesseq(A, A)).
7 changes: 7 additions & 0 deletions devtools/test-suite/basic/test-typing-elab-2.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
% should fail because $quotient is a defined function for $int, $rat and $real but is not polymorphic
% for any type
% exit: 1

tff(ty_type, type, ty : $tType).

tff(conj, conjecture, ! [A : ty] : $lesseq($quotient(A, A), 1)).
4 changes: 4 additions & 0 deletions devtools/test-suite/basic/test-typing-elab-3.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
% quotient is defined specifically for integers
% result: NOT VALID

tff(conj, conjecture, $lesseq($quotient(2, 2), 1)).
4 changes: 2 additions & 2 deletions devtools/test-suite/basic/tf1_syntax_chk.p
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
% TODO: args -one_step and result NOT_VALID once typing is working again (this is satisfiable)
% exit: 3
% args: -one_step
% result: NOT VALID

tff(beverage_type,type,
beverage: $tType ).
Expand Down
2 changes: 1 addition & 1 deletion devtools/test-suite/basic/tfa_syntax_chk.p
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
% As arithmetic is not handled by Goeland, we simply check that parsing is OK
% args: -ari -one_step -l 1
% args: -one_step -l 1
% result: NOT VALID

tff(p_int_type,type,
Expand Down
3 changes: 3 additions & 0 deletions devtools/test-suite/bugs/bug_52-1.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
% result: VALID

fof(rocq_output_test,conjecture,! [X]: (p(X) | ~p(X))).
4 changes: 4 additions & 0 deletions devtools/test-suite/bugs/bug_52-3.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
% tests for the presence of metavariables
% result: VALID

fof(rocq_output_test,conjecture,? [X]: (p(X) | ~p(X))).
33 changes: 33 additions & 0 deletions devtools/test-suite/bugs/bug_53-1.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
% args: -one_step
% exit: 1

tff(list_type,type,
list: $tType > $tType ).

tff(maybe_type,type,
maybe: $tType > $tType ).

%----Polymorphic symbols
tff(nil_type,type,
nil:
!>[A: $tType] : list(A) ).

tff(cons_type,type,
cons:
!>[A: $tType] : ( ( A * list(A) ) > list(A) ) ).

tff(none_type,type,
none:
!>[A: $tType] : maybe(A) ).

tff(some_type,type,
some:
!>[A: $tType] : ( A > maybe(A) ) ).

tff(head_type,type,
head:
!>[A: $tType] : ( list(A) > maybe(A) ) ).

%----This cannot be well typed, the last cons is not well formed
tff(solve_this,conjecture,
head($int,cons($int,1,cons($int,2,cons($int,3)))) = some($int,1) ).
13 changes: 13 additions & 0 deletions devtools/test-suite/bugs_no_chk/bug_52-2.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
% TODO: move this file to bugs once we enable typed GS3 proofs
% result: VALID

tff(nat_type,type,nat : $tType).
tff(zero_nat,type,zero : nat).
tff(succ_nat,type,succ : nat > nat).
tff(list_type,type,list : $tType > $tType).
tff(nil_list,type,nil : !> [A]: list(A)).
tff(cons_list,type,cons : !> [A]: ((A * list(A)) > list(A))).
tff(p_type,type,p : !> [A : $tType]: (list(A) > $o)).

tff(rocq_output_test,conjecture,! [A: $tType, X : A]:
((succ(zero) = succ(zero)) | (p(A,cons(A,X,nil(A))) | ~p(A,cons(A,X,nil(A)))))).
30 changes: 15 additions & 15 deletions devtools/test-suite/proofs/example_sctptp.out
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
fof(c_example_sctptp_p, conjecture, ((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))).
fof(c_example_sctptp_p, conjecture, (! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))).

fof(f8, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((! [X4] : (p(X4)))), ~((? [Y6] : (~(p(Y6))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4))))), ~(p(Sko_0)), ~(~(p(Sko_0))), p(Sko_0)] --> [], inference(leftHyp, [status(thm), 6], [])).
fof(f8, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(! [X4]: p(X4)), ~(? [Y6]: ~p(Y6)), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4))), ~p(Sko_0), ~~p(Sko_0), p(Sko_0)] --> [], inference(leftHyp, [status(thm), 6], [])).

fof(f7, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((! [X4] : (p(X4)))), ~((? [Y6] : (~(p(Y6))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4))))), ~(p(Sko_0)), ~(~(p(Sko_0)))] --> [], inference(leftNotNot, [status(thm), 5], [f8])).
fof(f7, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(! [X4]: p(X4)), ~(? [Y6]: ~p(Y6)), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4))), ~p(Sko_0), ~~p(Sko_0)] --> [], inference(leftNotNot, [status(thm), 5], [f8])).

fof(f6, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((! [X4] : (p(X4)))), ~((? [Y6] : (~(p(Y6))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4))))), ~(p(Sko_0))] --> [], inference(leftNotEx, [status(thm), 2, $fot(Sko_0)], [f7])).
fof(f6, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(! [X4]: p(X4)), ~(? [Y6]: ~p(Y6)), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4))), ~p(Sko_0)] --> [], inference(leftNotEx, [status(thm), 2, $fot(Sko_0)], [f7])).

fof(f4, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((! [X4] : (p(X4)))), ~((? [Y6] : (~(p(Y6))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4)))))] --> [], inference(leftNotAll, [status(thm), 1, 'Sko_0'], [f6])).
fof(f4, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(! [X4]: p(X4)), ~(? [Y6]: ~p(Y6)), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4)))] --> [], inference(leftNotAll, [status(thm), 1, 'Sko_0'], [f6])).

fof(f11, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), (! [X4] : (p(X4))), ~(~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6))))))), (? [Y6] : (~(p(Y6)))), ~(p(Sko_1)), p(Sko_1)] --> [], inference(leftHyp, [status(thm), 6], [])).
fof(f11, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ! [X4]: p(X4), ~~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6))), ? [Y6]: ~p(Y6), ~p(Sko_1), p(Sko_1)] --> [], inference(leftHyp, [status(thm), 6], [])).

fof(f10, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), (! [X4] : (p(X4))), ~(~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6))))))), (? [Y6] : (~(p(Y6)))), ~(p(Sko_1))] --> [], inference(leftForall, [status(thm), 1, $fot(Sko_1)], [f11])).
fof(f10, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ! [X4]: p(X4), ~~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6))), ? [Y6]: ~p(Y6), ~p(Sko_1)] --> [], inference(leftForall, [status(thm), 1, $fot(Sko_1)], [f11])).

fof(f9, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), (! [X4] : (p(X4))), ~(~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6))))))), (? [Y6] : (~(p(Y6))))] --> [], inference(leftExists, [status(thm), 4, 'Sko_1'], [f10])).
fof(f9, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ! [X4]: p(X4), ~~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6))), ? [Y6]: ~p(Y6)] --> [], inference(leftExists, [status(thm), 4, 'Sko_1'], [f10])).

fof(f5, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), (! [X4] : (p(X4))), ~(~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6)))))))] --> [], inference(leftNotNot, [status(thm), 2], [f9])).
fof(f5, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ! [X4]: p(X4), ~~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6)))] --> [], inference(leftNotNot, [status(thm), 2], [f9])).

fof(f3ext2, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~((~((? [Y6] : (~(p(Y6))))) => (! [X4] : (p(X4)))))] --> [], inference(leftNotImplies, [status(thm), 1], [f4])).
fof(f3ext2, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~(~(? [Y6]: ~p(Y6)) => (! [X4]: p(X4)))] --> [], inference(leftNotImplies, [status(thm), 1], [f4])).

fof(f3ext1, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))), ~(((! [X4] : (p(X4))) => ~((? [Y6] : (~(p(Y6)))))))] --> [], inference(leftNotImplies, [status(thm), 1], [f5])).
fof(f3ext1, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))), ~((! [X4]: p(X4)) => ~(? [Y6]: ~p(Y6)))] --> [], inference(leftNotImplies, [status(thm), 1], [f5])).

fof(f3, plain, [~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6)))))))] --> [], inference(leftNotIff, [status(thm), 0], [f3ext1, f3ext2])).
fof(f3, plain, [~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6)))] --> [], inference(leftNotIff, [status(thm), 0], [f3ext1, f3ext2])).

fof(f2, plain, [((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))] --> [((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))], inference(hyp, [status(thm), 0], [])).
fof(f2, plain, [(! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))] --> [(! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))], inference(hyp, [status(thm), 0], [])).

fof(f1, plain, [] --> [((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6)))))), ~(((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6)))))))], inference(rightNot, [status(thm), 1], [f2])).
fof(f1, plain, [] --> [(! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6)), ~((! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6)))], inference(rightNot, [status(thm), 1], [f2])).

fof(f0, plain, [] --> [((! [X4] : (p(X4))) <=> ~((? [Y6] : (~(p(Y6))))))], inference(cut, [status(thm), 1], [f1, f3])).
fof(f0, plain, [] --> [(! [X4]: p(X4)) <=> ~(? [Y6]: ~p(Y6))], inference(cut, [status(thm), 1], [f1, f3])).
4 changes: 1 addition & 3 deletions devtools/test-suite/proofs/example_sctptp.p
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
% args: -no_id -osctptp
% args: -osctptp
% result: VALID

fof(test_sctptp, conjecture,
(! [X] : p(X)) <=> (~(? [Y] : ~p(Y)))).


Loading