diff --git a/.github/workflows/validate-config.yml b/.github/workflows/validate-config.yml index c99bcbd..9ebc696 100644 --- a/.github/workflows/validate-config.yml +++ b/.github/workflows/validate-config.yml @@ -7,13 +7,18 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v2 - - name: BOM check - run: | - ! grep -rlI $'\xEF\xBB\xBF' ispdb + - name: BOM check + run: | + ! grep -rlI $'\xEF\xBB\xBF' ispdb - - name: Validate file extensions - run: | - set -o pipefail - ls -1 ispdb/ | grep -v '\.xml$' | awk '{print "::error file=ispdb/"$0"::File name \"ispdb/"$0"\" does not end in .xml – Please rename!"}' && exit 1 || true + - name: Validate file extensions + run: | + set -o pipefail + ls -1 ispdb/ | grep -v '\.xml$' | awk '{print "::error file=ispdb/"$0"::File name \"ispdb/"$0"\" does not end in .xml – Please rename!"}' && exit 1 || true + + - name: Validate XML content + run: | + pip install lxml + python tools/validate.py ispdb/* diff --git a/ispdb/naver.com.xml b/ispdb/naver.com.xml index bd11fcf..78a4fa9 100644 --- a/ispdb/naver.com.xml +++ b/ispdb/naver.com.xml @@ -25,6 +25,6 @@ %EMAILADDRESS% password-encrypted - + - + \ No newline at end of file diff --git a/tools/validate.py b/tools/validate.py new file mode 100644 index 0000000..9604470 --- /dev/null +++ b/tools/validate.py @@ -0,0 +1,51 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This script checks all input files to validate that their content is valid +# XML. It is meant to run in the CI for PRs against the autoconfig repository. 
import argparse
import os
import sys
from typing import List

from lxml import etree


def main() -> None:
    """Check that every ``.xml`` file named on the command line parses.

    Each positional argument is treated as a path. Directories and paths
    that do not end in ``.xml`` are skipped with a message; every remaining
    path is handed to ``lxml.etree.parse``. A file that fails to parse is
    reported on stdout and does not stop the run, so all broken files are
    listed in a single pass.

    The function never returns normally: it terminates the process with
    exit status 0 when every checked file parsed, or 1 when at least one
    did not.

    Raises:
        SystemExit: always, carrying the exit status described above.
    """
    parser = argparse.ArgumentParser()
    # NOTE: no glob expansion happens in this script; the paths are used
    # exactly as received, so wildcards must be expanded by the caller's
    # shell.
    parser.add_argument(
        "file", nargs="*", help="input file(s) to process, wildcards allowed"
    )
    args = parser.parse_args(sys.argv[1:])

    # Defining `files` here isn't strictly necessary, but the extra typing
    # (which we can't really get otherwise) helps with maintenance.
    files: List[str] = args.file

    # The exit code. Stays 0 unless we encounter a file that doesn't parse.
    ret = 0

    for f in files:
        # Filter out directories and non-XML files.
        if os.path.isdir(f):
            print(f"Ignoring directory {f}")
            continue

        if not f.endswith(".xml"):
            print(f"Ignoring non-XML file {f}")
            continue

        # Try parsing the file. If this did not work, print the error and set
        # the exit code to 1. The broad catch is deliberate: any failure
        # (syntax error, unreadable file, ...) must be reported without
        # aborting the check of the remaining files.
        try:
            etree.parse(f)
        except Exception as e:
            print(f"File {f} did not parse: {e}")
            ret = 1

    # Use sys.exit rather than the bare `exit` builtin: the latter is only
    # installed by the `site` module for interactive sessions and is missing
    # when Python runs with -S or in an embedded/frozen interpreter.
    sys.exit(ret)


if __name__ == "__main__":
    main()