diff --git a/.github/workflows/validate-config.yml b/.github/workflows/validate-config.yml
index c99bcbd..9ebc696 100644
--- a/.github/workflows/validate-config.yml
+++ b/.github/workflows/validate-config.yml
@@ -7,13 +7,18 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v2
- - name: BOM check
- run: |
- ! grep -rlI $'\xEF\xBB\xBF' ispdb
+ - name: BOM check
+ run: |
+ ! grep -rlI $'\xEF\xBB\xBF' ispdb
- - name: Validate file extensions
- run: |
- set -o pipefail
- ls -1 ispdb/ | grep -v '\.xml$' | awk '{print "::error file=ispdb/"$0"::File name \"ispdb/"$0"\" does not end in .xml – Please rename!"}' && exit 1 || true
+ - name: Validate file extensions
+ run: |
+ set -o pipefail
+ ls -1 ispdb/ | grep -v '\.xml$' | awk '{print "::error file=ispdb/"$0"::File name \"ispdb/"$0"\" does not end in .xml – Please rename!"}' && exit 1 || true
+
+ - name: Validate XML content
+ run: |
+ pip install lxml
+ python tools/validate.py ispdb/*
diff --git a/ispdb/naver.com.xml b/ispdb/naver.com.xml
index bd11fcf..78a4fa9 100644
--- a/ispdb/naver.com.xml
+++ b/ispdb/naver.com.xml
@@ -25,6 +25,6 @@
%EMAILADDRESS%
password-encrypted
-
+
-
+
\ No newline at end of file
diff --git a/tools/validate.py b/tools/validate.py
new file mode 100644
index 0000000..9604470
--- /dev/null
+++ b/tools/validate.py
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This script checks all input files to validate that their content is valid
+# XML. It is meant to run in the CI for PRs against the autoconfig repository.
+
+import argparse
+import os
+import sys
+from typing import List
+from lxml import etree
+
+
+def main():
+    """Parse each XML file given on the command line; exit 1 if any fails, else 0."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "file", nargs="*", help="input file(s) to process, wildcards allowed"
+    )
+
+    # Defining `files` here isn't strictly necessary, but the extra typing
+    # (which we can't really get otherwise) helps with maintenance.
+    files: List[str] = parser.parse_args().file
+
+    # The exit code. Stays 0 unless we encounter a file that doesn't parse.
+    ret = 0
+
+    for f in files:
+        # Filter out directories and non-XML files
+        if os.path.isdir(f):
+            print(f"Ignoring directory {f}")
+            continue
+
+        if not f.endswith(".xml"):
+            print(f"Ignoring non-XML file {f}")
+            continue
+
+        # Any failure (malformed XML, unreadable file, ...) is reported and
+        # remembered rather than raised, so one run lists every broken file.
+        try:
+            etree.parse(f)
+        except Exception as e:
+            print(f"File {f} did not parse: {e}")
+            ret = 1
+
+    sys.exit(ret)
+
+
+if __name__ == "__main__":
+ main()