diff --git a/docs/about/changelog.md b/docs/about/changelog.md index 6f91a8e..4ad4c89 100644 --- a/docs/about/changelog.md +++ b/docs/about/changelog.md @@ -1,229 +1,85 @@ # Changelog -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [Unreleased] - -### Added - -- v1.1 Italian translation. - -## [1.1.1] - 2023-03-05 - -### Added - -- Arabic translation (#444). -- v1.1 French translation. -- v1.1 Dutch translation (#371). -- v1.1 Russian translation (#410). -- v1.1 Japanese translation (#363). -- v1.1 Norwegian Bokmål translation (#383). -- v1.1 "Inconsistent Changes" Turkish translation (#347). -- Default to most recent versions available for each languages -- Display count of available translations (26 to date!) -- Centralize all links into `/data/links.json` so they can be updated easily - -### Fixed - -- Improve French translation (#377). -- Improve id-ID translation (#416). -- Improve Persian translation (#457). -- Improve Russian translation (#408). -- Improve Swedish title (#419). -- Improve zh-CN translation (#359). -- Improve French translation (#357). -- Improve zh-TW translation (#360, #355). -- Improve Spanish (es-ES) transltion (#362). -- Foldout menu in Dutch translation (#371). -- Missing periods at the end of each change (#451). -- Fix missing logo in 1.1 pages -- Display notice when translation isn't for most recent version -- Various broken links, page versions, and indentations. +## [1.0.3](https://github.com/AI-READI/pyfairdatatools/compare/v1.0.2...v1.0.3) (2025-11-26) -### Changed -- Upgrade dependencies: Ruby 3.2.1, Middleman, etc. 
+### Bug Fixes -### Removed +* remove matrix from ci workflow ([#23](https://github.com/AI-READI/pyfairdatatools/issues/23)) ([44df713](https://github.com/AI-READI/pyfairdatatools/commit/44df713f25add43cedfb3eba4e9f20b32d4823cf)) -- Unused normalize.css file -- Identical links assigned in each translation file -- Duplicate index file for the english version +## [1.0.2](https://github.com/AI-READI/pyfairdatatools/compare/v1.0.1...v1.0.2) (2023-11-16) -## [1.1.0] - 2019-02-15 -### Added +### Bug Fixes -- Danish translation (#297). -- Georgian translation from (#337). -- Changelog inconsistency section in Bad Practices. +* add affiliationValue to datasetDescription ([63b7946](https://github.com/AI-READI/pyfairdatatools/commit/63b7946065dac6cf5984829bc5b02689049cb750)) -### Fixed +## 1.0.1 (2023-11-16) -- Italian translation (#332). -- Indonesian translation (#336). -## [1.0.0] - 2017-06-20 +### ⚠ BREAKING CHANGES -### Added +* v1 +* v1 +* v1 +* update package name to pyfairdatatools -- New visual identity by [@tylerfortune8](https://github.com/tylerfortune8). -- Version navigation. -- Links to latest released version in previous versions. -- "Why keep a changelog?" section. -- "Who needs a changelog?" section. -- "How do I make a changelog?" section. -- "Frequently Asked Questions" section. -- New "Guiding Principles" sub-section to "How do I make a changelog?". -- Simplified and Traditional Chinese translations from [@tianshuo](https://github.com/tianshuo). -- German translation from [@mpbzh](https://github.com/mpbzh) & [@Art4](https://github.com/Art4). -- Italian translation from [@azkidenz](https://github.com/azkidenz). -- Swedish translation from [@magol](https://github.com/magol). -- Turkish translation from [@emreerkan](https://github.com/emreerkan). -- French translation from [@zapashcanon](https://github.com/zapashcanon). -- Brazilian Portuguese translation from [@Webysther](https://github.com/Webysther). 
-- Polish translation from [@amielucha](https://github.com/amielucha) & [@m-aciek](https://github.com/m-aciek). -- Russian translation from [@aishek](https://github.com/aishek). -- Czech translation from [@h4vry](https://github.com/h4vry). -- Slovak translation from [@jkostolansky](https://github.com/jkostolansky). -- Korean translation from [@pierceh89](https://github.com/pierceh89). -- Croatian translation from [@porx](https://github.com/porx). -- Persian translation from [@Hameds](https://github.com/Hameds). -- Ukrainian translation from [@osadchyi-s](https://github.com/osadchyi-s). +### release -### Changed +* v1 ([386a2d3](https://github.com/AI-READI/pyfairdatatools/commit/386a2d3568a9be9a6be807d03666950f1f2b7ff3)) +* v1 ([1dacdd1](https://github.com/AI-READI/pyfairdatatools/commit/1dacdd17f36218285578b6109c39956c9aa1f2da)) +* v1 ([078c513](https://github.com/AI-READI/pyfairdatatools/commit/078c513eaaf7346e6eff1465bb0d8fa645e7b34f)) -- Start using "changelog" over "change log" since it's the common usage. -- Start versioning based on the current English version at 0.3.0 to help - translation authors keep things up-to-date. -- Rewrite "What makes unicorns cry?" section. -- Rewrite "Ignoring Deprecations" sub-section to clarify the ideal - scenario. -- Improve "Commit log diffs" sub-section to further argument against - them. -- Merge "Why can’t people just use a git log diff?" with "Commit log - diffs". -- Fix typos in Simplified Chinese and Traditional Chinese translations. -- Fix typos in Brazilian Portuguese translation. -- Fix typos in Turkish translation. -- Fix typos in Czech translation. -- Fix typos in Swedish translation. -- Improve phrasing in French translation. -- Fix phrasing and spelling in German translation. -### Removed +### Features -- Section about "changelog" vs "CHANGELOG". 
+* add additional data types ([d3d4b21](https://github.com/AI-READI/pyfairdatatools/commit/d3d4b21db3c6ef558935054e95986e1dc8b09cdf)) +* add additional validation for dataset description ([0830308](https://github.com/AI-READI/pyfairdatatools/commit/0830308b248310bd569e37d48147748eeadefebc)) +* add base study_description schema ([8ca9a94](https://github.com/AI-READI/pyfairdatatools/commit/8ca9a942a4c6aad77aabe87f2f3c3ec66c07c701)) +* add changelog generate function ([e448f41](https://github.com/AI-READI/pyfairdatatools/commit/e448f41cee39fec4b8778549e8aadebe3bfc1100)) +* add dataset_description validation schema ([3f23381](https://github.com/AI-READI/pyfairdatatools/commit/3f233817ff23f5172ee96ea65d9c9c25a1e0c8c2)) +* add file path validation to generate functions ([2e9c39a](https://github.com/AI-READI/pyfairdatatools/commit/2e9c39a2d0960ef8de7d56aa42d85526768eb00f)) +* add functions for study description.json ([81693a9](https://github.com/AI-READI/pyfairdatatools/commit/81693a92d5b78498bdeb065bcee938155d50609b)) +* add generate function for dataset_description ([decda23](https://github.com/AI-READI/pyfairdatatools/commit/decda23757b0c955812bb1d55c2b20c90d41bd1c)) +* add generate function for study_descripition ([7759e0f](https://github.com/AI-READI/pyfairdatatools/commit/7759e0f9db0366f6ede6ea8a94eb10598dd630db)) +* add license generation ([acf22df](https://github.com/AI-READI/pyfairdatatools/commit/acf22df24afa6fde2b01f0f2f95167ffc68e3b98)) +* add links to schema ids ([bdc905e](https://github.com/AI-READI/pyfairdatatools/commit/bdc905e6bfebc4f32a8867670b9dc0901a262be3)) +* add more valid language codes ([022fdee](https://github.com/AI-READI/pyfairdatatools/commit/022fdee8e521704dc9fe8147000d1a7c5285c73c)) +* add participants validate function ([d7a1bc2](https://github.com/AI-READI/pyfairdatatools/commit/d7a1bc2d6008217f34cf76644eb786fb2a64adb2)) +* add readme generate and validate functions 
([4b4b076](https://github.com/AI-READI/pyfairdatatools/commit/4b4b0760ffac58577055ffafd3132b23f8d1e8ff)) +* add sample validate schema ([34464fe](https://github.com/AI-READI/pyfairdatatools/commit/34464fefe6b6597606228fa4cce1811f01ec6566)) +* add support for folder level validation ([8de70c4](https://github.com/AI-READI/pyfairdatatools/commit/8de70c44cb653310bd10ec0bb3334a35c4569ea3)) +* add validation for langauges ([a882fee](https://github.com/AI-READI/pyfairdatatools/commit/a882feeae6580743810764c3a853ed650b11f628)) +* update dataset_description schema ([20fd4d2](https://github.com/AI-READI/pyfairdatatools/commit/20fd4d23741eb7c6810f67d5d6d9819a6fa8d233)) +* update language list for BCP 47 ([17ec74b](https://github.com/AI-READI/pyfairdatatools/commit/17ec74bfd78a611c1cec91092b0efca017ab5862)) +* update package name to pyfairdatatools ([6b8fe01](https://github.com/AI-READI/pyfairdatatools/commit/6b8fe0140f11692bca1c0c7fe2de3f096b4b245d)) -## [0.3.0] - 2015-12-03 -### Added +### Bug Fixes -- RU translation from [@aishek](https://github.com/aishek). -- pt-BR translation from [@tallesl](https://github.com/tallesl). -- es-ES translation from [@ZeliosAriex](https://github.com/ZeliosAriex). 
+* add fields to validate study description ([6834f55](https://github.com/AI-READI/pyfairdatatools/commit/6834f55ac8dd5dc7e4396b59caed3d56c0c48f9d)) +* add fix for urlllib3 ([dbe21ed](https://github.com/AI-READI/pyfairdatatools/commit/dbe21edbcf4aeeebc7345947376e5161aeb57133)) +* add typing for empty collection ([62a5444](https://github.com/AI-READI/pyfairdatatools/commit/62a544497c52568786d87b480c57296e5f56bf05)) +* update imports ([4109423](https://github.com/AI-READI/pyfairdatatools/commit/4109423b749fb60df3510802dcba4e81100daa80)) +* update key name to lowercase ([607e815](https://github.com/AI-READI/pyfairdatatools/commit/607e815710efac5b7aca2d3bc40e37ae5949358a)) +* update tests ([2bec288](https://github.com/AI-READI/pyfairdatatools/commit/2bec2887ddd4559f900f37b628e0f56811b8fbd5)) +* update versions ([49af758](https://github.com/AI-READI/pyfairdatatools/commit/49af7586de946476b8395e1d98470a0775654c4b)) +* update versions ([127c982](https://github.com/AI-READI/pyfairdatatools/commit/127c98266fcc088ff3cc8334c8e0d901bac99d81)) -## [0.2.0] - 2015-10-06 -### Changed +### Documentation -- Remove exclusionary mentions of "open source" since this project can - benefit both "open" and "closed" source projects equally. - -## [0.1.0] - 2015-10-06 - -### Added - -- Answer "Should you ever rewrite a change log?". - -### Changed - -- Improve argument against commit logs. -- Start following [SemVer](https://semver.org) properly. - -## [0.0.8] - 2015-02-17 - -### Changed - -- Update year to match in every README example. -- Reluctantly stop making fun of Brits only, since most of the world - writes dates in a strange way. - -### Fixed - -- Fix typos in recent README changes. -- Update outdated unreleased diff link. - -## [0.0.7] - 2015-02-16 - -### Added - -- Link, and make it obvious that date format is ISO 8601. - -### Changed - -- Clarified the section on "Is there a standard change log format?". 
- -### Fixed - -- Fix Markdown links to tag comparison URL with footnote-style links. - -## [0.0.6] - 2014-12-12 - -### Added - -- README section on "yanked" releases. - -## [0.0.5] - 2014-08-09 - -### Added - -- Markdown links to version tags on release headings. -- Unreleased section to gather unreleased changes and encourage note - keeping prior to releases. - -## [0.0.4] - 2014-08-09 - -### Added - -- Better explanation of the difference between the file ("CHANGELOG") - and its function "the change log". - -### Changed - -- Refer to a "change log" instead of a "CHANGELOG" throughout the site - to differentiate between the file and the purpose of the file — the - logging of changes. - -### Removed - -- Remove empty sections from CHANGELOG, they occupy too much space and - create too much noise in the file. People will have to assume that the - missing sections were intentionally left out because they contained no - notable changes. - -## [0.0.3] - 2014-08-09 - -### Added - -- "Why should I care?" section mentioning The Changelog podcast. - -## [0.0.2] - 2014-07-10 - -### Added - -- Explanation of the recommended reverse chronological release ordering. - -## [0.0.1] - 2014-05-31 - -### Added - -- This CHANGELOG file to hopefully serve as an evolving example of a - standardized open source project CHANGELOG. -- CNAME file to enable GitHub Pages custom domain. -- README now contains answers to common questions about CHANGELOGs. -- Good examples and basic guidelines, including proper date formatting. -- Counter-examples: "What makes unicorns cry?". 
+* add base content ([c9e9d9d](https://github.com/AI-READI/pyfairdatatools/commit/c9e9d9d2ea70d10b5255116876384af867f9b1dd)) +* add base text ([aeca7b3](https://github.com/AI-READI/pyfairdatatools/commit/aeca7b35cf7f4da5ebcc0b1b001c27efaabd2d98)) +* add docs for dataset_description ([8be3d27](https://github.com/AI-READI/pyfairdatatools/commit/8be3d275e23bb1e69944d6c4b2a52f12e1504b4d)) +* add docs for generate functions ([0249d42](https://github.com/AI-READI/pyfairdatatools/commit/0249d425e65787ea8c5ca32f0b1aaa5ccf49c3fb)) +* add validate readme sections ([6f2f773](https://github.com/AI-READI/pyfairdatatools/commit/6f2f773ec049f059a50adce5495ce08069de06ab)) +* fix build ([22581c9](https://github.com/AI-READI/pyfairdatatools/commit/22581c97869d0bbc065c8f84d8c6b231c0bf0acd)) +* fix build ([c2b9878](https://github.com/AI-READI/pyfairdatatools/commit/c2b9878edb3fb2c84389f3e4841a79cd3bd7cdbe)) +* fix links ([cbdabfd](https://github.com/AI-READI/pyfairdatatools/commit/cbdabfd95d14bf20c31379f5f781fb72e6dba4e1)) +* update for study_description ([e7e63b2](https://github.com/AI-READI/pyfairdatatools/commit/e7e63b2e0314e4e5e49355e35390376ae4476a50)) +* update publish instructions ([dbd0969](https://github.com/AI-READI/pyfairdatatools/commit/dbd09696c94d35ebcfdaf60d12c77148b7acbf0d)) +* update readme with template ([cf0d241](https://github.com/AI-READI/pyfairdatatools/commit/cf0d241c14ed913ba8fad90362134e39b3ffa603)) +* update text ([84b5a1f](https://github.com/AI-READI/pyfairdatatools/commit/84b5a1fab2f97de8732dee3275cf1d24b2a9f41b)) +* update text ([cc85509](https://github.com/AI-READI/pyfairdatatools/commit/cc855094e612f182bcd67710b94055ab84b9511c)) +* update wording ([b5f5547](https://github.com/AI-READI/pyfairdatatools/commit/b5f5547cd5a0d1425fe7564fd32f678bda20e0d8)) \ No newline at end of file diff --git a/docs/about/contributing.md b/docs/about/contributing.md index d17b7ad..8c0730f 100644 --- a/docs/about/contributing.md +++ b/docs/about/contributing.md @@ -21,11 
+21,11 @@ And if you like the project, but just don't have time to contribute, that's fine > If you want to ask a question, we assume that you have read the available [Documentation](https://aireadi.org). -Before you ask a question, it is best to search for existing [Issues](https://github.com/aireadi/pyfairdatatools/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first. +Before you ask a question, it is best to search for existing [Issues](https://github.com/AI-READI/pyfairdatatools/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first. If you then still feel the need to ask a question and need clarification, we recommend the following: -- Open an [Issue](https://github.com/aireadi/pyfairdatatools/issues/new). +- Open an [Issue](https://github.com/AI-READI/pyfairdatatools/issues/new). - Provide as much context as you can about what you're running into. - Provide project and platform versions (nodejs, npm, etc), depending on what seems relevant. @@ -34,7 +34,7 @@ We will then take care of the issue as soon as possible. ## I Want To Contribute > ### Legal Notice -> + > When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license. ### Reporting Bugs @@ -45,7 +45,7 @@ A good bug report shouldn't leave others needing to chase you up for more inform - Make sure that you are using the latest version. - Determine if your bug is really a bug and not an error on your side e.g. using incompatible environment components/versions (Make sure that you have read the [documentation](https://aireadi.org). 
If you are looking for support, you might want to check [this section](#i-have-a-question)). -- To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/fairdataihub/pyfairdatatools/issues?q=label%3Abug). +- To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/AI-READI/pyfairdatatools/issues?q=label%3Abug). - Also make sure to search the internet (including Stack Overflow) to see if users outside of the GitHub community have discussed the issue. - Collect information about the bug: - Stack trace (Traceback) @@ -58,7 +58,7 @@ A good bug report shouldn't leave others needing to chase you up for more inform We use GitHub issues to track bugs and errors. If you run into an issue with the project: -- Open an [Issue](https://github.com/fairdataihub/pyfairdatatools/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.) +- Open an [Issue](https://github.com/AI-READI/pyfairdatatools/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.) - Explain the behavior you would expect and the actual behavior. - Please provide as much context as possible and describe the _reproduction steps_ that someone else can follow to recreate the issue on their own. This usually includes your code. For good bug reports you should isolate the problem and create a reduced test case. - Provide the information you collected in the previous section. 
@@ -80,15 +80,15 @@ This section guides you through submitting an enhancement suggestion for pyfaird #### Before Submitting an Enhancement - Make sure that you are using the latest version. -- Read the [documentation](https://aireadi.org) carefully and find out if the functionality is already covered, maybe by an individual configuration. -- Perform a [search](https://github.com/fairdataihub/pyfairdatatools/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. +- Read the [documentation](https://docs.aireadi.org) carefully and find out if the functionality is already covered, maybe by an individual configuration. +- Perform a [search](https://github.com/AI-READI/pyfairdatatools/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. - Find out whether your idea fits with the scope and aims of the project. It's up to you to make a strong case to convince the project's developers of the merits of this feature. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on/plugin library. #### How Do I Submit a Good Enhancement Suggestion? -Enhancement suggestions are tracked as [GitHub issues](https://github.com/fairdataihub/fairdatatools/issues). +Enhancement suggestions are tracked as [GitHub issues](https://github.com/AI-READI/pyfairdatatools/issues). - Use a **clear and descriptive title** for the issue to identify the suggestion. - Provide a **step-by-step description of the suggested enhancement** in as many details as possible. 
diff --git a/docs/modules/validate.md b/docs/modules/validate.md index 5ee5de2..b17de4a 100644 --- a/docs/modules/validate.md +++ b/docs/modules/validate.md @@ -16,6 +16,44 @@ The validation module provides a way to validate data against a set of rules. The following methods are available in the `validate` module. Each method is described in detail below. +### Validate Folder Structure + +You can call the `validate_folder_structure` method to validate the structure and contents of a dataset folder. + +#### Required or optional files + +The following files are required inside the directory path: + +- `dataset_description.json` +- `participants.json` +- `participants.tsv` +- `README.md` +- `CHANGELOG.md` +- `LICENSE.md` + +------------------------------------------------------------------------ + +#### Returns + +| Field | Type | Description | +|----------|---------|----------------------------------------------------------| +| valid | Boolean | Returns `True` if the dataset is valid, `False` otherwise. | +| errors | List | List of validation errors (blocking issues). | +| warnings | List | List of validation warnings (non-blocking issues). | + +#### How to use + +```python +from pyfairdatatools import validate + +folder_path = "/dataset_path" + +output = validate.validate_folder_structure(folder_path=folder_path) + +print(output) # True + +``` + ### Validate Dataset Description You can call the `validate_dataset_description` method to validate the data needed to create a dataset_description file. 
diff --git a/mkdocs.yml b/mkdocs.yml index 715731d..34da8e4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,7 +13,8 @@ markdown_extensions: - codehilite - admonition - toc: - permalink: ' + permalink:  + nav: - Home: index.md diff --git a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index f4309b3..e56c3cf 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -3,8 +3,47 @@ import yaml from jsonschema import ValidationError, validate +import logging +import sys +import re +import csv -# from . import utils +SUCCESS = 25 +logging.addLevelName(SUCCESS, "SUCCESS") + +def success(self, message, *args, **kwargs): + if self.isEnabledFor(SUCCESS): + self._log(SUCCESS, message, args, **kwargs) + +logging.Logger.success = success + +# Color Formatter +class ColorFormatter(logging.Formatter): + + COLORS = { + logging.DEBUG: "\033[90m", # Gray + logging.INFO: "\033[94m", # Blue + logging.WARNING: "\033[93m", # Yellow + logging.ERROR: "\033[91m", # Red + logging.CRITICAL: "\033[95m", # Magenta + SUCCESS: "\033[92m", # Green + } + + RESET = "\033[0m" + + def format(self, record): + color = self.COLORS.get(record.levelno, self.RESET) + msg = super().format(record) + return f"{color}{msg}{self.RESET}" + +# Logger Setup +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(ColorFormatter("%(levelname)s ▶ %(message)s")) + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +logger.handlers.clear() +logger.addHandler(handler) def validate_dataset_description(data, verbose=False): # sourcery skip: extract-method @@ -229,13 +268,21 @@ def validate_participants(data): """ schema = {} - # Import the schema from the schemas folder with open( os.path.join(os.path.dirname(__file__), "schemas", "participants.schema.json"), encoding="utf-8", ) as f: schema = json.load(f) + # Allow person_id as an alternative to participant_id + if "items" in schema and "required" in schema["items"]: + if "person_id" not in 
schema["items"]["properties"]: + schema["items"]["properties"]["person_id"] = schema["items"]["properties"]["participant_id"].copy() + + for participant in data: + if "person_id" in participant and "participant_id" not in participant: + participant["participant_id"] = participant["person_id"] + try: validate(instance=data, schema=schema) @@ -252,61 +299,6 @@ def validate_participants(data): raise error -def validate_folder_structure(folder_path): - """Validate that a folder structure is valid. - - We do this by generating a json tree of the folder and file structure and - validating it against a schema. - This will allow us to expand the schema in the future to include more complex - folder structures. - Certain folder structures (ones inside of dynamic folders) will not be able to - be validated by this method. - - Args: - folder_path (str): The path to the folder to validate - Returns: - bool: True if the folder structure is valid, False otherwise - """ - - def path_to_dict(path): - d = {} # type: dict - - if not os.path.exists(path): - return d - - for x in os.listdir(path): - key = os.path.basename(x) - - if os.path.isdir(os.path.join(path, x)): - d[key] = path_to_dict(os.path.join(path, x)) - else: - d[key] = "file" - - return d - - # Import the schema from the schemas folder - with open( - os.path.join( - os.path.dirname(__file__), "schemas", "folder_structure.schema.json" - ), - encoding="utf-8", - ) as f: - schema = json.load(f) - - folder_structure_as_dict = path_to_dict(folder_path) - - try: - validate(instance=folder_structure_as_dict, schema=schema) - - return True - except ValidationError as e: - print(e.schema["error_msg"] if "error_msg" in e.schema else e.message) - return False - except Exception as error: - print(error) - raise error - - def validate_datatype_dictionary(data): """Validate a datatype description against the scheme. 
@@ -350,3 +342,253 @@ def validate_datatype_dictionary(data): except Exception as error: print(error) raise error + + +def validate_file_path(file_path, preexisting_file=False, writable=False): + """Validate a file path. Checks if the file exists, is a file, and is writable.""" + if file_path == "": + print("File path is empty.") + raise ValueError("Invalid input") + + if preexisting_file: + if not os.path.exists(file_path): + print("File path does not exist.") + raise FileNotFoundError("File not found") + + if not os.path.isfile(file_path): + print("File path is not a file.") + raise ValueError("Invalid input") + + if writable and not os.access(file_path, os.W_OK): + print("File path is not writable.") + raise PermissionError("Permission denied") + + return True + + +def validate_names(root): + """This function validates the names of the + files against the schema.""" + windows_reserved = { + "con", "prn", "aux", "nul", + "lpt1","lpt2","lpt3","lpt4","lpt5","lpt6","lpt7","lpt8","lpt9", + "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9" + } + allowed_uppercase_files = { + "readme.md", "readme.txt", "readme.rst", + "changelog.md", "changelog.txt", "changelog.rst", + "license.md", "license.txt", "license.rst", "license", + } + errors = [] + warnings = [] + for dirpath, dir_names, file_names in os.walk(root): + for name in dir_names + file_names: + full = os.path.join(dirpath, name) + base = os.path.splitext(name)[0].lower() + name_lower = name.lower() + + # Spaces + if " " in name: + errors.append(f"Space in name: {full}") + + # Allow some conventional files + if any(c.isupper() for c in name): + if name_lower not in allowed_uppercase_files: + errors.append(f"Uppercase in name: {full}") + + # Invalid chars + name_pattern = re.compile(r'^[a-z0-9._-]+$') + if name_lower not in allowed_uppercase_files: + if not name_pattern.match(name): + errors.append(f"Invalid characters: {full}") + + # Hidden files + if name.startswith("."): + 
warnings.append(f"Hidden file/folder: {full}") + + # Windows reserved + if base in windows_reserved: + errors.append(f"Windows reserved name: {full}") + + if len(name) > 150: + warnings.append(f"Very long name: {full}") + + return errors, warnings + + +def validate_folder_structure(folder_path): + logger.info("Starting folder structure validation...") + if not os.path.isdir(folder_path): + logger.error(f"Folder not found: {folder_path}", ) + return False + name_errors, name_warnings = validate_names(folder_path) + for w in name_warnings: + logger.warning(w) + if name_errors: + for e in name_errors: + logger.error(e) + return False + + required_files = [] + try: + with open( + os.path.join(os.path.dirname(__file__), "schemas", "folder_structure.schema.json"), + encoding="utf-8" + ) as f: + schema = json.load(f) + folder_tree = { + f: "file" if os.path.isfile(os.path.join(folder_path, f)) else "directory" + for f in os.listdir(folder_path) + } + + required_files.extend(schema["required"]) + validate(instance=folder_tree, schema=schema) + logger.info("Folder structure matches schema") + + except ValidationError as e: + logger.error("FAILED: Folder structure invalid (%s)", e.message) + return False + except Exception as e: + logger.error("Schema error: %s", e) + return False + files = os.listdir(folder_path) + all_valid = True + + # Required files check + for f in required_files: + if f not in files: + logger.error("Missing required file: %s", f) + return False + logger.info("Found required file: %s", f) + + # dataset_description.json + dd_path = os.path.join(folder_path, "dataset_description.json") + try: + with open(dd_path, encoding="utf-8") as f: + dd_data = json.load(f) + logger.info("Validating dataset_description.json...") + if validate_dataset_description(dd_data): + logger.success("%s is valid", dd_path) + else: + logger.error("dataset_description.json failed validation") + all_valid = False + except json.JSONDecodeError as e: + 
logger.error("dataset_description.json is not valid JSON: %s", e) + all_valid = False + except Exception as e: + logger.error("Error reading dataset_description.json: %s", e) + all_valid = False + + # study_description + s_description_path = os.path.join(folder_path, "study_description.json") + if os.path.isfile(s_description_path): + logger.info("Validating %s...", s_description_path) + try: + with open(s_description_path, encoding="utf-8") as f: + study_data = json.load(f) + if validate_study_description(study_data): + logger.success("%s is valid", s_description_path) + else: + logger.error("%s failed validation", s_description_path) + all_valid = False + except json.JSONDecodeError as e: + logger.error("%s is not valid JSON: %s", s_description_path, e) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", s_description_path, e) + all_valid = False + + # readme + readme_path = os.path.join(folder_path, "readme.md") + logger.info("Validating %s...", readme_path) + try: + with open(readme_path, encoding="utf-8") as f: + content = f.read() + readme_data = {} + current_key = None + current_value = [] + for line in content.split("\n"): + stripped = line.strip() + if stripped.startswith("#"): + if current_key: + readme_data[current_key] = "\n".join(current_value).strip() + current_key = stripped.lstrip("#").strip() + current_value = [] + elif current_key: + current_value.append(stripped) + if current_key: + readme_data[current_key] = "\n".join(current_value).strip() + + if validate_readme(readme_data): + logger.success("%s is valid", readme_path) + else: + logger.error("%s failed validation", readme_path) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", readme_path, e) + all_valid = False + + # changelog + all_changelog_paths = next((file for file in files if file.lower() == "changelog.md"), None) + logger.info("Validating %s...", all_changelog_paths) + changelog_path = os.path.join(folder_path, 
all_changelog_paths) + try: + if not changelog_path: + logger.error("Missing required file: changelog.md") + return False + except Exception as e: + logger.error("Error reading %s: %s", changelog_path, e) + all_valid = False + logger.success("%s is valid", changelog_path) + + # license + all_license_paths = next((file for file in files if file.lower() == "license.txt"), None) + license_path = os.path.join(folder_path, all_license_paths) + try: + if not license_path: + logger.error("Missing required file: license.txt") + return False + logger.info("Validating %s...", license_path) + with open(license_path, encoding="utf-8") as f: + license_text = f.read().strip() + if validate_license(license_text): + logger.success("%s is valid", license_path) + else: + logger.error("%s has an invalid license identifier: '%s'", license_path, license_text) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", license_path, e) + all_valid = False + + # participants json + participant_json_path = os.path.join(folder_path, "participants.json") + logger.info("Validating participants file: %s...", participant_json_path) + try: + with open(participant_json_path, encoding="utf-8") as f: + participants_data = json.load(f) + if validate_participants(participants_data): + logger.success("%s is valid", participant_json_path) + else: + logger.error("%s failed validation", participant_json_path) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", participant_json_path, e) + all_valid = False + + # participant.tsv + participant_tsv_path = os.path.join(folder_path, "participants.tsv") + logger.info("Validating %s...", participant_tsv_path) + try: + if not participant_tsv_path: + logger.error("Missing required file: participant.tsv") + return False + logger.success("%s is valid", participant_tsv_path) + except Exception as e: + logger.error("Error reading %s: %s", participant_tsv_path, e) + all_valid = False + + if all_valid: + 
logger.success("All files fully validated") + else: + logger.error("Validation completed with errors") + return all_valid diff --git a/pyproject.toml b/pyproject.toml index d97665f..2649e46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "pyfairdatatools" -version = "1.2.0b12" +version = "1.2.0b13" description = "Tools for AI-READI" packages = [{ include = "pyfairdatatools" }]