From 44df713f25add43cedfb3eba4e9f20b32d4823cf Mon Sep 17 00:00:00 2001 From: Aidan <62059163+Aydawka@users.noreply.github.com> Date: Wed, 26 Nov 2025 12:08:58 -0800 Subject: [PATCH 01/17] fix: remove matrix from ci workflow (#23) * fix: remove matrix from ci workflow * fix: add secret token line to ci workflow * fix: remove codeflow step --- .github/workflows/ci.yml | 10 +--------- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e33795..ca5a09f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,9 +5,6 @@ on: [push] jobs: ci: runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.11"] steps: - uses: actions/checkout@v2 @@ -40,9 +37,4 @@ jobs: # run: make check - name: Test code - run: make test - - - name: Upload coverage - uses: codecov/codecov-action@v1 - with: - fail_ci_if_error: true + run: make test \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e59e052..d97665f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "pyfairdatatools" -version = "1.2.0b11" +version = "1.2.0b12" description = "Tools for AI-READI" packages = [{ include = "pyfairdatatools" }] From c05151e2a6b7d98dc2da58e6929b0c154f1346fe Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 28 Jan 2026 09:42:02 -0800 Subject: [PATCH 02/17] fix: docs pages --- docs/about/changelog.md | 266 ++++-------------- docs/about/contributing.md | 16 +- docs/stylesheets/extra.css | 4 + mkdocs.yml | 6 +- .../schemas/folder_structure.schema.json | 134 +-------- 5 files changed, 83 insertions(+), 343 deletions(-) create mode 100644 docs/stylesheets/extra.css diff --git a/docs/about/changelog.md b/docs/about/changelog.md index 6f91a8e..4ad4c89 100644 --- a/docs/about/changelog.md +++ b/docs/about/changelog.md @@ -1,229 +1,85 @@ # Changelog -All notable changes to this project will be documented in this file. 
- -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [Unreleased] - -### Added - -- v1.1 Italian translation. - -## [1.1.1] - 2023-03-05 - -### Added - -- Arabic translation (#444). -- v1.1 French translation. -- v1.1 Dutch translation (#371). -- v1.1 Russian translation (#410). -- v1.1 Japanese translation (#363). -- v1.1 Norwegian Bokmål translation (#383). -- v1.1 "Inconsistent Changes" Turkish translation (#347). -- Default to most recent versions available for each languages -- Display count of available translations (26 to date!) -- Centralize all links into `/data/links.json` so they can be updated easily - -### Fixed - -- Improve French translation (#377). -- Improve id-ID translation (#416). -- Improve Persian translation (#457). -- Improve Russian translation (#408). -- Improve Swedish title (#419). -- Improve zh-CN translation (#359). -- Improve French translation (#357). -- Improve zh-TW translation (#360, #355). -- Improve Spanish (es-ES) transltion (#362). -- Foldout menu in Dutch translation (#371). -- Missing periods at the end of each change (#451). -- Fix missing logo in 1.1 pages -- Display notice when translation isn't for most recent version -- Various broken links, page versions, and indentations. +## [1.0.3](https://github.com/AI-READI/pyfairdatatools/compare/v1.0.2...v1.0.3) (2025-11-26) -### Changed -- Upgrade dependencies: Ruby 3.2.1, Middleman, etc. 
+### Bug Fixes -### Removed +* remove matrix from ci workflow ([#23](https://github.com/AI-READI/pyfairdatatools/issues/23)) ([44df713](https://github.com/AI-READI/pyfairdatatools/commit/44df713f25add43cedfb3eba4e9f20b32d4823cf)) -- Unused normalize.css file -- Identical links assigned in each translation file -- Duplicate index file for the english version +## [1.0.2](https://github.com/AI-READI/pyfairdatatools/compare/v1.0.1...v1.0.2) (2023-11-16) -## [1.1.0] - 2019-02-15 -### Added +### Bug Fixes -- Danish translation (#297). -- Georgian translation from (#337). -- Changelog inconsistency section in Bad Practices. +* add affiliationValue to datasetDescription ([63b7946](https://github.com/AI-READI/pyfairdatatools/commit/63b7946065dac6cf5984829bc5b02689049cb750)) -### Fixed +## 1.0.1 (2023-11-16) -- Italian translation (#332). -- Indonesian translation (#336). -## [1.0.0] - 2017-06-20 +### ⚠ BREAKING CHANGES -### Added +* v1 +* v1 +* v1 +* update package name to pyfairdatatools -- New visual identity by [@tylerfortune8](https://github.com/tylerfortune8). -- Version navigation. -- Links to latest released version in previous versions. -- "Why keep a changelog?" section. -- "Who needs a changelog?" section. -- "How do I make a changelog?" section. -- "Frequently Asked Questions" section. -- New "Guiding Principles" sub-section to "How do I make a changelog?". -- Simplified and Traditional Chinese translations from [@tianshuo](https://github.com/tianshuo). -- German translation from [@mpbzh](https://github.com/mpbzh) & [@Art4](https://github.com/Art4). -- Italian translation from [@azkidenz](https://github.com/azkidenz). -- Swedish translation from [@magol](https://github.com/magol). -- Turkish translation from [@emreerkan](https://github.com/emreerkan). -- French translation from [@zapashcanon](https://github.com/zapashcanon). -- Brazilian Portuguese translation from [@Webysther](https://github.com/Webysther). 
-- Polish translation from [@amielucha](https://github.com/amielucha) & [@m-aciek](https://github.com/m-aciek). -- Russian translation from [@aishek](https://github.com/aishek). -- Czech translation from [@h4vry](https://github.com/h4vry). -- Slovak translation from [@jkostolansky](https://github.com/jkostolansky). -- Korean translation from [@pierceh89](https://github.com/pierceh89). -- Croatian translation from [@porx](https://github.com/porx). -- Persian translation from [@Hameds](https://github.com/Hameds). -- Ukrainian translation from [@osadchyi-s](https://github.com/osadchyi-s). +### release -### Changed +* v1 ([386a2d3](https://github.com/AI-READI/pyfairdatatools/commit/386a2d3568a9be9a6be807d03666950f1f2b7ff3)) +* v1 ([1dacdd1](https://github.com/AI-READI/pyfairdatatools/commit/1dacdd17f36218285578b6109c39956c9aa1f2da)) +* v1 ([078c513](https://github.com/AI-READI/pyfairdatatools/commit/078c513eaaf7346e6eff1465bb0d8fa645e7b34f)) -- Start using "changelog" over "change log" since it's the common usage. -- Start versioning based on the current English version at 0.3.0 to help - translation authors keep things up-to-date. -- Rewrite "What makes unicorns cry?" section. -- Rewrite "Ignoring Deprecations" sub-section to clarify the ideal - scenario. -- Improve "Commit log diffs" sub-section to further argument against - them. -- Merge "Why can’t people just use a git log diff?" with "Commit log - diffs". -- Fix typos in Simplified Chinese and Traditional Chinese translations. -- Fix typos in Brazilian Portuguese translation. -- Fix typos in Turkish translation. -- Fix typos in Czech translation. -- Fix typos in Swedish translation. -- Improve phrasing in French translation. -- Fix phrasing and spelling in German translation. -### Removed +### Features -- Section about "changelog" vs "CHANGELOG". 
+* add additional data types ([d3d4b21](https://github.com/AI-READI/pyfairdatatools/commit/d3d4b21db3c6ef558935054e95986e1dc8b09cdf)) +* add additional validation for dataset description ([0830308](https://github.com/AI-READI/pyfairdatatools/commit/0830308b248310bd569e37d48147748eeadefebc)) +* add base study_description schema ([8ca9a94](https://github.com/AI-READI/pyfairdatatools/commit/8ca9a942a4c6aad77aabe87f2f3c3ec66c07c701)) +* add changelog generate function ([e448f41](https://github.com/AI-READI/pyfairdatatools/commit/e448f41cee39fec4b8778549e8aadebe3bfc1100)) +* add dataset_description validation schema ([3f23381](https://github.com/AI-READI/pyfairdatatools/commit/3f233817ff23f5172ee96ea65d9c9c25a1e0c8c2)) +* add file path validation to generate functions ([2e9c39a](https://github.com/AI-READI/pyfairdatatools/commit/2e9c39a2d0960ef8de7d56aa42d85526768eb00f)) +* add functions for study description.json ([81693a9](https://github.com/AI-READI/pyfairdatatools/commit/81693a92d5b78498bdeb065bcee938155d50609b)) +* add generate function for dataset_description ([decda23](https://github.com/AI-READI/pyfairdatatools/commit/decda23757b0c955812bb1d55c2b20c90d41bd1c)) +* add generate function for study_descripition ([7759e0f](https://github.com/AI-READI/pyfairdatatools/commit/7759e0f9db0366f6ede6ea8a94eb10598dd630db)) +* add license generation ([acf22df](https://github.com/AI-READI/pyfairdatatools/commit/acf22df24afa6fde2b01f0f2f95167ffc68e3b98)) +* add links to schema ids ([bdc905e](https://github.com/AI-READI/pyfairdatatools/commit/bdc905e6bfebc4f32a8867670b9dc0901a262be3)) +* add more valid language codes ([022fdee](https://github.com/AI-READI/pyfairdatatools/commit/022fdee8e521704dc9fe8147000d1a7c5285c73c)) +* add participants validate function ([d7a1bc2](https://github.com/AI-READI/pyfairdatatools/commit/d7a1bc2d6008217f34cf76644eb786fb2a64adb2)) +* add readme generate and validate functions 
([4b4b076](https://github.com/AI-READI/pyfairdatatools/commit/4b4b0760ffac58577055ffafd3132b23f8d1e8ff)) +* add sample validate schema ([34464fe](https://github.com/AI-READI/pyfairdatatools/commit/34464fefe6b6597606228fa4cce1811f01ec6566)) +* add support for folder level validation ([8de70c4](https://github.com/AI-READI/pyfairdatatools/commit/8de70c44cb653310bd10ec0bb3334a35c4569ea3)) +* add validation for langauges ([a882fee](https://github.com/AI-READI/pyfairdatatools/commit/a882feeae6580743810764c3a853ed650b11f628)) +* update dataset_description schema ([20fd4d2](https://github.com/AI-READI/pyfairdatatools/commit/20fd4d23741eb7c6810f67d5d6d9819a6fa8d233)) +* update language list for BCP 47 ([17ec74b](https://github.com/AI-READI/pyfairdatatools/commit/17ec74bfd78a611c1cec91092b0efca017ab5862)) +* update package name to pyfairdatatools ([6b8fe01](https://github.com/AI-READI/pyfairdatatools/commit/6b8fe0140f11692bca1c0c7fe2de3f096b4b245d)) -## [0.3.0] - 2015-12-03 -### Added +### Bug Fixes -- RU translation from [@aishek](https://github.com/aishek). -- pt-BR translation from [@tallesl](https://github.com/tallesl). -- es-ES translation from [@ZeliosAriex](https://github.com/ZeliosAriex). 
+* add fields to validate study description ([6834f55](https://github.com/AI-READI/pyfairdatatools/commit/6834f55ac8dd5dc7e4396b59caed3d56c0c48f9d)) +* add fix for urlllib3 ([dbe21ed](https://github.com/AI-READI/pyfairdatatools/commit/dbe21edbcf4aeeebc7345947376e5161aeb57133)) +* add typing for empty collection ([62a5444](https://github.com/AI-READI/pyfairdatatools/commit/62a544497c52568786d87b480c57296e5f56bf05)) +* update imports ([4109423](https://github.com/AI-READI/pyfairdatatools/commit/4109423b749fb60df3510802dcba4e81100daa80)) +* update key name to lowercase ([607e815](https://github.com/AI-READI/pyfairdatatools/commit/607e815710efac5b7aca2d3bc40e37ae5949358a)) +* update tests ([2bec288](https://github.com/AI-READI/pyfairdatatools/commit/2bec2887ddd4559f900f37b628e0f56811b8fbd5)) +* update versions ([49af758](https://github.com/AI-READI/pyfairdatatools/commit/49af7586de946476b8395e1d98470a0775654c4b)) +* update versions ([127c982](https://github.com/AI-READI/pyfairdatatools/commit/127c98266fcc088ff3cc8334c8e0d901bac99d81)) -## [0.2.0] - 2015-10-06 -### Changed +### Documentation -- Remove exclusionary mentions of "open source" since this project can - benefit both "open" and "closed" source projects equally. - -## [0.1.0] - 2015-10-06 - -### Added - -- Answer "Should you ever rewrite a change log?". - -### Changed - -- Improve argument against commit logs. -- Start following [SemVer](https://semver.org) properly. - -## [0.0.8] - 2015-02-17 - -### Changed - -- Update year to match in every README example. -- Reluctantly stop making fun of Brits only, since most of the world - writes dates in a strange way. - -### Fixed - -- Fix typos in recent README changes. -- Update outdated unreleased diff link. - -## [0.0.7] - 2015-02-16 - -### Added - -- Link, and make it obvious that date format is ISO 8601. - -### Changed - -- Clarified the section on "Is there a standard change log format?". 
- -### Fixed - -- Fix Markdown links to tag comparison URL with footnote-style links. - -## [0.0.6] - 2014-12-12 - -### Added - -- README section on "yanked" releases. - -## [0.0.5] - 2014-08-09 - -### Added - -- Markdown links to version tags on release headings. -- Unreleased section to gather unreleased changes and encourage note - keeping prior to releases. - -## [0.0.4] - 2014-08-09 - -### Added - -- Better explanation of the difference between the file ("CHANGELOG") - and its function "the change log". - -### Changed - -- Refer to a "change log" instead of a "CHANGELOG" throughout the site - to differentiate between the file and the purpose of the file — the - logging of changes. - -### Removed - -- Remove empty sections from CHANGELOG, they occupy too much space and - create too much noise in the file. People will have to assume that the - missing sections were intentionally left out because they contained no - notable changes. - -## [0.0.3] - 2014-08-09 - -### Added - -- "Why should I care?" section mentioning The Changelog podcast. - -## [0.0.2] - 2014-07-10 - -### Added - -- Explanation of the recommended reverse chronological release ordering. - -## [0.0.1] - 2014-05-31 - -### Added - -- This CHANGELOG file to hopefully serve as an evolving example of a - standardized open source project CHANGELOG. -- CNAME file to enable GitHub Pages custom domain. -- README now contains answers to common questions about CHANGELOGs. -- Good examples and basic guidelines, including proper date formatting. -- Counter-examples: "What makes unicorns cry?". 
+* add base content ([c9e9d9d](https://github.com/AI-READI/pyfairdatatools/commit/c9e9d9d2ea70d10b5255116876384af867f9b1dd)) +* add base text ([aeca7b3](https://github.com/AI-READI/pyfairdatatools/commit/aeca7b35cf7f4da5ebcc0b1b001c27efaabd2d98)) +* add docs for dataset_description ([8be3d27](https://github.com/AI-READI/pyfairdatatools/commit/8be3d275e23bb1e69944d6c4b2a52f12e1504b4d)) +* add docs for generate functions ([0249d42](https://github.com/AI-READI/pyfairdatatools/commit/0249d425e65787ea8c5ca32f0b1aaa5ccf49c3fb)) +* add validate readme sections ([6f2f773](https://github.com/AI-READI/pyfairdatatools/commit/6f2f773ec049f059a50adce5495ce08069de06ab)) +* fix build ([22581c9](https://github.com/AI-READI/pyfairdatatools/commit/22581c97869d0bbc065c8f84d8c6b231c0bf0acd)) +* fix build ([c2b9878](https://github.com/AI-READI/pyfairdatatools/commit/c2b9878edb3fb2c84389f3e4841a79cd3bd7cdbe)) +* fix links ([cbdabfd](https://github.com/AI-READI/pyfairdatatools/commit/cbdabfd95d14bf20c31379f5f781fb72e6dba4e1)) +* update for study_description ([e7e63b2](https://github.com/AI-READI/pyfairdatatools/commit/e7e63b2e0314e4e5e49355e35390376ae4476a50)) +* update publish instructions ([dbd0969](https://github.com/AI-READI/pyfairdatatools/commit/dbd09696c94d35ebcfdaf60d12c77148b7acbf0d)) +* update readme with template ([cf0d241](https://github.com/AI-READI/pyfairdatatools/commit/cf0d241c14ed913ba8fad90362134e39b3ffa603)) +* update text ([84b5a1f](https://github.com/AI-READI/pyfairdatatools/commit/84b5a1fab2f97de8732dee3275cf1d24b2a9f41b)) +* update text ([cc85509](https://github.com/AI-READI/pyfairdatatools/commit/cc855094e612f182bcd67710b94055ab84b9511c)) +* update wording ([b5f5547](https://github.com/AI-READI/pyfairdatatools/commit/b5f5547cd5a0d1425fe7564fd32f678bda20e0d8)) \ No newline at end of file diff --git a/docs/about/contributing.md b/docs/about/contributing.md index d17b7ad..8c0730f 100644 --- a/docs/about/contributing.md +++ b/docs/about/contributing.md @@ -21,11 
+21,11 @@ And if you like the project, but just don't have time to contribute, that's fine > If you want to ask a question, we assume that you have read the available [Documentation](https://aireadi.org). -Before you ask a question, it is best to search for existing [Issues](https://github.com/aireadi/pyfairdatatools/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first. +Before you ask a question, it is best to search for existing [Issues](https://github.com/AI-READI/pyfairdatatools/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first. If you then still feel the need to ask a question and need clarification, we recommend the following: -- Open an [Issue](https://github.com/aireadi/pyfairdatatools/issues/new). +- Open an [Issue](https://github.com/AI-READI/pyfairdatatools/issues/new). - Provide as much context as you can about what you're running into. - Provide project and platform versions (nodejs, npm, etc), depending on what seems relevant. @@ -34,7 +34,7 @@ We will then take care of the issue as soon as possible. ## I Want To Contribute > ### Legal Notice -> + > When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license. ### Reporting Bugs @@ -45,7 +45,7 @@ A good bug report shouldn't leave others needing to chase you up for more inform - Make sure that you are using the latest version. - Determine if your bug is really a bug and not an error on your side e.g. using incompatible environment components/versions (Make sure that you have read the [documentation](https://aireadi.org). 
If you are looking for support, you might want to check [this section](#i-have-a-question)). -- To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/fairdataihub/pyfairdatatools/issues?q=label%3Abug). +- To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/AI-READI/pyfairdatatools/issues?q=label%3Abug). - Also make sure to search the internet (including Stack Overflow) to see if users outside of the GitHub community have discussed the issue. - Collect information about the bug: - Stack trace (Traceback) @@ -58,7 +58,7 @@ A good bug report shouldn't leave others needing to chase you up for more inform We use GitHub issues to track bugs and errors. If you run into an issue with the project: -- Open an [Issue](https://github.com/fairdataihub/pyfairdatatools/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.) +- Open an [Issue](https://github.com/AI-READI/pyfairdatatools/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.) - Explain the behavior you would expect and the actual behavior. - Please provide as much context as possible and describe the _reproduction steps_ that someone else can follow to recreate the issue on their own. This usually includes your code. For good bug reports you should isolate the problem and create a reduced test case. - Provide the information you collected in the previous section. 
@@ -80,15 +80,15 @@ This section guides you through submitting an enhancement suggestion for pyfaird #### Before Submitting an Enhancement - Make sure that you are using the latest version. -- Read the [documentation](https://aireadi.org) carefully and find out if the functionality is already covered, maybe by an individual configuration. -- Perform a [search](https://github.com/fairdataihub/pyfairdatatools/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. +- Read the [documentation](https://docs.aireadi.org) carefully and find out if the functionality is already covered, maybe by an individual configuration. +- Perform a [search](https://github.com/AI-READI/pyfairdatatools/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. - Find out whether your idea fits with the scope and aims of the project. It's up to you to make a strong case to convince the project's developers of the merits of this feature. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on/plugin library. #### How Do I Submit a Good Enhancement Suggestion? -Enhancement suggestions are tracked as [GitHub issues](https://github.com/fairdataihub/fairdatatools/issues). +Enhancement suggestions are tracked as [GitHub issues](https://github.com/AI-READI/pyfairdatatools/issues/new). - Use a **clear and descriptive title** for the issue to identify the suggestion. - Provide a **step-by-step description of the suggested enhancement** in as many details as possible. 
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..fd4a286 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,4 @@ +/* Expand the main content area width */ +.wy-nav-content { +max-width: 1200px !important; /* Change this number to your liking */ width: 100% !important; +} diff --git a/mkdocs.yml b/mkdocs.yml index 715731d..7b1341d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,16 +8,20 @@ edit_uri: https://github.com/AI-READI/pyfairdatatools/edit/main/docs dev_addr: 127.0.0.1:3000 theme: readthedocs +extra_css: + - stylesheets/extra.css markdown_extensions: - codehilite - admonition - toc: - permalink: ' + permalink: "" + nav: - Home: index.md - Modules: + - Required Paths: modules/required-paths.md - Validate: modules/validate.md - Generate: modules/generate.md - About: diff --git a/pyfairdatatools/schemas/folder_structure.schema.json b/pyfairdatatools/schemas/folder_structure.schema.json index 1a45f92..e3812d5 100644 --- a/pyfairdatatools/schemas/folder_structure.schema.json +++ b/pyfairdatatools/schemas/folder_structure.schema.json @@ -5,136 +5,12 @@ "description": "This is a schema for a folder structure that is used to store a dataset. 
It is based on the BIDS folder structure, but is not identical.", "type": "object", "properties": { - "activity_monitor": { - "README.md": { - "const": "file" - } - }, - "best_corrected_visual_acuity": { - "README.md": { - "const": "file" - } - }, - "blood_test": { - "README.md": { - "const": "file" - } - }, - "center_for_epidemiologic_studies_depression_scale": { - "README.md": { - "const": "file" - } - }, - "consent_form": { - "README.md": { - "const": "file" - } - }, - "continuous_glucose_monitoring": { - "README.md": { - "const": "file" - } - }, - "contrast_sensitivity_test": { - "README.md": { - "const": "file" - } - }, - "demographic_survey": { - "README.md": { - "const": "file" - } - }, - "diabetes_wellbeing_questionnaire": { - "README.md": { - "const": "file" - } - }, - "dietary_questionnaire": { - "README.md": { - "const": "file" - } - }, - "electrocardiogram": { - "README.md": { - "const": "file" - } - }, - "eye_fundus_photography": { - "README.md": { - "const": "file" - } - }, - "general_health_survey": { - "README.md": { - "const": "file" - } - }, - "medication_list": { - "README.md": { - "const": "file" - } - }, - "monofilament_foot_sensation_test": { - "README.md": { - "const": "file" - } - }, - "montreal_cognitive_assessment": { - "README.md": { - "const": "file" - } - }, - "optical_coherence_tomography": { - "README.md": { - "const": "file" - } - }, - "screening_questionnaire": { - "README.md": { - "const": "file" - } - }, - "sdsca": { - "README.md": { - "const": "file" - } - }, - "urine_test": { - "README.md": { - "const": "file" - } - }, - "vital_signs_measurement": { - "README.md": { - "const": "file" - } - }, - "CHANGELOG.md": { - "const": "file" - }, - "LICENSE.txt": { - "const": "file" - }, - "README.md": { - "const": "file" - }, - "dataset_description.json": { - "const": "file" - }, - "participants.json": { - "const": "file" - }, - "participants.tsv": { - "const": "file" - } + "dataset_description.json": { "type": "string" } }, + 
"additionalProperties": true, + "required": [ - "CHANGELOG.md", - "LICENSE.txt", - "README.md", - "dataset_description.json", - "participants.json", - "participants.tsv" + + "dataset_description.json" ] } From cdad6f5158bc01956afc48eb681199820ee3010c Mon Sep 17 00:00:00 2001 From: aydawka Date: Tue, 10 Feb 2026 15:48:46 -0800 Subject: [PATCH 03/17] fix: fix schema --- .../schemas/folder_structure.schema.json | 134 +++++++++++++++++- 1 file changed, 129 insertions(+), 5 deletions(-) diff --git a/pyfairdatatools/schemas/folder_structure.schema.json b/pyfairdatatools/schemas/folder_structure.schema.json index e3812d5..1a45f92 100644 --- a/pyfairdatatools/schemas/folder_structure.schema.json +++ b/pyfairdatatools/schemas/folder_structure.schema.json @@ -5,12 +5,136 @@ "description": "This is a schema for a folder structure that is used to store a dataset. It is based on the BIDS folder structure, but is not identical.", "type": "object", "properties": { - "dataset_description.json": { "type": "string" } + "activity_monitor": { + "README.md": { + "const": "file" + } + }, + "best_corrected_visual_acuity": { + "README.md": { + "const": "file" + } + }, + "blood_test": { + "README.md": { + "const": "file" + } + }, + "center_for_epidemiologic_studies_depression_scale": { + "README.md": { + "const": "file" + } + }, + "consent_form": { + "README.md": { + "const": "file" + } + }, + "continuous_glucose_monitoring": { + "README.md": { + "const": "file" + } + }, + "contrast_sensitivity_test": { + "README.md": { + "const": "file" + } + }, + "demographic_survey": { + "README.md": { + "const": "file" + } + }, + "diabetes_wellbeing_questionnaire": { + "README.md": { + "const": "file" + } + }, + "dietary_questionnaire": { + "README.md": { + "const": "file" + } + }, + "electrocardiogram": { + "README.md": { + "const": "file" + } + }, + "eye_fundus_photography": { + "README.md": { + "const": "file" + } + }, + "general_health_survey": { + "README.md": { + "const": "file" + } + }, + 
"medication_list": { + "README.md": { + "const": "file" + } + }, + "monofilament_foot_sensation_test": { + "README.md": { + "const": "file" + } + }, + "montreal_cognitive_assessment": { + "README.md": { + "const": "file" + } + }, + "optical_coherence_tomography": { + "README.md": { + "const": "file" + } + }, + "screening_questionnaire": { + "README.md": { + "const": "file" + } + }, + "sdsca": { + "README.md": { + "const": "file" + } + }, + "urine_test": { + "README.md": { + "const": "file" + } + }, + "vital_signs_measurement": { + "README.md": { + "const": "file" + } + }, + "CHANGELOG.md": { + "const": "file" + }, + "LICENSE.txt": { + "const": "file" + }, + "README.md": { + "const": "file" + }, + "dataset_description.json": { + "const": "file" + }, + "participants.json": { + "const": "file" + }, + "participants.tsv": { + "const": "file" + } }, - "additionalProperties": true, - "required": [ - - "dataset_description.json" + "CHANGELOG.md", + "LICENSE.txt", + "README.md", + "dataset_description.json", + "participants.json", + "participants.tsv" ] } From a09bf8c51ba377dc226c40ec64fd3e11c63c58c6 Mon Sep 17 00:00:00 2001 From: aydawka Date: Mon, 16 Feb 2026 23:47:18 -0800 Subject: [PATCH 04/17] fix: folder validation --- docs/modules/validate.md | 43 ++++ mkdocs.yml | 1 - pyfairdatatools/validate.py | 393 ++++++++++++++++++++++++++++++------ 3 files changed, 379 insertions(+), 58 deletions(-) diff --git a/docs/modules/validate.md b/docs/modules/validate.md index 5ee5de2..eedef51 100644 --- a/docs/modules/validate.md +++ b/docs/modules/validate.md @@ -16,6 +16,49 @@ The validation module provides a way to validate data against a set of rules. The following methods are available in the `validate` module. Each method is described in detail below. +### Validate Folder Structure + +You can call the `validate_folder_structure` method to validate the data needed to create a dataset_description file. 
+ +#### Required or optional files + +The following files are required inside the dataset directory: + +- `dataset_description.json` +- `study_description.json` +- `README[.md|.rst|.txt]` +- `CITATION.cff` +- `CHANGELOG[.md|.rst|.txt]` +- `LICENSE[.md|.rst|.txt]` + +------------------------------------------------------------------------ + +## dataset_description.json Fields + +The file `dataset_description.json` is a JSON file describing the dataset. + + +#### Returns + +| Field | Type | Description | +|----------|---------|----------------------------------------------------------| +| valid | Boolean | Returns `True` if the dataset is valid, `False` otherwise. | +| errors | List | List of validation errors (blocking issues). | +| warnings | List | List of validation warnings (non-blocking issues). | + +#### How to use + +```python +from pyfairdatatools import validate + +folder_path = "/dataset_path" + +output = validate.validate_folder_structure(folder_path=folder_path) + +print(output) # result with `valid`, `errors`, and `warnings` fields + +``` + ### Validate Dataset Description You can call the `validate_dataset_description` method to validate the data needed to create a dataset_description file. diff --git a/mkdocs.yml b/mkdocs.yml index 7b1341d..1a098ab 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -21,7 +21,6 @@ markdown_extensions: nav: - Home: index.md - Modules: - - Required Paths: modules/required-paths.md - Validate: modules/validate.md - Generate: modules/generate.md - About: diff --git a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index f4309b3..9404fbc 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -3,8 +3,47 @@ import yaml from jsonschema import ValidationError, validate +import logging +import sys +import re +import csv -# from .
import utils +SUCCESS = 25 +logging.addLevelName(SUCCESS, "SUCCESS") + +def success(self, message, *args, **kwargs): + if self.isEnabledFor(SUCCESS): + self._log(SUCCESS, message, args, **kwargs) + +logging.Logger.success = success + +# Color Formatter +class ColorFormatter(logging.Formatter): + + COLORS = { + logging.DEBUG: "\033[90m", # Gray + logging.INFO: "\033[94m", # Blue + logging.WARNING: "\033[93m", # Yellow + logging.ERROR: "\033[91m", # Red + logging.CRITICAL: "\033[95m", # Magenta + SUCCESS: "\033[92m", # Green + } + + RESET = "\033[0m" + + def format(self, record): + color = self.COLORS.get(record.levelno, self.RESET) + msg = super().format(record) + return f"{color}{msg}{self.RESET}" + +# Logger Setup +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(ColorFormatter("%(levelname)s ▶ %(message)s")) + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +logger.handlers.clear() +logger.addHandler(handler) def validate_dataset_description(data, verbose=False): # sourcery skip: extract-method @@ -229,13 +268,25 @@ def validate_participants(data): """ schema = {} - # Import the schema from the schemas folder with open( os.path.join(os.path.dirname(__file__), "schemas", "participants.schema.json"), encoding="utf-8", ) as f: schema = json.load(f) + # Allow person_id as an alternative to participant_id + if "items" in schema and "required" in schema["items"]: + if "participant_id" in schema["items"]["properties"]: + schema["items"]["properties"]["participant_id"].pop("pattern", None) + if "person_id" not in schema["items"]["properties"]: + schema["items"]["properties"]["person_id"] = schema["items"]["properties"]["participant_id"].copy() + + for participant in data: + if "person_id" in participant and "participant_id" not in participant: + participant["participant_id"] = participant["person_id"] + if "age" in participant and participant["age"]: + participant["age"] = float(participant["age"]) + try: validate(instance=data, 
schema=schema) @@ -252,61 +303,6 @@ def validate_participants(data): raise error -def validate_folder_structure(folder_path): - """Validate that a folder structure is valid. - - We do this by generating a json tree of the folder and file structure and - validating it against a schema. - This will allow us to expand the schema in the future to include more complex - folder structures. - Certain folder structures (ones inside of dynamic folders) will not be able to - be validated by this method. - - Args: - folder_path (str): The path to the folder to validate - Returns: - bool: True if the folder structure is valid, False otherwise - """ - - def path_to_dict(path): - d = {} # type: dict - - if not os.path.exists(path): - return d - - for x in os.listdir(path): - key = os.path.basename(x) - - if os.path.isdir(os.path.join(path, x)): - d[key] = path_to_dict(os.path.join(path, x)) - else: - d[key] = "file" - - return d - - # Import the schema from the schemas folder - with open( - os.path.join( - os.path.dirname(__file__), "schemas", "folder_structure.schema.json" - ), - encoding="utf-8", - ) as f: - schema = json.load(f) - - folder_structure_as_dict = path_to_dict(folder_path) - - try: - validate(instance=folder_structure_as_dict, schema=schema) - - return True - except ValidationError as e: - print(e.schema["error_msg"] if "error_msg" in e.schema else e.message) - return False - except Exception as error: - print(error) - raise error - - def validate_datatype_dictionary(data): """Validate a datatype description against the scheme. @@ -350,3 +346,286 @@ def validate_datatype_dictionary(data): except Exception as error: print(error) raise error + + +def validate_file_path(file_path, preexisting_file=False, writable=False): + """Validate a file path. 
Checks if the file exists, is a file, and is writable.""" + if file_path == "": + print("File path is empty.") + raise ValueError("Invalid input") + + if preexisting_file: + if not os.path.exists(file_path): + print("File path does not exist.") + raise FileNotFoundError("File not found") + + if not os.path.isfile(file_path): + print("File path is not a file.") + raise ValueError("Invalid input") + + if writable and not os.access(file_path, os.W_OK): # ← DÜZELTİLDİ: 'not' eklendi + print("File path is not writable.") + raise PermissionError("Permission denied") + + return True + + +REQUIRED_FILES = [ + "dataset_description.json", + "CHANGELOG.md", + "LICENSE.txt", + "README.md", + "participants.json", + "participants.tsv" +] + +OPTIONAL_FILES = [ + "readme.md", + "changelog.md", + "study_description.txt", + "license.txt", +] + +ALLOWED_TEXT_EXTS = {".md", ".txt", ".rst"} + +NAME_PATTERN = re.compile(r'^[a-z0-9._-]+$') + +WINDOWS_RESERVED = { + "con", "prn", "aux", "nul", + "com1","com2","com3","com4","com5","com6","com7","com8","com9", + "lpt1","lpt2","lpt3","lpt4","lpt5","lpt6","lpt7","lpt8","lpt9" +} + +ALLOWED_UPPERCASE_FILES = { + "readme.md", "readme.txt", "readme.rst", + "changelog.md", "changelog.txt", "changelog.rst", + "license.md", "license.txt", "license.rst", "license", +} + + +def validate_names(root): + + errors = [] + warnings = [] + + for dirpath, dirnames, filenames in os.walk(root): + + for name in dirnames + filenames: + + full = os.path.join(dirpath, name) + base = os.path.splitext(name)[0].lower() + name_lower = name.lower() + + # Spaces + if " " in name: + errors.append(f"Space in name: {full}") + + # Uppercase - but allow certain conventional files + if any(c.isupper() for c in name): + if name_lower not in ALLOWED_UPPERCASE_FILES: + errors.append(f"Uppercase in name: {full}") + + # Invalid chars - skip check for allowed uppercase files + if name_lower not in ALLOWED_UPPERCASE_FILES: # ← YENİ SATIR + if not NAME_PATTERN.match(name): + 
errors.append(f"Invalid characters: {full}") + + # Hidden files + if name.startswith("."): + warnings.append(f"Hidden file/folder: {full}") + + # Windows reserved + if base in WINDOWS_RESERVED: + errors.append(f"Windows reserved name: {full}") + + # Long name + if len(name) > 150: + warnings.append(f"Very long name: {full}") + + return errors, warnings + + +def validate_folder_structure(folder_path): + + logger.info("Starting folder structure validation...") + + if not os.path.isdir(folder_path): + logger.error("Folder not found: %s", folder_path) + return False + + name_errors, name_warnings = validate_names(folder_path) + + for w in name_warnings: + logger.warning(w) + + if name_errors: + for e in name_errors: + logger.error(e) + return False + + # SCHEMA CHECK + def path_to_dict(path): + d = {} + for x in os.listdir(path): + p = os.path.join(path, x) + if os.path.isdir(p): + d[x] = path_to_dict(p) + else: + d[x] = "file" + return d + + try: + with open( + os.path.join(os.path.dirname(__file__), "schemas", "folder_structure.schema.json"), + encoding="utf-8" + ) as f: + schema = json.load(f) + + tree = path_to_dict(folder_path) + validate(instance=tree, schema=schema) + logger.info("Folder structure matches schema") + + except ValidationError as e: + logger.error("FAILED: Folder structure invalid (%s)", e.message) + return False + except Exception as e: + logger.error("Schema error: %s", e) + return False + + # FILE VALIDATION + files = os.listdir(folder_path) + all_valid = True + + # ── Required files ────────────────────────────────────────────────────────── + for fname in REQUIRED_FILES: + if fname not in files: + logger.error("Missing required file: %s", fname) + return False + logger.info("Found required file: %s", fname) + + # ── dataset_description.json ─────────────────────────────────────────────── + dd_path = os.path.join(folder_path, "dataset_description.json") + logger.info("Validating dataset_description.json...") + try: + with open(dd_path, 
encoding="utf-8") as f: + dd_data = json.load(f) + if validate_dataset_description(dd_data): + logger.success("dataset_description.json is valid") + else: + logger.error("dataset_description.json failed validation") + all_valid = False + except json.JSONDecodeError as e: + logger.error("dataset_description.json is not valid JSON: %s", e) + all_valid = False + except Exception as e: # ← YENİ EKLENEN EXCEPTION HANDLER + logger.error("Error reading dataset_description.json: %s", e) + all_valid = False + + for fname in files: + name_lower = fname.lower() + file_path = os.path.join(folder_path, fname) + _, ext = os.path.splitext(fname) + ext = ext.lower() + + # readme + if name_lower.startswith("readme") and ext in ALLOWED_TEXT_EXTS: + logger.info("Validating %s...", fname) + try: + with open(file_path, encoding="utf-8") as f: + content = f.read() + + readme_data = {} + current_key = None + current_value = [] + + for line in content.split("\n"): + stripped = line.strip() + if stripped.startswith("#"): + if current_key: + readme_data[current_key] = "\n".join(current_value).strip() + current_key = stripped.lstrip("#").strip() + current_value = [] + elif current_key: + current_value.append(stripped) + + if current_key: + readme_data[current_key] = "\n".join(current_value).strip() + + if validate_readme(readme_data): + logger.success("%s is valid", fname) + else: + logger.error("%s failed validation", fname) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", fname, e) + all_valid = False + + # study_description + elif name_lower.startswith("study_description") and ext in ALLOWED_TEXT_EXTS: + logger.info("Validating %s...", fname) + try: + with open(file_path, encoding="utf-8") as f: + study_data = json.load(f) + if validate_study_description(study_data): + logger.success("%s is valid", fname) + else: + logger.error("%s failed validation", fname) + all_valid = False + except json.JSONDecodeError as e: + logger.error("%s is not valid JSON: 
%s", fname, e) + all_valid = False + except Exception as e: # ← YENİ EKLENEN EXCEPTION HANDLER + logger.error("Error reading %s: %s", fname, e) + all_valid = False + + # changelog — no dedicated validator, just check extension + elif name_lower.startswith("changelog"): + if ext not in ALLOWED_TEXT_EXTS: + logger.error("Invalid extension for %s", fname) + all_valid = False + else: + logger.info("Found changelog file: %s (no schema validator)", fname) + + # license + elif name_lower.startswith("license"): + logger.info("Validating license in %s...", fname) + try: + with open(file_path, encoding="utf-8") as f: + license_text = f.read().strip() + if validate_license(license_text): + logger.success("%s has a valid license identifier", fname) + else: + logger.error("%s has an invalid license identifier: '%s'", fname, license_text) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", fname, e) + all_valid = False + + # participants files (anywhere in the folder tree) ─────────────────────── + for dirpath, _, filenames in os.walk(folder_path): + for fname in filenames: + if fname.lower().startswith("participants") and fname.lower().endswith(".tsv"): + file_path = os.path.join(dirpath, fname) + logger.info("Validating participants file: %s...", file_path) + try: + with open(file_path, encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter="\t") + participants_data = list(reader) + if validate_participants(participants_data): + logger.success("%s is valid", file_path) + else: + logger.error("%s failed validation", file_path) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", file_path, e) + all_valid = False + # ── DONE ─────────────────────────────────────────────────────────────────── + if all_valid: + logger.success("All files fully validated") + else: + logger.error("Validation completed with errors") + + return all_valid + + +validate_folder_structure("tests") \ No newline at end of file From 
3098984cc4ce20d1ea6b095fb7a91f9caf9a56f5 Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 16:43:05 -0800 Subject: [PATCH 05/17] fix: update css and docs --- docs/modules/validate.md | 5 ----- docs/stylesheets/extra.css | 4 ---- mkdocs.yml | 2 -- 3 files changed, 11 deletions(-) delete mode 100644 docs/stylesheets/extra.css diff --git a/docs/modules/validate.md b/docs/modules/validate.md index eedef51..4ec6c90 100644 --- a/docs/modules/validate.md +++ b/docs/modules/validate.md @@ -33,11 +33,6 @@ Following files are required to have inside the directory path: ------------------------------------------------------------------------ -## dataset_description.json Fields - -The file `dataset_description.json` is a JSON file describing the dataset. - - #### Returns- | Field | Type | Description | diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css deleted file mode 100644 index fd4a286..0000000 --- a/docs/stylesheets/extra.css +++ /dev/null @@ -1,4 +0,0 @@ -/* Expand the main content area width */ -.wy-nav-content { -max-width: 1200px !important; /* Change this number to your liking */ width: 100% !important; -} diff --git a/mkdocs.yml b/mkdocs.yml index 1a098ab..53b8202 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,8 +8,6 @@ edit_uri: https://github.com/AI-READI/pyfairdatatools/edit/main/docs dev_addr: 127.0.0.1:3000 theme: readthedocs -extra_css: - - stylesheets/extra.css markdown_extensions: - codehilite From c462c18e451f2e0d8d7ca551ea2f6bbfa01c2cde Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 17:51:18 -0800 Subject: [PATCH 06/17] fix: update the validation path --- mkdocs.yml | 2 +- pyfairdatatools/validate.py | 200 ++++++++++++++++-------------------- 2 files changed, 87 insertions(+), 115 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 53b8202..34da8e4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,7 +13,7 @@ markdown_extensions: - codehilite - admonition - toc: - permalink: "" + permalink:  nav: diff --git 
a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index 9404fbc..aca353c 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -363,22 +363,13 @@ def validate_file_path(file_path, preexisting_file=False, writable=False): print("File path is not a file.") raise ValueError("Invalid input") - if writable and not os.access(file_path, os.W_OK): # ← DÜZELTİLDİ: 'not' eklendi + if writable and not os.access(file_path, os.W_OK): print("File path is not writable.") raise PermissionError("Permission denied") return True -REQUIRED_FILES = [ - "dataset_description.json", - "CHANGELOG.md", - "LICENSE.txt", - "README.md", - "participants.json", - "participants.tsv" -] - OPTIONAL_FILES = [ "readme.md", "changelog.md", @@ -386,8 +377,6 @@ def validate_file_path(file_path, preexisting_file=False, writable=False): "license.txt", ] -ALLOWED_TEXT_EXTS = {".md", ".txt", ".rst"} - NAME_PATTERN = re.compile(r'^[a-z0-9._-]+$') WINDOWS_RESERVED = { @@ -420,12 +409,12 @@ def validate_names(root): if " " in name: errors.append(f"Space in name: {full}") - # Uppercase - but allow certain conventional files + # Allow certain conventional files if any(c.isupper() for c in name): if name_lower not in ALLOWED_UPPERCASE_FILES: errors.append(f"Uppercase in name: {full}") - # Invalid chars - skip check for allowed uppercase files + # Invalid chars if name_lower not in ALLOWED_UPPERCASE_FILES: # ← YENİ SATIR if not NAME_PATTERN.match(name): errors.append(f"Invalid characters: {full}") @@ -438,7 +427,6 @@ def validate_names(root): if base in WINDOWS_RESERVED: errors.append(f"Windows reserved name: {full}") - # Long name if len(name) > 150: warnings.append(f"Very long name: {full}") @@ -446,24 +434,18 @@ def validate_names(root): def validate_folder_structure(folder_path): - logger.info("Starting folder structure validation...") - if not os.path.isdir(folder_path): - logger.error("Folder not found: %s", folder_path) + logger.error(f"Folder not found: {folder_path}", ) 
return False - name_errors, name_warnings = validate_names(folder_path) - for w in name_warnings: logger.warning(w) - if name_errors: for e in name_errors: logger.error(e) return False - - # SCHEMA CHECK + # schema check def path_to_dict(path): d = {} for x in os.listdir(path): @@ -474,14 +456,15 @@ def path_to_dict(path): d[x] = "file" return d + required_files = [] try: with open( os.path.join(os.path.dirname(__file__), "schemas", "folder_structure.schema.json"), encoding="utf-8" ) as f: schema = json.load(f) - tree = path_to_dict(folder_path) + required_files.extend(schema["required"]) validate(instance=tree, schema=schema) logger.info("Folder structure matches schema") @@ -491,20 +474,19 @@ def path_to_dict(path): except Exception as e: logger.error("Schema error: %s", e) return False - - # FILE VALIDATION files = os.listdir(folder_path) all_valid = True - # ── Required files ────────────────────────────────────────────────────────── - for fname in REQUIRED_FILES: - if fname not in files: - logger.error("Missing required file: %s", fname) + # Required files check + for f in required_files: + if f not in files: + logger.error("Missing required file: %s", f) return False - logger.info("Found required file: %s", fname) + logger.info("Found required file: %s", f) - # ── dataset_description.json ─────────────────────────────────────────────── dd_path = os.path.join(folder_path, "dataset_description.json") + + # dataset_description.json logger.info("Validating dataset_description.json...") try: with open(dd_path, encoding="utf-8") as f: @@ -517,95 +499,87 @@ def path_to_dict(path): except json.JSONDecodeError as e: logger.error("dataset_description.json is not valid JSON: %s", e) all_valid = False - except Exception as e: # ← YENİ EKLENEN EXCEPTION HANDLER + except Exception as e: logger.error("Error reading dataset_description.json: %s", e) all_valid = False - for fname in files: - name_lower = fname.lower() - file_path = os.path.join(folder_path, fname) - _, ext = 
os.path.splitext(fname) - ext = ext.lower() - - # readme - if name_lower.startswith("readme") and ext in ALLOWED_TEXT_EXTS: - logger.info("Validating %s...", fname) - try: - with open(file_path, encoding="utf-8") as f: - content = f.read() - - readme_data = {} - current_key = None - current_value = [] - - for line in content.split("\n"): - stripped = line.strip() - if stripped.startswith("#"): - if current_key: - readme_data[current_key] = "\n".join(current_value).strip() - current_key = stripped.lstrip("#").strip() - current_value = [] - elif current_key: - current_value.append(stripped) + # study_description + s_description_path = os.path.join(folder_path, "study_description.json") + logger.info("Validating %s...", s_description_path) + try: + with open(s_description_path, encoding="utf-8") as f: + study_data = json.load(f) + if validate_study_description(study_data): + logger.success("%s is valid", s_description_path) + else: + logger.error("%s failed validation", s_description_path) + all_valid = False + except json.JSONDecodeError as e: + logger.error("%s is not valid JSON: %s", s_description_path, e) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", s_description_path, e) + all_valid = False + # readme + readme_path = os.path.join(folder_path, "readme.md") + logger.info("Validating %s...", readme_path) + try: + with open(readme_path, encoding="utf-8") as f: + content = f.read() + readme_data = {} + current_key = None + current_value = [] + for line in content.split("\n"): + stripped = line.strip() + if stripped.startswith("#"): if current_key: readme_data[current_key] = "\n".join(current_value).strip() + current_key = stripped.lstrip("#").strip() + current_value = [] + elif current_key: + current_value.append(stripped) - if validate_readme(readme_data): - logger.success("%s is valid", fname) - else: - logger.error("%s failed validation", fname) - all_valid = False - except Exception as e: - logger.error("Error reading %s: 
%s", fname, e) - all_valid = False - - # study_description - elif name_lower.startswith("study_description") and ext in ALLOWED_TEXT_EXTS: - logger.info("Validating %s...", fname) - try: - with open(file_path, encoding="utf-8") as f: - study_data = json.load(f) - if validate_study_description(study_data): - logger.success("%s is valid", fname) - else: - logger.error("%s failed validation", fname) - all_valid = False - except json.JSONDecodeError as e: - logger.error("%s is not valid JSON: %s", fname, e) - all_valid = False - except Exception as e: # ← YENİ EKLENEN EXCEPTION HANDLER - logger.error("Error reading %s: %s", fname, e) - all_valid = False - - # changelog — no dedicated validator, just check extension - elif name_lower.startswith("changelog"): - if ext not in ALLOWED_TEXT_EXTS: - logger.error("Invalid extension for %s", fname) - all_valid = False - else: - logger.info("Found changelog file: %s (no schema validator)", fname) - - # license - elif name_lower.startswith("license"): - logger.info("Validating license in %s...", fname) - try: - with open(file_path, encoding="utf-8") as f: - license_text = f.read().strip() - if validate_license(license_text): - logger.success("%s has a valid license identifier", fname) - else: - logger.error("%s has an invalid license identifier: '%s'", fname, license_text) - all_valid = False - except Exception as e: - logger.error("Error reading %s: %s", fname, e) - all_valid = False + if current_key: + readme_data[current_key] = "\n".join(current_value).strip() - # participants files (anywhere in the folder tree) ─────────────────────── + if validate_readme(readme_data): + logger.success("%s is valid", readme_path) + else: + logger.error("%s failed validation", readme_path) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", readme_path, e) + all_valid = False + + # changelog — no dedicated validator, just check extension + if os.path.join(folder_path, "readme.md"): + logger.info("Found 
changelog file: %s", readme_path) + + # license + all_license_paths = next((f for f in files if f.lower() == "license.txt"), None) + license_path = os.path.join(folder_path, all_license_paths) + try: + if not license_path: + logger.error("Missing required file: license.txt") + return False + logger.info("Validating license in %s...", license_path) + + with open(license_path, encoding="utf-8") as f: + license_text = f.read().strip() + if validate_license(license_text): + logger.success("%s has a valid license identifier", license_path) + else: + logger.error("%s has an invalid license identifier: '%s'", license_path, license_text) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", license_path, e) + all_valid = False + # participants files for dirpath, _, filenames in os.walk(folder_path): - for fname in filenames: - if fname.lower().startswith("participants") and fname.lower().endswith(".tsv"): - file_path = os.path.join(dirpath, fname) + for f_name in filenames: + if f_name.lower().startswith("participants") and f_name.lower().endswith(".tsv"): + file_path = os.path.join(dirpath, f_name) logger.info("Validating participants file: %s...", file_path) try: with open(file_path, encoding="utf-8") as f: @@ -619,12 +593,10 @@ def path_to_dict(path): except Exception as e: logger.error("Error reading %s: %s", file_path, e) all_valid = False - # ── DONE ─────────────────────────────────────────────────────────────────── if all_valid: logger.success("All files fully validated") else: logger.error("Validation completed with errors") - return all_valid From 32d372e4867026de9733a12ab8492ac025800d46 Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 21:55:32 -0800 Subject: [PATCH 07/17] fix: finish validation --- docs/modules/validate.md | 12 +-- pyfairdatatools/validate.py | 176 ++++++++++++++++++------------------ 2 files changed, 92 insertions(+), 96 deletions(-) diff --git a/docs/modules/validate.md b/docs/modules/validate.md index 
4ec6c90..b17de4a 100644 --- a/docs/modules/validate.md +++ b/docs/modules/validate.md @@ -25,15 +25,15 @@ You can call the `validate_folder_structure` method to validate the data needed Following files are required to have inside the directory path: - `dataset_description.json` -- `study_description.json` -- `README\[.md\|.rst\|.txt\]` -- `CITATION.cff` -- `CHANGELOG\[.md\|.rst\|.txt\]` -- `LICENSE\[.md\|.rst\|.txt\]` +- `participants.json` +- `participants.tsv` +- `README.md` +- `CHANGELOG.md` +- `LICENSE.md` ------------------------------------------------------------------------ -#### Returns- +#### Returns | Field | Type | Description | |----------|---------|----------------------------------------------------------| diff --git a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index aca353c..06a7955 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -370,37 +370,23 @@ def validate_file_path(file_path, preexisting_file=False, writable=False): return True -OPTIONAL_FILES = [ - "readme.md", - "changelog.md", - "study_description.txt", - "license.txt", -] - -NAME_PATTERN = re.compile(r'^[a-z0-9._-]+$') - -WINDOWS_RESERVED = { - "con", "prn", "aux", "nul", - "com1","com2","com3","com4","com5","com6","com7","com8","com9", - "lpt1","lpt2","lpt3","lpt4","lpt5","lpt6","lpt7","lpt8","lpt9" -} - -ALLOWED_UPPERCASE_FILES = { - "readme.md", "readme.txt", "readme.rst", - "changelog.md", "changelog.txt", "changelog.rst", - "license.md", "license.txt", "license.rst", "license", -} - - def validate_names(root): - + """This function validates the names of the + files against the schema.""" + windows_reserved = { + "con", "prn", "aux", "nul", + "lpt1","lpt2","lpt3","lpt4","lpt5","lpt6","lpt7","lpt8","lpt9", + "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9" + } + allowed_uppercase_files = { + "readme.md", "readme.txt", "readme.rst", + "changelog.md", "changelog.txt", "changelog.rst", + "license.md", "license.txt", 
"license.rst", "license", + } errors = [] warnings = [] - - for dirpath, dirnames, filenames in os.walk(root): - - for name in dirnames + filenames: - + for dirpath, dir_names, file_names in os.walk(root): + for name in dir_names + file_names: full = os.path.join(dirpath, name) base = os.path.splitext(name)[0].lower() name_lower = name.lower() @@ -409,14 +395,15 @@ def validate_names(root): if " " in name: errors.append(f"Space in name: {full}") - # Allow certain conventional files + # Allow some conventional files if any(c.isupper() for c in name): - if name_lower not in ALLOWED_UPPERCASE_FILES: + if name_lower not in allowed_uppercase_files: errors.append(f"Uppercase in name: {full}") # Invalid chars - if name_lower not in ALLOWED_UPPERCASE_FILES: # ← YENİ SATIR - if not NAME_PATTERN.match(name): + name_pattern = re.compile(r'^[a-z0-9._-]+$') + if name_lower not in allowed_uppercase_files: + if not name_pattern.match(name): errors.append(f"Invalid characters: {full}") # Hidden files @@ -424,7 +411,7 @@ def validate_names(root): warnings.append(f"Hidden file/folder: {full}") # Windows reserved - if base in WINDOWS_RESERVED: + if base in windows_reserved: errors.append(f"Windows reserved name: {full}") if len(name) > 150: @@ -445,16 +432,6 @@ def validate_folder_structure(folder_path): for e in name_errors: logger.error(e) return False - # schema check - def path_to_dict(path): - d = {} - for x in os.listdir(path): - p = os.path.join(path, x) - if os.path.isdir(p): - d[x] = path_to_dict(p) - else: - d[x] = "file" - return d required_files = [] try: @@ -463,9 +440,13 @@ def path_to_dict(path): encoding="utf-8" ) as f: schema = json.load(f) - tree = path_to_dict(folder_path) + folder_tree = { + f: "file" if os.path.isfile(os.path.join(folder_path, f)) else "directory" + for f in os.listdir(folder_path) + } + required_files.extend(schema["required"]) - validate(instance=tree, schema=schema) + validate(instance=folder_tree, schema=schema) logger.info("Folder structure 
matches schema") except ValidationError as e: @@ -484,15 +465,14 @@ def path_to_dict(path): return False logger.info("Found required file: %s", f) - dd_path = os.path.join(folder_path, "dataset_description.json") - # dataset_description.json - logger.info("Validating dataset_description.json...") + dd_path = os.path.join(folder_path, "dataset_description.json") try: with open(dd_path, encoding="utf-8") as f: dd_data = json.load(f) + logger.info("Validating dataset_description.json...") if validate_dataset_description(dd_data): - logger.success("dataset_description.json is valid") + logger.success("%s is valid", dd_path) else: logger.error("dataset_description.json failed validation") all_valid = False @@ -505,21 +485,22 @@ def path_to_dict(path): # study_description s_description_path = os.path.join(folder_path, "study_description.json") - logger.info("Validating %s...", s_description_path) - try: - with open(s_description_path, encoding="utf-8") as f: - study_data = json.load(f) - if validate_study_description(study_data): - logger.success("%s is valid", s_description_path) - else: - logger.error("%s failed validation", s_description_path) + if os.path.isfile(s_description_path): + logger.info("Validating %s...", s_description_path) + try: + with open(s_description_path, encoding="utf-8") as f: + study_data = json.load(f) + if validate_study_description(study_data): + logger.success("%s is valid", s_description_path) + else: + logger.error("%s failed validation", s_description_path) + all_valid = False + except json.JSONDecodeError as e: + logger.error("%s is not valid JSON: %s", s_description_path, e) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", s_description_path, e) all_valid = False - except json.JSONDecodeError as e: - logger.error("%s is not valid JSON: %s", s_description_path, e) - all_valid = False - except Exception as e: - logger.error("Error reading %s: %s", s_description_path, e) - all_valid = False # readme 
readme_path = os.path.join(folder_path, "readme.md") @@ -539,7 +520,6 @@ def path_to_dict(path): current_value = [] elif current_key: current_value.append(stripped) - if current_key: readme_data[current_key] = "\n".join(current_value).strip() @@ -552,52 +532,68 @@ def path_to_dict(path): logger.error("Error reading %s: %s", readme_path, e) all_valid = False - # changelog — no dedicated validator, just check extension - if os.path.join(folder_path, "readme.md"): - logger.info("Found changelog file: %s", readme_path) + # changelog + all_changelog_paths = next((file for file in files if file.lower() == "changelog.md"), None) + logger.info("Validating %s...", all_changelog_paths) + changelog_path = os.path.join(folder_path, all_changelog_paths) + try: + if not changelog_path: + logger.error("Missing required file: changelog.md") + return False + except Exception as e: + logger.error("Error reading %s: %s", changelog_path, e) + all_valid = False + logger.success("%s is valid", changelog_path) # license - all_license_paths = next((f for f in files if f.lower() == "license.txt"), None) + all_license_paths = next((file for file in files if file.lower() == "license.txt"), None) license_path = os.path.join(folder_path, all_license_paths) try: if not license_path: logger.error("Missing required file: license.txt") return False - logger.info("Validating license in %s...", license_path) - + logger.info("Validating %s...", license_path) with open(license_path, encoding="utf-8") as f: license_text = f.read().strip() if validate_license(license_text): - logger.success("%s has a valid license identifier", license_path) + logger.success("%s is valid", license_path) else: logger.error("%s has an invalid license identifier: '%s'", license_path, license_text) all_valid = False except Exception as e: logger.error("Error reading %s: %s", license_path, e) all_valid = False + # participants files - for dirpath, _, filenames in os.walk(folder_path): - for f_name in filenames: - if 
f_name.lower().startswith("participants") and f_name.lower().endswith(".tsv"): - file_path = os.path.join(dirpath, f_name) - logger.info("Validating participants file: %s...", file_path) - try: - with open(file_path, encoding="utf-8") as f: - reader = csv.DictReader(f, delimiter="\t") - participants_data = list(reader) - if validate_participants(participants_data): - logger.success("%s is valid", file_path) - else: - logger.error("%s failed validation", file_path) - all_valid = False - except Exception as e: - logger.error("Error reading %s: %s", file_path, e) - all_valid = False + participant_path = os.path.join(folder_path, "participants.tsv") + logger.info("Validating participants file: %s...", participant_path) + try: + with open(participant_path, encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter="\t") + participants_data = list(reader) + if validate_participants(participants_data): + logger.success("%s is valid", participant_path) + else: + logger.error("%s failed validation", participant_path) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", participant_path, e) + all_valid = False if all_valid: logger.success("All files fully validated") else: logger.error("Validation completed with errors") - return all_valid + # participant.json + participant_json_path = os.path.join(folder_path, "participant.json") + logger.info("Validating %s...", participant_json_path) + try: + if not participant_json_path: + logger.error("Missing required file: participant.json") + return False + logger.success("%s is valid", participant_json_path) + except Exception as e: + logger.error("Error reading %s: %s", participant_json_path, e) + all_valid = False -validate_folder_structure("tests") \ No newline at end of file + return all_valid From 9ce540e471662543d7e45ee457b3fc538280c232 Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 21:55:48 -0800 Subject: [PATCH 08/17] chore: update version --- pyproject.toml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d97665f..2649e46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "pyfairdatatools" -version = "1.2.0b12" +version = "1.2.0b13" description = "Tools for AI-READI" packages = [{ include = "pyfairdatatools" }] From 89e27101f2ad1538ec287e02a9e223acd561179c Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 23:31:35 -0800 Subject: [PATCH 09/17] fix: dd validation --- pyfairdatatools/validate.py | 43 ++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index 06a7955..e56c3cf 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -276,16 +276,12 @@ def validate_participants(data): # Allow person_id as an alternative to participant_id if "items" in schema and "required" in schema["items"]: - if "participant_id" in schema["items"]["properties"]: - schema["items"]["properties"]["participant_id"].pop("pattern", None) if "person_id" not in schema["items"]["properties"]: schema["items"]["properties"]["person_id"] = schema["items"]["properties"]["participant_id"].copy() for participant in data: if "person_id" in participant and "participant_id" not in participant: participant["participant_id"] = participant["person_id"] - if "age" in participant and participant["age"]: - participant["age"] = float(participant["age"]) try: validate(instance=data, schema=schema) @@ -564,36 +560,35 @@ def validate_folder_structure(folder_path): logger.error("Error reading %s: %s", license_path, e) all_valid = False - # participants files - participant_path = os.path.join(folder_path, "participants.tsv") - logger.info("Validating participants file: %s...", participant_path) + # participants json + participant_json_path = os.path.join(folder_path, "participants.json") + logger.info("Validating participants file: %s...", 
participant_json_path) try: - with open(participant_path, encoding="utf-8") as f: - reader = csv.DictReader(f, delimiter="\t") - participants_data = list(reader) + with open(participant_json_path, encoding="utf-8") as f: + participants_data = json.load(f) if validate_participants(participants_data): - logger.success("%s is valid", participant_path) + logger.success("%s is valid", participant_json_path) else: - logger.error("%s failed validation", participant_path) + logger.error("%s failed validation", participant_json_path) all_valid = False except Exception as e: - logger.error("Error reading %s: %s", participant_path, e) + logger.error("Error reading %s: %s", participant_json_path, e) all_valid = False - if all_valid: - logger.success("All files fully validated") - else: - logger.error("Validation completed with errors") - # participant.json - participant_json_path = os.path.join(folder_path, "participant.json") - logger.info("Validating %s...", participant_json_path) + # participant.tsv + participant_tsv_path = os.path.join(folder_path, "participants.tsv") + logger.info("Validating %s...", participant_tsv_path) try: - if not participant_json_path: - logger.error("Missing required file: participant.json") + if not participant_tsv_path: + logger.error("Missing required file: participant.tsv") return False - logger.success("%s is valid", participant_json_path) + logger.success("%s is valid", participant_tsv_path) except Exception as e: - logger.error("Error reading %s: %s", participant_json_path, e) + logger.error("Error reading %s: %s", participant_tsv_path, e) all_valid = False + if all_valid: + logger.success("All files fully validated") + else: + logger.error("Validation completed with errors") return all_valid From 3a2895357ca5db740cd43b2e663aa2f2f60c9eed Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 28 Jan 2026 09:42:02 -0800 Subject: [PATCH 10/17] fix: docs pages --- docs/about/changelog.md | 266 ++++-------------- docs/about/contributing.md | 16 +- 
docs/stylesheets/extra.css | 4 + mkdocs.yml | 6 +- .../schemas/folder_structure.schema.json | 134 +-------- 5 files changed, 83 insertions(+), 343 deletions(-) create mode 100644 docs/stylesheets/extra.css diff --git a/docs/about/changelog.md b/docs/about/changelog.md index 6f91a8e..4ad4c89 100644 --- a/docs/about/changelog.md +++ b/docs/about/changelog.md @@ -1,229 +1,85 @@ # Changelog -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [Unreleased] - -### Added - -- v1.1 Italian translation. - -## [1.1.1] - 2023-03-05 - -### Added - -- Arabic translation (#444). -- v1.1 French translation. -- v1.1 Dutch translation (#371). -- v1.1 Russian translation (#410). -- v1.1 Japanese translation (#363). -- v1.1 Norwegian Bokmål translation (#383). -- v1.1 "Inconsistent Changes" Turkish translation (#347). -- Default to most recent versions available for each languages -- Display count of available translations (26 to date!) -- Centralize all links into `/data/links.json` so they can be updated easily - -### Fixed - -- Improve French translation (#377). -- Improve id-ID translation (#416). -- Improve Persian translation (#457). -- Improve Russian translation (#408). -- Improve Swedish title (#419). -- Improve zh-CN translation (#359). -- Improve French translation (#357). -- Improve zh-TW translation (#360, #355). -- Improve Spanish (es-ES) transltion (#362). -- Foldout menu in Dutch translation (#371). -- Missing periods at the end of each change (#451). -- Fix missing logo in 1.1 pages -- Display notice when translation isn't for most recent version -- Various broken links, page versions, and indentations. +## [1.0.3](https://github.com/AI-READI/pyfairdatatools/compare/v1.0.2...v1.0.3) (2025-11-26) -### Changed -- Upgrade dependencies: Ruby 3.2.1, Middleman, etc. 
+### Bug Fixes -### Removed +* remove matrix from ci workflow ([#23](https://github.com/AI-READI/pyfairdatatools/issues/23)) ([44df713](https://github.com/AI-READI/pyfairdatatools/commit/44df713f25add43cedfb3eba4e9f20b32d4823cf)) -- Unused normalize.css file -- Identical links assigned in each translation file -- Duplicate index file for the english version +## [1.0.2](https://github.com/AI-READI/pyfairdatatools/compare/v1.0.1...v1.0.2) (2023-11-16) -## [1.1.0] - 2019-02-15 -### Added +### Bug Fixes -- Danish translation (#297). -- Georgian translation from (#337). -- Changelog inconsistency section in Bad Practices. +* add affiliationValue to datasetDescription ([63b7946](https://github.com/AI-READI/pyfairdatatools/commit/63b7946065dac6cf5984829bc5b02689049cb750)) -### Fixed +## 1.0.1 (2023-11-16) -- Italian translation (#332). -- Indonesian translation (#336). -## [1.0.0] - 2017-06-20 +### ⚠ BREAKING CHANGES -### Added +* v1 +* v1 +* v1 +* update package name to pyfairdatatools -- New visual identity by [@tylerfortune8](https://github.com/tylerfortune8). -- Version navigation. -- Links to latest released version in previous versions. -- "Why keep a changelog?" section. -- "Who needs a changelog?" section. -- "How do I make a changelog?" section. -- "Frequently Asked Questions" section. -- New "Guiding Principles" sub-section to "How do I make a changelog?". -- Simplified and Traditional Chinese translations from [@tianshuo](https://github.com/tianshuo). -- German translation from [@mpbzh](https://github.com/mpbzh) & [@Art4](https://github.com/Art4). -- Italian translation from [@azkidenz](https://github.com/azkidenz). -- Swedish translation from [@magol](https://github.com/magol). -- Turkish translation from [@emreerkan](https://github.com/emreerkan). -- French translation from [@zapashcanon](https://github.com/zapashcanon). -- Brazilian Portuguese translation from [@Webysther](https://github.com/Webysther). 
-- Polish translation from [@amielucha](https://github.com/amielucha) & [@m-aciek](https://github.com/m-aciek). -- Russian translation from [@aishek](https://github.com/aishek). -- Czech translation from [@h4vry](https://github.com/h4vry). -- Slovak translation from [@jkostolansky](https://github.com/jkostolansky). -- Korean translation from [@pierceh89](https://github.com/pierceh89). -- Croatian translation from [@porx](https://github.com/porx). -- Persian translation from [@Hameds](https://github.com/Hameds). -- Ukrainian translation from [@osadchyi-s](https://github.com/osadchyi-s). +### release -### Changed +* v1 ([386a2d3](https://github.com/AI-READI/pyfairdatatools/commit/386a2d3568a9be9a6be807d03666950f1f2b7ff3)) +* v1 ([1dacdd1](https://github.com/AI-READI/pyfairdatatools/commit/1dacdd17f36218285578b6109c39956c9aa1f2da)) +* v1 ([078c513](https://github.com/AI-READI/pyfairdatatools/commit/078c513eaaf7346e6eff1465bb0d8fa645e7b34f)) -- Start using "changelog" over "change log" since it's the common usage. -- Start versioning based on the current English version at 0.3.0 to help - translation authors keep things up-to-date. -- Rewrite "What makes unicorns cry?" section. -- Rewrite "Ignoring Deprecations" sub-section to clarify the ideal - scenario. -- Improve "Commit log diffs" sub-section to further argument against - them. -- Merge "Why can’t people just use a git log diff?" with "Commit log - diffs". -- Fix typos in Simplified Chinese and Traditional Chinese translations. -- Fix typos in Brazilian Portuguese translation. -- Fix typos in Turkish translation. -- Fix typos in Czech translation. -- Fix typos in Swedish translation. -- Improve phrasing in French translation. -- Fix phrasing and spelling in German translation. -### Removed +### Features -- Section about "changelog" vs "CHANGELOG". 
+* add additional data types ([d3d4b21](https://github.com/AI-READI/pyfairdatatools/commit/d3d4b21db3c6ef558935054e95986e1dc8b09cdf)) +* add additional validation for dataset description ([0830308](https://github.com/AI-READI/pyfairdatatools/commit/0830308b248310bd569e37d48147748eeadefebc)) +* add base study_description schema ([8ca9a94](https://github.com/AI-READI/pyfairdatatools/commit/8ca9a942a4c6aad77aabe87f2f3c3ec66c07c701)) +* add changelog generate function ([e448f41](https://github.com/AI-READI/pyfairdatatools/commit/e448f41cee39fec4b8778549e8aadebe3bfc1100)) +* add dataset_description validation schema ([3f23381](https://github.com/AI-READI/pyfairdatatools/commit/3f233817ff23f5172ee96ea65d9c9c25a1e0c8c2)) +* add file path validation to generate functions ([2e9c39a](https://github.com/AI-READI/pyfairdatatools/commit/2e9c39a2d0960ef8de7d56aa42d85526768eb00f)) +* add functions for study description.json ([81693a9](https://github.com/AI-READI/pyfairdatatools/commit/81693a92d5b78498bdeb065bcee938155d50609b)) +* add generate function for dataset_description ([decda23](https://github.com/AI-READI/pyfairdatatools/commit/decda23757b0c955812bb1d55c2b20c90d41bd1c)) +* add generate function for study_descripition ([7759e0f](https://github.com/AI-READI/pyfairdatatools/commit/7759e0f9db0366f6ede6ea8a94eb10598dd630db)) +* add license generation ([acf22df](https://github.com/AI-READI/pyfairdatatools/commit/acf22df24afa6fde2b01f0f2f95167ffc68e3b98)) +* add links to schema ids ([bdc905e](https://github.com/AI-READI/pyfairdatatools/commit/bdc905e6bfebc4f32a8867670b9dc0901a262be3)) +* add more valid language codes ([022fdee](https://github.com/AI-READI/pyfairdatatools/commit/022fdee8e521704dc9fe8147000d1a7c5285c73c)) +* add participants validate function ([d7a1bc2](https://github.com/AI-READI/pyfairdatatools/commit/d7a1bc2d6008217f34cf76644eb786fb2a64adb2)) +* add readme generate and validate functions 
([4b4b076](https://github.com/AI-READI/pyfairdatatools/commit/4b4b0760ffac58577055ffafd3132b23f8d1e8ff)) +* add sample validate schema ([34464fe](https://github.com/AI-READI/pyfairdatatools/commit/34464fefe6b6597606228fa4cce1811f01ec6566)) +* add support for folder level validation ([8de70c4](https://github.com/AI-READI/pyfairdatatools/commit/8de70c44cb653310bd10ec0bb3334a35c4569ea3)) +* add validation for langauges ([a882fee](https://github.com/AI-READI/pyfairdatatools/commit/a882feeae6580743810764c3a853ed650b11f628)) +* update dataset_description schema ([20fd4d2](https://github.com/AI-READI/pyfairdatatools/commit/20fd4d23741eb7c6810f67d5d6d9819a6fa8d233)) +* update language list for BCP 47 ([17ec74b](https://github.com/AI-READI/pyfairdatatools/commit/17ec74bfd78a611c1cec91092b0efca017ab5862)) +* update package name to pyfairdatatools ([6b8fe01](https://github.com/AI-READI/pyfairdatatools/commit/6b8fe0140f11692bca1c0c7fe2de3f096b4b245d)) -## [0.3.0] - 2015-12-03 -### Added +### Bug Fixes -- RU translation from [@aishek](https://github.com/aishek). -- pt-BR translation from [@tallesl](https://github.com/tallesl). -- es-ES translation from [@ZeliosAriex](https://github.com/ZeliosAriex). 
+* add fields to validate study description ([6834f55](https://github.com/AI-READI/pyfairdatatools/commit/6834f55ac8dd5dc7e4396b59caed3d56c0c48f9d)) +* add fix for urlllib3 ([dbe21ed](https://github.com/AI-READI/pyfairdatatools/commit/dbe21edbcf4aeeebc7345947376e5161aeb57133)) +* add typing for empty collection ([62a5444](https://github.com/AI-READI/pyfairdatatools/commit/62a544497c52568786d87b480c57296e5f56bf05)) +* update imports ([4109423](https://github.com/AI-READI/pyfairdatatools/commit/4109423b749fb60df3510802dcba4e81100daa80)) +* update key name to lowercase ([607e815](https://github.com/AI-READI/pyfairdatatools/commit/607e815710efac5b7aca2d3bc40e37ae5949358a)) +* update tests ([2bec288](https://github.com/AI-READI/pyfairdatatools/commit/2bec2887ddd4559f900f37b628e0f56811b8fbd5)) +* update versions ([49af758](https://github.com/AI-READI/pyfairdatatools/commit/49af7586de946476b8395e1d98470a0775654c4b)) +* update versions ([127c982](https://github.com/AI-READI/pyfairdatatools/commit/127c98266fcc088ff3cc8334c8e0d901bac99d81)) -## [0.2.0] - 2015-10-06 -### Changed +### Documentation -- Remove exclusionary mentions of "open source" since this project can - benefit both "open" and "closed" source projects equally. - -## [0.1.0] - 2015-10-06 - -### Added - -- Answer "Should you ever rewrite a change log?". - -### Changed - -- Improve argument against commit logs. -- Start following [SemVer](https://semver.org) properly. - -## [0.0.8] - 2015-02-17 - -### Changed - -- Update year to match in every README example. -- Reluctantly stop making fun of Brits only, since most of the world - writes dates in a strange way. - -### Fixed - -- Fix typos in recent README changes. -- Update outdated unreleased diff link. - -## [0.0.7] - 2015-02-16 - -### Added - -- Link, and make it obvious that date format is ISO 8601. - -### Changed - -- Clarified the section on "Is there a standard change log format?". 
- -### Fixed - -- Fix Markdown links to tag comparison URL with footnote-style links. - -## [0.0.6] - 2014-12-12 - -### Added - -- README section on "yanked" releases. - -## [0.0.5] - 2014-08-09 - -### Added - -- Markdown links to version tags on release headings. -- Unreleased section to gather unreleased changes and encourage note - keeping prior to releases. - -## [0.0.4] - 2014-08-09 - -### Added - -- Better explanation of the difference between the file ("CHANGELOG") - and its function "the change log". - -### Changed - -- Refer to a "change log" instead of a "CHANGELOG" throughout the site - to differentiate between the file and the purpose of the file — the - logging of changes. - -### Removed - -- Remove empty sections from CHANGELOG, they occupy too much space and - create too much noise in the file. People will have to assume that the - missing sections were intentionally left out because they contained no - notable changes. - -## [0.0.3] - 2014-08-09 - -### Added - -- "Why should I care?" section mentioning The Changelog podcast. - -## [0.0.2] - 2014-07-10 - -### Added - -- Explanation of the recommended reverse chronological release ordering. - -## [0.0.1] - 2014-05-31 - -### Added - -- This CHANGELOG file to hopefully serve as an evolving example of a - standardized open source project CHANGELOG. -- CNAME file to enable GitHub Pages custom domain. -- README now contains answers to common questions about CHANGELOGs. -- Good examples and basic guidelines, including proper date formatting. -- Counter-examples: "What makes unicorns cry?". 
+* add base content ([c9e9d9d](https://github.com/AI-READI/pyfairdatatools/commit/c9e9d9d2ea70d10b5255116876384af867f9b1dd)) +* add base text ([aeca7b3](https://github.com/AI-READI/pyfairdatatools/commit/aeca7b35cf7f4da5ebcc0b1b001c27efaabd2d98)) +* add docs for dataset_description ([8be3d27](https://github.com/AI-READI/pyfairdatatools/commit/8be3d275e23bb1e69944d6c4b2a52f12e1504b4d)) +* add docs for generate functions ([0249d42](https://github.com/AI-READI/pyfairdatatools/commit/0249d425e65787ea8c5ca32f0b1aaa5ccf49c3fb)) +* add validate readme sections ([6f2f773](https://github.com/AI-READI/pyfairdatatools/commit/6f2f773ec049f059a50adce5495ce08069de06ab)) +* fix build ([22581c9](https://github.com/AI-READI/pyfairdatatools/commit/22581c97869d0bbc065c8f84d8c6b231c0bf0acd)) +* fix build ([c2b9878](https://github.com/AI-READI/pyfairdatatools/commit/c2b9878edb3fb2c84389f3e4841a79cd3bd7cdbe)) +* fix links ([cbdabfd](https://github.com/AI-READI/pyfairdatatools/commit/cbdabfd95d14bf20c31379f5f781fb72e6dba4e1)) +* update for study_description ([e7e63b2](https://github.com/AI-READI/pyfairdatatools/commit/e7e63b2e0314e4e5e49355e35390376ae4476a50)) +* update publish instructions ([dbd0969](https://github.com/AI-READI/pyfairdatatools/commit/dbd09696c94d35ebcfdaf60d12c77148b7acbf0d)) +* update readme with template ([cf0d241](https://github.com/AI-READI/pyfairdatatools/commit/cf0d241c14ed913ba8fad90362134e39b3ffa603)) +* update text ([84b5a1f](https://github.com/AI-READI/pyfairdatatools/commit/84b5a1fab2f97de8732dee3275cf1d24b2a9f41b)) +* update text ([cc85509](https://github.com/AI-READI/pyfairdatatools/commit/cc855094e612f182bcd67710b94055ab84b9511c)) +* update wording ([b5f5547](https://github.com/AI-READI/pyfairdatatools/commit/b5f5547cd5a0d1425fe7564fd32f678bda20e0d8)) \ No newline at end of file diff --git a/docs/about/contributing.md b/docs/about/contributing.md index d17b7ad..8c0730f 100644 --- a/docs/about/contributing.md +++ b/docs/about/contributing.md @@ -21,11 
+21,11 @@ And if you like the project, but just don't have time to contribute, that's fine > If you want to ask a question, we assume that you have read the available [Documentation](https://aireadi.org). -Before you ask a question, it is best to search for existing [Issues](https://github.com/aireadi/pyfairdatatools/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first. +Before you ask a question, it is best to search for existing [Issues](https://github.com/AI-READI/pyfairdatatools/issues) that might help you. In case you have found a suitable issue and still need clarification, you can write your question in this issue. It is also advisable to search the internet for answers first. If you then still feel the need to ask a question and need clarification, we recommend the following: -- Open an [Issue](https://github.com/aireadi/pyfairdatatools/issues/new). +- Open an [Issue](https://github.com/AI-READI/pyfairdatatools/issues/new). - Provide as much context as you can about what you're running into. - Provide project and platform versions (nodejs, npm, etc), depending on what seems relevant. @@ -34,7 +34,7 @@ We will then take care of the issue as soon as possible. ## I Want To Contribute > ### Legal Notice -> + > When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license. ### Reporting Bugs @@ -45,7 +45,7 @@ A good bug report shouldn't leave others needing to chase you up for more inform - Make sure that you are using the latest version. - Determine if your bug is really a bug and not an error on your side e.g. using incompatible environment components/versions (Make sure that you have read the [documentation](https://aireadi.org). 
If you are looking for support, you might want to check [this section](#i-have-a-question)). -- To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/fairdataihub/pyfairdatatools/issues?q=label%3Abug). +- To see if other users have experienced (and potentially already solved) the same issue you are having, check if there is not already a bug report existing for your bug or error in the [bug tracker](https://github.com/AI-READI/pyfairdatatools/issues?q=label%3Abug). - Also make sure to search the internet (including Stack Overflow) to see if users outside of the GitHub community have discussed the issue. - Collect information about the bug: - Stack trace (Traceback) @@ -58,7 +58,7 @@ A good bug report shouldn't leave others needing to chase you up for more inform We use GitHub issues to track bugs and errors. If you run into an issue with the project: -- Open an [Issue](https://github.com/fairdataihub/pyfairdatatools/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.) +- Open an [Issue](https://github.com/AI-READI/pyfairdatatools/issues/new). (Since we can't be sure at this point whether it is a bug or not, we ask you not to talk about a bug yet and not to label the issue.) - Explain the behavior you would expect and the actual behavior. - Please provide as much context as possible and describe the _reproduction steps_ that someone else can follow to recreate the issue on their own. This usually includes your code. For good bug reports you should isolate the problem and create a reduced test case. - Provide the information you collected in the previous section. 
@@ -80,15 +80,15 @@ This section guides you through submitting an enhancement suggestion for pyfaird #### Before Submitting an Enhancement - Make sure that you are using the latest version. -- Read the [documentation](https://aireadi.org) carefully and find out if the functionality is already covered, maybe by an individual configuration. -- Perform a [search](https://github.com/fairdataihub/pyfairdatatools/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. +- Read the [documentation](https://docs.aireadi.org) carefully and find out if the functionality is already covered, maybe by an individual configuration. +- Perform a [search](https://github.com/AI-READI/pyfairdatatools/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. - Find out whether your idea fits with the scope and aims of the project. It's up to you to make a strong case to convince the project's developers of the merits of this feature. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on/plugin library. #### How Do I Submit a Good Enhancement Suggestion? -Enhancement suggestions are tracked as [GitHub issues](https://github.com/fairdataihub/fairdatatools/issues). +Enhancement suggestions are tracked as [GitHub issues](https://github.com/AI-READI/pyfairdatatools/issues/new). - Use a **clear and descriptive title** for the issue to identify the suggestion. - Provide a **step-by-step description of the suggested enhancement** in as many details as possible. 
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css new file mode 100644 index 0000000..fd4a286 --- /dev/null +++ b/docs/stylesheets/extra.css @@ -0,0 +1,4 @@ +/* Expand the main content area width */ +.wy-nav-content { +max-width: 1200px !important; /* Change this number to your liking */ width: 100% !important; +} diff --git a/mkdocs.yml b/mkdocs.yml index 715731d..7b1341d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,16 +8,20 @@ edit_uri: https://github.com/AI-READI/pyfairdatatools/edit/main/docs dev_addr: 127.0.0.1:3000 theme: readthedocs +extra_css: + - stylesheets/extra.css markdown_extensions: - codehilite - admonition - toc: - permalink: ' + permalink: "" + nav: - Home: index.md - Modules: + - Required Paths: modules/required-paths.md - Validate: modules/validate.md - Generate: modules/generate.md - About: diff --git a/pyfairdatatools/schemas/folder_structure.schema.json b/pyfairdatatools/schemas/folder_structure.schema.json index 1a45f92..e3812d5 100644 --- a/pyfairdatatools/schemas/folder_structure.schema.json +++ b/pyfairdatatools/schemas/folder_structure.schema.json @@ -5,136 +5,12 @@ "description": "This is a schema for a folder structure that is used to store a dataset. 
It is based on the BIDS folder structure, but is not identical.", "type": "object", "properties": { - "activity_monitor": { - "README.md": { - "const": "file" - } - }, - "best_corrected_visual_acuity": { - "README.md": { - "const": "file" - } - }, - "blood_test": { - "README.md": { - "const": "file" - } - }, - "center_for_epidemiologic_studies_depression_scale": { - "README.md": { - "const": "file" - } - }, - "consent_form": { - "README.md": { - "const": "file" - } - }, - "continuous_glucose_monitoring": { - "README.md": { - "const": "file" - } - }, - "contrast_sensitivity_test": { - "README.md": { - "const": "file" - } - }, - "demographic_survey": { - "README.md": { - "const": "file" - } - }, - "diabetes_wellbeing_questionnaire": { - "README.md": { - "const": "file" - } - }, - "dietary_questionnaire": { - "README.md": { - "const": "file" - } - }, - "electrocardiogram": { - "README.md": { - "const": "file" - } - }, - "eye_fundus_photography": { - "README.md": { - "const": "file" - } - }, - "general_health_survey": { - "README.md": { - "const": "file" - } - }, - "medication_list": { - "README.md": { - "const": "file" - } - }, - "monofilament_foot_sensation_test": { - "README.md": { - "const": "file" - } - }, - "montreal_cognitive_assessment": { - "README.md": { - "const": "file" - } - }, - "optical_coherence_tomography": { - "README.md": { - "const": "file" - } - }, - "screening_questionnaire": { - "README.md": { - "const": "file" - } - }, - "sdsca": { - "README.md": { - "const": "file" - } - }, - "urine_test": { - "README.md": { - "const": "file" - } - }, - "vital_signs_measurement": { - "README.md": { - "const": "file" - } - }, - "CHANGELOG.md": { - "const": "file" - }, - "LICENSE.txt": { - "const": "file" - }, - "README.md": { - "const": "file" - }, - "dataset_description.json": { - "const": "file" - }, - "participants.json": { - "const": "file" - }, - "participants.tsv": { - "const": "file" - } + "dataset_description.json": { "type": "string" } }, + 
"additionalProperties": true, + "required": [ - "CHANGELOG.md", - "LICENSE.txt", - "README.md", - "dataset_description.json", - "participants.json", - "participants.tsv" + + "dataset_description.json" ] } From 31cc796e5cbe2f13c05be77f618434193d0c2222 Mon Sep 17 00:00:00 2001 From: aydawka Date: Tue, 10 Feb 2026 15:48:46 -0800 Subject: [PATCH 11/17] fix: fix schema --- .../schemas/folder_structure.schema.json | 134 +++++++++++++++++- 1 file changed, 129 insertions(+), 5 deletions(-) diff --git a/pyfairdatatools/schemas/folder_structure.schema.json b/pyfairdatatools/schemas/folder_structure.schema.json index e3812d5..1a45f92 100644 --- a/pyfairdatatools/schemas/folder_structure.schema.json +++ b/pyfairdatatools/schemas/folder_structure.schema.json @@ -5,12 +5,136 @@ "description": "This is a schema for a folder structure that is used to store a dataset. It is based on the BIDS folder structure, but is not identical.", "type": "object", "properties": { - "dataset_description.json": { "type": "string" } + "activity_monitor": { + "README.md": { + "const": "file" + } + }, + "best_corrected_visual_acuity": { + "README.md": { + "const": "file" + } + }, + "blood_test": { + "README.md": { + "const": "file" + } + }, + "center_for_epidemiologic_studies_depression_scale": { + "README.md": { + "const": "file" + } + }, + "consent_form": { + "README.md": { + "const": "file" + } + }, + "continuous_glucose_monitoring": { + "README.md": { + "const": "file" + } + }, + "contrast_sensitivity_test": { + "README.md": { + "const": "file" + } + }, + "demographic_survey": { + "README.md": { + "const": "file" + } + }, + "diabetes_wellbeing_questionnaire": { + "README.md": { + "const": "file" + } + }, + "dietary_questionnaire": { + "README.md": { + "const": "file" + } + }, + "electrocardiogram": { + "README.md": { + "const": "file" + } + }, + "eye_fundus_photography": { + "README.md": { + "const": "file" + } + }, + "general_health_survey": { + "README.md": { + "const": "file" + } + }, + 
"medication_list": { + "README.md": { + "const": "file" + } + }, + "monofilament_foot_sensation_test": { + "README.md": { + "const": "file" + } + }, + "montreal_cognitive_assessment": { + "README.md": { + "const": "file" + } + }, + "optical_coherence_tomography": { + "README.md": { + "const": "file" + } + }, + "screening_questionnaire": { + "README.md": { + "const": "file" + } + }, + "sdsca": { + "README.md": { + "const": "file" + } + }, + "urine_test": { + "README.md": { + "const": "file" + } + }, + "vital_signs_measurement": { + "README.md": { + "const": "file" + } + }, + "CHANGELOG.md": { + "const": "file" + }, + "LICENSE.txt": { + "const": "file" + }, + "README.md": { + "const": "file" + }, + "dataset_description.json": { + "const": "file" + }, + "participants.json": { + "const": "file" + }, + "participants.tsv": { + "const": "file" + } }, - "additionalProperties": true, - "required": [ - - "dataset_description.json" + "CHANGELOG.md", + "LICENSE.txt", + "README.md", + "dataset_description.json", + "participants.json", + "participants.tsv" ] } From 1bba07ffb0453bf504ac3a91f67d5c0fd9d39a5e Mon Sep 17 00:00:00 2001 From: aydawka Date: Mon, 16 Feb 2026 23:47:18 -0800 Subject: [PATCH 12/17] fix: folder validation --- docs/modules/validate.md | 43 ++++ mkdocs.yml | 1 - pyfairdatatools/validate.py | 393 ++++++++++++++++++++++++++++++------ 3 files changed, 379 insertions(+), 58 deletions(-) diff --git a/docs/modules/validate.md b/docs/modules/validate.md index 5ee5de2..eedef51 100644 --- a/docs/modules/validate.md +++ b/docs/modules/validate.md @@ -16,6 +16,49 @@ The validation module provides a way to validate data against a set of rules. The following methods are available in the `validate` module. Each method is described in detail below. +### Validate Folder Structure + +You can call the `validate_folder_structure` method to validate the data needed to create a dataset_description file. 
+ +#### Required or optional files + +The following files are required inside the directory path: + +- `dataset_description.json` +- `study_description.json` +- `README\[.md\|.rst\|.txt\]` +- `CITATION.cff` +- `CHANGELOG\[.md\|.rst\|.txt\]` +- `LICENSE\[.md\|.rst\|.txt\]` + +------------------------------------------------------------------------ + +## dataset_description.json Fields + +The file `dataset_description.json` is a JSON file describing the dataset. + + +#### Returns + +| Field | Type | Description | +|----------|---------|----------------------------------------------------------| +| valid | Boolean | Returns `True` if the dataset is valid, `False` otherwise. | +| errors | List | List of validation errors (blocking issues). | +| warnings | List | List of validation warnings (non-blocking issues). | + +#### How to use + +```python +from pyfairdatatools import validate + +folder_path = "/dataset_path" + +output = validate.validate_folder_structure(folder_path=folder_path) + +print(output) # True + +``` + ### Validate Dataset Description You can call the `validate_dataset_description` method to validate the data needed to create a dataset_description file. diff --git a/mkdocs.yml b/mkdocs.yml index 7b1341d..1a098ab 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -21,7 +21,6 @@ markdown_extensions: nav: - Home: index.md - Modules: - - Required Paths: modules/required-paths.md - Validate: modules/validate.md - Generate: modules/generate.md - About: diff --git a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index f4309b3..9404fbc 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -3,8 +3,47 @@ import yaml from jsonschema import ValidationError, validate +import logging +import sys +import re +import csv -# from .
import utils +SUCCESS = 25 +logging.addLevelName(SUCCESS, "SUCCESS") + +def success(self, message, *args, **kwargs): + if self.isEnabledFor(SUCCESS): + self._log(SUCCESS, message, args, **kwargs) + +logging.Logger.success = success + +# Color Formatter +class ColorFormatter(logging.Formatter): + + COLORS = { + logging.DEBUG: "\033[90m", # Gray + logging.INFO: "\033[94m", # Blue + logging.WARNING: "\033[93m", # Yellow + logging.ERROR: "\033[91m", # Red + logging.CRITICAL: "\033[95m", # Magenta + SUCCESS: "\033[92m", # Green + } + + RESET = "\033[0m" + + def format(self, record): + color = self.COLORS.get(record.levelno, self.RESET) + msg = super().format(record) + return f"{color}{msg}{self.RESET}" + +# Logger Setup +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(ColorFormatter("%(levelname)s ▶ %(message)s")) + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +logger.handlers.clear() +logger.addHandler(handler) def validate_dataset_description(data, verbose=False): # sourcery skip: extract-method @@ -229,13 +268,25 @@ def validate_participants(data): """ schema = {} - # Import the schema from the schemas folder with open( os.path.join(os.path.dirname(__file__), "schemas", "participants.schema.json"), encoding="utf-8", ) as f: schema = json.load(f) + # Allow person_id as an alternative to participant_id + if "items" in schema and "required" in schema["items"]: + if "participant_id" in schema["items"]["properties"]: + schema["items"]["properties"]["participant_id"].pop("pattern", None) + if "person_id" not in schema["items"]["properties"]: + schema["items"]["properties"]["person_id"] = schema["items"]["properties"]["participant_id"].copy() + + for participant in data: + if "person_id" in participant and "participant_id" not in participant: + participant["participant_id"] = participant["person_id"] + if "age" in participant and participant["age"]: + participant["age"] = float(participant["age"]) + try: validate(instance=data, 
schema=schema) @@ -252,61 +303,6 @@ def validate_participants(data): raise error -def validate_folder_structure(folder_path): - """Validate that a folder structure is valid. - - We do this by generating a json tree of the folder and file structure and - validating it against a schema. - This will allow us to expand the schema in the future to include more complex - folder structures. - Certain folder structures (ones inside of dynamic folders) will not be able to - be validated by this method. - - Args: - folder_path (str): The path to the folder to validate - Returns: - bool: True if the folder structure is valid, False otherwise - """ - - def path_to_dict(path): - d = {} # type: dict - - if not os.path.exists(path): - return d - - for x in os.listdir(path): - key = os.path.basename(x) - - if os.path.isdir(os.path.join(path, x)): - d[key] = path_to_dict(os.path.join(path, x)) - else: - d[key] = "file" - - return d - - # Import the schema from the schemas folder - with open( - os.path.join( - os.path.dirname(__file__), "schemas", "folder_structure.schema.json" - ), - encoding="utf-8", - ) as f: - schema = json.load(f) - - folder_structure_as_dict = path_to_dict(folder_path) - - try: - validate(instance=folder_structure_as_dict, schema=schema) - - return True - except ValidationError as e: - print(e.schema["error_msg"] if "error_msg" in e.schema else e.message) - return False - except Exception as error: - print(error) - raise error - - def validate_datatype_dictionary(data): """Validate a datatype description against the scheme. @@ -350,3 +346,286 @@ def validate_datatype_dictionary(data): except Exception as error: print(error) raise error + + +def validate_file_path(file_path, preexisting_file=False, writable=False): + """Validate a file path. 
Checks if the file exists, is a file, and is writable.""" + if file_path == "": + print("File path is empty.") + raise ValueError("Invalid input") + + if preexisting_file: + if not os.path.exists(file_path): + print("File path does not exist.") + raise FileNotFoundError("File not found") + + if not os.path.isfile(file_path): + print("File path is not a file.") + raise ValueError("Invalid input") + + if writable and not os.access(file_path, os.W_OK): # ← DÜZELTİLDİ: 'not' eklendi + print("File path is not writable.") + raise PermissionError("Permission denied") + + return True + + +REQUIRED_FILES = [ + "dataset_description.json", + "CHANGELOG.md", + "LICENSE.txt", + "README.md", + "participants.json", + "participants.tsv" +] + +OPTIONAL_FILES = [ + "readme.md", + "changelog.md", + "study_description.txt", + "license.txt", +] + +ALLOWED_TEXT_EXTS = {".md", ".txt", ".rst"} + +NAME_PATTERN = re.compile(r'^[a-z0-9._-]+$') + +WINDOWS_RESERVED = { + "con", "prn", "aux", "nul", + "com1","com2","com3","com4","com5","com6","com7","com8","com9", + "lpt1","lpt2","lpt3","lpt4","lpt5","lpt6","lpt7","lpt8","lpt9" +} + +ALLOWED_UPPERCASE_FILES = { + "readme.md", "readme.txt", "readme.rst", + "changelog.md", "changelog.txt", "changelog.rst", + "license.md", "license.txt", "license.rst", "license", +} + + +def validate_names(root): + + errors = [] + warnings = [] + + for dirpath, dirnames, filenames in os.walk(root): + + for name in dirnames + filenames: + + full = os.path.join(dirpath, name) + base = os.path.splitext(name)[0].lower() + name_lower = name.lower() + + # Spaces + if " " in name: + errors.append(f"Space in name: {full}") + + # Uppercase - but allow certain conventional files + if any(c.isupper() for c in name): + if name_lower not in ALLOWED_UPPERCASE_FILES: + errors.append(f"Uppercase in name: {full}") + + # Invalid chars - skip check for allowed uppercase files + if name_lower not in ALLOWED_UPPERCASE_FILES: # ← YENİ SATIR + if not NAME_PATTERN.match(name): + 
errors.append(f"Invalid characters: {full}") + + # Hidden files + if name.startswith("."): + warnings.append(f"Hidden file/folder: {full}") + + # Windows reserved + if base in WINDOWS_RESERVED: + errors.append(f"Windows reserved name: {full}") + + # Long name + if len(name) > 150: + warnings.append(f"Very long name: {full}") + + return errors, warnings + + +def validate_folder_structure(folder_path): + + logger.info("Starting folder structure validation...") + + if not os.path.isdir(folder_path): + logger.error("Folder not found: %s", folder_path) + return False + + name_errors, name_warnings = validate_names(folder_path) + + for w in name_warnings: + logger.warning(w) + + if name_errors: + for e in name_errors: + logger.error(e) + return False + + # SCHEMA CHECK + def path_to_dict(path): + d = {} + for x in os.listdir(path): + p = os.path.join(path, x) + if os.path.isdir(p): + d[x] = path_to_dict(p) + else: + d[x] = "file" + return d + + try: + with open( + os.path.join(os.path.dirname(__file__), "schemas", "folder_structure.schema.json"), + encoding="utf-8" + ) as f: + schema = json.load(f) + + tree = path_to_dict(folder_path) + validate(instance=tree, schema=schema) + logger.info("Folder structure matches schema") + + except ValidationError as e: + logger.error("FAILED: Folder structure invalid (%s)", e.message) + return False + except Exception as e: + logger.error("Schema error: %s", e) + return False + + # FILE VALIDATION + files = os.listdir(folder_path) + all_valid = True + + # ── Required files ────────────────────────────────────────────────────────── + for fname in REQUIRED_FILES: + if fname not in files: + logger.error("Missing required file: %s", fname) + return False + logger.info("Found required file: %s", fname) + + # ── dataset_description.json ─────────────────────────────────────────────── + dd_path = os.path.join(folder_path, "dataset_description.json") + logger.info("Validating dataset_description.json...") + try: + with open(dd_path, 
encoding="utf-8") as f: + dd_data = json.load(f) + if validate_dataset_description(dd_data): + logger.success("dataset_description.json is valid") + else: + logger.error("dataset_description.json failed validation") + all_valid = False + except json.JSONDecodeError as e: + logger.error("dataset_description.json is not valid JSON: %s", e) + all_valid = False + except Exception as e: # ← YENİ EKLENEN EXCEPTION HANDLER + logger.error("Error reading dataset_description.json: %s", e) + all_valid = False + + for fname in files: + name_lower = fname.lower() + file_path = os.path.join(folder_path, fname) + _, ext = os.path.splitext(fname) + ext = ext.lower() + + # readme + if name_lower.startswith("readme") and ext in ALLOWED_TEXT_EXTS: + logger.info("Validating %s...", fname) + try: + with open(file_path, encoding="utf-8") as f: + content = f.read() + + readme_data = {} + current_key = None + current_value = [] + + for line in content.split("\n"): + stripped = line.strip() + if stripped.startswith("#"): + if current_key: + readme_data[current_key] = "\n".join(current_value).strip() + current_key = stripped.lstrip("#").strip() + current_value = [] + elif current_key: + current_value.append(stripped) + + if current_key: + readme_data[current_key] = "\n".join(current_value).strip() + + if validate_readme(readme_data): + logger.success("%s is valid", fname) + else: + logger.error("%s failed validation", fname) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", fname, e) + all_valid = False + + # study_description + elif name_lower.startswith("study_description") and ext in ALLOWED_TEXT_EXTS: + logger.info("Validating %s...", fname) + try: + with open(file_path, encoding="utf-8") as f: + study_data = json.load(f) + if validate_study_description(study_data): + logger.success("%s is valid", fname) + else: + logger.error("%s failed validation", fname) + all_valid = False + except json.JSONDecodeError as e: + logger.error("%s is not valid JSON: 
%s", fname, e) + all_valid = False + except Exception as e: # ← YENİ EKLENEN EXCEPTION HANDLER + logger.error("Error reading %s: %s", fname, e) + all_valid = False + + # changelog — no dedicated validator, just check extension + elif name_lower.startswith("changelog"): + if ext not in ALLOWED_TEXT_EXTS: + logger.error("Invalid extension for %s", fname) + all_valid = False + else: + logger.info("Found changelog file: %s (no schema validator)", fname) + + # license + elif name_lower.startswith("license"): + logger.info("Validating license in %s...", fname) + try: + with open(file_path, encoding="utf-8") as f: + license_text = f.read().strip() + if validate_license(license_text): + logger.success("%s has a valid license identifier", fname) + else: + logger.error("%s has an invalid license identifier: '%s'", fname, license_text) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", fname, e) + all_valid = False + + # participants files (anywhere in the folder tree) ─────────────────────── + for dirpath, _, filenames in os.walk(folder_path): + for fname in filenames: + if fname.lower().startswith("participants") and fname.lower().endswith(".tsv"): + file_path = os.path.join(dirpath, fname) + logger.info("Validating participants file: %s...", file_path) + try: + with open(file_path, encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter="\t") + participants_data = list(reader) + if validate_participants(participants_data): + logger.success("%s is valid", file_path) + else: + logger.error("%s failed validation", file_path) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", file_path, e) + all_valid = False + # ── DONE ─────────────────────────────────────────────────────────────────── + if all_valid: + logger.success("All files fully validated") + else: + logger.error("Validation completed with errors") + + return all_valid + + +validate_folder_structure("tests") \ No newline at end of file From 
a00bd5dc806721ca18d489153cba26159d320eca Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 16:43:05 -0800 Subject: [PATCH 13/17] fix: update css and docs --- docs/modules/validate.md | 5 ----- docs/stylesheets/extra.css | 4 ---- mkdocs.yml | 2 -- 3 files changed, 11 deletions(-) delete mode 100644 docs/stylesheets/extra.css diff --git a/docs/modules/validate.md b/docs/modules/validate.md index eedef51..4ec6c90 100644 --- a/docs/modules/validate.md +++ b/docs/modules/validate.md @@ -33,11 +33,6 @@ Following files are required to have inside the directory path: ------------------------------------------------------------------------ -## dataset_description.json Fields - -The file `dataset_description.json` is a JSON file describing the dataset. - - #### Returns- | Field | Type | Description | diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css deleted file mode 100644 index fd4a286..0000000 --- a/docs/stylesheets/extra.css +++ /dev/null @@ -1,4 +0,0 @@ -/* Expand the main content area width */ -.wy-nav-content { -max-width: 1200px !important; /* Change this number to your liking */ width: 100% !important; -} diff --git a/mkdocs.yml b/mkdocs.yml index 1a098ab..53b8202 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -8,8 +8,6 @@ edit_uri: https://github.com/AI-READI/pyfairdatatools/edit/main/docs dev_addr: 127.0.0.1:3000 theme: readthedocs -extra_css: - - stylesheets/extra.css markdown_extensions: - codehilite From 35a4ada4b446f840ab09520e04ea9d6e185a3581 Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 17:51:18 -0800 Subject: [PATCH 14/17] fix: update the validation path --- mkdocs.yml | 2 +- pyfairdatatools/validate.py | 200 ++++++++++++++++-------------------- 2 files changed, 87 insertions(+), 115 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 53b8202..34da8e4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,7 +13,7 @@ markdown_extensions: - codehilite - admonition - toc: - permalink: "" + permalink:  nav: diff --git 
a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index 9404fbc..aca353c 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -363,22 +363,13 @@ def validate_file_path(file_path, preexisting_file=False, writable=False): print("File path is not a file.") raise ValueError("Invalid input") - if writable and not os.access(file_path, os.W_OK): # ← DÜZELTİLDİ: 'not' eklendi + if writable and not os.access(file_path, os.W_OK): print("File path is not writable.") raise PermissionError("Permission denied") return True -REQUIRED_FILES = [ - "dataset_description.json", - "CHANGELOG.md", - "LICENSE.txt", - "README.md", - "participants.json", - "participants.tsv" -] - OPTIONAL_FILES = [ "readme.md", "changelog.md", @@ -386,8 +377,6 @@ def validate_file_path(file_path, preexisting_file=False, writable=False): "license.txt", ] -ALLOWED_TEXT_EXTS = {".md", ".txt", ".rst"} - NAME_PATTERN = re.compile(r'^[a-z0-9._-]+$') WINDOWS_RESERVED = { @@ -420,12 +409,12 @@ def validate_names(root): if " " in name: errors.append(f"Space in name: {full}") - # Uppercase - but allow certain conventional files + # Allow certain conventional files if any(c.isupper() for c in name): if name_lower not in ALLOWED_UPPERCASE_FILES: errors.append(f"Uppercase in name: {full}") - # Invalid chars - skip check for allowed uppercase files + # Invalid chars if name_lower not in ALLOWED_UPPERCASE_FILES: # ← YENİ SATIR if not NAME_PATTERN.match(name): errors.append(f"Invalid characters: {full}") @@ -438,7 +427,6 @@ def validate_names(root): if base in WINDOWS_RESERVED: errors.append(f"Windows reserved name: {full}") - # Long name if len(name) > 150: warnings.append(f"Very long name: {full}") @@ -446,24 +434,18 @@ def validate_names(root): def validate_folder_structure(folder_path): - logger.info("Starting folder structure validation...") - if not os.path.isdir(folder_path): - logger.error("Folder not found: %s", folder_path) + logger.error(f"Folder not found: {folder_path}", ) 
return False - name_errors, name_warnings = validate_names(folder_path) - for w in name_warnings: logger.warning(w) - if name_errors: for e in name_errors: logger.error(e) return False - - # SCHEMA CHECK + # schema check def path_to_dict(path): d = {} for x in os.listdir(path): @@ -474,14 +456,15 @@ def path_to_dict(path): d[x] = "file" return d + required_files = [] try: with open( os.path.join(os.path.dirname(__file__), "schemas", "folder_structure.schema.json"), encoding="utf-8" ) as f: schema = json.load(f) - tree = path_to_dict(folder_path) + required_files.extend(schema["required"]) validate(instance=tree, schema=schema) logger.info("Folder structure matches schema") @@ -491,20 +474,19 @@ def path_to_dict(path): except Exception as e: logger.error("Schema error: %s", e) return False - - # FILE VALIDATION files = os.listdir(folder_path) all_valid = True - # ── Required files ────────────────────────────────────────────────────────── - for fname in REQUIRED_FILES: - if fname not in files: - logger.error("Missing required file: %s", fname) + # Required files check + for f in required_files: + if f not in files: + logger.error("Missing required file: %s", f) return False - logger.info("Found required file: %s", fname) + logger.info("Found required file: %s", f) - # ── dataset_description.json ─────────────────────────────────────────────── dd_path = os.path.join(folder_path, "dataset_description.json") + + # dataset_description.json logger.info("Validating dataset_description.json...") try: with open(dd_path, encoding="utf-8") as f: @@ -517,95 +499,87 @@ def path_to_dict(path): except json.JSONDecodeError as e: logger.error("dataset_description.json is not valid JSON: %s", e) all_valid = False - except Exception as e: # ← YENİ EKLENEN EXCEPTION HANDLER + except Exception as e: logger.error("Error reading dataset_description.json: %s", e) all_valid = False - for fname in files: - name_lower = fname.lower() - file_path = os.path.join(folder_path, fname) - _, ext = 
os.path.splitext(fname) - ext = ext.lower() - - # readme - if name_lower.startswith("readme") and ext in ALLOWED_TEXT_EXTS: - logger.info("Validating %s...", fname) - try: - with open(file_path, encoding="utf-8") as f: - content = f.read() - - readme_data = {} - current_key = None - current_value = [] - - for line in content.split("\n"): - stripped = line.strip() - if stripped.startswith("#"): - if current_key: - readme_data[current_key] = "\n".join(current_value).strip() - current_key = stripped.lstrip("#").strip() - current_value = [] - elif current_key: - current_value.append(stripped) + # study_description + s_description_path = os.path.join(folder_path, "study_description.json") + logger.info("Validating %s...", s_description_path) + try: + with open(s_description_path, encoding="utf-8") as f: + study_data = json.load(f) + if validate_study_description(study_data): + logger.success("%s is valid", s_description_path) + else: + logger.error("%s failed validation", s_description_path) + all_valid = False + except json.JSONDecodeError as e: + logger.error("%s is not valid JSON: %s", s_description_path, e) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", s_description_path, e) + all_valid = False + # readme + readme_path = os.path.join(folder_path, "readme.md") + logger.info("Validating %s...", readme_path) + try: + with open(readme_path, encoding="utf-8") as f: + content = f.read() + readme_data = {} + current_key = None + current_value = [] + for line in content.split("\n"): + stripped = line.strip() + if stripped.startswith("#"): if current_key: readme_data[current_key] = "\n".join(current_value).strip() + current_key = stripped.lstrip("#").strip() + current_value = [] + elif current_key: + current_value.append(stripped) - if validate_readme(readme_data): - logger.success("%s is valid", fname) - else: - logger.error("%s failed validation", fname) - all_valid = False - except Exception as e: - logger.error("Error reading %s: 
%s", fname, e) - all_valid = False - - # study_description - elif name_lower.startswith("study_description") and ext in ALLOWED_TEXT_EXTS: - logger.info("Validating %s...", fname) - try: - with open(file_path, encoding="utf-8") as f: - study_data = json.load(f) - if validate_study_description(study_data): - logger.success("%s is valid", fname) - else: - logger.error("%s failed validation", fname) - all_valid = False - except json.JSONDecodeError as e: - logger.error("%s is not valid JSON: %s", fname, e) - all_valid = False - except Exception as e: # ← YENİ EKLENEN EXCEPTION HANDLER - logger.error("Error reading %s: %s", fname, e) - all_valid = False - - # changelog — no dedicated validator, just check extension - elif name_lower.startswith("changelog"): - if ext not in ALLOWED_TEXT_EXTS: - logger.error("Invalid extension for %s", fname) - all_valid = False - else: - logger.info("Found changelog file: %s (no schema validator)", fname) - - # license - elif name_lower.startswith("license"): - logger.info("Validating license in %s...", fname) - try: - with open(file_path, encoding="utf-8") as f: - license_text = f.read().strip() - if validate_license(license_text): - logger.success("%s has a valid license identifier", fname) - else: - logger.error("%s has an invalid license identifier: '%s'", fname, license_text) - all_valid = False - except Exception as e: - logger.error("Error reading %s: %s", fname, e) - all_valid = False + if current_key: + readme_data[current_key] = "\n".join(current_value).strip() - # participants files (anywhere in the folder tree) ─────────────────────── + if validate_readme(readme_data): + logger.success("%s is valid", readme_path) + else: + logger.error("%s failed validation", readme_path) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", readme_path, e) + all_valid = False + + # changelog — no dedicated validator, just check extension + if os.path.join(folder_path, "readme.md"): + logger.info("Found 
changelog file: %s", readme_path) + + # license + all_license_paths = next((f for f in files if f.lower() == "license.txt"), None) + license_path = os.path.join(folder_path, all_license_paths) + try: + if not license_path: + logger.error("Missing required file: license.txt") + return False + logger.info("Validating license in %s...", license_path) + + with open(license_path, encoding="utf-8") as f: + license_text = f.read().strip() + if validate_license(license_text): + logger.success("%s has a valid license identifier", license_path) + else: + logger.error("%s has an invalid license identifier: '%s'", license_path, license_text) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", license_path, e) + all_valid = False + # participants files for dirpath, _, filenames in os.walk(folder_path): - for fname in filenames: - if fname.lower().startswith("participants") and fname.lower().endswith(".tsv"): - file_path = os.path.join(dirpath, fname) + for f_name in filenames: + if f_name.lower().startswith("participants") and f_name.lower().endswith(".tsv"): + file_path = os.path.join(dirpath, f_name) logger.info("Validating participants file: %s...", file_path) try: with open(file_path, encoding="utf-8") as f: @@ -619,12 +593,10 @@ def path_to_dict(path): except Exception as e: logger.error("Error reading %s: %s", file_path, e) all_valid = False - # ── DONE ─────────────────────────────────────────────────────────────────── if all_valid: logger.success("All files fully validated") else: logger.error("Validation completed with errors") - return all_valid From 3ca455793f7c143d846d51a647ddd038cf20e9f4 Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 21:55:32 -0800 Subject: [PATCH 15/17] fix: finish validation --- docs/modules/validate.md | 12 +-- pyfairdatatools/validate.py | 176 ++++++++++++++++++------------------ 2 files changed, 92 insertions(+), 96 deletions(-) diff --git a/docs/modules/validate.md b/docs/modules/validate.md index 
4ec6c90..b17de4a 100644 --- a/docs/modules/validate.md +++ b/docs/modules/validate.md @@ -25,15 +25,15 @@ You can call the `validate_folder_structure` method to validate the data needed Following files are required to have inside the directory path: - `dataset_description.json` -- `study_description.json` -- `README\[.md\|.rst\|.txt\]` -- `CITATION.cff` -- `CHANGELOG\[.md\|.rst\|.txt\]` -- `LICENSE\[.md\|.rst\|.txt\]` +- `participants.json` +- `participants.tsv` +- `README.md` +- `CHANGELOG.md` +- `LICENSE.md` ------------------------------------------------------------------------ -#### Returns- +#### Returns | Field | Type | Description | |----------|---------|----------------------------------------------------------| diff --git a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index aca353c..06a7955 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -370,37 +370,23 @@ def validate_file_path(file_path, preexisting_file=False, writable=False): return True -OPTIONAL_FILES = [ - "readme.md", - "changelog.md", - "study_description.txt", - "license.txt", -] - -NAME_PATTERN = re.compile(r'^[a-z0-9._-]+$') - -WINDOWS_RESERVED = { - "con", "prn", "aux", "nul", - "com1","com2","com3","com4","com5","com6","com7","com8","com9", - "lpt1","lpt2","lpt3","lpt4","lpt5","lpt6","lpt7","lpt8","lpt9" -} - -ALLOWED_UPPERCASE_FILES = { - "readme.md", "readme.txt", "readme.rst", - "changelog.md", "changelog.txt", "changelog.rst", - "license.md", "license.txt", "license.rst", "license", -} - - def validate_names(root): - + """This function validates the names of the + files against the schema.""" + windows_reserved = { + "con", "prn", "aux", "nul", + "lpt1","lpt2","lpt3","lpt4","lpt5","lpt6","lpt7","lpt8","lpt9", + "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9" + } + allowed_uppercase_files = { + "readme.md", "readme.txt", "readme.rst", + "changelog.md", "changelog.txt", "changelog.rst", + "license.md", "license.txt", 
"license.rst", "license", + } errors = [] warnings = [] - - for dirpath, dirnames, filenames in os.walk(root): - - for name in dirnames + filenames: - + for dirpath, dir_names, file_names in os.walk(root): + for name in dir_names + file_names: full = os.path.join(dirpath, name) base = os.path.splitext(name)[0].lower() name_lower = name.lower() @@ -409,14 +395,15 @@ def validate_names(root): if " " in name: errors.append(f"Space in name: {full}") - # Allow certain conventional files + # Allow some conventional files if any(c.isupper() for c in name): - if name_lower not in ALLOWED_UPPERCASE_FILES: + if name_lower not in allowed_uppercase_files: errors.append(f"Uppercase in name: {full}") # Invalid chars - if name_lower not in ALLOWED_UPPERCASE_FILES: # ← YENİ SATIR - if not NAME_PATTERN.match(name): + name_pattern = re.compile(r'^[a-z0-9._-]+$') + if name_lower not in allowed_uppercase_files: + if not name_pattern.match(name): errors.append(f"Invalid characters: {full}") # Hidden files @@ -424,7 +411,7 @@ def validate_names(root): warnings.append(f"Hidden file/folder: {full}") # Windows reserved - if base in WINDOWS_RESERVED: + if base in windows_reserved: errors.append(f"Windows reserved name: {full}") if len(name) > 150: @@ -445,16 +432,6 @@ def validate_folder_structure(folder_path): for e in name_errors: logger.error(e) return False - # schema check - def path_to_dict(path): - d = {} - for x in os.listdir(path): - p = os.path.join(path, x) - if os.path.isdir(p): - d[x] = path_to_dict(p) - else: - d[x] = "file" - return d required_files = [] try: @@ -463,9 +440,13 @@ def path_to_dict(path): encoding="utf-8" ) as f: schema = json.load(f) - tree = path_to_dict(folder_path) + folder_tree = { + f: "file" if os.path.isfile(os.path.join(folder_path, f)) else "directory" + for f in os.listdir(folder_path) + } + required_files.extend(schema["required"]) - validate(instance=tree, schema=schema) + validate(instance=folder_tree, schema=schema) logger.info("Folder structure 
matches schema") except ValidationError as e: @@ -484,15 +465,14 @@ def path_to_dict(path): return False logger.info("Found required file: %s", f) - dd_path = os.path.join(folder_path, "dataset_description.json") - # dataset_description.json - logger.info("Validating dataset_description.json...") + dd_path = os.path.join(folder_path, "dataset_description.json") try: with open(dd_path, encoding="utf-8") as f: dd_data = json.load(f) + logger.info("Validating dataset_description.json...") if validate_dataset_description(dd_data): - logger.success("dataset_description.json is valid") + logger.success("%s is valid", dd_path) else: logger.error("dataset_description.json failed validation") all_valid = False @@ -505,21 +485,22 @@ def path_to_dict(path): # study_description s_description_path = os.path.join(folder_path, "study_description.json") - logger.info("Validating %s...", s_description_path) - try: - with open(s_description_path, encoding="utf-8") as f: - study_data = json.load(f) - if validate_study_description(study_data): - logger.success("%s is valid", s_description_path) - else: - logger.error("%s failed validation", s_description_path) + if os.path.isfile(s_description_path): + logger.info("Validating %s...", s_description_path) + try: + with open(s_description_path, encoding="utf-8") as f: + study_data = json.load(f) + if validate_study_description(study_data): + logger.success("%s is valid", s_description_path) + else: + logger.error("%s failed validation", s_description_path) + all_valid = False + except json.JSONDecodeError as e: + logger.error("%s is not valid JSON: %s", s_description_path, e) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", s_description_path, e) all_valid = False - except json.JSONDecodeError as e: - logger.error("%s is not valid JSON: %s", s_description_path, e) - all_valid = False - except Exception as e: - logger.error("Error reading %s: %s", s_description_path, e) - all_valid = False # readme 
readme_path = os.path.join(folder_path, "readme.md") @@ -539,7 +520,6 @@ def path_to_dict(path): current_value = [] elif current_key: current_value.append(stripped) - if current_key: readme_data[current_key] = "\n".join(current_value).strip() @@ -552,52 +532,68 @@ def path_to_dict(path): logger.error("Error reading %s: %s", readme_path, e) all_valid = False - # changelog — no dedicated validator, just check extension - if os.path.join(folder_path, "readme.md"): - logger.info("Found changelog file: %s", readme_path) + # changelog + all_changelog_paths = next((file for file in files if file.lower() == "changelog.md"), None) + logger.info("Validating %s...", all_changelog_paths) + changelog_path = os.path.join(folder_path, all_changelog_paths) + try: + if not changelog_path: + logger.error("Missing required file: changelog.md") + return False + except Exception as e: + logger.error("Error reading %s: %s", changelog_path, e) + all_valid = False + logger.success("%s is valid", changelog_path) # license - all_license_paths = next((f for f in files if f.lower() == "license.txt"), None) + all_license_paths = next((file for file in files if file.lower() == "license.txt"), None) license_path = os.path.join(folder_path, all_license_paths) try: if not license_path: logger.error("Missing required file: license.txt") return False - logger.info("Validating license in %s...", license_path) - + logger.info("Validating %s...", license_path) with open(license_path, encoding="utf-8") as f: license_text = f.read().strip() if validate_license(license_text): - logger.success("%s has a valid license identifier", license_path) + logger.success("%s is valid", license_path) else: logger.error("%s has an invalid license identifier: '%s'", license_path, license_text) all_valid = False except Exception as e: logger.error("Error reading %s: %s", license_path, e) all_valid = False + # participants files - for dirpath, _, filenames in os.walk(folder_path): - for f_name in filenames: - if 
f_name.lower().startswith("participants") and f_name.lower().endswith(".tsv"): - file_path = os.path.join(dirpath, f_name) - logger.info("Validating participants file: %s...", file_path) - try: - with open(file_path, encoding="utf-8") as f: - reader = csv.DictReader(f, delimiter="\t") - participants_data = list(reader) - if validate_participants(participants_data): - logger.success("%s is valid", file_path) - else: - logger.error("%s failed validation", file_path) - all_valid = False - except Exception as e: - logger.error("Error reading %s: %s", file_path, e) - all_valid = False + participant_path = os.path.join(folder_path, "participants.tsv") + logger.info("Validating participants file: %s...", participant_path) + try: + with open(participant_path, encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter="\t") + participants_data = list(reader) + if validate_participants(participants_data): + logger.success("%s is valid", participant_path) + else: + logger.error("%s failed validation", participant_path) + all_valid = False + except Exception as e: + logger.error("Error reading %s: %s", participant_path, e) + all_valid = False if all_valid: logger.success("All files fully validated") else: logger.error("Validation completed with errors") - return all_valid + # participant.json + participant_json_path = os.path.join(folder_path, "participant.json") + logger.info("Validating %s...", participant_json_path) + try: + if not participant_json_path: + logger.error("Missing required file: participant.json") + return False + logger.success("%s is valid", participant_json_path) + except Exception as e: + logger.error("Error reading %s: %s", participant_json_path, e) + all_valid = False -validate_folder_structure("tests") \ No newline at end of file + return all_valid From 3498341ba6f5a847d30762d599b60d3eddefefd5 Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 21:55:48 -0800 Subject: [PATCH 16/17] chore: update version --- pyproject.toml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d97665f..2649e46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "pyfairdatatools" -version = "1.2.0b12" +version = "1.2.0b13" description = "Tools for AI-READI" packages = [{ include = "pyfairdatatools" }] From a4cbe331369b48eba410431c529d1a3dab919855 Mon Sep 17 00:00:00 2001 From: aydawka Date: Wed, 4 Mar 2026 23:31:35 -0800 Subject: [PATCH 17/17] fix: dd validation --- pyfairdatatools/validate.py | 43 ++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/pyfairdatatools/validate.py b/pyfairdatatools/validate.py index 06a7955..e56c3cf 100644 --- a/pyfairdatatools/validate.py +++ b/pyfairdatatools/validate.py @@ -276,16 +276,12 @@ def validate_participants(data): # Allow person_id as an alternative to participant_id if "items" in schema and "required" in schema["items"]: - if "participant_id" in schema["items"]["properties"]: - schema["items"]["properties"]["participant_id"].pop("pattern", None) if "person_id" not in schema["items"]["properties"]: schema["items"]["properties"]["person_id"] = schema["items"]["properties"]["participant_id"].copy() for participant in data: if "person_id" in participant and "participant_id" not in participant: participant["participant_id"] = participant["person_id"] - if "age" in participant and participant["age"]: - participant["age"] = float(participant["age"]) try: validate(instance=data, schema=schema) @@ -564,36 +560,35 @@ def validate_folder_structure(folder_path): logger.error("Error reading %s: %s", license_path, e) all_valid = False - # participants files - participant_path = os.path.join(folder_path, "participants.tsv") - logger.info("Validating participants file: %s...", participant_path) + # participants json + participant_json_path = os.path.join(folder_path, "participants.json") + logger.info("Validating participants file: %s...", 
participant_json_path) try: - with open(participant_path, encoding="utf-8") as f: - reader = csv.DictReader(f, delimiter="\t") - participants_data = list(reader) + with open(participant_json_path, encoding="utf-8") as f: + participants_data = json.load(f) if validate_participants(participants_data): - logger.success("%s is valid", participant_path) + logger.success("%s is valid", participant_json_path) else: - logger.error("%s failed validation", participant_path) + logger.error("%s failed validation", participant_json_path) all_valid = False except Exception as e: - logger.error("Error reading %s: %s", participant_path, e) + logger.error("Error reading %s: %s", participant_json_path, e) all_valid = False - if all_valid: - logger.success("All files fully validated") - else: - logger.error("Validation completed with errors") - # participant.json - participant_json_path = os.path.join(folder_path, "participant.json") - logger.info("Validating %s...", participant_json_path) + # participant.tsv + participant_tsv_path = os.path.join(folder_path, "participants.tsv") + logger.info("Validating %s...", participant_tsv_path) try: - if not participant_json_path: - logger.error("Missing required file: participant.json") + if not participant_tsv_path: + logger.error("Missing required file: participant.tsv") return False - logger.success("%s is valid", participant_json_path) + logger.success("%s is valid", participant_tsv_path) except Exception as e: - logger.error("Error reading %s: %s", participant_json_path, e) + logger.error("Error reading %s: %s", participant_tsv_path, e) all_valid = False + if all_valid: + logger.success("All files fully validated") + else: + logger.error("Validation completed with errors") return all_valid