diff --git a/bot/commands/validations/index.js b/bot/commands/validations/index.js index 5d0cc5a6..cf8d2998 100644 --- a/bot/commands/validations/index.js +++ b/bot/commands/validations/index.js @@ -3,22 +3,22 @@ import { renderIssues, createIssue } from "../../utils/renderer/index.js"; import dbInstance from "../../db.js"; import { logwatch } from "../../utils/logwatch.js"; import { applyLastModifiedTemplate } from "../../utils/tools/index.js"; -import { validateLicense } from "../../compliance-checks/license/index.js"; +import { + checkForLicense, + updateLicenseDatabase, + applyLicenseTemplate, +} from "../../compliance-checks/license/index.js"; import { getCWLFiles } from "../../compliance-checks/cwl/index.js"; import { - validateMetadata, - getCitationContent, - getCodemetaContent, - gatherMetadata, - convertDateToUnix, - applyDbMetadata, - applyCodemetaMetadata, - applyCitationMetadata, + checkMetadataFilesExists, + updateMetadataDatabase, + applyMetadataTemplate, } from "../../compliance-checks/metadata/index.js"; import { checkForReadme } from "../../compliance-checks/readme/index.js"; -import { createId } from "../../utils/tools/index.js"; -import { checkForCodeofConduct } from "../../compliance-checks/code-of-conduct/index.js"; -import { checkForContributingFile } from "../../compliance-checks/contributing/index.js"; +import { + checkForContributingFile, + checkForCodeofConduct, +} from "../../compliance-checks/additional-checks/index.js"; const ISSUE_TITLE = `FAIR Compliance Dashboard`; const db = dbInstance; @@ -234,125 +234,131 @@ export async function rerunMetadataValidation( repository, issueBody ) { - logwatch.start("Validating metadata files..."); - try { - let metadata = await gatherMetadata(context, owner, repository); - let containsCitation = false, - containsCodemeta = false, - validCitation = false, - validCodemeta = false; + const repoInfo = `${owner}/${repository.name}`; + logwatch.start( + `Rerunning metadata validation for repo: ${repository.name} (ID: ${repository.id})` + ); - let existingMetadataEntry = await db.codeMetadata.findUnique({ - where: { - repository_id: repository.id, + try { + // Check which metadata files exist + const subjects = await checkMetadataFilesExists(context, owner, repository); + + // Get license status (needed for metadata checks) + const licenseCheck = await checkForLicense(context, owner, repository.name); + subjects.license = licenseCheck; + + // Force revalidation by creating a synthetic context that looks like bot push + const syntheticContext = { + ...context, + payload: { + ...context.payload, + pusher: { name: `${process.env.GH_APP_NAME}[bot]` }, // Triggers full revalidation }, - }); + }; - if (existingMetadataEntry?.metadata) { - // Update the metadata variable - containsCitation = existingMetadataEntry.contains_citation; - containsCodemeta = existingMetadataEntry.contains_codemeta; - metadata = applyDbMetadata(existingMetadataEntry, metadata); - } else { - // create blank entry to prevent issues down the line - existingMetadataEntry = await db.codeMetadata.create({ - data: { - identifier: createId(), - repository: { - connect: { - id: repository.id, - }, - }, - }, - }); - } + await updateMetadataDatabase( + repository.id, + subjects, + repository, + owner, + syntheticContext + ); - const citation = await getCitationContent(context, owner, repository); - const codemeta = await getCodemetaContent(context, owner, repository); + // Generate new metadata section + let newMetadataSection = ""; + newMetadataSection = await applyMetadataTemplate( + subjects, + newMetadataSection, + repository, + owner, + syntheticContext + ); - if (codemeta) { - containsCodemeta = true; - validCodemeta = await validateMetadata(codemeta, "codemeta", repository); - metadata = await applyCodemetaMetadata(codemeta, metadata, repository); - } + // Parse the existing issue body to replace just the metadata section + const issueBodyWithoutTimestamp = issueBody.substring( + 0, + issueBody.indexOf(`Last updated`) + ); - if (citation) { - containsCitation = true; - validCitation = await validateMetadata(citation, "citation", repository); - metadata = await applyCitationMetadata(citation, metadata, repository); - // consola.info("Metadata so far after citation update", JSON.stringify(metadata, null, 2)); - } + // Find the metadata section boundaries + const metadataStartMarker = "## Metadata"; + const metadataStartIndex = + issueBodyWithoutTimestamp.indexOf(metadataStartMarker); - // Ensure all dates have been converted to ISO strings split by the T - if (metadata.creationDate) { - metadata.creationDate = convertDateToUnix(metadata.creationDate); - } - if (metadata.firstReleaseDate) { - metadata.firstReleaseDate = convertDateToUnix(metadata.firstReleaseDate); - } - if (metadata.currentVersionReleaseDate) { - metadata.currentVersionReleaseDate = convertDateToUnix( - metadata.currentVersionReleaseDate - ); + if (metadataStartIndex === -1) { + throw new Error("Could not find Metadata section in issue body"); } - // update the database with the metadata information - if (existingMetadataEntry) { - await db.codeMetadata.update({ - data: { - codemeta_status: validCodemeta ? "valid" : "invalid", - citation_status: validCitation ? "valid" : "invalid", - contains_citation: containsCitation, - contains_codemeta: containsCodemeta, - metadata: metadata, - }, - where: { - repository_id: repository.id, - }, - }); + // Find the next section (starts with ## or end of string) + const afterMetadataStart = issueBodyWithoutTimestamp.substring( + metadataStartIndex + metadataStartMarker.length + ); + const nextSectionMatch = afterMetadataStart.match(/\n## /); + + let updatedBody; + if (nextSectionMatch) { + // There's another section after Metadata + const nextSectionIndex = + metadataStartIndex + + metadataStartMarker.length + + nextSectionMatch.index; + + updatedBody = + issueBodyWithoutTimestamp.substring(0, metadataStartIndex) + // Before Metadata + newMetadataSection + // New Metadata section + issueBodyWithoutTimestamp.substring(nextSectionIndex); // After Metadata } else { - await db.codeMetadata.create({ - data: { - codemeta_status: validCodemeta ? "valid" : "invalid", - citation_status: validCitation ? "valid" : "invalid", - contains_citation: containsCitation, - contains_codemeta: containsCodemeta, - metadata: metadata, - }, - where: { - repository_id: repository.id, - }, - }); + // Metadata is the last section + updatedBody = + issueBodyWithoutTimestamp.substring(0, metadataStartIndex) + // Before Metadata + newMetadataSection; // New Metadata section } - const issueBodyRemovedCommand = issueBody.substring( - 0, - issueBody.indexOf(`Last updated`) - ); - const lastModified = await applyLastModifiedTemplate( - issueBodyRemovedCommand + // Add timestamp + updatedBody = applyLastModifiedTemplate(updatedBody); + + // Update the issue + await createIssue(context, owner, repository, ISSUE_TITLE, updatedBody); + logwatch.info( + `Metadata validation rerun completed for repo: ${repository.name} (ID: ${repository.id})` ); - await createIssue(context, owner, repository, ISSUE_TITLE, lastModified); } catch (error) { - // Remove the command from the issue body - const issueBodyRemovedCommand = issueBody.substring( - 0, - issueBody.indexOf(`Last updated`) - ); - const lastModified = await applyLastModifiedTemplate( - issueBodyRemovedCommand + logwatch.error( + { + message: "Failed to rerun metadata validation", + repo: repoInfo, + error: error.message, + stack: error.stack, + }, + true ); - await createIssue(context, owner, repository, ISSUE_TITLE, lastModified); - if (error.cause) { - logwatch.error( + + // rrestore issue body without command + try { + const issueBodyRemovedCommand = issueBody.substring( + 0, + issueBody.indexOf(`Last updated`) + ); + const lastModified = applyLastModifiedTemplate(issueBodyRemovedCommand); + + await context.octokit.issues.update({ + owner, + repo: repository.name, + issue_number: context.payload.issue.number, + body: lastModified, + }); + } catch (restoreError) { + logwatch.warn( { - message: "Error.cause message for Metadata Validation", - error: error.cause, + message: "Failed to restore issue body after error", + repo: repoInfo, + error: restoreError.message, }, true ); } - throw new Error("Error rerunning metadata validation", error); + + throw error; } } @@ -363,91 +369,119 @@ export async function rerunLicenseValidation( issueBody ) { // Run the license validation again - logwatch.start("Rerunning License Validation..."); - try { - const licenseRequest = await context.octokit.rest.licenses.getForRepo({ - owner, - repo: repository.name, - }); - - const existingLicense = await db.licenseRequest.findUnique({ - where: { - repository_id: repository.id, - }, - }); + const repoInfo = `${owner}/${repository.name}`; + logwatch.start(`Rerunning License Validation for repo: ${repoInfo}...`); - const license = !!licenseRequest.data.license; + try { + // Step 1: Check for license + logwatch.info(`Fetching license information for ${repoInfo}`); + const license = await checkForLicense(context, owner, repository.name); if (!license) { throw new Error("License not found in the repository"); } - const { licenseId, licenseContent, licenseContentEmpty } = validateLicense( - licenseRequest, - existingLicense + logwatch.info( + `License found: ${license.license || "unknown"} at ${license.path || "unknown path"}` ); - logwatch.info({ - message: `License validation complete`, - licenseId, - licenseContent, - licenseContentEmpty, - }); + // Step 2: Update the database with the license information + logwatch.info(`Updating license database for ${repoInfo}`); + await updateLicenseDatabase(repository, license); - // Update the database with the license information - if (existingLicense) { - await db.licenseRequest.update({ - data: { - license_id: licenseId, - license_content: licenseContent, - license_status: licenseContentEmpty ? "invalid" : "valid", - }, - where: { - repository_id: repository.id, - }, - }); - } else { - await db.licenseRequest.create({ - data: { - license_id: licenseId, - license_content: licenseContent, - license_status: licenseContentEmpty ? "invalid" : "valid", - }, - where: { - repository_id: repository.id, - }, - }); - } - - // Update the issue body + // Step 3: Prepare issue body const issueBodyRemovedCommand = issueBody.substring( 0, issueBody.indexOf(`Last updated`) ); - const lastModified = await applyLastModifiedTemplate( - issueBodyRemovedCommand + + // Step 4: Generate new license section + logwatch.info(`Generating new license section for ${repoInfo}`); + let newLicenseSection = ""; + const subjects = { license }; + newLicenseSection = await applyLicenseTemplate( + subjects, + newLicenseSection, + repository, + owner, + context + ); + + // Step 5: Parse the existing issue body to replace just the license section + const issueBodyWithoutTimestamp = issueBodyRemovedCommand; + const licenseStartMarker = "## LICENSE"; + const licenseStartIndex = + issueBodyWithoutTimestamp.indexOf(licenseStartMarker); + + if (licenseStartIndex === -1) { + throw new Error("Could not find LICENSE section in issue body"); + } + + // Find the next section (starts with ## or end of string) + const afterLicenseStart = issueBodyWithoutTimestamp.substring( + licenseStartIndex + licenseStartMarker.length ); + const nextSectionMatch = afterLicenseStart.match(/\n## /); + + let updatedBody; + if (nextSectionMatch) { + // There's another section after LICENSE + const nextSectionIndex = + licenseStartIndex + licenseStartMarker.length + nextSectionMatch.index; + updatedBody = + issueBodyWithoutTimestamp.substring(0, licenseStartIndex) + // Before LICENSE + newLicenseSection + // New LICENSE section + issueBodyWithoutTimestamp.substring(nextSectionIndex); // After LICENSE + } else { + // LICENSE is the last section + updatedBody = + issueBodyWithoutTimestamp.substring(0, licenseStartIndex) + // Before LICENSE + newLicenseSection; // New LICENSE section + } + + // Step 6: Update the issue + logwatch.info(`Updating issue for ${repoInfo}`); + const lastModified = applyLastModifiedTemplate(updatedBody); await createIssue(context, owner, repository, ISSUE_TITLE, lastModified); - } catch (error) { - // Remove the command from the issue body - const issueBodyRemovedCommand = issueBody.substring( - 0, - issueBody.indexOf(`Last updated`) + + logwatch.info( + `License validation rerun completed successfully for ${repoInfo}` ); - const lastModified = await applyLastModifiedTemplate( - issueBodyRemovedCommand + } catch (error) { + logwatch.error( + { + message: "Failed to rerun license validation", + repo: repoInfo, + error: error.message, + stack: error.stack, + cause: error.cause, + }, + true ); - await createIssue(context, owner, repository, ISSUE_TITLE, lastModified); - if (error.cause) { - logwatch.error( + + // Restore issue body without command + try { + const issueBodyRemovedCommand = issueBody.substring( + 0, + issueBody.indexOf(`Last updated`) + ); + const lastModified = applyLastModifiedTemplate(issueBodyRemovedCommand); + await createIssue(context, owner, repository, ISSUE_TITLE, lastModified); + } catch (restoreError) { + logwatch.warn( { - message: "Error.cause message for License Validation", - error: error.cause, + message: "Failed to restore issue body after error", + repo: repoInfo, + error: restoreError.message, }, true ); } - throw new Error("Error rerunning license validation", error); + + throw new Error( + `Error rerunning license validation for ${repoInfo}: ${error.message}`, + { cause: error } + ); } } diff --git a/bot/compliance-checks/additional-checks/index.js b/bot/compliance-checks/additional-checks/index.js index 9b710fe6..38ffabff 100644 --- a/bot/compliance-checks/additional-checks/index.js +++ b/bot/compliance-checks/additional-checks/index.js @@ -1,8 +1,101 @@ import prisma from "../../db.js"; +import { checkForFile } from "../../utils/tools/index.js"; import { createId } from "../../utils/tools/index.js"; const CODEFAIR_DOMAIN = process.env.CODEFAIR_APP_DOMAIN; const db = prisma; +/** + * * Check if a Code of conduct file exists (CODE_OF_CONDUCT.md, CODE_OF_CONDUCT.txt, or CODE_OF_CONDUCT) + * @param {Object} context - The context of the GitHub Event + * @param {String} owner - The owner of the repository + * @param {String} repoName - The name of the repository + * @returns {Boolean} - True if a CODE_OF_CONDUCT file exists, false otherwise + */ +export async function checkForCodeofConduct(context, owner, repoName) { + const cofcFilesTypes = [ + "CODE_OF_CONDUCT.md", + "CODE_OF_CONDUCT.txt", + "CODE_OF_CONDUCT", + "docs/CODE_OF_CONDUCT.md", + "docs/CODE_OF_CONDUCT.txt", + "docs/CODE_OF_CONDUCT", + ".github/CODE_OF_CONDUCT.md", + ".github/CODE_OF_CONDUCT.txt", + ".github/CODE_OF_CONDUCT", + ]; + + for (const filePath of cofcFilesTypes) { + const cofc = await checkForFile(context, owner, repoName, filePath); + if (cofc) { + const content = await context.octokit.repos.getContent({ + owner, + repo: repoName, + path: filePath, + }); + const contentData = Buffer.from(content.data.content, "base64").toString( + "utf-8" + ); + + return { + status: true, + path: filePath, + content: contentData, + }; + } + } + return { + path: "No Code of Conduct file found", + status: false, + content: "", + }; +} + +/** + * * Check if a CONTRIBUTING file exists (CONTRIBUTING.md) + * @param {Object} context - The context of the GitHub Event + * @param {String} owner - The owner of the repository + * @param {String} repoName - The name of the repository + * @returns {Boolean} - True if a CONTRIBUTING file exists, false otherwise + */ +export async function checkForContributingFile(context, owner, repoName) { + const contributingFilesTypes = [ + "CONTRIBUTING.md", + "CONTRIBUTING.txt", + "CONTRIBUTING", + "docs/CONTRIBUTING.md", + "docs/CONTRIBUTING.txt", + "docs/CONTRIBUTING", + ".github/CONTRIBUTING.md", + ".github/CONTRIBUTING.txt", + ".github/CONTRIBUTING", + ]; + + for (const filePath of contributingFilesTypes) { + const contrib = await checkForFile(context, owner, repoName, filePath); + if (contrib) { + const content = await context.octokit.repos.getContent({ + owner, + repo: repoName, + path: filePath, + }); + const contentData = Buffer.from(content.data.content, "base64").toString( + "utf-8" + ); + + return { + status: true, + path: filePath, + content: contentData, + }; + } + } + return { + path: "No Contributing file found", + status: false, + content: "", + }; +} + export async function applyAdditionalChecksTemplate( subjects, template, @@ -108,7 +201,7 @@ export async function applyAdditionalChecksTemplate( const section = `## Additional Recommendations\n\n` + - `Although these files aren not part of the core FAIR compliance checks, ` + + `Although these files are not part of the core FAIR compliance checks, ` + `Codefair recommends including them to improve project governance, community engagement, and contributor experience:\n\n` + additionalSubjects .map( diff --git a/bot/compliance-checks/archival/index.js b/bot/compliance-checks/archival/index.js index ec110d7e..ef000958 100644 --- a/bot/compliance-checks/archival/index.js +++ b/bot/compliance-checks/archival/index.js @@ -181,7 +181,7 @@ export async function applyArchivalTemplate( const noLicenseText = `\n\nTo make your software FAIR, a license file is required.\n> [!WARNING]\n> Codefair will run this check after a LICENSE file is detected in your repository.`; const noLicenseBadge = `![FAIR Release not checked](https://img.shields.io/badge/FAIR_Release_Not_Checked-fbbf24)`; - if (!subjects.license) { + if (!subjects.license.status) { logwatch.info("License not found. Skipping FAIR release check."); baseTemplate += `${archiveTitle}\n\n${noLicenseText}\n\n${noLicenseBadge}\n\n`; return baseTemplate; diff --git a/bot/compliance-checks/citation/index.js b/bot/compliance-checks/citation/index.js deleted file mode 100644 index 53f77106..00000000 --- a/bot/compliance-checks/citation/index.js +++ /dev/null @@ -1,284 +0,0 @@ -import { gatherRepoAuthors } from "../../utils/tools/index.js"; - -export async function checkForCitation(context, owner, repo) { - try { - await context.octokit.rest.repos.getContent({ - owner, - path: "CITATION.cff", - repo, - }); - - return true; - } catch (error) { - return false; - } -} - -// Currently not being used -async function gatherCitationInfo(context, owner, repo) { - // Verify there is no PR open already for the CITATION.cff file - const openPR = await context.octokit.pulls.list({ - owner, - repo, - state: "open", - }); - - let prExists = false; - - for (const pr of openPR.data) { - if (pr.title === "feat: ✨ CITATION.cff created for repo") { - prExists = true; - } - } - - if (prExists) { - await context.octokit.issues.createComment({ - body: `A PR for the CITATION.cff file already exists here: ${openPR.data[0].html_url}`, - issue_number: context.payload.issue.number, - owner, - repo, - }); - return; - } - - // Get the release data of the repo - const releases = await context.octokit.repos.listReleases({ - owner, - repo, - }); - - // Get the metadata of the repo - const repoData = await context.octokit.repos.get({ - owner, - repo, - }); - - // Get authors of repo - const parsedAuthors = await gatherRepoAuthors( - context, - owner, - repo, - "citation" - ); - // Get DOI of repo (if it exists) - const doi = await getDOI(context, owner, repo); - // Get the repo description - const abstract = repoData.data.description; - // Get the license of the repo - const licenseName = repoData.data.license; - - // date released is dependent on whether the repo has a release data (if not, use the created date) - let dateReleased; - if (repoData.data.released_at) { - dateReleased = repoData.data.released_at; - } else { - // The date needs to be in this pattern: - dateReleased = new Date().toISOString().split("T")[0]; - } - - // Get the homepage of the repo - let url; - if (repoData.data.homepage != null) { - url = repoData.data.homepage; - } - - // Get the keywords of the repo - let keywords = []; - if (repoData.data.topics != null && repoData.data.topics.length > 0) { - keywords = repoData.data.topics; - } - - // Begin creating json for CITATION.cff file - let citationObj = { - title: repoData.data.name, - "cff-version": "1.2.0", - identifiers: [ - { - description: "DOI for this software's record on Zenodo.", - type: "doi", - }, - ], - message: "If you use this software, please cite it as below.", - "repository-code": repoData.data.html_url, - type: "software", - }; - - if (doi[0]) { - citationObj.identifiers[0].value = doi[1]; - } else { - citationObj.identifiers[0].value = ""; - } - - if (parsedAuthors.length > 0) { - citationObj.authors = parsedAuthors; - } - - if (licenseName !== null) { - citationObj.license = licenseName.spdx_id; - } - - if (abstract !== null) { - citationObj.abstract = abstract; - } else { - citationObj.abstract = ""; - } - - if (keywords.length > 0) { - citationObj.keywords = keywords; - } - - if (url !== null && url !== "") { - citationObj.url = url; - } else { - citationObj.url = repoData.data.html_url; - } - - if (dateReleased !== null && dateReleased !== "") { - citationObj["date-released"] = dateReleased; - } else { - citationObj["date-released"] = ""; - } - - // sort keys alphabetically - citationObj = Object.keys(citationObj) - .sort() - .reduce((acc, key) => { - acc[key] = citationObj[key]; - return acc; - }, {}); - - const citationTemplate = yaml.dump(citationObj); - - await createCitationFile(context, owner, repo, citationTemplate); -} - -export async function createCitationFile(context, owner, repo, citationText) { - // Here we take the citation text passed as a parameter - // It could from probot's initial gathering or an updated version from the user - - // Create a new branch - const branch = `citation-${Math.floor(Math.random() * 9999)}`; - - // Get the default branch of the repo - const defaultBranch = await getDefaultBranch(context, owner, repo); - const defaultBranchName = defaultBranch.data.name; - - // Create a new branch based off the default branch - await context.octokit.git.createRef({ - owner, - ref: `refs/heads/${branch}`, - repo, - sha: defaultBranch.data.commit.sha, - }); - - // Create a new file - await context.octokit.repos.createOrUpdateFileContents({ - branch, - content: Buffer.from(citationText).toString("base64"), - message: `feat: ✨ add CITATION.cff file`, - owner, - path: "CITATION.cff", - repo, - }); - - // Create a PR with the branch - await context.octokit.pulls.create({ - title: "feat: ✨ CITATION.cff create for repo", - base: defaultBranchName, - body: `Resolves #${context.payload.issue.number}`, - head: branch, - maintainer_can_modify: true, - owner, - repo, - }); - - // Get the link to the CITATION.cff file in the branch created - let citationLink = await context.octokit.repos.getContent({ - owner, - path: "CITATION.cff", - ref: `refs/heads/${branch}`, - repo, - }); - - citationLink = citationLink.data.html_url; - const editLink = citationLink.replace("blob", "edit"); - - await context.octokit.issues.createComment({ - body: - "```yaml\n" + - citationText + - "\n```" + - `\n\nHere is the information I was able to gather from this repo. If you would like to add more please follow the link to edit using the GitHub UI. Once you are satisfied with the CITATION.cff you can merge the pull request and I will close this issue. - \n\n[Edit CITATION.cff](${editLink})`, - issue_number: context.payload.issue.number, - owner, - repo, - }); -} - -/** - * * Applies the citation template to the base template - * - * @param {object} subjects - The subjects to check for - * @param {string} baseTemplate - The base template to add to - * @param {*} db - The database - * @param {object} repository - The GitHub repository information - * @param {string} owner - The owner of the repository - * - * @returns {string} - The updated base template - */ -export async function applyCitationTemplate( - subjects, - baseTemplate, - db, - repository, - owner -) { - if (!subjects.citation && subjects.license) { - // License was found but no citation file was found - const identifier = createId(); - - let badgeURL = `${CODEFAIR_DOMAIN}/add/citation/${identifier}`; - const citationCollection = db.citationRequests; - const existingCitation = await citationCollection.fineUnique({ - where: { - repository_id: repository.id, - }, - }); - - if (!existingCitation) { - // Entry does not exist in db, create a new one - const newDate = new Date(); - await citationCollection.create({ - data: { - created_at: newDate, - identifier, - owner, - repo: repository.name, - repository_id: repository.id, - updated_at: newDate, - }, - }); - } else { - // Get the identifier of the existing citation request - await citationCollection.updateOne({ - data: { updated_at: new Date() }, - where: { repository_id: repository.id }, - }); - badgeURL = `${CODEFAIR_DOMAIN}/add/citation/${existingCitation.identifier}`; - } - - const citationBadge = `[![Citation](https://img.shields.io/badge/Add_Citation-dc2626.svg)](${badgeURL})`; - baseTemplate += `\n\n## CITATION.cff\n\nA CITATION.cff file was not found in the repository. The [FAIR-BioRS guidelines](https://fair-biors.org/docs/guidelines) suggests to include that file for providing metadata about your software and make it FAIR.\n\n${citationBadge}`; - } else if (subjects.citation && subjects.license) { - // Citation file was found and license was found - const citationBadge = `![Citation](https://img.shields.io/badge/Citation_Added-6366f1.svg)`; - baseTemplate += `\n\n## CITATION.cff\n\nA CITATION.cff file found in the repository.\n\n${citationBadge}`; - } else { - // Citation file was not found and license was not found - const citationBadge = `![Citation](https://img.shields.io/badge/Citation_Not_Checked-fbbf24)`; - baseTemplate += `\n\n## CITATION.cff\n\nA CITATION.cff file will be checked after a license file is added. The [FAIR-BioRS guidelines](https://fair-biors.org/docs/guidelines) suggests to include that file for providing metadata about your software and make it FAIR.\n\n${citationBadge}`; - } - - return baseTemplate; -} diff --git a/bot/compliance-checks/code-of-conduct/index.js b/bot/compliance-checks/code-of-conduct/index.js deleted file mode 100644 index f3ae0dd9..00000000 --- a/bot/compliance-checks/code-of-conduct/index.js +++ /dev/null @@ -1,135 +0,0 @@ -import { checkForFile } from "../../utils/tools/index.js"; -import dbInstance from "../../db.js"; -import { createId } from "../../utils/tools/index.js"; -const db = dbInstance; - -const CODEFAIR_DOMAIN = process.env.CODEFAIR_APP_DOMAIN; -/** - * * Check if a Code of conduct file exists (CODE_OF_CONDUCT.md, CODE_OF_CONDUCT.txt, or CODE_OF_CONDUCT) - * @param {Object} context - The context of the GitHub Event - * @param {String} owner - The owner of the repository - * @param {String} repoName - The name of the repository - * @returns {Boolean} - True if a CODE_OF_CONDUCT file exists, false otherwise - */ -export async function checkForCodeofConduct(context, owner, repoName) { - const cofcFilesTypes = [ - "CODE_OF_CONDUCT.md", - "CODE_OF_CONDUCT.txt", - "CODE_OF_CONDUCT", - "docs/CODE_OF_CONDUCT.md", - "docs/CODE_OF_CONDUCT.txt", - "docs/CODE_OF_CONDUCT", - ".github/CODE_OF_CONDUCT.md", - ".github/CODE_OF_CONDUCT.txt", - ".github/CODE_OF_CONDUCT", - ]; - - for (const filePath of cofcFilesTypes) { - const cofc = await checkForFile(context, owner, repoName, filePath); - if (cofc) { - const content = await context.octokit.repos.getContent({ - owner, - repo: repoName, - path: filePath, - }); - const contentData = Buffer.from(content.data.content, "base64").toString( - "utf-8" - ); - - return { - status: true, - path: filePath, - content: contentData, - }; - } - } - return { - path: "No Code of Conduct file found", - status: false, - content: "", - }; -} - -export async function applyCodeofConductTemplate( - owner, - repository, - subjects, - baseTemplate -) { - // 1) Prepare identifier and badge URL - let identifier, badgeURL; - try { - identifier = createId(); - badgeURL = `${CODEFAIR_DOMAIN}/dashboard/${owner}/${repository.name}/edit/code-of-conduct`; - } catch (error) { - throw new Error( - `Failed to initialize CODE_OF_CONDUCT template parameters: ${error.message}`, - { cause: error } - ); - } - - // 2) Upsert the codeOfConductValidation record - try { - const existing = await db.codeofConductValidation.findUnique({ - where: { repository_id: repository.id }, - }); - - const upsertData = { - contains_cof: subjects.cofc.status, - code_content: subjects.cofc.content, - code_path: subjects.cofc.path, - code_template_title: subjects.cof.status ? "Custom" : "", - }; - - if (existing) { - await db.codeofConductValidation.update({ - where: { id: existing.id }, - data: upsertData, - }); - } else { - await db.codeofConductValidation.create({ - data: { - identifier, - ...upsertData, - repository: { connect: { id: repository.id } }, - }, - }); - } - } catch (error) { - throw new Error( - `Database error in applyCodeofConductTemplate: ${error.message}`, - { - cause: error, - } - ); - } - - // 3) Prepare the template data - try { - const { status, path } = subjects.cof; - const verb = status ? "Edit" : "Create"; - const colorCode = status ? "0ea5e9" : "dc2626"; - const badgeLabel = `${verb}_Code_of_Conduct-${colorCode}`; - const cofBadge = `[![${verb} Code of Conduct](https://img.shields.io/badge/${badgeLabel}.svg)](${badgeURL})`; - - const header = status ? "## Code of Conduct ✔️" : "## Code of Conduct ❌"; - const desc = status - ? `A \`${path}\` file was found at the within your repository.` - : `A Code of Conduct file was not found within your .github, docs or root of your repository. The Code of Conduct file is a document that outlines the expected behavior and responsibilities of contributors to a project. It helps create a welcoming and inclusive environment for all participants. You can create one in Codefair's editor that follows the latest [Code of Conduct template](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) and add it to your repository. Click on the badge below to create a file with Codefair's editor.`; - - return ( - baseTemplate + - `\n\n` + - `${header}\n\n` + - `${desc}\n\n` + - `${cofBadge}\n\n` - ); - } catch (error) { - throw new Error( - `Error constructing Code of Conduct section: ${error.message}`, - { - cause: error, - } - ); - } -} diff --git a/bot/compliance-checks/codemeta/index.js b/bot/compliance-checks/codemeta/index.js deleted file mode 100644 index 20554393..00000000 --- a/bot/compliance-checks/codemeta/index.js +++ /dev/null @@ -1,254 +0,0 @@ -import { gatherRepoAuthors } from "../../utils/tools/index.js"; - -export async function checkForCodeMeta(context, owner, repo) { - try { - await context.octokit.rest.repos.getContent({ - owner, - path: "codemeta.json", - repo, - }); - - return true; - } catch (error) { - return false; - } -} - -export async function gatherCodeMetaInfo(context, owner, repo) { - // Gather metadata from the repo to create a codemeta.json file - const repoData = await context.octokit.repos.get({ - owner, - repo, - }); - - // Get the languages used in the repo - const languagesUsed = await gatherLanguagesUsed(context, owner, repo); - const authors = await gatherRepoAuthors(context, owner, repo, "codemeta"); - const codeRepository = repoData.data.html_url; - const dataCreated = repoData.data.created_at; - const dataModified = repoData.data.updated_at; - const keywords = repoData.data.topics; - const license = repoData.data.license.spdx_id; - const { description } = repoData.data; - const identifier = repoData.data.id; - const name = repoData.data.full_name; - let issueTracker = repoData.data.issues_url; - - let metadata = { - "@context": "https://w3id.org/codemeta/3.0", - "@type": "SoftwareSourceCode", - }; - - if (license !== null || license !== "") { - metadata.license = `https://spdx.org/licenses/${license}`; - } - - if (codeRepository !== null || codeRepository !== "") { - metadata.codeRepository = codeRepository; - } - - if (dataCreated !== null || dataCreated !== "") { - metadata.dateCreated = dataCreated.split("T")[0]; - } - - if (dataModified !== null || dataModified !== "") { - metadata.dateModified = dataModified.split("T")[0]; - } - - if (keywords.length > 0) { - metadata.keywords = keywords; - } else { - metadata.keywords = []; - } - - if (description !== null || description !== "") { - metadata.description = description; - } - - if (identifier !== null || identifier !== "") { - metadata.identifier = identifier; - } - - if (name !== null || name !== "") { - metadata.name = name; - } - - if (issueTracker !== null || issueTracker !== "") { - // Remove the {/number} from the issue tracker url - issueTracker = issueTracker.replace("{/number}", ""); - metadata.issueTracker = issueTracker; - } - - if (languagesUsed.length > 0) { - metadata.programmingLanguage = languagesUsed; - } else { - metadata.programmingLanguage = []; - } - - if (authors.length > 0) { - metadata.author = authors; - } else { - metadata.author = []; - } - - // sort keys alphabetically - metadata = Object.keys(metadata) - .sort() - .reduce((acc, key) => { - acc[key] = metadata[key]; - return acc; - }, {}); - - await createCodeMetaFile(context, owner, repo, metadata); -} - -export async function createCodeMetaFile(context, owner, repo, codeMetaText) { - // Create a new branch - const branch = `codemeta-${Math.floor(Math.random() * 9999)}`; - - // Get the default branch of the repo - const defaultBranch = await getDefaultBranch(context, owner, repo); - const defaultBranchName = defaultBranch.data.name; - - // Create a new branch based off the default branch - await context.octokit.git.createRef({ - owner, - ref: `refs/heads/${branch}`, - repo, - sha: defaultBranch.data.commit.sha, - }); - - // Create a new file - await context.octokit.repos.createOrUpdateFileContents({ - branch, - content: Buffer.from(JSON.stringify(codeMetaText, null, 2)).toString( - "base64" - ), - message: `feat: ✨ add codemeta.json file`, - owner, - path: "codemeta.json", - repo, - }); - - // Create a PR with the branch - await context.octokit.pulls.create({ - title: "feat: ✨ codemeta.json created for repo", - base: defaultBranchName, - body: `Resolves #${context.payload.issue.number}`, - head: branch, - maintainer_can_modify: true, - owner, - repo, - }); - - // Get the link to the codemeta.json file in the branch created - let codemetaLink = await context.octokit.repos.getContent({ - owner, - path: "codemeta.json", - ref: `refs/heads/${branch}`, - repo, - }); - - codemetaLink = codemetaLink.data.html_url; - const editLink = codemetaLink.replace("blob", "edit"); - - await context.octokit.issues.createComment({ - body: - "```json\n" + - JSON.stringify(codeMetaText, null, 2) + - "\n```" + - `\n\nHere is the information I was able to gather from this repo. If you would like to add more please follow the link to edit using the GitHub UI. Once you are satisfied with the codemeta.json you can merge the pull request and I will close this issue. - \n\n[Edit codemeta.json](${editLink})`, - issue_number: context.payload.issue.number, - owner, - repo, - }); -} - -/** - * * Applies the codemeta template to the base template - * - * @param {object} subjects - The subjects to check for - * @param {string} baseTemplate - The base template to add to - * @param {*} db - The database - * @param {object} repository - The GitHub repository information - * @param {string} owner - The owner of the repository - * - * @returns {string} - The updated base template - */ -export async function applyCodemetaTemplate( - subjects, - baseTemplate, - db, - repository, - owner -) { - if (!subjects.codemeta && subjects.license) { - // License was found but no codemeta.json exists - const identifier = createId(); - - let badgeURL = `${CODEFAIR_DOMAIN}/add/codemeta/${identifier}`; - - const codemetaCollection = db.codeMetadata; - const existingCodemeta = await codemetaCollection.findUnique({ - repository_id: repository.id, - }); - - if (!existingCodemeta) { - // Entry does not exist in db, create a new one - const newDate = new Date(); - await codemetaCollection.create({ - created_at: newDate, - identifier, - owner, - repo: repository.name, - repository_id: repository.id, - updated_at: newDate, - }); - } else { - // Get the identifier of the existing codemeta request - await codemetaCollection.update({ - data: { updated_at: new Date() }, - where: { repository_id: repository.id }, - }); - badgeURL = `${CODEFAIR_DOMAIN}/add/codemeta/${existingCodemeta.identifier}`; - } - - const codemetaBadge = `[![Citation](https://img.shields.io/badge/Add_Codemeta-dc2626.svg)](${badgeURL})`; - baseTemplate += `\n\n## codemeta.json\n\nA codemeta.json file was not found in the repository. To make your software reusable a codemeta.json is expected at the root level of your repository.\n\n${codemetaBadge}`; - } else if (subjects.codemeta && subjects.license) { - // License was found and codemetata.json also exists - // Then add codemeta section mentioning it will be checked after license is added - - if (!existingLicense) { - // Entry does not exist in db, create a new one - const newDate = new Date(); - await licenseCollection.create({ - data: { - created_at: newDate, - identifier, - owner, - repo: repository.name, - repository_id: repository.id, - updated_at: newDate, - }, - }); - } else { - // Get the identifier of the existing license request - // Update the database - await licenseCollection.update({ - data: { updated_at: new Date() }, - where: { repository_id: repository.id }, - }); - badgeURL = `${CODEFAIR_DOMAIN}/add/license/${existingLicense.identifier}`; - } - const codemetaBadge = `[![Citation](https://img.shields.io/badge/Edit_Codemeta-dc2626.svg)](${badgeURL})`; - baseTemplate += `\n\n## codemeta.json\n\nA codemeta.json file found in the repository.\n\n${codemetaBadge}`; - } else { - // codemeta and license does not exist - const codemetaBadge = `![CodeMeta](https://img.shields.io/badge/Codemeta_Not_Checked-fbbf24)`; - baseTemplate += `\n\n## codemeta.json\n\nA codemeta.json file will be checked after a license file is added. To make your software reusable a codemeta.json is expected at the root level of your repository.\n\n${codemetaBadge}`; - } - - return baseTemplate; -} diff --git a/bot/compliance-checks/contributing/index.js b/bot/compliance-checks/contributing/index.js deleted file mode 100644 index 4373266b..00000000 --- a/bot/compliance-checks/contributing/index.js +++ /dev/null @@ -1,133 +0,0 @@ -import { checkForFile } from "../../utils/tools/index.js"; -import dbInstance from "../../db.js"; -import { createId } from "../../utils/tools/index.js"; -import logwatch from "../../utils/logwatch.js"; -const db = dbInstance; - -const CODEFAIR_DOMAIN = process.env.CODEFAIR_APP_DOMAIN; - -/** - * * Check if a CONTRIBUTING file exists (CONTRIBUTING.md) - * @param {Object} context - The context of the GitHub Event - * @param {String} owner - The owner of the repository - * @param {String} repoName - The name of the repository - * @returns {Boolean} - True if a CONTRIBUTING file exists, false otherwise - */ -export async function checkForContributingFile(context, owner, repoName) { - const contributingFilesTypes = [ - "CONTRIBUTING.md", - "CONTRIBUTING.txt", - "CONTRIBUTING", - "docs/CONTRIBUTING.md", - "docs/CONTRIBUTING.txt", - "docs/CONTRIBUTING", - ".github/CONTRIBUTING.md", - ".github/CONTRIBUTING.txt", - ".github/CONTRIBUTING", - ]; - - for (const filePath of contributingFilesTypes) { - const contrib = await checkForFile(context, owner, repoName, filePath); - if (contrib) { - const content = await context.octokit.repos.getContent({ - owner, - repo: repoName, - path: filePath, - }); - const contentData = Buffer.from(content.data.content, "base64").toString( - "utf-8" - ); - - return { - status: true, - path: filePath, - content: contentData, - }; - } - } - return { - path: "No Contributing file found", - status: false, - content: "", - }; -} - -export async function applyContributingTemplate( - owner, - repository, - subjects, - baseTemplate -) { - // 1) Prepare identifier and badge URL - let identifier, badgeURL; - try { - identifier = createId(); - badgeURL = `${CODEFAIR_DOMAIN}/dashboard/${owner}/${repository.name}/edit/contributing`; - } catch (error) { - throw new Error( - `Failed to initialize CONTRIBUTING template parameters: ${error.message}`, - { cause: error } - ); - } - - // 2) Upsert the contributingValidation record - try { - const existing = await db.contributingValidation.findUnique({ - where: { repository_id: repository.id }, - }); - - const upsertData = { - contains_contributing: subjects.contributing.status, - contributing_content: subjects.contributing.content, - contributing_path: subjects.contributing.path, - contrib_template_title: subjects.contributing.status ? "Custom" : "", - }; - - if (existing) { - await db.contributingValidation.update({ - where: { id: existing.id }, - data: upsertData, - }); - } else { - await db.contributingValidation.create({ - data: { - identifier, - ...upsertData, - repository: { connect: { id: repository.id } }, - }, - }); - } - } catch (error) { - throw new Error( - `Database error in applyContributingTemplate: ${error.message}`, - { cause: error } - ); - } - - // 3) Prepare the template data - try { - const { status } = subjects.contributing; - const verb = status ? "Edit" : "Create"; - const colorCode = status ? "0ea5e9" : "dc2626"; - const badgeLabel = `${verb}_CONTRIBUTING-${colorCode}`; - const contributingBadge = `[![${verb} CONTRIBUTING](https://img.shields.io/badge/${badgeLabel}.svg)](${badgeURL})`; - - const header = status ? "## CONTRIBUTING ✔️" : "## CONTRIBUTING ❌"; - const desc = status - ? `A CONTRIBUTING file was found at the root level of the repository.` - : `A CONTRIBUTING file was not found at the root of your repository. We recommend creating one to help contributors understand how to contribute to your project. We have create a template for you to edit and add to your repository. Click on the badge below to create a file with Codefair's editor.`; - - return ( - baseTemplate + - `\n\n` + - `${header}\n\n` + - `${desc}\n\n` + - `${contributingBadge}\n\n` - ); - } catch (error) { - throw new Error( - `Failed to prepare template data in applyContributingTemplate: ${error.message}`, - { cause: error } - ); - } -} diff --git a/bot/compliance-checks/index.js b/bot/compliance-checks/index.js index 6fac4548..3177c440 100644 --- a/bot/compliance-checks/index.js +++ b/bot/compliance-checks/index.js @@ -1,10 +1,11 @@ import { checkForLicense } from "./license/index.js"; -import { checkForCitation } from "./citation/index.js"; -import { checkForCodeMeta } from "./codemeta/index.js"; import { getCWLFiles } from "./cwl/index.js"; import { checkForReadme } from "./readme/index.js"; -import { checkForContributingFile } from "./contributing/index.js"; -import { checkForCodeofConduct } from "./code-of-conduct/index.js"; +import { checkMetadataFilesExists } from "./metadata/index.js"; +import { + checkForCodeofConduct, + checkForContributingFile, +} from "./additional-checks/index.js"; import logwatch from "../utils/logwatch.js"; /** @@ -36,8 +37,12 @@ export async function runComplianceChecks( }; const readme = await checkForReadme(context, owner, repository.name); const license = await checkForLicense(context, owner, repository.name); - const citation = await checkForCitation(context, owner, repository.name); - const codemeta = await checkForCodeMeta(context, owner, repository.name); + const { citation, codemeta } = await checkMetadataFilesExists( + context, + owner, + repository + ); + const contributing = await checkForContributingFile( context, owner, diff --git a/bot/compliance-checks/license/index.js b/bot/compliance-checks/license/index.js index 11b85f45..7f4c8547 100644 --- a/bot/compliance-checks/license/index.js +++ b/bot/compliance-checks/license/index.js @@ -4,6 +4,7 @@ import { logwatch } from "../../utils/logwatch.js"; import dbInstance from "../../db.js"; import { createId } from "../../utils/tools/index.js"; +import { checkForFile } from "../../utils/tools/index.js"; const CODEFAIR_DOMAIN = process.env.CODEFAIR_APP_DOMAIN; @@ -12,270 +13,143 @@ const CODEFAIR_DOMAIN = process.env.CODEFAIR_APP_DOMAIN; * * @param {object} context - The GitHub context object * @param {string} owner - The owner of the repository - * @param {string} repo - The name of the repository + * @param {string} repoName - The name of the repository * @returns {boolean} - Returns true if a license is found in the repository, false otherwise */ -export async function checkForLicense(context, owner, repo) { - try { - await context.octokit.rest.licenses.getForRepo({ - owner, - repo, - }); - - logwatch.success("License found in the repository!"); - return true; - } catch (error) { - logwatch.warn("No license found in the repository"); - // Errors when no License is found in the repo - return false; - } -} - -/** - * * Create a new license file in the repository - * - * @param {object1} context - The GitHub context object - * @param {string} owner - The owner of the repository - * @param {string} repo - The name of the repository - * @param {string} license - The license identifier - */ -export async function createLicense(context, owner, repo, license) { - // Verify there is no PR open already for the LICENSE file - const openPR = await context.octokit.pulls.list({ - owner, - repo, - state: "open", - }); - - let prExists = false; - - for (const pr of openPR.data) { - if (pr.title === "feat: ✨ LICENSE file added") { - prExists = true; - } - } - - if (prExists) { - await context.octokit.issues.createComment({ - body: `A pull request for the LICENSE file already exists here: ${openPR.data[0].html_url}`, - issue_number: context.payload.issue.number, - owner, - repo, - }); - - // // comment on pull request to resolve issue - // await context.octokit.issues.createComment({ - // repo, - // owner, - // issue_number: openPR.data[0].number, - // body: `Resolves #${context.payload.issue.number}`, - // }); - return; - } - - // Create a new file with the license parameter (use axios to get the license from the licenses.json file) - // Create a new branch with the license file and open a PR - const licenseRequest = licensesAvail.find( - (item) => item.licenseId === license - ); - if (licenseRequest) { - try { - const response = await axios.get(licenseRequest.detailsUrl); - const responseData = response.data; - - // Create a new file - const branch = `license-${Math.floor(Math.random() * 9999)}`; +export async function checkForLicense(context, owner, repoName) { + const licenseFilesTypes = ["LICENSE", "LICENSE.md", "LICENSE.txt"]; + + for (const filePath of licenseFilesTypes) { + const file = await checkForFile(context, owner, repoName, filePath); + if (file) { + // Get the actual file content from the repository + const fileContent = await context.octokit.rest.repos.getContent({ + owner, + repo: repoName, + path: filePath, + }); - let defaultBranch; - let defaultBranchName; + const contentData = Buffer.from( + fileContent.data.content, + "base64" + ).toString("utf-8"); + // Try to get the detected license information for the repository + let spdxId = null; try { - defaultBranch = await context.octokit.repos.getBranch({ - branch: context.payload.repository.default_branch, + const repoLicense = await context.octokit.rest.licenses.getForRepo({ owner, - repo, + repo: repoName, }); - defaultBranchName = defaultBranch.data.name; + spdxId = repoLicense.data?.license?.spdx_id || null; } catch (error) { - logwatch.error( - { message: "Error getting default branch:", error }, - true - ); - return; + logwatch.warn(`Could not detect license SPDX ID: ${error.message}`); } - // Create a new branch base off the default branch - logwatch.info("Creating branch..."); - await context.octokit.git.createRef({ - owner, - ref: `refs/heads/${branch}`, - repo, - sha: defaultBranch.data.commit.sha, - }); - - // Create a new file - logwatch.info("Creating file..."); - await context.octokit.repos.createOrUpdateFileContents({ - branch, - content: Buffer.from(responseData.licenseText).toString("base64"), - message: `feat: ✨ add LICENSE file with ${license} license terms`, - owner, - path: "LICENSE", - repo, - }); - - // Create a PR from that branch with the commit of our added file - logwatch.info("Creating PR..."); - await context.octokit.pulls.create({ - title: "feat: ✨ LICENSE file added", - base: defaultBranchName, - body: `Resolves #${context.payload.issue.number}`, - head: branch, - maintainer_can_modify: true, // Allows maintainers to edit your app's PR - owner, - repo, - }); - - // Comment on issue to notify user that license has been added - logwatch.info("Commenting on issue..."); - await context.octokit.issues.createComment({ - body: `A LICENSE file with ${license} license terms has been added to a new branch and a pull request is awaiting approval. I will close this issue automatically once the pull request is approved.`, - issue_number: context.payload.issue.number, - owner, - repo, - }); - } catch (error) { - logwatch.error({ message: "Error fetching license file:", error }, true); + return { + status: true, + path: filePath, + content: contentData, + spdx_id: spdxId, + }; } - } else { - // License not found, comment on issue to notify user - await context.octokit.issues.createComment({ - body: `The license identifier “${license}” was not found in the SPDX License List. Please reply with a valid license identifier.`, - issue_number: context.payload.issue.number, - owner, - repo, - }); } + return { + path: "No LICENSE file found", + status: false, + content: "", + spdx_id: null, + }; } -export function validateLicense(licenseRequest, existingLicense) { - let licenseId = licenseRequest.data?.license?.spdx_id || null; - let licenseContent = ""; - let licenseContentEmpty = true; - - if (licenseRequest.data?.content) { - try { - licenseContent = Buffer.from(licenseRequest.data.content, "base64") - .toString("utf-8") - .trim(); - } catch (error) { - logwatch.error( - { message: "Error decoding license content:", error }, - true - ); - licenseContent = ""; - } - } +export function validateLicense(license, existingLicense) { + let licenseId = license.spdx_id; + let licenseContent = license.content; + let licenseContentEmpty = license.content === "" ? true : false; // Check for specific license conditions if (licenseId === "no-license" || !licenseId) { + logwatch.info(`No license or 'no-license' found`); licenseId = null; licenseContent = ""; + licenseContentEmpty = true; } - // console.log("Existing License:", existingLicense?.license_id); - // consola.warn(existingLicense?.license_content.trim()); - // consola.warn(licenseContent.trim()); - if (licenseId === "NOASSERTION") { - if (licenseContent === "") { + if (licenseContentEmpty) { // No assertion and no content indicates no valid license - logwatch.info("No assertion and no content indicates no valid license"); + logwatch.info(`No assertion ID with no content provided`); licenseId = null; } else { // Custom license with content provided licenseContentEmpty = false; if (existingLicense?.license_content.trim() !== licenseContent.trim()) { - logwatch.info( - "No assertion ID with different content from db provided" - ); licenseId = "Custom"; // New custom license + logwatch.info(`Custom license with new content provided`); } else if (existingLicense?.license_id) { + licenseId = existingLicense.license_id; // Use existing custom license ID logwatch.info("Custom license with existing content provided"); - licenseId = existingLicense.license_id; // Use existing custom license ID if it matches } } } - if (licenseContent !== "") { - licenseContentEmpty = false; - } - return { licenseId, licenseContent, licenseContentEmpty }; } -/** - * * Applies the license template to the base template - * - * @param {object} subjects - The subjects to check for - * @param {string} baseTemplate - The base template to add to - * @param {object} repository - The GitHub repository information - * @param {string} owner - The owner of the repository - * - * @returns {string} - The updated base template - */ -export async function applyLicenseTemplate( - subjects, - baseTemplate, - repository, - owner, - context -) { - const identifier = createId(); - let badgeURL = `${CODEFAIR_DOMAIN}/dashboard/${owner}/${repository.name}/edit/license`; - const existingLicense = await dbInstance.licenseRequest.findUnique({ +export async function updateLicenseDatabase(repository, license) { + let licenseContentEmpty = license.content === "" ? true : false; + let licenseId = license.spdx_id; + let licenseContent = license.content; + + // Update or create the license entry in the database + let existingLicense = await dbInstance.licenseRequest.findUnique({ where: { repository_id: repository.id }, }); - let licenseId = null; - let licenseContent = ""; - let licenseContentEmpty = true; - - if (subjects.license) { - // Get the license identifier - const licenseRequest = await context.octokit.rest.licenses.getForRepo({ - owner, - repo: repository.name, - }); - - ({ licenseId, licenseContent, licenseContentEmpty } = validateLicense( - licenseRequest, - existingLicense - )); - - // logwatch.info("License ID:", licenseId); - // logwatch.info("License Content Empty:", licenseContentEmpty); - } - if (existingLicense) { - logwatch.info("Updating existing license request..."); + logwatch.info( + `Updating existing license entry for repo: ${repository.name} (ID: ${repository.id})` + ); + + // If license exists, validate it + if (license.status) { + ({ licenseId, licenseContent, licenseContentEmpty } = validateLicense( + license, + existingLicense + )); + + logwatch.success({ + message: `License validation complete`, + licenseId, + licenseContent, + licenseContentEmpty, + repo: `${repository.name} (ID: ${repository.id})`, + }); + } await dbInstance.licenseRequest.update({ data: { - contains_license: subjects.license, + contains_license: license.status, license_status: licenseContentEmpty ? "invalid" : "valid", license_id: licenseId, license_content: licenseContent, + custom_license_title: + licenseId === "Custom" + ? existingLicense.custom_license_title + : "", }, where: { repository_id: repository.id }, }); } else { - logwatch.info("Creating new license request..."); - await dbInstance.licenseRequest.create({ + logwatch.info( + `Creating new license entry for repo: ${repository.name} (ID: ${repository.id})` + ); + existingLicense = await dbInstance.licenseRequest.create({ data: { - contains_license: subjects.license, + identifier: createId(), + contains_license: license.status, license_status: licenseContentEmpty ? "invalid" : "valid", license_id: licenseId, license_content: licenseContent, - identifier, + custom_license_title: "", repository: { connect: { id: repository.id, @@ -284,23 +158,42 @@ export async function applyLicenseTemplate( }, }); } + return existingLicense; +} + +/** + * * Applies the license template to the base template + * + * @param {object} subjects - The subjects to check for + * @param {string} baseTemplate - The base template to add to + * @param {object} repository - The GitHub repository information + * @param {string} owner - The owner of the repository + * + * @returns {string} - The updated base template + */ +export async function applyLicenseTemplate( + subjects, + baseTemplate, + repository, + owner, + context +) { + const badgeURL = `${CODEFAIR_DOMAIN}/dashboard/${owner}/${repository.name}/edit/license`; + const licenseBadge = `[![License](https://img.shields.io/badge/${subjects.license.status ? "Edit_License-0ea5e9" : "Add_License-dc2626"}.svg)](${badgeURL})`; - const licenseBadge = `[![License](https://img.shields.io/badge/${subjects.license ? "Edit_License-0ea5e9" : "Add_License-dc2626"}.svg)](${badgeURL})`; + const existingLicense = await updateLicenseDatabase( + repository, + subjects.license + ); + let licenseId = existingLicense.license_id; + const customTitle = existingLicense.custom_license_title || ""; - if (subjects.license && licenseId && licenseId !== "Custom") { + if (subjects.license.status && licenseId && licenseId !== "Custom") { baseTemplate += `## LICENSE ✔️\n\nA \`LICENSE\` file is found at the root level of the repository.\n\n${licenseBadge}\n\n`; - } else if ( - subjects.license && - licenseId === "Custom" && - !existingLicense?.custom_license_title - ) { - baseTemplate += `## LICENSE ❗\n\nA custom \`LICENSE\` file has been found at the root level of this repository. While using a custom license is normally acceptable for Zenodo, please note that Zenodo's API currently cannot handle custom licenses. If you plan to make a FAIR release, you will be required to select a license from the SPDX license list to ensure proper archival and compliance.\n\nClick the "Edit license" button below to provide a license title or to select a new license.\n\n${licenseBadge}\n\n`; - } else if ( - subjects.license && - licenseId === "Custom" && - existingLicense?.custom_license_title - ) { - baseTemplate += `## LICENSE ✔️\n\nA custom \`LICENSE\` file titled as **${existingLicense?.custom_license_title}**, has been found at the root level of this repository. If you would like to update the title or change license, click the "Edit license" button below.\n\n${licenseBadge}\n\n`; + } else if (subjects.license.status && licenseId === "Custom" && !customTitle) { + baseTemplate += `## LICENSE ❗\n\nA custom \`LICENSE\` file has been found at the root level of this repository.\n > [!NOTE]\n> While using a custom license is normally acceptable for Zenodo, please note that Zenodo's API currently cannot handle custom licenses. If you plan to make a FAIR release, you will be required to select a license from the SPDX license list to ensure proper archival and compliance.\n\nClick the "Edit license" button below to provide a license title or to select a new license.\n\n${licenseBadge}\n\n`; + } else if (subjects.license.status && licenseId === "Custom" && customTitle) { + baseTemplate += `## LICENSE ✔️\n\nA custom \`LICENSE\` file titled as **${customTitle}**, has been found at the root level of this repository. If you would like to update the title or change license, click the "Edit license" button below.\n\n${licenseBadge}\n\n`; } else { baseTemplate += `## LICENSE ❌\n\nTo make your software reusable, a \`LICENSE\` file is expected at the root level of your repository.\nIf you would like Codefair to add a license file, click the "Add license" button below to go to our interface for selecting and adding a license. You can also add a license file yourself, and Codefair will update the dashboard when it detects it on the main branch.\n\n${licenseBadge}\n\n`; } diff --git a/bot/compliance-checks/metadata/index.js b/bot/compliance-checks/metadata/index.js index 8e8dc29d..de86ca3b 100644 --- a/bot/compliance-checks/metadata/index.js +++ b/bot/compliance-checks/metadata/index.js @@ -9,7 +9,752 @@ import dbInstance from "../../db.js"; import { logwatch } from "../../utils/logwatch.js"; const CODEFAIR_DOMAIN = process.env.CODEFAIR_APP_DOMAIN; -const { GH_APP_NAME, VALIDATOR_URL } = process.env; +const { VALIDATOR_URL, GH_APP_NAME } = process.env; + +export async function checkMetadataFilesExists(context, owner, repository) { + // Promise.all to fetch both files in parallel + const [codemetaInfo, citationInfo] = await Promise.all([ + await context.octokit.repos + .getContent({ + owner, + repo: repository.name, + path: "codemeta.json", + }) + .catch((error) => { + if (error.status === 404) { + return null; + } + throw new Error("Error getting codemeta.json file", error); + }), + await context.octokit.repos + .getContent({ + owner, + repo: repository.name, + path: "CITATION.cff", + }) + .catch((error) => { + if (error.status === 404) { + return null; + } + throw new Error("Error getting CITATION.cff file", error); + }), + ]); + const citationExists = citationInfo !== null; + const codemetaExists = codemetaInfo !== null; + logwatch.info( + `Metadata files check for ${owner}/${repository.name}: codemeta.json exists: ${codemetaExists}, CITATION.cff exists: ${citationExists}` + ); + return { + citation: citationExists, + codemeta: codemetaExists, + }; +} + +/** + * Helper object to create structured validation results + * Makes error handling cleaner and preserves context + */ +const ValidationResult = { + valid: (message, details = null) => ({ + isValid: true, + status: "valid", + message, + details, + }), + invalid: (message, details = null) => ({ + isValid: false, + status: "invalid", + message, + details, + }), + unknown: (message, details = null) => ({ + isValid: false, + status: "unknown", + message, + details, + }), + error: (error) => ({ + isValid: false, + status: "unknown", + message: `Validation error: ${error.message}`, + details: { + error: error.message, + stack: error.stack, + }, + }), +}; + +/** + * * Gets the metadata for the repository and returns it as a string + * @param {Object} context - The GitHub context object + * @param {String} owner - The owner of the repository + * @param {Object} repository - Object containing the repository information + * @returns - The content of the codemeta.json file as a string object + */ +export async function getCodemetaContent(context, owner, repository) { + try { + const codemetaFile = await context.octokit.repos.getContent({ + owner, + path: "codemeta.json", + repo: repository.name, + }); + + const raw = Buffer.from(codemetaFile.data.content, "base64").toString(); + + return { + content: raw, + sha: codemetaFile.data.sha, + file_path: codemetaFile.data.download_url, + }; + } catch (error) { + if (error.status === 404) { + return null; + } + throw new Error("Error getting codemeta.json file", error); + } +} + +/** + * * Get the content of the CITATION.cff file + * @param {Object} context - The GitHub context object + * @param {String} owner - The owner of the repository + * @param {Object} repository - The repository information + * @returns {String} - The content of the CITATION.cff file as a string object (need to yaml load it) + */ +export async function getCitationContent(context, owner, repository) { + try { + const citationFile = await context.octokit.repos.getContent({ + owner, + path: "CITATION.cff", + repo: repository.name, + }); + + const raw = Buffer.from(citationFile.data.content, "base64").toString(); + + return { + content: raw, + sha: citationFile.data.sha, + file_path: citationFile.data.download_url, + }; + } catch (error) { + if (error.status === 404) { + return null; + } + throw new Error("Error getting CITATION.cff file", error); + } +} + +/** + * Validate codemeta.json + * Returns ValidationResult instead of throwing or updating DB directly + */ +async function validateCodemeta(metadataInfo) { + if (!metadataInfo || metadataInfo.content == null) { + return ValidationResult.invalid( + "codemeta.json content is null or undefined" + ); + } + + let { content } = metadataInfo; + + let obj; + try { + if (typeof content === "string") { + const text = normalizeText(content); + obj = JSON.parse(text); // ← Parse the string to object and assign to obj + } else if (typeof content === "object" && !Array.isArray(content)) { + obj = content; // Already parsed + } else { + return ValidationResult.invalid( + `Unexpected content type: ${typeof content}${Array.isArray(content) ? " (array)" : ""}` + ); + } + } catch (err) { + return ValidationResult.invalid( + `Invalid JSON in codemeta.json: ${err.message}` + ); + } + + const missing = ["name", "author", "description"].filter((f) => !obj[f]); + if (missing.length) { + return ValidationResult.invalid( + `Required fields missing: ${missing.join(", ")}` + ); + } + + try { + const resp = await fetch(`${VALIDATOR_URL}/validate-codemeta`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ file_content: obj }), + }); + const result = await resp.json(); + + if (!resp.ok) { + return ValidationResult.unknown( + `Validator service returned error (${resp.status})`, + { statusCode: resp.status, response: result } + ); + } + + if (result.message === "valid") { + return ValidationResult.valid( + `Codemeta is valid according to schema v${result.version}`, + { version: result.version } + ); + } else { + return ValidationResult.invalid(result.error || "Validation failed", { + version: result.version, + }); + } + } catch (error) { + return ValidationResult.error(error); + } +} + +/** + * Validate CITATION.cff + */ +async function validateCitation(metadataInfo) { + if (!metadataInfo || metadataInfo.content == null) { + return ValidationResult.invalid( + "CITATION.cff content is null or undefined" + ); + } + + let { content, file_path } = metadataInfo; + + let doc; + try { + if (typeof content !== "string") { + content = JSON.stringify(content); + } + const text = normalizeText(content); + doc = yaml.load(text); + } catch (err) { + return ValidationResult.invalid( + `Invalid YAML in CITATION.cff: ${err.message}` + ); + } + + if (!doc.title || !Array.isArray(doc.authors) || doc.authors.length === 0) { + return ValidationResult.invalid( + "Required fields (title, authors) missing or empty" + ); + } + + try { + const resp = await fetch(`${VALIDATOR_URL}/validate-citation`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ file_path }), + }); + + const result = await resp.json(); + + if (!resp.ok) { + return ValidationResult.unknown( + `Validator service returned error (${resp.status})`, + { statusCode: resp.status, response: result } + ); + } + + if (result.message === "valid") { + return ValidationResult.valid(result.output || "Valid CITATION.cff"); + } else { + return ValidationResult.invalid(result.error || "Validation failed"); + } + } catch (err) { + return ValidationResult.error(err); + } +} + +/** + * * Ensures the metadata is valid based on certain fields + * @param {String} content - The content of the metadata file + * @param {String} fileType - The type of metadata file (codemeta or citation) + * @param {String} file_path - Raw GitHub file path + * @returns {Boolean} - True if the metadata is valid, false otherwise + */ +export async function validateMetadata(metadataInfo, fileType) { + switch (fileType) { + case "codemeta": + return await validateCodemeta(metadataInfo); + case "citation": + return await validateCitation(metadataInfo); + default: + return ValidationResult.invalid(`Unsupported file type: ${fileType}`); + } +} + +function determineRevalidationNeeds(context, subjects) { + const actor = context.payload?.pusher?.name; + + // If no actor or it's our bot, revalidate everything that exists + logwatch.info(`Push actor: ${actor}`); + if (!actor || actor === `${GH_APP_NAME}[bot]`) { + logwatch.info( + `Revalidating all metadata files due to bot push or no actor` + ); + return { + revalidate: true, + codemeta: subjects.codemeta, + citation: subjects.citation, + }; + } + + // Check ALL commits in the push for changes + const commits = context.payload.commits || []; + + let licenseChanged = false; + let codemetaChanged = false; + let citationChanged = false; + + commits.forEach((commit) => { + const allFiles = [ + ...(commit.added || []), + ...(commit.modified || []), + ...(commit.removed || []), + ]; + + if (allFiles.includes("LICENSE")) { + licenseChanged = true; + } + if (allFiles.includes("codemeta.json")) { + codemetaChanged = true; + } + if (allFiles.includes("CITATION.cff")) { + citationChanged = true; + } + }); + + if (licenseChanged) { + return { + revalidate: true, + codemeta: subjects.codemeta, + citation: subjects.citation, + }; + } + + return { + revalidate: codemetaChanged || citationChanged, + codemeta: codemetaChanged && subjects.codemeta, + citation: citationChanged && subjects.citation, + }; +} + +/** + * Ensure a metadata record exists in the database + * Creates skeleton record if it doesn't exist + * Returns the existing or newly created record + */ +async function ensureMetadataRecord(repoId, subjects) { + try { + let existing = await dbInstance.codeMetadata.findUnique({ + where: { repository_id: repoId }, + }); + + if (!existing) { + //Create skeleton record + existing = await dbInstance.codeMetadata.create({ + data: { + identifier: createId(), + repository: { connect: { id: repoId } }, + contains_citation: subjects.citation || false, + contains_codemeta: subjects.codemeta || false, + contains_metadata: !!(subjects.citation & subjects.codemeta), + codemeta_status: "", + citation_status: "", + codemeta_validation_message: "", + citation_validation_message: "", + metadata: {}, + }, + }); + + logwatch.info( + { + message: `Created new metadata record for repository ID ${repoId}`, + identifier: existing.identifier, + }, + true + ); + } + + return existing; + } catch (error) { + logwatch.error( + { + message: "Database error in ensureMetadataRecord", + repoId, + error: error.message, + stack: error.stack, + }, + true + ); + throw new Error("Failed to ensure metadata record exists", { + cause: error, + }); + } +} + +/** + * Update metadata record in database with validation results + */ +async function updateMetadataRecord( + repoId, + metadata, + codemetaValidation, + citationValidation, + subjects +) { + try { + const dataObject = { + contains_citation: subjects.citation, + contains_codemeta: subjects.codemeta, + contains_metadata: !!(subjects.citation & subjects.codemeta), + metadata, + codemeta_status: codemetaValidation.status, + codemeta_validation_message: codemetaValidation.message, + citation_status: citationValidation.status, + citation_validation_message: citationValidation.message, + }; + + await dbInstance.codeMetadata.update({ + where: { repository_id: repoId }, + data: dataObject, + }); + + logwatch.info( + { + message: `Metadata record updated for repository ID ${repoId}`, + repoId, + codemetaStatus: codemetaValidation.status, + citationStatus: citationValidation.status, + }, + true + ); + } catch (error) { + logwatch.error( + { + message: "database error in updateMetadataRecord", + repoId, + error: error.message, + stack: error.stack, + }, + true + ); + throw new Error("Failed to update metadata record", { cause: error }); + } +} + +/** + * Main function to update metadata in database + * Handles validation, merging, and database operations + * @param {number} repoId - The repository ID in the database + * @param {Object} subjects - The subjects object indicating which metadata files exist + * @param {Object} repository - The GitHub repository information + * @param {string} owner - The owner of the repository + * @param {Object} context - The GitHub context object + * @returns {Object} - The updated metadata record + */ +export async function updateMetadataDatabase( + repoId, + subjects, + repository, + owner, + context +) { + const repoInfo = `${owner}/${repository.name}`; + + // 1. Ensure database record exists (creates sekeleton if not) + let existing; + try { + existing = await ensureMetadataRecord(repository.id, subjects); + } catch (err) { + logwatch.error( + { + message: "Failed to ensure metadata record exisxts", + repo: repoInfo, + repoId: repository.id, + error: err.message, + stack: err.stack, + }, + true + ); + throw err; + } + + // 2) Determine which files to revalidate + const revalidationNeeds = determineRevalidationNeeds(context, subjects); + + logwatch.info( + { + message: "Determined revalidation needs", + repo: repoInfo, + revalidate: revalidationNeeds.revalidate, + codemeta: revalidationNeeds.codemeta, + citation: revalidationNeeds.citation, + }, + true + ); + + // 3) Initialize metadata and validations results from existing record + let metadata = existing?.metadata || {}; + let codemetaValidation = { + isValid: existing.codemeta_status === "valid", + status: existing.codemeta_status || "", + message: existing.codemeta_validation_message || "Not yet validated", + }; + let citationValidation = { + isValid: existing.citation_status === "valid", + status: existing.citation_status || "", + message: existing.citation_validation_message || "Not yet validated", + }; + + // 4) Gather and validate metadata if needed + if (revalidationNeeds.revalidate) { + try { + // Gather base metadata from GitHub API + metadata = await gatherMetadata(context, owner, repository); + logwatch.info( + { + message: `Gathered metadata from GitHub API for ${repoInfo}`, + metadata, + }, + true + ); + + // Merge with existing DB metadata (preserving user edits) + if (existing?.metadata && Object.keys(existing.metadata).length > 0) { + metadata = applyDbMetadata(existing, metadata); + logwatch.info( + `Merged existing DB metadata for ${owner}/${repository.name}` + ); + } + } catch (err) { + logwatch.warn( + { + message: "Failed to gather base emtadata, using existing", + repo: repoInfo, + error: err.message, + }, + true + ); + // Continue with existing metadata + } + + // Process codemeta.json if it exists and needs revalidation + if (subjects.codemeta && revalidationNeeds.codemeta) { + try { + const codemetaContent = await getCodemetaContent( + context, + owner, + repository + ); + + if (codemetaContent) { + logwatch.info( + `Fetched codemeta.json content for repo: ${repository.name} (ID: ${repoId})` + ); + + // Validate codemeta.json + codemetaValidation = await validateMetadata( + codemetaContent, + "codemeta" + ); + + // Log based on validation status + if (codemetaValidation.isValid) { + logwatch.success( + `Codemeta.json is valid for repo: ${repository.name} (ID: ${repoId})` + ); + } else if (codemetaValidation.status === "unknown") { + logwatch.warn( + { + message: `Codemeta.json validation service error`, + repo: `${repository.name} (ID: ${repoId})`, + validationMessage: codemetaValidation.message, + details: codemetaValidation.details, + }, + true + ); + } else { + logwatch.info( + { + message: "Codemeta.json validation failed (user issue)", + repo: `${repository.name} (ID: ${repoId})`, + validationMessage: codemetaValidation.message, + details: codemetaValidation.details, + }, + true + ); + } + + // Apply metadata regardless of validation status + try { + metadata = await applyCodemetaMetadata( + codemetaContent, + metadata, + repository + ); + logwatch.info( + `Applied codemeta.json metadata for repo: ${repository.name} (ID: ${repoId})` + ); + } catch (error) { + logwatch.warn( + { + message: + "Failed to apply codemeta.json metadata, using existing", + repo: `${repository.name} (ID: ${repoId})`, + error: error.message, + }, + true + ); + } + } else { + codemetaValidation = ValidationResult.invalid("File not found"); + logwatch.info( + `No codemeta.json found for repo: ${repository.name} (ID: ${repoId})` + ); + } + } catch (error) { + // Fetch or other error + logwatch.warn( + { + message: "Error fetching codemeta.json", + repo: `${repository.name} (ID: ${repoId})`, + error: error.message, + stack: error.stack, + }, + true + ); + codemetaValidation = ValidationResult.error(error); + } + } + + logwatch.info(`subjects.citation: ${subjects.citation}`); + logwatch.info(`revalidationNeeds.citation: ${revalidationNeeds.citation}`); + // Process CITATION.cff if it exists and needs revalidation + if (subjects.citation && revalidationNeeds.citation) { + try { + const citationContent = await getCitationContent( + context, + owner, + repository + ); + + if (citationContent) { + logwatch.info( + `Fetched CITATION.cff content for repo: ${repository.name} (ID: ${repoId})` + ); + + // Validate + citationValidation = await validateMetadata( + citationContent, + "citation" + ); + + // Log based on validation status + if (citationValidation.isValid) { + logwatch.success( + `CITATION.cff is valid for repo: ${repository.name} (ID: ${repoId})` + ); + } else if (citationValidation.status === "unknown") { + logwatch.warn( + { + message: `CITATION.cff validation service error`, + repo: `${repository.name} (ID: ${repoId})`, + validationMessage: citationValidation.message, + details: citationValidation.details, + }, + true + ); + } else { + logwatch.info( + { + message: "CITATION.cff validation failed (user issue)", + repo: `${repository.name} (ID: ${repoId})`, + validationMessage: citationValidation.message, + details: citationValidation.details, + }, + true + ); + } + + try { + metadata = await applyCitationMetadata( + citationContent, + metadata, + repository + ); + logwatch.info( + `Applied CITATION.cff metadata for repo: ${repository.name} (ID: ${repoId})` + ); + } catch (error) { + logwatch.warn( + { + message: "Failed to apply CITATION.cff metadata, skipping", + repo: `${repository.name} (ID: ${repoId})`, + error: error.message, + }, + true + ); + } + } else { + citationValidation = ValidationResult.invalid("File not found"); + logwatch.info( + `No CITATION.cff found for repo: ${repository.name} (ID: ${repoId})` + ); + } + } catch (error) { + // Fetch or other error + logwatch.warn( + { + message: "Error fetching CITATION.cff", + repo: `${repository.name} (ID: ${repoId})`, + error: error.message, + stack: error.stack, + }, + true + ); + citationValidation = ValidationResult.error(error); + } + } + + // 5) Update db + try { + await updateMetadataRecord( + repoId, + metadata, + codemetaValidation, + citationValidation, + subjects + ); + } catch (error) { + logwatch.error( + { + message: "Failed to update metadata record in database", + repo: repoInfo, + repoId, + error: error.message, + stack: error.stack, + }, + true + ); + throw error; + } + } + + // 6) return results + return { + metadata, + validCodemeta: codemetaValidation.isValid, + validCitation: citationValidation.isValid, + codemetaValidation, + citationValidation, + existing, + }; +} /** * * Converts the date to a Unix timestamp @@ -26,6 +771,28 @@ export function convertDateToUnix(date) { return Math.floor(newDate.getTime()); } +/** + * * Get status emoji and text for validation display + * @param {*} validation + * @returns + */ +function getValidationDisplay(validation) { + if (validation.status === "valid") { + return { emoji: "✅", text: "Valid" }; + } else if (validation.status === "unknown") { + return { emoji: "⚠️", text: "Unknown (Service error, try again)" }; + } else { + return { emoji: "❌", text: "Invalid" }; + } +} + +/** + * Strip BOM, then trim + */ +function normalizeText(raw) { + return raw.replace(/^\uFEFF/, "").trim(); +} + /** * * Converts the codemeta.json file content to a metadata object for the database * @param {JSON} codemetaContent - The codemeta.json file content @@ -380,239 +1147,6 @@ export async function gatherMetadata(context, owner, repo) { return codeMeta; } -/** - * * Gets the metadata for the repository and returns it as a string - * @param {Object} context - The GitHub context object - * @param {String} owner - The owner of the repository - * @param {Object} repository - Object containing the repository information - * @returns - The content of the codemeta.json file as a string object - */ -export async function getCodemetaContent(context, owner, repository) { - try { - const codemetaFile = await context.octokit.repos.getContent({ - owner, - path: "codemeta.json", - repo: repository.name, - }); - - const raw = Buffer.from(codemetaFile.data.content, "base64").toString(); - - return { - content: raw, - sha: codemetaFile.data.sha, - file_path: codemetaFile.data.download_url, - }; - - // return JSON.parse(Buffer.from(codemetaFile.data.content, "base64").toString()); - } catch (error) { - throw new Error("Error getting codemeta.json file", error); - } -} - -/** - * * Get the content of the CITATION.cff file - * @param {Object} context - The GitHub context object - * @param {String} owner - The owner of the repository - * @param {Object} repository - The repository information - * @returns {String} - The content of the CITATION.cff file as a string object (need to yaml load it) - */ -export async function getCitationContent(context, owner, repository) { - try { - const citationFile = await context.octokit.repos.getContent({ - owner, - path: "CITATION.cff", - repo: repository.name, - }); - - const raw = Buffer.from(citationFile.data.content, "base64").toString(); - - return { - content: raw, - sha: citationFile.data.sha, - file_path: citationFile.data.download_url, - }; - } catch (error) { - throw new Error("Error getting CITATION.cff file", error); - } -} - -/** - * * Ensures the metadata is valid based on certain fields - * @param {String} content - The content of the metadata file - * @param {String} fileType - The type of metadata file (codemeta or citation) - * @param {String} file_path - Raw GitHub file path - * @returns {Boolean} - True if the metadata is valid, false otherwise - */ -export async function validateMetadata(metadataInfo, fileType, repository) { - switch (fileType) { - case "codemeta": - return validateCodemeta(metadataInfo, repository); - case "citation": - return validateCitation(metadataInfo, repository); - default: - throw new Error(`Unsupported metadata type: ${fileType}`); - } -} - -/** - * Validate codemeta.json - */ -async function validateCodemeta(metadataInfo, repository) { - const repoId = repository.id; - let { content } = metadataInfo; - - if (content == null) { - const msg = "codemeta.json content is null or undefined"; - await updateStatus(repoId, { - codemeta_status: "invalid", - codemeta_validation_message: msg, - }); - return false; - } - - if (typeof content !== "string") { - content = JSON.stringify(content); - } - - const text = normalizeText(content); - let obj; - try { - obj = JSON.parse(text); - } catch (err) { - throw new Error(`Invalid JSON in codemeta.json: ${err.message}`); - } - - const missing = ["name", "author", "description"].filter((f) => !obj[f]); - if (missing.length) { - const msg = `Missing required codemeta fields: ${missing.join(", ")}`; - await updateStatus(repoId, { - codemeta_status: "invalid", - codemeta_validation_message: msg, - }); - return false; - } - - let resp, result; - try { - resp = await fetch(`${VALIDATOR_URL}/validate-codemeta`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ file_content: obj }), - }); - result = await resp.json(); - } catch (err) { - throw new Error(`Error calling codemeta validator: ${err.message}`); - } - - if (!resp.ok) { - throw new Error( - `codemeta validator returned ${resp.status}: ${JSON.stringify(result)}` - ); - } - - if (result.message !== "valid") { - await updateStatus(repoId, { - codemeta_status: "invalid", - codemeta_validation_message: result.error, - }); - return false; - } - - // 6) Success - const success = `Valid according to schema v${result.version}`; - await updateStatus(repoId, { - codemeta_status: "valid", - codemeta_validation_message: success, - }); - return true; -} - -/** - * Validate CITATION.cff - */ -async function validateCitation(metadataInfo, repository) { - const repoId = repository.id; - let { content, file_path } = metadataInfo; - - if (content == null) { - const msg = "CITATION.cff content is null or undefined"; - await updateStatus(repoId, { - citation_status: "invalid", - citation_validation_message: msg, - }); - return false; - } - if (typeof content !== "string") { - content = JSON.stringify(content); - } - - const text = content.replace(/^\uFEFF/, "").trim(); - let doc; - try { - doc = yaml.load(text); - } catch (err) { - throw new Error(`Invalid YAML in CITATION.cff: ${err.message}`); - } - - if (!doc.title || !Array.isArray(doc.authors) || doc.authors.length === 0) { - const msg = "Required fields (title, authors) missing or empty"; - await updateStatus(repoId, { - citation_status: "invalid", - citation_validation_message: msg, - }); - return false; - } - - let resp, result; - try { - resp = await fetch(`${VALIDATOR_URL}/validate-citation`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ file_path }), - }); - result = await resp.json(); - } catch (err) { - throw new Error(`Error calling citation validator: ${err.message}`); - } - - if (!resp.ok) { - throw new Error( - `citation validator returned ${resp.status}: ${JSON.stringify(result)}` - ); - } - - if (result.message !== "valid") { - await updateStatus(repoId, { - citation_status: "invalid", - citation_validation_message: result.error, - }); - return false; - } - - await updateStatus(repoId, { - citation_status: "valid", - citation_validation_message: result.output, - }); - return true; -} - -/** - * Helper to update the DB in one place - */ -async function updateStatus(repoId, fields) { - await dbInstance.codeMetadata.update({ - where: { repository_id: repoId }, - data: fields, - }); -} - -/** - * Strip BOM, then trim - */ -function normalizeText(raw) { - return raw.replace(/^\uFEFF/, "").trim(); -} - /** * * Updates the metadata files with the Zenodo identifier * @param {Object} context - The GitHub context object @@ -1116,119 +1650,6 @@ export async function applyCitationMetadata(citation, metadata, repository) { return metadata; } -export async function updateMetadataDatabase( - repoId, - subjects, - repository, - owner, - context -) { - let existing; - try { - existing = await dbInstance.codeMetadata.findUnique({ - where: { repository_id: repoId }, - }); - } catch (err) { - logwatch.error("Failed to fetch existing metadata from DB", err); - throw new Error("DB lookup failed", { cause: err }); - } - - // 2) Determine which files to revalidate - let revalidate = true; - let revalCitation = true; - let revalCodemeta = true; - - const actor = context.payload?.pusher?.name; - if (actor && actor !== `${GH_APP_NAME}[bot]`) { - logwatch.info(`Push by ${actor}, checking changed files…`); - revalidate = revalCitation = revalCodemeta = false; - - const { added = [], modified = [] } = context.payload.head_commit || {}; - - if ([...added, ...modified].some((f) => f === "LICENSE")) { - revalidate = revalCitation = revalCodemeta = true; - } - - // codemeta.json changed? - if ([...added, ...modified].some((f) => f === "codemeta.json")) { - revalidate = revalCodemeta = true; - } - - // CITATION.cff changed? - if ([...added, ...modified].some((f) => f === "CITATION.cff")) { - revalidate = revalCitation = true; - } - - logwatch.info( - `Revalidate? ${revalidate}, Codemeta? ${revalCodemeta}, Citation? ${revalCitation}` - ); - } - - let metadata = existing?.metadata || {}; - let validCodemeta = existing?.codemeta_status === "valid"; - let validCitation = existing?.citation_status === "valid"; - - if (revalidate) { - try { - // Gather metadata from GitHub API - metadata = await gatherMetadata(context, owner, repository); - logwatch.info("gatherMetadata succeeded"); - } catch (err) { - logwatch.error("gatherMetadata failed", err); - throw new Error("gatherMetadata failed", { cause: err }); - } - - if (existing?.metadata) { - // Merge existing metadata from DB, preserving any user edits - metadata = applyDbMetadata(existing, metadata); - logwatch.info("applyDbMetadata merged existing onto gathered"); - } - - // Validate and apply codemeta.json if it exists - if (subjects.codemeta && revalCodemeta) { - const cm = await getCodemetaContent(context, owner, repository); - logwatch.info("getCodemetaContent succeeded"); - - try { - validCodemeta = await validateMetadata(cm, "codemeta", repository); - logwatch.info(`validateMetadata(codemeta) => ${validCodemeta}`); - } catch (err) { - logwatch.error( - "validateMetadata(codemeta) failed; marking invalid", - err - ); - validCodemeta = false; - } - - metadata = await applyCodemetaMetadata(cm, metadata, repository); - logwatch.info("applyCodemetaMetadata succeeded"); - } - - // Validate and apply CITATION.cff if it exists - if (subjects.citation && revalCitation) { - let cf; - cf = await getCitationContent(context, owner, repository); - logwatch.info("getCitationContent succeeded"); - - try { - validCitation = await validateMetadata(cf, "citation", repository); - logwatch.info(`validateMetadata(citation) => ${validCitation}`); - } catch (err) { - logwatch.error( - "validateMetadata(citation) failed; marking invalid", - err - ); - validCitation = false; - } - - metadata = await applyCitationMetadata(cf, metadata, repository); - logwatch.info("applyCitationMetadata succeeded"); - } - } - - return { metadata, validCodemeta, validCitation, existing }; -} - // TODO: Prevent the user from creating/updating metadata if custom license file exists and has no license title /** * * Applies the metadata template to the base template (CITATION.cff and codemeta.json) @@ -1250,74 +1671,87 @@ export async function applyMetadataTemplate( ) { const repoName = repository.name; const repoId = repository.id; - const url = `${CODEFAIR_DOMAIN}/dashboard/${owner}/${repoName}/edit/code-metadata`; try { - const { metadata, validCodemeta, validCitation, existing } = - await updateMetadataDatabase( - repoId, - subjects, - repository, - owner, - context - ); - - let dataObject = { - contains_citation: subjects.citation, - contains_codemeta: subjects.codemeta, - contains_metadata: subjects.citation && subjects.codemeta, - metadata, - citation_status: validCitation ? "valid" : "invalid", - codemeta_status: validCodemeta ? "valid" : "invalid", - }; + // Fetch metadata (handles all validations and db updates) + const result = await updateMetadataDatabase( + repoId, + subjects, + repository, + owner, + context + ); - if (!existing) { - dataObject.identifier = createId(); - dataObject.repository = { connect: { id: repoId } }; - await dbInstance.codeMetadata.create({ data: dataObject }); - logwatch.info("Created new metadata record in DB"); - } else { - // Preserve any existing fields not in dataObject - dataObject.metadata = { ...existing.metadata, ...dataObject.metadata }; - await dbInstance.codeMetadata.update({ - where: { repository_id: repoId }, - data: dataObject, - }); - logwatch.info("Updated metadata record in DB"); - } + // Urls + const url = `${CODEFAIR_DOMAIN}/dashboard/${owner}/${repoName}/edit/code-metadata`; + const validationsUrl = `${CODEFAIR_DOMAIN}/dashboard/${owner}/${repoName}/view/metadata-validation`; - if ((!subjects.codemeta || !subjects.citation) && subjects.license) { - // License was found but no codemeta.json or CITATION.cff exists + // Render appropriate template based on state + if (!subjects.license.status) { + // No license - metadata check not run + const metadataBadge = `![Metadata](https://img.shields.io/badge/Metadata_Not_Checked-fbbf24)`; + baseTemplate += `## Metadata\n\nTo make your software FAIR a \`CITATION.cff\` and \`codemeta.json\` metadata files are expected at the root level of your repository.\n> [!WARNING]\n> Codefair will run this check after a LICENSE file is detected in your repository.\n\n${metadataBadge}\n\n`; + } else if (!subjects.codemeta || !subjects.citation) { + // License exists but no codemeta.json and/or CITATION.cff const metadataBadge = `[![Metadata](https://img.shields.io/badge/Add_Metadata-dc2626.svg)](${url})`; baseTemplate += `## Metadata ❌\n\nTo make your software FAIR, a \`CITATION.cff\` and \`codemeta.json\` are expected at the root level of your repository. These files are not found in the repository. If you would like Codefair to add these files, click the "Add metadata" button below to go to our interface for providing metadata and generating these files.\n\n${metadataBadge}\n\n`; - } - - if (subjects.codemeta && subjects.citation && subjects.license) { - const allValid = validCodemeta && validCitation; + } else { + // Both metadata files exist + const allValid = result.validCodemeta && result.validCitation; + const hasUnknown = + result.codemetaValidation.status === "unknown" || + result.citationValidation.status === "unknown"; - const validationsUrl = `${CODEFAIR_DOMAIN}/dashboard/${owner}/${repoName}/view/metadata-validation`; const editBadge = `[![Metadata](https://img.shields.io/badge/Edit_Metadata-0ea5e9.svg)](${url})`; const validationsBadge = `[![View Validations](https://img.shields.io/badge/View_Validations-f59e0b.svg)](${validationsUrl})`; - const headingIcon = allValid ? "✔️" : "⚠️"; - const bodyIntro = allValid - ? `A \`CITATION.cff\` and \`codemeta.json\` file are found in the repository. They may need to be updated over time as new people are contributing to the software, etc.` - : `A \`CITATION.cff\` and \`codemeta.json\` file are found in the repository, but there are **validation issues**. Click **View Validations** to review and resolve them.`; + let headingIcon = "✔️"; + let bodyIntro = ""; + + if (allValid) { + headingIcon = "✔️"; + bodyIntro = `A \`CITATION.cff\` and \`codemeta.json\` file are found in the repository. They may need to be updated over time as new people are contributing to the software, etc.`; + } else if (hasUnknown) { + headingIcon = "⚠️"; + bodyIntro = `A \`CITATION.cff\` and \`codemeta.json\` file are found in the repository, but there was an **issue validating them** (our validation service may be down). Click **View Validations** for more details.`; + } else { + headingIcon = "⚠️"; + bodyIntro = `A \`CITATION.cff\` and \`codemeta.json\` file are found in the repository, but there are **validation issues**. Click **View Validations** to review and resolve them.`; + } + + const citationDisplay = getValidationDisplay(result.citationValidation); + const codemetaDisplay = getValidationDisplay(result.codemetaValidation); - const resultsTable = `\n\n| File | Status |\n|-----------------|-------------|\n| \`CITATION.cff\` | ${validCitation ? "✅ Valid" : "❌ Invalid"} |\n| \`codemeta.json\` | ${validCodemeta ? "✅ Valid" : "❌ Invalid"} |\n`; + // Escape pipe characters and remove newlines in validation messages to prevent breaking the markdown table + const sanitizeMessage = (msg) => + (msg || "").replace(/\|/g, "\\|").replace(/\n/g, " "); + const citationMessage = sanitizeMessage( + result.citationValidation.message + ); + const codemetaMessage = sanitizeMessage( + result.codemetaValidation.message + ); - baseTemplate += `## Metadata ${headingIcon}\n\n${bodyIntro}${resultsTable}\n${editBadge} ${validationsBadge}\n\n`; - } + const resultsTable = `\n\n| File | Status | Message |\n|-----------------|-------------|----------|\n| \`CITATION.cff\` | ${citationDisplay.emoji} ${citationDisplay.text} | ${citationMessage} |\n| \`codemeta.json\` | ${codemetaDisplay.emoji} ${codemetaDisplay.text} | ${codemetaMessage} |\n`; - if (!subjects.license) { - // License was not found - const metadataBadge = `![Metadata](https://img.shields.io/badge/Metadata_Not_Checked-fbbf24)`; - baseTemplate += `## Metadata\n\nTo make your software FAIR a \`CITATION.cff\` and \`codemeta.json\` metadata files are expected at the root level of your repository.\n> [!WARNING]\n> Codefair will run this check after a LICENSE file is detected in your repository.\n\n${metadataBadge}\n\n`; + baseTemplate += `## Metadata ${headingIcon}\n\n${bodyIntro}${resultsTable}\n${editBadge} ${validationsBadge}\n\n`; } return baseTemplate; } catch (error) { - logwatch.error("Error applying metadata template", error); - throw error; + logwatch.error( + { + message: "Error applying metadata template", + error: JSON.stringify(error), + repository: repoName, + owner, + error_stack: JSON.stringify(error.stack), + error_message: JSON.stringify(error.message), + }, + true + ); + // Return template with error state + baseTemplate += `## Metadata ⚠️\n\nAn error occurred while checking metadata files. Please try again later or contact support if the issue persists.\n\n`; + return baseTemplate; } } diff --git a/bot/index.js b/bot/index.js index 3ebcc6d0..8c0753a8 100644 --- a/bot/index.js +++ b/bot/index.js @@ -267,6 +267,7 @@ export default async (app, { getRouter }) => { owner ); } + const issueBody = await renderIssues( context, owner, diff --git a/bot/package.json b/bot/package.json index 9db34d9f..12c5c84b 100644 --- a/bot/package.json +++ b/bot/package.json @@ -31,7 +31,8 @@ "prisma:db:pull": "prisma db pull", "prisma:generate": "prisma generate", "scripts:truncate:tables": "tsx ./scripts/prismaM.ts", - "analytics": "tsx ./scripts/analytics.ts" + "analytics": "tsx ./scripts/analytics.ts", + "reset": "rm -rf node_modules && rm -rf .nuxt && rm -rf .output && rm -rf dist && yarn install" }, "dependencies": { "@paralleldrive/cuid2": "^2.2.2", diff --git a/bot/utils/logwatch.js b/bot/utils/logwatch.js index 93fa2c6f..1fed8022 100644 --- a/bot/utils/logwatch.js +++ b/bot/utils/logwatch.js @@ -16,6 +16,15 @@ class Logwatch { } } + /** + * Helper to format message for console output + */ + _formatForConsole(message) { + return typeof message === "object" + ? JSON.stringify(message, null, 2) + : message; + } + /** * Internal method to send log to endpoint * @param {string} level - Log level @@ -63,7 +72,7 @@ class Logwatch { * @param {boolean} [isJson=false] - Whether the message is a JSON object */ trace(message, isJson = false) { - consola.trace(message); + consola.trace(this._formatForConsole(message)); this._sendLog("trace", message, isJson ? "json" : "text"); } @@ -73,7 +82,7 @@ class Logwatch { * @param {boolean} [isJson=false] - Whether the message is a JSON object */ debug(message, isJson = false) { - consola.debug(message); + consola.debug(this._formatForConsole(message)); this._sendLog("debug", message, isJson ? "json" : "text"); } @@ -83,7 +92,7 @@ class Logwatch { * @param {boolean} [isJson=false] - Whether the message is a JSON object */ info(message, isJson = false) { - consola.info(message); + consola.info(this._formatForConsole(message)); this._sendLog("info", message, isJson ? "json" : "text"); } @@ -93,7 +102,7 @@ class Logwatch { * @param {boolean} [isJson=false] - Whether the message is a JSON object */ start(message, isJson = false) { - consola.start(message); + consola.start(this._formatForConsole(message)); this._sendLog("info", message, isJson ? "json" : "text"); } @@ -103,7 +112,7 @@ class Logwatch { * @param {boolean} [isJson=false] - Whether the message is a JSON object */ success(message, isJson = false) { - consola.success(message); + consola.success(this._formatForConsole(message)); this._sendLog("info", message, isJson ? "json" : "text"); } @@ -113,7 +122,7 @@ class Logwatch { * @param {boolean} [isJson=false] - Whether the message is a JSON object */ warn(message, isJson = false) { - consola.warn(message); + consola.warn(this._formatForConsole(message)); this._sendLog("warning", message, isJson ? "json" : "text"); } @@ -123,7 +132,7 @@ class Logwatch { * @param {boolean} [isJson=false] - Whether the message is a JSON object */ error(message, isJson = false) { - consola.error(message); + consola.error(this._formatForConsole(message)); this._sendLog("error", message, isJson ? "json" : "text"); } @@ -133,13 +142,13 @@ class Logwatch { * @param {boolean} [isJson=false] - Whether the message is a JSON object */ critical(message, isJson = false) { - consola.fatal(message); + consola.fatal(this._formatForConsole(message)); this._sendLog("critical", message, isJson ? "json" : "text"); } /** Explicit JSON logging */ json(level = "debug", message) { - consola[level](message); + consola[level](this._formatForConsole(message)); this._sendLog(level, message, "json"); } } diff --git a/bot/utils/tools/index.js b/bot/utils/tools/index.js index 7a18765a..db5ebc3a 100644 --- a/bot/utils/tools/index.js +++ b/bot/utils/tools/index.js @@ -8,6 +8,7 @@ import dayjs from "dayjs"; import timezone from "dayjs/plugin/timezone.js"; import utc from "dayjs/plugin/utc.js"; import dbInstance from "../../db.js"; +import { checkForLicense } from "../../compliance-checks/license/index.js"; import { updateMetadataDatabase } from "../../compliance-checks/metadata/index.js"; dayjs.extend(utc); @@ -650,8 +651,15 @@ export async function iterateCommitDetails( if (commits[i]?.added?.length > 0) { // Iterate through the added files for (let j = 0; j < commits[i]?.added.length; j++) { - if (commits[i].added[j] === "LICENSE") { - subjects.license = true; + if ( + ["LICENSE", "LICENSE.md", "LICENSE.txt"].includes(commits[i].added[j]) + ) { + const license = await checkForLicense( + context, + owner, + repository.name + ); + subjects.license = license; continue; } if (commits[i].added[j] === "CITATION.cff") { @@ -686,7 +694,7 @@ export async function iterateCommitDetails( } } - // Iterate through the remove files + // Iterate through the removed files if (commits[i]?.removed?.length > 0) { for (let j = 0; j < commits[i]?.removed.length; j++) { const fileSplit = commits[i]?.removed[j].split("."); @@ -694,8 +702,17 @@ export async function iterateCommitDetails( removedCWLFiles.push(commits[i].removed[j]); continue; } - if (commits[i]?.removed[j] === "LICENSE") { - subjects.license = false; + if ( + ["LICENSE", "LICENSE.md", "LICENSE.txt"].includes( + commits[i]?.removed[j] + ) + ) { + subjects.license = { + path: "No LICENSE file found", + status: false, + content: "", + spdx_id: null, + }; continue; } if (commits[i]?.removed[j] === "CITATION.cff") { diff --git a/validator/apis/__init__.py b/validator/apis/__init__.py index 321ab91c..ecb29c4c 100644 --- a/validator/apis/__init__.py +++ b/validator/apis/__init__.py @@ -4,7 +4,7 @@ import re import subprocess import jsonschema -from flask_restx import Api, Resource +from flask_restx import Api, Resource, fields api = Api( @@ -187,17 +187,26 @@ class ValidateCodemeta(Resource): @api.response(200, "Success") @api.response(400, "Validation Error") @api.expect( - api.parser().add_argument( - "file_content", - type=str, - help="The content of the codemeta.json file", - required=True, + api.model( + "CodemetaValidation", + { + "file_content": fields.Raw( + required=True, description="The content of the codemeta.json file" + ) + }, ) ) def post(self): """Validate a codemeta.json file""" file_content = api.payload.get("file_content") + # Add null check + if file_content is None: + return { + "message": "Validation Error", + "error": "file_content is required", + }, 400 + # if file content is string, convert to json if isinstance(file_content, str): try: