diff --git a/src/Poseidon/CLI/Forge.hs b/src/Poseidon/CLI/Forge.hs index 98f04aa0..165e427c 100644 --- a/src/Poseidon/CLI/Forge.hs +++ b/src/Poseidon/CLI/Forge.hs @@ -51,7 +51,7 @@ import Control.Exception (catch, throwIO) import Control.Monad (filterM, forM, forM_, unless, when) import Data.List (intercalate, nub) -import Data.Maybe (mapMaybe) +import Data.Maybe (catMaybes, mapMaybe) import Data.Time (getCurrentTime) import qualified Data.Vector as V import qualified Data.Vector.Unboxed as VU @@ -186,6 +186,7 @@ runForge ( maybeSnpFile of Nothing -> snpSetMergeList snpSetList intersect_ Just _ -> SNPSetOther + (newRefName, newRefUrl) <- fillMissingReferenceAssemblyInfo relevantPackages -- compile genotype data structure let gz = if outZip then "gz" else "" genotypeFileData <- case outFormat of @@ -198,7 +199,7 @@ runForge ( (outName <.> "bim" <.> gz) Nothing (outName <.> "fam") Nothing GenotypeOutFormatVCF -> return $ GenotypeVCF (outName <.> "vcf" <.> gz) Nothing - let genotypeData = GenotypeDataSpec genotypeFileData (Just newSNPSet) + let genotypeData = GenotypeDataSpec genotypeFileData (Just newSNPSet) newRefName newRefUrl -- assemble and write result depending on outMode -- logInfo "Creating new package entity" @@ -351,3 +352,27 @@ fillMissingSnpSets packages = forM packages $ \pac -> do logWarning $ "Warning for package " ++ show pac_ ++ ": field \"snpSet\" \ \is not set. I will interpret this as \"snpSet: Other\"" return SNPSetOther
+
+-- | Determine the reference genome assembly name and URL for the forge output.
+-- Collects the distinct values that the given packages set in their genotype data;
+-- if they disagree, a warning is logged and the first value encountered is used.
+-- A component is 'Nothing' when none of the packages sets the respective field.
+fillMissingReferenceAssemblyInfo :: [PoseidonPackage] -> PoseidonIO (Maybe String, Maybe String)
+fillMissingReferenceAssemblyInfo packages = do
+    let refNames = map (genotypeRefAssemblyName . posPacGenotypeData) packages
+        refUrls = map (genotypeRefAssemblyURL . posPacGenotypeData) packages
+        uniqueRefNames = nub $ catMaybes refNames
+        uniqueRefUrls = nub $ catMaybes refUrls
+    when (length uniqueRefNames > 1) $
+        logWarning $ "different reference genome assembly names given: " ++ show uniqueRefNames ++
+            ". I will pick the first for the forge output file"
+    when (length uniqueRefUrls > 1) $
+        logWarning $ "different reference genome assembly URLs given: " ++ show uniqueRefUrls ++
+            ". I will pick the first for the forge output file"
+    let finalRefName = case uniqueRefNames of
+            [] -> Nothing
+            (x:_) -> Just x
+        finalRefUrl = case uniqueRefUrls of
+            [] -> Nothing
+            (x:_) -> Just x
+    return (finalRefName, finalRefUrl)
diff --git a/src/Poseidon/CLI/Genoconvert.hs b/src/Poseidon/CLI/Genoconvert.hs index d580a4c2..0d70577d 100644 --- a/src/Poseidon/CLI/Genoconvert.hs +++ b/src/Poseidon/CLI/Genoconvert.hs @@ -157,7 +157,7 @@ convertGenoTo outFormat onlyGeno outPath removeOld outPlinkPopMode outZip pac = GenotypeOutFormatPlink -> return $ GenotypePlink (outFilesRel !! 0) Nothing (outFilesRel !! 1) Nothing (outFilesRel !! 2) Nothing GenotypeOutFormatVCF -> return $ GenotypeVCF (outFilesRel !! 0) Nothing - let newGenotypeData = GenotypeDataSpec gFileSpec (genotypeSnpSet . posPacGenotypeData $ pac) + let newGenotypeData = GenotypeDataSpec gFileSpec (genotypeSnpSet . 
posPacGenotypeData $ pac) (genotypeRefAssemblyName . posPacGenotypeData $ pac) (genotypeRefAssemblyURL . posPacGenotypeData $ pac) newPac = pac { posPacGenotypeData = newGenotypeData } logInfo $ "Adjusting POSEIDON.yml for " ++ show (posPacNameAndVersion pac) liftIO $ writePoseidonPackage newPac diff --git a/src/Poseidon/CLI/OptparseApplicativeParsers.hs b/src/Poseidon/CLI/OptparseApplicativeParsers.hs index 00676bdf..84af1798 100644 --- a/src/Poseidon/CLI/OptparseApplicativeParsers.hs +++ b/src/Poseidon/CLI/OptparseApplicativeParsers.hs @@ -463,10 +463,10 @@ parseBasePath = OP.strOption ( OP.help "A base directory to search for Poseidon packages.") parseInGenoWithoutSNPSet :: OP.Parser GenotypeDataSpec -parseInGenoWithoutSNPSet = GenotypeDataSpec <$> (parseInGenoOne <|> parseInGenoSep) <*> pure Nothing +parseInGenoWithoutSNPSet = GenotypeDataSpec <$> (parseInGenoOne <|> parseInGenoSep) <*> pure Nothing <*> pure Nothing <*> pure Nothing parseInGenotypeDataset :: OP.Parser GenotypeDataSpec -parseInGenotypeDataset = GenotypeDataSpec <$> (parseInGenoOne <|> parseInGenoSep) <*> (Just <$> parseGenotypeSNPSet) +parseInGenotypeDataset = GenotypeDataSpec <$> (parseInGenoOne <|> parseInGenoSep) <*> (Just <$> parseGenotypeSNPSet) <*> pure Nothing <*> pure Nothing parseInGenoOne :: OP.Parser GenotypeFileSpec parseInGenoOne = OP.option (OP.eitherReader readGenoInput) ( diff --git a/src/Poseidon/GenotypeData.hs b/src/Poseidon/GenotypeData.hs index b25a96ef..d1cd78ae 100644 --- a/src/Poseidon/GenotypeData.hs +++ b/src/Poseidon/GenotypeData.hs @@ -62,8 +62,10 @@ data GenoDataSource = PacBaseDir deriving Show data GenotypeDataSpec = GenotypeDataSpec { - genotypeFileSpec :: GenotypeFileSpec, - genotypeSnpSet :: Maybe SNPSetSpec + genotypeFileSpec :: GenotypeFileSpec, + genotypeSnpSet :: Maybe SNPSetSpec, + genotypeRefAssemblyName :: Maybe String, + genotypeRefAssemblyURL :: Maybe String } deriving (Show, Eq) data GenotypeFileSpec = GenotypeEigenstrat { @@ -122,11 +124,13 @@ instance FromJSON GenotypeDataSpec where <*> v .:? 
"genoFileChkSum" _ -> fail ("unknown format " ++ T.unpack gformat) snpSet <- v .:? "snpSet" - return $ GenotypeDataSpec gfileSpec snpSet + refName <- v .:? "referenceGenomeAssembly" + refURL <- v .:? "referenceGenomeAssemblyURL" + return $ GenotypeDataSpec gfileSpec snpSet refName refURL instance ToJSON GenotypeDataSpec where -- this encodes directly to a bytestring Builder - toJSON (GenotypeDataSpec gfileSpec snpSet) = case gfileSpec of + toJSON (GenotypeDataSpec gfileSpec snpSet refName refURL) = case gfileSpec of GenotypeEigenstrat genoF genoFchk snpF snpFchk indF indFchk -> object [ "format" .= ("EIGENSTRAT" :: String), @@ -136,7 +140,9 @@ instance ToJSON GenotypeDataSpec where "snpFileChkSum" .= snpFchk, "indFile" .= indF, "indFileChkSum" .= indFchk, - "snpSet" .= snpSet + "snpSet" .= snpSet, + "referenceGenomeAssembly" .= refName, + "referenceGenomeAssemblyURL" .= refURL ] GenotypePlink genoF genoFchk snpF snpFchk indF indFchk -> object [ @@ -147,13 +153,18 @@ instance ToJSON GenotypeDataSpec where "snpFileChkSum" .= snpFchk, "indFile" .= indF, "indFileChkSum" .= indFchk, - "snpSet" .= snpSet + "snpSet" .= snpSet, + "referenceGenomeAssembly" .= refName, + "referenceGenomeAssemblyURL" .= refURL ] GenotypeVCF genoF genoFchk -> object [ "format" .= ("VCF" :: String), "genoFile" .= genoF, - "genoFileChkSum".= genoFchk + "genoFileChkSum".= genoFchk, + "snpSet" .= snpSet, + "referenceGenomeAssembly" .= refName, + "referenceGenomeAssemblyURL" .= refURL ] data SNPSetSpec = SNPSet1240K @@ -196,7 +207,7 @@ snpSetMerge SNPSetHumanOrigins SNPSet1240K False = SNPSet1240K -- | removes directories of all filenames and returns a tuple of the basename and a modified GenotypeDataSpec with pure filenames -- In case basedirectories do not match, this function will throw an exception reduceGenotypeFilepaths :: (MonadThrow m) => GenotypeDataSpec -> m (FilePath, GenotypeDataSpec) -reduceGenotypeFilepaths gd@(GenotypeDataSpec gFileSpec _) = do +reduceGenotypeFilepaths 
gd@(GenotypeDataSpec gFileSpec _ _ _) = do (baseDir, newGfileSpec) <- case gFileSpec of GenotypeEigenstrat genoF _ snpF _ indF _ -> do let baseDirs = map takeDirectory [genoF, snpF, indF] @@ -218,7 +229,7 @@ reduceGenotypeFilepaths gd@(GenotypeDataSpec gFileSpec _) = do loadIndividuals :: FilePath -- ^ the base directory -> GenotypeDataSpec -- ^ the Genotype spec -> PoseidonIO [EigenstratIndEntry] -- ^ the returned list of EigenstratIndEntries. -loadIndividuals d (GenotypeDataSpec gFileSpec _) = do +loadIndividuals d (GenotypeDataSpec gFileSpec _ _ _) = do popMode <- envInputPlinkMode case gFileSpec of GenotypeEigenstrat _ _ _ _ fn _ -> readEigenstratInd (d fn) @@ -260,7 +271,7 @@ loadGenotypeData :: (MonadSafe m) => -> GenotypeDataSpec -- ^ the genotype spec -> m (Producer (EigenstratSnpEntry, GenoLine) m ()) -- ^ a Producer over the Snp position values and the genotype line. -loadGenotypeData baseDir (GenotypeDataSpec gFileSpec _) = +loadGenotypeData baseDir (GenotypeDataSpec gFileSpec _ _ _) = case gFileSpec of GenotypeEigenstrat genoF _ snpF _ indF _ -> snd <$> readEigenstrat (baseDir genoF) (baseDir snpF) (baseDir indF) GenotypePlink genoF _ snpF _ indF _ -> snd <$> readPlink (baseDir genoF) (baseDir snpF) (baseDir indF) diff --git a/src/Poseidon/Package.hs b/src/Poseidon/Package.hs index 99456ece..c564ad55 100644 --- a/src/Poseidon/Package.hs +++ b/src/Poseidon/Package.hs @@ -7,6 +7,7 @@ module Poseidon.Package ( PoseidonPackage(..), PoseidonException(..), PackageReadOptions (..), + LicenseSpec (..), findAllPoseidonYmlFiles, checkJannoIndConsistency, readPoseidonPackageCollection, @@ -130,6 +131,7 @@ data PoseidonYamlStruct = PoseidonYamlStruct , _posYamlContributor :: [ContributorSpec] , _posYamlPackageVersion :: Maybe Version , _posYamlLastModified :: Maybe Day + , _posYamlLicense :: Maybe LicenseSpec , _posYamlGenotypeData :: GenotypeDataSpec , _posYamlJannoFile :: Maybe FilePath , _posYamlJannoFileChkSum :: Maybe String @@ -142,6 +144,13 @@ data 
PoseidonYamlStruct = PoseidonYamlStruct } deriving (Show, Eq, Generic) +data LicenseSpec = LicenseSpec + { licenseName :: String + , licenseURL :: Maybe String + , licenseFile :: Maybe FilePath + } + deriving (Show, Eq, Generic) + poseidonJannoFilePath :: FilePath -> PoseidonYamlStruct -> Maybe FilePath poseidonJannoFilePath baseDir yml = (baseDir ) <$> _posYamlJannoFile yml poseidonSeqSourceFilePath :: FilePath -> PoseidonYamlStruct -> Maybe FilePath @@ -161,6 +170,7 @@ instance FromJSON PoseidonYamlStruct where <*> v .:? "contributor" .!= [] <*> v .:? "packageVersion" <*> v .:? "lastModified" + <*> v .:? "license" <*> v .: "genotypeData" <*> v .:? "jannoFile" <*> v .:? "jannoFileChkSum" @@ -179,6 +189,7 @@ instance ToJSON PoseidonYamlStruct where (if not $ null (_posYamlContributor x) then ["contributor" .= _posYamlContributor x] else []) ++ ["packageVersion" .= _posYamlPackageVersion x, "lastModified" .= _posYamlLastModified x, + "license" .= _posYamlLicense x, "genotypeData" .= _posYamlGenotypeData x, "jannoFile" .= _posYamlJannoFile x, "jannoFileChkSum" .= _posYamlJannoFileChkSum x, @@ -190,6 +201,19 @@ instance ToJSON PoseidonYamlStruct where "changelogFile" .= _posYamlChangelogFile x ] +instance FromJSON LicenseSpec where + parseJSON = withObject "LicenseSpec" $ \v -> LicenseSpec + <$> v .: "name" + <*> v .:? "url" + <*> v .:? 
"file" + +instance ToJSON LicenseSpec where + toJSON x = object [ + "name" .= licenseName x, + "url" .= licenseURL x, + "file" .= licenseFile x + ] + instance HasNameAndVersion PoseidonYamlStruct where getPacName = _posYamlTitle getPacVersion = _posYamlPackageVersion @@ -208,6 +232,7 @@ data PoseidonPackage = PoseidonPackage -- ^ the contributor(s) of the package , posPacLastModified :: Maybe Day -- ^ the optional date of last update + , posPacLicense :: Maybe LicenseSpec , posPacGenotypeData :: GenotypeDataSpec -- ^ the paths to the genotype files , posPacJannoFile :: Maybe FilePath @@ -382,7 +407,7 @@ readPoseidonPackage opts ymlPath = do bs <- liftIO $ B.readFile ymlPath -- read yml files - yml@(PoseidonYamlStruct ver tit des con pacVer mod_ geno jannoF jannoC seqSourceF seqSourceC bibF bibC readF changeF) <- case decodeEither' bs of + yml@(PoseidonYamlStruct ver tit des con pacVer mod_ lic geno jannoF jannoC seqSourceF seqSourceC bibF bibC readF changeF) <- case decodeEither' bs of Left err -> throwM $ PoseidonYamlParseException ymlPath err Right pac -> return pac checkYML yml @@ -431,7 +456,7 @@ readPoseidonPackage opts ymlPath = do logInfo $ "Trying to parse genotype data for package: " ++ tit -- create PoseidonPackage - let pac = PoseidonPackage baseDir ver (PacNameAndVersion tit pacVer) des con mod_ geno jannoF janno jannoC seqSourceF seqSource seqSourceC bibF bib bibC readF changeF + let pac = PoseidonPackage baseDir ver (PacNameAndVersion tit pacVer) des con mod_ lic geno jannoF janno jannoC seqSourceF seqSource seqSourceC bibF bib bibC readF changeF -- validate genotype data when (not (_readOptIgnoreGeno opts) && _readOptGenoCheck opts) $ @@ -713,6 +738,7 @@ newMinimalPackageTemplate baseDir name gd = do , posPacDescription = Nothing , posPacContributor = [] , posPacLastModified = Nothing + , posPacLicense = Nothing , posPacGenotypeData = reducedGD , posPacJannoFile = Nothing , posPacJanno = mempty @@ -790,8 +816,8 @@ newPackageTemplate baseDir name 
genoData indsOrJanno seqSource bib = do } writePoseidonPackage :: PoseidonPackage -> IO () -writePoseidonPackage (PoseidonPackage baseDir ver nameAndVer des con mod_ geno jannoF _ jannoC seqSourceF _ seqSourceC bibF _ bibFC readF changeF) = do - let yamlPac = PoseidonYamlStruct ver (getPacName nameAndVer) des con (getPacVersion nameAndVer) mod_ geno jannoF jannoC seqSourceF seqSourceC bibF bibFC readF changeF +writePoseidonPackage (PoseidonPackage baseDir ver nameAndVer des con mod_ lic geno jannoF _ jannoC seqSourceF _ seqSourceC bibF _ bibFC readF changeF) = do + let yamlPac = PoseidonYamlStruct ver (getPacName nameAndVer) des con (getPacVersion nameAndVer) mod_ lic geno jannoF jannoC seqSourceF seqSourceC bibF bibFC readF changeF outF = baseDir "POSEIDON.yml" B.writeFile outF $!! encodePretty opts yamlPac where @@ -807,6 +833,7 @@ writePoseidonPackage (PoseidonPackage baseDir ver nameAndVer des con mod_ geno j "orcid", "packageVersion", "lastModified", + "license", "genotypeData", "format", "genoFile", diff --git a/test/Poseidon/GenotypeDataSpec.hs b/test/Poseidon/GenotypeDataSpec.hs index 47e9cda5..9af95e8d 100644 --- a/test/Poseidon/GenotypeDataSpec.hs +++ b/test/Poseidon/GenotypeDataSpec.hs @@ -61,16 +61,16 @@ testJoinGenoEntries = testLoadVCF :: Spec testLoadVCF = describe "loadIndividuals(VCF)" $ do it "should correctly read group names and genetic sex from VCF header" $ do - let gSpec = GenotypeDataSpec (GenotypeVCF "geno.vcf" Nothing) Nothing + let gSpec = GenotypeDataSpec (GenotypeVCF "geno.vcf" Nothing) Nothing Nothing Nothing let baseDir = "test/testDat/testPackages/other_test_packages/Schiffels_2016_vcf/" fmap (take 3) (testLog $ loadIndividuals baseDir gSpec) `shouldReturn` [EigenstratIndEntry "XXX001" Male "POP1", EigenstratIndEntry "XXX002" Female "POP2", EigenstratIndEntry "XXX003" Male "POP1"] it "should throw if encountering wrong number of group names" $ do - let gSpec = GenotypeDataSpec (GenotypeVCF "geno_wrong_groupnames.vcf" Nothing) Nothing 
+ let gSpec = GenotypeDataSpec (GenotypeVCF "geno_wrong_groupnames.vcf" Nothing) Nothing Nothing Nothing let baseDir = "test/testDat/testGenoFiles" testLog (loadIndividuals baseDir gSpec) `shouldThrow` groupNameExc it "should throw if encountering wrong number of genetic sex entries" $ do - let gSpec = GenotypeDataSpec (GenotypeVCF "geno_wrong_sexEntries.vcf" Nothing) Nothing + let gSpec = GenotypeDataSpec (GenotypeVCF "geno_wrong_sexEntries.vcf" Nothing) Nothing Nothing Nothing let baseDir = "test/testDat/testGenoFiles" testLog (loadIndividuals baseDir gSpec) `shouldThrow` sexEntryExc where diff --git a/test/Poseidon/PackageSpec.hs b/test/Poseidon/PackageSpec.hs index f13cf4b8..d08255a9 100644 --- a/test/Poseidon/PackageSpec.hs +++ b/test/Poseidon/PackageSpec.hs @@ -8,7 +8,8 @@ import Poseidon.GenotypeData (GenotypeDataSpec (..), GenotypeFileSpec (..), SNPSetSpec (..)) import Poseidon.Janno (createMinimalJanno) -import Poseidon.Package (PackageReadOptions (..), +import Poseidon.Package (LicenseSpec (..), + PackageReadOptions (..), PoseidonPackage (..), PoseidonYamlStruct (..), checkJannoIndConsistency, @@ -71,6 +72,9 @@ contributor: email: schiffels@institute.org orcid: 0000-0002-1017-9150 packageVersion: 1.0.0 +license: + name: CC-BY-4.0 + url: https://creativecommons.org/licenses/by/4.0/ lastModified: 2020-02-28 bibFile: sources.bib genotypeData: @@ -100,6 +104,7 @@ truePackageRelPaths = PoseidonYamlStruct { ], _posYamlPackageVersion = Just $ makeVersion [1, 0, 0], _posYamlLastModified = Just $ fromGregorian 2020 2 28, + _posYamlLicense = Just $ LicenseSpec "CC-BY-4.0" (Just "https://creativecommons.org/licenses/by/4.0/") Nothing, _posYamlGenotypeData = GenotypeDataSpec { genotypeFileSpec = GenotypePlink { _plGenoFile = "Schiffels_2016.bed", @@ -109,7 +114,9 @@ truePackageRelPaths = PoseidonYamlStruct { _plIndFile = "Schiffels_2016.fam", _plIndFileChkSum = Nothing }, - genotypeSnpSet = Just SNPSet1240K + genotypeSnpSet = Just SNPSet1240K, + 
genotypeRefAssemblyName = Nothing, + genotypeRefAssemblyURL = Nothing }, _posYamlJannoFile = Just "Schiffels_2016.janno", _posYamlJannoFileChkSum = Nothing, @@ -181,6 +188,7 @@ testPoseidonFromYAML = describe "PoseidonPackage.fromYAML" $ do _posYamlContributor = [], _posYamlPackageVersion = Nothing, _posYamlLastModified = Nothing, + _posYamlLicense = Nothing, _posYamlGenotypeData = GenotypeDataSpec { genotypeFileSpec = GenotypePlink { _plGenoFile = "test.bed", @@ -190,7 +198,9 @@ testPoseidonFromYAML = describe "PoseidonPackage.fromYAML" $ do _plIndFile = "test.fam", _plIndFileChkSum = Nothing }, - genotypeSnpSet = Nothing + genotypeSnpSet = Nothing, + genotypeRefAssemblyName = Nothing, + genotypeRefAssemblyURL = Nothing }, _posYamlJannoFile = Nothing, _posYamlJannoFileChkSum = Nothing, diff --git a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt index 7f3fb225..f1c595d5 100644 --- a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt +++ b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt @@ -7,7 +7,7 @@ fd632717ecaf337a39cfd7a828a54e99 init init/Schiffels/Schiffels.janno 9edc4a757f785a8ecb59c54d16c5690a init init/Schiffels/Schiffels.bib c35421d9be15aa66fa3a3c46df1f746c init init/Wang/POSEIDON.yml ae66d851301f4a761b819f97ec28fa55 init init/Wang/Wang_2020.bed -956c7bf4c6999cc322ad8407d8bef776 init init_vcf/Schiffels_vcf/POSEIDON.yml +72400156a00aa01e4da7c84a1fcfe829 init init_vcf/Schiffels_vcf/POSEIDON.yml fd632717ecaf337a39cfd7a828a54e99 init init_vcf/Schiffels_vcf/Schiffels.janno b088fa0fea0d013ddebacd7b6276fc53 init init_vcf/Schiffels_vcf/geno.vcf 9edc4a757f785a8ecb59c54d16c5690a init init_vcf/Schiffels_vcf/Schiffels.bib @@ -130,7 +130,7 @@ ad7e56177aad0a720f0bde13d47f2ac1 forge forge/ForgePac19/CHANGELOG.md b7b649620cd37bd4a6d6f0f31c1c56da forge forge/ForgePac19/ForgePac19.janno b36b3ca509c235d0f15571c96195e801 forge forge/ForgePac20/POSEIDON.yml e375863bca9e4a91c9855396abde31c7 forge 
forge/ForgePac20/ForgePac20.janno -1f24e4ad0943c830a58e9ae168f9ffa6 forge forge/ForgePac21/POSEIDON.yml +d17a13be042bc19941d3b45fe9e699eb forge forge/ForgePac21/POSEIDON.yml abdb2335dc85cbd21af5c41db3d8394e forge forge/ForgePac21/ForgePac21.vcf 8846333d9a1de6510f25a3816cc70fef forge forge/ForgePac21/ForgePac21.janno 9089f5d5602937bb7713e1dc8d7a8f2d forge forge/ForgePac21/ForgePac21.ssf diff --git a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml index 7a0e4127..13293597 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml @@ -1,29 +1,29 @@ title: Chronicle title description: Chronicle description chronicleVersion: 0.2.0 -lastModified: 2025-10-27 +lastModified: 2026-01-06 packages: - title: Lamnidis_2018 version: 1.0.0 - commit: d3f9c9c65f3bcc7e3eafd9fe1f1c9f5fc62b2f8e + commit: 42aa4ed94a92a370940c1cbf7d92f6c7f3a20835 path: Lamnidis_2018 - title: Lamnidis_2018 version: 1.0.1 - commit: d3f9c9c65f3bcc7e3eafd9fe1f1c9f5fc62b2f8e + commit: 42aa4ed94a92a370940c1cbf7d92f6c7f3a20835 path: Lamnidis_2018_newVersion - title: Schiffels version: 1.1.1 - commit: 6bdc0d002cceb4208a60a6ef3e8b3d63cea244d5 + commit: 40a20143a2d7f604a6b8a7dd9bd0fdc30061019e path: Schiffels - title: Schiffels_2016 version: 1.0.1 - commit: d3f9c9c65f3bcc7e3eafd9fe1f1c9f5fc62b2f8e + commit: 42aa4ed94a92a370940c1cbf7d92f6c7f3a20835 path: Schiffels_2016 - title: Schmid_2028 version: 1.0.0 - commit: d3f9c9c65f3bcc7e3eafd9fe1f1c9f5fc62b2f8e + commit: 42aa4ed94a92a370940c1cbf7d92f6c7f3a20835 path: Schmid_2028 - title: Wang_2020 version: 0.1.0 - commit: d3f9c9c65f3bcc7e3eafd9fe1f1c9f5fc62b2f8e + commit: 42aa4ed94a92a370940c1cbf7d92f6c7f3a20835 path: Wang_2020 diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac21/POSEIDON.yml b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac21/POSEIDON.yml index 
8905488f..d977a963 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac21/POSEIDON.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac21/POSEIDON.yml @@ -6,6 +6,7 @@ lastModified: 1970-01-01 genotypeData: format: VCF genoFile: ForgePac21.vcf + snpSet: Other jannoFile: ForgePac21.janno sequencingSourceFile: ForgePac21.ssf bibFile: ForgePac21.bib diff --git a/test/PoseidonGoldenTests/GoldenTestData/genoconvert/out_vcf/Schiffels_2016.vcf.gz b/test/PoseidonGoldenTests/GoldenTestData/genoconvert/out_vcf/Schiffels_2016.vcf.gz index 1212982c..ec5865e5 100644 Binary files a/test/PoseidonGoldenTests/GoldenTestData/genoconvert/out_vcf/Schiffels_2016.vcf.gz and b/test/PoseidonGoldenTests/GoldenTestData/genoconvert/out_vcf/Schiffels_2016.vcf.gz differ diff --git a/test/PoseidonGoldenTests/GoldenTestData/init_vcf/Schiffels_vcf/POSEIDON.yml b/test/PoseidonGoldenTests/GoldenTestData/init_vcf/Schiffels_vcf/POSEIDON.yml index 8febf145..f5dbc109 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/init_vcf/Schiffels_vcf/POSEIDON.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/init_vcf/Schiffels_vcf/POSEIDON.yml @@ -6,5 +6,6 @@ lastModified: 1970-01-01 genotypeData: format: VCF genoFile: geno.vcf + snpSet: Other jannoFile: Schiffels.janno bibFile: Schiffels.bib diff --git a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs index 3aecbed4..311bb75e 100644 --- a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs +++ b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs @@ -234,6 +234,8 @@ testPipelineInit testDir checkFilePath = do , _esIndFileChkSum = Nothing } , genotypeSnpSet = Just SNPSetOther + , genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing } , _initPacPath = testDir "init" "Schiffels" , _initPacName = Just "Schiffels" @@ -259,6 +261,8 @@ testPipelineInit testDir checkFilePath = do , _plIndFileChkSum = Nothing } , genotypeSnpSet = Just SNPSetOther + , 
genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing } , _initPacPath = testDir "init" "Wang" , _initPacName = Nothing @@ -278,6 +282,8 @@ testPipelineInit testDir checkFilePath = do , _vcfGenoFileChkSum = Nothing } , genotypeSnpSet = Just SNPSetOther + , genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing } , _initPacPath = testDir "init_vcf" "Schiffels_vcf" , _initPacName = Just "Schiffels" @@ -335,6 +341,8 @@ testPipelineValidate testDir checkFilePath = do , _esIndFileChkSum = Nothing } , genotypeSnpSet = Nothing + , genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing } } & run 6 validateOpts1 { @@ -501,6 +509,8 @@ testPipelineGenoconvert testDir checkFilePath = do , _esIndFileChkSum = Nothing } , genotypeSnpSet = Just SNPSetOther + , genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing } ] , _genoConvertOutFormat = GenotypeOutFormatPlink @@ -525,6 +535,8 @@ testPipelineGenoconvert testDir checkFilePath = do , _vcfGenoFileChkSum = Nothing } , genotypeSnpSet = Just SNPSetOther + , genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing } ] , _genoConvertOutFormat = GenotypeOutFormatPlink @@ -557,7 +569,7 @@ testPipelineGenoconvert testDir checkFilePath = do let gSpec = GenotypePlink (testDir "genoconvert" "zip_roundtrip" "Schiffels_2016.bed.gz") Nothing (testDir "genoconvert" "zip_roundtrip" "Schiffels_2016.bim.gz") Nothing (testDir "genoconvert" "zip_roundtrip" "Schiffels_2016.fam") Nothing - in [GenoDirect $ GenotypeDataSpec gSpec Nothing] + in [GenoDirect $ GenotypeDataSpec gSpec Nothing Nothing Nothing] , _genoConvertOutFormat = GenotypeOutFormatPlink , _genoMaybeOutPackagePath = Nothing , _genoconvertRemoveOld = True @@ -807,6 +819,8 @@ testPipelineForge testDir checkFilePath = do , _esIndFileChkSum = Nothing } , genotypeSnpSet = Just SNPSetOther + , genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing }, GenoDirect $ GenotypeDataSpec { @@ -819,6 +833,8 
@@ testPipelineForge testDir checkFilePath = do , _plIndFileChkSum = Nothing } , genotypeSnpSet = Just SNPSetOther + , genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing } ] , _forgeEntityInput = [EntitiesDirect (fromRight [] $ readEntitiesFromString "POP2,,")] @@ -855,6 +871,8 @@ testPipelineForge testDir checkFilePath = do , _plIndFileChkSum = Nothing } , genotypeSnpSet = Just SNPSetOther + , genotypeRefAssemblyName = Nothing + , genotypeRefAssemblyURL = Nothing } ] , _forgeEntityInput = [EntitiesDirect (fromRight [] $ readEntitiesFromString "POP2,,")]