diff --git a/src/Poseidon/CLI/Forge.hs b/src/Poseidon/CLI/Forge.hs index 98f04aa0..919a1222 100644 --- a/src/Poseidon/CLI/Forge.hs +++ b/src/Poseidon/CLI/Forge.hs @@ -4,6 +4,7 @@ module Poseidon.CLI.Forge where import Poseidon.BibFile (BibEntry (..), BibTeX, writeBibTeXFile) +import Poseidon.ColumnTypesJanno (PoseidonID (..)) import Poseidon.ColumnTypesUtils (ListColumn (..), getMaybeListColumn) import Poseidon.EntityTypes (EntityInput, @@ -329,9 +330,9 @@ sumNonMissingSNPs accumulator (_, geno) = do filterSeqSourceRows :: JannoRows -> SeqSourceRows -> SeqSourceRows filterSeqSourceRows (JannoRows jRows) (SeqSourceRows sRows) = let desiredPoseidonIDs = map jPoseidonID jRows - in SeqSourceRows $ filter (hasAPoseidonID desiredPoseidonIDs) sRows + in SeqSourceRows $ filter (hasAPoseidonID desiredPoseidonIDs) sRows where - hasAPoseidonID :: [String] -> SeqSourceRow -> Bool + hasAPoseidonID :: [PoseidonID] -> SeqSourceRow -> Bool hasAPoseidonID jIDs seqSourceRow = let sIDs = getMaybeListColumn $ sPoseidonID seqSourceRow in any (`elem` jIDs) sIDs diff --git a/src/Poseidon/CLI/Serve.hs b/src/Poseidon/CLI/Serve.hs index ba540b47..794006a6 100644 --- a/src/Poseidon/CLI/Serve.hs +++ b/src/Poseidon/CLI/Serve.hs @@ -337,7 +337,7 @@ extractPosJannoRow archiveName pacName pacVersion row = case (jLatitude row, jLo let poseidonID = jPoseidonID row loc = show <$> jLocation row age = show <$> jDateBCADMedian row - in Just $ MapMarker lat lon poseidonID pacName pacVersion archiveName loc age + in Just $ MapMarker lat lon (show poseidonID) pacName pacVersion archiveName loc age _ -> Nothing prepPacVersions :: String -> [PoseidonPackage] -> ActionM [PoseidonPackage] @@ -360,7 +360,7 @@ prepSamples pac = return $ getJannoRowsFromPac pac prepSample :: String -> [JannoRow] -> ActionM JannoRow prepSample sampleName rows = do - case filter (\r -> jPoseidonID r == sampleName) rows of + case filter (\r -> show (jPoseidonID r) == sampleName) rows of [] -> fail $ "Sample " <> sampleName <> " does not exist" [x] -> return x _ -> fail $ "Sample " <> sampleName <> " exists multiple times" diff --git a/src/Poseidon/CLI/Summarise.hs b/src/Poseidon/CLI/Summarise.hs index d03b4ac1..2d05d77c 100644 --- a/src/Poseidon/CLI/Summarise.hs +++ b/src/Poseidon/CLI/Summarise.hs @@ -51,7 +51,7 @@ summariseJannoRows (JannoRows rows) rawOutput = do ["Nr Samples" , show (length rows)], ["Samples" - , paste . sort . map jPoseidonID $ rows], + , paste . sort . map (show . jPoseidonID) $ rows], ["Nr Primary Groups" , uniqueNumber . map (head . getListColumn . jGroupName) $ rows], ["Primary Groups" diff --git a/src/Poseidon/CLI/Survey.hs b/src/Poseidon/CLI/Survey.hs index f5f4ef61..33ab440d 100644 --- a/src/Poseidon/CLI/Survey.hs +++ b/src/Poseidon/CLI/Survey.hs @@ -8,7 +8,7 @@ module Poseidon.CLI.Survey where import Poseidon.BibFile (BibTeX) -import Poseidon.ColumnTypesJanno (GeneticSex (..)) +import Poseidon.ColumnTypesJanno (GeneticSex (..), PoseidonID (..)) import Poseidon.ColumnTypesUtils (CsvNamedRecord, ListColumn (..)) import Poseidon.GenotypeData (GenotypeDataSpec (..), GenotypeFileSpec (..)) @@ -123,6 +123,9 @@ renderJannoCompleteness (JannoRows rows) = -- A typeclass to determine if a field in a .janno row is filled class PresenceCountable a where countPresence :: a -> Int + +instance PresenceCountable PoseidonID where + countPresence _ = 1 instance PresenceCountable (Maybe a) where countPresence Nothing = 0 countPresence (Just _) = 1 diff --git a/src/Poseidon/ColumnTypesJanno.hs b/src/Poseidon/ColumnTypesJanno.hs index 33bff493..76b56a93 100644 --- a/src/Poseidon/ColumnTypesJanno.hs +++ b/src/Poseidon/ColumnTypesJanno.hs @@ -1,4 +1,5 @@ {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE InstanceSigs #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TemplateHaskell #-} @@ -9,7 +10,10 @@ import Poseidon.ColumnTypesUtils import Country (Country, alphaTwoUpper, decodeAlphaTwo) +import qualified Data.ByteString.Char8 as B +import qualified Data.Char as C import qualified Data.Csv as Csv +import Data.String (IsString (..)) import qualified Data.Text as T import qualified Data.Text.Read as T import GHC.Generics (Generic) @@ -40,11 +44,59 @@ instance Ord GeneticSex where compare (GeneticSex Unknown) (GeneticSex Female) = LT compare _ _ = EQ instance Csv.ToField GeneticSex where toField x = Csv.toField $ show x -instance Csv.FromField GeneticSex where parseField = parseTypeCSV "Genetic_Sex" +instance Csv.FromField GeneticSex where parseField :: Csv.Field -> Csv.Parser GeneticSex + parseField = parseTypeCSV "Genetic_Sex" + +newtype PoseidonID = PoseidonID {unPoseidonID :: B.ByteString} deriving (Eq, Ord) + +instance Makeable PoseidonID where + make txt = + if T.all isValidPoseidonIDChar txt + then return . PoseidonID . B.pack . T.unpack $ txt + else fail $ "PoseidonID contains invalid characters: " ++ T.unpack txt + +isValidPoseidonIDChar :: Char -> Bool +isValidPoseidonIDChar c = + C.isAscii c && (C.isAlphaNum c || c `elem` ['_', '-', '.']) + +instance Suspicious PoseidonID where inspect _ = Nothing + +instance Show PoseidonID where + show (PoseidonID x) = B.unpack x + +instance Csv.ToField PoseidonID where + toField (PoseidonID x) = Csv.toField x + +instance Csv.FromField PoseidonID where + parseField = parseTypeCSV "PoseidonID" + +-- the IsString instance allows us to write PoseidonID "MyID" directly, mainly for testing purposes. +instance IsString PoseidonID where + fromString str = PoseidonID (B.pack str) -- | A datatype for the Group_Name .janno column -newtype GroupName = GroupName T.Text deriving (Eq, Ord) -$(makeInstances ''GroupName "Group_Name") +newtype GroupName = GroupName {unGroupName :: B.ByteString} deriving (Eq, Ord) + +instance Makeable GroupName where + make txt = + if T.all isValidPoseidonIDChar txt + then return . GroupName . B.pack . T.unpack $ txt + else fail $ "GroupName contains invalid characters: " ++ T.unpack txt + +instance Suspicious GroupName where inspect _ = Nothing + +instance Show GroupName where + show (GroupName x) = B.unpack x + +instance Csv.ToField GroupName where + toField (GroupName x) = Csv.toField x + +instance Csv.FromField GroupName where + parseField = parseTypeCSV "Group_Name" + +-- the IsString instance allows us to write GroupName "MyGroup" directly, mainly for testing purposes. +instance IsString GroupName where + fromString str = GroupName (B.pack str) -- | A datatype for the Alternative_IDs .janno column newtype JannoAlternativeID = JannoAlternativeID T.Text deriving (Eq) diff --git a/src/Poseidon/GenotypeData.hs b/src/Poseidon/GenotypeData.hs index b25a96ef..1fb53588 100644 --- a/src/Poseidon/GenotypeData.hs +++ b/src/Poseidon/GenotypeData.hs @@ -3,7 +3,8 @@ module Poseidon.GenotypeData where import Paths_poseidon_hs (version) import Poseidon.ColumnTypesJanno (GroupName (..), - JannoGenotypePloidy (..)) + JannoGenotypePloidy (..), + PoseidonID (..)) import Poseidon.ColumnTypesUtils (ListColumn (..)) import Poseidon.Janno (JannoRow (..)) import Poseidon.Utils (LogA, PoseidonException (..), @@ -413,12 +414,12 @@ selectIndices indices (snpEntry, genoLine) = (snpEntry, V.fromList [genoLine V.! writeVCF :: (MonadSafe m) => LogA -> [JannoRow] -> FilePath -> Consumer (EigenstratSnpEntry, GenoLine) m () writeVCF logA jannoRows vcfFile = do - let sampleNames = map (B.pack . jPoseidonID) jannoRows - groupNames = map ((\(GroupName n) -> T.unpack n) . head . getListColumn . jGroupName) jannoRows + let sampleNames = map (unPoseidonID . jPoseidonID) jannoRows + groupNames = map (B.unpack . unGroupName . head . getListColumn . jGroupName) jannoRows sex = map jGeneticSex jannoRows forM_ jannoRows $ \jannoRow -> do when (jGenotypePloidy jannoRow == Nothing) . logWithEnv logA . logWarning $ - "Missing GenotypePloidy for individual ++ " ++ jPoseidonID jannoRow ++ + "Missing GenotypePloidy for individual ++ " ++ show (jPoseidonID jannoRow) ++ ". For VCF output I will assume diploid genotypes. " ++ "Please set the GenotypePloidy column explitly in the Janno File to haploid or diploid." let metaInfoLines = map B.pack [ @@ -452,7 +453,7 @@ createVCFentry logA jannoRows (EigenstratSnpEntry chrom pos _ id_ ref alt, genoL (Missing, Just Haploid) -> return ["."] (HomRef , Just Haploid) -> return ["0"] (Het , Just Haploid) -> do - logWithEnv logA . logWarning $ "Encountered a heterozygous genotype for " ++ s ++ + logWithEnv logA . logWarning $ "Encountered a heterozygous genotype for " ++ show (unPoseidonID s) ++ " at position " ++ show chrom ++ ":" ++ show pos ++ ", but the individual's GenotypePloidy is given as " ++ " Haploid in the Janno-File. I have to encode this in the VCF as a diploid genotype. Consider changing this " ++ "individual's GenotypePloidy to diploid!" diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index 2a6f9079..5f9a5ad5 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -42,7 +42,6 @@ import Data.List (elemIndex, foldl', intercalate, nub, sort, transpose, (\\)) import Data.Maybe (catMaybes, fromJust) -import qualified Data.Text as T import qualified Data.Vector as V import Generics.SOP.TH (deriveGeneric) import GHC.Generics (Generic) @@ -77,7 +76,7 @@ instance Monoid JannoRows where -- See https://github.com/poseidon-framework/poseidon2-schema/blob/master/janno_columns.tsv -- for more details data JannoRow = JannoRow - { jPoseidonID :: String + { jPoseidonID :: PoseidonID , jGeneticSex :: GeneticSex , jGroupName :: ListColumn GroupName , jAlternativeIDs :: Maybe (ListColumn JannoAlternativeID) @@ -288,9 +287,9 @@ createMinimalJanno xs = JannoRows $ map createMinimalSample xs createMinimalSample :: EigenstratIndEntry -> JannoRow createMinimalSample (EigenstratIndEntry id_ sex pop) = JannoRow { - jPoseidonID = Bchs.unpack id_ -- TODO: this will have to change. We need to make PoseidonID itself ByteString + jPoseidonID = PoseidonID id_ , jGeneticSex = GeneticSex sex - , jGroupName = ListColumn [GroupName . T.pack . Bchs.unpack $ pop] -- same thing, see above. + , jGroupName = ListColumn [GroupName pop] , jAlternativeIDs = Nothing , jRelationTo = Nothing , jRelationDegree = Nothing @@ -450,8 +449,7 @@ readJannoFileRow mandatoryCols jannoPath (lineNumber, row) = do renderWarning e = "Cross-column anomaly in " ++ jannoPath ++ " " ++ "in line " ++ renderLocation ++ ": " ++ e renderLocation :: String - renderLocation = show lineNumber ++ - " (Poseidon_ID: " ++ jPoseidonID jannoRow ++ ")" + renderLocation = show lineNumber ++ " (Poseidon_ID: " ++ show (jPoseidonID jannoRow) ++ ")" -- Global janno consistency checks @@ -479,7 +477,7 @@ checkJannoRowConsistency x = checkMandatoryStringNotEmpty :: JannoRow -> JannoRowLog JannoRow checkMandatoryStringNotEmpty x = - let notEmpty = (not . null . jPoseidonID $ x) && + let notEmpty = (not . Bchs.null . unPoseidonID . jPoseidonID $ x) && (not . null . getListColumn . jGroupName $ x) && (not . null . show . head . getListColumn . jGroupName $ x) in case notEmpty of @@ -543,4 +541,5 @@ jannoRows2EigenstratIndEntries :: JannoRows -> [EigenstratIndEntry] jannoRows2EigenstratIndEntries (JannoRows jannoRows) = do -- list monad jannoRow <- jannoRows -- looping over jannoRows let GroupName gText = head . getListColumn . jGroupName $ jannoRow - return $ EigenstratIndEntry (Bchs.pack $ jPoseidonID jannoRow) (sfSex (jGeneticSex jannoRow)) (Bchs.pack $ T.unpack gText) + PoseidonID id_ = jPoseidonID jannoRow + return $ EigenstratIndEntry id_ (sfSex (jGeneticSex jannoRow)) gText diff --git a/src/Poseidon/Package.hs b/src/Poseidon/Package.hs index 99456ece..d2885b1b 100644 --- a/src/Poseidon/Package.hs +++ b/src/Poseidon/Package.hs @@ -33,10 +33,10 @@ module Poseidon.Package ( import Poseidon.BibFile (BibEntry (..), BibTeX, readBibTeXFile) -import Poseidon.ColumnTypesJanno (GeneticSex (..), +import Poseidon.ColumnTypesJanno (GeneticSex (..), GroupName (..), JannoLibraryBuilt (..), JannoPublication (..), - JannoUDG (..)) + JannoUDG (..), PoseidonID (..)) import Poseidon.ColumnTypesSSF (SSFLibraryBuilt (..), SSFUDG (..)) import Poseidon.ColumnTypesUtils (ListColumn (..), getMaybeListColumn) @@ -542,12 +542,12 @@ checkFiles baseDir ignoreChecksums ignoreGenotypeFilesMissing yml = do -- the last flag is important for reading VCFs, which can lack group and sex information. checkJannoIndConsistency :: String -> JannoRows -> [EigenstratIndEntry] -> Bool -> IO () checkJannoIndConsistency pacName (JannoRows rows) indEntries isVCF = do - let genoIDs = [ BSC.unpack x | EigenstratIndEntry x _ _ <- indEntries] + let genoIDs = [ x | EigenstratIndEntry x _ _ <- indEntries] genoSexs = [ x | EigenstratIndEntry _ x _ <- indEntries] - genoGroups = [ BSC.unpack x | EigenstratIndEntry _ _ x <- indEntries] - let jannoIDs = map jPoseidonID rows + genoGroups = [ x | EigenstratIndEntry _ _ x <- indEntries] + let jannoIDs = map (unPoseidonID . jPoseidonID) rows jannoSexs = map (sfSex . jGeneticSex) rows - jannoGroups = map (show . head . getListColumn . jGroupName) rows + jannoGroups = map (unGroupName . head . getListColumn . jGroupName) rows let idMis = genoIDs /= jannoIDs sexMis = genoSexs /= jannoSexs groupMis = genoGroups /= jannoGroups @@ -555,7 +555,7 @@ checkJannoIndConsistency pacName (JannoRows rows) indEntries isVCF = do groupsAllUnknown = all (=="unknown") genoGroups when idMis $ throwM $ PoseidonCrossFileConsistencyException pacName $ "Individual ID mismatch between genotype data (left) and .janno files (right): " ++ - renderMismatch genoIDs jannoIDs + renderMismatch (map show genoIDs) (map show jannoIDs) when (sexMis && not (isVCF && sexAllUnknown)) $ throwM $ PoseidonCrossFileConsistencyException pacName $ "Individual Sex mismatch between genotype data (left) and .janno files (right): " ++ renderMismatch (map show genoSexs) (map show jannoSexs) @@ -563,7 +563,7 @@ checkJannoIndConsistency pacName (JannoRows rows) indEntries isVCF = do "Individual GroupID mismatch between genotype data (left) and .janno files (right). Note \ \that this could be due to a wrong Plink file population-name encoding \ \(see the --inPlinkPopName option). " ++ - renderMismatch genoGroups jannoGroups + renderMismatch (map show genoGroups) (map show jannoGroups) renderMismatch :: [String] -> [String] -> String renderMismatch a b = @@ -583,20 +583,20 @@ checkSeqSourceJannoConsistency pacName (SeqSourceRows sRows) (JannoRows jRows) = checkPoseidonIDOverlap checkUDGandLibraryBuiltOverlap where - js = map (\r -> (jPoseidonID r, jUDG r, jLibraryBuilt r)) jRows - ss = map (\r -> (getMaybeListColumn $ sPoseidonID r, sUDG r, sLibraryBuilt r)) sRows + js = [(jPoseidonID r, jUDG r, jLibraryBuilt r) | r <- jRows] + ss = [(getMaybeListColumn $ sPoseidonID r, sUDG r, sLibraryBuilt r) | r <- sRows] checkPoseidonIDOverlap :: PoseidonIO () checkPoseidonIDOverlap = do let flatSeqSourceIDs = nub $ concat $ [a | (a,_,_) <- ss] misMatch = flatSeqSourceIDs \\ [a | (a,_,_) <- js] unless (null misMatch) $ do logWarning $ "The .ssf file in the package " ++ pacName ++ - " features Poseidon_IDs that are not in the package: " ++ intercalate ", " misMatch + " features Poseidon_IDs that are not in the package: " ++ (intercalate ", " . map show $ misMatch) checkUDGandLibraryBuiltOverlap :: PoseidonIO () checkUDGandLibraryBuiltOverlap = do mapM_ checkOneIndividual js where - checkOneIndividual :: (String, Maybe JannoUDG, Maybe JannoLibraryBuilt) -> PoseidonIO () + checkOneIndividual :: (PoseidonID, Maybe JannoUDG, Maybe JannoLibraryBuilt) -> PoseidonIO () checkOneIndividual (jannoPoseidonID, jannoUDG, jannoLibraryBuilt) = do let relevantSeqSourceRows = filter (\(seqSourcePoseidonID,_,_) -> jannoPoseidonID `elem` seqSourcePoseidonID) ss allSeqSourceUDGs = catMaybes $ [b | (_,b,_) <- relevantSeqSourceRows] @@ -606,13 +606,13 @@ checkSeqSourceJannoConsistency pacName (SeqSourceRows sRows) (JannoRows jRows) = Just j -> unless (all (compareU j) allSeqSourceUDGs) $ throwM $ PoseidonCrossFileConsistencyException pacName $ "The information on UDG treatment in .janno and .ssf do not match" ++ - " for the individual: " ++ jannoPoseidonID ++ " (" ++ show j ++ " <> " ++ show allSeqSourceUDGs ++ ")" + " for the individual: " ++ show jannoPoseidonID ++ " (" ++ show j ++ " <> " ++ show allSeqSourceUDGs ++ ")" case jannoLibraryBuilt of Nothing -> return () Just j -> unless (all (compareL j) allSeqSourceLibraryBuilts) $ throwM $ PoseidonCrossFileConsistencyException pacName $ "The information on library strandedness in .janno and .ssf do not match" ++ - " for the individual: " ++ jannoPoseidonID ++ " (" ++ show j ++ " <> " ++ show allSeqSourceLibraryBuilts ++ ")" + " for the individual: " ++ show jannoPoseidonID ++ " (" ++ show j ++ " <> " ++ show allSeqSourceLibraryBuilts ++ ")" compareU :: JannoUDG -> SSFUDG -> Bool compareU Mixed _ = True compareU Minus SSFMinus = True @@ -872,7 +872,7 @@ getJointIndividualInfo packages = do isLatest <- isLatestInCollection packages pac forM (getJannoRowsFromPac pac) $ \jannoRow -> do let indInfo = IndividualInfo - (jPoseidonID jannoRow) + (show . jPoseidonID $ jannoRow) ((map show . getListColumn . jGroupName) jannoRow) (makePacNameAndVersion pac) return (indInfo, isLatest) @@ -883,7 +883,7 @@ getExtendedIndividualInfo :: (MonadThrow m) => [PoseidonPackage] -> AddColSpec - getExtendedIndividualInfo allPackages addJannoColSpec = sequence $ do -- list monad pac <- allPackages -- outer loop (automatically concatenating over inner loops) jannoRow <- getJannoRowsFromPac pac -- inner loop - let name = jPoseidonID jannoRow + let name = show $ jPoseidonID jannoRow groups = map show $ getListColumn . jGroupName $ jannoRow colNames = case addJannoColSpec of AddColAll -> jannoHeaderString \\ ["Poseidon_ID", "Group_Name"] -- Nothing means all Janno columns diff --git a/src/Poseidon/SequencingSource.hs b/src/Poseidon/SequencingSource.hs index ba11ffc0..870d4d1c 100644 --- a/src/Poseidon/SequencingSource.hs +++ b/src/Poseidon/SequencingSource.hs @@ -8,6 +8,7 @@ module Poseidon.SequencingSource where +import Poseidon.ColumnTypesJanno (PoseidonID) import Poseidon.ColumnTypesSSF import Poseidon.ColumnTypesUtils import Poseidon.Utils @@ -56,7 +57,7 @@ instance Monoid SeqSourceRows where -- See https://github.com/poseidon-framework/poseidon2-schema/blob/master/seqSourceFile_columns.tsv -- for more details data SeqSourceRow = SeqSourceRow - { sPoseidonID :: Maybe (ListColumn String) + { sPoseidonID :: Maybe (ListColumn PoseidonID) , sUDG :: Maybe SSFUDG , sLibraryBuilt :: Maybe SSFLibraryBuilt , sSampleAccession :: Maybe SSFAccessionIDSample diff --git a/src/Poseidon/ServerHTML.hs b/src/Poseidon/ServerHTML.hs index 10cc82e2..37fc2a5f 100644 --- a/src/Poseidon/ServerHTML.hs +++ b/src/Poseidon/ServerHTML.hs @@ -15,6 +15,7 @@ import qualified Control.Monad as OP import Data.Aeson (defaultOptions, encode, genericToEncoding) import Data.Aeson.Types (ToJSON (..)) +import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as C import Data.Csv (ToNamedRecord (..)) import qualified Data.HashMap.Strict as HM @@ -356,11 +357,11 @@ packageVersionPage H.th $ H.b "Genetic_Sex" H.th $ H.b "Group_Name" forM_ samples $ \jannoRow -> do - let link = "/explorer/" <> H.toValue archiveName <> "/" <> H.toValue pacName <> "/" <> H.toValue (renderMaybeVersion pacVersion) <> "/" <> H.toValue (jPoseidonID jannoRow) + let link = "/explorer/" <> H.toValue archiveName <> "/" <> H.toValue pacName <> "/" <> H.toValue (renderMaybeVersion pacVersion) <> "/" <> H.toValue (BS.unpack . unPoseidonID . jPoseidonID $ jannoRow) H.tr $ do - H.td $ H.a ! A.href link $ H.toMarkup $ jPoseidonID jannoRow + H.td $ H.a ! A.href link $ H.toMarkup . T.pack . BS.unpack . unPoseidonID . jPoseidonID $ jannoRow H.td $ H.toMarkup $ show $ jGeneticSex jannoRow - H.td $ H.toMarkup $ T.intercalate ", " $ map (\(GroupName t) -> t) $ getListColumn $ jGroupName jannoRow + H.td . H.toMarkup . T.intercalate ", " . map (T.pack . BS.unpack . unGroupName) . getListColumn . jGroupName $ jannoRow samplePage :: Maybe MapMarker @@ -374,7 +375,7 @@ samplePage maybeMapMarker row = do case maybeMapMarker of Just mapMarker -> H.script ! A.type_ "text/javascript" $ H.preEscapedToHtml (onloadJS (dataToJSON ((1,0) :: (Int,Int))) (dataToJSON [mapMarker])) Nothing -> pure () - H.h1 (H.toMarkup $ "Sample: " <> jPoseidonID row) + H.h1 (H.toMarkup $ "Sample: " <> show (jPoseidonID row)) case maybeMapMarker of Just _ -> H.div ! A.id "mapid" ! A.style "height: 350px;" $ "" Nothing -> pure () diff --git a/test/Poseidon/JannoSpec.hs b/test/Poseidon/JannoSpec.hs index be79ce9e..ddfef035 100644 --- a/test/Poseidon/JannoSpec.hs +++ b/test/Poseidon/JannoSpec.hs @@ -9,18 +9,22 @@ import Poseidon.Janno (JannoRow (..), JannoRows (..), readJannoFile) import Poseidon.Utils (testLog) +import Control.Monad (forM_) import Country (decodeAlphaTwo) import qualified Data.Csv as C import Data.HashMap.Strict (fromList) +import qualified Data.Text as T import SequenceFormats.Eigenstrat (Sex (..)) import System.FilePath (()) -import Test.Hspec (Spec, anyException, describe, it, - shouldBe, shouldThrow) +import Test.Hspec (Spec, anyException, anyIOException, + describe, it, shouldBe, + shouldThrow) spec :: Spec spec = do testEnAndDecoding testPoseidonSampleFromJannoFile + testIllegalCharacterCheck testEnAndDecoding :: Spec testEnAndDecoding = describe "Poseidon.Janno: JSON and CSV en- and decoding" $ do @@ -57,6 +61,23 @@ checkEnDe xs = cassavaCycle xs `shouldBe` cassavaResult xs cassavaCycle = map (C.runParser . C.parseField . C.toField) cassavaResult = map Right +testIllegalCharacterCheck :: Spec +testIllegalCharacterCheck = describe "Poseidon.Janno: illegal character check" $ do + it "should identify illegal characters in Poseidon IDs" $ do + let illegalIDs = ["invàlïd", "in valid", "invalid,comma", "invalid;semi", "invalid\nnewline", "invalid\rcarriage", "invalid\"quote"] + let legalIDs = ["validID1", "valid_ID-2"] + forM_ illegalIDs $ \pid -> do + parsePoseidonID pid `shouldThrow` anyIOException + parseGroupName pid `shouldThrow` anyIOException + forM_ legalIDs $ \pid -> do + _ <- parsePoseidonID pid + _ <- parseGroupName pid + return () + where + parsePoseidonID :: T.Text -> IO PoseidonID + parsePoseidonID = make + parseGroupName :: T.Text -> IO GroupName + parseGroupName = make testPoseidonSampleFromJannoFile :: Spec testPoseidonSampleFromJannoFile = describe "Poseidon.Janno.readJannoFile" $ do @@ -81,7 +102,7 @@ testPoseidonSampleFromJannoFile = describe "Poseidon.Janno.readJannoFile" $ do map jDateType janno `shouldBe` [Nothing, Nothing, Nothing] map jCaptureType janno `shouldBe` [Nothing, Nothing, Nothing] map jGenotypePloidy janno `shouldBe` [Nothing, Nothing, Nothing] - map jGroupName janno `shouldBe` [ListColumn [GroupName "POP1"], ListColumn [GroupName "POP2"], ListColumn [GroupName "POP1"]] + map jGroupName janno `shouldBe` [ListColumn ["POP1"], ListColumn ["POP2"], ListColumn ["POP1"]] map jGeneticSex janno `shouldBe` [GeneticSex Male, GeneticSex Female, GeneticSex Male] map jCoverageOnTargets janno `shouldBe` [Nothing, Nothing, Nothing] map jUDG janno `shouldBe` [Nothing, Nothing, Nothing]