{-# LANGUAGE DeriveGeneric       #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- the following ones are necessary for the generics-sop magic (deriveGeneric)
{-# LANGUAGE DataKinds           #-}
{-# LANGUAGE TemplateHaskell     #-}
{-# LANGUAGE TypeFamilies        #-}

module Poseidon.SequencingSource where

import           Poseidon.ColumnTypesSSF
import           Poseidon.ColumnTypesUtils
import           Poseidon.Utils

import           Control.Exception          (throwIO)
import           Control.Monad              (unless, when)
import qualified Control.Monad              as OP
import           Control.Monad.IO.Class     (liftIO)
import           Data.Bifunctor             (second)
import qualified Data.ByteString.Char8      as Bchs
import qualified Data.ByteString.Lazy.Char8 as Bch
import qualified Data.Csv                   as Csv
import           Data.Either                (lefts, rights)
import qualified Data.HashMap.Strict        as HM
import           Data.List                  (foldl', nub, sort)
import           Data.Maybe                 (catMaybes, isJust, mapMaybe)
import qualified Data.Vector                as V
import           Generics.SOP.TH            (deriveGeneric)
import           GHC.Generics               (Generic)
import qualified Text.Parsec                as P

-- | A data type to represent a seqSourceFile.
--
-- This is a thin wrapper around the list of rows ('SeqSourceRow') that make
-- up the file; 'getSeqSourceRowList' unwraps it again.
newtype SeqSourceRows = SeqSourceRows {getSeqSourceRowList :: [SeqSourceRow]}
    deriving (Show, Eq, Generic)

-- | Combining two 'SeqSourceRows' concatenates their rows (left operand
-- first) and then harmonises the additional (non-standard) columns: every
-- additional column name that occurs in any row of the combined list is added
-- to each row that lacks it, with the placeholder value @"n/a"@.
instance Semigroup SeqSourceRows where
    (SeqSourceRows j1) <> (SeqSourceRows j2) = SeqSourceRows $ j1 `combineTwoSeqSources` j2
        where
        -- Concatenate both row lists and pad the additional columns of each row.
        combineTwoSeqSources :: [SeqSourceRow] -> [SeqSourceRow] -> [SeqSourceRow]
        combineTwoSeqSources seqSource1 seqSource2 =
            let simpleSeqSourceSum = seqSource1 ++ seqSource2
                -- names of all additional columns present in any row
                toAddColNames = HM.keys (HM.unions (map (getCsvNR . sAdditionalColumns) simpleSeqSourceSum))
                -- a record holding "n/a" for every one of these columns
                toAddEmptyCols = HM.fromList (map (\k -> (k, "n/a")) toAddColNames)
            in map (addEmptyAddColsToSeqSourceRow toAddEmptyCols) simpleSeqSourceSum
        -- Extend the additional columns of a single row with the entries of
        -- 'toAdd' that the row does not have yet.
        addEmptyAddColsToSeqSourceRow :: Csv.NamedRecord -> SeqSourceRow -> SeqSourceRow
        addEmptyAddColsToSeqSourceRow toAdd x =
            x { sAdditionalColumns = CsvNamedRecord $ fillAddCols toAdd (getCsvNR $ sAdditionalColumns x) }
        -- 'HM.union' is left-biased: values already present in 'cur' win and
        -- only keys missing from 'cur' are taken from 'toAdd'.
        fillAddCols :: Csv.NamedRecord -> Csv.NamedRecord -> Csv.NamedRecord
        fillAddCols toAdd cur = HM.union cur toAdd

-- | The neutral element is a seqSourceFile without any rows. 'mconcat' is a
-- strict left fold over 'mappend', starting from 'mempty'.
instance Monoid SeqSourceRows where
    mempty = SeqSourceRows []
    mconcat = foldl' mappend mempty

-- | A data type to represent a row in the seqSourceFile
-- See https://github.com/poseidon-framework/poseidon2-schema/blob/master/seqSourceFile_columns.tsv
-- for more details
--
-- Every specified column is wrapped in 'Maybe'. Columns that are not part of
-- the specification are kept in 'sAdditionalColumns'.
data SeqSourceRow = SeqSourceRow
    { sPoseidonID               :: Maybe (ListColumn String)
    , sUDG                      :: Maybe SSFUDG
    , sLibraryBuilt             :: Maybe SSFLibraryBuilt
    , sSampleAccession          :: Maybe SSFAccessionIDSample
    , sStudyAccession           :: Maybe SSFAccessionIDStudy
    , sRunAccession             :: Maybe SSFAccessionIDRun
    , sSampleAlias              :: Maybe SSFSampleAlias
    , sSecondarySampleAccession :: Maybe SSFSecondarySampleAccession
    , sFirstPublic              :: Maybe SSFFirstPublicSimpleDate
    , sLastUpdated              :: Maybe SSFLastUpdatedSimpleDate
    , sInstrumentModel          :: Maybe SSFInstrumentModel
    , sLibraryLayout            :: Maybe SSFLibraryLayout
    , sLibrarySource            :: Maybe SSFLibrarySource
    , sInstrumentPlatform       :: Maybe SSFInstrumentPlatform
    , sLibraryName              :: Maybe SSFLibraryName
    , sLibraryStrategy          :: Maybe SSFLibraryStrategy
    , sFastqFTP                 :: Maybe (ListColumn SSFFastqFTPURI)
    , sFastqASPERA              :: Maybe (ListColumn SSFFastqASPERAURI)
    -- integer, not int, because it can be a very large number
    , sFastqBytes               :: Maybe (ListColumn SSFFastqBytes)
    , sFastqMD5                 :: Maybe (ListColumn SSFFastqMD5)
    -- integer, not int, because it can be a very large number
    , sReadCount                :: Maybe SSFReadCount
    , sSubmittedFTP             :: Maybe (ListColumn SSFSubmittedFTPURI)
    -- all columns that are not covered by the fields above
    , sAdditionalColumns        :: CsvNamedRecord
    }
    deriving (Show, Eq, Generic)

-- Template Haskell splice: deriving with 'deriveGeneric' (from Generics.SOP.TH)
-- is necessary for the generics-sop magic; this is what the DataKinds,
-- TemplateHaskell and TypeFamilies extensions at the top of the module are for.
deriveGeneric ''SeqSourceRow

-- | The names of all specified seqSourceFile columns.
--
-- This header also defines the output column order when writing to csv!
seqSourceHeader :: [Bchs.ByteString]
seqSourceHeader = [
      "poseidon_IDs"
    , "udg"
    , "library_built"
    , "sample_accession"
    , "study_accession"
    , "run_accession"
    , "sample_alias"
    , "secondary_sample_accession"
    , "first_public"
    , "last_updated"
    , "instrument_model"
    , "library_layout"
    , "library_source"
    , "instrument_platform"
    , "library_name"
    , "library_strategy"
    , "fastq_ftp"
    , "fastq_aspera"
    , "fastq_bytes"
    , "fastq_md5"
    , "read_count"
    , "submitted_ftp"
    ]

-- | The default csv column order is the one given by 'seqSourceHeader';
-- the row value itself is ignored.
instance Csv.DefaultOrdered SeqSourceRow where
    headerOrder _ = Csv.header seqSourceHeader

-- | The column names of 'seqSourceHeader', unpacked to 'String's
-- (same names, same order).
seqSourceHeaderString :: [String]
seqSourceHeaderString = map Bchs.unpack seqSourceHeader

-- | This hashmap represents an empty seqSourceFile with all normal, specified columns:
-- every name in 'seqSourceHeader' is a key, each mapped to the unit value.
seqSourceRefHashMap :: HM.HashMap Bchs.ByteString ()
seqSourceRefHashMap = HM.fromList $ map (\x -> (x, ())) seqSourceHeader

instance Csv.FromNamedRecord SeqSourceRow where
    parseNamedRecord :: HashMap ByteString ByteString -> Parser SeqSourceRow
parseNamedRecord HashMap ByteString ByteString
m = Maybe (ListColumn [Char])
-> Maybe SSFUDG
-> Maybe SSFLibraryBuilt
-> Maybe SSFAccessionIDSample
-> Maybe SSFAccessionIDStudy
-> Maybe SSFAccessionIDRun
-> Maybe SSFSampleAlias
-> Maybe SSFSecondarySampleAccession
-> Maybe SSFFirstPublicSimpleDate
-> Maybe SSFLastUpdatedSimpleDate
-> Maybe SSFInstrumentModel
-> Maybe SSFLibraryLayout
-> Maybe SSFLibrarySource
-> Maybe SSFInstrumentPlatform
-> Maybe SSFLibraryName
-> Maybe SSFLibraryStrategy
-> Maybe (ListColumn SSFFastqFTPURI)
-> Maybe (ListColumn SSFFastqASPERAURI)
-> Maybe (ListColumn SSFFastqBytes)
-> Maybe (ListColumn SSFFastqMD5)
-> Maybe SSFReadCount
-> Maybe (ListColumn SSFSubmittedFTPURI)
-> CsvNamedRecord
-> SeqSourceRow
SeqSourceRow
        (Maybe (ListColumn [Char])
 -> Maybe SSFUDG
 -> Maybe SSFLibraryBuilt
 -> Maybe SSFAccessionIDSample
 -> Maybe SSFAccessionIDStudy
 -> Maybe SSFAccessionIDRun
 -> Maybe SSFSampleAlias
 -> Maybe SSFSecondarySampleAccession
 -> Maybe SSFFirstPublicSimpleDate
 -> Maybe SSFLastUpdatedSimpleDate
 -> Maybe SSFInstrumentModel
 -> Maybe SSFLibraryLayout
 -> Maybe SSFLibrarySource
 -> Maybe SSFInstrumentPlatform
 -> Maybe SSFLibraryName
 -> Maybe SSFLibraryStrategy
 -> Maybe (ListColumn SSFFastqFTPURI)
 -> Maybe (ListColumn SSFFastqASPERAURI)
 -> Maybe (ListColumn SSFFastqBytes)
 -> Maybe (ListColumn SSFFastqMD5)
 -> Maybe SSFReadCount
 -> Maybe (ListColumn SSFSubmittedFTPURI)
 -> CsvNamedRecord
 -> SeqSourceRow)
-> Parser (Maybe (ListColumn [Char]))
-> Parser
     (Maybe SSFUDG
      -> Maybe SSFLibraryBuilt
      -> Maybe SSFAccessionIDSample
      -> Maybe SSFAccessionIDStudy
      -> Maybe SSFAccessionIDRun
      -> Maybe SSFSampleAlias
      -> Maybe SSFSecondarySampleAccession
      -> Maybe SSFFirstPublicSimpleDate
      -> Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe (ListColumn [Char]))
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"poseidon_IDs"
        Parser
  (Maybe SSFUDG
   -> Maybe SSFLibraryBuilt
   -> Maybe SSFAccessionIDSample
   -> Maybe SSFAccessionIDStudy
   -> Maybe SSFAccessionIDRun
   -> Maybe SSFSampleAlias
   -> Maybe SSFSecondarySampleAccession
   -> Maybe SSFFirstPublicSimpleDate
   -> Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFUDG)
-> Parser
     (Maybe SSFLibraryBuilt
      -> Maybe SSFAccessionIDSample
      -> Maybe SSFAccessionIDStudy
      -> Maybe SSFAccessionIDRun
      -> Maybe SSFSampleAlias
      -> Maybe SSFSecondarySampleAccession
      -> Maybe SSFFirstPublicSimpleDate
      -> Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFUDG)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"udg"
        Parser
  (Maybe SSFLibraryBuilt
   -> Maybe SSFAccessionIDSample
   -> Maybe SSFAccessionIDStudy
   -> Maybe SSFAccessionIDRun
   -> Maybe SSFSampleAlias
   -> Maybe SSFSecondarySampleAccession
   -> Maybe SSFFirstPublicSimpleDate
   -> Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFLibraryBuilt)
-> Parser
     (Maybe SSFAccessionIDSample
      -> Maybe SSFAccessionIDStudy
      -> Maybe SSFAccessionIDRun
      -> Maybe SSFSampleAlias
      -> Maybe SSFSecondarySampleAccession
      -> Maybe SSFFirstPublicSimpleDate
      -> Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFLibraryBuilt)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"library_built"
        Parser
  (Maybe SSFAccessionIDSample
   -> Maybe SSFAccessionIDStudy
   -> Maybe SSFAccessionIDRun
   -> Maybe SSFSampleAlias
   -> Maybe SSFSecondarySampleAccession
   -> Maybe SSFFirstPublicSimpleDate
   -> Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFAccessionIDSample)
-> Parser
     (Maybe SSFAccessionIDStudy
      -> Maybe SSFAccessionIDRun
      -> Maybe SSFSampleAlias
      -> Maybe SSFSecondarySampleAccession
      -> Maybe SSFFirstPublicSimpleDate
      -> Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFAccessionIDSample)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"sample_accession"
        Parser
  (Maybe SSFAccessionIDStudy
   -> Maybe SSFAccessionIDRun
   -> Maybe SSFSampleAlias
   -> Maybe SSFSecondarySampleAccession
   -> Maybe SSFFirstPublicSimpleDate
   -> Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFAccessionIDStudy)
-> Parser
     (Maybe SSFAccessionIDRun
      -> Maybe SSFSampleAlias
      -> Maybe SSFSecondarySampleAccession
      -> Maybe SSFFirstPublicSimpleDate
      -> Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFAccessionIDStudy)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"study_accession"
        Parser
  (Maybe SSFAccessionIDRun
   -> Maybe SSFSampleAlias
   -> Maybe SSFSecondarySampleAccession
   -> Maybe SSFFirstPublicSimpleDate
   -> Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFAccessionIDRun)
-> Parser
     (Maybe SSFSampleAlias
      -> Maybe SSFSecondarySampleAccession
      -> Maybe SSFFirstPublicSimpleDate
      -> Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFAccessionIDRun)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser a
filterLookup         HashMap ByteString ByteString
m ByteString
"run_accession"
        Parser
  (Maybe SSFSampleAlias
   -> Maybe SSFSecondarySampleAccession
   -> Maybe SSFFirstPublicSimpleDate
   -> Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFSampleAlias)
-> Parser
     (Maybe SSFSecondarySampleAccession
      -> Maybe SSFFirstPublicSimpleDate
      -> Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFSampleAlias)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"sample_alias"
        Parser
  (Maybe SSFSecondarySampleAccession
   -> Maybe SSFFirstPublicSimpleDate
   -> Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFSecondarySampleAccession)
-> Parser
     (Maybe SSFFirstPublicSimpleDate
      -> Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFSecondarySampleAccession)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"secondary_sample_accession"
        Parser
  (Maybe SSFFirstPublicSimpleDate
   -> Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFFirstPublicSimpleDate)
-> Parser
     (Maybe SSFLastUpdatedSimpleDate
      -> Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFFirstPublicSimpleDate)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"first_public"
        Parser
  (Maybe SSFLastUpdatedSimpleDate
   -> Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFLastUpdatedSimpleDate)
-> Parser
     (Maybe SSFInstrumentModel
      -> Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFLastUpdatedSimpleDate)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"last_updated"
        Parser
  (Maybe SSFInstrumentModel
   -> Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFInstrumentModel)
-> Parser
     (Maybe SSFLibraryLayout
      -> Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFInstrumentModel)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"instrument_model"
        Parser
  (Maybe SSFLibraryLayout
   -> Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFLibraryLayout)
-> Parser
     (Maybe SSFLibrarySource
      -> Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFLibraryLayout)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"library_layout"
        Parser
  (Maybe SSFLibrarySource
   -> Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFLibrarySource)
-> Parser
     (Maybe SSFInstrumentPlatform
      -> Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFLibrarySource)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"library_source"
        Parser
  (Maybe SSFInstrumentPlatform
   -> Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFInstrumentPlatform)
-> Parser
     (Maybe SSFLibraryName
      -> Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFInstrumentPlatform)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"instrument_platform"
        Parser
  (Maybe SSFLibraryName
   -> Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFLibraryName)
-> Parser
     (Maybe SSFLibraryStrategy
      -> Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFLibraryName)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"library_name"
        Parser
  (Maybe SSFLibraryStrategy
   -> Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFLibraryStrategy)
-> Parser
     (Maybe (ListColumn SSFFastqFTPURI)
      -> Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFLibraryStrategy)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"library_strategy"
        Parser
  (Maybe (ListColumn SSFFastqFTPURI)
   -> Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe (ListColumn SSFFastqFTPURI))
-> Parser
     (Maybe (ListColumn SSFFastqASPERAURI)
      -> Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe (ListColumn SSFFastqFTPURI))
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"fastq_ftp"
        Parser
  (Maybe (ListColumn SSFFastqASPERAURI)
   -> Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe (ListColumn SSFFastqASPERAURI))
-> Parser
     (Maybe (ListColumn SSFFastqBytes)
      -> Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe (ListColumn SSFFastqASPERAURI))
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"fastq_aspera"
        Parser
  (Maybe (ListColumn SSFFastqBytes)
   -> Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe (ListColumn SSFFastqBytes))
-> Parser
     (Maybe (ListColumn SSFFastqMD5)
      -> Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe (ListColumn SSFFastqBytes))
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"fastq_bytes"
        Parser
  (Maybe (ListColumn SSFFastqMD5)
   -> Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe (ListColumn SSFFastqMD5))
-> Parser
     (Maybe SSFReadCount
      -> Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord
      -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe (ListColumn SSFFastqMD5))
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"fastq_md5"
        Parser
  (Maybe SSFReadCount
   -> Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord
   -> SeqSourceRow)
-> Parser (Maybe SSFReadCount)
-> Parser
     (Maybe (ListColumn SSFSubmittedFTPURI)
      -> CsvNamedRecord -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe SSFReadCount)
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"read_count"
        Parser
  (Maybe (ListColumn SSFSubmittedFTPURI)
   -> CsvNamedRecord -> SeqSourceRow)
-> Parser (Maybe (ListColumn SSFSubmittedFTPURI))
-> Parser (CsvNamedRecord -> SeqSourceRow)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> HashMap ByteString ByteString
-> ByteString -> Parser (Maybe (ListColumn SSFSubmittedFTPURI))
forall a.
FromField a =>
HashMap ByteString ByteString -> ByteString -> Parser (Maybe a)
filterLookupOptional HashMap ByteString ByteString
m ByteString
"submitted_ftp"
        -- beyond that read everything that is not in the set of defined variables
        -- as a separate hashmap
        Parser (CsvNamedRecord -> SeqSourceRow)
-> Parser CsvNamedRecord -> Parser SeqSourceRow
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> CsvNamedRecord -> Parser CsvNamedRecord
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (HashMap ByteString ByteString -> CsvNamedRecord
CsvNamedRecord (HashMap ByteString ByteString
m HashMap ByteString ByteString
-> HashMap ByteString () -> HashMap ByteString ByteString
forall k v w.
(Eq k, Hashable k) =>
HashMap k v -> HashMap k w -> HashMap k v
`HM.difference` HashMap ByteString ()
seqSourceRefHashMap))

-- | Serialise one 'SeqSourceRow' into a named CSV record.
-- The explicitly defined columns are combined with the additional columns
-- stored in the row. 'HM.union' is left-biased, so a defined column takes
-- precedence over an additional column with the same name. 'explicitNA' is
-- applied to the combined record.
instance Csv.ToNamedRecord SeqSourceRow where
    toNamedRecord row = explicitNA (HM.union definedColumns additionalColumns)
      where
        -- everything that is in the hashmap of additional columns
        additionalColumns = getCsvNR (sAdditionalColumns row)
        -- the explicitly defined seqSourceFile columns
        definedColumns = Csv.namedRecord
            [ "poseidon_IDs" Csv..= sPoseidonID row
            , "udg" Csv..= sUDG row
            , "library_built" Csv..= sLibraryBuilt row
            , "sample_accession" Csv..= sSampleAccession row
            , "study_accession" Csv..= sStudyAccession row
            , "run_accession" Csv..= sRunAccession row
            , "sample_alias" Csv..= sSampleAlias row
            , "secondary_sample_accession" Csv..= sSecondarySampleAccession row
            , "first_public" Csv..= sFirstPublic row
            , "last_updated" Csv..= sLastUpdated row
            , "instrument_model" Csv..= sInstrumentModel row
            , "library_layout" Csv..= sLibraryLayout row
            , "library_source" Csv..= sLibrarySource row
            , "instrument_platform" Csv..= sInstrumentPlatform row
            , "library_name" Csv..= sLibraryName row
            , "library_strategy" Csv..= sLibraryStrategy row
            , "fastq_ftp" Csv..= sFastqFTP row
            , "fastq_aspera" Csv..= sFastqASPERA row
            , "fastq_bytes" Csv..= sFastqBytes row
            , "fastq_md5" Csv..= sFastqMD5 row
            , "read_count" Csv..= sReadCount row
            , "submitted_ftp" Csv..= sSubmittedFTP row
            ]

-- | Encode the given rows as a seqSourceFile and write the result to @path@.
-- The header is 'seqSourceHeader' followed by the sorted names of all
-- additional columns that occur in any of the rows.
writeSeqSourceFile :: FilePath -> SeqSourceRows -> IO ()
writeSeqSourceFile path (SeqSourceRows rows) =
    Bch.writeFile path (Csv.encodeByNameWith encodingOptions fullHeader rows)
    where
        -- names of the additional columns across all rows, in sorted order
        additionalColumnNames = sort . HM.keys . HM.unions $ map (getCsvNR . sAdditionalColumns) rows
        fullHeader :: Csv.Header
        fullHeader = V.fromList (seqSourceHeader ++ additionalColumnNames)

-- | A function to read one seqSourceFile
--
-- The file is split into lines, empty lines are dropped, and every remaining
-- data line is decoded on its own (prefixed with the header line), so that
-- problems can be reported together with the original line number.
--
-- Throws a 'PoseidonFileConsistencyException' if the file has less than two
-- lines, if it contains only empty lines, or if at least one line could not
-- be read (the first five broken lines are logged as errors beforehand).
readSeqSourceFile :: FilePath -> PoseidonIO SeqSourceRows
readSeqSourceFile seqSourcePath = do
    logDebug $ "Reading: " ++ seqSourcePath
    seqSourceFile <- liftIO $ Bch.readFile seqSourcePath
    let seqSourceFileRows = Bch.lines seqSourceFile
    when (length seqSourceFileRows < 2) $ liftIO $ throwIO $
        PoseidonFileConsistencyException seqSourcePath "File has less than two lines"
    logDebug $ show (length seqSourceFileRows - 1) ++ " sequencing entities in this file"
    -- tuple with row number and row bytestring
    let seqSourceFileRowsWithNumber = zip [1..(length seqSourceFileRows)] seqSourceFileRows
    -- filter out empty lines
        seqSourceFileRowsWithNumberFiltered = filter (\(_, y) -> y /= Bch.empty) seqSourceFileRowsWithNumber
    -- The line-count check above runs before the empty lines are removed, so
    -- the filtered list can still be empty here. Match on it explicitly
    -- instead of using the partial functions head and tail.
    case seqSourceFileRowsWithNumberFiltered of
        [] -> liftIO $ throwIO $
            PoseidonFileConsistencyException seqSourcePath "File contains only empty lines"
        ((_, headerOnlyPotentiallyWithQuotes) : rowsOnly) -> do
            -- create header + individual line combination
            -- removing the quotes like this might cause issues in edge cases
            let headerOnly = Bch.filter (/= '"') headerOnlyPotentiallyWithQuotes
                seqSourceFileRowsWithHeader = map (second (\x -> headerOnly <> "\n" <> x)) rowsOnly
            -- read seqSourceFile by rows
            seqSourceRepresentation <- mapM (readSeqSourceFileRow seqSourcePath) seqSourceFileRowsWithHeader
            -- error case management: log at most five broken lines, then abort
            let brokenRows = lefts seqSourceRepresentation
            unless (null brokenRows) $ do
                mapM_ (logError . renderPoseidonException) $ take 5 brokenRows
                liftIO $ throwIO $ PoseidonFileConsistencyException seqSourcePath "Broken lines."
            let seqSource = SeqSourceRows $ rights seqSourceRepresentation
            warnSeqSourceConsistency seqSourcePath seqSource
            return seqSource

-- | A function to read one row of a seqSourceFile
--
-- The input pairs a line number (used only in messages) with a bytestring
-- that is decoded as a CSV document with a header line.
--
-- Returns 'Left' with a 'PoseidonFileRowException' if decoding fails or if
-- decoding succeeds without yielding any record. Otherwise returns 'Right'
-- with the first decoded record; anomalies reported by 'inspectEachField'
-- are only logged as warnings.
readSeqSourceFileRow :: FilePath -> (Int, Bch.ByteString) -> PoseidonIO (Either PoseidonException SeqSourceRow)
readSeqSourceFileRow seqSourcePath (lineNumber, row) =
    case Csv.decodeByNameWith decodingOptions row of
        Left e -> do
            -- try to turn the raw decoder message into a more readable one
            let betterError = case P.parse parseCsvParseError "" e of
                    Left _       -> removeUselessSuffix e
                    Right result -> renderCsvParseError result
            return $ Left $ rowException betterError
        -- A successful decode may still contain no record at all (presumably
        -- when the CSV parser treats the line as blank, e.g. a lone carriage
        -- return -- TODO confirm), so the partial V.head must be avoided.
        Right (_, rs) -> case rs V.!? 0 of
            Nothing -> return $ Left $ rowException "No record could be decoded from this line"
            Just seqSourceRow -> do
                -- cell-wise checks
                let inspectRes = concat $ catMaybes $ inspectEachField seqSourceRow
                unless (null inspectRes) $ do
                    logWarning $ "Value anomaly in " ++ seqSourcePath ++ " in line " ++ renderLocation ++ ": "
                    mapM_ logWarning inspectRes
                -- return result
                return $ Right seqSourceRow
    where
        renderLocation :: String
        renderLocation = show lineNumber
        -- exception for this file and line with the given message
        rowException :: String -> PoseidonException
        rowException = PoseidonFileRowException seqSourcePath renderLocation

-- Global SSF consistency checks

-- | Run the file-wide consistency checks on the rows of a .ssf file and emit
-- one warning per failed check.
--
-- The first argument is the path of the .ssf file; it is only used to build
-- the warning text. The second argument holds the rows to be checked.
--
-- Two predicates are evaluated, in this order:
--
-- * 'checkRunsUnique'
-- * 'checkAtLeastOnePoseidonID'
--
-- The only action taken on a failed check is a call to 'logWarning'.
warnSeqSourceConsistency :: FilePath -> SeqSourceRows -> PoseidonIO ()
warnSeqSourceConsistency seqSourcePath xs = do
    unless (checkRunsUnique xs) $
        warnAbout "The values in the run_accession column are not unique"
    unless (checkAtLeastOnePoseidonID xs) $
        warnAbout "The poseidon_IDs column is completely empty. Package and .ssf file are not linked"
    where
        -- Both warnings share the same prefix naming the affected file, so
        -- the message is assembled in one place.
        warnAbout :: String -> PoseidonIO ()
        warnAbout issue = logWarning $
            "Potential consistency issues in file " ++ seqSourcePath ++ ": " ++ issue

-- | 'True' when no run accession occurs more than once across the rows.
--
-- Rows for which 'sRunAccession' yields 'Nothing' do not take part in the
-- comparison, so any number of rows may lack a run accession.
checkRunsUnique :: SeqSourceRows -> Bool
checkRunsUnique (SeqSourceRows rows) = runAccessions == nub runAccessions
    where
        -- Only the accessions that are actually present.
        -- 'nub' drops later duplicates, so the list equals its own 'nub'
        -- exactly when it contains no repeated value.
        runAccessions = mapMaybe sRunAccession rows

-- | 'True' when at least one row has a value in its 'sPoseidonID' field,
-- i.e. the field is not 'Nothing'. An empty list of rows yields 'False'.
checkAtLeastOnePoseidonID :: SeqSourceRows -> Bool
checkAtLeastOnePoseidonID (SeqSourceRows rows) = any hasPoseidonID rows
    where
        -- A row counts as soon as the field is populated at all; the content
        -- of the list column is not inspected.
        hasPoseidonID = isJust . sPoseidonID