{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections     #-}

module Poseidon.CLI.Jannocoalesce where

import           Poseidon.Janno         (JannoRow (..), JannoRows (..),
                                         parseJannoRowFromNamedRecord,
                                         readJannoFile, writeJannoFile)
import           Poseidon.Package       (PackageReadOptions (..),
                                         defaultPackageReadOptions,
                                         getJointJanno,
                                         readPoseidonPackageCollection)
import           Poseidon.Utils         (PoseidonException (..), PoseidonIO,
                                         logDebug, logInfo, logWarning)

import           Control.Monad          (filterM, forM_, when)
import           Control.Monad.Catch    (MonadThrow, throwM)
import           Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8  as BSC
import qualified Data.Csv               as Csv
import qualified Data.HashMap.Strict    as HM
import qualified Data.IORef             as R
import           Data.List              ((\\))
import           Data.Text              (pack, replace, unpack)
import           System.Directory       (createDirectoryIfMissing)
import           System.FilePath        (takeDirectory)
import           Text.Regex.TDFA        ((=~))

-- the source can be a single janno file, or a set of base directories as usual.
data JannoSourceSpec = JannoSourceSingle FilePath | JannoSourceBaseDirs [FilePath]

data CoalesceJannoColumnSpec =
      AllJannoColumns
    | IncludeJannoColumns [BSC.ByteString]
    | ExcludeJannoColumns [BSC.ByteString]

data JannoCoalesceOptions = JannoCoalesceOptions
    { JannoCoalesceOptions -> JannoSourceSpec
_jannocoalesceSource           :: JannoSourceSpec
    , JannoCoalesceOptions -> [Char]
_jannocoalesceTarget           :: FilePath
    , JannoCoalesceOptions -> Maybe [Char]
_jannocoalesceOutSpec          :: Maybe FilePath -- Nothing means "in place"
    , JannoCoalesceOptions -> CoalesceJannoColumnSpec
_jannocoalesceJannoColumns     :: CoalesceJannoColumnSpec
    , JannoCoalesceOptions -> Bool
_jannocoalesceOverwriteColumns :: Bool
    , JannoCoalesceOptions -> [Char]
_jannocoalesceSourceKey        :: String -- by default set to "Poseidon_ID"
    , JannoCoalesceOptions -> [Char]
_jannocoalesceTargetKey        :: String -- by default set to "Poseidon_ID"
    , JannoCoalesceOptions -> Maybe [Char]
_jannocoalesceIdStrip          :: Maybe String -- an optional regex to strip from target and source keys
    }

runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO ()
runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO ()
runJannocoalesce (JannoCoalesceOptions JannoSourceSpec
sourceSpec [Char]
target Maybe [Char]
outSpec CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip) = do
    JannoRows [JannoRow]
sourceRows <- case JannoSourceSpec
sourceSpec of
        JannoSourceSingle [Char]
sourceFile -> [ByteString] -> [Char] -> ReaderT Env IO JannoRows
readJannoFile [] [Char]
sourceFile
        JannoSourceBaseDirs [[Char]]
sourceDirs -> do
            let pacReadOpts :: PackageReadOptions
pacReadOpts = PackageReadOptions
defaultPackageReadOptions {
                      _readOptIgnoreChecksums      = True
                    , _readOptGenoCheck            = False
                    , _readOptIgnoreGeno           = True
                    , _readOptOnlyLatest           = True
                }
            [PoseidonPackage] -> JannoRows
getJointJanno ([PoseidonPackage] -> JannoRows)
-> ReaderT Env IO [PoseidonPackage] -> ReaderT Env IO JannoRows
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> PackageReadOptions -> [[Char]] -> ReaderT Env IO [PoseidonPackage]
readPoseidonPackageCollection PackageReadOptions
pacReadOpts [[Char]]
sourceDirs
    JannoRows [JannoRow]
targetRows <- [ByteString] -> [Char] -> ReaderT Env IO JannoRows
readJannoFile [] [Char]
target

    [JannoRow]
newJanno <- [JannoRow]
-> [JannoRow]
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> Maybe [Char]
-> PoseidonIO [JannoRow]
makeNewJannoRows [JannoRow]
sourceRows [JannoRow]
targetRows CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip

    let outPath :: [Char]
outPath = [Char] -> ([Char] -> [Char]) -> Maybe [Char] -> [Char]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Char]
target [Char] -> [Char]
forall a. a -> a
id Maybe [Char]
outSpec
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Writing to file (directory will be created if missing): " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
outPath
    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
        Bool -> [Char] -> IO ()
createDirectoryIfMissing Bool
True ([Char] -> [Char]
takeDirectory [Char]
outPath)
        [Char] -> JannoRows -> IO ()
writeJannoFile [Char]
outPath ([JannoRow] -> JannoRows
JannoRows [JannoRow]
newJanno)

type CounterMismatches = R.IORef Int
type CounterCopied     = R.IORef Int

makeNewJannoRows :: [JannoRow] -> [JannoRow] -> CoalesceJannoColumnSpec -> Bool -> String -> String -> Maybe String -> PoseidonIO [JannoRow]
makeNewJannoRows :: [JannoRow]
-> [JannoRow]
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> Maybe [Char]
-> PoseidonIO [JannoRow]
makeNewJannoRows [JannoRow]
sourceRows [JannoRow]
targetRows CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip = do
    [Char] -> PoseidonIO ()
logInfo [Char]
"Starting to coalesce..."
    IORef Int
counterMismatches <- IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> ReaderT Env IO (IORef Int))
-> IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
R.newIORef Int
0
    IORef Int
counterCopied <- IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> ReaderT Env IO (IORef Int))
-> IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
R.newIORef Int
0
    [JannoRow]
newRows <- (JannoRow -> ReaderT Env IO JannoRow)
-> [JannoRow] -> PoseidonIO [JannoRow]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (IORef Int -> IORef Int -> JannoRow -> ReaderT Env IO JannoRow
makeNewJannoRow IORef Int
counterMismatches IORef Int
counterCopied) [JannoRow]
targetRows
    Int
counterCopiedVal <- IO Int -> ReaderT Env IO Int
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> ReaderT Env IO Int) -> IO Int -> ReaderT Env IO Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
R.readIORef IORef Int
counterCopied
    Int
counterMismatchesVal <- IO Int -> ReaderT Env IO Int
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> ReaderT Env IO Int) -> IO Int -> ReaderT Env IO Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
R.readIORef IORef Int
counterMismatches
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Copied " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
counterCopiedVal [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" values"
    Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
counterMismatchesVal Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$
        [Char] -> PoseidonIO ()
logWarning ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Failed to find matches for " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
counterMismatchesVal [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" target rows in source"
    [JannoRow] -> PoseidonIO [JannoRow]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [JannoRow]
newRows
    where
        makeNewJannoRow :: CounterMismatches -> CounterCopied -> JannoRow -> PoseidonIO JannoRow
        makeNewJannoRow :: IORef Int -> IORef Int -> JannoRow -> ReaderT Env IO JannoRow
makeNewJannoRow IORef Int
cm IORef Int
cp JannoRow
targetRow = do
            [Char]
posId <- JannoRow -> [Char] -> ReaderT Env IO [Char]
forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
targetRow [Char]
tKey
            [JannoRow]
sourceRowCandidates <- (JannoRow -> ReaderT Env IO Bool)
-> [JannoRow] -> PoseidonIO [JannoRow]
forall (m :: * -> *) a.
Applicative m =>
(a -> m Bool) -> [a] -> m [a]
filterM (\JannoRow
r -> (Maybe [Char] -> [Char] -> [Char] -> Bool
matchWithOptionalStrip Maybe [Char]
maybeStrip [Char]
posId) ([Char] -> Bool) -> ReaderT Env IO [Char] -> ReaderT Env IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> JannoRow -> [Char] -> ReaderT Env IO [Char]
forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
r [Char]
sKey) [JannoRow]
sourceRows
            case [JannoRow]
sourceRowCandidates of
                [] -> do
                    [Char] -> PoseidonIO ()
logWarning ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"no match for target " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
posId [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" in source"
                    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
R.modifyIORef IORef Int
cm (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                    JannoRow -> ReaderT Env IO JannoRow
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return JannoRow
targetRow
                [JannoRow
keyRow] -> IORef Int
-> JannoRow
-> JannoRow
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> ReaderT Env IO JannoRow
mergeRow IORef Int
cp JannoRow
targetRow JannoRow
keyRow CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey
                [JannoRow]
_ -> PoseidonException -> ReaderT Env IO JannoRow
forall e a. (HasCallStack, Exception e) => e -> ReaderT Env IO a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM (PoseidonException -> ReaderT Env IO JannoRow)
-> PoseidonException -> ReaderT Env IO JannoRow
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenericException ([Char] -> PoseidonException) -> [Char] -> PoseidonException
forall a b. (a -> b) -> a -> b
$ [Char]
"source file contains multiple rows with key " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
posId

getKeyFromJanno :: (MonadThrow m) => JannoRow -> String -> m String
getKeyFromJanno :: forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
jannoRow [Char]
key = do
    let jannoRowDict :: NamedRecord
jannoRowDict = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
jannoRow
    case NamedRecord
jannoRowDict NamedRecord -> ByteString -> Maybe ByteString
forall k v. (Eq k, Hashable k) => HashMap k v -> k -> Maybe v
HM.!? ([Char] -> ByteString
BSC.pack [Char]
key) of
        Maybe ByteString
Nothing -> PoseidonException -> m [Char]
forall e a. (HasCallStack, Exception e) => e -> m a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM (PoseidonException -> m [Char]) -> PoseidonException -> m [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenericException ([Char]
"Key " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
key [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" not present in .janno file")
        Just ByteString
r -> [Char] -> m [Char]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> m [Char]) -> [Char] -> m [Char]
forall a b. (a -> b) -> a -> b
$ ByteString -> [Char]
BSC.unpack ByteString
r

matchWithOptionalStrip :: (Maybe String) -> String -> String -> Bool
matchWithOptionalStrip :: Maybe [Char] -> [Char] -> [Char] -> Bool
matchWithOptionalStrip Maybe [Char]
maybeRegex [Char]
id1 [Char]
id2 =
    case Maybe [Char]
maybeRegex of
        Maybe [Char]
Nothing -> [Char]
id1 [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
id2
        Just [Char]
r ->
            let id1stripped :: [Char]
id1stripped = [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
id1
                id2stripped :: [Char]
id2stripped = [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
id2
            in  [Char]
id1stripped [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
id2stripped
  where
    stripR :: String -> String -> String
    stripR :: [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
s =
        let match :: [Char]
match = [Char]
s [Char] -> [Char] -> [Char]
forall source source1 target.
(RegexMaker Regex CompOption ExecOption source,
 RegexContext Regex source1 target) =>
source1 -> source -> target
=~ [Char]
r
        in  if [Char] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Char]
match then [Char]
s else Text -> [Char]
unpack (Text -> [Char]) -> Text -> [Char]
forall a b. (a -> b) -> a -> b
$ HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
replace ([Char] -> Text
pack [Char]
match) Text
"" ([Char] -> Text
pack [Char]
s)

mergeRow :: CounterCopied -> JannoRow -> JannoRow -> CoalesceJannoColumnSpec -> Bool -> String -> String -> PoseidonIO JannoRow
mergeRow :: IORef Int
-> JannoRow
-> JannoRow
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> ReaderT Env IO JannoRow
mergeRow IORef Int
cp JannoRow
targetRow JannoRow
sourceRow CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey = do
    let sourceKeys :: [ByteString]
sourceKeys        = NamedRecord -> [ByteString]
forall k v. HashMap k v -> [k]
HM.keys NamedRecord
sourceRowRecord
        sourceKeysDesired :: [ByteString]
sourceKeysDesired = [ByteString] -> CoalesceJannoColumnSpec -> [ByteString]
determineDesiredSourceKeys [ByteString]
sourceKeys CoalesceJannoColumnSpec
fields
        -- fill in the target row with dummy values for desired fields that might not be present yet
        targetComplete :: NamedRecord
targetComplete    = NamedRecord -> NamedRecord -> NamedRecord
forall k v. Eq k => HashMap k v -> HashMap k v -> HashMap k v
HM.union NamedRecord
targetRowRecord ([(ByteString, ByteString)] -> NamedRecord
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList ([(ByteString, ByteString)] -> NamedRecord)
-> [(ByteString, ByteString)] -> NamedRecord
forall a b. (a -> b) -> a -> b
$ (ByteString -> (ByteString, ByteString))
-> [ByteString] -> [(ByteString, ByteString)]
forall a b. (a -> b) -> [a] -> [b]
map (, ByteString
BSC.empty) [ByteString]
sourceKeysDesired)
        newRowRecord :: NamedRecord
newRowRecord      = (ByteString -> ByteString -> ByteString)
-> NamedRecord -> NamedRecord
forall k v1 v2. (k -> v1 -> v2) -> HashMap k v1 -> HashMap k v2
HM.mapWithKey ByteString -> ByteString -> ByteString
fillFromSource NamedRecord
targetComplete
        parseResult :: Either [Char] JannoRow
parseResult       = Parser JannoRow -> Either [Char] JannoRow
forall a. Parser a -> Either [Char] a
Csv.runParser (Parser JannoRow -> Either [Char] JannoRow)
-> (NamedRecord -> Parser JannoRow)
-> NamedRecord
-> Either [Char] JannoRow
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> NamedRecord -> Parser JannoRow
parseJannoRowFromNamedRecord [] (NamedRecord -> Either [Char] JannoRow)
-> NamedRecord -> Either [Char] JannoRow
forall a b. (a -> b) -> a -> b
$ NamedRecord
newRowRecord
    [Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"matched target " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack (NamedRecord
targetComplete  NamedRecord -> ByteString -> ByteString
forall k v.
(Eq k, Hashable k, HasCallStack) =>
HashMap k v -> k -> v
HM.! [Char] -> ByteString
BSC.pack [Char]
tKey) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++
              [Char]
" with source "   [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack (NamedRecord
sourceRowRecord NamedRecord -> ByteString -> ByteString
forall k v.
(Eq k, Hashable k, HasCallStack) =>
HashMap k v -> k -> v
HM.! [Char] -> ByteString
BSC.pack [Char]
sKey)
    case Either [Char] JannoRow
parseResult of
        Left [Char]
err -> PoseidonException -> ReaderT Env IO JannoRow
forall e a. (HasCallStack, Exception e) => e -> ReaderT Env IO a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM (PoseidonException -> ReaderT Env IO JannoRow)
-> ([Char] -> PoseidonException)
-> [Char]
-> ReaderT Env IO JannoRow
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonGenericException ([Char] -> ReaderT Env IO JannoRow)
-> [Char] -> ReaderT Env IO JannoRow
forall a b. (a -> b) -> a -> b
$ [Char]
".janno row-merge error: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
err
        Right JannoRow
r  -> do
            let newFields :: NamedRecord
newFields = (ByteString -> ByteString -> Maybe ByteString)
-> NamedRecord -> NamedRecord -> NamedRecord
forall k v w.
(Eq k, Hashable k) =>
(v -> w -> Maybe v) -> HashMap k v -> HashMap k w -> HashMap k v
HM.differenceWith (\ByteString
v1 ByteString
v2 -> if ByteString
v1 ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
v2 then Maybe ByteString
forall a. Maybe a
Nothing else ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just ByteString
v1) NamedRecord
newRowRecord NamedRecord
targetComplete
            if NamedRecord -> Bool
forall k v. HashMap k v -> Bool
HM.null NamedRecord
newFields then do
                [Char] -> PoseidonIO ()
logDebug [Char]
"-- no changes"
            else do
                [(ByteString, ByteString)]
-> ((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (NamedRecord -> [(ByteString, ByteString)]
forall k v. HashMap k v -> [(k, v)]
HM.toList NamedRecord
newFields) (((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ())
-> ((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ \(ByteString
key, ByteString
val) -> do
                    IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
R.modifyIORef IORef Int
cp (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                    [Char] -> PoseidonIO ()
logDebug ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"-- copied \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack ByteString
val [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\" from column " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack ByteString
key
            JannoRow -> ReaderT Env IO JannoRow
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return JannoRow
r
  where
    targetRowRecord :: Csv.NamedRecord
    targetRowRecord :: NamedRecord
targetRowRecord = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
targetRow
    sourceRowRecord :: Csv.NamedRecord
    sourceRowRecord :: NamedRecord
sourceRowRecord = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
sourceRow
    determineDesiredSourceKeys :: [BSC.ByteString] -> CoalesceJannoColumnSpec -> [BSC.ByteString]
    determineDesiredSourceKeys :: [ByteString] -> CoalesceJannoColumnSpec -> [ByteString]
determineDesiredSourceKeys [ByteString]
keys  CoalesceJannoColumnSpec
AllJannoColumns               = [ByteString]
keys
    determineDesiredSourceKeys [ByteString]
_    (IncludeJannoColumns [ByteString]
included) = [ByteString]
included
    determineDesiredSourceKeys [ByteString]
keys (ExcludeJannoColumns [ByteString]
excluded) = [ByteString]
keys [ByteString] -> [ByteString] -> [ByteString]
forall a. Eq a => [a] -> [a] -> [a]
\\ [ByteString]
excluded
    fillFromSource :: BSC.ByteString -> BSC.ByteString -> BSC.ByteString
    fillFromSource :: ByteString -> ByteString -> ByteString
fillFromSource ByteString
key ByteString
targetVal =
           -- don't overwrite key
        if ByteString
key ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= [Char] -> ByteString
BSC.pack [Char]
tKey
           -- overwrite field only if it's requested
           Bool -> Bool -> Bool
&& ByteString -> CoalesceJannoColumnSpec -> Bool
includeField ByteString
key CoalesceJannoColumnSpec
fields
           -- overwrite only empty fields, except overwrite is set
           Bool -> Bool -> Bool
&& (ByteString
targetVal ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString
"n/a", ByteString
"", ByteString
BSC.empty] Bool -> Bool -> Bool
|| Bool
overwrite)
        then ByteString -> ByteString -> NamedRecord -> ByteString
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
HM.findWithDefault ByteString
"" ByteString
key NamedRecord
sourceRowRecord
        else ByteString
targetVal
    includeField :: BSC.ByteString -> CoalesceJannoColumnSpec -> Bool
    includeField :: ByteString -> CoalesceJannoColumnSpec -> Bool
includeField ByteString
_    CoalesceJannoColumnSpec
AllJannoColumns         = Bool
True
    includeField ByteString
key (IncludeJannoColumns [ByteString]
xs) = ByteString
key ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
xs
    includeField ByteString
key (ExcludeJannoColumns [ByteString]
xs) = ByteString
key ByteString -> [ByteString] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [ByteString]
xs