{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Poseidon.CLI.Jannocoalesce where
import Poseidon.Janno (JannoRow (..), JannoRows (..),
parseJannoRowFromNamedRecord,
readJannoFile, writeJannoFile)
import Poseidon.Package (PackageReadOptions (..),
defaultPackageReadOptions,
getJointJanno,
readPoseidonPackageCollection)
import Poseidon.Utils (PoseidonException (..), PoseidonIO,
logDebug, logInfo, logWarning)
import Control.Monad (filterM, forM_, when)
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HM
import qualified Data.IORef as R
import Data.List ((\\))
import Data.Text (pack, replace, unpack)
import System.Directory (createDirectoryIfMissing)
import System.FilePath (takeDirectory)
import Text.Regex.TDFA ((=~))
data JannoSourceSpec = JannoSourceSingle FilePath | JannoSourceBaseDirs [FilePath]
data CoalesceJannoColumnSpec =
AllJannoColumns
| IncludeJannoColumns [BSC.ByteString]
| ExcludeJannoColumns [BSC.ByteString]
data JannoCoalesceOptions = JannoCoalesceOptions
{ JannoCoalesceOptions -> JannoSourceSpec
_jannocoalesceSource :: JannoSourceSpec
, JannoCoalesceOptions -> [Char]
_jannocoalesceTarget :: FilePath
, JannoCoalesceOptions -> Maybe [Char]
_jannocoalesceOutSpec :: Maybe FilePath
, JannoCoalesceOptions -> CoalesceJannoColumnSpec
_jannocoalesceJannoColumns :: CoalesceJannoColumnSpec
, JannoCoalesceOptions -> Bool
_jannocoalesceOverwriteColumns :: Bool
, JannoCoalesceOptions -> [Char]
_jannocoalesceSourceKey :: String
, JannoCoalesceOptions -> [Char]
_jannocoalesceTargetKey :: String
, JannoCoalesceOptions -> Maybe [Char]
_jannocoalesceIdStrip :: Maybe String
}
runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO ()
runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO ()
runJannocoalesce (JannoCoalesceOptions JannoSourceSpec
sourceSpec [Char]
target Maybe [Char]
outSpec CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip) = do
JannoRows [JannoRow]
sourceRows <- case JannoSourceSpec
sourceSpec of
JannoSourceSingle [Char]
sourceFile -> [ByteString] -> [Char] -> ReaderT Env IO JannoRows
readJannoFile [] [Char]
sourceFile
JannoSourceBaseDirs [[Char]]
sourceDirs -> do
let pacReadOpts :: PackageReadOptions
pacReadOpts = PackageReadOptions
defaultPackageReadOptions {
_readOptIgnoreChecksums = True
, _readOptGenoCheck = False
, _readOptIgnoreGeno = True
, _readOptOnlyLatest = True
}
[PoseidonPackage] -> JannoRows
getJointJanno ([PoseidonPackage] -> JannoRows)
-> ReaderT Env IO [PoseidonPackage] -> ReaderT Env IO JannoRows
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> PackageReadOptions -> [[Char]] -> ReaderT Env IO [PoseidonPackage]
readPoseidonPackageCollection PackageReadOptions
pacReadOpts [[Char]]
sourceDirs
JannoRows [JannoRow]
targetRows <- [ByteString] -> [Char] -> ReaderT Env IO JannoRows
readJannoFile [] [Char]
target
[JannoRow]
newJanno <- [JannoRow]
-> [JannoRow]
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> Maybe [Char]
-> PoseidonIO [JannoRow]
makeNewJannoRows [JannoRow]
sourceRows [JannoRow]
targetRows CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip
let outPath :: [Char]
outPath = [Char] -> ([Char] -> [Char]) -> Maybe [Char] -> [Char]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Char]
target [Char] -> [Char]
forall a. a -> a
id Maybe [Char]
outSpec
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Writing to file (directory will be created if missing): " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
outPath
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ do
Bool -> [Char] -> IO ()
createDirectoryIfMissing Bool
True ([Char] -> [Char]
takeDirectory [Char]
outPath)
[Char] -> JannoRows -> IO ()
writeJannoFile [Char]
outPath ([JannoRow] -> JannoRows
JannoRows [JannoRow]
newJanno)
type CounterMismatches = R.IORef Int
type CounterCopied = R.IORef Int
makeNewJannoRows :: [JannoRow] -> [JannoRow] -> CoalesceJannoColumnSpec -> Bool -> String -> String -> Maybe String -> PoseidonIO [JannoRow]
makeNewJannoRows :: [JannoRow]
-> [JannoRow]
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> Maybe [Char]
-> PoseidonIO [JannoRow]
makeNewJannoRows [JannoRow]
sourceRows [JannoRow]
targetRows CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey Maybe [Char]
maybeStrip = do
[Char] -> PoseidonIO ()
logInfo [Char]
"Starting to coalesce..."
IORef Int
counterMismatches <- IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> ReaderT Env IO (IORef Int))
-> IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
R.newIORef Int
0
IORef Int
counterCopied <- IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (IORef Int) -> ReaderT Env IO (IORef Int))
-> IO (IORef Int) -> ReaderT Env IO (IORef Int)
forall a b. (a -> b) -> a -> b
$ Int -> IO (IORef Int)
forall a. a -> IO (IORef a)
R.newIORef Int
0
[JannoRow]
newRows <- (JannoRow -> ReaderT Env IO JannoRow)
-> [JannoRow] -> PoseidonIO [JannoRow]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (IORef Int -> IORef Int -> JannoRow -> ReaderT Env IO JannoRow
makeNewJannoRow IORef Int
counterMismatches IORef Int
counterCopied) [JannoRow]
targetRows
Int
counterCopiedVal <- IO Int -> ReaderT Env IO Int
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> ReaderT Env IO Int) -> IO Int -> ReaderT Env IO Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
R.readIORef IORef Int
counterCopied
Int
counterMismatchesVal <- IO Int -> ReaderT Env IO Int
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Int -> ReaderT Env IO Int) -> IO Int -> ReaderT Env IO Int
forall a b. (a -> b) -> a -> b
$ IORef Int -> IO Int
forall a. IORef a -> IO a
R.readIORef IORef Int
counterMismatches
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Copied " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
counterCopiedVal [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" values"
Bool -> PoseidonIO () -> PoseidonIO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
counterMismatchesVal Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0) (PoseidonIO () -> PoseidonIO ()) -> PoseidonIO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$
[Char] -> PoseidonIO ()
logWarning ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"Failed to find matches for " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
counterMismatchesVal [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" target rows in source"
[JannoRow] -> PoseidonIO [JannoRow]
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return [JannoRow]
newRows
where
makeNewJannoRow :: CounterMismatches -> CounterCopied -> JannoRow -> PoseidonIO JannoRow
makeNewJannoRow :: IORef Int -> IORef Int -> JannoRow -> ReaderT Env IO JannoRow
makeNewJannoRow IORef Int
cm IORef Int
cp JannoRow
targetRow = do
[Char]
posId <- JannoRow -> [Char] -> ReaderT Env IO [Char]
forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
targetRow [Char]
tKey
[JannoRow]
sourceRowCandidates <- (JannoRow -> ReaderT Env IO Bool)
-> [JannoRow] -> PoseidonIO [JannoRow]
forall (m :: * -> *) a.
Applicative m =>
(a -> m Bool) -> [a] -> m [a]
filterM (\JannoRow
r -> (Maybe [Char] -> [Char] -> [Char] -> Bool
matchWithOptionalStrip Maybe [Char]
maybeStrip [Char]
posId) ([Char] -> Bool) -> ReaderT Env IO [Char] -> ReaderT Env IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> JannoRow -> [Char] -> ReaderT Env IO [Char]
forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
r [Char]
sKey) [JannoRow]
sourceRows
case [JannoRow]
sourceRowCandidates of
[] -> do
[Char] -> PoseidonIO ()
logWarning ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"no match for target " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
posId [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" in source"
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
R.modifyIORef IORef Int
cm (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
JannoRow -> ReaderT Env IO JannoRow
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return JannoRow
targetRow
[JannoRow
keyRow] -> IORef Int
-> JannoRow
-> JannoRow
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> ReaderT Env IO JannoRow
mergeRow IORef Int
cp JannoRow
targetRow JannoRow
keyRow CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey
[JannoRow]
_ -> PoseidonException -> ReaderT Env IO JannoRow
forall e a. (HasCallStack, Exception e) => e -> ReaderT Env IO a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM (PoseidonException -> ReaderT Env IO JannoRow)
-> PoseidonException -> ReaderT Env IO JannoRow
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenericException ([Char] -> PoseidonException) -> [Char] -> PoseidonException
forall a b. (a -> b) -> a -> b
$ [Char]
"source file contains multiple rows with key " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
posId
getKeyFromJanno :: (MonadThrow m) => JannoRow -> String -> m String
getKeyFromJanno :: forall (m :: * -> *).
MonadThrow m =>
JannoRow -> [Char] -> m [Char]
getKeyFromJanno JannoRow
jannoRow [Char]
key = do
let jannoRowDict :: NamedRecord
jannoRowDict = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
jannoRow
case NamedRecord
jannoRowDict NamedRecord -> ByteString -> Maybe ByteString
forall k v. (Eq k, Hashable k) => HashMap k v -> k -> Maybe v
HM.!? ([Char] -> ByteString
BSC.pack [Char]
key) of
Maybe ByteString
Nothing -> PoseidonException -> m [Char]
forall e a. (HasCallStack, Exception e) => e -> m a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM (PoseidonException -> m [Char]) -> PoseidonException -> m [Char]
forall a b. (a -> b) -> a -> b
$ [Char] -> PoseidonException
PoseidonGenericException ([Char]
"Key " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
key [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" not present in .janno file")
Just ByteString
r -> [Char] -> m [Char]
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return ([Char] -> m [Char]) -> [Char] -> m [Char]
forall a b. (a -> b) -> a -> b
$ ByteString -> [Char]
BSC.unpack ByteString
r
matchWithOptionalStrip :: (Maybe String) -> String -> String -> Bool
matchWithOptionalStrip :: Maybe [Char] -> [Char] -> [Char] -> Bool
matchWithOptionalStrip Maybe [Char]
maybeRegex [Char]
id1 [Char]
id2 =
case Maybe [Char]
maybeRegex of
Maybe [Char]
Nothing -> [Char]
id1 [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
id2
Just [Char]
r ->
let id1stripped :: [Char]
id1stripped = [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
id1
id2stripped :: [Char]
id2stripped = [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
id2
in [Char]
id1stripped [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
id2stripped
where
stripR :: String -> String -> String
stripR :: [Char] -> [Char] -> [Char]
stripR [Char]
r [Char]
s =
let match :: [Char]
match = [Char]
s [Char] -> [Char] -> [Char]
forall source source1 target.
(RegexMaker Regex CompOption ExecOption source,
RegexContext Regex source1 target) =>
source1 -> source -> target
=~ [Char]
r
in if [Char] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Char]
match then [Char]
s else Text -> [Char]
unpack (Text -> [Char]) -> Text -> [Char]
forall a b. (a -> b) -> a -> b
$ HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
replace ([Char] -> Text
pack [Char]
match) Text
"" ([Char] -> Text
pack [Char]
s)
mergeRow :: CounterCopied -> JannoRow -> JannoRow -> CoalesceJannoColumnSpec -> Bool -> String -> String -> PoseidonIO JannoRow
mergeRow :: IORef Int
-> JannoRow
-> JannoRow
-> CoalesceJannoColumnSpec
-> Bool
-> [Char]
-> [Char]
-> ReaderT Env IO JannoRow
mergeRow IORef Int
cp JannoRow
targetRow JannoRow
sourceRow CoalesceJannoColumnSpec
fields Bool
overwrite [Char]
sKey [Char]
tKey = do
let sourceKeys :: [ByteString]
sourceKeys = NamedRecord -> [ByteString]
forall k v. HashMap k v -> [k]
HM.keys NamedRecord
sourceRowRecord
sourceKeysDesired :: [ByteString]
sourceKeysDesired = [ByteString] -> CoalesceJannoColumnSpec -> [ByteString]
determineDesiredSourceKeys [ByteString]
sourceKeys CoalesceJannoColumnSpec
fields
targetComplete :: NamedRecord
targetComplete = NamedRecord -> NamedRecord -> NamedRecord
forall k v. Eq k => HashMap k v -> HashMap k v -> HashMap k v
HM.union NamedRecord
targetRowRecord ([(ByteString, ByteString)] -> NamedRecord
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HM.fromList ([(ByteString, ByteString)] -> NamedRecord)
-> [(ByteString, ByteString)] -> NamedRecord
forall a b. (a -> b) -> a -> b
$ (ByteString -> (ByteString, ByteString))
-> [ByteString] -> [(ByteString, ByteString)]
forall a b. (a -> b) -> [a] -> [b]
map (, ByteString
BSC.empty) [ByteString]
sourceKeysDesired)
newRowRecord :: NamedRecord
newRowRecord = (ByteString -> ByteString -> ByteString)
-> NamedRecord -> NamedRecord
forall k v1 v2. (k -> v1 -> v2) -> HashMap k v1 -> HashMap k v2
HM.mapWithKey ByteString -> ByteString -> ByteString
fillFromSource NamedRecord
targetComplete
parseResult :: Either [Char] JannoRow
parseResult = Parser JannoRow -> Either [Char] JannoRow
forall a. Parser a -> Either [Char] a
Csv.runParser (Parser JannoRow -> Either [Char] JannoRow)
-> (NamedRecord -> Parser JannoRow)
-> NamedRecord
-> Either [Char] JannoRow
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> NamedRecord -> Parser JannoRow
parseJannoRowFromNamedRecord [] (NamedRecord -> Either [Char] JannoRow)
-> NamedRecord -> Either [Char] JannoRow
forall a b. (a -> b) -> a -> b
$ NamedRecord
newRowRecord
[Char] -> PoseidonIO ()
logInfo ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"matched target " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack (NamedRecord
targetComplete NamedRecord -> ByteString -> ByteString
forall k v.
(Eq k, Hashable k, HasCallStack) =>
HashMap k v -> k -> v
HM.! [Char] -> ByteString
BSC.pack [Char]
tKey) [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++
[Char]
" with source " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack (NamedRecord
sourceRowRecord NamedRecord -> ByteString -> ByteString
forall k v.
(Eq k, Hashable k, HasCallStack) =>
HashMap k v -> k -> v
HM.! [Char] -> ByteString
BSC.pack [Char]
sKey)
case Either [Char] JannoRow
parseResult of
Left [Char]
err -> PoseidonException -> ReaderT Env IO JannoRow
forall e a. (HasCallStack, Exception e) => e -> ReaderT Env IO a
forall (m :: * -> *) e a.
(MonadThrow m, HasCallStack, Exception e) =>
e -> m a
throwM (PoseidonException -> ReaderT Env IO JannoRow)
-> ([Char] -> PoseidonException)
-> [Char]
-> ReaderT Env IO JannoRow
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> PoseidonException
PoseidonGenericException ([Char] -> ReaderT Env IO JannoRow)
-> [Char] -> ReaderT Env IO JannoRow
forall a b. (a -> b) -> a -> b
$ [Char]
".janno row-merge error: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
err
Right JannoRow
r -> do
let newFields :: NamedRecord
newFields = (ByteString -> ByteString -> Maybe ByteString)
-> NamedRecord -> NamedRecord -> NamedRecord
forall k v w.
(Eq k, Hashable k) =>
(v -> w -> Maybe v) -> HashMap k v -> HashMap k w -> HashMap k v
HM.differenceWith (\ByteString
v1 ByteString
v2 -> if ByteString
v1 ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
v2 then Maybe ByteString
forall a. Maybe a
Nothing else ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just ByteString
v1) NamedRecord
newRowRecord NamedRecord
targetComplete
if NamedRecord -> Bool
forall k v. HashMap k v -> Bool
HM.null NamedRecord
newFields then do
[Char] -> PoseidonIO ()
logDebug [Char]
"-- no changes"
else do
[(ByteString, ByteString)]
-> ((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (NamedRecord -> [(ByteString, ByteString)]
forall k v. HashMap k v -> [(k, v)]
HM.toList NamedRecord
newFields) (((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ())
-> ((ByteString, ByteString) -> PoseidonIO ()) -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ \(ByteString
key, ByteString
val) -> do
IO () -> PoseidonIO ()
forall a. IO a -> ReaderT Env IO a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> PoseidonIO ()) -> IO () -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ IORef Int -> (Int -> Int) -> IO ()
forall a. IORef a -> (a -> a) -> IO ()
R.modifyIORef IORef Int
cp (Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
[Char] -> PoseidonIO ()
logDebug ([Char] -> PoseidonIO ()) -> [Char] -> PoseidonIO ()
forall a b. (a -> b) -> a -> b
$ [Char]
"-- copied \"" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack ByteString
val [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
"\" from column " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ ByteString -> [Char]
BSC.unpack ByteString
key
JannoRow -> ReaderT Env IO JannoRow
forall a. a -> ReaderT Env IO a
forall (m :: * -> *) a. Monad m => a -> m a
return JannoRow
r
where
targetRowRecord :: Csv.NamedRecord
targetRowRecord :: NamedRecord
targetRowRecord = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
targetRow
sourceRowRecord :: Csv.NamedRecord
sourceRowRecord :: NamedRecord
sourceRowRecord = JannoRow -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
Csv.toNamedRecord JannoRow
sourceRow
determineDesiredSourceKeys :: [BSC.ByteString] -> CoalesceJannoColumnSpec -> [BSC.ByteString]
determineDesiredSourceKeys :: [ByteString] -> CoalesceJannoColumnSpec -> [ByteString]
determineDesiredSourceKeys [ByteString]
keys CoalesceJannoColumnSpec
AllJannoColumns = [ByteString]
keys
determineDesiredSourceKeys [ByteString]
_ (IncludeJannoColumns [ByteString]
included) = [ByteString]
included
determineDesiredSourceKeys [ByteString]
keys (ExcludeJannoColumns [ByteString]
excluded) = [ByteString]
keys [ByteString] -> [ByteString] -> [ByteString]
forall a. Eq a => [a] -> [a] -> [a]
\\ [ByteString]
excluded
fillFromSource :: BSC.ByteString -> BSC.ByteString -> BSC.ByteString
fillFromSource :: ByteString -> ByteString -> ByteString
fillFromSource ByteString
key ByteString
targetVal =
if ByteString
key ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= [Char] -> ByteString
BSC.pack [Char]
tKey
Bool -> Bool -> Bool
&& ByteString -> CoalesceJannoColumnSpec -> Bool
includeField ByteString
key CoalesceJannoColumnSpec
fields
Bool -> Bool -> Bool
&& (ByteString
targetVal ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString
"n/a", ByteString
"", ByteString
BSC.empty] Bool -> Bool -> Bool
|| Bool
overwrite)
then ByteString -> ByteString -> NamedRecord -> ByteString
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
HM.findWithDefault ByteString
"" ByteString
key NamedRecord
sourceRowRecord
else ByteString
targetVal
includeField :: BSC.ByteString -> CoalesceJannoColumnSpec -> Bool
includeField :: ByteString -> CoalesceJannoColumnSpec -> Bool
includeField ByteString
_ CoalesceJannoColumnSpec
AllJannoColumns = Bool
True
includeField ByteString
key (IncludeJannoColumns [ByteString]
xs) = ByteString
key ByteString -> [ByteString] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [ByteString]
xs
includeField ByteString
key (ExcludeJannoColumns [ByteString]
xs) = ByteString
key ByteString -> [ByteString] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [ByteString]
xs