Last active
May 27, 2019 16:34
-
-
Save simg/035ec4d08b866d59196bcc793c13d85e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Codec.Archive.Zip (sourceEntry, EntrySelector, getEntrySource, getEntry, getEntryName, getEntries, withArchive) | |
import Control.Monad.IO.Class (MonadIO, liftIO) | |
import Data.ByteString (ByteString) | |
--import Data.Foldable (for) | |
import Conduit (mapC, mapM_C, yieldMany, ($$)) | |
import Data.Conduit (Conduit(..), yield, runConduit, (.|), awaitForever) | |
import Data.Conduit.Binary as CB | |
import qualified Data.Conduit.List as CL | |
import qualified Data.Csv as Csv | |
import Lib.ParseCSV (parseCSV, parseCSVLine, CSV) | |
--import Data.Csv hiding (Csv) | |
--import Data.Csv.Conduit | |
import Data.Map (keys, Map) | |
import Data.String.Conversions (cs) | |
import Data.Text (Text) | |
import Path (parseAbsFile) | |
--import System.Directory (parseAbsFile) | |
import System.FilePath.Posix (takeExtension) | |
import System.TimeIt | |
parseCSVFromZip :: FilePath -> IO () | |
parseCSVFromZip fp = do | |
withArchive fp $ do | |
name:_ <- keys <$> getEntries | |
source <- getEntrySource name | |
runConduit $ source | |
.| CB.lines | |
.| mapC (csvToThing) | |
.| mapM_C print | |
fails with: Main.hs:27:5: error: | |
• Couldn't match type ‘IO’ with ‘Codec.Archive.Zip.ZipArchive’ | |
Expected type: Codec.Archive.Zip.ZipArchive () | |
Actual type: IO () | |
http://hackage.haskell.org/package/zip-1.2.0/docs/Codec-Archive-Zip.html
parseFromZip :: FilePath -> FilePath -> IO ()
parseFromZip inp out = do
source <- withArchive inp $ do
name <- head . keys <$> getEntries
getEntrySource name
dest <- createArchive out $ do
sinkEntry Deflate m "data.txt" -- m causes a compile error obviously, but how should I reference the conduit here
runConduitRes $
source
.| CB.lines
.| mapC csvToThing
.| mapC (cs . show)
.| dest
the error:
Variable not in scope:
m :: conduit-1.3.1.1:Data.Conduit.Internal.Conduit.ConduitT
() ByteString (ResourceT IO) ()
|
44 | sinkEntry Deflate m "data.txt"
parseCompaniesZip :: FilePath -> FilePath -> IO ()
parseCompaniesZip inp out = do
source <- withArchive inp $ do
name <- head . keys <$> getEntries
getEntrySource name
runConduitRes $
source
.| CB.lines
.| mapC csvToThink
.| mapC (cs . show)
.| sinkZip out "data.txt"
sinkZip :: (MonadResource m) => FilePath -> FilePath -> Conduit ByteString m a -> Conduit ByteString m ()
sinkZip zp name source = createArchive zp $ do
sinkEntry Deflate source name
errors with:
• Couldn't match type ‘Conduit ByteString m0 a0
-> Conduit ByteString m0 ()’
with ‘conduit-1.3.1.1:Data.Conduit.Internal.Conduit.ConduitT
b0 Data.Void.Void (ResourceT IO) ()’
Expected type: conduit-1.3.1.1:Data.Conduit.Internal.Conduit.ConduitM
b0 Data.Void.Void (ResourceT IO) ()
Actual type: Conduit ByteString m0 a0 -> Conduit ByteString m0 ()
• Probable cause: ‘sinkZip’ is applied to too few arguments
In the second argument of ‘(.|)’, namely
‘sinkZip out "data.txt"’
In the second argument of ‘(.|)’, namely
‘mapC (cs . show) .| sinkZip out "data.txt"’
In the second argument of ‘(.|)’, namely
‘mapC csvToCompany
.| mapC (cs . show) .| sinkZip out "data.txt"’
|
50 | .| sinkZip out "data.txt"
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Main.hs:57:33: error:
• Couldn't match type ‘ByteString’ with ‘()’
Expected type: conduit-1.3.1.1:Data.Conduit.Internal.Conduit.ConduitT
() ByteString (ResourceT IO) ()
Actual type: Conduit ByteString m a
• In the second argument of ‘sinkEntry’, namely ‘source’
In a stmt of a 'do' block: sinkEntry Deflate source name
In the second argument of ‘($)’, namely
‘do sinkEntry Deflate source name’
|
57 | sinkEntry Deflate source name
| ^^^^^^
Main.hs:57:40: error:
• Couldn't match type ‘[Char]’ with ‘EntrySelector’
Expected type: EntrySelector
Actual type: FilePath
• In the third argument of ‘sinkEntry’, namely ‘name’
In a stmt of a 'do' block: sinkEntry Deflate source name
In the second argument of ‘($)’, namely
‘do sinkEntry Deflate source name’
|
57 | sinkEntry Deflate source name
| ^^^^
what I'm ultimately working towards
parseCompaniesZip :: FilePath -> FilePath -> IO ()
parseCompaniesZip inp out = do
source <- withArchive inp $ do
name <- head . keys <$> getEntries
getEntrySource name
runConduitRes $
source
.| CB.lines
.| mapC csvToThink
.| mapC (cs . show)
.| filterLeft -> sinkZip out "errors.txt"
.| filterRight -> sinkZip out "data.txt"
sinkZip :: (MonadResource m) => FilePath -> FilePath -> Conduit ByteString m a -> Conduit ByteString m ()
sinkZip zp name source = createArchive zp $ do
sinkEntry Deflate source name
parseCompaniesZip :: FilePath -> FilePath -> IO ()
parseCompaniesZip inp out = do
source <- withArchive inp $ do
name <- head . keys <$> getEntries
getEntrySource name
errorSink <- sinkZip out "errors.txt"
dataSink <- sinkZip out "data.txt"
runConduitRes $
source
.| CB.lines
.| mapC csvToThink
.| mapC (cs . show)
.| filterLeft -> errorSink
.| filterRight -> dataSink
sinkZip :: (MonadResource m) => FilePath -> FilePath -> Conduit ByteString m a -> Conduit ByteString m ()
sinkZip zp name source = createArchive zp $ do
sinkEntry Deflate source name
The zip library seems to be missing a function to give you a sink in this way. It might be a good idea to submit a feature request to the library, it doesn't hurt to ask. Much like there are already sourcEntry
and getEntrySource
, maybe there should be a getEntrySink
to complement sinkEntry
.
That's a good idea, thanks :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
uses http://hackage.haskell.org/package/zip-1.2.0/docs/Codec-Archive-Zip.html#g:7