Skip to content

Instantly share code, notes, and snippets.

@simg
Last active May 27, 2019 16:34
Show Gist options
  • Save simg/035ec4d08b866d59196bcc793c13d85e to your computer and use it in GitHub Desktop.
Save simg/035ec4d08b866d59196bcc793c13d85e to your computer and use it in GitHub Desktop.
import Codec.Archive.Zip (sourceEntry, EntrySelector, getEntrySource, getEntry, getEntryName, getEntries, withArchive)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.ByteString (ByteString)
--import Data.Foldable (for)
import Conduit (mapC, mapM_C, yieldMany, ($$))
import Conduit (runConduitRes)
import Data.Conduit (Conduit(..), yield, runConduit, (.|), awaitForever)
import Data.Conduit.Binary as CB
import qualified Data.Conduit.List as CL
import qualified Data.Csv as Csv
import Lib.ParseCSV (parseCSV, parseCSVLine, CSV)
--import Data.Csv hiding (Csv)
--import Data.Csv.Conduit
import Data.Map (keys, Map)
import Data.String.Conversions (cs)
import Data.Text (Text)
import Path (parseAbsFile)
--import System.Directory (parseAbsFile)
import System.FilePath.Posix (takeExtension)
import System.TimeIt
-- | Stream the first entry of the zip archive at @fp@, split it into lines,
-- convert every line with @csvToThing@ and print each result.
--
-- Raises an 'IOError' (via 'userError') when the archive has no entries.
parseCSVFromZip :: FilePath -> IO ()
parseCSVFromZip fp = do
  -- Only the archive bookkeeping runs inside 'ZipArchive'. Running the
  -- pipeline in that block does not type-check, because the pipeline is an
  -- 'IO' action while the block's statements must be 'ZipArchive' actions.
  firstEntry <- withArchive fp $ do
    names <- keys <$> getEntries
    case names of
      []       -> pure Nothing
      name : _ -> Just <$> getEntrySource name
  case firstEntry of
    Nothing ->
      ioError (userError ("parseCSVFromZip: no entries in archive " ++ fp))
    Just source ->
      -- The entry source needs a 'MonadResource' monad, so the pipeline is
      -- run with 'runConduitRes'; 'print' is lifted out of plain 'IO'.
      runConduitRes $
        source
          .| CB.lines
          .| mapC csvToThing
          .| mapM_C (liftIO . print)
fails with: Main.hs:27:5: error:
• Couldn't match type ‘IO’ with ‘Codec.Archive.Zip.ZipArchive’
Expected type: Codec.Archive.Zip.ZipArchive ()
Actual type: IO ()
@simg
Copy link
Author

simg commented May 27, 2019

-- | Sketch: read the first entry of the zip archive @inp@ line by line, map
-- each line through @csvToThink@, and route the results into two entries
-- (@errors.txt@ and @data.txt@) of the archive @out@.
--
-- NOTE(review): as posted this is pseudo-code, not compilable Haskell. The
-- @.| filterLeft -> errorSink@ / @.| filterRight -> dataSink@ lines are not
-- valid expression syntax. They presumably mean "send Left values to
-- errorSink and Right values to dataSink" -- confirm with the author.
parseCompaniesZip :: FilePath -> FilePath -> IO ()
parseCompaniesZip inp out = do
    -- Only the entry's source is obtained inside the archive action; the
    -- source itself is run later, in the pipeline below.
    source <- withArchive inp $ do
      -- NOTE(review): 'head' is partial; an archive with no entries fails here.
      name <- head . keys <$> getEntries
      getEntrySource name
    -- NOTE(review): 'sinkZip' is declared with three parameters but is
    -- applied to two here.
    errorSink <- sinkZip out "errors.txt"
    dataSink <- sinkZip out "data.txt"
    runConduitRes $
         source
      .| CB.lines
      .| mapC csvToThink
      -- Renders each parsed value with 'show' and converts it with 'cs'.
      -- NOTE(review): because this runs before the filter stages, they would
      -- see the rendered text rather than the parsed values -- confirm the
      -- intended order.
      .| mapC (cs . show)
      .| filterLeft -> errorSink
      .| filterRight -> dataSink

-- | Intended helper: write a 'ByteString' stream as a Deflate-compressed
-- entry called @name@ inside the archive @zp@.
--
-- NOTE(review): sketch only. It takes three arguments (@zp@, @name@,
-- @source@), whereas the call sites in @parseCompaniesZip@ pass two and bind
-- the result with @<-@. Per the zip package documentation, @createArchive@
-- appears to run a @ZipArchive@ action rather than produce a conduit, and
-- @sinkEntry@ appears to expect an @EntrySelector@ where @name@ is a
-- 'FilePath' -- confirm against the library version in use.
sinkZip :: (MonadResource m) => FilePath -> FilePath -> Conduit ByteString m a -> Conduit ByteString m ()
sinkZip zp name source = createArchive zp $ do
              -- Stores @source@ under @name@ using the Deflate method.
              sinkEntry Deflate source name

@Lysxia
Copy link

Lysxia commented May 27, 2019

The zip library seems to be missing a function to give you a sink in this way. It might be a good idea to submit a feature request to the library; it doesn't hurt to ask. Much like there are already sourceEntry and getEntrySource, maybe there should be a getEntrySink to complement sinkEntry.

@simg
Copy link
Author

simg commented May 27, 2019

That's a good idea, thanks :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment