summary refs log tree commit diff
diff options
context:
space:
mode:
author OzgunAtaman <> 2011-07-23 21:51:05 (GMT)
committer Luite Stegeman <luite@luite.com> 2011-07-23 21:51:05 (GMT)
commit a5df3b81f054d7bc75b5b5d5214764911a9f1ef9 (patch)
tree 10fbf24ee6999e39445c49dd8442c8a6e25b7a1b
parent 1d6e81527d47f6269ffae270d19a0a9218a46387 (diff)
version 0.9.2 (tag: 0.9.2)
-rw-r--r-- csv-enumerator.cabal 9
-rw-r--r-- src/Data/CSV/Enumerator.hs 79
-rw-r--r-- src/Data/CSV/Enumerator/Types.hs 8
-rw-r--r-- test/csv-enumerator-test.cabal 36
4 files changed, 95 insertions, 37 deletions
diff --git a/csv-enumerator.cabal b/csv-enumerator.cabal
index 88f27d5..b8b02d7 100644
--- a/csv-enumerator.cabal
+++ b/csv-enumerator.cabal
@@ -1,5 +1,5 @@
Name: csv-enumerator
-Version: 0.9.0
+Version: 0.9.2
Synopsis: A flexible, fast, enumerator-based CSV parser library for Haskell.
Homepage: http://github.com/ozataman/csv-enumerator
License: BSD3
@@ -60,15 +60,16 @@ Library
Other-modules:
Data.CSV.Enumerator.Types
build-depends:
- attoparsec >= 0.8 && < 0.10
- , attoparsec-enumerator >= 0.2.0.3
+ attoparsec >= 0.8
+ , attoparsec-enumerator >= 0.2
, base >= 4 && < 5
, containers >= 0.3
, directory
, bytestring
- , enumerator >= 0.4.5 && < 0.5
+ , enumerator >= 0.4.5
, transformers >= 0.2
, safe
, unix-compat >= 0.2.1.1
extensions:
ScopedTypeVariables
+ ghc-options: -funbox-strict-fields
diff --git a/src/Data/CSV/Enumerator.hs b/src/Data/CSV/Enumerator.hs
index daf773b..269a0a8 100644
--- a/src/Data/CSV/Enumerator.hs
+++ b/src/Data/CSV/Enumerator.hs
@@ -32,6 +32,7 @@ module Data.CSV.Enumerator
-- * Mapping Over CSV Files
, mapCSVFile
+ , mapAccumCSVFile
, mapIntoHandle
-- * Primitive Iteratees
@@ -307,6 +308,30 @@ mapCSVFile fi s f fo = do
------------------------------------------------------------------------------
+-- | Map-accumulate over a CSV file. Similar to 'mapAccumL' in 'Data.List'.
+mapAccumCSVFile
+ :: (CSVeable r)
+ => FilePath
+ -> CSVSettings
+ -> (acc -> r -> (acc, [r]))
+ -> acc
+ -> FilePath
+ -> IO (Either SomeException acc)
+mapAccumCSVFile fi s f acc fo = do
+ res <- foldCSVFile fi s iter (acc, (Nothing, 0))
+ return $ fst `fmap` res
+ where
+ iter (a, outa) (ParsedRow (Just !r)) = foldM chain (a', outa) rs
+ where (a', rs) = f a r
+ iter (a, outa) x = do
+ outa' <- fileSink s fo outa x
+ return $ (a, outa')
+ chain (a, outa) !r = do
+ outa' <- fileSink s fo outa (ParsedRow (Just r))
+ return $ (a, outa')
+
+
+------------------------------------------------------------------------------
readCSVFile :: (CSVeable r) => CSVSettings -- ^ CSV settings
-> FilePath -- ^ FilePath
-> IO (Either SomeException [r]) -- ^ Collected data
@@ -337,19 +362,19 @@ appendCSVFile :: (CSVeable r) => CSVSettings -- ^ CSV settings
-> [r] -- ^ Data to be output
-> IO Int -- ^ Number of rows written
appendCSVFile s fp rs =
- let doOutput (c,h) = when c (writeHeaders s h rs) >> outputRowsIter h
+ let doOutput (c,h) = when c (writeHeaders s h rs >> return ()) >> outputRowsIter h
outputRowsIter h = foldM (step h) 0 . map (rowToStr s) $ rs
step h acc x = (B.hPutStrLn h x) >> return (acc+1)
chkOpen = do
- writeHeaders <- do
+ wrHeader <- do
fe <- doesFileExist fp
if fe
- then do
- fs <- getFileStatus fp >>= return . fileSize
- return $ if fs > 0 then False else True
+ then do
+ fs <- getFileStatus fp >>= return . fileSize
+ return $ if fs > 0 then False else True
else return True
h <- openFile fp AppendMode
- return (writeHeaders, h)
+ return (wrHeader, h)
in bracket
(chkOpen)
(hClose . snd)
@@ -380,10 +405,10 @@ outputColumns s h cs r = outputRow s h r'
-writeHeaders :: CSVeable r => CSVSettings -> Handle -> [r] -> IO ()
+writeHeaders :: CSVeable r => CSVSettings -> Handle -> [r] -> IO Bool
writeHeaders s h rs = case fileHeaders rs of
- Just hs -> B.hPutStrLn h . rowToStr s $ hs
- Nothing -> return ()
+ Just hs -> (B.hPutStrLn h . rowToStr s) hs >> return True
+ Nothing -> return False
outputRowIter :: CSVeable r => CSVSettings -> Handle -> r -> E.Iteratee B.ByteString IO ()
@@ -468,10 +493,12 @@ mapIntoHandle csvs outh h f = do
where
f' acc EOF = return acc
f' acc (ParsedRow Nothing) = return acc
- f' (False, _) r'@(ParsedRow (Just r)) = do
+ f' (False, i) r'@(ParsedRow (Just r)) = do
rs <- f r
- when outh $ writeHeaders csvs h rs
- f' (True, 0) r'
+ headerDone <- if outh then writeHeaders csvs h rs else return True
+ if headerDone
+ then f' (headerDone, 0) r' -- Headers are done, now process row
+ else return (False, i+1) -- Problem in this row, move on to next
f' (True, !i) (ParsedRow (Just r)) = do
rs <- f r
outputRows csvs h rs
@@ -487,9 +514,13 @@ collectRows acc (ParsedRow (Just r)) = let a' = (r:acc)
in a' `seq` yield a' (E.Chunks [])
collectRows acc (ParsedRow Nothing) = yield acc (E.Chunks [])
--- * Parsers
-rowParser :: (Monad m, MonadIO m) => CSVSettings -> E.Iteratee B.ByteString m (Maybe Row)
+------------------------------------------------------------------------------
+-- Parsers
+
+rowParser
+ :: (Monad m, MonadIO m)
+ => CSVSettings -> E.Iteratee B.ByteString m (Maybe Row)
rowParser csvs = E.catchError p handler
where
p = iterParser $ row csvs
@@ -497,6 +528,7 @@ rowParser csvs = E.catchError p handler
liftIO $ putStrLn ("Error in parsing: " ++ show e)
yield Nothing (E.Chunks [])
+
row :: CSVSettings -> Parser (Maybe Row)
row csvs = csvrow csvs <|> badrow
@@ -506,8 +538,8 @@ badrow = P.takeWhile (not . C8.isEndOfLine) *>
csvrow :: CSVSettings -> Parser (Maybe Row)
csvrow c =
- let !rowbody = (quotedField' <|> (field c)) `sepBy` C8.char (csvSep c)
- !properrow = rowbody <* (C8.endOfLine <|> P.endOfInput)
+ let rowbody = (quotedField' <|> (field c)) `sepBy` C8.char (csvSep c)
+ properrow = rowbody <* (C8.endOfLine <|> P.endOfInput)
quotedField' = case csvQuoteChar c of
Nothing -> mzero
Just q' -> try (quotedField q')
@@ -516,7 +548,7 @@ csvrow c =
return $ Just res
field :: CSVSettings -> Parser Field
-field s = P.takeWhile (isFieldChar s) <?> "Parsing a regular field"
+field s = P.takeWhile (isFieldChar s)
isFieldChar s = notInClass xs'
where xs = csvSep s : "\n\r"
@@ -525,10 +557,13 @@ isFieldChar s = notInClass xs'
Just x -> x : xs
quotedField :: Char -> Parser Field
-quotedField c = let w = c2w c in do
- (C8.char c) <?> "Quote start"
- f <- many (notWord8 w <|> (string (B.pack $ [w,w]) *> return w))
- (C8.char c) <?> "Quote end"
- return $ B.pack f
+quotedField c =
+ let quoted = string dbl *> return c
+ dbl = B8.pack [c,c]
+ in do
+ C8.char c
+ f <- many (C8.notChar c <|> quoted)
+ C8.char c
+ return $ B8.pack f
diff --git a/src/Data/CSV/Enumerator/Types.hs b/src/Data/CSV/Enumerator/Types.hs
index cf0981e..82064d6 100644
--- a/src/Data/CSV/Enumerator/Types.hs
+++ b/src/Data/CSV/Enumerator/Types.hs
@@ -13,16 +13,16 @@ import qualified Data.Map as M
data CSVSettings = CSVS
{
-- | Separator character to be used in between fields
- csvSep :: Char
+ csvSep :: !Char
-- | Quote character that may sometimes be present around fields. If 'Nothing' is given, the library will never expect quotation even if it is present.
- , csvQuoteChar :: Maybe Char
+ , csvQuoteChar :: !(Maybe Char)
-- | Quote character that should be used in the output.
- , csvOutputQuoteChar :: Maybe Char
+ , csvOutputQuoteChar :: !(Maybe Char)
-- | Field separator that should be used in the output.
- , csvOutputColSep :: Char
+ , csvOutputColSep :: !Char
} deriving (Read, Show, Eq)
-- | Default settings for a CSV file.
diff --git a/test/csv-enumerator-test.cabal b/test/csv-enumerator-test.cabal
index dcc88d2..02a4b47 100644
--- a/test/csv-enumerator-test.cabal
+++ b/test/csv-enumerator-test.cabal
@@ -9,13 +9,6 @@ Maintainer: Ozgun Ataman <ozataman@gmail.com>
Category: Data
Build-type: Simple
Cabal-version: >=1.2
-Description:
- For more information and examples, check out the README at:
- <http://github.com/ozataman/csv-enumerator>.
-
- The API is fairly well documented and I would encourage you to keep your
- haddocks handy.
-
extra-source-files:
README.markdown
@@ -45,3 +38,32 @@ Executable test
extensions:
ScopedTypeVariables
OverloadedStrings
+
+
+Executable bench
+ main-is: Bench.hs
+ hs-source-dirs: ./ ../src
+ Other-modules:
+ Data.CSV.Enumerator.Types
+ Data.CSV.Enumerator
+ build-depends:
+ attoparsec >= 0.8 && < 0.10
+ , attoparsec-enumerator >= 0.2.0.3
+ , base >= 4 && < 5
+ , containers >= 0.3
+ , directory
+ , bytestring
+ , enumerator >= 0.4.5 && < 0.5
+ , transformers >= 0.2
+ , safe
+ , unix-compat >= 0.2.1.1
+ , test-framework
+ , test-framework-quickcheck2
+ , test-framework-hunit
+ , QuickCheck >= 2
+ , HUnit >= 1.2
+ extensions:
+ ScopedTypeVariables
+ OverloadedStrings
+ ghc-options: -rtsopts
+ ghc-prof-options: -rtsopts -caf-all -auto-all