summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: OzgunAtaman <> 2011-06-06 03:18:43 (GMT)
committer: Luite Stegeman <luite@luite.com> 2011-06-06 03:18:43 (GMT)
commit: 3cf9384b2a7083685e7a4463a40376800cb2a713 (patch)
tree: 0a6c85347b24ec076a8dabfd7e490b80f8b58eb4
parent: 977595d16a64ad46f42f3f581bca8d1066ce624e (diff)
version 0.8.20.8.2
-rw-r--r-- README.markdown 3
-rw-r--r-- csv-enumerator.cabal 43
-rw-r--r-- src/Data/CSV/Enumerator.hs 22
-rw-r--r-- test/Test.hs 57
-rw-r--r-- test/csv-enumerator-test.cabal 47
-rw-r--r-- test/test.csv 4
6 files changed, 168 insertions, 8 deletions
diff --git a/README.markdown b/README.markdown
index 4b0e933..3c3a54b 100644
--- a/README.markdown
+++ b/README.markdown
@@ -15,7 +15,7 @@ project's start in 2010, there wasn't one that provided all of the following:
* Fast operation
* Convenient interface that supports a variety of use cases
-This library is an attempt to close this gap.
+This library is an attempt to close these gaps.
## This package
@@ -88,6 +88,7 @@ Further examples to be provided at a later time.
* The CSVeable typeclass can be refactored to have a more minimal definition.
* Operating on Text in addition to ByteString would be phenomenal.
* A test-suite needs to be added.
+* Some benchmarking would be nice.
Any and all kinds of help is much appreciated!
diff --git a/csv-enumerator.cabal b/csv-enumerator.cabal
index 8f27630..4795641 100644
--- a/csv-enumerator.cabal
+++ b/csv-enumerator.cabal
@@ -1,5 +1,5 @@
Name: csv-enumerator
-Version: 0.8
+Version: 0.8.2
Synopsis: A flexible, fast, enumerator-based CSV parser library for Haskell.
Homepage: http://github.com/ozataman/csv-enumerator
License: BSD3
@@ -10,15 +10,48 @@ Category: Data
Build-type: Simple
Cabal-version: >=1.2
Description:
- For more information and examples, check out the README at:
- <http://github.com/ozataman/csv-enumerator>.
+
+ CSV files are the de-facto standard in many situations involving data transfer,
+ particularly when dealing with enterprise applications or disparate database
+ systems.
+
+ .
+
+ While there are a number of CSV libraries in Haskell, at the time of this
+ project's start in 2010, there wasn't one that provided all of the following:
+
+ .
+
+ * Full flexibility in quote characters, separators, input/output
+ .
+ * Constant space operation
+ .
+ * Robust parsing, correctness and error resiliency
+ .
+ * Convenient interface that supports a variety of use cases
+ .
+ * Fast operation
+ .
+
+ This library is an attempt to close these gaps.
+ .
+
+ For more documentation and examples, check out the README at:
+ .
+
+ <http://github.com/ozataman/csv-enumerator>
+ .
The API is fairly well documented and I would encourage you to keep your
- haddocks handy.
+ haddocks handy. If you run into problems, just email me or holler over at
+ #haskell.
extra-source-files:
README.markdown
+ test/csv-enumerator-test.cabal
+ test/test.csv
+ test/Test.hs
Library
hs-source-dirs: src
@@ -27,7 +60,7 @@ Library
Other-modules:
Data.CSV.Enumerator.Types
build-depends:
- attoparsec >= 0.8 && < 0.9
+ attoparsec >= 0.8 && < 0.10
, attoparsec-enumerator >= 0.2.0.3
, base >= 4 && < 5
, containers >= 0.3
diff --git a/src/Data/CSV/Enumerator.hs b/src/Data/CSV/Enumerator.hs
index 2cbeae3..63ef42f 100644
--- a/src/Data/CSV/Enumerator.hs
+++ b/src/Data/CSV/Enumerator.hs
@@ -36,6 +36,7 @@ module Data.CSV.Enumerator
-- * Other Utilities
, outputRow
, outputRows
+ , outputColumns
, writeHeaders
)
@@ -47,6 +48,7 @@ import Control.Monad (mzero, mplus, foldM, when)
import Control.Monad.IO.Class (liftIO, MonadIO)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
+import Data.ByteString.Char8 (ByteString)
import Data.ByteString.Internal (c2w)
import qualified Data.Map as M
import System.Directory
@@ -131,9 +133,9 @@ instance CSVeable Row where
let
sep = B.pack [c2w (csvOutputColSep s)]
wrapField !f = case (csvOutputQuoteChar s) of
- Just !x -> let qt = c2w x
- in qt `B.cons` f `B.snoc` qt
+ Just !x -> x `B8.cons` escape x f `B8.snoc` x
otherwise -> f
+ escape c str = B8.intercalate (B8.pack [c,c]) $ B8.split c str
in B.intercalate sep . map wrapField $ r
fileHeaders _ = Nothing
@@ -355,6 +357,21 @@ outputRows :: CSVeable r => CSVSettings -> Handle -> [r] -> IO ()
outputRows s oh = mapM_ (outputRow s oh)
+-- | Expand or contract the given 'MapRow' to contain exactly the given set of
+-- columns and then write the row into the given 'Handle'.
+--
+-- This is helpful for filtering columns, or for combining a number of files
+-- that don't all have the same columns.
+--
+-- Columns missing from the row are written as empty fields; columns of the
+-- row that are not listed are dropped. Because the row is rebuilt with
+-- 'M.fromList', a column name listed more than once yields a single column.
+outputColumns :: CSVSettings -> Handle -> [ByteString] -> MapRow -> IO ()
+outputColumns s h cs r = outputRow s h r'
+  where
+    r' = M.fromList $ map formCol cs
+    -- 'M.findWithDefault' supplies the empty field for absent columns.
+    formCol x = (x, M.findWithDefault "" x r)
+
+
+
writeHeaders :: CSVeable r => CSVSettings -> Handle -> [r] -> IO ()
writeHeaders s h rs = case fileHeaders rs of
Just hs -> B.hPutStrLn h . rowToStr s $ hs
@@ -381,6 +398,7 @@ outputRowsIter s oh rs = mapM_ (outputRowIter s oh) rs
-- functions (Row enumarators).
data (CSVeable r) => ParsedRow r = ParsedRow (Maybe r) | EOF
+
------------------------------------------------------------------------------
-- | An iteratee that processes each row of a CSV file and updates the
-- accumulator.
diff --git a/test/Test.hs b/test/Test.hs
new file mode 100644
index 0000000..42b6b25
--- /dev/null
+++ b/test/Test.hs
@@ -0,0 +1,57 @@
+
+module Main where
+
+import qualified Data.ByteString.Char8 as B
+import Data.Map ((!))
+import System.Directory
+
+import Test.Framework (defaultMain, testGroup)
+import Test.Framework.Providers.HUnit
+import Test.Framework.Providers.QuickCheck2 (testProperty)
+
+import Test.QuickCheck
+import Test.HUnit
+
+
+import Data.CSV.Enumerator
+
+
+-- | Entry point: hand the collected test groups to the test-framework runner.
+main :: IO ()
+main = defaultMain tests
+
+-- | Every test group run by 'main'; currently a single group.
+tests =
+  [ testGroup "Basic Ops" baseTests
+  ]
+
+
+-- | HUnit cases covering the basic file-mapping and parsing operations.
+baseTests =
+  [ testCase "mapping with id works" test_identityMap
+  , testCase "simple parsing works" test_simpleParse
+  ]
+
+
+-- | Map @test.csv@ into @testOut.csv@ with a function that returns each row
+-- unchanged, then check that the success value is 3 (presumably the number of
+-- rows processed -- confirm against 'mapCSVFile'), that both files have the
+-- same contents, and finally delete the output file.
+test_identityMap = do
+  Right rowCount <- mapCSVFile "test.csv" csvSettings passThrough "testOut.csv"
+  3 @=? rowCount
+  original <- readFile "test.csv"
+  written <- readFile "testOut.csv"
+  original @=? written
+  removeFile "testOut.csv"
+  where
+    -- Each input row maps to exactly one, unmodified, output row.
+    passThrough :: MapRow -> [MapRow]
+    passThrough row = [row]
+
+
+-- | Read @test.csv@ as a list of 'MapRow's and check, for every row, that the
+-- @Sum@ column equals @Col2@ plus @Col3@.
+test_simpleParse = do
+  Right (rows :: [MapRow]) <- readCSVFile csvSettings "test.csv"
+  mapM_ checkSum rows
+  where
+    checkSum row = field "Sum" @=? (field "Col2" + field "Col3")
+      where
+        -- Look a column up by name and parse its contents as a number.
+        field name = readBS (row ! name)
+
+
+-- | Settings shared by the tests: the defaults, but with a backtick as the
+-- quote character for both input and output.
+csvSettings =
+  defCSVSettings
+    { csvQuoteChar = backtick
+    , csvOutputQuoteChar = backtick
+    }
+  where
+    backtick = Just '`'
+
+
+-- | Parse a value out of a 'B.ByteString' by way of 'String' and 'read'.
+-- Like 'read', this fails at runtime on input that does not parse.
+readBS bytes = read (B.unpack bytes)
diff --git a/test/csv-enumerator-test.cabal b/test/csv-enumerator-test.cabal
new file mode 100644
index 0000000..dcc88d2
--- /dev/null
+++ b/test/csv-enumerator-test.cabal
@@ -0,0 +1,47 @@
+Name: csv-enumerator-test
+Version: 0.8.2
+Synopsis: A flexible, fast, enumerator-based CSV parser library for Haskell.
+Homepage: http://github.com/ozataman/csv-enumerator
+License: BSD3
+License-file: LICENSE
+Author: Ozgun Ataman
+Maintainer: Ozgun Ataman <ozataman@gmail.com>
+Category: Data
+Build-type: Simple
+Cabal-version: >=1.2
+Description:
+ For more information and examples, check out the README at:
+ <http://github.com/ozataman/csv-enumerator>.
+
+ The API is fairly well documented and I would encourage you to keep your
+ haddocks handy.
+
+
+extra-source-files:
+ README.markdown
+
+Executable test
+ main-is: Test.hs
+ hs-source-dirs: ./ ../src
+ Other-modules:
+ Data.CSV.Enumerator.Types
+ Data.CSV.Enumerator
+ build-depends:
+ attoparsec >= 0.8 && < 0.10
+ , attoparsec-enumerator >= 0.2.0.3
+ , base >= 4 && < 5
+ , containers >= 0.3
+ , directory
+ , bytestring
+ , enumerator >= 0.4.5 && < 0.5
+ , transformers >= 0.2
+ , safe
+ , unix-compat >= 0.2.1.1
+ , test-framework
+ , test-framework-quickcheck2
+ , test-framework-hunit
+ , QuickCheck >= 2
+ , HUnit >= 1.2
+ extensions:
+ ScopedTypeVariables
+ OverloadedStrings
diff --git a/test/test.csv b/test/test.csv
new file mode 100644
index 0000000..3e4d152
--- /dev/null
+++ b/test/test.csv
@@ -0,0 +1,4 @@
+`Col1`,`Col2`,`Col3`,`Sum`
+`A`,`2`,`3`,`5`
+`B`,`3`,`4`,`7`
+`Field using the quote char ``this is the in-quoted value```,`4`,`5`,`9`