summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOzgunAtaman <>2011-04-29 22:37:18 (GMT)
committerLuite Stegeman <luite@luite.com>2011-04-29 22:37:18 (GMT)
commit977595d16a64ad46f42f3f581bca8d1066ce624e (patch)
treeb6dadfd1d6b4ed4cd3090967d4efbcf9a73b8f92
version 0.80.8
-rw-r--r--LICENSE30
-rw-r--r--README.markdown94
-rw-r--r--Setup.hs2
-rw-r--r--csv-enumerator.cabal41
-rw-r--r--src/Data/CSV/Enumerator.hs469
-rw-r--r--src/Data/CSV/Enumerator/Types.hs46
6 files changed, 682 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..90536f9
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
+Copyright (c)2010, Ozgun Ataman
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Ozgun Ataman nor the names of other
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.markdown b/README.markdown
new file mode 100644
index 0000000..4b0e933
--- /dev/null
+++ b/README.markdown
@@ -0,0 +1,94 @@
+# README
+
+## CSV Files and Haskell
+
+CSV files are the de-facto standard in many cases of data transfer,
+particularly when dealing with enterprise applications or disparate database
+systems.
+
+While there are a number of csv libraries in Haskell, at the time of this
+project's start in 2010, there wasn't one that provided all of the following:
+
+* Full flexibility in quote characters, separators, input/output
+* Constant space operation
+* Robust parsing and error resiliency
+* Fast operation
+* Convenient interface that supports a variety of use cases
+
+This library is an attempt to close this gap.
+
+
+## This package
+
+csv-enumerator is an enumerator-based CSV parsing library that is easy to use,
+flexible and fast. Furthermore, it provides ways to use constant-space during
+operation, which is absolutely critical in many real world use cases.
+
+
+### Introduction
+
+* ByteStrings are used for everything
+* There are 2 basic row types and they implement *exactly* the same operations,
+ so you can choose the right one for the job at hand:
+ - type MapRow = Map ByteString ByteString
+ - type Row = [ByteString]
+* Folding over a CSV file can be thought of as the most basic operation.
+* Higher level convenience functions are provided to "map" over CSV files,
+ modifying and transforming them along the way.
+* Helpers are provided for simple input/output of CSV files for simple use
+ cases.
+* For extreme / advanced use cases, the user can drop down to the
+ Enumerator/Iteratee level and do interleaved IO among other things.
+
+### API Docs
+
+The API is quite well documented and I would encourage you to keep it handy.
+
+### Speed
+
+While fast operation is of concern, I have so far cared more about correct
+operation and a flexible API. Please let me know if you notice any performance
+regressions or optimization opportunities.
+
+
+### Usage Examples
+
+#### Example 1: Basic Operation
+
+ {-# LANGUAGE OverloadedStrings #-}
+
+ import Data.CSV.Enumerator
+ import qualified Data.ByteString.Char8 as B
+ import Data.Char (isSpace)
+ import qualified Data.Map as M
+ import Data.Map ((!))
+
+ -- Naive whitespace stripper
+ strip = B.reverse . B.dropWhile isSpace . B.reverse . B.dropWhile isSpace
+
+ -- A function that takes a row and "emits" zero or more rows as output.
+ processRow :: MapRow -> [MapRow]
+ processRow row = [M.insert "Column1" fixedCol row]
+ where fixedCol = strip (row ! "Column1")
+
+ main = mapCSVFile "InputFile.csv" defCSVSettings processRow "OutputFile.csv"
+
+and we are done.
+
+
+Further examples to be provided at a later time.
+
+
+
+### TODO - Next Steps
+
+* Need to think about specializing an Exception type for the library and
+ properly notifying the user when parsing-related problems occur.
+* Some operations can be further broken down to their atoms, increasing the
+ flexibility of the library.
+* The CSVeable typeclass can be refactored to have a more minimal definition.
+* Operating on Text in addition to ByteString would be phenomenal.
+* A test-suite needs to be added.
+
+
+Any and all kinds of help are much appreciated!
+
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+-- Standard Cabal entry point: defer entirely to the default build driver.
+main :: IO ()
+main = defaultMain
diff --git a/csv-enumerator.cabal b/csv-enumerator.cabal
new file mode 100644
index 0000000..8f27630
--- /dev/null
+++ b/csv-enumerator.cabal
@@ -0,0 +1,41 @@
+Name: csv-enumerator
+Version: 0.8
+Synopsis: A flexible, fast, enumerator-based CSV parser library for Haskell.
+Homepage: http://github.com/ozataman/csv-enumerator
+License: BSD3
+License-file: LICENSE
+Author: Ozgun Ataman
+Maintainer: Ozgun Ataman <ozataman@gmail.com>
+Category: Data
+Build-type: Simple
+Cabal-version: >=1.2
+Description:
+ For more information and examples, check out the README at:
+ <http://github.com/ozataman/csv-enumerator>.
+
+ The API is fairly well documented and I would encourage you to keep your
+ haddocks handy.
+
+
+extra-source-files:
+ README.markdown
+
+Library
+ hs-source-dirs: src
+ Exposed-modules:
+ Data.CSV.Enumerator
+ Other-modules:
+ Data.CSV.Enumerator.Types
+ build-depends:
+ attoparsec >= 0.8 && < 0.9
+ , attoparsec-enumerator >= 0.2.0.3
+ , base >= 4 && < 5
+ , containers >= 0.3
+ , directory
+ , bytestring
+ , enumerator >= 0.4.5 && < 0.5
+ , transformers >= 0.2
+ , safe
+ , unix-compat >= 0.2.1.1
+ extensions:
+ ScopedTypeVariables
diff --git a/src/Data/CSV/Enumerator.hs b/src/Data/CSV/Enumerator.hs
new file mode 100644
index 0000000..2cbeae3
--- /dev/null
+++ b/src/Data/CSV/Enumerator.hs
@@ -0,0 +1,469 @@
+{-# LANGUAGE OverloadedStrings, BangPatterns #-}
+{-# LANGUAGE PackageImports #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+
+module Data.CSV.Enumerator
+ (
+ -- * CSV Data types
+ Row -- Simply @[ByteString]@
+ , Field -- Simply @ByteString@
+ , MapRow
+
+ , CSVeable(..)
+
+ , ParsedRow(..)
+
+ -- * CSV Settings
+ , CSVSettings(..)
+ , defCSVSettings
+
+ -- * Reading / Writing CSV Files
+ , readCSVFile
+ , writeCSVFile
+ , appendCSVFile
+
+ -- * Folding Over CSV Files
+ -- | These enumerators generalize the map* family of functions with a running accumulator.
+ , CSVAction
+ , funToIter
+ , funToIterIO
+
+ -- * Primitive Iteratees
+ , collectRows
+ , outputRowIter
+ , outputRowsIter
+
+ -- * Other Utilities
+ , outputRow
+ , outputRows
+ , writeHeaders
+ )
+
+where
+
+import Control.Applicative hiding (many)
+import Control.Exception (bracket, SomeException)
+import Control.Monad (mzero, mplus, foldM, when)
+import Control.Monad.IO.Class (liftIO, MonadIO)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B8
+import Data.ByteString.Internal (c2w)
+import qualified Data.Map as M
+import System.Directory
+import System.IO
+import System.PosixCompat.Files (getFileStatus, fileSize)
+
+import Data.Attoparsec as P hiding (take)
+import qualified Data.Attoparsec.Char8 as C8
+-- import Data.Attoparsec.Enum
+import Data.Attoparsec.Enumerator
+import qualified Data.Enumerator as E
+import Data.Enumerator (($$), yield, continue)
+import Data.Enumerator.Binary (enumFile)
+import Data.Word (Word8)
+import Safe (headMay)
+
+import Data.CSV.Enumerator.Types
+
+-- | Row representations that can be parsed from and rendered to CSV.
+-- Instances are provided in this module for 'Row' and 'MapRow'.
+class CSVeable r where
+
+  -- | Render a single row as a strict 'B.ByteString'.
+  rowToStr :: CSVSettings -> r -> B.ByteString
+
+  -- | Header row implied by a list of rows, if there is one.
+  fileHeaders :: [r] -> Maybe Row
+
+  -- | The raw iteratee: parses rows out of any Enumerator stream and folds
+  -- the given action over them.
+  iterCSV :: CSVSettings
+          -> CSVAction r a
+          -> a
+          -> E.Iteratee B.ByteString IO a
+
+  -- | Fold step that pushes rows into the file at the given path. The
+  -- accumulator carries an optional output handle and an 'Int' counter.
+  fileSink :: CSVSettings
+           -> FilePath
+           -> (Maybe Handle, Int)
+           -> ParsedRow r
+           -> E.Iteratee B.ByteString IO (Maybe Handle, Int)
+
+  -- | Open a CSV file and fold over its rows. For the 'MapRow' instance the
+  -- first row supplies the column headers, so processing starts on row 2.
+  foldCSVFile :: FilePath -- ^ File to open as a CSV file
+              -> CSVSettings -- ^ CSV settings to use on the input file
+              -> CSVAction r a -- ^ Fold action
+              -> a -- ^ Initial accumulator
+              -> IO (Either SomeException a) -- ^ Error or the resulting accumulator
+
+  -- | Read a CSV file, apply a function to each of its rows and write the
+  -- rows it returns into a new file.
+  --
+  -- The function may return zero, one or several rows per input row.
+  mapCSVFile :: FilePath -- ^ Input file
+             -> CSVSettings -- ^ CSV Settings
+             -> (r -> [r]) -- ^ A function to map a row onto rows
+             -> FilePath -- ^ Output file
+             -> IO (Either SomeException Int) -- ^ Number of rows processed
+  mapCSVFile fi s f fo =
+      fmap (fmap snd) (foldCSVFile fi s step (Nothing, 0))
+    where
+      -- A parsed row is expanded by @f@ and every result is sunk in turn;
+      -- anything else (EOF, unparsable row) goes to the sink unchanged.
+      step !st (ParsedRow (Just !r)) = foldM emit st (f r)
+      step !st other = fileSink s fo st other
+
+      emit !st !out = fileSink s fo st (ParsedRow (Just out))
+
+
+  ----------------------------------------------------------------------------
+  -- | Like 'mapCSVFile' but operates on multiple files pouring results into
+  -- a single file.
+  mapCSVFiles :: [FilePath] -- ^ Input files
+              -> CSVSettings -- ^ CSV Settings
+              -> (r -> [r]) -- ^ A function to map a row onto rows
+              -> FilePath -- ^ Output file
+              -> IO (Either SomeException Int) -- ^ Number of rows processed
+
+------------------------------------------------------------------------------
+-- | 'Row' instance for 'CSVeable'
+instance CSVeable Row where
+
+  -- Fields are joined with the output separator. When an output quote
+  -- character is configured, every field is wrapped in it and any quote
+  -- character inside the field is doubled; 'quotedField' reverses exactly
+  -- this escaping, so written rows can be read back.
+  rowToStr s !r =
+    let
+      sep = B.pack [c2w (csvOutputColSep s)]
+      wrapField !f = case csvOutputQuoteChar s of
+        Just !x -> let qt = c2w x
+                       escaped = B.intercalate (B.pack [qt, qt]) (B.split qt f)
+                   in qt `B.cons` escaped `B.snoc` qt
+        Nothing -> f
+    in B.intercalate sep . map wrapField $ r
+
+  -- Plain rows carry no header information.
+  fileHeaders _ = Nothing
+
+
+  -- Parse one row at a time and hand it to the action until the input is
+  -- exhausted, then give the action a final 'EOF'.
+  iterCSV csvs f acc = loop acc
+    where
+      loop !acc' = do
+        eof <- E.isEOF
+        if eof
+          then f acc' EOF
+          else rowParser csvs >>= f acc' . ParsedRow >>= loop
+
+
+  foldCSVFile fp csvs f acc = E.run (enumFile fp $$ iterCSV csvs f acc)
+
+
+  fileSink csvs fo = iter
+    where
+      iter :: (Maybe Handle, Int)
+           -> ParsedRow Row
+           -> E.Iteratee B.ByteString IO (Maybe Handle, Int)
+
+      -- End of input: close the handle if one was ever opened.
+      iter acc@(oh, i) EOF = case oh of
+        Just oh' -> liftIO (hClose oh') >> yield (Nothing, i) E.EOF
+        Nothing -> yield acc E.EOF
+
+      -- A row that failed to parse: nothing to write.
+      iter acc (ParsedRow Nothing) = yield acc (E.Chunks [])
+
+      -- First row to be written: open the output file, then retry.
+      iter (Nothing, !i) r = do
+        oh <- liftIO $ openFile fo WriteMode
+        iter (Just oh, i) r
+
+      iter (Just oh, !i) (ParsedRow (Just r)) = do
+        outputRowIter csvs oh r
+        yield (Just oh, i+1) (E.Chunks [])
+
+
+  -- Fold over the input files in order, threading the running count and
+  -- stopping at the first file that yields an error.
+  mapCSVFiles fis s f fo = foldM stepFile (Right 0) fis
+    where
+      stepFile :: (Either SomeException Int)
+               -> FilePath
+               -> IO (Either SomeException Int)
+      stepFile res0 fi = do
+        case res0 of
+          Left x -> return $ Left x
+          Right i -> do
+            res <- foldCSVFile fi s (iter fi) (Nothing, i)
+            return $ fmap snd res
+
+      iter :: FilePath
+           -> (Maybe Handle, Int)
+           -> ParsedRow Row
+           -> E.Iteratee B.ByteString IO (Maybe Handle, Int)
+      iter fi acc@(oh, i) EOF = case oh of
+        Just oh' -> liftIO (hClose oh') >> yield (Nothing, i) E.EOF
+        Nothing -> yield acc E.EOF
+      iter fi acc (ParsedRow Nothing) = return acc
+      -- The output is opened in append mode so that each input file adds
+      -- to what the previous ones wrote.
+      iter fi (Nothing, !i) (ParsedRow (Just r)) = do
+        oh <- liftIO $ openFile fo AppendMode
+        iter fi (Just oh, i) (ParsedRow (Just r))
+      iter fi (Just oh, !i) (ParsedRow (Just r)) = do
+        outputRowsIter s oh (f r)
+        return (Just oh, i+1)
+
+
+
+------------------------------------------------------------------------------
+-- | 'MapRow' instance for 'CSVeable'
+instance CSVeable MapRow where
+  -- Only the values are rendered, in the key order of the map.
+  rowToStr s r = rowToStr s . M.elems $ r
+
+  -- Headers are the keys of the first row, if there is one.
+  fileHeaders rs = fmap M.keys (headMay rs)
+
+  -- The first successfully parsed row supplies the headers; every later row
+  -- is zipped with them into a map and handed to the action. The action
+  -- receives 'EOF' exactly once, when the input is exhausted.
+  iterCSV csvs f !acc = loop ([], acc)
+    where
+      loop (headers, !acc') = do
+        eof <- E.isEOF
+        if eof
+          then f acc' EOF
+          else step headers acc'
+
+      -- Headers not known yet: the next row becomes the header row. A row
+      -- that fails to parse is reported to the action as @ParsedRow Nothing@
+      -- and the search for headers continues with the following row.
+      step [] !acc' = do
+        mhs <- rowParser csvs
+        case mhs of
+          Just hs -> loop (hs, acc')
+          Nothing -> f acc' (ParsedRow Nothing) >>= \a -> loop ([], a)
+
+      -- Process starting w/ the second row
+      step !headers !acc' = do
+        a <- rowParser csvs >>= toMapCSV headers >>= f acc' . ParsedRow
+        loop (headers, a)
+
+      toMapCSV !headers !fs =
+        yield (fmap (M.fromList . zip headers) fs) (E.Chunks [])
+
+  foldCSVFile fp csvs f !acc = E.run (enumFile fp $$ iterCSV csvs f acc)
+
+
+  fileSink s fo = mapIter
+    where
+      mapIter :: (Maybe Handle, Int)
+              -> ParsedRow MapRow
+              -> E.Iteratee B.ByteString IO (Maybe Handle, Int)
+      -- End of input: close the handle if one was ever opened.
+      mapIter acc@(oh, !i) EOF = case oh of
+        Just oh' -> liftIO (hClose oh') >> yield (Nothing, i) E.EOF
+        Nothing -> yield acc E.EOF
+      mapIter !acc (ParsedRow Nothing) = return acc
+      -- First row to be written: open the file and emit the header line
+      -- (the keys of this row) before the row itself.
+      mapIter (Nothing, !i) (ParsedRow (Just (!r))) = do
+        oh <- liftIO $ do
+          oh' <- openFile fo WriteMode
+          B.hPutStrLn oh' . rowToStr s . M.keys $ r
+          return oh'
+        mapIter (Just oh, i) (ParsedRow (Just r))
+      mapIter (Just oh, !i) (ParsedRow (Just (!r))) = do
+        outputRowIter s oh r
+        return (Just oh, i+1)
+
+
+  -- Fold over the input files in order, threading the running count and
+  -- stopping at the first file that yields an error.
+  mapCSVFiles fis s f fo = foldM stepFile (Right 0) fis
+    where
+      stepFile res0 fi = do
+        case res0 of
+          Left x -> return $ Left x
+          Right i -> do
+            res <- foldCSVFile fi s (iter fi) (Nothing, i)
+            return $ fmap snd res
+
+      -- Tag a row with the path of the file it came from.
+      addFileSource fi r = M.insert "FromFile" (B8.pack fi) r
+
+      iter :: FilePath
+           -> (Maybe Handle, Int)
+           -> ParsedRow MapRow
+           -> E.Iteratee B.ByteString IO (Maybe Handle, Int)
+      iter fi acc@(oh, i) EOF = case oh of
+        Just oh' -> liftIO (hClose oh') >> yield (Nothing, i) E.EOF
+        Nothing -> yield acc E.EOF
+      iter fi acc (ParsedRow Nothing) = return acc
+      iter fi (Nothing, !i) (ParsedRow (Just r)) = do
+        case f r of
+          [] -> return (Nothing, i) -- the fn did not return any rows at all!
+          (x:_) -> do
+            oh <- liftIO $ do
+              -- The header line is written only when this call creates
+              -- the output file.
+              exist <- doesFileExist fo
+              oh' <- openFile fo AppendMode
+              when (not exist) $
+                B.hPutStrLn oh' . rowToStr s . M.keys . (addFileSource fi) $ x
+              return oh'
+            iter fi (Just oh, i) (ParsedRow (Just r))
+      iter fi (Just oh, !i) (ParsedRow (Just r)) =
+        let rows = f . addFileSource fi $ r
+        in do
+          outputRowsIter s oh rows
+          return (Just oh, i+1)
+
+
+------------------------------------------------------------------------------
+-- | Read a whole CSV file into memory, returning its rows in file order.
+--
+-- 'collectRows' accumulates rows newest-first, hence the final 'reverse'.
+readCSVFile :: (CSVeable r) => CSVSettings -- ^ CSV settings
+            -> FilePath -- ^ FilePath
+            -> IO (Either SomeException [r]) -- ^ Collected data
+readCSVFile s fp = fmap (fmap reverse) (foldCSVFile fp s collectRows [])
+
+
+------------------------------------------------------------------------------
+-- | Write the given rows to a file opened in 'WriteMode', preceded by a
+-- header line when 'fileHeaders' provides one. The handle is closed even if
+-- writing fails.
+writeCSVFile :: (CSVeable r) => CSVSettings -- ^ CSV settings
+             -> FilePath -- ^ Target file path
+             -> [r] -- ^ Data to be output
+             -> IO Int -- ^ Number of rows written
+writeCSVFile s fp rs = bracket (openFile fp WriteMode) hClose emit
+  where
+    emit h = do
+      writeHeaders s h rs
+      foldM (putLine h) 0 (map (rowToStr s) rs)
+
+    -- Write one rendered row and bump the running count.
+    putLine h n line = B.hPutStrLn h line >> return (n + 1)
+
+------------------------------------------------------------------------------
+-- | Append the given rows to a file. A header line is written first only
+-- when the file does not exist yet or is empty. The handle is closed even
+-- if writing fails.
+appendCSVFile :: (CSVeable r) => CSVSettings -- ^ CSV settings
+              -> FilePath -- ^ Target file path
+              -> [r] -- ^ Data to be output
+              -> IO Int -- ^ Number of rows written
+appendCSVFile s fp rs = bracket acquire (hClose . snd) emit
+  where
+    -- Decide whether headers are needed before opening the file, because
+    -- opening in 'AppendMode' creates it.
+    acquire = do
+      present <- doesFileExist fp
+      needHeaders <-
+        if present
+          then fmap ((<= 0) . fileSize) (getFileStatus fp)
+          else return True
+      h <- openFile fp AppendMode
+      return (needHeaders, h)
+
+    emit (needHeaders, h) = do
+      when needHeaders (writeHeaders s h rs)
+      foldM (putLine h) 0 (map (rowToStr s) rs)
+
+    -- Write one rendered row and bump the running count.
+    putLine h n line = B.hPutStrLn h line >> return (n + 1)
+
+------------------------------------------------------------------------------
+-- | Render a row with 'rowToStr' and write it, followed by a newline, to
+-- the given handle.
+outputRow :: CSVeable r => CSVSettings -> Handle -> r -> IO ()
+outputRow s oh r = B.hPutStrLn oh (rowToStr s r)
+
+
+-- | Write every row of the list to the handle, in order, via 'outputRow'.
+outputRows :: CSVeable r => CSVSettings -> Handle -> [r] -> IO ()
+outputRows s oh rs = mapM_ (outputRow s oh) rs
+
+
+-- | Write the header line for the given rows to the handle, if
+-- 'fileHeaders' yields one; otherwise do nothing.
+writeHeaders :: CSVeable r => CSVSettings -> Handle -> [r] -> IO ()
+writeHeaders s h rs =
+  maybe (return ()) (B.hPutStrLn h . rowToStr s) (fileHeaders rs)
+
+
+-- | 'outputRow' lifted into the iteratee monad.
+outputRowIter :: CSVeable r => CSVSettings -> Handle -> r -> E.Iteratee B.ByteString IO ()
+outputRowIter s oh r = liftIO (outputRow s oh r)
+
+
+-- | Write every row of the list, in order, from inside the iteratee monad.
+outputRowsIter :: CSVeable r => CSVSettings -> Handle -> [r] -> E.Iteratee B.ByteString IO ()
+outputRowsIter s oh = mapM_ (outputRowIter s oh)
+
+
+------------------------------------------------------------------------------
+-- | A datatype that incorporates the signaling of parsing status to the
+-- user-developed iteratee.
+--
+-- We need this because some iteratees do interleaved IO (such as outputting to
+-- a file via a handle inside the accumulator) and some final actions may need
+-- to be taken upon encountering EOF (such as closing the interleaved handle).
+--
+-- Use this datatype when developing iteratees for use with fold* family of
+-- functions (Row enumerators).
+--
+-- * @ParsedRow (Just r)@ - a row was parsed successfully.
+--
+-- * @ParsedRow Nothing@ - a row was read but could not be parsed.
+--
+-- * @EOF@ - the input is exhausted.
+--
+-- The declaration deliberately carries no class context: a datatype context
+-- adds nothing beyond forcing the constraint onto every use of the
+-- constructors, and current GHC rejects it unless the deprecated
+-- @DatatypeContexts@ extension is switched on.
+data ParsedRow r = ParsedRow (Maybe r) | EOF
+
+------------------------------------------------------------------------------
+-- | A fold step over the rows of a CSV file: given the current accumulator
+-- and the next 'ParsedRow', produce the updated accumulator inside an
+-- iteratee over the raw 'B.ByteString' input.
+--
+-- You would implement one of these to use with the 'foldCSVFile' function;
+-- 'funToIter' and 'funToIterIO' build one from a plain function.
+type CSVAction r a = a -> ParsedRow r -> E.Iteratee B.ByteString IO a
+
+
+------------------------------------------------------------------------------
+-- | Turn a fold step that lives in 'IO' into a 'CSVAction'.
+--
+-- Handy when you would rather not deal with Iteratees in your fold
+-- function. The accumulator is forced before and after each step.
+funToIterIO :: (CSVeable r) => (a -> ParsedRow r -> IO a) -> CSVAction r a
+funToIterIO f = step
+  where
+    step !acc row = do
+      !acc' <- liftIO (f acc row)
+      yield acc' (remainder row)
+
+    -- Signal end of stream after 'EOF', otherwise leave the input untouched.
+    remainder EOF = E.EOF
+    remainder _ = E.Chunks []
+
+
+------------------------------------------------------------------------------
+-- | Turn a pure fold step into a 'CSVAction'.
+--
+-- Handy when you would rather not deal with Iteratees in your fold
+-- function. The incoming accumulator is forced before each step.
+funToIter :: (CSVeable r) => (a -> ParsedRow r -> a) -> CSVAction r a
+funToIter f = step
+  where
+    step !acc row = yield (f acc row) (remainder row)
+
+    -- Signal end of stream after 'EOF', otherwise leave the input untouched.
+    remainder EOF = E.EOF
+    remainder _ = E.Chunks []
+
+
+------------------------------------------------------------------------------
+-- | Collect every successfully parsed row into a list, newest row first.
+-- Rows that failed to parse are dropped. Note that this cancels out the
+-- incremental nature of this library.
+collectRows :: CSVeable r => CSVAction r [r]
+collectRows acc parsed = case parsed of
+  ParsedRow (Just r) ->
+    let acc' = r : acc
+    in acc' `seq` yield acc' (E.Chunks [])
+  _ -> yield acc (E.Chunks [])
+
+-- * Parsers
+
+-- | Iteratee that parses the next row from the input.
+--
+-- Yields @Just fields@ for a well-formed row and @Nothing@ for a line that
+-- could not be parsed. If the underlying parser iteratee raises an error,
+-- the error is reported on 'stderr' (so it cannot mix with data a program
+-- writes to standard output) and @Nothing@ is yielded.
+rowParser :: (Monad m, MonadIO m) => CSVSettings -> E.Iteratee B.ByteString m (Maybe Row)
+rowParser csvs = E.catchError p handler
+  where
+    p = iterParser $ row csvs
+    handler e = do
+      liftIO $ hPutStrLn stderr ("Error in parsing: " ++ show e)
+      yield Nothing (E.Chunks [])
+
+-- | Parse one line: a well-formed CSV row if possible, otherwise consume
+-- the line as a bad row and return 'Nothing'.
+row :: CSVSettings -> Parser (Maybe Row)
+row csvs = goodRow <|> badrow
+  where
+    goodRow = csvrow csvs
+
+-- | Fallback for a malformed line: skip everything up to and including the
+-- line terminator (or the end of input) and return 'Nothing'.
+badrow :: Parser (Maybe Row)
+badrow = do
+  _ <- P.takeWhile (not . C8.isEndOfLine)
+  C8.endOfLine <|> C8.endOfInput
+  return Nothing
+
+-- | Parse a well-formed row: fields separated by 'csvSep' and terminated by
+-- an end of line or the end of input. When 'csvQuoteChar' is set, each
+-- field is first tried as a quoted field.
+csvrow :: CSVSettings -> Parser (Maybe Row)
+csvrow c = Just <$> (fields <* (C8.endOfLine <|> P.endOfInput))
+  where
+    fields = (quoted <|> field c) `sepBy` C8.char (csvSep c)
+    quoted = maybe mzero (try . quotedField) (csvQuoteChar c)
+
+-- | Parse an unquoted field: the (possibly empty) run of bytes accepted by
+-- 'isFieldChar'.
+field :: CSVSettings -> Parser Field
+field s = plain <?> "Parsing a regular field"
+  where
+    plain = P.takeWhile (isFieldChar s)
+
+-- | True for bytes that may appear in an unquoted field, i.e. anything other
+-- than the separator, a line-break character or (when configured) the quote
+-- character.
+isFieldChar :: CSVSettings -> Word8 -> Bool
+isFieldChar s = notInClass excluded
+  where
+    terminators = csvSep s : "\n\r"
+    excluded = maybe terminators (: terminators) (csvQuoteChar s)
+
+-- | Parse a field enclosed in the given quote character. Inside the quotes
+-- a doubled quote character stands for one literal quote.
+quotedField :: Char -> Parser Field
+quotedField c = do
+  _ <- C8.char c <?> "Quote start"
+  body <- many (notWord8 quote <|> escapedQuote)
+  _ <- C8.char c <?> "Quote end"
+  return (B.pack body)
+  where
+    quote = c2w c
+    escapedQuote = string (B.pack [quote, quote]) *> return quote
+
+
diff --git a/src/Data/CSV/Enumerator/Types.hs b/src/Data/CSV/Enumerator/Types.hs
new file mode 100644
index 0000000..cf0981e
--- /dev/null
+++ b/src/Data/CSV/Enumerator/Types.hs
@@ -0,0 +1,46 @@
+{-# LANGUAGE OverloadedStrings, BangPatterns #-}
+{-# LANGUAGE PackageImports #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+
+
+module Data.CSV.Enumerator.Types where
+
+
+import qualified Data.ByteString as B
+import qualified Data.Map as M
+
+-- | Settings for a CSV file. This library is intended to be flexible and
+-- offer a way to process the majority of text data files out there.
+--
+-- Input parsing is governed by 'csvSep' and 'csvQuoteChar'; output
+-- rendering by 'csvOutputColSep' and 'csvOutputQuoteChar'.
+data CSVSettings = CSVS
+ {
+ -- | Separator character expected between fields of the input.
+ csvSep :: Char
+
+ -- | Quote character that may sometimes be present around fields. If
+ -- 'Nothing' is given, the library will never expect quotation even if it
+ -- is present.
+ , csvQuoteChar :: Maybe Char
+
+ -- | Quote character that should be used in the output.
+ , csvOutputQuoteChar :: Maybe Char
+
+ -- | Field separator that should be used in the output.
+ , csvOutputColSep :: Char
+ } deriving (Read, Show, Eq)
+
+-- | Default settings for a CSV file: comma-separated fields quoted with
+-- double quotes, for both input and output.
+--
+-- > csvSep = ','
+-- > csvQuoteChar = Just '"'
+-- > csvOutputQuoteChar = Just '"'
+-- > csvOutputColSep = ','
+--
+defCSVSettings :: CSVSettings
+defCSVSettings = CSVS
+  { csvSep = comma
+  , csvQuoteChar = doubleQuote
+  , csvOutputQuoteChar = doubleQuote
+  , csvOutputColSep = comma
+  }
+  where
+    comma = ','
+    doubleQuote = Just '"'
+
+-- | A CSV row as a plain list of fields, in column order.
+type Row = [Field]
+-- | A single CSV field, held as a strict 'B.ByteString'.
+type Field = B.ByteString
+-- | A CSV row as a map from 'B.ByteString' keys to field values.
+type MapRow = M.Map B.ByteString B.ByteString
+