summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkoral <>2015-12-25 19:46:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2015-12-25 19:46:00 (GMT)
commitf49fa2df258021d2b6db75c033f886e19982c48e (patch)
treea7ee2dd958c97b442530e2ca2f37a5016b3085fe
parent6ac627edeb1b817ef353f382a9ba74c44fb1e40e (diff)
version 0.1.1.00.1.1.0
-rw-r--r--Data/Conduit/Parser.hs158
-rw-r--r--Data/Conduit/Parser/Internal.hs193
-rw-r--r--README.md5
-rw-r--r--conduit-parse.cabal15
-rw-r--r--test/Main.hs19
5 files changed, 223 insertions, 167 deletions
diff --git a/Data/Conduit/Parser.hs b/Data/Conduit/Parser.hs
index 83575d4..0b232bf 100644
--- a/Data/Conduit/Parser.hs
+++ b/Data/Conduit/Parser.hs
@@ -1,8 +1,3 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE RankNTypes #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE StandaloneDeriving #-}
-- | This module introduces 'ConduitParser', a wrapper around 'Sink' that behaves like a parser.
--
-- You probably want to import the "Text.Parser.Combinators" module together with this module.
@@ -21,155 +16,4 @@ module Data.Conduit.Parser
, ConduitParserException(..)
) where
--- {{{ Imports
-import Control.Applicative
-import Control.Arrow (second)
-import Control.Monad
-import Control.Monad.Catch as Exception
-import Control.Monad.Trans.Class
-import Control.Monad.Trans.State
-
-import Data.Conduit hiding (await, leftover)
-import qualified Data.Conduit as Conduit
-import qualified Data.Conduit.List as Conduit
-import Data.Text as Text (Text, null, pack, unpack)
-
-import Text.Parser.Combinators as Parser
--- }}}
-
--- | Core type of the package. This is basically a 'Sink' with a parsing state.
-newtype ConduitParser i m a = ConduitParser (StateT (Text, [i]) (Sink i m) a)
-
-deriving instance Applicative (ConduitParser i m)
-deriving instance Functor (ConduitParser i m)
-deriving instance (Monad m) => Monad (ConduitParser i m)
-
--- | Parsers can be combined with @(\<|\>)@, 'some', 'many', 'optional', 'choice'.
---
--- The use of 'guard' is not recommended as it generates unhelpful error messages.
--- Please consider using 'throwM' or 'unexpected' instead.
---
--- Note: only 'ConduitParserException's will trigger the 'Alternative' features, all other exceptions are rethrown.
-instance (MonadCatch m) => Alternative (ConduitParser i m) where
- empty = throwM $ Unexpected "ConduitParser.empty"
-
- parserA <|> parserB = catch parserA $ \(ea :: ConduitParserException) ->
- catch parserB $ \(eb :: ConduitParserException) ->
- throwM $ BothFailed ea eb
-
--- | Consumed elements are pushed back with 'leftover' whenever an exception occurs. In other words, backtracking is supported.
-instance (MonadThrow m) => MonadThrow (ConduitParser i m) where
- throwM e = case fromException (toException e) of
- Just (e' :: ConduitParserException) -> do
- backtrack
- name <- getParserName
- if Text.null name
- then ConduitParser $ throwM e'
- else ConduitParser . throwM $ NamedParserException name e'
- _ -> ConduitParser $ throwM e
-
-instance (MonadCatch m) => MonadCatch (ConduitParser i m) where
- catch (ConduitParser f) handler = do
- buffer <- resetBuffer
- result <- ConduitParser $ Exception.try f
- case result of
- Right a -> prependBuffer buffer >> return a
- Left e -> prependBuffer buffer >> handler e
-
-instance MonadTrans (ConduitParser i) where
- lift = ConduitParser . lift . lift
-
--- | Parsing combinators can be used with 'ConduitParser's.
-instance (MonadCatch m) => Parsing (ConduitParser i m) where
- try parser = parser
-
- parser <?> name = do
- oldName <- getParserName
- setParserName $ pack name
- a <- parser
- setParserName oldName
- return a
-
- unexpected = throwM . Unexpected . pack
-
- eof = do
- result <- peek
- maybe (return ()) (const $ throwM ExpectedEndOfInput) result
-
- notFollowedBy parser = do
- result <- optional parser
- name <- getParserName
- forM_ result $ \_ -> throwM $ UnexpectedFollowedBy name
-
--- | Flipped version of @(\<?\>)@.
-named :: (MonadCatch m) => Text -> ConduitParser i m a -> ConduitParser i m a
-named name = flip (<?>) (unpack name)
-
-
--- | Run a 'ConduitParser'.
--- Any parsing failure will be thrown as an exception.
-runConduitParser :: (MonadThrow m) => ConduitParser i m a -> Sink i m a
-runConduitParser (ConduitParser p) = fst <$> runStateT p (mempty, mempty)
-
--- | Return the name of the parser (assigned through @(\<?\>)@), or 'mempty' if has none.
-getParserName :: ConduitParser i m Text
-getParserName = ConduitParser $ gets fst
-
-setParserName :: Text -> ConduitParser i m ()
-setParserName name = ConduitParser . modify $ \(_, b) -> (name, b)
-
-getBuffer :: ConduitParser i m [i]
-getBuffer = ConduitParser $ gets snd
-
-appendBuffer :: [i] -> ConduitParser i m ()
-appendBuffer new = ConduitParser $ modify (\(n, b) -> (n, b ++ new))
-
-prependBuffer :: [i] -> ConduitParser i m ()
-prependBuffer new = ConduitParser $ modify (second (new ++))
-
-resetBuffer :: (Monad m) => ConduitParser i m [i]
-resetBuffer = do
- b <- getBuffer
- ConduitParser $ modify (\(n, _) -> (n, mempty))
- return b
-
-backtrack :: (Monad m) => ConduitParser i m ()
-backtrack = mapM_ leftover . reverse =<< resetBuffer
-
--- | 'Conduit.await' wrapped as a 'ConduitParser'.
---
--- If no data is available, 'UnexpectedEndOfInput' is thrown.
-await :: (MonadCatch m) => ConduitParser i m i
-await = do
- event <- ConduitParser . lift $ Conduit.await
- e <- maybe (throwM UnexpectedEndOfInput) return event
- appendBuffer [e]
- return e
-
--- | 'Conduit.leftover' wrapped as a 'ConduitParser'.
-leftover :: (Monad m) => i -> ConduitParser i m ()
-leftover = ConduitParser . lift . Conduit.leftover
-
--- | 'Conduit.peek' wrapped as a 'ConduitParser'.
-peek :: (Monad m) => ConduitParser i m (Maybe i)
-peek = ConduitParser . lift $ Conduit.peek
-
-
-data ConduitParserException = BothFailed ConduitParserException ConduitParserException
- | ExpectedEndOfInput
- | NamedParserException Text ConduitParserException
- | UnexpectedEndOfInput
- | UnexpectedFollowedBy Text
- | Unexpected Text
-
-deriving instance Eq ConduitParserException
-
-instance Show ConduitParserException where
- show (BothFailed ea eb) = show ea ++ "\n" ++ show eb
- show ExpectedEndOfInput = "Unexpected input, expected end of input."
- show (NamedParserException t e) = "While parsing " ++ unpack t ++ ": " ++ show e
- show UnexpectedEndOfInput = "Unexpected end of input."
- show (UnexpectedFollowedBy t) = "Should not be followed by " ++ unpack t
- show (Unexpected t) = unpack t
-
-instance Exception ConduitParserException
+import Data.Conduit.Parser.Internal
diff --git a/Data/Conduit/Parser/Internal.hs b/Data/Conduit/Parser/Internal.hs
new file mode 100644
index 0000000..4d66a01
--- /dev/null
+++ b/Data/Conduit/Parser/Internal.hs
@@ -0,0 +1,193 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE StandaloneDeriving #-}
+module Data.Conduit.Parser.Internal where
+
+-- {{{ Imports
+import Control.Applicative
+import Control.Arrow (second)
+import Control.Monad
+import Control.Monad.Catch
+import Control.Monad.Error.Class
+import Control.Monad.Except
+import Control.Monad.Trans.State
+
+import Data.Conduit hiding (await, leftover)
+import qualified Data.Conduit as Conduit
+import qualified Data.Conduit.List as Conduit
+import Data.DList
+import Data.Maybe (fromMaybe)
+import Data.Text as Text (Text, null, pack, unpack)
+
+import Text.Parser.Combinators as Parser
+-- }}}
+
+-- | Core type of the package. This is basically a 'Sink' with a parsing state.
+newtype ConduitParser i m a = ConduitParser (ExceptT ConduitParserException (StateT (Text, Buffer i) (Sink i m)) a)
+
+deriving instance Applicative (ConduitParser i m)
+deriving instance Functor (ConduitParser i m)
+deriving instance Monad (ConduitParser i m)
+deriving instance (MonadCatch m) => MonadCatch (ConduitParser i m)
+deriving instance (MonadIO m) => MonadIO (ConduitParser i m)
+deriving instance (MonadThrow m) => MonadThrow (ConduitParser i m)
+
+instance MonadTrans (ConduitParser i) where
+ lift = ConduitParser . lift . lift . lift
+
+
+-- | Backtracking is supported by pushing back consumed elements (using 'leftover') whenever an error is catched.
+--
+-- As a consequence, within the scope of a `catchError`,
+-- all streamed items are kept in memory,
+-- which means the consumer no longer uses constant memory.
+instance MonadError ConduitParserException (ConduitParser i m) where
+ throwError e = do
+ name <- getParserName
+ if Text.null name
+ then ConduitParser $ throwError e
+ else ConduitParser . throwError $ NamedParserException name e
+ catchError (ConduitParser f) handler = do
+ buffer <- withBuffer resetBuffer
+ withBuffer $ setEnabled True
+
+ result <- ConduitParser $ (Right <$> f) `catchError` (return . Left)
+
+ case result of
+ Left e -> backtrack >> setBuffer buffer >> handler e
+ Right a -> withBuffer (prependBuffer buffer) >> return a
+
+-- | Parsers can be combined with ('<|>'), 'some', 'many', 'optional', 'choice'.
+--
+-- The use of 'guard' is not recommended as it generates unhelpful error messages.
+-- Please consider using 'throwError' or 'unexpected' instead.
+instance Alternative (ConduitParser i m) where
+ empty = ConduitParser $ throwError $ Unexpected "ConduitParser.empty"
+
+ parserA <|> parserB = catchError parserA $ \ea ->
+ catchError parserB $ \eb ->
+ throwError $ BothFailed ea eb
+
+-- | Parsing combinators can be used with 'ConduitParser's.
+instance (Monad m) => Parsing (ConduitParser i m) where
+ try parser = parser
+
+ parser <?> name = do
+ oldName <- getParserName
+ setParserName $ pack name
+ a <- parser
+ setParserName oldName
+ return a
+
+ unexpected = throwError . Unexpected . pack
+
+ eof = do
+ result <- peek
+ maybe (return ()) (const $ throwError ExpectedEndOfInput) result
+
+ notFollowedBy parser = do
+ result <- optional parser
+ name <- getParserName
+ forM_ result $ \_ -> throwError $ UnexpectedFollowedBy name
+
+-- | Flipped version of ('<?>').
+named :: (Monad m) => Text -> ConduitParser i m a -> ConduitParser i m a
+named name = flip (<?>) (unpack name)
+
+
+-- | Run a 'ConduitParser'.
+-- Any parsing failure will be thrown as an exception.
+runConduitParser :: (MonadThrow m) => ConduitParser i m a -> Sink i m a
+runConduitParser (ConduitParser p) = either throwM return . fst =<< runStateT (runExceptT p) (mempty, mempty)
+
+-- | Return the name of the parser (assigned through ('<?>')), or 'mempty' if has none.
+getParserName :: ConduitParser i m Text
+getParserName = ConduitParser $ lift $ gets fst
+
+setParserName :: Text -> ConduitParser i m ()
+setParserName name = ConduitParser $ lift $ modify $ \(_, b) -> (name, b)
+
+getBuffer :: ConduitParser i m (Buffer i)
+getBuffer = ConduitParser $ lift $ gets snd
+
+setBuffer :: Buffer i -> ConduitParser i m (Buffer i)
+setBuffer buffer = withBuffer (const buffer)
+
+withBuffer :: (Buffer i -> Buffer i) -> ConduitParser i m (Buffer i)
+withBuffer f = do
+ buffer <- ConduitParser $ lift $ gets snd
+ ConduitParser $ lift $ modify (second f)
+ return buffer
+
+backtrack :: ConduitParser i m ()
+backtrack = mapM_ leftover =<< withBuffer resetBuffer
+
+
+newtype Buffer i = Buffer (Maybe (DList i)) deriving(Monoid)
+
+deriving instance (Show i) => Show (Buffer i)
+
+instance Functor Buffer where
+ fmap _ (Buffer Nothing) = Buffer mempty
+ fmap f (Buffer (Just a)) = Buffer $ Just $ fmap f a
+
+instance Foldable Buffer where
+ foldMap _ (Buffer Nothing) = mempty
+ foldMap f (Buffer (Just a)) = foldMap f a
+
+
+setEnabled :: Bool -> Buffer i -> Buffer i
+setEnabled True (Buffer a) = Buffer (a <|> Just mempty)
+setEnabled _ (Buffer _) = Buffer mempty
+
+prependItem :: i -> Buffer i -> Buffer i
+prependItem new (Buffer a) = Buffer $ fmap (cons new) a
+
+-- Warning: this function is asymetric
+prependBuffer :: Buffer i -> Buffer i -> Buffer i
+prependBuffer (Buffer a) (Buffer b) = case a of
+ Just a' -> Buffer $ Just (fromMaybe mempty b `append` a')
+ _ -> Buffer a
+
+resetBuffer :: Buffer i -> Buffer i
+resetBuffer (Buffer a) = Buffer $ fmap (const mempty) a
+
+-- | 'Conduit.await' wrapped as a 'ConduitParser'.
+--
+-- If no data is available, 'UnexpectedEndOfInput' is thrown.
+await :: (Monad m) => ConduitParser i m i
+await = do
+ event <- ConduitParser $ lift $ lift Conduit.await
+ e <- maybe (throwError UnexpectedEndOfInput) return event
+ withBuffer $ prependItem e
+ return e
+
+-- | 'Conduit.leftover' wrapped as a 'ConduitParser'.
+leftover :: i -> ConduitParser i m ()
+leftover = ConduitParser . lift . lift . Conduit.leftover
+
+-- | 'Conduit.peek' wrapped as a 'ConduitParser'.
+peek :: (Monad m) => ConduitParser i m (Maybe i)
+peek = ConduitParser $ lift $ lift Conduit.peek
+
+
+data ConduitParserException = BothFailed ConduitParserException ConduitParserException
+ | ExpectedEndOfInput
+ | NamedParserException Text ConduitParserException
+ | UnexpectedEndOfInput
+ | UnexpectedFollowedBy Text
+ | Unexpected Text
+
+deriving instance Eq ConduitParserException
+deriving instance Show ConduitParserException
+
+instance Exception ConduitParserException where
+ displayException (BothFailed ea eb) = show ea ++ "\n" ++ show eb
+ displayException ExpectedEndOfInput = "Unexpected input, expected end of input."
+ displayException (NamedParserException t e) = "While parsing " ++ unpack t ++ ": " ++ show e
+ displayException UnexpectedEndOfInput = "Unexpected end of input."
+ displayException (UnexpectedFollowedBy t) = "Should not be followed by " ++ unpack t
+ displayException (Unexpected t) = unpack t
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e6af4fa
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# conduit-parse
+
+The `conduit-extra` package provides utilities to turn a `Parser` into a `Consumer`, but only for streams of `ByteString`s of `Text`s (cf `Data.Conduit.Attoparsec` module).
+
+This library makes it possible to work with any kind of input by providing a general-purpose parsing framework based on `conduit`.
diff --git a/conduit-parse.cabal b/conduit-parse.cabal
index 0c26ec8..f136581 100644
--- a/conduit-parse.cabal
+++ b/conduit-parse.cabal
@@ -1,10 +1,7 @@
name: conduit-parse
-version: 0.1.0.0
+version: 0.1.1.0
synopsis: Parsing framework based on conduit.
-description:
- The @conduit-extra@ package provides utilities to turn a 'Parser' into a 'Consumer', but only for streams of 'ByteString's of 'Text's (cf @Data.Conduit.Attoparsec@ module).
- .
- This library makes it possible to work with any kind of input by providing a general-purpose parsing framework based on 'conduit'.
+description: Please refer to README.
homepage: https://github.com/k0ral/conduit-parse
license: OtherLicense
license-file: LICENSE
@@ -13,6 +10,7 @@ maintainer: koral <koral@mailoo.org>
category: Conduit, Text
build-type: Simple
cabal-version: >=1.10
+extra-source-files: README.md
source-repository head
type: git
@@ -21,10 +19,14 @@ source-repository head
library
exposed-modules:
Data.Conduit.Parser
+ other-modules:
+ Data.Conduit.Parser.Internal
build-depends:
base >= 4.8 && < 5
, conduit
+ , dlist
, exceptions
+ , mtl
, parsers
, text
, transformers
@@ -41,10 +43,11 @@ test-suite tests
, conduit-parse
, exceptions
, hlint
+ , mtl
, parsers
, resourcet
, tasty
, tasty-hunit
-- , tasty-quickcheck
- default-language: Haskell2010
+ default-language: Haskell2010
ghc-options: -Wall -fno-warn-unused-do-bind
diff --git a/test/Main.hs b/test/Main.hs
index 551d8bd..5c44da8 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE OverloadedStrings #-}
import Control.Applicative
import Control.Monad.Catch as Exception
+import Control.Monad.Error.Class
import Control.Monad.Trans.Resource
import Data.Conduit hiding (await, leftover)
@@ -30,6 +31,7 @@ unitTests = testGroup "Unit tests"
[ awaitCase
, peekCase
, leftoverCase
+ , errorCase
, alternativeCase
, catchCase
, parsingCase
@@ -43,8 +45,8 @@ hlint = testCase "HLint check" $ do
awaitCase :: TestTree
awaitCase = testCase "await" $ do
i <- runResourceT . runConduit $ sourceList [1 :: Int] =$= runConduitParser parser
- i @=? (1, Left UnexpectedEndOfInput)
- where parser = (,) <$> await <*> Exception.try await
+ i @=? (1, Nothing)
+ where parser = (,) <$> await <*> optional await
peekCase :: TestTree
peekCase = testCase "peek" $ do
@@ -61,6 +63,15 @@ leftoverCase = testCase "leftover" $ do
leftover a >> leftover b >> leftover c
(,,) <$> await <*> await <*> await
+errorCase :: TestTree
+errorCase = testCase "error" $ do
+ result1 <- Exception.try . runResourceT . runConduit $ sourceList [] =$= runConduitParser (parser <?> "Unexpected error")
+ result2 <- Exception.try . runResourceT . runConduit $ sourceList [] =$= runConduitParser parser
+ result1 @=? Left (NamedParserException "Unexpected error" $ Unexpected "ERROR")
+ result2 @=? Left (Unexpected "ERROR")
+ where parser = unexpected "ERROR" >> return (1 :: Int)
+
+
alternativeCase :: TestTree
alternativeCase = testCase "alternative" $ do
result <- runResourceT . runConduit $ sourceList [1 :: Int, 2, 3] =$= runConduitParser parser
@@ -72,7 +83,7 @@ alternativeCase = testCase "alternative" $ do
await
eof
return (a, b, c)
- parseInt :: (MonadCatch m) => Int -> ConduitParser Int m Int
+ parseInt :: (Monad m) => Int -> ConduitParser Int m Int
parseInt i = do
a <- await
if i == a then return a else unexpected ("Expected " ++ show i ++ ", got " ++ show a)
@@ -81,7 +92,7 @@ catchCase :: TestTree
catchCase = testCase "catch" $ do
result <- runResourceT . runConduit $ sourceList [1 :: Int, 2] =$= runConduitParser parser
result @=? (1, 2)
- where parser = catchAll (await >> await >> throwM (Unexpected "ERROR")) . const $ (,) <$> await <*> await
+ where parser = catchError (await >> await >> throwError (Unexpected "ERROR")) . const $ (,) <$> await <*> await
parsingCase :: TestTree
parsingCase = testCase "parsing" $ do