summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEugeneSmolanka <>2018-06-13 01:22:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2018-06-13 01:22:00 (GMT)
commitc89900b6f680f3b09750ecaf847ab714e09cb342 (patch)
tree08a10bff6fe07772d60f1e25f269546c2d731afa
parentd9d2ffe725605c55988fab6153381de04532311d (diff)
version 2.0.1HEAD2.0.1master
-rw-r--r--sexp-grammar.cabal3
-rw-r--r--src/Language/Sexp/Encode.hs22
-rw-r--r--src/Language/Sexp/Lexer.x23
-rw-r--r--src/Language/Sexp/Pretty.hs7
-rw-r--r--src/Language/Sexp/Token.hs54
-rw-r--r--src/Language/Sexp/Types.hs7
-rw-r--r--src/Language/SexpGrammar/Base.hs35
-rw-r--r--test/Main.hs81
8 files changed, 162 insertions, 70 deletions
diff --git a/sexp-grammar.cabal b/sexp-grammar.cabal
index 964c8e9..15b051f 100644
--- a/sexp-grammar.cabal
+++ b/sexp-grammar.cabal
@@ -1,5 +1,5 @@
name: sexp-grammar
-version: 2.0.0
+version: 2.0.1
license: BSD3
license-file: LICENSE
author: Eugene Smolanka, Sergey Vinokurov
@@ -58,7 +58,6 @@ library
, recursion-schemes >=5.0 && <6.0
, scientific >=0.3.3 && <0.4
, semigroups >=0.16 && <0.19
- , split >=0.2 && <0.3
, text >=1.2 && <1.3
, utf8-string >=1.0 && <2.0
diff --git a/src/Language/Sexp/Encode.hs b/src/Language/Sexp/Encode.hs
index cbd31bc..bcfb762 100644
--- a/src/Language/Sexp/Encode.hs
+++ b/src/Language/Sexp/Encode.hs
@@ -5,15 +5,11 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeSynonymInstances #-}
-module Language.Sexp.Encode
- ( encode
- , escape
- ) where
+module Language.Sexp.Encode (encode) where
import Data.Functor.Foldable (cata)
import Data.List (intersperse)
import Data.Scientific
-import qualified Data.Text as T
import qualified Data.Text.Encoding as T (encodeUtf8)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL (encodeUtf8)
@@ -25,19 +21,7 @@ import Data.Semigroup
#endif
import Language.Sexp.Types
-
-escape :: T.Text -> TL.Text
-escape = TL.concat . go [] . TL.fromStrict
- where
- go acc text
- | TL.null text = acc
- | otherwise =
- let (chunk, rest) = TL.break (\c -> c == '"' || c == '\\') text
- in case TL.uncons rest of
- Nothing -> chunk : acc
- Just ('"', rest') -> go (chunk : "\\\"" : acc) rest'
- Just ('\\',rest') -> go (chunk : "\\\\" : acc) rest'
- Just (other, rest') -> go (chunk : TL.singleton other : acc) rest'
+import Language.Sexp.Token (escape)
buildSexp :: Fix SexpF -> Builder
buildSexp = cata alg
@@ -51,7 +35,7 @@ buildSexp = cata alg
AtomNumber a
| isInteger a -> string8 (formatScientific Fixed (Just 0) a)
| otherwise -> string8 (formatScientific Fixed Nothing a)
- AtomString a -> char8 '"' <> lazyByteString (TL.encodeUtf8 (escape a)) <> char8 '"'
+ AtomString a -> char8 '"' <> lazyByteString (TL.encodeUtf8 (escape (TL.fromStrict a))) <> char8 '"'
AtomSymbol a -> byteString (T.encodeUtf8 a)
ParenListF ss -> char8 '(' <> hsep ss <> char8 ')'
BracketListF ss -> char8 '[' <> hsep ss <> char8 ']'
diff --git a/src/Language/Sexp/Lexer.x b/src/Language/Sexp/Lexer.x
index 71a96f7..dc92f05 100644
--- a/src/Language/Sexp/Lexer.x
+++ b/src/Language/Sexp/Lexer.x
@@ -26,8 +26,6 @@ import Language.Sexp.LexerInterface
import Language.Sexp.Token
import Language.Sexp.Types (Position (..), LocatedBy (..))
-import Debug.Trace
-
}
$hspace = [\ \t]
@@ -46,8 +44,8 @@ $alpha = [a-z A-Z]
$unicode = $allgraphic # [\x20-\x80]
-$syminitial = [$alpha \:\@\#\!\$\%\&\*\/\<\=\>\?\~\_\^\.\|\+\- $unicode]
-$symsubseq = [$syminitial $digit \'\`\,]
+$syminitial = [$alpha \:\@\!\$\%\&\*\/\<\=\>\?\~\_\^\.\|\+\- $unicode]
+$symsubseq = [$syminitial $digit \#\'\`\,]
@symescape = \\ [$alpha $digit \(\)\[\]\{\}\\\|\;\'\`\"\#\.\,]
@symbol = ($syminitial | @symescape) ($symsubseq | @symescape)*
@@ -80,22 +78,7 @@ $whitespace+ ;
type AlexAction = LineCol -> ByteString -> LocatedBy LineCol Token
readString :: ByteString -> T.Text
-readString = TL.toStrict . TL.concat . unescape [] . TL.tail . TL.init . decodeUtf8
- where
- unescape acc text
- | TL.null text = acc
- | otherwise =
- let (chunk, rest) = TL.break (== '\\') text in
- case TL.uncons rest of
- Nothing -> (chunk : acc)
- Just (_, rest') ->
- case TL.uncons rest' of
- Nothing -> error "Invalid escape sequence"
- Just ('n', rest'') -> unescape (chunk `TL.snoc` '\n' : acc) rest''
- Just ('r', rest'') -> unescape (chunk `TL.snoc` '\r' : acc) rest''
- Just ('t', rest'') -> unescape (chunk `TL.snoc` '\t' : acc) rest''
- Just (lit, rest'') -> unescape (chunk `TL.snoc` lit : acc) rest''
-
+readString = TL.toStrict . unescape . TL.tail . TL.init . decodeUtf8
readNum :: ByteString -> Scientific
readNum = read . TL.unpack . decodeUtf8
diff --git a/src/Language/Sexp/Pretty.hs b/src/Language/Sexp/Pretty.hs
index 5d7fe15..5ed9f32 100644
--- a/src/Language/Sexp/Pretty.hs
+++ b/src/Language/Sexp/Pretty.hs
@@ -11,22 +11,23 @@ module Language.Sexp.Pretty
import Data.ByteString.Lazy.Char8 (ByteString)
import Data.Functor.Foldable (para)
import Data.Scientific
+import qualified Data.Text.Lazy as TL
import Data.Text.Lazy.Encoding (encodeUtf8)
import Data.Text.Prettyprint.Doc
+import Data.Text.Prettyprint.Doc.Internal (unsafeTextWithoutNewlines)
import qualified Data.Text.Prettyprint.Doc.Render.Text as Render
import Language.Sexp.Types
-import Language.Sexp.Encode (escape)
+import Language.Sexp.Token (escape)
instance Pretty Atom where
pretty = \case
AtomNumber a
| isInteger a -> pretty $ formatScientific Fixed (Just 0) a
| otherwise -> pretty $ formatScientific Fixed Nothing $ a
- AtomString a -> dquotes (pretty (escape a))
+ AtomString a -> dquotes (unsafeTextWithoutNewlines . TL.toStrict . escape . TL.fromStrict $ a)
AtomSymbol a -> pretty a
-
ppList :: [(Fix SexpF, Doc ann)] -> Doc ann
ppList ls = case ls of
((Fix (AtomF _),_) : _) ->
diff --git a/src/Language/Sexp/Token.hs b/src/Language/Sexp/Token.hs
index f6ec3ee..d91ea78 100644
--- a/src/Language/Sexp/Token.hs
+++ b/src/Language/Sexp/Token.hs
@@ -4,11 +4,15 @@
module Language.Sexp.Token
( Token (..)
, Prefix (..)
+ , escape
+ , unescape
) where
import Data.Text (Text)
+import qualified Data.Text.Lazy as TL
import Data.Scientific
import Data.Text.Prettyprint.Doc
+import Data.Semigroup
import Language.Sexp.Types (Prefix(..))
@@ -38,3 +42,53 @@ instance Pretty Token where
pretty (TokNumber n) = "number" <+> pretty (show n)
pretty (TokString s) = "string" <+> pretty (show s)
pretty (TokUnknown u) = "unrecognized" <+> pretty (show u)
+
+
+newtype DText = DText (TL.Text -> TL.Text)
+
+instance Semigroup DText where
+ DText a <> DText b = DText (a . b)
+
+instance Monoid DText where
+ mempty = DText id
+ mappend = (<>)
+
+delay :: TL.Text -> DText
+delay t = DText (t `TL.append`)
+
+force :: DText -> TL.Text
+force (DText f) = f TL.empty
+
+
+unescape :: TL.Text -> TL.Text
+unescape = force . go mempty
+ where
+ go :: DText -> TL.Text -> DText
+ go acc text
+ | TL.null text = acc
+ | otherwise =
+ let (chunk, rest) = TL.break (== '\\') text in
+ case TL.uncons rest of
+ Nothing -> acc <> delay chunk
+ Just (_, rest') ->
+ case TL.uncons rest' of
+ Nothing -> error "Invalid escape sequence"
+ Just ('n', rest'') -> go (acc <> delay (chunk `TL.snoc` '\n')) rest''
+ Just ('r', rest'') -> go (acc <> delay (chunk `TL.snoc` '\r')) rest''
+ Just ('t', rest'') -> go (acc <> delay (chunk `TL.snoc` '\t')) rest''
+ Just (lit, rest'') -> go (acc <> delay (chunk `TL.snoc` lit )) rest''
+
+
+escape :: TL.Text -> TL.Text
+escape = force . go mempty
+ where
+ go :: DText -> TL.Text -> DText
+ go acc text
+ | TL.null text = acc
+ | otherwise =
+ let (chunk, rest) = TL.break (\c -> c == '"' || c == '\\') text
+ in case TL.uncons rest of
+ Nothing -> acc <> delay chunk
+ Just ('"', rest') -> go (acc <> delay chunk <> delay "\\\"") rest'
+ Just ('\\',rest') -> go (acc <> delay chunk <> delay "\\\\") rest'
+ Just (other, rest') -> go (acc <> delay chunk <> delay (TL.singleton other)) rest'
diff --git a/src/Language/Sexp/Types.hs b/src/Language/Sexp/Types.hs
index 1d82e69..98e717b 100644
--- a/src/Language/Sexp/Types.hs
+++ b/src/Language/Sexp/Types.hs
@@ -1,10 +1,9 @@
-{-# LANGUAGE LambdaCase #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE LambdaCase #-}
module Language.Sexp.Types
( Atom (..)
@@ -117,6 +116,7 @@ instance Eq1 SexpF where
go _ _ = False
instance NFData Atom
+
instance NFData Position
instance NFData (Fix SexpF) where
@@ -132,4 +132,3 @@ instance NFData (Fix SexpF) where
instance NFData (Fix (Compose (LocatedBy Position) SexpF)) where
rnf = rnf . stripLocation
-
diff --git a/src/Language/SexpGrammar/Base.hs b/src/Language/SexpGrammar/Base.hs
index 707bc8d..d135429 100644
--- a/src/Language/SexpGrammar/Base.hs
+++ b/src/Language/SexpGrammar/Base.hs
@@ -181,7 +181,7 @@ braceList g = beginBraceList >>> Dive (g >>> endList)
--
-- * @el symbol@ consumes a symbol and produces a 'Text' value
-- corresponding to the symbol.
-el :: Grammar p (Sexp :- t) t' -> Grammar p (List :- t) (List :- t')
+el :: Grammar Position (Sexp :- t) t' -> Grammar Position (List :- t) (List :- t')
el g = coerced (Flip cons >>> onTail g >>> Step)
@@ -193,8 +193,8 @@ el g = coerced (Flip cons >>> onTail g >>> Step)
-- >>> encodeWith grammar [2, 3, 5, 7, 11, 13]
-- Right "(check-primes 2 3 5 7 11 13)"
rest
- :: (forall t. Grammar p (Sexp :- t) (a :- t))
- -> Grammar p (List :- t) (List :- [a] :- t)
+ :: (forall t. Grammar Position (Sexp :- t) (a :- t))
+ -> Grammar Position (List :- t) (List :- [a] :- t)
rest g =
iso coerce coerce >>>
onHead (Traverse (sealed g >>> Step)) >>>
@@ -203,7 +203,7 @@ rest g =
----------------------------------------------------------------------
beginProperties
- :: Grammar p (List :- t) (List :- PropertyList :- t)
+ :: Grammar Position (List :- t) (List :- PropertyList :- t)
beginProperties = Flip $ PartialIso
(\(List rest :- PropertyList alist :- t) ->
List (concatMap (\(k, v) -> [Atom (AtomSymbol (':' `TS.cons` k)), v]) alist ++ rest) :- t)
@@ -219,8 +219,9 @@ beginProperties = Flip $ PartialIso
takePairs other acc = (other, acc)
+
endProperties
- :: Grammar p t (PropertyList :- t)
+ :: Grammar Position t (PropertyList :- t)
endProperties = PartialIso
(\t -> PropertyList [] :- t)
(\(PropertyList lst :- t) ->
@@ -247,8 +248,8 @@ endProperties = PartialIso
-- :}
-- Right "{:real 0 :img -1 / :real 1 :img 0}"
props
- :: Grammar p (PropertyList :- t) (PropertyList :- t')
- -> Grammar p (List :- t) (List :- t')
+ :: Grammar Position (PropertyList :- t) (PropertyList :- t')
+ -> Grammar Position (List :- t) (List :- t')
props g = beginProperties >>> Dive (onTail (g >>> Flip endProperties))
@@ -259,8 +260,8 @@ props g = beginProperties >>> Dive (onTail (g >>> Flip endProperties))
-- Note: performs linear lookup, /O(n)/
key
:: Text
- -> (forall t. Grammar p (Sexp :- t) (a :- t))
- -> Grammar p (PropertyList :- t) (PropertyList :- a :- t)
+ -> (forall t. Grammar Position (Sexp :- t) (a :- t))
+ -> Grammar Position (PropertyList :- t) (PropertyList :- a :- t)
key k g =
coerced (
Flip (insert k (expected $ ppKey k)) >>>
@@ -275,8 +276,8 @@ key k g =
-- Note: performs linear lookup, /O(n)/
optKey
:: Text
- -> (forall t. Grammar p (Sexp :- t) (a :- t))
- -> Grammar p (PropertyList :- t) (PropertyList :- Maybe a :- t)
+ -> (forall t. Grammar Position (Sexp :- t) (a :- t))
+ -> Grammar Position (PropertyList :- t) (PropertyList :- Maybe a :- t)
optKey k g =
coerced (Flip (insertMay k) >>>
Step >>>
@@ -290,24 +291,24 @@ infix 3 .:?
-- | Property by a key grammar. Infix version of 'key'.
(.:)
:: Text
- -> (forall t. Grammar p (Sexp :- t) (a :- t))
- -> Grammar p (PropertyList :- t) (PropertyList :- a :- t)
+ -> (forall t. Grammar Position (Sexp :- t) (a :- t))
+ -> Grammar Position (PropertyList :- t) (PropertyList :- a :- t)
(.:) = key
-- | Optional property by a key grammar. Infix version of 'optKey'.
(.:?)
:: Text
- -> (forall t. Grammar p (Sexp :- t) (a :- t))
- -> Grammar p (PropertyList :- t) (PropertyList :- Maybe a :- t)
+ -> (forall t. Grammar Position (Sexp :- t) (a :- t))
+ -> Grammar Position (PropertyList :- t) (PropertyList :- Maybe a :- t)
(.:?) = optKey
-- | Remaining properties grammar. Extracts all key-value pairs and
-- applies a grammar on every element.
restKeys
- :: (forall t. Grammar p (Sexp :- Text :- t) (a :- t))
- -> Grammar p (PropertyList :- t) (PropertyList :- [a] :- t)
+ :: (forall t. Grammar Position (Sexp :- Text :- t) (a :- t))
+ -> Grammar Position (PropertyList :- t) (PropertyList :- [a] :- t)
restKeys f =
iso coerce coerce >>>
onHead (Traverse (sealed (Flip pair >>> f) >>> Step)) >>>
diff --git a/test/Main.hs b/test/Main.hs
index 8cedd85..9c87939 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -4,6 +4,7 @@
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
@@ -11,6 +12,8 @@
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeSynonymInstances #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
module Main (main) where
import Prelude hiding ((.), id)
@@ -20,17 +23,18 @@ import Control.Applicative
#endif
import Control.Category
+import Data.ByteString.Lazy.UTF8 (fromString)
+import Data.Char
import Data.Scientific
import Data.Semigroup
-import Data.ByteString.Lazy.UTF8 (fromString)
-import qualified Data.Text as TS
import qualified Data.Set as S
+import qualified Data.Text as TS
+import Data.Text.Prettyprint.Doc (Pretty, pretty)
import GHC.Generics
import Test.QuickCheck ()
import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck as QC
-import Data.Text.Prettyprint.Doc (Pretty, pretty)
import Language.Sexp.Located as Sexp
import Language.Sexp () -- for Show instance
@@ -41,9 +45,56 @@ import Language.SexpGrammar as G
import Language.SexpGrammar.Generic
import Language.SexpGrammar.TH hiding (match)
+import Debug.Trace
+
parseSexp' :: String -> Either String Sexp
parseSexp' input = Sexp.decode (fromString input)
+instance Arbitrary Atom where
+ arbitrary = oneof
+ [ AtomNumber . fromFloatDigits <$> (arbitrary :: Gen Double)
+ , AtomNumber . fromIntegral <$> (arbitrary :: Gen Integer)
+ , AtomString . TS.pack <$> listOf
+ (oneof [ elements $ ['\n','\r','\t','"','\\', ' ']
+ , arbitrary `suchThat` (\c -> isAlphaNum c || isPunctuation c)
+ ])
+ , pure (AtomSymbol ":foo")
+ , pure (AtomSymbol "bar")
+ , pure (AtomSymbol "~qux")
+ , pure (AtomSymbol "символ")
+ , pure (AtomSymbol "@baz")
+ ]
+
+instance Arbitrary Prefix where
+ arbitrary = elements
+ [ Quote
+ , Backtick
+ , Comma
+ , CommaAt
+ , Hash
+ ]
+
+instance Arbitrary Sexp where
+ arbitrary =
+ frequency
+ [ (3, Atom <$> arbitrary)
+ , (1, ParenList <$> scale (`div` 2) (listOf arbitrary))
+ , (1, BracketList <$> scale (`div` 2) (listOf arbitrary))
+ , (1, BraceList <$> scale (`div` 2) (listOf arbitrary))
+ , (1, Modified <$> arbitrary <*> (arbitrary `suchThat` (\case {Symbol s -> not ("@" `TS.isPrefixOf` s); _other -> True})))
+ ]
+ shrink = \case
+ Atom a -> map Atom (shrink a)
+ ParenList [x] -> shrink x
+ ParenList xs -> map ParenList (shrinkList shrink xs)
+ BracketList [x] -> shrink x
+ BracketList xs -> map BracketList (shrinkList shrink xs)
+ BraceList [x] -> shrink x
+ BraceList xs -> map BraceList (shrinkList shrink xs)
+ Modified m s -> shrink s ++ s : Modified m (Symbol "foo") : map (Modified m) (shrink s)
+ other -> [other]
+
+
fromSexp' :: SexpGrammar a -> Sexp.Sexp -> Either (ErrorMessage Position) a
fromSexp' g = runGrammar Sexp.dummyPos . forward (G.sealed g)
@@ -168,6 +219,10 @@ personGenericIso = with
allTests :: TestTree
allTests = testGroup "All tests"
[ lexerTests
+ , QC.testProperty "Format/decode invertibility"
+ (\a -> case Sexp.decode (Sexp.format a) of
+ Left _ -> trace "Cannot parse" False
+ Right b -> if toSimple a == toSimple b then True else trace ("Parsed " ++ show b) False)
, grammarTests
]
@@ -227,6 +282,21 @@ lexerTests = testGroup "Sexp lexer/parser tests"
, testCase "string with japanese characters" $
parseSexp' "\"媯綩 づ竤バ り姥娩ぎょひ\""
`sexpEq` Right (String "媯綩 づ竤バ り姥娩ぎょひ")
+ , testCase "string with newline" $
+ parseSexp' "\"foo\nbar\""
+ `sexpEq` Right (String "foo\nbar")
+ , testCase "string with \\n" $
+ parseSexp' "\"foo\\nbar\""
+ `sexpEq` Right (String "foo\nbar")
+ , testCase "string with \\t" $
+ parseSexp' "\"foo\\tbar\""
+ `sexpEq` Right (String "foo\tbar")
+ , testCase "string with \\\"" $
+ parseSexp' "\"foo\\\"bar\""
+ `sexpEq` Right (String "foo\"bar")
+ , testCase "string with \\\\" $
+ parseSexp' "\"foo\\\\bar\""
+ `sexpEq` Right (String "foo\\bar")
, testCase "paren-list" $
parseSexp' "(foo bar)"
`sexpEq` Right (ParenList [Symbol "foo", Symbol "bar"])
@@ -241,7 +311,7 @@ lexerTests = testGroup "Sexp lexer/parser tests"
`sexpEq` Right (Modified Quote (Symbol "foo"))
, testCase "hashed" $
parseSexp' "#foo"
- `sexpEq` Right (Symbol "#foo")
+ `sexpEq` Right (Modified Hash (Symbol "foo"))
, testCase "keyword" $
parseSexp' ":foo"
`sexpEq` Right (Symbol ":foo")
@@ -252,6 +322,7 @@ grammarTests :: TestTree
grammarTests = testGroup "Grammar tests"
[ baseTypeTests
, listTests
+ , prefixTests
, dictTests
, revStackPrismTests
, parseTests
@@ -385,7 +456,7 @@ prefixTests = testGroup "Prefix combinator tests"
, testCase "hashed" $
fromSexp'
(hashed (bracketList (rest int)))
- (Modified Quote (BracketList [Number 1, Number 2])) `otherEq`
+ (Modified Hash (BracketList [Number 1, Number 2])) `otherEq`
Right [1, 2]
, testCase "backticked" $