author     winterland <>                    2019-05-15 10:07:00 (GMT)
committer  hdiff <hdiff@hdiff.luite.com>    2019-05-15 10:07:00 (GMT)
commit     fe34aa93eb48d2b9c1f60d77dd63f00b80479626 (patch)
tree       f99451054c1957ba15deffd9eca9b077d24da648
parent     22c229ece61ff0b895c6a5095ad01cce283f411c (diff)
version 0.2.0.0 (HEAD, 0.2.0.0, master)
-rw-r--r--  ChangeLog.md                            11
-rw-r--r--  Std/Data/Array.hs                       11
-rw-r--r--  Std/Data/Builder.hs                      4
-rw-r--r--  Std/Data/Builder/Base.hs               156
-rw-r--r--  Std/Data/Builder/Numeric.hs            139
-rw-r--r--  Std/Data/CBytes.hs                       8
-rw-r--r--  Std/Data/Generics/Utils.hs              34
-rw-r--r--  Std/Data/JSON.hs                       160
-rw-r--r--  Std/Data/JSON/Base.hs                 1302
-rw-r--r--  Std/Data/JSON/Builder.hs               110
-rw-r--r--  Std/Data/JSON/Value.hs                 280
-rw-r--r--  Std/Data/LEON.hs                        50
-rw-r--r--  Std/Data/Parser.hs                      17
-rw-r--r--  Std/Data/Parser/Base.hs                604
-rw-r--r--  Std/Data/Parser/Numeric.hs             335
-rw-r--r--  Std/Data/PrimArray/BitTwiddle.hs         2
-rw-r--r--  Std/Data/PrimArray/Cast.hs               2
-rw-r--r--  Std/Data/Text/Base.hs                   44
-rw-r--r--  Std/Data/TextBuilder.hs                435
-rw-r--r--  Std/Data/Vector.hs                       2
-rw-r--r--  Std/Data/Vector/Base.hs                176
-rw-r--r--  Std/Data/Vector/Extra.hs                27
-rw-r--r--  Std/Data/Vector/FlatIntMap.hs          394
-rw-r--r--  Std/Data/Vector/FlatIntSet.hs          232
-rw-r--r--  Std/Data/Vector/FlatMap.hs             396
-rw-r--r--  Std/Data/Vector/FlatSet.hs             233
-rw-r--r--  Std/Data/Vector/QQ.hs                   22
-rw-r--r--  Std/Data/Vector/Search.hs               12
-rw-r--r--  Std/Data/Vector/Sort.hs                 69
-rw-r--r--  Std/Foreign/PrimArray.hs                15
-rw-r--r--  Std/IO/Buffered.hs                      53
-rw-r--r--  Std/IO/Exception.hs                      3
-rw-r--r--  Std/IO/Logger.hs                        41
-rw-r--r--  Std/IO/LowResTimer.hs                   24
-rw-r--r--  Std/IO/SockAddr.hsc                     12
-rw-r--r--  Std/IO/StdStream.hs                     44
-rw-r--r--  Std/IO/TCP.hs                           47
-rw-r--r--  Std/IO/UDP.hs                          303
-rw-r--r--  Std/IO/UV/FFI.hsc                       59
-rw-r--r--  Std/IO/UV/Manager.hs                    39
-rw-r--r--  cbits/bytes.c                            1
-rw-r--r--  cbits/dtoa.c                            87
-rw-r--r--  cbits/hs_uv_udp.c                      128
-rw-r--r--  cbits/text.c                           302
-rw-r--r--  include/dtoa.h                           1
-rw-r--r--  include/hs_uv.h                         10
-rw-r--r--  include/text.h                           5
-rw-r--r--  stdio.cabal                             36
-rw-r--r--  test/Std/Data/Builder/NumericSpec.hs    43
-rw-r--r--  test/Std/Data/JSON/BaseSpec.hs          71
-rw-r--r--  test/Std/Data/JSON/ValueSpec.hs         24
-rw-r--r--  test/Std/Data/LEONSpec.hs               67
-rw-r--r--  test/Std/Data/Parser/BaseSpec.hs        17
-rw-r--r--  test/Std/Data/Parser/NumericSpec.hs     75
-rw-r--r--  test/Std/Data/Text/BaseSpec.hs           2
-rw-r--r--  test/Std/Data/TextBuilderSpec.hs        68
-rw-r--r--  test/Std/Data/Vector/FlatMapSpec.hs     38
-rw-r--r--  test/Std/Data/Vector/FlatSetSpec.hs     34
-rw-r--r--  test/Std/IO/FileSystemSpec.hs           19
-rw-r--r--  test/Std/IO/FileSystemTSpec.hs          20
-rw-r--r--  test/Std/IO/UDPSpec.hs                  69
61 files changed, 6248 insertions, 776 deletions
diff --git a/ChangeLog.md b/ChangeLog.md
index 52afcf5..3028f5d 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -5,3 +5,14 @@
* Add LEON, a little endian first serialization/deserialization module.
* Use pkg-config to find libuv by default, which can be turned off via cabal flag no-pkg-config
* Export `Result` constructor in `Std.Data.Parser` module.
+
+## 0.2.0.0 --2019-05-15
+
+* Add UDP module.
+* Add JSON module.
+* Add `ToText` class to `TextBuilder` module.
+* Improve numeric builders by using FFI code.
+* Change `readParser`'s type in `Std.IO.Buffered` module to directly return the parsing result.
+* Add `FlatMap/FlatSet/FlatIntMap/FlatIntSet` modules.
+* Fix a bug in `Parser`'s `Alternative` instance.
+* Fix a bug in `PrimVector`'s `QuasiQuoter`.
diff --git a/Std/Data/Array.hs b/Std/Data/Array.hs
index 9fbc378..65ea02b 100644
--- a/Std/Data/Array.hs
+++ b/Std/Data/Array.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeFamilyDependencies #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
@@ -100,6 +101,8 @@ uninitialized = throw (UndefinedElement "Data.Array.uninitialized")
-- They are used across this package and perform identically to their monomorphic counterparts.
--
class Arr (marr :: * -> * -> *) (arr :: * -> * ) a | arr -> marr, marr -> arr where
+ type IArr marr = (ar :: * -> *) | ar -> marr
+ type MArr arr = (mar :: * -> * -> *) | mar -> arr
-- | Make a new array with given size.
--
@@ -190,6 +193,8 @@ class Arr (marr :: * -> * -> *) (arr :: * -> * ) a | arr -> marr, marr -> arr wh
sameArr :: arr a -> arr a -> Bool
instance Arr MutableArray Array a where
+ type MArr Array = MutableArray
+ type IArr MutableArray = Array
newArr n = newArray n uninitialized
{-# INLINE newArr #-}
newArrWith = newArray
@@ -274,6 +279,8 @@ instance Arr MutableArray Array a where
{-# INLINE sameArr #-}
instance Arr SmallMutableArray SmallArray a where
+ type MArr SmallArray = SmallMutableArray
+ type IArr SmallMutableArray = SmallArray
newArr n = newSmallArray n uninitialized
{-# INLINE newArr #-}
newArrWith = newSmallArray
@@ -359,6 +366,8 @@ instance Arr SmallMutableArray SmallArray a where
{-# INLINE sameArr #-}
instance Prim a => Arr MutablePrimArray PrimArray a where
+ type MArr PrimArray = MutablePrimArray
+ type IArr MutablePrimArray = PrimArray
newArr = newPrimArray
{-# INLINE newArr #-}
newArrWith n x = do
@@ -432,6 +441,8 @@ instance Prim a => Arr MutablePrimArray PrimArray a where
{-# INLINE sameArr #-}
instance PrimUnlifted a => Arr MutableUnliftedArray UnliftedArray a where
+ type MArr UnliftedArray = MutableUnliftedArray
+ type IArr MutableUnliftedArray = UnliftedArray
newArr = unsafeNewUnliftedArray
{-# INLINE newArr #-}
newArrWith = newUnliftedArray
diff --git a/Std/Data/Builder.hs b/Std/Data/Builder.hs
index d8367af..e71dfc4 100644
--- a/Std/Data/Builder.hs
+++ b/Std/Data/Builder.hs
@@ -41,7 +41,7 @@ module Std.Data.Builder
, encodePrimLE
, encodePrimBE
-- * More builders
- , stringUTF8, charUTF8, string7, char7, string8, char8, text
+ , stringModifiedUTF8, charModifiedUTF8, stringUTF8, charUTF8, string7, char7, string8, char8, text
-- * Numeric builders
-- ** Integral type formatting
, IFormat(..)
@@ -60,6 +60,8 @@ module Std.Data.Builder
, floatWith
, scientific
, scientificWith
+ -- * Builder helpers
+ , paren, curly, square, angle, quotes, squotes, colon, comma, intercalateVec, intercalateList
) where
import Std.Data.Builder.Base
diff --git a/Std/Data/Builder/Base.hs b/Std/Data/Builder/Base.hs
index 0f925bf..b64aa5e 100644
--- a/Std/Data/Builder/Base.hs
+++ b/Std/Data/Builder/Base.hs
@@ -6,6 +6,7 @@
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE UnboxedTuples #-}
{-|
@@ -66,6 +67,8 @@ module Std.Data.Builder.Base
, encodePrimBE
-- * More builders
, stringModifiedUTF8, charModifiedUTF8, stringUTF8, charUTF8, string7, char7, string8, char8, text
+ -- * Builder helpers
+ , paren, curly, square, angle, quotes, squotes, colon, comma, intercalateVec, intercalateList
) where
import Control.Monad
@@ -80,7 +83,7 @@ import Data.Semigroup (Semigroup (..))
import Data.String (IsString (..))
import Data.Word
import Data.Int
-import GHC.CString (unpackCString#)
+import GHC.CString (unpackCString#, unpackCStringUtf8#)
import GHC.Prim
import GHC.Ptr
import GHC.Types
@@ -89,7 +92,9 @@ import Std.Data.PrimArray.UnalignedAccess
import qualified Std.Data.Text.Base as T
import qualified Std.Data.Text.UTF8Codec as T
import qualified Std.Data.Vector.Base as V
+import qualified Std.Data.Vector as V
import System.IO.Unsafe
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
-- | 'AllocateStrategy' will decide how each 'BuildStep' proceed when previous buffer is not enough.
--
@@ -121,6 +126,9 @@ type BuildStep s = Buffer s -> ST s [V.Bytes]
newtype Builder a = Builder
{ runBuilder :: forall s. AllocateStrategy s -> (a -> BuildStep s) -> BuildStep s}
+instance Show (Builder a) where
+ show = show . buildBytes
+
instance Functor Builder where
{-# INLINE fmap #-}
fmap f (Builder b) = Builder (\ al k -> b al (k . f))
@@ -157,11 +165,21 @@ instance (a ~ ()) => IsString (Builder a) where
{-# INLINE fromString #-}
fromString = stringModifiedUTF8
+instance Arbitrary (Builder ()) where
+ arbitrary = bytes <$> arbitrary
+ shrink b = (bytes . V.pack) <$> shrink (V.unpack (buildBytes b))
+
+instance CoArbitrary (Builder ()) where
+ coarbitrary = coarbitrary . buildBytes
+
-- | Encode string with modified UTF-8 encoding, will be rewritten to a memcpy if possible.
stringModifiedUTF8 :: String -> Builder ()
-{-# INLINE CONLIKE [1] stringModifiedUTF8 #-}
+{-# INLINE CONLIKE [0] stringModifiedUTF8 #-}
{-# RULES
- "stringModifiedUTF8/addrLiteral" forall addr . stringModifiedUTF8 (unpackCString# addr) = addrLiteral addr
+ "stringModifiedUTF8/packAddrModified" forall addr . stringModifiedUTF8 (unpackCString# addr) = packAddrModified addr
+ #-}
+{-# RULES
+ "stringModifiedUTF8/packAddrModified" forall addr . stringModifiedUTF8 (unpackCStringUtf8# addr) = packAddrModified addr
#-}
stringModifiedUTF8 = mapM_ charModifiedUTF8
@@ -176,9 +194,8 @@ charModifiedUTF8 chr = do
i' <- T.encodeCharModifiedUTF8 mba i chr
k () (Buffer mba i'))
-addrLiteral :: Addr# -> Builder ()
-{-# INLINE addrLiteral #-}
-addrLiteral addr# = copy addr#
+packAddrModified :: Addr# -> Builder ()
+packAddrModified addr# = copy addr#
where
len = fromIntegral . unsafeDupablePerformIO $ V.c_strlen addr#
copy addr# = do
@@ -256,18 +273,18 @@ insertChunk !chunkSiz !wantSiz k buffer@(Buffer buf offset) = do
!siz <- A.sizeofMutableArr buf
case () of
_
- | offset /= 0 -> do -- this is certainly hold, but we still guard it
+ | offset /= 0 -> do
when (offset < siz)
(A.shrinkMutableArr buf offset) -- shrink old buffer if not full
arr <- A.unsafeFreezeArr buf -- popup old buffer
buf' <- A.newArr (max wantSiz chunkSiz) -- make a new buffer
- xs <- unsafeInterleaveST (k (Buffer buf' 0)) -- delay the rest building process
+ xs <- unsafeInterleaveST (k (Buffer buf' 0)) -- delay the rest building process
let v = V.fromArr arr 0 offset
- v `seq` return (v : xs)
- | wantSiz <= siz -> k (Buffer buf 0)
+ v `seq` pure (v : xs)
+ | wantSiz <= siz -> k (Buffer buf 0) -- this should certainly not hold, but we still guard it
| otherwise -> do
- buf' <- A.newArr wantSiz -- make a new buffer
- k (Buffer buf' 0 )
+ buf' <- A.newArr (max wantSiz chunkSiz) -- make a new buffer
+ k (Buffer buf' 0)
oneShotAction :: (V.Bytes -> ST s ()) -> Int -> BuildStep s -> BuildStep s
{-# INLINE oneShotAction #-}
@@ -302,13 +319,13 @@ buildBytesWith :: Int -> Builder a -> V.Bytes
buildBytesWith initSiz (Builder b) = runST $ do
buf <- A.newArr initSiz
[bs] <- b DoubleBuffer lastStep (Buffer buf 0 )
- return bs
+ pure bs
where
lastStep _ (Buffer buf offset) = do
siz <- A.sizeofMutableArr buf
when (offset < siz) (A.shrinkMutableArr buf offset)
arr <- A.unsafeFreezeArr buf
- return [V.PrimVector arr 0 offset]
+ pure [V.PrimVector arr 0 offset]
-- | shortcut to 'buildBytesListWith' 'V.defaultChunkSize'.
buildBytesList :: Builder a -> [V.Bytes]
@@ -325,7 +342,7 @@ buildBytesListWith initSiz chunkSiz (Builder b) = runST $ do
where
lastStep _ (Buffer buf offset) = do
arr <- A.unsafeFreezeArr buf
- return [V.PrimVector arr 0 offset]
+ pure [V.PrimVector arr 0 offset]
-- | shortcut to 'buildAndRunWith' 'V.defaultChunkSize'.
buildAndRun :: (V.Bytes -> IO ()) -> Builder a -> IO ()
@@ -337,19 +354,19 @@ buildAndRunWith :: Int -> (V.Bytes -> IO ()) -> Builder a -> IO ()
buildAndRunWith chunkSiz action (Builder b) = do
buf <- A.newArr chunkSiz
_ <- stToIO (b (OneShotAction (\ bs -> ioToPrim (action bs))) lastStep (Buffer buf 0))
- return ()
+ pure ()
where
lastStep :: a -> BuildStep RealWorld
lastStep _ (Buffer buf offset) = do
arr <- A.unsafeFreezeArr buf
ioToPrim (action (V.PrimVector arr 0 offset))
- return [] -- to match the silly return type
+ pure [] -- to match the silly return type
{-# INLINABLE buildAndRun #-}
--------------------------------------------------------------------------------
atMost :: Int -- ^ size bound
- -> (forall s. A.MutablePrimArray s Word8 -> Int -> ST s Int) -- ^ the writer which return a new offset
+ -> (forall s. A.MutablePrimArray s Word8 -> Int -> ST s Int) -- ^ the writer which returns a new offset
-- for next write
-> Builder ()
{-# INLINE atMost #-}
@@ -358,7 +375,7 @@ atMost n f = ensureN n `append`
f buf offset >>= \ offset' -> k () (Buffer buf offset'))
writeN :: Int -- ^ size bound
- -> (forall s. A.MutablePrimArray s Word8 -> Int -> ST s ()) -- ^ the writer which return a new offset
+ -> (forall s. A.MutablePrimArray s Word8 -> Int -> ST s ()) -- ^ the writer which returns a new offset
-- for next write
-> Builder ()
{-# INLINE writeN #-}
@@ -426,15 +443,27 @@ encodePrimBE = encodePrim . BE
-- This function will be rewritten into a memcpy if possible, (running a fast UTF-8 validation
-- at runtime first).
stringUTF8 :: String -> Builder ()
-{-# INLINE CONLIKE [1] stringUTF8 #-}
+{-# INLINE CONLIKE [0] stringUTF8 #-}
{-# RULES
- "stringUTF8/addrUTF8" forall addr . stringUTF8 (unpackCString# addr) = addrUTF8 addr
+ "stringUTF8/packASCIIAddr" forall addr . stringUTF8 (unpackCString# addr) = packASCIIAddr addr
+ #-}
+{-# RULES
+ "stringUTF8/packUTF8Addr" forall addr . stringUTF8 (unpackCString# addr) = packUTF8Addr addr
#-}
stringUTF8 = mapM_ charUTF8
-addrUTF8 :: Addr# -> Builder ()
-{-# INLINABLE addrUTF8 #-}
-addrUTF8 addr# = validateAndCopy addr#
+packASCIIAddr :: Addr# -> Builder ()
+packASCIIAddr addr# = copy addr#
+ where
+ len = fromIntegral . unsafeDupablePerformIO $ V.c_strlen addr#
+ copy addr# = do
+ ensureN len
+ Builder (\ _ k (Buffer mba i) -> do
+ copyPtrToMutablePrimArray mba i (Ptr addr#) len
+ k () (Buffer mba (i + len)))
+
+packUTF8Addr :: Addr# -> Builder ()
+packUTF8Addr addr# = validateAndCopy addr#
where
len = fromIntegral . unsafeDupablePerformIO $ V.c_strlen addr#
valid = unsafeDupablePerformIO $ T.c_utf8_validate_addr addr# len
@@ -507,3 +536,82 @@ char8 chr = do
text :: T.Text -> Builder ()
{-# INLINE text #-}
text (T.Text bs) = bytes bs
+
+--------------------------------------------------------------------------------
+
+#define BACKSLASH 92
+#define CLOSE_ANGLE 62
+#define CLOSE_CURLY 125
+#define CLOSE_PAREN 41
+#define CLOSE_SQUARE 93
+#define COMMA 44
+#define COLON 58
+#define DOUBLE_QUOTE 34
+#define OPEN_ANGLE 60
+#define OPEN_CURLY 123
+#define OPEN_PAREN 40
+#define OPEN_SQUARE 91
+#define SINGLE_QUOTE 39
+
+-- | add @(...)@ to original builder.
+paren :: Builder () -> Builder ()
+{-# INLINE paren #-}
+paren b = encodePrim @Word8 OPEN_PAREN >> b >> encodePrim @Word8 CLOSE_PAREN
+
+-- | add @{...}@ to original builder.
+curly :: Builder () -> Builder ()
+{-# INLINE curly #-}
+curly b = encodePrim @Word8 OPEN_CURLY >> b >> encodePrim @Word8 CLOSE_CURLY
+
+-- | add @[...]@ to original builder.
+square :: Builder () -> Builder ()
+{-# INLINE square #-}
+square b = encodePrim @Word8 OPEN_SQUARE >> b >> encodePrim @Word8 CLOSE_SQUARE
+
+-- | add @<...>@ to original builder.
+angle :: Builder () -> Builder ()
+{-# INLINE angle #-}
+angle b = encodePrim @Word8 OPEN_ANGLE >> b >> encodePrim @Word8 CLOSE_ANGLE
+
+-- | add @"..."@ to original builder.
+quotes :: Builder () -> Builder ()
+{-# INLINE quotes #-}
+quotes b = encodePrim @Word8 DOUBLE_QUOTE >> b >> encodePrim @Word8 DOUBLE_QUOTE
+
+-- | add @'...'@ to original builder.
+squotes :: Builder () -> Builder ()
+{-# INLINE squotes #-}
+squotes b = encodePrim @Word8 SINGLE_QUOTE >> b >> encodePrim @Word8 SINGLE_QUOTE
+
+-- | write an ASCII @:@
+colon :: Builder ()
+{-# INLINE colon #-}
+colon = encodePrim @Word8 COLON
+
+-- | write an ASCII @,@
+comma :: Builder ()
+{-# INLINE comma #-}
+comma = encodePrim @Word8 COMMA
+
+-- | Use separator to connect a vector of builders.
+intercalateVec :: (V.Vec v a)
+ => Builder () -- ^ the separator
+ -> (a -> Builder ()) -- ^ value formatter
+ -> v a -- ^ value vector
+ -> Builder ()
+{-# INLINE intercalateVec #-}
+intercalateVec s f v = do
+ V.traverseVec_ (\ x -> f x >> s) (V.initMayEmpty v)
+ forM_ (V.lastMaybe v) f
+
+-- | Use separator to connect list of builders.
+intercalateList :: Builder () -- ^ the separator
+ -> (a -> Builder ()) -- ^ value formatter
+ -> [a] -- ^ value list
+ -> Builder ()
+{-# INLINE intercalateList #-}
+intercalateList s f xs = go xs
+ where
+ go [] = pure ()
+ go [x] = f x
+ go (x:xs) = f x >> s >> go xs
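+
+-- A minimal usage sketch of the helpers above (illustrative only; 'intercalateVec' works
+-- the same way on vectors):
+--
+-- > buildBytes (square (intercalateList comma char8 "123"))
+--
+-- builds the bytes of the ASCII string @[1,2,3]@.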
diff --git a/Std/Data/Builder/Numeric.hs b/Std/Data/Builder/Numeric.hs
index ec7accd..9c244c5 100644
--- a/Std/Data/Builder/Numeric.hs
+++ b/Std/Data/Builder/Numeric.hs
@@ -23,6 +23,7 @@ Textual numeric builders.
module Std.Data.Builder.Numeric (
-- * Integral type formatting
+
IFormat(..)
, defaultIFormat
, Padding(..)
@@ -44,10 +45,12 @@ module Std.Data.Builder.Numeric (
, grisu3_sp
, i2wDec, i2wHex, i2wHeX
, countDigits
+ , c_intWith, hs_intWith
) where
import Control.Monad
import Control.Monad.ST
+import Control.Monad.ST.Unsafe
import Data.Bits
import Data.Char
import Data.Int
@@ -70,52 +73,82 @@ import System.IO.Unsafe
import GHC.Integer.GMP.Internals
#endif
import GHC.Float (roundTo)
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
--------------------------------------------------------------------------------
+foreign import ccall unsafe "dtoa.h" c_int_dec :: Word64 -> Int -> Int -> Word8 -> MBA# Word8 -> Int -> IO Int
+
-- | Integral formatting options.
--
data IFormat = IFormat
- { width :: Int -- ^ total width, only effective with padding options
+ { width :: Int -- ^ total width, only effective with padding options
, padding :: Padding -- ^ padding options
- , postiveSign :: Bool -- ^ show @+@ when the number is positive
+ , postiveSign :: Bool -- ^ show @+@ when the number is positive
} deriving (Show, Eq, Ord)
--- | @defaultIFormat = IFormat 0 NoPadding False Decimal@
+instance Arbitrary IFormat where
+ arbitrary = IFormat <$> arbitrary <*> arbitrary <*> arbitrary
+
+-- | @defaultIFormat = IFormat 0 NoPadding False@
defaultIFormat :: IFormat
defaultIFormat = IFormat 0 NoPadding False
-data Padding = NoPadding | ZeroPadding | LeftSpacePadding | RightSpacePadding deriving (Show, Eq, Ord)
+data Padding = NoPadding | RightSpacePadding | LeftSpacePadding | ZeroPadding deriving (Show, Eq, Ord, Enum)
+
+instance Arbitrary Padding where
+ arbitrary = toEnum . (`mod` 4) <$> arbitrary
-- | @int = intWith defaultIFormat@
int :: (Integral a, Bounded a) => a -> Builder ()
+{-# INLINE int #-}
int = intWith defaultIFormat
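+
+-- For example (an illustrative sketch): @intWith defaultIFormat{width = 5, padding = ZeroPadding} (12 :: Int)@
+-- formats as @00012@, while @intWith defaultIFormat 12@ formats as @12@.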
-- | Format a 'Bounded' 'Integral' type like @Int@ or @Word16@ into decimal ASCII digits.
-intWith :: (Integral a, Bounded a)
- => IFormat
- -> a
- -> Builder ()
-{-# INLINE[1] intWith #-}
-{-# RULES "intWith'/Int8" intWith = intWith' :: IFormat -> Int8 -> Builder () #-}
-{-# RULES "intWith'/Int" intWith = intWith' :: IFormat -> Int -> Builder () #-}
-{-# RULES "intWith'/Int16" intWith = intWith' :: IFormat -> Int16 -> Builder () #-}
-{-# RULES "intWith'/Int32" intWith = intWith' :: IFormat -> Int32 -> Builder () #-}
-{-# RULES "intWith'/Int64" intWith = intWith' :: IFormat -> Int64 -> Builder () #-}
-{-# RULES "intWith'/Word" intWith = positiveInt :: IFormat -> Word -> Builder () #-}
-{-# RULES "intWith'/Word8" intWith = positiveInt :: IFormat -> Word8 -> Builder () #-}
-{-# RULES "intWith'/Word16" intWith = positiveInt :: IFormat -> Word16 -> Builder () #-}
-{-# RULES "intWith'/Word32" intWith = positiveInt :: IFormat -> Word32 -> Builder () #-}
-{-# RULES "intWith'/Word64" intWith = positiveInt :: IFormat -> Word64 -> Builder () #-}
-intWith = intWith'
-
-intWith' :: (Integral a, Bounded a) => IFormat -> a -> Builder ()
-{-# SPECIALIZE INLINE intWith' :: IFormat -> Int -> Builder () #-}
-{-# SPECIALIZE INLINE intWith' :: IFormat -> Int8 -> Builder () #-}
-{-# SPECIALIZE INLINE intWith' :: IFormat -> Int16 -> Builder () #-}
-{-# SPECIALIZE INLINE intWith' :: IFormat -> Int32 -> Builder () #-}
-{-# SPECIALIZE INLINE intWith' :: IFormat -> Int64 -> Builder () #-}
-intWith' format@(IFormat width padding _) i
+intWith :: (Integral a, Bounded a) => IFormat -> a -> Builder ()
+intWith = hs_intWith
+{-# INLINE[0] intWith #-}
+{-# RULES "intWith'/Int8" intWith = c_intWith :: IFormat -> Int8 -> Builder () #-}
+{-# RULES "intWith'/Int" intWith = c_intWith :: IFormat -> Int -> Builder () #-}
+{-# RULES "intWith'/Int16" intWith = c_intWith :: IFormat -> Int16 -> Builder () #-}
+{-# RULES "intWith'/Int32" intWith = c_intWith :: IFormat -> Int32 -> Builder () #-}
+{-# RULES "intWith'/Int64" intWith = c_intWith :: IFormat -> Int64 -> Builder () #-}
+{-# RULES "intWith'/Word" intWith = c_intWith :: IFormat -> Word -> Builder () #-}
+{-# RULES "intWith'/Word8" intWith = c_intWith :: IFormat -> Word8 -> Builder () #-}
+{-# RULES "intWith'/Word16" intWith = c_intWith :: IFormat -> Word16 -> Builder () #-}
+{-# RULES "intWith'/Word32" intWith = c_intWith :: IFormat -> Word32 -> Builder () #-}
+{-# RULES "intWith'/Word64" intWith = c_intWith :: IFormat -> Word64 -> Builder () #-}
+
+-- | Internal formatting backed by the C FFI; it must only be used with types no wider than 'Word64'.
+--
+-- We use rewrite rules to rewrite most integral types' formatting to this function.
+c_intWith :: (Integral a, Bits a) => IFormat -> a -> Builder ()
+{-# INLINE c_intWith #-}
+c_intWith (IFormat width padding posSign) x
+ | x < 0 =
+ let !x' = (fromIntegral (complement x) :: Word64) + 1
+ in atMost width' (\ mba@(MutablePrimArray mba#) i ->
+ unsafeIOToST (c_int_dec x' (-1) width pad (unsafeCoerce# mba#) i))
+ | posSign =
+ atMost width' (\ mba@(MutablePrimArray mba#) i ->
+ unsafeIOToST (c_int_dec (fromIntegral x) 1 width pad (unsafeCoerce# mba#) i))
+ | otherwise =
+ atMost width' (\ mba@(MutablePrimArray mba#) i ->
+ unsafeIOToST (c_int_dec (fromIntegral x) 0 width pad (unsafeCoerce# mba#) i))
+ where
+ width' = max 21 width
+ pad = case padding of NoPadding -> 0
+ RightSpacePadding -> 1
+ LeftSpacePadding -> 2
+ ZeroPadding -> 3
+
+-- | Internal formatting in Haskell; it can be used with any bounded integral type.
+--
+-- Besides providing a fallback for the C version, this function is also used to check
+-- the C version's formatting result.
+hs_intWith :: (Integral a, Bounded a) => IFormat -> a -> Builder ()
+{-# INLINABLE hs_intWith #-}
+hs_intWith format@(IFormat width padding _) i
| i < 0 =
if i == minBound -- can't directly negate in this case
then do
@@ -213,16 +246,7 @@ intWith' format@(IFormat width padding _) i
| otherwise = positiveInt format i
positiveInt :: (Integral a) => IFormat -> a -> Builder ()
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Int -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Int8 -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Int16 -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Int32 -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Int64 -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Word -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Word8 -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Word16 -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Word32 -> Builder () #-}
-{-# SPECIALIZE INLINE positiveInt :: IFormat -> Word64 -> Builder () #-}
+{-# INLINABLE positiveInt #-}
positiveInt (IFormat width padding ps) i =
let !n = countDigits i
in if ps
@@ -361,7 +385,7 @@ integer n0
jprinth [] = errorWithoutStackTrace "jprinth []"
jprintb :: [Integer] -> Builder ()
- jprintb [] = return ()
+ jprintb [] = pure ()
jprintb (n:ns) = case n `quotRemInteger` BASE of
(# q', r' #) ->
let q = fromInteger q'
@@ -374,7 +398,7 @@ integer n0
jhead :: Int -> Builder ()
jhead = int
jblock :: Int -> Builder ()
- jblock d = writeN DIGITS $ \ marr off -> writePositiveDec marr off DIGITS d
+ jblock = intWith defaultIFormat{padding = ZeroPadding, width=DIGITS}
-- Split n into digits in base p. We first split n into digits
-- in base p*p and then split each of these digits into two.
@@ -539,6 +563,7 @@ data FFormat = Exponent -- ^ Scientific notation (e.g. @2.3e123@).
-- @9,999,999@, and scientific notation otherwise.
deriving (Enum, Read, Show)
+
-- | Decimal encoding of an IEEE 'Float'.
--
-- Using standard decimal notation for arguments whose absolute value lies
@@ -564,13 +589,10 @@ floatWith :: FFormat
floatWith fmt decs x
| isNaN x = "NaN"
| isInfinite x = if x < 0 then "-Infinity" else "Infinity"
- | x < 0 = char8 '-' >> doFmt fmt decs (digits (-x))
+ | x < 0 = char8 '-' >> doFmt fmt decs (grisu3_sp (-x))
| isNegativeZero x = char8 '-' >> doFmt fmt decs ([0], 0)
| x == 0 = doFmt fmt decs ([0], 0)
- | otherwise = doFmt fmt decs (digits x) -- Grisu only handles strictly positive finite numbers.
- where
- digits y = case grisu3_sp y of Just r -> r
- Nothing -> floatToDigits 10 y
+ | otherwise = doFmt fmt decs (grisu3_sp x) -- Grisu only handles strictly positive finite numbers.
-- | Format double-precision float using grisu3 with dragon4 fallback.
doubleWith :: FFormat
@@ -581,13 +603,10 @@ doubleWith :: FFormat
doubleWith fmt decs x
| isNaN x = "NaN"
| isInfinite x = if x < 0 then "-Infinity" else "Infinity"
- | x < 0 = char8 '-' >> doFmt fmt decs (digits (-x))
+ | x < 0 = char8 '-' >> doFmt fmt decs (grisu3 (-x))
| isNegativeZero x = char8 '-' >> doFmt fmt decs ([0], 0)
| x == 0 = doFmt fmt decs ([0], 0)
- | otherwise = doFmt fmt decs (digits x) -- Grisu only handles strictly positive finite numbers.
- where
- digits y = case grisu3 y of Just r -> r
- Nothing -> floatToDigits 10 y
+ | otherwise = doFmt fmt decs (grisu3 x) -- Grisu only handles strictly positive finite numbers.
-- | Worker function to do formatting.
doFmt :: FFormat
@@ -682,8 +701,8 @@ foreign import ccall unsafe "static grisu3" c_grisu3
-> MBA# Int -- ^ Int
-> IO Int
--- | Decimal encoding of a 'Double'.
-grisu3 :: Double -> Maybe ([Int], Int)
+-- | Decimal encoding of a 'Double', note grisu only handles strictly positive finite numbers.
+grisu3 :: Double -> ([Int], Int)
{-# INLINE grisu3 #-}
grisu3 d = unsafePerformIO $
withMutableByteArrayUnsafe GRISU3_DOUBLE_BUF_LEN $ \ pBuf -> do
@@ -691,13 +710,13 @@ grisu3 d = unsafePerformIO $
withPrimUnsafe' $ \ pE ->
c_grisu3 (realToFrac d) pBuf pLen pE
if success == 0 -- grisu3 fail
- then return Nothing
+ then pure (floatToDigits 10 d)
else do
buf <- forM [0..len-1] $ \ i -> do
w8 <- readByteArray (MutableByteArray pBuf) i :: IO Word8
- return (fromIntegral w8)
+ pure (fromIntegral w8)
let !e' = e + len
- return $ Just (buf, e')
+ pure (buf, e')
foreign import ccall unsafe "static grisu3_sp" c_grisu3_sp
:: Float
@@ -706,8 +725,8 @@ foreign import ccall unsafe "static grisu3_sp" c_grisu3_sp
-> MBA# Int -- ^ Int
-> IO Int
--- | Decimal encoding of a 'Float'.
-grisu3_sp :: Float -> Maybe ([Int], Int)
+-- | Decimal encoding of a 'Float', note grisu3_sp only handles strictly positive finite numbers.
+grisu3_sp :: Float -> ([Int], Int)
{-# INLINE grisu3_sp #-}
grisu3_sp d = unsafePerformIO $
withMutableByteArrayUnsafe GRISU3_SINGLE_BUF_LEN $ \ pBuf -> do
@@ -715,13 +734,13 @@ grisu3_sp d = unsafePerformIO $
withPrimUnsafe' $ \ pE ->
c_grisu3_sp (realToFrac d) pBuf pLen pE
if success == 0 -- grisu3 fail
- then return Nothing
+ then pure (floatToDigits 10 d)
else do
buf <- forM [0..len-1] $ \ i -> do
w8 <- readByteArray (MutableByteArray pBuf) i :: IO Word8
- return (fromIntegral w8)
+ pure (fromIntegral w8)
let !e' = e + len
- return $ Just (buf, e')
+ pure (buf, e')
--------------------------------------------------------------------------------
diff --git a/Std/Data/CBytes.hs b/Std/Data/CBytes.hs
index f689970..a36dd25 100644
--- a/Std/Data/CBytes.hs
+++ b/Std/Data/CBytes.hs
@@ -49,6 +49,7 @@ module Std.Data.CBytes
, withCBytes
) where
+import Control.DeepSeq
import Control.Monad
import Control.Monad.Primitive
import Control.Monad.ST
@@ -117,7 +118,13 @@ instance Show CBytes where
instance Read CBytes where
readsPrec p s = [(pack x, r) | (x, r) <- readsPrec p s]
+instance NFData CBytes where
+ {-# INLINE rnf #-}
+ rnf (CBytesOnHeap _) = ()
+ rnf (CBytesLiteral _) = ()
+
instance Eq CBytes where
+ {-# INLINE (==) #-}
cbyteA == cbyteB = unsafeDupablePerformIO $
withCBytes cbyteA $ \ pA ->
withCBytes cbyteB $ \ pB ->
@@ -128,6 +135,7 @@ instance Eq CBytes where
return (r == 0)
instance Ord CBytes where
+ {-# INLINE compare #-}
cbyteA `compare` cbyteB = unsafeDupablePerformIO $
withCBytes cbyteA $ \ pA ->
withCBytes cbyteB $ \ pB ->
diff --git a/Std/Data/Generics/Utils.hs b/Std/Data/Generics/Utils.hs
new file mode 100644
index 0000000..f262e19
--- /dev/null
+++ b/Std/Data/Generics/Utils.hs
@@ -0,0 +1,34 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE KindSignatures #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE FunctionalDependencies #-}
+
+
+module Std.Data.Generics.Utils
+ ( ProductSize(..)
+ , productSize
+ ) where
+
+import Data.Proxy
+import GHC.Generics
+import GHC.TypeNats
+import GHC.Exts (Proxy#, proxy#)
+
+-- | type class for calculating product size.
+class KnownNat (PSize f) => ProductSize (f :: * -> *) where
+ type PSize f :: Nat
+
+instance ProductSize (S1 s a) where
+ type PSize (S1 s a) = 1
+instance (KnownNat (PSize a + PSize b), ProductSize a, ProductSize b) => ProductSize (a :*: b) where
+ type PSize (a :*: b) = PSize a + PSize b
+
+productSize :: forall f. KnownNat (PSize f) => Proxy# f -> Int
+productSize _ = fromIntegral (natVal' (proxy# :: Proxy# (PSize f)))
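+
+-- A small sketch of the intent (illustrative): for a product representation with three
+-- record fields, e.g. @f = S1 m1 g1 :*: (S1 m2 g2 :*: S1 m3 g3)@, we have @PSize f = 3@,
+-- so @productSize (proxy# :: Proxy# f)@ evaluates to 3.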
diff --git a/Std/Data/JSON.hs b/Std/Data/JSON.hs
new file mode 100644
index 0000000..fdd3ffa
--- /dev/null
+++ b/Std/Data/JSON.hs
@@ -0,0 +1,160 @@
+{-|
+Module : Std.Data.JSON
+Description : Fast JSON serialization/deserialization
+Copyright : (c) Dong Han, 2019
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+Types and functions for working efficiently with JSON data. The design is quite similar to @aeson@ or @json@:
+
+ * Encoding to bytes can be done directly via 'EncodeJSON'.
+ * Decoding is split into two steps: first we parse the JSON doc into a 'Value', then convert it to Haskell data via 'FromValue'.
+ * 'ToValue' is provided so that other doc formats, such as YAML, can be easily supported.
+
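+A quick round trip using the built-in instances (an illustrative sketch, assuming @OverloadedStrings@
+for the 'V.Bytes' literal; the exact encoding rules are given below):
+
+@
+ > JSON.decode' "[1,2,3]" :: Either JSON.DecodeError [Int]
+ Right [1,2,3]
+ > JSON.encodeText ([1,2,3] :: [Int])
+ "[1,2,3]"
+@
+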
+= How to use this module.
+
+This module is intended to be used qualified, e.g.
+
+@
+ import qualified Std.Data.JSON as JSON
+ import Std.Data.JSON ((.:), ToValue(..), FromValue(..), EncodeJSON(..))
+@
+
+The easiest way to use the library is to define the target data type, deriving 'GHC.Generics.Generic' and the following instances:
+
+ * 'FromValue', which provides 'fromValue' to convert 'Value' to Haskell values.
+ * 'ToValue', which provides 'toValue' to convert Haskell values to 'Value'.
+ * 'EncodeJSON', which provides 'encodeJSON' to directly write Haskell values into JSON bytes.
+
+The 'Generic' instances convert (encode) Haskell data with the following rules:
+
+ * Constructors without payloads are encoded as JSON Strings, e.g. @data T = A | B@ is encoded as @\"A\"@ or @\"B\"@.
+ * A single constructor is ignored if there are payloads, e.g. in @data T = T ...@ the constructor @T@ is ignored:
+
+ * Records are encoded as JSON objects. @data T = T{k1 :: .., k2 :: ..}@ is encoded as @{\"k1\":...,\"k2\":...}@.
+ * Plain products are encoded as JSON arrays. @data T = T t1 t2@ is encoded as @[x1,x2]@.
+ * A single-field plain product is encoded as its payload, i.e. @data T = T t@ is encoded as \"x\" just like its payload.
+
+ * Multiple constructors are converted to single-key JSON objects if there are payloads:
+
+ * Records are encoded as JSON objects like above. @data T = A | B {k1 :: .., k2 :: ..}@ is encoded as
+ @{\"B\":{\"k1\":...,\"k2\":...}}@ in the @B .. ..@ case, or @\"A\"@ in the @A@ case.
+ * Plain products are similar to above, wrapped by an outer single-key object layer marking the constructor.
+
+These rules apply to user-defined ADTs, but some built-in instances have different behaviour, namely
+(see the short example after this list):
+
+ * 'Maybe a' is encoded as JSON @null@ in the 'Nothing' case, or directly encoded as its payload in the 'Just' case.
+ * '[a]' is encoded as a JSON array, including 'String'; to get a JSON string, use 'T.Text' or 'Str'.
+ * 'NonEmpty', 'Vector', 'PrimVector', 'HashSet', 'FlatSet' and 'FlatIntSet' are also encoded as JSON arrays.
+ * 'HashMap', 'FlatMap' and 'FlatIntMap' are encoded as JSON objects.
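+
+For example (an illustrative sketch following the rules above):
+
+@
+ > JSON.encodeText (Just [1 :: Int, 2, 3])
+ "[1,2,3]"
+ > JSON.encodeText (Nothing :: Maybe Int)
+ "null"
+@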
+
+There are some modifying options if you provide a custom 'Settings', which allows you to modify field names or constructor
+names. Please don't produce control characters during your modification, since we assume field labels and constructor
+names won't contain them; this lets us save an extra escaping pass. To use a custom 'Settings' just write:
+
+@
+ data T = T {foo :: Int, bar :: [Int]} deriving (Generic)
+ instance ToValue T where toValue = JSON.gToValue JSON.defaultSettings{ JSON.fieldFmt = JSON.snakeCase } . from
+
+ > JSON.toValue (T 0 [1,2,3])
+ Object [("foo",Number 0.0),("bar",Array [Number 1.0,Number 2.0,Number 3.0])]
+@
+
+= Write instances manually.
+
+You can write 'ToValue' and 'FromValue' instances by hand if the 'Generic' based ones don't suit you. Here is an example
+similar to aeson's.
+
+@
+ import qualified Std.Data.Text as T
+ import qualified Std.Data.Vector as V
+ import qualified Std.Data.Builder as B
+
+ data Person = Person { name :: T.Text , age :: Int } deriving Show
+
+ instance FromValue Person where
+ fromValue = JSON.withFlatMapR "Person" $ \\ v -> Person
+ \<$\> v .: "name"
+ \<*\> v .: "age"
+
+ instance ToValue Person where
+ toValue (Person n a) = JSON.Object $ V.pack [("name", toValue n),("age", toValue a)]
+
+ instance EncodeJSON Person where
+ encodeJSON (Person n a) = B.curly $ do
+ B.quotes "name" >> B.colon >> encodeJSON n
+ B.comma
+ B.quotes "age" >> B.colon >> encodeJSON a
+
+ > toValue (Person "Joe" 12)
+ Object [("name",String "Joe"),("age",Number 12.0)]
+ > JSON.convert' @Person . JSON.Object $ V.pack [("name",JSON.String "Joe"),("age",JSON.Number 12.0)]
+ Right (Person {name = "Joe", age = 12})
+ > JSON.encodeText (Person "Joe" 12)
+ "{\"name\":\"Joe\",\"age\":12}"
+@
+
+The 'Value' type is different from aeson's in that we use @Vector (Text, Value)@ to represent JSON objects, thus
+we can choose different strategies on key duplication, the lookup map type, etc. So instead of a single 'withObject',
+we provide 'withHashMap', 'withHashMapR', 'withFlatMap' and 'withFlatMapR', which use different lookup map types and
+different key order priorities. Most of the time 'FlatMap' is faster than 'HashMap' since we only use the lookup map
+once, and the cost of constructing a 'HashMap' is higher. If you want to work directly on key-values, 'withKeyValues'
+provides access to the key-value vector.
+
+-}
+
+module Std.Data.JSON
+ ( -- * Encode & Decode
+ DecodeError
+ , decode, decode', decodeChunks, decodeChunks', encodeBytes, encodeText, encodeTextBuilder
+ -- * Value type
+ , Value(..)
+ -- * parse into JSON Value
+ , parseValue, parseValue', parseValueChunks, parseValueChunks'
+ -- * Convert 'Value' to Haskell data
+ , convert, convert', Converter(..), fail', (<?>), prependContext
+ , PathElement(..), ConvertError
+ , typeMismatch, fromNull, withBool, withScientific, withBoundedScientific, withRealFloat
+ , withBoundedIntegral, withText, withArray, withKeyValues, withFlatMap, withFlatMapR
+ , withHashMap, withHashMapR, withEmbeddedJSON
+ , (.:), (.:?), (.:!), convertField, convertFieldMaybe, convertFieldMaybe'
+ -- * FromValue, ToValue & EncodeJSON
+ , ToValue(..)
+ , FromValue(..)
+ , EncodeJSON(..)
+ , defaultSettings, Settings(..), snakeCase, trainCase
+ , gToValue, gFromValue, gEncodeJSON
+ ) where
+
+
+import Std.Data.JSON.Base
+import qualified Std.Data.Text as T
+import Data.Char
+
+
+-- | Snake casing a pascal cased constructor name or camel cased field name, words are always lower cased and separated by an
+-- underscore.
+snakeCase :: String -> T.Text
+{-# INLINE snakeCase #-}
+snakeCase = symbCase '_'
+
+-- | Train casing a pascal cased constructor name or camel cased field name, words are always lower cased and separated by
+-- a hyphen.
+trainCase :: String -> T.Text
+{-# INLINE trainCase #-}
+trainCase = symbCase '-'
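+
+-- A quick sketch of the two helpers (illustrative, assuming @OverloadedStrings@ for the
+-- 'T.Text' literals):
+--
+-- > snakeCase "fooBarBaz" == "foo_bar_baz"
+-- > trainCase "FooBarBaz" == "foo-bar-baz"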
+
+--------------------------------------------------------------------------------
+
+symbCase :: Char -> String -> T.Text
+{-# INLINE symbCase #-}
+symbCase sym = T.pack . go . applyFirst toLower
+ where
+ go [] = []
+ go (x:xs) | isUpper x = sym : toLower x : go xs
+ | otherwise = x : go xs
+
+ applyFirst _ [] = []
+ applyFirst f (x:xs) = f x: xs
diff --git a/Std/Data/JSON/Base.hs b/Std/Data/JSON/Base.hs
new file mode 100644
index 0000000..d860f5a
--- /dev/null
+++ b/Std/Data/JSON/Base.hs
@@ -0,0 +1,1302 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DefaultSignatures #-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE KindSignatures #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE QuantifiedConstraints #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE TypeOperators #-}
+
+{-|
+Module : Std.Data.JSON.Base
+Description : Fast JSON serialization/deserialization
+Copyright : (c) Dong Han, 2019
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+This module provides 'Converter' to convert 'Value' to Haskell data types, and various tools to help
+users define 'FromValue', 'ToValue' and 'EncodeJSON' instances.
+-}
+
+module Std.Data.JSON.Base
+ ( -- * Encode & Decode
+ DecodeError
+ , decode, decode', decodeChunks, decodeChunks', encodeBytes, encodeText, encodeTextBuilder
+ -- * Re-export 'Value' type
+ , Value(..)
+ -- * parse into JSON Value
+ , JV.parseValue, JV.parseValue', JV.parseValueChunks, JV.parseValueChunks'
+ -- * Convert 'Value' to Haskell data
+ , convert, convert', Converter(..), fail', (<?>), prependContext
+ , PathElement(..), ConvertError
+ , typeMismatch, fromNull, withBool, withScientific, withBoundedScientific, withRealFloat
+ , withBoundedIntegral, withText, withArray, withKeyValues, withFlatMap, withFlatMapR
+ , withHashMap, withHashMapR, withEmbeddedJSON
+ , (.:), (.:?), (.:!), convertField, convertFieldMaybe, convertFieldMaybe'
+ -- * FromValue, ToValue & EncodeJSON
+ , defaultSettings, Settings(..)
+ , ToValue(..), GToValue(..)
+ , FromValue(..), GFromValue(..)
+ , EncodeJSON(..), GEncodeJSON(..)
+ -- * Helper classes for generics
+ , Field, GWriteFields(..), GMergeFields(..), GConstrToValue(..)
+ , LookupTable, GFromFields(..), GBuildLookup(..), GConstrFromValue(..)
+ , GAddPunctuation(..), GConstrEncodeJSON(..)
+ ) where
+
+import Control.Applicative
+import Control.DeepSeq
+import Control.Monad
+import qualified Control.Monad.Fail as Fail
+import Control.Monad.ST
+import Data.Data
+import Data.Fixed
+import Data.Functor.Compose
+import Data.Functor.Const
+import Data.Functor.Identity
+import Data.Functor.Product
+import Data.Functor.Sum
+import Data.Hashable
+import qualified Data.HashMap.Strict as HM
+import qualified Data.HashSet as HS
+import Data.Int
+import Data.List.NonEmpty (NonEmpty (..))
+import qualified Data.List.NonEmpty as NonEmpty
+import qualified Data.Monoid as Monoid
+import Data.Primitive.Types (Prim)
+import qualified Data.Primitive.SmallArray as A
+import Data.Proxy (Proxy (..))
+import Data.Ratio (Ratio, (%), numerator, denominator)
+import Data.Scientific (Scientific, base10Exponent, toBoundedInteger)
+import qualified Data.Scientific as Scientific
+import qualified Data.Semigroup as Semigroup
+import Data.Tagged (Tagged (..))
+import Data.Typeable
+import Data.Version (Version, parseVersion)
+import Data.Word
+import Data.Word
+import GHC.Exts (Proxy#, proxy#)
+import GHC.Generics
+import GHC.Natural
+import GHC.TypeNats
+import qualified Std.Data.Builder as B
+import Std.Data.Generics.Utils
+import Std.Data.JSON.Value (Value(..))
+import qualified Std.Data.JSON.Value as JV
+import qualified Std.Data.JSON.Builder as JB
+import qualified Std.Data.Parser as P
+import qualified Std.Data.Parser.Numeric as P
+import qualified Std.Data.Text as T
+import qualified Std.Data.TextBuilder as TB
+import qualified Std.Data.Vector.Base as V
+import qualified Std.Data.Vector.Extra as V
+import qualified Std.Data.Vector.FlatIntMap as FIM
+import qualified Std.Data.Vector.FlatIntSet as FIS
+import qualified Std.Data.Vector.FlatMap as FM
+import qualified Std.Data.Vector.FlatSet as FS
+import Text.ParserCombinators.ReadP (readP_to_S)
+
+--------------------------------------------------------------------------------
+
+-- There are two possible failures here:
+--
+-- * 'P.ParseError' is an error during parsing bytes to 'Value'.
+-- * 'ConvertError' is an error when converting 'Value' to target data type.
+type DecodeError = Either P.ParseError ConvertError
+
+-- | Decode a JSON doc; only trailing JSON whitespace is allowed.
+--
+decode' :: FromValue a => V.Bytes -> Either DecodeError a
+{-# INLINE decode' #-}
+decode' bs = case P.parse_ (JV.value <* JV.skipSpaces <* P.endOfInput) bs of
+ Left pErr -> Left (Left pErr)
+ Right v -> case convert fromValue v of
+ Left cErr -> Left (Right cErr)
+ Right r -> Right r
+
+-- | Decode JSON bytes, returning any trailing bytes.
+decode :: FromValue a => V.Bytes -> (V.Bytes, Either DecodeError a)
+{-# INLINE decode #-}
+decode bs = case P.parse JV.value bs of
+ (bs', Left pErr) -> (bs', Left (Left pErr))
+ (bs', Right v) -> case convert fromValue v of
+ Left cErr -> (bs', Left (Right cErr))
+ Right r -> (bs', Right r)
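+
+-- A sketch of the difference between the two (illustrative, assuming @OverloadedStrings@
+-- for the 'V.Bytes' literals):
+--
+-- > (decode' "[1,2,3] " :: Either DecodeError [Int]) -- Right [1,2,3]
+-- > (decode "[1,2,3]xx" :: (V.Bytes, Either DecodeError [Int])) -- ("xx", Right [1,2,3])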
+
+-- | Decode JSON doc chunks, returning trailing bytes.
+decodeChunks :: (FromValue a, Monad m) => m V.Bytes -> V.Bytes -> m (V.Bytes, Either DecodeError a)
+{-# INLINE decodeChunks #-}
+decodeChunks mb bs = do
+ mr <- (P.parseChunks JV.value mb bs)
+ case mr of
+ (bs', Left pErr) -> pure (bs', Left (Left pErr))
+ (bs', Right v) -> case convert fromValue v of
+ Left cErr -> pure (bs', Left (Right cErr))
+ Right r -> pure (bs', Right r)
+
+-- | Decode JSON doc chunks, consuming trailing JSON whitespaces (other trailing bytes are not allowed).
+decodeChunks' :: (FromValue a, Monad m) => m V.Bytes -> V.Bytes -> m (Either DecodeError a)
+{-# INLINE decodeChunks' #-}
+decodeChunks' mb bs = do
+ mr <- (P.parseChunks (JV.value <* JV.skipSpaces <* P.endOfInput) mb bs)
+ case mr of
+ (_, Left pErr) -> pure (Left (Left pErr))
+ (_, Right v) -> case convert fromValue v of
+ Left cErr -> pure (Left (Right cErr))
+ Right r -> pure (Right r)
+
+-- | Directly encode data to JSON bytes.
+encodeBytes :: EncodeJSON a => a -> V.Bytes
+{-# INLINE encodeBytes #-}
+encodeBytes = B.buildBytes . encodeJSON
+
+-- | Text version of 'encodeBytes'.
+encodeText :: EncodeJSON a => a -> T.Text
+{-# INLINE encodeText #-}
+encodeText = TB.buildText . encodeTextBuilder
+
+-- | JSON docs are guaranteed to be valid UTF-8 texts, so we provide this 'TB.TextBuilder' variant.
+encodeTextBuilder :: EncodeJSON a => a -> TB.TextBuilder ()
+{-# INLINE encodeTextBuilder #-}
+encodeTextBuilder = TB.unsafeFromBuilder . encodeJSON
+
+-- | Run a 'Converter' with input value.
+convert :: (a -> Converter r) -> a -> Either ConvertError r
+{-# INLINE convert #-}
+convert m v = runConverter (m v) (\ paths msg -> (Left (ConvertError paths msg))) Right
+
+-- | Run a 'Converter' with input value.
+convert' :: (FromValue a) => Value -> Either ConvertError a
+{-# INLINE convert' #-}
+convert' = convert fromValue
+
+--------------------------------------------------------------------------------
+
+-- | Elements of a (JSON) Value path used to describe the location of an error.
+data PathElement
+ = Key {-# UNPACK #-} !T.Text
+ -- ^ Path element of a key into an object,
+ -- \"object.key\".
+ | Index {-# UNPACK #-} !Int
+ -- ^ Path element of an index into an
+ -- array, \"array[index]\".
+ | Embedded
+ -- ^ path of an embedded (JSON) String
+ deriving (Eq, Show, Typeable, Ord, Generic, NFData)
+
+data ConvertError = ConvertError { errPath :: [PathElement], errMsg :: T.Text } deriving (Eq, Ord, Generic, NFData)
+
+instance Show ConvertError where
+ -- TODO use standard format
+ show (ConvertError paths msg) = T.unpack . TB.buildText $ do
+ "<"
+ mapM_ renderPath (reverse paths)
+ "> "
+ TB.text msg
+ where
+ renderPath (Index ix) = TB.char7 '[' >> TB.int ix >> TB.char7 ']'
+ renderPath (Key k) = TB.char7 '.' >> (TB.unsafeFromBuilder $ JB.string k)
+ renderPath Embedded = "<Embedded>"
+
+-- | 'Converter' for convert result from JSON 'Value'.
+--
+-- This is intended to be named differently from 'P.Parser' to avoid confusion.
+newtype Converter a = Converter { runConverter :: forall r. ([PathElement] -> T.Text -> r) -> (a -> r) -> r }
+
+instance Functor Converter where
+ fmap f m = Converter (\ kf k -> runConverter m kf (k . f))
+ {-# INLINE fmap #-}
+
+instance Applicative Converter where
+ pure a = Converter (\ _ k -> k a)
+ {-# INLINE pure #-}
+ (Converter f) <*> (Converter g) = Converter (\ kf k ->
+ f kf (\ f' -> g kf (k . f')))
+ {-# INLINE (<*>) #-}
+
+instance Alternative Converter where
+ {-# INLINE (<|>) #-}
+ (Converter f) <|> (Converter g) = Converter (\ kf k -> f (\ _ _ -> g kf k) k)
+ {-# INLINE empty #-}
+ empty = fail' "Std.Data.JSON.Base(Alternative).empty"
+
+instance MonadPlus Converter where
+ mzero = empty
+ {-# INLINE mzero #-}
+ mplus = (<|>)
+ {-# INLINE mplus #-}
+
+instance Monad Converter where
+ (Converter f) >>= g = Converter (\ kf k ->
+ f kf (\ a -> runConverter (g a) kf k))
+ {-# INLINE (>>=) #-}
+ return = pure
+ {-# INLINE return #-}
+ fail = Fail.fail
+ {-# INLINE fail #-}
+
+instance Fail.MonadFail Converter where
+ {-# INLINE fail #-}
+ fail = fail' . T.pack
+
+-- | 'T.Text' version of 'fail'.
+fail' :: T.Text -> Converter a
+{-# INLINE fail' #-}
+fail' msg = Converter (\ kf _ -> kf [] msg)
+
+--------------------------------------------------------------------------------
+
+-- | Produce an error message like @converting XXX failed, expected XXX, encountered XXX@.
+typeMismatch :: T.Text -- ^ The name of the type you are trying to convert.
+ -> T.Text -- ^ The JSON value type you are expecting to meet.
+ -> Value -- ^ The actual value encountered.
+ -> Converter a
+typeMismatch name expected v =
+ fail' $ T.concat ["converting ", name, " failed, expected ", expected, ", encountered ", actual]
+ where
+ actual = case v of
+ Object _ -> "Object"
+ Array _ -> "Array"
+ String _ -> "String"
+ Number _ -> "Number"
+ Bool _ -> "Boolean"
+ Null -> "Null"
+
+-- | Add JSON Path context to a converter
+--
+-- When converting a complex structure, it helps to annotate (sub)converters
+-- with context, so that if an error occurs, you can find its location.
+--
+-- > withFlatMapR "Person" $ \o ->
+-- > Person
+-- > <$> o .: "name" <?> Key "name"
+-- > <*> o .: "age" <?> Key "age"
+--
+-- (Standard methods like '(.:)' already do this.)
+--
+-- With such annotations, if an error occurs, you will get a JSON Path
+-- location of that error.
+(<?>) :: Converter a -> PathElement -> Converter a
+{-# INLINE (<?>) #-}
+(Converter p) <?> path = Converter (\ kf k -> p (kf . (path:)) k)
+infixl 9 <?>
+
+-- | Add context to a failure message, indicating the name of the structure
+-- being converted.
+--
+-- > prependContext "MyType" (fail "[error message]")
+-- > -- Error: "converting MyType failed, [error message]"
+prependContext :: T.Text -> Converter a -> Converter a
+{-# INLINE prependContext #-}
+prependContext name (Converter p) = Converter (\ kf k ->
+ p (\ paths msg -> kf paths (T.concat ["converting ", name, " failed, ", msg])) k)
+
+fromNull :: T.Text -> a -> Value -> Converter a
+{-# INLINE fromNull #-}
+fromNull _ a Null = pure a
+fromNull c _ v = typeMismatch c "Null" v
+
+withBool :: T.Text -> (Bool -> Converter a) -> Value -> Converter a
+{-# INLINE withBool #-}
+withBool _ f (Bool x) = f x
+withBool name f v = typeMismatch name "Bool" v
+
+-- | @'withScientific' name f value@ applies @f@ to the 'Scientific' number
+-- when @value@ is a 'Data.Aeson.Number' and fails using 'typeMismatch'
+-- otherwise.
+--
+-- /Warning/: If you are converting from a scientific to an unbounded
+-- type such as 'Integer' you may want to add a restriction on the
+-- size of the exponent (see 'withBoundedScientific') to prevent
+-- malicious input from filling up the memory of the target system.
+--
+-- ==== Error message example
+--
+-- > withScientific "MyType" f (String "oops")
+-- > -- Error: "converting MyType failed, expected Number, but encountered String"
+withScientific :: T.Text -> (Scientific -> Converter a) -> Value -> Converter a
+{-# INLINE withScientific #-}
+withScientific _ f (Number x) = f x
+withScientific name f v = typeMismatch name "Number" v
+
+-- | 'withRealFloat' tries to convert a floating number with the following rules:
+--
+-- * Use @±Infinity@ to represent out of range numbers.
+-- * Convert @Null@ as @NaN@
+--
+withRealFloat :: RealFloat a => T.Text -> (a -> Converter r) -> Value -> Converter r
+{-# INLINE withRealFloat #-}
+withRealFloat _ f (Number s) = f (Scientific.toRealFloat s)
+withRealFloat _ f Null = f (0/0)
+withRealFloat name f v = typeMismatch name "Number or Null" v
+
+-- | @'withBoundedScientific' name f value@ applies @f@ to the 'Scientific' number
+-- when @value@ is a 'Number' with exponent less than or equal to 1024.
+withBoundedScientific :: T.Text -> (Scientific -> Converter a) -> Value -> Converter a
+{-# INLINE withBoundedScientific #-}
+withBoundedScientific name f (Number x)
+ | e <= 1024 = f x
+ | otherwise = fail' . TB.buildText $ do
+ "converting "
+ TB.text name
+ " failed, found a number with exponent "
+ TB.int e
+ ", but it must not be greater than 1024"
+ where e = base10Exponent x
+withBoundedScientific name f v = typeMismatch name "Number" v
+
+-- | @'withBoundedIntegral' name f value@ applies @f@ to the integral number
+-- when @value@ is a 'Number' within @minBound ~ maxBound@.
+withBoundedIntegral :: (Bounded a, Integral a) => T.Text -> (a -> Converter r) -> Value -> Converter r
+{-# INLINE withBoundedIntegral #-}
+withBoundedIntegral name f (Number x) =
+ case toBoundedInteger x of
+ Just i -> f i
+ _ -> fail' . TB.buildText $ do
+ "converting "
+ TB.text name
+ "failed, value is either floating or will cause over or underflow "
+ TB.scientific x
+withBoundedIntegral name f v = typeMismatch name "Number" v
+
+withText :: T.Text -> (T.Text -> Converter a) -> Value -> Converter a
+{-# INLINE withText #-}
+withText _ f (String x) = f x
+withText name f v = typeMismatch name "String" v
+
+withArray :: T.Text -> (V.Vector Value -> Converter a) -> Value -> Converter a
+{-# INLINE withArray #-}
+withArray _ f (Array arr) = f arr
+withArray name f v = typeMismatch name "Array" v
+
+-- | Directly use 'Object' as key-values for further converting.
+withKeyValues :: T.Text -> (V.Vector (T.Text, Value) -> Converter a) -> Value -> Converter a
+{-# INLINE withKeyValues #-}
+withKeyValues _ f (Object kvs) = f kvs
+withKeyValues name f v = typeMismatch name "Object" v
+
+-- | Take a 'Object' as an 'FM.FlatMap T.Text Value', on key duplication prefer first one.
+withFlatMap :: T.Text -> (FM.FlatMap T.Text Value -> Converter a) -> Value -> Converter a
+{-# INLINE withFlatMap #-}
+withFlatMap _ f (Object obj) = f (FM.packVector obj)
+withFlatMap name f v = typeMismatch name "Object" v
+
+-- | Take a 'Object' as an 'FM.FlatMap T.Text Value', on key duplication prefer last one.
+withFlatMapR :: T.Text -> (FM.FlatMap T.Text Value -> Converter a) -> Value -> Converter a
+{-# INLINE withFlatMapR #-}
+withFlatMapR _ f (Object obj) = f (FM.packVectorR obj)
+withFlatMapR name f v = typeMismatch name "Object" v
+
+-- | Take a 'Object' as an 'HM.HashMap T.Text Value', on key duplication prefer first one.
+withHashMap :: T.Text -> (HM.HashMap T.Text Value -> Converter a) -> Value -> Converter a
+{-# INLINE withHashMap #-}
+withHashMap _ f (Object obj) = f (HM.fromList (V.unpackR obj))
+withHashMap name f v = typeMismatch name "Object" v
+
+-- | Take a 'Object' as an 'HM.HashMap T.Text Value', on key duplication prefer last one.
+withHashMapR :: T.Text -> (HM.HashMap T.Text Value -> Converter a) -> Value -> Converter a
+{-# INLINE withHashMapR #-}
+withHashMapR _ f (Object obj) = f (HM.fromList (V.unpack obj))
+withHashMapR name f v = typeMismatch name "Object" v
+
+-- | Decode a nested JSON-encoded string.
+withEmbeddedJSON :: T.Text -- ^ data type name
+ -> (Value -> Converter a) -- ^ an inner converter which will get the converted 'Value'.
+ -> Value -> Converter a -- a converter that takes a JSON String
+{-# INLINE withEmbeddedJSON #-}
+withEmbeddedJSON name innerConverter (String txt) = Converter (\ kf k ->
+ case decode' (T.getUTF8Bytes txt) of
+ Right v -> runConverter (innerConverter v) (\ paths msg -> kf (Embedded:paths) msg) k
+ Left (Left pErr) -> kf [] (T.intercalate ", " ("parsing embedded JSON failed ": pErr))
+ _ -> error "Std.JSON.Base: impossible, converting to Value should not fail")
+withEmbeddedJSON name _ v = typeMismatch name "String" v
+
+-- | Retrieve the value associated with the given key of an 'Object'.
+-- The result is 'empty' if the key is not present or the value cannot
+-- be converted to the desired type.
+--
+-- This accessor is appropriate if the key and value /must/ be present
+-- in an object for it to be valid. If the key and value are
+-- optional, use '.:?' instead.
+(.:) :: (FromValue a) => FM.FlatMap T.Text Value -> T.Text -> Converter a
+{-# INLINE (.:) #-}
+(.:) = convertField fromValue
+
+-- | Retrieve the value associated with the given key of an 'Object'. The
+-- result is 'Nothing' if the key is not present or if its value is 'Null',
+-- or 'empty' if the value cannot be converted to the desired type.
+--
+-- This accessor is most useful if the key and value can be absent
+-- from an object without affecting its validity. If the key and
+-- value are mandatory, use '.:' instead.
+(.:?) :: (FromValue a) => FM.FlatMap T.Text Value -> T.Text -> Converter (Maybe a)
+{-# INLINE (.:?) #-}
+(.:?) = convertFieldMaybe fromValue
+
+-- | Retrieve the value associated with the given key of an 'Object'.
+-- The result is 'Nothing' if the key is not present or 'empty' if the
+-- value cannot be converted to the desired type.
+--
+-- This differs from '.:?' by attempting to convert 'Null' the same as any
+-- other JSON value, instead of interpreting it as 'Nothing'.
+(.:!) :: (FromValue a) => FM.FlatMap T.Text Value -> T.Text -> Converter (Maybe a)
+{-# INLINE (.:!) #-}
+(.:!) = convertFieldMaybe' fromValue
+
+convertField :: (Value -> Converter a) -- ^ the field converter (value part of a key value pair)
+ -> FM.FlatMap T.Text Value -> T.Text -> Converter a
+{-# INLINE convertField #-}
+convertField p obj key = case FM.lookup key obj of
+ Just v -> p v <?> Key key
+ _ -> fail' (T.concat $ ["key ", key, " not present"])
+
+-- | Variant of '.:?' with explicit converter function.
+convertFieldMaybe :: (Value -> Converter a) -> FM.FlatMap T.Text Value -> T.Text -> Converter (Maybe a)
+{-# INLINE convertFieldMaybe #-}
+convertFieldMaybe p obj key = case FM.lookup key obj of
+ Just Null -> pure Nothing
+ Just v -> Just <$> p v <?> Key key
+ _ -> pure Nothing
+
+-- | Variant of '.:!' with explicit converter function.
+convertFieldMaybe' :: (Value -> Converter a) -> FM.FlatMap T.Text Value -> T.Text -> Converter (Maybe a)
+{-# INLINE convertFieldMaybe' #-}
+convertFieldMaybe' p obj key = case FM.lookup key obj of
+ Just v -> Just <$> p v <?> Key key
+ _ -> pure Nothing
+
+--------------------------------------------------------------------------------
+
+-- | Use @,@ as separator to connect list of builders.
+commaList' :: EncodeJSON a => [a] -> B.Builder ()
+{-# INLINE commaList' #-}
+commaList' = B.intercalateList B.comma encodeJSON
+
+-- | Use @,@ as separator to connect a vector of builders.
+commaVec' :: (EncodeJSON a, V.Vec v a) => v a -> B.Builder ()
+{-# INLINE commaVec' #-}
+commaVec' = B.intercalateVec B.comma encodeJSON
+
+--------------------------------------------------------------------------------
+
+-- | Generic encode/decode Settings
+--
+-- There should be no control characters in formatted texts since we don't escape
+-- field names or constructor names ('defaultSettings' relies on Haskell's lexical property).
+-- Otherwise 'encodeJSON' will output an illegal JSON string.
+data Settings = Settings
+ { fieldFmt :: String -> T.Text -- ^ format field labels
+ , constrFmt :: String -> T.Text -- ^ format constructor names.
+ }
+
+defaultSettings :: Settings
+defaultSettings = Settings T.pack T.pack
+
+--------------------------------------------------------------------------------
+-- ToValue
+--------------------------------------------------------------------------------
+
+-- | Typeclass for converting to JSON 'Value'.
+class ToValue a where
+ toValue :: a -> Value
+ default toValue :: (Generic a, GToValue (Rep a)) => a -> Value
+ toValue = gToValue defaultSettings . from
+
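+-- A generic deriving sketch (the @Pet@ type is hypothetical): with @DeriveGeneric@
+-- the default method does all the work, formatting field labels with 'defaultSettings':
+--
+-- @
+-- data Pet = Pet { petName :: T.Text, petAge :: Int } deriving (Generic)
+-- instance ToValue Pet
+--
+-- -- toValue (Pet "doggy" 3) is roughly
+-- -- Object [("petName", String "doggy"), ("petAge", Number 3)]
+-- @
+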
+class GToValue f where
+ gToValue :: Settings -> f a -> Value
+
+--------------------------------------------------------------------------------
+-- Selectors
+
+type family Field f where
+ Field (a :*: b) = Field a
+ Field (S1 (MetaSel Nothing u ss ds) f) = Value
+ Field (S1 (MetaSel (Just l) u ss ds) f) = (T.Text, Value)
+
+class GWriteFields f where
+ gWriteFields :: Settings -> A.SmallMutableArray s (Field f) -> Int -> f a -> ST s ()
+
+instance (ProductSize a, GWriteFields a, GWriteFields b, Field a ~ Field b) => GWriteFields (a :*: b) where
+ {-# INLINE gWriteFields #-}
+ gWriteFields s marr idx (a :*: b) = do
+ gWriteFields s marr idx a
+ gWriteFields s marr (idx + productSize (proxy# :: Proxy# a)) b
+
+instance (GToValue f) => GWriteFields (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gWriteFields #-}
+ gWriteFields s marr idx (M1 x) = A.writeSmallArray marr idx (gToValue s x)
+
+instance (GToValue f, Selector (MetaSel (Just l) u ss ds)) => GWriteFields (S1 (MetaSel (Just l) u ss ds) f) where
+ {-# INLINE gWriteFields #-}
+ gWriteFields s marr idx m1@(M1 x) = A.writeSmallArray marr idx ((fieldFmt s) (selName m1), gToValue s x)
+
+instance (GToValue f, Selector (MetaSel (Just l) u ss ds)) => GToValue (S1 (MetaSel (Just l) u ss ds) f) where
+ {-# INLINE gToValue #-}
+ gToValue s m1@(M1 x) =
+ let k = fieldFmt s $ selName m1
+ v = gToValue s x
+ in Object (V.singleton (k, v))
+
+instance GToValue f => GToValue (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gToValue #-}
+ gToValue s (M1 x) = gToValue s x
+
+instance ToValue a => GToValue (K1 i a) where
+ {-# INLINE gToValue #-}
+ gToValue s (K1 x) = toValue x
+
+class GMergeFields f where
+ gMergeFields :: Proxy# f -> A.SmallMutableArray s (Field f) -> ST s Value
+
+instance GMergeFields a => GMergeFields (a :*: b) where
+ {-# INLINE gMergeFields #-}
+ gMergeFields _ = gMergeFields (proxy# :: Proxy# a)
+
+instance GMergeFields (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gMergeFields #-}
+ gMergeFields _ marr = do
+ arr <- A.unsafeFreezeSmallArray marr
+ let l = A.sizeofSmallArray arr
+ pure (Array (V.Vector arr 0 l))
+
+instance GMergeFields (S1 (MetaSel (Just l) u ss ds) f) where
+ {-# INLINE gMergeFields #-}
+ gMergeFields _ marr = do
+ arr <- A.unsafeFreezeSmallArray marr
+ let l = A.sizeofSmallArray arr
+ pure (Object (V.Vector arr 0 l))
+
+--------------------------------------------------------------------------------
+-- Constructors
+
+class GConstrToValue f where
+ gConstrToValue :: Bool -> Settings -> f a -> Value
+
+instance GConstrToValue V1 where
+ {-# INLINE gConstrToValue #-}
+ gConstrToValue _ _ _ = error "Std.Data.JSON.Base: empty data type"
+
+instance (GConstrToValue f, GConstrToValue g) => GConstrToValue (f :+: g) where
+ {-# INLINE gConstrToValue #-}
+ gConstrToValue _ s (L1 x) = gConstrToValue True s x
+ gConstrToValue _ s (R1 x) = gConstrToValue True s x
+
+-- | Constructor without payload, convert to String
+instance (Constructor c) => GConstrToValue (C1 c U1) where
+ {-# INLINE gConstrToValue #-}
+ gConstrToValue _ s (M1 _) = String . constrFmt s $ conName (undefined :: t c U1 a)
+
+-- | Constructor with a single payload
+instance (Constructor c, GToValue (S1 sc f)) => GConstrToValue (C1 c (S1 sc f)) where
+ {-# INLINE gConstrToValue #-}
+ gConstrToValue False s (M1 x) = gToValue s x
+ gConstrToValue True s (M1 x) =
+ let k = constrFmt s $ conName (undefined :: t c f a)
+ v = gToValue s x
+ in Object (V.singleton (k, v))
+
+-- | Constructor with multiple payloads
+instance (ProductSize (a :*: b), GWriteFields (a :*: b), GMergeFields (a :*: b), Constructor c)
+ => GConstrToValue (C1 c (a :*: b)) where
+ {-# INLINE gConstrToValue #-}
+ gConstrToValue False s (M1 x) = runST (do
+ marr <- A.newSmallArray (productSize (proxy# :: Proxy# (a :*: b))) undefined
+ gWriteFields s marr 0 x
+ gMergeFields (proxy# :: Proxy# (a :*: b)) marr)
+ gConstrToValue True s (M1 x) =
+ let k = constrFmt s $ conName (undefined :: t c f a)
+ v = runST (do
+ marr <- A.newSmallArray (productSize (proxy# :: Proxy# (a :*: b))) undefined
+ gWriteFields s marr 0 x
+ gMergeFields (proxy# :: Proxy# (a :*: b)) marr)
+ in Object (V.singleton (k, v))
+
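+-- A sketch of how the constructor instances above combine for a multi-constructor
+-- type (the @T@ type is hypothetical, results shown roughly):
+--
+-- @
+-- data T = A | B Int | C Int Int deriving (Generic)
+-- instance ToValue T
+--
+-- -- toValue A       ==> String "A"
+-- -- toValue (B 1)   ==> Object [("B", Number 1)]
+-- -- toValue (C 1 2) ==> Object [("C", Array [Number 1, Number 2])]
+-- @
+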
+--------------------------------------------------------------------------------
+-- Data types
+instance GConstrToValue f => GToValue (D1 c f) where
+ {-# INLINE gToValue #-}
+ gToValue s (M1 x) = gConstrToValue False s x
+
+--------------------------------------------------------------------------------
+-- EncodeJSON
+--------------------------------------------------------------------------------
+
+class EncodeJSON a where
+ encodeJSON :: a -> B.Builder ()
+ default encodeJSON :: (Generic a, GEncodeJSON (Rep a)) => a -> B.Builder ()
+ encodeJSON = gEncodeJSON defaultSettings . from
+
+encodeJSONText :: EncodeJSON a => a -> TB.TextBuilder ()
+{-# INLINE encodeJSONText #-}
+encodeJSONText = TB.unsafeFromBuilder . encodeJSON
+
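+-- A small usage sketch (hypothetical @Pet@ type again); the generic default encodes
+-- records directly as JSON text without building a 'Value' first:
+--
+-- @
+-- data Pet = Pet { petName :: T.Text, petAge :: Int } deriving (Generic)
+-- instance EncodeJSON Pet
+--
+-- -- encodeJSON (Pet "doggy" 3) renders roughly as {"petName":"doggy","petAge":3}
+-- @
+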
+class GEncodeJSON f where
+ gEncodeJSON :: Settings -> f a -> B.Builder ()
+
+--------------------------------------------------------------------------------
+-- Selectors
+
+instance (GEncodeJSON f, Selector (MetaSel (Just l) u ss ds)) => GEncodeJSON (S1 (MetaSel (Just l) u ss ds) f) where
+ {-# INLINE gEncodeJSON #-}
+ gEncodeJSON s m1@(M1 x) = (fieldFmt s $ selName m1) `JB.kv` gEncodeJSON s x
+
+instance GEncodeJSON f => GEncodeJSON (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gEncodeJSON #-}
+ gEncodeJSON s (M1 x) = gEncodeJSON s x
+
+instance (GEncodeJSON a, GEncodeJSON b) => GEncodeJSON (a :*: b) where
+ {-# INLINE gEncodeJSON #-}
+ gEncodeJSON s (a :*: b) = gEncodeJSON s a >> B.comma >> gEncodeJSON s b
+
+instance EncodeJSON a => GEncodeJSON (K1 i a) where
+ {-# INLINE gEncodeJSON #-}
+ gEncodeJSON s (K1 x) = encodeJSON x
+
+class GAddPunctuation (f :: * -> *) where
+ gAddPunctuation :: Proxy# f -> B.Builder () -> B.Builder ()
+
+instance GAddPunctuation a => GAddPunctuation (a :*: b) where
+ {-# INLINE gAddPunctuation #-}
+ gAddPunctuation _ = gAddPunctuation (proxy# :: Proxy# a)
+
+instance GAddPunctuation (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gAddPunctuation #-}
+ gAddPunctuation _ b = B.square b
+
+instance GAddPunctuation (S1 (MetaSel (Just l) u ss ds) f) where
+ {-# INLINE gAddPunctuation #-}
+ gAddPunctuation _ b = B.curly b
+
+--------------------------------------------------------------------------------
+-- Constructors
+
+class GConstrEncodeJSON f where
+ gConstrEncodeJSON :: Bool -> Settings -> f a -> B.Builder ()
+
+instance GConstrEncodeJSON V1 where
+ {-# INLINE gConstrEncodeJSON #-}
+ gConstrEncodeJSON _ _ _ = error "Std.Data.JSON.Base: empty data type"
+
+instance (GConstrEncodeJSON f, GConstrEncodeJSON g) => GConstrEncodeJSON (f :+: g) where
+ {-# INLINE gConstrEncodeJSON #-}
+ gConstrEncodeJSON _ s (L1 x) = gConstrEncodeJSON True s x
+ gConstrEncodeJSON _ s (R1 x) = gConstrEncodeJSON True s x
+
+-- | Constructor without payload, convert to String
+instance (Constructor c) => GConstrEncodeJSON (C1 c U1) where
+ {-# INLINE gConstrEncodeJSON #-}
+ -- There should be no chars that need escaping in a constructor name
+ gConstrEncodeJSON _ s (M1 _) = B.quotes $
+ B.text . constrFmt s $ conName (undefined :: t c U1 a)
+
+-- | Constructor with a single payload
+instance (Constructor c, GEncodeJSON (S1 sc f)) => GConstrEncodeJSON (C1 c (S1 sc f)) where
+ {-# INLINE gConstrEncodeJSON #-}
+ gConstrEncodeJSON False s (M1 x) = gEncodeJSON s x
+ gConstrEncodeJSON True s (M1 x) = B.curly $ do
+ (constrFmt s $ conName (undefined :: t c f a)) `JB.kv` gEncodeJSON s x
+
+-- | Constructor with multiple payloads
+instance (GEncodeJSON (a :*: b), GAddPunctuation (a :*: b), Constructor c)
+ => GConstrEncodeJSON (C1 c (a :*: b)) where
+ {-# INLINE gConstrEncodeJSON #-}
+ gConstrEncodeJSON False s (M1 x) = gAddPunctuation (proxy# :: Proxy# (a :*: b)) (gEncodeJSON s x)
+ gConstrEncodeJSON True s (M1 x) = B.curly $ do
+ (constrFmt s $ conName (undefined :: t c f a)) `JB.kv`
+ gAddPunctuation (proxy# :: Proxy# (a :*: b)) (gEncodeJSON s x)
+
+--------------------------------------------------------------------------------
+-- Data types
+instance GConstrEncodeJSON f => GEncodeJSON (D1 c f) where
+ {-# INLINE gEncodeJSON #-}
+ gEncodeJSON s (M1 x) = gConstrEncodeJSON False s x
+
+--------------------------------------------------------------------------------
+-- FromValue
+--------------------------------------------------------------------------------
+
+class FromValue a where
+ fromValue :: Value -> Converter a
+ default fromValue :: (Generic a, GFromValue (Rep a)) => Value -> Converter a
+ fromValue v = to <$> gFromValue defaultSettings v
+
+class GFromValue f where
+ gFromValue :: Settings -> Value -> Converter (f a)
+
+--------------------------------------------------------------------------------
+-- Selectors
+
+type family LookupTable f where
+ LookupTable (a :*: b) = LookupTable a
+ LookupTable (S1 (MetaSel Nothing u ss ds) f) = V.Vector Value
+ LookupTable (S1 (MetaSel (Just l) u ss ds) f) = FM.FlatMap T.Text Value
+
+class GFromFields f where
+ gFromFields :: Settings -> LookupTable f -> Int -> Converter (f a)
+
+instance (ProductSize a, GFromFields a, GFromFields b, LookupTable a ~ LookupTable b)
+ => GFromFields (a :*: b) where
+ {-# INLINE gFromFields #-}
+ gFromFields s v idx = do
+ a <- gFromFields s v idx
+ b <- gFromFields s v (idx + productSize (proxy# :: Proxy# a))
+ pure (a :*: b)
+
+instance (GFromValue f) => GFromFields (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gFromFields #-}
+ gFromFields s v idx = do
+ v' <- V.unsafeIndexM v idx
+ M1 <$> gFromValue s v' <?> Index idx
+
+instance (GFromValue f, Selector (MetaSel (Just l) u ss ds)) => GFromFields (S1 (MetaSel (Just l) u ss ds) f) where
+ {-# INLINE gFromFields #-}
+ gFromFields s v _ = do
+ case FM.lookup fn v of
+ Just v' -> M1 <$> gFromValue s v' <?> Key fn
+ _ -> fail' ("Std.Data.JSON.Base: missing field " <> fn)
+ where
+ fn = (fieldFmt s) (selName (undefined :: S1 (MetaSel (Just l) u ss ds) f a))
+
+instance GFromValue f => GFromValue (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gFromValue #-}
+ gFromValue s x = M1 <$> gFromValue s x
+
+instance (GFromValue f, Selector (MetaSel (Just l) u ss ds)) => GFromValue (S1 (MetaSel (Just l) u ss ds) f) where
+ {-# INLINE gFromValue #-}
+ gFromValue s (Object v) = do
+ case FM.lookup fn (FM.packVectorR v) of
+ Just v' -> M1 <$> gFromValue s v' <?> Key fn
+ _ -> fail' ("Std.Data.JSON.Base: missing field " <> fn)
+ where fn = (fieldFmt s) (selName (undefined :: S1 (MetaSel (Just l) u ss ds) f a))
+ gFromValue s v = typeMismatch ("field " <> fn) "Object" v <?> Key fn
+ where fn = (fieldFmt s) (selName (undefined :: S1 (MetaSel (Just l) u ss ds) f a))
+
+instance FromValue a => GFromValue (K1 i a) where
+ {-# INLINE gFromValue #-}
+ gFromValue s x = K1 <$> fromValue x
+
+class GBuildLookup f where
+ gBuildLookup :: Proxy# f -> Int -> T.Text -> Value -> Converter (LookupTable f)
+
+instance (GBuildLookup a, GBuildLookup b) => GBuildLookup (a :*: b) where
+ {-# INLINE gBuildLookup #-}
+ gBuildLookup _ siz = gBuildLookup (proxy# :: Proxy# a) siz
+
+instance GBuildLookup (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gBuildLookup #-}
+ gBuildLookup _ siz name (Array v)
+ | siz' /= siz = fail' . TB.buildText $ do
+ "converting "
+ TB.text name
+ " failed, product size mismatch, expected "
+ TB.int siz
+ ", get"
+ TB.int siz'
+ | otherwise = pure v
+ where siz' = V.length v
+ gBuildLookup _ _ name x = typeMismatch name "Array" x
+
+instance GBuildLookup (S1 ((MetaSel (Just l) u ss ds)) f) where
+ {-# INLINE gBuildLookup #-}
+ gBuildLookup _ siz name (Object v)
+ | siz' /= siz = fail' . TB.buildText $ do
+ "converting "
+ TB.text name
+ " failed, product size mismatch, expected "
+ TB.int siz
+ ", get"
+ TB.int siz'
+ | otherwise = pure m
+ where siz' = FM.size m
+ m = FM.packVectorR v
+ gBuildLookup _ _ name x = typeMismatch name "Object" x
+
+--------------------------------------------------------------------------------
+-- Constructors
+
+class GConstrFromValue f where
+ gConstrFromValue :: Bool -> Settings -> Value -> Converter (f a)
+
+instance GConstrFromValue V1 where
+ {-# INLINE gConstrFromValue #-}
+ gConstrFromValue _ _ _ = error "Std.Data.JSON.Base: empty data type"
+
+instance (GConstrFromValue f, GConstrFromValue g) => GConstrFromValue (f :+: g) where
+ {-# INLINE gConstrFromValue #-}
+ gConstrFromValue _ s x = (L1 <$> gConstrFromValue True s x) <|> (R1 <$> gConstrFromValue True s x)
+
+-- | Constructor without payload, convert to String
+instance (Constructor c) => GConstrFromValue (C1 c U1) where
+ {-# INLINE gConstrFromValue #-}
+ gConstrFromValue _ s (String x)
+ | cn == x = pure (M1 U1)
+ | otherwise = fail' . T.concat $ ["converting ", cn', " failed, unknown constructor name ", x]
+ where cn = constrFmt s $ conName (undefined :: t c U1 a)
+ cn' = T.pack $ conName (undefined :: t c U1 a)
+ gConstrFromValue _ _ v = typeMismatch cn' "String" v
+ where cn' = T.pack $ conName (undefined :: t c U1 a)
+
+-- | Constructor with a single payload
+instance (Constructor c, GFromValue (S1 sc f)) => GConstrFromValue (C1 c (S1 sc f)) where
+ {-# INLINE gConstrFromValue #-}
+ gConstrFromValue False s x = M1 <$> gFromValue s x
+ gConstrFromValue True s x = case x of
+ Object v -> case V.indexM v 0 of
+ Just (k, v') | k == cn -> M1 <$> gFromValue s v' <?> Key cn
+ _ -> fail' . T.concat $ ["converting ", cn', " failed, constructor not found"]
+ _ -> typeMismatch cn' "Object" x
+ where cn = constrFmt s $ conName (undefined :: t c f a)
+ cn' = T.pack $ conName (undefined :: t c f a)
+
+-- | Constructor with multiple payloads
+instance (ProductSize (a :*: b), GFromFields (a :*: b), GBuildLookup (a :*: b), Constructor c)
+ => GConstrFromValue (C1 c (a :*: b)) where
+ {-# INLINE gConstrFromValue #-}
+ gConstrFromValue False s x = do
+ t <- gBuildLookup p (productSize p) cn' x
+ M1 <$> gFromFields s t 0
+ where cn' = T.pack $ conName (undefined :: t c f a)
+ p = proxy# :: Proxy# (a :*: b)
+ gConstrFromValue True s x = case x of
+ Object v -> case V.indexM v 0 of
+ Just (k, v') | k == cn -> do t <- gBuildLookup p (productSize p) cn' v'
+ M1 <$> gFromFields s t 0
+ _ -> fail' . T.concat $ ["converting ", cn', " failed, constructor not found"]
+ _ -> typeMismatch cn' "Object" x
+ where cn = constrFmt s $ conName (undefined :: t c f a)
+ cn' = T.pack $ conName (undefined :: t c f a)
+ p = proxy# :: Proxy# (a :*: b)
+
+--------------------------------------------------------------------------------
+-- Data types
+instance GConstrFromValue f => GFromValue (D1 c f) where
+ {-# INLINE gFromValue #-}
+ gFromValue s x = M1 <$> gConstrFromValue False s x
+
+--------------------------------------------------------------------------------
+-- Built-in Instances
+--------------------------------------------------------------------------------
+-- | Use 'Null' as @Proxy a@
+instance FromValue (Proxy a) where {{-# INLINE fromValue #-}; fromValue = fromNull "Proxy" Proxy;}
+instance ToValue (Proxy a) where {{-# INLINE toValue #-}; toValue _ = Null;}
+instance EncodeJSON (Proxy a) where {{-# INLINE encodeJSON #-}; encodeJSON _ = "null";}
+
+instance FromValue Value where {{-# INLINE fromValue #-}; fromValue = pure;}
+instance ToValue Value where { {-# INLINE toValue #-}; toValue = id; }
+instance EncodeJSON Value where { {-# INLINE encodeJSON #-}; encodeJSON = JB.value; }
+
+instance FromValue T.Text where {{-# INLINE fromValue #-}; fromValue = withText "Text" pure;}
+instance ToValue T.Text where {{-# INLINE toValue #-}; toValue = String;}
+instance EncodeJSON T.Text where {{-# INLINE encodeJSON #-}; encodeJSON = JB.string;}
+
+instance FromValue TB.Str where
+ {-# INLINE fromValue #-}
+ fromValue = withText "Str" (pure . TB.Str . T.unpack)
+instance ToValue TB.Str where
+ {-# INLINE toValue #-}
+ toValue = String . T.pack . TB.chrs
+instance EncodeJSON TB.Str where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = JB.string . T.pack . TB.chrs
+
+instance FromValue Scientific where {{-# INLINE fromValue #-}; fromValue = withScientific "Scientific" pure;}
+instance ToValue Scientific where {{-# INLINE toValue #-}; toValue = Number;}
+instance EncodeJSON Scientific where {{-# INLINE encodeJSON #-}; encodeJSON = B.scientific;}
+
+-- | The default instance prefers the later key when duplicated keys are present.
+instance FromValue a => FromValue (FM.FlatMap T.Text a) where
+ {-# INLINE fromValue #-}
+ fromValue = withFlatMapR "Std.Data.Vector.FlatMap.FlatMap"
+ (FM.traverseWithKey $ \ k v -> fromValue v <?> Key k)
+instance ToValue a => ToValue (FM.FlatMap T.Text a) where
+ {-# INLINE toValue #-}
+ toValue = Object . FM.sortedKeyValues . FM.map' toValue
+instance EncodeJSON a => EncodeJSON (FM.FlatMap T.Text a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = JB.object' encodeJSON . FM.sortedKeyValues
+
+instance (Ord a, FromValue a) => FromValue (FS.FlatSet a) where
+ {-# INLINE fromValue #-}
+ fromValue = withArray "Std.Data.Vector.FlatSet.FlatSet" $ \ v ->
+ FS.packRN (V.length v) <$>
+ (zipWithM (\ k v -> fromValue v <?> Index k) [0..] (V.unpack v))
+instance ToValue a => ToValue (FS.FlatSet a) where
+ {-# INLINE toValue #-}
+ toValue = Array . V.map' toValue . FS.sortedValues
+instance EncodeJSON a => EncodeJSON (FS.FlatSet a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = JB.array' encodeJSON . FS.sortedValues
+
+-- | The default instance prefers the later key when duplicated keys are present.
+instance FromValue a => FromValue (HM.HashMap T.Text a) where
+ {-# INLINE fromValue #-}
+ fromValue = withHashMapR "Data.HashMap.HashMap"
+ (HM.traverseWithKey $ \ k v -> fromValue v <?> Key k)
+instance ToValue a => ToValue (HM.HashMap T.Text a) where
+ {-# INLINE toValue #-}
+ toValue = Object . V.pack . HM.toList . HM.map toValue
+instance EncodeJSON a => EncodeJSON (HM.HashMap T.Text a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.curly . B.intercalateList B.comma (\ (k, v) -> k `JB.kv'` encodeJSON v) . HM.toList
+
+instance FromValue a => FromValue (FIM.FlatIntMap a) where
+ {-# INLINE fromValue #-}
+ fromValue = withFlatMapR "Std.Data.Vector.FlatIntMap.FlatIntMap" $ \ m ->
+ let kvs = FM.sortedKeyValues m
+ in FIM.packVectorR <$> (forM kvs $ \ (k, v) -> do
+ case P.parse_ P.int (T.getUTF8Bytes k) of
+ Right k' -> do
+ v' <- fromValue v <?> Key k
+ return (V.IPair k' v')
+ _ -> fail' ("converting Std.Data.Vector.FlatIntMap.FlatIntMap failed, unexpected key " <> k))
+instance ToValue a => ToValue (FIM.FlatIntMap a) where
+ {-# INLINE toValue #-}
+ toValue = Object . V.map' toKV . FIM.sortedKeyValues
+ where toKV (V.IPair i x) = let !k = TB.buildText (TB.int i)
+ !v = toValue x
+ in (k, v)
+instance EncodeJSON a => EncodeJSON (FIM.FlatIntMap a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.curly . B.intercalateVec B.comma (\ (V.IPair i x) -> do
+ B.quotes (B.int i)
+ B.colon
+ encodeJSON x) . FIM.sortedKeyValues
+
+instance FromValue FIS.FlatIntSet where
+ {-# INLINE fromValue #-}
+ fromValue = withArray "Std.Data.Vector.FlatIntSet.FlatIntSet" $ \ v ->
+ FIS.packRN (V.length v) <$> zipWithM (\ k v -> fromValue v <?> Index k) [0..] (V.unpack v)
+instance ToValue FIS.FlatIntSet where
+ {-# INLINE toValue #-}
+ toValue = toValue . FIS.sortedValues
+instance EncodeJSON FIS.FlatIntSet where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = encodeJSON . FIS.sortedValues
+
+instance FromValue a => FromValue (V.Vector a) where
+ {-# INLINE fromValue #-}
+ fromValue = withArray "Std.Data.Vector.Vector"
+ (V.traverseWithIndex $ \ k v -> fromValue v <?> Index k)
+instance ToValue a => ToValue (V.Vector a) where
+ {-# INLINE toValue #-}
+ toValue = Array . V.map toValue
+instance EncodeJSON a => EncodeJSON (V.Vector a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.square . commaVec'
+
+instance (Prim a, FromValue a) => FromValue (V.PrimVector a) where
+ {-# INLINE fromValue #-}
+ fromValue = withArray "Std.Data.Vector.PrimVector"
+ (V.traverseWithIndex $ \ k v -> fromValue v <?> Index k)
+instance (Prim a, ToValue a) => ToValue (V.PrimVector a) where
+ {-# INLINE toValue #-}
+ toValue = Array . V.map toValue
+instance (Prim a, EncodeJSON a) => EncodeJSON (V.PrimVector a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.square . commaVec'
+
+instance (Eq a, Hashable a, FromValue a) => FromValue (HS.HashSet a) where
+ {-# INLINE fromValue #-}
+ fromValue = withArray "Std.Data.Vector.FlatSet.FlatSet" $ \ v ->
+ HS.fromList <$>
+ (zipWithM (\ k v -> fromValue v <?> Index k) [0..] (V.unpack v))
+instance (ToValue a) => ToValue (HS.HashSet a) where
+ {-# INLINE toValue #-}
+ toValue = toValue . HS.toList
+instance (EncodeJSON a) => EncodeJSON (HS.HashSet a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = encodeJSON . HS.toList
+
+instance FromValue a => FromValue [a] where
+ {-# INLINE fromValue #-}
+ fromValue = withArray "[a]" $ \ v ->
+ zipWithM (\ k v -> fromValue v <?> Index k) [0..] (V.unpack v)
+instance ToValue a => ToValue [a] where
+ {-# INLINE toValue #-}
+ toValue = Array . V.pack . map toValue
+instance EncodeJSON a => EncodeJSON [a] where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.square . commaList'
+
+instance FromValue a => FromValue (NonEmpty a) where
+ {-# INLINE fromValue #-}
+ fromValue = withArray "NonEmpty" $ \ v -> do
+ l <- zipWithM (\ k v -> fromValue v <?> Index k) [0..] (V.unpack v)
+ case l of (x:xs) -> pure (x :| xs)
+ _ -> fail' "unexpected empty array"
+instance (ToValue a) => ToValue (NonEmpty a) where
+ {-# INLINE toValue #-}
+ toValue = toValue . NonEmpty.toList
+instance (EncodeJSON a) => EncodeJSON (NonEmpty a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = encodeJSON . NonEmpty.toList
+
+instance FromValue Bool where {{-# INLINE fromValue #-}; fromValue = withBool "Bool" pure;}
+instance ToValue Bool where {{-# INLINE toValue #-}; toValue = Bool; }
+instance EncodeJSON Bool where {{-# INLINE encodeJSON #-}; encodeJSON True = "true"; encodeJSON _ = "false";}
+
+instance FromValue Char where
+ {-# INLINE fromValue #-}
+ fromValue = withText "Char" $ \ t ->
+ case T.headMaybe t of
+ Just c -> pure c
+ _ -> fail' (T.concat ["converting Char failed, expected a string of length 1"])
+instance ToValue Char where
+ {-# INLINE toValue #-}
+ toValue = String . T.singleton
+instance EncodeJSON Char where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = JB.string . T.singleton
+
+
+instance FromValue Double where {{-# INLINE fromValue #-}; fromValue = withRealFloat "Double" pure;}
+instance FromValue Float where {{-# INLINE fromValue #-}; fromValue = withRealFloat "Float" pure;}
+instance ToValue Float where {{-# INLINE toValue #-}; toValue = Number . P.floatToScientific;}
+instance ToValue Double where {{-# INLINE toValue #-}; toValue = Number . P.doubleToScientific;}
+instance EncodeJSON Float where {{-# INLINE encodeJSON #-}; encodeJSON = B.float;}
+instance EncodeJSON Double where {{-# INLINE encodeJSON #-}; encodeJSON = B.double;}
+
+instance FromValue Int where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Int" pure;}
+instance FromValue Int8 where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Int8" pure;}
+instance FromValue Int16 where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Int16" pure;}
+instance FromValue Int32 where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Int32" pure;}
+instance FromValue Int64 where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Int64" pure;}
+instance FromValue Word where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Word" pure;}
+instance FromValue Word8 where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Word8" pure;}
+instance FromValue Word16 where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Word16" pure;}
+instance FromValue Word32 where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Word32" pure;}
+instance FromValue Word64 where {{-# INLINE fromValue #-}; fromValue = withBoundedIntegral "Word64" pure;}
+instance ToValue Int where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Int8 where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Int16 where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Int32 where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Int64 where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Word where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Word8 where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Word16 where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Word32 where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance ToValue Word64 where {{-# INLINE toValue #-}; toValue = Number . fromIntegral;}
+instance EncodeJSON Int where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Int8 where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Int16 where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Int32 where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Int64 where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Word where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Word8 where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Word16 where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Word32 where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+instance EncodeJSON Word64 where {{-# INLINE encodeJSON #-}; encodeJSON = B.int;}
+
+-- | This instance includes a bounds check to prevent maliciously large inputs to fill up the memory of the target system. You can newtype Scientific and provide your own instance using 'withScientific' if you want to allow larger inputs.
+instance FromValue Integer where
+ {-# INLINE fromValue #-}
+ fromValue = withBoundedScientific "Integer" $ \ n ->
+ case Scientific.floatingOrInteger n :: Either Double Integer of
+ Right x -> pure x
+ Left _ -> fail' . TB.buildText $ do
+ "converting Integer failed, unexpected floating number "
+ TB.scientific n
+instance ToValue Integer where
+ {-# INLINE toValue #-}
+ toValue = Number . fromIntegral
+instance EncodeJSON Integer where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.integer
+
+instance FromValue Natural where
+ {-# INLINE fromValue #-}
+ fromValue = withBoundedScientific "Natural" $ \ n ->
+ if n < 0
+ then fail' . TB.buildText $ do
+ "converting Natural failed, unexpected negative number "
+ TB.scientific n
+ else case Scientific.floatingOrInteger n :: Either Double Natural of
+ Right x -> pure x
+ Left _ -> fail' . TB.buildText $ do
+ "converting Natural failed, unexpected floating number "
+ TB.scientific n
+instance ToValue Natural where
+ {-# INLINE toValue #-}
+ toValue = Number . fromIntegral
+instance EncodeJSON Natural where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.integer . fromIntegral
+
+instance FromValue Ordering where
+ fromValue = withText "Ordering" $ \ s ->
+ case s of
+ "LT" -> pure LT
+ "EQ" -> pure EQ
+ "GT" -> pure GT
+ _ -> fail' . T.concat $ ["converting Ordering failed, unexpected ",
+ s, " expected \"LT\", \"EQ\", or \"GT\""]
+instance ToValue Ordering where
+ {-# INLINE toValue #-}
+ toValue LT = String "LT"
+ toValue EQ = String "EQ"
+ toValue GT = String "GT"
+instance EncodeJSON Ordering where
+ {-# INLINE encodeJSON #-}
+ encodeJSON LT = "LT"
+ encodeJSON EQ = "EQ"
+ encodeJSON GT = "GT"
+
+instance FromValue () where
+ {-# INLINE fromValue #-}
+ fromValue = withArray "()" $ \ v ->
+ if V.null v
+ then pure ()
+ else fail' "converting () failed, expected an empty array"
+instance ToValue () where
+ {-# INLINE toValue #-}
+ toValue () = Array V.empty
+instance EncodeJSON () where
+ {-# INLINE encodeJSON #-}
+ encodeJSON () = "[]"
+
+instance FromValue Version where
+ {-# INLINE fromValue #-}
+ fromValue = withText "Version" (go . readP_to_S parseVersion . T.unpack)
+ where
+ go [(v,[])] = pure v
+ go (_ : xs) = go xs
+ go _ = fail "converting Version failed"
+instance ToValue Version where
+ {-# INLINE toValue #-}
+ toValue = String . T.pack . show
+instance EncodeJSON Version where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.string7 . show
+
+instance FromValue a => FromValue (Maybe a) where
+ {-# INLINE fromValue #-}
+ fromValue Null = pure Nothing
+ fromValue v = Just <$> fromValue v
+instance ToValue a => ToValue (Maybe a) where
+ {-# INLINE toValue #-}
+ toValue Nothing = Null
+ toValue (Just x) = toValue x
+instance EncodeJSON a => EncodeJSON (Maybe a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON Nothing = "null"
+ encodeJSON (Just x) = encodeJSON x
+
+-- | This instance includes a bounds check to prevent maliciously large inputs to fill up the memory of the target system. You can newtype Ratio and provide your own instance using 'withScientific' if you want to allow larger inputs.
+instance (FromValue a, Integral a) => FromValue (Ratio a) where
+ {-# INLINE fromValue #-}
+ fromValue = withFlatMapR "Rational" $ \obj -> do
+ numerator <- obj .: "numerator"
+ denominator <- obj .: "denominator"
+ if denominator == 0
+ then fail' "Ratio denominator was 0"
+ else pure (numerator % denominator)
+instance (ToValue a, Integral a) => ToValue (Ratio a) where
+ {-# INLINE toValue #-}
+ toValue x = Object (V.pack [("numerator", n), ("denominator", d)])
+ where !n = toValue (numerator x)
+ !d = toValue (denominator x)
+instance (EncodeJSON a, Integral a) => EncodeJSON (Ratio a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON x =
+ B.curly $ ("\"numerator\"" >> B.colon >> encodeJSON (numerator x))
+ >> B.comma >> ("\"denominator\"" >> B.colon >> encodeJSON (denominator x))
+
+-- | This instance includes a bounds check to prevent maliciously large inputs to fill up the memory of the target system. You can newtype Fixed and provide your own instance using 'withScientific' if you want to allow larger inputs.
+instance HasResolution a => FromValue (Fixed a) where
+ {-# INLINE fromValue #-}
+ fromValue = withBoundedScientific "Fixed" (pure . realToFrac)
+instance HasResolution a => ToValue (Fixed a) where
+ {-# INLINE toValue #-}
+ toValue = Number . realToFrac
+instance HasResolution a => EncodeJSON (Fixed a) where
+ {-# INLINE encodeJSON #-}
+ encodeJSON = B.scientific . realToFrac
+
+--------------------------------------------------------------------------------
+
+deriving newtype instance FromValue (f (g a)) => FromValue (Compose f g a)
+deriving newtype instance FromValue a => FromValue (Semigroup.Min a)
+deriving newtype instance FromValue a => FromValue (Semigroup.Max a)
+deriving newtype instance FromValue a => FromValue (Semigroup.First a)
+deriving newtype instance FromValue a => FromValue (Semigroup.Last a)
+deriving newtype instance FromValue a => FromValue (Semigroup.WrappedMonoid a)
+deriving newtype instance FromValue a => FromValue (Semigroup.Dual a)
+deriving newtype instance FromValue a => FromValue (Monoid.First a)
+deriving newtype instance FromValue a => FromValue (Monoid.Last a)
+deriving newtype instance FromValue a => FromValue (Identity a)
+deriving newtype instance FromValue a => FromValue (Const a b)
+deriving newtype instance FromValue b => FromValue (Tagged a b)
+
+deriving newtype instance ToValue (f (g a)) => ToValue (Compose f g a)
+deriving newtype instance ToValue a => ToValue (Semigroup.Min a)
+deriving newtype instance ToValue a => ToValue (Semigroup.Max a)
+deriving newtype instance ToValue a => ToValue (Semigroup.First a)
+deriving newtype instance ToValue a => ToValue (Semigroup.Last a)
+deriving newtype instance ToValue a => ToValue (Semigroup.WrappedMonoid a)
+deriving newtype instance ToValue a => ToValue (Semigroup.Dual a)
+deriving newtype instance ToValue a => ToValue (Monoid.First a)
+deriving newtype instance ToValue a => ToValue (Monoid.Last a)
+deriving newtype instance ToValue a => ToValue (Identity a)
+deriving newtype instance ToValue a => ToValue (Const a b)
+deriving newtype instance ToValue b => ToValue (Tagged a b)
+
+deriving newtype instance EncodeJSON (f (g a)) => EncodeJSON (Compose f g a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Semigroup.Min a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Semigroup.Max a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Semigroup.First a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Semigroup.Last a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Semigroup.WrappedMonoid a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Semigroup.Dual a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Monoid.First a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Monoid.Last a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Identity a)
+deriving newtype instance EncodeJSON a => EncodeJSON (Const a b)
+deriving newtype instance EncodeJSON b => EncodeJSON (Tagged a b)
+
+--------------------------------------------------------------------------------
+
+deriving anyclass instance (FromValue (f a), FromValue (g a), FromValue a) => FromValue (Sum f g a)
+deriving anyclass instance (FromValue a, FromValue b) => FromValue (Either a b)
+deriving anyclass instance (FromValue (f a), FromValue (g a)) => FromValue (Product f g a)
+deriving anyclass instance (FromValue a, FromValue b) => FromValue (a, b)
+deriving anyclass instance (FromValue a, FromValue b, FromValue c) => FromValue (a, b, c)
+deriving anyclass instance (FromValue a, FromValue b, FromValue c, FromValue d) => FromValue (a, b, c, d)
+deriving anyclass instance (FromValue a, FromValue b, FromValue c, FromValue d, FromValue e) => FromValue (a, b, c, d, e)
+deriving anyclass instance (FromValue a, FromValue b, FromValue c, FromValue d, FromValue e, FromValue f) => FromValue (a, b, c, d, e, f)
+deriving anyclass instance (FromValue a, FromValue b, FromValue c, FromValue d, FromValue e, FromValue f, FromValue g) => FromValue (a, b, c, d, e, f, g)
+
+deriving anyclass instance (ToValue (f a), ToValue (g a), ToValue a) => ToValue (Sum f g a)
+deriving anyclass instance (ToValue a, ToValue b) => ToValue (Either a b)
+deriving anyclass instance (ToValue (f a), ToValue (g a)) => ToValue (Product f g a)
+deriving anyclass instance (ToValue a, ToValue b) => ToValue (a, b)
+deriving anyclass instance (ToValue a, ToValue b, ToValue c) => ToValue (a, b, c)
+deriving anyclass instance (ToValue a, ToValue b, ToValue c, ToValue d) => ToValue (a, b, c, d)
+deriving anyclass instance (ToValue a, ToValue b, ToValue c, ToValue d, ToValue e) => ToValue (a, b, c, d, e)
+deriving anyclass instance (ToValue a, ToValue b, ToValue c, ToValue d, ToValue e, ToValue f) => ToValue (a, b, c, d, e, f)
+deriving anyclass instance (ToValue a, ToValue b, ToValue c, ToValue d, ToValue e, ToValue f, ToValue g) => ToValue (a, b, c, d, e, f, g)
+
+deriving anyclass instance (EncodeJSON (f a), EncodeJSON (g a), EncodeJSON a) => EncodeJSON (Sum f g a)
+deriving anyclass instance (EncodeJSON a, EncodeJSON b) => EncodeJSON (Either a b)
+deriving anyclass instance (EncodeJSON (f a), EncodeJSON (g a)) => EncodeJSON (Product f g a)
+deriving anyclass instance (EncodeJSON a, EncodeJSON b) => EncodeJSON (a, b)
+deriving anyclass instance (EncodeJSON a, EncodeJSON b, EncodeJSON c) => EncodeJSON (a, b, c)
+deriving anyclass instance (EncodeJSON a, EncodeJSON b, EncodeJSON c, EncodeJSON d) => EncodeJSON (a, b, c, d)
+deriving anyclass instance (EncodeJSON a, EncodeJSON b, EncodeJSON c, EncodeJSON d, EncodeJSON e) => EncodeJSON (a, b, c, d, e)
+deriving anyclass instance (EncodeJSON a, EncodeJSON b, EncodeJSON c, EncodeJSON d, EncodeJSON e, EncodeJSON f) => EncodeJSON (a, b, c, d, e, f)
+deriving anyclass instance (EncodeJSON a, EncodeJSON b, EncodeJSON c, EncodeJSON d, EncodeJSON e, EncodeJSON f, EncodeJSON g) => EncodeJSON (a, b, c, d, e, f, g)
diff --git a/Std/Data/JSON/Builder.hs b/Std/Data/JSON/Builder.hs
new file mode 100644
index 0000000..e2d94c9
--- /dev/null
+++ b/Std/Data/JSON/Builder.hs
@@ -0,0 +1,110 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+{-# LANGUAGE TypeApplications #-}
+
+{-|
+Module : Std.Data.JSON.Builder
+Description : JSON representation and builders
+Copyright : (c) Dong Han, 2019
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+This module provides builders for JSON 'Value's, a Haskell JSON representation. These builders are designed to comply with <https://tools.ietf.org/html/rfc8259 rfc8259>. Only control characters are escaped; other Unicode codepoints are written directly instead of being escaped.
+
+-}
+module Std.Data.JSON.Builder
+ ( -- * Value Builders
+ value
+ , object
+ , object'
+ , array
+ , array'
+ , string
+ -- * Builder helpers
+ , kv, kv'
+ -- * Re-export 'Value' type
+ , Value(..)
+ ) where
+
+import Control.Monad
+import Control.Monad.ST.Unsafe (unsafeIOToST)
+import Data.Bits (shiftL)
+import Data.Functor
+import Data.Primitive.PrimArray
+import Data.Scientific (Scientific)
+import Data.Typeable
+import Data.Word
+import GHC.Prim (unsafeCoerce#)
+import qualified Std.Data.Builder as B
+import qualified Std.Data.Builder.Base as B
+import qualified Std.Data.Text as T
+import qualified Std.Data.Text.Base as T
+import Std.Data.Vector.Base as V
+import Std.Data.Vector.Extra as V
+import Std.Foreign.PrimArray
+import Std.Data.JSON.Value (Value(..))
+
+#define DOUBLE_QUOTE 34
+
+-- | Use @:@ as the separator to connect a label (not escaped, only quoted) with a field builder.
+kv :: T.Text -> B.Builder () -> B.Builder ()
+{-# INLINE kv #-}
+l `kv` b = B.quotes (B.text l) >> B.colon >> b
+
+-- | Use @:@ as the separator to connect a label (escaped and quoted) with a field builder.
+kv' :: T.Text -> B.Builder () -> B.Builder ()
+{-# INLINE kv' #-}
+l `kv'` b = string l >> B.colon >> b
+
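+-- A small sketch of the difference (output shown roughly, @OverloadedStrings@ assumed):
+--
+-- @
+-- "name" `kv`  string "Alice"   -- "name":"Alice", the key is written as-is
+-- "name" `kv'` string "Alice"   -- same output, but the key also goes through 'string' escaping
+-- @
+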
+value :: Value -> B.Builder ()
+{-# INLINABLE value #-}
+value (Object kvs) = object kvs
+value (Array vs) = array vs
+value (String t) = string t
+value (Number n) = B.scientific n
+value (Bool True) = "true"
+value (Bool False) = "false"
+value Null = "null"
+
+array :: V.Vector Value -> B.Builder ()
+{-# INLINE array #-}
+array = B.square . B.intercalateVec B.comma value
+
+array' :: (a -> B.Builder ()) -> V.Vector a -> B.Builder ()
+{-# INLINE array' #-}
+array' f = B.square . B.intercalateVec B.comma f
+
+object :: V.Vector (T.Text, Value) -> B.Builder ()
+{-# INLINE object #-}
+object = B.curly . B.intercalateVec B.comma (\ (k, v) -> k `kv'` value v)
+
+object' :: (a -> B.Builder ()) -> V.Vector (T.Text, a) -> B.Builder ()
+{-# INLINE object' #-}
+object' f = B.curly . B.intercalateVec B.comma (\ (k, v) -> k `kv'` f v)
+
+string :: T.Text -> B.Builder ()
+{-# INLINE string #-}
+string (T.Text (V.PrimVector ba@(PrimArray ba#) s l)) = do
+ let siz = escape_json_string_length ba# s l
+ B.ensureN siz
+ B.Builder (\ _ k (B.Buffer mba@(MutablePrimArray mba#) i) -> do
+ if siz == l+2 -- no need to escape
+ then do
+ writePrimArray mba i DOUBLE_QUOTE
+ copyPrimArray mba (i+1) ba s l
+ writePrimArray mba (i+1+l) DOUBLE_QUOTE
+ else void $ unsafeIOToST (escape_json_string ba# s l (unsafeCoerce# mba#) i)
+ k () (B.Buffer mba (i+siz)))
+
+foreign import ccall unsafe escape_json_string_length
+ :: BA# Word8 -> Int -> Int -> Int
+
+foreign import ccall unsafe escape_json_string
+ :: BA# Word8 -> Int -> Int -> MBA# Word8 -> Int -> IO Int
diff --git a/Std/Data/JSON/Value.hs b/Std/Data/JSON/Value.hs
new file mode 100644
index 0000000..d65dd8a
--- /dev/null
+++ b/Std/Data/JSON/Value.hs
@@ -0,0 +1,280 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE UnliftedFFITypes #-}
+
+{-|
+Module : Std.Data.JSON.Value
+Description : JSON representation and parsers
+Copyright : (c) Dong Han, 2019
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+This module provides the definition of and parsers for JSON 'Value's, a Haskell JSON representation. The parsers are designed to comply with <https://tools.ietf.org/html/rfc8259 rfc8259>; notable pitfalls are:
+
+ * The numeric representation uses 'Scientific', which imposes a limit on a number's exponent part (limited to 'Int').
+ * Unescaped control characters (<= 0x1F) are NOT accepted (different from aeson).
+ * Only @0x20, 0x09, 0x0A, 0x0D@ are valid JSON whitespace; 'skipSpaces' from this module is therefore different from 'P.skipSpaces'.
+ * A JSON document shouldn't have trailing characters other than the whitespace described above, see 'parseValue''
+ and 'parseValueChunks''.
+ * Objects are represented as key-value vectors; key order and duplicated keys are preserved for further processing.
+
+Note that rfc8259 doesn't enforce unique keys in objects; it's up to users to decide how to deal with key duplication, e.g. preferring the first or the last key, see 'Std.Data.JSON.Base.withFlatMap' or 'Std.Data.JSON.Base.withFlatMapR' for example.
+
+There are no lazy parsers here; every piece of a JSON document is parsed into a normal form 'Value'. 'Object' and 'Array' payloads are packed into 'Vector's to avoid accumulating lists in memory. Read more about <http://winterland.me/2019/03/05/aeson's-mysterious-lazy-parsing why no lazy parsing is needed>.
+-}
+
+module Std.Data.JSON.Value
+ ( -- * Value type
+ Value(..)
+ -- * parse into JSON Value
+ , parseValue
+ , parseValue'
+ , parseValueChunks
+ , parseValueChunks'
+ -- * Value Parsers
+ , value
+ , object
+ , array
+ , string
+ , skipSpaces
+ ) where
+
+import Control.DeepSeq
+import Control.Monad
+import Data.Bits ((.&.))
+import Data.Functor
+import Data.Primitive.PrimArray
+import Data.Scientific (Scientific, scientific)
+import Data.Typeable
+import Data.Word
+import GHC.Generics
+import qualified Std.Data.Parser as P
+import Std.Data.Parser ((<?>))
+import qualified Std.Data.Text as T
+import Std.Data.TextBuilder (ToText)
+import qualified Std.Data.Text.Base as T
+import Std.Data.Vector.Base as V
+import Std.Data.Vector.Extra as V
+import Std.Foreign.PrimArray
+import System.IO.Unsafe (unsafeDupablePerformIO)
+import Test.QuickCheck.Arbitrary (Arbitrary(..))
+import Test.QuickCheck.Gen (Gen(..), listOf)
+
+#define BACKSLASH 92
+#define CLOSE_CURLY 125
+#define CLOSE_SQUARE 93
+#define COMMA 44
+#define COLON 58
+#define DOUBLE_QUOTE 34
+#define OPEN_CURLY 123
+#define OPEN_SQUARE 91
+#define C_0 48
+#define C_9 57
+#define C_A 65
+#define C_F 70
+#define C_a 97
+#define C_f 102
+#define C_n 110
+#define C_t 116
+#define MINUS 45
+
+--------------------------------------------------------------------------------
+-- | A JSON value represented as a Haskell value.
+--
+-- The 'Object''s payload is a key-value vector instead of a map, parsed
+-- directly from the JSON document. This design choice has the following advantages:
+--
+-- * Allows different strategies for handling duplicated keys.
+-- * Allows different 'Map' types for further processing, e.g. 'Std.Data.Vector.FlatMap'
+-- * Roundtrips without touching the original key-value order.
+-- * Saves time if constructing a map is not necessary, e.g.
+-- using a linear scan to find a key if only that key is needed.
+--
+data Value = Object {-# UNPACK #-} !(V.Vector (T.Text, Value))
+ | Array {-# UNPACK #-} !(V.Vector Value)
+ | String {-# UNPACK #-} !T.Text
+ | Number {-# UNPACK #-} !Scientific
+ | Bool !Bool
+ | Null
+ deriving (Eq, Show, Typeable, Generic, ToText)
+
+instance NFData Value where
+ {-# INLINE rnf #-}
+ rnf (Object o) = rnf o
+ rnf (Array a) = rnf a
+ rnf (String s) = rnf s
+ rnf (Number n) = rnf n
+ rnf (Bool b) = rnf b
+ rnf Null = ()
+
+instance Arbitrary Value where
+ -- limit maximum depth of JSON document, otherwise it's too slow to run any tests
+ arbitrary = arbitraryValue 0 4
+ where
+ arbitraryValue d s = do
+ i <- arbitrary :: Gen Word
+ case (i `mod` 6) of
+ 0 -> if d < s then Object . V.pack <$> listOf (arbitraryKV (d+1) s)
+ else pure Null
+ 1 -> if d < s then Array . V.pack <$> listOf (arbitraryValue (d+1) s)
+ else pure Null
+ 2 -> String <$> arbitrary
+ 3 -> do
+ c <- arbitrary
+ e <- arbitrary
+ pure . Number $ scientific c e
+ 4 -> Bool <$> arbitrary
+ _ -> pure Null
+
+ arbitraryKV d s = (,) <$> arbitrary <*> arbitraryValue d s
+
+ shrink (Object kvs) = snd <$> (V.unpack kvs)
+ shrink (Array vs) = V.unpack vs
+ shrink _ = []
+
+-- | Parse 'Value' without consuming trailing bytes.
+parseValue :: V.Bytes -> (V.Bytes, Either P.ParseError Value)
+{-# INLINE parseValue #-}
+parseValue = P.parse value
+
+-- | Parse 'Value' and consume all trailing JSON whitespace; if there are
+-- bytes left, parsing will fail.
+parseValue' :: V.Bytes -> Either P.ParseError Value
+{-# INLINE parseValue' #-}
+parseValue' = P.parse_ (value <* skipSpaces <* P.endOfInput)
+
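+-- A quick sketch (@OverloadedStrings@ for the 'V.Bytes' literal, results shown roughly):
+--
+-- @
+-- parseValue' "[1, true, \"x\"]"
+-- -- Right (Array [Number 1.0, Bool True, String "x"])
+--
+-- parseValue' "[1] trailing"
+-- -- Left [...]  -- trailing non-whitespace bytes make the parse fail
+-- @
+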
+-- | Incrementally parse 'Value' without consuming trailing bytes.
+parseValueChunks :: Monad m => m V.Bytes -> V.Bytes -> m (V.Bytes, Either P.ParseError Value)
+{-# INLINE parseValueChunks #-}
+parseValueChunks = P.parseChunks value
+
+-- | Incrementally parse 'Value' and consume all trailing JSON whitespace; if there are
+-- bytes left, parsing will fail.
+parseValueChunks' :: Monad m => m V.Bytes -> V.Bytes -> m (Either P.ParseError Value)
+{-# INLINE parseValueChunks' #-}
+parseValueChunks' mi inp = snd <$> P.parseChunks (value <* skipSpaces <* P.endOfInput) mi inp
+
+--------------------------------------------------------------------------------
+
+-- | The only valid whitespace in a JSON document is space, newline,
+-- carriage return, and tab.
+skipSpaces :: P.Parser ()
+{-# INLINE skipSpaces #-}
+skipSpaces = P.skipWhile (\ w -> w == 0x20 || w == 0x0a || w == 0x0d || w == 0x09)
+
+-- | JSON 'Value' parser.
+value :: P.Parser Value
+{-# INLINABLE value #-}
+value = "Std.Data.JSON.Value.value" <?> do
+ skipSpaces
+ w <- P.peek
+ case w of
+ DOUBLE_QUOTE -> P.skipWord8 *> (String <$> string_)
+ OPEN_CURLY -> P.skipWord8 *> (Object <$> object_)
+ OPEN_SQUARE -> P.skipWord8 *> (Array <$> array_)
+ C_f -> P.bytes "false" $> (Bool False)
+ C_t -> P.bytes "true" $> (Bool True)
+ C_n -> P.bytes "null" $> Null
+ _ | w >= 48 && w <= 57 || w == MINUS -> Number <$> P.scientific'
+ | otherwise -> fail "Std.Data.JSON.Value.value: not a valid json value"
+
+-- | parse json array with leading OPEN_SQUARE.
+array :: P.Parser (V.Vector Value)
+{-# INLINE array #-}
+array = "Std.Data.JSON.Value.array" <?> P.word8 OPEN_SQUARE *> array_
+
+-- | parse json array without leading OPEN_SQUARE.
+array_ :: P.Parser (V.Vector Value)
+{-# INLINABLE array_ #-}
+array_ = do
+ skipSpaces
+ w <- P.peek
+ if w == CLOSE_SQUARE
+ then P.skipWord8 $> V.empty
+ else loop [] 1
+ where
+ loop :: [Value] -> Int -> P.Parser (V.Vector Value)
+ loop acc !n = do
+ !v <- value
+ skipSpaces
+ let acc' = v:acc
+ ch <- P.satisfy $ \w -> w == COMMA || w == CLOSE_SQUARE
+ if ch == COMMA
+ then skipSpaces *> loop acc' (n+1)
+ else pure $! V.packRN n acc' -- n start from 1, so no need to +1 here
+
+-- | parse json object with leading OPEN_CURLY.
+object :: P.Parser (V.Vector (T.Text, Value))
+{-# INLINE object #-}
+object = "Std.Data.JSON.Value.object" <?> P.word8 OPEN_CURLY *> object_
+
+-- | parse json object without leading OPEN_CURLY.
+object_ :: P.Parser (V.Vector (T.Text, Value))
+{-# INLINABLE object_ #-}
+object_ = do
+ skipSpaces
+ w <- P.peek
+ if w == CLOSE_CURLY
+ then P.skipWord8 $> V.empty
+ else loop [] 1
+ where
+ loop :: [(T.Text, Value)] -> Int -> P.Parser (V.Vector (T.Text, Value))
+ loop acc !n = do
+ !k <- string
+ skipSpaces
+ P.word8 COLON
+ !v <- value
+ skipSpaces
+ let acc' = (k, v) : acc
+ ch <- P.satisfy $ \w -> w == COMMA || w == CLOSE_CURLY
+ if ch == COMMA
+ then skipSpaces *> loop acc' (n+1)
+ else pure $! V.packRN n acc' -- n start from 1, so no need to +1 here
+
+--------------------------------------------------------------------------------
+
+string :: P.Parser T.Text
+{-# INLINE string #-}
+string = "Std.Data.JSON.Value.string" <?> P.word8 DOUBLE_QUOTE *> string_
+
+string_ :: P.Parser T.Text
+{-# INLINE string_ #-}
+string_ = do
+ (bs, state) <- P.scanChunks 0 go
+ let mt = case state .&. 0xFF of
+ -- need escaping
+ 1 -> unsafeDupablePerformIO (do
+ let !len = V.length bs
+ !mpa <- newPrimArray len
+ !len' <- withMutablePrimArrayUnsafe mpa (\ mba# _ ->
+ withPrimVectorUnsafe bs (decode_json_string mba#))
+ !pa <- unsafeFreezePrimArray mpa
+ if len' >= 0
+ then pure (Just (T.Text (V.PrimVector pa 0 len'))) -- unescaping also validate utf8
+ else pure Nothing)
+ 3 -> Nothing -- reject unescaped control characters
+ _ -> T.validateMaybe bs
+ case mt of
+ Just t -> P.skipWord8 $> t
+ _ -> fail "Std.Data.JSON.Value.string_: utf8 validation or unescaping failed"
+ where
+ go :: Word32 -> V.Bytes -> Either Word32 (V.Bytes, V.Bytes, Word32)
+ go !state v =
+ case unsafeDupablePerformIO . withPrimUnsafe state $ \ ps ->
+ withPrimVectorUnsafe v (find_json_string_end ps)
+ of (state', len)
+ | len >= 0 ->
+ let !r = V.unsafeTake len v
+ !rest = V.unsafeDrop len v
+ in Right (r, rest, state')
+ | otherwise -> Left state'
+
+foreign import ccall unsafe find_json_string_end :: MBA# Word32 -> BA# Word8 -> Int -> Int -> IO Int
+foreign import ccall unsafe decode_json_string :: MBA# Word8 -> BA# Word8 -> Int -> Int -> IO Int
diff --git a/Std/Data/LEON.hs b/Std/Data/LEON.hs
index e1153b7..0e73012 100644
--- a/Std/Data/LEON.hs
+++ b/Std/Data/LEON.hs
@@ -171,32 +171,40 @@ BE_INST(Char)
instance LEON a => LEON (V.Vector a) where
{-# INLINE encode #-}
- encode xs = do
- encode (V.length xs)
- mapM_ encode xs
+ encode = encodeVec
{-# INLINE decode #-}
- decode = do
- len <- decode @Int
- V.packN len <$> replicateM len decode
+ decode = decodeVec
-instance {-# OVERLAPPABLE #-} (Prim a, LEON a) => LEON (V.PrimVector a) where
+instance (Prim a, LEON a) => LEON (V.PrimVector a) where
{-# INLINE encode #-}
- encode xs = do
- encode (V.length xs)
- mapM_ encode (V.unpack xs)
+ encode = encodeVec
{-# INLINE decode #-}
- decode = do
- len <- decode @Int
- V.packN len <$> replicateM len decode
+ decode = decodeVec
-instance {-# OVERLAPPING #-} LEON V.Bytes where
- {-# INLINE encode #-}
- encode bs = do
- let l = V.length bs
- encode l
- B.bytes bs
- {-# INLINE decode #-}
- decode = decode @Int >>= P.take
+encodeVec :: (V.Vec v a, LEON a) => v a -> Builder ()
+{-# INLINE [1] encodeVec #-}
+{-# RULES "encodeVec/Bytes" encodeVec = encodeBytes #-}
+encodeVec xs = do
+ encode (V.length xs)
+ V.traverseVec_ encode xs
+
+decodeVec :: (V.Vec v a, LEON a) => Parser (v a)
+{-# INLINE [1] decodeVec #-}
+{-# RULES "decodeVec/Bytes" decodeVec = decodeBytes #-}
+decodeVec = do
+ len <- decode @Int
+ V.packN len <$> replicateM len decode
+
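+-- 'encodeBytes' / 'decodeBytes' are the specialised 'V.Bytes' paths that the
+-- rewrite rules above substitute for the generic element-by-element versions;
+-- the @INLINE [1]@ phase annotations delay inlining so the rules can fire first.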
+encodeBytes :: V.Bytes -> Builder ()
+{-# INLINE encodeBytes #-}
+encodeBytes bs = do
+ let l = V.length bs
+ encode l
+ B.bytes bs
+
+decodeBytes :: Parser V.Bytes
+{-# INLINE decodeBytes #-}
+decodeBytes = decode @Int >>= P.take
instance LEON T.Text where
{-# INLINE encode #-}
diff --git a/Std/Data/Parser.hs b/Std/Data/Parser.hs
index 0c21383..2937286 100644
--- a/Std/Data/Parser.hs
+++ b/Std/Data/Parser.hs
@@ -22,17 +22,19 @@ You can use 'Alternative' instance to do backtracking, each branch will either s
module Std.Data.Parser
( -- * Parser types
Result(..)
+ , ParseError
, Parser
+ , (<?>)
-- * Running a parser
- , parse, parse', parseChunk, parseChunks, finishParsing
- , runAndKeepTrack
+ , parse, parse_, parseChunk, parseChunks, finishParsing
+ , runAndKeepTrack, match
-- * Basic parsers
- , ensureN, endOfInput
+ , ensureN, endOfInput, atEnd
-- * Primitive decoders
, decodePrim, decodePrimLE, decodePrimBE
-- * More parsers
, scan, scanChunks, peekMaybe, peek, satisfy, satisfyWith
- , word8, anyWord8, endOfLine, skip, skipWhile, skipSpaces
+ , word8, char8, skipWord8, endOfLine, skip, skipWhile, skipSpaces
, take, takeTill, takeWhile, takeWhile1, bytes, bytesCI
, text
-- * Numeric parsers
@@ -45,6 +47,13 @@ module Std.Data.Parser
, float, double
, scientific
, scientifically
+ -- * Stricter fractional (rfc8259)
+ , rational'
+ , float', double'
+ , scientific'
+ , scientifically'
+ -- * Misc
+ , isSpace, isHexDigit, isDigit
) where
import Std.Data.Parser.Base
diff --git a/Std/Data/Parser/Base.hs b/Std/Data/Parser/Base.hs
index 982e273..2513724 100644
--- a/Std/Data/Parser/Base.hs
+++ b/Std/Data/Parser/Base.hs
@@ -1,5 +1,7 @@
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RankNTypes #-}
@@ -14,29 +16,41 @@ Maintainer : winterland1989@gmail.com
Stability : experimental
Portability : non-portable
-This module provide a simple resumable 'Parser', which is suitable for binary protocol and simple textual protocol parsing.
+This module provides a simple resumable 'Parser', which is suitable for binary protocol and simple textual protocol parsing. Both binary parsers ('decodePrim', etc.) and textual parsers are provided, and they all work on 'V.Bytes'.
-You can use 'Alternative' instance to do backtracking, each branch will either succeed and may consume some input, or fail without consume anything. It's recommend to use 'peek' to avoid backtracking if possible to get high performance.
+You can use the 'Alternative' instance to do backtracking; each branch will either succeed (and may consume some input) or fail without consuming anything. It's recommended to use 'peek' or 'peekMaybe' to avoid backtracking where possible to get high performance.
+
+Error messages can be attached using '<?>', which has very small overhead, so it's recommended to attach a message in front of a composed parser like @xPacket = "Foo.Bar.xPacket" <?> do ...@. The following is an example message when parsing an integer fails:
+
+@
+ >parse int "foo"
+ ([102,111,111],Left ["Std.Data.Parser.Numeric.int","Std.Data.Parser.Base.takeWhile1: no satisfied byte"])
+ -- It's easy to see we're trying to match a leading sign or digit here
+@
-}
module Std.Data.Parser.Base
( -- * Parser types
Result(..)
+ , ParseError
, ParseStep
, Parser(..)
+ , (<?>)
-- * Running a parser
- , parse, parse', parseChunk, parseChunks, finishParsing
- , runAndKeepTrack
+ , parse, parse_, parseChunk, parseChunks, finishParsing
+ , runAndKeepTrack, match
-- * Basic parsers
- , ensureN, endOfInput
+ , ensureN, endOfInput, atEnd
-- * Primitive decoders
, decodePrim, decodePrimLE, decodePrimBE
-- * More parsers
, scan, scanChunks, peekMaybe, peek, satisfy, satisfyWith
- , word8, char8, anyWord8, endOfLine, skip, skipWhile, skipSpaces
+ , word8, char8, skipWord8, endOfLine, skip, skipWhile, skipSpaces
, take, takeTill, takeWhile, takeWhile1, bytes, bytesCI
, text
+ -- * Misc
+ , isSpace
) where
import Control.Applicative
@@ -45,66 +59,96 @@ import qualified Control.Monad.Fail as Fail
import qualified Data.CaseInsensitive as CI
import qualified Data.Primitive.PrimArray as A
import Data.Int
+import Data.Typeable
+import qualified Data.List as List
import Data.Word
-import Data.Word8 (isSpace)
import GHC.Types
import Prelude hiding (take, takeWhile)
import Std.Data.PrimArray.UnalignedAccess
import qualified Std.Data.Text.Base as T
import qualified Std.Data.Vector.Base as V
import qualified Std.Data.Vector.Extra as V
+import Std.IO.Exception
+import GHC.Stack
+
-- | Simple parsing result, that represent respectively:
--
--- * success: the remaining unparsed data and the parsed value
+-- * Success: the remaining unparsed data and the parsed value
--
--- * failure: the remaining unparsed data and the error message
+-- * Failure: the remaining unparsed data and the error message
--
--- * partial: that need for more input data, supply empty bytes to indicate 'endOfInput'
+-- * Partial: more input data is needed; supply empty bytes to indicate 'endOfInput'
--
data Result a
- = Success !V.Bytes a
- | Failure !V.Bytes String
- | Partial (V.Bytes -> Result a)
+ = Success a !V.Bytes
+ | Failure ParseError !V.Bytes
+ | Partial (ParseStep a)
+
+-- | A parse step consumes 'V.Bytes' and produces a 'Result'.
+type ParseStep r = V.Bytes -> Result r
+
+-- | Type alias for error message
+type ParseError = [T.Text]
instance Functor Result where
- fmap f (Success s a) = Success s (f a)
- fmap _ (Failure s msg) = Failure s msg
+ fmap f (Success a s) = Success (f a) s
fmap f (Partial k) = Partial (fmap f . k)
+ fmap _ (Failure e v) = Failure e v
instance Show a => Show (Result a) where
- show (Success _ a) = "Success " ++ show a
+ show (Success a _) = "Success " ++ show a
show (Partial _) = "Partial _"
- show (Failure _ errs) = "Failure: " ++ show errs
+ show (Failure errs _) = "Failure: " ++ show errs
-type ParseStep r = V.Bytes -> Result r
-- | Simple CPSed parser
--
-newtype Parser a = Parser { runParser :: forall r . (a -> ParseStep r) -> ParseStep r }
-
+-- A parser takes a failure continuation and a success one; while the success continuation is
+-- usually composed via the 'Monad' instance, the failure one is more like a reader part, which can
+-- be modified via '<?>'. If you build parsers from the ground up, a pattern like this can be used:
+--
+-- @
+-- xxParser = do
+--     ensureN n errMsg ...     -- make sure we have at least n bytes
+-- Parser $ \ kf k inp -> -- fail continuation, success continuation and input
+-- ...
+-- ... kf errMsg (if input not OK)
+-- ... k ... (if we get something useful for next parser)
+-- @
+newtype Parser a = Parser {
+ runParser :: forall r . (ParseError -> ParseStep r) -> (a -> ParseStep r) -> ParseStep r
+ }
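A concrete editorial instance of the pattern described above (hypothetical parser name; @OverloadedStrings@ assumed for the 'T.Text' error messages): a one-byte parser written directly against the failure and success continuations.

@
nonZeroByte :: Parser Word8
nonZeroByte = do
    ensureN 1 ["Example.nonZeroByte: not enough bytes"]   -- make sure one byte is available
    Parser (\ kf k inp ->
        let w = V.unsafeHead inp
        in if w == 0
            then kf ["Example.nonZeroByte: unexpected NUL"] inp
            else k w (V.unsafeTail inp))
@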
+
+-- It seems eta-expanding all params to ensure parsers are saturated is helpful
instance Functor Parser where
- fmap f (Parser pa) = Parser (\ k -> pa (\ a -> k (f a)))
+ fmap f (Parser pa) = Parser (\ kf k inp -> pa kf (k . f) inp)
{-# INLINE fmap #-}
- a <$ Parser pb = Parser (\ k -> pb (\ _ -> k a))
+ a <$ Parser pb = Parser (\ kf k inp -> pb kf (\ _ -> k a) inp)
{-# INLINE (<$) #-}
instance Applicative Parser where
- pure x = Parser (\ k -> k x)
+ pure x = Parser (\ _ k inp -> k x inp)
{-# INLINE pure #-}
- Parser pf <*> Parser pa = Parser (\ k -> pf (\ f -> pa (k . f)))
+ Parser pf <*> Parser pa = Parser (\ kf k inp -> pf kf (\ f -> pa kf (k . f)) inp)
{-# INLINE (<*>) #-}
+ Parser pa *> Parser pb = Parser (\ kf k inp -> pa kf (\ _ inp' -> pb kf k inp') inp)
+ {-# INLINE (*>) #-}
+ Parser pa <* Parser pb = Parser (\ kf k inp -> pa kf (\ x inp' -> pb kf (\ _ -> k x) inp') inp)
+ {-# INLINE (<*) #-}
instance Monad Parser where
return = pure
{-# INLINE return #-}
- Parser pa >>= f = Parser (\ k -> pa (\ a -> runParser (f a) k))
+ Parser pa >>= f = Parser (\ kf k inp -> pa kf (\ a -> runParser (f a) kf k) inp)
{-# INLINE (>>=) #-}
- fail str = Parser (\ _ input -> Failure input str)
+ (>>) = (*>)
+ {-# INLINE (>>) #-}
+ fail = Fail.fail
{-# INLINE fail #-}
instance Fail.MonadFail Parser where
- fail str = Parser (\ _ input -> Failure input str)
+ fail = fail' . T.pack
{-# INLINE fail #-}
instance MonadPlus Parser where
@@ -114,38 +158,43 @@ instance MonadPlus Parser where
{-# INLINE mplus #-}
instance Alternative Parser where
- empty = Parser (\ _ input -> Failure input "Std.Data.Parser.Base(Alternative).empty")
+ empty = fail' "Std.Data.Parser.Base(Alternative).empty"
{-# INLINE empty #-}
f <|> g = do
- (r, bs) <- runAndKeepTrack f
+ (r, bss) <- runAndKeepTrack f
case r of
- Success input x -> Parser (\ k _ -> k x input)
- Failure _ _ -> pushBack bs >> g
+ Success x inp -> Parser (\ _ k _ -> k x inp)
+ Failure _ _ -> let !bs = V.concat (reverse bss)
+ in Parser (\ kf k _ -> runParser g kf k bs)
_ -> error "Std.Data.Parser.Base: impossible"
{-# INLINE (<|>) #-}
+-- | 'T.Text' version of 'fail'.
+fail' :: T.Text -> Parser a
+{-# INLINE fail' #-}
+fail' msg = Parser (\ kf _ inp -> kf [msg] inp)
-- | Parse the complete input, without resupplying
-parse :: Parser a -> V.Bytes -> Either String a
-{-# INLINE parse #-}
-parse (Parser p) input = snd $ finishParsing (p (flip Success) input)
+parse_ :: Parser a -> V.Bytes -> Either ParseError a
+{-# INLINE parse_ #-}
+parse_ (Parser p) inp = snd $ finishParsing (p Failure Success inp)
-- | Parse the complete input, without resupplying, return the rest bytes
-parse' :: Parser a -> V.Bytes -> (V.Bytes, Either String a)
-{-# INLINE parse' #-}
-parse' (Parser p) input = finishParsing (p (flip Success) input)
+parse :: Parser a -> V.Bytes -> (V.Bytes, Either ParseError a)
+{-# INLINE parse #-}
+parse (Parser p) inp = finishParsing (p Failure Success inp)
-- | Parse an input chunk
parseChunk :: Parser a -> V.Bytes -> Result a
{-# INLINE parseChunk #-}
-parseChunk (Parser p) = p (flip Success)
+parseChunk (Parser p) = p Failure Success
-- | Finish parsing and fetch result, feed empty bytes if it's 'Partial' result.
-finishParsing :: Result a -> (V.Bytes, Either String a)
+finishParsing :: Result a -> (V.Bytes, Either ParseError a)
{-# INLINABLE finishParsing #-}
finishParsing r = case r of
- Success rest a -> (rest, Right a)
- Failure rest errs -> (rest, Left errs)
+ Success a rest -> (rest, Right a)
+ Failure errs rest -> (rest, Left errs)
Partial f -> finishParsing (f V.empty)
-- | Run a parser with an initial input string, and a monadic action
@@ -153,72 +202,101 @@ finishParsing r = case r of
--
-- Note, once the monadic action returns empty bytes, parsers will stop drawing
-- more bytes (take it as 'endOfInput').
-parseChunks :: Monad m => m V.Bytes -> Parser a -> V.Bytes -> m (V.Bytes, Either String a)
+parseChunks :: Monad m => Parser a -> m V.Bytes -> V.Bytes -> m (V.Bytes, Either ParseError a)
{-# INLINABLE parseChunks #-}
-parseChunks m (Parser p) input = go m (p (flip Success) input)
+parseChunks (Parser p) m inp = go m (p Failure Success inp)
where
go m r = case r of
Partial f -> do
inp <- m
if V.null inp
- then go (return V.empty) (f V.empty)
+ then go (pure V.empty) (f V.empty)
else go m (f inp)
- Success rest a -> return (rest, Right a)
- Failure rest errs -> return (rest, Left errs)
+ Success a rest -> pure (rest, Right a)
+ Failure errs rest -> pure (rest, Left errs)
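A hedged usage sketch for 'parseChunks' (hypothetical helper; 'Data.IORef' assumed in scope): pop chunks from an 'IORef' and hand back empty bytes once the source is drained, so the parser sees 'endOfInput'.

@
parseFromChunks :: Parser a -> [V.Bytes] -> IO (V.Bytes, Either ParseError a)
parseFromChunks p chunks0 = do
    ref <- newIORef chunks0
    let pop = atomicModifyIORef' ref $ \ cs -> case cs of
                []      -> ([], V.empty)     -- drained: empty bytes signal 'endOfInput'
                (c:cs') -> (cs', c)
    parseChunks p pop V.empty
@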
+
+(<?>) :: T.Text -> Parser a -> Parser a
+{-# INLINE (<?>) #-}
+msg <?> (Parser p) = Parser (\ kf k inp -> p (kf . (msg:)) k inp)
+infixr 0 <?>
--- | Run a parser and keep track of all the input it consumes.
+-- | Run a parser and keep track of all the input chunks it consumes.
-- Once it's finished, return the final result (always 'Success' or 'Failure') and
-- all consumed chunks.
--
runAndKeepTrack :: Parser a -> Parser (Result a, [V.Bytes])
{-# INLINE runAndKeepTrack #-}
-runAndKeepTrack (Parser pa) = Parser $ \ k0 input ->
- let r0 = pa (\ a input' -> Success input' a) input in go [] r0 k0
- where
- go !acc r k0 = case r of
- Partial k -> Partial (\ input -> go (input:acc) (k input) k0)
- Success input' _ -> k0 (r, reverse acc) input'
- Failure input' _ -> k0 (r, reverse acc) input'
-
-pushBack :: [V.Bytes] -> Parser ()
-{-# INLINE pushBack #-}
-pushBack [] = return ()
-pushBack bs = Parser (\ k input -> k () (V.concat (input : bs)))
+runAndKeepTrack (Parser pa) = Parser $ \ _ k0 inp ->
+ let go !acc r k0 = case r of
+ Partial k -> Partial (\ inp -> go (inp:acc) (k inp) k0)
+ Success _ inp' -> k0 (r, reverse acc) inp'
+ Failure _ inp' -> k0 (r, reverse acc) inp'
+ r0 = pa Failure Success inp
+ in go [inp] r0 k0
+
+-- | Return both the result of a parse and the portion of the input
+-- that was consumed while it was being parsed.
+match :: Parser a -> Parser (V.Bytes, a)
+{-# INLINE match #-}
+match p = do
+ (r, bss) <- runAndKeepTrack p
+ Parser (\ _ k _ ->
+ case r of
+ Success r' inp' -> let !consumed = V.dropR (V.length inp') (V.concat (reverse bss))
+ in k (consumed , r') inp'
+ Failure err inp' -> Failure err inp'
+ Partial k -> error "Std.Data.Parser.Base.match: impossible")
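An editorial sketch of 'match' (hypothetical parser): pair a parsed value with the exact bytes it consumed, e.g. to keep a raw protocol line next to its parsed body.

@
lineWithRaw :: Parser (V.Bytes, V.Bytes)
lineWithRaw = match (takeTill (== 10) <* skipWord8)
-- fst: everything consumed, including the trailing newline byte
-- snd: the line body returned by 'takeTill', without the newline
@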
-- | Ensure that there are at least @n@ bytes available. If not, the
-- computation will escape with 'Partial'.
-ensureN :: Int -> Parser ()
+--
+-- Since this parser is used in many other parsers, an extra error param is provided
+-- to attach custom error info.
+ensureN :: Int -> ParseError -> Parser ()
{-# INLINE ensureN #-}
-ensureN n0 = Parser $ \ ks input -> do
- let l = V.length input
+ensureN n0 err = Parser $ \ kf k inp -> do
+ let l = V.length inp
if l >= n0
- then ks () input
- else Partial (go n0 ks [input] l)
+ then k () inp
+ else Partial (ensureNPartial l inp kf k)
where
- go n0 ks acc l = \ !input' -> do
- let l' = V.length input'
- if l' == 0
- then Failure
- (V.concat (reverse (input':acc)))
- "Std.Data.Parser.Base.ensureN: Not enough bytes"
- else do
- let l'' = l + l'
- if l'' < n0
- then Partial (go n0 ks (input':acc) l'')
- else do
- let input'' = V.concat (reverse (input':acc))
- ks () input''
+ {-# INLINABLE ensureNPartial #-}
+ ensureNPartial l inp kf k =
+ let go acc !l = \ inp -> do
+ let l' = V.length inp
+ if l' == 0
+ then kf err (V.concat (reverse (inp:acc)))
+ else do
+ let l'' = l + l'
+ if l'' < n0
+ then Partial (go (inp:acc) l'')
+ else
+ let !inp' = V.concat (reverse (inp:acc))
+ in k () inp'
+ in go [inp] l
-- | Test whether all input has been consumed, i.e. there are no remaining
--- undecoded bytes.
-endOfInput :: Parser Bool
+-- undecoded bytes. Fail if not 'atEnd'.
+endOfInput :: Parser ()
{-# INLINE endOfInput #-}
-endOfInput = Parser $ \ k inp ->
+endOfInput = Parser $ \ kf k inp ->
+ if V.null inp
+ then Partial (\ inp' ->
+ if (V.null inp')
+ then k () inp'
+ else kf ["Std.Data.Parser.Base.endOfInput: end not reached yet"] inp)
+ else kf ["Std.Data.Parser.Base.endOfInput: end not reached yet"] inp
+
+-- | Test whether all input has been consumed, i.e. there are no remaining
+-- undecoded bytes.
+atEnd :: Parser Bool
+{-# INLINE atEnd #-}
+atEnd = Parser $ \ _ k inp ->
if V.null inp
then Partial (\ inp' -> k (V.null inp') inp')
else k False inp
-decodePrim :: forall a. UnalignedAccess a => Parser a
+decodePrim :: forall a. (UnalignedAccess a) => Parser a
{-# INLINE decodePrim #-}
{-# SPECIALIZE INLINE decodePrim :: Parser Word #-}
{-# SPECIALIZE INLINE decodePrim :: Parser Word64 #-}
@@ -231,14 +309,14 @@ decodePrim :: forall a. UnalignedAccess a => Parser a
{-# SPECIALIZE INLINE decodePrim :: Parser Int16 #-}
{-# SPECIALIZE INLINE decodePrim :: Parser Int8 #-}
decodePrim = do
- ensureN n
- Parser (\ k (V.PrimVector (A.PrimArray ba#) i@(I# i#) len) ->
+ ensureN n ["Std.Data.Parser.Base.decodePrim: not enough bytes"]
+ Parser (\ _ k (V.PrimVector (A.PrimArray ba#) i@(I# i#) len) ->
let !r = indexWord8ArrayAs ba# i#
in k r (V.PrimVector (A.PrimArray ba#) (i+n) (len-n)))
where
- n = (getUnalignedSize (unalignedSize :: UnalignedSize a))
+ n = getUnalignedSize (unalignedSize :: UnalignedSize a)
-decodePrimLE :: forall a. UnalignedAccess (LE a) => Parser a
+decodePrimLE :: forall a. (UnalignedAccess (LE a)) => Parser a
{-# INLINE decodePrimLE #-}
{-# SPECIALIZE INLINE decodePrimLE :: Parser Word #-}
{-# SPECIALIZE INLINE decodePrimLE :: Parser Word64 #-}
@@ -248,9 +326,15 @@ decodePrimLE :: forall a. UnalignedAccess (LE a) => Parser a
{-# SPECIALIZE INLINE decodePrimLE :: Parser Int64 #-}
{-# SPECIALIZE INLINE decodePrimLE :: Parser Int32 #-}
{-# SPECIALIZE INLINE decodePrimLE :: Parser Int16 #-}
-decodePrimLE = getLE <$> decodePrim
+decodePrimLE = do
+ ensureN n ["Std.Data.Parser.Base.decodePrimLE: not enough bytes"]
+ Parser (\ _ k (V.PrimVector (A.PrimArray ba#) i@(I# i#) len) ->
+ let !r = indexWord8ArrayAs ba# i#
+ in k (getLE r) (V.PrimVector (A.PrimArray ba#) (i+n) (len-n)))
+ where
+ n = getUnalignedSize (unalignedSize :: UnalignedSize (LE a))
-decodePrimBE :: forall a. UnalignedAccess (BE a) => Parser a
+decodePrimBE :: forall a. (UnalignedAccess (BE a)) => Parser a
{-# INLINE decodePrimBE #-}
{-# SPECIALIZE INLINE decodePrimBE :: Parser Word #-}
{-# SPECIALIZE INLINE decodePrimBE :: Parser Word64 #-}
@@ -260,7 +344,13 @@ decodePrimBE :: forall a. UnalignedAccess (BE a) => Parser a
{-# SPECIALIZE INLINE decodePrimBE :: Parser Int64 #-}
{-# SPECIALIZE INLINE decodePrimBE :: Parser Int32 #-}
{-# SPECIALIZE INLINE decodePrimBE :: Parser Int16 #-}
-decodePrimBE = getBE <$> decodePrim
+decodePrimBE = do
+ ensureN n ["Std.Data.Parser.Base.decodePrimBE: not enough bytes"]
+ Parser (\ _ k (V.PrimVector (A.PrimArray ba#) i@(I# i#) len) ->
+ let !r = indexWord8ArrayAs ba# i#
+ in k (getBE r) (V.PrimVector (A.PrimArray ba#) (i+n) (len-n)))
+ where
+ n = getUnalignedSize (unalignedSize :: UnalignedSize (BE a))
-- | A stateful scanner. The predicate consumes and transforms a
-- state argument, and each transformed state is passed to successive
@@ -270,21 +360,23 @@ decodePrimBE = getBE <$> decodePrim
-- This parser does not fail. It will return an empty string if the
-- predicate returns 'Nothing' on the first byte of input.
--
-scan :: s -> (s -> Word8 -> Maybe s) -> Parser V.Bytes
+scan :: s -> (s -> Word8 -> Maybe s) -> Parser (V.Bytes, s)
{-# INLINE scan #-}
scan s0 f = scanChunks s0 f'
where
- f' st (V.Vec arr s l) = go f st arr s s (s+l)
- go f !st arr off !i !end
- | i < end = do
- let !w = A.indexPrimArray arr i
- case f st w of
- Just st' -> go f st' arr off (i+1) end
- _ ->
- let !len1 = i - off
- !len2 = end - off
- in Right (V.Vec arr off len1, V.Vec arr i len2)
- | otherwise = Left st
+ f' st (V.PrimVector arr off l) =
+ let !end = off + l
+ go !st !i
+ | i < end = do
+ let !w = A.indexPrimArray arr i
+ case f st w of
+ Just st' -> go st' (i+1)
+ _ ->
+ let !len1 = i - off
+ !len2 = end - off
+ in Right (V.PrimVector arr off len1, V.PrimVector arr i len2, st)
+ | otherwise = Left st
+ in go s0 off
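An editorial sketch of the new @(V.Bytes, s)@ result (hypothetical parser): take the body of a double-quoted string, using a previous-byte-was-a-backslash flag as the scanner state.

@
quotedBody :: Parser V.Bytes
quotedBody = fst <$> scan False step
  where
    step True  _  = Just False        -- previous byte was a backslash, accept this one
    step False 34 = Nothing           -- unescaped '"' (0x22) terminates the scan
    step False w  = Just (w == 92)    -- remember whether this byte is a backslash (0x5C)
@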
-- | Similar to 'scan', but working on 'V.Bytes' chunks. The predicate
-- consumes a 'V.Bytes' chunk and transforms a state argument,
@@ -292,27 +384,26 @@ scan s0 f = scanChunks s0 f'
-- the predicate on each chunk of the input until one chunk got split to
-- @Right (V.Bytes, V.Bytes)@ or the input ends.
--
-scanChunks :: s -> (s -> V.Bytes -> Either s (V.Bytes, V.Bytes)) -> Parser V.Bytes
+scanChunks :: s -> (s -> V.Bytes -> Either s (V.Bytes, V.Bytes, s)) -> Parser (V.Bytes, s)
{-# INLINE scanChunks #-}
-scanChunks s0 consume = Parser (go s0 [])
+scanChunks s consume = Parser (\ _ k inp ->
+ case consume s inp of
+ Right (want, rest, s') -> k (want, s') rest
+ Left s' -> Partial (scanChunksPartial s' k inp))
where
- go s acc k inp =
- case consume s inp of
- Left s' -> do
- let acc' = inp : acc
- Partial (go' s' acc' k)
- Right (want,rest) ->
- k (V.concat (reverse (want:acc))) rest
- go' s acc k inp
- | V.null inp = k (V.concat (reverse acc)) inp
- | otherwise =
- case consume s inp of
- Left s' -> do
- let acc' = inp : acc
- Partial (go' s' acc' k)
- Right (want,rest) ->
- k (V.concat (reverse (want:acc))) rest
-
+ -- we want to inline consume if possible
+ {-# INLINABLE scanChunksPartial #-}
+ scanChunksPartial s' k inp =
+ let go s acc = \ inp ->
+ if V.null inp
+ then k (V.concat (reverse acc), s) inp
+ else case consume s inp of
+ Left s' -> do
+ let acc' = inp : acc
+ Partial (go s' acc')
+ Right (want,rest,s') ->
+ let !r = V.concat (reverse (want:acc)) in k (r, s') rest
+ in go s' [inp]
--------------------------------------------------------------------------------
@@ -321,19 +412,27 @@ scanChunks s0 consume = Parser (go s0 [])
--
peekMaybe :: Parser (Maybe Word8)
{-# INLINE peekMaybe #-}
-peekMaybe = do
- e <- endOfInput
- if e then return Nothing
- else Just <$> peek
+peekMaybe =
+ Parser $ \ _ k inp ->
+ if V.null inp
+ then Partial (\ inp' -> k (if V.null inp'
+ then Nothing
+                                then Nothing
+                                else Just (V.unsafeHead inp')) inp')
+ else k (Just (V.unsafeHead inp)) inp
-- | Match any byte, to perform lookahead. Does not consume any
-- input, but will fail if end of input has been reached.
--
peek :: Parser Word8
{-# INLINE peek #-}
-peek = do
- ensureN 1
- Parser (\ k inp -> k (V.unsafeHead inp) inp)
+peek =
+ Parser $ \ kf k inp ->
+ if V.null inp
+ then Partial (\ inp' ->
+ if V.null inp'
+ then kf ["Std.Data.Parser.Base.peek: not enough bytes"] inp'
+ else k (V.unsafeHead inp') inp')
+ else k (V.unsafeHead inp) inp
-- | The parser @satisfy p@ succeeds for any byte for which the
-- predicate @p@ returns 'True'. Returns the byte that is actually
@@ -345,12 +444,12 @@ peek = do
satisfy :: (Word8 -> Bool) -> Parser Word8
{-# INLINE satisfy #-}
satisfy p = do
- ensureN 1
- Parser (\ k inp ->
+ ensureN 1 ["Std.Data.Parser.Base.satisfy: not enough bytes"]
+ Parser $ \ kf k inp ->
let w = V.unsafeHead inp
in if p w
then k w (V.unsafeTail inp)
- else Failure inp "Std.Data.Parser.Base.satisfy")
+ else kf ["Std.Data.Parser.Base.satisfy: unsatisfied byte"] (V.unsafeTail inp)
-- | The parser @satisfyWith f p@ transforms a byte, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
@@ -359,43 +458,31 @@ satisfy p = do
satisfyWith :: (Word8 -> a) -> (a -> Bool) -> Parser a
{-# INLINE satisfyWith #-}
satisfyWith f p = do
- ensureN 1
- Parser (\ k inp ->
- let w = f (V.unsafeHead inp)
- in if p w
- then k w (V.unsafeTail inp)
- else Failure inp "Std.Data.Parser.Base.satisfyWith")
+ ensureN 1 ["Std.Data.Parser.Base.satisfyWith: not enough bytes"]
+ Parser $ \ kf k inp ->
+ let a = f (V.unsafeHead inp)
+ in if p a
+ then k a (V.unsafeTail inp)
+ else kf ["Std.Data.Parser.Base.satisfyWith: unsatisfied byte"] (V.unsafeTail inp)
-- | Match a specific byte.
--
word8 :: Word8 -> Parser ()
{-# INLINE word8 #-}
word8 w' = do
- ensureN 1
- Parser (\ k inp ->
+ ensureN 1 ["Std.Data.Parser.Base.word8: not enough bytes"]
+ Parser (\ kf k inp ->
let w = V.unsafeHead inp
in if w == w'
then k () (V.unsafeTail inp)
- else Failure inp "Std.Data.Parser.Base.word8")
+ else kf ["Std.Data.Parser.Base.word8: mismatch byte"] inp)
-- | Match a specific 8bit char.
--
char8 :: Char -> Parser ()
{-# INLINE char8 #-}
-char8 c = do
- let !w' = V.c2w c
- ensureN 1
- Parser (\ k inp ->
- let w = V.unsafeHead inp
- in if w == w'
- then k () (V.unsafeTail inp)
- else Failure inp "Std.Data.Parser.Base.char8")
+char8 = word8 . V.c2w
--- | Match any byte.
---
-anyWord8 :: Parser Word8
-{-# INLINE anyWord8 #-}
-anyWord8 = decodePrim
-- | Match either a single newline byte @\'\\n\'@, or a carriage
-- return followed by a newline byte @\"\\r\\n\"@.
@@ -406,7 +493,7 @@ endOfLine = do
case w of
10 -> return ()
13 -> word8 10
- _ -> fail "endOfLine"
+ _ -> fail "Std.Data.Parser.Base.endOfLine: mismatch byte"
--------------------------------------------------------------------------------
@@ -414,37 +501,60 @@ endOfLine = do
--
skip :: Int -> Parser ()
{-# INLINE skip #-}
-skip n
- | n <= 0 = return () -- we use unsafe slice, guard negative n here
- | otherwise =
- Parser (\ k inp ->
- let l = V.length inp
- in if l >= n
- then k () (V.unsafeDrop n inp)
- else Partial (go k (n-l)))
- where
- go k !n inp =
+skip n =
+ Parser (\ kf k inp ->
let l = V.length inp
- in if l >= n
- then k () (V.unsafeDrop n inp)
- else if l == 0
- then Failure inp "Std.Data.Parser.Base.skip"
- else Partial (go k (n-l))
+ !n' = max n 0
+ in if l >= n'
+ then k () $! V.unsafeDrop n' inp
+ else Partial (skipPartial (n'-l) kf k))
+
+skipPartial :: Int -> (ParseError -> ParseStep r) -> (() -> ParseStep r) -> ParseStep r
+{-# INLINABLE skipPartial #-}
+skipPartial n kf k =
+ let go !n' = \ inp ->
+ let l = V.length inp
+ in if l >= n'
+ then k () $! V.unsafeDrop n' inp
+ else if l == 0
+ then kf ["Std.Data.Parser.Base.skip: not enough bytes"] inp
+ else Partial (go (n'-l))
+ in go n
+
+-- | Skip a byte.
+--
+skipWord8 :: Parser ()
+{-# INLINE skipWord8 #-}
+skipWord8 =
+ Parser $ \ kf k inp ->
+ if V.null inp
+ then Partial (\ inp' ->
+ if V.null inp'
+ then kf ["Std.Data.Parser.Base.skipWord8: not enough bytes"] inp'
+ else k () (V.unsafeTail inp'))
+ else k () (V.unsafeTail inp)
-- | Skip past input for as long as the predicate returns 'True'.
--
skipWhile :: (Word8 -> Bool) -> Parser ()
{-# INLINE skipWhile #-}
skipWhile p =
- Parser (\ k inp ->
+ Parser (\ _ k inp ->
let rest = V.dropWhile p inp
in if V.null rest
- then Partial (go k p)
+ then Partial (skipWhilePartial k)
else k () rest)
where
- go k p inp =
- let rest = V.dropWhile p inp -- If we ever enter 'Partial', empty input
- in k () rest -- means 'endOfInput'
+ -- we want to inline p if possible
+ {-# INLINABLE skipWhilePartial #-}
+ skipWhilePartial k =
+ let go = \ inp ->
+ if V.null inp
+ then k () inp
+ else
+ let !rest = V.dropWhile p inp
+ in if V.null rest then Partial go else k () rest
+ in go
-- | Skip over white space using 'isSpace'.
--
@@ -452,68 +562,70 @@ skipSpaces :: Parser ()
{-# INLINE skipSpaces #-}
skipSpaces = skipWhile isSpace
+-- | @isSpace w = w == 32 || w - 9 <= 4 || w == 0xA0@
+isSpace :: Word8 -> Bool
+{-# INLINE isSpace #-}
+isSpace w = w == 32 || w - 9 <= 4 || w == 0xA0
+
take :: Int -> Parser V.Bytes
{-# INLINE take #-}
-take n
- | n <= 0 = return V.empty -- we use unsafe slice, guard negative n here
- | otherwise =
- Parser (\ k inp ->
- let l = V.length inp
- in if l >= n
- then k (V.unsafeTake n inp) (V.unsafeDrop n inp)
- else Partial (go k (n-l) [inp]))
- where
- go k !n acc inp =
- let l = V.length inp
- in if l >= n
- then
- let !r = V.concat (reverse (V.unsafeTake n inp:acc))
- in k r (V.unsafeDrop n inp)
- else if l == 0
- then Failure inp "Std.Data.Parser.Base.take: Not enough bytes"
- else Partial (go k (n-l) (inp:acc))
+take n = do
+ -- we use unsafe slice, guard negative n here
+ ensureN n' ["Std.Data.Parser.Base.take: not enough bytes"]
+ Parser (\ _ k inp ->
+ let !r = V.unsafeTake n' inp
+ !inp' = V.unsafeDrop n' inp
+ in k r inp')
+ where !n' = max 0 n
-- | Consume input as long as the predicate returns 'False' or reach the end of input,
-- and return the consumed input.
--
takeTill :: (Word8 -> Bool) -> Parser V.Bytes
{-# INLINE takeTill #-}
-takeTill p = Parser (\ k inp ->
+takeTill p = Parser (\ _ k inp ->
let (want, rest) = V.break p inp
in if V.null rest
- then Partial (go k [want])
+ then Partial (takeTillPartial k want)
else k want rest)
where
- go k acc inp =
- if V.null inp
- then k (V.concat (reverse acc)) inp
- else
- let (want, rest) = V.break p inp
- acc' = want : acc
- in if V.null rest
- then Partial (go k acc')
- else k (V.concat (reverse acc')) rest
+ {-# INLINABLE takeTillPartial #-}
+ takeTillPartial k want =
+ let go acc = \ inp ->
+ if V.null inp
+ then let !r = V.concat (reverse acc) in k r inp
+ else
+ let (want, rest) = V.break p inp
+ acc' = want : acc
+ in if V.null rest
+ then Partial (go acc')
+ else let !r = V.concat (reverse acc') in k r rest
+ in go [want]
-- | Consume input as long as the predicate returns 'True' or reach the end of input,
-- and return the consumed input.
--
takeWhile :: (Word8 -> Bool) -> Parser V.Bytes
{-# INLINE takeWhile #-}
-takeWhile p = Parser (\ k inp ->
+takeWhile p = Parser (\ _ k inp ->
let (want, rest) = V.span p inp
in if V.null rest
- then Partial (go k [want])
+ then Partial (takeWhilePartial k want)
else k want rest)
where
- go k acc inp =
- if V.null inp
- then k (V.concat (reverse acc)) inp
- else
- let (want, rest) = V.span p inp
- acc' = want : acc
- in if V.null rest
- then Partial (go k acc')
- else k (V.concat (reverse acc')) rest
+ -- we want to inline p if possible
+ {-# INLINABLE takeWhilePartial #-}
+ takeWhilePartial k want =
+ let go acc = \ inp ->
+ if V.null inp
+ then let !r = V.concat (reverse acc) in k r inp
+ else
+ let (want, rest) = V.span p inp
+ acc' = want : acc
+ in if V.null rest
+ then Partial (go acc')
+ else let !r = V.concat (reverse acc') in k r rest
+ in go [want]
-- | Similar to 'takeWhile', but requires the predicate to succeed on at least one byte
-- of input: it will fail if the predicate never returns 'True' or reach the end of input
@@ -522,8 +634,9 @@ takeWhile1 :: (Word8 -> Bool) -> Parser V.Bytes
{-# INLINE takeWhile1 #-}
takeWhile1 p = do
bs <- takeWhile p
- if V.null bs then fail "Std.Data.Parser.Base.takeWhile1" else return bs
-
+ if V.null bs
+ then fail "Std.Data.Parser.Base.takeWhile1: no satisfied byte"
+ else return bs
-- | @bytes s@ parses a sequence of bytes that identically match @s@.
--
@@ -531,63 +644,26 @@ bytes :: V.Bytes -> Parser ()
{-# INLINE bytes #-}
bytes bs = do
let n = V.length bs
- Parser (\ k inp ->
- let l = V.length inp
- in if l >= n
- then
- if bs == (V.unsafeTake n inp)
- then k () (V.unsafeDrop n inp)
- else Failure inp "Std.Data.Parser.Base.bytes"
- else
- if inp == (V.unsafeTake l bs)
- then Partial (go k (n-l) (V.unsafeDrop l bs))
- else Failure inp "Std.Data.Parser.Base.bytes")
- where
- go k !n !bs inp =
- let l = V.length inp
- in if l >= n
- then
- if bs == (V.unsafeTake n inp)
- then k () (V.unsafeDrop n inp)
- else Failure inp "Std.Data.Parser.Base.bytes"
- else if l == 0
- then Failure inp "Std.Data.Parser.Base.bytes: Not enough bytes"
- else
- if inp == (V.unsafeTake l bs)
- then Partial (go k (n-l) (V.unsafeDrop l bs))
- else Failure inp "Std.Data.Parser.Base.bytes"
+ ensureN n ["Std.Data.Parser.Base.bytes: not enough bytes"]
+ Parser (\ kf k inp ->
+ if bs == V.unsafeTake n inp
+ then k () $! V.unsafeDrop n inp
+ else kf ["Std.Data.Parser.Base.bytes: mismatch bytes"] inp)
+
-- | Same as 'bytes' but ignoring case.
bytesCI :: V.Bytes -> Parser ()
{-# INLINE bytesCI #-}
bytesCI bs = do
- let n = V.length bs'
- Parser (\ k inp ->
- let l = V.length inp
- in if l >= n
- then
- if bs' == CI.foldCase (V.unsafeTake n inp)
- then k () (V.unsafeDrop n inp)
- else Failure inp "Std.Data.Parser.Base.bytesCI"
- else
- if CI.foldCase inp == V.unsafeTake l bs'
- then Partial (go k (n-l) (V.unsafeDrop l bs'))
- else Failure inp "Std.Data.Parser.Base.bytesCI")
+ let n = V.length bs
+    -- casefolding an ASCII string should not change its length
+ ensureN n ["Std.Data.Parser.Base.bytesCI: not enough bytes"]
+ Parser (\ kf k inp ->
+ if bs' == CI.foldCase (V.unsafeTake n inp)
+ then k () $! V.unsafeDrop n inp
+ else kf ["Std.Data.Parser.Base.bytesCI: mismatch bytes"] inp)
where
bs' = CI.foldCase bs
- go k !n !bs inp =
- let l = V.length inp
- in if l >= n
- then
- if bs == CI.foldCase (V.unsafeTake n inp)
- then k () (V.unsafeDrop n inp)
- else Failure inp "Std.Data.Parser.Base.bytesCI"
- else if l == 0
- then Failure inp "Std.Data.Parser.Base.bytesCI: Not enough bytes"
- else
- if CI.foldCase inp == V.unsafeTake l bs
- then Partial (go k (n-l) (V.unsafeDrop l bs))
- else Failure inp "Std.Data.Parser.Base.bytesCI"
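A hedged example of 'bytesCI' (hypothetical parser; @OverloadedStrings@ assumed for the 'T.Text' label): match an HTTP header name case-insensitively and require the following colon.

@
contentLength :: Parser ()
contentLength = "Example.contentLength" <?>
    (bytesCI (V.pack (map V.c2w "content-length")) <* word8 58)   -- 58 is ':'
@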
-- | @text s@ parses a sequence of UTF8 bytes that identically match @s@.
--
diff --git a/Std/Data/Parser/Numeric.hs b/Std/Data/Parser/Numeric.hs
index 57ba92e..e42a8e2 100644
--- a/Std/Data/Parser/Numeric.hs
+++ b/Std/Data/Parser/Numeric.hs
@@ -1,4 +1,7 @@
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE CPP #-}
{-|
Module : Std.Data.Parser.Numeric
@@ -14,37 +17,53 @@ Textual numeric parsers.
-}
module Std.Data.Parser.Numeric
- ( -- * decimal
+ ( -- * Decimal
uint, int
- -- * hex
+ -- * Hex
, hex
- -- * fractional
+ -- * Fractional
, rational
, float, double
, scientific
, scientifically
+      -- * Stricter fractional (RFC 8259)
+ , rational'
+ , float', double'
+ , scientific'
+ , scientifically'
+ -- * Misc
+ , hexLoop
+ , decLoop
+ , decLoopIntegerFast
+ , isHexDigit
+ , isDigit
+ , floatToScientific
+ , doubleToScientific
) where
import Control.Applicative
import Control.Monad
import Data.Bits
import Data.Int
-import qualified Data.Primitive.PrimArray as A
+import qualified Data.Primitive.PrimArray as A
import qualified Data.Scientific as Sci
import Data.Word
-import Data.Word8 (isDigit, isHexDigit)
import Foreign.Ptr (IntPtr)
-import Std.Data.Parser.Base (Parser)
+import qualified Std.Data.Builder.Numeric as B
+import Std.Data.Parser.Base (Parser, (<?>))
import qualified Std.Data.Parser.Base as P
import qualified Std.Data.Vector.Base as V
import qualified Std.Data.Vector.Extra as V
+import Std.IO.Exception
-minus, plus, littleE, bigE, dot :: Word8
-minus = 45
-plus = 43
-littleE = 101
-bigE = 69
-dot = 46
+#define WORD64_MAX_DIGITS_LEN 18
+
+#define PLUS 43
+#define MINUS 45
+#define DOT 46
+#define LITTLE_E 101
+#define BIG_E 69
+#define C_0 48
-- | Parse and decode an unsigned hex number. The hex digits
-- @\'a\'@ through @\'f\'@ may be upper or lower case.
@@ -65,27 +84,31 @@ hex :: (Integral a, Bits a) => Parser a
{-# SPECIALIZE INLINE hex :: Parser Word32 #-}
{-# SPECIALIZE INLINE hex :: Parser Word16 #-}
{-# SPECIALIZE INLINE hex :: Parser Word8 #-}
+{-# SPECIALIZE INLINE hex :: Parser Integer #-}
{-# SPECIALIZE INLINE hex :: Parser IntPtr #-}
-hex = do
- (V.Vec arr s l) <- P.takeWhile1 isHexDigit
- return $! hexLoop arr s (l-1) 0
- where
- hexLoop arr !i !j !acc
- | j == 0 = acc .|. w2iHex (A.indexPrimArray arr i)
- | otherwise =
- let acc' = acc .|. w2iHex (A.indexPrimArray arr i) `unsafeShiftL` (j*4)
- in hexLoop arr (i+1) (j-1) acc'
+hex = "Std.Data.Parser.Numeric.hex" <?> hexLoop 0 <$> P.takeWhile1 isHexDigit
-w2iHex :: (Integral a) => Word8 -> a
-{-# INLINE w2iHex #-}
-w2iHex w
- | w <= 57 = fromIntegral w - 48
- | 65 <= w && w <= 70 = fromIntegral w - 55
- | 97 <= w && w <= 102 = fromIntegral w - 87
+-- | Decode a hex digit sequence within an array.
+hexLoop :: (Integral a, Bits a)
+ => a -- ^ accumulator, usually start from 0
+ -> V.Bytes
+ -> a
+{-# INLINE hexLoop #-}
+hexLoop = V.foldl' step
+ where
+ step a w = a `unsafeShiftL` 4 + fromIntegral (w2iHex w)
+ w2iHex w
+ | w <= 57 = w - 48
+ | w <= 70 = w - 55
+ | w <= 102 = w - 87
+-- | A fast hex digit predicate.
+isHexDigit :: Word8 -> Bool
+{-# INLINE isHexDigit #-}
+isHexDigit w = w - 48 <= 9 || w - 65 <= 5 || w - 97 <= 5
-- | Parse and decode an unsigned decimal number.
-uint :: Integral a => Parser a
+uint :: (Integral a) => Parser a
{-# INLINE uint #-}
{-# SPECIALIZE INLINE uint :: Parser Int #-}
{-# SPECIALIZE INLINE uint :: Parser Int64 #-}
@@ -97,23 +120,35 @@ uint :: Integral a => Parser a
{-# SPECIALIZE INLINE uint :: Parser Word32 #-}
{-# SPECIALIZE INLINE uint :: Parser Word16 #-}
{-# SPECIALIZE INLINE uint :: Parser Word8 #-}
-uint = do
- (V.Vec arr s l) <- P.takeWhile1 isDigit
- return $! decLoop arr s (l-1) 0
- where
- decLoop arr !i !j !acc
- | j == 0 = acc*10 + w2iDec (A.indexPrimArray arr i)
- | otherwise =
- let acc' = acc*10 + w2iDec (A.indexPrimArray arr i)
- in decLoop arr (i+1) (j-1) acc'
+{-# SPECIALIZE INLINE uint :: Parser Integer #-}
+uint = "Std.Data.Parser.Numeric.uint" <?> decLoop 0 <$> P.takeWhile1 isDigit
+
+-- | Decode a digit sequence within an array.
+decLoop :: Integral a
+ => a -- ^ accumulator, usually start from 0
+ -> V.Bytes
+ -> a
+{-# INLINE decLoop #-}
+decLoop a bs@(V.PrimVector arr s l) = V.foldl' step a bs
+ where step a w = a * 10 + fromIntegral (w - 48)
+
+-- | Decode a digit sequence within an array.
+--
+-- A fast version to decode 'Integer' using machine word as much as possible.
+decLoopIntegerFast :: V.Bytes -> Integer
+{-# INLINE decLoopIntegerFast #-}
+decLoopIntegerFast bs
+ | V.length bs <= WORD64_MAX_DIGITS_LEN = fromIntegral (decLoop @Word64 0 bs)
+ | otherwise = decLoop @Integer 0 bs
-w2iDec :: (Integral a) => Word8 -> a
-{-# INLINE w2iDec #-}
-w2iDec w = fromIntegral w - 48
+-- | A fast digit predicate.
+isDigit :: Word8 -> Bool
+isDigit w = w - 48 <= 9
+{-# INLINE isDigit #-}
-- | Parse a decimal number with an optional leading @\'+\'@ or @\'-\'@ sign
-- character.
-int :: Integral a => Parser a
+int :: (Integral a) => Parser a
{-# INLINE int #-}
{-# SPECIALIZE INLINE int :: Parser Int #-}
{-# SPECIALIZE INLINE int :: Parser Int64 #-}
@@ -125,11 +160,15 @@ int :: Integral a => Parser a
{-# SPECIALIZE INLINE int :: Parser Word32 #-}
{-# SPECIALIZE INLINE int :: Parser Word16 #-}
{-# SPECIALIZE INLINE int :: Parser Word8 #-}
-int = do
+{-# SPECIALIZE INLINE int :: Parser Integer #-}
+int = "Std.Data.Parser.Numeric.int" <?> do
w <- P.peek
- if w == minus
- then P.skip 1 >> negate <$> uint
- else if w == plus then P.skip 1 >> uint else uint
+ if w == MINUS
+ then P.skipWord8 *> (negate <$> uint')
+ else if w == PLUS then P.skipWord8 *> uint' else uint'
+ where
+ -- strip uint's message
+ uint' = decLoop 0 <$> P.takeWhile1 isDigit
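A few hedged example results for the decimal and hex parsers (editorial; assumes 'parse_' from Std.Data.Parser.Base and these parsers are in scope, with @OverloadedStrings@ supplying the 'V.Bytes' literals):

@
-- parse_ int  "-123" == Right (-123 :: Int)
-- parse_ uint "42"   == Right (42 :: Word)
-- parse_ hex  "ff"   == Right (255 :: Word)
@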
-- | Parse a rational number.
--
@@ -143,9 +182,9 @@ int = do
-- In most cases, it is better to use 'double' or 'scientific'
-- instead.
--
-rational :: Fractional a => Parser a
+rational :: (Fractional a) => Parser a
{-# INLINE rational #-}
-rational = scientifically realToFrac
+rational = "Std.Data.Parser.Numeric.rational" <?> scientificallyInternal realToFrac
-- | Parse a rational number and round to 'Double'.
--
@@ -156,32 +195,33 @@ rational = scientifically realToFrac
--
-- Examples with behaviour identical to 'read':
--
--- >parseOnly double "3" == Right ("",1,3.0)
--- >parseOnly double "3.1" == Right ("",3,3.1)
--- >parseOnly double "3e4" == Right ("",3,30000.0)
--- >parseOnly double "3.1e4" == Right ("",5,31000.0)
+-- >parse double "3"     == ("", Right 3.0)
+-- >parse double "3.1"   == ("", Right 3.1)
+-- >parse double "3e4"   == ("", Right 30000.0)
+-- >parse double "3.1e4" == ("", Right 31000.0)
--
--- >parseOnly double ".3" == Left (".3",0,"takeWhile1")
--- >parseOnly double "e3" == Left ("e3",0,"takeWhile1")
+-- >parse double ".3" == (".3", Left ParseError)
+-- >parse double "e3" == ("e3", Left ParseError)
--
-- Examples of differences from 'read':
--
--- >parseOnly double "3.foo" == Right (".foo",1,3.0)
--- >parseOnly double "3e" == Right ("e",1,3.0)
+-- >parse double "3.foo" == (".foo", Right 3.0)
+-- >parse double "3e"    == ("e", Right 3.0)
+-- >parse double "-3e"   == ("e", Right (-3.0))
--
-- This function does not accept string representations of \"NaN\" or
-- \"Infinity\".
--
double :: Parser Double
{-# INLINE double #-}
-double = scientifically Sci.toRealFloat
+double = "Std.Data.Parser.Numeric.double" <?> scientificallyInternal Sci.toRealFloat
-- | Parse a rational number and round to 'Float'.
--
-- Single precision version of 'double'.
float :: Parser Float
{-# INLINE float #-}
-float = scientifically Sci.toRealFloat
+float = "Std.Data.Parser.Numeric.float" <?> scientificallyInternal Sci.toRealFloat
-- | Parse a scientific number.
--
@@ -189,33 +229,172 @@ float = scientifically Sci.toRealFloat
--
scientific :: Parser Sci.Scientific
{-# INLINE scientific #-}
-scientific = scientifically id
+scientific = "Std.Data.Parser.Numeric.scientific" <?> scientificallyInternal id
-- | Parse a scientific number and convert to a result using a user supplied function.
--
-- The syntax accepted by this parser is the same as for 'double'.
---
scientifically :: (Sci.Scientific -> a) -> Parser a
{-# INLINE scientifically #-}
-scientifically h = do
- sign <- P.peek
- when (sign == plus || sign == minus) (P.skip 1)
- intPart <- uint
- sci <- (do (V.Vec arr s l) <- P.word8 dot >> P.takeWhile1 isDigit
- let intPart' = intPart * (10 ^ l)
- fracPart = decLoop arr s (l-1) 0
- parseE (intPart' + fracPart) l
- ) <|> (parseE intPart 0)
-
- if sign /= minus then return $! h sci else return $! h (negate sci)
+scientifically h = "Std.Data.Parser.Numeric.scientifically" <?> scientificallyInternal h
+
+-- | Version of 'scientifically' with the error message stripped.
+scientificallyInternal :: (Sci.Scientific -> a) -> Parser a
+{-# INLINE scientificallyInternal #-}
+scientificallyInternal h = do
+ !sign <- P.peek
+ when (sign == PLUS || sign == MINUS) (P.skipWord8)
+ !intPart <- P.takeWhile1 isDigit
+    -- backtracking here is necessary to avoid eating an extra dot or e
+ -- attoparsec is doing it wrong here: https://github.com/bos/attoparsec/issues/112
+ !sci <- (do
+        -- during number parsing we want to use machine words as much as possible,
+        -- so as long as the range permits, we use Word64 instead of the final Integer
+ !fracPart <- P.word8 DOT *> P.takeWhile1 isDigit
+ let !ilen = V.length intPart
+ !flen = V.length fracPart
+ !base =
+ if ilen + flen <= WORD64_MAX_DIGITS_LEN
+ then fromIntegral (decLoop @Word64 (decLoop @Word64 0 intPart) fracPart)
+ else
+ let int = decLoopIntegerFast intPart
+ frac = decLoopIntegerFast fracPart
+ in int * 10 ^ flen + frac
+ parseE base flen) <|> (parseE (decLoopIntegerFast intPart) 0)
+
+ pure $! if sign /= MINUS then h sci else h (negate sci)
where
{-# INLINE parseE #-}
parseE c e =
- (do _ <- P.satisfy (\w -> w == littleE || w == bigE)
- (Sci.scientific c . (subtract e) <$> int)) <|> return (Sci.scientific c (negate e))
-
- decLoop arr !i !j !acc
- | j == 0 = acc*10 + w2iDec (A.indexPrimArray arr i)
- | otherwise =
- let acc' = acc*10 + w2iDec (A.indexPrimArray arr i)
- in decLoop arr (i+1) (j-1) acc'
+ (do _ <- P.satisfy (\w -> w == LITTLE_E || w == BIG_E)
+ Sci.scientific c . subtract e <$> int) <|> pure (Sci.scientific c (negate e))
+
+--------------------------------------------------------------------------------
+
+-- | Parse a rational number.
+--
+-- The syntax accepted by this parser is the same as for 'double''.
+--
+-- /Note/: this parser is not safe for use with inputs from untrusted
+-- sources. An input with a suitably large exponent such as
+-- @"1e1000000000"@ will cause a huge 'Integer' to be allocated,
+-- resulting in what is effectively a denial-of-service attack.
+--
+-- In most cases, it is better to use 'double'' or 'scientific''
+-- instead.
+--
+rational' :: (Fractional a) => Parser a
+{-# INLINE rational' #-}
+rational' = "Std.Data.Parser.Numeric.rational'" <?> scientificallyInternal' realToFrac
+
+-- | More strict number parsing (RFC 8259).
+--
+-- 'scientific' supports parsing @2314.@ and @21321exyz@ without eating the extra dot or @e@ via
+-- backtracking; this is not allowed in some strict grammars such as JSON, so we provide a
+-- non-backtracking strict number parser separately, using LL(1) lookahead. This parser also
+-- agrees with 'read' on extra dot or e handling:
+--
+-- >parse_ double' "3.foo" == Left ParseError
+-- >parse_ double' "3e"    == Left ParseError
+--
+-- Leading zeros or a @+@ sign are also not allowed:
+--
+-- >parse_ double' "+3.14" == Left ParseError
+-- >parse_ double' "0014"  == Left ParseError
+--
+-- If you have a similar grammar, you can use this parser to save considerable time.
+--
+-- @
+-- number = [ minus ] int [ frac ] [ exp ]
+-- decimal-point = %x2E ; .
+-- digit1-9 = %x31-39 ; 1-9
+-- e = %x65 / %x45 ; e E
+-- exp = e [ minus / plus ] 1*DIGIT
+-- frac = decimal-point 1*DIGIT
+-- @
+--
+-- This function does not accept string representations of \"NaN\" or
+-- \"Infinity\".
+-- reference: https://tools.ietf.org/html/rfc8259#section-6
+double' :: Parser Double
+{-# INLINE double' #-}
+double' = "Std.Data.Parser.Numeric.double'" <?> scientificallyInternal' Sci.toRealFloat
+
+-- | Parse a rational number and round to 'Float' using the stricter grammar.
+--
+-- Single precision version of 'double''.
+float' :: Parser Float
+{-# INLINE float' #-}
+float' = "Std.Data.Parser.Numeric.float'" <?> scientificallyInternal' Sci.toRealFloat
+
+-- | Parse a scientific number.
+--
+-- The syntax accepted by this parser is the same as for 'double''.
+scientific' :: Parser Sci.Scientific
+{-# INLINE scientific' #-}
+scientific' = "Std.Data.Parser.Numeric.scientific'" <?> scientificallyInternal' id
+
+-- | Parse a scientific number and convert to a result using a user supplied function.
+--
+-- The syntax accepted by this parser is the same as for 'double''.
+scientifically' :: (Sci.Scientific -> a) -> P.Parser a
+{-# INLINE scientifically' #-}
+scientifically' h = "Std.Data.Parser.Numeric.scientifically'" <?> scientificallyInternal' h
+
+-- | Version of scientifically' with the error message stripped.
+scientificallyInternal' :: (Sci.Scientific -> a) -> P.Parser a
+{-# INLINE scientificallyInternal' #-}
+scientificallyInternal' h = do
+ !sign <- P.peek
+ when (sign == MINUS) (P.skipWord8) -- no leading plus is allowed
+ !intPart <- P.takeWhile1 isDigit
+ when (V.length intPart > 1 && V.head intPart == C_0) (fail "leading zeros are not allowed")
+ mdot <- P.peekMaybe
+ !sci <- case mdot of
+ Just DOT -> do
+ !fracPart <- P.skipWord8 *> P.takeWhile1 isDigit
+            -- during number parsing we want to use machine words as much as possible,
+            -- so as long as the range permits, we use Word64 instead of the final Integer
+ let !ilen = V.length intPart
+ !flen = V.length fracPart
+ !base =
+ if ilen + flen <= WORD64_MAX_DIGITS_LEN
+ then fromIntegral (decLoop @Word64 (decLoop @Word64 0 intPart) fracPart)
+ else
+ let int = decLoopIntegerFast intPart
+ frac = decLoopIntegerFast fracPart
+ in int * 10 ^ flen + frac
+ parseE base flen
+ _ -> parseE (decLoopIntegerFast intPart) 0
+ pure $! if sign /= MINUS then h sci else h (negate sci)
+ where
+ {-# INLINE parseE #-}
+ parseE !c !exp = do
+ me <- P.peekMaybe
+ exp' <- case me of
+ Just e | e == LITTLE_E || e == BIG_E -> P.skipWord8 *> int
+ _ -> pure 0
+ pure $! Sci.scientific c (exp' - exp)
+
+--------------------------------------------------------------------------------
+
+floatToScientific :: Float -> Sci.Scientific
+{-# INLINE floatToScientific #-}
+floatToScientific rf | rf < 0 = -(fromFloatingDigits (B.grisu3_sp (-rf)))
+ | rf == 0 = 0
+ | otherwise = fromFloatingDigits (B.grisu3_sp rf)
+
+doubleToScientific :: Double -> Sci.Scientific
+{-# INLINE doubleToScientific #-}
+doubleToScientific rf | rf < 0 = -(fromFloatingDigits (B.grisu3 (-rf)))
+ | rf == 0 = 0
+ | otherwise = fromFloatingDigits (B.grisu3 rf)
+
+fromFloatingDigits :: ([Int], Int) -> Sci.Scientific
+{-# INLINE fromFloatingDigits #-}
+fromFloatingDigits (digits, e) = go digits 0 0
+ where
+    -- There's no way a float or double has more digits than an 'Int64' can handle
+ go :: [Int] -> Int64 -> Int -> Sci.Scientific
+ go [] !c !n = Sci.scientific (fromIntegral c) (e - n)
+ go (d:ds) !c !n = go ds (c * 10 + fromIntegral d) (n + 1)
diff --git a/Std/Data/PrimArray/BitTwiddle.hs b/Std/Data/PrimArray/BitTwiddle.hs
index e18b5ad..d556746 100644
--- a/Std/Data/PrimArray/BitTwiddle.hs
+++ b/Std/Data/PrimArray/BitTwiddle.hs
@@ -120,7 +120,7 @@ memchr# ba# c# s# siz# = beforeAlignedLoop# ba# c# s# (s# +# siz#)
-- | Search a word8 array in reverse order.
--
--- This function is used in @elemIndexEnd@, since there's no c equivalent.
+-- This function is used in @elemIndexEnd@, since there's no c equivalent (memrchr) on OSX.
--
memchrReverse :: PrimArray Word8 -- array
-> Word8 -- target
diff --git a/Std/Data/PrimArray/Cast.hs b/Std/Data/PrimArray/Cast.hs
index 24798ef..de2a820 100644
--- a/Std/Data/PrimArray/Cast.hs
+++ b/Std/Data/PrimArray/Cast.hs
@@ -37,7 +37,7 @@ import Data.Coerce
class Cast source destination where
cast :: source -> destination
-instance {-# OVERLAPPABLE #-} Coercible a b => Cast a b where
+instance {-# INCOHERENT #-} Coercible a b => Cast a b where
cast = coerce
instance Cast Int8 Word8 where
diff --git a/Std/Data/Text/Base.hs b/Std/Data/Text/Base.hs
index ee1ee46..006ee93 100644
--- a/Std/Data/Text/Base.hs
+++ b/Std/Data/Text/Base.hs
@@ -4,6 +4,7 @@
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE CPP #-}
{-|
@@ -125,7 +126,9 @@ import Data.Char hiding (toLower, toUpper, toTitle)
import Data.Foldable (foldlM)
import Data.Hashable (Hashable(..))
import qualified Data.List as List
+import Data.Monoid (Monoid (..))
import Data.Primitive.PrimArray
+import Data.Semigroup (Semigroup ((<>)))
import Data.Typeable
import Data.String (IsString(..))
import Data.Word
@@ -134,7 +137,7 @@ import GHC.Exts (build)
import GHC.Ptr
import GHC.Types
import GHC.Stack
-import GHC.CString (unpackCString#)
+import GHC.CString (unpackCString#, unpackCStringUtf8#)
import Std.Data.Array
import Std.Data.Text.UTF8Codec
import Std.Data.Text.UTF8Rewind
@@ -151,11 +154,13 @@ import Prelude hiding (concat, concatMap,
maximum, minimum, product, sum,
all, any, replicate, traverse)
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
+
-- | 'Text' represented as UTF-8 encoded 'Bytes'
--
newtype Text = Text
{ getUTF8Bytes :: Bytes -- ^ Extract UTF-8 encoded 'Bytes' from 'Text'
- }
+ } deriving (Semigroup, Monoid)
instance Eq Text where
Text b1 == Text b2 = b1 == b2
@@ -174,6 +179,13 @@ instance Read Text where
instance NFData Text where
rnf (Text bs) = rnf bs
+instance Arbitrary Text where
+ arbitrary = pack <$> arbitrary
+ shrink a = pack <$> shrink (unpack a)
+
+instance CoArbitrary Text where
+ coarbitrary = coarbitrary . unpack
+
instance Hashable Text where
{-# INLINE hashWithSalt #-}
hashWithSalt salt (Text bs) = hashWithSalt salt bs
@@ -182,14 +194,25 @@ instance IsString Text where
{-# INLINE fromString #-}
fromString = pack
-packStringAddr :: Addr# -> Text
-{-# INLINABLE packStringAddr #-}
-packStringAddr addr# = validateAndCopy addr#
+packASCIIAddr :: Addr# -> Text
+packASCIIAddr addr# = copy addr#
+ where
+ len = fromIntegral . unsafeDupablePerformIO $ c_strlen addr#
+ copy addr# = runST $ do
+ marr <- newPrimArray len
+ copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
+ arr <- unsafeFreezePrimArray marr
+ return $ Text (PrimVector arr 0 len)
+
+packUTF8Addr :: Addr# -> Text
+packUTF8Addr addr# = validateAndCopy addr#
where
len = fromIntegral . unsafeDupablePerformIO $ c_strlen addr#
valid = unsafeDupablePerformIO $ c_utf8_validate_addr addr# len
validateAndCopy addr#
- | valid == 0 = pack (unpackCString# addr#)
+ | valid == 0 = packN len (unpackCStringUtf8# addr#) -- three bytes surrogate -> three bytes replacement
+ -- two bytes NUL -> \NUL
+                                                                   -- the result's length will be either smaller or equal
| otherwise = runST $ do
marr <- newPrimArray len
copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
@@ -286,13 +309,12 @@ foreign import ccall unsafe "text.h utf8_validate_addr"
-- | /O(n)/ Convert a string into a text
--
--- Alias for @'packN' 'defaultInitSize'@.
+-- Alias for @'packN' 'defaultInitSize'@, will be rewritten to a memcpy if possible.
pack :: String -> Text
pack = packN V.defaultInitSize
-{-# INLINE CONLIKE [1] pack #-}
-{-# RULES
- "pack/packStringAddr" forall addr . pack (unpackCString# addr) = packStringAddr addr
- #-}
+{-# INLINE CONLIKE [0] pack #-}
+{-# RULES "pack/packASCIIAddr" forall addr . pack (unpackCString# addr) = packASCIIAddr addr #-}
+{-# RULES "pack/packUTF8Addr" forall addr . pack (unpackCStringUtf8# addr) = packUTF8Addr addr #-}
-- | /O(n)/ Convert a list into a text with an approximate size(in bytes, not codepoints).
--
diff --git a/Std/Data/TextBuilder.hs b/Std/Data/TextBuilder.hs
index f815e99..cb10ed1 100644
--- a/Std/Data/TextBuilder.hs
+++ b/Std/Data/TextBuilder.hs
@@ -1,11 +1,20 @@
{-# LANGUAGE CPP #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE DeriveAnyClass #-}
{-|
-Module : Std.Data.Builder.Numeric
+Module : Std.Data.TextBuilder
Description : UTF8 compatible builders.
Copyright : (c) Dong Han, 2017-2019
License : BSD
@@ -13,14 +22,23 @@ Maintainer : winterland1989@gmail.com
Stability : experimental
Portability : non-portable
-UTF8 compatible textual builders.
+Based on the UTF8 compatible textual builders from 'Std.Data.Builder', we provide a newtype wrapper
+'TextBuilder' which can be used directly to build 'Text'.
+
+We also provide a faster alternative to the 'Show' class, i.e. 'ToText', which also supports 'Generic'
+based instance deriving.
-}
module Std.Data.TextBuilder
- (
+ ( -- * ToText class
+ ToText(..), toText, toBuilder, toBytes, toString
+ -- * Str newtype
+ , Str(..)
-- * Textual Builder
- TextBuilder(..)
+ , TextBuilder
+ , getBuilder
+ , unsafeFromBuilder
, buildText
-- * Basic UTF8 builders
, stringUTF8, charUTF8, string7, char7, text
@@ -42,40 +60,89 @@ module Std.Data.TextBuilder
, floatWith
, scientific
, scientificWith
+ -- * Builder helpers
+ , paren, parenWhen, curly, square, angle, quotes, squotes, colon, comma, intercalateVec, intercalateList
) where
+import Control.Monad
import qualified Data.Scientific as Sci
import Data.String
import Data.Bits
-import qualified Std.Data.Builder.Base as B
+import Data.Data (Data(..))
+import Data.Fixed
+import Data.Functor.Compose
+import Data.Functor.Const
+import Data.Functor.Identity
+import Data.Functor.Product
+import Data.Functor.Sum
+import Data.Int
+import Data.List.NonEmpty (NonEmpty (..))
+import qualified Data.List.NonEmpty as NonEmpty
+import qualified Data.Monoid as Monoid
+import Data.Proxy (Proxy (..))
+import Data.Ratio (Ratio, (%), numerator, denominator)
+import Data.Tagged (Tagged (..))
+import Data.Word
+import qualified Data.Semigroup as Semigroup
+import Data.Typeable
+import GHC.Natural
+import GHC.Generics
+import Data.Version
+import Data.Primitive.Types
+import qualified Std.Data.Builder as B
import qualified Std.Data.Builder.Numeric as B
-import Std.Data.Text.Base (Text (..))
+import qualified Std.Data.Text.Base as T
+import Std.Data.Text.Base (Text(..))
+import Std.Data.Generics.Utils
+import qualified Std.Data.Vector.Base as V
+import Text.Read (Read(..))
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
-- | Builders which guarantee UTF-8 encoding, thus can be used to build
-- text directly.
--
--- Notes on 'IsString' instance: It's recommended to use 'IsString' instance instead of 'stringUTF8'
--- or 'string7' since there's a rewrite rule to turn encoding loop into a memcpy, which is much faster.
---
--- Different from 'Builder ()', 'TextBuilder ()''s 'IsString' instance will gives desired UTF8 guarantees:
+-- Notes on the 'IsString' instance: it's recommended to use the 'IsString' instance; there's a rewrite rule to
+-- turn the encoding loop into a memcpy, which is much faster (the same rule also applies to 'stringUTF8').
+-- Different from @Builder ()@, @TextBuilder ()@'s 'IsString' instance will give you the desired UTF8 guarantees:
--
--- * "\NUL" will be written directly as @\x00@.
+-- * @\NUL@ will be written directly as @\x00@.
--
-- * @\xD800@ ~ @\xDFFF@ will be replaced by replacement char.
--
-newtype TextBuilder a = TextBuilder { toBuilder :: B.Builder a }
- deriving (Functor, Applicative, Monad)
+newtype TextBuilder a = TextBuilder { getBuilder :: B.Builder a }
+ deriving newtype (Functor, Applicative, Monad)
+
+deriving newtype instance Semigroup (TextBuilder ())
+deriving newtype instance Monoid (TextBuilder ())
instance (a ~ ()) => IsString (TextBuilder a) where
{-# INLINE fromString #-}
- fromString = stringUTF8
+ fromString = TextBuilder <$> B.stringUTF8
+
+instance Arbitrary (TextBuilder ()) where
+ arbitrary = TextBuilder . B.text <$> arbitrary
+ shrink b = TextBuilder . B.text <$> shrink (buildText b)
+
+instance CoArbitrary (TextBuilder ()) where
+ coarbitrary = coarbitrary . buildText
-deriving instance Semigroup (TextBuilder ())
-deriving instance Monoid (TextBuilder ())
+instance Show (TextBuilder a) where
+ show = show . buildText
+instance ToText (TextBuilder a) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ b = quotes (void b)
+
+-- | Build a 'Text' using 'TextBuilder', which provides a UTF-8 encoding guarantee.
buildText :: TextBuilder a -> Text
{-# INLINE buildText #-}
-buildText = Text . B.buildBytes . toBuilder
+buildText = Text . B.buildBytes . getBuilder
+
+-- | Unsafely turn a 'B.Builder' into a 'TextBuilder', thus it's the user's responsibility to
+-- ensure only UTF-8 compliant bytes are written.
+unsafeFromBuilder :: B.Builder a -> TextBuilder a
+{-# INLINE unsafeFromBuilder #-}
+unsafeFromBuilder = TextBuilder
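A hedged usage sketch (hypothetical function; @OverloadedStrings@ assumed so the 'IsString' instance and its memcpy rewrite rule apply): compose UTF-8 guaranteed builders and render them with 'buildText'.

@
greeting :: Int -> Text
greeting n = buildText ("result: " >> int n)   -- e.g. greeting 42 renders as "result: 42"
@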
--------------------------------------------------------------------------------
@@ -107,12 +174,12 @@ char7 :: Char -> TextBuilder ()
{-# INLINE char7 #-}
char7 = TextBuilder . B.char7
--- | Write UTF8 encoded 'Text' using 'Builder'.
+-- | Write UTF8 encoded 'T.Text' using 'Builder'.
--
-- Note, if you're trying to write string literal builders,
-- please open 'OverloadedStrings' and use 'Builder's 'IsString' instance,
-- it will be rewritten into a memcpy.
-text :: Text -> TextBuilder ()
+text :: T.Text -> TextBuilder ()
{-# INLINE text #-}
text = TextBuilder . B.text
@@ -195,3 +262,333 @@ scientificWith :: B.FFormat
-> TextBuilder ()
{-# INLINE scientificWith #-}
scientificWith fmt ds x = TextBuilder (B.scientificWith fmt ds x)
+
+--------------------------------------------------------------------------------
+
+-- | add @(...)@ to original builder.
+paren :: TextBuilder () -> TextBuilder ()
+{-# INLINE paren #-}
+paren (TextBuilder b) = TextBuilder (B.paren b)
+
+-- | Add @(...)@ around builders when the condition is met, otherwise add nothing.
+--
+-- This is useful when defining 'ToText' instances.
+parenWhen :: Bool -> TextBuilder () -> TextBuilder ()
+{-# INLINE parenWhen #-}
+parenWhen True b = paren b
+parenWhen _ b = b
+
+-- | add @{...}@ to original builder.
+curly :: TextBuilder () -> TextBuilder ()
+{-# INLINE curly #-}
+curly (TextBuilder b) = TextBuilder (B.curly b)
+
+-- | add @[...]@ to original builder.
+square :: TextBuilder () -> TextBuilder ()
+{-# INLINE square #-}
+square (TextBuilder b) = TextBuilder (B.square b)
+
+-- | add @<...>@ to original builder.
+angle :: TextBuilder () -> TextBuilder ()
+{-# INLINE angle #-}
+angle (TextBuilder b) = TextBuilder (B.angle b)
+
+-- | add @"..."@ to original builder.
+quotes :: TextBuilder () -> TextBuilder ()
+{-# INLINE quotes #-}
+quotes (TextBuilder b) = TextBuilder (B.quotes b)
+
+-- | add @'...'@ to original builder.
+squotes :: TextBuilder () -> TextBuilder ()
+{-# INLINE squotes #-}
+squotes (TextBuilder b) = TextBuilder (B.squotes b)
+
+-- | write an ASCII @:@
+colon :: TextBuilder ()
+{-# INLINE colon #-}
+colon = TextBuilder B.colon
+
+-- | write an ASCII @,@
+comma :: TextBuilder ()
+{-# INLINE comma #-}
+comma = TextBuilder B.comma
+
+-- | Use separator to connect a vector of builders.
+intercalateVec :: (V.Vec v a)
+               => TextBuilder ()          -- ^ the separator
+               -> (a -> TextBuilder ())   -- ^ value formatter
+               -> v a                     -- ^ value vector
+ -> TextBuilder ()
+{-# INLINE intercalateVec #-}
+intercalateVec (TextBuilder s) f = TextBuilder . B.intercalateVec s (getBuilder . f)
+
+-- | Use separator to connect a list of builders.
+intercalateList :: TextBuilder ()           -- ^ the separator
+                -> (a -> TextBuilder ())    -- ^ value formatter
+                -> [a]                      -- ^ value list
+ -> TextBuilder ()
+{-# INLINE intercalateList #-}
+intercalateList (TextBuilder s) f = TextBuilder . B.intercalateList s (getBuilder . f)
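An editorial sketch of how these helpers compose (hypothetical function; it has the same shape the @ToText [a]@ instance below uses):

@
showIntList :: [Int] -> Text
showIntList = buildText . square . intercalateList comma int
-- e.g. showIntList [1,2,3] renders as "[1,2,3]"
@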
+
+--------------------------------------------------------------------------------
+-- | Newtype wrapper for @[Char]@ to provide textual instances.
+--
+-- To encourage using 'Text' as the textual representation, we didn't provide special
+-- treatment to differentiate instances between @[a]@ and @[Char]@ in various places.
+-- This newtype is therefore provided to give instances similar to @T.Text@, in case you really
+-- need to wrap a 'String'.
+newtype Str = Str { chrs :: [Char] } deriving stock (Eq, Ord, Data, Typeable, Generic)
+
+instance Show Str where show = show . chrs
+instance Read Str where readPrec = Str <$> readPrec
+
+instance ToText Str where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = TextBuilder . B.string8 . show
+
+--------------------------------------------------------------------------------
+-- Data types
+--
+-- | A class similar to 'Show', serving the purpose of quickly converting a data type
+-- to a 'Text' value.
+class ToText a where
+ toTextBuilder :: Int -> a -> TextBuilder ()
+ default toTextBuilder :: (Generic a, GToText (Rep a)) => Int -> a -> TextBuilder ()
+ toTextBuilder p = gToTextBuilder p . from
+
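A hedged sketch of the 'Generic' based deriving this class enables (hypothetical data type; @DeriveGeneric@, @DerivingStrategies@ and @DeriveAnyClass@ assumed at the use site):

@
data Color = RGB Word8 Word8 Word8
    deriving stock (Generic)
    deriving anyclass (ToText)

-- toText (RGB 1 2 3) should render roughly as "RGB 1 2 3"
@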
+class GToText f where
+ gToTextBuilder :: Int -> f a -> TextBuilder ()
+
+
+class GFieldToText f where
+ gFieldToTextBuilder :: B.Builder () -> Int -> f a -> B.Builder ()
+
+instance (GFieldToText a, GFieldToText b) => GFieldToText (a :*: b) where
+ {-# INLINE gFieldToTextBuilder #-}
+ gFieldToTextBuilder sep p (a :*: b) =
+ gFieldToTextBuilder sep p a >> sep >> gFieldToTextBuilder sep p b
+
+instance (GToText f) => GFieldToText (S1 (MetaSel Nothing u ss ds) f) where
+ {-# INLINE gFieldToTextBuilder #-}
+ gFieldToTextBuilder _ p (M1 x) = getBuilder (gToTextBuilder p x)
+
+instance (GToText f, Selector (MetaSel (Just l) u ss ds)) => GFieldToText (S1 (MetaSel (Just l) u ss ds) f) where
+ {-# INLINE gFieldToTextBuilder #-}
+ gFieldToTextBuilder _ _ m1@(M1 x) =
+ B.stringModifiedUTF8 (selName m1) >> " = " >> getBuilder (gToTextBuilder 0 x)
+
+instance GToText V1 where
+ {-# INLINE gToTextBuilder #-}
+ gToTextBuilder _ = error "Std.Data.TextBuilder: empty data type"
+
+instance (GToText f, GToText g) => GToText (f :+: g) where
+ {-# INLINE gToTextBuilder #-}
+ gToTextBuilder p (L1 x) = gToTextBuilder p x
+ gToTextBuilder p (R1 x) = gToTextBuilder p x
+
+-- | Constructor without payload, just write the constructor name
+instance (Constructor c) => GToText (C1 c U1) where
+ {-# INLINE gToTextBuilder #-}
+ gToTextBuilder _ m1 =
+ TextBuilder . B.stringModifiedUTF8 $ conName m1
+
+-- | Constructor with payloads
+instance (GFieldToText (S1 sc f), Constructor c) => GToText (C1 c (S1 sc f)) where
+ {-# INLINE gToTextBuilder #-}
+ gToTextBuilder p m1@(M1 x) =
+ parenWhen (p > 10) . TextBuilder $ do
+ B.stringModifiedUTF8 $ conName m1
+ B.char8 ' '
+ if conIsRecord m1
+ then B.curly $ gFieldToTextBuilder (B.char7 ',' >> B.char7 ' ') p x
+ else gFieldToTextBuilder (B.char7 ' ') 11 x
+
+instance (GFieldToText (a :*: b), Constructor c) => GToText (C1 c (a :*: b)) where
+ {-# INLINE gToTextBuilder #-}
+ gToTextBuilder p m1@(M1 x) =
+ case conFixity m1 of
+ Prefix -> parenWhen (p > 10) . TextBuilder $ do
+ B.stringModifiedUTF8 $ conName m1
+ B.char8 ' '
+ if conIsRecord m1
+ then B.curly $ gFieldToTextBuilder (B.char7 ',' >> B.char7 ' ') p x
+ else gFieldToTextBuilder (B.char7 ' ') 11 x
+ Infix _ p' -> parenWhen (p > p') . TextBuilder $ do
+ gFieldToTextBuilder
+ (B.char8 ' ' >> B.stringModifiedUTF8 (conName m1) >> B.char8 ' ') (p'+1) x
+
+instance ToText a => GToText (K1 i a) where
+ {-# INLINE gToTextBuilder #-}
+ gToTextBuilder p (K1 x) = toTextBuilder p x
+
+--------------------------------------------------------------------------------
+-- Data types
+instance GToText f => GToText (D1 c f) where
+ {-# INLINE gToTextBuilder #-}
+ gToTextBuilder p (M1 x) = gToTextBuilder p x
+
+
+-- | Directly convert data to 'Text'.
+toText :: ToText a => a -> Text
+{-# INLINE toText #-}
+toText = buildText . toTextBuilder 0
+
+-- | Directly convert data to 'B.Builder'.
+toBuilder :: ToText a => a -> B.Builder ()
+{-# INLINE toBuilder #-}
+toBuilder = getBuilder . toTextBuilder 0
+
+-- | Directly convert data to 'V.Bytes'.
+toBytes :: ToText a => a -> V.Bytes
+{-# INLINE toBytes #-}
+toBytes = B.buildBytes . toBuilder
+
+-- | Faster 'show' replacement.
+toString :: ToText a => a -> String
+{-# INLINE toString #-}
+toString = T.unpack . toText
+
+instance ToText Bool where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ True = TextBuilder "True"
+ toTextBuilder _ _ = TextBuilder "False"
+
+instance ToText Char where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = TextBuilder . B.string8 . show
+
+instance ToText Double where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = double;}
+instance ToText Float where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = float;}
+
+instance ToText Int where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Int8 where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Int16 where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Int32 where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Int64 where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Word where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Word8 where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Word16 where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Word32 where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+instance ToText Word64 where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = int;}
+
+instance ToText Integer where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = integer;}
+instance ToText Natural where {{-# INLINE toTextBuilder #-}; toTextBuilder _ = integer . fromIntegral}
+instance ToText Ordering where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ GT = TextBuilder "GT"
+ toTextBuilder _ EQ = TextBuilder "EQ"
+ toTextBuilder _ _ = TextBuilder "LT"
+
+instance ToText () where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ () = TextBuilder "()"
+
+instance ToText Version where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = stringUTF8 . show
+
+-- | To keep in sync with the 'Show' instance's escaping rules, we reuse 'show' here, so it won't be as fast as a memcpy.
+instance ToText Text where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = stringUTF8 . show
+
+instance ToText Sci.Scientific where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = scientific
+
+instance ToText a => ToText [a] where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = square . intercalateList comma (toTextBuilder 0)
+
+instance ToText a => ToText (V.Vector a) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = square . intercalateVec comma (toTextBuilder 0)
+
+instance (Prim a, ToText a) => ToText (V.PrimVector a) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = square . intercalateVec comma (toTextBuilder 0)
+
+instance (ToText a, ToText b) => ToText (a, b) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ (a, b) = paren $ toTextBuilder 0 a
+ >> comma >> toTextBuilder 0 b
+
+instance (ToText a, ToText b, ToText c) => ToText (a, b, c) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ (a, b, c) = paren $ toTextBuilder 0 a
+ >> comma >> toTextBuilder 0 b
+ >> comma >> toTextBuilder 0 c
+
+instance (ToText a, ToText b, ToText c, ToText d) => ToText (a, b, c, d) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ (a, b, c, d) = paren $ toTextBuilder 0 a
+ >> comma >> toTextBuilder 0 b
+ >> comma >> toTextBuilder 0 c
+ >> comma >> toTextBuilder 0 d
+
+instance (ToText a, ToText b, ToText c, ToText d, ToText e) => ToText (a, b, c, d, e) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ (a, b, c, d, e) = paren $ toTextBuilder 0 a
+ >> comma >> toTextBuilder 0 b
+ >> comma >> toTextBuilder 0 c
+ >> comma >> toTextBuilder 0 d
+ >> comma >> toTextBuilder 0 e
+
+instance (ToText a, ToText b, ToText c, ToText d, ToText e, ToText f) => ToText (a, b, c, d, e, f) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ (a, b, c, d, e, f) = paren $ toTextBuilder 0 a
+ >> comma >> toTextBuilder 0 b
+ >> comma >> toTextBuilder 0 c
+ >> comma >> toTextBuilder 0 d
+ >> comma >> toTextBuilder 0 e
+ >> comma >> toTextBuilder 0 f
+
+instance (ToText a, ToText b, ToText c, ToText d, ToText e, ToText f, ToText g) => ToText (a, b, c, d, e, f, g) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ (a, b, c, d, e, f, g) = paren $ toTextBuilder 0 a
+ >> comma >> toTextBuilder 0 b
+ >> comma >> toTextBuilder 0 c
+ >> comma >> toTextBuilder 0 d
+ >> comma >> toTextBuilder 0 e
+ >> comma >> toTextBuilder 0 f
+ >> comma >> toTextBuilder 0 g
+
+instance ToText a => ToText (Maybe a) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder p (Just x) = parenWhen (p > 10) $ do TextBuilder "Just "
+ toTextBuilder 11 x
+ toTextBuilder _ _ = TextBuilder "Nothing"
+
+instance (ToText a, ToText b) => ToText (Either a b) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder p (Left x) = parenWhen (p > 10) $ do TextBuilder "Left "
+ toTextBuilder 11 x
+ toTextBuilder p (Right x) = parenWhen (p > 10) $ do TextBuilder "Right "
+ toTextBuilder 11 x
+
+instance (ToText a, Integral a) => ToText (Ratio a) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder p r = parenWhen (p > 10) $ do toTextBuilder 8 (numerator r)
+ TextBuilder " % "
+ toTextBuilder 8 (denominator r)
+
+instance HasResolution a => ToText (Fixed a) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder _ = TextBuilder . B.string8 . show
+
+deriving anyclass instance ToText a => ToText (Semigroup.Min a)
+deriving anyclass instance ToText a => ToText (Semigroup.Max a)
+deriving anyclass instance ToText a => ToText (Semigroup.First a)
+deriving anyclass instance ToText a => ToText (Semigroup.Last a)
+deriving anyclass instance ToText a => ToText (Semigroup.WrappedMonoid a)
+deriving anyclass instance ToText a => ToText (Semigroup.Dual a)
+deriving anyclass instance ToText a => ToText (Monoid.First a)
+deriving anyclass instance ToText a => ToText (Monoid.Last a)
+deriving anyclass instance ToText a => ToText (NonEmpty a)
+deriving anyclass instance ToText a => ToText (Identity a)
+deriving anyclass instance ToText a => ToText (Const a b)
+deriving anyclass instance ToText (Proxy a)
+deriving anyclass instance ToText b => ToText (Tagged a b)
+deriving anyclass instance ToText (f (g a)) => ToText (Compose f g a)
+deriving anyclass instance (ToText (f a), ToText (g a)) => ToText (Product f g a)
+deriving anyclass instance (ToText (f a), ToText (g a), ToText a) => ToText (Sum f g a)
diff --git a/Std/Data/Vector.hs b/Std/Data/Vector.hs
index dcb2cd4..02de745 100644
--- a/Std/Data/Vector.hs
+++ b/Std/Data/Vector.hs
@@ -91,7 +91,7 @@ module Std.Data.Vector (
, null
, length
, append
- , map, map', imap'
+ , map, map', imap', traverseVec, traverseWithIndex, traverseVec_, traverseWithIndex_
, foldl', ifoldl', foldl1', foldl1Maybe'
, foldr', ifoldr', foldr1', foldr1Maybe'
-- ** Special folds
diff --git a/Std/Data/Vector/Base.hs b/Std/Data/Vector/Base.hs
index 95da0dc..a281fe4 100644
--- a/Std/Data/Vector/Base.hs
+++ b/Std/Data/Vector/Base.hs
@@ -56,7 +56,7 @@ module Std.Data.Vector.Base (
, null
, length
, append
- , map, map', imap'
+ , map, map', imap', traverseVec, traverseWithIndex, traverseVec_, traverseWithIndex_
, foldl', ifoldl', foldl1', foldl1Maybe'
, foldr', ifoldr', foldr1', foldr1Maybe'
-- ** Special folds
@@ -79,7 +79,7 @@ module Std.Data.Vector.Base (
-- * Searching by equality
, elem, notElem, elemIndex
-- * Misc
- , IPair(..)
+ , IPair(..), mapIPair'
, defaultInitSize
, chunkOverhead
, defaultChunkSize
@@ -110,7 +110,6 @@ import Data.Hashable.Lifted (Hashable1(..), hashWithSalt1)
import qualified Data.List as List
import Data.Maybe
import Data.Monoid (Monoid (..))
-import Data.Word8 (toLower)
import qualified Data.CaseInsensitive as CI
import Data.Primitive
import Data.Primitive.PrimArray
@@ -133,6 +132,7 @@ import Prelude hiding (concat, concatMap,
foldl, foldl1, foldr, foldr1,
maximum, minimum, product, sum,
all, any, replicate, traverse)
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
import System.IO.Unsafe (unsafeDupablePerformIO)
import Std.Data.Array
@@ -141,6 +141,8 @@ import Std.Data.PrimArray.Cast
-- | Typeclass for box and unboxed vectors, which are created by slicing arrays.
--
+-- Instead of providing a generalized vector with a polymorphic array field, we use this typeclass
+-- so that instances using a concrete array type can unpack their array payload.
class (Arr (MArray v) (IArray v) a) => Vec v a where
-- | Vector's mutable array type
type MArray v = (marr :: * -> * -> *) | marr -> v
@@ -277,7 +279,15 @@ instance F.Foldable Vector where
sum = sum
instance T.Traversable Vector where
- traverse = traverse
+ {-# INLINE traverse #-}
+ traverse = traverseVec
+
+instance Arbitrary a => Arbitrary (Vector a) where
+ arbitrary = pack <$> arbitrary
+ shrink v = pack <$> shrink (unpack v)
+
+instance CoArbitrary a => CoArbitrary (Vector a) where
+ coarbitrary = coarbitrary . unpack
instance Hashable a => Hashable (Vector a) where
{-# INLINE hashWithSalt #-}
@@ -292,15 +302,21 @@ instance Hashable1 Vector where
| i >= end = salt
| otherwise = go (h salt (indexArr arr i)) (i+1)
-traverse :: (Vec v a, Vec u b, Applicative f) => (a -> f b) -> v a -> f (u b)
-{-# INLINE [1] traverse #-}
-{-# RULES "traverse/ST" traverse = traverseST #-}
-{-# RULES "traverse/IO" traverse = traverseIO #-}
-traverse f v = packN (length v) <$> T.traverse f (unpack v)
-
-traverseST :: forall v u a b s. (Vec v a, Vec u b) => (a -> ST s b) -> v a -> ST s (u b)
-{-# INLINE traverseST #-}
-traverseST f (Vec arr s l)
+traverseVec :: (Vec v a, Vec u b, Applicative f) => (a -> f b) -> v a -> f (u b)
+{-# INLINE [1] traverseVec #-}
+{-# RULES "traverseVec/ST" forall f. traverseVec f = traverseWithIndexST (const f) #-}
+{-# RULES "traverseVec/IO" forall f. traverseVec f = traverseWithIndexIO (const f) #-}
+traverseVec f v = packN (length v) <$> T.traverse f (unpack v)
+
+traverseWithIndex :: (Vec v a, Vec u b, Applicative f) => (Int -> a -> f b) -> v a -> f (u b)
+{-# INLINE [1] traverseWithIndex #-}
+{-# RULES "traverseWithIndex/ST" traverseWithIndex = traverseWithIndexST #-}
+{-# RULES "traverseWithIndex/IO" traverseWithIndex = traverseWithIndexIO #-}
+traverseWithIndex f v = packN (length v) <$> zipWithM f [0..] (unpack v)
+
+traverseWithIndexST :: forall v u a b s. (Vec v a, Vec u b) => (Int -> a -> ST s b) -> v a -> ST s (u b)
+{-# INLINE traverseWithIndexST #-}
+traverseWithIndexST f (Vec arr s l)
| l == 0 = return empty
| otherwise = do
marr <- newArr l
@@ -312,13 +328,13 @@ traverseST f (Vec arr s l)
go !marr !i
| i >= l = return ()
| otherwise = do
- x <- indexArrM arr i
- writeArr marr i =<< f x
+ x <- indexArrM arr (i+s)
+ writeArr marr i =<< f i x
go marr (i+1)
-traverseIO :: forall v u a b. (Vec v a, Vec u b) => (a -> IO b) -> v a -> IO (u b)
-{-# INLINE traverseIO #-}
-traverseIO f (Vec arr s l)
+traverseWithIndexIO :: forall v u a b. (Vec v a, Vec u b) => (Int -> a -> IO b) -> v a -> IO (u b)
+{-# INLINE traverseWithIndexIO #-}
+traverseWithIndexIO f (Vec arr s l)
| l == 0 = return empty
| otherwise = do
marr <- newArr l
@@ -330,10 +346,23 @@ traverseIO f (Vec arr s l)
go !marr !i
| i >= l = return ()
| otherwise = do
- x <- indexArrM arr i
- writeArr marr i =<< f x
+ x <- indexArrM arr (i+s)
+ writeArr marr i =<< f i x
go marr (i+1)
+traverseVec_ :: (Vec v a, Applicative f) => (a -> f b) -> v a -> f ()
+{-# INLINE traverseVec_ #-}
+traverseVec_ f = traverseWithIndex_ (\ _ x -> f x)
+
+traverseWithIndex_ :: (Vec v a, Applicative f) => (Int -> a -> f b) -> v a -> f ()
+{-# INLINE traverseWithIndex_ #-}
+traverseWithIndex_ f (Vec arr s l) = go s
+ where
+ end = s + l
+ go !i
+ | i >= end = pure ()
+ | otherwise = f (i-s) (indexArr arr i) *> go (i+1)
+
--------------------------------------------------------------------------------
-- | Primitive vector
--
@@ -367,16 +396,16 @@ eqPrimVector (PrimVector (PrimArray baA#) (I# sA#) lA@(I# lA#))
siz@(I# siz#) = sizeOf (undefined :: a)
(I# n#) = min (lA*siz) (lB*siz)
-instance {-# OVERLAPPABLE #-} (Prim a, Ord a) => Ord (PrimVector a) where
+instance (Prim a, Ord a) => Ord (PrimVector a) where
{-# INLINE compare #-}
compare = comparePrimVector
-instance {-# OVERLAPPING #-} Ord (PrimVector Word8) where
- {-# INLINE compare #-}
- compare = compareBytes
comparePrimVector :: (Prim a, Ord a) => PrimVector a -> PrimVector a -> Ordering
-{-# INLINE comparePrimVector #-}
+{-# INLINE [1] comparePrimVector #-}
+{-# RULES
+ "comparePrimVector/Bytes" comparePrimVector = compareBytes
+ #-}
comparePrimVector (PrimVector baA sA lA) (PrimVector baB sB lB)
| baA `sameArr` baB = if sA == sB then lA `compare` lB else go sA sB
| otherwise = go sA sB
@@ -421,20 +450,34 @@ instance (Prim a, Show a) => Show (PrimVector a) where
instance (Prim a, Read a) => Read (PrimVector a) where
readsPrec p str = [ (pack x, y) | (x, y) <- readsPrec p str ]
-instance {-# OVERLAPPABLE #-} (Hashable a, Prim a) => Hashable (PrimVector a) where
+instance (Prim a, Arbitrary a) => Arbitrary (PrimVector a) where
+ arbitrary = pack <$> arbitrary
+ shrink v = pack <$> shrink (unpack v)
+
+instance (Prim a, CoArbitrary a) => CoArbitrary (PrimVector a) where
+ coarbitrary = coarbitrary . unpack
+
+instance (Hashable a, Prim a) => Hashable (PrimVector a) where
{-# INLINE hashWithSalt #-}
+ hashWithSalt = hashWithSaltPrimVector
+
+hashWithSaltPrimVector :: (Hashable a, Prim a) => Int -> PrimVector a -> Int
+{-# INLINE [1] hashWithSaltPrimVector #-}
+{-# RULES
+ "hashWithSaltPrimVector/Bytes" hashWithSaltPrimVector = hashWithSaltBytes
+ #-}
+hashWithSaltPrimVector salt (PrimVector arr s l) = go salt s
+ where
-- we don't do a final hash with length to keep consistent with Bytes's instance
- hashWithSalt salt (PrimVector arr s l) = go salt s
- where
- !end = s + l
- go !salt !i
- | i >= end = salt
- | otherwise = go (hashWithSalt salt (indexPrimArray arr i)) (i+1)
+ !end = s + l
+ go !salt !i
+ | i >= end = salt
+ | otherwise = go (hashWithSalt salt (indexPrimArray arr i)) (i+1)
-instance {-# OVERLAPPING #-} Hashable (PrimVector Word8) where
- {-# INLINE hashWithSalt #-}
- hashWithSalt salt (PrimVector (PrimArray ba#) s l) =
- unsafeDupablePerformIO (c_fnv_hash_ba ba# s l salt)
+hashWithSaltBytes :: Int -> Bytes -> Int
+{-# INLINE hashWithSaltBytes #-}
+hashWithSaltBytes salt (PrimVector (PrimArray ba#) s l) =
+ unsafeDupablePerformIO (c_fnv_hash_ba ba# s l salt)
--------------------------------------------------------------------------------
@@ -447,28 +490,31 @@ instance (a ~ Word8) => IsString (PrimVector a) where
instance CI.FoldCase Bytes where
{-# INLINE foldCase #-}
- foldCase = map toLower
+ foldCase = map toLower8
+ where
+ toLower8 :: Word8 -> Word8
+ toLower8 w
+ | 65 <= w && w <= 90 ||
+ 192 <= w && w <= 214 ||
+ 216 <= w && w <= 222 = w + 32
+ | otherwise = w
packASCII :: String -> Bytes
-{-# INLINE CONLIKE [1] packASCII #-}
+{-# INLINE CONLIKE [0] packASCII #-}
{-# RULES
- "packASCII/packStringAddr" forall addr . packASCII (unpackCString# addr) = packStringAddr addr
+ "packASCII/packASCIIAddr" forall addr . packASCII (unpackCString# addr) = packASCIIAddr addr
#-}
packASCII = pack . fmap (fromIntegral . ord)
-packStringAddr :: Addr# -> Bytes
-{-# INLINABLE packStringAddr #-}
-packStringAddr addr# = validateAndCopy addr#
+packASCIIAddr :: Addr# -> Bytes
+packASCIIAddr addr# = copy addr#
where
len = fromIntegral . unsafeDupablePerformIO $ c_strlen addr#
- valid = unsafeDupablePerformIO $ c_ascii_validate_addr addr# len
- validateAndCopy addr#
- | valid == 0 = pack . fmap (fromIntegral . ord) $ unpackCString# addr#
- | otherwise = runST $ do
- marr <- newPrimArray len
- copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
- arr <- unsafeFreezePrimArray marr
- return (PrimVector arr 0 len)
+ copy addr# = runST $ do
+ marr <- newPrimArray len
+ copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
+ arr <- unsafeFreezePrimArray marr
+ return (PrimVector arr 0 len)
-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
--
@@ -1187,7 +1233,35 @@ elemIndexBytes w (PrimVector (PrimArray ba#) s l) =
--------------------------------------------------------------------------------
-- | Index pair type to help GHC unpack in some loops, useful when write fast folds.
-data IPair a = IPair {-# UNPACK #-}!Int a
+data IPair a = IPair { ifst :: {-# UNPACK #-}!Int, isnd :: a } deriving (Show, Eq, Ord)
+
+instance (Arbitrary v) => Arbitrary (IPair v) where
+ arbitrary = iPairFromTuple <$> arbitrary
+ shrink v = iPairFromTuple <$> shrink (iPairToTuple v)
+
+instance (CoArbitrary v) => CoArbitrary (IPair v) where
+ coarbitrary = coarbitrary . iPairToTuple
+
+instance Functor IPair where
+ {-# INLINE fmap #-}
+ fmap f (IPair i v) = IPair i (f v)
+
+instance NFData a => NFData (IPair a) where
+ {-# INLINE rnf #-}
+ rnf (IPair _ a) = rnf a
+
+-- | Unlike the 'Functor' instance, this mapping evaluates the value inside 'IPair' strictly.
+mapIPair' :: (a -> b) -> IPair a -> IPair b
+{-# INLINE mapIPair' #-}
+mapIPair' f (IPair i v) = let !v' = f v in IPair i v'
+
+iPairToTuple :: IPair a -> (Int, a)
+{-# INLINE iPairToTuple #-}
+iPairToTuple (IPair i v) = (i, v)
+
+iPairFromTuple :: (Int, a) -> IPair a
+{-# INLINE iPairFromTuple #-}
+iPairFromTuple (i, v) = IPair i v
-- | The chunk size used for I\/O. Currently set to @32k-chunkOverhead@
defaultChunkSize :: Int
diff --git a/Std/Data/Vector/Extra.hs b/Std/Data/Vector/Extra.hs
index c215fed..adc884b 100644
--- a/Std/Data/Vector/Extra.hs
+++ b/Std/Data/Vector/Extra.hs
@@ -54,12 +54,12 @@ module Std.Data.Vector.Extra (
, tail
, init
, last
- , index
+ , index, indexM
, unsafeHead
, unsafeTail
, unsafeInit
, unsafeLast
- , unsafeIndex
+ , unsafeIndex, unsafeIndexM
, unsafeTake
, unsafeDrop
) where
@@ -268,6 +268,8 @@ dropAround f = dropWhile f . dropWhileR f
-- | /O(n)/ Split the vector into the longest prefix of elements that do not satisfy the predicate and the rest without copying.
+--
+-- @break (==x)@ will be rewritten using a @memchr@.
break :: Vec v a => (a -> Bool) -> v a -> (v a, v a)
{-# INLINE break #-}
break f vs@(Vec arr s l) =
@@ -277,9 +279,13 @@ break f vs@(Vec arr s l) =
in (v1, v2)
-- | /O(n)/ Split the vector into the longest prefix of elements that satisfy the predicate and the rest without copying.
+--
+-- @span (/=x)@ will be rewritten using a @memchr@.
span :: Vec v a => (a -> Bool) -> v a -> (v a, v a)
-{-# INLINE span #-}
+{-# INLINE [1] span #-}
span f = break (not . f)
+{-# RULES "spanNEq/breakEq1" forall w. span (w `neWord8`) = break (w `eqWord8`) #-}
+{-# RULES "spanNEq/breakEq2" forall w. span (`neWord8` w) = break (`eqWord8` w) #-}
-- | 'breakR' behaves like 'break' but from the end of the vector.
--
@@ -782,6 +788,14 @@ index :: (Vec v a, HasCallStack) => v a -> Int -> a
index (Vec arr s l) i | i < 0 || i >= l = errorOutRange i
| otherwise = arr `indexArr` (s + i)
+-- | /O(1)/ Index array element.
+--
+-- Throw 'IndexOutOfVectorRange' if the index is outside of the vector.
+indexM :: (Vec v a, Monad m, HasCallStack) => v a -> Int -> m a
+{-# INLINE indexM #-}
+indexM (Vec arr s l) i | i < 0 || i >= l = errorOutRange i
+ | otherwise = arr `indexArrM` (s + i)
+
-- | /O(1)/ Extract the first element of a vector.
--
-- Make sure vector is non-empty, otherwise segmentation fault await!
@@ -817,6 +831,13 @@ unsafeIndex :: Vec v a => v a -> Int -> a
{-# INLINE unsafeIndex #-}
unsafeIndex (Vec arr s l) i = indexArr arr (s + i)
+-- | /O(1)/ Index array element.
+--
+-- Make sure index is in bound, otherwise segmentation fault await!
+unsafeIndexM :: (Vec v a, Monad m) => v a -> Int -> m a
+{-# INLINE unsafeIndexM #-}
+unsafeIndexM (Vec arr s l) i = indexArrM arr (s + i)
+
-- | /O(1)/ 'take' @n@, applied to a vector @xs@, returns the prefix
-- of @xs@ of length @n@.
--
diff --git a/Std/Data/Vector/FlatIntMap.hs b/Std/Data/Vector/FlatIntMap.hs
new file mode 100644
index 0000000..df0109b
--- /dev/null
+++ b/Std/Data/Vector/FlatIntMap.hs
@@ -0,0 +1,394 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+{-|
+Module : Std.Data.Vector.FlatIntMap
+Description : Fast map based on sorted vector
+Copyright : (c) Dong Han, 2017-2019
+ (c) Tao He, 2018-2019
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+This module provides a simple 'Int'-keyed map based on a sorted vector and binary search. It's particularly
+suitable for small key value collections, such as deserialization intermediate representations.
+It can also be used in places where insertion and deletion are rare but fast lookup is required.
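+
+A minimal usage sketch (illustrative values; @V.IPair@ comes from "Std.Data.Vector.Base"):
+
+> let m = pack [V.IPair 1 "one", V.IPair 3 "three"]
+> lookup 3 m                                  -- Just "three"
+> lookup 2 (insert 2 "two" m)                 -- Just "two"
+> lookup 3 (merge m (pack [V.IPair 3 "3"]))   -- Just "3", 'merge' prefers the right map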
+
+-}
+
+module Std.Data.Vector.FlatIntMap
+ ( -- * FlatIntMap backed by sorted vector
+ FlatIntMap, sortedKeyValues, size, null, empty, map', imap'
+ , pack, packN, packR, packRN
+ , unpack, unpackR, packVector, packVectorR
+ , lookup
+ , delete
+ , insert
+ , adjust'
+ , merge, mergeWithKey'
+ -- * fold and traverse
+ , foldrWithKey, foldrWithKey', foldlWithKey, foldlWithKey', traverseWithKey
+ -- * binary & linear search on vectors
+ , binarySearch
+ , linearSearch, linearSearchR
+ ) where
+
+import Control.DeepSeq
+import Control.Monad
+import Control.Monad.ST
+import qualified Data.Foldable as Foldable
+import qualified Data.Traversable as Traversable
+import qualified Data.Semigroup as Semigroup
+import qualified Data.Monoid as Monoid
+import qualified Data.Primitive.SmallArray as A
+import qualified Std.Data.Vector.Base as V
+import qualified Std.Data.Vector.Sort as V
+import qualified Std.Data.Text as T
+import qualified Std.Data.TextBuilder as T
+import Data.Function (on)
+import Data.Bits (shiftR)
+import Data.Data
+import Data.Typeable
+import Prelude hiding (lookup, null)
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
+
+--------------------------------------------------------------------------------
+
+newtype FlatIntMap v = FlatIntMap { sortedKeyValues :: V.Vector (V.IPair v) }
+ deriving (Show, Eq, Ord, Typeable)
+
+instance T.ToText v => T.ToText (FlatIntMap v) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder p (FlatIntMap vec) = T.parenWhen (p > 10) $ do
+ T.unsafeFromBuilder "FlatIntMap {"
+ T.intercalateVec T.comma (\ (V.IPair i v) ->
+ T.toTextBuilder 0 i >> ":" >> T.toTextBuilder 0 v) vec
+ T.char7 '}'
+
+instance (Arbitrary v) => Arbitrary (FlatIntMap v) where
+ arbitrary = pack <$> arbitrary
+ shrink v = pack <$> shrink (unpack v)
+
+instance (CoArbitrary v) => CoArbitrary (FlatIntMap v) where
+ coarbitrary = coarbitrary . unpack
+
+instance Semigroup.Semigroup (FlatIntMap v) where
+ {-# INLINE (<>) #-}
+ (<>) = merge
+
+instance Monoid.Monoid (FlatIntMap v) where
+ {-# INLINE mappend #-}
+ mappend = merge
+ {-# INLINE mempty #-}
+ mempty = empty
+
+instance NFData v => NFData (FlatIntMap v) where
+ {-# INLINE rnf #-}
+ rnf (FlatIntMap ivs) = rnf ivs
+
+instance Functor (FlatIntMap) where
+ {-# INLINE fmap #-}
+ fmap f (FlatIntMap vs) = FlatIntMap (V.map' (fmap f) vs)
+
+instance Foldable.Foldable FlatIntMap where
+ {-# INLINE foldr' #-}
+ foldr' f = foldrWithKey' (const f)
+ {-# INLINE foldr #-}
+ foldr f = foldrWithKey (const f)
+ {-# INLINE foldl' #-}
+ foldl' f = foldlWithKey' (\ a k v -> f a v)
+ {-# INLINE foldl #-}
+ foldl f = foldlWithKey (\ a k v -> f a v)
+ {-# INLINE toList #-}
+ toList = fmap V.isnd . unpack
+ {-# INLINE null #-}
+ null (FlatIntMap vs) = V.null vs
+ {-# INLINE length #-}
+ length (FlatIntMap vs) = V.length vs
+ {-# INLINE elem #-}
+ elem a (FlatIntMap vs) = elem a (map V.isnd $ V.unpack vs)
+
+instance Traversable.Traversable FlatIntMap where
+ {-# INLINE traverse #-}
+ traverse f = traverseWithKey (const f)
+
+size :: FlatIntMap v -> Int
+{-# INLINE size #-}
+size = V.length . sortedKeyValues
+
+null :: FlatIntMap v -> Bool
+{-# INLINE null #-}
+null = V.null . sortedKeyValues
+
+map' :: (v -> v') -> FlatIntMap v -> FlatIntMap v'
+{-# INLINE map' #-}
+map' f (FlatIntMap vs) = FlatIntMap (V.map' (V.mapIPair' f) vs)
+
+imap' :: (Int -> v -> v') -> FlatIntMap v -> FlatIntMap v'
+{-# INLINE imap' #-}
+imap' f (FlatIntMap vs) = FlatIntMap (V.imap' (\ i -> V.mapIPair' (f i)) vs)
+
+-- | /O(1)/ empty flat map.
+empty :: FlatIntMap v
+{-# INLINE empty #-}
+empty = FlatIntMap V.empty
+
+-- | /O(N*logN)/ Pack list of key values, on key duplication prefer left one.
+pack :: [V.IPair v] -> FlatIntMap v
+{-# INLINE pack #-}
+pack kvs = FlatIntMap (V.mergeDupAdjacentLeft ((==) `on` V.ifst) (V.mergeSortBy (compare `on` V.ifst) (V.pack kvs)))
+
+-- | /O(N*logN)/ Pack list of key values with suggested size, on key duplication prefer left one.
+packN :: Int -> [V.IPair v] -> FlatIntMap v
+{-# INLINE packN #-}
+packN n kvs = FlatIntMap (V.mergeDupAdjacentLeft ((==) `on` V.ifst) (V.mergeSortBy (compare `on` V.ifst) (V.packN n kvs)))
+
+-- | /O(N*logN)/ Pack list of key values, on key duplication prefer right one.
+packR :: [V.IPair v] -> FlatIntMap v
+{-# INLINE packR #-}
+packR kvs = FlatIntMap (V.mergeDupAdjacentRight ((==) `on` V.ifst) (V.mergeSortBy (compare `on` V.ifst) (V.pack kvs)))
+
+-- | /O(N*logN)/ Pack list of key values with suggested size, on key duplication prefer right one.
+packRN :: Int -> [V.IPair v] -> FlatIntMap v
+{-# INLINE packRN #-}
+packRN n kvs = FlatIntMap (V.mergeDupAdjacentRight ((==) `on` V.ifst) (V.mergeSortBy (compare `on` V.ifst) (V.packN n kvs)))
+
+-- | /O(N)/ Unpack key value pairs to a list sorted by keys in ascending order.
+--
+-- This function works with @foldr/build@ fusion in base.
+unpack :: FlatIntMap v -> [V.IPair v]
+{-# INLINE unpack #-}
+unpack = V.unpack . sortedKeyValues
+
+-- | /O(N)/ Unpack key value pairs to a list sorted by keys in descending order.
+--
+-- This function works with @foldr/build@ fusion in base.
+unpackR :: FlatIntMap v -> [V.IPair v]
+{-# INLINE unpackR #-}
+unpackR = V.unpackR . sortedKeyValues
+
+-- | /O(N*logN)/ Pack vector of key values, on key duplication prefer left one.
+packVector :: V.Vector (V.IPair v) -> FlatIntMap v
+{-# INLINE packVector #-}
+packVector kvs = FlatIntMap (V.mergeDupAdjacentLeft ((==) `on` V.ifst) (V.mergeSortBy (compare `on` V.ifst) kvs))
+
+-- | /O(N*logN)/ Pack vector of key values, on key duplication prefer right one.
+packVectorR :: V.Vector (V.IPair v) -> FlatIntMap v
+{-# INLINE packVectorR #-}
+packVectorR kvs = FlatIntMap (V.mergeDupAdjacentRight ((==) `on` V.ifst) (V.mergeSortBy (compare `on` V.ifst) kvs))
+
+-- | /O(logN)/ Binary search on flat map.
+lookup :: Int -> FlatIntMap v -> Maybe v
+{-# INLINABLE lookup #-}
+lookup _ (FlatIntMap (V.Vector arr s 0)) = Nothing
+lookup k' (FlatIntMap (V.Vector arr s l)) = go s (s+l-1)
+ where
+ go !s !e
+ | s == e =
+ case arr `A.indexSmallArray` s of (V.IPair k v) | k == k' -> Just v
+ | otherwise -> Nothing
+ | s > e = Nothing
+ | otherwise =
+ let mid = (s+e) `shiftR` 1
+ (V.IPair k v) = arr `A.indexSmallArray` mid
+ in case k' `compare` k of LT -> go s (mid-1)
+ GT -> go (mid+1) e
+ _ -> Just v
+
+-- | /O(N)/ Insert new key value into map, replace old one if key exists.
+insert :: Int -> v -> FlatIntMap v -> FlatIntMap v
+{-# INLINE insert #-}
+insert k v (FlatIntMap vec@(V.Vector arr s l)) =
+ case binarySearch vec k of
+ Left i -> FlatIntMap (V.create (l+1) (\ marr -> do
+ when (i>s) $ A.copySmallArray marr 0 arr s (i-s)
+ A.writeSmallArray marr i (V.IPair k v)
+ when (i<(s+l)) $ A.copySmallArray marr (i+1) arr i (s+l-i)))
+ Right i -> FlatIntMap (V.Vector (runST (do
+ let arr' = A.cloneSmallArray arr s l
+ marr <- A.unsafeThawSmallArray arr'
+ A.writeSmallArray marr i (V.IPair k v)
+ A.unsafeFreezeSmallArray marr)) 0 l)
+
+-- | /O(N)/ Delete a key value pair by key.
+delete :: Int -> FlatIntMap v -> FlatIntMap v
+{-# INLINE delete #-}
+delete k m@(FlatIntMap vec@(V.Vector arr s l)) =
+ case binarySearch vec k of
+ Left i -> m
+ Right i -> FlatIntMap $ V.create (l-1) (\ marr -> do
+ when (i>s) $ A.copySmallArray marr 0 arr s (i-s)
+ let !end = s+l
+ !j = i+1
+ when (end > j) $ A.copySmallArray marr (i-s) arr j (end-j))
+
+-- | /O(N)/ Modify a value by key.
+--
+-- The value is evaluated to WHNF before writing into map.
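+--
+-- A quick sketch (illustrative values):
+--
+-- > lookup 1 (adjust' (+10) 1 (pack [V.IPair 1 5]))   -- Just 15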
+adjust' :: (v -> v) -> Int -> FlatIntMap v -> FlatIntMap v
+{-# INLINE adjust' #-}
+adjust' f k m@(FlatIntMap vec@(V.Vector arr s l)) =
+ case binarySearch vec k of
+ Left i -> m
+ Right i -> FlatIntMap $ V.create l (\ marr -> do
+ A.copySmallArray marr 0 arr s l
+ let !v' = f (V.isnd (A.indexSmallArray arr i))
+ A.writeSmallArray marr i (V.IPair k v'))
+
+-- | /O(n+m)/ Merge two 'FlatIntMap', prefer right value on key duplication.
+merge :: forall v. FlatIntMap v -> FlatIntMap v -> FlatIntMap v
+{-# INLINE merge #-}
+merge fmL@(FlatIntMap (V.Vector arrL sL lL)) fmR@(FlatIntMap (V.Vector arrR sR lR))
+ | null fmL = fmR
+ | null fmR = fmL
+ | otherwise = FlatIntMap (V.createN (lL+lR) (go sL sR 0))
+ where
+ endL = sL + lL
+ endR = sR + lR
+ go :: Int -> Int -> Int -> A.SmallMutableArray s (V.IPair v) -> ST s Int
+ go !i !j !k marr
+ | i >= endL = do
+ A.copySmallArray marr k arrR j (lR-j)
+ return $! k+lR-j
+ | j >= endR = do
+ A.copySmallArray marr k arrL i (lL-i)
+ return $! k+lL-i
+ | otherwise = do
+ kvL@(V.IPair kL vL) <- arrL `A.indexSmallArrayM` i
+ kvR@(V.IPair kR vR) <- arrR `A.indexSmallArrayM` j
+ case kL `compare` kR of LT -> do A.writeSmallArray marr k kvL
+ go (i+1) j (k+1) marr
+ EQ -> do A.writeSmallArray marr k kvR
+ go (i+1) (j+1) (k+1) marr
+ _ -> do A.writeSmallArray marr k kvR
+ go i (j+1) (k+1) marr
+
+-- | /O(n+m)/ Merge two 'FlatIntMap' with a merge function.
+mergeWithKey' :: forall v. (Int -> v -> v -> v) -> FlatIntMap v -> FlatIntMap v -> FlatIntMap v
+{-# INLINABLE mergeWithKey' #-}
+mergeWithKey' f fmL@(FlatIntMap (V.Vector arrL sL lL)) fmR@(FlatIntMap (V.Vector arrR sR lR))
+ | null fmL = fmR
+ | null fmR = fmL
+ | otherwise = FlatIntMap (V.createN (lL+lR) (go sL sR 0))
+ where
+ endL = sL + lL
+ endR = sR + lR
+ go :: Int -> Int -> Int -> A.SmallMutableArray s (V.IPair v) -> ST s Int
+ go !i !j !k marr
+ | i >= endL = do
+ A.copySmallArray marr k arrR j (lR-j)
+ return $! k+lR-j
+ | j >= endR = do
+ A.copySmallArray marr k arrL i (lL-i)
+ return $! k+lL-i
+ | otherwise = do
+ kvL@(V.IPair kL vL) <- arrL `A.indexSmallArrayM` i
+ kvR@(V.IPair kR vR) <- arrR `A.indexSmallArrayM` j
+ case kL `compare` kR of LT -> do A.writeSmallArray marr k kvL
+ go (i+1) j (k+1) marr
+ EQ -> do let !v' = f kL vL vR
+ A.writeSmallArray marr k (V.IPair kL v')
+ go (i+1) (j+1) (k+1) marr
+ _ -> do A.writeSmallArray marr k kvR
+ go i (j+1) (k+1) marr
+
+-- | /O(n)/ Reduce this map by applying a binary operator to all
+-- elements, using the given starting value (typically the
+-- right-identity of the operator).
+--
+-- During folding, keys are in descending order.
+foldrWithKey :: (Int -> v -> a -> a) -> a -> FlatIntMap v -> a
+{-# INLINE foldrWithKey #-}
+foldrWithKey f a (FlatIntMap vs) = foldr (\ (V.IPair k v) a -> f k v a) a vs
+
+-- | /O(n)/ Reduce this map by applying a binary operator to all
+-- elements, using the given starting value (typically the
+-- right-identity of the operator).
+--
+-- During folding, keys are in ascending order.
+foldlWithKey :: (a -> Int -> v -> a) -> a -> FlatIntMap v -> a
+{-# INLINE foldlWithKey #-}
+foldlWithKey f a (FlatIntMap vs) = foldl (\ a' (V.IPair k v) -> f a' k v) a vs
+
+-- | /O(n)/ Reduce this map by applying a binary operator to all
+-- elements, using the given starting value (typically the
+-- right-identity of the operator).
+--
+-- During folding, keys are in descending order.
+foldrWithKey' :: (Int -> v -> a -> a) -> a -> FlatIntMap v -> a
+{-# INLINE foldrWithKey' #-}
+foldrWithKey' f a (FlatIntMap vs) = V.foldr' (\ (V.IPair k v) -> f k v) a vs
+
+-- | /O(n)/ Reduce this map by applying a binary operator to all
+-- elements, using the given starting value (typically the
+-- right-identity of the operator).
+--
+-- During folding, keys are in ascending order.
+foldlWithKey' :: (a -> Int -> v -> a) -> a -> FlatIntMap v -> a
+{-# INLINE foldlWithKey' #-}
+foldlWithKey' f a (FlatIntMap vs) = V.foldl' (\ a' (V.IPair k v) -> f a' k v) a vs
+
+-- | /O(n)/.
+-- @'traverseWithKey' f m == 'pack' <$> 'traverse' (\ (V.IPair k v) -> V.IPair k <$> f k v) ('unpack' m)@
+-- That is, behaves exactly like a regular 'traverse' except that the traversing
+-- function also has access to the key associated with a value.
+traverseWithKey :: Applicative t => (Int -> a -> t b) -> FlatIntMap a -> t (FlatIntMap b)
+{-# INLINE traverseWithKey #-}
+traverseWithKey f (FlatIntMap vs) = FlatIntMap <$> traverse (\ (V.IPair k v) -> V.IPair k <$> f k v) vs
+
+--------------------------------------------------------------------------------
+
+-- | Find the key's index in the vector slice: if the key exists, return 'Right' its index;
+-- otherwise return 'Left' the index where it would be inserted.
+--
+-- This function only works on ascending sorted vectors.
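+--
+-- A quick sketch of the result convention (illustrative values):
+--
+-- > let vec = sortedKeyValues (pack [V.IPair 1 'a', V.IPair 5 'b'])
+-- > binarySearch vec 5   -- Right 1 (found at index 1)
+-- > binarySearch vec 3   -- Left 1  (not found, 3 would be inserted at index 1)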
+binarySearch :: V.Vector (V.IPair v) -> Int -> Either Int Int
+{-# INLINABLE binarySearch #-}
+binarySearch (V.Vector arr s 0) _ = Left 0
+binarySearch (V.Vector arr s l) !k' = go s (s+l-1)
+ where
+ go !s !e
+ | s == e =
+ let V.IPair k v = arr `A.indexSmallArray` s
+ in case k' `compare` k of LT -> Left s
+ GT -> let !s' = s+1 in Left s'
+ _ -> Right s
+ | s > e = Left s
+ | otherwise =
+ let !mid = (s+e) `shiftR` 1
+ (V.IPair k v) = arr `A.indexSmallArray` mid
+ in case k' `compare` k of LT -> go s (mid-1)
+ GT -> go (mid+1) e
+ _ -> Right mid
+
+--------------------------------------------------------------------------------
+
+-- | Linear scan from left to right, return the first match if one exists.
+linearSearch :: V.Vector (V.IPair v) -> Int -> Maybe v
+{-# INLINABLE linearSearch #-}
+linearSearch (V.Vector arr s l) !k' = go s
+ where
+ !end = s + l
+ go !i
+ | i >= end = Nothing
+ | otherwise =
+ let V.IPair k v = arr `A.indexSmallArray` i
+ in if k' == k then Just v else go (i+1)
+
+-- | Linear scan from right to left, return the first match if one exists.
+linearSearchR :: V.Vector (V.IPair v) -> Int -> Maybe v
+{-# INLINABLE linearSearchR #-}
+linearSearchR (V.Vector arr s l) !k' = go (s+l-1)
+ where
+ go !i
+ | i < s = Nothing
+ | otherwise =
+ let V.IPair k v = arr `A.indexSmallArray` i
+ in if k' == k then Just v else go (i-1)
diff --git a/Std/Data/Vector/FlatIntSet.hs b/Std/Data/Vector/FlatIntSet.hs
new file mode 100644
index 0000000..1170ba3
--- /dev/null
+++ b/Std/Data/Vector/FlatIntSet.hs
@@ -0,0 +1,232 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
+
+{-|
+Module : Std.Data.Vector.FlatIntSet
+Description : Fast set based on sorted vector
+Copyright : (c) Dong Han, 2017-2019
+ (c) Tao He, 2018-2019
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+This module provides a simple 'Int' set based on a sorted vector and binary search. It's particularly
+suitable for small value collections, such as deserialization intermediate representations.
+It can also be used in places where insertion and deletion are rare but fast membership tests are required.
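+
+A minimal usage sketch (illustrative values):
+
+> let s = pack [3, 1, 2, 3]      -- duplicates removed, values sorted
+> elem 2 s                       -- True
+> elem 5 (insert 5 s)            -- True
+> unpack (delete 1 s)            -- [2,3]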
+
+-}
+
+module Std.Data.Vector.FlatIntSet
+ ( -- * FlatIntSet backed by sorted vector
+ FlatIntSet, sortedValues, size, null, empty, map'
+ , pack, packN, packR, packRN
+ , unpack, unpackR, packVector, packVectorR
+ , elem
+ , delete
+ , insert
+ , merge
+ -- * binary & linear search on vectors
+ , binarySearch
+ ) where
+
+import Control.DeepSeq
+import Control.Monad
+import Control.Monad.ST
+import qualified Data.Foldable as Foldable
+import qualified Data.Traversable as Traversable
+import qualified Data.Semigroup as Semigroup
+import qualified Data.Monoid as Monoid
+import qualified Data.Primitive.PrimArray as A
+import qualified Std.Data.Vector.Base as V
+import qualified Std.Data.Vector.Sort as V
+import qualified Std.Data.Vector.Search as V
+import qualified Std.Data.Text as T
+import qualified Std.Data.TextBuilder as T
+import Data.Function (on)
+import Data.Bits (shiftR)
+import Data.Data
+import Data.Typeable
+import Prelude hiding (elem, null)
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
+
+--------------------------------------------------------------------------------
+
+newtype FlatIntSet = FlatIntSet { sortedValues :: V.PrimVector Int }
+ deriving (Show, Eq, Ord, Typeable, NFData)
+
+instance T.ToText FlatIntSet where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder p (FlatIntSet vec) = T.parenWhen (p > 10) $ do
+ T.unsafeFromBuilder "FlatIntSet {"
+ T.intercalateVec T.comma (T.toTextBuilder 0) vec
+ T.char7 '}'
+
+instance Semigroup.Semigroup FlatIntSet where
+ {-# INLINE (<>) #-}
+ (<>) = merge
+
+instance Monoid.Monoid FlatIntSet where
+ {-# INLINE mappend #-}
+ mappend = merge
+ {-# INLINE mempty #-}
+ mempty = empty
+
+instance Arbitrary FlatIntSet where
+ arbitrary = pack <$> arbitrary
+ shrink v = pack <$> shrink (unpack v)
+
+instance CoArbitrary FlatIntSet where
+ coarbitrary = coarbitrary . unpack
+
+size :: FlatIntSet -> Int
+{-# INLINE size #-}
+size = V.length . sortedValues
+
+null :: FlatIntSet -> Bool
+{-# INLINE null #-}
+null = V.null . sortedValues
+
+-- | Map values within a set; the result size may change if there are duplicate values
+-- after mapping.
+map' :: (Int -> Int) -> FlatIntSet -> FlatIntSet
+{-# INLINE map' #-}
+map' f (FlatIntSet vs) = packVector (V.map' f vs)
+
+-- | /O(1)/ empty flat set.
+empty :: FlatIntSet
+{-# INLINE empty #-}
+empty = FlatIntSet V.empty
+
+-- | /O(N*logN)/ Pack a list of values, on duplication prefer the left one.
+pack :: [Int] -> FlatIntSet
+{-# INLINE pack #-}
+pack vs = FlatIntSet (V.mergeDupAdjacentLeft (==) (V.mergeSort (V.pack vs)))
+
+-- | /O(N*logN)/ Pack a list of values with suggested size, on duplication prefer the left one.
+packN :: Int -> [Int] -> FlatIntSet
+{-# INLINE packN #-}
+packN n vs = FlatIntSet (V.mergeDupAdjacentLeft (==) (V.mergeSort (V.packN n vs)))
+
+-- | /O(N*logN)/ Pack a list of values, on duplication prefer the right one.
+packR :: [Int] -> FlatIntSet
+{-# INLINE packR #-}
+packR vs = FlatIntSet (V.mergeDupAdjacentRight (==) (V.mergeSort (V.pack vs)))
+
+-- | /O(N*logN)/ Pack a list of values with suggested size, on duplication prefer the right one.
+packRN :: Int -> [Int] -> FlatIntSet
+{-# INLINE packRN #-}
+packRN n vs = FlatIntSet (V.mergeDupAdjacentRight (==) (V.mergeSort (V.packN n vs)))
+
+-- | /O(N)/ Unpack a set of values to a list in ascending order.
+--
+-- This function works with @foldr/build@ fusion in base.
+unpack :: FlatIntSet -> [Int]
+{-# INLINE unpack #-}
+unpack = V.unpack . sortedValues
+
+-- | /O(N)/ Unpack a set of values to a list in descending order.
+--
+-- This function works with @foldr/build@ fusion in base.
+unpackR :: FlatIntSet -> [Int]
+{-# INLINE unpackR #-}
+unpackR = V.unpackR . sortedValues
+
+-- | /O(N*logN)/ Pack a vector of values, on duplication prefer the left one.
+packVector :: V.PrimVector Int -> FlatIntSet
+{-# INLINE packVector #-}
+packVector vs = FlatIntSet (V.mergeDupAdjacentLeft (==) (V.mergeSort vs))
+
+-- | /O(N*logN)/ Pack a vector of values, on duplication prefer the right one.
+packVectorR :: V.PrimVector Int -> FlatIntSet
+{-# INLINE packVectorR #-}
+packVectorR vs = FlatIntSet (V.mergeDupAdjacentRight (==) (V.mergeSort vs))
+
+-- | /O(logN)/ Binary search on flat set.
+elem :: Int -> FlatIntSet -> Bool
+{-# INLINABLE elem #-}
+elem _ (FlatIntSet (V.PrimVector arr s 0)) = False
+elem v (FlatIntSet vec) = case binarySearch vec v of Left _ -> False
+ _ -> True
+-- | /O(N)/ Insert a new value into the set; if the value already exists, the set is returned unchanged.
+insert :: Int -> FlatIntSet -> FlatIntSet
+{-# INLINE insert #-}
+insert v m@(FlatIntSet vec@(V.PrimVector arr s l)) =
+ case binarySearch vec v of
+ Left i -> FlatIntSet (V.create (l+1) (\ marr -> do
+ when (i>s) $ A.copyPrimArray marr 0 arr s (i-s)
+ A.writePrimArray marr i v
+ when (i<(s+l)) $ A.copyPrimArray marr (i+1) arr i (s+l-i)))
+ Right i -> m
+
+-- | /O(N)/ Delete a value from the set.
+delete :: Int -> FlatIntSet -> FlatIntSet
+{-# INLINE delete #-}
+delete v m@(FlatIntSet vec@(V.PrimVector arr s l)) =
+ case binarySearch vec v of
+ Left i -> m
+ Right i -> FlatIntSet $ V.create (l-1) (\ marr -> do
+ when (i>s) $ A.copyPrimArray marr 0 arr s (i-s)
+ let !end = s+l
+ !j = i+1
+ when (end > j) $ A.copyPrimArray marr (i-s) arr j (end-j))
+
+-- | /O(n+m)/ Merge two 'FlatIntSet', prefer right value on value duplication.
+merge :: FlatIntSet -> FlatIntSet -> FlatIntSet
+{-# INLINE merge #-}
+merge fmL@(FlatIntSet (V.PrimVector arrL sL lL)) fmR@(FlatIntSet (V.PrimVector arrR sR lR))
+ | null fmL = fmR
+ | null fmR = fmL
+ | otherwise = FlatIntSet (V.createN (lL+lR) (go sL sR 0))
+ where
+ endL = sL + lL
+ endR = sR + lR
+ go :: Int -> Int -> Int -> A.MutablePrimArray s Int -> ST s Int
+ go !i !j !k marr
+ | i >= endL = do
+ A.copyPrimArray marr k arrR j (lR-j)
+ return $! k+lR-j
+ | j >= endR = do
+ A.copyPrimArray marr k arrL i (lL-i)
+ return $! k+lL-i
+ | otherwise = do
+ let !vL = arrL `A.indexPrimArray` i
+ let !vR = arrR `A.indexPrimArray` j
+ case vL `compare` vR of LT -> do A.writePrimArray marr k vL
+ go (i+1) j (k+1) marr
+ EQ -> do A.writePrimArray marr k vR
+ go (i+1) (j+1) (k+1) marr
+ _ -> do A.writePrimArray marr k vR
+ go i (j+1) (k+1) marr
+
+--------------------------------------------------------------------------------
+
+-- | Find the value's index in the vector slice: if the value exists, return 'Right' its index;
+-- otherwise return 'Left' the index where it would be inserted.
+--
+-- This function only works on ascending sorted vectors.
+binarySearch :: V.PrimVector Int -> Int -> Either Int Int
+{-# INLINABLE binarySearch #-}
+binarySearch (V.PrimVector arr s 0) _ = Left 0
+binarySearch (V.PrimVector arr s l) !v' = go s (s+l-1)
+ where
+ go !s !e
+ | s == e =
+ let v = arr `A.indexPrimArray` s
+ in case v' `compare` v of LT -> Left s
+ GT -> let !s' = s+1 in Left s'
+ _ -> Right s
+ | s > e = Left s
+ | otherwise =
+ let !mid = (s+e) `shiftR` 1
+ v = arr `A.indexPrimArray` mid
+ in case v' `compare` v of LT -> go s (mid-1)
+ GT -> go (mid+1) e
+ _ -> Right mid
diff --git a/Std/Data/Vector/FlatMap.hs b/Std/Data/Vector/FlatMap.hs
new file mode 100644
index 0000000..b554ca9
--- /dev/null
+++ b/Std/Data/Vector/FlatMap.hs
@@ -0,0 +1,396 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+{-|
+Module : Std.Data.Vector.FlatMap
+Description : Fast map based on sorted vector
+Copyright : (c) Dong Han, 2017-2019
+ (c) Tao He, 2018-2019
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+This module provides a simple key value map based on a sorted vector and binary search. It's particularly
+suitable for small key value collections, such as deserialization intermediate representations.
+It can also be used in places where insertion and deletion are rare but fast lookup is required.
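+
+A minimal usage sketch (illustrative values, using 'String' keys):
+
+> let m = pack [("b", 2), ("a", 1)]
+> lookup "a" m                   -- Just 1
+> lookup "c" (insert "c" 3 m)    -- Just 3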
+
+-}
+
+module Std.Data.Vector.FlatMap
+ ( -- * FlatMap backed by sorted vector
+ FlatMap, sortedKeyValues, size, null, empty, map', kmap'
+ , pack, packN, packR, packRN
+ , unpack, unpackR, packVector, packVectorR
+ , lookup
+ , delete
+ , insert
+ , adjust'
+ , merge, mergeWithKey'
+ -- * fold and traverse
+ , foldrWithKey, foldrWithKey', foldlWithKey, foldlWithKey', traverseWithKey
+ -- * binary & linear search on vectors
+ , binarySearch
+ , linearSearch, linearSearchR
+ ) where
+
+import Control.DeepSeq
+import Control.Monad
+import Control.Monad.ST
+import qualified Data.Primitive.SmallArray as A
+import qualified Data.Foldable as Foldable
+import qualified Data.Traversable as Traversable
+import qualified Data.Semigroup as Semigroup
+import qualified Data.Monoid as Monoid
+import qualified Std.Data.Vector.Base as V
+import qualified Std.Data.Vector.Sort as V
+import qualified Std.Data.Text as T
+import qualified Std.Data.TextBuilder as T
+import Data.Function (on)
+import Data.Bits (shiftR)
+import Data.Data
+import Data.Typeable
+import Prelude hiding (lookup, null)
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
+
+--------------------------------------------------------------------------------
+
+newtype FlatMap k v = FlatMap { sortedKeyValues :: V.Vector (k, v) }
+ deriving (Show, Eq, Ord, Typeable)
+
+instance (T.ToText k, T.ToText v) => T.ToText (FlatMap k v) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder p (FlatMap vec) = T.parenWhen (p > 10) $ do
+ T.unsafeFromBuilder "FlatMap {"
+ T.intercalateVec T.comma (\ (k, v) ->
+ T.toTextBuilder 0 k >> ":" >> T.toTextBuilder 0 v) vec
+ T.char7 '}'
+
+instance (Ord k, Arbitrary k, Arbitrary v) => Arbitrary (FlatMap k v) where
+ arbitrary = pack <$> arbitrary
+ shrink v = pack <$> shrink (unpack v)
+
+instance (CoArbitrary k, CoArbitrary v) => CoArbitrary (FlatMap k v) where
+ coarbitrary = coarbitrary . unpack
+
+instance Ord k => Semigroup.Semigroup (FlatMap k v) where
+ {-# INLINE (<>) #-}
+ (<>) = merge
+
+instance Ord k => Monoid.Monoid (FlatMap k v) where
+ {-# INLINE mappend #-}
+ mappend = merge
+ {-# INLINE mempty #-}
+ mempty = empty
+
+instance (NFData k, NFData v) => NFData (FlatMap k v) where
+ {-# INLINE rnf #-}
+ rnf (FlatMap kvs) = rnf kvs
+
+instance Functor (FlatMap k) where
+ {-# INLINE fmap #-}
+ fmap f (FlatMap vs) = FlatMap (V.map' (fmap f) vs)
+
+instance Foldable.Foldable (FlatMap k) where
+ {-# INLINE foldr' #-}
+ foldr' f = foldrWithKey' (const f)
+ {-# INLINE foldr #-}
+ foldr f = foldrWithKey (const f)
+ {-# INLINE foldl' #-}
+ foldl' f = foldlWithKey' (\ a k v -> f a v)
+ {-# INLINE foldl #-}
+ foldl f = foldlWithKey (\ a k v -> f a v)
+ {-# INLINE toList #-}
+ toList = fmap snd . unpack
+ {-# INLINE null #-}
+ null (FlatMap vs) = V.null vs
+ {-# INLINE length #-}
+ length (FlatMap vs) = V.length vs
+ {-# INLINE elem #-}
+ elem a (FlatMap vs) = elem a (map snd $ V.unpack vs)
+
+instance Traversable.Traversable (FlatMap k) where
+ {-# INLINE traverse #-}
+ traverse f = traverseWithKey (const f)
+
+size :: FlatMap k v -> Int
+{-# INLINE size #-}
+size = V.length . sortedKeyValues
+
+null :: FlatMap k v -> Bool
+{-# INLINE null #-}
+null = V.null . sortedKeyValues
+
+map' :: (v -> v') -> FlatMap k v -> FlatMap k v'
+{-# INLINE map' #-}
+map' f (FlatMap vs) = FlatMap (V.map' (fmap f) vs)
+
+kmap' :: (k -> v -> v') -> FlatMap k v -> FlatMap k v'
+{-# INLINE kmap' #-}
+kmap' f (FlatMap vs) = FlatMap (V.map' (\ (k, v) -> (k, f k v)) vs)
+
+-- | /O(1)/ empty flat map.
+empty :: FlatMap k v
+{-# INLINE empty #-}
+empty = FlatMap V.empty
+
+-- | /O(N*logN)/ Pack list of key values, on key duplication prefer left one.
+pack :: Ord k => [(k, v)] -> FlatMap k v
+{-# INLINE pack #-}
+pack kvs = FlatMap (V.mergeDupAdjacentLeft ((==) `on` fst) (V.mergeSortBy (compare `on` fst) (V.pack kvs)))
+
+-- | /O(N*logN)/ Pack list of key values with suggested size, on key duplication prefer left one.
+packN :: Ord k => Int -> [(k, v)] -> FlatMap k v
+{-# INLINE packN #-}
+packN n kvs = FlatMap (V.mergeDupAdjacentLeft ((==) `on` fst) (V.mergeSortBy (compare `on` fst) (V.packN n kvs)))
+
+-- | /O(N*logN)/ Pack list of key values, on key duplication prefer right one.
+packR :: Ord k => [(k, v)] -> FlatMap k v
+{-# INLINE packR #-}
+packR kvs = FlatMap (V.mergeDupAdjacentRight ((==) `on` fst) (V.mergeSortBy (compare `on` fst) (V.pack kvs)))
+
+-- | /O(N*logN)/ Pack list of key values with suggested size, on key duplication prefer right one.
+packRN :: Ord k => Int -> [(k, v)] -> FlatMap k v
+{-# INLINE packRN #-}
+packRN n kvs = FlatMap (V.mergeDupAdjacentRight ((==) `on` fst) (V.mergeSortBy (compare `on` fst) (V.packN n kvs)))
+
+-- | /O(N)/ Unpack key value pairs to a list sorted by keys in ascending order.
+--
+-- This function works with @foldr/build@ fusion in base.
+unpack :: FlatMap k v -> [(k, v)]
+{-# INLINE unpack #-}
+unpack = V.unpack . sortedKeyValues
+
+-- | /O(N)/ Unpack key value pairs to a list sorted by keys in descending order.
+--
+-- This function works with @foldr/build@ fusion in base.
+unpackR :: FlatMap k v -> [(k, v)]
+{-# INLINE unpackR #-}
+unpackR = V.unpackR . sortedKeyValues
+
+-- | /O(N*logN)/ Pack vector of key values, on key duplication prefer left one.
+packVector :: Ord k => V.Vector (k, v) -> FlatMap k v
+{-# INLINE packVector #-}
+packVector kvs = FlatMap (V.mergeDupAdjacentLeft ((==) `on` fst) (V.mergeSortBy (compare `on` fst) kvs))
+
+-- | /O(N*logN)/ Pack vector of key values, on key duplication prefer right one.
+packVectorR :: Ord k => V.Vector (k, v) -> FlatMap k v
+{-# INLINE packVectorR #-}
+packVectorR kvs = FlatMap (V.mergeDupAdjacentRight ((==) `on` fst) (V.mergeSortBy (compare `on` fst) kvs))
+
+-- | /O(logN)/ Binary search on flat map.
+lookup :: Ord k => k -> FlatMap k v -> Maybe v
+{-# INLINABLE lookup #-}
+lookup _ (FlatMap (V.Vector arr s 0)) = Nothing
+lookup k' (FlatMap (V.Vector arr s l)) = go s (s+l-1)
+ where
+ go !s !e
+ | s == e =
+ case arr `A.indexSmallArray` s of (k, v) | k == k' -> Just v
+ | otherwise -> Nothing
+ | s > e = Nothing
+ | otherwise =
+ let mid = (s+e) `shiftR` 1
+ (k, v) = arr `A.indexSmallArray` mid
+ in case k' `compare` k of LT -> go s (mid-1)
+ GT -> go (mid+1) e
+ _ -> Just v
+
+-- | /O(N)/ Insert new key value into map, replace old one if key exists.
+insert :: Ord k => k -> v -> FlatMap k v -> FlatMap k v
+{-# INLINE insert #-}
+insert k v (FlatMap vec@(V.Vector arr s l)) =
+ case binarySearch vec k of
+ Left i -> FlatMap (V.create (l+1) (\ marr -> do
+ when (i>s) $ A.copySmallArray marr 0 arr s (i-s)
+ A.writeSmallArray marr i (k, v)
+ when (i<(s+l)) $ A.copySmallArray marr (i+1) arr i (s+l-i)))
+ Right i -> FlatMap (V.Vector (runST (do
+ let arr' = A.cloneSmallArray arr s l
+ marr <- A.unsafeThawSmallArray arr'
+ A.writeSmallArray marr i (k, v)
+ A.unsafeFreezeSmallArray marr)) 0 l)
+
+-- | /O(N)/ Delete a key value pair by key.
+delete :: Ord k => k -> FlatMap k v -> FlatMap k v
+{-# INLINE delete #-}
+delete k m@(FlatMap vec@(V.Vector arr s l)) =
+ case binarySearch vec k of
+ Left i -> m
+ Right i -> FlatMap $ V.create (l-1) (\ marr -> do
+ when (i>s) $ A.copySmallArray marr 0 arr s (i-s)
+ let !end = s+l
+ !j = i+1
+ when (end > j) $ A.copySmallArray marr (i-s) arr j (end-j))
+
+-- | /O(N)/ Modify a value by key.
+--
+-- The value is evaluated to WHNF before writing into map.
+adjust' :: Ord k => (v -> v) -> k -> FlatMap k v -> FlatMap k v
+{-# INLINE adjust' #-}
+adjust' f k m@(FlatMap vec@(V.Vector arr s l)) =
+ case binarySearch vec k of
+ Left i -> m
+ Right i -> FlatMap $ V.create l (\ marr -> do
+ A.copySmallArray marr 0 arr s l
+ let !v' = f (snd (A.indexSmallArray arr i))
+ A.writeSmallArray marr i (k, v'))
+
+-- | /O(n+m)/ Merge two 'FlatMap', prefer right value on key duplication.
+merge :: forall k v. Ord k => FlatMap k v -> FlatMap k v -> FlatMap k v
+{-# INLINE merge #-}
+merge fmL@(FlatMap (V.Vector arrL sL lL)) fmR@(FlatMap (V.Vector arrR sR lR))
+ | null fmL = fmR
+ | null fmR = fmL
+ | otherwise = FlatMap (V.createN (lL+lR) (go sL sR 0))
+ where
+ endL = sL + lL
+ endR = sR + lR
+ go :: Int -> Int -> Int -> A.SmallMutableArray s (k, v) -> ST s Int
+ go !i !j !k marr
+ | i >= endL = do
+ A.copySmallArray marr k arrR j (lR-j)
+ return $! k+lR-j
+ | j >= endR = do
+ A.copySmallArray marr k arrL i (lL-i)
+ return $! k+lL-i
+ | otherwise = do
+ kvL@(kL, vL) <- arrL `A.indexSmallArrayM` i
+ kvR@(kR, vR) <- arrR `A.indexSmallArrayM` j
+ case kL `compare` kR of LT -> do A.writeSmallArray marr k kvL
+ go (i+1) j (k+1) marr
+ EQ -> do A.writeSmallArray marr k kvR
+ go (i+1) (j+1) (k+1) marr
+ _ -> do A.writeSmallArray marr k kvR
+ go i (j+1) (k+1) marr
+
+-- | /O(n+m)/ Merge two 'FlatMap' with a merge function.
+mergeWithKey' :: forall k v. Ord k => (k -> v -> v -> v) -> FlatMap k v -> FlatMap k v -> FlatMap k v
+{-# INLINABLE mergeWithKey' #-}
+mergeWithKey' f fmL@(FlatMap (V.Vector arrL sL lL)) fmR@(FlatMap (V.Vector arrR sR lR))
+ | null fmL = fmR
+ | null fmR = fmL
+ | otherwise = FlatMap (V.createN (lL+lR) (go sL sR 0))
+ where
+ endL = sL + lL
+ endR = sR + lR
+ go :: Int -> Int -> Int -> A.SmallMutableArray s (k, v) -> ST s Int
+ go !i !j !k marr
+ | i >= endL = do
+ A.copySmallArray marr k arrR j (lR-j)
+ return $! k+lR-j
+ | j >= endR = do
+ A.copySmallArray marr k arrL i (lL-i)
+ return $! k+lL-i
+ | otherwise = do
+ kvL@(kL, vL) <- arrL `A.indexSmallArrayM` i
+ kvR@(kR, vR) <- arrR `A.indexSmallArrayM` j
+ case kL `compare` kR of LT -> do A.writeSmallArray marr k kvL
+ go (i+1) j (k+1) marr
+ EQ -> do let !v' = f kL vL vR
+ A.writeSmallArray marr k (kL, v')
+ go (i+1) (j+1) (k+1) marr
+ _ -> do A.writeSmallArray marr k kvR
+ go i (j+1) (k+1) marr
+
+
+-- | /O(n)/ Reduce this map by applying a binary operator to all
+-- elements, using the given starting value (typically the
+-- right-identity of the operator).
+--
+-- During folding k is in descending order.
+foldrWithKey :: (k -> v -> a -> a) -> a -> FlatMap k v -> a
+{-# INLINE foldrWithKey #-}
+foldrWithKey f a (FlatMap vs) = foldr (uncurry f) a vs
+
+-- | /O(n)/ Reduce this map by applying a binary operator to all
+-- elements, using the given starting value (typically the
+-- right-identity of the operator).
+--
+-- During folding k is in ascending order.
+foldlWithKey :: (a -> k -> v -> a) -> a -> FlatMap k v -> a
+{-# INLINE foldlWithKey #-}
+foldlWithKey f a (FlatMap vs) = foldl (\ a' (k,v) -> f a' k v) a vs
+
+-- | /O(n)/ Reduce this map by applying a binary operator to all
+-- elements, using the given starting value (typically the
+-- right-identity of the operator).
+--
+-- During folding k is in descending order.
+foldrWithKey' :: (k -> v -> a -> a) -> a -> FlatMap k v -> a
+{-# INLINE foldrWithKey' #-}
+foldrWithKey' f a (FlatMap vs) = V.foldr' (uncurry f) a vs
+
+-- | /O(n)/ Reduce this map by applying a binary operator to all
+-- elements, using the given starting value (typically the
+-- right-identity of the operator).
+--
+-- During folding k is in ascending order.
+foldlWithKey' :: (a -> k -> v -> a) -> a -> FlatMap k v -> a
+{-# INLINE foldlWithKey' #-}
+foldlWithKey' f a (FlatMap vs) = V.foldl' (\ a' (k,v) -> f a' k v) a vs
+
+-- | /O(n)/.
+-- @'traverseWithKey' f m == 'pack' <$> 'traverse' (\(k, v) -> (,) k <$> f k v) ('unpack' m)@
+-- That is, behaves exactly like a regular 'traverse' except that the traversing
+-- function also has access to the key associated with a value.
+traverseWithKey :: Applicative t => (k -> a -> t b) -> FlatMap k a -> t (FlatMap k b)
+{-# INLINE traverseWithKey #-}
+traverseWithKey f (FlatMap vs) = FlatMap <$> traverse (\ (k,v) -> (k,) <$> f k v) vs
+
+--------------------------------------------------------------------------------
+
+-- | Find the key's index in the vector slice: if the key exists, return 'Right' its index;
+-- otherwise return 'Left' the index where it would be inserted.
+--
+-- This function only works on ascending sorted vectors.
+binarySearch :: Ord k => V.Vector (k, v) -> k -> Either Int Int
+{-# INLINABLE binarySearch #-}
+binarySearch (V.Vector arr s 0) _ = Left 0
+binarySearch (V.Vector arr s l) !k' = go s (s+l-1)
+ where
+ go !s !e
+ | s == e =
+ let (k, v) = arr `A.indexSmallArray` s
+ in case k' `compare` k of LT -> Left s
+ GT -> let !s' = s+1 in Left s'
+ _ -> Right s
+ | s > e = Left s
+ | otherwise =
+ let !mid = (s+e) `shiftR` 1
+ (k, v) = arr `A.indexSmallArray` mid
+ in case k' `compare` k of LT -> go s (mid-1)
+ GT -> go (mid+1) e
+ _ -> Right mid
+
+--------------------------------------------------------------------------------
+
+-- | Linear scan search from left to right, return the first match if it exists.
+linearSearch :: Ord k => V.Vector (k, v) -> k -> Maybe v
+{-# INLINABLE linearSearch #-}
+linearSearch (V.Vector arr s l) !k' = go s
+ where
+ !end = s + l
+ go !i
+ | i >= end = Nothing
+ | otherwise =
+ let (k, v) = arr `A.indexSmallArray` i
+ in if k' == k then Just v else go (i+1)
+
+-- | Linear scan search from right to left, return the first match if it exists.
+linearSearchR :: Ord k => V.Vector (k, v) -> k -> Maybe v
+{-# INLINABLE linearSearchR #-}
+linearSearchR (V.Vector arr s l) !k' = go (s+l-1)
+ where
+ go !i
+ | i < s = Nothing
+ | otherwise =
+ let (k, v) = arr `A.indexSmallArray` i
+ in if k' == k then Just v else go (i-1)
diff --git a/Std/Data/Vector/FlatSet.hs b/Std/Data/Vector/FlatSet.hs
new file mode 100644
index 0000000..c646c3c
--- /dev/null
+++ b/Std/Data/Vector/FlatSet.hs
@@ -0,0 +1,233 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE DeriveDataTypeable #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+{-|
+Module : Std.Data.Vector.FlatSet
+Description : Fast set based on sorted vector
+Copyright : (c) Dong Han, 2017-2019
+ (c) Tao He, 2018-2019
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+This module provides a simple value set based on a sorted vector and binary search. It's particularly
+suitable for small value collections such as deserializing intermediate representations.
+But it can also be used wherever insertion and deletion are rare but fast membership tests ('elem') are required.
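+
+A quick usage sketch (a non-normative example, assuming the module is imported qualified):
+
+@
+import qualified Std.Data.Vector.FlatSet as FlatSet
+
+let s = FlatSet.pack [3, 1, 2, 1 :: Int]   -- duplicates merged, left preferred
+FlatSet.elem 2 s                           -- True
+FlatSet.unpack (FlatSet.insert 4 s)        -- [1,2,3,4]
+@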
+
+-}
+
+module Std.Data.Vector.FlatSet
+ ( -- * FlatSet backed by sorted vector
+ FlatSet, sortedValues, size, null, empty, map'
+ , pack, packN, packR, packRN
+ , unpack, unpackR, packVector, packVectorR
+ , elem
+ , delete
+ , insert
+ , merge
+ -- * binary search on vectors
+ , binarySearch
+ ) where
+
+import Control.DeepSeq
+import Control.Monad
+import Control.Monad.ST
+import qualified Data.Primitive.SmallArray as A
+import qualified Data.Foldable as Foldable
+import qualified Data.Traversable as Traversable
+import qualified Data.Semigroup as Semigroup
+import qualified Data.Monoid as Monoid
+import qualified Std.Data.Vector.Base as V
+import qualified Std.Data.Vector.Sort as V
+import qualified Std.Data.Vector.Search as V
+import qualified Std.Data.Text as T
+import qualified Std.Data.TextBuilder as T
+import Data.Function (on)
+import Data.Bits (shiftR)
+import Data.Data
+import Data.Typeable
+import Prelude hiding (elem, null)
+import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
+
+--------------------------------------------------------------------------------
+
+newtype FlatSet v = FlatSet { sortedValues :: V.Vector v }
+ deriving (Show, Eq, Ord, Typeable, Foldable, NFData)
+
+instance T.ToText v => T.ToText (FlatSet v) where
+ {-# INLINE toTextBuilder #-}
+ toTextBuilder p (FlatSet vec) = T.parenWhen (p > 10) $ do
+ T.unsafeFromBuilder "FlatSet {"
+ T.intercalateVec T.comma (T.toTextBuilder 0) vec
+ T.char7 '}'
+
+instance Ord v => Semigroup.Semigroup (FlatSet v) where
+ {-# INLINE (<>) #-}
+ (<>) = merge
+
+instance Ord v => Monoid.Monoid (FlatSet v) where
+ {-# INLINE mappend #-}
+ mappend = merge
+ {-# INLINE mempty #-}
+ mempty = empty
+
+instance (Ord v, Arbitrary v) => Arbitrary (FlatSet v) where
+ arbitrary = pack <$> arbitrary
+ shrink v = pack <$> shrink (unpack v)
+
+instance (CoArbitrary v) => CoArbitrary (FlatSet v) where
+ coarbitrary = coarbitrary . unpack
+
+size :: FlatSet v -> Int
+{-# INLINE size #-}
+size = V.length . sortedValues
+
+null :: FlatSet v -> Bool
+{-# INLINE null #-}
+null = V.null . sortedValues
+
+-- | Map values within a set; the result size may shrink if there are duplicated values
+-- after mapping.
+map' :: forall v. Ord v => (v -> v) -> FlatSet v -> FlatSet v
+{-# INLINE map' #-}
+map' f (FlatSet vs) = packVector (V.map' f vs :: V.Vector v)
+
+-- | /O(1)/ empty flat set.
+empty :: FlatSet v
+{-# INLINE empty #-}
+empty = FlatSet V.empty
+
+-- | /O(N*logN)/ Pack a list of values, on duplication prefer the left one.
+pack :: Ord v => [v] -> FlatSet v
+{-# INLINE pack #-}
+pack vs = FlatSet (V.mergeDupAdjacentLeft (==) (V.mergeSort (V.pack vs)))
+
+-- | /O(N*logN)/ Pack a list of values with a suggested size, on duplication prefer the left one.
+packN :: Ord v => Int -> [v] -> FlatSet v
+{-# INLINE packN #-}
+packN n vs = FlatSet (V.mergeDupAdjacentLeft (==) (V.mergeSort (V.packN n vs)))
+
+-- | /O(N*logN)/ Pack a list of values, on duplication prefer the right one.
+packR :: Ord v => [v] -> FlatSet v
+{-# INLINE packR #-}
+packR vs = FlatSet (V.mergeDupAdjacentRight (==) (V.mergeSort (V.pack vs)))
+
+-- | /O(N*logN)/ Pack a list of values with a suggested size, on duplication prefer the right one.
+packRN :: Ord v => Int -> [v] -> FlatSet v
+{-# INLINE packRN #-}
+packRN n vs = FlatSet (V.mergeDupAdjacentRight (==) (V.mergeSort (V.packN n vs)))
+
+-- | /O(N)/ Unpack a set of values to a list in ascending order.
+--
+-- This function works with @foldr/build@ fusion in base.
+unpack :: FlatSet v -> [v]
+{-# INLINE unpack #-}
+unpack = V.unpack . sortedValues
+
+-- | /O(N)/ Unpack a set of values to a list in descending order.
+--
+-- This function works with @foldr/build@ fusion in base.
+unpackR :: FlatSet v -> [v]
+{-# INLINE unpackR #-}
+unpackR = V.unpackR . sortedValues
+
+-- | /O(N*logN)/ Pack a vector of values, on duplication prefer the left one.
+packVector :: Ord v => V.Vector v -> FlatSet v
+{-# INLINE packVector #-}
+packVector vs = FlatSet (V.mergeDupAdjacentLeft (==) (V.mergeSort vs))
+
+-- | /O(N*logN)/ Pack a vector of values, on duplication prefer the right one.
+packVectorR :: Ord v => V.Vector v -> FlatSet v
+{-# INLINE packVectorR #-}
+packVectorR vs = FlatSet (V.mergeDupAdjacentRight (==) (V.mergeSort vs))
+
+-- | /O(logN)/ Binary search on flat set.
+elem :: Ord v => v -> FlatSet v -> Bool
+{-# INLINABLE elem #-}
+elem _ (FlatSet (V.Vector arr s 0)) = False
+elem v (FlatSet vec) = case binarySearch vec v of Left _ -> False
+ _ -> True
+-- | /O(N)/ Insert a new value into the set (the set is returned unchanged if the value already exists).
+insert :: Ord v => v -> FlatSet v -> FlatSet v
+{-# INLINE insert #-}
+insert v m@(FlatSet vec@(V.Vector arr s l)) =
+ case binarySearch vec v of
+ Left i -> FlatSet (V.create (l+1) (\ marr -> do
+ when (i>s) $ A.copySmallArray marr 0 arr s (i-s)
+ A.writeSmallArray marr i v
+ when (i<(s+l)) $ A.copySmallArray marr (i+1) arr i (s+l-i)))
+ Right i -> m
+
+-- | /O(N)/ Delete a value from the set.
+delete :: Ord v => v -> FlatSet v -> FlatSet v
+{-# INLINE delete #-}
+delete v m@(FlatSet vec@(V.Vector arr s l)) =
+ case binarySearch vec v of
+ Left i -> m
+ Right i -> FlatSet $ V.create (l-1) (\ marr -> do
+ when (i>s) $ A.copySmallArray marr 0 arr s (i-s)
+ let !end = s+l
+ !j = i+1
+ when (end > j) $ A.copySmallArray marr (i-s) arr j (end-j))
+
+-- | /O(n+m)/ Merge two 'FlatSet's, prefer the right one on duplication.
+merge :: forall v . Ord v => FlatSet v -> FlatSet v -> FlatSet v
+{-# INLINE merge #-}
+merge fmL@(FlatSet (V.Vector arrL sL lL)) fmR@(FlatSet (V.Vector arrR sR lR))
+ | null fmL = fmR
+ | null fmR = fmL
+ | otherwise = FlatSet (V.createN (lL+lR) (go sL sR 0))
+ where
+ endL = sL + lL
+ endR = sR + lR
+ go :: Int -> Int -> Int -> A.SmallMutableArray s v -> ST s Int
+ go !i !j !k marr
+ | i >= endL = do
+ A.copySmallArray marr k arrR j (lR-j)
+ return $! k+lR-j
+ | j >= endR = do
+ A.copySmallArray marr k arrL i (lL-i)
+ return $! k+lL-i
+ | otherwise = do
+ vL <- arrL `A.indexSmallArrayM` i
+ vR <- arrR `A.indexSmallArrayM` j
+ case vL `compare` vR of LT -> do A.writeSmallArray marr k vL
+ go (i+1) j (k+1) marr
+ EQ -> do A.writeSmallArray marr k vR
+ go (i+1) (j+1) (k+1) marr
+ _ -> do A.writeSmallArray marr k vR
+ go i (j+1) (k+1) marr
+
+--------------------------------------------------------------------------------
+
+-- | Find the value's index in the vector slice: if the value exists, return 'Right' with its index,
+-- otherwise return 'Left' with the index where it should be inserted.
+--
+-- This function only works on ascending sorted vectors.
+binarySearch :: Ord v => V.Vector v -> v -> Either Int Int
+{-# INLINABLE binarySearch #-}
+binarySearch (V.Vector arr s 0) _ = Left 0
+binarySearch (V.Vector arr s l) !v' = go s (s+l-1)
+ where
+ go !s !e
+ | s == e =
+ let v = arr `A.indexSmallArray` s
+ in case v' `compare` v of LT -> Left s
+ GT -> let !s' = s+1 in Left s'
+ _ -> Right s
+ | s > e = Left s
+ | otherwise =
+ let !mid = (s+e) `shiftR` 1
+ v = arr `A.indexSmallArray` mid
+ in case v' `compare` v of LT -> go s (mid-1)
+ GT -> go (mid+1) e
+ _ -> Right mid
diff --git a/Std/Data/Vector/QQ.hs b/Std/Data/Vector/QQ.hs
index 65721a5..4f57cf0 100644
--- a/Std/Data/Vector/QQ.hs
+++ b/Std/Data/Vector/QQ.hs
@@ -32,35 +32,35 @@ import GHC.Types (Int(..))
ascii :: QQ.QuasiQuoter
ascii = QQ.QuasiQuoter
- (asciiLiteral $ \ len addr -> [| PrimVector (QQ.word8ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ (asciiLiteral $ \ len addr -> [| PrimVector (QQ.word8ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use ascii as a pattern")
(error "Cannot use ascii as a type")
(error "Cannot use ascii as a dec")
vecW8 :: QQ.QuasiQuoter
vecW8 = QQ.QuasiQuoter
- (QQ.word8Literal $ \ len addr -> [| PrimVector (QQ.word8ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ (QQ.word8Literal $ \ len addr -> [| PrimVector (QQ.word8ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecW8 as a pattern")
(error "Cannot use vecW8 as a type")
(error "Cannot use vecW8 as a dec")
vecW16 :: QQ.QuasiQuoter
vecW16 = QQ.QuasiQuoter
- (QQ.word16Literal $ \ len addr -> [| PrimVector (QQ.word16ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ (QQ.word16Literal $ \ len addr -> [| PrimVector (QQ.word16ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecW16 as a pattern")
(error "Cannot use vecW16 as a type")
(error "Cannot use vecW16 as a dec")
vecW32 :: QQ.QuasiQuoter
vecW32 = QQ.QuasiQuoter
- (QQ.word32Literal $ \ len addr -> [| PrimVector (QQ.word32ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ (QQ.word32Literal $ \ len addr -> [| PrimVector (QQ.word32ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecW32 as a pattern")
(error "Cannot use vecW32 as a type")
(error "Cannot use vecW32 as a dec")
vecW64 :: QQ.QuasiQuoter
vecW64 = QQ.QuasiQuoter
- (QQ.word64Literal $ \ len addr -> [| PrimVector (QQ.word64ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ (QQ.word64Literal $ \ len addr -> [| PrimVector (QQ.word64ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecW64 as a pattern")
(error "Cannot use vecW64 as a type")
(error "Cannot use vecW64 as a dec")
@@ -68,7 +68,7 @@ vecW64 = QQ.QuasiQuoter
vecWord :: QQ.QuasiQuoter
vecWord = QQ.QuasiQuoter
(QQ.wordLiteral $ \ len addr ->
- [| PrimVector (QQ.wordArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ [| PrimVector (QQ.wordArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecWord as a pattern")
(error "Cannot use vecWord as a type")
(error "Cannot use vecWord as a dec")
@@ -76,7 +76,7 @@ vecWord = QQ.QuasiQuoter
vecI8 :: QQ.QuasiQuoter
vecI8 = QQ.QuasiQuoter
(QQ.int8Literal $ \ len addr ->
- [| PrimVector (QQ.int8ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ [| PrimVector (QQ.int8ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecI8 as a pattern")
(error "Cannot use vecI8 as a type")
(error "Cannot use vecI8 as a dec")
@@ -84,7 +84,7 @@ vecI8 = QQ.QuasiQuoter
vecI16 :: QQ.QuasiQuoter
vecI16 = QQ.QuasiQuoter
(QQ.int16Literal $ \ len addr ->
- [| PrimVector (QQ.int16ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ [| PrimVector (QQ.int16ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecI16 as a pattern")
(error "Cannot use vecI16 as a type")
(error "Cannot use vecI16 as a dec")
@@ -92,7 +92,7 @@ vecI16 = QQ.QuasiQuoter
vecI32 :: QQ.QuasiQuoter
vecI32 = QQ.QuasiQuoter
(QQ.int32Literal $ \ len addr ->
- [| PrimVector (QQ.int32ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ [| PrimVector (QQ.int32ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecI32 as a pattern")
(error "Cannot use vecI32 as a type")
(error "Cannot use vecI32 as a dec")
@@ -100,7 +100,7 @@ vecI32 = QQ.QuasiQuoter
vecI64 :: QQ.QuasiQuoter
vecI64 = QQ.QuasiQuoter
(QQ.int64Literal $ \ len addr ->
- [| PrimVector (QQ.int64ArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ [| PrimVector (QQ.int64ArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecI64 as a pattern")
(error "Cannot use vecI64 as a type")
(error "Cannot use vecI64 as a dec")
@@ -108,7 +108,7 @@ vecI64 = QQ.QuasiQuoter
vecInt :: QQ.QuasiQuoter
vecInt = QQ.QuasiQuoter
(QQ.intLiteral $ \ len addr ->
- [| PrimVector (QQ.intArrayFromAddr $(len) $(addr)) 0 (I# $(len)) |])
+ [| PrimVector (QQ.intArrayFromAddr $(len) $(addr)) 0 $(len) |])
(error "Cannot use vecInt as a pattern")
(error "Cannot use vecInt as a type")
(error "Cannot use vecInt as a dec")
diff --git a/Std/Data/Vector/Search.hs b/Std/Data/Vector/Search.hs
index 1925f8a..8d1a49f 100644
--- a/Std/Data/Vector/Search.hs
+++ b/Std/Data/Vector/Search.hs
@@ -73,8 +73,8 @@ elemIndices w (Vec arr s l) = go s
-- satisfying the predicate.
findIndices :: Vec v a => (a -> Bool) -> v a -> [Int]
{-# INLINE [1] findIndices #-}
-{-# RULES "findIndices/Bytes" forall w. findIndices (w `eqWord8`) = elemIndicesBytes w #-}
-{-# RULES "findIndices/Bytes" forall w. findIndices (`eqWord8` w) = elemIndicesBytes w #-}
+{-# RULES "findIndices/Bytes1" forall w. findIndices (w `eqWord8`) = elemIndicesBytes w #-}
+{-# RULES "findIndices/Bytes2" forall w. findIndices (`eqWord8` w) = elemIndicesBytes w #-}
findIndices f (Vec arr s l) = go s
where
!end = s + l
@@ -111,8 +111,8 @@ findIndexR f v = fst (findR f v)
--
find :: Vec v a => (a -> Bool) -> v a -> (Int, Maybe a)
{-# INLINE [1] find #-}
-{-# RULES "find/Bytes" forall w. find (w `eqWord8`) = findByte w #-}
-{-# RULES "find/Bytes" forall w. find (`eqWord8` w) = findByte w #-}
+{-# RULES "find/Bytes1" forall w. find (w `eqWord8`) = findByte w #-}
+{-# RULES "find/Bytes2" forall w. find (`eqWord8` w) = findByte w #-}
find f (Vec arr s l) = go s
where
!end = s + l
@@ -134,8 +134,8 @@ findByte w (PrimVector (PrimArray ba#) s l) =
-- in a vector from right to left, if there isn't one, return '(-1, Nothing)'.
findR :: Vec v a => (a -> Bool) -> v a -> (Int, Maybe a)
{-# INLINE [1] findR #-}
-{-# RULES "findR/Bytes" forall w. findR (w `eqWord8`) = findByteR w #-}
-{-# RULES "findR/Bytes" forall w. findR (`eqWord8` w) = findByteR w #-}
+{-# RULES "findR/Bytes1" forall w. findR (w `eqWord8`) = findByteR w #-}
+{-# RULES "findR/Bytes2" forall w. findR (`eqWord8` w) = findByteR w #-}
findR f (Vec arr s l) = go (s+l-1)
where
go !p | p < s = (-1, Nothing)
diff --git a/Std/Data/Vector/Sort.hs b/Std/Data/Vector/Sort.hs
index d98f0bf..27607c5 100644
--- a/Std/Data/Vector/Sort.hs
+++ b/Std/Data/Vector/Sort.hs
@@ -53,6 +53,11 @@ module Std.Data.Vector.Sort (
, radixSort
, Radix(..)
, RadixDown(..)
+ -- * merge duplicated
+ , mergeDupAdjacent
+ , mergeDupAdjacentLeft
+ , mergeDupAdjacentRight
+ , mergeDupAdjacentBy
) where
import Control.Monad.ST
@@ -136,10 +141,10 @@ mergeSortBy cmp v@(Vec _ _ l)
then copyMutableArr target k' src j (rightEnd - j)
else mergeBlock src target leftEnd rightEnd i' j k'
--- | The mergesort tile size, @mergeTileSize = 16@.
+-- | The mergesort tile size, @mergeTileSize = 8@.
mergeTileSize :: Int
{-# INLINE mergeTileSize #-}
-mergeTileSize = 16
+mergeTileSize = 8
-- | /O(n^2)/ Sort vector based on element's 'Ord' instance with simple
-- <https://en.wikipedia.org/wiki/Insertion_sort insertion-sort> algorithm.
@@ -152,8 +157,8 @@ insertSort = insertSortBy compare
insertSortBy :: Vec v a => (a -> a -> Ordering) -> v a -> v a
{-# INLINE insertSortBy #-}
-insertSortBy _ v@(Vec _ _ 0) = empty
-insertSortBy _ v@(Vec arr s 1) = case indexArr' arr s of (# x #) -> singleton x
+insertSortBy _ v@(Vec _ _ 0) = v
+insertSortBy _ v@(Vec arr s 1) = v
insertSortBy cmp v@(Vec arr s l) = create l (insertSortToMArr cmp v 0)
insertSortToMArr :: Vec v a
@@ -287,8 +292,8 @@ instance Radix a => Radix (RadixDown a) where
-- vectors (turning point around 2^(2*passes)).
radixSort :: forall v a. (Vec v a, Radix a) => v a -> v a
{-# INLINABLE radixSort #-}
-radixSort v@(Vec _ _ 0) = empty
-radixSort v@(Vec arr s 1) = case indexArr' arr s of (# x #) -> singleton x
+radixSort v@(Vec _ _ 0) = v
+radixSort v@(Vec arr s 1) = v
radixSort (Vec arr s l) = runST (do
bucket <- newArrWith buktSiz 0 :: ST s (MutablePrimArray s Int)
w1 <- newArr l
@@ -442,3 +447,55 @@ instance Radix RadixFloat where
radixSortFloat :: PrimVector Float -> PrimVector Float
radixSortFloat v = castVector (radixSort (castVector v :: PrimVector RadixFloat))
-}
+
+--------------------------------------------------------------------------------
+-- | Merge duplicated adjacent elements, prefer the left one.
+--
+-- Using this function on a sorted vector has the same effect as 'nub'.
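+--
+-- A small sketch (made-up values):
+--
+-- @
+-- mergeDupAdjacent (pack [1, 1, 2, 3, 3, 3, 1]) == pack [1, 2, 3, 1]
+-- @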
+mergeDupAdjacent :: (Vec v a, Eq a) => v a -> v a
+{-# INLINE mergeDupAdjacent #-}
+mergeDupAdjacent = mergeDupAdjacentBy (==) const
+
+-- | Merge duplicated adjacent elements, prefer the left one.
+mergeDupAdjacentLeft :: Vec v a
+ => (a -> a -> Bool) -- ^ equality tester, @\ left right -> eq left right@
+ -> v a
+ -> v a
+mergeDupAdjacentLeft eq = mergeDupAdjacentBy eq const
+{-# INLINE mergeDupAdjacentLeft #-}
+
+-- | Merge duplicated adjacent elements, prefer the right one.
+mergeDupAdjacentRight :: Vec v a
+ => (a -> a -> Bool) -- ^ equality tester, @\ left right -> eq left right@
+ -> v a
+ -> v a
+{-# INLINE mergeDupAdjacentRight #-}
+mergeDupAdjacentRight eq = mergeDupAdjacentBy eq (\ _ x -> x)
+
+-- | Merge duplicated adjacent elements, based on an equality tester and a merger function.
+mergeDupAdjacentBy :: Vec v a
+ => (a -> a -> Bool) -- ^ equality tester, @\ left right -> eq left right@
+ -> (a -> a -> a) -- ^ the merger, @\ left right -> merge left right@
+ -> v a -> v a
+{-# INLINABLE mergeDupAdjacentBy #-}
+mergeDupAdjacentBy eq merger v@(Vec arr s l)
+ | l == 0 = empty
+ | l == 1 = v
+ | otherwise = createN l $ \ marr -> do
+ x0 <- indexArrM arr s
+ writeArr marr 0 x0
+ go arr marr (s+1) 1 x0
+ where
+ !end = s + l
+ go !arr !marr !i !j !x
+ | i >= end = return j
+ | otherwise = do
+ x' <- indexArrM arr i
+ if x `eq` x'
+ then do
+ let !x'' = merger x x'
+ writeArr marr (j-1) x''
+ go arr marr (i+1) j x''
+ else do
+ writeArr marr j x'
+ go arr marr (i+1) (j+1) x'
diff --git a/Std/Foreign/PrimArray.hs b/Std/Foreign/PrimArray.hs
index 3af1ecb..d0b72b6 100644
--- a/Std/Foreign/PrimArray.hs
+++ b/Std/Foreign/PrimArray.hs
@@ -139,6 +139,7 @@ type MBA# a = MutableByteArray# RealWorld
-- USE THIS FUNCTION WITH UNSAFE FFI CALL ONLY.
--
withPrimArrayUnsafe :: (Prim a) => PrimArray a -> (BA# a -> Int -> IO b) -> IO b
+{-# INLINABLE withPrimArrayUnsafe #-}
withPrimArrayUnsafe pa@(PrimArray ba#) f = f ba# (sizeofPrimArray pa)
-- | Pass mutable primitive array to unsafe FFI as pointer.
@@ -149,11 +150,13 @@ withPrimArrayUnsafe pa@(PrimArray ba#) f = f ba# (sizeofPrimArray pa)
--
withMutablePrimArrayUnsafe :: (Prim a) => MutablePrimArray RealWorld a
-> (MBA# a -> Int -> IO b) -> IO b
+{-# INLINABLE withMutablePrimArrayUnsafe #-}
withMutablePrimArrayUnsafe mpa@(MutablePrimArray mba#) f =
getSizeofMutablePrimArray mpa >>= f mba#
withMutableByteArrayUnsafe :: Int -- ^ In bytes
-> (MBA# Word8 -> IO b) -> IO b
+{-# INLINABLE withMutableByteArrayUnsafe #-}
withMutableByteArrayUnsafe len f = do
(MutableByteArray mba#) <- newByteArray len
f mba#
@@ -169,6 +172,7 @@ withMutableByteArrayUnsafe len f = do
--
withPrimVectorUnsafe :: (Prim a)
=> PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
+{-# INLINABLE withPrimVectorUnsafe #-}
withPrimVectorUnsafe (PrimVector arr s l) f = withPrimArrayUnsafe arr $ \ ba# _ -> f ba# s l
@@ -180,6 +184,7 @@ withPrimVectorUnsafe (PrimVector arr s l) f = withPrimArrayUnsafe arr $ \ ba# _
--
withPrimUnsafe :: (Prim a)
=> a -> (MBA# a -> IO b) -> IO (a, b)
+{-# INLINABLE withPrimUnsafe #-}
withPrimUnsafe v f = do
mpa@(MutablePrimArray mba#) <- newPrimArray 1 -- All heap objects are WORD aligned
writePrimArray mpa 0 v
@@ -189,6 +194,7 @@ withPrimUnsafe v f = do
withPrimUnsafe' :: (Prim a)
=> (MBA# a -> IO b) -> IO (a, b)
+{-# INLINABLE withPrimUnsafe' #-}
withPrimUnsafe' f = do
mpa@(MutablePrimArray mba#) <- newPrimArray 1 -- All heap objects are WORD aligned
!b <- f mba# -- so no need to do extra alignment
@@ -206,6 +212,7 @@ withPrimUnsafe' f = do
--
-- Don't pass a forever loop to this function, see <https://ghc.haskell.org/trac/ghc/ticket/14346 #14346>.
withPrimArraySafe :: (Prim a) => PrimArray a -> (Ptr a -> Int -> IO b) -> IO b
+{-# INLINABLE withPrimArraySafe #-}
withPrimArraySafe arr f
| isPrimArrayPinned arr = do
let siz = sizeofPrimArray arr
@@ -223,6 +230,7 @@ withPrimArraySafe arr f
--
-- Don't pass a forever loop to this function, see <https://ghc.haskell.org/trac/ghc/ticket/14346 #14346>.
withMutablePrimArraySafe :: (Prim a) => MutablePrimArray RealWorld a -> (Ptr a -> Int -> IO b) -> IO b
+{-# INLINABLE withMutablePrimArraySafe #-}
withMutablePrimArraySafe marr f
| isMutablePrimArrayPinned marr = do
siz <- getSizeofMutablePrimArray marr
@@ -234,6 +242,7 @@ withMutablePrimArraySafe marr f
withMutablePrimArrayContents buf $ \ ptr -> f ptr siz
withMutableByteArraySafe :: Int -> (Ptr Word8 -> IO b) -> IO b
+{-# INLINABLE withMutableByteArraySafe #-}
withMutableByteArraySafe siz f = do
buf <- newPinnedPrimArray siz
withMutablePrimArrayContents buf f
@@ -245,6 +254,7 @@ withMutableByteArraySafe siz f = do
--
-- Don't pass a forever loop to this function, see <https://ghc.haskell.org/trac/ghc/ticket/14346 #14346>.
withPrimVectorSafe :: forall a b. (Prim a) => PrimVector a -> (Ptr a -> Int -> IO b) -> IO b
+{-# INLINABLE withPrimVectorSafe #-}
withPrimVectorSafe v@(PrimVector arr s l) f
| isPrimArrayPinned arr =
withPrimArrayContents arr $ \ ptr ->
@@ -260,6 +270,7 @@ withPrimVectorSafe v@(PrimVector arr s l) f
--
-- Don't pass a forever loop to this function, see <https://ghc.haskell.org/trac/ghc/ticket/14346 #14346>.
withPrimSafe :: forall a b. Prim a => a -> (Ptr a -> IO b) -> IO (a, b)
+{-# INLINABLE withPrimSafe #-}
withPrimSafe v f = do
buf <- newAlignedPinnedPrimArray 1
writePrimArray buf 0 v
@@ -268,6 +279,7 @@ withPrimSafe v f = do
return (a, b)
withPrimSafe' :: forall a b. Prim a => (Ptr a -> IO b) -> IO (a, b)
+{-# INLINABLE withPrimSafe' #-}
withPrimSafe' f = do
buf <- newAlignedPinnedPrimArray 1
!b <- withMutablePrimArrayContents buf $ \ ptr -> f ptr
@@ -284,12 +296,15 @@ foreign import ccall unsafe "string.h" memset :: Ptr a -> CInt -> CSize -> IO ()
-- should be given in bytes.
--
clearPtr :: Ptr a -> Int -> IO ()
+{-# INLINE clearPtr #-}
clearPtr dest nbytes = memset dest 0 (fromIntegral nbytes)
-- | Cast between raw address and tagged pointer.
addrToPtr :: Addr -> Ptr a
+{-# INLINE addrToPtr #-}
addrToPtr (Addr addr#) = Ptr addr#
-- | Cast between tagged pointer and raw address.
ptrToAddr :: Ptr a -> Addr
+{-# INLINE ptrToAddr #-}
ptrToAddr (Ptr addr#) = Addr addr#
diff --git a/Std/IO/Buffered.hs b/Std/IO/Buffered.hs
index 8e8d79b..e11ff2d 100644
--- a/Std/IO/Buffered.hs
+++ b/Std/IO/Buffered.hs
@@ -29,6 +29,7 @@ module Std.IO.Buffered
, readExactly
, readToMagic, readToMagic'
, readLine, readLine'
+ , readAll, readAll'
-- * Buffered Output
, BufferedOutput
, newBufferedOutput
@@ -37,6 +38,9 @@ module Std.IO.Buffered
, flushBuffer
-- * Exceptions
, ShortReadException(..)
+ -- * common buffer size
+ , V.defaultChunkSize
+ , V.smallChunkSize
) where
import Control.Concurrent.MVar
@@ -54,6 +58,7 @@ import qualified Std.Data.Builder.Base as B
import qualified Std.Data.Parser as P
import qualified Std.Data.Vector as V
import qualified Std.Data.Vector.Base as V
+import qualified Std.Data.Text as T
import Std.Data.PrimIORef
import Std.Foreign.PrimArray
import Std.IO.Exception
@@ -101,7 +106,7 @@ newBufferedInput :: input
-> IO (BufferedInput input)
newBufferedInput i bufSiz = do
pb <- newIORef V.empty
- buf <- newPinnedPrimArray bufSiz
+ buf <- newPinnedPrimArray (max bufSiz 0)
inputBuffer <- newIORef buf
return (BufferedInput i pb inputBuffer)
@@ -110,7 +115,7 @@ newBufferedOutput :: output
-> IO (BufferedOutput output)
newBufferedOutput o bufSiz = do
index <- newPrimIORef 0
- buf <- newPinnedPrimArray bufSiz
+ buf <- newPinnedPrimArray (max bufSiz 0)
return (BufferedOutput o index buf)
-- | Request bytes from 'BufferedInput'.
@@ -170,39 +175,39 @@ readExactly n h = V.concat `fmap` (go h n)
chunks <- go h (n - l)
return (chunk : chunks)
+-- | Read all chunks from a 'BufferedInput'.
+readAll :: (HasCallStack, Input i) => BufferedInput i -> IO [V.Bytes]
+readAll i = loop []
+ where
+ loop acc = do
+ chunk <- readBuffer i
+ if V.null chunk
+ then return $! reverse (chunk:acc)
+ else loop (chunk:acc)
+
+-- | Read all chunks from a 'BufferedInput', and concat chunks together.
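+--
+-- A short usage sketch (assuming @i@ is some already-initialized 'Input' device):
+--
+-- @
+-- buf   <- newBufferedInput i V.defaultChunkSize
+-- bytes <- readAll' buf     -- read chunks until EOF and concat them
+-- @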
+readAll' :: (HasCallStack, Input i) => BufferedInput i -> IO V.Bytes
+readAll' i = V.concat <$> readAll i
+
+
data ShortReadException = ShortReadException IOEInfo deriving (Show, Typeable)
instance Exception ShortReadException where
toException = ioExceptionToException
fromException = ioExceptionFromException
-
-- | Push bytes back into buffer
--
unReadBuffer :: (HasCallStack, Input i) => V.Bytes -> BufferedInput i -> IO ()
unReadBuffer pb' BufferedInput{..} = do
modifyIORef' bufPushBack $ \ pb -> pb' `V.append` pb
--- | Result returned by 'readParser'.
-data ReadResult a
- = ReadSuccess a -- ^ read and parse successfully
- | ReadFailure String -- ^ parse failed
- | ReadEOF -- ^ EOF reached
- deriving Show
-
-- | Read buffer and parse with 'Parser'.
--
-readParser :: (HasCallStack, Input i) => P.Parser a -> BufferedInput i -> IO (ReadResult a)
+readParser :: (HasCallStack, Input i) => P.Parser a -> BufferedInput i -> IO (V.Bytes, Either P.ParseError a)
readParser p i = do
bs <- readBuffer i
- if V.null bs
- then return ReadEOF
- else do
- (rest, r) <- P.parseChunks (readBuffer i) p bs
- unless (V.null rest) $ unReadBuffer rest i
- case r of
- Left err -> return (ReadFailure err)
- Right a -> return (ReadSuccess a)
+ P.parseChunks p (readBuffer i) bs
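+
+-- A usage sketch for the new 'readParser' result type (names like @myParser@ and @use@ are made up):
+--
+-- @
+-- (rest, r) <- readParser myParser i
+-- unless (V.null rest) (unReadBuffer rest i)  -- push unconsumed bytes back if needed
+-- case r of Left err -> fail (show err)
+--           Right a  -> use a
+-- @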
-- | Read until reach a magic bytes
--
@@ -249,18 +254,22 @@ readToMagic' magic h = V.concat `fmap` (go h magic)
readLine :: (HasCallStack, Input i) => BufferedInput i -> IO V.Bytes
readLine i = do
bs@(V.PrimVector arr s l) <- readToMagic 10 i
- return $ case bs `V.indexMaybe` (l-2) of
+ if l == 0
+ then return bs
+ else return $ case bs `V.indexMaybe` (l-2) of
Nothing -> V.PrimVector arr s (l-1)
Just r | r == 13 -> V.PrimVector arr s (l-2)
| otherwise -> V.PrimVector arr s (l-1)
-- | Read to a linefeed ('\n' or '\r\n'), return 'Bytes' before it.
--
--- If EOF reached before meet a magic byte, a 'ShortReadException' will be thrown.
+-- If EOF is reached before a '\n' is met, a 'ShortReadException' will be thrown.
readLine' :: (HasCallStack, Input i) => BufferedInput i -> IO V.Bytes
readLine' i = do
bs@(V.PrimVector arr s l) <- readToMagic' 10 i
- return $ case bs `V.indexMaybe` (l-2) of
+ if l == 0
+ then return bs
+ else return $ case bs `V.indexMaybe` (l-2) of
Nothing -> V.PrimVector arr s (l-1)
Just r | r == 13 -> V.PrimVector arr s (l-2)
| otherwise -> V.PrimVector arr s (l-1)
diff --git a/Std/IO/Exception.hs b/Std/IO/Exception.hs
index a0cd656..198334e 100644
--- a/Std/IO/Exception.hs
+++ b/Std/IO/Exception.hs
@@ -75,6 +75,7 @@ module Std.IO.Exception
-- * Re-exports
, module Control.Exception
, HasCallStack
+ , CallStack
, callStack
) where
@@ -239,7 +240,7 @@ throwUVError e info = case e of
UV_EISDIR -> throwIO (InappropriateType info)
UV_ELOOP -> throwIO (InvalidArgument info)
UV_EMFILE -> throwIO (ResourceExhausted info)
- UV_EMSGSIZE -> throwIO (ResourceExhausted info)
+ UV_EMSGSIZE -> throwIO (InvalidArgument info)
UV_ENAMETOOLONG -> throwIO (InvalidArgument info)
UV_ENETDOWN -> throwIO (ResourceVanished info)
UV_ENETUNREACH -> throwIO (NoSuchThing info)
diff --git a/Std/IO/Logger.hs b/Std/IO/Logger.hs
index 61ee1a0..8f73662 100644
--- a/Std/IO/Logger.hs
+++ b/Std/IO/Logger.hs
@@ -61,8 +61,7 @@ import Std.IO.LowResTimer
import Std.IO.StdStream
import Std.IO.Buffered
import System.IO.Unsafe (unsafePerformIO)
-import GHC.Stack
-import Data.Word8 (_space, _lf, _bracketleft, _bracketright)
+import Std.IO.Exception
import Data.IORef
import Control.Concurrent.MVar
import qualified Std.Data.Builder.Base as B
@@ -79,8 +78,7 @@ data Logger = Logger
-- | Logger configuration.
data LoggerConfig = LoggerConfig
- { loggerBufferSize :: {-# UNPACK #-} !Int -- ^ Buffer size used when creating logger's 'BufferedOutput'
- , loggerMinFlushInterval :: {-# UNPACK #-} !Int -- ^ Minimal flush interval, see Notes on 'debug'
+ { loggerMinFlushInterval :: {-# UNPACK #-} !Int -- ^ Minimal flush interval, see Notes on 'debug'
, loggerTsCache :: IO (B.Builder ()) -- ^ A IO action return a formatted date/time string
, loggerLineBufSize :: {-# UNPACK #-} !Int -- ^ Buffer size to build each log/line
, loggerShowDebug :: Bool -- ^ Set to 'False' to filter debug logs
@@ -90,20 +88,25 @@ data LoggerConfig = LoggerConfig
-- | A default logger config with
--
-- * debug ON
--- * data/time@%Y-%m-%dT%H:%M:%S%Z@ ON
-- * 0.1s minimal flush interval
+-- * 'defaultTSCache' as the timestamp formatter
-- * line buffer size 128 bytes
+-- * show debug logs ON
+-- * show timestamp ON
-- * 'BufferedOutput' buffer size equals to 'V.defaultChunkSize'.
defaultLoggerConfig :: LoggerConfig
-{-# NOINLINE defaultLoggerConfig #-}
-defaultLoggerConfig = unsafePerformIO $ do
- tsCache <- throttle 1 $ do
+defaultLoggerConfig = LoggerConfig 1 defaultTSCache 128 True True
+
+-- | A default timestamp cache with format @%Y-%m-%dT%H:%M:%S%Z@
+defaultTSCache :: IO (B.Builder ())
+{-# NOINLINE defaultTSCache #-}
+defaultTSCache = unsafePerformIO $ do
+ throttle 1 $ do
t <- Time.getCurrentTime
return . B.string8 $
Time.formatTime Time.defaultTimeLocale "%Y-%m-%dT%H:%M:%S%Z" t
- return $ LoggerConfig V.defaultChunkSize 10 tsCache 128 True True
-flushLog :: Output o => MVar (BufferedOutput o) -> IORef [V.Bytes] -> IO ()
+flushLog :: (HasCallStack, Output o) => MVar (BufferedOutput o) -> IORef [V.Bytes] -> IO ()
flushLog oLock bList =
withMVar oLock $ \ o -> do
bss <- atomicModifyIORef' bList (\ bss -> ([], bss))
@@ -113,11 +116,11 @@ flushLog oLock bList =
-- | Make a new logger
newLogger :: Output o
=> LoggerConfig
- -> o
+ -> BufferedOutput o
-> IO Logger
newLogger config o = do
bList <- newIORef []
- oLock <- newMVar =<< newBufferedOutput o (loggerBufferSize config)
+ oLock <- newMVar o
let flush = flushLog oLock bList
throttledFlush <- throttleTrailing_ (loggerMinFlushInterval config) flush
return $ Logger flush throttledFlush bList config
@@ -125,7 +128,7 @@ newLogger config o = do
globalLogger :: IORef Logger
{-# NOINLINE globalLogger #-}
globalLogger = unsafePerformIO $
- newIORef =<< newLogger defaultLoggerConfig stderr
+ newIORef =<< newLogger defaultLoggerConfig stderrBuf
-- | Change stderr logger.
setStdLogger :: Logger -> IO ()
@@ -188,15 +191,15 @@ otherLevelWith :: Logger
-> B.Builder () -- ^ log content
-> IO ()
otherLevelWith logger level flushNow b = case logger of
- (Logger flush throttledFlush blist (LoggerConfig _ _ tscache lbsiz showdebug showts)) -> do
+ (Logger flush throttledFlush blist (LoggerConfig _ tscache lbsiz showdebug showts)) -> do
ts <- if showts then tscache else return ""
when showdebug $ do
pushLog blist lbsiz $ do
- B.encodePrim _bracketleft
+ B.char8 '['
level
- B.encodePrim _bracketright
- B.encodePrim _space
- when showts $ ts >> B.encodePrim _space
+ B.char8 ']'
+ B.char8 ' '
+ when showts $ ts >> B.char8 ' '
b
- B.encodePrim _lf
+ B.char8 '\n'
if flushNow then flush else throttledFlush
diff --git a/Std/IO/LowResTimer.hs b/Std/IO/LowResTimer.hs
index 3b589fa..2130e4f 100644
--- a/Std/IO/LowResTimer.hs
+++ b/Std/IO/LowResTimer.hs
@@ -35,6 +35,7 @@ module Std.IO.LowResTimer
, cancelLowResTimer
, cancelLowResTimer_
, timeoutLowRes
+ , timeoutLowResEx
, throttle
, throttle_
, throttleTrailing_
@@ -51,7 +52,7 @@ import GHC.Event
#endif
import Control.Concurrent
import Control.Concurrent.MVar
-import Control.Exception
+import Std.IO.Exception
import Control.Monad
import Data.IORef
import Std.Data.PrimIORef
@@ -221,9 +222,24 @@ timeoutLowRes timeo io = do
return (Just r))
( \ (e :: TimeOutException) -> return Nothing )
where
- timeoutAThread id = void . forkIO $ throwTo id TimeOutException
+ timeoutAThread id = void . forkIO $ throwTo id (TimeOutException id undefined)
+
+-- | Similar to 'timeoutLowRes', but raises a 'TimeOutException' instead of returning 'Nothing'
+-- on timeout.
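+--
+-- A usage sketch (made-up example; 'threadDelay' comes from "Control.Concurrent"):
+--
+-- @
+-- -- raises 'TimeOutException', because the action needs 2s but the limit is 1s (10 * 0.1s)
+-- timeoutLowResEx 10 (threadDelay 2000000)
+-- @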
+timeoutLowResEx :: HasCallStack
+ => Int -- ^ timeout in unit of 0.1s
+ -> IO a
+ -> IO a
+timeoutLowResEx timeo io = do
+ mid <- myThreadId
+ timer <- registerLowResTimer timeo (timeoutAThread mid)
+ r <- io
+ cancelLowResTimer timer
+ return r
+ where
+ timeoutAThread id = void . forkIO $ throwTo id (TimeOutException id callStack)
-data TimeOutException = TimeOutException deriving Show
+data TimeOutException = TimeOutException ThreadId CallStack deriving Show
instance Exception TimeOutException
--------------------------------------------------------------------------------
@@ -316,7 +332,7 @@ throttle t action = do
return r
else readIORef resultRef
--- | Debounce IO action without caching result.
+-- | Throttle an IO action without caching result.
--
-- The IO action will run at leading edge. i.e. once run, during following (t/10)s throttled action will
-- no-ops.
diff --git a/Std/IO/SockAddr.hsc b/Std/IO/SockAddr.hsc
index 1636191..434edaf 100644
--- a/Std/IO/SockAddr.hsc
+++ b/Std/IO/SockAddr.hsc
@@ -20,6 +20,7 @@ module Std.IO.SockAddr
, sockAddrFamily
, peekSockAddr
, withSockAddr
+ , withSockAddrStorage
-- ** IPv4 address
, InetAddr
, inetAny
@@ -72,7 +73,6 @@ import Data.Typeable
import Foreign
import Foreign.C
import GHC.ForeignPtr (mallocPlainForeignPtrAlignedBytes)
-import GHC.Stack
import Numeric (showHex)
import Std.Data.CBytes
import qualified Std.Data.Vector as V
@@ -306,6 +306,16 @@ withSockAddr sa@(SockAddrInet6 _ _ _ _) f = do
(#size struct sockaddr_in6)
(#alignment struct sockaddr_in6) $ \ p -> pokeSockAddr p sa >> f p
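+
+-- | Allocate an uninitialized @sockaddr_storage@ together with a 'CInt' holding its size,
+-- let the action fill it in (e.g. an FFI call such as @uv_udp_getsockname@), then peek the
+-- result back as a 'SockAddr'.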
+withSockAddrStorage :: (Ptr SockAddr -> Ptr CInt -> IO ()) -> IO SockAddr
+withSockAddrStorage f = do
+ allocaBytesAligned
+ (#size struct sockaddr_storage)
+ (#alignment struct sockaddr_storage) $ \ p ->
+ alloca $ \ p' -> do
+ poke p' (#size struct sockaddr_storage)
+ f p p'
+ peekSockAddr p
+
-- The peek32 and poke32 functions work around the fact that the RFCs
-- don't require 32-bit-wide address fields to be present. We can
-- only portably rely on an 8-bit field, s6_addr.
diff --git a/Std/IO/StdStream.hs b/Std/IO/StdStream.hs
index 4737665..b3195ce 100644
--- a/Std/IO/StdStream.hs
+++ b/Std/IO/StdStream.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}
@@ -33,8 +34,10 @@ module Std.IO.StdStream
( -- * Standard input & output streams
StdStream
, isStdStreamTTY
+ , UVTTYMode(UV_TTY_MODE_NORMAL, UV_TTY_MODE_RAW)
+ , setStdinTTYMode
, stdin, stdout, stderr
- , stdinBuf, stdoutBuf
+ , stdinBuf, stdoutBuf, stderrBuf
-- * utils
, printStd
, readLineStd
@@ -44,6 +47,7 @@ module Std.IO.StdStream
import Std.Data.Builder as B
import Std.Data.Vector as V
+import Std.Data.TextBuilder (ToText, toBuilder)
import Std.IO.UV.FFI
import Std.IO.UV.Manager
import Control.Monad
@@ -63,8 +67,8 @@ import Foreign.Ptr
-- 'uv_guess_handle' is called to decide which type of devices are connected
-- to standard streams.
--
--- 'StdStream' is different from other 'UVStream' in that exception during reading & writing
--- won't close 'StdStream'.
+-- Note 'StdStream' is not thread safe, you shouldn't use it concurrently without a lock.
+-- For the same reason you shouldn't use 'stderr' directly, use the `Std.IO.Logger` module instead.
data StdStream
= StdTTY {-# UNPACK #-}!(Ptr UVHandle) {-# UNPACK #-}!UVSlot UVManager -- similar to UVStream
@@ -82,7 +86,15 @@ instance Input StdStream where
throwUVIfMinus_ (hs_uv_read_start handle)
pokeBufferTable uvm slot buf len
tryTakeMVar m
- r <- takeMVar m
+ -- since we are inside mask, this is the only place
+ -- async exceptions could possibly kick in, and we should stop reading
+ r <- catch (takeMVar m) (\ (e :: SomeException) -> do
+ withUVManager_ uvm (uv_read_stop handle)
+ -- after we locked uvm and stop reading, the reading probably finished
+ -- so try again
+ r <- tryTakeMVar m
+ case r of Just r -> return r
+ _ -> throwIO e)
if | r > 0 -> return r
-- r == 0 should be impossible, since we guard this situation in c side
| r == fromIntegral UV_EOF -> return 0
@@ -98,7 +110,7 @@ instance Output StdStream where
m <- getBlockMVar uvm slot
tryTakeMVar m
return (slot, m)
- throwUVIfMinus_ (takeMVar m)
+ throwUVIfMinus_ (uninterruptibleMask_ $ takeMVar m)
writeOutput (StdFile fd) buf len = go buf len
where
go !buf !bufSiz = do
@@ -115,6 +127,7 @@ stdout :: StdStream
{-# NOINLINE stdout #-}
stdout = unsafePerformIO (makeStdStream 1)
+-- | Don't use 'stderr' directly, use 'Std.IO.Logger' instead.
stderr :: StdStream
{-# NOINLINE stderr #-}
stderr = unsafePerformIO (makeStdStream 2)
@@ -127,6 +140,10 @@ stdoutBuf :: BufferedOutput StdStream
{-# NOINLINE stdoutBuf #-}
stdoutBuf = unsafePerformIO (newBufferedOutput stdout defaultChunkSize)
+stderrBuf :: BufferedOutput StdStream
+{-# NOINLINE stderrBuf #-}
+stderrBuf = unsafePerformIO (newBufferedOutput stderr defaultChunkSize)
+
makeStdStream :: UVFD -> IO StdStream
makeStdStream fd = do
typ <- uv_guess_handle fd
@@ -142,24 +159,31 @@ makeStdStream fd = do
return (StdTTY handle slot uvm)
else return (StdFile fd)
+-- | Change terminal's mode if stdin is connected to a terminal.
+setStdinTTYMode :: UVTTYMode -> IO ()
+setStdinTTYMode mode = case stdin of
+ StdTTY handle _ uvm ->
+ withUVManager_ uvm . throwUVIfMinus_ $ uv_tty_set_mode handle mode
+ _ -> return ()
+
--------------------------------------------------------------------------------
--- | print a 'Show' to stdout
-printStd :: Show a => a -> IO ()
+-- | print a 'ToText' to stdout
+printStd :: ToText a => a -> IO ()
printStd s = do
- writeBuffer stdoutBuf (B.buildBytes . B.stringUTF8 . show $ s)
+ writeBuilder stdoutBuf (toBuilder $ s)
flushBuffer stdoutBuf
-- | print a 'Builder' and flush to stdout.
putStd :: Builder a -> IO ()
putStd b = do
- writeBuffer stdoutBuf (B.buildBytes b)
+ writeBuilder stdoutBuf b
flushBuffer stdoutBuf
-- | print a 'Builder' and flush to stdout stdout, with a linefeed.
putLineStd :: Builder a -> IO ()
putLineStd b = do
- writeBuffer stdoutBuf (B.buildBytes $ b >> B.char8 '\n')
+ writeBuilder stdoutBuf (b >> B.char8 '\n')
flushBuffer stdoutBuf
-- | read a line from stdin
diff --git a/Std/IO/TCP.hs b/Std/IO/TCP.hs
index 449602a..915f8d1 100644
--- a/Std/IO/TCP.hs
+++ b/Std/IO/TCP.hs
@@ -3,6 +3,7 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ExistentialQuantification #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-|
Module : Std.IO.TCP
@@ -58,18 +59,21 @@ initTCPExStream family = initUVStream (\ loop handle ->
--------------------------------------------------------------------------------
+-- | TCP Stream.
+newtype TCP = TCP UVStream deriving (Show, Input, Output)
+
-- | A TCP client configuration
--
data ClientConfig = ClientConfig
- { clientLocalAddr :: Maybe SockAddr
- , clientTargetAddr :: SockAddr
- , clientNoDelay :: Bool
+ { clientLocalAddr :: Maybe SockAddr -- ^ assign a local address, or let OS pick one
+ , clientTargetAddr :: SockAddr -- ^ target address
+ , clientNoDelay :: Bool -- ^ if we want to use @TCP_NODELAY@
}
defaultClientConfig :: ClientConfig
defaultClientConfig = ClientConfig Nothing (SockAddrInet 8888 inetLoopback) True
-initClient :: HasCallStack => ClientConfig -> Resource UVStream
+initClient :: HasCallStack => ClientConfig -> Resource TCP
initClient ClientConfig{..} = do
uvm <- liftIO getUVManager
client <- initTCPStream uvm
@@ -82,17 +86,18 @@ initClient ClientConfig{..} = do
-- nodelay is safe without withUVManager
when clientNoDelay $ throwUVIfMinus_ (uv_tcp_nodelay handle 1)
withUVRequest uvm $ \ _ -> hs_uv_tcp_connect handle targetPtr
- return client
+ return (TCP client)
--------------------------------------------------------------------------------
-- | A TCP server configuration
--
data ServerConfig = ServerConfig
- { serverAddr :: SockAddr
- , serverBackLog :: Int
- , serverWorker :: UVStream -> IO ()
- , serverWorkerNoDelay :: Bool
+ { serverAddr :: SockAddr -- ^ listening address
+ , serverBackLog :: Int -- ^ listening socket's backlog size
+ , serverWorker :: TCP -> IO () -- ^ worker which gets an accepted TCP stream,
+ -- the socket will be closed upon exception or when the worker finishes.
+ , serverWorkerNoDelay :: Bool -- ^ if we want to use @TCP_NODELAY@
}
-- | A default hello world server on localhost:8888
@@ -110,23 +115,23 @@ defaultServerConfig = ServerConfig
--
-- Fork new worker thread upon a new connection.
--
-startServer :: ServerConfig -> IO ()
+startServer :: HasCallStack => ServerConfig -> IO ()
startServer ServerConfig{..} = do
serverManager <- getUVManager
withResource (initTCPStream serverManager) $ \ (UVStream serverHandle serverSlot _ _) ->
bracket
(throwOOMIfNull $ hs_uv_accept_check_alloc serverHandle)
- (hs_uv_accept_check_close) $ \ check -> do
+ hs_uv_accept_check_close $ \ check -> do
throwUVIfMinus_ $ hs_uv_accept_check_init check
withSockAddr serverAddr $ \ addrPtr -> do
m <- getBlockMVar serverManager serverSlot
acceptBuf <- newPinnedPrimArray ACCEPT_BUFFER_SIZE
- let acceptBufPtr = (coerce (mutablePrimArrayContents acceptBuf :: Ptr UVFD))
+ let acceptBufPtr = coerce (mutablePrimArrayContents acceptBuf :: Ptr UVFD)
withUVManager_ serverManager $ do
pokeBufferTable serverManager serverSlot acceptBufPtr 0
throwUVIfMinus_ (uv_tcp_bind serverHandle addrPtr 0)
- throwUVIfMinus_ (hs_uv_listen serverHandle (fromIntegral serverBackLog))
+ throwUVIfMinus_ (hs_uv_listen serverHandle (max 4 (fromIntegral serverBackLog)))
forever $ do
takeMVar m
@@ -156,7 +161,21 @@ startServer ServerConfig{..} = do
when serverWorkerNoDelay . throwUVIfMinus_ $
-- safe without withUVManager
uv_tcp_nodelay (uvsHandle client) 1
- serverWorker client
+ serverWorker (TCP client)
when (accepted == ACCEPT_BUFFER_SIZE) $
withUVManager_ serverManager (hs_uv_listen_resume serverHandle)
+
+-- The buffer passing of accept is a little complicated here: to get maximum performance,
+-- we do batch accepting, i.e. accept multiple clients inside libuv's event loop:
+--
+-- + we poke uvmanager's buffer table as a Ptr Word8, with byte size (ACCEPT_BUFFER_SIZE*sizeof(UVFD))
+-- + inside libuv's event loop, we cast the buffer back to an int32_t* pointer.
+-- + each accept callback pushes a new socket fd to the buffer, and increases a counter (the buffer table's size).
+-- + ACCEPT_BUFFER_SIZE is large enough (1020), so under windows we can't possibly fill it up within one
+--   uv_run.
+-- + under unix we hacked uv internals to provide stop and resume functions; when ACCEPT_BUFFER_SIZE is
+--   reached, we stop accepting.
+-- + once back on the haskell side, we peek all the accepted socket fds and fork worker threads.
+-- + if ACCEPT_BUFFER_SIZE was reached, we resume accepting from the haskell side, which will affect the next
+--   uv_run.
diff --git a/Std/IO/UDP.hs b/Std/IO/UDP.hs
new file mode 100644
index 0000000..f11edf3
--- /dev/null
+++ b/Std/IO/UDP.hs
@@ -0,0 +1,303 @@
+{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE PatternSynonyms #-}
+
+{-|
+Module : Std.IO.UDP
+Description : UDP servers and clients
+Copyright : (c) Dong Han, 2018
+License : BSD
+Maintainer : winterland1989@gmail.com
+Stability : experimental
+Portability : non-portable
+
+This module provides an API for creating UDP sender and receiver.
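+
+A quick sketch of intended usage (addresses, the port and the @OverloadedStrings@ message literal
+are made up for illustration):
+
+> withResource (initUDP defaultUDPConfig) $ \ udp -> do
+>     sendUDP udp (SockAddrInet 8888 inetLoopback) "hello"
+>     msgs <- recvUDP udp    -- a batch of (Maybe SockAddr, partial flag, message bytes)
+>     print (length msgs)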
+
+-}
+
+module Std.IO.UDP (
+ -- * UDP socket
+ UDP(..)
+ , initUDP
+ , UDPConfig(..)
+ , defaultUDPConfig
+ , UVUDPFlag(UV_UDP_DEFAULT, UV_UDP_IPV6ONLY, UV_UDP_REUSEADDR)
+ , recvUDP
+ , sendUDP
+ , getSockName
+ -- * multicast and broadcast
+ , UVMembership(UV_JOIN_GROUP, UV_LEAVE_GROUP)
+ , setMembership
+ , setMulticastLoop
+ , setMulticastTTL
+ , setMulticastInterface
+ , setBroadcast
+ , setTTL
+ ) where
+
+import Control.Monad.Primitive (primitive_)
+import Data.Primitive.PrimArray as A
+import Data.Primitive.Ptr (copyPtrToMutablePrimArray)
+import Data.IORef
+import GHC.Prim (touch#)
+import Std.Data.Array as A
+import Std.Data.Vector.Base as V
+import Std.Data.Vector.Extra as V
+import Std.Data.CBytes as CBytes
+import Std.IO.SockAddr
+import Std.Foreign.PrimArray
+import Std.IO.UV.Errno (pattern UV_EMSGSIZE)
+import Std.IO.UV.FFI
+import Std.IO.UV.Manager
+import Std.IO.Exception
+import Std.IO.Resource
+import Data.Word
+import Data.Int
+import Data.Bits ((.&.))
+import Control.Monad
+import Control.Concurrent.MVar
+import Foreign.Storable (peek, poke)
+import Foreign.Ptr (plusPtr)
+
+-- | UDP socket.
+--
+-- UDP socket is not thread safe, don't use it among multiple threads! UDP is not a sequential
+-- protocol, thus not an instance of 'Input'/'Output'. Messages are received or sent individually;
+-- we do provide batch receiving to improve performance under high load.
+data UDP = UDP
+ { udpHandle :: {-# UNPACK #-} !(Ptr UVHandle)
+ , udpSlot :: {-# UNPACK #-} !UVSlot
+ , udpManager :: UVManager
+ , udpRecvLargeBuffer :: {-# UNPACK #-} !(A.MutablePrimArray RealWorld Word8)
+ , udpRecvBufferSiz :: {-# UNPACK #-} !Int32
+ , udpRecvBufferArray :: {-# UNPACK #-} !(A.MutablePrimArray RealWorld (Ptr Word8))
+ , udpSendBuffer :: {-# UNPACK #-} !(A.MutablePrimArray RealWorld Word8)
+ , udpClosed :: {-# UNPACK #-} !(IORef Bool)
+ }
+
+-- The buffer passing of UDP is a little complicated here: to get maximum performance,
+-- we do batch receiving, i.e. receive multiple messages inside libuv's event loop:
+--
+-- udpRecvLargeBuffer:
+--
+-- +---------+--------------+-----------+----------+--------+---------+------------
+-- | buf siz | partial flag | addr flag | addr | buffer | buf siz | partial ...
+-- +--4bytes-+----4bytes----+--4bytes---+-128bytes-+-bufsiz-+---------+------------
+-- ^ ^
+-- | |
+-- +---------------------+ +--------------------------+
+-- | |
+-- +--+---+---+--+----
+-- udpRecvBufferArray | buf0 | buf1 | ...
+-- +------+------+----
+--
+-- + we allocate a large buffer (buffer_size * buffer_number)
+-- + each time we poke the udpRecvBufferArray and its last index (size - 1) to uv manager's buffer table.
+-- + on the libuv side, each alloc callback picks the last pointer from udpRecvBufferArray and decreases the last index by 1
+-- + the read result is written into the `buf siz` cell, followed by the partial flag; if addr is not NULL
+--   then the addr flag is 1 (otherwise 0), followed by the addr itself; the buffer is already filled when
+--   the recv callback is called.
+-- + on the haskell side, we read the buffer table's size, which has been decreased by n, the number of times the callback was called.
+--   Then we peek those n cells out.
+
+instance Show UDP where
+ show (UDP handle slot uvm _ bufsiz _ _ _) =
+ "UDP{udpHandle = " ++ show handle ++
+ ",udpRecvBufferSiz = " ++ show bufsiz ++
+ ",udpSlot = " ++ show slot ++
+ ",udpManager =" ++ show uvm ++ "}"
+
+-- | UDP options.
+--
+-- Though technically the message length field in the UDP header allows a maximum of 65535 bytes, large packets
+-- are more likely to be dropped by routers; usually an IPv4 packet with a payload <= 508 bytes is considered safe.
+data UDPConfig = UDPConfig
+ { recvMsgSize :: {-# UNPACK #-} !Int32 -- ^ maximum size of a received message
+ , recvBatchSize :: {-# UNPACK #-} !Int -- ^ how many messages we want to receive per uv loop,
+ -- inside each uv_run, we do batch receiving,
+ -- increasing this number can improve receiving performance,
+ -- at the cost of memory and potential GHC thread starvation.
+ , sendMsgSize :: {-# UNPACK #-} !Int -- ^ maximum size of sending buffer
+ , localUDPAddr :: Maybe (SockAddr, UVUDPFlag) -- ^ do we want to bind a local address before receiving & sending?
+ -- set to Nothing to let OS pick a random one.
+ } deriving (Show, Eq, Ord)
+
+-- | default 'UDPConfig', @defaultUDPConfig = UDPConfig 512 6 512 Nothing@
+defaultUDPConfig :: UDPConfig
+defaultUDPConfig = UDPConfig 512 6 512 Nothing
+
+-- | Initialize a UDP socket.
+--
+initUDP :: HasCallStack
+ => UDPConfig
+ -> Resource UDP
+initUDP (UDPConfig rbsiz rbArrSiz sbsiz maddr) = initResource
+ (do uvm <- getUVManager
+ -- (message size + partial flag + addr flag) + max sockaddr size + buffer
+ -- see diagram above
+ let rbufsiz'' = 140 + rbsiz'
+ rbuf <- A.newPinnedPrimArray (fromIntegral rbufsiz'' * rbArrSiz')
+ rbufArr <- A.newPinnedPrimArray rbArrSiz'
+
+ -- initialize buffer array with right index
+ withMutablePrimArrayContents rbuf $ \ p ->
+ forM_ [0..rbArrSiz'-1] $ \ i -> do
+ let bufNPtr = p `plusPtr` (i * fromIntegral rbufsiz'')
+ writePrimArray rbufArr i bufNPtr
+
+ (handle, slot) <- withUVManager uvm $ \ loop -> do
+ handle <- hs_uv_handle_alloc loop
+ slot <- getUVSlot uvm (peekUVHandleData handle)
+ tryTakeMVar =<< getBlockMVar uvm slot -- clear the parking spot
+
+ -- init uv struct
+ (do throwUVIfMinus_ (uv_udp_init loop handle)
+ -- bind the socket if address is available
+ forM_ maddr $ \ (addr, flag) ->
+ withSockAddr addr $ \ p ->
+ throwUVIfMinus_ (uv_udp_bind handle p flag)
+ ) `onException` hs_uv_handle_free handle
+ return (handle, slot)
+
+ sbuf <- A.newPinnedPrimArray sbsiz'
+ closed <- newIORef False
+ return (UDP handle slot uvm rbuf rbsiz' rbufArr sbuf closed))
+ closeUDP
+ where
+ rbsiz' = max 0 rbsiz
+ rbArrSiz' = max 1 rbArrSiz
+ sbsiz' = max 0 sbsiz
+
+closeUDP :: UDP -> IO ()
+closeUDP (UDP handle _ uvm _ _ _ _ closed) = withUVManager_ uvm $ do
+ c <- readIORef closed
+ unless c $ writeIORef closed True >> hs_uv_handle_close handle
+
+-- | Receive messages from the UDP socket, return the source address if available, and a 'Bool'
+-- to indicate if the message is partial (larger than the receive buffer size).
+recvUDP :: HasCallStack => UDP -> IO [(Maybe SockAddr, Bool, V.Bytes)]
+recvUDP (UDP handle slot uvm (A.MutablePrimArray mba#) rbufsiz rbufArr _ closed) = mask_ $ do
+ c <- readIORef closed
+ if c
+ then throwECLOSED
+ else do
+ m <- getBlockMVar uvm slot
+ rbufArrSiz <- getSizeofMutablePrimArray rbufArr
+
+ -- we have to reset the buffer size, during receiving it'll be overwritten
+ forM_ [0..rbufArrSiz-1] $ \ i -> do
+ p <- readPrimArray rbufArr i
+ poke (castPtr p :: Ptr Int32) rbufsiz
+
+ -- reset buffer table's size with buffer array's length, during receiving it'll be decreased
+ withMutablePrimArrayContents rbufArr $ \ p ->
+ pokeBufferTable uvm slot (castPtr p) rbufArrSiz
+
+ withUVManager_ uvm $ do
+ throwUVIfMinus_ (hs_uv_udp_recv_start handle)
+ tryTakeMVar m
+
+ r <- catch (takeMVar m) (\ (e :: SomeException) -> do
+ withUVManager_ uvm (uv_udp_recv_stop handle)
+ -- after we locked uvm and stop reading, the reading probably finished
+ -- so try again
+ r <- tryTakeMVar m
+ case r of Just r -> return r
+ _ -> throwIO e)
+ if r < rbufArrSiz
+ then forM [rbufArrSiz-1, rbufArrSiz-2 .. r] $ \ i -> do
+ p <- readPrimArray rbufArr i
+ -- see the buffer struct diagram above
+ result <- throwUVIfMinus (fromIntegral <$> peek @Int32 (castPtr p))
+ flag <- peek @Int32 (castPtr (p `plusPtr` 4))
+ addrFlag <- peek @Int32 (castPtr (p `plusPtr` 8))
+ !addr <- if addrFlag == 1
+ then Just <$> peekSockAddr (castPtr (p `plusPtr` 12))
+ else return Nothing
+ let !partial = flag .&. UV_UDP_PARTIAL /= 0
+ mba <- A.newPrimArray result
+ copyPtrToMutablePrimArray mba 0 (p `plusPtr` 140) result
+ ba <- A.unsafeFreezePrimArray mba
+ -- It's important to keep recv buffer alive
+ primitive_ (touch# mba#)
+ return (addr, partial, V.PrimVector ba 0 result)
+ else return []
+
+
+-- | Send a UDP message to target address.
+--
+-- WARNING: An 'InvalidArgument' exception with errno 'UV_EMSGSIZE' will be thrown
+-- if the message is larger than 'sendMsgSize'.
+sendUDP :: HasCallStack => UDP -> SockAddr -> V.Bytes -> IO ()
+sendUDP (UDP handle slot uvm _ _ _ sbuf closed) addr (V.PrimVector ba s la) = mask_ $ do
+ c <- readIORef closed
+ when c throwECLOSED
+ -- copy message to pinned buffer
+ lb <- getSizeofMutablePrimArray sbuf
+ when (la > lb) (throwUVIfMinus_ (return UV_EMSGSIZE))
+ copyPrimArray sbuf 0 ba s la
+ withSockAddr addr $ \ paddr ->
+ withMutablePrimArrayContents sbuf $ \ pbuf -> do
+ (slot, m) <- withUVManager_ uvm $ do
+ slot <- getUVSlot uvm (hs_uv_udp_send handle paddr pbuf la)
+ m <- getBlockMVar uvm slot
+ tryTakeMVar m
+ return (slot, m)
+ -- we can't cancel uv_udp_send_t in current libuv
+ -- and disaster will happen if buffer got collected.
+ -- so we have to turn to uninterruptibleMask_'s help.
+ -- i.e. sendUDP is an uninterruptible operation.
+ -- OS will guarantee writing a socket will not
+ -- hang forever anyway.
+ throwUVIfMinus_ (uninterruptibleMask_ $ takeMVar m)
+
+--------------------------------------------------------------------------------
+
+getSockName :: HasCallStack => UDP -> IO SockAddr
+getSockName (UDP handle _ _ _ _ _ _ closed) = do
+ c <- readIORef closed
+ when c throwECLOSED
+ withSockAddrStorage (\ paddr plen -> throwUVIfMinus_ (uv_udp_getsockname handle paddr plen))
+
+setMembership :: HasCallStack => UDP -> CBytes -> CBytes -> UVMembership -> IO ()
+setMembership (UDP handle _ _ _ _ _ _ closed) gaddr iaddr member = do
+ c <- readIORef closed
+ when c throwECLOSED
+ withCBytes gaddr $ \ gaddrp ->
+ withCBytes iaddr $ \ iaddrp ->
+ throwUVIfMinus_ (uv_udp_set_membership handle gaddrp iaddrp member)
+
+setMulticastLoop :: HasCallStack => UDP -> Bool -> IO ()
+setMulticastLoop (UDP handle _ _ _ _ _ _ closed) loop = do
+ c <- readIORef closed
+ when c throwECLOSED
+ throwUVIfMinus_ (uv_udp_set_multicast_loop handle (if loop then 1 else 0))
+
+setMulticastTTL :: HasCallStack => UDP -> Int -> IO ()
+setMulticastTTL (UDP handle _ _ _ _ _ _ closed) ttl = do
+ c <- readIORef closed
+ when c throwECLOSED
+ throwUVIfMinus_ (uv_udp_set_multicast_ttl handle (fromIntegral ttl'))
+ where ttl' = V.rangeCut ttl 1 255
+
+setMulticastInterface :: HasCallStack => UDP -> CBytes -> IO ()
+setMulticastInterface (UDP handle _ _ _ _ _ _ closed) iaddr = do
+ c <- readIORef closed
+ when c throwECLOSED
+ withCBytes iaddr $ \ iaddrp ->
+ throwUVIfMinus_ (uv_udp_set_multicast_interface handle iaddrp)
+
+setBroadcast :: HasCallStack => UDP -> Bool -> IO ()
+setBroadcast (UDP handle _ _ _ _ _ _ closed) b = do
+ c <- readIORef closed
+ when c throwECLOSED
+ throwUVIfMinus_ (uv_udp_set_broadcast handle (if b then 1 else 0))
+
+setTTL :: HasCallStack => UDP -> Int -> IO ()
+setTTL (UDP handle _ _ _ _ _ _ closed) ttl = do
+ c <- readIORef closed
+ when c throwECLOSED
+ throwUVIfMinus_ (uv_udp_set_ttl handle (fromIntegral ttl'))
+ where ttl' = V.rangeCut ttl 1 255
diff --git a/Std/IO/UV/FFI.hsc b/Std/IO/UV/FFI.hsc
index 12f7b51..72a1de1 100644
--- a/Std/IO/UV/FFI.hsc
+++ b/Std/IO/UV/FFI.hsc
@@ -83,7 +83,7 @@ peekUVBufferTable p = (,)
<*> (#{peek hs_loop_data, buffer_size_table } p)
newtype UVRunMode = UVRunMode CInt
- deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable)
+ deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable, Num)
pattern UV_RUN_DEFAULT :: UVRunMode
pattern UV_RUN_DEFAULT = UVRunMode #{const UV_RUN_DEFAULT}
@@ -137,6 +137,7 @@ foreign import ccall unsafe hs_uv_listen :: Ptr UVHandle -> CInt -> IO CInt
foreign import ccall unsafe hs_uv_listen_resume :: Ptr UVHandle -> IO ()
foreign import ccall unsafe hs_uv_read_start :: Ptr UVHandle -> IO CInt
+foreign import ccall unsafe uv_read_stop :: Ptr UVHandle -> IO CInt
foreign import ccall unsafe hs_uv_write :: Ptr UVHandle -> Ptr Word8 -> Int -> IO UVSlotUnSafe
foreign import ccall unsafe hs_uv_accept_check_alloc :: Ptr UVHandle -> IO (Ptr UVHandle)
@@ -158,13 +159,54 @@ foreign import ccall unsafe hs_uv_tcp_connect :: Ptr UVHandle -> Ptr SockAddr ->
foreign import ccall unsafe hs_set_socket_reuse :: Ptr UVHandle -> IO CInt
--------------------------------------------------------------------------------
+-- udp
+foreign import ccall unsafe uv_udp_init :: Ptr UVLoop -> Ptr UVHandle -> IO CInt
+foreign import ccall unsafe uv_udp_init_ex :: Ptr UVLoop -> Ptr UVHandle -> CUInt -> IO CInt
+foreign import ccall unsafe uv_udp_open :: Ptr UVHandle -> UVFD -> IO CInt
+foreign import ccall unsafe uv_udp_bind :: Ptr UVHandle -> Ptr SockAddr -> UVUDPFlag -> IO CInt
+
+newtype UVMembership = UVMembership CInt deriving (Show, Eq, Ord)
+pattern UV_LEAVE_GROUP = UVMembership #{const UV_LEAVE_GROUP}
+pattern UV_JOIN_GROUP = UVMembership #{const UV_JOIN_GROUP}
+
+newtype UVUDPFlag = UVUDPFlag CInt deriving (Show, Eq, Ord, Storable, Bits, FiniteBits, Num)
+pattern UV_UDP_DEFAULT = UVUDPFlag 0
+pattern UV_UDP_IPV6ONLY = UVUDPFlag #{const UV_UDP_IPV6ONLY}
+pattern UV_UDP_REUSEADDR = UVUDPFlag #{const UV_UDP_REUSEADDR}
+
+pattern UV_UDP_PARTIAL :: Int32
+pattern UV_UDP_PARTIAL = #{const UV_UDP_PARTIAL}
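Because UVUDPFlag derives Bits and Num, bind flags can be combined before being passed to uv_udp_bind. A minimal sketch (illustrative, not part of this patch):

    import Data.Bits ((.|.))

    -- allow rebinding the port and restrict the socket to IPv6
    bindFlags :: UVUDPFlag
    bindFlags = UV_UDP_REUSEADDR .|. UV_UDP_IPV6ONLY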
+
+foreign import ccall unsafe uv_udp_set_membership ::
+ Ptr UVHandle -> CString -> CString -> UVMembership -> IO CInt
+foreign import ccall unsafe uv_udp_set_multicast_loop :: Ptr UVHandle -> CInt -> IO CInt
+foreign import ccall unsafe uv_udp_set_multicast_ttl :: Ptr UVHandle -> CInt -> IO CInt
+foreign import ccall unsafe uv_udp_set_multicast_interface :: Ptr UVHandle -> CString -> IO CInt
+foreign import ccall unsafe uv_udp_set_broadcast :: Ptr UVHandle -> CInt -> IO CInt
+foreign import ccall unsafe uv_udp_set_ttl :: Ptr UVHandle -> CInt -> IO CInt
+
+foreign import ccall unsafe hs_uv_udp_recv_start :: Ptr UVHandle -> IO CInt
+foreign import ccall unsafe uv_udp_recv_stop :: Ptr UVHandle -> IO CInt
+foreign import ccall unsafe hs_uv_udp_send
+ :: Ptr UVHandle -> Ptr SockAddr -> Ptr Word8 -> Int -> IO UVSlotUnSafe
+foreign import ccall unsafe uv_udp_getsockname
+ :: Ptr UVHandle -> Ptr SockAddr -> Ptr CInt -> IO CInt
+
+
+--------------------------------------------------------------------------------
-- pipe
foreign import ccall unsafe uv_pipe_init :: Ptr UVLoop -> Ptr UVHandle -> CInt -> IO CInt
--------------------------------------------------------------------------------
-- tty
+
+-- | Terminal mode.
+--
+-- When in 'UV_TTY_MODE_RAW' mode, input is always available character-by-character,
+-- not including modifiers. Additionally, all special processing of characters by the terminal is disabled,
+-- including echoing input characters. Note that CTRL+C will no longer cause a SIGINT when in this mode.
newtype UVTTYMode = UVTTYMode CInt
- deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable)
+ deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable, Num)
pattern UV_TTY_MODE_NORMAL :: UVTTYMode
pattern UV_TTY_MODE_NORMAL = UVTTYMode #{const UV_TTY_MODE_NORMAL}
@@ -174,12 +216,13 @@ pattern UV_TTY_MODE_IO :: UVTTYMode
pattern UV_TTY_MODE_IO = UVTTYMode #{const UV_TTY_MODE_IO}
foreign import ccall unsafe uv_tty_init :: Ptr UVLoop -> Ptr UVHandle -> CInt -> IO CInt
+foreign import ccall unsafe uv_tty_set_mode :: Ptr UVHandle -> UVTTYMode -> IO CInt
--------------------------------------------------------------------------------
-- fs
newtype UVFileMode = UVFileMode CInt
- deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable)
+ deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable, Num)
-- | 00700 user (file owner) has read, write and execute permission
pattern S_IRWXU :: UVFileMode
@@ -262,7 +305,7 @@ foreign import ccall unsafe hs_uv_fs_mkdtemp_threaded
:: CString -> Int -> CString -> Ptr UVLoop -> IO UVSlotUnSafe
newtype UVFileFlag = UVFileFlag CInt
- deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable)
+ deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable, Num)
-- | The file is opened in append mode. Before each write, the file offset is positioned at the end of the file.
pattern O_APPEND :: UVFileFlag
@@ -383,7 +426,7 @@ newtype UVDirEntType = UVDirEntType CInt
#else
newtype UVDirEntType = UVDirEntType CChar
#endif
- deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable)
+ deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable, Num)
data DirEntType
= DirEntUnknown
@@ -518,7 +561,7 @@ foreign import ccall unsafe hs_uv_fs_ftruncate_threaded
-- * 'COPYFILE_FICLONE': If present, uv_fs_copyfile() will attempt to create a copy-on-write reflink. If the underlying platform does not support copy-on-write, then a fallback copy mechanism is used.
--
newtype UVCopyFileFlag = UVCopyFileFlag CInt
- deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable)
+ deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable, Num)
pattern COPYFILE_DEFAULT :: UVCopyFileFlag
pattern COPYFILE_DEFAULT = UVCopyFileFlag 0
@@ -538,7 +581,7 @@ foreign import ccall unsafe hs_uv_fs_copyfile_threaded
:: CString -> CString -> UVCopyFileFlag -> Ptr UVLoop -> IO UVSlotUnSafe
newtype UVAccessMode = UVAccessMode CInt
- deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable)
+ deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable, Num)
pattern F_OK :: UVAccessMode
pattern F_OK = UVAccessMode #{const F_OK}
@@ -572,7 +615,7 @@ foreign import ccall unsafe hs_uv_fs_futime_threaded
:: UVFD -> Double -> Double -> Ptr UVLoop -> IO UVSlotUnSafe
newtype UVSymlinkFlag = UVSymlinkFlag CInt
- deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable)
+ deriving (Eq, Ord, Read, Show, FiniteBits, Bits, Storable, Num)
pattern SYMLINK_DEFAULT :: UVSymlinkFlag
pattern SYMLINK_DEFAULT = UVSymlinkFlag 0
diff --git a/Std/IO/UV/Manager.hs b/Std/IO/UV/Manager.hs
index eb9df37..7008f4e 100644
--- a/Std/IO/UV/Manager.hs
+++ b/Std/IO/UV/Manager.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MultiWayIf #-}
@@ -52,6 +53,7 @@ import Control.Concurrent
import Control.Concurrent.MVar
import Control.Monad
import Control.Monad.IO.Class
+import Control.Monad.Primitive (touch)
import Data.IORef
import Data.Bits (shiftL)
import Data.Primitive.PrimArray
@@ -308,8 +310,7 @@ withUVRequest uvm f = do
m <- getBlockMVar uvm slot
tryTakeMVar m
return (slot, m)
- throwUVIfMinus $
- takeMVar m `onException` cancelUVReq uvm slot no_extra_cleanup
+ throwUVIfMinus (takeMVar m `onException` cancelUVReq uvm slot no_extra_cleanup)
where no_extra_cleanup = const $ return ()
-- | Same as 'withUVRequest' but discard the result.
@@ -333,7 +334,7 @@ withUVRequest' uvm f g = do
m <- getBlockMVar uvm slot
tryTakeMVar m
return (slot, m)
- (g =<< takeMVar m) `onException` cancelUVReq uvm slot no_extra_cleanup
+ g =<< (takeMVar m `onException` cancelUVReq uvm slot no_extra_cleanup)
where no_extra_cleanup = const $ return ()
-- | Same with 'withUVRequest', but will also run an extra cleanup function
@@ -347,8 +348,7 @@ withUVRequestEx uvm f extra_cleanup = do
m <- getBlockMVar uvm slot
tryTakeMVar m
return (slot, m)
- throwUVIfMinus $
- takeMVar m `onException` cancelUVReq uvm slot extra_cleanup
+ throwUVIfMinus (takeMVar m `onException` cancelUVReq uvm slot extra_cleanup)
--------------------------------------------------------------------------------
@@ -377,10 +377,11 @@ forkBa io = do
-- 'UVStream' DOES NOT provide thread safety! Using 'UVStream' concurrently in multiple
-- threads will lead to undefined behavior.
data UVStream = UVStream
- { uvsHandle :: {-# UNPACK #-} !(Ptr UVHandle)
+ { uvsHandle :: {-# UNPACK #-} !(Ptr UVHandle)
, uvsSlot :: {-# UNPACK #-} !UVSlot
, uvsManager :: UVManager
- , uvsClosed :: {-# UNPACK #-} !(IORef Bool)
+    , uvsClosed  :: {-# UNPACK #-} !(IORef Bool)  -- We make no thread-safety guarantee,
+                                                   -- so there is no need for atomic read & write
}
instance Show UVStream where
@@ -398,7 +399,7 @@ instance Show UVStream where
--
-- And this is what 'initUVStream' does; all you need to do is provide the manager you want to hook the handle
-- onto (usually the one on the same capability, i.e. the one obtained by 'getUVManager'),
--- and provide a custom initialization function.
+-- and provide a custom initialization function (which should throw an exception on failure).
--
initUVStream :: HasCallStack
=> (Ptr UVLoop -> Ptr UVHandle -> IO ())
@@ -429,9 +430,16 @@ instance Input UVStream where
throwUVIfMinus_ (hs_uv_read_start handle)
pokeBufferTable uvm slot buf len
tryTakeMVar m
- -- We really can't do much when async exception hit a stream IO
- -- There's no way to cancel, all we can do is to close the stream
- r <- takeMVar m `onException` closeUVStream uvs
+ -- since we are inside mask, this is the only place
+ -- async exceptions could possibly kick in, and we should stop reading
+ r <- catch (takeMVar m) (\ (e :: SomeException) -> do
+ withUVManager_ uvm (uv_read_stop handle)
+                        -- after we have locked uvm and stopped reading, the read may already have finished
+ -- so try again
+ r <- tryTakeMVar m
+ case r of Just r -> return r
+ _ -> throwIO e)
+
if | r > 0 -> return r
-- r == 0 should be impossible, since we guard this situation in c side
| r == fromIntegral UV_EOF -> return 0
@@ -448,7 +456,12 @@ instance Output UVStream where
m <- getBlockMVar uvm slot
tryTakeMVar m
return (slot, m)
- -- cancel uv_write_t will also close the stream
- throwUVIfMinus_ (takeMVar m `onException` closeUVStream uvs)
+        -- we can't cancel uv_write_t with current libuv,
+        -- and disaster will happen if the buffer gets collected,
+        -- so we have to turn to uninterruptibleMask_'s help,
+        -- i.e. writing to a UVStream is an uninterruptible operation.
+        -- The OS guarantees that writing to a TTY or socket will not
+        -- hang forever anyway.
+ throwUVIfMinus_ (uninterruptibleMask_ $ takeMVar m)
--------------------------------------------------------------------------------
diff --git a/cbits/bytes.c b/cbits/bytes.c
index a14e9e3..eeb2e1c 100644
--- a/cbits/bytes.c
+++ b/cbits/bytes.c
@@ -1,4 +1,5 @@
/*
+Copyright (c) 2017-2019 Dong Han
Copyright Johan Tibell 2011, Dong Han 2019
All rights reserved.
Redistribution and use in source and binary forms, with or without
diff --git a/cbits/dtoa.c b/cbits/dtoa.c
index 0e04cb7..4998c9f 100644
--- a/cbits/dtoa.c
+++ b/cbits/dtoa.c
@@ -1,5 +1,5 @@
/*
- * Copyright Winterland1989
+ * Copyright (c) 2017-2019 Dong Han
* Copyright author of MathGeoLib (https://github.com/juj)
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -365,3 +365,88 @@ HsInt grisu3_sp(float v, char *buffer, HsInt *length, HsInt *d_exp)
*d_exp = kappa - mk;
return (HsInt)success;
}
+
+////////////////////////////////////////////////////////////////////////////////
+
+static const char* digits = "0123456789abcdef";
+
+// convert a positive uint64_t to ascii digits, with the following params
+// sign:
+//      -1: negative
+//       0: non-negative
+//       1: non-negative, with the positive sign shown
+// width: values smaller than necessary are ignored
+// pad:
+//      0: no padding
+//      1: right space padding
+//      2: left space padding
+//      3: left zero padding
+// ba, off: buffer bytearray and offset
+// the buffer must be guaranteed to have max(width, 21) bytes left for (sign + digits)
+//
+// return: the new offset for the next write
+HsInt c_int_dec (uint64_t x, HsInt sign, HsInt width, uint8_t pad, char* ba, HsInt off)
+{
+ // writing from the right end
+ char *start = ba + off, *end = start + (width > 21 ? width : 21), *p = end, *q = start;
+ uint64_t mod;
+
+ // encode positive number as little-endian decimal
+ do {
+ mod = x % 10;
+ x = x / 10;
+ *(--p) = digits[mod];
+ } while ( x );
+
+ switch(pad){
+ // no padding, copy to left part
+ case 0:
+ if (sign != 0) *(q++) = (sign == -1 ? '-' : '+');
+ if (q < p) {
+ do {
+ *(q++) = *(p++);
+ } while (p < end);
+ return (q - start) + off;
+ } else return (end - start) + off;
+ // write right space paddings
+ case 1:
+ if (sign != 0) *(q++) = (sign == -1 ? '-' : '+');
+ if (q < p) {
+ do {
+ *(q++) = *(p++);
+ } while (p < end);
+ while (q < start + width) {
+ *(q++) = ' ';
+ }
+ return (q - start) + off;
+ } else return (end - start) + off;
+ // write left space paddings
+ case 2:
+ if (sign != 0) *(--p) = (sign == -1 ? '-' : '+');
+ while (p > end - width){
+ *(--p) = ' ';
+ }
+ if (q < p) {
+ do {
+ *(q++) = *(p++);
+ } while (p < end);
+ return (q - start) + off;
+ } else return (end - start) + off;
+ // write left zero paddings
+ //case 3:
+ default:
+ if (sign != 0) {
+ *(q++) = (sign == -1 ? '-' : '+');
+ // we have to make one byte's room for the sign
+ while (p > end - width + 1) *(--p) = '0';
+ } else {
+ while (p > end - width) *(--p) = '0';
+ }
+ if (q < p) {
+ do {
+ *(q++) = *(p++);
+ } while (p < end);
+ return (q - start) + off;
+    } else return (end - start) + off;
+ }
+}
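For reference, a minimal Haskell sketch (an illustration, not part of this patch) of the padding rules c_int_dec applies to a non-negative value, mirroring the pad parameter documented above:

    -- render x in decimal at the given width under the given padding mode
    padDecimal :: Int -> Int -> Int -> String
    padDecimal pad width x =
        let ds = show x
            n  = max 0 (width - length ds)   -- a width smaller than necessary is ignored
        in case pad of
             1 -> ds ++ replicate n ' '      -- right space padding
             2 -> replicate n ' ' ++ ds      -- left space padding
             3 -> replicate n '0' ++ ds      -- left zero padding
             _ -> ds                         -- no padding

For example, padDecimal 3 6 42 gives "000042" and padDecimal 2 6 42 gives "    42".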
diff --git a/cbits/hs_uv_udp.c b/cbits/hs_uv_udp.c
new file mode 100644
index 0000000..3be1cb6
--- /dev/null
+++ b/cbits/hs_uv_udp.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017-2019 Dong Han
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the authors or the names of any contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <hs_uv.h>
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// udp
+
+// We do a batch read per uv_run; the buffer index keeps decreasing until it hits zero,
+// then we call uv_udp_recv_stop to stop receiving.
+void hs_udp_alloc_cb(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf){
+ HsInt slot = (HsInt)handle->data;
+ hs_loop_data* loop_data = handle->loop->data;
+    // fetch the buffer array from the buffer_table
+    // the first 12 + 128 bytes of each buffer are reserved for the result, flags and sockaddr
+ char** buffer_array = (char**)loop_data->buffer_table[slot];
+ (loop_data->buffer_size_table[slot])--;
+ ssize_t buffer_index = loop_data->buffer_size_table[slot];
+ if (buffer_index < 0) {
+ uv_udp_recv_stop((uv_udp_t*)handle);
+ buf->base = NULL;
+ buf->len = 0;
+ } else {
+ buf->base = (char*)buffer_array[buffer_index] + 140;
+ buf->len = *((int32_t*)buffer_array[buffer_index]);
+ }
+}
+
+void hs_udp_recv_cb (uv_udp_t* udp, ssize_t nread, const uv_buf_t* _buf
+ , const struct sockaddr* addr, unsigned flags){
+    if (nread == 0 && addr == NULL) return;
+ HsInt slot = (HsInt)udp->data;
+ hs_loop_data* loop_data = udp->loop->data;
+
+ char* buf = (char*)(_buf->base)-140;
+ struct sockaddr* addr_buf = (struct sockaddr*)(buf+12);
+ // result
+ *(int32_t*)buf = (int32_t)nread;
+ // flag
+ *(int32_t*)(buf+4) = (int32_t)flags;
+
+ if (addr == NULL) {
+ // set sockaddr flag
+ *(int32_t*)(buf+8) = 0;
+ } else {
+ // set sockaddr flag
+ *(int32_t*)(buf+8) = 1;
+ // copy sockaddr
+ if (addr->sa_family == AF_INET){
+ memcpy(addr_buf, addr, sizeof(struct sockaddr_in));
+ } else if (addr->sa_family == AF_INET6){
+ memcpy(addr_buf, addr, sizeof(struct sockaddr_in6));
+ } else {
+ memcpy(addr_buf, addr, sizeof(struct sockaddr));
+ }
+ }
+ if (nread != 0) {
+ loop_data->event_queue[loop_data->event_counter] = slot; // push the slot to event queue
+ loop_data->event_counter += 1;
+ uv_udp_recv_stop(udp);
+ }
+}
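The callback above lays out a small header in front of each receive buffer: an int32 result at offset 0, the libuv flags at offset 4, a sockaddr-present marker at offset 8, the copied sockaddr from offset 12, and the payload itself from offset 140. A hypothetical Haskell-side reader of that header could look like the following sketch (written against those layout assumptions, not code from this patch):

    import Data.Int         (Int32)
    import Data.Word        (Word8)
    import Foreign.Ptr      (Ptr)
    import Foreign.Storable (peekByteOff)

    -- peek the result, flags and sockaddr-present marker written by hs_udp_recv_cb
    peekRecvHeader :: Ptr Word8 -> IO (Int32, Int32, Bool)
    peekRecvHeader p = do
        nread   <- peekByteOff p 0 :: IO Int32
        flags   <- peekByteOff p 4 :: IO Int32
        hasAddr <- peekByteOff p 8 :: IO Int32
        return (nread, flags, hasAddr /= 0)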
+
+int hs_uv_udp_recv_start(uv_udp_t* handle){
+ return uv_udp_recv_start(handle, hs_udp_alloc_cb, hs_udp_recv_cb);
+}
+
+void hs_uv_udp_send_cb(uv_udp_send_t* req, int status){
+ HsInt slot = (HsInt)req->data;
+ uv_loop_t* loop = req->handle->loop;
+ hs_loop_data* loop_data = loop->data;
+ loop_data->buffer_size_table[slot] = (HsInt)status; // 0 in case of success, < 0 otherwise.
+ loop_data->event_queue[loop_data->event_counter] = slot; // push the slot to event queue
+ loop_data->event_counter += 1;
+ free_slot(loop_data, slot); // free the uv_req_t
+}
+
+HsInt hs_uv_udp_send(uv_udp_t* handle, const struct sockaddr* addr, char* buf, HsInt buf_siz){
+ uv_loop_t* loop = handle->loop;
+ hs_loop_data* loop_data = loop->data;
+ HsInt slot = alloc_slot(loop_data);
+ if (slot < 0) return UV_ENOMEM;
+ uv_udp_send_t* req =
+ (uv_udp_send_t*)fetch_uv_struct(loop_data, slot);
+ req->data = (void*)slot;
+
+ // on windows this struct is captured by WSASend
+ // on unix this struct is copied by libuv's uv_udp_send
+    // so it's safe to allocate it on the stack
+ uv_buf_t buf_t = { .base = buf, .len = (size_t)buf_siz };
+
+ int r = uv_udp_send(req, handle, &buf_t, 1, addr, hs_uv_udp_send_cb);
+ // we never use writev: we do our own
+ // user-space buffering in haskell.
+ if (r < 0) {
+        free_slot(loop_data, slot); // free the uv_req_t, the callback won't be fired
+ return (HsInt)r;
+ } else return slot;
+
+}
diff --git a/cbits/text.c b/cbits/text.c
index be237f5..5515490 100644
--- a/cbits/text.c
+++ b/cbits/text.c
@@ -95,71 +95,283 @@ static inline int ascii_u64(const uint8_t *data, size_t len)
////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
#define UTF8_ACCEPT 0
-#define UTF8_REJECT 1
+#define UTF8_REJECT 12
static const uint8_t utf8d[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3f
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5f
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7f
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9f
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // a0..bf
- 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // c0..df
- 0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
- 0x3, 0x3, 0x4, 0x3, 0x3, // e0..ef
- 0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
- 0x8, 0x8, 0x8, 0x8, 0x8 // f0..ff
-};
+  // The first part of the table maps bytes to character classes
+  // to reduce the size of the transition table and create bitmasks.
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 ~ 0x1F
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 ~ 0x3F
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 ~ 0x5F
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 ~ 0x7F
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 0x80 ~ 0x9F
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // 0xA0 ~ 0xBF
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xC0 ~ 0xDF
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, // 0xE0 ~ 0xFF
-static const uint8_t utf8d_transition[] = {
- 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4,
- 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
- 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2
- 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4
- 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6
- 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1,
- 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // s7..s8
+ // The second part is a transition table that maps a combination
+ // of a state of the automaton and a character class to a state.
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12,
};
+
static uint32_t inline updatestate(uint32_t *state, uint32_t byte) {
uint32_t type = utf8d[byte];
- *state = utf8d_transition[16 * *state + type];
+ *state = utf8d[256 + *state + type];
return *state;
}
+// return 2 instead of 1, so that we can observe the difference when SIMD is not used
HsInt utf8_validate_slow(const char* c, size_t len){
const unsigned char *cu = (const unsigned char *)c;
- uint32_t state = 0;
+ uint32_t state = UTF8_ACCEPT;
for (size_t i = 0; i < len; i++) {
uint32_t byteval = (uint32_t)cu[i];
if (updatestate(&state, byteval) == UTF8_REJECT)
return 0;
}
- return 1;
+ return ((state == UTF8_ACCEPT) ? 2 : 0);
+}
+
+static inline uint32_t decode_hex(uint32_t c) {
+ if (c >= '0' && c <= '9') return c - '0';
+ else if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+ else if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+ return 0xFFFFFFFF; // Should not happen
+}
+
+// Decode, return negative value on error
+HsInt decode_json_string(char *dest, const char *src, HsInt srcoff, HsInt srclen) {
+ char *d = dest;
+ const char *s = src + srcoff;
+ const char *srcend = s + srclen;
+
+ uint32_t state = UTF8_ACCEPT;
+ unsigned char cur_byte;
+
+ uint8_t surrogate = 0;
+ uint32_t temp_hex = 0;
+ uint32_t unidata;
+    // ECMA 404 requires codepoints beyond the Basic Multilingual Plane to be encoded as a surrogate pair
+ uint32_t h_surrogate;
+ uint32_t l_surrogate;
+
+// read current byte to cur_byte and guard input end
+#define DISPATCH(label) {\
+ if (s >= srcend) {\
+ return -1;\
+ }\
+ cur_byte = *s++;\
+ goto label;\
+}
+
+standard:
+ // Test end of stream
+ while (s < srcend) {
+ cur_byte = *s++;
+ if (updatestate(&state, (uint32_t)cur_byte) == UTF8_REJECT) { return -1; }
+
+ if (cur_byte == '\\')
+ DISPATCH(backslash)
+ else {
+ *d++ = cur_byte;
+ }
+ }
+ // Exit point, use sign bit to indicate utf8 validation error
+ return (state == UTF8_ACCEPT) ? (d - dest) : (dest - d);
+
+backslash:
+ switch (cur_byte) {
+ case '"':
+ case '\\':
+ case '/':
+ *d++ = cur_byte;
+ goto standard;
+ break;
+ case 'b': *d++ = '\b';goto standard;
+ case 'f': *d++ = '\f';goto standard;
+ case 'n': *d++ = '\n';goto standard;
+ case 'r': *d++ = '\r';goto standard;
+ case 't': *d++ = '\t';goto standard;
+ case 'u': DISPATCH(unicode1);;break;
+ default:
+ return -1;
+ }
+
+unicode1:
+ temp_hex = decode_hex(cur_byte);
+ if (temp_hex == 0xFFFFFFFF) { return -1; }
+ else unidata = temp_hex << 12;
+ DISPATCH(unicode2);
+unicode2:
+ temp_hex = decode_hex(cur_byte);
+ if (temp_hex == 0xFFFFFFFF) { return -1; }
+ else unidata |= temp_hex << 8;
+ DISPATCH(unicode3);
+unicode3:
+ temp_hex = decode_hex(cur_byte);
+ if (temp_hex == 0xFFFFFFFF) { return -1; }
+ else unidata |= temp_hex << 4;
+ DISPATCH(unicode4);
+unicode4:
+ temp_hex = decode_hex(cur_byte);
+ if (temp_hex == 0xFFFFFFFF) { return -1; }
+ else unidata |= temp_hex;
+ if (surrogate) {
+ if (unidata < 0xDC00 || unidata > 0xDFFF) // is not low surrogate
+ return -1;
+ surrogate = 0;
+ // decode surrogate pair
+ l_surrogate = unidata;
+ unidata = 0x10000;
+ unidata += (h_surrogate & 0x03FF) << 10;
+ unidata += (l_surrogate & 0x03FF);
+ } else if (unidata >= 0xD800 && unidata <= 0xDBFF ) { // is high surrogate
+ surrogate = 1;
+ DISPATCH(surrogate1);
+ } else if (unidata >= 0xDC00 && unidata <= 0xDFFF) { // is low surrogate
+ return -1;
+ }
+ // encode unidata into UTF8 bytes
+ if (unidata <= 0x7F) {
+ // plain ASCII
+ *d++ = (char) unidata;
+ }
+ else if (unidata <= 0x07FF) {
+ // 2-byte unicode
+ *d++ = (char) (((unidata >> 6) & 0x1F) | 0xC0);
+ *d++ = (char) (((unidata >> 0) & 0x3F) | 0x80);
+ }
+ else if (unidata <= 0xFFFF) {
+ // 3-byte unicode
+ *d++ = (char) (((unidata >> 12) & 0x0F) | 0xE0);
+ *d++ = (char) (((unidata >> 6) & 0x3F) | 0x80);
+ *d++ = (char) (((unidata >> 0) & 0x3F) | 0x80);
+ }
+ else if (unidata <= 0x10FFFF) {
+ // 4-byte unicode
+ *d++ = (char) (((unidata >> 18) & 0x07) | 0xF0);
+ *d++ = (char) (((unidata >> 12) & 0x3F) | 0x80);
+ *d++ = (char) (((unidata >> 6) & 0x3F) | 0x80);
+ *d++ = (char) (((unidata >> 0) & 0x3F) | 0x80);
+ }
+ else {
+ // error
+ return -1;
+ }
+ goto standard;
+surrogate1:
+ if (cur_byte != '\\') { return -1; }
+ h_surrogate = unidata;
+ DISPATCH(surrogate2)
+surrogate2:
+ if (cur_byte != 'u') { return -1; }
+ DISPATCH(unicode1)
}
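The surrogate-pair arithmetic above can be checked with a small worked example (illustrative, not part of this patch): the escape sequence \uD83D\uDE00 combines to U+1F600.

    import Data.Bits ((.&.), shiftL)

    -- combine a high and a low surrogate into one code point, as decode_json_string does
    surrogatePair :: Int -> Int -> Int
    surrogatePair hi lo = 0x10000 + ((hi .&. 0x03FF) `shiftL` 10) + (lo .&. 0x03FF)

    -- surrogatePair 0xD83D 0xDE00 == 0x1F600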
+// This function is used to find the ending double quote of a json string
+// if the return value >= 0, it's the split offset, excluding the final double quote
+// if the return value == -1, the string is not finished yet
+// the lowest two bytes of state record two things:
+// skip: 1 if we should skip the next char, 0 otherwise
+// escaped(LSB): 1 if this string contains escaped char(s),
+// 3 if it contains unescaped control char(s),
+// 0 otherwise
+HsInt find_json_string_end(uint32_t* state, const unsigned char* ba, HsInt offset, HsInt len){
+ const unsigned char *s = ba + offset;
+ const unsigned char *end = s + len;
+ uint32_t skip = *state >> 8;
+ uint32_t escaped = *state & 0xFF;
+ for (; s < end; s++) {
+ if (skip == 1){
+ skip = 0; // skip this char
+ }
+ else if (*s == '\\') { // backslash
+ escaped = 1;
+ skip = 1;
+ }
+ else if (*s == '\"') { // double quote
+ *state = (skip << 8) | escaped; // save the state
+ return (s - ba - offset);
+ } else if (*s <= 0x1F) { // unescaped control characters
+ escaped = 3; // even if it's skipped, it will be rejected in decode_json_string
+ }
+ }
+ *state = (skip << 8) | escaped; // save the state
+ return (-1);
+}
+
+HsInt escape_json_string_length(const unsigned char *src, HsInt srcoff, HsInt srclen){
+ HsInt rv = 2; // for start and end quotes
+ const unsigned char *i = src + srcoff;
+ const unsigned char *srcend = i + srclen;
+ for (; i < srcend; i++) {
+ switch (*i) {
+ case '\b': rv += 2; break;
+ case '\f': rv += 2; break;
+ case '\n': rv += 2; break;
+ case '\r': rv += 2; break;
+ case '\t': rv += 2; break;
+ case '\"': rv += 2; break;
+ case '\\': rv += 2; break;
+ case '/': rv += 2; break;
+ default:
+ if (*i <= 0x1F) {
+ rv += 6;
+ } else {
+ rv += 1;
+ }
+ }
+ }
+ return rv;
+}
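A rough Haskell analogue of the length computation above (an illustration, not part of this patch): two bytes for the surrounding quotes, two per byte that gets a short escape, six per remaining control byte (rendered as \u00XX), and one per ordinary byte.

    import Data.Word (Word8)

    escapedLength :: [Word8] -> Int
    escapedLength ws = 2 + sum (map cost ws)
      where
        cost w
          | w `elem` [8, 9, 10, 12, 13, 34, 47, 92] = 2   -- \b \t \n \f \r \" / and \\
          | w <= 0x1F                               = 6   -- \u00XX
          | otherwise                               = 1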
+
+static const unsigned char DEC2HEX[16] = {
+ '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'
+};
+
+HsInt escape_json_string(const unsigned char *src, HsInt srcoff, HsInt srclen, unsigned char *dest, HsInt desoff){
+ const unsigned char *i = src + srcoff;
+ const unsigned char *srcend = i + srclen;
+ unsigned char *j = dest + desoff;
+ *j++ = '\"'; // start quote
+ for (; i < srcend; i++){
+ switch (*i) {
+ case '\b': *j++ = '\\'; *j++ = 'b'; break;
+ case '\f': *j++ = '\\'; *j++ = 'f'; break;
+ case '\n': *j++ = '\\'; *j++ = 'n'; break;
+ case '\r': *j++ = '\\'; *j++ = 'r'; break;
+ case '\t': *j++ = '\\'; *j++ = 't'; break;
+ case '\"': *j++ = '\\'; *j++ = '\"'; break;
+ case '\\': *j++ = '\\'; *j++ = '\\'; break;
+ case '/': *j++ = '\\'; *j++ = '/'; break;
+ default:
+ if (*i <= 0x1F) {
+ *j++ = '\\';
+ *j++ = 'u';
+ *j++ = '0';
+ *j++ = '0';
+ *j++ = DEC2HEX[*i >> 4];
+ *j++ = DEC2HEX[*i & 0xF];
+ } else {
+ *j++ = *i;
+ }
+ }
+ }
+ *j++ = '\"'; // end quote
+ return (HsInt)(j-dest);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
HsInt utf8_isnormalized(const char* p, HsInt off, HsInt len, size_t flag){
size_t offset;
return (HsInt)utf8isnormalized(p+off, len, flag, &offset);
diff --git a/include/dtoa.h b/include/dtoa.h
index 5f83845..7f2e001 100644
--- a/include/dtoa.h
+++ b/include/dtoa.h
@@ -32,3 +32,4 @@
HsInt grisu3(double v, char *buffer, HsInt *length, HsInt *d_exp);
HsInt grisu3_sp(float v, char *buffer, HsInt *length, HsInt *d_exp);
+HsInt c_int_dec (uint64_t x, HsInt sign, HsInt width, uint8_t pad, char* ba, HsInt off);
diff --git a/include/hs_uv.h b/include/hs_uv.h
index 4d98254..96cc3d1 100644
--- a/include/hs_uv.h
+++ b/include/hs_uv.h
@@ -55,7 +55,7 @@
////////////////////////////////////////////////////////////////////////////////
// CONSTANT
-#define ACCEPT_BUFFER_SIZE 1024
+#define ACCEPT_BUFFER_SIZE 1020
#define INIT_LOOP_SIZE 128
#define INIT_LOOP_SIZE_BIT 7
@@ -262,7 +262,7 @@ void hs_uv_accept_check_close(uv_check_t* check);
////////////////////////////////////////////////////////////////////////////////
// tcp
-int hs_uv_tcp_open(uv_tcp_t* handle, int sock);
+int hs_uv_tcp_open(uv_tcp_t* handle, int32_t sock);
HsInt hs_uv_tcp_connect(uv_tcp_t* handle, const struct sockaddr* addr);
#if defined(_WIN32)
@@ -288,6 +288,12 @@ void uv__io_start(uv_loop_t* loop, uv__io_t* w, unsigned int events);
#endif
////////////////////////////////////////////////////////////////////////////////
+// udp
+
+int hs_uv_udp_recv_start(uv_udp_t* handle);
+HsInt hs_uv_udp_send(uv_udp_t* handle, const struct sockaddr* addr, char* buf, HsInt buf_siz);
+
+////////////////////////////////////////////////////////////////////////////////
// fs
// we define file open flag here for compatibility on libuv < v1.16
diff --git a/include/text.h b/include/text.h
index 61ba6d9..c28c8bb 100644
--- a/include/text.h
+++ b/include/text.h
@@ -35,6 +35,11 @@ HsInt ascii_validate_addr(const char* p, HsInt len);
HsInt utf8_validate(const char* p, HsInt off, HsInt len);
HsInt utf8_validate_addr(const char* p, HsInt len);
+HsInt find_json_string_end(uint32_t* state, const unsigned char* ba, HsInt offset, HsInt len);
+HsInt decode_json_string(char *dest, const char *src, HsInt srcoff, HsInt srclen);
+HsInt escape_json_string_length(const unsigned char *src, HsInt srcoff, HsInt srclen);
+HsInt escape_json_string(const unsigned char *src, HsInt srcoff, HsInt srclen, unsigned char *dest, HsInt desoff);
+
HsInt utf8_isnormalized(const char* p, HsInt off, HsInt len, size_t flag);
HsInt utf8_normalize(const char* p, HsInt off, HsInt len, char* q, HsInt len2, size_t flag);
HsInt utf8_normalize_length(const char* p, HsInt off, HsInt len, size_t flag);
diff --git a/stdio.cabal b/stdio.cabal
index 995b0ef..db257e1 100644
--- a/stdio.cabal
+++ b/stdio.cabal
@@ -1,5 +1,5 @@
name: stdio
-version: 0.1.1.0
+version: 0.2.0.0
synopsis: A simple and high performance IO toolkit for Haskell
description: This package provides a simple and high performance IO toolkit for Haskell, including
packed vectors, unicode texts, socket, file system, timers and more!
@@ -27,6 +27,7 @@ extra-source-files: ChangeLog.md
cbits/dtoa.c
cbits/hs_uv_base.c
cbits/hs_uv_stream.c
+ cbits/hs_uv_udp.c
cbits/hs_uv_file.c
cbits/text.c
@@ -119,6 +120,7 @@ source-repository head
flag no-pkg-config
description: Don't use pkg-config to check for library dependences
default: False
+ manual: True
flag integer-simple
description:
@@ -134,12 +136,17 @@ library
Std.Data.Vector.Search
Std.Data.Vector.Sort
Std.Data.Vector.QQ
+ Std.Data.Vector.FlatIntMap
+ Std.Data.Vector.FlatIntSet
+ Std.Data.Vector.FlatMap
+ Std.Data.Vector.FlatSet
Std.Data.PrimArray.Cast
Std.Data.PrimArray.QQ
Std.Data.PrimArray.BitTwiddle
Std.Data.PrimArray.UnalignedAccess
Std.Data.Array
Std.Data.Array.Checked
+ -- Std.Data.Array.Compound
Std.Data.CBytes
Std.Data.Text
@@ -154,7 +161,9 @@ library
Std.Data.Builder.Base
Std.Data.Builder.Numeric
Std.Data.Builder.Numeric.DigitTable
-
+
+ Std.Data.Generics.Utils
+
Std.Data.Parser
Std.Data.Parser.Base
Std.Data.Parser.Numeric
@@ -164,6 +173,10 @@ library
Std.Data.PrimSTRef.Base
Std.Data.LEON
+ Std.Data.JSON
+ Std.Data.JSON.Base
+ Std.Data.JSON.Builder
+ Std.Data.JSON.Value
Std.Foreign.PrimArray
@@ -175,6 +188,7 @@ library
Std.IO.FileSystem
Std.IO.FileSystemT
Std.IO.TCP
+ Std.IO.UDP
Std.IO.SockAddr
Std.IO.StdStream
@@ -190,14 +204,16 @@ library
, ghc-prim >= 0.5.3 && <= 0.5.4
, primitive >= 0.6.4 && <= 0.6.5
, exceptions == 0.10.*
- , word8 == 0.1.*
, scientific == 0.3.*
, hashable == 1.2.*
, case-insensitive == 1.2.*
, time >= 1.8 && < 2.0
, deepseq >= 1.4 && < 1.5
+ , QuickCheck >= 2.10
, template-haskell == 2.14.*
, stm == 2.5.*
+ , unordered-containers == 0.2.*
+ , tagged == 0.8.*
if flag(integer-simple)
cpp-options: -DINTEGER_SIMPLE
@@ -231,6 +247,7 @@ library
cbits/text.c
cbits/hs_uv_base.c
cbits/hs_uv_stream.c
+ cbits/hs_uv_udp.c
cbits/hs_uv_file.c
third_party/utf8rewind/source/unicodedatabase.c
third_party/utf8rewind/source/internal/casemapping.c
@@ -327,7 +344,8 @@ test-suite test
other-modules:
Std.Data.Builder.NumericSpec
- Std.Data.CBytesSpec
+ Std.Data.JSON.BaseSpec
+ Std.Data.JSON.ValueSpec
Std.Data.Parser.BaseSpec
Std.Data.Parser.NumericSpec
Std.Data.PrimArray.UnalignedAccessSpec
@@ -338,11 +356,19 @@ test-suite test
Std.Data.Vector.ExtraSpec
Std.Data.Vector.SearchSpec
Std.Data.Vector.SortSpec
+ Std.Data.Vector.FlatMapSpec
+ Std.Data.Vector.FlatSetSpec
+ Std.Data.CBytesSpec
+ Std.Data.LEONSpec
+ Std.Data.TextBuilderSpec
Std.IO.FileSystemSpec
Std.IO.FileSystemTSpec
Std.IO.LowResTimerSpec
Std.IO.ResourceSpec
+ Std.IO.UDPSpec
+ ghc-options: -threaded
+ default-language: Haskell2010
if flag(integer-simple)
cpp-options: -DINTEGER_SIMPLE
@@ -351,5 +377,3 @@ test-suite test
cpp-options: -DINTEGER_GMP
build-depends: integer-gmp >= 0.2 && < 1.1
- ghc-options: -threaded
- default-language: Haskell2010
diff --git a/test/Std/Data/Builder/NumericSpec.hs b/test/Std/Data/Builder/NumericSpec.hs
index 56267f4..9c76431 100644
--- a/test/Std/Data/Builder/NumericSpec.hs
+++ b/test/Std/Data/Builder/NumericSpec.hs
@@ -73,9 +73,35 @@ spec = describe "builder numeric" . modifyMaxSuccess (*50) . modifyMaxSize (*50)
prop "padding roundtrip" $ \ i ->
i === (read . T.unpack . T.validate . B.buildBytes $ B.intWith @Int f i)
+ describe "c_intWith == hs_intWith" $ do
+ prop "c_intWith == hs_intWith @Word" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Word f i)
+ prop "c_intWith == hs_intWith @Word8" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Word8 f i)
+ prop "c_intWith == hs_intWith @Word16" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Word16 f i)
+ prop "c_intWith == hs_intWith @Word32" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Word32 f i)
+ prop "c_intWith == hs_intWith @Word64" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Word64 f i)
+ prop "c_intWith == hs_intWith @Int" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Int f i)
+ prop "c_intWith == hs_intWith @Int8" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Int8 f i)
+ prop "c_intWith == hs_intWith @Int16" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Int16 f i)
+ prop "c_intWith == hs_intWith @Int32" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Int32 f i)
+ prop "c_intWith == hs_intWith @Int64" $ \ i f ->
+ (B.buildBytes $ B.hs_intWith f i) === (B.buildBytes $ B.c_intWith @Int64 f i)
+
describe "integer roundtrip" $ do
prop "integer roundtrip" $ \ i ->
i === (read . T.unpack . T.validate . B.buildBytes $ B.integer i)
+ prop "integer roundtrip II" $
+      -- there was an issue with leading zeros in front of a block, so we add a case manually here
+ (2132132100000000000000000000000000213213 :: Integer) ===
+ (read . T.unpack . T.validate . B.buildBytes $ B.integer 2132132100000000000000000000000000213213)
describe "scientific roundtrip" $ do
prop "scientific roundtrip" $ \ c e ->
@@ -181,23 +207,22 @@ spec = describe "builder numeric" . modifyMaxSuccess (*50) . modifyMaxSize (*50)
show i === (T.unpack . T.validate . B.buildBytes $ B.int @Int8 i)
describe "intWith === printf" $ do
- prop "int === printf" $ \ i ->
+ prop "int === printf %d" $ \ i ->
printf "%d" i ===
(T.unpack . T.validate . B.buildBytes $ B.intWith @Int B.defaultIFormat i)
- prop "int === printf" $ \ i (Positive w) ->
+ prop "int === printf %xxd" $ \ i (Positive w) ->
printf ("%" ++ show w ++ "d") i ===
(T.unpack . T.validate . B.buildBytes $ B.intWith @Int B.defaultIFormat
{B.padding = B.LeftSpacePadding, B.width = w} i)
- prop "int === printf" $ \ i (Positive w) ->
+ prop "int === printf %0xxd" $ \ i (Positive w) ->
printf ("%0" ++ show w ++ "d") i ===
(T.unpack . T.validate . B.buildBytes $ B.intWith @Int B.defaultIFormat
{B.padding = B.ZeroPadding, B.width = w} i)
- prop "int === printf" $ \ i (Positive w) ->
+ prop "int === printf %-xx%" $ \ i (Positive w) ->
printf ("%-" ++ show w ++ "d") i ===
(T.unpack . T.validate . B.buildBytes $ B.intWith @Int B.defaultIFormat
{B.padding = B.RightSpacePadding, B.width = w} i)
-
- prop "int === printf" $ \ i ->
+ prop "hex === printf %08x" $ \ i ->
printf "%08x" i ===
(T.unpack . T.validate . B.buildBytes $ B.hex @Int32 i)
@@ -228,3 +253,9 @@ spec = describe "builder numeric" . modifyMaxSuccess (*50) . modifyMaxSize (*50)
prop "doubleWith === formatRealFloat" $ \ i l ->
formatRealFloat FFExponent l i ===
(T.unpack . T.validate . B.buildBytes $ B.doubleWith B.Exponent l i)
+
+ describe "grisu3, grisu3_sp === floatToDigits 10" $ do
+ prop "grisu3 === floatToDigits" $ \ (Positive f) ->
+ B.grisu3 f === floatToDigits 10 f
+ prop "grisu3_sp === floatToDigits" $ \ (Positive f) ->
+ B.grisu3_sp f === floatToDigits 10 f
diff --git a/test/Std/Data/JSON/BaseSpec.hs b/test/Std/Data/JSON/BaseSpec.hs
new file mode 100644
index 0000000..4955e64
--- /dev/null
+++ b/test/Std/Data/JSON/BaseSpec.hs
@@ -0,0 +1,71 @@
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Std.Data.JSON.BaseSpec where
+
+import qualified Data.List as L
+import Data.Word
+import Data.Int
+import GHC.Generics
+import qualified Std.Data.Text as T
+import qualified Std.Data.Builder as B
+import Test.QuickCheck
+import Test.QuickCheck.Function
+import Test.QuickCheck.Property
+import Test.Hspec
+import Test.Hspec.QuickCheck
+import qualified Std.Data.JSON as JSON
+import Std.Data.JSON (FromValue, ToValue, EncodeJSON)
+
+
+data T a
+ = Nullary
+ | Unary Int
+ | Product T.Text (Maybe Char) a
+ | Record { testOne :: Double
+ , testTwo :: Maybe Bool
+ , testThree :: Maybe a
+ }
+ | List [a]
+ deriving (Show, Eq, Generic, FromValue, ToValue, EncodeJSON)
+
+spec :: Spec
+spec = describe "JSON Base instances" $ do
+
+ it "Nullary constructor are encoded as text" $
+ JSON.encodeText (Nullary :: T Integer) === "\"Nullary\""
+
+ it "Unary constructor are encoded as single field object" $
+ JSON.encodeText (Unary 123456 :: T Integer) === "{\"Unary\":123456}"
+
+ it "Product are encoded as array" $
+ JSON.encodeText (Product "ABC" (Just 'x') (123456::Integer)) ===
+ "{\"Product\":[\"ABC\",\"x\",123456]}"
+
+ it "Record are encoded as key values" $
+ JSON.encodeText (Record 0.123456 Nothing (Just (123456::Integer))) ===
+ "{\"Record\":{\
+ \\"testOne\":0.123456,\
+ \\"testTwo\":null,\
+ \\"testThree\":123456}}"
+
+ it "List are encode as array" $
+ JSON.encodeText (List [Nullary
+ , Unary 123456
+ , (Product "ABC" (Just 'x') (123456::Integer))
+ , (Record 0.123456 Nothing (Just (123456::Integer)))]) ===
+ "{\"List\":[\"Nullary\",\
+ \{\"Unary\":123456},\
+ \{\"Product\":[\"ABC\",\"x\",123456]},\
+ \{\"Record\":{\
+ \\"testOne\":0.123456,\
+ \\"testTwo\":null,\
+ \\"testThree\":123456}}]}"
+
+ it "control characters are escaped" $
+ JSON.encodeText (T.pack $ map toEnum [0..0x1F]) ===
+ "\"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000b\\f\\r\\u000e\\u000f\
+ \\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001a\\u001b\\u001c\\u001d\\u001e\\u001f\""
diff --git a/test/Std/Data/JSON/ValueSpec.hs b/test/Std/Data/JSON/ValueSpec.hs
new file mode 100644
index 0000000..d8921c5
--- /dev/null
+++ b/test/Std/Data/JSON/ValueSpec.hs
@@ -0,0 +1,24 @@
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module Std.Data.JSON.ValueSpec where
+
+import qualified Data.List as L
+import Data.Word
+import Data.Int
+import GHC.Float
+import Data.Word8 (toLower, toUpper)
+import qualified Std.Data.Builder as B
+import Test.QuickCheck
+import Test.QuickCheck.Function
+import Test.QuickCheck.Property
+import Test.Hspec
+import Test.Hspec.QuickCheck
+import qualified Std.Data.JSON.Value as JSON
+import qualified Std.Data.JSON.Builder as JSONB
+
+
+spec :: Spec
+spec = describe "JSON" $ do -- large size will generate too huge JSON document
+ prop "value roundtrip" $ \ v ->
+ Right v === JSON.parseValue' (B.buildBytes (JSONB.value v))
diff --git a/test/Std/Data/LEONSpec.hs b/test/Std/Data/LEONSpec.hs
new file mode 100644
index 0000000..9791947
--- /dev/null
+++ b/test/Std/Data/LEONSpec.hs
@@ -0,0 +1,67 @@
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DeriveAnyClass #-}
+
+module Std.Data.LEONSpec where
+
+import qualified Data.List as List
+import Data.Word
+import Data.Int
+import GHC.Natural
+import qualified Std.Data.Builder as B
+import qualified Std.Data.Parser as P
+import qualified Std.Data.CBytes as CB
+import qualified Std.Data.Text as T
+import qualified Std.Data.Vector.Base as V
+import qualified Std.Data.LEON as LEON
+import GHC.Generics
+import Test.QuickCheck
+import Test.QuickCheck.Function
+import Test.QuickCheck.Property
+import Test.Hspec
+import Test.Hspec.QuickCheck
+
+data Test1 = Test1 Int8 Int16 Int32 Int64 Int Word8 Word16 Word32 Word64 Word
+ deriving (Generic, LEON.LEON, Eq, Show)
+
+data Test2 = Test2 (LEON.BE Int16) (LEON.BE Word32) (LEON.BE Int64) (LEON.BE Word)
+ deriving (Generic, LEON.LEON, Eq, Show)
+
+data Test3 = Test3Integer Integer | Test3Natural Natural
+ deriving (Generic, LEON.LEON, Eq, Show)
+
+data Test4 = Test4 [Integer]
+ deriving (Generic, LEON.LEON, Eq, Show)
+
+data Test5 = Test5 Ordering Bool
+ deriving (Generic, LEON.LEON, Eq, Show)
+
+data Test6 = Test6 (V.Vector Integer) V.Bytes (V.PrimVector Int) T.Text CB.CBytes
+ deriving (Generic, LEON.LEON, Eq, Show)
+
+spec :: Spec
+spec = describe "LEON instance roundtrip" . modifyMaxSuccess (*10) . modifyMaxSize (*10) $ do
+ prop "Test1 roundtrip" $ \ a b c d e f g h i j ->
+ let t = Test1 a b c d e f g h i j
+ in P.parse_ LEON.decode (B.buildBytes $ LEON.encode t) === Right t
+
+ prop "Test2 roundtrip" $ \ a b c d ->
+ let t = Test2 (LEON.BE a) (LEON.BE b) (LEON.BE c) (LEON.BE d)
+ in P.parse_ LEON.decode (B.buildBytes $ LEON.encode t) === Right t
+
+ prop "Test3 roundtrip" $ \ a b (Positive c) ->
+ let t = if a then Test3Integer b else Test3Natural (fromIntegral (c :: Integer))
+ in P.parse_ LEON.decode (B.buildBytes $ LEON.encode t) === Right t
+
+ prop "Test4 roundtrip" $ \ xs ->
+ let t = Test4 xs
+ in P.parse_ LEON.decode (B.buildBytes $ LEON.encode t) === Right t
+
+ prop "Test5 roundtrip" $ \ a b ->
+ let t = Test5 a b
+ in P.parse_ LEON.decode (B.buildBytes $ LEON.encode t) === Right t
+
+ prop "Test6 roundtrip" $ \ xs ys zs ts bs ->
+ let t = Test6 (V.pack xs) (V.pack ys) (V.pack zs) (T.pack ts) (CB.pack bs)
+ in P.parse_ LEON.decode (B.buildBytes $ LEON.encode t) === Right t
diff --git a/test/Std/Data/Parser/BaseSpec.hs b/test/Std/Data/Parser/BaseSpec.hs
index 68fb66d..3769245 100644
--- a/test/Std/Data/Parser/BaseSpec.hs
+++ b/test/Std/Data/Parser/BaseSpec.hs
@@ -20,12 +20,12 @@ import Test.Hspec.QuickCheck
parse' :: P.Parser a -> [Word8] -> Maybe a
-parse' p str = case P.parse p (V.pack str) of
+parse' p str = case P.parse_ p (V.pack str) of
Left msg -> Nothing
Right a -> Just a
parse'' :: P.Parser a -> [Word8] -> Maybe (V.Bytes, a)
-parse'' p str = case P.parse' p (V.pack str) of
+parse'' p str = case P.parse p (V.pack str) of
(rest, Right a) -> Just (rest, a)
_ -> Nothing
@@ -69,11 +69,10 @@ spec = describe "parsers" . modifyMaxSuccess (*10) . modifyMaxSize (*10) $ do
then Just (V.pack (L.drop n s), ())
else Nothing
- prop "anyWord8" $ \ s ->
- parse' ((,) <$> P.anyWord8 <*> P.takeWhile (const True)) s ===
+ prop "skipWord8" $ \ s ->
+ parse' (P.skipWord8 *> P.takeWhile (const True)) s ===
case s of [] -> Nothing
- (w:s') -> Just (w, V.pack s')
-
+ (w:s') -> Just (V.pack s')
prop "peek" $ \ s ->
parse' ((,) <$> P.peek <*> P.takeWhile (const True)) s ===
@@ -101,15 +100,15 @@ spec = describe "parsers" . modifyMaxSuccess (*10) . modifyMaxSize (*10) $ do
prop "bytesCI" $ \ s t ->
parse'' (P.bytesCI . V.pack $ t) (L.map toLower t ++ s) === Just (V.pack s, ())
- prop "endOfInput" $ \ s ->
- parse' P.endOfInput s ===
+ prop "atEnd" $ \ s ->
+ parse' P.atEnd s ===
case s of [] -> Just True
_ -> Just False
prop "scan" $ \ s l ->
let go l _ | l <= 0 = Nothing
| otherwise = Just (l-1)
- in parse' (P.scan l go) s === Just (V.pack $ L.take l s)
+ in (fst <$> parse' (P.scan l go) s) === Just (V.pack $ L.take l s)
prop "endOfLine" $ \ s ->
let r = fromIntegral (fromEnum '\r')
diff --git a/test/Std/Data/Parser/NumericSpec.hs b/test/Std/Data/Parser/NumericSpec.hs
index 4866db0..28bf27e 100644
--- a/test/Std/Data/Parser/NumericSpec.hs
+++ b/test/Std/Data/Parser/NumericSpec.hs
@@ -15,6 +15,7 @@ import qualified Std.Data.Builder.Numeric as B
import qualified Std.Data.Builder.Base as B
import qualified Std.Data.Text as T
import qualified Std.Data.Vector.Base as V
+import qualified Data.Scientific as Sci
import Test.QuickCheck
import Test.QuickCheck.Function
import Test.QuickCheck.Property
@@ -22,74 +23,80 @@ import Test.Hspec
import Test.Hspec.QuickCheck
spec :: Spec
-spec = describe "numeric parsers roundtrip" . modifyMaxSuccess (*10) . modifyMaxSize (*10) $ do
+spec = do
+ describe "numeric parsers roundtrip" . modifyMaxSuccess (*10) . modifyMaxSize (*10) $ do
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Int)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Int)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Int64)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Int64)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Int32)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Int32)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Int16)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Int16)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Int8)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Int8)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Word)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Word)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Word64)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Word64)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Word32)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Word32)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Word16)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Word16)
prop "positive hex roundtrip" $ \ i ->
- P.parse P.hex (B.buildBytes (B.hex i)) === Right (i :: Word8)
+ P.parse_ P.hex (B.buildBytes (B.hex i)) === Right (i :: Word8)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Int)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Int)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Int64)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Int64)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Int32)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Int32)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Int16)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Int16)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Int8)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Int8)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Word)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Word)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Word64)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Word64)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Word32)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Word32)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Word16)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Word16)
prop "positive int roundtrip" $ \ (Positive i) ->
- P.parse P.uint (B.buildBytes (B.int i)) === Right (i :: Word8)
+ P.parse_ P.uint (B.buildBytes (B.int i)) === Right (i :: Word8)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Int)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Int)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Int64)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Int64)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Int32)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Int32)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Int16)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Int16)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Int8)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Int8)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Word)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Word)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Word64)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Word64)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Word32)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Word32)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Word16)
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Word16)
prop "positive int roundtrip" $ \ i ->
- P.parse P.int (B.buildBytes (B.int i)) === Right (i :: Word8)
-
+ P.parse_ P.int (B.buildBytes (B.int i)) === Right (i :: Word8)
prop "float roundtrip" $ \ i ->
- P.parse P.float (B.buildBytes (B.float i)) === Right (i :: Float)
+ P.parse_ P.float (B.buildBytes (B.float i)) === Right (i :: Float)
prop "double roundtrip" $ \ i ->
- P.parse P.double (B.buildBytes (B.double i)) === Right (i :: Double)
+ P.parse_ P.double (B.buildBytes (B.double i)) === Right (i :: Double)
+
+ describe "floatToScientific, doubleToScientific === fromFloatDigits" $ do
+ prop "floatToScientific == fromFloatDigits" $ \ i ->
+ P.floatToScientific i === Sci.fromFloatDigits i
+ prop "floatToScientific === fromFloatDigits" $ \ i ->
+ P.doubleToScientific i === Sci.fromFloatDigits i
diff --git a/test/Std/Data/Text/BaseSpec.hs b/test/Std/Data/Text/BaseSpec.hs
index ca645f0..8f4aa27 100644
--- a/test/Std/Data/Text/BaseSpec.hs
+++ b/test/Std/Data/Text/BaseSpec.hs
@@ -51,6 +51,8 @@ spec = describe "text-base" $ do
"你好\NUL世界" === T.pack "你好\NUL世界"
prop "surrogate codepoint" $
"你好\xFFFD世界" === T.pack "你好\xD800世界"
+ prop "surrogate codepoint2" $
+ "你好\xD800世界" === T.pack "你好\xD800世界"
describe "text length == List.length" $ do
prop "text length === List.length" $ \ xs ->
diff --git a/test/Std/Data/TextBuilderSpec.hs b/test/Std/Data/TextBuilderSpec.hs
new file mode 100644
index 0000000..e3da63a
--- /dev/null
+++ b/test/Std/Data/TextBuilderSpec.hs
@@ -0,0 +1,68 @@
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Std.Data.TextBuilderSpec where
+
+import qualified Data.List as L
+import Data.Word
+import Data.Int
+import GHC.Generics
+import qualified Std.Data.Text as T
+import Std.Data.TextBuilder
+import Std.Data.JSON (Value)
+import Test.QuickCheck
+import Test.QuickCheck.Function
+import Test.QuickCheck.Property
+import Test.Hspec
+import Test.Hspec.QuickCheck
+
+
+data T a
+ = Nullary
+ | Unary Int
+ | Product T.Text (Maybe Char) a
+ | Record { testOne :: Double
+ , testTwo :: Maybe Bool
+ , testThree :: Maybe a
+ }
+ | List [a]
+ deriving (Show, Eq, ToText, Generic)
+
+data I a = I a :+ I a | I a :- I a | J a deriving (Show, Generic, ToText)
+infixr 5 :+
+infixl 6 :-
+
+spec :: Spec
+spec = describe "JSON Base instances" $ do
+
+ it "Nullary constructor are encoded as text" $
+ toText (Nullary :: T Integer) === "Nullary"
+
+ it "Unary constructor are encoded as single field" $
+ toText (Unary 123456 :: T Integer) === "Unary 123456"
+
+ it "Product are encoded as multiple field" $
+ toText (Product "ABC" (Just 'x') (123456::Integer)) ===
+ "Product \"ABC\" (Just 'x') 123456"
+
+ it "Record are encoded as key values" $
+ toText (Record 0.123456 Nothing (Just (123456::Integer))) ===
+ "Record {testOne = 0.123456, testTwo = Nothing, testThree = Just 123456}"
+
+ it "List are encode as array" $
+ toText (List [Nullary
+ , Unary 123456
+ , (Product "ABC" (Just 'x') (123456::Integer))
+ , (Record 0.123456 Nothing (Just (123456::Integer)))]) ===
+ "List [Nullary,Unary 123456,Product \"ABC\" (Just 'x') 123456,\
+ \Record {testOne = 0.123456, testTwo = Nothing, testThree = Just 123456}]"
+
+ it "infix constructor should respect piority" $
+ toString (J 1 :- J 2 :+ J 3 :- J 4 :- J 5 :+ J 6 :+ J 7 :+ J 8 :- J 9 :- J 10 :- J 11 :: I Int)
+ === show (J 1 :- J 2 :+ J 3 :- J 4 :- J 5 :+ J 6 :+ J 7 :+ J 8 :- J 9 :- J 10 :- J 11)
+
+ prop "Value Show instance === ToText instances" $ \ (v :: Value) ->
+ toString v === show v
diff --git a/test/Std/Data/Vector/FlatMapSpec.hs b/test/Std/Data/Vector/FlatMapSpec.hs
new file mode 100644
index 0000000..1ba814f
--- /dev/null
+++ b/test/Std/Data/Vector/FlatMapSpec.hs
@@ -0,0 +1,38 @@
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module Std.Data.Vector.FlatMapSpec where
+
+import qualified Data.List as List
+import Data.Word
+import qualified Std.Data.Vector as V
+import qualified Std.Data.Vector.FlatMap as FM
+import Test.QuickCheck
+import Test.QuickCheck.Function
+import Test.QuickCheck.Property
+import Test.Hspec
+import Test.Hspec.QuickCheck
+
+type FMS = FM.FlatMap String String
+
+spec :: Spec
+spec = do
+ describe "flatmap-semigroup-monoid" $ do
+ prop "flatmap monoid unit law" $ \ (m :: FMS) ->
+ (m <> FM.empty) === m
+ prop "flatmap monoid unit law" $ \ (m :: FMS) ->
+ (FM.empty <> m) === m
+ prop "flatmap semigroup associativity low" $ \ (m1 :: FMS) m2 m3 ->
+ (m1 <> m2) <> m3 === m1 <> (m2 <> m3)
+
+ describe "flatmap insert lookup roundtrip" $ do
+ prop "flatmap insert lookup roundtrip" $ \ (m :: FMS) k v ->
+ FM.lookup k (FM.insert k v m) === Just v
+
+ describe "flatmap delete lookup" $ do
+ prop "flatmap delete lookup" $ \ (m :: FMS) k ->
+ FM.lookup k (FM.delete k m) === Nothing
+
+ describe "flatmap adjust lookup roundtrip" $ do
+ prop "flatmap adjust lookup roundtrip" $ \ (m :: FMS) k (Fun _ f) ->
+ FM.lookup k (FM.adjust' f k m) === f `fmap` FM.lookup k m
diff --git a/test/Std/Data/Vector/FlatSetSpec.hs b/test/Std/Data/Vector/FlatSetSpec.hs
new file mode 100644
index 0000000..a1590e6
--- /dev/null
+++ b/test/Std/Data/Vector/FlatSetSpec.hs
@@ -0,0 +1,34 @@
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+
+module Std.Data.Vector.FlatSetSpec where
+
+import qualified Data.List as List
+import Data.Word
+import qualified Std.Data.Vector as V
+import qualified Std.Data.Vector.FlatSet as FS
+import Test.QuickCheck
+import Test.QuickCheck.Function
+import Test.QuickCheck.Property
+import Test.Hspec
+import Test.Hspec.QuickCheck
+
+type FMS = FS.FlatSet String
+
+spec :: Spec
+spec = do
+ describe "flatset-semigroup-monoid" $ do
+ prop "flatset monoid unit law" $ \ (m :: FMS) ->
+ (m <> FS.empty) === m
+ prop "flatset monoid unit law" $ \ (m :: FMS) ->
+ (FS.empty <> m) === m
+ prop "flatset semigroup associativity low" $ \ (m1 :: FMS) m2 m3 ->
+ (m1 <> m2) <> m3 === m1 <> (m2 <> m3)
+
+ describe "flatset insert elem roundtrip" $ do
+ prop "flatset insert elem roundtrip" $ \ (m :: FMS) v ->
+ FS.elem v (FS.insert v m) === True
+
+ describe "flatset delete elem" $ do
+ prop "flatset delete elem" $ \ (m :: FMS) v ->
+ FS.elem v (FS.delete v m) === False
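
The same shape of properties covers FlatSet; a matching usage sketch using only the operations exercised above (the main wrapper is illustrative):

    import qualified Std.Data.Vector.FlatSet as FS

    main :: IO ()
    main = do
        let s0 = FS.empty :: FS.FlatSet String
            s1 = FS.insert "x" s0
        print (FS.elem "x" s1)                  -- True   (insert/elem roundtrip)
        print (FS.elem "x" (FS.delete "x" s1))  -- False  (delete/elem)
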
diff --git a/test/Std/IO/FileSystemSpec.hs b/test/Std/IO/FileSystemSpec.hs
index e4431b8..b13cfc0 100644
--- a/test/Std/IO/FileSystemSpec.hs
+++ b/test/Std/IO/FileSystemSpec.hs
@@ -26,17 +26,17 @@ spec = describe "filesystem operations" $ do
size = V.length content
size2 = V.length content2
- tempdir <- runIO $ mkdtemp "stdio-filesystem-unit"
it "create a temp dir" $ do
-
+ tempdir <- mkdtemp "stdio-filesystem-unit"
dirs <- scandir "./"
+ rmdir tempdir
List.lookup tempdir dirs @?= Just DirEntDir
let flags = O_RDWR .|. O_CREAT
mode = DEFAULT_MODE
- filename = tempdir <> "/test-file"
+ filename = "test-file"
it "Opens and writes a file" $ do
withResource (initUVFile filename flags mode) $ \ file -> do
@@ -79,18 +79,20 @@ spec = describe "filesystem operations" $ do
firstLine @=? fst (V.break (== V.c2w '\n') content2)
unlink filename
- let dirname = tempdir <> "/test-dir"
it "create and remove dir" $ do
+ tempdir <- mkdtemp "stdio-filesystem-unit"
+ let dirname = tempdir <> "/test-dir"
mkdir dirname mode
dirs <- scandir tempdir
print dirs
- List.lookup "test-dir" dirs @?= Just DirEntDir
rmdir dirname
+ rmdir tempdir
+ List.lookup "test-dir" dirs @?= Just DirEntDir
- let linkname = tempdir <> "/test-link"
- symlinkname = tempdir <> "/test-symlink"
- symlinkname2 = tempdir <> "/test-symlink2"
+ let linkname = "test-link"
+ symlinkname = "test-symlink"
+ symlinkname2 = "test-symlink2"
it "link stat should be equal to target file" $ do
@@ -143,4 +145,3 @@ spec = describe "filesystem operations" $ do
uvtNanoSecond (stMtim s) @?= 800000000
unlink filename
- it "remove test temp dir" $ rmdir tempdir
diff --git a/test/Std/IO/FileSystemTSpec.hs b/test/Std/IO/FileSystemTSpec.hs
index 72470a2..7076ae0 100644
--- a/test/Std/IO/FileSystemTSpec.hs
+++ b/test/Std/IO/FileSystemTSpec.hs
@@ -26,17 +26,16 @@ spec = describe "filesystem (threadpool version) operations" $ do
size = V.length content
size2 = V.length content2
- tempdir <- runIO $ mkdtemp "stdio-filesystem-unit"
-
it "create a temp dir" $ do
-
+ tempdir <- mkdtemp "stdio-filesystem-unit"
dirs <- scandir "./"
+ rmdir tempdir
List.lookup tempdir dirs @?= Just DirEntDir
let flags = O_RDWR .|. O_CREAT
mode = DEFAULT_MODE
- filename = tempdir <> "/test-file"
+ filename = "test-file"
it "Opens and writes a file" $ do
withResource (initUVFile filename flags mode) $ \ file -> do
@@ -79,18 +78,20 @@ spec = describe "filesystem (threadpool version) operations" $ do
firstLine @=? fst (V.break (== V.c2w '\n') content2)
unlink filename
- let dirname = tempdir <> "/test-dir"
it "create and remove dir" $ do
+ tempdir <- mkdtemp "stdio-filesystem-unit"
+ let dirname = tempdir <> "/test-dir"
mkdir dirname mode
dirs <- scandir tempdir
print dirs
- List.lookup "test-dir" dirs @?= Just DirEntDir
rmdir dirname
+ rmdir tempdir
+ List.lookup "test-dir" dirs @?= Just DirEntDir
- let linkname = tempdir <> "/test-link"
- symlinkname = tempdir <> "/test-symlink"
- symlinkname2 = tempdir <> "/test-symlink2"
+ let linkname = "test-link"
+ symlinkname = "test-symlink"
+ symlinkname2 = "test-symlink2"
it "link stat should be equal to target file" $ do
@@ -143,4 +144,3 @@ spec = describe "filesystem (threadpool version) operations" $ do
uvtNanoSecond (stMtim s) @?= 800000000
unlink filename
- it "remove test temp dir" $ rmdir tempdir
diff --git a/test/Std/IO/UDPSpec.hs b/test/Std/IO/UDPSpec.hs
new file mode 100644
index 0000000..786d175
--- /dev/null
+++ b/test/Std/IO/UDPSpec.hs
@@ -0,0 +1,69 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Std.IO.UDPSpec where
+
+import Control.Concurrent
+import Control.Monad
+import Data.Bits
+import Std.Data.Vector as V
+import Std.Data.Vector.Base as V
+import Data.List as List
+import Foreign.Marshal.Array
+import Foreign.Ptr
+import Std.IO.Exception
+import Std.IO.UDP
+import Std.IO.Resource
+import Std.IO.SockAddr
+import Test.Hspec
+import Test.HUnit
+
+spec :: Spec
+spec = describe "UDP operations" $ do
+ it "roundtrip test" $ do
+ let testMsg = V.replicate 256 48
+ longMsg = V.replicate 2048 48
+ addr = SockAddrInet 12345 inetLoopback
+ withResource (initUDP defaultUDPConfig{sendMsgSize = 2048}) $ \ c ->
+ withResource (initUDP defaultUDPConfig{localUDPAddr = Just (addr,UV_UDP_DEFAULT)}) $ \ s -> do
+ forkIO $ sendUDP c addr testMsg
+                [(_, partial, rcvMsg)] <- recvUDP s
+ partial @=? False
+ rcvMsg @=? testMsg
+
+ threadDelay 100000
+
+ forkIO $ sendUDP c addr longMsg
+                [(_, partial', _)] <- recvUDP s
+                partial' @=? True
+
+ it "UDP sending addr test" $ do
+ let testMsg = V.replicate 256 48
+ addr = SockAddrInet 12346 inetLoopback
+ addr' = SockAddrInet 12347 inetLoopback
+ withResource (initUDP defaultUDPConfig{localUDPAddr = Just (addr,UV_UDP_DEFAULT)}) $ \ c ->
+ withResource (initUDP defaultUDPConfig{localUDPAddr = Just (addr',UV_UDP_DEFAULT)}) $ \ s -> do
+ forkIO $ sendUDP c addr' testMsg
+                [(rcvAddr, _, _)] <- recvUDP s
+ Just addr @=? rcvAddr
+
+ it "overlong message exception" $ do
+ let testMsg = V.replicate 4096 48
+ addr = SockAddrInet 12348 inetLoopback
+ withResource (initUDP defaultUDPConfig) $ \ c ->
+ withResource (initUDP defaultUDPConfig) $ \ s -> do
+ sendUDP c addr testMsg `shouldThrow` anyException
+
+    {- This test needs a local broadcast address, so it's disabled by default.
+    it "UDP broadcast test" $ do
+ let testMsg = V.replicate 256 48
+ addr = SockAddrInet 12349 (tupleToInetAddr (10,92,239,187))
+ addr' = SockAddrInet 12350 inetAny
+ broadcastAddr = SockAddrInet 12350 (tupleToInetAddr (10,92,239,255))
+ withResource (initUDP defaultUDPConfig{localUDPAddr = Just (addr,UV_UDP_DEFAULT)}) $ \ c ->
+ withResource (initUDP defaultUDPConfig{localUDPAddr = Just (addr',UV_UDP_DEFAULT)}) $ \ s -> do
+ setBroadcast c True
+ forkIO $ sendUDP c broadcastAddr testMsg
+            [(rcvAddr, _, rcvMsg)] <- recvUDP s
+ Just addr @=? rcvAddr
+ rcvMsg @=? testMsg
+ -}
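
For reference, a minimal send/receive sketch distilled from the roundtrip test above; the imports mirror this spec, the port number is arbitrary, and error handling is omitted:

    import Control.Concurrent    (forkIO)
    import Std.Data.Vector       as V
    import Std.Data.Vector.Base  as V
    import Std.IO.Resource
    import Std.IO.SockAddr
    import Std.IO.UDP

    main :: IO ()
    main = do
        let msg  = V.replicate 16 48               -- sixteen ASCII '0' bytes
            addr = SockAddrInet 12351 inetLoopback -- arbitrary unused port
        withResource (initUDP defaultUDPConfig) $ \ client ->
            withResource (initUDP defaultUDPConfig{localUDPAddr = Just (addr, UV_UDP_DEFAULT)}) $ \ server -> do
                _ <- forkIO $ sendUDP client addr msg
                [(_, partial, payload)] <- recvUDP server
                print (partial, payload == msg)    -- expect (False, True), as in the roundtrip test
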