summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoachimBreitner <>2020-11-20 15:15:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2020-11-20 15:15:00 (GMT)
commitce3b6e1dff23d31354f5eeb5b835f4da19fd482e (patch)
tree95f8529b7b0cf451abb54ddc0e73360198a182f2
parenta88853b129a90df26e91eb0316f5f5256fbb0e35 (diff)
version 1.2HEAD1.2master
-rwxr-xr-xCHANGELOG.md6
-rw-r--r--leb128-cereal.cabal14
-rw-r--r--src/Data/Serialize/LEB128.hs19
-rw-r--r--src/Data/Serialize/LEB128/Lenient.hs228
4 files changed, 253 insertions, 14 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b0db5d1..bad3e77 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
# Revision history for leb128-serialize
+## 1.2 -- 2020-11-20
+
+* Add `Data.Serialize.LEB128.Lenient` for a decoder that
+ allows overlong encodings.
+ For now, a simple code copy; may be refactored later.
+
## 1.1 -- 2020-06-08
* Fix check for overlong encodings; it was not strict enough
diff --git a/leb128-cereal.cabal b/leb128-cereal.cabal
index 39a2a64..74d5663 100644
--- a/leb128-cereal.cabal
+++ b/leb128-cereal.cabal
@@ -1,6 +1,6 @@
cabal-version: >=1.10
name: leb128-cereal
-version: 1.1
+version: 1.2
synopsis: LEB128 and SLEB128 encoding
description:
This module implements encoding and decoding of 'Natural' and 'Integer'
@@ -22,6 +22,7 @@ tested-with: GHC == 8.2, GHC == 8.4, GHC == 8.6, GHC == 8.8, GHC == 8.10
library
exposed-modules: Data.Serialize.LEB128
+ exposed-modules: Data.Serialize.LEB128.Lenient
build-depends: base >=4.10 && <5
build-depends: bytestring >= 0.10
build-depends: cereal >= 0.5
@@ -40,6 +41,17 @@ test-suite test
build-depends: leb128-cereal
build-depends: bytestring
+test-suite test-lenient
+ default-language: Haskell2010
+ type: exitcode-stdio-1.0
+ main-is: test.hs
+ build-depends: base >= 4 && < 5
+ build-depends: tasty >= 0.7
+ build-depends: tasty-quickcheck
+ build-depends: tasty-hunit
+ build-depends: leb128-cereal
+ build-depends: bytestring
+
source-repository head
type: git
location: https://github.com/nomeata/haskell-leb128-cereal
diff --git a/src/Data/Serialize/LEB128.hs b/src/Data/Serialize/LEB128.hs
index 50a8f11..d625bc7 100644
--- a/src/Data/Serialize/LEB128.hs
+++ b/src/Data/Serialize/LEB128.hs
@@ -24,6 +24,7 @@
--
-- The decoders will fail if the input is not in canonical representation,
-- i.e. longer than necessary.
+-- Use "Data.Serialize.LEB128.Lenient" if you need the strict semantics.
--
-- This code is inspired by Andreas Klebinger's LEB128 implementation in GHC.
module Data.Serialize.LEB128
@@ -126,20 +127,12 @@ isFinite = isJust (bitSizeMaybe (undefined :: a))
-- | SLEB128-encodes an integer via a builder
buildSLEB128 :: SLEB128 a => a -> B.Builder
-buildSLEB128 = go
+buildSLEB128 val
+ | val >= -64 && val < 64 = stopByte
+ | otherwise = goByte <> buildSLEB128 (shiftR val 7)
where
- go val = do
- let !byte = fromIntegral (clearBit val 7) :: Word8
- let !val' = val `unsafeShiftR` 7
- let !signBit = testBit byte 6
- let !done =
- -- Unsigned value, val' == 0 and and last value can
- -- be discriminated from a negative number.
- (val' == 0 && not signBit) ||
- -- Signed value,
- (val' == -1 && signBit)
- let !byte' = if done then byte else setBit byte 7
- B.word8 byte' <> if done then mempty else go val'
+ stopByte = B.word8 (fromIntegral $ clearBit val 7)
+ goByte = B.word8 (fromIntegral $ setBit val 7)
{-# SPECIALIZE buildSLEB128 :: Integer -> B.Builder #-}
{-# SPECIALIZE buildSLEB128 :: Int -> B.Builder #-}
diff --git a/src/Data/Serialize/LEB128/Lenient.hs b/src/Data/Serialize/LEB128/Lenient.hs
new file mode 100644
index 0000000..2523619
--- /dev/null
+++ b/src/Data/Serialize/LEB128/Lenient.hs
@@ -0,0 +1,228 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE AllowAmbiguousTypes #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# OPTIONS_GHC -Wno-dodgy-imports #-}
+{-# OPTIONS_GHC -O2 #-}
+-- {-# OPTIONS_GHC -ddump-simpl -dsuppress-unfoldings -dsuppress-idinfo -dsuppress-module-prefixes -ddump-to-file #-}
+
+-- |
+-- Module : Data.Serialize.LEB128
+-- Description : LEB128 encoding
+-- License : MIT
+-- Maintainer : Joachim Breitner
+--
+-- | This module implements encoding and decoding of 'Natural' and 'Integer'
+-- values according to LEB128 and SLEB128. See
+-- https://en.wikipedia.org/wiki/LEB128 for a specification.
+--
+-- The module provides conversion to and from strict bytestrings.
+--
+-- Additionally, to integrate these into your own parsers and serializers, you
+-- can use the interfaces based on 'B.Builder' as well as @cereal@'s 'G.Get'
+-- and 'P.Put' monad.
+--
+-- The decoders in this module will accept over-long representations.
+-- Use "Data.Serialize.LEB128" if you need the strict semantics.
+--
+-- This code is inspired by Andreas Klebinger's LEB128 implementation in GHC.
+module Data.Serialize.LEB128.Lenient
+ (
+ -- * The class of encodable and decodable types
+ LEB128
+ , SLEB128
+ -- * Bytestring-based interface
+ , toLEB128
+ , fromLEB128
+ , toSLEB128
+ , fromSLEB128
+ -- * Builder interface
+ , buildLEB128
+ , buildSLEB128
+ -- * Cereal interface
+ , getLEB128
+ , getSLEB128
+ , putLEB128
+ , putSLEB128
+ ) where
+
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as BSL
+import qualified Data.ByteString.Builder as B
+import qualified Data.ByteString.Builder.Extra as B
+import qualified Data.Serialize.Get as G
+import qualified Data.Serialize.Put as P
+import Numeric.Natural
+import Control.Applicative
+import Control.Monad
+import Data.Bits
+import Data.Word
+import Data.Int
+import Data.Maybe
+import Data.Monoid ((<>))
+import Prelude hiding ((<>))
+
+-- | Unsigned number types can be LEB128-encoded
+class (Bits a, Num a, Integral a) => LEB128 a where
+instance LEB128 Natural
+instance LEB128 Word
+instance LEB128 Word8
+instance LEB128 Word16
+instance LEB128 Word32
+instance LEB128 Word64
+
+-- | Signed number types can be SLEB128-encoded
+class (Bits a, Num a, Integral a) => SLEB128 a
+instance SLEB128 Integer
+instance SLEB128 Int
+instance SLEB128 Int8
+instance SLEB128 Int16
+instance SLEB128 Int32
+instance SLEB128 Int64
+
+-- | LEB128-encodes a natural number to a strict bytestring
+toLEB128 :: LEB128 a => a -> BS.ByteString
+toLEB128 = BSL.toStrict . B.toLazyByteStringWith (B.safeStrategy 32 32) BSL.empty . buildLEB128
+
+{-# SPECIALIZE toLEB128 :: Natural -> BS.ByteString #-}
+{-# SPECIALIZE toLEB128 :: Word -> BS.ByteString #-}
+{-# SPECIALIZE toLEB128 :: Word8 -> BS.ByteString #-}
+{-# SPECIALIZE toLEB128 :: Word16 -> BS.ByteString #-}
+{-# SPECIALIZE toLEB128 :: Word32 -> BS.ByteString #-}
+{-# SPECIALIZE toLEB128 :: Word64 -> BS.ByteString #-}
+
+-- | SLEB128-encodes an integer to a strict bytestring
+toSLEB128 :: SLEB128 a => a -> BS.ByteString
+toSLEB128 = BSL.toStrict . B.toLazyByteStringWith (B.safeStrategy 32 32) BSL.empty . buildSLEB128
+
+{-# SPECIALIZE toSLEB128 :: Integer -> BS.ByteString #-}
+{-# SPECIALIZE toSLEB128 :: Int -> BS.ByteString #-}
+{-# SPECIALIZE toSLEB128 :: Int8 -> BS.ByteString #-}
+{-# SPECIALIZE toSLEB128 :: Int16 -> BS.ByteString #-}
+{-# SPECIALIZE toSLEB128 :: Int32 -> BS.ByteString #-}
+{-# SPECIALIZE toSLEB128 :: Int64 -> BS.ByteString #-}
+
+-- | LEB128-encodes a natural number via a builder
+buildLEB128 :: LEB128 a => a -> B.Builder
+buildLEB128 = go
+ where
+ go i
+ | i <= 127
+ = B.word8 (fromIntegral i :: Word8)
+ | otherwise =
+ -- bit 7 (8th bit) indicates more to come.
+ B.word8 (setBit (fromIntegral i) 7) <> go (i `unsafeShiftR` 7)
+
+{-# SPECIALIZE buildLEB128 :: Natural -> B.Builder #-}
+{-# SPECIALIZE buildLEB128 :: Word -> B.Builder #-}
+{-# SPECIALIZE buildLEB128 :: Word8 -> B.Builder #-}
+{-# SPECIALIZE buildLEB128 :: Word16 -> B.Builder #-}
+{-# SPECIALIZE buildLEB128 :: Word32 -> B.Builder #-}
+{-# SPECIALIZE buildLEB128 :: Word64 -> B.Builder #-}
+
+-- This gets inlined for the specialied variants
+isFinite :: forall a. Bits a => Bool
+isFinite = isJust (bitSizeMaybe (undefined :: a))
+
+-- | SLEB128-encodes an integer via a builder
+buildSLEB128 :: SLEB128 a => a -> B.Builder
+buildSLEB128 val
+ | val >= -64 && val < 64 = stopByte
+ | otherwise = goByte <> buildSLEB128 (shiftR val 7)
+ where
+ stopByte = B.word8 (fromIntegral $ clearBit val 7)
+ goByte = B.word8 (fromIntegral $ setBit val 7)
+
+{-# SPECIALIZE buildSLEB128 :: Integer -> B.Builder #-}
+{-# SPECIALIZE buildSLEB128 :: Int -> B.Builder #-}
+{-# SPECIALIZE buildSLEB128 :: Int8 -> B.Builder #-}
+{-# SPECIALIZE buildSLEB128 :: Int16 -> B.Builder #-}
+{-# SPECIALIZE buildSLEB128 :: Int32 -> B.Builder #-}
+{-# SPECIALIZE buildSLEB128 :: Int64 -> B.Builder #-}
+
+-- | LEB128-encodes a natural number in @cereal@'s 'P.Put' monad
+putLEB128 :: LEB128 a => P.Putter a
+putLEB128 = P.putBuilder . buildLEB128
+{-# INLINE putLEB128 #-}
+
+-- | SLEB128-encodes an integer in @cereal@'s 'P.Put' monad
+putSLEB128 :: SLEB128 a => P.Putter a
+putSLEB128 = P.putBuilder . buildSLEB128
+{-# INLINE putSLEB128 #-}
+
+-- | LEB128-decodes a natural number from a strict bytestring
+fromLEB128 :: LEB128 a => BS.ByteString -> Either String a
+fromLEB128 = runComplete getLEB128
+{-# INLINE fromLEB128 #-}
+
+-- | SLEB128-decodes an integer from a strict bytestring
+fromSLEB128 :: SLEB128 a => BS.ByteString -> Either String a
+fromSLEB128 = runComplete getSLEB128
+{-# INLINE fromSLEB128 #-}
+
+runComplete :: G.Get a -> BS.ByteString -> Either String a
+runComplete p bs = do
+ (x,r) <- G.runGetState p bs 0
+ unless (BS.null r) $ Left "extra bytes in input"
+ return x
+
+-- | LEB128-decodes a natural number via @cereal@
+getLEB128 :: forall a. LEB128 a => G.Get a
+getLEB128 = G.label "LEB128" $ go 0 0
+ where
+ go :: Int -> a -> G.Get a
+ go !shift !w = do
+ byte <- G.getWord8 <|> fail "short encoding"
+ let !byteVal = fromIntegral (clearBit byte 7)
+ case bitSizeMaybe w of
+ Just bs | shift > bs -> fail "overflow"
+ _ -> return ()
+ when (isFinite @a) $
+ unless (byteVal `unsafeShiftL` shift `unsafeShiftR` shift == byteVal) $
+ fail "overflow"
+ let !val = w .|. (byteVal `unsafeShiftL` shift)
+ let !shift' = shift+7
+ if hasMore byte
+ then go shift' val
+ else return $! val
+
+ hasMore b = testBit b 7
+
+{-# SPECIALIZE getLEB128 :: G.Get Natural #-}
+{-# SPECIALIZE getLEB128 :: G.Get Word #-}
+{-# SPECIALIZE getLEB128 :: G.Get Word8 #-}
+{-# SPECIALIZE getLEB128 :: G.Get Word16 #-}
+{-# SPECIALIZE getLEB128 :: G.Get Word32 #-}
+{-# SPECIALIZE getLEB128 :: G.Get Word64 #-}
+
+-- | SLEB128-decodes an integer via @cereal@
+getSLEB128 :: forall a. SLEB128 a => G.Get a
+getSLEB128 = G.label "SLEB128" $ go 0 0 0
+ where
+ go :: Word8 -> Int -> a -> G.Get a
+ go !prev !shift !w = do
+ byte <- G.getWord8 <|> fail "short encoding"
+ let !byteVal = fromIntegral (clearBit byte 7)
+ case bitSizeMaybe w of
+ Just bs | shift > bs -> fail "overflow"
+ _ -> return ()
+ when (isFinite @a) $
+ unless ((byteVal `unsafeShiftL` shift `unsafeShiftR` shift) .&. 0x7f == byteVal) $
+ fail "overflow"
+ let !val = w .|. (byteVal `unsafeShiftL` shift)
+ let !shift' = shift+7
+ if hasMore byte
+ then go byte shift' val
+ else if signed byte
+ then return $! val - bit shift'
+ else return $! val
+
+ hasMore b = testBit b 7
+ signed b = testBit b 6
+
+{-# SPECIALIZE getSLEB128 :: G.Get Integer #-}
+{-# SPECIALIZE getSLEB128 :: G.Get Int #-}
+{-# SPECIALIZE getSLEB128 :: G.Get Int8 #-}
+{-# SPECIALIZE getSLEB128 :: G.Get Int16 #-}
+{-# SPECIALIZE getSLEB128 :: G.Get Int32 #-}
+{-# SPECIALIZE getSLEB128 :: G.Get Int64 #-}