diff options
author | JoachimBreitner <> | 2020-11-20 15:15:00 (GMT) |
---|---|---|
committer | hdiff <hdiff@hdiff.luite.com> | 2020-11-20 15:15:00 (GMT) |
commit | ce3b6e1dff23d31354f5eeb5b835f4da19fd482e (patch) | |
tree | 95f8529b7b0cf451abb54ddc0e73360198a182f2 | |
parent | a88853b129a90df26e91eb0316f5f5256fbb0e35 (diff) |
-rwxr-xr-x | CHANGELOG.md | 6 | ||||
-rw-r--r-- | leb128-cereal.cabal | 14 | ||||
-rw-r--r-- | src/Data/Serialize/LEB128.hs | 19 | ||||
-rw-r--r-- | src/Data/Serialize/LEB128/Lenient.hs | 228 |
4 files changed, 253 insertions, 14 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index b0db5d1..bad3e77 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Revision history for leb128-serialize +## 1.2 -- 2020-11-20 + +* Add `Data.Serialize.LEB128.Lenient` for a decoder that + allows overlong encodings. + For now, a simple code copy; may be refactored later. + ## 1.1 -- 2020-06-08 * Fix check for overlong encodings; it was not strict enough diff --git a/leb128-cereal.cabal b/leb128-cereal.cabal index 39a2a64..74d5663 100644 --- a/leb128-cereal.cabal +++ b/leb128-cereal.cabal @@ -1,6 +1,6 @@ cabal-version: >=1.10 name: leb128-cereal -version: 1.1 +version: 1.2 synopsis: LEB128 and SLEB128 encoding description: This module implements encoding and decoding of 'Natural' and 'Integer' @@ -22,6 +22,7 @@ tested-with: GHC == 8.2, GHC == 8.4, GHC == 8.6, GHC == 8.8, GHC == 8.10 library exposed-modules: Data.Serialize.LEB128 + exposed-modules: Data.Serialize.LEB128.Lenient build-depends: base >=4.10 && <5 build-depends: bytestring >= 0.10 build-depends: cereal >= 0.5 @@ -40,6 +41,17 @@ test-suite test build-depends: leb128-cereal build-depends: bytestring +test-suite test-lenient + default-language: Haskell2010 + type: exitcode-stdio-1.0 + main-is: test.hs + build-depends: base >= 4 && < 5 + build-depends: tasty >= 0.7 + build-depends: tasty-quickcheck + build-depends: tasty-hunit + build-depends: leb128-cereal + build-depends: bytestring + source-repository head type: git location: https://github.com/nomeata/haskell-leb128-cereal diff --git a/src/Data/Serialize/LEB128.hs b/src/Data/Serialize/LEB128.hs index 50a8f11..d625bc7 100644 --- a/src/Data/Serialize/LEB128.hs +++ b/src/Data/Serialize/LEB128.hs @@ -24,6 +24,7 @@ -- -- The decoders will fail if the input is not in canonical representation, -- i.e. longer than necessary. +-- Use "Data.Serialize.LEB128.Lenient" if you need the strict semantics. -- -- This code is inspired by Andreas Klebinger's LEB128 implementation in GHC. module Data.Serialize.LEB128 @@ -126,20 +127,12 @@ isFinite = isJust (bitSizeMaybe (undefined :: a)) -- | SLEB128-encodes an integer via a builder buildSLEB128 :: SLEB128 a => a -> B.Builder -buildSLEB128 = go +buildSLEB128 val + | val >= -64 && val < 64 = stopByte + | otherwise = goByte <> buildSLEB128 (shiftR val 7) where - go val = do - let !byte = fromIntegral (clearBit val 7) :: Word8 - let !val' = val `unsafeShiftR` 7 - let !signBit = testBit byte 6 - let !done = - -- Unsigned value, val' == 0 and and last value can - -- be discriminated from a negative number. - (val' == 0 && not signBit) || - -- Signed value, - (val' == -1 && signBit) - let !byte' = if done then byte else setBit byte 7 - B.word8 byte' <> if done then mempty else go val' + stopByte = B.word8 (fromIntegral $ clearBit val 7) + goByte = B.word8 (fromIntegral $ setBit val 7) {-# SPECIALIZE buildSLEB128 :: Integer -> B.Builder #-} {-# SPECIALIZE buildSLEB128 :: Int -> B.Builder #-} diff --git a/src/Data/Serialize/LEB128/Lenient.hs b/src/Data/Serialize/LEB128/Lenient.hs new file mode 100644 index 0000000..2523619 --- /dev/null +++ b/src/Data/Serialize/LEB128/Lenient.hs @@ -0,0 +1,228 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# OPTIONS_GHC -Wno-dodgy-imports #-} +{-# OPTIONS_GHC -O2 #-} +-- {-# OPTIONS_GHC -ddump-simpl -dsuppress-unfoldings -dsuppress-idinfo -dsuppress-module-prefixes -ddump-to-file #-} + +-- | +-- Module : Data.Serialize.LEB128 +-- Description : LEB128 encoding +-- License : MIT +-- Maintainer : Joachim Breitner +-- +-- | This module implements encoding and decoding of 'Natural' and 'Integer' +-- values according to LEB128 and SLEB128. See +-- https://en.wikipedia.org/wiki/LEB128 for a specification. +-- +-- The module provides conversion to and from strict bytestrings. +-- +-- Additionally, to integrate these into your own parsers and serializers, you +-- can use the interfaces based on 'B.Builder' as well as @cereal@'s 'G.Get' +-- and 'P.Put' monad. +-- +-- The decoders in this module will accept over-long representations. +-- Use "Data.Serialize.LEB128" if you need the strict semantics. +-- +-- This code is inspired by Andreas Klebinger's LEB128 implementation in GHC. +module Data.Serialize.LEB128.Lenient + ( + -- * The class of encodable and decodable types + LEB128 + , SLEB128 + -- * Bytestring-based interface + , toLEB128 + , fromLEB128 + , toSLEB128 + , fromSLEB128 + -- * Builder interface + , buildLEB128 + , buildSLEB128 + -- * Cereal interface + , getLEB128 + , getSLEB128 + , putLEB128 + , putSLEB128 + ) where + +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as BSL +import qualified Data.ByteString.Builder as B +import qualified Data.ByteString.Builder.Extra as B +import qualified Data.Serialize.Get as G +import qualified Data.Serialize.Put as P +import Numeric.Natural +import Control.Applicative +import Control.Monad +import Data.Bits +import Data.Word +import Data.Int +import Data.Maybe +import Data.Monoid ((<>)) +import Prelude hiding ((<>)) + +-- | Unsigned number types can be LEB128-encoded +class (Bits a, Num a, Integral a) => LEB128 a where +instance LEB128 Natural +instance LEB128 Word +instance LEB128 Word8 +instance LEB128 Word16 +instance LEB128 Word32 +instance LEB128 Word64 + +-- | Signed number types can be SLEB128-encoded +class (Bits a, Num a, Integral a) => SLEB128 a +instance SLEB128 Integer +instance SLEB128 Int +instance SLEB128 Int8 +instance SLEB128 Int16 +instance SLEB128 Int32 +instance SLEB128 Int64 + +-- | LEB128-encodes a natural number to a strict bytestring +toLEB128 :: LEB128 a => a -> BS.ByteString +toLEB128 = BSL.toStrict . B.toLazyByteStringWith (B.safeStrategy 32 32) BSL.empty . buildLEB128 + +{-# SPECIALIZE toLEB128 :: Natural -> BS.ByteString #-} +{-# SPECIALIZE toLEB128 :: Word -> BS.ByteString #-} +{-# SPECIALIZE toLEB128 :: Word8 -> BS.ByteString #-} +{-# SPECIALIZE toLEB128 :: Word16 -> BS.ByteString #-} +{-# SPECIALIZE toLEB128 :: Word32 -> BS.ByteString #-} +{-# SPECIALIZE toLEB128 :: Word64 -> BS.ByteString #-} + +-- | SLEB128-encodes an integer to a strict bytestring +toSLEB128 :: SLEB128 a => a -> BS.ByteString +toSLEB128 = BSL.toStrict . B.toLazyByteStringWith (B.safeStrategy 32 32) BSL.empty . buildSLEB128 + +{-# SPECIALIZE toSLEB128 :: Integer -> BS.ByteString #-} +{-# SPECIALIZE toSLEB128 :: Int -> BS.ByteString #-} +{-# SPECIALIZE toSLEB128 :: Int8 -> BS.ByteString #-} +{-# SPECIALIZE toSLEB128 :: Int16 -> BS.ByteString #-} +{-# SPECIALIZE toSLEB128 :: Int32 -> BS.ByteString #-} +{-# SPECIALIZE toSLEB128 :: Int64 -> BS.ByteString #-} + +-- | LEB128-encodes a natural number via a builder +buildLEB128 :: LEB128 a => a -> B.Builder +buildLEB128 = go + where + go i + | i <= 127 + = B.word8 (fromIntegral i :: Word8) + | otherwise = + -- bit 7 (8th bit) indicates more to come. + B.word8 (setBit (fromIntegral i) 7) <> go (i `unsafeShiftR` 7) + +{-# SPECIALIZE buildLEB128 :: Natural -> B.Builder #-} +{-# SPECIALIZE buildLEB128 :: Word -> B.Builder #-} +{-# SPECIALIZE buildLEB128 :: Word8 -> B.Builder #-} +{-# SPECIALIZE buildLEB128 :: Word16 -> B.Builder #-} +{-# SPECIALIZE buildLEB128 :: Word32 -> B.Builder #-} +{-# SPECIALIZE buildLEB128 :: Word64 -> B.Builder #-} + +-- This gets inlined for the specialied variants +isFinite :: forall a. Bits a => Bool +isFinite = isJust (bitSizeMaybe (undefined :: a)) + +-- | SLEB128-encodes an integer via a builder +buildSLEB128 :: SLEB128 a => a -> B.Builder +buildSLEB128 val + | val >= -64 && val < 64 = stopByte + | otherwise = goByte <> buildSLEB128 (shiftR val 7) + where + stopByte = B.word8 (fromIntegral $ clearBit val 7) + goByte = B.word8 (fromIntegral $ setBit val 7) + +{-# SPECIALIZE buildSLEB128 :: Integer -> B.Builder #-} +{-# SPECIALIZE buildSLEB128 :: Int -> B.Builder #-} +{-# SPECIALIZE buildSLEB128 :: Int8 -> B.Builder #-} +{-# SPECIALIZE buildSLEB128 :: Int16 -> B.Builder #-} +{-# SPECIALIZE buildSLEB128 :: Int32 -> B.Builder #-} +{-# SPECIALIZE buildSLEB128 :: Int64 -> B.Builder #-} + +-- | LEB128-encodes a natural number in @cereal@'s 'P.Put' monad +putLEB128 :: LEB128 a => P.Putter a +putLEB128 = P.putBuilder . buildLEB128 +{-# INLINE putLEB128 #-} + +-- | SLEB128-encodes an integer in @cereal@'s 'P.Put' monad +putSLEB128 :: SLEB128 a => P.Putter a +putSLEB128 = P.putBuilder . buildSLEB128 +{-# INLINE putSLEB128 #-} + +-- | LEB128-decodes a natural number from a strict bytestring +fromLEB128 :: LEB128 a => BS.ByteString -> Either String a +fromLEB128 = runComplete getLEB128 +{-# INLINE fromLEB128 #-} + +-- | SLEB128-decodes an integer from a strict bytestring +fromSLEB128 :: SLEB128 a => BS.ByteString -> Either String a +fromSLEB128 = runComplete getSLEB128 +{-# INLINE fromSLEB128 #-} + +runComplete :: G.Get a -> BS.ByteString -> Either String a +runComplete p bs = do + (x,r) <- G.runGetState p bs 0 + unless (BS.null r) $ Left "extra bytes in input" + return x + +-- | LEB128-decodes a natural number via @cereal@ +getLEB128 :: forall a. LEB128 a => G.Get a +getLEB128 = G.label "LEB128" $ go 0 0 + where + go :: Int -> a -> G.Get a + go !shift !w = do + byte <- G.getWord8 <|> fail "short encoding" + let !byteVal = fromIntegral (clearBit byte 7) + case bitSizeMaybe w of + Just bs | shift > bs -> fail "overflow" + _ -> return () + when (isFinite @a) $ + unless (byteVal `unsafeShiftL` shift `unsafeShiftR` shift == byteVal) $ + fail "overflow" + let !val = w .|. (byteVal `unsafeShiftL` shift) + let !shift' = shift+7 + if hasMore byte + then go shift' val + else return $! val + + hasMore b = testBit b 7 + +{-# SPECIALIZE getLEB128 :: G.Get Natural #-} +{-# SPECIALIZE getLEB128 :: G.Get Word #-} +{-# SPECIALIZE getLEB128 :: G.Get Word8 #-} +{-# SPECIALIZE getLEB128 :: G.Get Word16 #-} +{-# SPECIALIZE getLEB128 :: G.Get Word32 #-} +{-# SPECIALIZE getLEB128 :: G.Get Word64 #-} + +-- | SLEB128-decodes an integer via @cereal@ +getSLEB128 :: forall a. SLEB128 a => G.Get a +getSLEB128 = G.label "SLEB128" $ go 0 0 0 + where + go :: Word8 -> Int -> a -> G.Get a + go !prev !shift !w = do + byte <- G.getWord8 <|> fail "short encoding" + let !byteVal = fromIntegral (clearBit byte 7) + case bitSizeMaybe w of + Just bs | shift > bs -> fail "overflow" + _ -> return () + when (isFinite @a) $ + unless ((byteVal `unsafeShiftL` shift `unsafeShiftR` shift) .&. 0x7f == byteVal) $ + fail "overflow" + let !val = w .|. (byteVal `unsafeShiftL` shift) + let !shift' = shift+7 + if hasMore byte + then go byte shift' val + else if signed byte + then return $! val - bit shift' + else return $! val + + hasMore b = testBit b 7 + signed b = testBit b 6 + +{-# SPECIALIZE getSLEB128 :: G.Get Integer #-} +{-# SPECIALIZE getSLEB128 :: G.Get Int #-} +{-# SPECIALIZE getSLEB128 :: G.Get Int8 #-} +{-# SPECIALIZE getSLEB128 :: G.Get Int16 #-} +{-# SPECIALIZE getSLEB128 :: G.Get Int32 #-} +{-# SPECIALIZE getSLEB128 :: G.Get Int64 #-} |