summaryrefslogtreecommitdiff
path: root/Text/ProtocolBuffers/Basic.hs
blob: 3a405e59017cbd2c0576e00e10e9021dbe857691 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
{-# LANGUAGE DeriveDataTypeable,GeneralizedNewtypeDeriving,CPP #-}
-- | "Text.ProtocolBuffers.Basic" defines or re-exports most of the
-- basic field types; 'Maybe','Bool', 'Double', and 'Float' come from
-- the Prelude instead. This module also defines the 'Mergeable' and
-- 'Default' classes. The 'Wire' class is not defined here to avoid orphans.
module Text.ProtocolBuffers.Basic
  ( -- * Basic types for protocol buffer fields in Haskell
    Double,Float,Bool,Maybe,Seq,Utf8(Utf8),ByteString,Int32,Int64,Word32,Word64
    -- * Haskell types that act in the place of DescritorProto values
  , WireTag(..),FieldId(..),WireType(..),FieldType(..),EnumCode(..),WireSize
    -- * Some of the type classes implemented messages and fields
  , Mergeable(..),Default(..) -- ,Wire(..)
  , isValidUTF8, toUtf8, utf8, uToString, uFromString
  ) where

import Data.Aeson
import Data.Bits(Bits)
import Data.ByteString.Lazy(ByteString)
import Data.Foldable as F(Foldable(foldl))
import Data.Generics(Data(..))
import Data.Int(Int32,Int64)
import Data.Ix(Ix)
import Data.Semigroup (Semigroup(..))
import Data.Sequence(Seq,(><))
import Data.Typeable(Typeable)
import Data.Word(Word8,Word32,Word64)

import qualified Data.ByteString.Lazy as L(unpack)
import Data.ByteString.Lazy.UTF8 as U (toString,fromString)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

-- Num instances are derived below for the purpose of getting fromInteger for case matching

-- | 'Utf8' is used to mark 'ByteString' values that (should) contain
-- valid utf8 encoded strings.  This type is used to represent
-- 'TYPE_STRING' values.
newtype Utf8 = Utf8 ByteString deriving (Data,Typeable,Eq,Ord)

utf8 :: Utf8 -> ByteString
utf8 (Utf8 bs) = bs

instance Read Utf8 where
  readsPrec d xs = let r :: Int -> ReadS String
                       r = readsPrec
                       f :: (String,String) -> (Utf8,String)
                       f (a,b) = (Utf8 (U.fromString a),b)
                   in map f . r d $ xs

instance Show Utf8 where
  showsPrec d (Utf8 bs) = let s :: Int -> String -> ShowS
                              s = showsPrec
                          in s d (U.toString bs)

instance Semigroup Utf8 where
  (<>) (Utf8 x) (Utf8 y) = Utf8 (x <> y)

instance Monoid Utf8 where
  mempty = Utf8 mempty
  mappend = (<>)

instance ToJSON Utf8 where
    toJSON (Utf8 t) = toJSON (TL.decodeUtf8 t)

instance FromJSON Utf8 where
    parseJSON value =
        case value of
          String t -> return . Utf8 . TL.encodeUtf8 . TL.fromStrict $ t
          _ -> fail ("Value " ++ show value ++ " is not a UTF-8 string")

-- | 'WireTag' is the 32 bit value with the upper 29 bits being the
-- 'FieldId' and the lower 3 bits being the 'WireType'
newtype WireTag = WireTag { getWireTag :: Word32 } -- bit concatenation of FieldId and WireType
  deriving (Eq,Ord,Enum,Read,Show,Num,Bits,Bounded,Data,Typeable)

-- | 'FieldId' is the field number which can be in the range 1 to
-- 2^29-1 but the value from 19000 to 19999 are forbidden (so sayeth
-- Google).
newtype FieldId = FieldId { getFieldId :: Int32 } -- really 29 bits
  deriving (Eq,Ord,Enum,Read,Show,Num,Data,Typeable,Ix)

-- Note that values 19000-19999 are forbidden for FieldId
instance Bounded FieldId where
  minBound = 1
  maxBound = 536870911 -- 2^29-1

-- | 'WireType' is the 3 bit wire encoding value, and is currently in
-- the range 0 to 5, leaving 6 and 7 currently invalid.
--
-- * 0 /Varint/ : int32, int64, uint32, uint64, sint32, sint64, bool, enum
--
-- * 1 /64-bit/ : fixed64, sfixed64, double
--
-- * 2 /Length-delimited/ : string, bytes, embedded messages
--
-- * 3 /Start group/ : groups (deprecated)
--
-- * 4 /End group/ : groups (deprecated)
--
-- * 5 /32-bit/ : fixed32, sfixed32, float
--
newtype WireType = WireType { getWireType :: Word32 }    -- really 3 bits
  deriving (Eq,Ord,Enum,Read,Show,Num,Data,Typeable)

instance Bounded WireType where
  minBound = 0
  maxBound = 5

{- | 'FieldType' is the integer associated with the
  FieldDescriptorProto's Type.  The allowed range is currently 1 to
  18, as shown below (excerpt from descritor.proto)

>    // 0 is reserved for errors.
>    // Order is weird for historical reasons.
>    TYPE_DOUBLE         = 1;
>    TYPE_FLOAT          = 2;
>    TYPE_INT64          = 3;   // Not ZigZag encoded.  Negative numbers
>                               // take 10 bytes.  Use TYPE_SINT64 if negative
>                               // values are likely.
>    TYPE_UINT64         = 4;
>    TYPE_INT32          = 5;   // Not ZigZag encoded.  Negative numbers
>                               // take 10 bytes.  Use TYPE_SINT32 if negative
>                               // values are likely.
>    TYPE_FIXED64        = 6;
>    TYPE_FIXED32        = 7;
>    TYPE_BOOL           = 8;
>    TYPE_STRING         = 9;
>    TYPE_GROUP          = 10;  // Tag-delimited aggregate.
>    TYPE_MESSAGE        = 11;  // Length-delimited aggregate.
>
>    // New in version 2.
>    TYPE_BYTES          = 12;
>    TYPE_UINT32         = 13;
>    TYPE_ENUM           = 14;
>    TYPE_SFIXED32       = 15;
>    TYPE_SFIXED64       = 16;
>    TYPE_SINT32         = 17;  // Uses ZigZag encoding.
>    TYPE_SINT64         = 18;  // Uses ZigZag encoding.

-}

newtype FieldType = FieldType { getFieldType :: Int } -- really [1..18] as fromEnum of Type from Type.hs
  deriving (Eq,Ord,Enum,Read,Show,Num,Data,Typeable)

instance Bounded FieldType where
  minBound = 1
  maxBound = 18

-- | 'EnumCode' is the Int32 assoicated with a
-- EnumValueDescriptorProto and is in the range 0 to 2^31-1.
newtype EnumCode = EnumCode { getEnumCode :: Int32 }  -- really [0..maxBound::Int32] of some .proto defined enumeration
  deriving (Eq,Ord,Read,Show,Num,Data,Typeable)

instance Bounded EnumCode where
  minBound = 0
  maxBound = 2147483647 -- 2^-31 -1

-- | 'WireSize' is the Int64 size type associated with the lazy
-- bytestrings used in the 'Put' and 'Get' monads.
type WireSize = Int64

-- | The 'Mergeable' class is not a 'Monoid', 'mergeEmpty' is not a
-- left or right unit like 'mempty'.  The default 'mergeAppend' is to
-- take the second parameter and discard the first one.  The
-- 'mergeConcat' defaults to @foldl@ associativity.
--
-- NOTE: 'mergeEmpty' has been removed in protocol buffers version 2.
-- Use 'defaultValue' instead.  New strict fields would mean that required
-- fields in messages will be automatic errors with 'mergeEmpty'.
class Default a => Mergeable a where
{-
  -- | The 'mergeEmpty' value of a basic type or a message with
  -- required fields will be undefined and a runtime error to
  -- evaluate.  These are only handy for reading the wire encoding and
  -- users should employ 'defaultValue' instead.
  mergeEmpty :: a
  mergeEmpty = error "You did not define Mergeable.mergeEmpty!"
-}
  -- | 'mergeAppend' is the right-biased merge of two values.  A
  -- message (or group) is merged recursively.  Required field are
  -- always taken from the second message. Optional field values are
  -- taken from the most defined message or the second message if
  -- both are set.  Repeated fields have the sequences concatenated.
  -- Note that strings and bytes are NOT concatenated.
  mergeAppend :: a -> a -> a
  mergeAppend _a b = b

  -- | 'mergeConcat' is @ F.foldl mergeAppend defaultValue @ and this
  -- default definition is not overridden in any of the code except
  -- for the (Seq a) instance.
  mergeConcat :: F.Foldable t => t a -> a
  mergeConcat = F.foldl mergeAppend defaultValue

-- | The Default class has the default-default values of types.  See
-- <http://code.google.com/apis/protocolbuffers/docs/proto.html#optional>
-- and also note that 'Enum' types have a 'defaultValue' that is the
-- first one in the @.proto@ file (there is always at least one
-- value).  Instances of this for messages hold any default value
-- defined in the @.proto@ file.  'defaultValue' is where the
-- 'MessageAPI' function 'getVal' looks when an optional field is not
-- set.
class Default a where
  -- | The 'defaultValue' is never undefined or an error to evalute.
  -- This makes it much more useful compared to 'mergeEmpty'. In a
  -- default message all Optional field values are set to 'Nothing'
  -- and Repeated field values are empty.
  defaultValue :: a

-- Returns Nothing if valid, and the position of the error if invalid
isValidUTF8 :: ByteString -> Maybe Int
isValidUTF8 bs = go 0 (L.unpack bs) 0 where
  go :: Int -> [Word8] -> Int -> Maybe Int
  go 0 [] _ = Nothing
  go 0 (x:xs) n | x <= 127 = go 0 xs $! succ n -- binary 01111111
                | x <= 193 = Just n            -- binary 11000001, decodes to <=127, should not be here
                | x <= 223 = go 1 xs $! succ n -- binary 11011111
                | x <= 239 = go 2 xs $! succ n -- binary 11101111
                | x <= 243 = go 3 xs $! succ n -- binary 11110011
                | x == 244 = high xs $! succ n -- binary 11110100
                | otherwise = Just n
  go i (x:xs) n | 128 <= x && x <= 191 = go (pred i) xs $! succ n
  go _ _ n = Just n
  -- leading 3 bits are 100, so next 6 are at most 001111, i.e. 10001111
  high (x:xs) n | 128 <= x && x <= 143 = go 2 xs $! succ n
                | otherwise = Just n
  high [] n = Just n

toUtf8 :: ByteString -> Either Int Utf8
toUtf8 bs = maybe (Right (Utf8 bs)) Left (isValidUTF8 bs)

uToString :: Utf8 -> String
uToString (Utf8 bs) = U.toString bs

uFromString :: String -> Utf8
uFromString s = Utf8 (U.fromString s)


-- Base types are not very mergeable, but their Maybe and Seq versions are:
instance Mergeable a => Mergeable (Maybe a) where
--    mergeEmpty = Nothing
    mergeAppend = mayMerge

{-# INLINE mayMerge #-}
mayMerge :: (Mergeable b) => Maybe b -> Maybe b -> Maybe b
mayMerge Nothing  y        = y
mayMerge x        Nothing  = x
mayMerge (Just x) (Just y) = Just (mergeAppend x y)

instance Mergeable (Seq a) where
--    mergeEmpty = empty
    mergeAppend = (><)

-- These all have errors as mergeEmpty and use the second paramater for mergeAppend
instance Mergeable Bool
instance Mergeable Utf8
instance Mergeable ByteString
instance Mergeable Double
instance Mergeable Float
instance Mergeable Int32
instance Mergeable Int64
instance Mergeable Word32
instance Mergeable Word64

instance Default Word64 where defaultValue = 0
instance Default Word32 where defaultValue = 0
instance Default Int64 where defaultValue = 0
instance Default Int32 where defaultValue = 0
instance Default Float where defaultValue = 0
instance Default Double where defaultValue = 0
instance Default Bool where defaultValue = False
instance Default (Maybe a) where defaultValue = Nothing
instance Default (Seq a) where defaultValue = mempty
instance Default ByteString where defaultValue = mempty
instance Default Utf8 where defaultValue = mempty