summaryrefslogtreecommitdiff
path: root/src/Language/ECMAScript3/Syntax.hs
blob: 7ed14488711832113f048ee22e2741daa88d233a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
-- |ECMAScript 3 syntax. /Spec/ refers to the ECMA-262 specification,
-- 3rd edition.
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveTraversable #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Language.ECMAScript3.Syntax (JavaScript(..)
                                   ,unJavaScript
                                   ,Statement(..)
                                   ,isIterationStmt
                                   ,CaseClause(..)
                                   ,CatchClause(..)
                                   ,ForInit(..)
                                   ,ForInInit(..)
                                   ,VarDecl(..)
                                   ,Expression(..)
                                   ,InfixOp(..)
                                   ,AssignOp(..)
                                   ,Id(..)
                                   ,unId
                                   ,PrefixOp(..)
                                   ,Prop(..)
                                   ,UnaryAssignOp(..)
                                   ,LValue (..)
                                   ,SourcePos
                                   ,isValid
                                   ,isValidIdentifier
                                   ,isValidIdentifierName
                                   ,EnclosingStatement(..)
                                   ,pushLabel
                                   ,pushEnclosing
                                   ,HasLabelSet (..)
                                   ,isIter
                                   ,isIterSwitch
                                   ) where

import Text.Parsec.Pos(initialPos,SourcePos) -- used by data JavaScript
import Data.Data (Data)
import Data.Typeable (Typeable)
import Data.Foldable (Foldable)
import Data.Traversable (Traversable)
import Data.Default.Class
import Data.Generics.Uniplate.Data
import Data.Char
import Control.Monad.State
import Control.Arrow

data JavaScript a
  -- |A script in \<script\> ... \</script\> tags.
  = Script a [Statement a] 
  deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)

instance Default a => Default (JavaScript a) where
  def = Script def []

-- | extracts statements from a JavaScript type
unJavaScript :: JavaScript a -> [Statement a]
unJavaScript (Script _ stmts) = stmts

instance Default SourcePos where
  def = initialPos ""

data Id a = Id a String 
          deriving (Show,Eq,Ord,Data,Typeable,Functor,Foldable,Traversable)

unId :: Id a -> String
unId (Id _ s) = s

-- | Infix operators: see spec 11.5-11.11
data InfixOp = OpLT -- ^ @<@
             | OpLEq -- ^ @<=@
             | OpGT -- ^ @>@
             | OpGEq -- ^ @>=@
             | OpIn -- ^ @in@
             | OpInstanceof -- ^ @instanceof@
             | OpEq -- ^ @==@
             | OpNEq -- ^ @!=@
             | OpStrictEq -- ^ @===@
             | OpStrictNEq -- ^ @!===@
             | OpLAnd -- ^ @&&@
             | OpLOr -- ^ @||@
             | OpMul -- ^ @*@
             | OpDiv -- ^ @/@
             | OpMod -- ^ @%@
             | OpSub -- ^ @-@
             | OpLShift -- ^ @<<@
             | OpSpRShift -- ^ @>>@
             | OpZfRShift -- ^ @>>>@
             | OpBAnd -- ^ @&@
             | OpBXor -- ^ @^@
             | OpBOr -- ^ @|@
             | OpAdd -- ^ @+@
    deriving (Show,Data,Typeable,Eq,Ord,Enum)

-- | Assignment operators: see spec 11.13
data AssignOp = OpAssign -- ^ simple assignment, @=@
              | OpAssignAdd -- ^ @+=@
              | OpAssignSub -- ^ @-=@
              | OpAssignMul -- ^ @*=@
              | OpAssignDiv -- ^ @/=@
              | OpAssignMod -- ^ @%=@
              | OpAssignLShift -- ^ @<<=@
              | OpAssignSpRShift -- ^ @>>=@
              | OpAssignZfRShift -- ^ @>>>=@
              | OpAssignBAnd -- ^ @&=@
              | OpAssignBXor -- ^ @^=@
              | OpAssignBOr -- ^ @|=@
  deriving (Show,Data,Typeable,Eq,Ord)

-- | Unary assignment operators: see spec 11.3, 11.4.4, 11.4.5
data UnaryAssignOp = PrefixInc -- ^ @++x@
                   | PrefixDec -- ^ @--x@
                   | PostfixInc -- ^ @x++@
                   | PostfixDec -- ^ @x--@
  deriving (Show, Data, Typeable, Eq, Ord)

-- | Prefix operators: see spec 11.4 (excluding 11.4.4, 11.4.5)
data PrefixOp = PrefixLNot -- ^ @!@
              | PrefixBNot -- ^ @~@
              | PrefixPlus -- ^ @+@
              | PrefixMinus -- ^ @-@
              | PrefixTypeof -- ^ @typeof@
              | PrefixVoid -- ^ @void@
              | PrefixDelete -- ^ @delete@
  deriving (Show,Data,Typeable,Eq,Ord)

-- | Property names in an object initializer: see spec 11.1.5
data Prop a = PropId a (Id a) -- ^ property name is an identifier, @foo@
            | PropString a String -- ^ property name is a string, @\"foo\"@
            | PropNum a Integer -- ^ property name is an integer, @42@
  deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)
 
-- | Left-hand side expressions: see spec 11.2
data LValue a
  = LVar a String -- ^ variable reference, @foo@
  | LDot a (Expression a) String -- ^ @foo.bar@
  | LBracket a (Expression a) (Expression a) -- ^ @foo[bar]@
  deriving (Show, Eq, Ord, Data, Typeable, Functor,Foldable,Traversable) 

-- | Expressions, see spec 11
data Expression a
  = StringLit a String -- ^ @\"foo\"@, spec 11.1.3, 7.8
  | RegexpLit a String Bool Bool 
    -- ^ @RegexpLit a regexp global?  case_insensitive?@ -- regular
    -- expression, see spec 11.1.3, 7.8
  | NumLit a Double -- ^ @41.99999@, spec 11.1.3, 7.8
  | IntLit a Int -- ^ @42@, spec 11.1.3, 7.8
  | BoolLit a Bool -- ^ @true@, spec 11.1.3, 7.8
  | NullLit a -- ^ @null@, spec 11.1.3, 7.8
  | ArrayLit a [Expression a] -- ^ @[1,2,3]@, spec 11.1.4
  | ObjectLit a [(Prop a, Expression a)] -- ^ @{foo:\"bar\", baz: 42}@, spec 11.1.5
  | ThisRef a -- ^ @this@, spec 11.1.1
  | VarRef a (Id a) -- ^ @foo@, spec 11.1.2
  | DotRef a (Expression a) (Id a) -- ^ @foo.bar@, spec 11.2.1
  | BracketRef a (Expression a) {- container -} (Expression a) {- key -} 
    -- ^ @foo[bar@, spec 11.2.1
  | NewExpr a (Expression a) {- constructor -} [Expression a] 
    -- ^ @new foo(bar)@, spec 11.2.2
  | PrefixExpr a PrefixOp (Expression a) 
    -- ^ @\@e@, spec 11.4 (excluding 11.4.4, 111.4.5)
  | UnaryAssignExpr a UnaryAssignOp (LValue a) 
    -- ^ @++x@, @x--@ etc., spec 11.3, 11.4.4, 11.4.5
  | InfixExpr a InfixOp (Expression a) (Expression a) 
    -- ^ @e1\@e2@, spec 11.5, 11.6, 11.7, 11.8, 11.9, 11.10, 11.11
  | CondExpr a (Expression a) (Expression a) (Expression a)
    -- ^ @e1 ? e2 : e3@, spec 11.12
  | AssignExpr a AssignOp (LValue a) (Expression a)
    -- ^ @e1 \@=e2@, spec 11.13
  | ListExpr a [Expression a] -- ^ @e1, e2@, spec 11.14
  | CallExpr a (Expression a) [Expression a] -- ^ @f(x,y,z)@, spec 11.2.3
  --funcexprs are optionally named
  | FuncExpr a (Maybe (Id a)) [Id a] [Statement a]
    -- ^ @function f (x,y,z) {...}@, spec 11.2.5, 13
  deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)

-- | Case clauses, spec 12.11
data CaseClause a = CaseClause a (Expression a) [Statement a]
                    -- ^ @case e: stmts;@
                  | CaseDefault a [Statement a]
                    -- ^ @default: stmts;@
  deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)

-- | Catch clause, spec 12.14
data CatchClause a = CatchClause a (Id a) (Statement a) 
                     -- ^ @catch (x) {...}@
  deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)

-- | A variable declaration, spec 12.2
data VarDecl a = VarDecl a (Id a) (Maybe (Expression a)) 
                 -- ^ @var x = e;@
  deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)
  
-- | for initializer, spec 12.6
data ForInit a = NoInit -- ^ empty
               | VarInit [VarDecl a] -- ^ @var x, y=42@
               | ExprInit (Expression a) -- ^ @expr@
  deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)

-- | for..in initializer, spec 12.6
data ForInInit a = ForInVar (Id a) -- ^ @var x@
                 | ForInLVal (LValue a) -- ^ @foo.baz@, @foo[bar]@, @z@
 deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)
  
-- | Statements, spec 12.
data Statement a 
  = BlockStmt a [Statement a] -- ^ @{stmts}@, spec 12.1
  | EmptyStmt a -- ^ @;@, spec 12.3
  | ExprStmt a (Expression a) -- ^ @expr;@, spec 12.4
  | IfStmt a (Expression a) (Statement a) (Statement a) 
    -- ^ @if (e) stmt@, spec 12.5
  | IfSingleStmt a (Expression a) (Statement a)
    -- ^ @if (e) stmt1 else stmt2@, spec 12.5
  | SwitchStmt a (Expression a) [CaseClause a]
    -- ^ @switch (e) clauses@, spec 12.11
  | WhileStmt a (Expression a) (Statement a)
    -- ^ @while (e) do stmt@, spec 12.6
  | DoWhileStmt a (Statement a) (Expression a)
    -- ^ @do stmt while (e);@, spec 12.6
  | BreakStmt a (Maybe (Id a)) -- ^ @break lab;@, spec 12.8
  | ContinueStmt a (Maybe (Id a)) -- ^ @continue lab;@, spec 12.7
  | LabelledStmt a (Id a) (Statement a) -- ^ @lab: stmt@, spec 12.12
  | ForInStmt a (ForInInit a) (Expression a) (Statement a) 
    -- ^ @for (x in o) stmt@, spec 12.6
  | ForStmt a (ForInit a)        
              (Maybe (Expression a)) -- test
              (Maybe (Expression a)) -- increment
              (Statement a)          -- body 
    -- ^ @ForStmt a init test increment body@, @for (init; test,
    -- increment) body@, spec 12.6
  | TryStmt a (Statement a) {-body-} (Maybe (CatchClause a))
      (Maybe (Statement a)) {-finally-}
    -- ^ @try stmt catch(x) stmt finally stmt@, spec 12.14
  | ThrowStmt a (Expression a)
    -- ^ @throw expr;@, spec 12.13
  | ReturnStmt a (Maybe (Expression a))
    -- ^ @return expr;@, spec 12.9
  | WithStmt a (Expression a) (Statement a)
    -- ^ @with (o) stmt@, spec 12.10
  | VarDeclStmt a [VarDecl a]
    -- ^ @var x, y=42;@, spec 12.2
  | FunctionStmt a (Id a) {-name-} [Id a] {-args-} [Statement a] {-body-}
    -- ^ @function f(x, y, z) {...}@, spec 13
  deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable)  

-- | Returns 'True' if the statement is an /IterationStatement/
-- according to spec 12.6.
isIterationStmt :: Statement a -> Bool
isIterationStmt s = case s of
  WhileStmt {}   -> True
  DoWhileStmt {} -> True
  ForStmt {} -> True
  ForInStmt {} -> True
  _                 -> False
  
-- | The ECMAScript standard defines certain syntactic restrictions on
-- programs (or, more precisely, statements) that aren't easily
-- enforced in the AST datatype. These restrictions have to do with
-- labeled statements and break/continue statement, as well as
-- identifier names. Thus, it is possible to manually generate AST's
-- that correspond to syntactically incorrect programs. Use this
-- predicate to check if an 'JavaScript' AST corresponds to a
-- syntactically correct ECMAScript program.
isValid :: forall a. (Data a, Typeable a) => JavaScript a -> Bool
-- =From ECMA-262-3=
-- A program is considered syntactically incorrect if either of the
-- following is true:
-- * The program contains a continue statement without the optional
-- Identifier, which is not nested, directly or indirectly (but not
-- crossing function boundaries), within an IterationStatement.
-- * The program contains a continue statement with the optional
-- Identifier, where Identifier does not appear in the label set of an
-- enclosing (but not crossing function boundaries) IterationStatement.
-- * The program contains a break statement without the optional
-- Identifier, which is not nested, directly or indirectly (but not
-- crossing function boundaries), within an IterationStatement or a
-- SwitchStatement.
-- * The program contains a break statement with the optional
-- Identifier, where Identifier does not appear in the label set of an
-- enclosing (but not crossing function boundaries) Statement.
-- * The program contains a LabelledStatement that is enclosed by a
-- LabelledStatement with the same Identifier as label. This does not
-- apply to labels appearing within the body of a FunctionDeclaration
-- that is nested, directly or indirectly, within a labelled
-- statement.
-- * The identifiers should be valid. See spec 7.6
isValid js = checkIdentifiers js && checkBreakContinueLabels js
  where checkIdentifiers :: (Data a, Typeable a) => JavaScript a -> Bool
        checkIdentifiers js =
          and $ map isValidIdentifierName $
          [n | (Id _ n) :: Id a <- universeBi js] ++
          [n | (LVar _ n) :: LValue a <- universeBi js] ++
          [n | (LDot _ _ n) :: LValue a <- universeBi js]
        checkBreakContinueLabels js@(Script _ body) = and $ map checkStmt $
           body ++ concat ([body | FuncExpr _ _ _ body <- universeBi js] ++
                           [body | FunctionStmt _ _ _ body <- universeBi js])

checkStmt :: Statement a -> Bool
checkStmt s = evalState (checkStmtM s) ([], [])

checkStmtM :: Statement a -> State ([Label], [EnclosingStatement]) Bool
checkStmtM stmt = case stmt of
  ContinueStmt a mlab -> do
    encls <- gets snd
    let enIts = filter isIter encls
    return $ case mlab of
      Nothing  -> not $ null enIts
      Just lab -> any (elem (unId lab) . getLabelSet) enIts
  BreakStmt a mlab -> do
    encls <- gets snd
    return $ case mlab of
      Nothing  -> any isIterSwitch encls
      Just lab -> any (elem (unId lab) . getLabelSet) encls
  LabelledStmt _ lab s -> do
    labs <- gets fst
    if (unId lab) `elem` labs then return False
      else pushLabel lab $ checkStmtM s
  WhileStmt _ _ s   -> iterCommon s
  DoWhileStmt _ s _ -> iterCommon s
  ForStmt _ _ _ _ s -> iterCommon s
  ForInStmt _ _ _ s -> iterCommon s
  SwitchStmt _ _ cs -> pushEnclosing EnclosingSwitch $ liftM and $ mapM checkCaseM cs
  BlockStmt _ ss -> pushEnclosing EnclosingOther $ liftM and $ mapM checkStmtM ss
  IfStmt _ _ t e -> liftM2 (&&) (checkStmtM t) (checkStmtM e)
  IfSingleStmt _ _ t -> checkStmtM t
  TryStmt _ body mcatch mfinally -> liftM2 (&&) (checkStmtM body) $
    liftM2 (&&) (maybe (return True) checkCatchM mcatch)
                (maybe (return True) checkStmtM mfinally)
  WithStmt _ _ body -> checkStmtM body
  _ -> return True

iterCommon s = pushEnclosing EnclosingIter $ checkStmtM s

pushEnclosing :: Monad m => ([Label] -> EnclosingStatement)
              -> StateT ([Label], [EnclosingStatement]) m a
              -> StateT ([Label], [EnclosingStatement]) m a
pushEnclosing ctor = bracketState (\(labs, encls) -> ([], ctor labs:encls))

pushLabel :: Monad m => Id b -> StateT ([Label], [EnclosingStatement]) m a
          -> StateT ([Label], [EnclosingStatement]) m a
pushLabel l = bracketState (first (unId l:))

checkCaseM c = let ss = case c of
                     CaseClause _ _ body -> body
                     CaseDefault _ body -> body
               in liftM and $ mapM checkStmtM ss

checkCatchM (CatchClause _ _ body) = checkStmtM body

bracketState :: Monad m => (s -> s) -> StateT s m a -> StateT s m a
bracketState f m = do original <- get
                      modify f
                      rv <- m
                      put original
                      return rv

-- | Checks if an identifier name is valid according to the spec
isValidIdentifier :: Id a -> Bool
isValidIdentifier (Id _ name) = isValidIdentifierName name

isValidIdentifierName :: String -> Bool
isValidIdentifierName name = (not $ null name) && name `notElem` reservedWords
  where reservedWords = keyword ++ futureReservedWord ++ nullKw ++ boolLit
        keyword = ["break", "case", "catch", "continue", "default", "delete"
                  ,"do", "else", "finally", "for", "function", "if", "in"
                  ,"instanceof", "new", "return", "switch", "this", "throw"
                  ,"try", "typeof", "var", "void", "while", "with"]
        futureReservedWord = ["abstract", "boolean", "byte", "char", "class"
                             ,"const", "debugger", "enum", "export", "extends"
                             ,"final", "float", "goto", "implements", "int"
                             ,"interface", "long", "native", "package", "private"
                             ,"protected", "short", "static", "super"
                             ,"synchronized", "throws", "transient", "volatile"]
        nullKw = ["null"]
        boolLit = ["true", "false"]
          
data EnclosingStatement = EnclosingIter [Label]
                          -- ^ The enclosing statement is an iteration statement
                        | EnclosingSwitch [Label]
                          -- ^ The enclosing statement is a switch statement
                        | EnclosingOther [Label]
                          -- ^ The enclosing statement is some other
                          -- statement.  Note, `EnclosingOther` is
                          -- never pushed if the current `labelSet` is
                          -- empty, so the list of labels in this
                          -- constructor should always be non-empty

instance Show EnclosingStatement where
  show (EnclosingIter ls) = "iteration" ++ show ls
  show (EnclosingSwitch ls) = "switch" ++ show ls
  show (EnclosingOther ls) = "statement" ++ show ls

isIter :: EnclosingStatement -> Bool
isIter (EnclosingIter _) = True
isIter _                 = False

isIterSwitch :: EnclosingStatement -> Bool
isIterSwitch (EnclosingIter _)   = True
isIterSwitch (EnclosingSwitch _) = True
isIterSwitch _                   = False

class HasLabelSet a where
  getLabelSet :: a -> [Label]
  setLabelSet :: [Label] -> a -> a

modifyLabelSet :: HasLabelSet a => ([Label] -> [Label]) -> a -> a
modifyLabelSet f a = setLabelSet (f $ getLabelSet a) a

instance HasLabelSet EnclosingStatement where
  getLabelSet e = case e of
    EnclosingIter ls   -> ls
    EnclosingSwitch ls -> ls
    EnclosingOther ls  -> ls
  setLabelSet ls e = case e of
    EnclosingIter _   -> EnclosingIter ls
    EnclosingSwitch _ -> EnclosingSwitch ls
    EnclosingOther _  -> EnclosingOther ls

type Label = String