diff options
author | AndreyChudnov <> | 2012-08-04 18:46:48 (GMT) |
---|---|---|
committer | hdiff <hdiff@luite.com> | 2012-08-04 18:46:48 (GMT) |
commit | 406c97fe855a778acd2e025b6ce6df9a2bf862a8 (patch) | |
tree | 28a8fa4f25aa6da94edca37657603dad8e0d4f1c |
version 0.90.9
-rw-r--r-- | LICENSE | 28 | ||||
-rw-r--r-- | Setup.hs | 2 | ||||
-rw-r--r-- | language-ecmascript.cabal | 79 | ||||
-rw-r--r-- | src/Language/ECMAScript3.hs | 12 | ||||
-rw-r--r-- | src/Language/ECMAScript3/Analysis/Environment.hs | 156 | ||||
-rw-r--r-- | src/Language/ECMAScript3/Analysis/LabelSets.hs | 88 | ||||
-rw-r--r-- | src/Language/ECMAScript3/Lexer.hs | 65 | ||||
-rw-r--r-- | src/Language/ECMAScript3/Parser.hs | 759 | ||||
-rw-r--r-- | src/Language/ECMAScript3/PrettyPrint.hs | 231 | ||||
-rw-r--r-- | src/Language/ECMAScript3/Syntax.hs | 236 | ||||
-rw-r--r-- | src/Language/ECMAScript3/Syntax/Annotations.hs | 115 | ||||
-rw-r--r-- | src/UnitTest.hs | 31 |
12 files changed, 1802 insertions, 0 deletions
@@ -0,0 +1,28 @@ +Copyright (c) 2007--2012, Brown University, 2008-2012 Claudiu Saftoiu, +2012 Stevens Institute of Technology. +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Brown University, Stevens Institute of Technology + nor the names of its contributors may be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/language-ecmascript.cabal b/language-ecmascript.cabal new file mode 100644 index 0000000..07c4857 --- /dev/null +++ b/language-ecmascript.cabal @@ -0,0 +1,79 @@ +Name: language-ecmascript +Version: 0.9 +Cabal-Version: >= 1.10 +Copyright: (c) 2007-2012 Brown University, (c) 2008-2010 Claudiu Saftoiu, + (c) 2012 Stevens Institute of Technology +License: BSD3 +License-file: LICENSE +Author: Andrey Chudnov, Arjun Guha, Spiridon Aristides Eliopoulos, + Joe Gibbs Politz, Claudiu Saftoiu +Maintainer: Andrey Chudnov <oss@chudnov.com> +Homepage: http://github.com/jswebtools/language-ecmascript +Bug-reports: http://github.com/jswebtools/language-ecmascript/issues +Stability: provisional +Tested-with: GHC==7.0.4, GHC==7.4.1 +Category: Language +Build-Type: Simple +Synopsis: JavaScript analysis tools +Description: + Some tools for working with ECMAScript 3 (popularly known as JavaScript). + Includes a parser, pretty-printer, and basic building blocks for more + sophisticated tools. + + This package supercedes package WebBits. + +Source-repository head + type: git + location: git://github.com/jswebtools/language-ecmascript.git + +Source-repository this + type: git + location: git://github.com/jswebtools/language-ecmascript.git + tag: 0.9 + +Library + Hs-Source-Dirs: + src + Build-Depends: + base >= 4 && < 5, + mtl >= 1.1.0.1, + parsec < 3.2.0, + pretty >= 0.1, + containers >= 0.1, + syb >= 0.1, + uniplate >= 1.6 && <1.7, + data-default >= 0.4 && <0.5 + ghc-options: + -fwarn-incomplete-patterns + Exposed-Modules: + Language.ECMAScript3 + Language.ECMAScript3.Lexer + Language.ECMAScript3.Parser + Language.ECMAScript3.PrettyPrint + Language.ECMAScript3.Syntax + Language.ECMAScript3.Syntax.Annotations + Language.ECMAScript3.Analysis.Environment + Language.ECMAScript3.Analysis.LabelSets + Default-Extensions: + DeriveDataTypeable, ScopedTypeVariables, DeriveFunctor, DeriveFoldable, DeriveTraversable, FlexibleContexts + Default-Language: Haskell2010 + +Test-Suite unittest + Hs-Source-Dirs: src + Type: exitcode-stdio-1.0 + Main-Is: UnitTest.hs + Build-Depends: + base >= 4 && < 5, + mtl >= 1.1.0.1, + parsec < 3.2.0, + pretty >= 0.1, + containers >= 0.1, + syb >= 0.1, + directory, + filepath, + HUnit, + data-default >=0.4 && <0.5 + Default-Extensions: DeriveDataTypeable, ScopedTypeVariables, DeriveFunctor, DeriveFoldable, DeriveTraversable, FlexibleContexts + Default-Language: Haskell2010 + ghc-options: + -fwarn-incomplete-patterns diff --git a/src/Language/ECMAScript3.hs b/src/Language/ECMAScript3.hs new file mode 100644 index 0000000..77a6917 --- /dev/null +++ b/src/Language/ECMAScript3.hs @@ -0,0 +1,12 @@ +-- |Re-exports commonly used modules. +module Language.ECMAScript3 + ( module Language.ECMAScript3.Syntax + , module Language.ECMAScript3.Parser + , renderStatements + , renderExpression + ) where + + +import Language.ECMAScript3.Syntax +import Language.ECMAScript3.Parser +import Language.ECMAScript3.PrettyPrint diff --git a/src/Language/ECMAScript3/Analysis/Environment.hs b/src/Language/ECMAScript3/Analysis/Environment.hs new file mode 100644 index 0000000..c845203 --- /dev/null +++ b/src/Language/ECMAScript3/Analysis/Environment.hs @@ -0,0 +1,156 @@ +-- | A lexical environment analysis of ECMAScript programs + +module Language.ECMAScript3.Analysis.Environment + ( env + , localVars + , EnvTree (..) + ) where + +import Data.List +import Data.Maybe +import qualified Data.Map as M +import Data.Map (Map) +import qualified Data.Set as S +import Data.Set (Set) +import Text.ParserCombinators.Parsec.Pos (SourcePos) + +import Language.ECMAScript3.Syntax + +-- | Intermediate data structure that contains locally declared names and +-- all references to identifers. +data Partial = Partial { + partialLocals :: M.Map String SourcePos, + partialReferences :: M.Map String SourcePos, + partialNested :: [Partial] +} + +empty :: Partial +empty = Partial M.empty M.empty [] + +ref :: Id SourcePos -> Partial +ref (Id p v) = Partial M.empty (M.singleton v p) [] + +decl :: Id SourcePos -> Partial +decl (Id p v) = Partial (M.singleton v p) M.empty [] + +nest :: Partial -> Partial +nest partial = Partial M.empty M.empty [partial] + +-- Combine partial results from the same lexical scope. +unions :: [Partial] -> Partial +unions ps = Partial (M.unions (map partialLocals ps)) + (M.unions (map partialReferences ps)) + (concatMap partialNested ps) + +javascript :: JavaScript SourcePos -> Partial +javascript (Script _ ss) = unions (map stmt ss) + + +lvalue :: LValue SourcePos -> Partial +lvalue lv = case lv of + LVar p x -> ref (Id p x) + LDot _ e _ -> expr e + LBracket _ e1 e2 -> unions [expr e1, expr e2] + +expr :: Expression SourcePos -> Partial +expr e = case e of + StringLit _ _ -> empty + RegexpLit {} -> empty + NumLit _ _ -> empty + IntLit _ _ -> empty + BoolLit _ _ -> empty + NullLit _ -> empty + ArrayLit _ es -> unions (map expr es) + ObjectLit _ props -> unions (map (expr.snd) props) + ThisRef _ -> empty + VarRef _ id -> empty + DotRef _ e _ -> expr e + BracketRef _ e1 e2 -> unions [expr e1, expr e2] + NewExpr _ e1 es -> unions [expr e1, unions $ map expr es] + PrefixExpr _ _ e -> expr e + InfixExpr _ _ e1 e2 -> unions [expr e1, expr e2] + CondExpr _ e1 e2 e3 -> unions [expr e1, expr e2, expr e3] + AssignExpr _ _ lv e -> unions [lvalue lv, expr e] + UnaryAssignExpr _ _ lv -> lvalue lv + ParenExpr _ e -> expr e + ListExpr _ es -> unions (map expr es) + CallExpr _ e es -> unions [expr e, unions $ map expr es] + FuncExpr _ _ args s -> nest $ unions [unions $ map decl args, stmt s] + +caseClause :: CaseClause SourcePos -> Partial +caseClause cc = case cc of + CaseClause _ e ss -> unions [expr e, unions $ map stmt ss] + CaseDefault _ ss -> unions $ map stmt ss + +-- TODO: Verify that this is a declaration and not a reference. +catchClause :: CatchClause SourcePos -> Partial +catchClause (CatchClause _ id s) = unions [decl id, stmt s] + +varDecl :: VarDecl SourcePos -> Partial +varDecl (VarDecl _ id Nothing) = decl id +varDecl (VarDecl _ id (Just e)) = unions [decl id, expr e] + +forInit :: ForInit SourcePos -> Partial +forInit fi = case fi of + NoInit -> empty + VarInit ds -> unions $ map varDecl ds + ExprInit e -> expr e + +forInInit :: ForInInit SourcePos -> Partial +forInInit (ForInVar id) = decl id +forInInit (ForInLVal lv) = lvalue lv + +stmt :: Statement SourcePos -> Partial +stmt s = case s of + BlockStmt _ ss -> unions $ map stmt ss + EmptyStmt _ -> empty + ExprStmt _ e -> expr e + IfStmt _ e s1 s2 -> unions [expr e, stmt s1, stmt s2] + IfSingleStmt _ e s -> unions [expr e, stmt s] + SwitchStmt _ e cases -> unions [expr e, unions $ map caseClause cases] + WhileStmt _ e s -> unions [expr e, stmt s] + DoWhileStmt _ s e -> unions [stmt s, expr e] + BreakStmt _ _ -> empty + ContinueStmt _ _ -> empty + LabelledStmt _ _ s -> stmt s + ForInStmt _ fii e s -> unions [forInInit fii, expr e, stmt s] + ForStmt _ fi me1 me2 s -> + unions [forInit fi, maybe empty expr me1, maybe empty expr me2, stmt s] + TryStmt _ s mcatch ms -> + unions [stmt s, maybe empty catchClause mcatch, maybe empty stmt ms] + ThrowStmt _ e -> expr e + ReturnStmt _ me -> maybe empty expr me + WithStmt _ e s -> unions [expr e, stmt s] + VarDeclStmt _ decls -> unions $ map varDecl decls + FunctionStmt _ fnId args s -> + unions [decl fnId, nest $ unions [unions $ map decl args, stmt s]] + +-- |The statically-determinate lexical structure of a JavaScript program. +data EnvTree = EnvTree (M.Map String SourcePos) [EnvTree] + +-- A 'Partial' specifies identifier references in addition to identifier +-- declarations. We descend into a 'Partial', pushing enclosing declarations +-- in to remove references to identifiers declared in the enclosing scope. +-- Any referencs to identifiers not declared in either the current or the +-- enclosing scope are local definitions of global variables. +makeEnvTree :: Map String SourcePos -- ^enclosing environment + -> Partial -- ^local environment and references + -> (EnvTree,Map String SourcePos) + -- ^environment and global definitions +makeEnvTree enclosing (Partial locals references nested) = (tree,globals) where + nestedResults = map (makeEnvTree (locals `M.union` enclosing)) nested + tree = EnvTree locals (map fst nestedResults) + globals' = (references `M.difference` locals) `M.difference` enclosing + globals = M.unions (globals':map snd nestedResults) + +env :: Map String SourcePos -- ^browser/testing environment + -> [Statement SourcePos] + -> (EnvTree,Map String SourcePos) +env globals program = makeEnvTree globals (unions $ map stmt program) + + +localVars :: [Statement SourcePos] + -> [(String, SourcePos)] +localVars body = M.toList locals where + Partial locals _ _ = unions $ map stmt body + diff --git a/src/Language/ECMAScript3/Analysis/LabelSets.hs b/src/Language/ECMAScript3/Analysis/LabelSets.hs new file mode 100644 index 0000000..e8b75a9 --- /dev/null +++ b/src/Language/ECMAScript3/Analysis/LabelSets.hs @@ -0,0 +1,88 @@ +-- | Label-set analysis which annotates all the statements in the script +-- with their label sets according to ECMAScript specification, +-- section 12.12. The result of this analysis are useful for building +-- control-flow graphs. + +module Language.ECMAScript3.Analysis.LabelSets (annotateLabelSets + ,Label(..)) where + +import Language.ECMAScript3.Syntax +import Language.ECMAScript3.Syntax.Annotations +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Generics.Uniplate.Data +import Data.Data (Data) +import Control.Applicative +import Data.Typeable (Typeable) + +-- | Labels are either strings (identifiers) or /empty/ (see 12.12 of +-- the spec) +data Label = Label String + | EmptyLabel + deriving (Ord, Eq, Show, Data, Typeable) + +-- | Annotates statements with their label sets; example use: +-- +-- >>> let jsa = reannotate (\a -> (a, Set.empty)) +-- >>> in annotateLabelSets jsa snd (\labs (a, ls) -> (a, labs `Set.union` ls)) +annotateLabelSets :: Data a => + (a -> Set Label) -- ^ annotation read function + -> (Set Label -> a -> a) -- ^ annotation write function + -> JavaScript a -- ^ the script to annotate + -> JavaScript a +annotateLabelSets r w = transformBi (annotateFuncStmtBodies r w) + . transformBi (annotateFuncExprBodies r w) + . descendBi (annotateStatement r w) + +annotateFuncStmtBodies :: Data a => + (a -> Set Label) + -> (Set Label -> a -> a) + -> Statement a + -> Statement a +annotateFuncStmtBodies r w s = case s of + FunctionStmt a name params body -> + let newbody = descend (annotateStatement r w) body + in FunctionStmt a name params newbody + _ -> s + +annotateFuncExprBodies :: Data a => + (a -> Set Label) + -> (Set Label -> a -> a) + -> Expression a + -> Expression a +annotateFuncExprBodies r w e = case e of + FuncExpr a mname params body -> + let newbody = descend (annotateStatement r w) body + in FuncExpr a mname params newbody + _ -> e + +-- | 12.12 ECMA262: the production /Identifier/ : /Statement/ is +-- evaluated by adding /Identifier/ to the label ser of /Statement/ +-- and then evluating /Statement/. If the /LabelledStatement/ itsef +-- has a non-empty label set, these labels are also added to the label +-- set of /Statement/ before evaluating it. ... Prior to evaluation of +-- a /LabelledStatement/, the contained /Statement/ is regarded as +-- possessing an empty label set, unless it is an /IterationStatement/ +-- or a /SwitchStatement/, in which case it is regarded as possessing +-- a label set consisting of the single element, @empty@. +annotateStatement :: Data a => + (a -> Set Label) + -> (Set Label -> a -> a) + -> Statement a + -> Statement a +annotateStatement r w s = case s of + LabelledStmt ann lab stmt -> + let labelset = Set.insert (id2Label lab) (r ann) + newstmt = annotateStatement r w $ w labelset <$> stmt + in LabelledStmt ann lab newstmt + SwitchStmt {} -> + let labelset = Set.insert EmptyLabel (r $ getAnnotation s) + in descend (annotateStatement r w) (w labelset <$> s) + _ | isIterationStmt s -> + let labelset = Set.insert EmptyLabel (r $ getAnnotation s) + in descend (annotateStatement r w) (w labelset <$> s) + _ -> descend (annotateStatement r w) s + +id2Label :: Id a -> Label +id2Label = Label . unId + diff --git a/src/Language/ECMAScript3/Lexer.hs b/src/Language/ECMAScript3/Lexer.hs new file mode 100644 index 0000000..2348994 --- /dev/null +++ b/src/Language/ECMAScript3/Lexer.hs @@ -0,0 +1,65 @@ +-- | This isn't a lexer in the sense that it provides a JavaScript +-- token-stream. This module provides character-parsers for various +-- JavaScript tokens. + +module Language.ECMAScript3.Lexer(lexeme,identifier,reserved,operator,reservedOp,charLiteral, + stringLiteral,natural,integer,float,naturalOrFloat, + decimal,hexadecimal,octal,symbol,whiteSpace,parens, + braces,brackets,squares,semi,comma,colon,dot, + identifierStart) where + +import Prelude hiding (lex) +import Text.Parsec +import qualified Text.Parsec.Token as T + +identifierStart = letter <|> oneOf "$_" + +javascriptDef = + T.LanguageDef "/*" + "*/" + "//" + False -- no nested comments + identifierStart + (alphaNum <|> oneOf "$_") -- identifier rest + (oneOf "{}<>()~.,?:|&^=!+-*/%!") -- operator start + (oneOf "=<>|&+") -- operator rest + ["break", "case", "catch", "const", "continue", "debugger", + "default", "delete", "do", "else", "enum", "false", "finally", + "for", "function", "if", "instanceof", "in", "let", "new", + "null", "return", "switch", "this", "throw", "true", "try", + "typeof", "var", "void", "while", "with"] + ["|=", "^=", "&=", "<<=", ">>=", ">>>=", "+=", "-=", "*=", "/=", + "%=", "=", ";", ",", "?", ":", "||", "&&", "|", "^", "&", + "===", "==", "=", "!==", "!=", "<<", "<=", "<", ">>>", ">>", + ">=", ">", "++", "--", "+", "-", "*", "/", "%", "!", "~", ".", + "[", "]", "{", "}", "(", ")","</","instanceof"] + True -- case-sensitive + +lex :: T.TokenParser st +lex = T.makeTokenParser javascriptDef + +-- everything but commaSep and semiSep +identifier = T.identifier lex +reserved = T.reserved lex +operator = T.operator lex +reservedOp = T.reservedOp lex +charLiteral = T.charLiteral lex +stringLiteral = T.stringLiteral lex +natural = T.natural lex +integer = T.integer lex +float = T.float lex +naturalOrFloat = T.naturalOrFloat lex +decimal = T.decimal lex +hexadecimal = T.hexadecimal lex +octal = T.octal lex +symbol = T.symbol lex +whiteSpace = T.whiteSpace lex +parens = T.parens lex +braces = T.braces lex +squares = T.squares lex +semi = T.semi lex +comma = T.comma lex +colon = T.colon lex +dot = T.dot lex +brackets = T.brackets lex +lexeme = T.lexeme lex diff --git a/src/Language/ECMAScript3/Parser.hs b/src/Language/ECMAScript3/Parser.hs new file mode 100644 index 0000000..cebbf53 --- /dev/null +++ b/src/Language/ECMAScript3/Parser.hs @@ -0,0 +1,759 @@ +-- | Parser for ECMAScript 3. + +module Language.ECMAScript3.Parser + (parse + , parseScriptFromString + , parseJavaScriptFromFile + , parseScript + , parseExpression + , parseString + , ParsedStatement + , ParsedExpression + , parseSimpleExpr' + , parseBlockStmt + , parseStatement + , StatementParser + , ExpressionParser + , assignExpr + ) where + +import Language.ECMAScript3.Lexer hiding (identifier) +import qualified Language.ECMAScript3.Lexer as Lexer +import Language.ECMAScript3.Syntax +import Language.ECMAScript3.Syntax.Annotations +import Data.Default +import Text.Parsec hiding (parse) +import Text.Parsec.Expr +import Control.Monad(liftM,liftM2) +import Control.Monad.Trans (MonadIO,liftIO) +import Numeric(readDec,readOct,readHex) +import Data.Char +import Control.Monad.Identity +import Data.Maybe (isJust, isNothing, fromMaybe) + +-- We parameterize the parse tree over source-locations. +type ParsedStatement = Statement SourcePos +type ParsedExpression = Expression SourcePos + +type CharParser a = ParsecT String ParserState Identity a + +-- These parsers can store some arbitrary state +type StatementParser = CharParser ParsedStatement +type ExpressionParser = CharParser ParsedExpression +-- the statement label stack +type ParserState = [String] + +initialParserState :: ParserState +initialParserState = [] + +-- | checks if the label is not yet on the stack, if it is -- throws +-- an error; otherwise it pushes it onto the stack +pushLabel :: String -> CharParser () +pushLabel lab = do labs <- getState + pos <- getPosition + if lab `elem` labs + then fail $ "Duplicate label at " ++ show pos + else putState (lab:labs) + +popLabel :: CharParser () +popLabel = modifyState safeTail + where safeTail [] = [] + safeTail (_:xs) = xs + +clearLabels :: ParserState -> ParserState +clearLabels _ = [] + +withFreshLabelStack :: CharParser a -> CharParser a +withFreshLabelStack p = do oldState <- getState + putState $ clearLabels oldState + a <- p + putState oldState + return a + +identifier :: CharParser (Id SourcePos) +identifier = + liftM2 Id getPosition Lexer.identifier + +--{{{ Statements + +-- Keep in mind that Token.reserved parsers (exported from the lexer) do not +-- consume any input on failure. Note that all statements (expect for labelled +-- and expression statements) begin with a reserved-word. If we fail to parse +-- this reserved-word, no input is consumed. Hence, we can have the massive or +-- block that is parseExpression. Note that if the reserved-word is parsed, it +-- must be whatever statement the reserved-word indicates. If we fail after the +-- reserved-word, we truly have a syntax error. Since input has been consumed, +-- <|> will not try its alternate in parseExpression, and we will fail. + +parseIfStmt:: StatementParser +parseIfStmt = do + pos <- getPosition + reserved "if" + test <- parseParenExpr <?> "parenthesized test-expression in if statement" + consequent <- parseStatement <?> "true-branch of if statement" + optional semi -- TODO: in spec? + ((do reserved "else" + alternate <- parseStatement + return $ IfStmt pos test consequent alternate) + <|> return (IfSingleStmt pos test consequent)) + +parseSwitchStmt:: StatementParser +parseSwitchStmt = + let parseDefault = do + pos <- getPosition + reserved "default" + colon + statements <- many parseStatement + return (CaseDefault pos statements) + parseCase = do + pos <- getPosition + reserved "case" + condition <- parseListExpr + colon + actions <- many parseStatement + return (CaseClause pos condition actions) + isCaseDefault (CaseDefault _ _) = True + isCaseDefault _ = False + checkClauses cs = case filter isCaseDefault cs of + (_:c:_) -> fail $ "duplicate default clause in switch statement at " ++ + show (getAnnotation c) + _ -> return () + in do pos <- getPosition + reserved "switch" + test <- parseParenExpr + clauses <- braces $ many $ parseDefault <|> parseCase + checkClauses clauses + return (SwitchStmt pos test clauses) + +parseWhileStmt:: StatementParser +parseWhileStmt = do + pos <- getPosition + reserved "while" + test <- parseParenExpr <?> "parenthesized test-expression in while loop" + body <- parseStatement + return (WhileStmt pos test body) + +parseDoWhileStmt:: StatementParser +parseDoWhileStmt = do + pos <- getPosition + reserved "do" + body <- parseBlockStmt + reserved "while" <?> "while at the end of a do block" + test <- parseParenExpr <?> "parenthesized test-expression in do loop" + optional semi + return (DoWhileStmt pos body test) + +parseContinueStmt:: StatementParser +parseContinueStmt = do + pos <- getPosition + reserved "continue" + pos' <- getPosition + -- Ensure that the identifier is on the same line as 'continue.' + id <- if sourceLine pos == sourceLine pos' + then liftM Just identifier <|> return Nothing + else return Nothing + optional semi + return $ ContinueStmt pos id + +parseBreakStmt:: StatementParser +parseBreakStmt = do + pos <- getPosition + reserved "break" + pos' <- getPosition + -- Ensure that the identifier is on the same line as 'break.' + id <- if sourceLine pos == sourceLine pos' + then liftM Just identifier <|> return Nothing + else return Nothing + optional semi + return $ BreakStmt pos id + +parseBlockStmt:: StatementParser +parseBlockStmt = do + pos <- getPosition + statements <- braces (many parseStatement) + return (BlockStmt pos statements) + +parseEmptyStmt:: StatementParser +parseEmptyStmt = do + pos <- getPosition + semi + return (EmptyStmt pos) + +parseLabelledStmt:: StatementParser +parseLabelledStmt = do + pos <- getPosition + -- Lookahead for the colon. If we don't see it, we are parsing an identifier + -- for an expression statement. + label <- try (do label <- identifier + colon + return label) + pushLabel $ unId label + statement <- parseStatement + popLabel + return (LabelledStmt pos label statement) + +parseExpressionStmt:: StatementParser +parseExpressionStmt = do + pos <- getPosition + expr <- parseListExpr -- TODO: spec 12.4? + optional semi + return $ ExprStmt pos expr + + +parseForInStmt:: StatementParser +parseForInStmt = + let parseInit = (reserved "var" >> liftM ForInVar identifier) + <|> liftM ForInLVal lvalue + in do pos <- getPosition + -- Lookahead, so that we don't clash with parseForStmt + (init,expr) <- try $ do reserved "for" + parens $ do init <- parseInit + reserved "in" + expr <- parseExpression + return (init,expr) + body <- parseStatement + return $ ForInStmt pos init expr body + +parseForStmt:: StatementParser +parseForStmt = + let parseInit = (reserved "var" >> liftM VarInit (parseVarDecl `sepBy` comma)) + <|> liftM ExprInit parseListExpr + <|> return NoInit + in do pos <- getPosition + reserved "for" + reservedOp "(" + init <- parseInit + semi + test <- optionMaybe parseExpression + semi + iter <- optionMaybe parseListExpr + reservedOp ")" <?> "closing paren" + stmt <- parseStatement + return $ ForStmt pos init test iter stmt + +parseTryStmt:: StatementParser +parseTryStmt = + let parseCatchClause = do pos <- getPosition + reserved "catch" + id <- parens identifier + stmt <- parseStatement + return $ CatchClause pos id stmt + in do reserved "try" + pos <- getPosition + guarded <- parseStatement + mCatch <- optionMaybe parseCatchClause + mFinally <- optionMaybe $ reserved "finally" >> parseStatement + -- the spec requires at least a catch or a finally block to + -- be present + if isJust mCatch || isJust mFinally + then return $ TryStmt pos guarded mCatch mFinally + else fail $ "A try statement should have at least a catch\ + \ or a finally block, at " ++ show pos + +parseThrowStmt:: StatementParser +parseThrowStmt = do + pos <- getPosition + reserved "throw" + expr <- parseExpression + optional semi + return (ThrowStmt pos expr) + +parseReturnStmt:: StatementParser +parseReturnStmt = do + pos <- getPosition + reserved "return" + expr <- optionMaybe parseListExpr + optional semi + return (ReturnStmt pos expr) + +parseWithStmt:: StatementParser +parseWithStmt = do + pos <- getPosition + reserved "with" + context <- parseParenExpr + stmt <- parseStatement + return (WithStmt pos context stmt) + +parseVarDecl = do + pos <- getPosition + id <- identifier + init <- (reservedOp "=" >> liftM Just parseExpression) <|> return Nothing + return (VarDecl pos id init) + +parseVarDeclStmt:: StatementParser +parseVarDeclStmt = do + pos <- getPosition + reserved "var" + decls <- parseVarDecl `sepBy` comma + optional semi + return (VarDeclStmt pos decls) + +parseFunctionStmt:: StatementParser +parseFunctionStmt = do + pos <- getPosition + name <- try (reserved "function" >> identifier) -- ambiguity with FuncExpr + args <- parens (identifier `sepBy` comma) + -- label sets don't cross function boundaries + body <- withFreshLabelStack parseBlockStmt <?> "function body in { ... }" + return (FunctionStmt pos name args body) + +parseStatement:: StatementParser +parseStatement = parseIfStmt <|> parseSwitchStmt <|> parseWhileStmt + <|> parseDoWhileStmt <|> parseContinueStmt <|> parseBreakStmt + <|> parseBlockStmt <|> parseEmptyStmt <|> parseForInStmt <|> parseForStmt + <|> parseTryStmt <|> parseThrowStmt <|> parseReturnStmt <|> parseWithStmt + <|> parseVarDeclStmt <|> parseFunctionStmt + -- labelled, expression and the error message always go last, in this order + <|> parseLabelledStmt <|> parseExpressionStmt <?> "statement" + +--}}} + +--{{{ Expressions + +-- References used to construct this stuff: +-- + http://developer.mozilla.org/en/docs/ +-- Core_JavaScript_1.5_Reference:Operators:Operator_Precedence +-- + http://www.mozilla.org/js/language/grammar14.html +-- +-- Aren't expression tables nice? Well, we can't quite use them, because of +-- JavaScript's ternary (?:) operator. We have to use two expression tables. +-- We use one expression table for the assignment operators that bind looser +-- than ?: (assignTable). The terms of assignTable are ternary expressions +-- (parseTernaryExpr). parseTernaryExpr left-factors the left-recursive +-- production for ?:, and is defined over the second expression table, +-- exprTable, which consists of operators that bind tighter than ?:. The terms +-- of exprTable are atomic expressions, parenthesized expressions, functions and +-- array references. + +--{{{ Primary expressions + +parseThisRef:: ExpressionParser +parseThisRef = do + pos <- getPosition + reserved "this" + return (ThisRef pos) + +parseNullLit:: ExpressionParser +parseNullLit = do + pos <- getPosition + reserved "null" + return (NullLit pos) + + +parseBoolLit:: ExpressionParser +parseBoolLit = do + pos <- getPosition + let parseTrueLit = reserved "true" >> return (BoolLit pos True) + parseFalseLit = reserved "false" >> return (BoolLit pos False) + parseTrueLit <|> parseFalseLit + +parseVarRef:: ExpressionParser +parseVarRef = liftM2 VarRef getPosition identifier + +parseArrayLit:: ExpressionParser +parseArrayLit = liftM2 ArrayLit getPosition (squares (parseExpression `sepEndBy` comma)) + +parseFuncExpr = do + pos <- getPosition + reserved "function" + name <- optionMaybe identifier + args <- parens (identifier `sepBy` comma) + -- labels don't cross function boundaries + body <- withFreshLabelStack parseBlockStmt + return $ FuncExpr pos name args body + +--{{{ parsing strings + +escapeChars = + [('\'','\''),('\"','\"'),('\\','\\'),('b','\b'),('f','\f'),('n','\n'), + ('r','\r'),('t','\t'),('v','\v'),('/','/'),(' ',' '),('0','\0')] + +allEscapes:: String +allEscapes = map fst escapeChars + +parseEscapeChar = do + c <- oneOf allEscapes + let (Just c') = lookup c escapeChars -- will succeed due to line above + return c' + +parseAsciiHexChar = do + char 'x' + d1 <- hexDigit + d2 <- hexDigit + return ((chr.fst.head.readHex) (d1:d2:"")) + +parseUnicodeHexChar = do + char 'u' + liftM (chr.fst.head.readHex) + (sequence [hexDigit,hexDigit,hexDigit,hexDigit]) + +isWhitespace ch = ch `elem` " \t" + + +-- The endWith argument is either single-quote or double-quote, depending on how +-- we opened the string. +parseStringLit' endWith = + (char endWith >> return "") <|> + (do try (string "\\'") + cs <- parseStringLit' endWith + return $ "'" ++ cs) <|> + (do char '\\' + c <- parseEscapeChar <|> parseAsciiHexChar <|> parseUnicodeHexChar <|> + char '\r' <|> char '\n' + cs <- parseStringLit' endWith + if c == '\r' || c == '\n' + then return (c:dropWhile isWhitespace cs) + else return (c:cs)) <|> + liftM2 (:) anyChar (parseStringLit' endWith) + +parseStringLit:: ExpressionParser +parseStringLit = do + pos <- getPosition + -- parseStringLit' takes as an argument the quote-character that opened the + -- string. + str <- lexeme $ (char '\'' >>= parseStringLit') <|> (char '\"' >>= parseStringLit') + -- CRUCIAL: Parsec.Token parsers expect to find their token on the first + -- character, and read whitespaces beyond their tokens. Without 'lexeme' + -- above, expressions like: + -- var s = "string" ; + -- do not parse. + return $ StringLit pos str + +--}}} + +parseRegexpLit:: ExpressionParser +parseRegexpLit = do + let parseFlags = do + flags <- many (oneOf "mgi") + return $ \f -> f ('g' `elem` flags) ('i' `elem` flags) + let parseEscape :: CharParser Char + parseEscape = char '\\' >> anyChar + let parseChar :: CharParser Char + parseChar = noneOf "/" + let parseRe = (char '/' >> return "") <|> + (do char '\\' + ch <- anyChar -- TOOD: too lenient + rest <- parseRe + return ('\\':ch:rest)) <|> + liftM2 (:) anyChar parseRe + pos <- getPosition + char '/' + pat <- parseRe --many1 parseChar + flags <- parseFlags + spaces -- crucial for Parsec.Token parsers + return $ flags (RegexpLit pos pat) + +parseObjectLit:: ExpressionParser +parseObjectLit = + let parseProp = do + -- Parses a string, identifier or integer as the property name. I + -- apologize for the abstruse style, but it really does make the code + -- much shorter. + name <- liftM (\(StringLit p s) -> PropString p s) parseStringLit + <|> liftM2 PropId getPosition identifier + <|> liftM2 PropNum getPosition decimal + colon + val <- assignExpr + return (name,val) + in do pos <- getPosition + props <- braces (parseProp `sepEndBy` comma) <?> "object literal" + return $ ObjectLit pos props + +--{{{ Parsing numbers. From pg. 17-18 of ECMA-262. + +hexLit = do + try (string "0x") + digits <- many1 (oneOf "0123456789abcdefABCDEF") + [(hex,"")] <- return $ Numeric.readHex digits + return (True, hex) + +-- Creates a decimal value from a whole, fractional and exponent part. +mkDecimal:: Double -> Double -> Int -> Double +mkDecimal w f e = if f >= 1.0 + then mkDecimal w (f / 10.0) e + else (w + f) * (10.0 ^^ e) + +exponentPart = do + oneOf "eE" + (char '+' >> decimal) <|> (char '-' >> negate `fmap` decimal) <|> decimal + +--wrap a parser's result in a Just: +jparser = liftM Just + +decLit = + (do whole <- decimal + mfrac <- option Nothing (jparser (char '.' >> decimal)) + mexp <- option Nothing (jparser exponentPart) + if isNothing mfrac && isNothing mexp + then return (True, fromIntegral whole) + else return (False, mkDecimal (fromIntegral whole) + (fromIntegral (fromMaybe 0 mfrac)) + (fromIntegral (fromMaybe 0 mexp)))) <|> + (do frac <- char '.' >> decimal + exp <- option 0 exponentPart + return (False, mkDecimal 0.0 (fromIntegral frac) (fromIntegral exp))) + +parseNumLit:: ExpressionParser +parseNumLit = do + pos <- getPosition + (isint, num) <- lexeme $ hexLit <|> decLit + notFollowedBy identifierStart <?> "whitespace" + if isint + then return $ IntLit pos (round num) + else return $ NumLit pos num + + +------------------------------------------------------------------------------ +-- Position Helper +------------------------------------------------------------------------------ + +withPos cstr p = do { pos <- getPosition; e <- p; return $ cstr pos e } + +------------------------------------------------------------------------------- +-- Compound Expression Parsers +------------------------------------------------------------------------------- + +dotRef e = (reservedOp "." >> withPos cstr identifier) <?> "property.ref" + where cstr pos = DotRef pos e + +funcApp e = parens (withPos cstr (parseExpression `sepBy` comma)) + <?>"(function application)" + where cstr pos = CallExpr pos e + +bracketRef e = brackets (withPos cstr parseExpression) <?> "[property-ref]" + where cstr pos = BracketRef pos e + +------------------------------------------------------------------------------- +-- Expression Parsers +------------------------------------------------------------------------------- + +parseParenExpr:: ExpressionParser +parseParenExpr = withPos ParenExpr (parens parseListExpr) + +-- everything above expect functions +parseExprForNew = parseThisRef <|> parseNullLit <|> parseBoolLit <|> parseStringLit + <|> parseArrayLit <|> parseParenExpr <|> parseNewExpr <|> parseNumLit + <|> parseRegexpLit <|> parseObjectLit <|> parseVarRef + +-- all the expression parsers defined above +parseSimpleExpr' = parseThisRef <|> parseNullLit <|> parseBoolLit + <|> parseStringLit <|> parseArrayLit <|> parseParenExpr + <|> parseFuncExpr <|> parseNumLit <|> parseRegexpLit <|> parseObjectLit + <|> parseVarRef + +parseNewExpr = + (do pos <- getPosition + reserved "new" + constructor <- parseSimpleExprForNew Nothing -- right-associativity + arguments <- try (parens (parseExpression `sepBy` comma)) <|> return [] + return (NewExpr pos constructor arguments)) <|> + parseSimpleExpr' + +parseSimpleExpr (Just e) = ((dotRef e <|> funcApp e <|> bracketRef e) >>= + parseSimpleExpr . Just) + <|> return e +parseSimpleExpr Nothing = do + e <- parseNewExpr <?> "expression (3)" + parseSimpleExpr (Just e) + +parseSimpleExprForNew (Just e) = ((dotRef e <|> bracketRef e) >>= + parseSimpleExprForNew . Just) + <|> return e +parseSimpleExprForNew Nothing = do + e <- parseNewExpr <?> "expression (3)" + parseSimpleExprForNew (Just e) + +--}}} + +makeInfixExpr str constr = Infix parser AssocLeft where + parser:: CharParser (Expression SourcePos -> Expression SourcePos -> Expression SourcePos) + parser = do + pos <- getPosition + reservedOp str + return (InfixExpr pos constr) -- points-free, returns a function + + +-- apparently, expression tables can't handle immediately-nested prefixes +parsePrefixedExpr = do + pos <- getPosition + op <- optionMaybe $ (reservedOp "!" >> return PrefixLNot) <|> + (reservedOp "~" >> return PrefixBNot) <|> + (try (lexeme $ char '-' >> notFollowedBy (char '-')) >> + return PrefixMinus) <|> + (try (lexeme $ char '+' >> notFollowedBy (char '+')) >> + return PrefixPlus) <|> + (reserved "typeof" >> return PrefixTypeof) <|> + (reserved "void" >> return PrefixVoid) <|> + (reserved "delete" >> return PrefixDelete) + case op of + Nothing -> unaryAssignExpr + Just op -> do + innerExpr <- parsePrefixedExpr + return (PrefixExpr pos op innerExpr) + +exprTable:: [[Operator String ParserState Identity ParsedExpression]] +exprTable = + [ [ makeInfixExpr "==" OpEq + , makeInfixExpr "!=" OpNEq + , makeInfixExpr "===" OpStrictEq + , makeInfixExpr "!==" OpStrictNEq + ] + + , [ makeInfixExpr "||" OpLOr ] + + , [ makeInfixExpr "&&" OpLAnd ] + + , [ makeInfixExpr "|" OpBOr ] + + , [ makeInfixExpr "^" OpBXor ] + + , [ makeInfixExpr "&" OpBAnd ] + + , [ makeInfixExpr "<" OpLT + , makeInfixExpr "<=" OpLEq + , makeInfixExpr ">" OpGT + , makeInfixExpr ">=" OpGEq + , makeInfixExpr "instanceof" OpInstanceof + , makeInfixExpr "in" OpIn + ] + + , [ makeInfixExpr "<<" OpLShift + , makeInfixExpr ">>" OpSpRShift + , makeInfixExpr ">>>" OpZfRShift + ] + + , [ makeInfixExpr "+" OpAdd + , makeInfixExpr "-" OpSub + ] + + , [ makeInfixExpr "*" OpMul + , makeInfixExpr "/" OpDiv + , makeInfixExpr "%" OpMod + ] + ] + +parseExpression' = + buildExpressionParser exprTable parsePrefixedExpr <?> "simple expression" + +asLValue :: SourcePos + -> Expression SourcePos + -> CharParser (LValue SourcePos) +asLValue p' e = case e of + VarRef p (Id _ x) -> return (LVar p x) + DotRef p e (Id _ x) -> return (LDot p e x) + BracketRef p e1 e2 -> return (LBracket p e1 e2) + otherwise -> fail $ "expected a left-value at " ++ show p' + +lvalue :: CharParser (LValue SourcePos) +lvalue = do + p <- getPosition + e <- parseSimpleExpr Nothing + asLValue p e + + +unaryAssignExpr :: CharParser ParsedExpression +unaryAssignExpr = do + p <- getPosition + let prefixInc = do + reservedOp "++" + liftM (UnaryAssignExpr p PrefixInc) lvalue + let prefixDec = do + reservedOp "--" + liftM (UnaryAssignExpr p PrefixDec) lvalue + let postfixInc e = do + reservedOp "++" + liftM (UnaryAssignExpr p PostfixInc) (asLValue p e) + let postfixDec e = do + reservedOp "--" + liftM (UnaryAssignExpr p PostfixDec) (asLValue p e) + let other = do + e <- parseSimpleExpr Nothing + postfixInc e <|> postfixDec e <|> return e + prefixInc <|> prefixDec <|> other + + +parseTernaryExpr':: CharParser (ParsedExpression,ParsedExpression) +parseTernaryExpr' = do + reservedOp "?" + l <- assignExpr + colon + r <- assignExpr + return (l,r) + +parseTernaryExpr:: ExpressionParser +parseTernaryExpr = do + e <- parseExpression' + e' <- optionMaybe parseTernaryExpr' + case e' of + Nothing -> return e + Just (l,r) -> do p <- getPosition + return $ CondExpr p e l r + + +assignOp :: CharParser AssignOp +assignOp = + (reservedOp "=" >> return OpAssign) <|> + (reservedOp "+=" >> return OpAssignAdd) <|> + (reservedOp "-=" >> return OpAssignSub) <|> + (reservedOp "*=" >> return OpAssignMul) <|> + (reservedOp "/=" >> return OpAssignDiv) <|> + (reservedOp "%=" >> return OpAssignMod) <|> + (reservedOp "<<=" >> return OpAssignLShift) <|> + (reservedOp ">>=" >> return OpAssignSpRShift) <|> + (reservedOp ">>>=" >> return OpAssignZfRShift) <|> + (reservedOp "&=" >> return OpAssignBAnd) <|> + (reservedOp "^=" >> return OpAssignBXor) <|> + (reservedOp "|=" >> return OpAssignBOr) + + +assignExpr :: ExpressionParser +assignExpr = do + p <- getPosition + lhs <- parseTernaryExpr + let assign = do + op <- assignOp + lhs <- asLValue p lhs + rhs <- assignExpr + return (AssignExpr p op lhs rhs) + assign <|> return lhs + +parseExpression:: ExpressionParser +parseExpression = assignExpr + +parseListExpr = liftM2 ListExpr getPosition (assignExpr `sepBy1` comma) + +parseScript:: CharParser (JavaScript SourcePos) +parseScript = do + whiteSpace + liftM2 Script getPosition (parseStatement `sepBy` whiteSpace) + +-- | Parse from a stream; same as 'Text.Parsec.parse' +parse :: Stream s Identity t => + Parsec s [String] a -- ^ The parser to use + -> SourceName -- ^ Name of the source file + -> s -- ^ the stream to parse, usually a 'String' + -> Either ParseError a +parse p = runParser p initialParserState + +-- | Read a JavaScript program from file an parse it into a list of +-- statements +parseJavaScriptFromFile :: MonadIO m => String -- ^ file name + -> m [Statement SourcePos] +parseJavaScriptFromFile filename = do + chars <- liftIO $ readFile filename + case parse parseScript filename chars of + Left err -> fail (show err) + Right (Script _ stmts) -> return stmts + +-- | Parse a JavaScript program from a string +parseScriptFromString :: String -- ^ source file name + -> String -- ^ JavaScript source to parse + -> Either ParseError (JavaScript SourcePos) +parseScriptFromString = parse parseScript + +-- | Parse a JavaScript source string into a list of statements +parseString :: String -- ^ JavaScript source + -> [Statement SourcePos] +parseString str = case parse parseScript "" str of + Left err -> error (show err) + Right (Script _ stmts) -> stmts diff --git a/src/Language/ECMAScript3/PrettyPrint.hs b/src/Language/ECMAScript3/PrettyPrint.hs new file mode 100644 index 0000000..b67b638 --- /dev/null +++ b/src/Language/ECMAScript3/PrettyPrint.hs @@ -0,0 +1,231 @@ +-- |Pretty-printing JavaScript. +module Language.ECMAScript3.PrettyPrint + ( +-- stmt +-- , expr + javaScript + , renderStatements + , renderExpression + ) where + +import Text.PrettyPrint.HughesPJ +import Language.ECMAScript3.Syntax + +-- | Renders a list of statements as a 'String' +renderStatements :: [Statement a] -> String +renderStatements = render . stmtList + +-- | Renders a list of statements as a 'String' +renderExpression :: Expression a -> String +renderExpression = render . expr + +-- Displays the statement in { ... }, unless it is a block itself. +inBlock:: Statement a -> Doc +inBlock s@(BlockStmt _ _) = stmt s +inBlock s = lbrace $+$ nest 2 (stmt s) $+$ rbrace + +-- Displays the expression in ( ... ), unless it is a parenthesized expression +inParens:: Expression a -> Doc +inParens e@(ParenExpr _ _) = expr e +inParens e = parens (expr e) + +pp (Id _ str) = text str + +forInit :: ForInit a -> Doc +forInit t = case t of + NoInit -> empty + VarInit vs -> text "var" <+> cat (punctuate comma $ map varDecl vs) + ExprInit e -> expr e + +forInInit :: ForInInit a -> Doc +forInInit t = case t of + ForInVar id -> text "var" <+> pp id + ForInLVal lv -> lvalue lv + +caseClause :: CaseClause a -> Doc +caseClause (CaseClause _ e ss) = + text "case" $+$ expr e <+> colon $$ nest 2 (stmtList ss) +caseClause (CaseDefault _ ss) = + text "default:" $$ nest 2 (stmtList ss) + +varDecl :: VarDecl a -> Doc +varDecl (VarDecl _ id Nothing) = pp id +varDecl (VarDecl _ id (Just e)) = pp id <+> equals <+> expr e + +stmt :: Statement a -> Doc +stmt s = case s of + BlockStmt _ ss -> lbrace $+$ nest 2 (stmtList ss) $$ rbrace + EmptyStmt _ -> semi + ExprStmt _ e -> expr e <> semi + IfSingleStmt _ test cons -> text "if" <+> inParens test $$ stmt cons + IfStmt _ test cons alt -> + text "if" <+> inParens test $$ stmt cons $$ text "else" <+> stmt alt + SwitchStmt _ e cases -> + text "switch" <+> inParens e $$ + braces (nest 2 (vcat (map caseClause cases))) + WhileStmt _ test body -> text "while" <+> inParens test $$ stmt body + ReturnStmt _ Nothing -> text "return" + ReturnStmt _ (Just e) -> text "return" <+> expr e + DoWhileStmt _ s e -> + text "do" $$ (stmt s <+> text "while" <+> inParens e <> semi) + BreakStmt _ Nothing -> text "break" <> semi + BreakStmt _ (Just label) -> text "break" <+> pp label <> semi + ContinueStmt _ Nothing -> text "continue" <> semi + ContinueStmt _ (Just label) -> text"continue" <+> pp label <> semi + LabelledStmt _ label s -> pp label <> colon $$ stmt s + ForInStmt p init e body -> + text "for" <+> + parens (forInInit init <+> text "in" <+> expr e) $+$ stmt body + ForStmt _ init incr test body -> + text "for" <+> + parens (forInit init <> semi <+> mexpr incr <> semi <+> mexpr test) $$ + stmt body + TryStmt _ stmt mcatch mfinally -> + text "try" $$ inBlock stmt $$ ppCatch $$ ppFinally + where ppFinally = case mfinally of + Nothing -> empty + Just stmt -> text "finally" <> inBlock stmt + ppCatch = case mcatch of + Nothing -> empty + Just (CatchClause _ id s) -> + text "catch" <+> (parens.pp) id <+> inBlock s + ThrowStmt _ e -> text "throw" <+> expr e <> semi + WithStmt _ expr s -> text "with" <+> inParens expr $$ stmt s + VarDeclStmt _ decls -> + text "var" <+> cat (punctuate comma (map varDecl decls)) <> semi + FunctionStmt _ name args s -> + text "function" <+> pp name <> + parens (cat $ punctuate comma (map pp args)) $$ + inBlock s + +stmtList :: [Statement a] -> Doc +stmtList = vcat . map stmt + +prop :: Prop a -> Doc +prop p = case p of + PropId _ id -> pp id + PropString _ str -> doubleQuotes (text (jsEscape str)) + PropNum _ n -> text (show n) + +infixOp op = text $ case op of + OpMul -> "*" + OpDiv -> "/" + OpMod -> "%" + OpAdd -> "+" + OpSub -> "-" + OpLShift -> "<<" + OpSpRShift -> ">>" + OpZfRShift -> ">>>" + OpLT -> "<" + OpLEq -> "<=" + OpGT -> ">" + OpGEq -> ">=" + OpIn -> "in" + OpInstanceof -> "instanceof" + OpEq -> "==" + OpNEq -> "!=" + OpStrictEq -> "===" + OpStrictNEq -> "!==" + OpBAnd -> "&" + OpBXor -> "^" + OpBOr -> "|" + OpLAnd -> "&&" + OpLOr -> "||" + + +prefixOp op = text $ case op of + PrefixLNot -> "!" + PrefixBNot -> "~" + PrefixPlus -> "+" + PrefixMinus -> "-" + PrefixTypeof -> "typeof" + PrefixVoid -> "void" + PrefixDelete -> "delete" + + +assignOp op = text $ case op of + OpAssign -> "=" + OpAssignAdd -> "+=" + OpAssignSub -> "-=" + OpAssignMul -> "*=" + OpAssignDiv -> "/=" + OpAssignMod -> "%=" + OpAssignLShift -> "<<=" + OpAssignSpRShift -> ">>=" + OpAssignZfRShift -> ">>>=" + OpAssignBAnd -> "&=" + OpAssignBXor -> "^=" + OpAssignBOr -> "|=" + +-- Based on: +-- http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide:Literals +jsEscape:: String -> String +jsEscape "" = "" +jsEscape (ch:chs) = sel ch ++ jsEscape chs where + sel '\b' = "\\b" + sel '\f' = "\\f" + sel '\n' = "\\n" + sel '\r' = "\\r" + sel '\t' = "\\t" + sel '\v' = "\\v" + sel '\'' = "\\'" + sel '\"' = "\\\"" + sel '\\' = "\\\\" + sel x = [x] + -- We don't have to do anything about \X, \x and \u escape sequences. + +lvalue :: LValue a -> Doc +lvalue (LVar _ x) = text x +lvalue (LDot _ e x) = expr e <> text "." <> text x +lvalue (LBracket _ e1 e2) = expr e1 <> brackets (expr e2) + +expr :: Expression a -> Doc +expr e = case e of + StringLit _ str -> doubleQuotes (text (jsEscape str)) + RegexpLit _ re global ci -> + text "/" <> text re <> text "/" <> g <> i where + g = if global then text "g" else empty + i = if ci then text "i" else empty + NumLit _ n -> text (show n) + IntLit _ n -> text (show n) + BoolLit _ True -> text "true" + BoolLit _ False -> text "false" + NullLit _ -> text "null" + ArrayLit _ es -> brackets $ cat $ punctuate comma (map expr es) + ObjectLit _ xs -> + braces (hsep (punctuate comma (map pp' xs))) where + pp' (n,v) = prop n <> colon <+> expr v + ThisRef _ -> text "this" + VarRef _ id -> pp id + DotRef _ e' id -> expr e' <> text "." <> pp id + BracketRef _ container key -> expr container <> brackets (expr key) + NewExpr _ constr args -> + text "new" <+> expr constr <> + parens (cat $ punctuate comma (map expr args)) + PrefixExpr _ op e' -> prefixOp op <+> expr e' + InfixExpr _ op left right -> expr left <+> infixOp op <+> expr right + CondExpr _ test cons alt -> + expr test <+> text "?" <+> expr cons <+> colon <+> expr alt + AssignExpr _ op l r -> lvalue l <+> assignOp op <+> expr r + UnaryAssignExpr _ op e' -> case op of + PrefixInc -> text "++" <> lvalue e' + PrefixDec -> text "--" <> lvalue e' + PostfixInc -> lvalue e' <> text "++" + PostfixDec -> lvalue e' <> text "--" + ParenExpr _ e' -> parens (expr e') + ListExpr _ es -> cat $ punctuate comma (map expr es) + CallExpr _ f args -> + expr f <> parens (cat $ punctuate comma (map expr args)) + FuncExpr _ name args body -> + text "function" <+> text (maybe "" unId name) <+> + parens (cat $ punctuate comma (map pp args)) $$ + inBlock body + +mexpr :: Maybe (Expression a) -> Doc +mexpr Nothing = empty +mexpr (Just e) = expr e + +-- | Renders a JavaScript program as a document, the show instance of +-- 'Doc' will pretty-print it automatically +javaScript :: JavaScript a -> Doc +javaScript (Script _ ss) = stmtList ss diff --git a/src/Language/ECMAScript3/Syntax.hs b/src/Language/ECMAScript3/Syntax.hs new file mode 100644 index 0000000..f43a33a --- /dev/null +++ b/src/Language/ECMAScript3/Syntax.hs @@ -0,0 +1,236 @@ +-- |ECMAScript 3 syntax. /Spec/ refers to the ECMA-262 specification, 3rd edition. +module Language.ECMAScript3.Syntax (JavaScript(..) + ,unJavaScript + ,Statement(..) + ,isIterationStmt + ,CaseClause(..) + ,CatchClause(..) + ,ForInit(..) + ,ForInInit(..) + ,VarDecl(..) + ,Expression(..) + ,InfixOp(..) + ,AssignOp(..) + ,Id(..) + ,unId + ,PrefixOp(..) + ,Prop(..) + ,UnaryAssignOp(..) + ,LValue (..) + ,SourcePos + ) where + +import Text.Parsec.Pos(initialPos,SourcePos) -- used by data JavaScript +import Data.Generics(Data,Typeable) +import Data.Foldable (Foldable) +import Data.Traversable (Traversable) +import Data.Default + +data JavaScript a + -- |A script in \<script\> ... \</script\> tags. + = Script a [Statement a] + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +instance Default a => Default (JavaScript a) where + def = Script def [] + +-- | extracts statements from a JavaScript type +unJavaScript :: JavaScript a -> [Statement a] +unJavaScript (Script _ stmts) = stmts + +instance Default SourcePos where + def = initialPos "" + +data Id a = Id a String + deriving (Show,Eq,Ord,Data,Typeable,Functor,Foldable,Traversable) + +unId :: Id a -> String +unId (Id _ s) = s + +-- | Infix operators: see spec 11.5-11.11 +data InfixOp = OpLT -- ^ @<@ + | OpLEq -- ^ @<=@ + | OpGT -- ^ @>@ + | OpGEq -- ^ @>=@ + | OpIn -- ^ @in@ + | OpInstanceof -- ^ @instanceof@ + | OpEq -- ^ @==@ + | OpNEq -- ^ @!=@ + | OpStrictEq -- ^ @===@ + | OpStrictNEq -- ^ @!===@ + | OpLAnd -- ^ @&&@ + | OpLOr -- ^ @||@ + | OpMul -- ^ @*@ + | OpDiv -- ^ @/@ + | OpMod -- ^ @%@ + | OpSub -- ^ @-@ + | OpLShift -- ^ @<<@ + | OpSpRShift -- ^ @>>@ + | OpZfRShift -- ^ @>>>@ + | OpBAnd -- ^ @&@ + | OpBXor -- ^ @^@ + | OpBOr -- ^ @|@ + | OpAdd -- ^ @+@ + deriving (Show,Data,Typeable,Eq,Ord,Enum) + +-- | Assignment operators: see spec 11.13 +data AssignOp = OpAssign -- ^ simple assignment, @=@ + | OpAssignAdd -- ^ @+=@ + | OpAssignSub -- ^ @-=@ + | OpAssignMul -- ^ @*=@ + | OpAssignDiv -- ^ @/=@ + | OpAssignMod -- ^ @%=@ + | OpAssignLShift -- ^ @<<=@ + | OpAssignSpRShift -- ^ @>>=@ + | OpAssignZfRShift -- ^ @>>>=@ + | OpAssignBAnd -- ^ @&=@ + | OpAssignBXor -- ^ @^=@ + | OpAssignBOr -- ^ @|=@ + deriving (Show,Data,Typeable,Eq,Ord) + +-- | Unary assignment operators: see spec 11.3, 11.4.4, 11.4.5 +data UnaryAssignOp = PrefixInc -- ^ @++x@ + | PrefixDec -- ^ @--x@ + | PostfixInc -- ^ @x++@ + | PostfixDec -- ^ @x--@ + deriving (Show, Data, Typeable, Eq, Ord) + +-- | Prefix operators: see spec 11.4 (excluding 11.4.4, 11.4.5) +data PrefixOp = PrefixLNot -- ^ @!@ + | PrefixBNot -- ^ @~@ + | PrefixPlus -- ^ @+@ + | PrefixMinus -- ^ @-@ + | PrefixTypeof -- ^ @typeof@ + | PrefixVoid -- ^ @void@ + | PrefixDelete -- ^ @delete@ + deriving (Show,Data,Typeable,Eq,Ord) + +-- | Property names in an object initializer: see spec 11.1.5 +data Prop a = PropId a (Id a) -- ^ property name is an identifier, @foo@ + | PropString a String -- ^ property name is a string, @\"foo\"@ + | PropNum a Integer -- ^ property name is an integer, @42@ + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +-- | Left-hand side expressions: see spec 11.2 +data LValue a + = LVar a String -- ^ variable reference, @foo@ + | LDot a (Expression a) String -- ^ @foo.bar@ + | LBracket a (Expression a) (Expression a) -- ^ @foo[bar]@ + deriving (Show, Eq, Ord, Data, Typeable, Functor,Foldable,Traversable) + +-- | Expressions, see spec 11 +data Expression a + = StringLit a String -- ^ @\"foo\"@, spec 11.1.3, 7.8 + | RegexpLit a String Bool Bool + -- ^ @RegexpLit a regexp global? case_insensitive?@ -- regular + -- expression, see spec 11.1.3, 7.8 + | NumLit a Double -- ^ @41.99999@, spec 11.1.3, 7.8 + | IntLit a Int -- ^ @42@, spec 11.1.3, 7.8 + | BoolLit a Bool -- ^ @true@, spec 11.1.3, 7.8 + | NullLit a -- ^ @null@, spec 11.1.3, 7.8 + | ArrayLit a [Expression a] -- ^ @[1,2,3]@, spec 11.1.4 + | ObjectLit a [(Prop a, Expression a)] -- ^ @{foo:\"bar\", baz: 42}@, spec 11.1.5 + | ThisRef a -- ^ @this@, spec 11.1.1 + | VarRef a (Id a) -- ^ @foo@, spec 11.1.2 + | DotRef a (Expression a) (Id a) -- ^ @foo.bar@, spec 11.2.1 + | BracketRef a (Expression a) {- container -} (Expression a) {- key -} + -- ^ @foo[bar@, spec 11.2.1 + | NewExpr a (Expression a) {- constructor -} [Expression a] + -- ^ @new foo(bar)@, spec 11.2.2 + | PrefixExpr a PrefixOp (Expression a) + -- ^ @\@e@, spec 11.4 (excluding 11.4.4, 111.4.5) + | UnaryAssignExpr a UnaryAssignOp (LValue a) + -- ^ @++x@, @x--@ etc., spec 11.3, 11.4.4, 11.4.5 + | InfixExpr a InfixOp (Expression a) (Expression a) + -- ^ @e1\@e2@, spec 11.5, 11.6, 11.7, 11.8, 11.9, 11.10, 11.11 + | CondExpr a (Expression a) (Expression a) (Expression a) + -- ^ @e1 ? e2 : e3@, spec 11.12 + | AssignExpr a AssignOp (LValue a) (Expression a) + -- ^ @e1 \@=e2@, spec 11.13 + | ParenExpr a (Expression a) -- ^ @(e)@, spec 11.1.6 + | ListExpr a [Expression a] -- ^ @e1, e2@, spec 11.14 + | CallExpr a (Expression a) [Expression a] -- ^ @f(x,y,z)@, spec 11.2.3 + --funcexprs are optionally named + | FuncExpr a (Maybe (Id a)) [Id a] (Statement a) + -- ^ @function f (x,y,z) {...}@, spec 11.2.5, 13 + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +-- | Case clauses, spec 12.11 +data CaseClause a = CaseClause a (Expression a) [Statement a] + -- ^ @case e: stmts;@ + | CaseDefault a [Statement a] + -- ^ @default: stmts;@ + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +-- | Catch clause, spec 12.14 +data CatchClause a = CatchClause a (Id a) (Statement a) + -- ^ @catch (x) {...}@ + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +-- | A variable declaration, spec 12.2 +data VarDecl a = VarDecl a (Id a) (Maybe (Expression a)) + -- ^ @var x = e;@ + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +-- | for initializer, spec 12.6 +data ForInit a = NoInit -- ^ empty + | VarInit [VarDecl a] -- ^ @var x, y=42@ + | ExprInit (Expression a) -- ^ @expr@ + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +-- | for..in initializer, spec 12.6 +data ForInInit a = ForInVar (Id a) -- ^ @var x@ + | ForInLVal (LValue a) -- ^ @foo.baz@, @foo[bar]@, @z@ + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +-- | Statements, spec 12. +data Statement a + = BlockStmt a [Statement a] -- ^ @{stmts}@, spec 12.1 + | EmptyStmt a -- ^ @;@, spec 12.3 + | ExprStmt a (Expression a) -- ^ @expr;@, spec 12.4 + | IfStmt a (Expression a) (Statement a) (Statement a) + -- ^ @if (e) stmt@, spec 12.5 + | IfSingleStmt a (Expression a) (Statement a) + -- ^ @if (e) stmt1 else stmt2@, spec 12.5 + | SwitchStmt a (Expression a) [CaseClause a] + -- ^ @switch (e) clauses@, spec 12.11 + | WhileStmt a (Expression a) (Statement a) + -- ^ @while (e) do stmt@, spec 12.6 + | DoWhileStmt a (Statement a) (Expression a) + -- ^ @do stmt while (e);@, spec 12.6 + | BreakStmt a (Maybe (Id a)) -- ^ @break lab;@, spec 12.8 + | ContinueStmt a (Maybe (Id a)) -- ^ @continue lab;@, spec 12.7 + | LabelledStmt a (Id a) (Statement a) -- ^ @lab: stmt@, spec 12.12 + | ForInStmt a (ForInInit a) (Expression a) (Statement a) + -- ^ @for (x in o) stmt@, spec 12.6 + | ForStmt a (ForInit a) + (Maybe (Expression a)) -- test + (Maybe (Expression a)) -- increment + (Statement a) -- body + -- ^ @ForStmt a init test increment body@, @for (init; test, + -- increment) body@, spec 12.6 + | TryStmt a (Statement a) {-body-} (Maybe (CatchClause a)) + (Maybe (Statement a)) {-finally-} + -- ^ @try stmt catch(x) stmt finally stmt@, spec 12.14 + | ThrowStmt a (Expression a) + -- ^ @throw expr;@, spec 12.13 + | ReturnStmt a (Maybe (Expression a)) + -- ^ @return expr;@, spec 12.9 + | WithStmt a (Expression a) (Statement a) + -- ^ @with (o) stmt@, spec 12.10 + | VarDeclStmt a [VarDecl a] + -- ^ @var x, y=42;@, spec 12.2 + | FunctionStmt a (Id a) {-name-} [Id a] {-args-} (Statement a) {-body-} + -- ^ @function f(x, y, z) {...}@, spec 13 + deriving (Show,Data,Typeable,Eq,Ord,Functor,Foldable,Traversable) + +-- | Returns 'True' if the statement is an /IterationStatement/ +-- according to spec 12.6. +isIterationStmt :: Statement a -> Bool +isIterationStmt s = case s of + WhileStmt {} -> True + DoWhileStmt {} -> True + ForStmt {} -> True + ForInStmt {} -> True + _ -> False + diff --git a/src/Language/ECMAScript3/Syntax/Annotations.hs b/src/Language/ECMAScript3/Syntax/Annotations.hs new file mode 100644 index 0000000..a467319 --- /dev/null +++ b/src/Language/ECMAScript3/Syntax/Annotations.hs @@ -0,0 +1,115 @@ +-- | A few helpers to work with the AST annotations +module Language.ECMAScript3.Syntax.Annotations where + +import Language.ECMAScript3.Syntax +import Data.Traversable +import Control.Applicative +import Control.Arrow +import Control.Monad.State hiding (mapM) +import Prelude hiding (mapM) + +-- | Removes annotations from a tree +removeAnnotations :: Traversable t => t a -> t () +removeAnnotations = reannotate (const ()) + +-- | Changes all the labels in the tree to another one, given by a +-- function. +reannotate :: Traversable t => (a -> b) -> t a -> t b +reannotate f tree = traverse (pure . f) tree () + +-- | add an extra field to the AST labels (the label would look like @ +-- (a, b) @) +addExtraAnnotationField :: Traversable t => b -> t a -> t (a, b) +addExtraAnnotationField def t = traverse (\z -> pure (z, def)) t () + +-- | remove an extra field +removeExtraAnnotationField :: Traversable t => t (a, b) -> t a +removeExtraAnnotationField t = traverse (pure . fst) t () + + +-- | Assigns unique numeric (Int) ids to each node in the AST. Returns +-- a pair: the tree annotated with UID's and the last ID that was +-- assigned. +assignUniqueIds :: Traversable t => Int -- ^ starting id + -> t a -- ^ tree root + -> (t (a, Int), Int) +assignUniqueIds first tree = + (returnA *** \i -> i-1) $ runState (mapM f tree) first + where f :: a -> State Int (a, Int) + f a = do i <- get + put (i+1) + return (a, i) + +class HasAnnotation a where + -- | Returns the annotation of the root of the tree + getAnnotation :: a b -> b + +instance HasAnnotation Expression where + getAnnotation e = case e of + (StringLit a s) -> a + (RegexpLit a s g ci) -> a + (NumLit a d) -> a + (IntLit a i) -> a + (BoolLit a b) -> a + (NullLit a) -> a + (ArrayLit a exps) -> a + (ObjectLit a props) -> a + (ThisRef a) -> a + (VarRef a id) -> a + (DotRef a exp id) -> a + (BracketRef a container key) -> a + (NewExpr a ctor params) -> a + (PrefixExpr a op e) -> a + (UnaryAssignExpr a op lv) -> a + (InfixExpr a op e1 e2) -> a + (CondExpr a g et ef) -> a + (AssignExpr a op lv e) -> a + (ParenExpr a e) -> a + (ListExpr a es) -> a + (CallExpr a fn params) -> a + (FuncExpr a mid args s) -> a + +instance HasAnnotation Statement where + getAnnotation s = case s of + BlockStmt a _ -> a + EmptyStmt a -> a + ExprStmt a _ -> a + IfStmt a _ _ _ -> a + IfSingleStmt a _ _ -> a + SwitchStmt a _ _ -> a + WhileStmt a _ _ -> a + DoWhileStmt a _ _ -> a + BreakStmt a _ -> a + ContinueStmt a _ -> a + LabelledStmt a _ _ -> a + ForInStmt a _ _ _ -> a + ForStmt a _ _ _ _ -> a + TryStmt a _ _ _ -> a + ThrowStmt a _ -> a + ReturnStmt a _ -> a + WithStmt a _ _ -> a + VarDeclStmt a _ -> a + FunctionStmt a _ _ _ -> a + +instance HasAnnotation LValue where + getAnnotation lv = case lv of + LVar a _ -> a + LDot a _ _ -> a + LBracket a _ _ -> a + +instance HasAnnotation VarDecl where + getAnnotation (VarDecl a _ _) = a + +instance HasAnnotation Prop where + getAnnotation p = case p of + PropId a _ -> a + PropString a _ -> a + PropNum a _ -> a + +instance HasAnnotation CaseClause where + getAnnotation c = case c of + CaseClause a _ _ -> a + CaseDefault a _ -> a + +instance HasAnnotation CatchClause where + getAnnotation (CatchClause a _ _) = a diff --git a/src/UnitTest.hs b/src/UnitTest.hs new file mode 100644 index 0000000..27c34c8 --- /dev/null +++ b/src/UnitTest.hs @@ -0,0 +1,31 @@ +module Main where + +import Test.HUnit +import System.Exit +import System.Directory +import qualified System.FilePath as FilePath +import Language.ECMAScript3.Parser +import Language.ECMAScript3.PrettyPrint +import Language.ECMAScript3.Syntax + +testDir = "tests/parse-pretty" + +parsePrettyTest filename = TestLabel filename $ TestCase $ do + js <- parseJavaScriptFromFile filename + let str = renderStatements js + case parseScriptFromString "" str of + Left err -> assertFailure (show err) + Right (Script _ js') -> do + let str' = renderStatements js' + assertBool "pretty-printed code should re-parse" (str == str') + +main = do + allFiles <- getDirectoryContents testDir + let files = map (testDir `FilePath.combine`) $ + filter (\x -> FilePath.takeExtension x == ".js") allFiles + let parsePretty = TestLabel "parser - printer composition" + (TestList (map parsePrettyTest files)) + results <- runTestTT parsePretty + if errors results > 0 || failures results > 0 + then exitFailure + else putStrLn "All tests passed." |