diff options
author | AndreyChudnov <> | 2014-04-01 20:19:00 (GMT) |
---|---|---|
committer | hdiff <hdiff@hdiff.luite.com> | 2014-04-01 20:19:00 (GMT) |
commit | c1a948cb052ba208ef9d94f6e92f94bced4fe141 (patch) | |
tree | cbde12db54f89c61176fd6f939f36c34442fb4ae | |
parent | f82d352677ef3e45fd2f6b8abe5d17fbbe6bae83 (diff) |
version 0.16.1 (0.16.1)
-rw-r--r-- | CHANGELOG | 6 | ||||
-rw-r--r-- | language-ecmascript.cabal | 4 | ||||
-rw-r--r-- | src/Language/ECMAScript3/PrettyPrint.hs | 41 | ||||
-rw-r--r-- | src/Language/ECMAScript3/Syntax.hs | 35 | ||||
-rw-r--r-- | src/Language/ECMAScript3/Syntax/Arbitrary.hs | 13 |
5 files changed, 57 insertions, 42 deletions
@@ -1,5 +1,11 @@ Version change log. +=0.16.1= +Removed unicode-escaping of illegal identifier characters in the +pretty-printer. Adjusted the isValid predicate to check for illegal +characters identifiers. Adjusted the Arbitrary instance to only produce +valid identifier names. + =0.16= Rewritten the Arbitrary instances using 'testing-feat'. Adjusted the pretty-printer to escape invalid characters in identifier names diff --git a/language-ecmascript.cabal b/language-ecmascript.cabal index 975995b..75f9986 100644 --- a/language-ecmascript.cabal +++ b/language-ecmascript.cabal @@ -1,5 +1,5 @@ Name: language-ecmascript -Version: 0.16 +Version: 0.16.1 Cabal-Version: >= 1.10 Copyright: (c) 2007-2012 Brown University, (c) 2008-2010 Claudiu Saftoiu, (c) 2012-2014 Stevens Institute of Technology @@ -29,7 +29,7 @@ Source-repository head Source-repository this type: git location: git://github.com/jswebtools/language-ecmascript.git - tag: 0.16 + tag: 0.16.1 Library Hs-Source-Dirs: diff --git a/src/Language/ECMAScript3/PrettyPrint.hs b/src/Language/ECMAScript3/PrettyPrint.hs index fa0a309..8f17857 100644 --- a/src/Language/ECMAScript3/PrettyPrint.hs +++ b/src/Language/ECMAScript3/PrettyPrint.hs @@ -249,43 +249,12 @@ ppVarDecl hasIn vd = case vd of VarDecl _ id (Just e) -> prettyPrint id <+> equals <+> ppAssignmentExpression hasIn e --- | Pretty prints a string assuming it's used as an --- identifier. Escapes characters that are disallowed by the grammar --- with unicode escape sequences, so that the resulting program can be --- parsed later. Note that it does not (and could not) do anything --- about identifier names that are reserved words as well as empty --- identifier names. +-- | Pretty prints a string assuming it's used as an identifier. Note +-- that per Spec 7.6 unicode escape sequences representing illegal +-- identifier characters are not allowed as well, so we do not +-- unicode-escape illegal characters in identifiers anymore. 
printIdentifierName :: String -> Doc -printIdentifierName = text . adapt - where adapt [] = [] - adapt (c:cs) = (adaptStart c) ++ (concatMap adaptRest cs) - adaptStart c = if validIdStart c then [c] - else unicodeEscape c - adaptRest c = if validIdPart c then [c] - else unicodeEscape c - validIdStart c = unicodeLetter c - || c == '$' - || c == '_' - validIdPart c = validIdStart c - || validIdPartUnicode c - unicodeLetter c = case generalCategory c of - UppercaseLetter -> True - LowercaseLetter -> True - TitlecaseLetter -> True - ModifierLetter -> True - OtherLetter -> True - LetterNumber -> True - _ -> False - validIdPartUnicode c = case generalCategory c of - NonSpacingMark -> True - SpacingCombiningMark -> True - DecimalNumber -> True - ConnectorPunctuation -> True - _ -> False - -- escapes a given character converting it into a 16-bit - -- unicode escape sequence - unicodeEscape :: Char -> String - unicodeEscape c = "\\u" ++ showHex (ord c) "" +printIdentifierName = text -- Based on: -- http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide:Literals diff --git a/src/Language/ECMAScript3/Syntax.hs b/src/Language/ECMAScript3/Syntax.hs index 7ed1448..8b17d12 100644 --- a/src/Language/ECMAScript3/Syntax.hs +++ b/src/Language/ECMAScript3/Syntax.hs @@ -27,6 +27,9 @@ module Language.ECMAScript3.Syntax (JavaScript(..) ,isValid ,isValidIdentifier ,isValidIdentifierName + ,isReservedWord + ,isValidIdStart + ,isValidIdPart ,EnclosingStatement(..) 
,pushLabel ,pushEnclosing @@ -358,8 +361,15 @@ bracketState f m = do original <- get isValidIdentifier :: Id a -> Bool isValidIdentifier (Id _ name) = isValidIdentifierName name +-- | Checks if the 'String' represents a valid identifier name isValidIdentifierName :: String -> Bool -isValidIdentifierName name = (not $ null name) && name `notElem` reservedWords +isValidIdentifierName name = case name of + "" -> False + (c:cs) -> isValidIdStart c && and (map isValidIdPart cs) && (not $ isReservedWord name) + +-- | Checks if a string is in the list of reserved ECMAScript words +isReservedWord :: String -> Bool +isReservedWord = (`elem` reservedWords) where reservedWords = keyword ++ futureReservedWord ++ nullKw ++ boolLit keyword = ["break", "case", "catch", "continue", "default", "delete" ,"do", "else", "finally", "for", "function", "if", "in" @@ -373,6 +383,29 @@ isValidIdentifierName name = (not $ null name) && name `notElem` reservedWords ,"synchronized", "throws", "transient", "volatile"] nullKw = ["null"] boolLit = ["true", "false"] + +-- | Checks if a character is valid at the start of an identifier +isValidIdStart :: Char -> Bool +isValidIdStart c = unicodeLetter c || c == '$' || c == '_' + where unicodeLetter c = case generalCategory c of + UppercaseLetter -> True + LowercaseLetter -> True + TitlecaseLetter -> True + ModifierLetter -> True + OtherLetter -> True + LetterNumber -> True + _ -> False + +-- | Checks if a character is valid in an identifier part +isValidIdPart :: Char -> Bool +isValidIdPart c = isValidIdStart c || isValidIdPartUnicode c + where isValidIdPartUnicode c = case generalCategory c of + NonSpacingMark -> True + SpacingCombiningMark -> True + DecimalNumber -> True + ConnectorPunctuation -> True + _ -> False + data EnclosingStatement = EnclosingIter [Label] -- ^ The enclosing statement is an iteration statement diff --git a/src/Language/ECMAScript3/Syntax/Arbitrary.hs b/src/Language/ECMAScript3/Syntax/Arbitrary.hs index 8354897..a05e348 
100644 --- a/src/Language/ECMAScript3/Syntax/Arbitrary.hs +++ b/src/Language/ECMAScript3/Syntax/Arbitrary.hs @@ -267,10 +267,17 @@ identifierFixup :: Id a -> Id a identifierFixup (Id a n) = Id a $ identifierNameFixup n -- | Renames empty identifiers, as well as identifiers that are --- keywords or future reserved words by prepending a '_' to them +-- keywords or future reserved words by prepending a '_' to them. Also +-- substitutes illegal characters with a "_" as well. identifierNameFixup :: String -> String -identifierNameFixup name = if isValidIdentifierName name then name - else '_':name +identifierNameFixup s = + let fixStart c = if isValidIdStart c then c else '_' + fixPart c = if isValidIdPart c then c else '_' + in case s of + "" -> "_" + (start:part) -> let fixed_id = (fixStart start):(map fixPart part) + in if isReservedWord fixed_id then '_':fixed_id + else fixed_id -- | Fixes an incorrect nesting of break/continue, making the program -- abide by the ECMAScript spec (page 92): any continue without a |