summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreyChudnov <>2014-04-01 20:19:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2014-04-01 20:19:00 (GMT)
commitc1a948cb052ba208ef9d94f6e92f94bced4fe141 (patch)
treecbde12db54f89c61176fd6f939f36c34442fb4ae
parentf82d352677ef3e45fd2f6b8abe5d17fbbe6bae83 (diff)
version 0.16.1
-rw-r--r--CHANGELOG6
-rw-r--r--language-ecmascript.cabal4
-rw-r--r--src/Language/ECMAScript3/PrettyPrint.hs41
-rw-r--r--src/Language/ECMAScript3/Syntax.hs35
-rw-r--r--src/Language/ECMAScript3/Syntax/Arbitrary.hs13
5 files changed, 57 insertions, 42 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 579005e..e059d71 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,11 @@
Version change log.
+=0.16.1=
+Removed unicode-escaping of illegal identifier characters in the
+pretty-printer. Adjusted the isValid predicate to check for illegal
+characters in identifiers. Adjusted the Arbitrary instance to only produce
+valid identifier names.
+
=0.16=
Rewritten the Arbitrary instances using 'testing-feat'. Adjusted the
pretty-printer to escape invalid characters in identifier names
diff --git a/language-ecmascript.cabal b/language-ecmascript.cabal
index 975995b..75f9986 100644
--- a/language-ecmascript.cabal
+++ b/language-ecmascript.cabal
@@ -1,5 +1,5 @@
Name: language-ecmascript
-Version: 0.16
+Version: 0.16.1
Cabal-Version: >= 1.10
Copyright: (c) 2007-2012 Brown University, (c) 2008-2010 Claudiu Saftoiu,
(c) 2012-2014 Stevens Institute of Technology
@@ -29,7 +29,7 @@ Source-repository head
Source-repository this
type: git
location: git://github.com/jswebtools/language-ecmascript.git
- tag: 0.16
+ tag: 0.16.1
Library
Hs-Source-Dirs:
diff --git a/src/Language/ECMAScript3/PrettyPrint.hs b/src/Language/ECMAScript3/PrettyPrint.hs
index fa0a309..8f17857 100644
--- a/src/Language/ECMAScript3/PrettyPrint.hs
+++ b/src/Language/ECMAScript3/PrettyPrint.hs
@@ -249,43 +249,12 @@ ppVarDecl hasIn vd = case vd of
VarDecl _ id (Just e) -> prettyPrint id <+> equals
<+> ppAssignmentExpression hasIn e
--- | Pretty prints a string assuming it's used as an
--- identifier. Escapes characters that are disallowed by the grammar
--- with unicode escape sequences, so that the resulting program can be
--- parsed later. Note that it does not (and could not) do anything
--- about identifier names that are reserved words as well as empty
--- identifier names.
+-- | Pretty prints a string assuming it's used as an identifier. Note
+-- that per Spec 7.6 unicode escape sequences representing illegal
+-- identifier characters are not allowed either, so we do not
+-- unicode-escape illegal characters in identifiers anymore.
printIdentifierName :: String -> Doc
-printIdentifierName = text . adapt
- where adapt [] = []
- adapt (c:cs) = (adaptStart c) ++ (concatMap adaptRest cs)
- adaptStart c = if validIdStart c then [c]
- else unicodeEscape c
- adaptRest c = if validIdPart c then [c]
- else unicodeEscape c
- validIdStart c = unicodeLetter c
- || c == '$'
- || c == '_'
- validIdPart c = validIdStart c
- || validIdPartUnicode c
- unicodeLetter c = case generalCategory c of
- UppercaseLetter -> True
- LowercaseLetter -> True
- TitlecaseLetter -> True
- ModifierLetter -> True
- OtherLetter -> True
- LetterNumber -> True
- _ -> False
- validIdPartUnicode c = case generalCategory c of
- NonSpacingMark -> True
- SpacingCombiningMark -> True
- DecimalNumber -> True
- ConnectorPunctuation -> True
- _ -> False
- -- escapes a given character converting it into a 16-bit
- -- unicode escape sequence
- unicodeEscape :: Char -> String
- unicodeEscape c = "\\u" ++ showHex (ord c) ""
+printIdentifierName = text
-- Based on:
-- http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide:Literals
diff --git a/src/Language/ECMAScript3/Syntax.hs b/src/Language/ECMAScript3/Syntax.hs
index 7ed1448..8b17d12 100644
--- a/src/Language/ECMAScript3/Syntax.hs
+++ b/src/Language/ECMAScript3/Syntax.hs
@@ -27,6 +27,9 @@ module Language.ECMAScript3.Syntax (JavaScript(..)
,isValid
,isValidIdentifier
,isValidIdentifierName
+ ,isReservedWord
+ ,isValidIdStart
+ ,isValidIdPart
,EnclosingStatement(..)
,pushLabel
,pushEnclosing
@@ -358,8 +361,15 @@ bracketState f m = do original <- get
isValidIdentifier :: Id a -> Bool
isValidIdentifier (Id _ name) = isValidIdentifierName name
+-- | Checks if the 'String' represents a valid identifier name
isValidIdentifierName :: String -> Bool
-isValidIdentifierName name = (not $ null name) && name `notElem` reservedWords
+isValidIdentifierName name = case name of
+ "" -> False
+ (c:cs) -> isValidIdStart c && and (map isValidIdPart cs) && (not $ isReservedWord name)
+
+-- | Checks if a string is in the list of reserved ECMAScript words
+isReservedWord :: String -> Bool
+isReservedWord = (`elem` reservedWords)
where reservedWords = keyword ++ futureReservedWord ++ nullKw ++ boolLit
keyword = ["break", "case", "catch", "continue", "default", "delete"
,"do", "else", "finally", "for", "function", "if", "in"
@@ -373,6 +383,29 @@ isValidIdentifierName name = (not $ null name) && name `notElem` reservedWords
,"synchronized", "throws", "transient", "volatile"]
nullKw = ["null"]
boolLit = ["true", "false"]
+
+-- | Checks if a character is valid at the start of an identifier
+isValidIdStart :: Char -> Bool
+isValidIdStart c = unicodeLetter c || c == '$' || c == '_'
+ where unicodeLetter c = case generalCategory c of
+ UppercaseLetter -> True
+ LowercaseLetter -> True
+ TitlecaseLetter -> True
+ ModifierLetter -> True
+ OtherLetter -> True
+ LetterNumber -> True
+ _ -> False
+
+-- | Checks if a character is valid in an identifier part
+isValidIdPart :: Char -> Bool
+isValidIdPart c = isValidIdStart c || isValidIdPartUnicode c
+ where isValidIdPartUnicode c = case generalCategory c of
+ NonSpacingMark -> True
+ SpacingCombiningMark -> True
+ DecimalNumber -> True
+ ConnectorPunctuation -> True
+ _ -> False
+
data EnclosingStatement = EnclosingIter [Label]
-- ^ The enclosing statement is an iteration statement
diff --git a/src/Language/ECMAScript3/Syntax/Arbitrary.hs b/src/Language/ECMAScript3/Syntax/Arbitrary.hs
index 8354897..a05e348 100644
--- a/src/Language/ECMAScript3/Syntax/Arbitrary.hs
+++ b/src/Language/ECMAScript3/Syntax/Arbitrary.hs
@@ -267,10 +267,17 @@ identifierFixup :: Id a -> Id a
identifierFixup (Id a n) = Id a $ identifierNameFixup n
-- | Renames empty identifiers, as well as identifiers that are
--- keywords or future reserved words by prepending a '_' to them
+-- keywords or future reserved words by prepending a '_' to them. Also
+-- substitutes illegal characters with a "_".
identifierNameFixup :: String -> String
-identifierNameFixup name = if isValidIdentifierName name then name
- else '_':name
+identifierNameFixup s =
+ let fixStart c = if isValidIdStart c then c else '_'
+ fixPart c = if isValidIdPart c then c else '_'
+ in case s of
+ "" -> "_"
+ (start:part) -> let fixed_id = (fixStart start):(map fixPart part)
+ in if isReservedWord fixed_id then '_':fixed_id
+ else fixed_id
-- | Fixes an incorrect nesting of break/continue, making the program
-- abide by the ECMAScript spec (page 92): any continue without a