summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMalcolmWallace <>2013-05-15 11:58:33 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2013-05-15 11:58:33 (GMT)
commit7153cd25741d547aca13208e7d36acf71434ba11 (patch)
treeb2754f84c8eeb1f7916e169ef4d01aa29094df9e
parent0b6b1d5c12748cad3c2a12280c71d6e90767249b (diff)
version 1.241.24
-rwxr-xr-xCOPYRIGHT2
-rwxr-xr-xHaXml.cabal22
-rwxr-xr-xsrc/Text/XML/HaXml/Html/Parse.hs26
-rwxr-xr-xsrc/Text/XML/HaXml/Html/ParseLazy.hs28
-rwxr-xr-xsrc/Text/XML/HaXml/Lex.hs4
-rwxr-xr-xsrc/Text/XML/HaXml/Parse.hs85
-rwxr-xr-xsrc/Text/XML/HaXml/ParseLazy.hs86
-rwxr-xr-xsrc/Text/XML/HaXml/Types.hs71
8 files changed, 157 insertions, 167 deletions
diff --git a/COPYRIGHT b/COPYRIGHT
index 67d3fc7..3d4fd72 100755
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -1,6 +1,6 @@
The HaXml library and tools were written by and are copyright to
(c) copyright 1998-1999 Malcolm Wallace and Colin Runciman
- (c) copyright 2000-2012 Malcolm Wallace
+ (c) copyright 2000-2013 Malcolm Wallace
The HaXml library is licensed under the terms of the GNU Lesser
General Public Licence (LGPL), which can be found in the file called
diff --git a/HaXml.cabal b/HaXml.cabal
index 8e0e19a..4b46428 100755
--- a/HaXml.cabal
+++ b/HaXml.cabal
@@ -1,5 +1,5 @@
name: HaXml
-version: 1.23.3
+version: 1.24
license: LGPL
license-file: COPYRIGHT
author: Malcolm Wallace <Malcolm.Wallace@me.com>
@@ -66,7 +66,7 @@ library
exposed-modules:
Text.XML.HaXml.Schema.Schema
hs-source-dirs: src
- build-depends: polyparse >= 1.5, filepath
+ build-depends: polyparse >= 1.9, filepath
if flag(splitBase)
build-depends: base >= 3 && < 6, pretty, random, containers
else
@@ -76,63 +76,63 @@ library
else
build-depends: base < 2 || >= 3, bytestring
extensions: CPP, ExistentialQuantification
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
nhc98-options: -K10M
Executable Canonicalise
GHC-Options: -Wall
Extensions: CPP
Hs-Source-Dirs: src/tools, src
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
Main-Is: Canonicalise.hs
Executable CanonicaliseLazy
GHC-Options: -Wall
Extensions: CPP
Hs-Source-Dirs: src/tools, src
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
Main-Is: CanonicaliseLazy.hs
Executable Xtract
GHC-Options: -Wall
Extensions: CPP
Hs-Source-Dirs: src/tools, src
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
Main-Is: Xtract.hs
Executable Validate
GHC-Options: -Wall
Extensions: CPP
Hs-Source-Dirs: src/tools, src
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
Main-Is: Validate.hs
Executable MkOneOf
GHC-Options: -Wall
Extensions: CPP
Hs-Source-Dirs: src/tools, src
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
Main-Is: MkOneOf.hs
Executable DtdToHaskell
GHC-Options: -Wall
Extensions: CPP
Hs-Source-Dirs: src/tools, src
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
Main-Is: DtdToHaskell.hs
Executable XsdToHaskell
GHC-Options: -Wall
Extensions: CPP
Hs-Source-Dirs: src/tools, src
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
Main-Is: XsdToHaskell.hs
Executable FpMLToHaskell
GHC-Options: -Wall
Extensions: CPP
Hs-Source-Dirs: src/tools, src
- cpp-options: -DMAJOR=1 -DMINOR=23
+ cpp-options: -DMAJOR=1 -DMINOR=24
Main-Is: FpMLToHaskell.hs
build-depends: directory
diff --git a/src/Text/XML/HaXml/Html/Parse.hs b/src/Text/XML/HaXml/Html/Parse.hs
index 7ad9ae7..a7f0bbf 100755
--- a/src/Text/XML/HaXml/Html/Parse.hs
+++ b/src/Text/XML/HaXml/Html/Parse.hs
@@ -223,7 +223,7 @@ processinginstruction = do
cdsect :: HParser CDSect
cdsect = do
tok TokSectionOpen
- bracket (tok (TokSection CDATAx)) (tok TokSectionClose) chardata
+ bracket (tok (TokSection CDATAx)) (commit $ tok TokSectionClose) chardata
prolog :: HParser Prolog
prolog = do
@@ -252,7 +252,7 @@ versioninfo :: HParser VersionInfo
versioninfo = do
(word "version" `onFail` word "VERSION")
tok TokEqual
- bracket (tok TokQuote) (tok TokQuote) freetext
+ bracket (tok TokQuote) (commit $ tok TokQuote) freetext
misc :: HParser Misc
misc =
@@ -271,7 +271,7 @@ doctypedecl = do
commit $ do
n <- qname
eid <- maybe externalid
--- es <- maybe (bracket (tok TokSqOpen) (tok TokSqClose)) (many markupdecl)
+-- es <- maybe (bracket (tok TokSqOpen) (commit $ tok TokSqClose)) (many markupdecl)
tok TokAnyClose `onFail` failP "missing > in DOCTYPE decl"
-- return (DTD n eid (case es of { Nothing -> []; Just e -> e }))
return (DTD n eid [])
@@ -302,7 +302,7 @@ sddecl = do
(word "standalone" `onFail` word "STANDALONE")
commit $ do
tok TokEqual `onFail` failP "missing = in 'standalone' decl"
- bracket (tok TokQuote) (tok TokQuote)
+ bracket (tok TokQuote) (commit $ tok TokQuote)
( (word "yes" >> return True) `onFail`
(word "no" >> return False) `onFail`
failP "'standalone' decl requires 'yes' or 'no' value" )
@@ -334,7 +334,7 @@ element ctx =
return ([], Elem (N e) avs [])) `onFail`
-- ( do tok TokAnyClose -- sequence <tag></tag> (**not HTML?**)
-- debug (e++"[+")
- -- n <- bracket (tok TokEndOpen) (tok TokAnyClose) qname
+ -- n <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
-- debug "]"
-- if e == (map toLower n :: Name)
-- then return ([], Elem e avs [])
@@ -349,7 +349,7 @@ element ctx =
( do tok TokAnyClose `onFail` failP "missing > or /> in element tag"
debug (e++"[")
-- zz <- many (content e)
- -- n <- bracket (tok TokEndOpen) (tok TokAnyClose) qname
+ -- n <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
zz <- manyFinally (content e)
(tok TokEndOpen)
(N n) <- qname
@@ -575,12 +575,12 @@ reference =
entityref :: HParser EntityRef
entityref = do
- n <- bracket (tok TokAmp) (tok TokSemi) name
+ n <- bracket (tok TokAmp) (commit $ tok TokSemi) name
return n
charref :: HParser CharRef
charref = do
- bracket (tok TokAmp) (tok TokSemi) (freetext >>= readCharVal)
+ bracket (tok TokAmp) (commit $ tok TokSemi) (freetext >>= readCharVal)
where
readCharVal ('#':'x':i) = return . fst . head . readHex $ i
readCharVal ('#':i) = return . fst . head . readDec $ i
@@ -668,7 +668,7 @@ encodingdecl :: HParser EncodingDecl
encodingdecl = do
(word "encoding" `onFail` word "ENCODING")
tok TokEqual `onFail` failBadP "expected = in 'encoding' decl"
- f <- bracket (tok TokQuote) (tok TokQuote) freetext
+ f <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (EncodingDecl f)
--notationdecl :: HParser NotationDecl
@@ -688,7 +688,7 @@ encodingdecl = do
--entityvalue :: HParser EntityValue
--entityvalue = do
--- evs <- bracket (tok TokQuote) (tok TokQuote) (many ev)
+-- evs <- bracket (tok TokQuote) (commit $ tok TokQuote) (many ev)
-- return (EntityValue evs)
--ev :: HParser EV
@@ -699,7 +699,7 @@ encodingdecl = do
attvalue :: HParser AttValue
attvalue =
- ( do avs <- bracket (tok TokQuote) (tok TokQuote)
+ ( do avs <- bracket (tok TokQuote) (commit $ tok TokQuote)
(many (either freetext reference))
return (AttValue avs) ) `onFail`
( do v <- nmtoken
@@ -714,12 +714,12 @@ attvalue =
systemliteral :: HParser SystemLiteral
systemliteral = do
- s <- bracket (tok TokQuote) (tok TokQuote) freetext
+ s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (SystemLiteral s) -- note: need to fold &...; escapes
pubidliteral :: HParser PubidLiteral
pubidliteral = do
- s <- bracket (tok TokQuote) (tok TokQuote) freetext
+ s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (PubidLiteral s) -- note: need to fold &...; escapes
chardata :: HParser CharData
diff --git a/src/Text/XML/HaXml/Html/ParseLazy.hs b/src/Text/XML/HaXml/Html/ParseLazy.hs
index e22c381..927a750 100755
--- a/src/Text/XML/HaXml/Html/ParseLazy.hs
+++ b/src/Text/XML/HaXml/Html/ParseLazy.hs
@@ -222,7 +222,7 @@ processinginstruction = do
cdsect :: HParser CDSect
cdsect = do
tok TokSectionOpen
- bracket (tok (TokSection CDATAx)) (tok TokSectionClose) chardata
+ bracket (tok (TokSection CDATAx)) (commit $ tok TokSectionClose) chardata
prolog :: HParser Prolog
prolog = do
@@ -251,7 +251,7 @@ versioninfo :: HParser VersionInfo
versioninfo = do
(word "version" `onFail` word "VERSION")
tok TokEqual
- bracket (tok TokQuote) (tok TokQuote) freetext
+ bracket (tok TokQuote) (commit $ tok TokQuote) freetext
misc :: HParser Misc
misc =
@@ -270,7 +270,7 @@ doctypedecl = do
commit $ do
n <- qname
eid <- maybe externalid
--- es <- maybe (bracket (tok TokSqOpen) (tok TokSqClose)) (many markupdecl)
+-- es <- maybe (bracket (tok TokSqOpen) (commit $ tok TokSqClose)) (many markupdecl)
tok TokAnyClose `onFail` failP "missing > in DOCTYPE decl"
-- return (DTD n eid (case es of { Nothing -> []; Just e -> e }))
return (DTD n eid [])
@@ -301,7 +301,7 @@ sddecl = do
(word "standalone" `onFail` word "STANDALONE")
commit $ do
tok TokEqual `onFail` failP "missing = in 'standalone' decl"
- bracket (tok TokQuote) (tok TokQuote)
+ bracket (tok TokQuote) (commit $ tok TokQuote)
( (word "yes" >> return True) `onFail`
(word "no" >> return False) `onFail`
failP "'standalone' decl requires 'yes' or 'no' value" )
@@ -333,7 +333,7 @@ element (N ctx) =
return ([], Elem (N e) avs [])) `onFail`
-- ( do tok TokAnyClose -- sequence <tag></tag> (**not HTML?**)
-- debug (e++"[+")
- -- n <- bracket (tok TokEndOpen) (tok TokAnyClose) qname
+ -- n <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
-- debug "]"
-- if e == (map toLower n :: Name)
-- then return ([], Elem e avs [])
@@ -514,12 +514,12 @@ attribute = do
--notationtype :: HParser NotationType
--notationtype = do
-- word "NOTATION"
--- bracket (tok TokBraOpen) (tok TokBraClose)
+-- bracket (tok TokBraOpen) (commit $ tok TokBraClose)
-- (name `sepby1` (tok TokPipe))
--
--enumeration :: HParser Enumeration
--enumeration =
--- bracket (tok TokBraOpen) (tok TokBraClose)
+-- bracket (tok TokBraOpen) (commit $ tok TokBraClose)
-- (nmtoken `sepby1` (tok TokPipe))
--
--defaultdecl :: HParser DefaultDecl
@@ -576,12 +576,12 @@ reference =
entityref :: HParser EntityRef
entityref = do
- n <- bracket (tok TokAmp) (tok TokSemi) name
+ n <- bracket (tok TokAmp) (commit $ tok TokSemi) name
return n
charref :: HParser CharRef
charref = do
- bracket (tok TokAmp) (tok TokSemi) (freetext >>= readCharVal)
+ bracket (tok TokAmp) (commit $ tok TokSemi) (freetext >>= readCharVal)
where
readCharVal ('#':'x':i) = return . fst . head . readHex $ i
readCharVal ('#':i) = return . fst . head . readDec $ i
@@ -669,7 +669,7 @@ encodingdecl :: HParser EncodingDecl
encodingdecl = do
(word "encoding" `onFail` word "ENCODING")
tok TokEqual `onFail` failBadP "expected = in 'encoding' decl"
- f <- bracket (tok TokQuote) (tok TokQuote) freetext
+ f <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (EncodingDecl f)
--notationdecl :: HParser NotationDecl
@@ -689,7 +689,7 @@ encodingdecl = do
--entityvalue :: HParser EntityValue
--entityvalue = do
--- evs <- bracket (tok TokQuote) (tok TokQuote) (many ev)
+-- evs <- bracket (tok TokQuote) (commit $ tok TokQuote) (many ev)
-- return (EntityValue evs)
--ev :: HParser EV
@@ -700,7 +700,7 @@ encodingdecl = do
attvalue :: HParser AttValue
attvalue =
- ( do avs <- bracket (tok TokQuote) (tok TokQuote)
+ ( do avs <- bracket (tok TokQuote) (commit $ tok TokQuote)
(many (either freetext reference))
return (AttValue avs) ) `onFail`
( do v <- nmtoken
@@ -715,12 +715,12 @@ attvalue =
systemliteral :: HParser SystemLiteral
systemliteral = do
- s <- bracket (tok TokQuote) (tok TokQuote) freetext
+ s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (SystemLiteral s) -- note: need to fold &...; escapes
pubidliteral :: HParser PubidLiteral
pubidliteral = do
- s <- bracket (tok TokQuote) (tok TokQuote) freetext
+ s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (PubidLiteral s) -- note: need to fold &...; escapes
chardata :: HParser CharData
diff --git a/src/Text/XML/HaXml/Lex.hs b/src/Text/XML/HaXml/Lex.hs
index 6f7bea6..3796920 100755
--- a/src/Text/XML/HaXml/Lex.hs
+++ b/src/Text/XML/HaXml/Lex.hs
@@ -159,6 +159,10 @@ textUntil close tok acc pos p (s:ss) k
| close `prefixes` (s:ss) = emit (TokFreeText (reverse acc)) pos:
emit tok p:
skip (length close-1) (addcol 1 p) ss k
+ | tok==TokSemi && length acc >= 8 -- special case for repairing broken &
+ = emit (TokFreeText "amp") pos:
+ emit tok pos:
+ k (addcol 1 pos) (reverse acc++s:ss)
| isSpace s = textUntil close tok (s:acc) pos (white s p) ss k
| otherwise = textUntil close tok (s:acc) pos (addcol 1 p) ss k
diff --git a/src/Text/XML/HaXml/Parse.hs b/src/Text/XML/HaXml/Parse.hs
index 994c7a7..502e0e7 100755
--- a/src/Text/XML/HaXml/Parse.hs
+++ b/src/Text/XML/HaXml/Parse.hs
@@ -155,13 +155,6 @@ thd3 (_,_,a) = a
---- Auxiliary Parsing Functions ----
--- | Parse a bracketed item, discarding the brackets AND NOT using adjustErrBad
-myBracket :: PolyParse p => p bra -> p ket -> p a -> p a
-myBracket open close p = do
- do { open `adjustErr` ("Missing opening bracket:\n\t"++)
- ; p `discard` (close `adjustErr` ("Missing closing bracket:\n\t"++))
- }
-
-- | XParser is just a specialisation of the PolyState parser.
type XParser a = Parser SymTabs (Posn,TokenT) a
@@ -307,7 +300,7 @@ document = do
-- | Return an XML comment.
comment :: XParser Comment
comment = do
- myBracket (tok TokCommentOpen) (tok TokCommentClose) freetext
+ bracket (tok TokCommentOpen) (tok TokCommentClose) freetext
-- tok TokCommentOpen
-- commit $ do
-- c <- freetext
@@ -327,7 +320,7 @@ processinginstruction = do
cdsect :: XParser CDSect
cdsect = do
tok TokSectionOpen
- bracket (tok (TokSection CDATAx)) (tok TokSectionClose) chardata
+ bracket (tok (TokSection CDATAx)) (commit $ tok TokSectionClose) chardata
prolog :: XParser Prolog
prolog = do
@@ -358,7 +351,7 @@ versioninfo :: XParser VersionInfo
versioninfo = do
(word "version" `onFail` word "VERSION")
tok TokEqual
- bracket (tok TokQuote) (tok TokQuote) freetext
+ bracket (tok TokQuote) (commit $ tok TokQuote) freetext
misc :: XParser Misc
misc =
@@ -374,7 +367,7 @@ doctypedecl = do
commit $ do
n <- qname
eid <- maybe externalid
- es <- maybe (bracket (tok TokSqOpen) (tok TokSqClose)
+ es <- maybe (bracket (tok TokSqOpen) (commit $ tok TokSqClose)
(many (peRef markupdecl)))
blank (tok TokAnyClose) `onFail` failP "missing > in DOCTYPE decl"
return (DTD n eid (case es of { Nothing -> []; Just e -> e }))
@@ -408,7 +401,7 @@ sddecl = do
(word "standalone" `onFail` word "STANDALONE")
commit $ do
tok TokEqual `onFail` failP "missing = in 'standalone' decl"
- bracket (tok TokQuote) (tok TokQuote)
+ bracket (tok TokQuote) (commit $ tok TokQuote)
( (word "yes" >> return True) `onFail`
(word "no" >> return False) `onFail`
failP "'standalone' decl requires 'yes' or 'no' value" )
@@ -425,7 +418,7 @@ element = do
, do tok TokAnyClose
cs <- many content
p <- posn
- m <- bracket (tok TokEndOpen) (tok TokAnyClose) qname
+ m <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
checkmatch p n m
return (Elem n as cs))
] `adjustErr` (("in element tag "++n++",\n")++)
@@ -445,7 +438,7 @@ element = do
manyFinally content
(do p <- posn
m <- bracket (tok TokEndOpen)
- (tok TokAnyClose) qname
+ (commit $ tok TokAnyClose) qname
checkmatch p n m)
) `adjustErrBad` (("in element tag "++printableName n++",\n")++)
@@ -515,7 +508,7 @@ elementdecl = do
blank (tok TokAnyClose) `onFail` failBadP
("expected > terminating ELEMENT decl"
++"\n element name was "++show (printableName n)
- ++"\n contentspec was "++(\ (ContentSpec p)-> show p) c)
+ ++"\n contentspec was "++(\ (ContentSpec p)-> debugShowCP p) c)
return (ElementDecl n c)
contentspec :: XParser ContentSpec
@@ -530,29 +523,29 @@ contentspec =
choice :: XParser [CP]
choice = do
- myBracket (tok TokBraOpen `debug` "Trying choice")
- (blank (tok TokBraClose `debug` "Succeeded with choice"))
- (peRef cp `sepBy1` blank (tok TokPipe))
+ bracket (tok TokBraOpen `debug` "Trying choice")
+ (blank (tok TokBraClose `debug` "Succeeded with choice"))
+ (peRef cp `sepBy1` blank (tok TokPipe))
sequence :: XParser [CP]
sequence = do
- myBracket (tok TokBraOpen `debug` "Trying sequence")
- (blank (tok TokBraClose `debug` "Succeeded with sequence"))
- (peRef cp `sepBy1` blank (tok TokComma))
+ bracket (tok TokBraOpen `debug` "Trying sequence")
+ (blank (tok TokBraClose `debug` "Succeeded with sequence"))
+ (peRef cp `sepBy1` blank (tok TokComma))
cp :: XParser CP
cp = oneOf [ ( do n <- qname
m <- modifier
let c = TagName n m
- return c `debug` ("ContentSpec: name "++show c))
+ return c `debug` ("ContentSpec: name "++debugShowCP c))
, ( do ss <- sequence
m <- modifier
let c = Seq ss m
- return c `debug` ("ContentSpec: sequence "++show c))
+ return c `debug` ("ContentSpec: sequence "++debugShowCP c))
, ( do cs <- choice
m <- modifier
let c = Choice cs m
- return c `debug` ("ContentSpec: choice "++show c))
+ return c `debug` ("ContentSpec: choice "++debugShowCP c))
] `adjustErr` (++"\nwhen looking for a content particle")
modifier :: XParser Modifier
@@ -563,17 +556,17 @@ modifier = oneOf [ ( tok TokStar >> return Star )
]
-- just for debugging
-instance Show CP where
- show (TagName n m) = printableName n++show m
- show (Choice cps m) = '(': concat (intersperse "|" (map show cps))
- ++")"++show m
- show (Seq cps m) = '(': concat (intersperse "," (map show cps))
- ++")"++show m
-instance Show Modifier where
- show None = ""
- show Query = "?"
- show Star = "*"
- show Plus = "+"
+debugShowCP :: CP -> String
+debugShowCP cp = case cp of
+ TagName n m -> printableName n++debugShowModifier m
+ Choice cps m -> '(': concat (intersperse "|" (map debugShowCP cps))++")"++debugShowModifier m
+ Seq cps m -> '(': concat (intersperse "," (map debugShowCP cps))++")"++debugShowModifier m
+debugShowModifier :: Modifier -> String
+debugShowModifier modifier = case modifier of
+ None -> ""
+ Query -> "?"
+ Star -> "*"
+ Plus -> "+"
----
mixed :: XParser Mixed
@@ -644,12 +637,12 @@ enumeratedtype =
notationtype :: XParser NotationType
notationtype = do
word "NOTATION"
- bracket (tok TokBraOpen) (blank (tok TokBraClose))
+ bracket (tok TokBraOpen) (commit $ blank $ tok TokBraClose)
(peRef name `sepBy1` peRef (tok TokPipe))
enumeration :: XParser Enumeration
enumeration =
- bracket (tok TokBraOpen) (blank (tok TokBraClose))
+ bracket (tok TokBraOpen) (commit $ blank $ tok TokBraClose)
(peRef nmtoken `sepBy1` blank (peRef (tok TokPipe)))
defaultdecl :: XParser DefaultDecl
@@ -716,7 +709,7 @@ newIgnore =
-- | Return either a general entity reference, or a character reference.
reference :: XParser Reference
reference = do
- myBracket (tok TokAmp) (tok TokSemi) (freetext >>= val)
+ bracket (tok TokAmp) (tok TokSemi) (freetext >>= val)
where
val ('#':'x':i) | all isHexDigit i
= return . RefChar . fst . head . readHex $ i
@@ -731,11 +724,11 @@ reference =
entityref :: XParser EntityRef
entityref = do
- bracket (tok TokAmp) (tok TokSemi) name
+ bracket (tok TokAmp) (commit $ tok TokSemi) name
charref :: XParser CharRef
charref = do
- bracket (tok TokAmp) (tok TokSemi) (freetext >>= readCharVal)
+ bracket (tok TokAmp) (commit $ tok TokSemi) (freetext >>= readCharVal)
where
readCharVal ('#':'x':i) = return . fst . head . readHex $ i
readCharVal ('#':i) = return . fst . head . readDec $ i
@@ -744,7 +737,7 @@ charref = do
pereference :: XParser PEReference
pereference = do
- myBracket (tok TokPercent) (tok TokSemi) nmtoken
+ bracket (tok TokPercent) (tok TokSemi) nmtoken
entitydecl :: XParser EntityDecl
entitydecl =
@@ -829,7 +822,7 @@ encodingdecl :: XParser EncodingDecl
encodingdecl = do
(word "encoding" `onFail` word "ENCODING")
tok TokEqual `onFail` failBadP "expected = in 'encoding' decl"
- f <- bracket (tok TokQuote) (tok TokQuote) freetext
+ f <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (EncodingDecl f)
notationdecl :: XParser NotationDecl
@@ -849,7 +842,7 @@ publicid = do
entityvalue :: XParser EntityValue
entityvalue = do
- -- evs <- bracket (tok TokQuote) (tok TokQuote) (many (peRef ev))
+ -- evs <- bracket (tok TokQuote) (commit $ tok TokQuote) (many (peRef ev))
tok TokQuote
pn <- posn
evs <- many ev
@@ -874,18 +867,18 @@ ev =
attvalue :: XParser AttValue
attvalue = do
- avs <- bracket (tok TokQuote) (tok TokQuote)
+ avs <- bracket (tok TokQuote) (commit $ tok TokQuote)
(many (either freetext reference))
return (AttValue avs)
systemliteral :: XParser SystemLiteral
systemliteral = do
- s <- bracket (tok TokQuote) (tok TokQuote) freetext
+ s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (SystemLiteral s) -- note: refs &...; not permitted
pubidliteral :: XParser PubidLiteral
pubidliteral = do
- s <- bracket (tok TokQuote) (tok TokQuote) freetext
+ s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (PubidLiteral s) -- note: freetext is too liberal here
-- | Return parsed freetext (i.e. until the next markup)
diff --git a/src/Text/XML/HaXml/ParseLazy.hs b/src/Text/XML/HaXml/ParseLazy.hs
index 18afd7d..621bf03 100755
--- a/src/Text/XML/HaXml/ParseLazy.hs
+++ b/src/Text/XML/HaXml/ParseLazy.hs
@@ -159,14 +159,6 @@ thd3 (_,_,a) = a
---- Auxiliary Parsing Functions ----
--- | Parse a bracketed item, discarding the brackets AND NOT using adjustErrBad
-myBracket :: PolyParse p => p bra -> p ket -> p a -> p a
-myBracket open close p = do
- do { open `adjustErr` ("Missing opening bracket:\n\t"++)
- ; p `discard` (close `adjustErr` ("Missing closing bracket:\n\t"++))
- }
-
-
-- | XParser is just a specialisation of the PolyStateLazy parser.
type XParser a = Parser SymTabs (Posn,TokenT) a
@@ -317,7 +309,7 @@ document = do
-- | Return an XML comment.
comment :: XParser Comment
comment = do
- myBracket (tok TokCommentOpen) (tok TokCommentClose) freetext
+ bracket (tok TokCommentOpen) (tok TokCommentClose) freetext
-- tok TokCommentOpen
-- commit $ do
-- c <- freetext
@@ -337,7 +329,7 @@ processinginstruction = do
cdsect :: XParser CDSect
cdsect = do
tok TokSectionOpen
- bracket (tok (TokSection CDATAx)) (tok TokSectionClose) chardata
+ bracket (tok (TokSection CDATAx)) (commit $ tok TokSectionClose) chardata
prolog :: XParser Prolog
prolog = do
@@ -369,7 +361,7 @@ versioninfo :: XParser VersionInfo
versioninfo = do
(word "version" `onFail` word "VERSION")
tok TokEqual
- bracket (tok TokQuote) (tok TokQuote) freetext
+ bracket (tok TokQuote) (commit $ tok TokQuote) freetext
misc :: XParser Misc
misc =
@@ -385,7 +377,7 @@ doctypedecl = do
commit $ do
n <- qname
eid <- maybe externalid
- es <- maybe (bracket (tok TokSqOpen) (tok TokSqClose)
+ es <- maybe (bracket (tok TokSqOpen) (commit $ tok TokSqClose)
(many (peRef markupdecl)))
blank (tok TokAnyClose) `onFail` failP "missing > in DOCTYPE decl"
return (DTD n eid (case es of { Nothing -> []; Just e -> e }))
@@ -419,7 +411,7 @@ sddecl = do
(word "standalone" `onFail` word "STANDALONE")
commit $ do
tok TokEqual `onFail` failP "missing = in 'standalone' decl"
- bracket (tok TokQuote) (tok TokQuote)
+ bracket (tok TokQuote) (commit $ tok TokQuote)
( (word "yes" >> return True) `onFail`
(word "no" >> return False) `onFail`
failP "'standalone' decl requires 'yes' or 'no' value" )
@@ -436,7 +428,7 @@ element = do
, do tok TokAnyClose
cs <- many content
p <- posn
- m <- bracket (tok TokEndOpen) (tok TokAnyClose) qname
+ m <- bracket (tok TokEndOpen) (commit $ tok TokAnyClose) qname
checkmatch p n m
return (Elem n as cs))
] `adjustErr` (("in element tag "++n++",\n")++)
@@ -455,7 +447,7 @@ element = do
commit $ manyFinally content
(do p <- posn
m <- bracket (tok TokEndOpen)
- (tok TokAnyClose) qname
+ (commit $ tok TokAnyClose) qname
checkmatch p n m)
) `adjustErrBad` (("in element tag "++printableName n++",\n")++)
@@ -525,7 +517,7 @@ elementdecl = do
blank (tok TokAnyClose) `onFail` failBadP
("expected > terminating ELEMENT decl"
++"\n element name was "++show (printableName n)
- ++"\n contentspec was "++(\ (ContentSpec p)-> show p) c)
+ ++"\n contentspec was "++(\ (ContentSpec p)-> debugShowCP p) c)
return (ElementDecl n c)
contentspec :: XParser ContentSpec
@@ -540,29 +532,29 @@ contentspec =
choice :: XParser [CP]
choice = do
- myBracket (tok TokBraOpen `debug` "Trying choice")
- (blank (tok TokBraClose `debug` "Succeeded with choice"))
- (peRef cp `sepBy1` blank (tok TokPipe))
+ bracket (tok TokBraOpen `debug` "Trying choice")
+ (blank (tok TokBraClose `debug` "Succeeded with choice"))
+ (peRef cp `sepBy1` blank (tok TokPipe))
sequence :: XParser [CP]
sequence = do
- myBracket (tok TokBraOpen `debug` "Trying sequence")
- (blank (tok TokBraClose `debug` "Succeeded with sequence"))
- (peRef cp `sepBy1` blank (tok TokComma))
+ bracket (tok TokBraOpen `debug` "Trying sequence")
+ (blank (tok TokBraClose `debug` "Succeeded with sequence"))
+ (peRef cp `sepBy1` blank (tok TokComma))
cp :: XParser CP
cp = oneOf [ ( do n <- qname
m <- modifier
let c = TagName n m
- return c `debug` ("ContentSpec: name "++show c))
+ return c `debug` ("ContentSpec: name "++debugShowCP c))
, ( do ss <- sequence
m <- modifier
let c = Seq ss m
- return c `debug` ("ContentSpec: sequence "++show c))
+ return c `debug` ("ContentSpec: sequence "++debugShowCP c))
, ( do cs <- choice
m <- modifier
let c = Choice cs m
- return c `debug` ("ContentSpec: choice "++show c))
+ return c `debug` ("ContentSpec: choice "++debugShowCP c))
] `adjustErr` (++"\nwhen looking for a content particle")
modifier :: XParser Modifier
@@ -573,17 +565,17 @@ modifier = oneOf [ ( tok TokStar >> return Star )
]
-- just for debugging
-instance Show CP where
- show (TagName n m) = printableName n++show m
- show (Choice cps m) = '(': concat (intersperse "|" (map show cps))
- ++")"++show m
- show (Seq cps m) = '(': concat (intersperse "," (map show cps))
- ++")"++show m
-instance Show Modifier where
- show None = ""
- show Query = "?"
- show Star = "*"
- show Plus = "+"
+debugShowCP :: CP -> String
+debugShowCP cp = case cp of
+ TagName n m -> printableName n++debugShowModifier m
+ Choice cps m -> '(': concat (intersperse "|" (map debugShowCP cps))++")"++debugShowModifier m
+ Seq cps m -> '(': concat (intersperse "," (map debugShowCP cps))++")"++debugShowModifier m
+debugShowModifier :: Modifier -> String
+debugShowModifier modifier = case modifier of
+ None -> ""
+ Query -> "?"
+ Star -> "*"
+ Plus -> "+"
----
mixed :: XParser Mixed
@@ -654,12 +646,12 @@ enumeratedtype =
notationtype :: XParser NotationType
notationtype = do
word "NOTATION"
- bracket (tok TokBraOpen) (blank (tok TokBraClose))
+ bracket (tok TokBraOpen) (commit $ blank $ tok TokBraClose)
(peRef name `sepBy1` peRef (tok TokPipe))
enumeration :: XParser Enumeration
enumeration =
- bracket (tok TokBraOpen) (blank (tok TokBraClose))
+ bracket (tok TokBraOpen) (commit $ blank $ tok TokBraClose)
(peRef nmtoken `sepBy1` blank (peRef (tok TokPipe)))
defaultdecl :: XParser DefaultDecl
@@ -726,7 +718,7 @@ newIgnore =
-- | Return either a general entity reference, or a character reference.
reference :: XParser Reference
reference = do
- bracket (tok TokAmp) (tok TokSemi) (freetext >>= val)
+ bracket (tok TokAmp) (commit $ tok TokSemi) (freetext >>= val)
where
val ('#':'x':i) | all isHexDigit i
= return . RefChar . fst . head . readHex $ i
@@ -741,11 +733,11 @@ reference =
entityref :: XParser EntityRef
entityref = do
- bracket (tok TokAmp) (tok TokSemi) name
+ bracket (tok TokAmp) (commit $ tok TokSemi) name
charref :: XParser CharRef
charref = do
- bracket (tok TokAmp) (tok TokSemi) (freetext >>= readCharVal)
+ bracket (tok TokAmp) (commit $ tok TokSemi) (freetext >>= readCharVal)
where
readCharVal ('#':'x':i) = return . fst . head . readHex $ i
readCharVal ('#':i) = return . fst . head . readDec $ i
@@ -754,7 +746,7 @@ charref = do
pereference :: XParser PEReference
pereference = do
- myBracket (tok TokPercent) (tok TokSemi) nmtoken
+ bracket (tok TokPercent) (tok TokSemi) nmtoken
entitydecl :: XParser EntityDecl
entitydecl =
@@ -839,7 +831,7 @@ encodingdecl :: XParser EncodingDecl
encodingdecl = do
(word "encoding" `onFail` word "ENCODING")
tok TokEqual `onFail` failBadP "expected = in 'encoding' decl"
- f <- bracket (tok TokQuote) (tok TokQuote) freetext
+ f <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (EncodingDecl f)
notationdecl :: XParser NotationDecl
@@ -859,7 +851,7 @@ publicid = do
entityvalue :: XParser EntityValue
entityvalue = do
- -- evs <- bracket (tok TokQuote) (tok TokQuote) (many (peRef ev))
+ -- evs <- bracket (tok TokQuote) (commit $ tok TokQuote) (many (peRef ev))
tok TokQuote
pn <- posn
evs <- many ev
@@ -885,18 +877,18 @@ ev =
attvalue :: XParser AttValue
attvalue = do
- avs <- bracket (tok TokQuote) (tok TokQuote)
+ avs <- bracket (tok TokQuote) (commit $ tok TokQuote)
(many (either freetext reference))
return (AttValue avs)
systemliteral :: XParser SystemLiteral
systemliteral = do
- s <- bracket (tok TokQuote) (tok TokQuote) freetext
+ s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (SystemLiteral s) -- note: refs &...; not permitted
pubidliteral :: XParser PubidLiteral
pubidliteral = do
- s <- bracket (tok TokQuote) (tok TokQuote) freetext
+ s <- bracket (tok TokQuote) (commit $ tok TokQuote) freetext
return (PubidLiteral s) -- note: freetext is too liberal here
-- | Return parsed freetext (i.e. until the next markup)
diff --git a/src/Text/XML/HaXml/Types.hs b/src/Text/XML/HaXml/Types.hs
index 3731020..ab0d82e 100755
--- a/src/Text/XML/HaXml/Types.hs
+++ b/src/Text/XML/HaXml/Types.hs
@@ -133,14 +133,14 @@ lookupST = lookup
-- | The symbol table stored in a document holds all its general entity
-- reference definitions.
data Document i = Document Prolog (SymTab EntityDef) (Element i) [Misc]
- deriving Eq
+ deriving (Eq, Show)
data Prolog = Prolog (Maybe XMLDecl) [Misc] (Maybe DocTypeDecl) [Misc]
- deriving Eq
+ deriving (Eq, Show)
data XMLDecl = XMLDecl VersionInfo (Maybe EncodingDecl) (Maybe SDDecl)
- deriving Eq
+ deriving (Eq, Show)
data Misc = Comment Comment
| PI ProcessingInstruction
- deriving Eq
+ deriving (Eq, Show)
type ProcessingInstruction = (PITarget,String)
@@ -149,20 +149,20 @@ type VersionInfo = String
type Comment = String
type PITarget = String
-data DocTypeDecl = DTD QName (Maybe ExternalID) [MarkupDecl] deriving Eq
+data DocTypeDecl = DTD QName (Maybe ExternalID) [MarkupDecl] deriving (Eq, Show)
data MarkupDecl = Element ElementDecl
| AttList AttListDecl
| Entity EntityDecl
| Notation NotationDecl
| MarkupMisc Misc
- deriving Eq
+ deriving (Eq, Show)
-data ExtSubset = ExtSubset (Maybe TextDecl) [ExtSubsetDecl] deriving Eq
+data ExtSubset = ExtSubset (Maybe TextDecl) [ExtSubsetDecl] deriving (Eq, Show)
data ExtSubsetDecl = ExtMarkupDecl MarkupDecl
| ExtConditionalSect ConditionalSect
- deriving Eq
+ deriving (Eq, Show)
-data Element i = Elem QName [Attribute] [Content i] deriving Eq
+data Element i = Elem QName [Attribute] [Content i] deriving (Eq, Show)
-- ElemTag is an intermediate type for parsing only
data ElemTag = ElemTag QName [Attribute]
type Attribute = (QName, AttValue)
@@ -171,6 +171,7 @@ data Content i = CElem (Element i) i
-- ^ bool is whether whitespace is significant
| CRef Reference i
| CMisc Misc i
+ deriving Show
-- custom instance of Eq, ignoring the informational elements.
instance Eq (Content i) where
@@ -195,30 +196,30 @@ instance Functor Content where
fmap f (CRef r i) = CRef r (f i)
fmap f (CMisc m i) = CMisc m (f i)
-data ElementDecl = ElementDecl QName ContentSpec deriving Eq
+data ElementDecl = ElementDecl QName ContentSpec deriving (Eq, Show)
data ContentSpec = EMPTY
| ANY
| Mixed Mixed
| ContentSpec CP
- deriving Eq
+ deriving (Eq, Show)
data CP = TagName QName Modifier
| Choice [CP] Modifier
| Seq [CP] Modifier
- deriving Eq
+ deriving (Eq, Show)
data Modifier = None -- ^ Just One
| Query -- ^ Zero Or One
| Star -- ^ Zero Or More
| Plus -- ^ One Or More
- deriving Eq
+ deriving (Eq, Show)
data Mixed = PCDATA
| PCDATAplus [QName]
- deriving Eq
-data AttListDecl = AttListDecl QName [AttDef] deriving Eq
-data AttDef = AttDef QName AttType DefaultDecl deriving Eq
+ deriving (Eq, Show)
+data AttListDecl = AttListDecl QName [AttDef] deriving (Eq, Show)
+data AttDef = AttDef QName AttType DefaultDecl deriving (Eq, Show)
data AttType = StringType
| TokenizedType TokenizedType
| EnumeratedType EnumeratedType
- deriving Eq
+ deriving (Eq, Show)
data TokenizedType = ID
| IDREF
| IDREFS
@@ -226,25 +227,25 @@ data TokenizedType = ID
| ENTITIES
| NMTOKEN
| NMTOKENS
- deriving Eq
+ deriving (Eq, Show)
data EnumeratedType = NotationType NotationType
| Enumeration Enumeration
- deriving Eq
+ deriving (Eq, Show)
type NotationType = [Name] -- nonempty list
type Enumeration = [NmToken] -- nonempty list
data DefaultDecl = REQUIRED
| IMPLIED
| DefaultTo AttValue (Maybe FIXED)
- deriving Eq
-data FIXED = FIXED deriving Eq
+ deriving (Eq, Show)
+data FIXED = FIXED deriving (Eq, Show)
data ConditionalSect = IncludeSect IncludeSect
| IgnoreSect IgnoreSect
- deriving Eq
+ deriving (Eq, Show)
type IncludeSect = [ExtSubsetDecl]
type IgnoreSect = [IgnoreSectContents]
-data Ignore = Ignore deriving Eq
-data IgnoreSectContents = IgnoreSectContents Ignore [(IgnoreSectContents,Ignore)] deriving Eq
+data Ignore = Ignore deriving (Eq, Show)
+data IgnoreSectContents = IgnoreSectContents Ignore [(IgnoreSectContents,Ignore)] deriving (Eq, Show)
data Reference = RefEntity EntityRef
| RefChar CharRef
@@ -255,25 +256,25 @@ type PEReference = Name
data EntityDecl = EntityGEDecl GEDecl
| EntityPEDecl PEDecl
- deriving Eq
-data GEDecl = GEDecl Name EntityDef deriving Eq
-data PEDecl = PEDecl Name PEDef deriving Eq
+ deriving (Eq, Show)
+data GEDecl = GEDecl Name EntityDef deriving (Eq, Show)
+data PEDecl = PEDecl Name PEDef deriving (Eq, Show)
data EntityDef = DefEntityValue EntityValue
| DefExternalID ExternalID (Maybe NDataDecl)
- deriving Eq
+ deriving (Eq, Show)
data PEDef = PEDefEntityValue EntityValue
| PEDefExternalID ExternalID deriving (Eq,Show)
data ExternalID = SYSTEM SystemLiteral
| PUBLIC PubidLiteral SystemLiteral deriving (Eq,Show)
-newtype NDataDecl = NDATA Name deriving Eq
+newtype NDataDecl = NDATA Name deriving (Eq, Show)
-data TextDecl = TextDecl (Maybe VersionInfo) EncodingDecl deriving Eq
-data ExtParsedEnt i = ExtParsedEnt (Maybe TextDecl) (Content i) deriving Eq
-data ExtPE = ExtPE (Maybe TextDecl) [ExtSubsetDecl] deriving Eq
+data TextDecl = TextDecl (Maybe VersionInfo) EncodingDecl deriving (Eq, Show)
+data ExtParsedEnt i = ExtParsedEnt (Maybe TextDecl) (Content i) deriving (Eq, Show)
+data ExtPE = ExtPE (Maybe TextDecl) [ExtSubsetDecl] deriving (Eq, Show)
-data NotationDecl = NOTATION Name (Either ExternalID PublicID) deriving Eq
-newtype PublicID = PUBLICID PubidLiteral deriving Eq
-newtype EncodingDecl = EncodingDecl String deriving Eq
+data NotationDecl = NOTATION Name (Either ExternalID PublicID) deriving (Eq, Show)
+newtype PublicID = PUBLICID PubidLiteral deriving (Eq, Show)
+newtype EncodingDecl = EncodingDecl String deriving (Eq, Show)
-- | A QName is a (possibly) qualified name, in the sense of XML namespaces.
data QName = N Name