diff options
author | ross <unknown> | 2005-01-13 11:41:33 +0000 |
---|---|---|
committer | ross <unknown> | 2005-01-13 11:41:33 +0000 |
commit | b899a3812d89ac13c4accd774927301da79da500 (patch) | |
tree | 99521759f5303e6fefc9fbe530886b9f4043c5c9 /src/HaddockLex.x | |
parent | eba2fc4e0360a17df48ec49a1584d760a6538653 (diff) |
[haddock @ 2005-01-13 11:41:33 by ross]
recognize SGML-style numeric character references &#ddd; or &#xhhhh; and
translate them into Chars.
Diffstat (limited to 'src/HaddockLex.x')
-rw-r--r-- | src/HaddockLex.x | 10 |
1 file changed, 7 insertions, 3 deletions
```diff
diff --git a/src/HaddockLex.x b/src/HaddockLex.x
index 06242ec5..a9ecc99a 100644
--- a/src/HaddockLex.x
+++ b/src/HaddockLex.x
@@ -11,6 +11,7 @@ module HaddockLex (
  ) where
 
 import Char
+import Numeric
 import HsSyn
 import HsLexer hiding (Token)
 import HsParseMonad
@@ -19,6 +20,7 @@ import HsParseMonad
 
 $ws = $white # \n
 $digit = [0-9]
+$hexdigit = [0-9a-fA-F]
 $special = [\"\@\/]
 $alphanum = [A-Za-z0-9]
 $ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~]
@@ -56,11 +58,13 @@ $ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~]
  \#.*\# { strtoken $ \s -> TokAName (init (tail s)) }
  [\'\`] $ident+ [\'\`] { ident }
  \\ . { strtoken (TokString . tail) }
+ "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
+ "&#x" $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
  -- allow special characters through if they don't fit one of the previous
  -- patterns.
- [\'\`\<\#\\] { strtoken TokString }
- [^ $special \< \# \n \'\` \\ \]]* \n { strtoken TokString `andBegin` line }
- [^ $special \< \# \n \'\` \\ \]]+ { strtoken TokString }
+ [\'\`\<\#\&\\] { strtoken TokString }
+ [^ $special \< \# \n \'\` \& \\ \]]* \n { strtoken TokString `andBegin` line }
+ [^ $special \< \# \n \'\` \& \\ \]]+ { strtoken TokString }
 }
 
 <def> {
```