From 052de51c3391c8a55539005a806f2c46f9ae2750 Mon Sep 17 00:00:00 2001 From: simonmar Date: Wed, 10 Apr 2002 13:23:13 +0000 Subject: [haddock @ 2002-04-10 13:23:13 by simonmar] Lex URLs as a single token to avoid having to escape special characters inside the URL string. --- src/HaddockLex.hs | 19 ++++++++++++++----- src/HaddockParse.y | 7 +++---- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs index 8e721996..15aa521d 100644 --- a/src/HaddockLex.hs +++ b/src/HaddockLex.hs @@ -9,10 +9,9 @@ module HaddockLex ( tokenise ) where -import IOExts --tmp import Char -special = "\'\"/[]<>" +special = "\'\"/[]" data Token = TokPara @@ -20,6 +19,7 @@ data Token | TokBullet | TokSpecial Char | TokString String + | TokURL String deriving Show -- simple finite-state machine for tokenising the doc string @@ -27,10 +27,17 @@ data Token tokenise :: String -> [Token] tokenise "" = [] tokenise str = case str of - c:cs | c `elem` special -> TokSpecial c : tokenise cs + '<':cs -> tokenise_url cs '\n':cs -> tokenise_newline cs + c:cs | c `elem` special -> TokSpecial c : tokenise cs _other -> tokenise_string "" str +tokenise_url cs = + let (url,rest) = break (=='>') cs in + TokURL url : case rest of + '>':rest -> tokenise rest + _ -> tokenise rest + tokenise_newline cs = case dropWhile nonNewlineSpace cs of '\n':cs -> TokPara : tokenise_para cs -- paragraph break @@ -57,8 +64,10 @@ tokenise_string str cs = [] -> [TokString (reverse str)] '\\':c:cs -> tokenise_string (c:str) cs '\n':cs -> tokenise_string_newline str cs - c:cs | c `elem` special -> TokString (reverse str) : tokenise (c:cs) - | otherwise -> tokenise_string (c:str) cs + c:cs | c == '<' || c `elem` special + -> TokString (reverse str) : tokenise (c:cs) + | otherwise + -> tokenise_string (c:str) cs tokenise_string_newline str cs = case dropWhile nonNewlineSpace cs of diff --git a/src/HaddockParse.y b/src/HaddockParse.y index 9411ff44..668e7d28 100644 --- a/src/HaddockParse.y +++ b/src/HaddockParse.y @@ -12,8 +12,7 @@ import HaddockTypes '/' { TokSpecial '/' } '[' { TokSpecial '[' } ']' { TokSpecial ']' } - '<' { TokSpecial '<' } - '>' { TokSpecial '>' } + URL { TokURL $$ } '*' { TokBullet } '(n)' { TokNumber } PARA { TokPara } @@ -48,11 +47,11 @@ seq :: { ParsedDoc } elem :: { ParsedDoc } : STRING { DocString $1 } | '/' STRING '/' { DocEmphasis (DocString $2) } - | '<' STRING '>' { DocURL $2 } + | URL { DocURL $1 } | SQUO STRING SQUO { DocIdentifier $2 } | DQUO STRING DQUO { DocModule $2 } | '[' seq ']' { DocMonospaced $2 } { -happyError = error "Parse error in doc string" +happyError toks = error ("parse error in doc string: " ++ show (take 3 toks)) } -- cgit v1.2.3