diff options
author | simonmar <unknown> | 2002-04-10 13:23:13 +0000 |
---|---|---|
committer | simonmar <unknown> | 2002-04-10 13:23:13 +0000 |
commit | 052de51c3391c8a55539005a806f2c46f9ae2750 (patch) | |
tree | fe8e5a771c95dea2d6a5e8cb5c8618a06a69933a /src/HaddockLex.hs | |
parent | 9e83c54df01ff0de10fc1a6cfc88a6cb93cdec1d (diff) |
[haddock @ 2002-04-10 13:23:13 by simonmar]
Lex URLs as a single token to avoid having to escape special
characters inside the URL string.
Diffstat (limited to 'src/HaddockLex.hs')
-rw-r--r-- | src/HaddockLex.hs | 19 |
1 files changed, 14 insertions, 5 deletions
diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs index 8e721996..15aa521d 100644 --- a/src/HaddockLex.hs +++ b/src/HaddockLex.hs @@ -9,10 +9,9 @@ module HaddockLex ( tokenise ) where -import IOExts --tmp import Char -special = "\'\"/[]<>" +special = "\'\"/[]" data Token = TokPara @@ -20,6 +19,7 @@ data Token | TokBullet | TokSpecial Char | TokString String + | TokURL String deriving Show -- simple finite-state machine for tokenising the doc string @@ -27,10 +27,17 @@ data Token tokenise :: String -> [Token] tokenise "" = [] tokenise str = case str of - c:cs | c `elem` special -> TokSpecial c : tokenise cs + '<':cs -> tokenise_url cs '\n':cs -> tokenise_newline cs + c:cs | c `elem` special -> TokSpecial c : tokenise cs _other -> tokenise_string "" str +tokenise_url cs = + let (url,rest) = break (=='>') cs in + TokURL url : case rest of + '>':rest -> tokenise rest + _ -> tokenise rest + tokenise_newline cs = case dropWhile nonNewlineSpace cs of '\n':cs -> TokPara : tokenise_para cs -- paragraph break @@ -57,8 +64,10 @@ tokenise_string str cs = [] -> [TokString (reverse str)] '\\':c:cs -> tokenise_string (c:str) cs '\n':cs -> tokenise_string_newline str cs - c:cs | c `elem` special -> TokString (reverse str) : tokenise (c:cs) - | otherwise -> tokenise_string (c:str) cs + c:cs | c == '<' || c `elem` special + -> TokString (reverse str) : tokenise (c:cs) + | otherwise + -> tokenise_string (c:str) cs tokenise_string_newline str cs = case dropWhile nonNewlineSpace cs of |