diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/HaddockLex.hs | 54 | 
1 files changed, 38 insertions, 16 deletions
| diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs index 2f510092..f766222d 100644 --- a/src/HaddockLex.hs +++ b/src/HaddockLex.hs @@ -25,25 +25,22 @@ data Token  -- simple finite-state machine for tokenising the doc string +-- ---------------------------------------------------------------------------- +-- At the beginning of a paragraph, we throw away initial whitespace +  tokenise :: String -> [Token]  tokenise "" = []  tokenise str = case str of    '<':cs  -> tokenise_url cs -  '\n':cs -> tokenise_newline cs +  '\n':cs -> tokenise_newline tokenise cs    c:cs | c `elem` special -> TokSpecial c : tokenise cs    _other  -> tokenise_string "" str -tokenise_url cs = -  let (url,rest) = break (=='>') cs in -  TokURL url : case rest of -		 '>':rest -> tokenise rest -		 _ -> tokenise rest - -tokenise_newline cs = +tokenise_newline next cs =   case dropWhile nonNewlineSpace cs of     '\n':cs -> TokPara : tokenise_para cs -- paragraph break -   '>':cs  -> TokBirdTrack : tokenise cs -- bird track -   _other  -> tokenise_string "\n" cs +   '>':cs  -> TokBirdTrack : next cs -- bird track +   _other  -> tokenise_string "" cs  tokenise_para cs =    case dropWhile nonNewlineSpace cs of    @@ -63,20 +60,45 @@ tokenise_para cs =  nonNewlineSpace c = isSpace c && c /= '\n' +-- ---------------------------------------------------------------------------- +-- Within a paragraph, we don't throw away any whitespace (except before a +-- birdtrack, and before a paragraph break). + +tokenise1 :: String -> [Token] +tokenise1 "" = [] +tokenise1 str = case str of +  '<':cs  -> tokenise_url cs +  '\n':cs -> tokenise_newline1 cs +  c:cs | c `elem` special -> TokSpecial c : tokenise1 cs +  _other  -> tokenise_string "" str + +tokenise_newline1 cs = + case dropWhile nonNewlineSpace cs of +   '\n':cs -> TokPara : tokenise_para cs -- paragraph break +   '>':cs  -> TokString "\n" : TokBirdTrack : tokenise1 cs -- bird track +   _other  -> tokenise_string "\n" cs + +tokenise_url cs = +  let (url,rest) = break (=='>') cs in +  TokURL url : case rest of +		 '>':rest -> tokenise1 rest +		 _ -> tokenise1 rest + +-- ---------------------------------------------------------------------------- +-- Within a string, we don't throw away any whitespace +  tokenise_string str cs =     case cs of      [] -> [TokString (reverse str)]      '\\':c:cs -> tokenise_string (c:str) cs      '\n':cs   -> tokenise_string_newline str cs -    c:cs | c == '<' || c `elem` special -		-> TokString (reverse str) : tokenise (c:cs) -         | otherwise -	        -> tokenise_string (c:str) cs +    '<':cs    -> TokString (reverse str) : tokenise_url cs +    c:cs | c `elem` special -> TokString (reverse str) : tokenise1 (c:cs) +         | otherwise 	    -> tokenise_string (c:str) cs  tokenise_string_newline str cs =    case dropWhile nonNewlineSpace cs  of     '\n':cs -> TokString (reverse str) : TokPara : tokenise_para cs -   '>':cs  -> TokString (reverse ('\n':str)) : TokBirdTrack : tokenise cs +   '>':cs  -> TokString (reverse ('\n':str)) : TokBirdTrack : tokenise1 cs  		 -- bird track     _other  -> tokenise_string ('\n':str) cs  -- don't throw away whitespace - | 
