diff options
Diffstat (limited to 'src/HaddockLex.hs')
-rw-r--r-- | src/HaddockLex.hs | 98 |
1 files changed, 69 insertions, 29 deletions
diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs index f766222d..000d62f0 100644 --- a/src/HaddockLex.hs +++ b/src/HaddockLex.hs @@ -10,19 +10,25 @@ module HaddockLex ( ) where import Char - -special = '`' : '\'' : '\"' : '@' : '/' : [] +import HsSyn +import HsLexer hiding (Token) +import HsParseMonad data Token = TokPara | TokNumber | TokBullet | TokSpecial Char + | TokIdent [HsQName] | TokString String | TokURL String | TokBirdTrack deriving Show +isSpecial c = c `elem` ['\"', '@', '/'] +isSingleQuote c = c `elem` ['\'', '`'] +isIdent c = isAlphaNum c || c == '_' + -- simple finite-state machine for tokenising the doc string -- ---------------------------------------------------------------------------- @@ -32,14 +38,15 @@ tokenise :: String -> [Token] tokenise "" = [] tokenise str = case str of '<':cs -> tokenise_url cs - '\n':cs -> tokenise_newline tokenise cs - c:cs | c `elem` special -> TokSpecial c : tokenise cs + '\n':cs -> tokenise_newline cs + c:cs | isSingleQuote c -> tokenise_identifier c cs + | isSpecial c -> TokSpecial c : tokenise1 cs _other -> tokenise_string "" str -tokenise_newline next cs = +tokenise_newline cs = case dropWhile nonNewlineSpace cs of '\n':cs -> TokPara : tokenise_para cs -- paragraph break - '>':cs -> TokBirdTrack : next cs -- bird track + '>':cs -> TokBirdTrack : tokenise_birdtrack cs -- bird track _other -> tokenise_string "" cs tokenise_para cs = @@ -49,7 +56,7 @@ tokenise_para cs = -- bullet: '-' '-':cs -> TokBullet : tokenise cs -- enumerated item: '1.' - '>':cs -> TokBirdTrack : tokenise cs + '>':cs -> TokBirdTrack : tokenise_birdtrack cs -- bird track str | (ds,'.':cs) <- span isDigit str, not (null ds) -> TokNumber : tokenise cs @@ -65,18 +72,14 @@ nonNewlineSpace c = isSpace c && c /= '\n' -- birdtrack, and before a paragraph break). tokenise1 :: String -> [Token] -tokenise1 "" = [] -tokenise1 str = case str of - '<':cs -> tokenise_url cs - '\n':cs -> tokenise_newline1 cs - c:cs | c `elem` special -> TokSpecial c : tokenise1 cs - _other -> tokenise_string "" str +tokenise1 str = tokenise_string "" str -tokenise_newline1 cs = - case dropWhile nonNewlineSpace cs of - '\n':cs -> TokPara : tokenise_para cs -- paragraph break - '>':cs -> TokString "\n" : TokBirdTrack : tokenise1 cs -- bird track - _other -> tokenise_string "\n" cs +-- found a single quote, check whether we have an identifier... +tokenise_identifier q cs = + let (ident,rest) = break (not.isIdent) cs in + case (rest, strToHsQNames ident) of + (c:cs, Just names) | isSingleQuote c -> TokIdent names : tokenise1 cs + _other -> tokenise_string [q] cs tokenise_url cs = let (url,rest) = break (=='>') cs in @@ -84,21 +87,58 @@ tokenise_url cs = '>':rest -> tokenise1 rest _ -> tokenise1 rest --- ---------------------------------------------------------------------------- --- Within a string, we don't throw away any whitespace - tokenise_string str cs = case cs of - [] -> [TokString (reverse str)] + [] -> tokString str [] '\\':c:cs -> tokenise_string (c:str) cs '\n':cs -> tokenise_string_newline str cs - '<':cs -> TokString (reverse str) : tokenise_url cs - c:cs | c `elem` special -> TokString (reverse str) : tokenise1 (c:cs) - | otherwise -> tokenise_string (c:str) cs + '<':cs -> tokString str (tokenise_url cs) + c:cs | isSpecial c -> tokString str (TokSpecial c : tokenise1 cs) + | isSingleQuote c -> tokString str (tokenise_identifier c cs) + | otherwise -> tokenise_string (c:str) cs tokenise_string_newline str cs = case dropWhile nonNewlineSpace cs of - '\n':cs -> TokString (reverse str) : TokPara : tokenise_para cs - '>':cs -> TokString (reverse ('\n':str)) : TokBirdTrack : tokenise1 cs - -- bird track - _other -> tokenise_string ('\n':str) cs -- don't throw away whitespace + '\n':cs -> tokString str (TokPara : tokenise_para cs) + -- paragraph break: throw away all whitespace + '>':cs -> tokString ('\n':str) (TokBirdTrack : tokenise_birdtrack cs) + -- keep the \n, but throw away any space before the '>' + _other -> tokenise_string ('\n':str) cs + -- don't throw away whitespace at all + +tokString [] rest = rest +tokString cs rest = TokString (reverse cs) : rest + +-- A bird-tracked line is verbatim, no markup characters are interpreted +tokenise_birdtrack cs = + let (line, rest) = break (=='\n') cs in + TokString line : tokenise1 rest + +-- ----------------------------------------------------------------------------- +-- Lex a string as a Haskell identifier + +strToHsQNames :: String -> Maybe [HsQName] +strToHsQNames str + = case lexer (\t -> returnP t) str (SrcLoc 1 1) 1 1 [] of + Ok _ (VarId str) + -> Just [ UnQual (HsVarName (HsIdent str)) ] + Ok _ (QVarId (mod,str)) + -> Just [ Qual (Module mod) (HsVarName (HsIdent str)) ] + Ok _ (ConId str) + -> Just [ UnQual (HsTyClsName (HsIdent str)), + UnQual (HsVarName (HsIdent str)) ] + Ok _ (QConId (mod,str)) + -> Just [ Qual (Module mod) (HsTyClsName (HsIdent str)), + Qual (Module mod) (HsVarName (HsIdent str)) ] + Ok _ (VarSym str) + -> Just [ UnQual (HsVarName (HsSymbol str)) ] + Ok _ (ConSym str) + -> Just [ UnQual (HsTyClsName (HsSymbol str)), + UnQual (HsVarName (HsSymbol str)) ] + Ok _ (QVarSym (mod,str)) + -> Just [ Qual (Module mod) (HsVarName (HsSymbol str)) ] + Ok _ (QConSym (mod,str)) + -> Just [ Qual (Module mod) (HsTyClsName (HsSymbol str)), + Qual (Module mod) (HsVarName (HsSymbol str)) ] + other + -> Nothing |