diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/HaddockLex.hs | 98 | ||||
-rw-r--r-- | src/HaddockParse.y | 41 |
2 files changed, 73 insertions, 66 deletions
diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs index f766222d..000d62f0 100644 --- a/src/HaddockLex.hs +++ b/src/HaddockLex.hs @@ -10,19 +10,25 @@ module HaddockLex ( ) where import Char - -special = '`' : '\'' : '\"' : '@' : '/' : [] +import HsSyn +import HsLexer hiding (Token) +import HsParseMonad data Token = TokPara | TokNumber | TokBullet | TokSpecial Char + | TokIdent [HsQName] | TokString String | TokURL String | TokBirdTrack deriving Show +isSpecial c = c `elem` ['\"', '@', '/'] +isSingleQuote c = c `elem` ['\'', '`'] +isIdent c = isAlphaNum c || c == '_' + -- simple finite-state machine for tokenising the doc string -- ---------------------------------------------------------------------------- @@ -32,14 +38,15 @@ tokenise :: String -> [Token] tokenise "" = [] tokenise str = case str of '<':cs -> tokenise_url cs - '\n':cs -> tokenise_newline tokenise cs - c:cs | c `elem` special -> TokSpecial c : tokenise cs + '\n':cs -> tokenise_newline cs + c:cs | isSingleQuote c -> tokenise_identifier c cs + | isSpecial c -> TokSpecial c : tokenise1 cs _other -> tokenise_string "" str -tokenise_newline next cs = +tokenise_newline cs = case dropWhile nonNewlineSpace cs of '\n':cs -> TokPara : tokenise_para cs -- paragraph break - '>':cs -> TokBirdTrack : next cs -- bird track + '>':cs -> TokBirdTrack : tokenise_birdtrack cs -- bird track _other -> tokenise_string "" cs tokenise_para cs = @@ -49,7 +56,7 @@ tokenise_para cs = -- bullet: '-' '-':cs -> TokBullet : tokenise cs -- enumerated item: '1.' - '>':cs -> TokBirdTrack : tokenise cs + '>':cs -> TokBirdTrack : tokenise_birdtrack cs -- bird track str | (ds,'.':cs) <- span isDigit str, not (null ds) -> TokNumber : tokenise cs @@ -65,18 +72,14 @@ nonNewlineSpace c = isSpace c && c /= '\n' -- birdtrack, and before a paragraph break). tokenise1 :: String -> [Token] -tokenise1 "" = [] -tokenise1 str = case str of - '<':cs -> tokenise_url cs - '\n':cs -> tokenise_newline1 cs - c:cs | c `elem` special -> TokSpecial c : tokenise1 cs - _other -> tokenise_string "" str +tokenise1 str = tokenise_string "" str -tokenise_newline1 cs = - case dropWhile nonNewlineSpace cs of - '\n':cs -> TokPara : tokenise_para cs -- paragraph break - '>':cs -> TokString "\n" : TokBirdTrack : tokenise1 cs -- bird track - _other -> tokenise_string "\n" cs +-- found a single quote, check whether we have an identifier... +tokenise_identifier q cs = + let (ident,rest) = break (not.isIdent) cs in + case (rest, strToHsQNames ident) of + (c:cs, Just names) | isSingleQuote c -> TokIdent names : tokenise1 cs + _other -> tokenise_string [q] cs tokenise_url cs = let (url,rest) = break (=='>') cs in @@ -84,21 +87,58 @@ tokenise_url cs = '>':rest -> tokenise1 rest _ -> tokenise1 rest --- ---------------------------------------------------------------------------- --- Within a string, we don't throw away any whitespace - tokenise_string str cs = case cs of - [] -> [TokString (reverse str)] + [] -> tokString str [] '\\':c:cs -> tokenise_string (c:str) cs '\n':cs -> tokenise_string_newline str cs - '<':cs -> TokString (reverse str) : tokenise_url cs - c:cs | c `elem` special -> TokString (reverse str) : tokenise1 (c:cs) - | otherwise -> tokenise_string (c:str) cs + '<':cs -> tokString str (tokenise_url cs) + c:cs | isSpecial c -> tokString str (TokSpecial c : tokenise1 cs) + | isSingleQuote c -> tokString str (tokenise_identifier c cs) + | otherwise -> tokenise_string (c:str) cs tokenise_string_newline str cs = case dropWhile nonNewlineSpace cs of - '\n':cs -> TokString (reverse str) : TokPara : tokenise_para cs - '>':cs -> TokString (reverse ('\n':str)) : TokBirdTrack : tokenise1 cs - -- bird track - _other -> tokenise_string ('\n':str) cs -- don't throw away whitespace + '\n':cs -> tokString str (TokPara : tokenise_para cs) + -- paragraph break: throw away all whitespace + '>':cs -> tokString ('\n':str) (TokBirdTrack : tokenise_birdtrack cs) + -- keep the \n, but throw away any space before the '>' + _other -> tokenise_string ('\n':str) cs + -- don't throw away whitespace at all + +tokString [] rest = rest +tokString cs rest = TokString (reverse cs) : rest + +-- A bird-tracked line is verbatim, no markup characters are interpreted +tokenise_birdtrack cs = + let (line, rest) = break (=='\n') cs in + TokString line : tokenise1 rest + +-- ----------------------------------------------------------------------------- +-- Lex a string as a Haskell identifier + +strToHsQNames :: String -> Maybe [HsQName] +strToHsQNames str + = case lexer (\t -> returnP t) str (SrcLoc 1 1) 1 1 [] of + Ok _ (VarId str) + -> Just [ UnQual (HsVarName (HsIdent str)) ] + Ok _ (QVarId (mod,str)) + -> Just [ Qual (Module mod) (HsVarName (HsIdent str)) ] + Ok _ (ConId str) + -> Just [ UnQual (HsTyClsName (HsIdent str)), + UnQual (HsVarName (HsIdent str)) ] + Ok _ (QConId (mod,str)) + -> Just [ Qual (Module mod) (HsTyClsName (HsIdent str)), + Qual (Module mod) (HsVarName (HsIdent str)) ] + Ok _ (VarSym str) + -> Just [ UnQual (HsVarName (HsSymbol str)) ] + Ok _ (ConSym str) + -> Just [ UnQual (HsTyClsName (HsSymbol str)), + UnQual (HsVarName (HsSymbol str)) ] + Ok _ (QVarSym (mod,str)) + -> Just [ Qual (Module mod) (HsVarName (HsSymbol str)) ] + Ok _ (QConSym (mod,str)) + -> Just [ Qual (Module mod) (HsTyClsName (HsSymbol str)), + Qual (Module mod) (HsVarName (HsSymbol str)) ] + other + -> Nothing diff --git a/src/HaddockParse.y b/src/HaddockParse.y index f74d2b69..d9de9110 100644 --- a/src/HaddockParse.y +++ b/src/HaddockParse.y @@ -3,21 +3,18 @@ module HaddockParse (parseParas, parseString) where import HaddockLex import HsSyn -import HsLexer hiding (Token) -import HsParseMonad } %tokentype { Token } -%token SQUO { TokSpecial '\'' } - BQUO { TokSpecial '`' } - DQUO { TokSpecial '\"' } - '/' { TokSpecial '/' } +%token '/' { TokSpecial '/' } '@' { TokSpecial '@' } + DQUO { TokSpecial '\"' } URL { TokURL $$ } '*' { TokBullet } '(n)' { TokNumber } '>' { TokBirdTrack } + IDENT { TokIdent $$ } PARA { TokPara } STRING { TokString $$ } @@ -69,13 +66,9 @@ elem1 :: { Doc } : STRING { DocString $1 } | '/' STRING '/' { DocEmphasis (DocString $2) } | URL { DocURL $1 } - | squo STRING squo { strToHsQNames $2 } + | IDENT { DocIdentifier $1 } | DQUO STRING DQUO { DocModule $2 } -squo :: { () } - : SQUO { () } - | BQUO { () } - { happyError :: [Token] -> Either String a happyError toks = @@ -88,30 +81,4 @@ instance Monad (Either String) where Left l >>= _ = Left l Right r >>= k = k r fail msg = Left msg - -strToHsQNames :: String -> Doc -strToHsQNames str - = case lexer (\t -> returnP t) str (SrcLoc 1 1) 1 1 [] of - Ok _ (VarId str) - -> DocIdentifier [ UnQual (HsVarName (HsIdent str)) ] - Ok _ (QVarId (mod,str)) - -> DocIdentifier [ Qual (Module mod) (HsVarName (HsIdent str)) ] - Ok _ (ConId str) - -> DocIdentifier [ UnQual (HsTyClsName (HsIdent str)), - UnQual (HsVarName (HsIdent str)) ] - Ok _ (QConId (mod,str)) - -> DocIdentifier [ Qual (Module mod) (HsTyClsName (HsIdent str)), - Qual (Module mod) (HsVarName (HsIdent str)) ] - Ok _ (VarSym str) - -> DocIdentifier [ UnQual (HsVarName (HsSymbol str)) ] - Ok _ (ConSym str) - -> DocIdentifier [ UnQual (HsTyClsName (HsSymbol str)), - UnQual (HsVarName (HsSymbol str)) ] - Ok _ (QVarSym (mod,str)) - -> DocIdentifier [ Qual (Module mod) (HsVarName (HsSymbol str)) ] - Ok _ (QConSym (mod,str)) - -> DocIdentifier [ Qual (Module mod) (HsTyClsName (HsSymbol str)), - Qual (Module mod) (HsVarName (HsSymbol str)) ] - other - -> DocString str } |