aboutsummaryrefslogtreecommitdiff
path: root/src/HaddockLex.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/HaddockLex.hs')
-rw-r--r--src/HaddockLex.hs98
1 files changed, 69 insertions, 29 deletions
diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs
index f766222d..000d62f0 100644
--- a/src/HaddockLex.hs
+++ b/src/HaddockLex.hs
@@ -10,19 +10,25 @@ module HaddockLex (
) where
import Char
-
-special = '`' : '\'' : '\"' : '@' : '/' : []
+import HsSyn
+import HsLexer hiding (Token)
+import HsParseMonad
data Token
= TokPara
| TokNumber
| TokBullet
| TokSpecial Char
+ | TokIdent [HsQName]
| TokString String
| TokURL String
| TokBirdTrack
deriving Show
+isSpecial c = c `elem` ['\"', '@', '/']
+isSingleQuote c = c `elem` ['\'', '`']
+isIdent c = isAlphaNum c || c == '_'
+
-- simple finite-state machine for tokenising the doc string
-- ----------------------------------------------------------------------------
@@ -32,14 +38,15 @@ tokenise :: String -> [Token]
tokenise "" = []
tokenise str = case str of
'<':cs -> tokenise_url cs
- '\n':cs -> tokenise_newline tokenise cs
- c:cs | c `elem` special -> TokSpecial c : tokenise cs
+ '\n':cs -> tokenise_newline cs
+ c:cs | isSingleQuote c -> tokenise_identifier c cs
+ | isSpecial c -> TokSpecial c : tokenise1 cs
_other -> tokenise_string "" str
-tokenise_newline next cs =
+tokenise_newline cs =
case dropWhile nonNewlineSpace cs of
'\n':cs -> TokPara : tokenise_para cs -- paragraph break
- '>':cs -> TokBirdTrack : next cs -- bird track
+ '>':cs -> TokBirdTrack : tokenise_birdtrack cs -- bird track
_other -> tokenise_string "" cs
tokenise_para cs =
@@ -49,7 +56,7 @@ tokenise_para cs =
-- bullet: '-'
'-':cs -> TokBullet : tokenise cs
-- enumerated item: '1.'
- '>':cs -> TokBirdTrack : tokenise cs
+ '>':cs -> TokBirdTrack : tokenise_birdtrack cs
-- bird track
str | (ds,'.':cs) <- span isDigit str, not (null ds)
-> TokNumber : tokenise cs
@@ -65,18 +72,14 @@ nonNewlineSpace c = isSpace c && c /= '\n'
-- birdtrack, and before a paragraph break).
tokenise1 :: String -> [Token]
-tokenise1 "" = []
-tokenise1 str = case str of
- '<':cs -> tokenise_url cs
- '\n':cs -> tokenise_newline1 cs
- c:cs | c `elem` special -> TokSpecial c : tokenise1 cs
- _other -> tokenise_string "" str
+tokenise1 str = tokenise_string "" str
-tokenise_newline1 cs =
- case dropWhile nonNewlineSpace cs of
- '\n':cs -> TokPara : tokenise_para cs -- paragraph break
- '>':cs -> TokString "\n" : TokBirdTrack : tokenise1 cs -- bird track
- _other -> tokenise_string "\n" cs
+-- found a single quote, check whether we have an identifier...
+tokenise_identifier q cs =
+ let (ident,rest) = break (not.isIdent) cs in
+ case (rest, strToHsQNames ident) of
+ (c:cs, Just names) | isSingleQuote c -> TokIdent names : tokenise1 cs
+ _other -> tokenise_string [q] cs
tokenise_url cs =
let (url,rest) = break (=='>') cs in
@@ -84,21 +87,58 @@ tokenise_url cs =
'>':rest -> tokenise1 rest
_ -> tokenise1 rest
--- ----------------------------------------------------------------------------
--- Within a string, we don't throw away any whitespace
-
tokenise_string str cs =
case cs of
- [] -> [TokString (reverse str)]
+ [] -> tokString str []
'\\':c:cs -> tokenise_string (c:str) cs
'\n':cs -> tokenise_string_newline str cs
- '<':cs -> TokString (reverse str) : tokenise_url cs
- c:cs | c `elem` special -> TokString (reverse str) : tokenise1 (c:cs)
- | otherwise -> tokenise_string (c:str) cs
+ '<':cs -> tokString str (tokenise_url cs)
+ c:cs | isSpecial c -> tokString str (TokSpecial c : tokenise1 cs)
+ | isSingleQuote c -> tokString str (tokenise_identifier c cs)
+ | otherwise -> tokenise_string (c:str) cs
tokenise_string_newline str cs =
case dropWhile nonNewlineSpace cs of
- '\n':cs -> TokString (reverse str) : TokPara : tokenise_para cs
- '>':cs -> TokString (reverse ('\n':str)) : TokBirdTrack : tokenise1 cs
- -- bird track
- _other -> tokenise_string ('\n':str) cs -- don't throw away whitespace
+ '\n':cs -> tokString str (TokPara : tokenise_para cs)
+ -- paragraph break: throw away all whitespace
+ '>':cs -> tokString ('\n':str) (TokBirdTrack : tokenise_birdtrack cs)
+ -- keep the \n, but throw away any space before the '>'
+ _other -> tokenise_string ('\n':str) cs
+ -- don't throw away whitespace at all
+
+tokString [] rest = rest
+tokString cs rest = TokString (reverse cs) : rest
+
+-- A bird-tracked line is verbatim, no markup characters are interpreted
+tokenise_birdtrack cs =
+ let (line, rest) = break (=='\n') cs in
+ TokString line : tokenise1 rest
+
+-- -----------------------------------------------------------------------------
+-- Lex a string as a Haskell identifier
+
+strToHsQNames :: String -> Maybe [HsQName]
+strToHsQNames str
+ = case lexer (\t -> returnP t) str (SrcLoc 1 1) 1 1 [] of
+ Ok _ (VarId str)
+ -> Just [ UnQual (HsVarName (HsIdent str)) ]
+ Ok _ (QVarId (mod,str))
+ -> Just [ Qual (Module mod) (HsVarName (HsIdent str)) ]
+ Ok _ (ConId str)
+ -> Just [ UnQual (HsTyClsName (HsIdent str)),
+ UnQual (HsVarName (HsIdent str)) ]
+ Ok _ (QConId (mod,str))
+ -> Just [ Qual (Module mod) (HsTyClsName (HsIdent str)),
+ Qual (Module mod) (HsVarName (HsIdent str)) ]
+ Ok _ (VarSym str)
+ -> Just [ UnQual (HsVarName (HsSymbol str)) ]
+ Ok _ (ConSym str)
+ -> Just [ UnQual (HsTyClsName (HsSymbol str)),
+ UnQual (HsVarName (HsSymbol str)) ]
+ Ok _ (QVarSym (mod,str))
+ -> Just [ Qual (Module mod) (HsVarName (HsSymbol str)) ]
+ Ok _ (QConSym (mod,str))
+ -> Just [ Qual (Module mod) (HsTyClsName (HsSymbol str)),
+ Qual (Module mod) (HsVarName (HsSymbol str)) ]
+ other
+ -> Nothing