--
-- Haddock - A Haskell Documentation Tool
--
-- (c) Simon Marlow 2002
--
module HaddockLex (
Token(..),
tokenise
) where
import Char
import HsSyn
import HsLexer hiding (Token)
import HsParseMonad
data Token
= TokPara
| TokNumber
| TokBullet
| TokSpecial Char
| TokIdent [HsQName]
| TokString String
| TokURL String
| TokBirdTrack
| TokAName String
deriving Show
isSpecial, isSingleQuote, isIdent :: Char -> Bool
isSpecial c = c `elem` ['\"', '@', '/', '#']
isSingleQuote c = c `elem` ['\'', '`']
isIdent c = isAlphaNum c || c `elem` "_.!#$%&*+/<=>?@\\^|-~"
-- simple finite-state machine for tokenising the doc string
-- ----------------------------------------------------------------------------
-- At the beginning of a paragraph, we throw away initial whitespace
tokenise :: String -> [Token]
tokenise "" = []
tokenise str = case str of
'<':cs -> tokenise_url cs
'\n':cs -> tokenise_newline cs
c:cs | isSingleQuote c -> tokenise_identifier c cs
| isSpecial c -> TokSpecial c : tokenise1 cs
_other -> tokenise_string "" str
tokenise_newline :: String -> [Token]
tokenise_newline cs0 =
case dropWhile nonNewlineSpace cs0 of
'\n':cs -> TokPara : tokenise_para cs -- paragraph break
'>':cs -> TokBirdTrack : tokenise_birdtrack cs -- bird track
_ -> tokenise_string "" cs0
tokenise_para :: String -> [Token]
tokenise_para cs0 =
case dropWhile nonNewlineSpace cs0 of
-- bullet: '*'
'*':cs -> TokBullet : tokenise cs
-- bullet: '-'
'-':cs -> TokBullet : tokenise cs
-- enumerated item: '1.'
'>':cs -> TokBirdTrack : tokenise_birdtrack cs
-- bird track
str | (ds,'.':cs) <- span isDigit str, not (null ds)
-> TokNumber : tokenise cs
-- enumerated item: '(1)'
'(':cs | (ds,')':cs') <- span isDigit cs, not (null ds)
-> TokNumber : tokenise cs'
_ -> tokenise cs0
nonNewlineSpace :: Char -> Bool
nonNewlineSpace c = isSpace c && c /= '\n'
-- ----------------------------------------------------------------------------
-- Within a paragraph, we don't throw away any whitespace (except before a
-- birdtrack, and before a paragraph break).
tokenise1 :: String -> [Token]
tokenise1 str = tokenise_string "" str
-- found a single quote, check whether we have an identifier...
tokenise_identifier :: Char -> String -> [Token]
tokenise_identifier q cs0 =
let (ident,rest) = break (not.isIdent) cs0 in
case (rest, strToHsQNames ident) of
(c:cs, Just names) | isSingleQuote c -> TokIdent names : tokenise1 cs
_ -> tokenise_string [q] cs0
tokenise_url :: String -> [Token]
tokenise_url cs =
let (url,rest0) = break (=='>') cs in
TokURL url : case rest0 of
'>':rest -> tokenise1 rest
_ -> tokenise1 rest0
tokenise_string :: String -> String -> [Token]
tokenise_string str cs0 =
case cs0 of
[] -> tokString str []
'\\':c:cs -> tokenise_string (c:str) cs
'\n':cs -> tokenise_string_newline str cs
'<':cs -> tokString str (tokenise_url cs)
c:cs | isSpecial c -> tokString str (TokSpecial c : tokenise1 cs)
| isSingleQuote c -> tokString str (tokenise_identifier c cs)
| otherwise -> tokenise_string (c:str) cs
tokenise_string_newline :: String -> String -> [Token]
tokenise_string_newline str cs0 =
case dropWhile nonNewlineSpace cs0 of
'\n':cs -> tokString str (TokPara : tokenise_para cs)
-- paragraph break: throw away all whitespace
'>':cs -> tokString ('\n':str) (TokBirdTrack : tokenise_birdtrack cs)
-- keep the \n, but throw away any space before the '>'
_ -> tokenise_string ('\n':str) cs0
-- don't throw away whitespace at all
tokString :: String -> [Token] -> [Token]
tokString [] rest = rest
tokString cs rest = TokString (reverse cs) : rest
-- A bird-tracked line is verbatim, no markup characters are interpreted
tokenise_birdtrack :: String -> [Token]
tokenise_birdtrack cs =
let (line, rest) = break (=='\n') cs in
TokString line : tokenise1 rest
-- -----------------------------------------------------------------------------
-- Lex a string as a Haskell identifier
strToHsQNames :: String -> Maybe [HsQName]
strToHsQNames str0
= case lexer (\t -> returnP t) str0 (SrcLoc 1 1) 1 1 [] of
Ok _ (VarId str)
-> Just [ UnQual (HsVarName (HsIdent str)) ]
Ok _ (QVarId (mod0,str))
-> Just [ Qual (Module mod0) (HsVarName (HsIdent str)) ]
Ok _ (ConId str)
-> Just [ UnQual (HsTyClsName (HsIdent str)),
UnQual (HsVarName (HsIdent str)) ]
Ok _ (QConId (mod0,str))
-> Just [ Qual (Module mod0) (HsTyClsName (HsIdent str)),
Qual (Module mod0) (HsVarName (HsIdent str)) ]
Ok _ (VarSym str)
-> Just [ UnQual (HsVarName (HsSymbol str)) ]
Ok _ (ConSym str)
-> Just [ UnQual (HsTyClsName (HsSymbol str)),
UnQual (HsVarName (HsSymbol str)) ]
Ok _ (QVarSym (mod0,str))
-> Just [ Qual (Module mod0) (HsVarName (HsSymbol str)) ]
Ok _ (QConSym (mod0,str))
-> Just [ Qual (Module mod0) (HsTyClsName (HsSymbol str)),
Qual (Module mod0) (HsVarName (HsSymbol str)) ]
_other
-> Nothing