From ff5c7d6ddfabadf0afe24e33a5487d630fbbd406 Mon Sep 17 00:00:00 2001 From: simonmar Date: Fri, 15 Aug 2003 14:42:59 +0000 Subject: [haddock @ 2003-08-15 14:42:59 by simonmar] Convert the lexer to Alex, and fix a bug in the process. --- src/HaddockLex.hs | 155 ------------------------------------------------------ 1 file changed, 155 deletions(-) delete mode 100644 src/HaddockLex.hs (limited to 'src/HaddockLex.hs') diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs deleted file mode 100644 index b1de971e..00000000 --- a/src/HaddockLex.hs +++ /dev/null @@ -1,155 +0,0 @@ --- --- Haddock - A Haskell Documentation Tool --- --- (c) Simon Marlow 2002 --- - -module HaddockLex ( - Token(..), - tokenise - ) where - -import Char -import HsSyn -import HsLexer hiding (Token) -import HsParseMonad - -data Token - = TokPara - | TokNumber - | TokBullet - | TokSpecial Char - | TokIdent [HsQName] - | TokString String - | TokURL String - | TokBirdTrack - | TokAName String - deriving Show - -isSpecial, isSingleQuote, isIdent :: Char -> Bool -isSpecial c = c `elem` ['\"', '@', '/', '#'] -isSingleQuote c = c `elem` ['\'', '`'] -isIdent c = isAlphaNum c || c `elem` "_.!#$%&*+/<=>?@\\^|-~" - --- simple finite-state machine for tokenising the doc string - --- ---------------------------------------------------------------------------- --- At the beginning of a paragraph, we throw away initial whitespace - -tokenise :: String -> [Token] -tokenise "" = [] -tokenise str = case str of - '<':cs -> tokenise_url cs - '\n':cs -> tokenise_newline cs - c:cs | isSingleQuote c -> tokenise_identifier c cs - | isSpecial c -> TokSpecial c : tokenise1 cs - _other -> tokenise_string "" str - -tokenise_newline :: String -> [Token] -tokenise_newline cs0 = - case dropWhile nonNewlineSpace cs0 of - '\n':cs -> TokPara : tokenise_para cs -- paragraph break - '>':cs -> TokBirdTrack : tokenise_birdtrack cs -- bird track - _ -> tokenise_string "" cs0 - -tokenise_para :: String -> [Token] -tokenise_para cs0 = - case dropWhile nonNewlineSpace cs0 of - -- bullet: '*' - '*':cs -> TokBullet : tokenise cs - -- bullet: '-' - '-':cs -> TokBullet : tokenise cs - -- enumerated item: '1.' - '>':cs -> TokBirdTrack : tokenise_birdtrack cs - -- bird track - str | (ds,'.':cs) <- span isDigit str, not (null ds) - -> TokNumber : tokenise cs - -- enumerated item: '(1)' - '(':cs | (ds,')':cs') <- span isDigit cs, not (null ds) - -> TokNumber : tokenise cs' - _ -> tokenise cs0 - -nonNewlineSpace :: Char -> Bool -nonNewlineSpace c = isSpace c && c /= '\n' - --- ---------------------------------------------------------------------------- --- Within a paragraph, we don't throw away any whitespace (except before a --- birdtrack, and before a paragraph break). - -tokenise1 :: String -> [Token] -tokenise1 str = tokenise_string "" str - --- found a single quote, check whether we have an identifier... -tokenise_identifier :: Char -> String -> [Token] -tokenise_identifier q cs0 = - let (ident,rest) = break (not.isIdent) cs0 in - case (rest, strToHsQNames ident) of - (c:cs, Just names) | isSingleQuote c -> TokIdent names : tokenise1 cs - _ -> tokenise_string [q] cs0 - -tokenise_url :: String -> [Token] -tokenise_url cs = - let (url,rest0) = break (=='>') cs in - TokURL url : case rest0 of - '>':rest -> tokenise1 rest - _ -> tokenise1 rest0 - -tokenise_string :: String -> String -> [Token] -tokenise_string str cs0 = - case cs0 of - [] -> tokString str [] - '\\':c:cs -> tokenise_string (c:str) cs - '\n':cs -> tokenise_string_newline str cs - '<':cs -> tokString str (tokenise_url cs) - c:cs | isSpecial c -> tokString str (TokSpecial c : tokenise1 cs) - | isSingleQuote c -> tokString str (tokenise_identifier c cs) - | otherwise -> tokenise_string (c:str) cs - -tokenise_string_newline :: String -> String -> [Token] -tokenise_string_newline str cs0 = - case dropWhile nonNewlineSpace cs0 of - '\n':cs -> tokString str (TokPara : tokenise_para cs) - -- paragraph break: throw away all whitespace - '>':cs -> tokString ('\n':str) (TokBirdTrack : tokenise_birdtrack cs) - -- keep the \n, but throw away any space before the '>' - _ -> tokenise_string ('\n':str) cs0 - -- don't throw away whitespace at all - -tokString :: String -> [Token] -> [Token] -tokString [] rest = rest -tokString cs rest = TokString (reverse cs) : rest - --- A bird-tracked line is verbatim, no markup characters are interpreted -tokenise_birdtrack :: String -> [Token] -tokenise_birdtrack cs = - let (line, rest) = break (=='\n') cs in - TokString line : tokenise1 rest - --- ----------------------------------------------------------------------------- --- Lex a string as a Haskell identifier - -strToHsQNames :: String -> Maybe [HsQName] -strToHsQNames str0 - = case lexer (\t -> returnP t) str0 (SrcLoc 1 1) 1 1 [] of - Ok _ (VarId str) - -> Just [ UnQual (HsVarName (HsIdent str)) ] - Ok _ (QVarId (mod0,str)) - -> Just [ Qual (Module mod0) (HsVarName (HsIdent str)) ] - Ok _ (ConId str) - -> Just [ UnQual (HsTyClsName (HsIdent str)), - UnQual (HsVarName (HsIdent str)) ] - Ok _ (QConId (mod0,str)) - -> Just [ Qual (Module mod0) (HsTyClsName (HsIdent str)), - Qual (Module mod0) (HsVarName (HsIdent str)) ] - Ok _ (VarSym str) - -> Just [ UnQual (HsVarName (HsSymbol str)) ] - Ok _ (ConSym str) - -> Just [ UnQual (HsTyClsName (HsSymbol str)), - UnQual (HsVarName (HsSymbol str)) ] - Ok _ (QVarSym (mod0,str)) - -> Just [ Qual (Module mod0) (HsVarName (HsSymbol str)) ] - Ok _ (QConSym (mod0,str)) - -> Just [ Qual (Module mod0) (HsTyClsName (HsSymbol str)), - Qual (Module mod0) (HsVarName (HsSymbol str)) ] - _other - -> Nothing -- cgit v1.2.3