| author    | simonmar <unknown>                               | 2003-08-15 14:42:59 +0000 |
|-----------|--------------------------------------------------|---------------------------|
| committer | simonmar <unknown>                               | 2003-08-15 14:42:59 +0000 |
| commit    | ff5c7d6ddfabadf0afe24e33a5487d630fbbd406 (patch)  |                           |
| tree      | c0301ea6f2423c1dafdf581cdeaa50f09e6c1ac6 /src     |                           |
| parent    | e9d8085c4018838f62dbd6a06757cf3e31c35cf2 (diff)   |                           |
[haddock @ 2003-08-15 14:42:59 by simonmar]
Convert the lexer to Alex, and fix a bug in the process.
Diffstat (limited to 'src')

| mode       | file               | changed lines |
|------------|--------------------|---------------|
| -rw-r--r-- | src/HaddockLex.hs  | 155           |
| -rw-r--r-- | src/HaddockLex.x   | 137           |
| -rw-r--r-- | src/HaddockParse.y | 6             |

3 files changed, 140 insertions, 158 deletions
```diff
diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs
deleted file mode 100644
index b1de971e..00000000
--- a/src/HaddockLex.hs
+++ /dev/null
@@ -1,155 +0,0 @@
---
--- Haddock - A Haskell Documentation Tool
---
--- (c) Simon Marlow 2002
---
-
-module HaddockLex (
-	Token(..),
-	tokenise
- ) where
-
-import Char
-import HsSyn
-import HsLexer hiding (Token)
-import HsParseMonad
-
-data Token
-  = TokPara
-  | TokNumber
-  | TokBullet
-  | TokSpecial Char
-  | TokIdent [HsQName]
-  | TokString String
-  | TokURL String
-  | TokBirdTrack
-  | TokAName String
-  deriving Show
-
-isSpecial, isSingleQuote, isIdent :: Char -> Bool
-isSpecial c     = c `elem` ['\"', '@', '/', '#']
-isSingleQuote c = c `elem` ['\'', '`']
-isIdent c       = isAlphaNum c || c `elem` "_.!#$%&*+/<=>?@\\^|-~"
-
--- simple finite-state machine for tokenising the doc string
-
--- ----------------------------------------------------------------------------
--- At the beginning of a paragraph, we throw away initial whitespace
-
-tokenise :: String -> [Token]
-tokenise "" = []
-tokenise str = case str of
-  '<':cs  -> tokenise_url cs
-  '\n':cs -> tokenise_newline cs
-  c:cs 	| isSingleQuote c -> tokenise_identifier c cs
-	| isSpecial c     -> TokSpecial c : tokenise1 cs
-  _other  -> tokenise_string "" str
-
-tokenise_newline :: String -> [Token]
-tokenise_newline cs0 =
- case dropWhile nonNewlineSpace cs0 of
-   '\n':cs -> TokPara : tokenise_para cs -- paragraph break
-   '>':cs  -> TokBirdTrack : tokenise_birdtrack cs -- bird track
-   _       -> tokenise_string "" cs0
-
-tokenise_para :: String -> [Token]
-tokenise_para cs0 =
-  case dropWhile nonNewlineSpace cs0 of
-	-- bullet:  '*'
-   '*':cs  -> TokBullet  : tokenise cs
-	-- bullet: '-'
-   '-':cs  -> TokBullet  : tokenise cs
-	-- enumerated item: '1.'
-   '>':cs  -> TokBirdTrack : tokenise_birdtrack cs
-	-- bird track
-   str | (ds,'.':cs) <- span isDigit str, not (null ds)
-		-> TokNumber : tokenise cs
-	-- enumerated item: '(1)'
-   '(':cs | (ds,')':cs') <- span isDigit cs, not (null ds)
-		-> TokNumber : tokenise cs'
-   _ -> tokenise cs0
-
-nonNewlineSpace :: Char -> Bool
-nonNewlineSpace c = isSpace c && c /= '\n'
-
--- ----------------------------------------------------------------------------
--- Within a paragraph, we don't throw away any whitespace (except before a
--- birdtrack, and before a paragraph break).
-
-tokenise1 :: String -> [Token]
-tokenise1 str = tokenise_string "" str
-
--- found a single quote, check whether we have an identifier...
-tokenise_identifier :: Char -> String -> [Token]
-tokenise_identifier q cs0 =
-  let (ident,rest) = break (not.isIdent) cs0 in
-  case (rest, strToHsQNames ident) of
-    (c:cs, Just names) | isSingleQuote c -> TokIdent names : tokenise1 cs
-    _ -> tokenise_string [q] cs0
-
-tokenise_url :: String -> [Token]
-tokenise_url cs =
-  let (url,rest0) = break (=='>') cs in
-  TokURL url : case rest0 of
-		 '>':rest -> tokenise1 rest
-		 _ -> tokenise1 rest0
-
-tokenise_string :: String -> String -> [Token]
-tokenise_string str cs0 =
-  case cs0 of
-    []        -> tokString str []
-    '\\':c:cs -> tokenise_string (c:str) cs
-    '\n':cs   -> tokenise_string_newline str cs
-    '<':cs    -> tokString str (tokenise_url cs)
-    c:cs | isSpecial c     -> tokString str (TokSpecial c : tokenise1 cs)
-	 | isSingleQuote c -> tokString str (tokenise_identifier c cs)
-         | otherwise 	   -> tokenise_string (c:str) cs
-
-tokenise_string_newline :: String -> String -> [Token]
-tokenise_string_newline str cs0 =
-  case dropWhile nonNewlineSpace cs0  of
-   '\n':cs -> tokString str (TokPara : tokenise_para cs)
-		-- paragraph break: throw away all whitespace
-   '>':cs  -> tokString ('\n':str) (TokBirdTrack : tokenise_birdtrack cs)
-		-- keep the \n, but throw away any space before the '>'
-   _       -> tokenise_string ('\n':str) cs0
-		-- don't throw away whitespace at all
-
-tokString :: String -> [Token] -> [Token]
-tokString [] rest = rest
-tokString cs rest = TokString (reverse cs) : rest
-
--- A bird-tracked line is verbatim, no markup characters are interpreted
-tokenise_birdtrack :: String -> [Token]
-tokenise_birdtrack cs =
-  let (line, rest) = break (=='\n') cs in
-  TokString line : tokenise1 rest
-
--- -----------------------------------------------------------------------------
--- Lex a string as a Haskell identifier
-
-strToHsQNames :: String -> Maybe [HsQName]
-strToHsQNames str0
- = case lexer (\t -> returnP t) str0 (SrcLoc 1 1) 1 1 [] of
-	Ok _ (VarId str)
-	   -> Just [ UnQual (HsVarName (HsIdent str)) ]
-        Ok _ (QVarId (mod0,str))
- 	   -> Just [ Qual (Module mod0) (HsVarName (HsIdent str)) ]
-	Ok _ (ConId str)
-	   -> Just [ UnQual (HsTyClsName (HsIdent str)),
-		     UnQual (HsVarName (HsIdent str)) ]
-        Ok _ (QConId (mod0,str))
-	   -> Just [ Qual (Module mod0) (HsTyClsName (HsIdent str)),
-		     Qual (Module mod0) (HsVarName (HsIdent str)) ]
-        Ok _ (VarSym str)
-	   -> Just [ UnQual (HsVarName (HsSymbol str)) ]
-        Ok _ (ConSym str)
-	   -> Just [ UnQual (HsTyClsName (HsSymbol str)),
-		     UnQual (HsVarName (HsSymbol str)) ]
-        Ok _ (QVarSym (mod0,str))
-	   -> Just [ Qual (Module mod0) (HsVarName (HsSymbol str)) ]
-        Ok _ (QConSym (mod0,str))
-	   -> Just [ Qual (Module mod0) (HsTyClsName (HsSymbol str)),
-		     Qual (Module mod0) (HsVarName (HsSymbol str)) ]
-	_other
-	   -> Nothing
diff --git a/src/HaddockLex.x b/src/HaddockLex.x
new file mode 100644
index 00000000..d888aeff
--- /dev/null
+++ b/src/HaddockLex.x
@@ -0,0 +1,137 @@
+--
+-- Haddock - A Haskell Documentation Tool
+--
+-- (c) Simon Marlow 2002
+--
+
+{
+module HaddockLex (
+	Token(..),
+	tokenise
+ ) where
+
+import Char
+import HsSyn
+import HsLexer hiding (Token)
+import HsParseMonad
+import Debug.Trace
+}
+
+$ws    = $white # \n
+$digit = [0-9]
+$special =  [\"\@\/\#]
+$alphanum = [A-Za-z0-9]
+$ident    = [$alphanum \_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~]
+
+:-
+
+-- beginning of a paragraph
+<0,para> {
+ $ws* \n		;
+ $ws* \>		{ begin birdtrack }
+ $ws* [\*\-]		{ token TokBullet }
+ $ws* \( $digit+ \) 	{ token TokNumber }
+ $ws*			{ begin string }
+}
+
+-- beginning of a line: don't ignore whitespace, unless this is the start
+-- of a new paragraph.
+<line> {
+  $ws* \>		{ begin birdtrack }
+  $ws* \n		{ token TokPara `andBegin` para }
+  () 			{ begin string }
+}
+
+<birdtrack> .*	\n?	{ strtoken TokBirdTrack `andBegin` line }
+
+<string> {
+  $special			{ strtoken $ \s -> TokSpecial (head s) }
+  \<.*\>			{ strtoken $ \s -> TokURL (init (tail s)) }
+  [\'\`] $ident+ [\'\`]		{ ident }
+  \\ .				{ strtoken (TokString . tail) }
+  [^ $special \< \n \'\` \\]* \n { strtoken TokString `andBegin` line }
+  [^ $special \< \n \'\` \\]+	 { strtoken TokString }
+}
+
+{
+data Token
+  = TokPara
+  | TokNumber
+  | TokBullet
+  | TokSpecial Char
+  | TokIdent [HsQName]
+  | TokString String
+  | TokURL String
+  | TokBirdTrack String
+  deriving Show
+
+-- -----------------------------------------------------------------------------
+-- Alex support stuff
+
+type StartCode = Int
+type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token]
+
+type AlexInput = (Char,String)
+
+alexGetChar (_, [])   = Nothing
+alexGetChar (_, c:cs) = Just (c, (c,cs))
+
+alexInputPrevChar (c,_) = c
+
+tokenise :: String -> [Token]
+tokenise str = let toks = go ('\n',str) para in trace (show toks) toks
+  where go inp@(_,str) sc =
+	  case alexScan inp sc of
+		AlexEOF -> []
+		AlexError _ -> error "lexical error"
+		AlexSkip  inp' len     -> go inp' sc
+		AlexToken inp' len act -> act (take len str) sc (\sc -> go inp' sc)
+
+andBegin  :: Action -> StartCode -> Action
+andBegin act new_sc = \str sc cont -> act str new_sc cont
+
+token :: Token -> Action
+token t = \str sc cont -> t : cont sc
+
+strtoken :: (String -> Token) -> Action
+strtoken t = \str sc cont -> t str : cont sc
+
+begin :: StartCode -> Action
+begin sc = \str _ cont -> cont sc
+
+-- -----------------------------------------------------------------------------
+-- Lex a string as a Haskell identifier
+
+ident :: Action
+ident str sc cont =
+  case strToHsQNames id of
+	Just names -> TokIdent names : cont sc
+	Nothing -> TokString str : cont sc
+ where id = init (tail str)
+
+strToHsQNames :: String -> Maybe [HsQName]
+strToHsQNames str0
+ = case lexer (\t -> returnP t) str0 (SrcLoc 1 1) 1 1 [] of
+	Ok _ (VarId str)
+	   -> Just [ UnQual (HsVarName (HsIdent str)) ]
+        Ok _ (QVarId (mod0,str))
+ 	   -> Just [ Qual (Module mod0) (HsVarName (HsIdent str)) ]
+	Ok _ (ConId str)
+	   -> Just [ UnQual (HsTyClsName (HsIdent str)),
+		     UnQual (HsVarName (HsIdent str)) ]
+        Ok _ (QConId (mod0,str))
+	   -> Just [ Qual (Module mod0) (HsTyClsName (HsIdent str)),
+		     Qual (Module mod0) (HsVarName (HsIdent str)) ]
+        Ok _ (VarSym str)
+	   -> Just [ UnQual (HsVarName (HsSymbol str)) ]
+        Ok _ (ConSym str)
+	   -> Just [ UnQual (HsTyClsName (HsSymbol str)),
+		     UnQual (HsVarName (HsSymbol str)) ]
+        Ok _ (QVarSym (mod0,str))
+	   -> Just [ Qual (Module mod0) (HsVarName (HsSymbol str)) ]
+        Ok _ (QConSym (mod0,str))
+	   -> Just [ Qual (Module mod0) (HsTyClsName (HsSymbol str)),
+		     Qual (Module mod0) (HsVarName (HsSymbol str)) ]
+	_other
+	   -> Nothing
+}
diff --git a/src/HaddockParse.y b/src/HaddockParse.y
index 15eda968..db712c42 100644
--- a/src/HaddockParse.y
+++ b/src/HaddockParse.y
@@ -14,7 +14,7 @@ import HsSyn
 	URL	{ TokURL $$ }
 	'*'	{ TokBullet }
 	'(n)'	{ TokNumber }
-	'>'	{ TokBirdTrack }
+	'>..'	{ TokBirdTrack $$ }
 	IDENT   { TokIdent $$ }
 	PARA    { TokPara }
 	STRING	{ TokString $$ }
@@ -48,8 +48,8 @@ para    :: { Doc }
 	| codepara		{ DocCodeBlock $1 }
 
 codepara :: { Doc }
-	: '>' seq codepara	{ docAppend $2 $3 }
-	| '>' seq		{ $2 }
+	: '>..' codepara	{ docAppend (DocString $1) $2 }
+	| '>..'			{ DocString $1 }
 
 seq	:: { Doc }
 	: elem seq		{ docAppend $1 $2 }
```
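The visible effect of the patch is in the token stream: `TokBirdTrack` now carries the verbatim text of each bird-tracked line, so the parser can build a code block by concatenating those strings (`docAppend (DocString $1) $2`) instead of re-parsing a `'>' seq` form. The sketch below is a minimal, self-contained illustration of that idea only; it uses its own cut-down `Token` type and a hypothetical `collectCode` helper, not the real Haddock modules.

```haskell
-- Simplified sketch, not the actual Haddock code: a cut-down Token type and a
-- hypothetical helper showing how bird-track tokens that carry their own text
-- can be folded into a single code block.
module Main where

data Token
  = TokPara
  | TokString String
  | TokBirdTrack String   -- verbatim bird-track line, including its newline
  deriving (Eq, Show)

-- Collect a leading run of bird-track tokens into one code-block string,
-- returning the block and the remaining tokens.
collectCode :: [Token] -> (String, [Token])
collectCode (TokBirdTrack s : ts) =
  let (block, rest) = collectCode ts in (s ++ block, rest)
collectCode ts = ("", ts)

main :: IO ()
main = do
  -- A plausible token stream for "Some text.\n\n> f x = x\n> g = f\n"
  let toks = [ TokString "Some text.", TokPara
             , TokBirdTrack " f x = x\n", TokBirdTrack " g = f\n" ]
  print (collectCode (drop 2 toks))
  -- expected: (" f x = x\n g = f\n",[])
```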
