diff options
Diffstat (limited to 'src/Haddock/Lex.x')
-rw-r--r-- | src/Haddock/Lex.x | 255 |
1 files changed, 0 insertions, 255 deletions
diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x deleted file mode 100644 index 9e59fa4c..00000000 --- a/src/Haddock/Lex.x +++ /dev/null @@ -1,255 +0,0 @@ --- --- Haddock - A Haskell Documentation Tool --- --- (c) Simon Marlow 2002 --- --- This file was modified and integrated into GHC by David Waern 2006. --- Then moved back into Haddock by Isaac Dupree in 2009 :-) --- - -{ -{-# LANGUAGE BangPatterns #-} -- Generated by Alex -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Lex ( - Token(..), - LToken, - tokenise - ) where - -import Lexer hiding (Token) -import Parser ( parseIdentifier ) -import StringBuffer -import RdrName -import SrcLoc -import DynFlags -import FastString - -import qualified Data.Bits -import Data.Char -import Data.Word (Word8) -import Numeric -import System.IO.Unsafe -import Debug.Trace -} - -$ws = $white # \n -$digit = [0-9] -$hexdigit = [0-9a-fA-F] -$special = [\"\@] -$alphanum = [A-Za-z0-9] -$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:] - -:- - --- beginning of a paragraph -<0,para> { - $ws* \n ; - $ws* \> { begin birdtrack } - $ws* prop \> .* \n { strtoken TokProperty `andBegin` property} - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - $ws* [\*\-] { token TokBullet `andBegin` string } - $ws* \[ { token TokDefStart `andBegin` def } - $ws* \( $digit+ \) { token TokNumber `andBegin` string } - $ws* $digit+ \. { token TokNumber `andBegin` string } - $ws* { begin string } -} - --- beginning of a line -<line> { - $ws* \> { begin birdtrack } - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - $ws* \n { token TokPara `andBegin` para } - -- Here, we really want to be able to say - -- $ws* (\n | <eof>) { token TokPara `andBegin` para} - -- because otherwise a trailing line of whitespace will result in - -- a spurious TokString at the end of a docstring. We don't have <eof>, - -- though (NOW I realise what it was for :-). To get around this, we always - -- append \n to the end of a docstring. - () { begin string } -} - -<birdtrack> .* \n? { strtokenNL TokBirdTrack `andBegin` line } - -<property> () { token TokPara `andBegin` para } - -<example> { - $ws* \n { token TokPara `andBegin` para } - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - () { begin exampleresult } -} - -<exampleexpr> .* \n { strtokenNL TokExampleExpression `andBegin` example } - -<exampleresult> .* \n { strtokenNL TokExampleResult `andBegin` example } - -<string,def> { - $special { strtoken $ \s -> TokSpecial (head s) } - \<\< [^\>]* \>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } - \< [^\>]* \> { strtoken $ \s -> TokURL (init (tail s)) } - \# [^\#]* \# { strtoken $ \s -> TokAName (init (tail s)) } - \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) } - [\'\`] $ident+ [\'\`] { ident } - \\ . { strtoken (TokString . tail) } - "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } - "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } - -- allow special characters through if they don't fit one of the previous - -- patterns. - [\/\'\`\<\#\&\\] { strtoken TokString } - [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } - [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString } -} - -<def> { - \] { token TokDefEnd `andBegin` string } -} - --- ']' doesn't have any special meaning outside of the [...] at the beginning --- of a definition paragraph. -<string> { - \] { strtoken TokString } -} - -{ --- | A located token -type LToken = (Token, AlexPosn) - -data Token - = TokPara - | TokNumber - | TokBullet - | TokDefStart - | TokDefEnd - | TokSpecial Char - | TokIdent RdrName - | TokString String - | TokURL String - | TokPic String - | TokEmphasis String - | TokAName String - | TokBirdTrack String - | TokProperty String - | TokExamplePrompt String - | TokExampleExpression String - | TokExampleResult String --- deriving Show - -tokenPos :: LToken -> (Int, Int) -tokenPos t = let AlexPn _ line col = snd t in (line, col) - --- ----------------------------------------------------------------------------- --- Alex support stuff - --- XXX: copied the posn wrapper code from Alex to make this lexer work --- with both Alex 2.x and Alex 3.x. However, we are not using the --- Unicode/UTF-8 support in Alex 3.x, and Unicode documentation will --- probably get mangled. - --- | Encode a Haskell String to a list of Word8 values, in UTF8 format. -utf8Encode :: Char -> [Word8] -utf8Encode = map fromIntegral . go . ord - where - go oc - | oc <= 0x7f = [oc] - - | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6) - , 0x80 + oc Data.Bits..&. 0x3f - ] - - | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12) - , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) - , 0x80 + oc Data.Bits..&. 0x3f - ] - | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18) - , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) - , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) - , 0x80 + oc Data.Bits..&. 0x3f - ] - -type Byte = Word8 - -type AlexInput = (AlexPosn, -- current position, - Char, -- previous char - [Byte], -- pending bytes on current char - String) -- current input string - -alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (p,c,bs,s) = c - -alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) -alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) -alexGetByte (p,c,[],[]) = Nothing -alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c - (b:bs) = utf8Encode c - in p' `seq` Just (b, (p', c, bs, s)) - -data AlexPosn = AlexPn !Int !Int !Int - deriving (Eq,Show) - -alexMove :: AlexPosn -> Char -> AlexPosn -alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1) -alexMove (AlexPn a l c) '\n' = AlexPn (a+1) (l+1) 1 -alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1) - -type StartCode = Int -type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> DynFlags -> [LToken] - -tokenise :: DynFlags -> String -> (Int, Int) -> [LToken] -tokenise dflags str (line, col) = go (posn,'\n',[],eofHack str) para - where posn = AlexPn 0 line col - go inp@(pos,_,_,str) sc = - case alexScan inp sc of - AlexEOF -> [] - AlexError _ -> [] - AlexSkip inp' len -> go inp' sc - AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) dflags - --- NB. we add a final \n to the string, (see comment in the beginning of line --- production above). -eofHack str = str++"\n" - -andBegin :: Action -> StartCode -> Action -andBegin act new_sc = \pos str _ cont dflags -> act pos str new_sc cont dflags - -token :: Token -> Action -token t = \pos _ sc cont _ -> (t, pos) : cont sc - -strtoken, strtokenNL :: (String -> Token) -> Action -strtoken t = \pos str sc cont _ -> (t str, pos) : cont sc -strtokenNL t = \pos str sc cont _ -> (t (filter (/= '\r') str), pos) : cont sc --- ^ We only want LF line endings in our internal doc string format, so we --- filter out all CRs. - -begin :: StartCode -> Action -begin sc = \_ _ _ cont _ -> cont sc - --- ----------------------------------------------------------------------------- --- Lex a string as a Haskell identifier - -ident :: Action -ident pos str sc cont dflags = - case parseIdent dflags loc id of - Just names -> (TokIdent names, pos) : cont sc - Nothing -> (TokString str, pos) : cont sc - where id = init (tail str) - -- TODO: Get the real filename here. Maybe we should just be - -- using GHC SrcLoc's ourself? - filename = mkFastString "<unknown file>" - loc = case pos of - AlexPn _ line col -> - mkRealSrcLoc filename line col - -parseIdent :: DynFlags -> RealSrcLoc -> String -> Maybe RdrName -parseIdent dflags loc str0 = - let buffer = stringToStringBuffer str0 - pstate = mkPState dflags buffer loc - result = unP parseIdentifier pstate - in case result of - POk _ name -> Just (unLoc name) - _ -> Nothing -} |