aboutsummaryrefslogtreecommitdiff
path: root/src/Haddock/Lex.x
diff options
context:
space:
mode:
Diffstat (limited to 'src/Haddock/Lex.x')
-rw-r--r--src/Haddock/Lex.x255
1 files changed, 0 insertions, 255 deletions
diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x
deleted file mode 100644
index 9e59fa4c..00000000
--- a/src/Haddock/Lex.x
+++ /dev/null
@@ -1,255 +0,0 @@
---
--- Haddock - A Haskell Documentation Tool
---
--- (c) Simon Marlow 2002
---
--- This file was modified and integrated into GHC by David Waern 2006.
--- Then moved back into Haddock by Isaac Dupree in 2009 :-)
---
-
-{
-{-# LANGUAGE BangPatterns #-} -- Generated by Alex
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Haddock.Lex (
- Token(..),
- LToken,
- tokenise
- ) where
-
-import Lexer hiding (Token)
-import Parser ( parseIdentifier )
-import StringBuffer
-import RdrName
-import SrcLoc
-import DynFlags
-import FastString
-
-import qualified Data.Bits
-import Data.Char
-import Data.Word (Word8)
-import Numeric
-import System.IO.Unsafe
-import Debug.Trace
-}
-
-$ws = $white # \n
-$digit = [0-9]
-$hexdigit = [0-9a-fA-F]
-$special = [\"\@]
-$alphanum = [A-Za-z0-9]
-$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]
-
-:-
-
--- beginning of a paragraph
-<0,para> {
- $ws* \n ;
- $ws* \> { begin birdtrack }
- $ws* prop \> .* \n { strtoken TokProperty `andBegin` property}
- $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
- $ws* [\*\-] { token TokBullet `andBegin` string }
- $ws* \[ { token TokDefStart `andBegin` def }
- $ws* \( $digit+ \) { token TokNumber `andBegin` string }
- $ws* $digit+ \. { token TokNumber `andBegin` string }
- $ws* { begin string }
-}
-
--- beginning of a line
-<line> {
- $ws* \> { begin birdtrack }
- $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
- $ws* \n { token TokPara `andBegin` para }
- -- Here, we really want to be able to say
- -- $ws* (\n | <eof>) { token TokPara `andBegin` para}
- -- because otherwise a trailing line of whitespace will result in
- -- a spurious TokString at the end of a docstring. We don't have <eof>,
- -- though (NOW I realise what it was for :-). To get around this, we always
- -- append \n to the end of a docstring.
- () { begin string }
-}
-
-<birdtrack> .* \n? { strtokenNL TokBirdTrack `andBegin` line }
-
-<property> () { token TokPara `andBegin` para }
-
-<example> {
- $ws* \n { token TokPara `andBegin` para }
- $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
- () { begin exampleresult }
-}
-
-<exampleexpr> .* \n { strtokenNL TokExampleExpression `andBegin` example }
-
-<exampleresult> .* \n { strtokenNL TokExampleResult `andBegin` example }
-
-<string,def> {
- $special { strtoken $ \s -> TokSpecial (head s) }
- \<\< [^\>]* \>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
- \< [^\>]* \> { strtoken $ \s -> TokURL (init (tail s)) }
- \# [^\#]* \# { strtoken $ \s -> TokAName (init (tail s)) }
- \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) }
- [\'\`] $ident+ [\'\`] { ident }
- \\ . { strtoken (TokString . tail) }
- "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
- "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
- -- allow special characters through if they don't fit one of the previous
- -- patterns.
- [\/\'\`\<\#\&\\] { strtoken TokString }
- [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
- [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString }
-}
-
-<def> {
- \] { token TokDefEnd `andBegin` string }
-}
-
--- ']' doesn't have any special meaning outside of the [...] at the beginning
--- of a definition paragraph.
-<string> {
- \] { strtoken TokString }
-}
-
-{
--- | A located token
-type LToken = (Token, AlexPosn)
-
-data Token
- = TokPara
- | TokNumber
- | TokBullet
- | TokDefStart
- | TokDefEnd
- | TokSpecial Char
- | TokIdent RdrName
- | TokString String
- | TokURL String
- | TokPic String
- | TokEmphasis String
- | TokAName String
- | TokBirdTrack String
- | TokProperty String
- | TokExamplePrompt String
- | TokExampleExpression String
- | TokExampleResult String
--- deriving Show
-
-tokenPos :: LToken -> (Int, Int)
-tokenPos t = let AlexPn _ line col = snd t in (line, col)
-
--- -----------------------------------------------------------------------------
--- Alex support stuff
-
--- XXX: copied the posn wrapper code from Alex to make this lexer work
--- with both Alex 2.x and Alex 3.x. However, we are not using the
--- Unicode/UTF-8 support in Alex 3.x, and Unicode documentation will
--- probably get mangled.
-
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-utf8Encode :: Char -> [Word8]
-utf8Encode = map fromIntegral . go . ord
- where
- go oc
- | oc <= 0x7f = [oc]
-
- | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6)
- , 0x80 + oc Data.Bits..&. 0x3f
- ]
-
- | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12)
- , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
- , 0x80 + oc Data.Bits..&. 0x3f
- ]
- | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18)
- , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
- , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
- , 0x80 + oc Data.Bits..&. 0x3f
- ]
-
-type Byte = Word8
-
-type AlexInput = (AlexPosn, -- current position,
- Char, -- previous char
- [Byte], -- pending bytes on current char
- String) -- current input string
-
-alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (p,c,bs,s) = c
-
-alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
-alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
-alexGetByte (p,c,[],[]) = Nothing
-alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
- (b:bs) = utf8Encode c
- in p' `seq` Just (b, (p', c, bs, s))
-
-data AlexPosn = AlexPn !Int !Int !Int
- deriving (Eq,Show)
-
-alexMove :: AlexPosn -> Char -> AlexPosn
-alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1)
-alexMove (AlexPn a l c) '\n' = AlexPn (a+1) (l+1) 1
-alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
-
-type StartCode = Int
-type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> DynFlags -> [LToken]
-
-tokenise :: DynFlags -> String -> (Int, Int) -> [LToken]
-tokenise dflags str (line, col) = go (posn,'\n',[],eofHack str) para
- where posn = AlexPn 0 line col
- go inp@(pos,_,_,str) sc =
- case alexScan inp sc of
- AlexEOF -> []
- AlexError _ -> []
- AlexSkip inp' len -> go inp' sc
- AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) dflags
-
--- NB. we add a final \n to the string, (see comment in the beginning of line
--- production above).
-eofHack str = str++"\n"
-
-andBegin :: Action -> StartCode -> Action
-andBegin act new_sc = \pos str _ cont dflags -> act pos str new_sc cont dflags
-
-token :: Token -> Action
-token t = \pos _ sc cont _ -> (t, pos) : cont sc
-
-strtoken, strtokenNL :: (String -> Token) -> Action
-strtoken t = \pos str sc cont _ -> (t str, pos) : cont sc
-strtokenNL t = \pos str sc cont _ -> (t (filter (/= '\r') str), pos) : cont sc
--- ^ We only want LF line endings in our internal doc string format, so we
--- filter out all CRs.
-
-begin :: StartCode -> Action
-begin sc = \_ _ _ cont _ -> cont sc
-
--- -----------------------------------------------------------------------------
--- Lex a string as a Haskell identifier
-
-ident :: Action
-ident pos str sc cont dflags =
- case parseIdent dflags loc id of
- Just names -> (TokIdent names, pos) : cont sc
- Nothing -> (TokString str, pos) : cont sc
- where id = init (tail str)
- -- TODO: Get the real filename here. Maybe we should just be
- -- using GHC SrcLoc's ourself?
- filename = mkFastString "<unknown file>"
- loc = case pos of
- AlexPn _ line col ->
- mkRealSrcLoc filename line col
-
-parseIdent :: DynFlags -> RealSrcLoc -> String -> Maybe RdrName
-parseIdent dflags loc str0 =
- let buffer = stringToStringBuffer str0
- pstate = mkPState dflags buffer loc
- result = unP parseIdentifier pstate
- in case result of
- POk _ name -> Just (unLoc name)
- _ -> Nothing
-}