aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/HaddockLex.hs155
-rw-r--r--src/HaddockLex.x137
-rw-r--r--src/HaddockParse.y6
3 files changed, 140 insertions, 158 deletions
diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs
deleted file mode 100644
index b1de971e..00000000
--- a/src/HaddockLex.hs
+++ /dev/null
@@ -1,155 +0,0 @@
---
--- Haddock - A Haskell Documentation Tool
---
--- (c) Simon Marlow 2002
---
-
-module HaddockLex (
- Token(..),
- tokenise
- ) where
-
-import Char
-import HsSyn
-import HsLexer hiding (Token)
-import HsParseMonad
-
-data Token
- = TokPara
- | TokNumber
- | TokBullet
- | TokSpecial Char
- | TokIdent [HsQName]
- | TokString String
- | TokURL String
- | TokBirdTrack
- | TokAName String
- deriving Show
-
-isSpecial, isSingleQuote, isIdent :: Char -> Bool
-isSpecial c = c `elem` ['\"', '@', '/', '#']
-isSingleQuote c = c `elem` ['\'', '`']
-isIdent c = isAlphaNum c || c `elem` "_.!#$%&*+/<=>?@\\^|-~"
-
--- simple finite-state machine for tokenising the doc string
-
--- ----------------------------------------------------------------------------
--- At the beginning of a paragraph, we throw away initial whitespace
-
-tokenise :: String -> [Token]
-tokenise "" = []
-tokenise str = case str of
- '<':cs -> tokenise_url cs
- '\n':cs -> tokenise_newline cs
- c:cs | isSingleQuote c -> tokenise_identifier c cs
- | isSpecial c -> TokSpecial c : tokenise1 cs
- _other -> tokenise_string "" str
-
-tokenise_newline :: String -> [Token]
-tokenise_newline cs0 =
- case dropWhile nonNewlineSpace cs0 of
- '\n':cs -> TokPara : tokenise_para cs -- paragraph break
- '>':cs -> TokBirdTrack : tokenise_birdtrack cs -- bird track
- _ -> tokenise_string "" cs0
-
-tokenise_para :: String -> [Token]
-tokenise_para cs0 =
- case dropWhile nonNewlineSpace cs0 of
- -- bullet: '*'
- '*':cs -> TokBullet : tokenise cs
- -- bullet: '-'
- '-':cs -> TokBullet : tokenise cs
- -- enumerated item: '1.'
- '>':cs -> TokBirdTrack : tokenise_birdtrack cs
- -- bird track
- str | (ds,'.':cs) <- span isDigit str, not (null ds)
- -> TokNumber : tokenise cs
- -- enumerated item: '(1)'
- '(':cs | (ds,')':cs') <- span isDigit cs, not (null ds)
- -> TokNumber : tokenise cs'
- _ -> tokenise cs0
-
-nonNewlineSpace :: Char -> Bool
-nonNewlineSpace c = isSpace c && c /= '\n'
-
--- ----------------------------------------------------------------------------
--- Within a paragraph, we don't throw away any whitespace (except before a
--- birdtrack, and before a paragraph break).
-
-tokenise1 :: String -> [Token]
-tokenise1 str = tokenise_string "" str
-
--- found a single quote, check whether we have an identifier...
-tokenise_identifier :: Char -> String -> [Token]
-tokenise_identifier q cs0 =
- let (ident,rest) = break (not.isIdent) cs0 in
- case (rest, strToHsQNames ident) of
- (c:cs, Just names) | isSingleQuote c -> TokIdent names : tokenise1 cs
- _ -> tokenise_string [q] cs0
-
-tokenise_url :: String -> [Token]
-tokenise_url cs =
- let (url,rest0) = break (=='>') cs in
- TokURL url : case rest0 of
- '>':rest -> tokenise1 rest
- _ -> tokenise1 rest0
-
-tokenise_string :: String -> String -> [Token]
-tokenise_string str cs0 =
- case cs0 of
- [] -> tokString str []
- '\\':c:cs -> tokenise_string (c:str) cs
- '\n':cs -> tokenise_string_newline str cs
- '<':cs -> tokString str (tokenise_url cs)
- c:cs | isSpecial c -> tokString str (TokSpecial c : tokenise1 cs)
- | isSingleQuote c -> tokString str (tokenise_identifier c cs)
- | otherwise -> tokenise_string (c:str) cs
-
-tokenise_string_newline :: String -> String -> [Token]
-tokenise_string_newline str cs0 =
- case dropWhile nonNewlineSpace cs0 of
- '\n':cs -> tokString str (TokPara : tokenise_para cs)
- -- paragraph break: throw away all whitespace
- '>':cs -> tokString ('\n':str) (TokBirdTrack : tokenise_birdtrack cs)
- -- keep the \n, but throw away any space before the '>'
- _ -> tokenise_string ('\n':str) cs0
- -- don't throw away whitespace at all
-
-tokString :: String -> [Token] -> [Token]
-tokString [] rest = rest
-tokString cs rest = TokString (reverse cs) : rest
-
--- A bird-tracked line is verbatim, no markup characters are interpreted
-tokenise_birdtrack :: String -> [Token]
-tokenise_birdtrack cs =
- let (line, rest) = break (=='\n') cs in
- TokString line : tokenise1 rest
-
--- -----------------------------------------------------------------------------
--- Lex a string as a Haskell identifier
-
-strToHsQNames :: String -> Maybe [HsQName]
-strToHsQNames str0
- = case lexer (\t -> returnP t) str0 (SrcLoc 1 1) 1 1 [] of
- Ok _ (VarId str)
- -> Just [ UnQual (HsVarName (HsIdent str)) ]
- Ok _ (QVarId (mod0,str))
- -> Just [ Qual (Module mod0) (HsVarName (HsIdent str)) ]
- Ok _ (ConId str)
- -> Just [ UnQual (HsTyClsName (HsIdent str)),
- UnQual (HsVarName (HsIdent str)) ]
- Ok _ (QConId (mod0,str))
- -> Just [ Qual (Module mod0) (HsTyClsName (HsIdent str)),
- Qual (Module mod0) (HsVarName (HsIdent str)) ]
- Ok _ (VarSym str)
- -> Just [ UnQual (HsVarName (HsSymbol str)) ]
- Ok _ (ConSym str)
- -> Just [ UnQual (HsTyClsName (HsSymbol str)),
- UnQual (HsVarName (HsSymbol str)) ]
- Ok _ (QVarSym (mod0,str))
- -> Just [ Qual (Module mod0) (HsVarName (HsSymbol str)) ]
- Ok _ (QConSym (mod0,str))
- -> Just [ Qual (Module mod0) (HsTyClsName (HsSymbol str)),
- Qual (Module mod0) (HsVarName (HsSymbol str)) ]
- _other
- -> Nothing
diff --git a/src/HaddockLex.x b/src/HaddockLex.x
new file mode 100644
index 00000000..d888aeff
--- /dev/null
+++ b/src/HaddockLex.x
@@ -0,0 +1,137 @@
+--
+-- Haddock - A Haskell Documentation Tool
+--
+-- (c) Simon Marlow 2002
+--
+
+{
+module HaddockLex (
+ Token(..),
+ tokenise
+ ) where
+
+import Char
+import HsSyn
+import HsLexer hiding (Token)
+import HsParseMonad
+import Debug.Trace
+}
+
+$ws = $white # \n
+$digit = [0-9]
+$special = [\"\@\/\#]
+$alphanum = [A-Za-z0-9]
+$ident = [$alphanum \_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~]
+
+:-
+
+-- beginning of a paragraph
+<0,para> {
+ $ws* \n ;
+ $ws* \> { begin birdtrack }
+ $ws* [\*\-] { token TokBullet }
+ $ws* \( $digit+ \) { token TokNumber }
+ $ws* { begin string }
+}
+
+-- beginning of a line: don't ignore whitespace, unless this is the start
+-- of a new paragraph.
+<line> {
+ $ws* \> { begin birdtrack }
+ $ws* \n { token TokPara `andBegin` para }
+ () { begin string }
+}
+
+<birdtrack> .* \n? { strtoken TokBirdTrack `andBegin` line }
+
+<string> {
+ $special { strtoken $ \s -> TokSpecial (head s) }
+ \<.*\> { strtoken $ \s -> TokURL (init (tail s)) }
+ [\'\`] $ident+ [\'\`] { ident }
+ \\ . { strtoken (TokString . tail) }
+ [^ $special \< \n \'\` \\]* \n { strtoken TokString `andBegin` line }
+ [^ $special \< \n \'\` \\]+ { strtoken TokString }
+}
+
+{
+data Token
+ = TokPara
+ | TokNumber
+ | TokBullet
+ | TokSpecial Char
+ | TokIdent [HsQName]
+ | TokString String
+ | TokURL String
+ | TokBirdTrack String
+ deriving Show
+
+-- -----------------------------------------------------------------------------
+-- Alex support stuff
+
+type StartCode = Int
+type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token]
+
+type AlexInput = (Char,String)
+
+alexGetChar (_, []) = Nothing
+alexGetChar (_, c:cs) = Just (c, (c,cs))
+
+alexInputPrevChar (c,_) = c
+
+tokenise :: String -> [Token]
+tokenise str = let toks = go ('\n',str) para in trace (show toks) toks
+ where go inp@(_,str) sc =
+ case alexScan inp sc of
+ AlexEOF -> []
+ AlexError _ -> error "lexical error"
+ AlexSkip inp' len -> go inp' sc
+ AlexToken inp' len act -> act (take len str) sc (\sc -> go inp' sc)
+
+andBegin :: Action -> StartCode -> Action
+andBegin act new_sc = \str sc cont -> act str new_sc cont
+
+token :: Token -> Action
+token t = \str sc cont -> t : cont sc
+
+strtoken :: (String -> Token) -> Action
+strtoken t = \str sc cont -> t str : cont sc
+
+begin :: StartCode -> Action
+begin sc = \str _ cont -> cont sc
+
+-- -----------------------------------------------------------------------------
+-- Lex a string as a Haskell identifier
+
+ident :: Action
+ident str sc cont =
+ case strToHsQNames id of
+ Just names -> TokIdent names : cont sc
+ Nothing -> TokString str : cont sc
+ where id = init (tail str)
+
+strToHsQNames :: String -> Maybe [HsQName]
+strToHsQNames str0
+ = case lexer (\t -> returnP t) str0 (SrcLoc 1 1) 1 1 [] of
+ Ok _ (VarId str)
+ -> Just [ UnQual (HsVarName (HsIdent str)) ]
+ Ok _ (QVarId (mod0,str))
+ -> Just [ Qual (Module mod0) (HsVarName (HsIdent str)) ]
+ Ok _ (ConId str)
+ -> Just [ UnQual (HsTyClsName (HsIdent str)),
+ UnQual (HsVarName (HsIdent str)) ]
+ Ok _ (QConId (mod0,str))
+ -> Just [ Qual (Module mod0) (HsTyClsName (HsIdent str)),
+ Qual (Module mod0) (HsVarName (HsIdent str)) ]
+ Ok _ (VarSym str)
+ -> Just [ UnQual (HsVarName (HsSymbol str)) ]
+ Ok _ (ConSym str)
+ -> Just [ UnQual (HsTyClsName (HsSymbol str)),
+ UnQual (HsVarName (HsSymbol str)) ]
+ Ok _ (QVarSym (mod0,str))
+ -> Just [ Qual (Module mod0) (HsVarName (HsSymbol str)) ]
+ Ok _ (QConSym (mod0,str))
+ -> Just [ Qual (Module mod0) (HsTyClsName (HsSymbol str)),
+ Qual (Module mod0) (HsVarName (HsSymbol str)) ]
+ _other
+ -> Nothing
+}
diff --git a/src/HaddockParse.y b/src/HaddockParse.y
index 15eda968..db712c42 100644
--- a/src/HaddockParse.y
+++ b/src/HaddockParse.y
@@ -14,7 +14,7 @@ import HsSyn
URL { TokURL $$ }
'*' { TokBullet }
'(n)' { TokNumber }
- '>' { TokBirdTrack }
+ '>..' { TokBirdTrack $$ }
IDENT { TokIdent $$ }
PARA { TokPara }
STRING { TokString $$ }
@@ -48,8 +48,8 @@ para :: { Doc }
| codepara { DocCodeBlock $1 }
codepara :: { Doc }
- : '>' seq codepara { docAppend $2 $3 }
- | '>' seq { $2 }
+ : '>..' codepara { docAppend (DocString $1) $2 }
+ | '>..' { DocString $1 }
seq :: { Doc }
: elem seq { docAppend $1 $2 }