From 295a27b7558cf4fae144e3cf01e488e54ca961f2 Mon Sep 17 00:00:00 2001 From: David Waern Date: Sat, 28 Nov 2009 22:16:16 +0000 Subject: Move H.Interface.Parse/Lex to H.Parse/Lex These are not just used to build Interfaces. --- src/Haddock/Interface/Lex.x | 171 ----------------------------------- src/Haddock/Interface/LexParseRn.hs | 8 +- src/Haddock/Interface/Parse.y | 106 ---------------------- src/Haddock/Lex.x | 172 ++++++++++++++++++++++++++++++++++++ src/Haddock/Parse.y | 103 +++++++++++++++++++++ src/Main.hs | 6 +- 6 files changed, 282 insertions(+), 284 deletions(-) delete mode 100644 src/Haddock/Interface/Lex.x delete mode 100644 src/Haddock/Interface/Parse.y create mode 100644 src/Haddock/Lex.x create mode 100644 src/Haddock/Parse.y (limited to 'src') diff --git a/src/Haddock/Interface/Lex.x b/src/Haddock/Interface/Lex.x deleted file mode 100644 index 5f93084a..00000000 --- a/src/Haddock/Interface/Lex.x +++ /dev/null @@ -1,171 +0,0 @@ --- --- Haddock - A Haskell Documentation Tool --- --- (c) Simon Marlow 2002 --- --- This file was modified and integrated into GHC by David Waern 2006 --- - -{ -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Interface.Lex ( - Token(..), - tokenise - ) where - -import Lexer hiding (Token) -import Parser ( parseIdentifier ) -import StringBuffer -import RdrName -import SrcLoc -import DynFlags - -import Data.Char -import Numeric -import System.IO.Unsafe -} - -$ws = $white # \n -$digit = [0-9] -$hexdigit = [0-9a-fA-F] -$special = [\"\@] -$alphanum = [A-Za-z0-9] -$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~] - -:- - --- beginning of a paragraph -<0,para> { - $ws* \n ; - $ws* \> { begin birdtrack } - $ws* [\*\-] { token TokBullet `andBegin` string } - $ws* \[ { token TokDefStart `andBegin` def } - $ws* \( $digit+ \) { token TokNumber `andBegin` string } - $ws* { begin string } -} - --- beginning of a line - { - $ws* \> { begin birdtrack } - $ws* \n { token TokPara `andBegin` para } - -- Here, we really want to be able to say - -- $ws* (\n | ) { token TokPara `andBegin` para} - -- because otherwise a trailing line of whitespace will result in - -- a spurious TokString at the end of a docstring. We don't have , - -- though (NOW I realise what it was for :-). To get around this, we always - -- append \n to the end of a docstring. - () { begin string } -} - - .* \n? { strtokenNL TokBirdTrack `andBegin` line } - - { - $special { strtoken $ \s -> TokSpecial (head s) } - \<\<.*\>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } - \<.*\> { strtoken $ \s -> TokURL (init (tail s)) } - \#.*\# { strtoken $ \s -> TokAName (init (tail s)) } - \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) } - [\'\`] $ident+ [\'\`] { ident } - \\ . { strtoken (TokString . tail) } - "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } - "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } - -- allow special characters through if they don't fit one of the previous - -- patterns. 
- [\/\'\`\<\#\&\\] { strtoken TokString } - [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } - [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString } -} - - { - \] { token TokDefEnd `andBegin` string } -} - --- ']' doesn't have any special meaning outside of the [...] at the beginning --- of a definition paragraph. - { - \] { strtoken TokString } -} - -{ -data Token - = TokPara - | TokNumber - | TokBullet - | TokDefStart - | TokDefEnd - | TokSpecial Char - | TokIdent [RdrName] - | TokString String - | TokURL String - | TokPic String - | TokEmphasis String - | TokAName String - | TokBirdTrack String --- deriving Show - --- ----------------------------------------------------------------------------- --- Alex support stuff - -type StartCode = Int -type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token] - -type AlexInput = (Char,String) - -alexGetChar (_, []) = Nothing -alexGetChar (_, c:cs) = Just (c, (c,cs)) - -alexInputPrevChar (c,_) = c - -tokenise :: String -> [Token] -tokenise str = let toks = go ('\n', eofHack str) para in {-trace (show toks)-} toks - where go inp@(_,str) sc = - case alexScan inp sc of - AlexEOF -> [] - AlexError _ -> error "lexical error" - AlexSkip inp' _ -> go inp' sc - AlexToken inp' len act -> act (take len str) sc (\sc -> go inp' sc) - --- NB. we add a final \n to the string, (see comment in the beginning of line --- production above). -eofHack str = str++"\n" - -andBegin :: Action -> StartCode -> Action -andBegin act new_sc = \str _ cont -> act str new_sc cont - -token :: Token -> Action -token t = \_ sc cont -> t : cont sc - -strtoken, strtokenNL :: (String -> Token) -> Action -strtoken t = \str sc cont -> t str : cont sc -strtokenNL t = \str sc cont -> t (filter (/= '\r') str) : cont sc --- ^ We only want LF line endings in our internal doc string format, so we --- filter out all CRs. 
- -begin :: StartCode -> Action -begin sc = \_ _ cont -> cont sc - --- ----------------------------------------------------------------------------- --- Lex a string as a Haskell identifier - -ident :: Action -ident str sc cont = - case strToHsQNames id of - Just names -> TokIdent names : cont sc - Nothing -> TokString str : cont sc - where id = init (tail str) - -strToHsQNames :: String -> Maybe [RdrName] -strToHsQNames str0 = - let buffer = unsafePerformIO (stringToStringBuffer str0) - pstate = mkPState buffer noSrcLoc defaultDynFlags - result = unP parseIdentifier pstate - in case result of - POk _ name -> Just [unLoc name] - _ -> Nothing -} diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs index fc44cedf..89440139 100644 --- a/src/Haddock/Interface/LexParseRn.hs +++ b/src/Haddock/Interface/LexParseRn.hs @@ -19,8 +19,8 @@ module Haddock.Interface.LexParseRn ( ) where import Haddock.Types -import Haddock.Interface.Lex -import Haddock.Interface.Parse +import Haddock.Lex +import Haddock.Parse import Haddock.Interface.Rn import Haddock.Interface.ParseModuleHeader import Haddock.Doc @@ -46,8 +46,8 @@ lexParseRnHaddockComment hty gre (HsDocString fs) = do let str = unpackFS fs let toks = tokenise str let parse = case hty of - NormalHaddockComment -> parseHaddockParagraphs - DocSectionComment -> parseHaddockString + NormalHaddockComment -> parseParas + DocSectionComment -> parseString case parse toks of Nothing -> do tell ["doc comment parse failed: "++str] diff --git a/src/Haddock/Interface/Parse.y b/src/Haddock/Interface/Parse.y deleted file mode 100644 index a5175ddc..00000000 --- a/src/Haddock/Interface/Parse.y +++ /dev/null @@ -1,106 +0,0 @@ -{ -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Interface.Parse ( - parseHaddockParagraphs, - parseHaddockString -) where - -import Haddock.Interface.Lex -import Haddock.Types (Doc(..)) -import Haddock.Doc -import HsSyn -import RdrName -} - -%expect 0 - -%tokentype { Token } - -%token '/' { TokSpecial '/' } - '@' { TokSpecial '@' } - '[' { TokDefStart } - ']' { TokDefEnd } - DQUO { TokSpecial '\"' } - URL { TokURL $$ } - PIC { TokPic $$ } - ANAME { TokAName $$ } - '/../' { TokEmphasis $$ } - '-' { TokBullet } - '(n)' { TokNumber } - '>..' { TokBirdTrack $$ } - IDENT { TokIdent $$ } - PARA { TokPara } - STRING { TokString $$ } - -%monad { Maybe } - -%name parseHaddockParagraphs doc -%name parseHaddockString seq - -%% - -doc :: { Doc RdrName } - : apara PARA doc { docAppend $1 $3 } - | PARA doc { $2 } - | apara { $1 } - | {- empty -} { DocEmpty } - -apara :: { Doc RdrName } - : ulpara { DocUnorderedList [$1] } - | olpara { DocOrderedList [$1] } - | defpara { DocDefList [$1] } - | para { $1 } - -ulpara :: { Doc RdrName } - : '-' para { $2 } - -olpara :: { Doc RdrName } - : '(n)' para { $2 } - -defpara :: { (Doc RdrName, Doc RdrName) } - : '[' seq ']' seq { ($2, $4) } - -para :: { Doc RdrName } - : seq { docParagraph $1 } - | codepara { DocCodeBlock $1 } - -codepara :: { Doc RdrName } - : '>..' codepara { docAppend (DocString $1) $2 } - | '>..' 
{ DocString $1 } - -seq :: { Doc RdrName } - : elem seq { docAppend $1 $2 } - | elem { $1 } - -elem :: { Doc RdrName } - : elem1 { $1 } - | '@' seq1 '@' { DocMonospaced $2 } - -seq1 :: { Doc RdrName } - : PARA seq1 { docAppend (DocString "\n") $2 } - | elem1 seq1 { docAppend $1 $2 } - | elem1 { $1 } - -elem1 :: { Doc RdrName } - : STRING { DocString $1 } - | '/../' { DocEmphasis (DocString $1) } - | URL { DocURL $1 } - | PIC { DocPic $1 } - | ANAME { DocAName $1 } - | IDENT { DocIdentifier $1 } - | DQUO strings DQUO { DocModule $2 } - -strings :: { String } - : STRING { $1 } - | STRING strings { $1 ++ $2 } - -{ -happyError :: [Token] -> Maybe a -happyError toks = Nothing -} diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x new file mode 100644 index 00000000..04b9f0c3 --- /dev/null +++ b/src/Haddock/Lex.x @@ -0,0 +1,172 @@ +-- +-- Haddock - A Haskell Documentation Tool +-- +-- (c) Simon Marlow 2002 +-- +-- This file was modified and integrated into GHC by David Waern 2006. +-- Then moved back into Haddock by Isaac Dupree in 2009 :-) +-- + +{ +{-# OPTIONS -Wwarn -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module Haddock.Lex ( + Token(..), + tokenise + ) where + +import Lexer hiding (Token) +import Parser ( parseIdentifier ) +import StringBuffer +import RdrName +import SrcLoc +import DynFlags + +import Data.Char +import Numeric +import System.IO.Unsafe +} + +$ws = $white # \n +$digit = [0-9] +$hexdigit = [0-9a-fA-F] +$special = [\"\@] +$alphanum = [A-Za-z0-9] +$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~] + +:- + +-- beginning of a paragraph +<0,para> { + $ws* \n ; + $ws* \> { begin birdtrack } + $ws* [\*\-] { token TokBullet `andBegin` string } + $ws* \[ { token TokDefStart `andBegin` def } + $ws* \( $digit+ \) { token TokNumber `andBegin` string } + $ws* { begin string } +} + +-- beginning of a line + { + $ws* \> { begin birdtrack } + $ws* \n { token TokPara `andBegin` para } + -- Here, we really want to be able to say + -- $ws* (\n | ) { token TokPara `andBegin` para} + -- because otherwise a trailing line of whitespace will result in + -- a spurious TokString at the end of a docstring. We don't have , + -- though (NOW I realise what it was for :-). To get around this, we always + -- append \n to the end of a docstring. + () { begin string } +} + + .* \n? { strtokenNL TokBirdTrack `andBegin` line } + + { + $special { strtoken $ \s -> TokSpecial (head s) } + \<\<.*\>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } + \<.*\> { strtoken $ \s -> TokURL (init (tail s)) } + \#.*\# { strtoken $ \s -> TokAName (init (tail s)) } + \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) } + [\'\`] $ident+ [\'\`] { ident } + \\ . { strtoken (TokString . tail) } + "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } + "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } + -- allow special characters through if they don't fit one of the previous + -- patterns. 
+ [\/\'\`\<\#\&\\] { strtoken TokString } + [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } + [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString } +} + + { + \] { token TokDefEnd `andBegin` string } +} + +-- ']' doesn't have any special meaning outside of the [...] at the beginning +-- of a definition paragraph. + { + \] { strtoken TokString } +} + +{ +data Token + = TokPara + | TokNumber + | TokBullet + | TokDefStart + | TokDefEnd + | TokSpecial Char + | TokIdent [RdrName] + | TokString String + | TokURL String + | TokPic String + | TokEmphasis String + | TokAName String + | TokBirdTrack String +-- deriving Show + +-- ----------------------------------------------------------------------------- +-- Alex support stuff + +type StartCode = Int +type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token] + +type AlexInput = (Char,String) + +alexGetChar (_, []) = Nothing +alexGetChar (_, c:cs) = Just (c, (c,cs)) + +alexInputPrevChar (c,_) = c + +tokenise :: String -> [Token] +tokenise str = let toks = go ('\n', eofHack str) para in {-trace (show toks)-} toks + where go inp@(_,str) sc = + case alexScan inp sc of + AlexEOF -> [] + AlexError _ -> error "lexical error" + AlexSkip inp' _ -> go inp' sc + AlexToken inp' len act -> act (take len str) sc (\sc -> go inp' sc) + +-- NB. we add a final \n to the string, (see comment in the beginning of line +-- production above). +eofHack str = str++"\n" + +andBegin :: Action -> StartCode -> Action +andBegin act new_sc = \str _ cont -> act str new_sc cont + +token :: Token -> Action +token t = \_ sc cont -> t : cont sc + +strtoken, strtokenNL :: (String -> Token) -> Action +strtoken t = \str sc cont -> t str : cont sc +strtokenNL t = \str sc cont -> t (filter (/= '\r') str) : cont sc +-- ^ We only want LF line endings in our internal doc string format, so we +-- filter out all CRs. + +begin :: StartCode -> Action +begin sc = \_ _ cont -> cont sc + +-- ----------------------------------------------------------------------------- +-- Lex a string as a Haskell identifier + +ident :: Action +ident str sc cont = + case strToHsQNames id of + Just names -> TokIdent names : cont sc + Nothing -> TokString str : cont sc + where id = init (tail str) + +strToHsQNames :: String -> Maybe [RdrName] +strToHsQNames str0 = + let buffer = unsafePerformIO (stringToStringBuffer str0) + pstate = mkPState buffer noSrcLoc defaultDynFlags + result = unP parseIdentifier pstate + in case result of + POk _ name -> Just [unLoc name] + _ -> Nothing +} diff --git a/src/Haddock/Parse.y b/src/Haddock/Parse.y new file mode 100644 index 00000000..42553343 --- /dev/null +++ b/src/Haddock/Parse.y @@ -0,0 +1,103 @@ +{ +{-# OPTIONS -Wwarn -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module Haddock.Parse where + +import Haddock.Lex +import Haddock.Types (Doc(..)) +import Haddock.Doc +import HsSyn +import RdrName +} + +%expect 0 + +%tokentype { Token } + +%token '/' { TokSpecial '/' } + '@' { TokSpecial '@' } + '[' { TokDefStart } + ']' { TokDefEnd } + DQUO { TokSpecial '\"' } + URL { TokURL $$ } + PIC { TokPic $$ } + ANAME { TokAName $$ } + '/../' { TokEmphasis $$ } + '-' { TokBullet } + '(n)' { TokNumber } + '>..' 
{ TokBirdTrack $$ }
+ IDENT { TokIdent $$ }
+ PARA { TokPara }
+ STRING { TokString $$ }
+
+%monad { Maybe }
+
+%name parseParas doc
+%name parseString seq
+
+%%
+
+doc :: { Doc RdrName }
+ : apara PARA doc { docAppend $1 $3 }
+ | PARA doc { $2 }
+ | apara { $1 }
+ | {- empty -} { DocEmpty }
+
+apara :: { Doc RdrName }
+ : ulpara { DocUnorderedList [$1] }
+ | olpara { DocOrderedList [$1] }
+ | defpara { DocDefList [$1] }
+ | para { $1 }
+
+ulpara :: { Doc RdrName }
+ : '-' para { $2 }
+
+olpara :: { Doc RdrName }
+ : '(n)' para { $2 }
+
+defpara :: { (Doc RdrName, Doc RdrName) }
+ : '[' seq ']' seq { ($2, $4) }
+
+para :: { Doc RdrName }
+ : seq { docParagraph $1 }
+ | codepara { DocCodeBlock $1 }
+
+codepara :: { Doc RdrName }
+ : '>..' codepara { docAppend (DocString $1) $2 }
+ | '>..' { DocString $1 }
+
+seq :: { Doc RdrName }
+ : elem seq { docAppend $1 $2 }
+ | elem { $1 }
+
+elem :: { Doc RdrName }
+ : elem1 { $1 }
+ | '@' seq1 '@' { DocMonospaced $2 }
+
+seq1 :: { Doc RdrName }
+ : PARA seq1 { docAppend (DocString "\n") $2 }
+ | elem1 seq1 { docAppend $1 $2 }
+ | elem1 { $1 }
+
+elem1 :: { Doc RdrName }
+ : STRING { DocString $1 }
+ | '/../' { DocEmphasis (DocString $1) }
+ | URL { DocURL $1 }
+ | PIC { DocPic $1 }
+ | ANAME { DocAName $1 }
+ | IDENT { DocIdentifier $1 }
+ | DQUO strings DQUO { DocModule $2 }
+
+strings :: { String }
+ : STRING { $1 }
+ | STRING strings { $1 ++ $2 }
+
+{
+happyError :: [Token] -> Maybe a
+happyError toks = Nothing
+}
diff --git a/src/Main.hs b/src/Main.hs
index f09dcd29..1f4867e4 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -22,8 +22,8 @@ module Main (main) where
 import Haddock.Backends.Html
 import Haddock.Backends.Hoogle
 import Haddock.Interface
-import Haddock.Interface.Lex
-import Haddock.Interface.Parse
+import Haddock.Lex
+import Haddock.Parse
 import Haddock.Types
 import Haddock.Version
 import Haddock.InterfaceFile
@@ -400,7 +400,7 @@ getPrologue flags =
 [] -> return Nothing
 [filename] -> do
 str <- readFile filename
- case parseHaddockParagraphs (tokenise str) of
+ case parseParas (tokenise str) of
 Nothing -> throwE "parsing haddock prologue failed"
 Just doc -> return (Just doc)
 _otherwise -> throwE "multiple -p/--prologue options"
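After this move the comment lexer and parser are ordinary library modules that any part of Haddock can import, which is what the getPrologue hunk above relies on. The sketch below is not part of the patch; it is a minimal illustration of how the two relocated modules compose, assuming a build against this Haddock source tree and the GHC API, with the wrapper names invented for the example.

module DocParseSketch where  -- hypothetical module, not in the patch

import Haddock.Lex   (tokenise)
import Haddock.Parse (parseParas, parseString)
import Haddock.Types (Doc)
import RdrName       (RdrName)

-- Lex a raw comment into tokens, then parse the token stream into a Doc
-- tree; Nothing signals a parse failure, as in getPrologue above.
parseDocString :: String -> Maybe (Doc RdrName)
parseDocString = parseParas . tokenise

-- One-line section headings use the simpler string grammar instead,
-- mirroring the DocSectionComment branch in Haddock.Interface.LexParseRn.
parseSectionHeading :: String -> Maybe (Doc RdrName)
parseSectionHeading = parseString . tokenise

The same tokenise-then-parse pipeline is what the patch now calls from Main.hs for --prologue files and from Haddock.Interface.LexParseRn for documentation comments, so callers outside the interface-building code can parse doc markup directly.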