diff options
Diffstat (limited to 'src/Haddock')
| -rw-r--r-- | src/Haddock/Backends/LaTeX.hs | 47 | ||||
| -rw-r--r-- | src/Haddock/Backends/Xhtml/Decl.hs | 15 | ||||
| -rw-r--r-- | src/Haddock/Backends/Xhtml/DocMarkup.hs | 7 | ||||
| -rw-r--r-- | src/Haddock/Doc.hs | 20 | ||||
| -rw-r--r-- | src/Haddock/Interface/LexParseRn.hs | 11 | ||||
| -rw-r--r-- | src/Haddock/Interface/ParseModuleHeader.hs | 9 | ||||
| -rw-r--r-- | src/Haddock/InterfaceFile.hs | 10 | ||||
| -rw-r--r-- | src/Haddock/Lex.x | 255 | ||||
| -rw-r--r-- | src/Haddock/Parse.y | 171 | ||||
| -rw-r--r-- | src/Haddock/Parser.hs | 435 | ||||
| -rw-r--r-- | src/Haddock/Types.hs | 33 | ||||
| -rw-r--r-- | src/Haddock/Utf8.hs | 74 | 
12 files changed, 598 insertions, 489 deletions
| diff --git a/src/Haddock/Backends/LaTeX.hs b/src/Haddock/Backends/LaTeX.hs index 0bc95719..64966ac2 100644 --- a/src/Haddock/Backends/LaTeX.hs +++ b/src/Haddock/Backends/LaTeX.hs @@ -33,6 +33,9 @@ import Data.Char  import Control.Monad  import Data.Maybe  import Data.List + +import Haddock.Doc (combineDocumentation) +  -- import Debug.Trace  {- SAMPLE OUTPUT @@ -278,11 +281,11 @@ ppDecl :: LHsDecl DocName  ppDecl (L loc decl) (doc, fnArgsDoc) instances subdocs = case decl of    TyClD d@(FamDecl {})          -> ppTyFam False loc doc d unicode -  TyClD d@(DataDecl {})    +  TyClD d@(DataDecl {})                                  -> ppDataDecl instances subdocs loc doc d unicode    TyClD d@(SynDecl {})          -> ppTySyn loc (doc, fnArgsDoc) d unicode  -- Family instances happen via FamInst now ---  TyClD d@(TySynonym {})          +--  TyClD d@(TySynonym {})  --    | Just _  <- tcdTyPats d    -> ppTyInst False loc doc d unicode  -- Family instances happen via FamInst now    TyClD d@(ClassDecl {})         -> ppClassDecl instances loc doc subdocs d unicode @@ -465,7 +468,7 @@ ppClassDecl :: [DocInstance DocName] -> SrcSpan              -> Documentation DocName -> [(DocName, DocForDecl DocName)]              -> TyClDecl DocName -> Bool -> LaTeX  ppClassDecl instances loc doc subdocs -  (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = ltyvars, tcdFDs = lfds  +  (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = ltyvars, tcdFDs = lfds               , tcdSigs = lsigs, tcdATs = ats, tcdATDefs = at_defs }) unicode    = declWithDoc classheader (if null body then Nothing else Just (vcat body)) $$      instancesBit @@ -504,7 +507,7 @@ ppDocInstances unicode (i : rest)    | Just ihead <- isUndocdInstance i    = declWithDoc (vcat (map (ppInstDecl unicode) (ihead:is))) Nothing $$      ppDocInstances unicode rest' -  | otherwise  +  | otherwise    = ppDocInstance unicode i $$ ppDocInstances unicode rest    where      (is, rest') = spanWith isUndocdInstance rest @@ -650,9 +653,9 @@ ppSideBySideField subdocs unicode (ConDeclField (L _ name) ltype _) =  -- {-  -- ppHsFullConstr :: HsConDecl -> LaTeX --- ppHsFullConstr (HsConDecl _ nm tvs ctxt typeList doc) =  +-- ppHsFullConstr (HsConDecl _ nm tvs ctxt typeList doc) =  --      declWithDoc False doc ( --- 	hsep ((ppHsConstrHdr tvs ctxt +++  +-- 	hsep ((ppHsConstrHdr tvs ctxt +++  -- 		ppHsBinder False nm) : map ppHsBangType typeList)  --       )  -- ppHsFullConstr (HsRecDecl _ nm tvs ctxt fields doc) = @@ -661,35 +664,35 @@ ppSideBySideField subdocs unicode (ConDeclField (L _ name) ltype _) =  --        Nothing -> aboves [hdr, fields_html]  --        Just _  -> aboves [hdr, constr_doc, fields_html]  --    ) ---  +--  --   where hdr = declBox (ppHsConstrHdr tvs ctxt +++ ppHsBinder False nm) ---  --- 	constr_doc	 +-- +-- 	constr_doc  -- 	  | isJust doc = docBox (docToLaTeX (fromJust doc))  -- 	  | otherwise  = LaTeX.emptyTable ---  --- 	fields_html =  --- 	   td <<  +-- +-- 	fields_html = +-- 	   td <<  -- 	      table ! [width "100%", cellpadding 0, cellspacing 8] << (  -- 		   aboves (map ppFullField (concat (map expandField fields)))  -- 		)  -- -} ---  +--  -- ppShortField :: Bool -> Bool -> ConDeclField DocName -> LaTeX  -- ppShortField summary unicode (ConDeclField (L _ name) ltype _)  --   = tda [theclass "recfield"] << (  --       ppBinder summary (docNameOcc name)  --       <+> dcolon unicode <+> ppLType unicode ltype  --     ) ---  +--  -- {-  -- ppFullField :: HsFieldDecl -> LaTeX --- ppFullField (HsFieldDecl [n] ty doc)  +-- ppFullField (HsFieldDecl [n] ty doc)  --   = declWithDoc False doc (  -- 	ppHsBinder False n <+> dcolon <+> ppHsBangType ty  --     )  -- ppFullField _ = error "ppFullField" ---  +--  -- expandField :: HsFieldDecl -> [HsFieldDecl]  -- expandField (HsFieldDecl ns ty doc) = [ HsFieldDecl [n] ty doc | n <- ns ]  -- -} @@ -718,7 +721,7 @@ ppAppNameTypes :: DocName -> [HsType DocName] -> Bool -> LaTeX  ppAppNameTypes n ts unicode = ppTypeApp n ts ppDocName (ppParendType unicode) --- | Print an application of a DocName and a list of Names  +-- | Print an application of a DocName and a list of Names  ppAppDocNameNames :: Bool -> DocName -> [Name] -> LaTeX  ppAppDocNameNames _summ n ns =    ppTypeApp n ns (ppBinder . nameOccName . getName) ppSymName @@ -1002,7 +1005,7 @@ parLatexMarkup ppId = Markup {    markupEmphasis             = \p v -> emph (p v),    markupMonospaced           = \p _ -> tt (p Mono),    markupUnorderedList        = \p v -> itemizedList (map ($v) p) $$ text "", -  markupPic                  = \path _ -> parens (text "image: " <> text path), +  markupPic                  = \p _ -> markupPic p,    markupOrderedList          = \p v -> enumeratedList (map ($v) p) $$ text "",    markupDefList              = \l v -> descriptionList (map (\(a,b) -> (a v, b v)) l),    markupCodeBlock            = \p _ -> quote (verb (p Verb)) $$ text "", @@ -1020,6 +1023,14 @@ parLatexMarkup ppId = Markup {        Just label -> text "\\href" <> braces (text url) <> braces (text label)        Nothing    -> text "\\url"  <> braces (text url) +    -- Is there a better way of doing this? Just a space is an aribtrary choice. +    markupPic (Picture uri title) = parens (imageText title) +      where +        imageText Nothing = beg +        imageText (Just t) = beg <> text " " <> text t + +        beg = text "image: " <> text uri +      markupId ppId_ id v =        case v of          Verb  -> theid diff --git a/src/Haddock/Backends/Xhtml/Decl.hs b/src/Haddock/Backends/Xhtml/Decl.hs index 2ecc6464..91e6871d 100644 --- a/src/Haddock/Backends/Xhtml/Decl.hs +++ b/src/Haddock/Backends/Xhtml/Decl.hs @@ -25,6 +25,7 @@ import Haddock.Backends.Xhtml.Types  import Haddock.Backends.Xhtml.Utils  import Haddock.GhcUtils  import Haddock.Types +import Haddock.Doc (combineDocumentation)  import           Data.List             ( intersperse )  import qualified Data.Map as Map @@ -115,7 +116,7 @@ ppFor _ _ _ _ _ _ _ = error "ppFor"  ppTySyn :: Bool -> LinksInfo -> SrcSpan -> DocForDecl DocName -> TyClDecl DocName -> Bool          -> Qualification -> Html  ppTySyn summary links loc doc (SynDecl { tcdLName = L _ name, tcdTyVars = ltyvars -                                       , tcdRhs = ltype })  +                                       , tcdRhs = ltype })          unicode qual    = ppTypeOrFunSig summary links loc [name] (unLoc ltype) doc                     (full, hdr, spaceHtml +++ equals) unicode qual @@ -204,12 +205,12 @@ ppAssocType summ links doc (L loc decl) unicode qual =  -- * TyClDecl helpers  -------------------------------------------------------------------------------- --- | Print a type family and its variables  +-- | Print a type family and its variables  ppFamDeclBinderWithVars :: Bool -> FamilyDecl DocName -> Html  ppFamDeclBinderWithVars summ (FamilyDecl { fdLName = lname, fdTyVars = tvs }) =    ppAppDocNameNames summ (unLoc lname) (tyvarNames tvs) --- | Print a newtype / data binder and its variables  +-- | Print a newtype / data binder and its variables  ppDataBinderWithVars :: Bool -> TyClDecl DocName -> Html  ppDataBinderWithVars summ decl =    ppAppDocNameNames summ (tcdName decl) (tyvarNames $ tcdTyVars decl) @@ -226,7 +227,7 @@ ppAppNameTypes n ts unicode qual =      ppTypeApp n ts (ppDocName qual) (ppParendType unicode qual) --- | Print an application of a DocName and a list of Names  +-- | Print an application of a DocName and a list of Names  ppAppDocNameNames :: Bool -> DocName -> [Name] -> Html  ppAppDocNameNames summ n ns =    ppTypeApp n ns (ppBinder summ . nameOccName . getName) ppTyName @@ -305,7 +306,7 @@ ppShortClassDecl :: Bool -> LinksInfo -> TyClDecl DocName -> SrcSpan                   -> Html  ppShortClassDecl summary links (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = tvs                                            , tcdFDs = fds, tcdSigs = sigs, tcdATs = ats }) loc -    subdocs unicode qual =  +    subdocs unicode qual =    if null sigs && null ats      then (if summary then id else topDeclElem links loc [nm]) hdr      else (if summary then id else topDeclElem links loc [nm]) (hdr <+> keyword "where") @@ -580,7 +581,7 @@ ppShortField summary unicode qual (ConDeclField (L _ name) ltype _)  -- Currently doesn't handle 'data instance' decls or kind signatures  ppDataHeader :: Bool -> TyClDecl DocName -> Bool -> Qualification -> Html  ppDataHeader summary decl@(DataDecl { tcdDataDefn = HsDataDefn { dd_ND = nd -                                                               , dd_ctxt = ctxt } })  +                                                               , dd_ctxt = ctxt } })               unicode qual    = -- newtype or data      (case nd of { NewType -> keyword "newtype"; DataType -> keyword "data" }) <+> @@ -723,5 +724,3 @@ ppr_fun_ty ctxt_prec ty1 ty2 unicode qual      in      maybeParen ctxt_prec pREC_FUN $      hsep [p1, arrow unicode <+> p2] - - diff --git a/src/Haddock/Backends/Xhtml/DocMarkup.hs b/src/Haddock/Backends/Xhtml/DocMarkup.hs index aa4ba377..31d653bc 100644 --- a/src/Haddock/Backends/Xhtml/DocMarkup.hs +++ b/src/Haddock/Backends/Xhtml/DocMarkup.hs @@ -18,18 +18,19 @@ module Haddock.Backends.Xhtml.DocMarkup (    docElement, docSection, docSection_,  ) where +import Control.Applicative ((<$>))  import Haddock.Backends.Xhtml.Names  import Haddock.Backends.Xhtml.Utils  import Haddock.Types  import Haddock.Utils +import Haddock.Doc (combineDocumentation) -import Text.XHtml hiding ( name, title, p, quote ) +import Text.XHtml hiding ( name, p, quote )  import Data.Maybe (fromMaybe)  import GHC -  parHtmlMarkup :: Qualification -> (a -> Html) -> DocMarkup a Html  parHtmlMarkup qual ppId = Markup {    markupEmpty                = noHtml, @@ -49,7 +50,7 @@ parHtmlMarkup qual ppId = Markup {    markupCodeBlock            = pre,    markupHyperlink            = \(Hyperlink url mLabel) -> anchor ! [href url] << fromMaybe url mLabel,    markupAName                = \aname -> namedAnchor aname << "", -  markupPic                  = \path -> image ! [src path], +  markupPic                  = \(Picture uri t) -> image ! ([src uri] ++ fromMaybe [] (return . title <$> t)),    markupProperty             = pre . toHtml,    markupExample              = examplesToHtml    } diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs index 18555cfb..4d68c554 100644 --- a/src/Haddock/Doc.hs +++ b/src/Haddock/Doc.hs @@ -1,14 +1,27 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-}  module Haddock.Doc (    docAppend,    docParagraph, -  combineStringNodes +  combineStringNodes, +  combineDocumentation    ) where - +import Data.Maybe +import Data.Monoid  import Haddock.Types  import Data.Char (isSpace)  import Control.Arrow ((***)) +-- We put it here so that we can avoid a circular import +-- anything relevant imports this module anyway +instance Monoid (Doc id) where +  mempty  = DocEmpty +  mappend = docAppend + +combineDocumentation :: Documentation name -> Maybe (Doc name) +combineDocumentation (Documentation Nothing Nothing) = Nothing +combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc) +  -- used to make parsing easier; we group the list items later  docAppend :: Doc id -> Doc id -> Doc id  docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) @@ -85,4 +98,7 @@ combineStringNodes x = x  tryjoin :: Doc id -> Doc id  tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y) +tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z +tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z)) +  = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z))  tryjoin x = x diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs index 9d775bcb..13563532 100644 --- a/src/Haddock/Interface/LexParseRn.hs +++ b/src/Haddock/Interface/LexParseRn.hs @@ -19,8 +19,7 @@ module Haddock.Interface.LexParseRn  import Haddock.Types -import Haddock.Lex -import Haddock.Parse +import Haddock.Parser  import Haddock.Interface.ParseModuleHeader  import Haddock.Doc @@ -50,19 +49,19 @@ processDocStringParas = process parseParas  processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))  processDocString = process parseString -process :: ([LToken] -> Maybe (Doc RdrName)) +process :: (DynFlags -> String -> Maybe (Doc RdrName))          -> DynFlags          -> GlobalRdrEnv          -> HsDocString          -> ErrMsgM (Maybe (Doc Name))  process parse dflags gre (HsDocString fs) = do     let str = unpackFS fs -   let toks = tokenise dflags str (0,0)  -- TODO: real position -   case parse toks of +   case parse dflags str of       Nothing -> do         tell [ "doc comment parse failed: " ++ str ]         return Nothing -     Just doc -> return (Just (rename dflags gre doc)) +     Just doc -> do +       return (Just (rename dflags gre doc))  processModuleHeader :: DynFlags -> GlobalRdrEnv -> SafeHaskellMode -> Maybe LHsDocString diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs index 5087affe..2e4fe73b 100644 --- a/src/Haddock/Interface/ParseModuleHeader.hs +++ b/src/Haddock/Interface/ParseModuleHeader.hs @@ -12,8 +12,7 @@  module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where  import Haddock.Types -import Haddock.Lex -import Haddock.Parse +import Haddock.Parser  import RdrName  import DynFlags @@ -47,15 +46,13 @@ parseModuleHeader dflags str0 =        description1 :: Either String (Maybe (Doc RdrName))        description1 = case descriptionOpt of           Nothing -> Right Nothing -         -- TODO: pass real file position -         Just description -> case parseString $ tokenise dflags description (0,0) of +         Just description -> case parseString dflags description of              Nothing -> Left ("Cannot parse Description: " ++ description)              Just doc -> Right (Just doc)     in        case description1 of           Left mess -> Left mess -         -- TODO: pass real file position -         Right docOpt -> case parseParas $ tokenise dflags str8 (0,0) of +         Right docOpt -> case parseParas dflags str8 of             Nothing -> Left "Cannot parse header documentation paragraphs"             Just doc -> Right (HaddockModInfo {              hmi_description = docOpt, diff --git a/src/Haddock/InterfaceFile.hs b/src/Haddock/InterfaceFile.hs index b993dd67..189550b8 100644 --- a/src/Haddock/InterfaceFile.hs +++ b/src/Haddock/InterfaceFile.hs @@ -428,6 +428,16 @@ instance Binary Hyperlink where          label <- get bh          return (Hyperlink url label) +instance Binary Picture where +    put_ bh (Picture uri title) = do +        put_ bh uri +        put_ bh title +    get bh = do +        uri <- get bh +        title <- get bh +        return (Picture uri title) + +  {-* Generated by DrIFT : Look, but Don't Touch. *-}  instance (Binary id) => Binary (Doc id) where diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x deleted file mode 100644 index 9e59fa4c..00000000 --- a/src/Haddock/Lex.x +++ /dev/null @@ -1,255 +0,0 @@ --- --- Haddock - A Haskell Documentation Tool --- --- (c) Simon Marlow 2002 --- --- This file was modified and integrated into GHC by David Waern 2006. --- Then moved back into Haddock by Isaac Dupree in 2009 :-) --- - -{ -{-# LANGUAGE BangPatterns #-}   -- Generated by Alex -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See ---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Lex ( -	Token(..), -	LToken, -	tokenise - ) where - -import Lexer hiding (Token) -import Parser ( parseIdentifier ) -import StringBuffer -import RdrName -import SrcLoc -import DynFlags -import FastString - -import qualified Data.Bits -import Data.Char -import Data.Word (Word8) -import Numeric -import System.IO.Unsafe -import Debug.Trace -} - -$ws    = $white # \n -$digit = [0-9] -$hexdigit = [0-9a-fA-F] -$special =  [\"\@] -$alphanum = [A-Za-z0-9] -$ident    = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:] - -:- - --- beginning of a paragraph -<0,para> { - $ws* \n		; - $ws* \>		{ begin birdtrack } - $ws* prop \> .* \n	{ strtoken TokProperty `andBegin` property} - $ws* \>\>\>            { strtoken TokExamplePrompt `andBegin` exampleexpr } - $ws* [\*\-]		{ token TokBullet `andBegin` string } - $ws* \[		{ token TokDefStart `andBegin` def } - $ws* \( $digit+ \) 	{ token TokNumber `andBegin` string } - $ws* $digit+ \. 	{ token TokNumber `andBegin` string } - $ws*			{ begin string }		 -} - --- beginning of a line -<line> { -  $ws* \>		{ begin birdtrack } -  $ws* \>\>\>		{ strtoken TokExamplePrompt `andBegin` exampleexpr } -  $ws* \n		{ token TokPara `andBegin` para } -  -- Here, we really want to be able to say -  -- $ws* (\n | <eof>) 	{ token TokPara `andBegin` para} -  -- because otherwise a trailing line of whitespace will result in  -  -- a spurious TokString at the end of a docstring.  We don't have <eof>, -  -- though (NOW I realise what it was for :-).  To get around this, we always -  -- append \n to the end of a docstring. -  () 			{ begin string } -} - -<birdtrack> .*	\n?	{ strtokenNL TokBirdTrack `andBegin` line } - -<property> ()           { token TokPara `andBegin` para } - -<example> { -  $ws*	\n		{ token TokPara `andBegin` para } -  $ws* \>\>\>	        { strtoken TokExamplePrompt `andBegin` exampleexpr } -  ()			{ begin exampleresult } -} - -<exampleexpr> .* \n	{ strtokenNL TokExampleExpression `andBegin` example } - -<exampleresult> .* \n	{ strtokenNL TokExampleResult `andBegin` example } - -<string,def> { -  $special			{ strtoken $ \s -> TokSpecial (head s) } -  \<\< [^\>]* \>\>              { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } -  \< [^\>]* \>			{ strtoken $ \s -> TokURL (init (tail s)) } -  \# [^\#]* \#			{ strtoken $ \s -> TokAName (init (tail s)) } -  \/ [^\/]* \/                  { strtoken $ \s -> TokEmphasis (init (tail s)) } -  [\'\`] $ident+ [\'\`]		{ ident } -  \\ .				{ strtoken (TokString . tail) } -  "&#" $digit+ \;		{ strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } -  "&#" [xX] $hexdigit+ \;	{ strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } -  -- allow special characters through if they don't fit one of the previous -  -- patterns. -  [\/\'\`\<\#\&\\]			{ strtoken TokString } -  [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } -  [^ $special \/ \< \# \n \'\` \& \\ \]]+    { strtoken TokString } -} - -<def> { -  \]				{ token TokDefEnd `andBegin` string } -} - --- ']' doesn't have any special meaning outside of the [...] at the beginning --- of a definition paragraph. -<string> { -  \]				{ strtoken TokString } -} - -{ --- | A located token -type LToken = (Token, AlexPosn) - -data Token -  = TokPara -  | TokNumber -  | TokBullet -  | TokDefStart -  | TokDefEnd -  | TokSpecial Char -  | TokIdent RdrName -  | TokString String -  | TokURL String -  | TokPic String -  | TokEmphasis String -  | TokAName String -  | TokBirdTrack String -  | TokProperty String -  | TokExamplePrompt String -  | TokExampleExpression String -  | TokExampleResult String ---  deriving Show - -tokenPos :: LToken -> (Int, Int) -tokenPos t = let AlexPn _ line col = snd t in (line, col) - --- ----------------------------------------------------------------------------- --- Alex support stuff - --- XXX: copied the posn wrapper code from Alex to make this lexer work --- with both Alex 2.x and Alex 3.x.  However, we are not using the --- Unicode/UTF-8 support in Alex 3.x, and Unicode documentation will --- probably get mangled. - --- | Encode a Haskell String to a list of Word8 values, in UTF8 format. -utf8Encode :: Char -> [Word8] -utf8Encode = map fromIntegral . go . ord - where -  go oc -   | oc <= 0x7f       = [oc] - -   | oc <= 0x7ff      = [ 0xc0 + (oc `Data.Bits.shiftR` 6) -                        , 0x80 + oc Data.Bits..&. 0x3f -                        ] - -   | oc <= 0xffff     = [ 0xe0 + (oc `Data.Bits.shiftR` 12) -                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) -                        , 0x80 + oc Data.Bits..&. 0x3f -                        ] -   | otherwise        = [ 0xf0 + (oc `Data.Bits.shiftR` 18) -                        , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) -                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) -                        , 0x80 + oc Data.Bits..&. 0x3f -                        ] - -type Byte = Word8 - -type AlexInput = (AlexPosn,     -- current position, -                  Char,         -- previous char -                  [Byte],       -- pending bytes on current char -                  String)       -- current input string - -alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (p,c,bs,s) = c - -alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) -alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) -alexGetByte (p,c,[],[]) = Nothing -alexGetByte (p,_,[],(c:s))  = let p' = alexMove p c -                                  (b:bs) = utf8Encode c -                              in p' `seq`  Just (b, (p', c, bs, s)) - -data AlexPosn = AlexPn !Int !Int !Int -        deriving (Eq,Show) - -alexMove :: AlexPosn -> Char -> AlexPosn -alexMove (AlexPn a l c) '\t' = AlexPn (a+1)  l     (((c+7) `div` 8)*8+1) -alexMove (AlexPn a l c) '\n' = AlexPn (a+1) (l+1)   1 -alexMove (AlexPn a l c) _    = AlexPn (a+1)  l     (c+1) - -type StartCode = Int -type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> DynFlags -> [LToken] - -tokenise :: DynFlags -> String -> (Int, Int) -> [LToken] -tokenise dflags str (line, col) = go (posn,'\n',[],eofHack str) para -  where posn = AlexPn 0 line col -        go inp@(pos,_,_,str) sc = -          case alexScan inp sc of -                AlexEOF -> [] -                AlexError _ -> [] -                AlexSkip  inp' len     -> go inp' sc -                AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) dflags - --- NB. we add a final \n to the string, (see comment in the beginning of line --- production above). -eofHack str = str++"\n" - -andBegin  :: Action -> StartCode -> Action -andBegin act new_sc = \pos str _ cont dflags -> act pos str new_sc cont dflags - -token :: Token -> Action -token t = \pos _ sc cont _ -> (t, pos) : cont sc - -strtoken, strtokenNL :: (String -> Token) -> Action -strtoken t = \pos str sc cont _ -> (t str, pos) : cont sc -strtokenNL t = \pos str sc cont _ -> (t (filter (/= '\r') str), pos) : cont sc --- ^ We only want LF line endings in our internal doc string format, so we --- filter out all CRs. - -begin :: StartCode -> Action -begin sc = \_ _ _ cont _ -> cont sc - --- ----------------------------------------------------------------------------- --- Lex a string as a Haskell identifier - -ident :: Action -ident pos str sc cont dflags =  -  case parseIdent dflags loc id of -	Just names -> (TokIdent names, pos) : cont sc -	Nothing -> (TokString str, pos) : cont sc - where id = init (tail str) -       -- TODO: Get the real filename here. Maybe we should just be -       --       using GHC SrcLoc's ourself? -       filename = mkFastString "<unknown file>" -       loc = case pos of -             AlexPn _ line col -> -                 mkRealSrcLoc filename line col - -parseIdent :: DynFlags -> RealSrcLoc -> String -> Maybe RdrName -parseIdent dflags loc str0 =  -  let buffer = stringToStringBuffer str0 -      pstate = mkPState dflags buffer loc -      result = unP parseIdentifier pstate  -  in case result of  -       POk _ name -> Just (unLoc name) -       _ -> Nothing -} diff --git a/src/Haddock/Parse.y b/src/Haddock/Parse.y deleted file mode 100644 index f40ff521..00000000 --- a/src/Haddock/Parse.y +++ /dev/null @@ -1,171 +0,0 @@ -{ -{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6 -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See ---     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Parse (parseString, parseParas) where - -import Haddock.Lex -import Haddock.Types (Doc(..), Example(Example), Hyperlink(..)) -import Haddock.Doc -import HsSyn -import RdrName -import Data.Char  (isSpace) -import Data.Maybe (fromMaybe) -import Data.List  (stripPrefix) -} - -%expect 0 - -%tokentype { LToken } - -%token	'/'	{ (TokSpecial '/',_) } -	'@'	{ (TokSpecial '@',_) } -	'['     { (TokDefStart,_) } -	']'     { (TokDefEnd,_) } -	DQUO 	{ (TokSpecial '\"',_) } -	URL	{ (TokURL $$,_) } -	PIC     { (TokPic $$,_) } -	ANAME	{ (TokAName $$,_) } -	'/../'  { (TokEmphasis $$,_) } -	'-'	{ (TokBullet,_) } -	'(n)'	{ (TokNumber,_) } -	'>..'	{ (TokBirdTrack $$,_) } -	PROP	{ (TokProperty $$,_) } -	PROMPT	{ (TokExamplePrompt $$,_) } -	RESULT	{ (TokExampleResult $$,_) } -	EXP	{ (TokExampleExpression $$,_) } -	IDENT   { (TokIdent $$,_) } -	PARA    { (TokPara,_) } -	STRING	{ (TokString $$,_) } - -%monad { Maybe } - -%name parseParas doc -%name parseString seq - -%% - -doc	:: { Doc RdrName } -	: apara PARA doc	{ docAppend $1 $3 } -	| PARA doc 		{ $2 } -	| apara			{ $1 } -	| {- empty -}		{ DocEmpty } - -apara	:: { Doc RdrName } -	: ulpara		{ DocUnorderedList [$1] } -	| olpara		{ DocOrderedList [$1] } -        | defpara               { DocDefList [$1] } -	| para			{ $1 } - -ulpara  :: { Doc RdrName } -	: '-' para		{ $2 } - -olpara  :: { Doc RdrName }  -	: '(n)' para		{ $2 } - -defpara :: { (Doc RdrName, Doc RdrName) } -	: '[' seq ']' seq	{ ($2, $4) } - -para    :: { Doc RdrName } -	: seq			{ docParagraph $1 } -	| codepara		{ DocCodeBlock $1 } -	| property		{ $1 } -	| examples		{ DocExamples $1 } - -codepara :: { Doc RdrName } -	: '>..' codepara	{ docAppend (DocString $1) $2 } -	| '>..'			{ DocString $1 } - -property :: { Doc RdrName } -	: PROP			{ makeProperty $1 } - -examples :: { [Example] } -	: example examples	{ $1 : $2 } -	| example		{ [$1] } - -example :: { Example } -	: PROMPT EXP result	{ makeExample $1 $2 (lines $3) } -	| PROMPT EXP		{ makeExample $1 $2 [] } - -result :: { String } -	: RESULT result		{ $1 ++ $2 } -	| RESULT		{ $1 } - -seq	:: { Doc RdrName } -	: elem seq		{ docAppend $1 $2 } -	| elem			{ $1 } - -elem	:: { Doc RdrName } -	: elem1			{ $1 } -	| '@' seq1 '@'		{ DocMonospaced $2 } - -seq1	:: { Doc RdrName } -	: PARA seq1             { docAppend (DocString "\n") $2 } -	| elem1 seq1            { docAppend $1 $2 } -	| elem1			{ $1 } - -elem1	:: { Doc RdrName } -	: STRING		{ DocString $1 } -	| '/../'                { DocEmphasis (DocString $1) } -	| URL			{ DocHyperlink (makeHyperlink $1) } -	| PIC                   { DocPic $1 } -	| ANAME			{ DocAName $1 } -	| IDENT			{ DocIdentifier $1 } -	| DQUO strings DQUO	{ DocModule $2 } - -strings  :: { String } -	: STRING		{ $1 } -	| STRING strings	{ $1 ++ $2 } - -{ -happyError :: [LToken] -> Maybe a -happyError toks = Nothing - --- | Create a `Hyperlink` from given string. --- --- A hyperlink consists of a URL and an optional label.  The label is separated --- from the url by one or more whitespace characters. -makeHyperlink :: String -> Hyperlink -makeHyperlink input = case break isSpace $ strip input of -  (url, "")    -> Hyperlink url Nothing -  (url, label) -> Hyperlink url (Just . dropWhile isSpace $ label) - -makeProperty :: String -> Doc RdrName -makeProperty s = case strip s of -  'p':'r':'o':'p':'>':xs -> -	DocProperty (dropWhile isSpace xs) -  xs -> -	error $ "makeProperty: invalid input " ++ show xs - --- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Example -makeExample prompt expression result = -  Example -	(strip expression)	-- we do not care about leading and trailing -				-- whitespace in expressions, so drop them -	result' -  where -	-- 1. drop trailing whitespace from the prompt, remember the prefix -	(prefix, _) = span isSpace prompt - -	-- 2. drop, if possible, the exact same sequence of whitespace -	-- characters from each result line -	-- -	-- 3. interpret lines that only contain the string "<BLANKLINE>" as an -	-- empty line -	result' = map (substituteBlankLine . tryStripPrefix prefix) result -	  where -		tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys - -		substituteBlankLine "<BLANKLINE>" = "" -		substituteBlankLine line          = line - --- | Remove all leading and trailing whitespace -strip :: String -> String -strip = dropWhile isSpace . reverse . dropWhile isSpace . reverse -} diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs new file mode 100644 index 00000000..81e274ed --- /dev/null +++ b/src/Haddock/Parser.hs @@ -0,0 +1,435 @@ +{-# LANGUAGE OverloadedStrings #-} +-- | +-- Module      :  Haddock.Parser +-- Copyright   :  (c) Mateusz Kowalczyk 2013, +--                    Simon Hengel      2013 +-- License     :  BSD-like +-- +-- Maintainer  :  haddock@projects.haskell.org +-- Stability   :  experimental +-- Portability :  portable + +module Haddock.Parser (parseString, parseParas) where + +import           Control.Applicative +import           Data.Attoparsec.ByteString   hiding (takeWhile1, take, inClass) +import qualified Data.Attoparsec.ByteString.Char8 as A8 +import           Data.Attoparsec.ByteString.Char8 hiding (take, string) +import qualified Data.ByteString as BS +import           Data.Char (chr) +import           Data.List (stripPrefix) +import           Data.Maybe (fromMaybe) +import           Data.Monoid +import           DynFlags +import           FastString (mkFastString) +import           Haddock.Doc +import           Haddock.Types +import           Lexer (mkPState, unP, ParseResult(POk)) +import           Parser (parseIdentifier) +import           RdrName +import           SrcLoc (mkRealSrcLoc, unLoc) +import           StringBuffer (stringToStringBuffer) +import           Haddock.Utf8 + +default (Int) + +-- | Main entry point to the parser. Appends the newline character +-- to the input string. +parseParas :: DynFlags +              -> String -- ^ String to parse +              -> Maybe (Doc RdrName) +parseParas d s = case parseOnly (p <* skipSpace) (encodeUtf8 $ s ++ "\n") of +  Right r -> Just $ combineStringNodes r +  _ -> Nothing +  where +    p :: Parser (Doc RdrName) +    -- make sure that we don't swallow up whitespace belonging to next paragraph +    p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n") + +-- | A parser that parsers separate lines of the comments. Eventually +-- called by 'parseParas'. Appends a newline character to the input string. +-- Drops any whitespace in front of the input string. It's dropped for the sake of +-- section headings. +parseString :: DynFlags -> String -> Maybe (Doc RdrName) +parseString d = parseString'' d . dropWhile isSpace + +-- | A parser that parsers separate lines of the comments. Eventually +-- called by 'parseParas'. Appends a newline character to the input string. +-- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use. +parseString'' :: DynFlags -> String -> Maybe (Doc RdrName) +parseString'' d = parseString' d . (++ "\n") + +-- | An internal use function. Split from the 'parseString' is useful +-- as we can specify separately when we want the newline to be appended. +parseString' :: DynFlags -> String -> Maybe (Doc RdrName) +parseString' d s = case parseOnly p (encodeUtf8 s) of +  Right r -> Just $ combineStringNodes r +  _ -> Nothing +  where +    p :: Parser (Doc RdrName) +    p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d +                          <|> moduleName <|> picture <|> url +                          <|> emphasis d <|> encodedChar <|> string' <|> skipChar) + +-- | Parses and processes +-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> +-- +-- >>> parseOnly encodedChar "ABC" +-- Right (DocString "ABC") +encodedChar :: Parser (Doc RdrName) +encodedChar = "&#" *> c <* ";" +  where +    c = DocString . return . chr <$> num +    num = hex <|> decimal +    hex = ("x" <|> "X") *> hexadecimal + +-- | Plain, regular parser for text. Called as one of the last parsers +-- to ensure that we have already given a chance to more meaningful parsers +-- before capturing their characers. +string' :: Parser (Doc RdrName) +string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\") + +-- | Emphasis parser. +-- +-- >>> parseOnly emphasis "/Hello world/" +-- Right (DocEmphasis (DocString "Hello world")) +emphasis :: DynFlags -> Parser (Doc RdrName) +emphasis d = stringBlock d id DocEmphasis "/" "/" "\n" + +-- | Skips a single character and treats it as a plain string. +-- This is done to skip over any special characters belonging to other +-- elements but which were not deemed meaningful at their positions. +-- Note that this can only be used in places where we're absolutely certain +-- no unicode is present, such as to skip a 100% certain ASCII delimeter. +skipChar :: Parser (Doc RdrName) +skipChar = DocString . return <$> anyChar + +-- | Treats the next character as a regular string, even if it's normally +-- used for markup. +charEscape :: Parser (Doc RdrName) +charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n')) + +-- | Text anchors to allow for jumping around the generated documentation. +-- +-- >>> parseOnly anchor "#Hello world#" +-- Right (DocAName "Hello world") +anchor :: Parser (Doc RdrName) +anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#") + +-- | Helper for markup structures surrounded with delimiters. +stringBlock +  :: DynFlags +     -> (String -> String) -- ^ Function used to transform parsed out text +                           -- before we send it to 'parseString'' +     -> (Doc RdrName -> Doc RdrName) -- ^ 'Doc' to wrap around the result +     -> String -- ^ Opening delimiter +     -> String -- ^ Closing delimiter +     -> String -- ^ Additional characters to terminate parsing on +     -> Parser (Doc RdrName) +stringBlock d f doc op ed n = do +  inner <- block op ed n +  case parseString' d (f inner) of +    Just r -> return $ doc r +    _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’" + +-- | Returns sections of text delimited by specified text. +block :: String -> String -> String -> Parser String +block op ed n = reverse . drop (length ed) . reverse <$> block' op ed +  where +    block' op' ed' = string (encodeUtf8 op') *> mid +      where +        mid :: Parser String +        mid = decodeUtf8 <$> string (encodeUtf8 ed') +              <|> do +                inner <- takeWithSkip (head ed') n +                more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed') +                        <|> block' "" ed'  -- not full ending, take more +                return $ inner ++ more + + +-- | Takes all characters until the specified one. Unconditionally +-- takes a character if it's escaped. Fails if it doesn't find the character or +-- when the input string is empty. +takeWithSkip :: Char -> String -> Parser String +takeWithSkip s n = do +  content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome +  if or (map (`elem` content) n) || last content /= s +    then fail "failed in takeWithSkip" +    else return content +  where +    gotSome [] = fail "EOF in takeWithSkip" +    gotSome xs = return xs +    -- Apparently ‘scan’ is so magical that it doesn't mangle unicode. +    p (escaped, terminate) c +      | terminate = Nothing -- swallows up that extra character +      | escaped = Just (False, False) +      | c == s = Just (False, True) +      | otherwise = Just (c == '\\', False) + +-- | Monospaced strings. +-- +-- >>> parseOnly (monospace dynflags) "@cruel@" +-- Right (DocMonospaced (DocString "cruel")) +monospace :: DynFlags -> Parser (Doc RdrName) +monospace d = stringBlock d id DocMonospaced "@" "@" "" + +-- | Module name parser, surrounded by double quotes. This does a very primitive and +-- purely syntactic checking so that obviously invalid names are not treated as valid +-- and blindly hyperlinked (not starting with a capital letter or including spaces). +moduleName :: Parser (Doc RdrName) +moduleName = DocModule <$> ("\"" *> legalModule <* "\"") +  where legalModule = do +          n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z']) +               <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n")) + +          if any (`elem` n) " &[{}(=*)+]!#|@/;,^?" +            then fail "invalid characters in module name" +            else case n of +            [] -> return [] +            _ -> if last n == '.' then fail "trailing dot in module name" else return n + + +-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify +-- a title for the picture. +-- +-- >>> parseOnly picture "<<hello.png>>" +-- Right (DocPic (Picture "hello.png" Nothing)) +-- >>> parseOnly picture "<<hello.png world>>" +-- Right (DocPic (Picture "hello.png" (Just "world"))) +picture :: Parser (Doc RdrName) +picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>") + +-- | Paragraph parser, called by 'parseParas'. +paragraph :: DynFlags -> Parser (Doc RdrName) +paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d +                                         <|> property <|> textParagraph d) + +-- | List parser, called by 'paragraph'. +list :: DynFlags -> Parser (Doc RdrName) +list d = DocUnorderedList <$> unorderedList d +         <|> DocOrderedList <$> orderedList d +         <|> DocDefList <$> definitionList d + +-- | Parse given text with a provided parser, casting +-- Nothing to a failure +parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use +             -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result +             -> BS.ByteString -- ^ Text to parse +             -> Parser a +parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str) + +-- | Parses unordered (bullet) lists. +unorderedList :: DynFlags -> Parser [Doc RdrName] +unorderedList d = ("*" <|> "-") *> innerList unorderedList d + +-- | Parses ordered lists (numbered or dashed). +orderedList :: DynFlags -> Parser [Doc RdrName] +orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d +  where +    dot = decimal <* "." +    paren = "(" *> (decimal :: Parser Int) <* ")" + +-- | Generic function collecting any further lines belonging to the +-- list entry and recursively collecting any further lists in the +-- same paragraph. Usually used as +-- +-- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags +innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function +             -> DynFlags +             -> Parser [Doc RdrName] +innerList p d = do +  cl <- do +    content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty +    parseLine (parseString'' d) id content +  ulcs <- many ulc +  let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs] +      unLists = mconcat [x | Left x <- ulcs] +  return $ contents : unLists +  where +    ulc :: Parser (Either [Doc RdrName] (Doc RdrName)) +    ulc = Left <$> (optWs *> p d) +          <|> Right <$> nonEmptyLine d + +-- | Takes the remained of the line until the newline character +-- and calls 'parseLine' using 'parseString'. Fails if it's made +-- up strictly of whitespace. +nonEmptyLine :: DynFlags -> Parser (Doc RdrName) +nonEmptyLine d = do +  s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" +  parseLine (parseString'' d) id s +  where +    nonSpace xs +      | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line" +      | otherwise = return xs + +-- | Parses definition lists. +definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)] +definitionList d = do +  _ <- "[" +  inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n") +  _ <- "]" +  outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n") +  ulcs <- many ulc +  let contents = mconcat $ outer : [x | Right x <- ulcs] +      unLists = map mconcat [x | Left x <- ulcs] +  return $ (inner, contents) : unLists +  where +    ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName)) +    ulc = Left <$> (optWs *> definitionList d) +          <|> Right <$> nonEmptyLine d + +-- | Parses birdtracks. No further markup is parsed after the birdtrack. +-- Consecutive birdtracks are allowed. +birdtracks :: Parser (Doc RdrName) +birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n" +  where +    line = optWs *> ">" *> A8.takeWhile (/= '\n') + +-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). +-- Consecutive examples are accepted. +examples :: Parser (Doc RdrName) +examples = DocExamples <$> example + +-- | Collects consecutive examples and their results. +example :: Parser [Example] +example = do +  ws <- optWs +  prompt <- decodeUtf8 <$> string ">>>" +  expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n") +  results <- many result +  let exs = concat [ e | Left e <- results ] +      res = filter (not . null) [ r | Right r <- results ] +  return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs +  where +    result = Left <$> example +             <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n" + +-- | Propery parser. +-- +-- >>> parseOnly property "prop> hello world" +-- Right (DocProperty "hello world") +property :: Parser (Doc RdrName) +property = do +    _ <- skipSpace +    s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n')) +    return $ makeProperty ("prop>" ++ s) + +-- | Paragraph level codeblock. Anything between the two delimiting @ +-- is parsed for markup. +codeblock :: DynFlags -> Parser (Doc RdrName) +codeblock d = do +  -- Note that we don't need to use optWs here because in cases where +  -- we don't see a \n immediatelly after the opening @, this parser +  -- fails but we still have a chance to get a codeblock by getting +  -- a monospaced doc on its own in the paragraph. With that, the cases +  -- are covered. This should be updated if the implementation ever changes. +  s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@") +  maybe (fail "codeblock") (return . DocCodeBlock) s +  where +    block' = A8.scan False p +      where +        p isNewline c +          | isNewline && c == '@' = Nothing +          | otherwise = Just $ c == '\n' + +-- | Calls 'parseString'' on each line of a paragraph +textParagraph :: DynFlags -> Parser (Doc RdrName) +textParagraph d = do +  s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n" +  maybe (fail "textParagraph") (return . docParagraph) s +  where +    line = takeWhile1 (/= '\n') + +-- | See 'picture' for adding a page title. +url :: Parser (Doc RdrName) +url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">") +      <|> autoUrl + +-- | Naive implementation of auto-linking. Will link everything after +-- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space. +-- Single trailing punctuation character (.!?,) is split off. +autoUrl :: Parser (Doc RdrName) +autoUrl = do +  link <- decodeUtf8 <$> urlLone +  return $ formatLink link +  where +    urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace) +    prefixes = [ "http://", "https://", "ftp://" +               , "ssh://", "gopher://" ] +    formatLink :: String -> Doc RdrName +    formatLink s = if last s `elem` ".!?," +                   then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s]) +                   else DocHyperlink $ Hyperlink s Nothing + +-- | Parses strings between identifier delimiters. Consumes all input that it +-- deems to be valid in an identifier. Note that it simply blindly consumes +-- characters and does no actual validation itself. +parseValid :: Parser String +parseValid = do +  vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii) +  c <- peekChar +  case c of +    Just '`' -> return vs +    Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs +    _ -> fail "outofvalid" + +-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the +-- string it deems valid. +identifier :: DynFlags -> Parser (Doc RdrName) +identifier dflags = do +  o <- idDelim +  vid <- parseValid +  e <- idDelim +  return $ validIdentifier $ o : (vid ++ [e]) +  where idDelim = char '\'' <|> char '`' +        validIdentifier str = case parseIdent (tail $ init str) of +          Just identName -> DocIdentifier identName +          Nothing -> DocString str +        parseIdent :: String -> Maybe RdrName +        parseIdent str0 = +          let buffer = stringToStringBuffer str0 +              realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0 +              pstate = mkPState dflags buffer realSrcLc +          in case unP parseIdentifier pstate of +            POk _ name -> Just (unLoc name) +            _ -> Nothing + +-- | Remove all leading and trailing whitespace +strip :: String -> String +strip = (\f -> f . f) $ dropWhile isSpace . reverse + +-- | Consumes whitespace, excluding a newline. +optWs :: Parser BS.ByteString +optWs = A8.takeWhile (`elem` " \t\f\v\r") + +-- | Create an 'Example', stripping superfluous characters as appropriate. +-- Remembers the amount of indentation used for the prompt. +makeExample :: String -> String -> [String] -> Example +makeExample prompt expression res = +  Example (strip expression) result'       -- drop whitespace in expressions +  where (prefix, _) = span isSpace prompt +        result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res +          where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys +                substituteBlankLine "<BLANKLINE>" = "" +                substituteBlankLine line          = line + +-- | Creates a 'Picture' with an optional title. Called by 'picture'. +makePicture :: String -> Picture +makePicture input = case break isSpace $ strip input of +  (uri, "")    -> Picture uri Nothing +  (uri, label) -> Picture uri (Just $ dropWhile isSpace label) + +-- | Creates a 'Hyperlink' with an optional title. Called by 'example'. +makeHyperlink :: String -> Hyperlink +makeHyperlink input = case break isSpace $ strip input of +  (u, "")    -> Hyperlink u Nothing +  (u, label) -> Hyperlink u (Just $ dropWhile isSpace label) + +-- | Makes a property that can be used by other programs for assertions. +-- Drops whitespace around the property. Called by 'property' +makeProperty :: String -> Doc RdrName +makeProperty s = case strip s of +  'p':'r':'o':'p':'>':xs -> +    DocProperty (dropWhile isSpace xs) +  xs -> +    error $ "makeProperty: invalid input " ++ show xs diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs index 73fafd6b..00a8b68f 100644 --- a/src/Haddock/Types.hs +++ b/src/Haddock/Types.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, DeriveTraversable #-} +{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, DeriveTraversable, GADTs #-}  {-# OPTIONS_GHC -fno-warn-orphans #-}  -----------------------------------------------------------------------------  -- | @@ -19,7 +19,6 @@ module Haddock.Types (    , HsDocString, LHsDocString   ) where -  import Data.Foldable  import Data.Traversable  import Control.Exception @@ -27,15 +26,12 @@ import Control.Arrow  import Control.DeepSeq  import Data.Typeable  import Data.Map (Map) -import Data.Maybe  import qualified Data.Map as Map -import Data.Monoid  import GHC hiding (NoLink)  import OccName  import Control.Applicative (Applicative(..))  import Control.Monad (ap) -  -----------------------------------------------------------------------------  -- * Convenient synonyms  ----------------------------------------------------------------------------- @@ -205,7 +201,7 @@ data ExportItem name        , expItemSubs :: ![name]        } -  -- | A section heading.  +  -- | A section heading.    | ExportGroup        {          -- | Section level (1, 2, 3, ...). @@ -230,11 +226,6 @@ data Documentation name = Documentation    } deriving Functor -combineDocumentation :: Documentation name -> Maybe (Doc name) -combineDocumentation (Documentation Nothing Nothing) = Nothing -combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc) - -  -- | Arguments and result are indexed by Int, zero-based from the left,  -- because that's the easiest to use when recursing over types.  type FnArgsDoc name = Map Int (Doc name) @@ -289,7 +280,6 @@ type DocInstance name = (InstHead name, Maybe (Doc name))  -- of instance types.  type InstHead name = ([HsType name], name, [HsType name]) -  -----------------------------------------------------------------------------  -- * Documentation comments  ----------------------------------------------------------------------------- @@ -314,18 +304,12 @@ data Doc id    | DocDefList [(Doc id, Doc id)]    | DocCodeBlock (Doc id)    | DocHyperlink Hyperlink -  | DocPic String +  | DocPic Picture    | DocAName String    | DocProperty String    | DocExamples [Example]    deriving (Functor, Foldable, Traversable) - -instance Monoid (Doc id) where -  mempty  = DocEmpty -  mappend = DocAppend - -  instance NFData a => NFData (Doc a) where    rnf doc = case doc of      DocEmpty                  -> () @@ -360,9 +344,18 @@ data Hyperlink = Hyperlink    } deriving (Eq, Show) +data Picture = Picture +  { pictureUri   :: String +  , pictureTitle :: Maybe String +  } deriving (Eq, Show) + +  instance NFData Hyperlink where    rnf (Hyperlink a b) = a `deepseq` b `deepseq` () +instance NFData Picture where +  rnf (Picture a b) = a `deepseq` b `deepseq` () +  data Example = Example    { exampleExpression :: String @@ -396,7 +389,7 @@ data DocMarkup id a = Markup    , markupCodeBlock            :: a -> a    , markupHyperlink            :: Hyperlink -> a    , markupAName                :: String -> a -  , markupPic                  :: String -> a +  , markupPic                  :: Picture -> a    , markupProperty             :: String -> a    , markupExample              :: [Example] -> a    } diff --git a/src/Haddock/Utf8.hs b/src/Haddock/Utf8.hs new file mode 100644 index 00000000..1fb0e818 --- /dev/null +++ b/src/Haddock/Utf8.hs @@ -0,0 +1,74 @@ +module Haddock.Utf8 (encodeUtf8, decodeUtf8) where +import           Data.Bits ((.|.), (.&.), shiftL, shiftR) +import qualified Data.ByteString as BS +import           Data.Char (chr, ord) +import           Data.Word (Word8) + +-- | Helper that encodes and packs a 'String' into a 'BS.ByteString' +encodeUtf8 :: String -> BS.ByteString +encodeUtf8 = BS.pack . encode + +-- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String' +decodeUtf8 :: BS.ByteString -> String +decodeUtf8 = decode . BS.unpack + +-- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding +-- | Character to use when 'encode' or 'decode' fail for a byte. +replacementCharacter :: Char +replacementCharacter = '\xfffd' + +-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. +encode :: String -> [Word8] +encode = concatMap (map fromIntegral . go . ord) + where +  go oc +   | oc <= 0x7f       = [oc] + +   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6) +                        , 0x80 + oc .&. 0x3f +                        ] + +   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12) +                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f) +                        , 0x80 + oc .&. 0x3f +                        ] +   | otherwise        = [ 0xf0 + (oc `shiftR` 18) +                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f) +                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f) +                        , 0x80 + oc .&. 0x3f +                        ] + +-- | Decode a UTF8 string packed into a list of Word8 values, directly to String +decode :: [Word8] -> String +decode [    ] = "" +decode (c:cs) +  | c < 0x80  = chr (fromEnum c) : decode cs +  | c < 0xc0  = replacementCharacter : decode cs +  | c < 0xe0  = multi1 +  | c < 0xf0  = multi_byte 2 0xf  0x800 +  | c < 0xf8  = multi_byte 3 0x7  0x10000 +  | c < 0xfc  = multi_byte 4 0x3  0x200000 +  | c < 0xfe  = multi_byte 5 0x1  0x4000000 +  | otherwise = replacementCharacter : decode cs +  where +    multi1 = case cs of +      c1 : ds | c1 .&. 0xc0 == 0x80 -> +        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f) +        in if d >= 0x000080 then toEnum d : decode ds +                            else replacementCharacter : decode ds +      _ -> replacementCharacter : decode cs + +    multi_byte :: Int -> Word8 -> Int -> String +    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) +      where +        aux 0 rs acc +          | overlong <= acc && acc <= 0x10ffff && +            (acc < 0xd800 || 0xdfff < acc)     && +            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs +          | otherwise = replacementCharacter : decode rs + +        aux n (r:rs) acc +          | r .&. 0xc0 == 0x80 = aux (n-1) rs +                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) + +        aux _ rs     _ = replacementCharacter : decode rs | 
