aboutsummaryrefslogtreecommitdiff
path: root/src/Haddock
diff options
context:
space:
mode:
Diffstat (limited to 'src/Haddock')
-rw-r--r--src/Haddock/Backends/LaTeX.hs47
-rw-r--r--src/Haddock/Backends/Xhtml/Decl.hs15
-rw-r--r--src/Haddock/Backends/Xhtml/DocMarkup.hs7
-rw-r--r--src/Haddock/Doc.hs20
-rw-r--r--src/Haddock/Interface/LexParseRn.hs11
-rw-r--r--src/Haddock/Interface/ParseModuleHeader.hs9
-rw-r--r--src/Haddock/InterfaceFile.hs10
-rw-r--r--src/Haddock/Lex.x255
-rw-r--r--src/Haddock/Parse.y171
-rw-r--r--src/Haddock/Parser.hs435
-rw-r--r--src/Haddock/Types.hs33
-rw-r--r--src/Haddock/Utf8.hs74
12 files changed, 598 insertions, 489 deletions
diff --git a/src/Haddock/Backends/LaTeX.hs b/src/Haddock/Backends/LaTeX.hs
index 0bc95719..64966ac2 100644
--- a/src/Haddock/Backends/LaTeX.hs
+++ b/src/Haddock/Backends/LaTeX.hs
@@ -33,6 +33,9 @@ import Data.Char
import Control.Monad
import Data.Maybe
import Data.List
+
+import Haddock.Doc (combineDocumentation)
+
-- import Debug.Trace
{- SAMPLE OUTPUT
@@ -278,11 +281,11 @@ ppDecl :: LHsDecl DocName
ppDecl (L loc decl) (doc, fnArgsDoc) instances subdocs = case decl of
TyClD d@(FamDecl {}) -> ppTyFam False loc doc d unicode
- TyClD d@(DataDecl {})
+ TyClD d@(DataDecl {})
-> ppDataDecl instances subdocs loc doc d unicode
TyClD d@(SynDecl {}) -> ppTySyn loc (doc, fnArgsDoc) d unicode
-- Family instances happen via FamInst now
--- TyClD d@(TySynonym {})
+-- TyClD d@(TySynonym {})
-- | Just _ <- tcdTyPats d -> ppTyInst False loc doc d unicode
-- Family instances happen via FamInst now
TyClD d@(ClassDecl {}) -> ppClassDecl instances loc doc subdocs d unicode
@@ -465,7 +468,7 @@ ppClassDecl :: [DocInstance DocName] -> SrcSpan
-> Documentation DocName -> [(DocName, DocForDecl DocName)]
-> TyClDecl DocName -> Bool -> LaTeX
ppClassDecl instances loc doc subdocs
- (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = ltyvars, tcdFDs = lfds
+ (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = ltyvars, tcdFDs = lfds
, tcdSigs = lsigs, tcdATs = ats, tcdATDefs = at_defs }) unicode
= declWithDoc classheader (if null body then Nothing else Just (vcat body)) $$
instancesBit
@@ -504,7 +507,7 @@ ppDocInstances unicode (i : rest)
| Just ihead <- isUndocdInstance i
= declWithDoc (vcat (map (ppInstDecl unicode) (ihead:is))) Nothing $$
ppDocInstances unicode rest'
- | otherwise
+ | otherwise
= ppDocInstance unicode i $$ ppDocInstances unicode rest
where
(is, rest') = spanWith isUndocdInstance rest
@@ -650,9 +653,9 @@ ppSideBySideField subdocs unicode (ConDeclField (L _ name) ltype _) =
-- {-
-- ppHsFullConstr :: HsConDecl -> LaTeX
--- ppHsFullConstr (HsConDecl _ nm tvs ctxt typeList doc) =
+-- ppHsFullConstr (HsConDecl _ nm tvs ctxt typeList doc) =
-- declWithDoc False doc (
--- hsep ((ppHsConstrHdr tvs ctxt +++
+-- hsep ((ppHsConstrHdr tvs ctxt +++
-- ppHsBinder False nm) : map ppHsBangType typeList)
-- )
-- ppHsFullConstr (HsRecDecl _ nm tvs ctxt fields doc) =
@@ -661,35 +664,35 @@ ppSideBySideField subdocs unicode (ConDeclField (L _ name) ltype _) =
-- Nothing -> aboves [hdr, fields_html]
-- Just _ -> aboves [hdr, constr_doc, fields_html]
-- )
---
+--
-- where hdr = declBox (ppHsConstrHdr tvs ctxt +++ ppHsBinder False nm)
---
--- constr_doc
+--
+-- constr_doc
-- | isJust doc = docBox (docToLaTeX (fromJust doc))
-- | otherwise = LaTeX.emptyTable
---
--- fields_html =
--- td <<
+--
+-- fields_html =
+-- td <<
-- table ! [width "100%", cellpadding 0, cellspacing 8] << (
-- aboves (map ppFullField (concat (map expandField fields)))
-- )
-- -}
---
+--
-- ppShortField :: Bool -> Bool -> ConDeclField DocName -> LaTeX
-- ppShortField summary unicode (ConDeclField (L _ name) ltype _)
-- = tda [theclass "recfield"] << (
-- ppBinder summary (docNameOcc name)
-- <+> dcolon unicode <+> ppLType unicode ltype
-- )
---
+--
-- {-
-- ppFullField :: HsFieldDecl -> LaTeX
--- ppFullField (HsFieldDecl [n] ty doc)
+-- ppFullField (HsFieldDecl [n] ty doc)
-- = declWithDoc False doc (
-- ppHsBinder False n <+> dcolon <+> ppHsBangType ty
-- )
-- ppFullField _ = error "ppFullField"
---
+--
-- expandField :: HsFieldDecl -> [HsFieldDecl]
-- expandField (HsFieldDecl ns ty doc) = [ HsFieldDecl [n] ty doc | n <- ns ]
-- -}
@@ -718,7 +721,7 @@ ppAppNameTypes :: DocName -> [HsType DocName] -> Bool -> LaTeX
ppAppNameTypes n ts unicode = ppTypeApp n ts ppDocName (ppParendType unicode)
--- | Print an application of a DocName and a list of Names
+-- | Print an application of a DocName and a list of Names
ppAppDocNameNames :: Bool -> DocName -> [Name] -> LaTeX
ppAppDocNameNames _summ n ns =
ppTypeApp n ns (ppBinder . nameOccName . getName) ppSymName
@@ -1002,7 +1005,7 @@ parLatexMarkup ppId = Markup {
markupEmphasis = \p v -> emph (p v),
markupMonospaced = \p _ -> tt (p Mono),
markupUnorderedList = \p v -> itemizedList (map ($v) p) $$ text "",
- markupPic = \path _ -> parens (text "image: " <> text path),
+ markupPic = \p _ -> markupPic p,
markupOrderedList = \p v -> enumeratedList (map ($v) p) $$ text "",
markupDefList = \l v -> descriptionList (map (\(a,b) -> (a v, b v)) l),
markupCodeBlock = \p _ -> quote (verb (p Verb)) $$ text "",
@@ -1020,6 +1023,14 @@ parLatexMarkup ppId = Markup {
Just label -> text "\\href" <> braces (text url) <> braces (text label)
Nothing -> text "\\url" <> braces (text url)
+ -- Is there a better way of doing this? Just a space is an aribtrary choice.
+ markupPic (Picture uri title) = parens (imageText title)
+ where
+ imageText Nothing = beg
+ imageText (Just t) = beg <> text " " <> text t
+
+ beg = text "image: " <> text uri
+
markupId ppId_ id v =
case v of
Verb -> theid
diff --git a/src/Haddock/Backends/Xhtml/Decl.hs b/src/Haddock/Backends/Xhtml/Decl.hs
index 2ecc6464..91e6871d 100644
--- a/src/Haddock/Backends/Xhtml/Decl.hs
+++ b/src/Haddock/Backends/Xhtml/Decl.hs
@@ -25,6 +25,7 @@ import Haddock.Backends.Xhtml.Types
import Haddock.Backends.Xhtml.Utils
import Haddock.GhcUtils
import Haddock.Types
+import Haddock.Doc (combineDocumentation)
import Data.List ( intersperse )
import qualified Data.Map as Map
@@ -115,7 +116,7 @@ ppFor _ _ _ _ _ _ _ = error "ppFor"
ppTySyn :: Bool -> LinksInfo -> SrcSpan -> DocForDecl DocName -> TyClDecl DocName -> Bool
-> Qualification -> Html
ppTySyn summary links loc doc (SynDecl { tcdLName = L _ name, tcdTyVars = ltyvars
- , tcdRhs = ltype })
+ , tcdRhs = ltype })
unicode qual
= ppTypeOrFunSig summary links loc [name] (unLoc ltype) doc
(full, hdr, spaceHtml +++ equals) unicode qual
@@ -204,12 +205,12 @@ ppAssocType summ links doc (L loc decl) unicode qual =
-- * TyClDecl helpers
--------------------------------------------------------------------------------
--- | Print a type family and its variables
+-- | Print a type family and its variables
ppFamDeclBinderWithVars :: Bool -> FamilyDecl DocName -> Html
ppFamDeclBinderWithVars summ (FamilyDecl { fdLName = lname, fdTyVars = tvs }) =
ppAppDocNameNames summ (unLoc lname) (tyvarNames tvs)
--- | Print a newtype / data binder and its variables
+-- | Print a newtype / data binder and its variables
ppDataBinderWithVars :: Bool -> TyClDecl DocName -> Html
ppDataBinderWithVars summ decl =
ppAppDocNameNames summ (tcdName decl) (tyvarNames $ tcdTyVars decl)
@@ -226,7 +227,7 @@ ppAppNameTypes n ts unicode qual =
ppTypeApp n ts (ppDocName qual) (ppParendType unicode qual)
--- | Print an application of a DocName and a list of Names
+-- | Print an application of a DocName and a list of Names
ppAppDocNameNames :: Bool -> DocName -> [Name] -> Html
ppAppDocNameNames summ n ns =
ppTypeApp n ns (ppBinder summ . nameOccName . getName) ppTyName
@@ -305,7 +306,7 @@ ppShortClassDecl :: Bool -> LinksInfo -> TyClDecl DocName -> SrcSpan
-> Html
ppShortClassDecl summary links (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = tvs
, tcdFDs = fds, tcdSigs = sigs, tcdATs = ats }) loc
- subdocs unicode qual =
+ subdocs unicode qual =
if null sigs && null ats
then (if summary then id else topDeclElem links loc [nm]) hdr
else (if summary then id else topDeclElem links loc [nm]) (hdr <+> keyword "where")
@@ -580,7 +581,7 @@ ppShortField summary unicode qual (ConDeclField (L _ name) ltype _)
-- Currently doesn't handle 'data instance' decls or kind signatures
ppDataHeader :: Bool -> TyClDecl DocName -> Bool -> Qualification -> Html
ppDataHeader summary decl@(DataDecl { tcdDataDefn = HsDataDefn { dd_ND = nd
- , dd_ctxt = ctxt } })
+ , dd_ctxt = ctxt } })
unicode qual
= -- newtype or data
(case nd of { NewType -> keyword "newtype"; DataType -> keyword "data" }) <+>
@@ -723,5 +724,3 @@ ppr_fun_ty ctxt_prec ty1 ty2 unicode qual
in
maybeParen ctxt_prec pREC_FUN $
hsep [p1, arrow unicode <+> p2]
-
-
diff --git a/src/Haddock/Backends/Xhtml/DocMarkup.hs b/src/Haddock/Backends/Xhtml/DocMarkup.hs
index aa4ba377..31d653bc 100644
--- a/src/Haddock/Backends/Xhtml/DocMarkup.hs
+++ b/src/Haddock/Backends/Xhtml/DocMarkup.hs
@@ -18,18 +18,19 @@ module Haddock.Backends.Xhtml.DocMarkup (
docElement, docSection, docSection_,
) where
+import Control.Applicative ((<$>))
import Haddock.Backends.Xhtml.Names
import Haddock.Backends.Xhtml.Utils
import Haddock.Types
import Haddock.Utils
+import Haddock.Doc (combineDocumentation)
-import Text.XHtml hiding ( name, title, p, quote )
+import Text.XHtml hiding ( name, p, quote )
import Data.Maybe (fromMaybe)
import GHC
-
parHtmlMarkup :: Qualification -> (a -> Html) -> DocMarkup a Html
parHtmlMarkup qual ppId = Markup {
markupEmpty = noHtml,
@@ -49,7 +50,7 @@ parHtmlMarkup qual ppId = Markup {
markupCodeBlock = pre,
markupHyperlink = \(Hyperlink url mLabel) -> anchor ! [href url] << fromMaybe url mLabel,
markupAName = \aname -> namedAnchor aname << "",
- markupPic = \path -> image ! [src path],
+ markupPic = \(Picture uri t) -> image ! ([src uri] ++ fromMaybe [] (return . title <$> t)),
markupProperty = pre . toHtml,
markupExample = examplesToHtml
}
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index 18555cfb..4d68c554 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,14 +1,27 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
module Haddock.Doc (
docAppend,
docParagraph,
- combineStringNodes
+ combineStringNodes,
+ combineDocumentation
) where
-
+import Data.Maybe
+import Data.Monoid
import Haddock.Types
import Data.Char (isSpace)
import Control.Arrow ((***))
+-- We put it here so that we can avoid a circular import
+-- anything relevant imports this module anyway
+instance Monoid (Doc id) where
+ mempty = DocEmpty
+ mappend = docAppend
+
+combineDocumentation :: Documentation name -> Maybe (Doc name)
+combineDocumentation (Documentation Nothing Nothing) = Nothing
+combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
+
-- used to make parsing easier; we group the list items later
docAppend :: Doc id -> Doc id -> Doc id
docAppend (DocUnorderedList ds1) (DocUnorderedList ds2)
@@ -85,4 +98,7 @@ combineStringNodes x = x
tryjoin :: Doc id -> Doc id
tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y)
+tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z
+tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z))
+ = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z))
tryjoin x = x
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 9d775bcb..13563532 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -19,8 +19,7 @@ module Haddock.Interface.LexParseRn
import Haddock.Types
-import Haddock.Lex
-import Haddock.Parse
+import Haddock.Parser
import Haddock.Interface.ParseModuleHeader
import Haddock.Doc
@@ -50,19 +49,19 @@ processDocStringParas = process parseParas
processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
processDocString = process parseString
-process :: ([LToken] -> Maybe (Doc RdrName))
+process :: (DynFlags -> String -> Maybe (Doc RdrName))
-> DynFlags
-> GlobalRdrEnv
-> HsDocString
-> ErrMsgM (Maybe (Doc Name))
process parse dflags gre (HsDocString fs) = do
let str = unpackFS fs
- let toks = tokenise dflags str (0,0) -- TODO: real position
- case parse toks of
+ case parse dflags str of
Nothing -> do
tell [ "doc comment parse failed: " ++ str ]
return Nothing
- Just doc -> return (Just (rename dflags gre doc))
+ Just doc -> do
+ return (Just (rename dflags gre doc))
processModuleHeader :: DynFlags -> GlobalRdrEnv -> SafeHaskellMode -> Maybe LHsDocString
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index 5087affe..2e4fe73b 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -12,8 +12,7 @@
module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where
import Haddock.Types
-import Haddock.Lex
-import Haddock.Parse
+import Haddock.Parser
import RdrName
import DynFlags
@@ -47,15 +46,13 @@ parseModuleHeader dflags str0 =
description1 :: Either String (Maybe (Doc RdrName))
description1 = case descriptionOpt of
Nothing -> Right Nothing
- -- TODO: pass real file position
- Just description -> case parseString $ tokenise dflags description (0,0) of
+ Just description -> case parseString dflags description of
Nothing -> Left ("Cannot parse Description: " ++ description)
Just doc -> Right (Just doc)
in
case description1 of
Left mess -> Left mess
- -- TODO: pass real file position
- Right docOpt -> case parseParas $ tokenise dflags str8 (0,0) of
+ Right docOpt -> case parseParas dflags str8 of
Nothing -> Left "Cannot parse header documentation paragraphs"
Just doc -> Right (HaddockModInfo {
hmi_description = docOpt,
diff --git a/src/Haddock/InterfaceFile.hs b/src/Haddock/InterfaceFile.hs
index b993dd67..189550b8 100644
--- a/src/Haddock/InterfaceFile.hs
+++ b/src/Haddock/InterfaceFile.hs
@@ -428,6 +428,16 @@ instance Binary Hyperlink where
label <- get bh
return (Hyperlink url label)
+instance Binary Picture where
+ put_ bh (Picture uri title) = do
+ put_ bh uri
+ put_ bh title
+ get bh = do
+ uri <- get bh
+ title <- get bh
+ return (Picture uri title)
+
+
{-* Generated by DrIFT : Look, but Don't Touch. *-}
instance (Binary id) => Binary (Doc id) where
diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x
deleted file mode 100644
index 9e59fa4c..00000000
--- a/src/Haddock/Lex.x
+++ /dev/null
@@ -1,255 +0,0 @@
---
--- Haddock - A Haskell Documentation Tool
---
--- (c) Simon Marlow 2002
---
--- This file was modified and integrated into GHC by David Waern 2006.
--- Then moved back into Haddock by Isaac Dupree in 2009 :-)
---
-
-{
-{-# LANGUAGE BangPatterns #-} -- Generated by Alex
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Haddock.Lex (
- Token(..),
- LToken,
- tokenise
- ) where
-
-import Lexer hiding (Token)
-import Parser ( parseIdentifier )
-import StringBuffer
-import RdrName
-import SrcLoc
-import DynFlags
-import FastString
-
-import qualified Data.Bits
-import Data.Char
-import Data.Word (Word8)
-import Numeric
-import System.IO.Unsafe
-import Debug.Trace
-}
-
-$ws = $white # \n
-$digit = [0-9]
-$hexdigit = [0-9a-fA-F]
-$special = [\"\@]
-$alphanum = [A-Za-z0-9]
-$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:]
-
-:-
-
--- beginning of a paragraph
-<0,para> {
- $ws* \n ;
- $ws* \> { begin birdtrack }
- $ws* prop \> .* \n { strtoken TokProperty `andBegin` property}
- $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
- $ws* [\*\-] { token TokBullet `andBegin` string }
- $ws* \[ { token TokDefStart `andBegin` def }
- $ws* \( $digit+ \) { token TokNumber `andBegin` string }
- $ws* $digit+ \. { token TokNumber `andBegin` string }
- $ws* { begin string }
-}
-
--- beginning of a line
-<line> {
- $ws* \> { begin birdtrack }
- $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
- $ws* \n { token TokPara `andBegin` para }
- -- Here, we really want to be able to say
- -- $ws* (\n | <eof>) { token TokPara `andBegin` para}
- -- because otherwise a trailing line of whitespace will result in
- -- a spurious TokString at the end of a docstring. We don't have <eof>,
- -- though (NOW I realise what it was for :-). To get around this, we always
- -- append \n to the end of a docstring.
- () { begin string }
-}
-
-<birdtrack> .* \n? { strtokenNL TokBirdTrack `andBegin` line }
-
-<property> () { token TokPara `andBegin` para }
-
-<example> {
- $ws* \n { token TokPara `andBegin` para }
- $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr }
- () { begin exampleresult }
-}
-
-<exampleexpr> .* \n { strtokenNL TokExampleExpression `andBegin` example }
-
-<exampleresult> .* \n { strtokenNL TokExampleResult `andBegin` example }
-
-<string,def> {
- $special { strtoken $ \s -> TokSpecial (head s) }
- \<\< [^\>]* \>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) }
- \< [^\>]* \> { strtoken $ \s -> TokURL (init (tail s)) }
- \# [^\#]* \# { strtoken $ \s -> TokAName (init (tail s)) }
- \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) }
- [\'\`] $ident+ [\'\`] { ident }
- \\ . { strtoken (TokString . tail) }
- "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] }
- "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] }
- -- allow special characters through if they don't fit one of the previous
- -- patterns.
- [\/\'\`\<\#\&\\] { strtoken TokString }
- [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line }
- [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString }
-}
-
-<def> {
- \] { token TokDefEnd `andBegin` string }
-}
-
--- ']' doesn't have any special meaning outside of the [...] at the beginning
--- of a definition paragraph.
-<string> {
- \] { strtoken TokString }
-}
-
-{
--- | A located token
-type LToken = (Token, AlexPosn)
-
-data Token
- = TokPara
- | TokNumber
- | TokBullet
- | TokDefStart
- | TokDefEnd
- | TokSpecial Char
- | TokIdent RdrName
- | TokString String
- | TokURL String
- | TokPic String
- | TokEmphasis String
- | TokAName String
- | TokBirdTrack String
- | TokProperty String
- | TokExamplePrompt String
- | TokExampleExpression String
- | TokExampleResult String
--- deriving Show
-
-tokenPos :: LToken -> (Int, Int)
-tokenPos t = let AlexPn _ line col = snd t in (line, col)
-
--- -----------------------------------------------------------------------------
--- Alex support stuff
-
--- XXX: copied the posn wrapper code from Alex to make this lexer work
--- with both Alex 2.x and Alex 3.x. However, we are not using the
--- Unicode/UTF-8 support in Alex 3.x, and Unicode documentation will
--- probably get mangled.
-
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-utf8Encode :: Char -> [Word8]
-utf8Encode = map fromIntegral . go . ord
- where
- go oc
- | oc <= 0x7f = [oc]
-
- | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6)
- , 0x80 + oc Data.Bits..&. 0x3f
- ]
-
- | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12)
- , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
- , 0x80 + oc Data.Bits..&. 0x3f
- ]
- | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18)
- , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
- , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
- , 0x80 + oc Data.Bits..&. 0x3f
- ]
-
-type Byte = Word8
-
-type AlexInput = (AlexPosn, -- current position,
- Char, -- previous char
- [Byte], -- pending bytes on current char
- String) -- current input string
-
-alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (p,c,bs,s) = c
-
-alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
-alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s))
-alexGetByte (p,c,[],[]) = Nothing
-alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c
- (b:bs) = utf8Encode c
- in p' `seq` Just (b, (p', c, bs, s))
-
-data AlexPosn = AlexPn !Int !Int !Int
- deriving (Eq,Show)
-
-alexMove :: AlexPosn -> Char -> AlexPosn
-alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1)
-alexMove (AlexPn a l c) '\n' = AlexPn (a+1) (l+1) 1
-alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1)
-
-type StartCode = Int
-type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> DynFlags -> [LToken]
-
-tokenise :: DynFlags -> String -> (Int, Int) -> [LToken]
-tokenise dflags str (line, col) = go (posn,'\n',[],eofHack str) para
- where posn = AlexPn 0 line col
- go inp@(pos,_,_,str) sc =
- case alexScan inp sc of
- AlexEOF -> []
- AlexError _ -> []
- AlexSkip inp' len -> go inp' sc
- AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) dflags
-
--- NB. we add a final \n to the string, (see comment in the beginning of line
--- production above).
-eofHack str = str++"\n"
-
-andBegin :: Action -> StartCode -> Action
-andBegin act new_sc = \pos str _ cont dflags -> act pos str new_sc cont dflags
-
-token :: Token -> Action
-token t = \pos _ sc cont _ -> (t, pos) : cont sc
-
-strtoken, strtokenNL :: (String -> Token) -> Action
-strtoken t = \pos str sc cont _ -> (t str, pos) : cont sc
-strtokenNL t = \pos str sc cont _ -> (t (filter (/= '\r') str), pos) : cont sc
--- ^ We only want LF line endings in our internal doc string format, so we
--- filter out all CRs.
-
-begin :: StartCode -> Action
-begin sc = \_ _ _ cont _ -> cont sc
-
--- -----------------------------------------------------------------------------
--- Lex a string as a Haskell identifier
-
-ident :: Action
-ident pos str sc cont dflags =
- case parseIdent dflags loc id of
- Just names -> (TokIdent names, pos) : cont sc
- Nothing -> (TokString str, pos) : cont sc
- where id = init (tail str)
- -- TODO: Get the real filename here. Maybe we should just be
- -- using GHC SrcLoc's ourself?
- filename = mkFastString "<unknown file>"
- loc = case pos of
- AlexPn _ line col ->
- mkRealSrcLoc filename line col
-
-parseIdent :: DynFlags -> RealSrcLoc -> String -> Maybe RdrName
-parseIdent dflags loc str0 =
- let buffer = stringToStringBuffer str0
- pstate = mkPState dflags buffer loc
- result = unP parseIdentifier pstate
- in case result of
- POk _ name -> Just (unLoc name)
- _ -> Nothing
-}
diff --git a/src/Haddock/Parse.y b/src/Haddock/Parse.y
deleted file mode 100644
index f40ff521..00000000
--- a/src/Haddock/Parse.y
+++ /dev/null
@@ -1,171 +0,0 @@
-{
-{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6
-{-# OPTIONS -Wwarn -w #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and fix
--- any warnings in the module. See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
--- for details
-
-module Haddock.Parse (parseString, parseParas) where
-
-import Haddock.Lex
-import Haddock.Types (Doc(..), Example(Example), Hyperlink(..))
-import Haddock.Doc
-import HsSyn
-import RdrName
-import Data.Char (isSpace)
-import Data.Maybe (fromMaybe)
-import Data.List (stripPrefix)
-}
-
-%expect 0
-
-%tokentype { LToken }
-
-%token '/' { (TokSpecial '/',_) }
- '@' { (TokSpecial '@',_) }
- '[' { (TokDefStart,_) }
- ']' { (TokDefEnd,_) }
- DQUO { (TokSpecial '\"',_) }
- URL { (TokURL $$,_) }
- PIC { (TokPic $$,_) }
- ANAME { (TokAName $$,_) }
- '/../' { (TokEmphasis $$,_) }
- '-' { (TokBullet,_) }
- '(n)' { (TokNumber,_) }
- '>..' { (TokBirdTrack $$,_) }
- PROP { (TokProperty $$,_) }
- PROMPT { (TokExamplePrompt $$,_) }
- RESULT { (TokExampleResult $$,_) }
- EXP { (TokExampleExpression $$,_) }
- IDENT { (TokIdent $$,_) }
- PARA { (TokPara,_) }
- STRING { (TokString $$,_) }
-
-%monad { Maybe }
-
-%name parseParas doc
-%name parseString seq
-
-%%
-
-doc :: { Doc RdrName }
- : apara PARA doc { docAppend $1 $3 }
- | PARA doc { $2 }
- | apara { $1 }
- | {- empty -} { DocEmpty }
-
-apara :: { Doc RdrName }
- : ulpara { DocUnorderedList [$1] }
- | olpara { DocOrderedList [$1] }
- | defpara { DocDefList [$1] }
- | para { $1 }
-
-ulpara :: { Doc RdrName }
- : '-' para { $2 }
-
-olpara :: { Doc RdrName }
- : '(n)' para { $2 }
-
-defpara :: { (Doc RdrName, Doc RdrName) }
- : '[' seq ']' seq { ($2, $4) }
-
-para :: { Doc RdrName }
- : seq { docParagraph $1 }
- | codepara { DocCodeBlock $1 }
- | property { $1 }
- | examples { DocExamples $1 }
-
-codepara :: { Doc RdrName }
- : '>..' codepara { docAppend (DocString $1) $2 }
- | '>..' { DocString $1 }
-
-property :: { Doc RdrName }
- : PROP { makeProperty $1 }
-
-examples :: { [Example] }
- : example examples { $1 : $2 }
- | example { [$1] }
-
-example :: { Example }
- : PROMPT EXP result { makeExample $1 $2 (lines $3) }
- | PROMPT EXP { makeExample $1 $2 [] }
-
-result :: { String }
- : RESULT result { $1 ++ $2 }
- | RESULT { $1 }
-
-seq :: { Doc RdrName }
- : elem seq { docAppend $1 $2 }
- | elem { $1 }
-
-elem :: { Doc RdrName }
- : elem1 { $1 }
- | '@' seq1 '@' { DocMonospaced $2 }
-
-seq1 :: { Doc RdrName }
- : PARA seq1 { docAppend (DocString "\n") $2 }
- | elem1 seq1 { docAppend $1 $2 }
- | elem1 { $1 }
-
-elem1 :: { Doc RdrName }
- : STRING { DocString $1 }
- | '/../' { DocEmphasis (DocString $1) }
- | URL { DocHyperlink (makeHyperlink $1) }
- | PIC { DocPic $1 }
- | ANAME { DocAName $1 }
- | IDENT { DocIdentifier $1 }
- | DQUO strings DQUO { DocModule $2 }
-
-strings :: { String }
- : STRING { $1 }
- | STRING strings { $1 ++ $2 }
-
-{
-happyError :: [LToken] -> Maybe a
-happyError toks = Nothing
-
--- | Create a `Hyperlink` from given string.
---
--- A hyperlink consists of a URL and an optional label. The label is separated
--- from the url by one or more whitespace characters.
-makeHyperlink :: String -> Hyperlink
-makeHyperlink input = case break isSpace $ strip input of
- (url, "") -> Hyperlink url Nothing
- (url, label) -> Hyperlink url (Just . dropWhile isSpace $ label)
-
-makeProperty :: String -> Doc RdrName
-makeProperty s = case strip s of
- 'p':'r':'o':'p':'>':xs ->
- DocProperty (dropWhile isSpace xs)
- xs ->
- error $ "makeProperty: invalid input " ++ show xs
-
--- | Create an 'Example', stripping superfluous characters as appropriate
-makeExample :: String -> String -> [String] -> Example
-makeExample prompt expression result =
- Example
- (strip expression) -- we do not care about leading and trailing
- -- whitespace in expressions, so drop them
- result'
- where
- -- 1. drop trailing whitespace from the prompt, remember the prefix
- (prefix, _) = span isSpace prompt
-
- -- 2. drop, if possible, the exact same sequence of whitespace
- -- characters from each result line
- --
- -- 3. interpret lines that only contain the string "<BLANKLINE>" as an
- -- empty line
- result' = map (substituteBlankLine . tryStripPrefix prefix) result
- where
- tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
-
- substituteBlankLine "<BLANKLINE>" = ""
- substituteBlankLine line = line
-
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = dropWhile isSpace . reverse . dropWhile isSpace . reverse
-}
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
new file mode 100644
index 00000000..81e274ed
--- /dev/null
+++ b/src/Haddock/Parser.hs
@@ -0,0 +1,435 @@
+{-# LANGUAGE OverloadedStrings #-}
+-- |
+-- Module : Haddock.Parser
+-- Copyright : (c) Mateusz Kowalczyk 2013,
+-- Simon Hengel 2013
+-- License : BSD-like
+--
+-- Maintainer : haddock@projects.haskell.org
+-- Stability : experimental
+-- Portability : portable
+
+module Haddock.Parser (parseString, parseParas) where
+
+import Control.Applicative
+import Data.Attoparsec.ByteString hiding (takeWhile1, take, inClass)
+import qualified Data.Attoparsec.ByteString.Char8 as A8
+import Data.Attoparsec.ByteString.Char8 hiding (take, string)
+import qualified Data.ByteString as BS
+import Data.Char (chr)
+import Data.List (stripPrefix)
+import Data.Maybe (fromMaybe)
+import Data.Monoid
+import DynFlags
+import FastString (mkFastString)
+import Haddock.Doc
+import Haddock.Types
+import Lexer (mkPState, unP, ParseResult(POk))
+import Parser (parseIdentifier)
+import RdrName
+import SrcLoc (mkRealSrcLoc, unLoc)
+import StringBuffer (stringToStringBuffer)
+import Haddock.Utf8
+
+default (Int)
+
+-- | Main entry point to the parser. Appends the newline character
+-- to the input string.
+parseParas :: DynFlags
+ -> String -- ^ String to parse
+ -> Maybe (Doc RdrName)
+parseParas d s = case parseOnly (p <* skipSpace) (encodeUtf8 $ s ++ "\n") of
+ Right r -> Just $ combineStringNodes r
+ _ -> Nothing
+ where
+ p :: Parser (Doc RdrName)
+ -- make sure that we don't swallow up whitespace belonging to next paragraph
+ p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n")
+
+-- | A parser that parsers separate lines of the comments. Eventually
+-- called by 'parseParas'. Appends a newline character to the input string.
+-- Drops any whitespace in front of the input string. It's dropped for the sake of
+-- section headings.
+parseString :: DynFlags -> String -> Maybe (Doc RdrName)
+parseString d = parseString'' d . dropWhile isSpace
+
+-- | A parser that parsers separate lines of the comments. Eventually
+-- called by 'parseParas'. Appends a newline character to the input string.
+-- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use.
+parseString'' :: DynFlags -> String -> Maybe (Doc RdrName)
+parseString'' d = parseString' d . (++ "\n")
+
+-- | An internal use function. Split from the 'parseString' is useful
+-- as we can specify separately when we want the newline to be appended.
+parseString' :: DynFlags -> String -> Maybe (Doc RdrName)
+parseString' d s = case parseOnly p (encodeUtf8 s) of
+ Right r -> Just $ combineStringNodes r
+ _ -> Nothing
+ where
+ p :: Parser (Doc RdrName)
+ p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d
+ <|> moduleName <|> picture <|> url
+ <|> emphasis d <|> encodedChar <|> string' <|> skipChar)
+
+-- | Parses and processes
+-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
+--
+-- >>> parseOnly encodedChar "&#65;&#66;&#67;"
+-- Right (DocString "ABC")
+encodedChar :: Parser (Doc RdrName)
+encodedChar = "&#" *> c <* ";"
+ where
+ c = DocString . return . chr <$> num
+ num = hex <|> decimal
+ hex = ("x" <|> "X") *> hexadecimal
+
+-- | Plain, regular parser for text. Called as one of the last parsers
+-- to ensure that we have already given a chance to more meaningful parsers
+-- before capturing their characers.
+string' :: Parser (Doc RdrName)
+string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\")
+
+-- | Emphasis parser.
+--
+-- >>> parseOnly emphasis "/Hello world/"
+-- Right (DocEmphasis (DocString "Hello world"))
+emphasis :: DynFlags -> Parser (Doc RdrName)
+emphasis d = stringBlock d id DocEmphasis "/" "/" "\n"
+
+-- | Skips a single character and treats it as a plain string.
+-- This is done to skip over any special characters belonging to other
+-- elements but which were not deemed meaningful at their positions.
+-- Note that this can only be used in places where we're absolutely certain
+-- no unicode is present, such as to skip a 100% certain ASCII delimeter.
+skipChar :: Parser (Doc RdrName)
+skipChar = DocString . return <$> anyChar
+
+-- | Treats the next character as a regular string, even if it's normally
+-- used for markup.
+charEscape :: Parser (Doc RdrName)
+charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n'))
+
+-- | Text anchors to allow for jumping around the generated documentation.
+--
+-- >>> parseOnly anchor "#Hello world#"
+-- Right (DocAName "Hello world")
+anchor :: Parser (Doc RdrName)
+anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
+
+-- | Helper for markup structures surrounded with delimiters.
+stringBlock
+ :: DynFlags
+ -> (String -> String) -- ^ Function used to transform parsed out text
+ -- before we send it to 'parseString''
+ -> (Doc RdrName -> Doc RdrName) -- ^ 'Doc' to wrap around the result
+ -> String -- ^ Opening delimiter
+ -> String -- ^ Closing delimiter
+ -> String -- ^ Additional characters to terminate parsing on
+ -> Parser (Doc RdrName)
+stringBlock d f doc op ed n = do
+ inner <- block op ed n
+ case parseString' d (f inner) of
+ Just r -> return $ doc r
+ _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’"
+
+-- | Returns sections of text delimited by specified text.
+block :: String -> String -> String -> Parser String
+block op ed n = reverse . drop (length ed) . reverse <$> block' op ed
+ where
+ block' op' ed' = string (encodeUtf8 op') *> mid
+ where
+ mid :: Parser String
+ mid = decodeUtf8 <$> string (encodeUtf8 ed')
+ <|> do
+ inner <- takeWithSkip (head ed') n
+ more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed')
+ <|> block' "" ed' -- not full ending, take more
+ return $ inner ++ more
+
+
+-- | Takes all characters until the specified one. Unconditionally
+-- takes a character if it's escaped. Fails if it doesn't find the character or
+-- when the input string is empty.
+takeWithSkip :: Char -> String -> Parser String
+takeWithSkip s n = do
+ content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome
+ if or (map (`elem` content) n) || last content /= s
+ then fail "failed in takeWithSkip"
+ else return content
+ where
+ gotSome [] = fail "EOF in takeWithSkip"
+ gotSome xs = return xs
+ -- Apparently ‘scan’ is so magical that it doesn't mangle unicode.
+ p (escaped, terminate) c
+ | terminate = Nothing -- swallows up that extra character
+ | escaped = Just (False, False)
+ | c == s = Just (False, True)
+ | otherwise = Just (c == '\\', False)
+
+-- | Monospaced strings.
+--
+-- >>> parseOnly (monospace dynflags) "@cruel@"
+-- Right (DocMonospaced (DocString "cruel"))
+monospace :: DynFlags -> Parser (Doc RdrName)
+monospace d = stringBlock d id DocMonospaced "@" "@" ""
+
+-- | Module name parser, surrounded by double quotes. This does a very primitive and
+-- purely syntactic checking so that obviously invalid names are not treated as valid
+-- and blindly hyperlinked (not starting with a capital letter or including spaces).
+moduleName :: Parser (Doc RdrName)
+moduleName = DocModule <$> ("\"" *> legalModule <* "\"")
+ where legalModule = do
+ n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z'])
+ <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n"))
+
+ if any (`elem` n) " &[{}(=*)+]!#|@/;,^?"
+ then fail "invalid characters in module name"
+ else case n of
+ [] -> return []
+ _ -> if last n == '.' then fail "trailing dot in module name" else return n
+
+
+-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
+-- a title for the picture.
+--
+-- >>> parseOnly picture "<<hello.png>>"
+-- Right (DocPic (Picture "hello.png" Nothing))
+-- >>> parseOnly picture "<<hello.png world>>"
+-- Right (DocPic (Picture "hello.png" (Just "world")))
+picture :: Parser (Doc RdrName)
+picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
+
+-- | Paragraph parser, called by 'parseParas'.
+paragraph :: DynFlags -> Parser (Doc RdrName)
+paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d
+ <|> property <|> textParagraph d)
+
+-- | List parser, called by 'paragraph'.
+list :: DynFlags -> Parser (Doc RdrName)
+list d = DocUnorderedList <$> unorderedList d
+ <|> DocOrderedList <$> orderedList d
+ <|> DocDefList <$> definitionList d
+
+-- | Parse given text with a provided parser, casting
+-- Nothing to a failure
+parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use
+ -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result
+ -> BS.ByteString -- ^ Text to parse
+ -> Parser a
+parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str)
+
+-- | Parses unordered (bullet) lists.
+unorderedList :: DynFlags -> Parser [Doc RdrName]
+unorderedList d = ("*" <|> "-") *> innerList unorderedList d
+
+-- | Parses ordered lists (numbered or dashed).
+orderedList :: DynFlags -> Parser [Doc RdrName]
+orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d
+ where
+ dot = decimal <* "."
+ paren = "(" *> (decimal :: Parser Int) <* ")"
+
+-- | Generic function collecting any further lines belonging to the
+-- list entry and recursively collecting any further lists in the
+-- same paragraph. Usually used as
+--
+-- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags
+innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function
+ -> DynFlags
+ -> Parser [Doc RdrName]
+innerList p d = do
+ cl <- do
+ content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty
+ parseLine (parseString'' d) id content
+ ulcs <- many ulc
+ let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs]
+ unLists = mconcat [x | Left x <- ulcs]
+ return $ contents : unLists
+ where
+ ulc :: Parser (Either [Doc RdrName] (Doc RdrName))
+ ulc = Left <$> (optWs *> p d)
+ <|> Right <$> nonEmptyLine d
+
+-- | Takes the remained of the line until the newline character
+-- and calls 'parseLine' using 'parseString'. Fails if it's made
+-- up strictly of whitespace.
+nonEmptyLine :: DynFlags -> Parser (Doc RdrName)
+nonEmptyLine d = do
+ s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
+ parseLine (parseString'' d) id s
+ where
+ nonSpace xs
+ | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line"
+ | otherwise = return xs
+
+-- | Parses definition lists.
+definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)]
+definitionList d = do
+ _ <- "["
+ inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n")
+ _ <- "]"
+ outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n")
+ ulcs <- many ulc
+ let contents = mconcat $ outer : [x | Right x <- ulcs]
+ unLists = map mconcat [x | Left x <- ulcs]
+ return $ (inner, contents) : unLists
+ where
+ ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName))
+ ulc = Left <$> (optWs *> definitionList d)
+ <|> Right <$> nonEmptyLine d
+
+-- | Parses birdtracks. No further markup is parsed after the birdtrack.
+-- Consecutive birdtracks are allowed.
+birdtracks :: Parser (Doc RdrName)
+birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
+ where
+ line = optWs *> ">" *> A8.takeWhile (/= '\n')
+
+-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
+-- Consecutive examples are accepted.
+examples :: Parser (Doc RdrName)
+examples = DocExamples <$> example
+
+-- | Collects consecutive examples and their results.
+example :: Parser [Example]
+example = do
+ ws <- optWs
+ prompt <- decodeUtf8 <$> string ">>>"
+ expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n")
+ results <- many result
+ let exs = concat [ e | Left e <- results ]
+ res = filter (not . null) [ r | Right r <- results ]
+ return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs
+ where
+ result = Left <$> example
+ <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n"
+
+-- | Propery parser.
+--
+-- >>> parseOnly property "prop> hello world"
+-- Right (DocProperty "hello world")
+property :: Parser (Doc RdrName)
+property = do
+ _ <- skipSpace
+ s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n'))
+ return $ makeProperty ("prop>" ++ s)
+
+-- | Paragraph level codeblock. Anything between the two delimiting @
+-- is parsed for markup.
+codeblock :: DynFlags -> Parser (Doc RdrName)
+codeblock d = do
+ -- Note that we don't need to use optWs here because in cases where
+ -- we don't see a \n immediatelly after the opening @, this parser
+ -- fails but we still have a chance to get a codeblock by getting
+ -- a monospaced doc on its own in the paragraph. With that, the cases
+ -- are covered. This should be updated if the implementation ever changes.
+ s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@")
+ maybe (fail "codeblock") (return . DocCodeBlock) s
+ where
+ block' = A8.scan False p
+ where
+ p isNewline c
+ | isNewline && c == '@' = Nothing
+ | otherwise = Just $ c == '\n'
+
+-- | Calls 'parseString'' on each line of a paragraph
+textParagraph :: DynFlags -> Parser (Doc RdrName)
+textParagraph d = do
+ s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
+ maybe (fail "textParagraph") (return . docParagraph) s
+ where
+ line = takeWhile1 (/= '\n')
+
+-- | See 'picture' for adding a page title.
+url :: Parser (Doc RdrName)
+url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
+ <|> autoUrl
+
+-- | Naive implementation of auto-linking. Will link everything after
+-- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space.
+-- Single trailing punctuation character (.!?,) is split off.
+autoUrl :: Parser (Doc RdrName)
+autoUrl = do
+ link <- decodeUtf8 <$> urlLone
+ return $ formatLink link
+ where
+ urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace)
+ prefixes = [ "http://", "https://", "ftp://"
+ , "ssh://", "gopher://" ]
+ formatLink :: String -> Doc RdrName
+ formatLink s = if last s `elem` ".!?,"
+ then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s])
+ else DocHyperlink $ Hyperlink s Nothing
+
+-- | Parses strings between identifier delimiters. Consumes all input that it
+-- deems to be valid in an identifier. Note that it simply blindly consumes
+-- characters and does no actual validation itself.
+parseValid :: Parser String
+parseValid = do
+ vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii)
+ c <- peekChar
+ case c of
+ Just '`' -> return vs
+ Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs
+ _ -> fail "outofvalid"
+
+-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the
+-- string it deems valid.
+identifier :: DynFlags -> Parser (Doc RdrName)
+identifier dflags = do
+ o <- idDelim
+ vid <- parseValid
+ e <- idDelim
+ return $ validIdentifier $ o : (vid ++ [e])
+ where idDelim = char '\'' <|> char '`'
+ validIdentifier str = case parseIdent (tail $ init str) of
+ Just identName -> DocIdentifier identName
+ Nothing -> DocString str
+ parseIdent :: String -> Maybe RdrName
+ parseIdent str0 =
+ let buffer = stringToStringBuffer str0
+ realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
+ pstate = mkPState dflags buffer realSrcLc
+ in case unP parseIdentifier pstate of
+ POk _ name -> Just (unLoc name)
+ _ -> Nothing
+
+-- | Remove all leading and trailing whitespace
+strip :: String -> String
+strip = (\f -> f . f) $ dropWhile isSpace . reverse
+
+-- | Consumes whitespace, excluding a newline.
+optWs :: Parser BS.ByteString
+optWs = A8.takeWhile (`elem` " \t\f\v\r")
+
+-- | Create an 'Example', stripping superfluous characters as appropriate.
+-- Remembers the amount of indentation used for the prompt.
+makeExample :: String -> String -> [String] -> Example
+makeExample prompt expression res =
+ Example (strip expression) result' -- drop whitespace in expressions
+ where (prefix, _) = span isSpace prompt
+ result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res
+ where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
+ substituteBlankLine "<BLANKLINE>" = ""
+ substituteBlankLine line = line
+
+-- | Creates a 'Picture' with an optional title. Called by 'picture'.
+makePicture :: String -> Picture
+makePicture input = case break isSpace $ strip input of
+ (uri, "") -> Picture uri Nothing
+ (uri, label) -> Picture uri (Just $ dropWhile isSpace label)
+
+-- | Creates a 'Hyperlink' with an optional title. Called by 'example'.
+makeHyperlink :: String -> Hyperlink
+makeHyperlink input = case break isSpace $ strip input of
+ (u, "") -> Hyperlink u Nothing
+ (u, label) -> Hyperlink u (Just $ dropWhile isSpace label)
+
+-- | Makes a property that can be used by other programs for assertions.
+-- Drops whitespace around the property. Called by 'property'
+makeProperty :: String -> Doc RdrName
+makeProperty s = case strip s of
+ 'p':'r':'o':'p':'>':xs ->
+ DocProperty (dropWhile isSpace xs)
+ xs ->
+ error $ "makeProperty: invalid input " ++ show xs
diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs
index 73fafd6b..00a8b68f 100644
--- a/src/Haddock/Types.hs
+++ b/src/Haddock/Types.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, DeriveTraversable #-}
+{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, DeriveTraversable, GADTs #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-----------------------------------------------------------------------------
-- |
@@ -19,7 +19,6 @@ module Haddock.Types (
, HsDocString, LHsDocString
) where
-
import Data.Foldable
import Data.Traversable
import Control.Exception
@@ -27,15 +26,12 @@ import Control.Arrow
import Control.DeepSeq
import Data.Typeable
import Data.Map (Map)
-import Data.Maybe
import qualified Data.Map as Map
-import Data.Monoid
import GHC hiding (NoLink)
import OccName
import Control.Applicative (Applicative(..))
import Control.Monad (ap)
-
-----------------------------------------------------------------------------
-- * Convenient synonyms
-----------------------------------------------------------------------------
@@ -205,7 +201,7 @@ data ExportItem name
, expItemSubs :: ![name]
}
- -- | A section heading.
+ -- | A section heading.
| ExportGroup
{
-- | Section level (1, 2, 3, ...).
@@ -230,11 +226,6 @@ data Documentation name = Documentation
} deriving Functor
-combineDocumentation :: Documentation name -> Maybe (Doc name)
-combineDocumentation (Documentation Nothing Nothing) = Nothing
-combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
-
-
-- | Arguments and result are indexed by Int, zero-based from the left,
-- because that's the easiest to use when recursing over types.
type FnArgsDoc name = Map Int (Doc name)
@@ -289,7 +280,6 @@ type DocInstance name = (InstHead name, Maybe (Doc name))
-- of instance types.
type InstHead name = ([HsType name], name, [HsType name])
-
-----------------------------------------------------------------------------
-- * Documentation comments
-----------------------------------------------------------------------------
@@ -314,18 +304,12 @@ data Doc id
| DocDefList [(Doc id, Doc id)]
| DocCodeBlock (Doc id)
| DocHyperlink Hyperlink
- | DocPic String
+ | DocPic Picture
| DocAName String
| DocProperty String
| DocExamples [Example]
deriving (Functor, Foldable, Traversable)
-
-instance Monoid (Doc id) where
- mempty = DocEmpty
- mappend = DocAppend
-
-
instance NFData a => NFData (Doc a) where
rnf doc = case doc of
DocEmpty -> ()
@@ -360,9 +344,18 @@ data Hyperlink = Hyperlink
} deriving (Eq, Show)
+data Picture = Picture
+ { pictureUri :: String
+ , pictureTitle :: Maybe String
+ } deriving (Eq, Show)
+
+
instance NFData Hyperlink where
rnf (Hyperlink a b) = a `deepseq` b `deepseq` ()
+instance NFData Picture where
+ rnf (Picture a b) = a `deepseq` b `deepseq` ()
+
data Example = Example
{ exampleExpression :: String
@@ -396,7 +389,7 @@ data DocMarkup id a = Markup
, markupCodeBlock :: a -> a
, markupHyperlink :: Hyperlink -> a
, markupAName :: String -> a
- , markupPic :: String -> a
+ , markupPic :: Picture -> a
, markupProperty :: String -> a
, markupExample :: [Example] -> a
}
diff --git a/src/Haddock/Utf8.hs b/src/Haddock/Utf8.hs
new file mode 100644
index 00000000..1fb0e818
--- /dev/null
+++ b/src/Haddock/Utf8.hs
@@ -0,0 +1,74 @@
+module Haddock.Utf8 (encodeUtf8, decodeUtf8) where
+import Data.Bits ((.|.), (.&.), shiftL, shiftR)
+import qualified Data.ByteString as BS
+import Data.Char (chr, ord)
+import Data.Word (Word8)
+
+-- | Helper that encodes and packs a 'String' into a 'BS.ByteString'
+encodeUtf8 :: String -> BS.ByteString
+encodeUtf8 = BS.pack . encode
+
+-- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String'
+decodeUtf8 :: BS.ByteString -> String
+decodeUtf8 = decode . BS.unpack
+
+-- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding
+-- | Character to use when 'encode' or 'decode' fail for a byte.
+replacementCharacter :: Char
+replacementCharacter = '\xfffd'
+
+-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
+encode :: String -> [Word8]
+encode = concatMap (map fromIntegral . go . ord)
+ where
+ go oc
+ | oc <= 0x7f = [oc]
+
+ | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6)
+ , 0x80 + oc .&. 0x3f
+ ]
+
+ | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12)
+ , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
+ , 0x80 + oc .&. 0x3f
+ ]
+ | otherwise = [ 0xf0 + (oc `shiftR` 18)
+ , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
+ , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
+ , 0x80 + oc .&. 0x3f
+ ]
+
+-- | Decode a UTF8 string packed into a list of Word8 values, directly to String
+decode :: [Word8] -> String
+decode [ ] = ""
+decode (c:cs)
+ | c < 0x80 = chr (fromEnum c) : decode cs
+ | c < 0xc0 = replacementCharacter : decode cs
+ | c < 0xe0 = multi1
+ | c < 0xf0 = multi_byte 2 0xf 0x800
+ | c < 0xf8 = multi_byte 3 0x7 0x10000
+ | c < 0xfc = multi_byte 4 0x3 0x200000
+ | c < 0xfe = multi_byte 5 0x1 0x4000000
+ | otherwise = replacementCharacter : decode cs
+ where
+ multi1 = case cs of
+ c1 : ds | c1 .&. 0xc0 == 0x80 ->
+ let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f)
+ in if d >= 0x000080 then toEnum d : decode ds
+ else replacementCharacter : decode ds
+ _ -> replacementCharacter : decode cs
+
+ multi_byte :: Int -> Word8 -> Int -> String
+ multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
+ where
+ aux 0 rs acc
+ | overlong <= acc && acc <= 0x10ffff &&
+ (acc < 0xd800 || 0xdfff < acc) &&
+ (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs
+ | otherwise = replacementCharacter : decode rs
+
+ aux n (r:rs) acc
+ | r .&. 0xc0 == 0x80 = aux (n-1) rs
+ $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
+
+ aux _ rs _ = replacementCharacter : decode rs