diff options
-rw-r--r-- | haddock.cabal | 27 | ||||
-rw-r--r-- | html-test/ref/TitledPicture.html (renamed from html-test/ref/DeprecationMessageParseError.html) | 62 | ||||
-rw-r--r-- | html-test/src/DeprecationMessageParseError.hs | 12 | ||||
-rw-r--r-- | html-test/src/TitledPicture.hs | 7 | ||||
-rw-r--r-- | html-test/src/Unicode.hs (renamed from html-test/src/Unicode.hs.disabled) | 0 | ||||
-rw-r--r-- | src/Haddock.hs | 6 | ||||
-rw-r--r-- | src/Haddock/Backends/LaTeX.hs | 47 | ||||
-rw-r--r-- | src/Haddock/Backends/Xhtml/Decl.hs | 15 | ||||
-rw-r--r-- | src/Haddock/Backends/Xhtml/DocMarkup.hs | 7 | ||||
-rw-r--r-- | src/Haddock/Doc.hs | 20 | ||||
-rw-r--r-- | src/Haddock/Interface/LexParseRn.hs | 11 | ||||
-rw-r--r-- | src/Haddock/Interface/ParseModuleHeader.hs | 9 | ||||
-rw-r--r-- | src/Haddock/InterfaceFile.hs | 10 | ||||
-rw-r--r-- | src/Haddock/Lex.x | 255 | ||||
-rw-r--r-- | src/Haddock/Parse.y | 171 | ||||
-rw-r--r-- | src/Haddock/Parser.hs | 435 | ||||
-rw-r--r-- | src/Haddock/Types.hs | 33 | ||||
-rw-r--r-- | src/Haddock/Utf8.hs | 74 | ||||
-rw-r--r-- | test/Haddock/ParseSpec.hs | 432 | ||||
-rw-r--r-- | test/Haddock/Utf8Spec.hs | 15 |
20 files changed, 986 insertions, 662 deletions
diff --git a/haddock.cabal b/haddock.cabal index d61c78c5..8067db7c 100644 --- a/haddock.cabal +++ b/haddock.cabal @@ -69,8 +69,7 @@ executable haddock ghc-options: -funbox-strict-fields -Wall -fwarn-tabs else ghc-options: -funbox-strict-fields -Wall -fwarn-tabs -O2 - if !flag(in-ghc-tree) - build-tools: alex >= 3, happy >= 1.18 + build-depends: base >= 4.3 && < 4.8 if flag(in-ghc-tree) @@ -105,8 +104,8 @@ executable haddock Haddock.Interface.AttachInstances Haddock.Interface.LexParseRn Haddock.Interface.ParseModuleHeader - Haddock.Lex - Haddock.Parse + Haddock.Parser + Haddock.Utf8 Haddock.Utils Haddock.Backends.Xhtml Haddock.Backends.Xhtml.Decl @@ -129,10 +128,7 @@ executable haddock Haddock.Convert else build-depends: haddock - -- In a GHC tree - in particular, in a source tarball - we don't - -- require alex or happy - if !flag(in-ghc-tree) - build-tools: alex >= 3, happy >= 1.18 + library default-language: Haskell2010 @@ -179,8 +175,8 @@ library Haddock.Interface.AttachInstances Haddock.Interface.LexParseRn Haddock.Interface.ParseModuleHeader - Haddock.Lex - Haddock.Parse + Haddock.Parser + Haddock.Utf8 Haddock.Utils Haddock.Backends.Xhtml Haddock.Backends.Xhtml.Decl @@ -221,16 +217,6 @@ test-suite latex-test build-depends: base, directory, process, filepath, Cabal test-suite spec - -- NOTE: As of this writing, Cabal does not properly handle alex/happy for - -- test suites. We work around this by adding dist/build to hs-source-dirs, - -- so that the the generated lexer/parser from the library is used. In - -- addition we depend on 'haddock', so that the library is compiled before - -- the test suite. - -- - -- The corresponding cabal ticket is here: - -- https://github.com/haskell/cabal/issues/943 - hs-source-dirs: - dist/build type: exitcode-stdio-1.0 default-language: Haskell2010 main-is: Spec.hs @@ -250,6 +236,7 @@ test-suite spec , deepseq , array , hspec + , QuickCheck == 2.* build-depends: haddock diff --git a/html-test/ref/DeprecationMessageParseError.html b/html-test/ref/TitledPicture.html index c71ab1a7..34788690 100644 --- a/html-test/ref/DeprecationMessageParseError.html +++ b/html-test/ref/TitledPicture.html @@ -3,13 +3,13 @@ ><head ><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><title - >DeprecationMessageParseError</title + >TitledPicture</title ><link href="ocean.css" rel="stylesheet" type="text/css" title="Ocean" /><script src="haddock-util.js" type="text/javascript" ></script ><script type="text/javascript" >//<![CDATA[ -window.onload = function () {pageLoad();setSynopsis("mini_DeprecationMessageParseError.html");}; +window.onload = function () {pageLoad();setSynopsis("mini_TitledPicture.html");}; //]]> </script ></head @@ -39,22 +39,7 @@ window.onload = function () {pageLoad();setSynopsis("mini_DeprecationMessagePars ></tr ></table ><p class="caption" - >DeprecationMessageParseError</p - ></div - ><div id="description" - ><p class="caption" - >Description</p - ><div class="doc" - ><p - >What is tested here: -</p - ><ul - ><li - > If parsing of a deprecation message fails, the message is included - verbatim. -</li - ></ul - ></div + >TitledPicture</p ></div ><div id="synopsis" ><p id="control.syn" class="caption expander" onclick="toggleSection('syn')" @@ -64,7 +49,13 @@ window.onload = function () {pageLoad();setSynopsis("mini_DeprecationMessagePars ><a href="" >foo</a > :: <a href="" - >Int</a + >Integer</a + ></li + ><li class="src short" + ><a href="" + >bar</a + > :: <a href="" + >Integer</a ></li ></ul ></div @@ -76,15 +67,34 @@ window.onload = function () {pageLoad();setSynopsis("mini_DeprecationMessagePars ><a name="v:foo" class="def" >foo</a > :: <a href="" - >Int</a + >Integer</a + ></p + ><div class="doc" + ><p + >Picture for <code + ><a href="" + >foo</a + ></code + > without a title <img src="bar" + /> +</p + ></div + ></div + ><div class="top" + ><p class="src" + ><a name="v:bar" class="def" + >bar</a + > :: <a href="" + >Integer</a ></p ><div class="doc" - ><div class="warning" - ><p - >Deprecated: use @bar instead</p - ></div - ><p - >some documentation for foo + ><p + >Picture for <code + ><a href="" + >bar</a + ></code + > with title <img src="un∣∁∘" title="δ∈" + /> </p ></div ></div diff --git a/html-test/src/DeprecationMessageParseError.hs b/html-test/src/DeprecationMessageParseError.hs deleted file mode 100644 index 2f8fb492..00000000 --- a/html-test/src/DeprecationMessageParseError.hs +++ /dev/null @@ -1,12 +0,0 @@ --- | --- What is tested here: --- --- * If parsing of a deprecation message fails, the message is included --- verbatim. --- -module DeprecationMessageParseError where - --- | some documentation for foo -foo :: Int -foo = 23 -{-# DEPRECATED foo "use @bar instead" #-} diff --git a/html-test/src/TitledPicture.hs b/html-test/src/TitledPicture.hs new file mode 100644 index 00000000..7029d98a --- /dev/null +++ b/html-test/src/TitledPicture.hs @@ -0,0 +1,7 @@ +module TitledPicture where + +-- | Picture for 'foo' without a title <<bar>> +foo = 5 + +-- | Picture for 'bar' with title <<un∣∁∘ δ∈>> +bar = 6 diff --git a/html-test/src/Unicode.hs.disabled b/html-test/src/Unicode.hs index d5bbf445..d5bbf445 100644 --- a/html-test/src/Unicode.hs.disabled +++ b/html-test/src/Unicode.hs diff --git a/src/Haddock.hs b/src/Haddock.hs index 6d16d601..b741f5f1 100644 --- a/src/Haddock.hs +++ b/src/Haddock.hs @@ -23,8 +23,7 @@ import Haddock.Backends.Xhtml.Themes (getThemes) import Haddock.Backends.LaTeX import Haddock.Backends.Hoogle import Haddock.Interface -import Haddock.Lex -import Haddock.Parse +import Haddock.Parser import Haddock.Types import Haddock.Version import Haddock.InterfaceFile @@ -447,8 +446,7 @@ getPrologue dflags flags = [] -> return Nothing [filename] -> do str <- readFile filename - case parseParas (tokenise dflags str - (1,0) {- TODO: real position -}) of + case parseParas dflags str of Nothing -> throwE $ "failed to parse haddock prologue from file: " ++ filename Just doc -> return (Just doc) _otherwise -> throwE "multiple -p/--prologue options" diff --git a/src/Haddock/Backends/LaTeX.hs b/src/Haddock/Backends/LaTeX.hs index 0bc95719..64966ac2 100644 --- a/src/Haddock/Backends/LaTeX.hs +++ b/src/Haddock/Backends/LaTeX.hs @@ -33,6 +33,9 @@ import Data.Char import Control.Monad import Data.Maybe import Data.List + +import Haddock.Doc (combineDocumentation) + -- import Debug.Trace {- SAMPLE OUTPUT @@ -278,11 +281,11 @@ ppDecl :: LHsDecl DocName ppDecl (L loc decl) (doc, fnArgsDoc) instances subdocs = case decl of TyClD d@(FamDecl {}) -> ppTyFam False loc doc d unicode - TyClD d@(DataDecl {}) + TyClD d@(DataDecl {}) -> ppDataDecl instances subdocs loc doc d unicode TyClD d@(SynDecl {}) -> ppTySyn loc (doc, fnArgsDoc) d unicode -- Family instances happen via FamInst now --- TyClD d@(TySynonym {}) +-- TyClD d@(TySynonym {}) -- | Just _ <- tcdTyPats d -> ppTyInst False loc doc d unicode -- Family instances happen via FamInst now TyClD d@(ClassDecl {}) -> ppClassDecl instances loc doc subdocs d unicode @@ -465,7 +468,7 @@ ppClassDecl :: [DocInstance DocName] -> SrcSpan -> Documentation DocName -> [(DocName, DocForDecl DocName)] -> TyClDecl DocName -> Bool -> LaTeX ppClassDecl instances loc doc subdocs - (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = ltyvars, tcdFDs = lfds + (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = ltyvars, tcdFDs = lfds , tcdSigs = lsigs, tcdATs = ats, tcdATDefs = at_defs }) unicode = declWithDoc classheader (if null body then Nothing else Just (vcat body)) $$ instancesBit @@ -504,7 +507,7 @@ ppDocInstances unicode (i : rest) | Just ihead <- isUndocdInstance i = declWithDoc (vcat (map (ppInstDecl unicode) (ihead:is))) Nothing $$ ppDocInstances unicode rest' - | otherwise + | otherwise = ppDocInstance unicode i $$ ppDocInstances unicode rest where (is, rest') = spanWith isUndocdInstance rest @@ -650,9 +653,9 @@ ppSideBySideField subdocs unicode (ConDeclField (L _ name) ltype _) = -- {- -- ppHsFullConstr :: HsConDecl -> LaTeX --- ppHsFullConstr (HsConDecl _ nm tvs ctxt typeList doc) = +-- ppHsFullConstr (HsConDecl _ nm tvs ctxt typeList doc) = -- declWithDoc False doc ( --- hsep ((ppHsConstrHdr tvs ctxt +++ +-- hsep ((ppHsConstrHdr tvs ctxt +++ -- ppHsBinder False nm) : map ppHsBangType typeList) -- ) -- ppHsFullConstr (HsRecDecl _ nm tvs ctxt fields doc) = @@ -661,35 +664,35 @@ ppSideBySideField subdocs unicode (ConDeclField (L _ name) ltype _) = -- Nothing -> aboves [hdr, fields_html] -- Just _ -> aboves [hdr, constr_doc, fields_html] -- ) --- +-- -- where hdr = declBox (ppHsConstrHdr tvs ctxt +++ ppHsBinder False nm) --- --- constr_doc +-- +-- constr_doc -- | isJust doc = docBox (docToLaTeX (fromJust doc)) -- | otherwise = LaTeX.emptyTable --- --- fields_html = --- td << +-- +-- fields_html = +-- td << -- table ! [width "100%", cellpadding 0, cellspacing 8] << ( -- aboves (map ppFullField (concat (map expandField fields))) -- ) -- -} --- +-- -- ppShortField :: Bool -> Bool -> ConDeclField DocName -> LaTeX -- ppShortField summary unicode (ConDeclField (L _ name) ltype _) -- = tda [theclass "recfield"] << ( -- ppBinder summary (docNameOcc name) -- <+> dcolon unicode <+> ppLType unicode ltype -- ) --- +-- -- {- -- ppFullField :: HsFieldDecl -> LaTeX --- ppFullField (HsFieldDecl [n] ty doc) +-- ppFullField (HsFieldDecl [n] ty doc) -- = declWithDoc False doc ( -- ppHsBinder False n <+> dcolon <+> ppHsBangType ty -- ) -- ppFullField _ = error "ppFullField" --- +-- -- expandField :: HsFieldDecl -> [HsFieldDecl] -- expandField (HsFieldDecl ns ty doc) = [ HsFieldDecl [n] ty doc | n <- ns ] -- -} @@ -718,7 +721,7 @@ ppAppNameTypes :: DocName -> [HsType DocName] -> Bool -> LaTeX ppAppNameTypes n ts unicode = ppTypeApp n ts ppDocName (ppParendType unicode) --- | Print an application of a DocName and a list of Names +-- | Print an application of a DocName and a list of Names ppAppDocNameNames :: Bool -> DocName -> [Name] -> LaTeX ppAppDocNameNames _summ n ns = ppTypeApp n ns (ppBinder . nameOccName . getName) ppSymName @@ -1002,7 +1005,7 @@ parLatexMarkup ppId = Markup { markupEmphasis = \p v -> emph (p v), markupMonospaced = \p _ -> tt (p Mono), markupUnorderedList = \p v -> itemizedList (map ($v) p) $$ text "", - markupPic = \path _ -> parens (text "image: " <> text path), + markupPic = \p _ -> markupPic p, markupOrderedList = \p v -> enumeratedList (map ($v) p) $$ text "", markupDefList = \l v -> descriptionList (map (\(a,b) -> (a v, b v)) l), markupCodeBlock = \p _ -> quote (verb (p Verb)) $$ text "", @@ -1020,6 +1023,14 @@ parLatexMarkup ppId = Markup { Just label -> text "\\href" <> braces (text url) <> braces (text label) Nothing -> text "\\url" <> braces (text url) + -- Is there a better way of doing this? Just a space is an aribtrary choice. + markupPic (Picture uri title) = parens (imageText title) + where + imageText Nothing = beg + imageText (Just t) = beg <> text " " <> text t + + beg = text "image: " <> text uri + markupId ppId_ id v = case v of Verb -> theid diff --git a/src/Haddock/Backends/Xhtml/Decl.hs b/src/Haddock/Backends/Xhtml/Decl.hs index 2ecc6464..91e6871d 100644 --- a/src/Haddock/Backends/Xhtml/Decl.hs +++ b/src/Haddock/Backends/Xhtml/Decl.hs @@ -25,6 +25,7 @@ import Haddock.Backends.Xhtml.Types import Haddock.Backends.Xhtml.Utils import Haddock.GhcUtils import Haddock.Types +import Haddock.Doc (combineDocumentation) import Data.List ( intersperse ) import qualified Data.Map as Map @@ -115,7 +116,7 @@ ppFor _ _ _ _ _ _ _ = error "ppFor" ppTySyn :: Bool -> LinksInfo -> SrcSpan -> DocForDecl DocName -> TyClDecl DocName -> Bool -> Qualification -> Html ppTySyn summary links loc doc (SynDecl { tcdLName = L _ name, tcdTyVars = ltyvars - , tcdRhs = ltype }) + , tcdRhs = ltype }) unicode qual = ppTypeOrFunSig summary links loc [name] (unLoc ltype) doc (full, hdr, spaceHtml +++ equals) unicode qual @@ -204,12 +205,12 @@ ppAssocType summ links doc (L loc decl) unicode qual = -- * TyClDecl helpers -------------------------------------------------------------------------------- --- | Print a type family and its variables +-- | Print a type family and its variables ppFamDeclBinderWithVars :: Bool -> FamilyDecl DocName -> Html ppFamDeclBinderWithVars summ (FamilyDecl { fdLName = lname, fdTyVars = tvs }) = ppAppDocNameNames summ (unLoc lname) (tyvarNames tvs) --- | Print a newtype / data binder and its variables +-- | Print a newtype / data binder and its variables ppDataBinderWithVars :: Bool -> TyClDecl DocName -> Html ppDataBinderWithVars summ decl = ppAppDocNameNames summ (tcdName decl) (tyvarNames $ tcdTyVars decl) @@ -226,7 +227,7 @@ ppAppNameTypes n ts unicode qual = ppTypeApp n ts (ppDocName qual) (ppParendType unicode qual) --- | Print an application of a DocName and a list of Names +-- | Print an application of a DocName and a list of Names ppAppDocNameNames :: Bool -> DocName -> [Name] -> Html ppAppDocNameNames summ n ns = ppTypeApp n ns (ppBinder summ . nameOccName . getName) ppTyName @@ -305,7 +306,7 @@ ppShortClassDecl :: Bool -> LinksInfo -> TyClDecl DocName -> SrcSpan -> Html ppShortClassDecl summary links (ClassDecl { tcdCtxt = lctxt, tcdLName = lname, tcdTyVars = tvs , tcdFDs = fds, tcdSigs = sigs, tcdATs = ats }) loc - subdocs unicode qual = + subdocs unicode qual = if null sigs && null ats then (if summary then id else topDeclElem links loc [nm]) hdr else (if summary then id else topDeclElem links loc [nm]) (hdr <+> keyword "where") @@ -580,7 +581,7 @@ ppShortField summary unicode qual (ConDeclField (L _ name) ltype _) -- Currently doesn't handle 'data instance' decls or kind signatures ppDataHeader :: Bool -> TyClDecl DocName -> Bool -> Qualification -> Html ppDataHeader summary decl@(DataDecl { tcdDataDefn = HsDataDefn { dd_ND = nd - , dd_ctxt = ctxt } }) + , dd_ctxt = ctxt } }) unicode qual = -- newtype or data (case nd of { NewType -> keyword "newtype"; DataType -> keyword "data" }) <+> @@ -723,5 +724,3 @@ ppr_fun_ty ctxt_prec ty1 ty2 unicode qual in maybeParen ctxt_prec pREC_FUN $ hsep [p1, arrow unicode <+> p2] - - diff --git a/src/Haddock/Backends/Xhtml/DocMarkup.hs b/src/Haddock/Backends/Xhtml/DocMarkup.hs index aa4ba377..31d653bc 100644 --- a/src/Haddock/Backends/Xhtml/DocMarkup.hs +++ b/src/Haddock/Backends/Xhtml/DocMarkup.hs @@ -18,18 +18,19 @@ module Haddock.Backends.Xhtml.DocMarkup ( docElement, docSection, docSection_, ) where +import Control.Applicative ((<$>)) import Haddock.Backends.Xhtml.Names import Haddock.Backends.Xhtml.Utils import Haddock.Types import Haddock.Utils +import Haddock.Doc (combineDocumentation) -import Text.XHtml hiding ( name, title, p, quote ) +import Text.XHtml hiding ( name, p, quote ) import Data.Maybe (fromMaybe) import GHC - parHtmlMarkup :: Qualification -> (a -> Html) -> DocMarkup a Html parHtmlMarkup qual ppId = Markup { markupEmpty = noHtml, @@ -49,7 +50,7 @@ parHtmlMarkup qual ppId = Markup { markupCodeBlock = pre, markupHyperlink = \(Hyperlink url mLabel) -> anchor ! [href url] << fromMaybe url mLabel, markupAName = \aname -> namedAnchor aname << "", - markupPic = \path -> image ! [src path], + markupPic = \(Picture uri t) -> image ! ([src uri] ++ fromMaybe [] (return . title <$> t)), markupProperty = pre . toHtml, markupExample = examplesToHtml } diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs index 18555cfb..4d68c554 100644 --- a/src/Haddock/Doc.hs +++ b/src/Haddock/Doc.hs @@ -1,14 +1,27 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} module Haddock.Doc ( docAppend, docParagraph, - combineStringNodes + combineStringNodes, + combineDocumentation ) where - +import Data.Maybe +import Data.Monoid import Haddock.Types import Data.Char (isSpace) import Control.Arrow ((***)) +-- We put it here so that we can avoid a circular import +-- anything relevant imports this module anyway +instance Monoid (Doc id) where + mempty = DocEmpty + mappend = docAppend + +combineDocumentation :: Documentation name -> Maybe (Doc name) +combineDocumentation (Documentation Nothing Nothing) = Nothing +combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc) + -- used to make parsing easier; we group the list items later docAppend :: Doc id -> Doc id -> Doc id docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) @@ -85,4 +98,7 @@ combineStringNodes x = x tryjoin :: Doc id -> Doc id tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y) +tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z +tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z)) + = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z)) tryjoin x = x diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs index 9d775bcb..13563532 100644 --- a/src/Haddock/Interface/LexParseRn.hs +++ b/src/Haddock/Interface/LexParseRn.hs @@ -19,8 +19,7 @@ module Haddock.Interface.LexParseRn import Haddock.Types -import Haddock.Lex -import Haddock.Parse +import Haddock.Parser import Haddock.Interface.ParseModuleHeader import Haddock.Doc @@ -50,19 +49,19 @@ processDocStringParas = process parseParas processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name)) processDocString = process parseString -process :: ([LToken] -> Maybe (Doc RdrName)) +process :: (DynFlags -> String -> Maybe (Doc RdrName)) -> DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name)) process parse dflags gre (HsDocString fs) = do let str = unpackFS fs - let toks = tokenise dflags str (0,0) -- TODO: real position - case parse toks of + case parse dflags str of Nothing -> do tell [ "doc comment parse failed: " ++ str ] return Nothing - Just doc -> return (Just (rename dflags gre doc)) + Just doc -> do + return (Just (rename dflags gre doc)) processModuleHeader :: DynFlags -> GlobalRdrEnv -> SafeHaskellMode -> Maybe LHsDocString diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs index 5087affe..2e4fe73b 100644 --- a/src/Haddock/Interface/ParseModuleHeader.hs +++ b/src/Haddock/Interface/ParseModuleHeader.hs @@ -12,8 +12,7 @@ module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where import Haddock.Types -import Haddock.Lex -import Haddock.Parse +import Haddock.Parser import RdrName import DynFlags @@ -47,15 +46,13 @@ parseModuleHeader dflags str0 = description1 :: Either String (Maybe (Doc RdrName)) description1 = case descriptionOpt of Nothing -> Right Nothing - -- TODO: pass real file position - Just description -> case parseString $ tokenise dflags description (0,0) of + Just description -> case parseString dflags description of Nothing -> Left ("Cannot parse Description: " ++ description) Just doc -> Right (Just doc) in case description1 of Left mess -> Left mess - -- TODO: pass real file position - Right docOpt -> case parseParas $ tokenise dflags str8 (0,0) of + Right docOpt -> case parseParas dflags str8 of Nothing -> Left "Cannot parse header documentation paragraphs" Just doc -> Right (HaddockModInfo { hmi_description = docOpt, diff --git a/src/Haddock/InterfaceFile.hs b/src/Haddock/InterfaceFile.hs index b993dd67..189550b8 100644 --- a/src/Haddock/InterfaceFile.hs +++ b/src/Haddock/InterfaceFile.hs @@ -428,6 +428,16 @@ instance Binary Hyperlink where label <- get bh return (Hyperlink url label) +instance Binary Picture where + put_ bh (Picture uri title) = do + put_ bh uri + put_ bh title + get bh = do + uri <- get bh + title <- get bh + return (Picture uri title) + + {-* Generated by DrIFT : Look, but Don't Touch. *-} instance (Binary id) => Binary (Doc id) where diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x deleted file mode 100644 index 9e59fa4c..00000000 --- a/src/Haddock/Lex.x +++ /dev/null @@ -1,255 +0,0 @@ --- --- Haddock - A Haskell Documentation Tool --- --- (c) Simon Marlow 2002 --- --- This file was modified and integrated into GHC by David Waern 2006. --- Then moved back into Haddock by Isaac Dupree in 2009 :-) --- - -{ -{-# LANGUAGE BangPatterns #-} -- Generated by Alex -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Lex ( - Token(..), - LToken, - tokenise - ) where - -import Lexer hiding (Token) -import Parser ( parseIdentifier ) -import StringBuffer -import RdrName -import SrcLoc -import DynFlags -import FastString - -import qualified Data.Bits -import Data.Char -import Data.Word (Word8) -import Numeric -import System.IO.Unsafe -import Debug.Trace -} - -$ws = $white # \n -$digit = [0-9] -$hexdigit = [0-9a-fA-F] -$special = [\"\@] -$alphanum = [A-Za-z0-9] -$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~\:] - -:- - --- beginning of a paragraph -<0,para> { - $ws* \n ; - $ws* \> { begin birdtrack } - $ws* prop \> .* \n { strtoken TokProperty `andBegin` property} - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - $ws* [\*\-] { token TokBullet `andBegin` string } - $ws* \[ { token TokDefStart `andBegin` def } - $ws* \( $digit+ \) { token TokNumber `andBegin` string } - $ws* $digit+ \. { token TokNumber `andBegin` string } - $ws* { begin string } -} - --- beginning of a line -<line> { - $ws* \> { begin birdtrack } - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - $ws* \n { token TokPara `andBegin` para } - -- Here, we really want to be able to say - -- $ws* (\n | <eof>) { token TokPara `andBegin` para} - -- because otherwise a trailing line of whitespace will result in - -- a spurious TokString at the end of a docstring. We don't have <eof>, - -- though (NOW I realise what it was for :-). To get around this, we always - -- append \n to the end of a docstring. - () { begin string } -} - -<birdtrack> .* \n? { strtokenNL TokBirdTrack `andBegin` line } - -<property> () { token TokPara `andBegin` para } - -<example> { - $ws* \n { token TokPara `andBegin` para } - $ws* \>\>\> { strtoken TokExamplePrompt `andBegin` exampleexpr } - () { begin exampleresult } -} - -<exampleexpr> .* \n { strtokenNL TokExampleExpression `andBegin` example } - -<exampleresult> .* \n { strtokenNL TokExampleResult `andBegin` example } - -<string,def> { - $special { strtoken $ \s -> TokSpecial (head s) } - \<\< [^\>]* \>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } - \< [^\>]* \> { strtoken $ \s -> TokURL (init (tail s)) } - \# [^\#]* \# { strtoken $ \s -> TokAName (init (tail s)) } - \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) } - [\'\`] $ident+ [\'\`] { ident } - \\ . { strtoken (TokString . tail) } - "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } - "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } - -- allow special characters through if they don't fit one of the previous - -- patterns. - [\/\'\`\<\#\&\\] { strtoken TokString } - [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } - [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString } -} - -<def> { - \] { token TokDefEnd `andBegin` string } -} - --- ']' doesn't have any special meaning outside of the [...] at the beginning --- of a definition paragraph. -<string> { - \] { strtoken TokString } -} - -{ --- | A located token -type LToken = (Token, AlexPosn) - -data Token - = TokPara - | TokNumber - | TokBullet - | TokDefStart - | TokDefEnd - | TokSpecial Char - | TokIdent RdrName - | TokString String - | TokURL String - | TokPic String - | TokEmphasis String - | TokAName String - | TokBirdTrack String - | TokProperty String - | TokExamplePrompt String - | TokExampleExpression String - | TokExampleResult String --- deriving Show - -tokenPos :: LToken -> (Int, Int) -tokenPos t = let AlexPn _ line col = snd t in (line, col) - --- ----------------------------------------------------------------------------- --- Alex support stuff - --- XXX: copied the posn wrapper code from Alex to make this lexer work --- with both Alex 2.x and Alex 3.x. However, we are not using the --- Unicode/UTF-8 support in Alex 3.x, and Unicode documentation will --- probably get mangled. - --- | Encode a Haskell String to a list of Word8 values, in UTF8 format. -utf8Encode :: Char -> [Word8] -utf8Encode = map fromIntegral . go . ord - where - go oc - | oc <= 0x7f = [oc] - - | oc <= 0x7ff = [ 0xc0 + (oc `Data.Bits.shiftR` 6) - , 0x80 + oc Data.Bits..&. 0x3f - ] - - | oc <= 0xffff = [ 0xe0 + (oc `Data.Bits.shiftR` 12) - , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) - , 0x80 + oc Data.Bits..&. 0x3f - ] - | otherwise = [ 0xf0 + (oc `Data.Bits.shiftR` 18) - , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f) - , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f) - , 0x80 + oc Data.Bits..&. 0x3f - ] - -type Byte = Word8 - -type AlexInput = (AlexPosn, -- current position, - Char, -- previous char - [Byte], -- pending bytes on current char - String) -- current input string - -alexInputPrevChar :: AlexInput -> Char -alexInputPrevChar (p,c,bs,s) = c - -alexGetByte :: AlexInput -> Maybe (Byte,AlexInput) -alexGetByte (p,c,(b:bs),s) = Just (b,(p,c,bs,s)) -alexGetByte (p,c,[],[]) = Nothing -alexGetByte (p,_,[],(c:s)) = let p' = alexMove p c - (b:bs) = utf8Encode c - in p' `seq` Just (b, (p', c, bs, s)) - -data AlexPosn = AlexPn !Int !Int !Int - deriving (Eq,Show) - -alexMove :: AlexPosn -> Char -> AlexPosn -alexMove (AlexPn a l c) '\t' = AlexPn (a+1) l (((c+7) `div` 8)*8+1) -alexMove (AlexPn a l c) '\n' = AlexPn (a+1) (l+1) 1 -alexMove (AlexPn a l c) _ = AlexPn (a+1) l (c+1) - -type StartCode = Int -type Action = AlexPosn -> String -> StartCode -> (StartCode -> [LToken]) -> DynFlags -> [LToken] - -tokenise :: DynFlags -> String -> (Int, Int) -> [LToken] -tokenise dflags str (line, col) = go (posn,'\n',[],eofHack str) para - where posn = AlexPn 0 line col - go inp@(pos,_,_,str) sc = - case alexScan inp sc of - AlexEOF -> [] - AlexError _ -> [] - AlexSkip inp' len -> go inp' sc - AlexToken inp' len act -> act pos (take len str) sc (\sc -> go inp' sc) dflags - --- NB. we add a final \n to the string, (see comment in the beginning of line --- production above). -eofHack str = str++"\n" - -andBegin :: Action -> StartCode -> Action -andBegin act new_sc = \pos str _ cont dflags -> act pos str new_sc cont dflags - -token :: Token -> Action -token t = \pos _ sc cont _ -> (t, pos) : cont sc - -strtoken, strtokenNL :: (String -> Token) -> Action -strtoken t = \pos str sc cont _ -> (t str, pos) : cont sc -strtokenNL t = \pos str sc cont _ -> (t (filter (/= '\r') str), pos) : cont sc --- ^ We only want LF line endings in our internal doc string format, so we --- filter out all CRs. - -begin :: StartCode -> Action -begin sc = \_ _ _ cont _ -> cont sc - --- ----------------------------------------------------------------------------- --- Lex a string as a Haskell identifier - -ident :: Action -ident pos str sc cont dflags = - case parseIdent dflags loc id of - Just names -> (TokIdent names, pos) : cont sc - Nothing -> (TokString str, pos) : cont sc - where id = init (tail str) - -- TODO: Get the real filename here. Maybe we should just be - -- using GHC SrcLoc's ourself? - filename = mkFastString "<unknown file>" - loc = case pos of - AlexPn _ line col -> - mkRealSrcLoc filename line col - -parseIdent :: DynFlags -> RealSrcLoc -> String -> Maybe RdrName -parseIdent dflags loc str0 = - let buffer = stringToStringBuffer str0 - pstate = mkPState dflags buffer loc - result = unP parseIdentifier pstate - in case result of - POk _ name -> Just (unLoc name) - _ -> Nothing -} diff --git a/src/Haddock/Parse.y b/src/Haddock/Parse.y deleted file mode 100644 index f40ff521..00000000 --- a/src/Haddock/Parse.y +++ /dev/null @@ -1,171 +0,0 @@ -{ -{-# LANGUAGE BangPatterns #-} -- required for versions of Happy before 1.18.6 -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Parse (parseString, parseParas) where - -import Haddock.Lex -import Haddock.Types (Doc(..), Example(Example), Hyperlink(..)) -import Haddock.Doc -import HsSyn -import RdrName -import Data.Char (isSpace) -import Data.Maybe (fromMaybe) -import Data.List (stripPrefix) -} - -%expect 0 - -%tokentype { LToken } - -%token '/' { (TokSpecial '/',_) } - '@' { (TokSpecial '@',_) } - '[' { (TokDefStart,_) } - ']' { (TokDefEnd,_) } - DQUO { (TokSpecial '\"',_) } - URL { (TokURL $$,_) } - PIC { (TokPic $$,_) } - ANAME { (TokAName $$,_) } - '/../' { (TokEmphasis $$,_) } - '-' { (TokBullet,_) } - '(n)' { (TokNumber,_) } - '>..' { (TokBirdTrack $$,_) } - PROP { (TokProperty $$,_) } - PROMPT { (TokExamplePrompt $$,_) } - RESULT { (TokExampleResult $$,_) } - EXP { (TokExampleExpression $$,_) } - IDENT { (TokIdent $$,_) } - PARA { (TokPara,_) } - STRING { (TokString $$,_) } - -%monad { Maybe } - -%name parseParas doc -%name parseString seq - -%% - -doc :: { Doc RdrName } - : apara PARA doc { docAppend $1 $3 } - | PARA doc { $2 } - | apara { $1 } - | {- empty -} { DocEmpty } - -apara :: { Doc RdrName } - : ulpara { DocUnorderedList [$1] } - | olpara { DocOrderedList [$1] } - | defpara { DocDefList [$1] } - | para { $1 } - -ulpara :: { Doc RdrName } - : '-' para { $2 } - -olpara :: { Doc RdrName } - : '(n)' para { $2 } - -defpara :: { (Doc RdrName, Doc RdrName) } - : '[' seq ']' seq { ($2, $4) } - -para :: { Doc RdrName } - : seq { docParagraph $1 } - | codepara { DocCodeBlock $1 } - | property { $1 } - | examples { DocExamples $1 } - -codepara :: { Doc RdrName } - : '>..' codepara { docAppend (DocString $1) $2 } - | '>..' { DocString $1 } - -property :: { Doc RdrName } - : PROP { makeProperty $1 } - -examples :: { [Example] } - : example examples { $1 : $2 } - | example { [$1] } - -example :: { Example } - : PROMPT EXP result { makeExample $1 $2 (lines $3) } - | PROMPT EXP { makeExample $1 $2 [] } - -result :: { String } - : RESULT result { $1 ++ $2 } - | RESULT { $1 } - -seq :: { Doc RdrName } - : elem seq { docAppend $1 $2 } - | elem { $1 } - -elem :: { Doc RdrName } - : elem1 { $1 } - | '@' seq1 '@' { DocMonospaced $2 } - -seq1 :: { Doc RdrName } - : PARA seq1 { docAppend (DocString "\n") $2 } - | elem1 seq1 { docAppend $1 $2 } - | elem1 { $1 } - -elem1 :: { Doc RdrName } - : STRING { DocString $1 } - | '/../' { DocEmphasis (DocString $1) } - | URL { DocHyperlink (makeHyperlink $1) } - | PIC { DocPic $1 } - | ANAME { DocAName $1 } - | IDENT { DocIdentifier $1 } - | DQUO strings DQUO { DocModule $2 } - -strings :: { String } - : STRING { $1 } - | STRING strings { $1 ++ $2 } - -{ -happyError :: [LToken] -> Maybe a -happyError toks = Nothing - --- | Create a `Hyperlink` from given string. --- --- A hyperlink consists of a URL and an optional label. The label is separated --- from the url by one or more whitespace characters. -makeHyperlink :: String -> Hyperlink -makeHyperlink input = case break isSpace $ strip input of - (url, "") -> Hyperlink url Nothing - (url, label) -> Hyperlink url (Just . dropWhile isSpace $ label) - -makeProperty :: String -> Doc RdrName -makeProperty s = case strip s of - 'p':'r':'o':'p':'>':xs -> - DocProperty (dropWhile isSpace xs) - xs -> - error $ "makeProperty: invalid input " ++ show xs - --- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Example -makeExample prompt expression result = - Example - (strip expression) -- we do not care about leading and trailing - -- whitespace in expressions, so drop them - result' - where - -- 1. drop trailing whitespace from the prompt, remember the prefix - (prefix, _) = span isSpace prompt - - -- 2. drop, if possible, the exact same sequence of whitespace - -- characters from each result line - -- - -- 3. interpret lines that only contain the string "<BLANKLINE>" as an - -- empty line - result' = map (substituteBlankLine . tryStripPrefix prefix) result - where - tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys - - substituteBlankLine "<BLANKLINE>" = "" - substituteBlankLine line = line - --- | Remove all leading and trailing whitespace -strip :: String -> String -strip = dropWhile isSpace . reverse . dropWhile isSpace . reverse -} diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs new file mode 100644 index 00000000..81e274ed --- /dev/null +++ b/src/Haddock/Parser.hs @@ -0,0 +1,435 @@ +{-# LANGUAGE OverloadedStrings #-} +-- | +-- Module : Haddock.Parser +-- Copyright : (c) Mateusz Kowalczyk 2013, +-- Simon Hengel 2013 +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable + +module Haddock.Parser (parseString, parseParas) where + +import Control.Applicative +import Data.Attoparsec.ByteString hiding (takeWhile1, take, inClass) +import qualified Data.Attoparsec.ByteString.Char8 as A8 +import Data.Attoparsec.ByteString.Char8 hiding (take, string) +import qualified Data.ByteString as BS +import Data.Char (chr) +import Data.List (stripPrefix) +import Data.Maybe (fromMaybe) +import Data.Monoid +import DynFlags +import FastString (mkFastString) +import Haddock.Doc +import Haddock.Types +import Lexer (mkPState, unP, ParseResult(POk)) +import Parser (parseIdentifier) +import RdrName +import SrcLoc (mkRealSrcLoc, unLoc) +import StringBuffer (stringToStringBuffer) +import Haddock.Utf8 + +default (Int) + +-- | Main entry point to the parser. Appends the newline character +-- to the input string. +parseParas :: DynFlags + -> String -- ^ String to parse + -> Maybe (Doc RdrName) +parseParas d s = case parseOnly (p <* skipSpace) (encodeUtf8 $ s ++ "\n") of + Right r -> Just $ combineStringNodes r + _ -> Nothing + where + p :: Parser (Doc RdrName) + -- make sure that we don't swallow up whitespace belonging to next paragraph + p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n") + +-- | A parser that parsers separate lines of the comments. Eventually +-- called by 'parseParas'. Appends a newline character to the input string. +-- Drops any whitespace in front of the input string. It's dropped for the sake of +-- section headings. +parseString :: DynFlags -> String -> Maybe (Doc RdrName) +parseString d = parseString'' d . dropWhile isSpace + +-- | A parser that parsers separate lines of the comments. Eventually +-- called by 'parseParas'. Appends a newline character to the input string. +-- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use. +parseString'' :: DynFlags -> String -> Maybe (Doc RdrName) +parseString'' d = parseString' d . (++ "\n") + +-- | An internal use function. Split from the 'parseString' is useful +-- as we can specify separately when we want the newline to be appended. +parseString' :: DynFlags -> String -> Maybe (Doc RdrName) +parseString' d s = case parseOnly p (encodeUtf8 s) of + Right r -> Just $ combineStringNodes r + _ -> Nothing + where + p :: Parser (Doc RdrName) + p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d + <|> moduleName <|> picture <|> url + <|> emphasis d <|> encodedChar <|> string' <|> skipChar) + +-- | Parses and processes +-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> +-- +-- >>> parseOnly encodedChar "ABC" +-- Right (DocString "ABC") +encodedChar :: Parser (Doc RdrName) +encodedChar = "&#" *> c <* ";" + where + c = DocString . return . chr <$> num + num = hex <|> decimal + hex = ("x" <|> "X") *> hexadecimal + +-- | Plain, regular parser for text. Called as one of the last parsers +-- to ensure that we have already given a chance to more meaningful parsers +-- before capturing their characers. +string' :: Parser (Doc RdrName) +string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\") + +-- | Emphasis parser. +-- +-- >>> parseOnly emphasis "/Hello world/" +-- Right (DocEmphasis (DocString "Hello world")) +emphasis :: DynFlags -> Parser (Doc RdrName) +emphasis d = stringBlock d id DocEmphasis "/" "/" "\n" + +-- | Skips a single character and treats it as a plain string. +-- This is done to skip over any special characters belonging to other +-- elements but which were not deemed meaningful at their positions. +-- Note that this can only be used in places where we're absolutely certain +-- no unicode is present, such as to skip a 100% certain ASCII delimeter. +skipChar :: Parser (Doc RdrName) +skipChar = DocString . return <$> anyChar + +-- | Treats the next character as a regular string, even if it's normally +-- used for markup. +charEscape :: Parser (Doc RdrName) +charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n')) + +-- | Text anchors to allow for jumping around the generated documentation. +-- +-- >>> parseOnly anchor "#Hello world#" +-- Right (DocAName "Hello world") +anchor :: Parser (Doc RdrName) +anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#") + +-- | Helper for markup structures surrounded with delimiters. +stringBlock + :: DynFlags + -> (String -> String) -- ^ Function used to transform parsed out text + -- before we send it to 'parseString'' + -> (Doc RdrName -> Doc RdrName) -- ^ 'Doc' to wrap around the result + -> String -- ^ Opening delimiter + -> String -- ^ Closing delimiter + -> String -- ^ Additional characters to terminate parsing on + -> Parser (Doc RdrName) +stringBlock d f doc op ed n = do + inner <- block op ed n + case parseString' d (f inner) of + Just r -> return $ doc r + _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’" + +-- | Returns sections of text delimited by specified text. +block :: String -> String -> String -> Parser String +block op ed n = reverse . drop (length ed) . reverse <$> block' op ed + where + block' op' ed' = string (encodeUtf8 op') *> mid + where + mid :: Parser String + mid = decodeUtf8 <$> string (encodeUtf8 ed') + <|> do + inner <- takeWithSkip (head ed') n + more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed') + <|> block' "" ed' -- not full ending, take more + return $ inner ++ more + + +-- | Takes all characters until the specified one. Unconditionally +-- takes a character if it's escaped. Fails if it doesn't find the character or +-- when the input string is empty. +takeWithSkip :: Char -> String -> Parser String +takeWithSkip s n = do + content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome + if or (map (`elem` content) n) || last content /= s + then fail "failed in takeWithSkip" + else return content + where + gotSome [] = fail "EOF in takeWithSkip" + gotSome xs = return xs + -- Apparently ‘scan’ is so magical that it doesn't mangle unicode. + p (escaped, terminate) c + | terminate = Nothing -- swallows up that extra character + | escaped = Just (False, False) + | c == s = Just (False, True) + | otherwise = Just (c == '\\', False) + +-- | Monospaced strings. +-- +-- >>> parseOnly (monospace dynflags) "@cruel@" +-- Right (DocMonospaced (DocString "cruel")) +monospace :: DynFlags -> Parser (Doc RdrName) +monospace d = stringBlock d id DocMonospaced "@" "@" "" + +-- | Module name parser, surrounded by double quotes. This does a very primitive and +-- purely syntactic checking so that obviously invalid names are not treated as valid +-- and blindly hyperlinked (not starting with a capital letter or including spaces). +moduleName :: Parser (Doc RdrName) +moduleName = DocModule <$> ("\"" *> legalModule <* "\"") + where legalModule = do + n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z']) + <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n")) + + if any (`elem` n) " &[{}(=*)+]!#|@/;,^?" + then fail "invalid characters in module name" + else case n of + [] -> return [] + _ -> if last n == '.' then fail "trailing dot in module name" else return n + + +-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify +-- a title for the picture. +-- +-- >>> parseOnly picture "<<hello.png>>" +-- Right (DocPic (Picture "hello.png" Nothing)) +-- >>> parseOnly picture "<<hello.png world>>" +-- Right (DocPic (Picture "hello.png" (Just "world"))) +picture :: Parser (Doc RdrName) +picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>") + +-- | Paragraph parser, called by 'parseParas'. +paragraph :: DynFlags -> Parser (Doc RdrName) +paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d + <|> property <|> textParagraph d) + +-- | List parser, called by 'paragraph'. +list :: DynFlags -> Parser (Doc RdrName) +list d = DocUnorderedList <$> unorderedList d + <|> DocOrderedList <$> orderedList d + <|> DocDefList <$> definitionList d + +-- | Parse given text with a provided parser, casting +-- Nothing to a failure +parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use + -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result + -> BS.ByteString -- ^ Text to parse + -> Parser a +parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str) + +-- | Parses unordered (bullet) lists. +unorderedList :: DynFlags -> Parser [Doc RdrName] +unorderedList d = ("*" <|> "-") *> innerList unorderedList d + +-- | Parses ordered lists (numbered or dashed). +orderedList :: DynFlags -> Parser [Doc RdrName] +orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d + where + dot = decimal <* "." + paren = "(" *> (decimal :: Parser Int) <* ")" + +-- | Generic function collecting any further lines belonging to the +-- list entry and recursively collecting any further lists in the +-- same paragraph. Usually used as +-- +-- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags +innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function + -> DynFlags + -> Parser [Doc RdrName] +innerList p d = do + cl <- do + content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty + parseLine (parseString'' d) id content + ulcs <- many ulc + let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs] + unLists = mconcat [x | Left x <- ulcs] + return $ contents : unLists + where + ulc :: Parser (Either [Doc RdrName] (Doc RdrName)) + ulc = Left <$> (optWs *> p d) + <|> Right <$> nonEmptyLine d + +-- | Takes the remained of the line until the newline character +-- and calls 'parseLine' using 'parseString'. Fails if it's made +-- up strictly of whitespace. +nonEmptyLine :: DynFlags -> Parser (Doc RdrName) +nonEmptyLine d = do + s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" + parseLine (parseString'' d) id s + where + nonSpace xs + | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line" + | otherwise = return xs + +-- | Parses definition lists. +definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)] +definitionList d = do + _ <- "[" + inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n") + _ <- "]" + outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n") + ulcs <- many ulc + let contents = mconcat $ outer : [x | Right x <- ulcs] + unLists = map mconcat [x | Left x <- ulcs] + return $ (inner, contents) : unLists + where + ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName)) + ulc = Left <$> (optWs *> definitionList d) + <|> Right <$> nonEmptyLine d + +-- | Parses birdtracks. No further markup is parsed after the birdtrack. +-- Consecutive birdtracks are allowed. +birdtracks :: Parser (Doc RdrName) +birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n" + where + line = optWs *> ">" *> A8.takeWhile (/= '\n') + +-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). +-- Consecutive examples are accepted. +examples :: Parser (Doc RdrName) +examples = DocExamples <$> example + +-- | Collects consecutive examples and their results. +example :: Parser [Example] +example = do + ws <- optWs + prompt <- decodeUtf8 <$> string ">>>" + expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n") + results <- many result + let exs = concat [ e | Left e <- results ] + res = filter (not . null) [ r | Right r <- results ] + return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs + where + result = Left <$> example + <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n" + +-- | Propery parser. +-- +-- >>> parseOnly property "prop> hello world" +-- Right (DocProperty "hello world") +property :: Parser (Doc RdrName) +property = do + _ <- skipSpace + s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n')) + return $ makeProperty ("prop>" ++ s) + +-- | Paragraph level codeblock. Anything between the two delimiting @ +-- is parsed for markup. +codeblock :: DynFlags -> Parser (Doc RdrName) +codeblock d = do + -- Note that we don't need to use optWs here because in cases where + -- we don't see a \n immediatelly after the opening @, this parser + -- fails but we still have a chance to get a codeblock by getting + -- a monospaced doc on its own in the paragraph. With that, the cases + -- are covered. This should be updated if the implementation ever changes. + s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@") + maybe (fail "codeblock") (return . DocCodeBlock) s + where + block' = A8.scan False p + where + p isNewline c + | isNewline && c == '@' = Nothing + | otherwise = Just $ c == '\n' + +-- | Calls 'parseString'' on each line of a paragraph +textParagraph :: DynFlags -> Parser (Doc RdrName) +textParagraph d = do + s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n" + maybe (fail "textParagraph") (return . docParagraph) s + where + line = takeWhile1 (/= '\n') + +-- | See 'picture' for adding a page title. +url :: Parser (Doc RdrName) +url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">") + <|> autoUrl + +-- | Naive implementation of auto-linking. Will link everything after +-- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space. +-- Single trailing punctuation character (.!?,) is split off. +autoUrl :: Parser (Doc RdrName) +autoUrl = do + link <- decodeUtf8 <$> urlLone + return $ formatLink link + where + urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace) + prefixes = [ "http://", "https://", "ftp://" + , "ssh://", "gopher://" ] + formatLink :: String -> Doc RdrName + formatLink s = if last s `elem` ".!?," + then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s]) + else DocHyperlink $ Hyperlink s Nothing + +-- | Parses strings between identifier delimiters. Consumes all input that it +-- deems to be valid in an identifier. Note that it simply blindly consumes +-- characters and does no actual validation itself. +parseValid :: Parser String +parseValid = do + vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii) + c <- peekChar + case c of + Just '`' -> return vs + Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs + _ -> fail "outofvalid" + +-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the +-- string it deems valid. +identifier :: DynFlags -> Parser (Doc RdrName) +identifier dflags = do + o <- idDelim + vid <- parseValid + e <- idDelim + return $ validIdentifier $ o : (vid ++ [e]) + where idDelim = char '\'' <|> char '`' + validIdentifier str = case parseIdent (tail $ init str) of + Just identName -> DocIdentifier identName + Nothing -> DocString str + parseIdent :: String -> Maybe RdrName + parseIdent str0 = + let buffer = stringToStringBuffer str0 + realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0 + pstate = mkPState dflags buffer realSrcLc + in case unP parseIdentifier pstate of + POk _ name -> Just (unLoc name) + _ -> Nothing + +-- | Remove all leading and trailing whitespace +strip :: String -> String +strip = (\f -> f . f) $ dropWhile isSpace . reverse + +-- | Consumes whitespace, excluding a newline. +optWs :: Parser BS.ByteString +optWs = A8.takeWhile (`elem` " \t\f\v\r") + +-- | Create an 'Example', stripping superfluous characters as appropriate. +-- Remembers the amount of indentation used for the prompt. +makeExample :: String -> String -> [String] -> Example +makeExample prompt expression res = + Example (strip expression) result' -- drop whitespace in expressions + where (prefix, _) = span isSpace prompt + result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res + where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys + substituteBlankLine "<BLANKLINE>" = "" + substituteBlankLine line = line + +-- | Creates a 'Picture' with an optional title. Called by 'picture'. +makePicture :: String -> Picture +makePicture input = case break isSpace $ strip input of + (uri, "") -> Picture uri Nothing + (uri, label) -> Picture uri (Just $ dropWhile isSpace label) + +-- | Creates a 'Hyperlink' with an optional title. Called by 'example'. +makeHyperlink :: String -> Hyperlink +makeHyperlink input = case break isSpace $ strip input of + (u, "") -> Hyperlink u Nothing + (u, label) -> Hyperlink u (Just $ dropWhile isSpace label) + +-- | Makes a property that can be used by other programs for assertions. +-- Drops whitespace around the property. Called by 'property' +makeProperty :: String -> Doc RdrName +makeProperty s = case strip s of + 'p':'r':'o':'p':'>':xs -> + DocProperty (dropWhile isSpace xs) + xs -> + error $ "makeProperty: invalid input " ++ show xs diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs index 73fafd6b..00a8b68f 100644 --- a/src/Haddock/Types.hs +++ b/src/Haddock/Types.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, DeriveTraversable #-} +{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable, DeriveTraversable, GADTs #-} {-# OPTIONS_GHC -fno-warn-orphans #-} ----------------------------------------------------------------------------- -- | @@ -19,7 +19,6 @@ module Haddock.Types ( , HsDocString, LHsDocString ) where - import Data.Foldable import Data.Traversable import Control.Exception @@ -27,15 +26,12 @@ import Control.Arrow import Control.DeepSeq import Data.Typeable import Data.Map (Map) -import Data.Maybe import qualified Data.Map as Map -import Data.Monoid import GHC hiding (NoLink) import OccName import Control.Applicative (Applicative(..)) import Control.Monad (ap) - ----------------------------------------------------------------------------- -- * Convenient synonyms ----------------------------------------------------------------------------- @@ -205,7 +201,7 @@ data ExportItem name , expItemSubs :: ![name] } - -- | A section heading. + -- | A section heading. | ExportGroup { -- | Section level (1, 2, 3, ...). @@ -230,11 +226,6 @@ data Documentation name = Documentation } deriving Functor -combineDocumentation :: Documentation name -> Maybe (Doc name) -combineDocumentation (Documentation Nothing Nothing) = Nothing -combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc) - - -- | Arguments and result are indexed by Int, zero-based from the left, -- because that's the easiest to use when recursing over types. type FnArgsDoc name = Map Int (Doc name) @@ -289,7 +280,6 @@ type DocInstance name = (InstHead name, Maybe (Doc name)) -- of instance types. type InstHead name = ([HsType name], name, [HsType name]) - ----------------------------------------------------------------------------- -- * Documentation comments ----------------------------------------------------------------------------- @@ -314,18 +304,12 @@ data Doc id | DocDefList [(Doc id, Doc id)] | DocCodeBlock (Doc id) | DocHyperlink Hyperlink - | DocPic String + | DocPic Picture | DocAName String | DocProperty String | DocExamples [Example] deriving (Functor, Foldable, Traversable) - -instance Monoid (Doc id) where - mempty = DocEmpty - mappend = DocAppend - - instance NFData a => NFData (Doc a) where rnf doc = case doc of DocEmpty -> () @@ -360,9 +344,18 @@ data Hyperlink = Hyperlink } deriving (Eq, Show) +data Picture = Picture + { pictureUri :: String + , pictureTitle :: Maybe String + } deriving (Eq, Show) + + instance NFData Hyperlink where rnf (Hyperlink a b) = a `deepseq` b `deepseq` () +instance NFData Picture where + rnf (Picture a b) = a `deepseq` b `deepseq` () + data Example = Example { exampleExpression :: String @@ -396,7 +389,7 @@ data DocMarkup id a = Markup , markupCodeBlock :: a -> a , markupHyperlink :: Hyperlink -> a , markupAName :: String -> a - , markupPic :: String -> a + , markupPic :: Picture -> a , markupProperty :: String -> a , markupExample :: [Example] -> a } diff --git a/src/Haddock/Utf8.hs b/src/Haddock/Utf8.hs new file mode 100644 index 00000000..1fb0e818 --- /dev/null +++ b/src/Haddock/Utf8.hs @@ -0,0 +1,74 @@ +module Haddock.Utf8 (encodeUtf8, decodeUtf8) where +import Data.Bits ((.|.), (.&.), shiftL, shiftR) +import qualified Data.ByteString as BS +import Data.Char (chr, ord) +import Data.Word (Word8) + +-- | Helper that encodes and packs a 'String' into a 'BS.ByteString' +encodeUtf8 :: String -> BS.ByteString +encodeUtf8 = BS.pack . encode + +-- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String' +decodeUtf8 :: BS.ByteString -> String +decodeUtf8 = decode . BS.unpack + +-- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding +-- | Character to use when 'encode' or 'decode' fail for a byte. +replacementCharacter :: Char +replacementCharacter = '\xfffd' + +-- | Encode a Haskell String to a list of Word8 values, in UTF8 format. +encode :: String -> [Word8] +encode = concatMap (map fromIntegral . go . ord) + where + go oc + | oc <= 0x7f = [oc] + + | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6) + , 0x80 + oc .&. 0x3f + ] + + | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12) + , 0x80 + ((oc `shiftR` 6) .&. 0x3f) + , 0x80 + oc .&. 0x3f + ] + | otherwise = [ 0xf0 + (oc `shiftR` 18) + , 0x80 + ((oc `shiftR` 12) .&. 0x3f) + , 0x80 + ((oc `shiftR` 6) .&. 0x3f) + , 0x80 + oc .&. 0x3f + ] + +-- | Decode a UTF8 string packed into a list of Word8 values, directly to String +decode :: [Word8] -> String +decode [ ] = "" +decode (c:cs) + | c < 0x80 = chr (fromEnum c) : decode cs + | c < 0xc0 = replacementCharacter : decode cs + | c < 0xe0 = multi1 + | c < 0xf0 = multi_byte 2 0xf 0x800 + | c < 0xf8 = multi_byte 3 0x7 0x10000 + | c < 0xfc = multi_byte 4 0x3 0x200000 + | c < 0xfe = multi_byte 5 0x1 0x4000000 + | otherwise = replacementCharacter : decode cs + where + multi1 = case cs of + c1 : ds | c1 .&. 0xc0 == 0x80 -> + let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f) + in if d >= 0x000080 then toEnum d : decode ds + else replacementCharacter : decode ds + _ -> replacementCharacter : decode cs + + multi_byte :: Int -> Word8 -> Int -> String + multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) + where + aux 0 rs acc + | overlong <= acc && acc <= 0x10ffff && + (acc < 0xd800 || 0xdfff < acc) && + (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs + | otherwise = replacementCharacter : decode rs + + aux n (r:rs) acc + | r .&. 0xc0 == 0x80 = aux (n-1) rs + $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) + + aux _ rs _ = replacementCharacter : decode rs diff --git a/test/Haddock/ParseSpec.hs b/test/Haddock/ParseSpec.hs index 799330c2..b649d901 100644 --- a/test/Haddock/ParseSpec.hs +++ b/test/Haddock/ParseSpec.hs @@ -6,15 +6,16 @@ module Haddock.ParseSpec (main, spec) where import Control.Applicative +import Data.Maybe (isJust) import Data.Monoid import Data.String import Haddock.Doc (combineStringNodes) -import Haddock.Lex (tokenise) -import qualified Haddock.Parse as Parse +import qualified Haddock.Parser as Parse import Haddock.Types import Outputable (Outputable, showSDoc, ppr) import RdrName (RdrName) import Test.Hspec +import Test.QuickCheck (property) import Helper @@ -32,25 +33,111 @@ instance IsString a => IsString (Maybe a) where fromString = Just . fromString parseParas :: String -> Maybe (Doc RdrName) -parseParas s = Parse.parseParas $ tokenise dynFlags s (0,0) +parseParas = Parse.parseParas dynFlags parseString :: String -> Maybe (Doc RdrName) -parseString s = Parse.parseString $ tokenise dynFlags s (0,0) +parseString = Parse.parseString dynFlags main :: IO () main = hspec spec -infix 1 `shouldParseTo` -shouldParseTo :: String -> Doc RdrName -> Expectation -shouldParseTo input ast = (combineStringNodes <$> parseParas input) - `shouldBe` Just ast - spec :: Spec spec = do + + let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String)) + + describe "parseString" $ do + let infix 1 `shouldParseTo` + shouldParseTo :: String -> Doc RdrName -> Expectation + shouldParseTo input ast = parseString input `shouldBe` Just ast + + it "is total" $ do + property $ \xs -> + -- filter out primes as we might end up with an identifier + -- which will fail due to undefined DynFlags + parseString (filter (/= '\'') xs) `shouldSatisfy` isJust + + context "when parsing URLs" $ do + it "parses a URL" $ do + "<http://example.com/>" `shouldParseTo` + hyperlink "http://example.com/" Nothing <> "\n" + + it "accepts an optional label" $ do + "<http://example.com/ some link>" `shouldParseTo` + hyperlink "http://example.com/" "some link" <> "\n" + + it "finishes URL parsing as soon as it sees >, even if it's escaped" $ do + "<http://examp\\>le.com" `shouldParseTo` + hyperlink "http://examp\\" Nothing <> "le.com\n" + + "<http://exa\\>mp\\>le.com>" `shouldParseTo` + hyperlink "http://exa\\" Nothing <> "mp>le.com>\n" + + -- Likewise in label + "<http://example.com f\\>oo>" `shouldParseTo` + hyperlink "http://example.com" "f\\" <> "oo>\n" + + it "parses inline URLs" $ do + "Not yet working, see <http://trac.haskell.org/haddock/ticket/223>\n , isEmptyChan" `shouldParseTo` + "Not yet working, see " + <> hyperlink "http://trac.haskell.org/haddock/ticket/223" Nothing + <> "\n , isEmptyChan\n" + + context "when autolinking URLs" $ do + it "autolinks HTTP URLs" $ do + "http://example.com/" `shouldParseTo` + hyperlink "http://example.com/" Nothing <> "\n" + + it "autolinks HTTPS URLs" $ do + "https://www.example.com/" `shouldParseTo` + hyperlink "https://www.example.com/" Nothing <> "\n" + + it "autolinks FTP URLs" $ do + "ftp://example.com/" `shouldParseTo` + hyperlink "ftp://example.com/" Nothing <> "\n" + + it "does not include a trailing exclamation mark" $ do + "http://example.com/! Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> "! Some other sentence.\n" + + it "does not include a trailing comma" $ do + "http://example.com/, Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> ", Some other sentence.\n" + + it "does not include a trailing dot" $ do + "http://example.com/. Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> ". Some other sentence.\n" + + it "does not include a trailing question mark" $ do + "http://example.com/? Some other sentence." `shouldParseTo` + hyperlink "http://example.com/" Nothing <> "? Some other sentence.\n" + + describe "parseParas" $ do + let infix 1 `shouldParseTo` + shouldParseTo :: String -> Doc RdrName -> Expectation + shouldParseTo input ast = (combineStringNodes <$> parseParas input) + `shouldBe` Just ast + + it "is total" $ do + property $ \xs -> + -- filter out primes as we might end up with an identifier + -- which will fail due to undefined DynFlags + parseParas (filter (/= '\'') xs) `shouldSatisfy` isJust + it "parses a paragraph" $ do "foobar" `shouldParseTo` DocParagraph "foobar\n" + it "empty input produces DocEmpty" $ do + "" `shouldParseTo` DocEmpty + + it "should preserve all regular characters" $ do + property $ \xs -> + let input = filterSpecial xs + in case input of + [] -> input `shouldParseTo` DocEmpty + _ -> input `shouldParseTo` DocParagraph (DocString $ input ++ "\n") + context "when parsing a simple string" $ do it "] should be made into a DocString" $ do "hell]o" `shouldParseTo` DocParagraph "hell]o\n" @@ -65,7 +152,7 @@ spec = do it "should parse a module inline" $ do "This is a \"Module\"." `shouldParseTo` - DocParagraph ("This is a " <> ((DocModule "Module") <> ".\n")) + DocParagraph ("This is a " <> (DocModule "Module" <> ".\n")) context "when parsing emphasised strings" $ do it "emphasises a word on its own" $ do @@ -80,7 +167,7 @@ spec = do "/灼眼のシャナ/" `shouldParseTo` (DocParagraph $ DocEmphasis "灼眼のシャナ" <> "\n") - it "does /multi-line\\n codeblocks/" $ do + it "does not do /multi-line\\n emphasis/" $ do " /multi-line\n emphasis/" `shouldParseTo` DocParagraph "/multi-line\n emphasis/\n" @@ -99,7 +186,7 @@ spec = do it "accepts other elements in a codeblock" $ do "@/emphasis/ \"Module\" <<picture>>@" `shouldParseTo` (DocCodeBlock $ DocEmphasis "emphasis" <> " " - <> DocModule "Module" <> " " <> DocPic "picture") + <> DocModule "Module" <> " " <> pic "picture" Nothing) context "when parsing monospaced strings" $ do it "monospaces inline strings" $ do @@ -107,6 +194,10 @@ spec = do (DocParagraph $ "This comment applies to the " <> DocMonospaced "following" <> " declaration\n") + it "should allow us to escape the @" $ do + "foo @hey \\@ world@ bar" `shouldParseTo` + DocParagraph ("foo " <> DocMonospaced "hey @ world" <> " bar\n") + it "monospaces inline unicode" $ do "hello @灼眼のシャナ@ unicode" `shouldParseTo` (DocParagraph $ "hello " @@ -115,17 +206,43 @@ spec = do it "accepts other elements in a monospaced section" $ do "hey @/emphasis/ \"Module\" <<picture>>@ world" `shouldParseTo` (DocParagraph $ - "hey " - <> DocMonospaced (DocEmphasis "emphasis" <> " " - <> DocModule "Module" <> " " <> DocPic "picture") - <> " world\n") + "hey " + <> DocMonospaced (DocEmphasis "emphasis" <> " " + <> DocModule "Module" <> " " <> pic "picture" Nothing) + <> " world\n") context "when parsing unordered lists" $ do it "parses a simple unordered list" $ do "* point one\n\n* point two" `shouldParseTo` DocUnorderedList [ DocParagraph " point one\n" - , DocParagraph " point two\n"] + , DocParagraph " point two\n"] + + "* 1.parameter re : the derived regular expression" + ++ "\n\n- returns : empty String" `shouldParseTo` + (DocUnorderedList + [DocParagraph " 1.parameter re : the derived regular expression\n", + DocParagraph " returns : empty String\n"]) + + it "doesn't accept a list where unexpected" $ do + " expression?\n -> matches\n\n * 1.parameter \n\n" + `shouldParseTo` + DocParagraph "expression?\n -> matches\n" <> DocUnorderedList [DocParagraph " 1.parameter \n"] + + + it "parses a simple unordered list without the empty line separator" $ do + "* point one\n* point two" `shouldParseTo` + DocUnorderedList [ DocParagraph " point one\n" + , DocParagraph " point two\n"] + + "* point one\nmore one\n* point two\nmore two" `shouldParseTo` + DocUnorderedList [ DocParagraph " point one\nmore one\n" + , DocParagraph " point two\nmore two\n"] + + " * point one\nmore one\n * point two\nmore two" `shouldParseTo` + DocUnorderedList [ DocParagraph " point one\nmore one\n" + , DocParagraph " point two\nmore two\n" + ] it "parses an empty unordered list" $ do "*" `shouldParseTo` DocUnorderedList [DocParagraph "\n"] @@ -134,6 +251,9 @@ spec = do "* 灼眼のシャナ" `shouldParseTo` DocUnorderedList [DocParagraph " 灼眼のシャナ\n"] + it "preserves whitespace on the front of additional lines" $ do + "* foo\n bar" `shouldParseTo` DocUnorderedList [DocParagraph " foo\n bar\n"] + it "accepts other elements in an unordered list" $ do ("* \"Module\"\n\n* /emphasis/" ++ "\n\n* @code@\n\n* a@mono@b \n\n*") `shouldParseTo` @@ -141,17 +261,43 @@ spec = do DocParagraph (" " <> DocModule "Module" <> "\n") , DocParagraph (" " <> DocEmphasis "emphasis" <> "\n") , DocCodeBlock "code" - , DocParagraph (" a" <> (DocMonospaced "mono") <> "b \n") + , DocParagraph (" a" <> DocMonospaced "mono" <> "b \n") + , DocParagraph "\n" + ] + + ("* \"Module\"\n* /emphasis/" + ++ "\n* @code@\n* a@mono@b \n*") `shouldParseTo` + DocUnorderedList [ + DocParagraph (" " <> DocModule "Module" <> "\n") + , DocParagraph (" " <> DocEmphasis "emphasis" <> "\n") + , DocCodeBlock "code" + , DocParagraph (" a" <> DocMonospaced "mono" <> "b \n") , DocParagraph "\n" ] context "when parsing ordered lists" $ do it "parses a simple ordered list" $ do "1. point one\n\n2. point two" `shouldParseTo` - DocOrderedList [ - DocParagraph " point one\n" - , DocParagraph " point two\n" - ] + DocOrderedList [ DocParagraph " point one\n" + , DocParagraph " point two\n" + ] + + it "parses a simple ordered list without the newline separator" $ do + "1. point one\n2. point two" `shouldParseTo` + DocOrderedList [ DocParagraph " point one\n" + , DocParagraph " point two\n" + ] + + "1. point one\nmore\n2. point two\nmore" `shouldParseTo` + DocOrderedList [ DocParagraph " point one\nmore\n" + , DocParagraph " point two\nmore\n" + ] + + -- space before list + " 1. point one\nmore\n 2. point two\nmore" `shouldParseTo` + DocOrderedList [ DocParagraph " point one\nmore\n" + , DocParagraph " point two\nmore\n" + ] it "parses an empty list" $ do "1." `shouldParseTo` DocOrderedList [DocParagraph "\n"] @@ -165,6 +311,9 @@ spec = do "(1) 灼眼のシャナ" `shouldParseTo` DocOrderedList [DocParagraph " 灼眼のシャナ\n"] + it "preserves whitespace on the front of additional lines" $ do + "1. foo\n bar" `shouldParseTo` DocOrderedList [DocParagraph " foo\n bar\n"] + it "accepts other elements" $ do ("1. \"Module\"\n\n2. /emphasis/" ++ "\n\n3. @code@\n\n4. a@mono@b \n\n5.") `shouldParseTo` @@ -181,6 +330,16 @@ spec = do "[foo] bar\n\n[baz] quux" `shouldParseTo` DocDefList [("foo", " bar\n"), ("baz", " quux\n")] + it "parses a simple list without the newline separator" $ do + "[foo] bar\n[baz] quux" `shouldParseTo` + DocDefList [("foo", " bar\n"), ("baz", " quux\n")] + + "[foo] bar\nmore\n[baz] quux\nmore" `shouldParseTo` + DocDefList [("foo", " bar\nmore\n"), ("baz", " quux\nmore\n")] + + " [foo] bar\nmore\n [baz] quux\nmore" `shouldParseTo` + DocDefList [("foo", " bar\nmore\n"), ("baz", " quux\nmore\n")] + it "parses a list with unicode in it" $ do "[灼眼] シャナ" `shouldParseTo` DocDefList [("灼眼", " シャナ\n")] @@ -197,6 +356,26 @@ spec = do "[[world]] bar" `shouldParseTo` DocDefList [("[world", "] bar\n")] + it "treats broken up definition list as regular string" $ do + "[qu\nx] hey" `shouldParseTo` DocParagraph "[qu\nx] hey\n" + + it "preserves whitespace on the front of additional lines" $ do + "[foo] bar\n baz" `shouldParseTo` DocDefList [("foo", " bar\n baz\n")] + + context "when parsing consecutive paragraphs" $ do + it "will not capture irrelevant consecutive lists" $ do + " * bullet\n\n - different bullet\n\n (1) ordered\n \n " + ++ "2. different bullet\n \n [cat] kitten\n \n [pineapple] fruit" + `shouldParseTo` + DocUnorderedList [ DocParagraph " bullet\n" + , DocParagraph " different bullet\n"] + <> DocOrderedList [ DocParagraph " ordered\n" + , DocParagraph " different bullet\n" + ] + <> DocDefList [ ("cat", " kitten\n") + , ("pineapple", " fruit\n") + ] + context "when parsing an example" $ do it ("requires an example to be separated" ++ " from a previous paragraph by an empty line") $ do @@ -205,27 +384,50 @@ spec = do <> DocExamples [Example "fib 10" ["55"]] -- parse error - parseParas "foobar\n>>> fib 10\n55" `shouldBe` Nothing + it "parses bird-tracks inside of paragraphs as plain strings" $ do + "foobar\n>>> fib 10\n55" `shouldParseTo` DocParagraph "foobar\n>>> fib 10\n55\n" it "parses a prompt with no example results" $ do " >>> import Data.Char\n " `shouldParseTo` DocExamples [ Example { exampleExpression = "import Data.Char" - , exampleResult = [] - } - ] + , exampleResult = [] + } + ] it "is able to parse example sections with unicode" $ do " >>> 灼眼\n の\n >>> シャナ\n 封絶" `shouldParseTo` DocExamples [ Example { exampleExpression = "灼眼" - , exampleResult = ["の"] - } - , Example { exampleExpression = "シャナ" - , exampleResult = ["封絶"] - } - ] + , exampleResult = ["の"] + } + , Example { exampleExpression = "シャナ" + , exampleResult = ["封絶"] + } + ] + it "preserves whitespace before the prompt with consecutive paragraphs" $ do + " Examples:\n\n >>> fib 5\n 5\n >>> fib 10\n 55\n\n >>> fib 10\n 55" + `shouldParseTo` + DocParagraph "Examples:\n" + <> DocExamples [ Example { exampleExpression = "fib 5" + , exampleResult = ["5"]} + , Example {exampleExpression = "fib 10" + , exampleResult = ["55"]}] + <> DocExamples [ Example { exampleExpression = "fib 10" + , exampleResult = ["55"]}] + + it "can parse consecutive prompts with results" $ do + " >>> fib 5\n 5\n >>> fib 10\n 55" `shouldParseTo` + DocExamples [ Example { exampleExpression = "fib 5" + , exampleResult = ["5"] } + , Example { exampleExpression = "fib 10" + , exampleResult = ["55"] }] - it ("parses a result line that only " - ++ "contains <BLANKLINE> as an empty line") $ do + it "can parse results if they don't have the same whitespace prefix" $ do + " >>> hey\n5\n 5\n 5" `shouldParseTo` + DocExamples [ Example { exampleExpression = "hey" + , exampleResult = ["5", "5", " 5"] }] + + + it "parses a <BLANKLINE> result as an empty result" $ do ">>> putFooBar\nfoo\n<BLANKLINE>\nbar" `shouldParseTo` DocExamples [Example "putFooBar" ["foo","","bar"]] @@ -235,11 +437,18 @@ spec = do "foobar\n\n> some code" `shouldParseTo` DocParagraph "foobar\n" <> DocCodeBlock " some code\n" - -- parse error - parseParas "foobar\n> some code" `shouldBe` Nothing + it "parses birdtracks inside of paragraphs as plain strings" $ do + "foobar\n> some code" `shouldParseTo` DocParagraph "foobar\n> some code\n" + + it "long birdtrack block without spaces in front" $ do + "beginning\n\n> foo\n> bar\n> baz" `shouldParseTo` + DocParagraph "beginning\n" + <> DocCodeBlock " foo\n bar\n baz\n" - it "consecutive birdtracks " $ do - ">test3\n>test4\n\n" `shouldParseTo` DocCodeBlock "test3\ntest4\n" + it "single DocCodeBlock even if there's space before birdtracks" $ do + "beginning\n\n > foo\n > bar\n > baz" `shouldParseTo` + DocParagraph "beginning\n" + <> DocCodeBlock " foo\n bar\n baz\n" it "consecutive birdtracks with spaces " $ do " > foo\n \n > bar\n \n" `shouldParseTo` @@ -247,72 +456,39 @@ spec = do it "code block + birdtracks" $ do "@\ntest1\ntest2\n@\n\n>test3\n>test4\n\n" `shouldParseTo` - DocCodeBlock "\ntest1\ntest2\n" <> DocCodeBlock "test3\ntest4\n" + DocCodeBlock "\ntest1\ntest2\n" + <> DocCodeBlock "test3\ntest4\n" - it "birdtracks + code block" $ do - ">test3\n>test4\n\n@\ntest1\ntest2\n@\n\n" `shouldParseTo` - DocCodeBlock "test3\ntest4\n" <> DocCodeBlock "\ntest1\ntest2\n" + it "requires the code block to be closed" $ do + "@hello" `shouldParseTo` DocParagraph "@hello\n" + it "preserves the first trailing whitespace after the opening @ in a code block" $ do + "@\ntest1\ntest2\n@" `shouldParseTo` DocCodeBlock "\ntest1\ntest2\n" + "@ \ntest1\ntest2\n@" `shouldParseTo` DocCodeBlock " \ntest1\ntest2\n" - it "can parse consecutive prompts with results" $ do - " >>> fib 5\n 5\n >>> fib 10\n 55" `shouldParseTo` - DocExamples [ Example { exampleExpression = "fib 5" - , exampleResult = ["5"] - } - , Example { exampleExpression = "fib 10" - , exampleResult = ["55"] - } - ] + it "markup in a @ code block" $ do + "@hello <world> \"Foo.Bar\" <<how is>> it /going/?@" `shouldParseTo` + DocCodeBlock + ("hello " <> + (DocHyperlink (Hyperlink {hyperlinkUrl = "world", hyperlinkLabel = Nothing})) + <> " " + <> DocModule "Foo.Bar" + <> " " + <> (DocPic (Picture {pictureUri = "how", pictureTitle = Just "is"})) + <> " it " <> (DocEmphasis "going") + <> "?") - context "when parsing a URL" $ do - it "parses a URL" $ do - "<http://example.com/>" `shouldParseTo` - (DocParagraph $ hyperlink "http://example.com/" Nothing <> "\n") + it "should allow us to escape the @ in a paragraph level @ code block" $ do + "@hello \\@ world@" `shouldParseTo` DocCodeBlock "hello @ world" - it "accepts an optional label" $ do - "<http://example.com/ some link>" `shouldParseTo` - (DocParagraph $ hyperlink "http://example.com/" "some link" <> "\n") - - it "consecutive URL and URL + label" $ do - (" \nA plain URL: <http://example.com/>\n\n A URL with a " - ++ "label: <http://example.com/ some link>") `shouldParseTo` - DocParagraph ( - "A plain URL: " <> - DocHyperlink (Hyperlink "http://example.com/" Nothing) <> "\n" - ) <> - DocParagraph ( - "A URL with a label: " <> - DocHyperlink (Hyperlink "http://example.com/" "some link") <> "\n" - ) - - it "finishes URL parsing as soon as it sees >, even if it's escaped" $ do - "<http://examp\\>le.com" `shouldParseTo` - DocParagraph ( - DocHyperlink (Hyperlink "http://examp\\" Nothing) <> "le.com\n" - ) + it "should swallow up trailing spaces in code blocks" $ do + "@ foo @" `shouldParseTo` DocCodeBlock " foo" - "<http://exa\\>mp\\>le.com>" `shouldParseTo` - DocParagraph ( - DocHyperlink (Hyperlink "http://exa\\" Nothing) <> "mp>le.com>\n" - ) - - -- Likewise in label - "<http://example.com f\\>oo>" `shouldParseTo` - DocParagraph ( - DocHyperlink (Hyperlink "http://example.com" "f\\") <> "oo>\n" - ) - - it "parses inline URLs" $ do - (" Not yet working, see <http://trac.haskell.org" - ++ "/haddock/ticket/223>\n , isEmptyChan") `shouldParseTo` - DocParagraph - ("Not yet working, see " - <> ((DocHyperlink - (Hyperlink { hyperlinkUrl = "http://trac.haskell.org" - ++ "/haddock/ticket/223" - , hyperlinkLabel = Nothing - })) <> "\n , isEmptyChan\n")) + it "birdtracks + code block" $ do + ">test3\n>test4\n\n@\ntest1\ntest2\n@\n\n" `shouldParseTo` + DocCodeBlock "test3\ntest4\n" + <> DocCodeBlock "\ntest1\ntest2\n" context "when parsing properties" $ do it "can parse a single property" $ do @@ -347,28 +523,28 @@ spec = do context "when parsing pictures" $ do it "parses a simple picture" $ do "<<baz>>" `shouldParseTo` - DocParagraph ((DocPic "baz") <> "\n") + DocParagraph (pic "baz" Nothing <> "\n") - it "parses a picture with spaces" $ do + it "parses a picture with a title" $ do "<<b a z>>" `shouldParseTo` - DocParagraph ((DocPic "b a z") <> "\n") + DocParagraph (pic "b" (Just "a z") <> "\n") it "parses a picture with unicode" $ do "<<灼眼のシャナ>>" `shouldParseTo` - DocParagraph ((DocPic "灼眼のシャナ") <> "\n") + DocParagraph ((pic "灼眼のシャナ" Nothing) <> "\n") it "doesn't allow for escaping of the closing tags" $ do -- bug? "<<ba\\>>z>>" `shouldParseTo` - (DocParagraph $ DocPic "ba\\" <> "z>>\n") + (DocParagraph $ pic "ba\\" Nothing <> "z>>\n") context "when parsing anchors" $ do it "should parse a single word anchor" $ do "#foo#" `shouldParseTo` - DocParagraph ((DocAName "foo") <> "\n") + DocParagraph (DocAName "foo" <> "\n") it "should parse a multi word anchor" $ do "#foo bar#" `shouldParseTo` - DocParagraph ((DocAName "foo bar") <> "\n") + DocParagraph (DocAName "foo bar" <> "\n") it "should parse a unicode anchor" $ do "#灼眼のシャナ#" `shouldParseTo` @@ -387,17 +563,18 @@ spec = do it "[@q/uu/x@] h\\ney" $ do "[@q/uu/x@] h\ney" `shouldParseTo` DocDefList - [(DocMonospaced - ((DocString "q") - <> ((DocEmphasis (DocString "uu")) - <> "x")), " h\ney\n")] - - it "[qu\\nx] hey" $ do - parseParas "[qu\nx] hey" `shouldBe` Nothing + [(DocMonospaced ("q" <> DocEmphasis "uu" <> "x"), " h\ney\n")] it "/qu\\nux/" $ do "/qu\nux/" `shouldParseTo` DocParagraph "/qu\nux/\n" + -- regression test + it "requires markup to be fully closed, even if nested" $ do + "@hel/lo" `shouldParseTo` DocParagraph "@hel/lo\n" + + it "will be total even if only the first delimiter is present" $ do + "/" `shouldParseTo` DocParagraph "/\n" + context "when parsing strings with apostrophes" $ do it "parses a word with an one of the delimiters in it as DocString" $ do "don't" `shouldParseTo` DocParagraph "don't\n" @@ -409,6 +586,39 @@ spec = do " don't use apostrophe's in the wrong place's" `shouldParseTo` DocParagraph "don't use apostrophe's in the wrong place's\n" + context "when parsing strings contaning numeric character references" $ do + it "will implicitly convert digits to characters" $ do + "AAAA" `shouldParseTo` DocParagraph "AAAA\n" + + "灼眼のシャナ" `shouldParseTo` + DocParagraph "灼眼のシャナ\n" + + it "will implicitly convert hex encoded characters" $ do + "eeee" `shouldParseTo` DocParagraph "eeee\n" + + context "when parsing module names" $ do + it "can accept a simple module name" $ do + "\"Hello\"" `shouldParseTo` DocParagraph (DocModule "Hello" <> "\n") + + it "can accept a module name with dots" $ do + "\"Hello.World\"" `shouldParseTo` DocParagraph (DocModule "Hello.World" <> "\n") + + it "can accept a module name with unicode" $ do + "\"Hello.Worldλ\"" `shouldParseTo` DocParagraph ((DocModule "Hello.Worldλ") <> "\n") + + it "parses a module name with a trailing dot as regular quoted string" $ do + "\"Hello.\"" `shouldParseTo` DocParagraph "\"Hello.\"\n" + + it "parses a module name with a space as regular quoted string" $ do + "\"Hello World\"" `shouldParseTo` DocParagraph "\"Hello World\"\n" + + it "parses a module name with invalid characters as regular quoted string" $ do + "\"Hello&[{}(=*)+]!\"" `shouldParseTo` DocParagraph "\"Hello&[{}(=*)+]!\"\n" + + where hyperlink :: String -> Maybe String -> Doc RdrName hyperlink url = DocHyperlink . Hyperlink url + + pic :: String -> Maybe String -> Doc RdrName + pic uri = DocPic . Picture uri diff --git a/test/Haddock/Utf8Spec.hs b/test/Haddock/Utf8Spec.hs new file mode 100644 index 00000000..a352bf61 --- /dev/null +++ b/test/Haddock/Utf8Spec.hs @@ -0,0 +1,15 @@ +module Haddock.Utf8Spec (main, spec) where + +import Test.Hspec +import Test.QuickCheck + +import Haddock.Utf8 + +main :: IO () +main = hspec spec + +spec :: Spec +spec = do + describe "decodeUtf8" $ do + it "is inverse to encodeUtf8" $ do + property $ \xs -> (decodeUtf8 . encodeUtf8) xs `shouldBe` xs |