aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>2014-05-05 09:01:03 +0200
committerMateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>2014-05-05 11:00:41 +0200
commitcc269e6b0b615e9e237c35a985e4ace7b9ab0dd9 (patch)
treef0264138c81909151f9724c1f02f7bf8b30803cb
parent7ac2d0f2d31c2e1c7ede09828f3d5ba5626bd0d4 (diff)
Move parser + parser tests out to own package.
We move some types out that are necessary as well and then re-export and specialise them in the core Haddock. Reason for moving out spec tests is that if we're working on the parser, we can simply work on that and we can ignore the rest of Haddock. The downside is that it's a little inconvenient if at the end of the day we want to see that everything passes.
-rw-r--r--.ghci2
-rw-r--r--.gitignore1
-rw-r--r--haddock-library/.ghci1
-rw-r--r--haddock-library/LICENSE23
-rw-r--r--haddock-library/Setup.hs2
-rw-r--r--haddock-library/haddock-library.cabal75
-rw-r--r--haddock-library/src/Documentation/Haddock/Doc.hs60
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser.hs474
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser/Util.hs61
-rw-r--r--haddock-library/src/Documentation/Haddock/Types.hs70
-rw-r--r--haddock-library/src/Documentation/Haddock/Utf8.hs (renamed from src/Haddock/Utf8.hs)2
-rw-r--r--haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs (renamed from test/Haddock/Parser/UtilSpec.hs)11
-rw-r--r--haddock-library/test/Documentation/Haddock/ParserSpec.hs (renamed from test/Haddock/ParserSpec.hs)19
-rw-r--r--haddock-library/test/Documentation/Haddock/Utf8Spec.hs (renamed from test/Haddock/Utf8Spec.hs)9
-rw-r--r--haddock-library/test/Spec.hs (renamed from test/Spec.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs)0
-rw-r--r--haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs (renamed from vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs)0
-rw-r--r--haddock.cabal59
-rw-r--r--src/Documentation/Haddock.hs3
-rw-r--r--src/Haddock.hs2
-rw-r--r--src/Haddock/Doc.hs51
-rw-r--r--src/Haddock/Interface/LexParseRn.hs20
-rw-r--r--src/Haddock/Interface/ParseModuleHeader.hs2
-rw-r--r--src/Haddock/Parser.hs529
-rw-r--r--src/Haddock/Parser/Util.hs28
-rw-r--r--src/Haddock/Types.hs58
-rw-r--r--test/Helper.hs186
34 files changed, 841 insertions, 907 deletions
diff --git a/.ghci b/.ghci
index 8312f0ee..5407b2c3 100644
--- a/.ghci
+++ b/.ghci
@@ -1 +1 @@
-:set -isrc -ivendor/attoparsec-0.10.4.0 -itest -idist/build -idist/build/autogen -packageghc -optP-include -optPdist/build/autogen/cabal_macros.h
+:set -isrc -itest -idist/build -idist/build/autogen -packageghc -optP-include -optPdist/build/autogen/cabal_macros.h
diff --git a/.gitignore b/.gitignore
index bc554c00..4752003e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
/dist/
+/haddock-library/dist
/html-test/out/
/latex-test/out/
diff --git a/haddock-library/.ghci b/haddock-library/.ghci
new file mode 100644
index 00000000..6a26395e
--- /dev/null
+++ b/haddock-library/.ghci
@@ -0,0 +1 @@
+:set -isrc -ivendor/attoparsec-0.10.4.0 -itest -idist/build -idist/build/autogen -optP-include -optPdist/build/autogen/cabal_macros.h
diff --git a/haddock-library/LICENSE b/haddock-library/LICENSE
new file mode 100644
index 00000000..1636bfcd
--- /dev/null
+++ b/haddock-library/LICENSE
@@ -0,0 +1,23 @@
+Copyright 2002-2010, Simon Marlow. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/haddock-library/Setup.hs b/haddock-library/Setup.hs
new file mode 100644
index 00000000..9a994af6
--- /dev/null
+++ b/haddock-library/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal
new file mode 100644
index 00000000..3a016c5a
--- /dev/null
+++ b/haddock-library/haddock-library.cabal
@@ -0,0 +1,75 @@
+name: haddock-library
+version: 2.15.0
+synopsis: Library exposing some functionality of Haddock.
+
+description: Haddock is a documentation-generation tool for Haskell
+ libraries. These modules expose some functionality of it
+ without pulling in the GHC dependency.
+license: BSD3
+license-file: LICENSE
+maintainer: Simon Hengel <sol@typeful.net>, Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>
+homepage: http://www.haskell.org/haddock/
+bug-reports: http://trac.haskell.org/haddock
+category: Documentation
+build-type: Simple
+cabal-version: >= 1.10
+stability: experimental
+
+library
+ default-language: Haskell2010
+
+ build-depends:
+ base >= 4.3 && < 4.8,
+ bytestring,
+ deepseq
+
+ hs-source-dirs: src, vendor/attoparsec-0.10.4.0
+ ghc-options: -funbox-strict-fields -Wall -fwarn-tabs -O2
+
+ exposed-modules:
+ Documentation.Haddock.Parser
+ Documentation.Haddock.Types
+ Documentation.Haddock.Doc
+
+ other-modules:
+ Data.Attoparsec
+ Data.Attoparsec.ByteString
+ Data.Attoparsec.ByteString.Char8
+ Data.Attoparsec.Combinator
+ Data.Attoparsec.Number
+ Data.Attoparsec.ByteString.FastSet
+ Data.Attoparsec.ByteString.Internal
+ Data.Attoparsec.Internal
+ Data.Attoparsec.Internal.Types
+ Documentation.Haddock.Utf8
+ Documentation.Haddock.Parser.Util
+
+test-suite spec
+ type: exitcode-stdio-1.0
+ default-language: Haskell2010
+ main-is: Spec.hs
+ hs-source-dirs:
+ test
+ , src
+ , vendor/attoparsec-0.10.4.0
+
+ cpp-options:
+ -DTEST
+
+ other-modules:
+ Documentation.Haddock.ParserSpec
+ Documentation.Haddock.Utf8Spec
+ Documentation.Haddock.Parser.UtilSpec
+
+ build-depends:
+ base
+ , hspec
+ , bytestring
+ , deepseq
+ , QuickCheck == 2.*
+ , haddock-library
+
+
+source-repository head
+ type: git
+ location: http://git.haskell.org/haddock.git
diff --git a/haddock-library/src/Documentation/Haddock/Doc.hs b/haddock-library/src/Documentation/Haddock/Doc.hs
new file mode 100644
index 00000000..4d6c10a4
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Doc.hs
@@ -0,0 +1,60 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+module Documentation.Haddock.Doc (docParagraph) where
+
+import Data.Monoid
+import Documentation.Haddock.Types
+import Data.Char (isSpace)
+
+-- We put it here so that we can avoid a circular import
+-- anything relevant imports this module anyway
+instance Monoid (DocH mod id) where
+ mempty = DocEmpty
+ mappend = docAppend
+
+docAppend :: DocH mod id -> DocH mod id -> DocH mod id
+docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
+docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
+docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2)
+docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2)
+docAppend DocEmpty d = d
+docAppend d DocEmpty = d
+docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
+docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
+docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
+docAppend d1 d2 = DocAppend d1 d2
+
+-- again to make parsing easier - we spot a paragraph whose only item
+-- is a DocMonospaced and make it into a DocCodeBlock
+docParagraph :: DocH mod id -> DocH mod id
+docParagraph (DocMonospaced p)
+ = DocCodeBlock (docCodeBlock p)
+docParagraph (DocAppend (DocString s1) (DocMonospaced p))
+ | all isSpace s1
+ = DocCodeBlock (docCodeBlock p)
+docParagraph (DocAppend (DocString s1)
+ (DocAppend (DocMonospaced p) (DocString s2)))
+ | all isSpace s1 && all isSpace s2
+ = DocCodeBlock (docCodeBlock p)
+docParagraph (DocAppend (DocMonospaced p) (DocString s2))
+ | all isSpace s2
+ = DocCodeBlock (docCodeBlock p)
+docParagraph p
+ = DocParagraph p
+
+
+-- Drop trailing whitespace from @..@ code blocks. Otherwise this:
+--
+-- -- @
+-- -- foo
+-- -- @
+--
+-- turns into (DocCodeBlock "\nfoo\n ") which when rendered in HTML
+-- gives an extra vertical space after the code block. The single space
+-- on the final line seems to trigger the extra vertical space.
+--
+docCodeBlock :: DocH mod id -> DocH mod id
+docCodeBlock (DocString s)
+ = DocString (reverse $ dropWhile (`elem` " \t") $ reverse s)
+docCodeBlock (DocAppend l r)
+ = DocAppend l (docCodeBlock r)
+docCodeBlock d = d
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
new file mode 100644
index 00000000..3d146d33
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -0,0 +1,474 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE StandaloneDeriving #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE IncoherentInstances #-}
+{-# LANGUAGE LambdaCase #-}
+-- |
+-- Module : Documentation.Haddock.Parser
+-- Copyright : (c) Mateusz Kowalczyk 2013-2014,
+-- Simon Hengel 2013
+-- License : BSD-like
+--
+-- Maintainer : haddock@projects.haskell.org
+-- Stability : experimental
+-- Portability : portable
+
+module Documentation.Haddock.Parser ( parseString, parseParas
+ , overIdentifier, toRegular, Identifier
+ ) where
+
+import Control.Applicative
+import Control.Arrow (first)
+import Control.Monad (void, mfilter)
+import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
+import qualified Data.ByteString.Char8 as BS
+import Data.Char (chr, isAsciiUpper)
+import Data.List (stripPrefix, intercalate, unfoldr)
+import Data.Maybe (fromMaybe)
+import Data.Monoid
+import Documentation.Haddock.Doc
+import Documentation.Haddock.Parser.Util
+import Documentation.Haddock.Types
+import Documentation.Haddock.Utf8
+import Prelude hiding (takeWhile)
+
+-- | Identifier string surrounded with opening and closing quotes/backticks.
+type Identifier = (Char, String, Char)
+
+-- | Drops the quotes/backticks around all identifiers, as if they
+-- were valid but still 'String's.
+toRegular :: DocH mod Identifier -> DocH mod String
+toRegular = fmap (\(_, x, _) -> x)
+
+-- | Maps over 'DocIdentifier's over 'String' with potentially failing
+-- conversion using user-supplied function. If the conversion fails,
+-- the identifier is deemed to not be valid and is treated as a
+-- regular string.
+overIdentifier :: (String -> Maybe a)
+ -> DocH mod Identifier
+ -> DocH mod a
+overIdentifier f d = g d
+ where
+ g (DocIdentifier (o, x, e)) = case f x of
+ Nothing -> DocString $ o : x ++ [e]
+ Just x' -> DocIdentifier x'
+ g DocEmpty = DocEmpty
+ g (DocAppend x x') = DocAppend (g x) (g x')
+ g (DocString x) = DocString x
+ g (DocParagraph x) = DocParagraph $ g x
+ g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
+ g (DocModule x) = DocModule x
+ g (DocWarning x) = DocWarning $ g x
+ g (DocEmphasis x) = DocEmphasis $ g x
+ g (DocMonospaced x) = DocMonospaced $ g x
+ g (DocBold x) = DocBold $ g x
+ g (DocUnorderedList x) = DocUnorderedList $ fmap g x
+ g (DocOrderedList x) = DocOrderedList $ fmap g x
+ g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
+ g (DocCodeBlock x) = DocCodeBlock $ g x
+ g (DocHyperlink x) = DocHyperlink x
+ g (DocPic x) = DocPic x
+ g (DocAName x) = DocAName x
+ g (DocProperty x) = DocProperty x
+ g (DocExamples x) = DocExamples x
+ g (DocHeader (Header l x)) = DocHeader . Header l $ g x
+
+parse :: Parser a -> BS.ByteString -> a
+parse p = either err id . parseOnly (p <* endOfInput)
+ where
+ err = error . ("Haddock.Parser.parse: " ++)
+
+-- | Main entry point to the parser. Appends the newline character
+-- to the input string.
+parseParas :: String -- ^ String to parse
+ -> DocH mod Identifier
+parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
+ where
+ p :: Parser (DocH mod Identifier)
+ p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
+
+-- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
+-- drops leading whitespace and encodes the string to UTF8 first.
+parseString :: String -> DocH mod Identifier
+parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
+
+parseStringBS :: BS.ByteString -> DocH mod Identifier
+parseStringBS = parse p
+ where
+ p :: Parser (DocH mod Identifier)
+ p = mconcat <$> many (monospace <|> anchor <|> identifier
+ <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold
+ <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar)
+
+-- | Parses and processes
+-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
+--
+-- >>> parseOnly encodedChar "&#65;&#66;&#67;"
+-- Right (DocString "ABC")
+encodedChar :: Parser (DocH mod a)
+encodedChar = "&#" *> c <* ";"
+ where
+ c = DocString . return . chr <$> num
+ num = hex <|> decimal
+ hex = ("x" <|> "X") *> hexadecimal
+
+specialChar :: [Char]
+specialChar = "_/<@\"&'`"
+
+-- | Plain, regular parser for text. Called as one of the last parsers
+-- to ensure that we have already given a chance to more meaningful parsers
+-- before capturing their characers.
+string' :: Parser (DocH mod a)
+string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
+ where
+ unescape "" = ""
+ unescape ('\\':x:xs) = x : unescape xs
+ unescape (x:xs) = x : unescape xs
+
+-- | Skips a single special character and treats it as a plain string.
+-- This is done to skip over any special characters belonging to other
+-- elements but which were not deemed meaningful at their positions.
+skipSpecialChar :: Parser (DocH mod a)
+skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
+
+-- | Emphasis parser.
+--
+-- >>> parseOnly emphasis "/Hello world/"
+-- Right (DocEmphasis (DocString "Hello world"))
+emphasis :: Parser (DocH mod Identifier)
+emphasis = DocEmphasis . parseStringBS <$>
+ mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
+
+-- | Bold parser.
+--
+-- >>> parseOnly bold "__Hello world__"
+-- Right (DocBold (DocString "Hello world"))
+bold :: Parser (DocH mod Identifier)
+bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
+
+disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
+disallowNewline = mfilter ('\n' `BS.notElem`)
+
+-- | Like `takeWhile`, but unconditionally take escaped characters.
+takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile_ p = scan False p_
+ where
+ p_ escaped c
+ | escaped = Just False
+ | not $ p c = Nothing
+ | otherwise = Just (c == '\\')
+
+-- | Like `takeWhile1`, but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile1_ = mfilter (not . BS.null) . takeWhile_
+
+-- | Text anchors to allow for jumping around the generated documentation.
+--
+-- >>> parseOnly anchor "#Hello world#"
+-- Right (DocAName "Hello world")
+anchor :: Parser (DocH mod a)
+anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
+
+-- | Monospaced strings.
+--
+-- >>> parseOnly monospace "@cruel@"
+-- Right (DocMonospaced (DocString "cruel"))
+monospace :: Parser (DocH mod Identifier)
+monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+
+moduleName :: Parser (DocH mod a)
+moduleName = DocModule <$> (char '"' *> modid <* char '"')
+ where
+ modid = intercalate "." <$> conid `sepBy1` "."
+ conid = (:)
+ <$> satisfy isAsciiUpper
+ -- NOTE: According to Haskell 2010 we shouldd actually only
+ -- accept {small | large | digit | ' } here. But as we can't
+ -- match on unicode characters, this is currently not possible.
+ <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
+
+-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
+-- a title for the picture.
+--
+-- >>> parseOnly picture "<<hello.png>>"
+-- Right (DocPic (Picture "hello.png" Nothing))
+-- >>> parseOnly picture "<<hello.png world>>"
+-- Right (DocPic (Picture "hello.png" (Just "world")))
+picture :: Parser (DocH mod a)
+picture = DocPic . makeLabeled Picture . decodeUtf8
+ <$> disallowNewline ("<<" *> takeUntil ">>")
+
+-- | Paragraph parser, called by 'parseParas'.
+paragraph :: Parser (DocH mod Identifier)
+paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
+ <|> property <|> header
+ <|> textParagraph)
+
+header :: Parser (DocH mod Identifier)
+header = do
+ let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
+ pser = foldl1 (<|>) psers
+ delim <- decodeUtf8 <$> pser
+ line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
+ rest <- paragraph <|> return mempty
+ return $ DocParagraph (DocHeader (Header (length delim) line)) <> rest
+
+textParagraph :: Parser (DocH mod Identifier)
+textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
+
+-- | List parser, called by 'paragraph'.
+list :: Parser (DocH mod Identifier)
+list = DocUnorderedList <$> unorderedList
+ <|> DocOrderedList <$> orderedList
+ <|> DocDefList <$> definitionList
+
+-- | Parses unordered (bullet) lists.
+unorderedList :: Parser [DocH mod Identifier]
+unorderedList = ("*" <|> "-") *> innerList unorderedList
+
+-- | Parses ordered lists (numbered or dashed).
+orderedList :: Parser [DocH mod Identifier]
+orderedList = (paren <|> dot) *> innerList orderedList
+ where
+ dot = (decimal :: Parser Int) <* "."
+ paren = "(" *> decimal <* ")"
+
+-- | Generic function collecting any further lines belonging to the
+-- list entry and recursively collecting any further lists in the
+-- same paragraph. Usually used as
+--
+-- > someListFunction = listBeginning *> innerList someListFunction
+innerList :: Parser [DocH mod Identifier] -> Parser [DocH mod Identifier]
+innerList item = do
+ c <- takeLine
+ (cs, items) <- more item
+ let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
+ return $ case items of
+ Left p -> [contents `joinPara` p]
+ Right i -> contents : i
+
+-- | Parses definition lists.
+definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)]
+definitionList = do
+ label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]"
+ c <- takeLine
+ (cs, items) <- more definitionList
+ let contents = parseString . dropNLs . unlines $ c : cs
+ return $ case items of
+ Left p -> [(label, contents `joinPara` p)]
+ Right i -> (label, contents) : i
+
+-- | If possible, appends two 'Doc's under a 'DocParagraph' rather than
+-- outside of it. This allows to get structures like
+--
+-- @DocParagraph (DocAppend … …)@
+--
+-- rather than
+--
+-- @DocAppend (DocParagraph …) …@
+joinPara :: DocH mod id -> DocH mod id -> DocH mod id
+joinPara (DocParagraph p) c = docParagraph $ p <> c
+joinPara d p = d <> p
+
+-- | Drops all trailing newlines.
+dropNLs :: String -> String
+dropNLs = reverse . dropWhile (== '\n') . reverse
+
+-- | Main worker for 'innerList' and 'definitionList'.
+-- We need the 'Either' here to be able to tell in the respective functions
+-- whether we're dealing with the next list or a nested paragraph.
+more :: Monoid a => Parser a
+ -> Parser ([String], Either (DocH mod Identifier) a)
+more item = innerParagraphs <|> moreListItems item
+ <|> moreContent item <|> pure ([], Right mempty)
+
+-- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
+innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a)
+innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
+
+-- | Attemps to fetch the next list if possibly. Used by 'innerList' and
+-- 'definitionList' to recursivly grab lists that aren't separated by a whole
+-- paragraph.
+moreListItems :: Parser a
+ -> Parser ([String], Either (DocH mod Identifier) a)
+moreListItems item = (,) [] . Right <$> (skipSpace *> item)
+
+-- | Helper for 'innerList' and 'definitionList' which simply takes
+-- a line of text and attempts to parse more list content with 'more'.
+moreContent :: Monoid a => Parser a
+ -> Parser ([String], Either (DocH mod Identifier) a)
+moreContent item = first . (:) <$> nonEmptyLine <*> more item
+
+-- | Runs the 'parseParas' parser on an indented paragraph.
+-- The indentation is 4 spaces.
+indentedParagraphs :: Parser (DocH mod Identifier)
+indentedParagraphs = parseParas . concat <$> dropFrontOfPara " "
+
+-- | Grab as many fully indented paragraphs as we can.
+dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
+dropFrontOfPara sp = do
+ currentParagraph <- some (sp *> takeNonEmptyLine)
+ followingParagraphs <-
+ skipHorizontalSpace *> nextPar -- we have more paragraphs to take
+ <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline
+ <|> endOfInput *> return [] -- nothing more to take at all
+ return (currentParagraph ++ followingParagraphs)
+ where
+ nextPar = (++) <$> nlList <*> dropFrontOfPara sp
+ nlList = "\n" *> return ["\n"]
+
+nonSpace :: BS.ByteString -> Parser BS.ByteString
+nonSpace xs
+ | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
+ | otherwise = return xs
+
+-- | Takes a non-empty, not fully whitespace line.
+--
+-- Doesn't discard the trailing newline.
+takeNonEmptyLine :: Parser String
+takeNonEmptyLine = do
+ (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
+
+birdtracks :: Parser (DocH mod a)
+birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
+ where
+ line = skipHorizontalSpace *> ">" *> takeLine
+
+stripSpace :: [String] -> [String]
+stripSpace = fromMaybe <*> mapM strip'
+ where
+ strip' (' ':xs') = Just xs'
+ strip' "" = Just ""
+ strip' _ = Nothing
+
+-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
+-- Consecutive examples are accepted.
+examples :: Parser (DocH mod a)
+examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
+ where
+ go :: Parser [Example]
+ go = do
+ prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
+ expr <- takeLine
+ (rs, es) <- resultAndMoreExamples
+ return (makeExample prefix expr rs : es)
+ where
+ resultAndMoreExamples :: Parser ([String], [Example])
+ resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
+ where
+ moreExamples :: Parser ([String], [Example])
+ moreExamples = (,) [] <$> go
+
+ result :: Parser ([String], [Example])
+ result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
+
+ makeExample :: String -> String -> [String] -> Example
+ makeExample prefix expression res =
+ Example (strip expression) result
+ where
+ result = map (substituteBlankLine . tryStripPrefix) res
+
+ tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
+
+ substituteBlankLine "<BLANKLINE>" = ""
+ substituteBlankLine xs = xs
+
+nonEmptyLine :: Parser String
+nonEmptyLine = mfilter (any (not . isSpace)) takeLine
+
+takeLine :: Parser String
+takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
+
+endOfLine :: Parser ()
+endOfLine = void "\n" <|> endOfInput
+
+-- | Property parser.
+--
+-- >>> parseOnly property "prop> hello world"
+-- Right (DocProperty "hello world")
+property :: Parser (DocH mod a)
+property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
+
+-- |
+-- Paragraph level codeblock. Anything between the two delimiting @ is parsed
+-- for markup.
+codeblock :: Parser (DocH mod Identifier)
+codeblock =
+ DocCodeBlock . parseStringBS . dropSpaces
+ <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
+ where
+ dropSpaces xs =
+ let rs = decodeUtf8 xs
+ in case splitByNl rs of
+ [] -> xs
+ ys -> case last ys of
+ ' ':_ -> case mapM dropSpace ys of
+ Nothing -> xs
+ Just zs -> encodeUtf8 $ intercalate "\n" zs
+ _ -> xs
+
+ -- This is necessary because ‘lines’ swallows up a trailing newline
+ -- and we lose information about whether the last line belongs to @ or to
+ -- text which we need to decide whether we actually want to be dropping
+ -- anything at all.
+ splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
+ _ -> Nothing)
+ . ('\n' :)
+
+ dropSpace "" = Just ""
+ dropSpace (' ':xs) = Just xs
+ dropSpace _ = Nothing
+
+ block' = scan False p
+ where
+ p isNewline c
+ | isNewline && c == '@' = Nothing
+ | isNewline && isSpace c = Just isNewline
+ | otherwise = Just $ c == '\n'
+
+hyperlink :: Parser (DocH mod a)
+hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
+ <$> disallowNewline ("<" *> takeUntil ">")
+ <|> autoUrl
+
+autoUrl :: Parser (DocH mod a)
+autoUrl = mkLink <$> url
+ where
+ url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
+ mkLink :: BS.ByteString -> DocH mod a
+ mkLink s = case BS.unsnoc s of
+ Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
+ _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
+
+-- | Parses strings between identifier delimiters. Consumes all input that it
+-- deems to be valid in an identifier. Note that it simply blindly consumes
+-- characters and does no actual validation itself.
+parseValid :: Parser String
+parseValid = do
+ vs' <- many' $ utf8String "⋆" <|> return <$> idChar
+ let vs = concat vs'
+ c <- peekChar
+ case c of
+ Just '`' -> return vs
+ Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
+ <|> return vs
+ _ -> fail "outofvalid"
+ where
+ idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
+ <|> digit <|> letter_ascii
+
+-- | Parses UTF8 strings from ByteString streams.
+utf8String :: String -> Parser String
+utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
+
+-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
+-- string it deems valid.
+identifier :: Parser (DocH mod Identifier)
+identifier = do
+ o <- idDelim
+ vid <- parseValid
+ e <- idDelim
+ return $ DocIdentifier (o, vid, e)
+ where
+ idDelim = char '\'' <|> char '`'
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
new file mode 100644
index 00000000..25dba2d5
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
@@ -0,0 +1,61 @@
+-- |
+-- Module : Documentation.Haddock.Parser.Util
+-- Copyright : (c) Mateusz Kowalczyk 2013-2014,
+-- Simon Hengel 2013
+-- License : BSD-like
+--
+-- Maintainer : haddock@projects.haskell.org
+-- Stability : experimental
+-- Portability : portable
+--
+-- Various utility functions used by the parser.
+module Documentation.Haddock.Parser.Util where
+
+import Control.Applicative
+import Control.Monad (mfilter)
+import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
+import Data.ByteString.Char8 (ByteString)
+import qualified Data.ByteString.Char8 as BS
+import Prelude hiding (takeWhile)
+
+-- | Remove all leading and trailing whitespace
+strip :: String -> String
+strip = (\f -> f . f) $ dropWhile isSpace . reverse
+
+skipHorizontalSpace :: Parser ()
+skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
+
+takeHorizontalSpace :: Parser BS.ByteString
+takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
+
+makeLabeled :: (String -> Maybe String -> a) -> String -> a
+makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
+ (uri, "") -> f uri Nothing
+ (uri, label) -> f uri (Just $ dropWhile isSpace label)
+ where
+ -- As we don't parse these any further, we don't do any processing to the
+ -- string so we at least remove escape character here. Perhaps we should
+ -- actually be parsing the label at the very least?
+ removeEscapes "" = ""
+ removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
+ removeEscapes ('\\':xs) = removeEscapes xs
+ removeEscapes (x:xs) = x : removeEscapes xs
+
+takeUntil :: ByteString -> Parser ByteString
+takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
+ where
+ end = BS.unpack end_
+
+ p :: (Bool, String) -> Char -> Maybe (Bool, String)
+ p acc c = case acc of
+ (True, _) -> Just (False, end)
+ (_, []) -> Nothing
+ (_, x:xs) | x == c -> Just (False, xs)
+ _ -> Just (c == '\\', end)
+
+ dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
+ requireEnd = mfilter (BS.isSuffixOf end_)
+
+ gotSome xs
+ | BS.null xs = fail "didn't get any content"
+ | otherwise = return xs
diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs
new file mode 100644
index 00000000..3e25d230
--- /dev/null
+++ b/haddock-library/src/Documentation/Haddock/Types.hs
@@ -0,0 +1,70 @@
+{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable #-}
+{-# LANGUAGE DeriveTraversable, StandaloneDeriving #-}
+
+-- |
+-- Module : Documentation.Haddock.Types
+-- Copyright : (c) Simon Marlow 2003-2006,
+-- David Waern 2006-2009,
+-- Mateusz Kowalczyk 2013
+-- License : BSD-like
+--
+-- Maintainer : haddock@projects.haskellorg
+-- Stability : experimental
+-- Portability : portable
+--
+-- Exposes documentation data types used for (some) of Haddock.
+module Documentation.Haddock.Types where
+
+import Data.Foldable
+import Data.Traversable
+
+instance Foldable Header where
+ foldMap f (Header _ a) = f a
+
+instance Traversable Header where
+ traverse f (Header l a) = Header l `fmap` f a
+
+data Hyperlink = Hyperlink
+ { hyperlinkUrl :: String
+ , hyperlinkLabel :: Maybe String
+ } deriving (Eq, Show)
+
+
+data Picture = Picture
+ { pictureUri :: String
+ , pictureTitle :: Maybe String
+ } deriving (Eq, Show)
+
+data Header id = Header
+ { headerLevel :: Int
+ , headerTitle :: id
+ } deriving Functor
+
+data Example = Example
+ { exampleExpression :: String
+ , exampleResult :: [String]
+ } deriving (Eq, Show)
+
+data DocH mod id
+ = DocEmpty
+ | DocAppend (DocH mod id) (DocH mod id)
+ | DocString String
+ | DocParagraph (DocH mod id)
+ | DocIdentifier id
+ | DocIdentifierUnchecked mod
+ | DocModule String
+ | DocWarning (DocH mod id)
+ | DocEmphasis (DocH mod id)
+ | DocMonospaced (DocH mod id)
+ | DocBold (DocH mod id)
+ | DocUnorderedList [DocH mod id]
+ | DocOrderedList [DocH mod id]
+ | DocDefList [(DocH mod id, DocH mod id)]
+ | DocCodeBlock (DocH mod id)
+ | DocHyperlink Hyperlink
+ | DocPic Picture
+ | DocAName String
+ | DocProperty String
+ | DocExamples [Example]
+ | DocHeader (Header (DocH mod id))
+ deriving (Functor, Foldable, Traversable)
diff --git a/src/Haddock/Utf8.hs b/haddock-library/src/Documentation/Haddock/Utf8.hs
index 1fb0e818..3f75e53b 100644
--- a/src/Haddock/Utf8.hs
+++ b/haddock-library/src/Documentation/Haddock/Utf8.hs
@@ -1,4 +1,4 @@
-module Haddock.Utf8 (encodeUtf8, decodeUtf8) where
+module Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8) where
import Data.Bits ((.|.), (.&.), shiftL, shiftR)
import qualified Data.ByteString as BS
import Data.Char (chr, ord)
diff --git a/test/Haddock/Parser/UtilSpec.hs b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs
index 9e1e8de1..265a4d49 100644
--- a/test/Haddock/Parser/UtilSpec.hs
+++ b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs
@@ -1,11 +1,10 @@
{-# LANGUAGE OverloadedStrings #-}
-module Haddock.Parser.UtilSpec (main, spec) where
+module Documentation.Haddock.Parser.UtilSpec (main, spec) where
-import Test.Hspec
-import Data.Either
-
-import Data.Attoparsec.ByteString.Char8
-import Haddock.Parser
+import Data.Attoparsec.ByteString.Char8
+import Data.Either
+import Documentation.Haddock.Parser.Util
+import Test.Hspec
main :: IO ()
main = hspec spec
diff --git a/test/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs
index 53fef943..3889d555 100644
--- a/test/Haddock/ParserSpec.hs
+++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs
@@ -3,23 +3,16 @@
, IncoherentInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-module Haddock.ParserSpec (main, spec) where
+module Documentation.Haddock.ParserSpec (main, spec) where
import Data.Monoid
import Data.String
-import qualified Haddock.Parser as Parse
-import Haddock.Types
-import Outputable (Outputable, showSDoc, ppr)
-import RdrName (RdrName, mkVarUnqual)
-import FastString (fsLit)
-import StaticFlags (initStaticOpts)
+import qualified Documentation.Haddock.Parser as Parse
+import Documentation.Haddock.Types
import Test.Hspec
import Test.QuickCheck
-import Helper
-
-instance Outputable a => Show a where
- show = showSDoc dynFlags . ppr
+type Doc id = DocH () id
deriving instance Show a => Show (Header a)
deriving instance Show a => Show (Doc a)
@@ -38,13 +31,11 @@ parseParas = Parse.toRegular . Parse.parseParas
parseString :: String -> Doc String
parseString = Parse.toRegular . Parse.parseString
-
-
main :: IO ()
main = hspec spec
spec :: Spec
-spec = before initStaticOpts $ do
+spec = do
describe "parseString" $ do
let infix 1 `shouldParseTo`
shouldParseTo :: String -> Doc String -> Expectation
diff --git a/test/Haddock/Utf8Spec.hs b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs
index a352bf61..47e12704 100644
--- a/test/Haddock/Utf8Spec.hs
+++ b/haddock-library/test/Documentation/Haddock/Utf8Spec.hs
@@ -1,9 +1,8 @@
-module Haddock.Utf8Spec (main, spec) where
+module Documentation.Haddock.Utf8Spec (main, spec) where
-import Test.Hspec
-import Test.QuickCheck
-
-import Haddock.Utf8
+import Test.Hspec
+import Test.QuickCheck
+import Documentation.Haddock.Utf8
main :: IO ()
main = hspec spec
diff --git a/test/Spec.hs b/haddock-library/test/Spec.hs
index a824f8c3..a824f8c3 100644
--- a/test/Spec.hs
+++ b/haddock-library/test/Spec.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs
index 41b4ed30..41b4ed30 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs
index d2f3761c..d2f3761c 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
index 3bbe51f0..3bbe51f0 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Char8.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs
index 73d02056..73d02056 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/FastSet.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs
index b3699728..b3699728 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/ByteString/Internal.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs
index cb9cee83..cb9cee83 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Combinator.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs
index 0572d682..0572d682 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs
index e47e5c9e..e47e5c9e 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Internal/Types.hs
diff --git a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs
index bf175f4b..bf175f4b 100644
--- a/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs
+++ b/haddock-library/vendor/attoparsec-0.10.4.0/Data/Attoparsec/Number.hs
diff --git a/haddock.cabal b/haddock.cabal
index 0e26949a..b308a022 100644
--- a/haddock.cabal
+++ b/haddock.cabal
@@ -84,19 +84,11 @@ executable haddock
xhtml >= 3000.2 && < 3000.3,
Cabal >= 1.10,
ghc == 7.9.*,
- bytestring
+ bytestring,
+ haddock-library
other-modules:
Documentation.Haddock
- Data.Attoparsec
- Data.Attoparsec.ByteString
- Data.Attoparsec.ByteString.Char8
- Data.Attoparsec.Combinator
- Data.Attoparsec.Number
- Data.Attoparsec.ByteString.FastSet
- Data.Attoparsec.ByteString.Internal
- Data.Attoparsec.Internal
- Data.Attoparsec.Internal.Types
Haddock
Haddock.Interface
Haddock.Interface.Rename
@@ -105,7 +97,6 @@ executable haddock
Haddock.Interface.LexParseRn
Haddock.Interface.ParseModuleHeader
Haddock.Parser
- Haddock.Parser.Util
Haddock.Utf8
Haddock.Utils
Haddock.Backends.Xhtml
@@ -128,7 +119,7 @@ executable haddock
Haddock.GhcUtils
Haddock.Convert
else
- build-depends: haddock
+ build-depends: haddock, haddock-library
library
default-language: Haskell2010
@@ -143,14 +134,15 @@ library
array,
xhtml >= 3000.2 && < 3000.3,
Cabal >= 1.10,
- ghc == 7.9.*
+ ghc == 7.9.*,
+ haddock-library
if flag(in-ghc-tree)
cpp-options: -DIN_GHC_TREE
else
build-depends: ghc-paths
- hs-source-dirs: src, vendor/attoparsec-0.10.4.0
+ hs-source-dirs: src
if flag(dev)
ghc-options: -funbox-strict-fields -Wall -fwarn-tabs
else
@@ -160,15 +152,6 @@ library
Documentation.Haddock
other-modules:
- Data.Attoparsec
- Data.Attoparsec.ByteString
- Data.Attoparsec.ByteString.Char8
- Data.Attoparsec.Combinator
- Data.Attoparsec.Number
- Data.Attoparsec.ByteString.FastSet
- Data.Attoparsec.ByteString.Internal
- Data.Attoparsec.Internal
- Data.Attoparsec.Internal.Types
Haddock
Haddock.Interface
Haddock.Interface.Rename
@@ -177,8 +160,6 @@ library
Haddock.Interface.LexParseRn
Haddock.Interface.ParseModuleHeader
Haddock.Parser
- Haddock.Parser.Util
- Haddock.Utf8
Haddock.Utils
Haddock.Backends.Xhtml
Haddock.Backends.Xhtml.Decl
@@ -218,34 +199,6 @@ test-suite latex-test
hs-source-dirs: latex-test
build-depends: base, directory, process, filepath, Cabal
-test-suite spec
- type: exitcode-stdio-1.0
- default-language: Haskell2010
- main-is: Spec.hs
- hs-source-dirs:
- test
- , src
- , vendor/attoparsec-0.10.4.0
-
- other-modules:
- Helper
- Haddock.ParserSpec
- Haddock.Utf8Spec
- Haddock.Parser.UtilSpec
-
- build-depends:
- base
- , bytestring
- , ghc
- , containers
- , deepseq
- , array
- , hspec
- , QuickCheck == 2.*
-
- build-depends:
- haddock
-
source-repository head
type: git
location: http://git.haskell.org/haddock.git
diff --git a/src/Documentation/Haddock.hs b/src/Documentation/Haddock.hs
index 36115a2a..655a9723 100644
--- a/src/Documentation/Haddock.hs
+++ b/src/Documentation/Haddock.hs
@@ -33,7 +33,8 @@ module Documentation.Haddock (
InstHead,
-- * Documentation comments
- Doc(..),
+ Doc,
+ DocH(..),
Example(..),
Hyperlink(..),
DocMarkup(..),
diff --git a/src/Haddock.hs b/src/Haddock.hs
index 66dfb168..78844c96 100644
--- a/src/Haddock.hs
+++ b/src/Haddock.hs
@@ -25,7 +25,7 @@ import Haddock.Backends.Xhtml.Themes (getThemes)
import Haddock.Backends.LaTeX
import Haddock.Backends.Hoogle
import Haddock.Interface
-import Haddock.Parser.Util
+import Haddock.Parser
import Haddock.Types
import Haddock.Version
import Haddock.InterfaceFile
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index d812aee2..79a59ac2 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,55 +1,18 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
-module Haddock.Doc (
- docAppend
-, docParagraph
-, combineDocumentation
-) where
+module Haddock.Doc ( module Documentation.Haddock.Doc
+ , docCodeBlock
+ , combineDocumentation
+ ) where
import Data.Maybe
import Data.Monoid
+import Documentation.Haddock.Doc
import Haddock.Types
-import Data.Char (isSpace)
-
--- We put it here so that we can avoid a circular import
--- anything relevant imports this module anyway
-instance Monoid (DocH mod id) where
- mempty = DocEmpty
- mappend = docAppend
combineDocumentation :: Documentation name -> Maybe (Doc name)
combineDocumentation (Documentation Nothing Nothing) = Nothing
-combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
-
-docAppend :: DocH mod id -> DocH mod id -> DocH mod id
-docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
-docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
-docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2)
-docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2)
-docAppend DocEmpty d = d
-docAppend d DocEmpty = d
-docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
-docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
-docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
-docAppend d1 d2 = DocAppend d1 d2
-
--- again to make parsing easier - we spot a paragraph whose only item
--- is a DocMonospaced and make it into a DocCodeBlock
-docParagraph :: DocH mod id -> DocH mod id
-docParagraph (DocMonospaced p)
- = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocString s1) (DocMonospaced p))
- | all isSpace s1
- = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocString s1)
- (DocAppend (DocMonospaced p) (DocString s2)))
- | all isSpace s1 && all isSpace s2
- = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocMonospaced p) (DocString s2))
- | all isSpace s2
- = DocCodeBlock (docCodeBlock p)
-docParagraph p
- = DocParagraph p
-
+combineDocumentation (Documentation mDoc mWarning) =
+ Just (fromMaybe mempty mWarning <> fromMaybe mempty mDoc)
-- Drop trailing whitespace from @..@ code blocks. Otherwise this:
--
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 01276310..d9d4ae58 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -18,31 +18,29 @@ module Haddock.Interface.LexParseRn
, processModuleHeader
) where
-import qualified Data.IntSet as IS
-import Haddock.Types
-import Haddock.Parser.Util
-import Haddock.Interface.ParseModuleHeader
-import Haddock.Doc
-
import Control.Applicative
+import Data.IntSet (toList)
import Data.List
import Data.Maybe
+import Data.Monoid ((<>))
+import DynFlags (ExtensionFlag(..), languageExtensions)
import FastString
import GHC
-import DynFlags (ExtensionFlag(..), languageExtensions)
+import Haddock.Interface.ParseModuleHeader
+import Haddock.Parser
+import Haddock.Types
import Name
-import Outputable
+import Outputable (showPpr)
import RdrName
processDocStrings :: DynFlags -> GlobalRdrEnv -> [HsDocString] -> ErrMsgM (Maybe (Doc Name))
processDocStrings dflags gre strs = do
docs <- catMaybes <$> mapM (processDocStringParas dflags gre) strs
- let doc = foldl' docAppend DocEmpty docs
+ let doc = foldl' (<>) DocEmpty docs
case doc of
DocEmpty -> return Nothing
_ -> return (Just doc)
-
processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
processDocStringParas = process parseParasMaybe
@@ -86,7 +84,7 @@ processModuleHeader dflags gre safety mayStr = do
let flags :: [ExtensionFlag]
-- We remove the flags implied by the language setting and we display the language instead
- flags = map toEnum (IS.toList $ extensionFlags dflags) \\ languageExtensions (language dflags)
+ flags = map toEnum (toList $ extensionFlags dflags) \\ languageExtensions (language dflags)
return (hmi { hmi_safety = Just $ showPpr dflags safety
, hmi_language = language dflags
, hmi_extensions = flags
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index c155a83b..0be2511f 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -12,7 +12,7 @@
module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where
import Haddock.Types
-import Haddock.Parser.Util
+import Haddock.Parser
import RdrName
import DynFlags
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index 1489ae84..720f442b 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -13,504 +13,35 @@
-- Stability : experimental
-- Portability : portable
-module Haddock.Parser ( parseString, parseParas
- , overIdentifier, toRegular
- , takeUntil
+module Haddock.Parser ( module Documentation.Haddock.Parser
+ , parseParasMaybe
+ , parseStringMaybe
+ , parseIdent
) where
-import Prelude hiding (takeWhile)
-import Control.Arrow (first)
-import Control.Monad (void, mfilter)
-import Control.Applicative
-import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
-import qualified Data.ByteString.Char8 as BS
-import Data.Char (chr, isAsciiUpper)
-import Data.List (stripPrefix, intercalate, unfoldr)
-import Data.Maybe (fromMaybe)
-import Data.Monoid
-import Haddock.Doc
-import Haddock.Types hiding (Doc)
-import Haddock.Utf8
-import Data.ByteString.Char8 (ByteString)
-
--- | Identifier string surrounded with opening and closing quotes/backticks.
-type IString = (Char, String, Char)
-
--- | Drops the quotes/backticks around all identifiers, as if they
--- were valid but still 'String's.
-toRegular :: DocH mod IString -> DocH mod String
-toRegular = fmap (\(_, x, _) -> x)
-
--- | Maps over 'DocIdentifier's over 'String' with potentially failing
--- conversion using user-supplied function. If the conversion fails,
--- the identifier is deemed to not be valid and is treated as a
--- regular string.
-overIdentifier :: (String -> Maybe a)
- -> DocH mod IString
- -> DocH mod a
-overIdentifier f d = g d
- where
- g (DocIdentifier (o, x, e)) = case f x of
- Nothing -> DocString $ o : x ++ [e]
- Just x' -> DocIdentifier x'
- g DocEmpty = DocEmpty
- g (DocAppend x x') = DocAppend (g x) (g x')
- g (DocString x) = DocString x
- g (DocParagraph x) = DocParagraph $ g x
- g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
- g (DocModule x) = DocModule x
- g (DocWarning x) = DocWarning $ g x
- g (DocEmphasis x) = DocEmphasis $ g x
- g (DocMonospaced x) = DocMonospaced $ g x
- g (DocBold x) = DocBold $ g x
- g (DocUnorderedList x) = DocUnorderedList $ fmap g x
- g (DocOrderedList x) = DocOrderedList $ fmap g x
- g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
- g (DocCodeBlock x) = DocCodeBlock $ g x
- g (DocHyperlink x) = DocHyperlink x
- g (DocPic x) = DocPic x
- g (DocAName x) = DocAName x
- g (DocProperty x) = DocProperty x
- g (DocExamples x) = DocExamples x
- g (DocHeader (Header l x)) = DocHeader . Header l $ g x
-
-parse :: Parser a -> BS.ByteString -> a
-parse p = either err id . parseOnly (p <* endOfInput)
- where
- err = error . ("Haddock.Parser.parse: " ++)
-
--- | Main entry point to the parser. Appends the newline character
--- to the input string.
-parseParas :: String -- ^ String to parse
- -> DocH mod IString
-parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
- where
- p :: Parser (DocH mod IString)
- p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
-
--- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
--- drops leading whitespace and encodes the string to UTF8 first.
-parseString :: String -> DocH mod IString
-parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
-
-parseStringBS :: BS.ByteString -> DocH mod IString
-parseStringBS = parse p
- where
- p :: Parser (DocH mod IString)
- p = mconcat <$> many (monospace <|> anchor <|> identifier
- <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold
- <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar)
-
--- | Parses and processes
--- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
---
--- >>> parseOnly encodedChar "&#65;&#66;&#67;"
--- Right (DocString "ABC")
-encodedChar :: Parser (DocH mod a)
-encodedChar = "&#" *> c <* ";"
- where
- c = DocString . return . chr <$> num
- num = hex <|> decimal
- hex = ("x" <|> "X") *> hexadecimal
-
-specialChar :: [Char]
-specialChar = "_/<@\"&'`"
-
--- | Plain, regular parser for text. Called as one of the last parsers
--- to ensure that we have already given a chance to more meaningful parsers
--- before capturing their characers.
-string' :: Parser (DocH mod a)
-string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
- where
- unescape "" = ""
- unescape ('\\':x:xs) = x : unescape xs
- unescape (x:xs) = x : unescape xs
-
--- | Skips a single special character and treats it as a plain string.
--- This is done to skip over any special characters belonging to other
--- elements but which were not deemed meaningful at their positions.
-skipSpecialChar :: Parser (DocH mod a)
-skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
-
--- | Emphasis parser.
---
--- >>> parseOnly emphasis "/Hello world/"
--- Right (DocEmphasis (DocString "Hello world"))
-emphasis :: Parser (DocH mod IString)
-emphasis = DocEmphasis . parseStringBS <$>
- mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
-
--- | Bold parser.
---
--- >>> parseOnly bold "__Hello world__"
--- Right (DocBold (DocString "Hello world"))
-bold :: Parser (DocH mod IString)
-bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
-
-disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
-disallowNewline = mfilter ('\n' `BS.notElem`)
-
--- | Like `takeWhile`, but unconditionally take escaped characters.
-takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile_ p = scan False p_
- where
- p_ escaped c
- | escaped = Just False
- | not $ p c = Nothing
- | otherwise = Just (c == '\\')
-
--- | Like `takeWhile1`, but unconditionally take escaped characters.
-takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile1_ = mfilter (not . BS.null) . takeWhile_
-
--- | Text anchors to allow for jumping around the generated documentation.
---
--- >>> parseOnly anchor "#Hello world#"
--- Right (DocAName "Hello world")
-anchor :: Parser (DocH mod a)
-anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
-
--- | Monospaced strings.
---
--- >>> parseOnly monospace "@cruel@"
--- Right (DocMonospaced (DocString "cruel"))
-monospace :: Parser (DocH mod IString)
-monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
-
-moduleName :: Parser (DocH mod a)
-moduleName = DocModule <$> (char '"' *> modid <* char '"')
- where
- modid = intercalate "." <$> conid `sepBy1` "."
- conid = (:)
- <$> satisfy isAsciiUpper
- -- NOTE: According to Haskell 2010 we shouldd actually only
- -- accept {small | large | digit | ' } here. But as we can't
- -- match on unicode characters, this is currently not possible.
- <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
-
--- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
--- a title for the picture.
---
--- >>> parseOnly picture "<<hello.png>>"
--- Right (DocPic (Picture "hello.png" Nothing))
--- >>> parseOnly picture "<<hello.png world>>"
--- Right (DocPic (Picture "hello.png" (Just "world")))
-picture :: Parser (DocH mod a)
-picture = DocPic . makeLabeled Picture . decodeUtf8
- <$> disallowNewline ("<<" *> takeUntil ">>")
-
--- | Paragraph parser, called by 'parseParas'.
-paragraph :: Parser (DocH mod IString)
-paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
- <|> property <|> header
- <|> textParagraph)
-
-header :: Parser (DocH mod IString)
-header = do
- let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
- pser = foldl1 (<|>) psers
- delim <- decodeUtf8 <$> pser
- line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
- rest <- paragraph <|> return mempty
- return $ docAppend (DocParagraph (DocHeader (Header (length delim) line))) rest
-
-textParagraph :: Parser (DocH mod IString)
-textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
-
--- | List parser, called by 'paragraph'.
-list :: Parser (DocH mod IString)
-list = DocUnorderedList <$> unorderedList
- <|> DocOrderedList <$> orderedList
- <|> DocDefList <$> definitionList
-
--- | Parses unordered (bullet) lists.
-unorderedList :: Parser [DocH mod IString]
-unorderedList = ("*" <|> "-") *> innerList unorderedList
-
--- | Parses ordered lists (numbered or dashed).
-orderedList :: Parser [DocH mod IString]
-orderedList = (paren <|> dot) *> innerList orderedList
- where
- dot = (decimal :: Parser Int) <* "."
- paren = "(" *> decimal <* ")"
-
--- | Generic function collecting any further lines belonging to the
--- list entry and recursively collecting any further lists in the
--- same paragraph. Usually used as
---
--- > someListFunction = listBeginning *> innerList someListFunction
-innerList :: Parser [DocH mod IString] -> Parser [DocH mod IString]
-innerList item = do
- c <- takeLine
- (cs, items) <- more item
- let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
- return $ case items of
- Left p -> [contents `joinPara` p]
- Right i -> contents : i
-
--- | Parses definition lists.
-definitionList :: Parser [(DocH mod IString, DocH mod IString)]
-definitionList = do
- label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]"
- c <- takeLine
- (cs, items) <- more definitionList
- let contents = parseString . dropNLs . unlines $ c : cs
- return $ case items of
- Left p -> [(label, contents `joinPara` p)]
- Right i -> (label, contents) : i
-
--- | If possible, appends two 'Doc's under a 'DocParagraph' rather than
--- outside of it. This allows to get structures like
---
--- @DocParagraph (DocAppend … …)@
---
--- rather than
---
--- @DocAppend (DocParagraph …) …@
-joinPara :: DocH mod id -> DocH mod id -> DocH mod id
-joinPara (DocParagraph p) c = docParagraph $ docAppend p c
-joinPara d p = docAppend d p
-
--- | Drops all trailing newlines.
-dropNLs :: String -> String
-dropNLs = reverse . dropWhile (== '\n') . reverse
-
--- | Main worker for 'innerList' and 'definitionList'.
--- We need the 'Either' here to be able to tell in the respective functions
--- whether we're dealing with the next list or a nested paragraph.
-more :: Monoid a => Parser a
- -> Parser ([String], Either (DocH mod IString) a)
-more item = innerParagraphs <|> moreListItems item
- <|> moreContent item <|> pure ([], Right mempty)
-
--- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
-innerParagraphs :: Parser ([String], Either (DocH mod IString) a)
-innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
-
--- | Attemps to fetch the next list if possibly. Used by 'innerList' and
--- 'definitionList' to recursivly grab lists that aren't separated by a whole
--- paragraph.
-moreListItems :: Parser a
- -> Parser ([String], Either (DocH mod IString) a)
-moreListItems item = (,) [] . Right <$> (skipSpace *> item)
-
--- | Helper for 'innerList' and 'definitionList' which simply takes
--- a line of text and attempts to parse more list content with 'more'.
-moreContent :: Monoid a => Parser a
- -> Parser ([String], Either (DocH mod IString) a)
-moreContent item = first . (:) <$> nonEmptyLine <*> more item
-
--- | Runs the 'parseParas' parser on an indented paragraph.
--- The indentation is 4 spaces.
-indentedParagraphs :: Parser (DocH mod IString)
-indentedParagraphs = parseParas . concat <$> dropFrontOfPara " "
-
--- | Grab as many fully indented paragraphs as we can.
-dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
-dropFrontOfPara sp = do
- currentParagraph <- some (sp *> takeNonEmptyLine)
- followingParagraphs <-
- skipHorizontalSpace *> nextPar -- we have more paragraphs to take
- <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline
- <|> endOfInput *> return [] -- nothing more to take at all
- return (currentParagraph ++ followingParagraphs)
- where
- nextPar = (++) <$> nlList <*> dropFrontOfPara sp
- nlList = "\n" *> return ["\n"]
-
-nonSpace :: BS.ByteString -> Parser BS.ByteString
-nonSpace xs
- | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
- | otherwise = return xs
-
--- | Takes a non-empty, not fully whitespace line.
---
--- Doesn't discard the trailing newline.
-takeNonEmptyLine :: Parser String
-takeNonEmptyLine = do
- (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
-
-birdtracks :: Parser (DocH mod a)
-birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
- where
- line = skipHorizontalSpace *> ">" *> takeLine
-
-stripSpace :: [String] -> [String]
-stripSpace = fromMaybe <*> mapM strip'
- where
- strip' (' ':xs') = Just xs'
- strip' "" = Just ""
- strip' _ = Nothing
-
--- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
--- Consecutive examples are accepted.
-examples :: Parser (DocH mod a)
-examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
- where
- go :: Parser [Example]
- go = do
- prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
- expr <- takeLine
- (rs, es) <- resultAndMoreExamples
- return (makeExample prefix expr rs : es)
- where
- resultAndMoreExamples :: Parser ([String], [Example])
- resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
- where
- moreExamples :: Parser ([String], [Example])
- moreExamples = (,) [] <$> go
-
- result :: Parser ([String], [Example])
- result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
-
- makeExample :: String -> String -> [String] -> Example
- makeExample prefix expression res =
- Example (strip expression) result
- where
- result = map (substituteBlankLine . tryStripPrefix) res
-
- tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
-
- substituteBlankLine "<BLANKLINE>" = ""
- substituteBlankLine xs = xs
-
-nonEmptyLine :: Parser String
-nonEmptyLine = mfilter (any (not . isSpace)) takeLine
-
-takeLine :: Parser String
-takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
-
-endOfLine :: Parser ()
-endOfLine = void "\n" <|> endOfInput
-
--- | Property parser.
---
--- >>> parseOnly property "prop> hello world"
--- Right (DocProperty "hello world")
-property :: Parser (DocH mod a)
-property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
-
--- |
--- Paragraph level codeblock. Anything between the two delimiting @ is parsed
--- for markup.
-codeblock :: Parser (DocH mod IString)
-codeblock =
- DocCodeBlock . parseStringBS . dropSpaces
- <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
- where
- dropSpaces xs =
- let rs = decodeUtf8 xs
- in case splitByNl rs of
- [] -> xs
- ys -> case last ys of
- ' ':_ -> case mapM dropSpace ys of
- Nothing -> xs
- Just zs -> encodeUtf8 $ intercalate "\n" zs
- _ -> xs
-
- -- This is necessary because ‘lines’ swallows up a trailing newline
- -- and we lose information about whether the last line belongs to @ or to
- -- text which we need to decide whether we actually want to be dropping
- -- anything at all.
- splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
- _ -> Nothing)
- . ('\n' :)
-
- dropSpace "" = Just ""
- dropSpace (' ':xs) = Just xs
- dropSpace _ = Nothing
-
- block' = scan False p
- where
- p isNewline c
- | isNewline && c == '@' = Nothing
- | isNewline && isSpace c = Just isNewline
- | otherwise = Just $ c == '\n'
-
-hyperlink :: Parser (DocH mod a)
-hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
- <$> disallowNewline ("<" *> takeUntil ">")
- <|> autoUrl
-
-autoUrl :: Parser (DocH mod a)
-autoUrl = mkLink <$> url
- where
- url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
- mkLink :: BS.ByteString -> DocH mod a
- mkLink s = case BS.unsnoc s of
- Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
- _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
-
--- | Parses strings between identifier delimiters. Consumes all input that it
--- deems to be valid in an identifier. Note that it simply blindly consumes
--- characters and does no actual validation itself.
-parseValid :: Parser String
-parseValid = do
- vs' <- many' $ utf8String "⋆" <|> return <$> idChar
- let vs = concat vs'
- c <- peekChar
- case c of
- Just '`' -> return vs
- Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
- <|> return vs
- _ -> fail "outofvalid"
- where
- idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
- <|> digit <|> letter_ascii
-
--- | Parses UTF8 strings from ByteString streams.
-utf8String :: String -> Parser String
-utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
-
--- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
--- string it deems valid.
-identifier :: Parser (DocH mod IString)
-identifier = do
- o <- idDelim
- vid <- parseValid
- e <- idDelim
- return $ DocIdentifier (o, vid, e)
- where
- idDelim = char '\'' <|> char '`'
-
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = (\f -> f . f) $ dropWhile isSpace . reverse
-
-skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
-
-takeHorizontalSpace :: Parser BS.ByteString
-takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
-
-makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
- (uri, "") -> f uri Nothing
- (uri, label) -> f uri (Just $ dropWhile isSpace label)
- where
- -- As we don't parse these any further, we don't do any processing to the
- -- string so we at least remove escape character here. Perhaps we should
- -- actually be parsing the label at the very least?
- removeEscapes "" = ""
- removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
- removeEscapes ('\\':xs) = removeEscapes xs
- removeEscapes (x:xs) = x : removeEscapes xs
-
-takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
- where
- end = BS.unpack end_
-
- p :: (Bool, String) -> Char -> Maybe (Bool, String)
- p acc c = case acc of
- (True, _) -> Just (False, end)
- (_, []) -> Nothing
- (_, x:xs) | x == c -> Just (False, xs)
- _ -> Just (c == '\\', end)
-
- dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
- requireEnd = mfilter (BS.isSuffixOf end_)
-
- gotSome xs
- | BS.null xs = fail "didn't get any content"
- | otherwise = return xs
+import Documentation.Haddock.Parser
+import DynFlags (DynFlags)
+import FastString (mkFastString)
+import Documentation.Haddock.Types
+import Lexer (mkPState, unP, ParseResult(POk))
+import Parser (parseIdentifier)
+import RdrName (RdrName)
+import SrcLoc (mkRealSrcLoc, unLoc)
+import StringBuffer (stringToStringBuffer)
+
+{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
+parseParasMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName)
+parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas
+
+{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
+parseStringMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName)
+parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString
+
+parseIdent :: DynFlags -> String -> Maybe RdrName
+parseIdent dflags str0 =
+ let buffer = stringToStringBuffer str0
+ realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
+ pstate = mkPState dflags buffer realSrcLc
+ in case unP parseIdentifier pstate of
+ POk _ name -> Just (unLoc name)
+ _ -> Nothing
diff --git a/src/Haddock/Parser/Util.hs b/src/Haddock/Parser/Util.hs
deleted file mode 100644
index 29da91c0..00000000
--- a/src/Haddock/Parser/Util.hs
+++ /dev/null
@@ -1,28 +0,0 @@
-module Haddock.Parser.Util where
-
-import DynFlags (DynFlags)
-import FastString (mkFastString)
-import Haddock.Types
-import Haddock.Parser
-import Lexer (mkPState, unP, ParseResult(POk))
-import Parser (parseIdentifier)
-import RdrName (RdrName)
-import SrcLoc (mkRealSrcLoc, unLoc)
-import StringBuffer (stringToStringBuffer)
-
-{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
-parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas
-
-{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
-parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString
-
-parseIdent :: DynFlags -> String -> Maybe RdrName
-parseIdent dflags str0 =
- let buffer = stringToStringBuffer str0
- realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
- pstate = mkPState dflags buffer realSrcLc
- in case unP parseIdentifier pstate of
- POk _ name -> Just (unLoc name)
- _ -> Nothing
diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs
index cd615bf4..85b3a592 100644
--- a/src/Haddock/Types.hs
+++ b/src/Haddock/Types.hs
@@ -19,16 +19,16 @@ module Haddock.Types (
module Haddock.Types
, HsDocString, LHsDocString
, Fixity(..)
+ , module Documentation.Haddock.Types
) where
-import Data.Foldable
-import Data.Traversable
import Control.Exception
import Control.Arrow hiding ((<+>))
import Control.DeepSeq
import Data.Typeable
import Data.Map (Map)
import qualified Data.Map as Map
+import Documentation.Haddock.Types
import BasicTypes (Fixity(..))
import GHC hiding (NoLink)
import DynFlags (ExtensionFlag, Language)
@@ -316,36 +316,6 @@ type LDoc id = Located (Doc id)
type Doc id = DocH (ModuleName, OccName) id
-data DocH mod id
- = DocEmpty
- | DocAppend (DocH mod id) (DocH mod id)
- | DocString String
- | DocParagraph (DocH mod id)
- | DocIdentifier id
- | DocIdentifierUnchecked mod
- | DocModule String
- | DocWarning (DocH mod id)
- | DocEmphasis (DocH mod id)
- | DocMonospaced (DocH mod id)
- | DocBold (DocH mod id)
- | DocUnorderedList [DocH mod id]
- | DocOrderedList [DocH mod id]
- | DocDefList [(DocH mod id, DocH mod id)]
- | DocCodeBlock (DocH mod id)
- | DocHyperlink Hyperlink
- | DocPic Picture
- | DocAName String
- | DocProperty String
- | DocExamples [Example]
- | DocHeader (Header (DocH mod id))
- deriving (Functor, Foldable, Traversable)
-
-instance Foldable Header where
- foldMap f (Header _ a) = f a
-
-instance Traversable Header where
- traverse f (Header l a) = Header l `fmap` f a
-
instance (NFData a, NFData mod)
=> NFData (DocH mod a) where
rnf doc = case doc of
@@ -376,23 +346,6 @@ instance NFData Name
instance NFData OccName
instance NFData ModuleName
-
-data Hyperlink = Hyperlink
- { hyperlinkUrl :: String
- , hyperlinkLabel :: Maybe String
- } deriving (Eq, Show)
-
-
-data Picture = Picture
- { pictureUri :: String
- , pictureTitle :: Maybe String
- } deriving (Eq, Show)
-
-data Header id = Header
- { headerLevel :: Int
- , headerTitle :: id
- } deriving Functor
-
instance NFData id => NFData (Header id) where
rnf (Header a b) = a `deepseq` b `deepseq` ()
@@ -402,13 +355,6 @@ instance NFData Hyperlink where
instance NFData Picture where
rnf (Picture a b) = a `deepseq` b `deepseq` ()
-
-data Example = Example
- { exampleExpression :: String
- , exampleResult :: [String]
- } deriving (Eq, Show)
-
-
instance NFData Example where
rnf (Example a b) = a `deepseq` b `deepseq` ()
diff --git a/test/Helper.hs b/test/Helper.hs
deleted file mode 100644
index f0a3c05f..00000000
--- a/test/Helper.hs
+++ /dev/null
@@ -1,186 +0,0 @@
-module Helper where
-import DynFlags (Settings(..), DynFlags, defaultDynFlags)
-import Platform
-import PlatformConstants
-
-dynFlags :: DynFlags
-dynFlags = defaultDynFlags settings
- where
- settings = Settings {
- sTargetPlatform = platform
- , sGhcUsagePath = error "Haddock.ParserSpec.sGhcUsagePath"
- , sGhciUsagePath = error "Haddock.ParserSpec.sGhciUsagePath"
- , sTopDir = error "Haddock.ParserSpec.sTopDir"
- , sTmpDir = error "Haddock.ParserSpec.sTmpDir"
- , sRawSettings = []
- , sExtraGccViaCFlags = error "Haddock.ParserSpec.sExtraGccViaCFlags"
- , sSystemPackageConfig = error "Haddock.ParserSpec.sSystemPackageConfig"
- , sPgm_L = error "Haddock.ParserSpec.sPgm_L"
- , sPgm_P = error "Haddock.ParserSpec.sPgm_P"
- , sPgm_F = error "Haddock.ParserSpec.sPgm_F"
- , sPgm_c = error "Haddock.ParserSpec.sPgm_c"
- , sPgm_s = error "Haddock.ParserSpec.sPgm_s"
- , sPgm_a = error "Haddock.ParserSpec.sPgm_a"
- , sPgm_l = error "Haddock.ParserSpec.sPgm_l"
- , sPgm_dll = error "Haddock.ParserSpec.sPgm_dll"
- , sPgm_T = error "Haddock.ParserSpec.sPgm_T"
- , sPgm_sysman = error "Haddock.ParserSpec.sPgm_sysman"
- , sPgm_windres = error "Haddock.ParserSpec.sPgm_windres"
- , sPgm_libtool = error "Haddock.ParserSpec.sPgm_libtool"
- , sPgm_lo = error "Haddock.ParserSpec.sPgm_lo"
- , sPgm_lc = error "Haddock.ParserSpec.sPgm_lc"
- , sOpt_L = []
- , sOpt_P = []
- , sOpt_F = []
- , sOpt_c = []
- , sOpt_a = []
- , sOpt_l = []
- , sOpt_windres = []
- , sOpt_lo = []
- , sOpt_lc = []
- , sLdSupportsCompactUnwind = error "Haddock.ParserSpec.sLdSupportsCompactUnwind"
- , sLdSupportsBuildId = error "Haddock.ParserSpec.sLdSupportsBuildId "
- , sLdSupportsFilelist = error "Haddock.ParserSpec.sLdSupportsFilelist "
- , sLdIsGnuLd = error "Haddock.ParserSpec.sLdIsGnuLd"
- , sPlatformConstants = platformConstants
- }
- platform = Platform {
- platformArch = ArchUnknown
- , platformOS = OSUnknown
- , platformWordSize = 64
- , platformHasGnuNonexecStack = False
- , platformHasIdentDirective = False
- , platformHasSubsectionsViaSymbols = False
- , platformUnregisterised = error "Haddock.ParserSpec.platformUnregisterised"
- }
-
- platformConstants = PlatformConstants {
- pc_platformConstants = ()
- , pc_STD_HDR_SIZE = 0
- , pc_PROF_HDR_SIZE = 0
- , pc_BLOCK_SIZE = 0
- , pc_BLOCKS_PER_MBLOCK = 0
- , pc_OFFSET_StgRegTable_rR1 = 0
- , pc_OFFSET_StgRegTable_rR2 = 0
- , pc_OFFSET_StgRegTable_rR3 = 0
- , pc_OFFSET_StgRegTable_rR4 = 0
- , pc_OFFSET_StgRegTable_rR5 = 0
- , pc_OFFSET_StgRegTable_rR6 = 0
- , pc_OFFSET_StgRegTable_rR7 = 0
- , pc_OFFSET_StgRegTable_rR8 = 0
- , pc_OFFSET_StgRegTable_rR9 = 0
- , pc_OFFSET_StgRegTable_rR10 = 0
- , pc_OFFSET_StgRegTable_rF1 = 0
- , pc_OFFSET_StgRegTable_rF2 = 0
- , pc_OFFSET_StgRegTable_rF3 = 0
- , pc_OFFSET_StgRegTable_rF4 = 0
- , pc_OFFSET_StgRegTable_rF5 = 0
- , pc_OFFSET_StgRegTable_rF6 = 0
- , pc_OFFSET_StgRegTable_rD1 = 0
- , pc_OFFSET_StgRegTable_rD2 = 0
- , pc_OFFSET_StgRegTable_rD3 = 0
- , pc_OFFSET_StgRegTable_rD4 = 0
- , pc_OFFSET_StgRegTable_rD5 = 0
- , pc_OFFSET_StgRegTable_rD6 = 0
- , pc_OFFSET_StgRegTable_rXMM1 = 0
- , pc_OFFSET_StgRegTable_rXMM2 = 0
- , pc_OFFSET_StgRegTable_rXMM3 = 0
- , pc_OFFSET_StgRegTable_rXMM4 = 0
- , pc_OFFSET_StgRegTable_rXMM5 = 0
- , pc_OFFSET_StgRegTable_rXMM6 = 0
- , pc_OFFSET_StgRegTable_rL1 = 0
- , pc_OFFSET_StgRegTable_rSp = 0
- , pc_OFFSET_StgRegTable_rSpLim = 0
- , pc_OFFSET_StgRegTable_rHp = 0
- , pc_OFFSET_StgRegTable_rHpLim = 0
- , pc_OFFSET_StgRegTable_rCCCS = 0
- , pc_OFFSET_StgRegTable_rCurrentTSO = 0
- , pc_OFFSET_StgRegTable_rCurrentNursery = 0
- , pc_OFFSET_StgRegTable_rHpAlloc = 0
- , pc_OFFSET_stgEagerBlackholeInfo = 0
- , pc_OFFSET_stgGCEnter1 = 0
- , pc_OFFSET_stgGCFun = 0
- , pc_OFFSET_Capability_r = 0
- , pc_OFFSET_bdescr_start = 0
- , pc_OFFSET_bdescr_free = 0
- , pc_OFFSET_bdescr_blocks = 0
- , pc_SIZEOF_CostCentreStack = 0
- , pc_OFFSET_CostCentreStack_mem_alloc = 0
- , pc_REP_CostCentreStack_mem_alloc = 0
- , pc_OFFSET_CostCentreStack_scc_count = 0
- , pc_REP_CostCentreStack_scc_count = 0
- , pc_OFFSET_StgHeader_ccs = 0
- , pc_OFFSET_StgHeader_ldvw = 0
- , pc_SIZEOF_StgSMPThunkHeader = 0
- , pc_OFFSET_StgEntCounter_allocs = 0
- , pc_REP_StgEntCounter_allocs = 0
- , pc_OFFSET_StgEntCounter_allocd = 0
- , pc_REP_StgEntCounter_allocd = 0
- , pc_OFFSET_StgEntCounter_registeredp = 0
- , pc_OFFSET_StgEntCounter_link = 0
- , pc_OFFSET_StgEntCounter_entry_count = 0
- , pc_SIZEOF_StgUpdateFrame_NoHdr = 0
- , pc_SIZEOF_StgMutArrPtrs_NoHdr = 0
- , pc_OFFSET_StgMutArrPtrs_ptrs = 0
- , pc_OFFSET_StgMutArrPtrs_size = 0
- , pc_SIZEOF_StgArrWords_NoHdr = 0
- , pc_OFFSET_StgTSO_cccs = 0
- , pc_OFFSET_StgTSO_stackobj = 0
- , pc_OFFSET_StgStack_sp = 0
- , pc_OFFSET_StgStack_stack = 0
- , pc_OFFSET_StgUpdateFrame_updatee = 0
- , pc_SIZEOF_StgFunInfoExtraRev = 0
- , pc_MAX_SPEC_SELECTEE_SIZE = 0
- , pc_MAX_SPEC_AP_SIZE = 0
- , pc_MIN_PAYLOAD_SIZE = 0
- , pc_MIN_INTLIKE = 0
- , pc_MAX_INTLIKE = 0
- , pc_MIN_CHARLIKE = 0
- , pc_MAX_CHARLIKE = 0
- , pc_MUT_ARR_PTRS_CARD_BITS = 0
- , pc_MAX_Vanilla_REG = 0
- , pc_MAX_Float_REG = 0
- , pc_MAX_Double_REG = 0
- , pc_MAX_Long_REG = 0
- , pc_MAX_XMM_REG = 0
- , pc_MAX_Real_Vanilla_REG = 0
- , pc_MAX_Real_Float_REG = 0
- , pc_MAX_Real_Double_REG = 0
- , pc_MAX_Real_XMM_REG = 0
- , pc_MAX_Real_Long_REG = 0
- , pc_RESERVED_C_STACK_BYTES = 0
- , pc_RESERVED_STACK_WORDS = 0
- , pc_AP_STACK_SPLIM = 0
- , pc_WORD_SIZE = 0
- , pc_DOUBLE_SIZE = 0
- , pc_CINT_SIZE = 0
- , pc_CLONG_SIZE = 0
- , pc_CLONG_LONG_SIZE = 0
- , pc_BITMAP_BITS_SHIFT = 0
- , pc_TAG_BITS = 0
- , pc_WORDS_BIGENDIAN = False
- , pc_DYNAMIC_BY_DEFAULT = False
- , pc_LDV_SHIFT = 0
- , pc_ILDV_CREATE_MASK = 0
- , pc_ILDV_STATE_CREATE = 0
- , pc_ILDV_STATE_USE = 0
- , pc_OFFSET_StgRegTable_rYMM1 = 0
- , pc_OFFSET_StgRegTable_rYMM2 = 0
- , pc_OFFSET_StgRegTable_rYMM3 = 0
- , pc_OFFSET_StgRegTable_rYMM4 = 0
- , pc_OFFSET_StgRegTable_rYMM5 = 0
- , pc_OFFSET_StgRegTable_rYMM6 = 0
- , pc_OFFSET_StgRegTable_rZMM1 = 0
- , pc_OFFSET_StgRegTable_rZMM2 = 0
- , pc_OFFSET_StgRegTable_rZMM3 = 0
- , pc_OFFSET_StgRegTable_rZMM4 = 0
- , pc_OFFSET_StgRegTable_rZMM5 = 0
- , pc_OFFSET_StgRegTable_rZMM6 = 0
- , pc_OFFSET_StgFunInfoExtraFwd_arity = 0
- , pc_REP_StgFunInfoExtraFwd_arity = 0
- , pc_OFFSET_StgFunInfoExtraRev_arity = 0
- , pc_REP_StgFunInfoExtraRev_arity = 0
- , pc_OFFSET_StgArrWords_bytes = 0
- , pc_SIZEOF_StgSmallMutArrPtrs_NoHdr = 0
- , pc_OFFSET_StgSmallMutArrPtrs_ptrs = 0
- }