Move parser + parser tests out to own package.

We move some types out that are necessary as well and then re-export and specialise them in the core Haddock. Reason for moving out spec tests is that if we're working on the parser, we can simply work on that and we can ignore the rest of Haddock. The downside is that it's a little inconvenient if at the end of the day we want to see that everything passes.
author: Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> 2014-05-05 09:01:03 +0200
committer: Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> 2014-05-05 11:00:41 +0200
commit: cc269e6b0b615e9e237c35a985e4ace7b9ab0dd9 (patch)
tree: f0264138c81909151f9724c1f02f7bf8b30803cb /src
parent: 7ac2d0f2d31c2e1c7ede09828f3d5ba5626bd0d4 (diff)
9 files changed, 52 insertions, 715 deletions
diff --git a/src/Documentation/Haddock.hs b/src/Documentation/Haddock.hs
index 36115a2a..655a9723 100644
--- a/src/Documentation/Haddock.hs
+++ b/src/Documentation/Haddock.hs
@@ -33,7 +33,8 @@ module Documentation.Haddock (
   InstHead,
 
   -- * Documentation comments
-  Doc(..),
+  Doc,
+  DocH(..),
   Example(..),
   Hyperlink(..),
   DocMarkup(..),
diff --git a/src/Haddock.hs b/src/Haddock.hs
index 66dfb168..78844c96 100644
--- a/src/Haddock.hs
+++ b/src/Haddock.hs
@@ -25,7 +25,7 @@ import Haddock.Backends.Xhtml.Themes (getThemes)
 import Haddock.Backends.LaTeX
 import Haddock.Backends.Hoogle
 import Haddock.Interface
-import Haddock.Parser.Util
+import Haddock.Parser
 import Haddock.Types
 import Haddock.Version
 import Haddock.InterfaceFile
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index d812aee2..79a59ac2 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,55 +1,18 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
-module Haddock.Doc (
-  docAppend
-, docParagraph
-, combineDocumentation
-) where
+module Haddock.Doc ( module Documentation.Haddock.Doc
+                   , docCodeBlock
+                   , combineDocumentation
+                   ) where
 
 import Data.Maybe
 import Data.Monoid
+import Documentation.Haddock.Doc
 import Haddock.Types
-import Data.Char (isSpace)
-
--- We put it here so that we can avoid a circular import
--- anything relevant imports this module anyway
-instance Monoid (DocH mod id) where
-  mempty  = DocEmpty
-  mappend = docAppend
 
 combineDocumentation :: Documentation name -> Maybe (Doc name)
 combineDocumentation (Documentation Nothing Nothing) = Nothing
-combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
-
-docAppend :: DocH mod id -> DocH mod id -> DocH mod id
-docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
-docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
-docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2)
-docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2)
-docAppend DocEmpty d = d
-docAppend d DocEmpty = d
-docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
-docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
-docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
-docAppend d1 d2 = DocAppend d1 d2
-
--- again to make parsing easier - we spot a paragraph whose only item
--- is a DocMonospaced and make it into a DocCodeBlock
-docParagraph :: DocH mod id -> DocH mod id
-docParagraph (DocMonospaced p)
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocString s1) (DocMonospaced p))
-  | all isSpace s1
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocString s1)
-    (DocAppend (DocMonospaced p) (DocString s2)))
-  | all isSpace s1 && all isSpace s2
-  = DocCodeBlock (docCodeBlock p)
-docParagraph (DocAppend (DocMonospaced p) (DocString s2))
-  | all isSpace s2
-  = DocCodeBlock (docCodeBlock p)
-docParagraph p
-  = DocParagraph p
-
+combineDocumentation (Documentation mDoc mWarning)   =
+  Just (fromMaybe mempty mWarning <> fromMaybe mempty mDoc)
 
 -- Drop trailing whitespace from @..@ code blocks.  Otherwise this:
 --
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 01276310..d9d4ae58 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -18,31 +18,29 @@ module Haddock.Interface.LexParseRn
   , processModuleHeader
   ) where
 
-import qualified Data.IntSet as IS
-import Haddock.Types
-import Haddock.Parser.Util
-import Haddock.Interface.ParseModuleHeader
-import Haddock.Doc
-
 import Control.Applicative
+import Data.IntSet (toList)
 import Data.List
 import Data.Maybe
+import Data.Monoid ((<>))
+import DynFlags (ExtensionFlag(..), languageExtensions)
 import FastString
 import GHC
-import DynFlags (ExtensionFlag(..), languageExtensions)
+import Haddock.Interface.ParseModuleHeader
+import Haddock.Parser
+import Haddock.Types
 import Name
-import Outputable
+import Outputable (showPpr)
 import RdrName
 
 processDocStrings :: DynFlags -> GlobalRdrEnv -> [HsDocString] -> ErrMsgM (Maybe (Doc Name))
 processDocStrings dflags gre strs = do
   docs <- catMaybes <$> mapM (processDocStringParas dflags gre) strs
-  let doc = foldl' docAppend DocEmpty docs
+  let doc = foldl' (<>) DocEmpty docs
   case doc of
     DocEmpty -> return Nothing
     _ -> return (Just doc)
 
-
 processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
 processDocStringParas = process parseParasMaybe
 
@@ -86,7 +84,7 @@ processModuleHeader dflags gre safety mayStr = do
 
   let flags :: [ExtensionFlag]
       -- We remove the flags implied by the language setting and we display the language instead
-      flags = map toEnum (IS.toList $ extensionFlags dflags) \\ languageExtensions (language dflags)
+      flags = map toEnum (toList $ extensionFlags dflags) \\ languageExtensions (language dflags)
   return (hmi { hmi_safety = Just $ showPpr dflags safety
               , hmi_language = language dflags
               , hmi_extensions = flags
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index c155a83b..0be2511f 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -12,7 +12,7 @@
 module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where
 
 import Haddock.Types
-import Haddock.Parser.Util
+import Haddock.Parser
 
 import RdrName
 import DynFlags
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index 1489ae84..720f442b 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -13,504 +13,35 @@
 -- Stability   :  experimental
 -- Portability :  portable
 
-module Haddock.Parser ( parseString, parseParas
-                      , overIdentifier, toRegular
-                      , takeUntil
+module Haddock.Parser ( module Documentation.Haddock.Parser
+                      , parseParasMaybe
+                      , parseStringMaybe
+                      , parseIdent
                       ) where
 
-import           Prelude hiding (takeWhile)
-import           Control.Arrow (first)
-import           Control.Monad (void, mfilter)
-import           Control.Applicative
-import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
-import qualified Data.ByteString.Char8 as BS
-import           Data.Char (chr, isAsciiUpper)
-import           Data.List (stripPrefix, intercalate, unfoldr)
-import           Data.Maybe (fromMaybe)
-import           Data.Monoid
-import           Haddock.Doc
-import           Haddock.Types hiding (Doc)
-import           Haddock.Utf8
-import           Data.ByteString.Char8 (ByteString)
-
--- | Identifier string surrounded with opening and closing quotes/backticks.
-type IString = (Char, String, Char)
-
--- | Drops the quotes/backticks around all identifiers, as if they
--- were valid but still 'String's.
-toRegular :: DocH mod IString -> DocH mod String
-toRegular = fmap (\(_, x, _) -> x)
-
--- | Maps over 'DocIdentifier's over 'String' with potentially failing
--- conversion using user-supplied function. If the conversion fails,
--- the identifier is deemed to not be valid and is treated as a
--- regular string.
-overIdentifier :: (String -> Maybe a)
-               -> DocH mod IString
-               -> DocH mod a
-overIdentifier f d = g d
-  where
-    g (DocIdentifier (o, x, e)) = case f x of
-      Nothing -> DocString $ o : x ++ [e]
-      Just x' -> DocIdentifier x'
-    g DocEmpty = DocEmpty
-    g (DocAppend x x') = DocAppend (g x) (g x')
-    g (DocString x) = DocString x
-    g (DocParagraph x) = DocParagraph $ g x
-    g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x
-    g (DocModule x) = DocModule x
-    g (DocWarning x) = DocWarning $ g x
-    g (DocEmphasis x) = DocEmphasis $ g x
-    g (DocMonospaced x) = DocMonospaced $ g x
-    g (DocBold x) = DocBold $ g x
-    g (DocUnorderedList x) = DocUnorderedList $ fmap g x
-    g (DocOrderedList x) = DocOrderedList $ fmap g x
-    g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x
-    g (DocCodeBlock x) = DocCodeBlock $ g x
-    g (DocHyperlink x) = DocHyperlink x
-    g (DocPic x) = DocPic x
-    g (DocAName x) = DocAName x
-    g (DocProperty x) = DocProperty x
-    g (DocExamples x) = DocExamples x
-    g (DocHeader (Header l x)) = DocHeader . Header l $ g x
-
-parse :: Parser a -> BS.ByteString -> a
-parse p = either err id . parseOnly (p <* endOfInput)
-  where
-    err = error . ("Haddock.Parser.parse: " ++)
-
--- | Main entry point to the parser. Appends the newline character
--- to the input string.
-parseParas :: String -- ^ String to parse
-           -> DocH mod IString
-parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
-  where
-    p :: Parser (DocH mod IString)
-    p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
-
--- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
--- drops leading whitespace and encodes the string to UTF8 first.
-parseString :: String -> DocH mod IString
-parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
-
-parseStringBS :: BS.ByteString -> DocH mod IString
-parseStringBS = parse p
-  where
-    p :: Parser (DocH mod IString)
-    p = mconcat <$> many (monospace <|> anchor <|> identifier
-                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold
-                          <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar)
-
--- | Parses and processes
--- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
---
--- >>> parseOnly encodedChar "&#65;&#66;&#67;"
--- Right (DocString "ABC")
-encodedChar :: Parser (DocH mod a)
-encodedChar = "&#" *> c <* ";"
-  where
-    c = DocString . return . chr <$> num
-    num = hex <|> decimal
-    hex = ("x" <|> "X") *> hexadecimal
-
-specialChar :: [Char]
-specialChar = "_/<@\"&'`"
-
--- | Plain, regular parser for text. Called as one of the last parsers
--- to ensure that we have already given a chance to more meaningful parsers
--- before capturing their characers.
-string' :: Parser (DocH mod a)
-string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
-  where
-    unescape "" = ""
-    unescape ('\\':x:xs) = x : unescape xs
-    unescape (x:xs) = x : unescape xs
-
--- | Skips a single special character and treats it as a plain string.
--- This is done to skip over any special characters belonging to other
--- elements but which were not deemed meaningful at their positions.
-skipSpecialChar :: Parser (DocH mod a)
-skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
-
--- | Emphasis parser.
---
--- >>> parseOnly emphasis "/Hello world/"
--- Right (DocEmphasis (DocString "Hello world"))
-emphasis :: Parser (DocH mod IString)
-emphasis = DocEmphasis . parseStringBS <$>
-  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
-
--- | Bold parser.
---
--- >>> parseOnly bold "__Hello world__"
--- Right (DocBold (DocString "Hello world"))
-bold :: Parser (DocH mod IString)
-bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
-
-disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
-disallowNewline = mfilter ('\n' `BS.notElem`)
-
--- | Like `takeWhile`, but unconditionally take escaped characters.
-takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile_ p = scan False p_
-  where
-    p_ escaped c
-      | escaped = Just False
-      | not $ p c = Nothing
-      | otherwise = Just (c == '\\')
-
--- | Like `takeWhile1`, but unconditionally take escaped characters.
-takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile1_ = mfilter (not . BS.null) . takeWhile_
-
--- | Text anchors to allow for jumping around the generated documentation.
---
--- >>> parseOnly anchor "#Hello world#"
--- Right (DocAName "Hello world")
-anchor :: Parser (DocH mod a)
-anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
-
--- | Monospaced strings.
---
--- >>> parseOnly monospace "@cruel@"
--- Right (DocMonospaced (DocString "cruel"))
-monospace :: Parser (DocH mod IString)
-monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
-
-moduleName :: Parser (DocH mod a)
-moduleName = DocModule <$> (char '"' *> modid <* char '"')
-  where
-    modid = intercalate "." <$> conid `sepBy1` "."
-    conid = (:)
-      <$> satisfy isAsciiUpper
-      -- NOTE: According to Haskell 2010 we shouldd actually only
-      -- accept {small | large | digit | ' } here.  But as we can't
-      -- match on unicode characters, this is currently not possible.
-      <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
-
--- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
--- a title for the picture.
---
--- >>> parseOnly picture "<<hello.png>>"
--- Right (DocPic (Picture "hello.png" Nothing))
--- >>> parseOnly picture "<<hello.png world>>"
--- Right (DocPic (Picture "hello.png" (Just "world")))
-picture :: Parser (DocH mod a)
-picture = DocPic . makeLabeled Picture . decodeUtf8
-          <$> disallowNewline ("<<" *> takeUntil ">>")
-
--- | Paragraph parser, called by 'parseParas'.
-paragraph :: Parser (DocH mod IString)
-paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
-                                       <|> property <|> header
-                                       <|> textParagraph)
-
-header :: Parser (DocH mod IString)
-header = do
-  let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
-      pser = foldl1 (<|>) psers
-  delim <- decodeUtf8 <$> pser
-  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
-  rest <- paragraph <|> return mempty
-  return $ docAppend (DocParagraph (DocHeader (Header (length delim) line))) rest
-
-textParagraph :: Parser (DocH mod IString)
-textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
-
--- | List parser, called by 'paragraph'.
-list :: Parser (DocH mod IString)
-list = DocUnorderedList <$> unorderedList
-       <|> DocOrderedList <$> orderedList
-       <|> DocDefList <$> definitionList
-
--- | Parses unordered (bullet) lists.
-unorderedList :: Parser [DocH mod IString]
-unorderedList = ("*" <|> "-") *> innerList unorderedList
-
--- | Parses ordered lists (numbered or dashed).
-orderedList :: Parser [DocH mod IString]
-orderedList = (paren <|> dot) *> innerList orderedList
-  where
-    dot = (decimal :: Parser Int) <* "."
-    paren = "(" *> decimal <* ")"
-
--- | Generic function collecting any further lines belonging to the
--- list entry and recursively collecting any further lists in the
--- same paragraph. Usually used as
---
--- > someListFunction = listBeginning *> innerList someListFunction
-innerList :: Parser [DocH mod IString] -> Parser [DocH mod IString]
-innerList item = do
-  c <- takeLine
-  (cs, items) <- more item
-  let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
-  return $ case items of
-    Left p -> [contents `joinPara` p]
-    Right i -> contents : i
-
--- | Parses definition lists.
-definitionList :: Parser [(DocH mod IString, DocH mod IString)]
-definitionList = do
-  label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]"
-  c <- takeLine
-  (cs, items) <- more definitionList
-  let contents = parseString . dropNLs . unlines $ c : cs
-  return $ case items of
-    Left p -> [(label, contents `joinPara` p)]
-    Right i -> (label, contents) : i
-
--- | If possible, appends two 'Doc's under a 'DocParagraph' rather than
--- outside of it. This allows to get structures like
---
--- @DocParagraph (DocAppend … …)@
---
--- rather than
---
--- @DocAppend (DocParagraph …) …@
-joinPara :: DocH mod id -> DocH mod id -> DocH mod id
-joinPara (DocParagraph p) c = docParagraph $ docAppend p c
-joinPara d p = docAppend d p
-
--- | Drops all trailing newlines.
-dropNLs :: String -> String
-dropNLs = reverse . dropWhile (== '\n') . reverse
-
--- | Main worker for 'innerList' and 'definitionList'.
--- We need the 'Either' here to be able to tell in the respective functions
--- whether we're dealing with the next list or a nested paragraph.
-more :: Monoid a => Parser a
-     -> Parser ([String], Either (DocH mod IString) a)
-more item = innerParagraphs <|> moreListItems item
-            <|> moreContent item <|> pure ([], Right mempty)
-
--- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
-innerParagraphs :: Parser ([String], Either (DocH mod IString) a)
-innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
-
--- | Attemps to fetch the next list if possibly. Used by 'innerList' and
--- 'definitionList' to recursivly grab lists that aren't separated by a whole
--- paragraph.
-moreListItems :: Parser a
-              -> Parser ([String], Either (DocH mod IString) a)
-moreListItems item = (,) [] . Right <$> (skipSpace *> item)
-
--- | Helper for 'innerList' and 'definitionList' which simply takes
--- a line of text and attempts to parse more list content with 'more'.
-moreContent :: Monoid a => Parser a
-            -> Parser ([String], Either (DocH mod IString) a)
-moreContent item = first . (:) <$> nonEmptyLine <*> more item
-
--- | Runs the 'parseParas' parser on an indented paragraph.
--- The indentation is 4 spaces.
-indentedParagraphs :: Parser (DocH mod IString)
-indentedParagraphs = parseParas . concat <$> dropFrontOfPara "    "
-
--- | Grab as many fully indented paragraphs as we can.
-dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
-dropFrontOfPara sp = do
-  currentParagraph <- some (sp *> takeNonEmptyLine)
-  followingParagraphs <-
-    skipHorizontalSpace *> nextPar -- we have more paragraphs to take
-    <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline
-    <|> endOfInput *> return [] -- nothing more to take at all
-  return (currentParagraph ++ followingParagraphs)
-  where
-    nextPar = (++) <$> nlList <*> dropFrontOfPara sp
-    nlList = "\n" *> return ["\n"]
-
-nonSpace :: BS.ByteString -> Parser BS.ByteString
-nonSpace xs
-  | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
-  | otherwise = return xs
-
--- | Takes a non-empty, not fully whitespace line.
---
---  Doesn't discard the trailing newline.
-takeNonEmptyLine :: Parser String
-takeNonEmptyLine = do
-    (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
-
-birdtracks :: Parser (DocH mod a)
-birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
-  where
-    line = skipHorizontalSpace *> ">" *> takeLine
-
-stripSpace :: [String] -> [String]
-stripSpace = fromMaybe <*> mapM strip'
-  where
-    strip' (' ':xs') = Just xs'
-    strip' "" = Just ""
-    strip' _  = Nothing
-
--- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
--- Consecutive examples are accepted.
-examples :: Parser (DocH mod a)
-examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
-  where
-    go :: Parser [Example]
-    go = do
-      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
-      expr <- takeLine
-      (rs, es) <- resultAndMoreExamples
-      return (makeExample prefix expr rs : es)
-      where
-        resultAndMoreExamples :: Parser ([String], [Example])
-        resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
-          where
-            moreExamples :: Parser ([String], [Example])
-            moreExamples = (,) [] <$> go
-
-            result :: Parser ([String], [Example])
-            result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
-
-    makeExample :: String -> String -> [String] -> Example
-    makeExample prefix expression res =
-      Example (strip expression) result
-      where
-        result = map (substituteBlankLine . tryStripPrefix) res
-
-        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
-
-        substituteBlankLine "<BLANKLINE>" = ""
-        substituteBlankLine xs = xs
-
-nonEmptyLine :: Parser String
-nonEmptyLine = mfilter (any (not . isSpace)) takeLine
-
-takeLine :: Parser String
-takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
-
-endOfLine :: Parser ()
-endOfLine = void "\n" <|> endOfInput
-
--- | Property parser.
---
--- >>> parseOnly property "prop> hello world"
--- Right (DocProperty "hello world")
-property :: Parser (DocH mod a)
-property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
-
--- |
--- Paragraph level codeblock. Anything between the two delimiting @ is parsed
--- for markup.
-codeblock :: Parser (DocH mod IString)
-codeblock =
-  DocCodeBlock . parseStringBS . dropSpaces
-  <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
-  where
-    dropSpaces xs =
-      let rs = decodeUtf8 xs
-      in case splitByNl rs of
-        [] -> xs
-        ys -> case last ys of
-          ' ':_ -> case mapM dropSpace ys of
-            Nothing -> xs
-            Just zs -> encodeUtf8 $ intercalate "\n" zs
-          _ -> xs
-
-    -- This is necessary because ‘lines’ swallows up a trailing newline
-    -- and we lose information about whether the last line belongs to @ or to
-    -- text which we need to decide whether we actually want to be dropping
-    -- anything at all.
-    splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s)
-                               _      -> Nothing)
-                . ('\n' :)
-
-    dropSpace "" = Just ""
-    dropSpace (' ':xs) = Just xs
-    dropSpace _ = Nothing
-
-    block' = scan False p
-      where
-        p isNewline c
-          | isNewline && c == '@' = Nothing
-          | isNewline && isSpace c = Just isNewline
-          | otherwise = Just $ c == '\n'
-
-hyperlink :: Parser (DocH mod a)
-hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
-              <$> disallowNewline ("<" *> takeUntil ">")
-            <|> autoUrl
-
-autoUrl :: Parser (DocH mod a)
-autoUrl = mkLink <$> url
-  where
-    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
-    mkLink :: BS.ByteString -> DocH mod a
-    mkLink s = case BS.unsnoc s of
-      Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
-      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
-
--- | Parses strings between identifier delimiters. Consumes all input that it
--- deems to be valid in an identifier. Note that it simply blindly consumes
--- characters and does no actual validation itself.
-parseValid :: Parser String
-parseValid = do
-  vs' <- many' $ utf8String "⋆" <|> return <$> idChar
-  let vs = concat vs'
-  c <- peekChar
-  case c of
-    Just '`' -> return vs
-    Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
-                 <|> return vs
-    _ -> fail "outofvalid"
-  where
-    idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^")
-             <|> digit <|> letter_ascii
-
--- | Parses UTF8 strings from ByteString streams.
-utf8String :: String -> Parser String
-utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
-
--- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
--- string it deems valid.
-identifier :: Parser (DocH mod IString)
-identifier = do
-  o <- idDelim
-  vid <- parseValid
-  e <- idDelim
-  return $ DocIdentifier (o, vid, e)
-  where
-    idDelim = char '\'' <|> char '`'
-
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = (\f -> f . f) $ dropWhile isSpace . reverse
-
-skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
-
-takeHorizontalSpace :: Parser BS.ByteString
-takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
-
-makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
-  (uri, "")    -> f uri Nothing
-  (uri, label) -> f uri (Just $ dropWhile isSpace label)
-  where
-    -- As we don't parse these any further, we don't do any processing to the
-    -- string so we at least remove escape character here. Perhaps we should
-    -- actually be parsing the label at the very least?
-    removeEscapes "" = ""
-    removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
-    removeEscapes ('\\':xs) = removeEscapes xs
-    removeEscapes (x:xs) = x : removeEscapes xs
-
-takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
-  where
-    end = BS.unpack end_
-
-    p :: (Bool, String) -> Char -> Maybe (Bool, String)
-    p acc c = case acc of
-      (True, _) -> Just (False, end)
-      (_, []) -> Nothing
-      (_, x:xs) | x == c -> Just (False, xs)
-      _ -> Just (c == '\\', end)
-
-    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
-    requireEnd = mfilter (BS.isSuffixOf end_)
-
-    gotSome xs
-      | BS.null xs = fail "didn't get any content"
-      | otherwise = return xs
+import Documentation.Haddock.Parser
+import DynFlags (DynFlags)
+import FastString (mkFastString)
+import Documentation.Haddock.Types
+import Lexer (mkPState, unP, ParseResult(POk))
+import Parser (parseIdentifier)
+import RdrName (RdrName)
+import SrcLoc (mkRealSrcLoc, unLoc)
+import StringBuffer (stringToStringBuffer)
+
+{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
+parseParasMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName)
+parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas
+
+{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
+parseStringMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName)
+parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString
+
+parseIdent :: DynFlags -> String -> Maybe RdrName
+parseIdent dflags str0 =
+  let buffer = stringToStringBuffer str0
+      realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
+      pstate = mkPState dflags buffer realSrcLc
+  in case unP parseIdentifier pstate of
+    POk _ name -> Just (unLoc name)
+    _ -> Nothing
diff --git a/src/Haddock/Parser/Util.hs b/src/Haddock/Parser/Util.hs
deleted file mode 100644
index 29da91c0..00000000
--- a/src/Haddock/Parser/Util.hs
+++ /dev/null
@@ -1,28 +0,0 @@
-module Haddock.Parser.Util where
-
-import DynFlags (DynFlags)
-import FastString (mkFastString)
-import Haddock.Types
-import Haddock.Parser
-import Lexer (mkPState, unP, ParseResult(POk))
-import Parser (parseIdentifier)
-import RdrName (RdrName)
-import SrcLoc (mkRealSrcLoc, unLoc)
-import StringBuffer (stringToStringBuffer)
-
-{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
-parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas
-
-{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
-parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
-parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString
-
-parseIdent :: DynFlags -> String -> Maybe RdrName
-parseIdent dflags str0 =
-  let buffer = stringToStringBuffer str0
-      realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
-      pstate = mkPState dflags buffer realSrcLc
-  in case unP parseIdentifier pstate of
-    POk _ name -> Just (unLoc name)
-    _ -> Nothing
diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs
index cd615bf4..85b3a592 100644
--- a/src/Haddock/Types.hs
+++ b/src/Haddock/Types.hs
@@ -19,16 +19,16 @@ module Haddock.Types (
   module Haddock.Types
   , HsDocString, LHsDocString
   , Fixity(..)
+  , module Documentation.Haddock.Types
  ) where
 
-import Data.Foldable
-import Data.Traversable
 import Control.Exception
 import Control.Arrow hiding ((<+>))
 import Control.DeepSeq
 import Data.Typeable
 import Data.Map (Map)
 import qualified Data.Map as Map
+import Documentation.Haddock.Types
 import BasicTypes (Fixity(..))
 import GHC hiding (NoLink)
 import DynFlags (ExtensionFlag, Language)
@@ -316,36 +316,6 @@ type LDoc id = Located (Doc id)
 
 type Doc id = DocH (ModuleName, OccName) id
 
-data DocH mod id
-  = DocEmpty
-  | DocAppend (DocH mod id) (DocH mod id)
-  | DocString String
-  | DocParagraph (DocH mod id)
-  | DocIdentifier id
-  | DocIdentifierUnchecked mod
-  | DocModule String
-  | DocWarning (DocH mod id)
-  | DocEmphasis (DocH mod id)
-  | DocMonospaced (DocH mod id)
-  | DocBold (DocH mod id)
-  | DocUnorderedList [DocH mod id]
-  | DocOrderedList [DocH mod id]
-  | DocDefList [(DocH mod id, DocH mod id)]
-  | DocCodeBlock (DocH mod id)
-  | DocHyperlink Hyperlink
-  | DocPic Picture
-  | DocAName String
-  | DocProperty String
-  | DocExamples [Example]
-  | DocHeader (Header (DocH mod id))
-  deriving (Functor, Foldable, Traversable)
-
-instance Foldable Header where
-  foldMap f (Header _ a) = f a
-
-instance Traversable Header where
-  traverse f (Header l a) = Header l `fmap` f a
-
 instance (NFData a, NFData mod)
          => NFData (DocH mod a) where
   rnf doc = case doc of
@@ -376,23 +346,6 @@ instance NFData Name
 instance NFData OccName
 instance NFData ModuleName
 
-
-data Hyperlink = Hyperlink
-  { hyperlinkUrl   :: String
-  , hyperlinkLabel :: Maybe String
-  } deriving (Eq, Show)
-
-
-data Picture = Picture
-  { pictureUri   :: String
-  , pictureTitle :: Maybe String
-  } deriving (Eq, Show)
-
-data Header id = Header
-  { headerLevel :: Int
-  , headerTitle :: id
-  } deriving Functor
-
 instance NFData id => NFData (Header id) where
   rnf (Header a b) = a `deepseq` b `deepseq` ()
 
@@ -402,13 +355,6 @@ instance NFData Hyperlink where
 instance NFData Picture where
   rnf (Picture a b) = a `deepseq` b `deepseq` ()
 
-
-data Example = Example
-  { exampleExpression :: String
-  , exampleResult     :: [String]
-  } deriving (Eq, Show)
-
-
 instance NFData Example where
   rnf (Example a b) = a `deepseq` b `deepseq` ()
 
diff --git a/src/Haddock/Utf8.hs b/src/Haddock/Utf8.hs
deleted file mode 100644
index 1fb0e818..00000000
--- a/src/Haddock/Utf8.hs
+++ /dev/null
@@ -1,74 +0,0 @@
-module Haddock.Utf8 (encodeUtf8, decodeUtf8) where
-import           Data.Bits ((.|.), (.&.), shiftL, shiftR)
-import qualified Data.ByteString as BS
-import           Data.Char (chr, ord)
-import           Data.Word (Word8)
-
--- | Helper that encodes and packs a 'String' into a 'BS.ByteString'
-encodeUtf8 :: String -> BS.ByteString
-encodeUtf8 = BS.pack . encode
-
--- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String'
-decodeUtf8 :: BS.ByteString -> String
-decodeUtf8 = decode . BS.unpack
-
--- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding
--- | Character to use when 'encode' or 'decode' fail for a byte.
-replacementCharacter :: Char
-replacementCharacter = '\xfffd'
-
--- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
-encode :: String -> [Word8]
-encode = concatMap (map fromIntegral . go . ord)
- where
-  go oc
-   | oc <= 0x7f       = [oc]
-
-   | oc <= 0x7ff      = [ 0xc0 + (oc `shiftR` 6)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-
-   | oc <= 0xffff     = [ 0xe0 + (oc `shiftR` 12)
-                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-   | otherwise        = [ 0xf0 + (oc `shiftR` 18)
-                        , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
-                        , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
-                        , 0x80 + oc .&. 0x3f
-                        ]
-
--- | Decode a UTF8 string packed into a list of Word8 values, directly to String
-decode :: [Word8] -> String
-decode [    ] = ""
-decode (c:cs)
-  | c < 0x80  = chr (fromEnum c) : decode cs
-  | c < 0xc0  = replacementCharacter : decode cs
-  | c < 0xe0  = multi1
-  | c < 0xf0  = multi_byte 2 0xf  0x800
-  | c < 0xf8  = multi_byte 3 0x7  0x10000
-  | c < 0xfc  = multi_byte 4 0x3  0x200000
-  | c < 0xfe  = multi_byte 5 0x1  0x4000000
-  | otherwise = replacementCharacter : decode cs
-  where
-    multi1 = case cs of
-      c1 : ds | c1 .&. 0xc0 == 0x80 ->
-        let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|.  fromEnum (c1 .&. 0x3f)
-        in if d >= 0x000080 then toEnum d : decode ds
-                            else replacementCharacter : decode ds
-      _ -> replacementCharacter : decode cs
-
-    multi_byte :: Int -> Word8 -> Int -> String
-    multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
-      where
-        aux 0 rs acc
-          | overlong <= acc && acc <= 0x10ffff &&
-            (acc < 0xd800 || 0xdfff < acc)     &&
-            (acc < 0xfffe || 0xffff < acc)      = chr acc : decode rs
-          | otherwise = replacementCharacter : decode rs
-
-        aux n (r:rs) acc
-          | r .&. 0xc0 == 0x80 = aux (n-1) rs
-                               $ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
-
-        aux _ rs     _ = replacementCharacter : decode rs
author	Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>	2014-05-05 09:01:03 +0200
committer	Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk>	2014-05-05 11:00:41 +0200
commit	cc269e6b0b615e9e237c35a985e4ace7b9ab0dd9 (patch)
tree	f0264138c81909151f9724c1f02f7bf8b30803cb /src
parent	7ac2d0f2d31c2e1c7ede09828f3d5ba5626bd0d4 (diff)