Merge pull request #857 from sjakobi/ghc-head-update-3

Update ghc-head
author: Alexander Biehl <alexbiehl@gmail.com> 2018-06-14 15:28:52 +0200
committer: GitHub <noreply@github.com> 2018-06-14 15:28:52 +0200
commit: 6247ec8b5a5bc8145ce851dce11eb617a380381c (patch)
tree: 7856c0dd1ddd0c1f3eef0422b0cd8e8a5a6b71cb /haddock-library/src/Documentation/Haddock
parent: 9a7f539d0c20654ff394f2ff99836412a6844df1 (diff)
parent: 095fa970b32c818ed4c06cefc00ba98aaff756fa (diff)
6 files changed, 581 insertions, 360 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Doc.hs b/haddock-library/src/Documentation/Haddock/Doc.hs
index 66bd1c97..297d30d6 100644
--- a/haddock-library/src/Documentation/Haddock/Doc.hs
+++ b/haddock-library/src/Documentation/Haddock/Doc.hs
@@ -27,16 +27,16 @@ metaDocAppend (MetaDoc { _meta = m, _doc = d })
               (MetaDoc { _meta = m', _doc = d' }) =
   MetaDoc { _meta = m' `metaAppend` m, _doc = d `docAppend` d' }
 
--- | This is not a monoidal append, it uses '<|>' for the '_version'.
+-- | This is not a monoidal append, it uses '<|>' for the '_version' and
+-- '_package'.
 metaAppend :: Meta -> Meta -> Meta
-metaAppend (Meta { _version = v }) (Meta { _version = v' }) =
-  Meta { _version = v <|> v' }
+metaAppend (Meta v1 p1) (Meta v2 p2) = Meta (v1 <|> v2) (p1 <|> p2)
 
 emptyMetaDoc :: MetaDoc mod id
 emptyMetaDoc = MetaDoc { _meta = emptyMeta, _doc = DocEmpty }
 
 emptyMeta :: Meta
-emptyMeta = Meta { _version = empty }
+emptyMeta = Meta empty empty
 
 docAppend :: DocH mod id -> DocH mod id -> DocH mod id
 docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
diff --git a/haddock-library/src/Documentation/Haddock/Markup.hs b/haddock-library/src/Documentation/Haddock/Markup.hs
index 1bf6c084..da8edcd4 100644
--- a/haddock-library/src/Documentation/Haddock/Markup.hs
+++ b/haddock-library/src/Documentation/Haddock/Markup.hs
@@ -30,6 +30,7 @@ markup m (DocMathDisplay mathjax)    = markupMathDisplay m mathjax
 markup m (DocProperty p)             = markupProperty m p
 markup m (DocExamples e)             = markupExample m e
 markup m (DocHeader (Header l t))    = markupHeader m (Header l (markup m t))
+markup m (DocTable (Table h b))      = markupTable m (Table (map (fmap (markup m)) h) (map (fmap (markup m)) b))
 
 markupPair :: DocMarkupH mod id a -> (DocH mod id, DocH mod id) -> (a, a)
 markupPair m (a,b) = (markup m a, markup m b)
@@ -59,5 +60,6 @@ idMarkup = Markup {
   markupMathDisplay          = DocMathDisplay,
   markupProperty             = DocProperty,
   markupExample              = DocExamples,
-  markupHeader               = DocHeader
+  markupHeader               = DocHeader,
+  markupTable                = DocTable
   }
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
index 8dc2a801..d79da40b 100644
--- a/haddock-library/src/Documentation/Haddock/Parser.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -1,5 +1,6 @@
+{-# LANGUAGE CPP               #-}
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE ViewPatterns      #-}
 -- |
 -- Module      :  Documentation.Haddock.Parser
 -- Copyright   :  (c) Mateusz Kowalczyk 2013-2014,
@@ -15,28 +16,63 @@
 -- to be
 --
 -- @'toRegular' . '_doc' . 'parseParas'@
-module Documentation.Haddock.Parser ( parseString, parseParas
-                                    , overIdentifier, toRegular, Identifier
-                                    ) where
+module Documentation.Haddock.Parser (
+  parseString,
+  parseParas,
+  overIdentifier,
+  toRegular,
+  Identifier
+) where
 
 import           Control.Applicative
 import           Control.Arrow (first)
 import           Control.Monad
-import qualified Data.ByteString.Char8 as BS
-import           Data.Char (chr, isAsciiUpper)
-import           Data.List (stripPrefix, intercalate, unfoldr)
-import           Data.Maybe (fromMaybe)
+import           Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace)
+import           Data.List (intercalate, unfoldr, elemIndex, notElem)
+import           Data.Maybe (fromMaybe, mapMaybe)
 import           Data.Monoid
+import qualified Data.Set as Set
 import           Documentation.Haddock.Doc
-import           Documentation.Haddock.Parser.Monad hiding (take, endOfLine)
+import           Documentation.Haddock.Parser.Monad
 import           Documentation.Haddock.Parser.Util
 import           Documentation.Haddock.Types
-import           Documentation.Haddock.Utf8
 import           Prelude hiding (takeWhile)
+import qualified Prelude as P
+
+import qualified Text.Parsec as Parsec
+import           Text.Parsec (try)
+
+import qualified Data.Text as T
+import           Data.Text (Text)
+
+#if MIN_VERSION_base(4,9,0)
+import           Text.Read.Lex                      (isSymbolChar)
+#else
+import           Data.Char                          (GeneralCategory (..),
+                                                     generalCategory)
+#endif
 
 -- $setup
 -- >>> :set -XOverloadedStrings
 
+#if !MIN_VERSION_base(4,9,0)
+-- inlined from base-4.10.0.0
+isSymbolChar :: Char -> Bool
+isSymbolChar c = not (isPuncChar c) && case generalCategory c of
+    MathSymbol           -> True
+    CurrencySymbol       -> True
+    ModifierSymbol       -> True
+    OtherSymbol          -> True
+    DashPunctuation      -> True
+    OtherPunctuation     -> c `notElem` ("'\"" :: String)
+    ConnectorPunctuation -> c /= '_'
+    _                    -> False
+  where
+    -- | The @special@ character class as defined in the Haskell Report.
+    isPuncChar :: Char -> Bool
+    isPuncChar = (`elem` (",;()[]{}`" :: String))
+#endif
+
 -- | Identifier string surrounded with opening and closing quotes/backticks.
 type Identifier = (Char, String, Char)
 
@@ -79,47 +115,72 @@ overIdentifier f d = g d
     g (DocProperty x) = DocProperty x
     g (DocExamples x) = DocExamples x
     g (DocHeader (Header l x)) = DocHeader . Header l $ g x
+    g (DocTable (Table h b)) = DocTable (Table (map (fmap g) h) (map (fmap g) b))
+
 
-parse :: Parser a -> BS.ByteString -> (ParserState, a)
-parse p = either err id . parseOnly (p <* endOfInput)
+choice' :: [Parser a] -> Parser a
+choice' [] = empty
+choice' [p] = p
+choice' (p : ps) = try p <|> choice' ps
+
+parse :: Parser a -> Text -> (ParserState, a)
+parse p = either err id . parseOnly (p <* Parsec.eof)
   where
     err = error . ("Haddock.Parser.parse: " ++)
 
 -- | Main entry point to the parser. Appends the newline character
 -- to the input string.
-parseParas :: String -- ^ String to parse
+parseParas :: Maybe Package
+           -> String -- ^ String to parse
            -> MetaDoc mod Identifier
-parseParas input = case parseParasState input of
-  (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state }
+parseParas pkg input = case parseParasState input of
+  (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state
+                                       , _package = pkg
+                                       }
                         , _doc = a
                         }
 
 parseParasState :: String -> (ParserState, DocH mod Identifier)
-parseParasState =
-    parse (p <* skipSpace) . encodeUtf8 . (++ "\n") . filter (/= '\r')
+parseParasState = parse (emptyLines *> p) . T.pack . (++ "\n") . filter (/= '\r')
   where
     p :: Parser (DocH mod Identifier)
-    p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
+    p = docConcat <$> many (paragraph <* emptyLines)
+
+    emptyLines :: Parser ()
+    emptyLines = void $ many (try (skipHorizontalSpace *> "\n"))
 
 parseParagraphs :: String -> Parser (DocH mod Identifier)
 parseParagraphs input = case parseParasState input of
-  (state, a) -> setParserState state >> return a
+  (state, a) -> Parsec.putState state *> pure a
 
--- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
--- drops leading whitespace and encodes the string to UTF8 first.
+-- | Variant of 'parseText' for 'String' instead of 'Text'
 parseString :: String -> DocH mod Identifier
-parseString = parseStringBS . encodeUtf8 . dropWhile isSpace . filter (/= '\r')
+parseString = parseText . T.pack
+
+-- | Parse a text paragraph. Actually just a wrapper over 'parseParagraph' which
+-- drops leading whitespace.
+parseText :: Text -> DocH mod Identifier
+parseText = parseParagraph . T.dropWhile isSpace . T.filter (/= '\r')
 
-parseStringBS :: BS.ByteString -> DocH mod Identifier
-parseStringBS = snd . parse p
+parseParagraph :: Text -> DocH mod Identifier
+parseParagraph = snd . parse p
   where
     p :: Parser (DocH mod Identifier)
-    p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName
-                            <|> picture <|> mathDisplay <|> mathInline
-                            <|> markdownImage
-                            <|> hyperlink <|> bold
-                            <|> emphasis <|> encodedChar <|> string'
-                            <|> skipSpecialChar)
+    p = docConcat <$> many (choice' [ monospace
+                                    , anchor
+                                    , identifier
+                                    , moduleName
+                                    , picture
+                                    , mathDisplay
+                                    , mathInline
+                                    , markdownImage
+                                    , hyperlink
+                                    , bold
+                                    , emphasis
+                                    , encodedChar
+                                    , string'
+                                    , skipSpecialChar
+                                    ])
 
 -- | Parses and processes
 -- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
@@ -143,7 +204,7 @@ specialChar = "_/<@\"&'`# "
 -- to ensure that we have already given a chance to more meaningful parsers
 -- before capturing their characers.
 string' :: Parser (DocH mod a)
-string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialChar)
+string' = DocString . unescape . T.unpack <$> takeWhile1_ (`notElem` specialChar)
   where
     unescape "" = ""
     unescape ('\\':x:xs) = x : unescape xs
@@ -153,45 +214,45 @@ string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialC
 -- This is done to skip over any special characters belonging to other
 -- elements but which were not deemed meaningful at their positions.
 skipSpecialChar :: Parser (DocH mod a)
-skipSpecialChar = DocString . return <$> satisfy (inClass specialChar)
+skipSpecialChar = DocString . return <$> Parsec.oneOf specialChar
 
 -- | Emphasis parser.
 --
 -- >>> parseString "/Hello world/"
 -- DocEmphasis (DocString "Hello world")
 emphasis :: Parser (DocH mod Identifier)
-emphasis = DocEmphasis . parseStringBS <$>
-  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
+emphasis = DocEmphasis . parseParagraph <$>
+  disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
 
 -- | Bold parser.
 --
 -- >>> parseString "__Hello world__"
 -- DocBold (DocString "Hello world")
 bold :: Parser (DocH mod Identifier)
-bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
+bold = DocBold . parseParagraph <$> disallowNewline ("__" *> takeUntil "__")
 
-disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
-disallowNewline = mfilter ('\n' `BS.notElem`)
+disallowNewline :: Parser Text -> Parser Text
+disallowNewline = mfilter (T.all (/= '\n'))
 
 -- | Like `takeWhile`, but unconditionally take escaped characters.
-takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile_ p = scan False p_
+takeWhile_ :: (Char -> Bool) -> Parser Text
+takeWhile_ p = scan p_ False
   where
     p_ escaped c
       | escaped = Just False
       | not $ p c = Nothing
       | otherwise = Just (c == '\\')
 
--- | Like `takeWhile1`, but unconditionally take escaped characters.
-takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile1_ = mfilter (not . BS.null) . takeWhile_
+-- | Like 'takeWhile1', but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser Text
+takeWhile1_ = mfilter (not . T.null) . takeWhile_
 
 -- | Text anchors to allow for jumping around the generated documentation.
 --
 -- >>> parseString "#Hello world#"
 -- DocAName "Hello world"
 anchor :: Parser (DocH mod a)
-anchor = DocAName . decodeUtf8 <$>
+anchor = DocAName . T.unpack <$>
          disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#")
 
 -- | Monospaced strings.
@@ -199,23 +260,22 @@ anchor = DocAName . decodeUtf8 <$>
 -- >>> parseString "@cruel@"
 -- DocMonospaced (DocString "cruel")
 monospace :: Parser (DocH mod Identifier)
-monospace = DocMonospaced . parseStringBS
+monospace = DocMonospaced . parseParagraph
             <$> ("@" *> takeWhile1_ (/= '@') <* "@")
 
--- | Module names: we try our reasonable best to only allow valid
--- Haskell module names, with caveat about not matching on technically
--- valid unicode symbols.
+-- | Module names.
+--
+-- Note that we allow '#' and '\' to support anchors (old style anchors are of
+-- the form "SomeModule\#anchor").
 moduleName :: Parser (DocH mod a)
-moduleName = DocModule <$> (char '"' *> modid <* char '"')
+moduleName = DocModule <$> ("\"" *> modid <* "\"")
   where
-    modid = intercalate "." <$> conid `sepBy1` "."
+    modid = intercalate "." <$> conid `Parsec.sepBy1` "."
     conid = (:)
-      <$> satisfy isAsciiUpper
-      -- NOTE: According to Haskell 2010 we should actually only
-      -- accept {small | large | digit | ' } here.  But as we can't
-      -- match on unicode characters, this is currently not possible.
-      -- Note that we allow ‘#’ to suport anchors.
-      <*> (decodeUtf8 <$> takeWhile (notInClass " .&[{}(=*)+]!|@/;,^?\"\n"))
+      <$> Parsec.satisfy (\c -> isAlpha c && isUpper c)
+      <*> many (conChar <|> Parsec.oneOf "\\#")
+
+    conChar = Parsec.alphaNum <|> Parsec.char '_'
 
 -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
 -- a title for the picture.
@@ -225,7 +285,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
 -- >>> parseString "<<hello.png world>>"
 -- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"})
 picture :: Parser (DocH mod a)
-picture = DocPic . makeLabeled Picture . decodeUtf8
+picture = DocPic . makeLabeled Picture
           <$> disallowNewline ("<<" *> takeUntil ">>")
 
 -- | Inline math parser, surrounded by \\( and \\).
@@ -233,7 +293,7 @@ picture = DocPic . makeLabeled Picture . decodeUtf8
 -- >>> parseString "\\(\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\)"
 -- DocMathInline "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"
 mathInline :: Parser (DocH mod a)
-mathInline = DocMathInline . decodeUtf8
+mathInline = DocMathInline . T.unpack 
              <$> disallowNewline  ("\\(" *> takeUntil "\\)")
 
 -- | Display math parser, surrounded by \\[ and \\].
@@ -241,7 +301,7 @@ mathInline = DocMathInline . decodeUtf8
 -- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]"
 -- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"
 mathDisplay :: Parser (DocH mod a)
-mathDisplay = DocMathDisplay . decodeUtf8
+mathDisplay = DocMathDisplay . T.unpack 
               <$> ("\\[" *> takeUntil "\\]")
 
 markdownImage :: Parser (DocH mod a)
@@ -251,25 +311,213 @@ markdownImage = fromHyperlink <$> ("!" *> linkParser)
 
 -- | Paragraph parser, called by 'parseParas'.
 paragraph :: Parser (DocH mod Identifier)
-paragraph = examples <|> do
-  indent <- takeIndent
-  choice
-    [ since
-    , unorderedList indent
-    , orderedList indent
-    , birdtracks
-    , codeblock
-    , property
-    , header
-    , textParagraphThatStartsWithMarkdownLink
-    , definitionList indent
-    , docParagraph <$> textParagraph
-    ]
+paragraph = choice' [ examples
+                    , table
+                    , do indent <- takeIndent
+                         choice' [ since
+                                 , unorderedList indent
+                                 , orderedList indent
+                                 , birdtracks
+                                 , codeblock
+                                 , property
+                                 , header
+                                 , textParagraphThatStartsWithMarkdownLink
+                                 , definitionList indent
+                                 , docParagraph <$> textParagraph
+                                 ]
+                    ]
+
+-- | Provides support for grid tables.
+--
+-- Tables are composed by an optional header and body. The header is composed by
+-- a single row. The body is composed by a non-empty list of rows.
+--
+-- Example table with header:
+--
+-- > +----------+----------+
+-- > | /32bit/  |   64bit  |
+-- > +==========+==========+
+-- > |  0x0000  | @0x0000@ |
+-- > +----------+----------+
+--
+-- Algorithms loosely follows ideas in
+-- http://docutils.sourceforge.net/docutils/parsers/rst/tableparser.py
+--
+table :: Parser (DocH mod Identifier)
+table = do
+    -- first we parse the first row, which determines the width of the table
+    firstRow <- parseFirstRow
+    let len = T.length firstRow
+
+    -- then we parse all consequtive rows starting and ending with + or |,
+    -- of the width `len`.
+    restRows <- many (try (parseRestRows len))
+
+    -- Now we gathered the table block, the next step is to split the block
+    -- into cells.
+    DocTable <$> tableStepTwo len (firstRow : restRows)
+  where
+    parseFirstRow :: Parser Text
+    parseFirstRow = do
+        skipHorizontalSpace
+        -- upper-left corner is +
+        c <- Parsec.char '+'
+        cs <- some (Parsec.char '-' <|> Parsec.char '+')
+
+        -- upper right corner is + too
+        guard (last cs == '+')
+
+        -- trailing space
+        skipHorizontalSpace
+        _ <- Parsec.newline
+
+        return (T.cons c $ T.pack cs)
+
+    parseRestRows :: Int -> Parser Text
+    parseRestRows l = do
+        skipHorizontalSpace
 
+        c <- Parsec.char '|' <|> Parsec.char '+'
+        bs <- scan predicate (l - 2)
+        c2 <- Parsec.char '|' <|> Parsec.char '+'
+
+        -- trailing space
+        skipHorizontalSpace
+        _ <- Parsec.newline
+
+        return (T.cons c (T.snoc bs c2))
+      where
+        predicate n c
+            | n <= 0    = Nothing
+            | c == '\n' = Nothing
+            | otherwise = Just (n - 1)
+
+-- Second step searchs for row of '+' and '=' characters, records it's index
+-- and changes to '=' to '-'.
+tableStepTwo
+    :: Int              -- ^ width
+    -> [Text]           -- ^ rows
+    -> Parser (Table (DocH mod Identifier))
+tableStepTwo width = go 0 [] where
+    go _ left [] = tableStepThree width (reverse left) Nothing
+    go n left (r : rs)
+        | T.all (`elem` ['+', '=']) r =
+            tableStepThree width (reverse left ++ r' : rs) (Just n)
+        | otherwise =
+            go (n + 1) (r :  left) rs
+      where
+        r' = T.map (\c -> if c == '=' then '-' else c) r
+
+-- Third step recognises cells in the table area, returning a list of TC, cells.
+tableStepThree
+    :: Int              -- ^ width
+    -> [Text]           -- ^ rows
+    -> Maybe Int        -- ^ index of header separator
+    -> Parser (Table (DocH mod Identifier))
+tableStepThree width rs hdrIndex = do
+    cells <- loop (Set.singleton (0, 0))
+    tableStepFour rs hdrIndex cells
+  where
+    height = length rs
+
+    loop :: Set.Set (Int, Int) -> Parser [TC]
+    loop queue = case Set.minView queue of
+        Nothing -> return []
+        Just ((y, x), queue')
+            | y + 1 >= height || x + 1 >= width -> loop queue'
+            | otherwise -> case scanRight x y of
+                Nothing -> loop queue'
+                Just (x2, y2) -> do
+                    let tc = TC y x y2 x2
+                    fmap (tc :) $ loop $ queue' `Set.union` Set.fromList
+                        [(y, x2), (y2, x), (y2, x2)]
+
+    -- scan right looking for +, then try scan down
+    --
+    -- do we need to record + saw on the way left and down?
+    scanRight :: Int -> Int -> Maybe (Int, Int)
+    scanRight x y = go (x + 1) where
+        bs = rs !! y
+        go x' | x' >= width          = fail "overflow right "
+              | T.index bs x' == '+' = scanDown x y x' <|> go (x' + 1)
+              | T.index bs x' == '-' = go (x' + 1)
+              | otherwise            = fail $ "not a border (right) " ++ show (x,y,x')
+
+    -- scan down looking for +
+    scanDown :: Int -> Int -> Int -> Maybe (Int, Int)
+    scanDown x y x2 = go (y + 1) where
+        go y' | y' >= height                 = fail "overflow down"
+              | T.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1)
+              | T.index (rs !! y') x2 == '|' = go (y' + 1)
+              | otherwise                    = fail $ "not a border (down) " ++ show (x,y,x2,y')
+
+    -- check that at y2 x..x2 characters are '+' or '-'
+    scanLeft :: Int -> Int -> Int -> Int -> Maybe (Int, Int)
+    scanLeft x y x2 y2
+        | all (\x' -> T.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2
+        | otherwise                                            = fail $ "not a border (left) " ++ show (x,y,x2,y2)
+      where
+        bs = rs !! y2
+
+    -- check that at y2 x..x2 characters are '+' or '-'
+    scanUp :: Int -> Int -> Int -> Int -> Maybe (Int, Int)
+    scanUp x y x2 y2
+        | all (\y' -> T.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2)
+        | otherwise                                                   = fail $ "not a border (up) " ++ show (x,y,x2,y2)
+
+-- | table cell: top left bottom right
+data TC = TC !Int !Int !Int !Int
+  deriving Show
+
+tcXS :: TC -> [Int]
+tcXS (TC _ x _ x2) = [x, x2]
+
+tcYS :: TC -> [Int]
+tcYS (TC y _ y2 _) = [y, y2]
+
+-- | Fourth step. Given the locations of cells, forms 'Table' structure.
+tableStepFour :: [Text] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier))
+tableStepFour rs hdrIndex cells =  case hdrIndex of
+    Nothing -> return $ Table [] rowsDoc
+    Just i  -> case elemIndex i yTabStops of
+        Nothing -> return $ Table [] rowsDoc
+        Just i' -> return $ uncurry Table $ splitAt i' rowsDoc
+  where
+    xTabStops = sortNub $ concatMap tcXS cells
+    yTabStops = sortNub $ concatMap tcYS cells
+
+    sortNub :: Ord a => [a] -> [a]
+    sortNub = Set.toList . Set.fromList
+
+    init' :: [a] -> [a]
+    init' []       = []
+    init' [_]      = []
+    init' (x : xs) = x : init' xs
+
+    rowsDoc = (fmap . fmap) parseParagraph rows
+
+    rows = map makeRow (init' yTabStops)
+      where
+        makeRow y = TableRow $ mapMaybe (makeCell y) cells
+        makeCell y (TC y' x y2 x2)
+            | y /= y' = Nothing
+            | otherwise = Just $ TableCell xts yts (extract (x + 1) (y + 1) (x2 - 1) (y2 - 1))
+          where
+            xts = length $ P.takeWhile (< x2) $ dropWhile (< x) xTabStops
+            yts = length $ P.takeWhile (< y2) $ dropWhile (< y) yTabStops
+
+    -- extract cell contents given boundaries
+    extract :: Int -> Int -> Int -> Int -> Text
+    extract x y x2 y2 = T.intercalate "\n"
+        [ T.take (x2 - x + 1) $ T.drop x $ rs !! y'
+        | y' <- [y .. y2]
+        ]
+
+-- | Parse \@since annotations.
 since :: Parser (DocH mod a)
 since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty
   where
-    version = decimal `sepBy1'` "."
+    version = decimal `Parsec.sepBy1` "."
 
 -- | Headers inside the comment denoted with @=@ signs, up to 6 levels
 -- deep.
@@ -280,38 +528,39 @@ since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince
 -- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"}))
 header :: Parser (DocH mod Identifier)
 header = do
-  let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
-      pser = foldl1 (<|>) psers
-  delim <- decodeUtf8 <$> pser
-  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
-  rest <- paragraph <|> return DocEmpty
+  let psers = map (string . flip T.replicate "=") [6, 5 .. 1]
+      pser = choice' psers
+  delim <- T.unpack <$> pser
+  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseText
+  rest <- try paragraph <|> return DocEmpty
   return $ DocHeader (Header (length delim) line) `docAppend` rest
 
 textParagraph :: Parser (DocH mod Identifier)
-textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine
+textParagraph = parseText . T.intercalate "\n" <$> some nonEmptyLine
 
 textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier)
 textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph)
   where
     optionalTextParagraph :: Parser (DocH mod Identifier)
-    optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty
+    optionalTextParagraph = choice' [ docAppend <$> whitespace <*> textParagraph
+                                    , pure DocEmpty ]
 
     whitespace :: Parser (DocH mod a)
     whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n")
       where
-        f :: BS.ByteString -> Maybe BS.ByteString -> String
+        f :: Text -> Maybe Text -> String
         f xs (fromMaybe "" -> x)
-          | BS.null (xs <> x) = ""
+          | T.null (xs <> x) = ""
           | otherwise = " "
 
 -- | Parses unordered (bullet) lists.
-unorderedList :: BS.ByteString -> Parser (DocH mod Identifier)
+unorderedList :: Text -> Parser (DocH mod Identifier)
 unorderedList indent = DocUnorderedList <$> p
   where
     p = ("*" <|> "-") *> innerList indent p
 
 -- | Parses ordered lists (numbered or dashed).
-orderedList :: BS.ByteString -> Parser (DocH mod Identifier)
+orderedList :: Text -> Parser (DocH mod Identifier)
 orderedList indent = DocOrderedList <$> p
   where
     p = (paren <|> dot) *> innerList indent p
@@ -323,104 +572,110 @@ orderedList indent = DocOrderedList <$> p
 -- same paragraph. Usually used as
 --
 -- > someListFunction = listBeginning *> innerList someListFunction
-innerList :: BS.ByteString -> Parser [DocH mod Identifier]
+innerList :: Text -> Parser [DocH mod Identifier]
           -> Parser [DocH mod Identifier]
 innerList indent item = do
   c <- takeLine
   (cs, items) <- more indent item
-  let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
+  let contents = docParagraph . parseText . dropNLs . T.unlines $ c : cs
   return $ case items of
     Left p -> [contents `docAppend` p]
     Right i -> contents : i
 
 -- | Parses definition lists.
-definitionList :: BS.ByteString -> Parser (DocH mod Identifier)
+definitionList :: Text -> Parser (DocH mod Identifier)
 definitionList indent = DocDefList <$> p
   where
     p = do
-      label <- "[" *> (parseStringBS <$> takeWhile1 (notInClass "]\n")) <* ("]" <* optional ":")
+      label <- "[" *> (parseParagraph <$> takeWhile1_ (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
       c <- takeLine
       (cs, items) <- more indent p
-      let contents = parseString . dropNLs . unlines $ c : cs
+      let contents = parseText . dropNLs . T.unlines $ c : cs
       return $ case items of
         Left x -> [(label, contents `docAppend` x)]
         Right i -> (label, contents) : i
 
 -- | Drops all trailing newlines.
-dropNLs :: String -> String
-dropNLs = reverse . dropWhile (== '\n') . reverse
+dropNLs :: Text -> Text 
+dropNLs = T.dropWhileEnd (== '\n')
 
 -- | Main worker for 'innerList' and 'definitionList'.
 -- We need the 'Either' here to be able to tell in the respective functions
 -- whether we're dealing with the next list or a nested paragraph.
-more :: Monoid a => BS.ByteString -> Parser a
-     -> Parser ([String], Either (DocH mod Identifier) a)
-more indent item = innerParagraphs indent
-               <|> moreListItems indent item
-               <|> moreContent indent item
-               <|> pure ([], Right mempty)
+more :: Monoid a => Text -> Parser a
+     -> Parser ([Text], Either (DocH mod Identifier) a)
+more indent item = choice' [ innerParagraphs indent
+                           , moreListItems indent item
+                           , moreContent indent item
+                           , pure ([], Right mempty)
+                           ]
 
 -- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs.
-innerParagraphs :: BS.ByteString
-                -> Parser ([String], Either (DocH mod Identifier) a)
+innerParagraphs :: Text
+                -> Parser ([Text], Either (DocH mod Identifier) a)
 innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent)
 
 -- | Attempts to fetch the next list if possibly. Used by 'innerList' and
 -- 'definitionList' to recursively grab lists that aren't separated by a whole
 -- paragraph.
-moreListItems :: BS.ByteString -> Parser a
-              -> Parser ([String], Either (DocH mod Identifier) a)
+moreListItems :: Text -> Parser a
+              -> Parser ([Text], Either (DocH mod Identifier) a)
 moreListItems indent item = (,) [] . Right <$> indentedItem
   where
-    indentedItem = string indent *> skipSpace *> item
+    indentedItem = string indent *> Parsec.spaces *> item
 
 -- | Helper for 'innerList' and 'definitionList' which simply takes
 -- a line of text and attempts to parse more list content with 'more'.
-moreContent :: Monoid a => BS.ByteString -> Parser a
-            -> Parser ([String], Either (DocH mod Identifier) a)
+moreContent :: Monoid a => Text -> Parser a
+            -> Parser ([Text], Either (DocH mod Identifier) a)
 moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item
 
 -- | Parses an indented paragraph.
 -- The indentation is 4 spaces.
-indentedParagraphs :: BS.ByteString -> Parser (DocH mod Identifier)
+indentedParagraphs :: Text -> Parser (DocH mod Identifier)
 indentedParagraphs indent =
-    (concat <$> dropFrontOfPara indent') >>= parseParagraphs
+    (T.unpack . T.concat <$> dropFrontOfPara indent') >>= parseParagraphs
   where
-    indent' = string $ BS.append indent "    "
+    indent' = string $ indent <> "    "
 
 -- | Grab as many fully indented paragraphs as we can.
-dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
+dropFrontOfPara :: Parser Text -> Parser [Text]
 dropFrontOfPara sp = do
-  currentParagraph <- some (sp *> takeNonEmptyLine)
+  currentParagraph <- some (try (sp *> takeNonEmptyLine))
   followingParagraphs <-
-    skipHorizontalSpace *> nextPar -- we have more paragraphs to take
-    <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline
-    <|> endOfInput *> return [] -- nothing more to take at all
+    choice' [ skipHorizontalSpace *> nextPar -- we have more paragraphs to take
+            , skipHorizontalSpace *> nlList -- end of the ride, remember the newline
+            , Parsec.eof *> return []       -- nothing more to take at all
+            ]
   return (currentParagraph ++ followingParagraphs)
   where
     nextPar = (++) <$> nlList <*> dropFrontOfPara sp
     nlList = "\n" *> return ["\n"]
 
-nonSpace :: BS.ByteString -> Parser BS.ByteString
+nonSpace :: Text -> Parser Text
 nonSpace xs
-  | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
+  | T.all isSpace xs = fail "empty line"
   | otherwise = return xs
 
 -- | Takes a non-empty, not fully whitespace line.
 --
 --  Doesn't discard the trailing newline.
-takeNonEmptyLine :: Parser String
+takeNonEmptyLine :: Parser Text
 takeNonEmptyLine = do
-    (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
+    l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace
+    _ <- "\n"
+    pure (l <> "\n")
 
 -- | Takes indentation of first non-empty line.
 --
 -- More precisely: skips all whitespace-only lines and returns indentation
 -- (horizontal space, might be empty) of that non-empty line.
-takeIndent :: Parser BS.ByteString
+takeIndent :: Parser Text 
 takeIndent = do
   indent <- takeHorizontalSpace
-  "\n" *> takeIndent <|> return indent
+  choice' [ "\n" *> takeIndent
+          , return indent
+          ]
 
 -- | Blocks of text of the form:
 --
@@ -429,97 +684,98 @@ takeIndent = do
 -- >> baz
 --
 birdtracks :: Parser (DocH mod a)
-birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
+birdtracks = DocCodeBlock . DocString . T.unpack . T.intercalate "\n" . stripSpace <$> some line
   where
-    line = skipHorizontalSpace *> ">" *> takeLine
+    line = try (skipHorizontalSpace *> ">" *> takeLine)
 
-stripSpace :: [String] -> [String]
+stripSpace :: [Text] -> [Text]
 stripSpace = fromMaybe <*> mapM strip'
   where
-    strip' (' ':xs') = Just xs'
-    strip' "" = Just ""
-    strip' _  = Nothing
+    strip' t = case T.uncons t of
+                 Nothing -> Just ""
+                 Just (' ',t') -> Just t'
+                 _ -> Nothing
 
 -- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
 -- Consecutive examples are accepted.
 examples :: Parser (DocH mod a)
-examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
+examples = DocExamples <$> (many (try (skipHorizontalSpace *> "\n")) *> go)
   where
     go :: Parser [Example]
     go = do
-      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
+      prefix <- takeHorizontalSpace <* ">>>"
       expr <- takeLine
       (rs, es) <- resultAndMoreExamples
       return (makeExample prefix expr rs : es)
       where
-        resultAndMoreExamples :: Parser ([String], [Example])
-        resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
+        resultAndMoreExamples :: Parser ([Text], [Example])
+        resultAndMoreExamples = choice' [ moreExamples, result, pure ([], []) ]
           where
-            moreExamples :: Parser ([String], [Example])
+            moreExamples :: Parser ([Text], [Example])
             moreExamples = (,) [] <$> go
 
-            result :: Parser ([String], [Example])
+            result :: Parser ([Text], [Example])
             result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
 
-    makeExample :: String -> String -> [String] -> Example
+    makeExample :: Text -> Text -> [Text] -> Example
     makeExample prefix expression res =
-      Example (strip expression) result
+      Example (T.unpack (T.strip expression)) result
       where
-        result = map (substituteBlankLine . tryStripPrefix) res
+        result = map (T.unpack . substituteBlankLine . tryStripPrefix) res
 
-        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
+        tryStripPrefix xs = fromMaybe xs (T.stripPrefix prefix xs)
 
         substituteBlankLine "<BLANKLINE>" = ""
         substituteBlankLine xs = xs
 
-nonEmptyLine :: Parser String
-nonEmptyLine = mfilter (any (not . isSpace)) takeLine
+nonEmptyLine :: Parser Text 
+nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine)
 
-takeLine :: Parser String
-takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
+takeLine :: Parser Text
+takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine)
 
 endOfLine :: Parser ()
-endOfLine = void "\n" <|> endOfInput
+endOfLine = void "\n" <|> Parsec.eof 
 
 -- | Property parser.
 --
 -- >>> snd <$> parseOnly property "prop> hello world"
 -- Right (DocProperty "hello world")
 property :: Parser (DocH mod a)
-property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
+property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n"))
 
 -- |
 -- Paragraph level codeblock. Anything between the two delimiting \@ is parsed
 -- for markup.
 codeblock :: Parser (DocH mod Identifier)
 codeblock =
-  DocCodeBlock . parseStringBS . dropSpaces
+  DocCodeBlock . parseParagraph . dropSpaces
   <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
   where
     dropSpaces xs =
-      let rs = decodeUtf8 xs
-      in case splitByNl rs of
+      case splitByNl xs of
         [] -> xs
-        ys -> case last ys of
-          ' ':_ -> case mapM dropSpace ys of
-            Nothing -> xs
-            Just zs -> encodeUtf8 $ intercalate "\n" zs
+        ys -> case T.uncons (last ys) of
+          Just (' ',_) -> case mapM dropSpace ys of
+                            Nothing -> xs
+                            Just zs -> T.intercalate "\n" zs
           _ -> xs
 
     -- This is necessary because ‘lines’ swallows up a trailing newline
     -- and we lose information about whether the last line belongs to @ or to
     -- text which we need to decide whether we actually want to be dropping
     -- anything at all.
-    splitByNl = unfoldr (\x -> case x of
-                                 '\n':s -> Just (span (/= '\n') s)
-                                 _      -> Nothing)
-                . ('\n' :)
+    splitByNl = unfoldr (\x -> case T.uncons x of
+                                 Just ('\n',x') -> Just (T.span (/= '\n') x')
+                                 _ -> Nothing)
+                . ("\n" <>)
 
-    dropSpace "" = Just ""
-    dropSpace (' ':xs) = Just xs
-    dropSpace _ = Nothing
+    dropSpace t = case T.uncons t of
+                    Nothing -> Just ""
+                    Just (' ',t') -> Just t'
+                    _ -> Nothing
 
-    block' = scan False p
+    block' = scan p False
       where
         p isNewline c
           | isNewline && c == '@' = Nothing
@@ -527,10 +783,12 @@ codeblock =
           | otherwise = Just $ c == '\n'
 
 hyperlink :: Parser (DocH mod a)
-hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
-              <$> disallowNewline ("<" *> takeUntil ">")
-            <|> autoUrl
-            <|> markdownLink
+hyperlink = choice' [ angleBracketLink, markdownLink, autoUrl ]
+
+angleBracketLink :: Parser (DocH mod a)
+angleBracketLink =
+    DocHyperlink . makeLabeled Hyperlink 
+    <$> disallowNewline ("<" *> takeUntil ">")
 
 markdownLink :: Parser (DocH mod a)
 markdownLink = DocHyperlink <$> linkParser
@@ -539,7 +797,7 @@ linkParser :: Parser Hyperlink
 linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
   where
     label :: Parser (Maybe String)
-    label = Just . strip . decode <$> ("[" *> takeUntil "]")
+    label = Just . decode . T.strip <$> ("[" *> takeUntil "]")
 
     whitespace :: Parser ()
     whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace)
@@ -550,19 +808,25 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
     rejectWhitespace :: MonadPlus m => m String -> m String
     rejectWhitespace = mfilter (all (not . isSpace))
 
-    decode :: BS.ByteString -> String
-    decode = removeEscapes . decodeUtf8
+    decode :: Text -> String
+    decode = T.unpack . removeEscapes
 
 -- | Looks for URL-like things to automatically hyperlink even if they
 -- weren't marked as links.
 autoUrl :: Parser (DocH mod a)
 autoUrl = mkLink <$> url
   where
-    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
-    mkLink :: BS.ByteString -> DocH mod a
-    mkLink s = case unsnoc s of
-      Just (xs, x) | inClass ",.!?" x -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
-      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
+    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace))
+    
+    mkLink :: Text -> DocH mod a
+    mkLink s = case T.unsnoc s of
+      Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x]
+      _ -> DocHyperlink (mkHyperlink s)
+
+    mkHyperlink :: Text -> Hyperlink
+    mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing
+
+
 
 -- | Parses strings between identifier delimiters. Consumes all input that it
 -- deems to be valid in an identifier. Note that it simply blindly consumes
@@ -570,26 +834,16 @@ autoUrl = mkLink <$> url
 parseValid :: Parser String
 parseValid = p some
   where
-    idChar =
-      satisfy (\c -> isAlpha_ascii c
-                     || isDigit c
-                     -- N.B. '-' is placed first otherwise attoparsec thinks
-                     -- it belongs to a character class
-                     || inClass "-_.!#$%&*+/<=>?@\\|~:^" c)
+    idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_')
 
     p p' = do
-      vs' <- p' $ utf8String "⋆" <|> return <$> idChar
-      let vs = concat vs'
+      vs <- p' idChar
       c <- peekChar'
       case c of
         '`' -> return vs
-        '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs
+        '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ]
         _ -> fail "outofvalid"
 
--- | Parses UTF8 strings from ByteString streams.
-utf8String :: String -> Parser String
-utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
-
 -- | Parses identifiers with help of 'parseValid'. Asks GHC for
 -- 'String' from the string it deems valid.
 identifier :: Parser (DocH mod Identifier)
@@ -599,4 +853,4 @@ identifier = do
   e <- idDelim
   return $ DocIdentifier (o, vid, e)
   where
-    idDelim = satisfy (\c -> c == '\'' || c == '`')
+    idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`')
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
index 3f7d60f8..585c76bb 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
@@ -1,149 +1,91 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-}
-module Documentation.Haddock.Parser.Monad (
-  module Documentation.Haddock.Parser.Monad
-, Attoparsec.isDigit
-, Attoparsec.isDigit_w8
-, Attoparsec.isAlpha_iso8859_15
-, Attoparsec.isAlpha_ascii
-, Attoparsec.isSpace
-, Attoparsec.isSpace_w8
-, Attoparsec.inClass
-, Attoparsec.notInClass
-, Attoparsec.isEndOfLine
-, Attoparsec.isHorizontalSpace
-, Attoparsec.choice
-, Attoparsec.count
-, Attoparsec.option
-, Attoparsec.many'
-, Attoparsec.many1
-, Attoparsec.many1'
-, Attoparsec.manyTill
-, Attoparsec.manyTill'
-, Attoparsec.sepBy
-, Attoparsec.sepBy'
-, Attoparsec.sepBy1
-, Attoparsec.sepBy1'
-, Attoparsec.skipMany
-, Attoparsec.skipMany1
-, Attoparsec.eitherP
-) where
-
-import           Control.Applicative
-import           Control.Monad
-import           Data.String
-import           Data.ByteString (ByteString)
-import qualified Data.ByteString.Lazy as LB
-import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec
-import           Control.Monad.Trans.State
-import qualified Control.Monad.Trans.Class as Trans
-import           Data.Word
-import           Data.Bits
-import           Data.Tuple
-
-import           Documentation.Haddock.Types (Version)
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeSynonymInstances #-}
 
-newtype ParserState = ParserState {
-  parserStateSince :: Maybe Version
-} deriving (Eq, Show)
+module Documentation.Haddock.Parser.Monad where
 
-initialParserState :: ParserState
-initialParserState = ParserState Nothing
+import qualified Text.Parsec.Char as Parsec
+import qualified Text.Parsec as Parsec
 
-newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a)
-  deriving (Functor, Applicative, Alternative, Monad, MonadPlus)
+import qualified Data.Text as T
+import           Data.Text                   ( Text )
 
-instance (a ~ ByteString) => IsString (Parser a) where
-  fromString = lift . fromString
+import           Data.String                 ( IsString(..) )
+import           Data.Bits                   ( Bits(..) )
+import           Data.Char                   ( ord )
+import           Data.List                   ( foldl' )
 
-parseOnly :: Parser a -> ByteString -> Either String (ParserState, a)
-parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState)
+import           Documentation.Haddock.Types ( Version )
 
-lift :: Attoparsec.Parser a -> Parser a
-lift = Parser . Trans.lift
+newtype ParserState = ParserState {
+  parserStateSince :: Maybe Version
+} deriving (Eq, Show)
 
-setParserState :: ParserState -> Parser ()
-setParserState = Parser . put
+initialParserState :: ParserState
+initialParserState = ParserState Nothing
 
 setSince :: Version -> Parser ()
-setSince since = Parser $ modify (\st -> st {parserStateSince = Just since})
-
-char :: Char -> Parser Char
-char = lift . Attoparsec.char
-
-char8 :: Char -> Parser Word8
-char8 = lift . Attoparsec.char8
+setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since})
 
-anyChar :: Parser Char
-anyChar = lift Attoparsec.anyChar
+type Parser = Parsec.Parsec Text ParserState
 
-notChar :: Char -> Parser Char
-notChar = lift . Attoparsec.notChar
+instance (a ~ Text) => IsString (Parser a) where
+  fromString = fmap T.pack . Parsec.string
 
-satisfy :: (Char -> Bool) -> Parser Char
-satisfy = lift . Attoparsec.satisfy
+parseOnly :: Parser a -> Text -> Either String (ParserState, a)
+parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
+                  Left e -> Left (show e)
+                  Right (x,s) -> Right (s,x)
+  where p' = (,) <$> p <*> Parsec.getState
 
+-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
+-- consume input.
 peekChar :: Parser (Maybe Char)
-peekChar = lift Attoparsec.peekChar
+peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar
 
+-- | Fails if at the end of input. Does not consume input.
 peekChar' :: Parser Char
-peekChar' = lift Attoparsec.peekChar'
-
-digit :: Parser Char
-digit = lift Attoparsec.digit
-
-letter_iso8859_15 :: Parser Char
-letter_iso8859_15 = lift Attoparsec.letter_iso8859_15
-
-letter_ascii :: Parser Char
-letter_ascii = lift Attoparsec.letter_ascii
-
-space :: Parser Char
-space = lift Attoparsec.space
-
-string :: ByteString -> Parser ByteString
-string = lift . Attoparsec.string
-
-stringCI :: ByteString -> Parser ByteString
-stringCI = lift . Attoparsec.stringCI
-
-skipSpace :: Parser ()
-skipSpace = lift Attoparsec.skipSpace
-
-skipWhile :: (Char -> Bool) -> Parser ()
-skipWhile = lift . Attoparsec.skipWhile
-
-take :: Int -> Parser ByteString
-take = lift . Attoparsec.take
-
-scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString
-scan s = lift . Attoparsec.scan s
-
-takeWhile :: (Char -> Bool) -> Parser ByteString
-takeWhile = lift . Attoparsec.takeWhile
-
-takeWhile1 :: (Char -> Bool) -> Parser ByteString
-takeWhile1 = lift . Attoparsec.takeWhile1
-
-takeTill :: (Char -> Bool) -> Parser ByteString
-takeTill = lift . Attoparsec.takeTill
-
-takeByteString :: Parser ByteString
-takeByteString = lift Attoparsec.takeByteString
-
-takeLazyByteString :: Parser LB.ByteString
-takeLazyByteString = lift Attoparsec.takeLazyByteString
-
-endOfLine :: Parser ()
-endOfLine = lift Attoparsec.endOfLine
-
+peekChar' = Parsec.lookAhead Parsec.anyChar 
+
+-- | Parses the given string. Returns the parsed string.
+string :: Text -> Parser Text
+string t = Parsec.string (T.unpack t) *> pure t
+
+-- | Scan the input text, accumulating characters as long as the scanning
+-- function returns true.
+scan :: (s -> Char -> Maybe s) -- ^ scan function
+     -> s                      -- ^ initial state
+     -> Parser Text 
+scan f = fmap T.pack . go
+  where go s1 = do { cOpt <- peekChar
+                   ; case cOpt >>= f s1 of
+                       Nothing -> pure ""
+                       Just s2 -> (:) <$> Parsec.anyChar <*> go s2
+                   }
+
+-- | Apply a parser for a character zero or more times and collect the result in
+-- a string.
+takeWhile :: Parser Char -> Parser Text
+takeWhile = fmap T.pack . Parsec.many
+
+-- | Apply a parser for a character one or more times and collect the result in
+-- a string.
+takeWhile1 :: Parser Char -> Parser Text
+takeWhile1 =  fmap T.pack . Parsec.many1
+
+-- | Parse a decimal number.
 decimal :: Integral a => Parser a
-decimal = lift Attoparsec.decimal
+decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit
+  where step a c = a * 10 + fromIntegral (ord c - 48)
 
+-- | Parse a hexadecimal number.
 hexadecimal :: (Integral a, Bits a) => Parser a
-hexadecimal = lift Attoparsec.hexadecimal
-
-endOfInput :: Parser ()
-endOfInput = lift Attoparsec.endOfInput
-
-atEnd :: Parser Bool
-atEnd = lift Attoparsec.atEnd
+hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit 
+  where
+  step a c | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48)
+           | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87)
+           | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55)
+    where w = ord c
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
index ab5e5e9e..ffa91b09 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
 -- |
 -- Module      :  Documentation.Haddock.Parser.Util
 -- Copyright   :  (c) Mateusz Kowalczyk 2013-2014,
@@ -11,62 +11,59 @@
 --
 -- Various utility functions used by the parser.
 module Documentation.Haddock.Parser.Util (
-  unsnoc
-, strip
-, takeUntil
-, removeEscapes
-, makeLabeled
-, takeHorizontalSpace
-, skipHorizontalSpace
+  takeUntil,
+  removeEscapes,
+  makeLabeled,
+  takeHorizontalSpace,
+  skipHorizontalSpace,
 ) where
 
+import qualified Text.Parsec as Parsec
+
+import qualified Data.Text as T
+import           Data.Text (Text)
+
 import           Control.Applicative
 import           Control.Monad (mfilter)
-import           Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace)
-import           Data.ByteString.Char8 (ByteString)
-import qualified Data.ByteString.Char8 as BS
+import           Documentation.Haddock.Parser.Monad
 import           Prelude hiding (takeWhile)
 
-#if MIN_VERSION_bytestring(0,10,2)
-import           Data.ByteString.Char8 (unsnoc)
-#else
-unsnoc :: ByteString -> Maybe (ByteString, Char)
-unsnoc bs
-  | BS.null bs = Nothing
-  | otherwise = Just (BS.init bs, BS.last bs)
-#endif
+import           Data.Char (isSpace)
 
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = (\f -> f . f) $ dropWhile isSpace . reverse
-
-isHorizontalSpace :: Char -> Bool
-isHorizontalSpace = inClass " \t\f\v\r"
+-- | Characters that count as horizontal space
+horizontalSpace :: [Char]
+horizontalSpace = " \t\f\v\r"
 
+-- | Skip and ignore leading horizontal space
 skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = skipWhile isHorizontalSpace
+skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace)
 
-takeHorizontalSpace :: Parser BS.ByteString
-takeHorizontalSpace = takeWhile isHorizontalSpace
+-- | Take leading horizontal space
+takeHorizontalSpace :: Parser Text 
+takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace)
 
-makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
-  (uri, "")    -> f uri Nothing
-  (uri, label) -> f uri (Just $ dropWhile isSpace label)
+makeLabeled :: (String -> Maybe String -> a) -> Text -> a
+makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of
+  (uri, "")    -> f (T.unpack uri) Nothing
+  (uri, label) -> f (T.unpack uri) (Just . T.unpack $ T.stripStart label)
 
 -- | Remove escapes from given string.
 --
 -- Only do this if you do not process (read: parse) the input any further.
-removeEscapes :: String -> String
-removeEscapes "" = ""
-removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
-removeEscapes ('\\':xs) = removeEscapes xs
-removeEscapes (x:xs) = x : removeEscapes xs
+removeEscapes :: Text -> Text
+removeEscapes = T.unfoldr go
+  where
+  go :: Text -> Maybe (Char, Text)
+  go xs = case T.uncons xs of
+            Just ('\\',ys) -> T.uncons ys
+            unconsed -> unconsed
 
-takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
+-- | Consume characters from the input up to and including the given pattern.
+-- Return everything consumed except for the end pattern itself.
+takeUntil :: Text -> Parser Text 
+takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
   where
-    end = BS.unpack end_
+    end = T.unpack end_ 
 
     p :: (Bool, String) -> Char -> Maybe (Bool, String)
     p acc c = case acc of
@@ -75,9 +72,8 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
       (_, x:xs) | x == c -> Just (False, xs)
       _ -> Just (c == '\\', end)
 
-    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
-    requireEnd = mfilter (BS.isSuffixOf end_)
+    requireEnd = mfilter (T.isSuffixOf end_)
 
     gotSome xs
-      | BS.null xs = fail "didn't get any content"
+      | T.null xs = fail "didn't get any content"
       | otherwise = return xs
diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs
index 1e76c631..b5dea3d4 100644
--- a/haddock-library/src/Documentation/Haddock/Types.hs
+++ b/haddock-library/src/Documentation/Haddock/Types.hs
@@ -15,6 +15,7 @@
 module Documentation.Haddock.Types where
 
 #if !MIN_VERSION_base(4,8,0)
+import Control.Applicative
 import Data.Foldable
 import Data.Traversable
 #endif
@@ -33,7 +34,9 @@ import Data.Bitraversable
 -- meta-data to comments. We make a structure for this ahead of time
 -- so we don't have to gut half the core each time we want to add such
 -- info.
-newtype Meta = Meta { _version :: Maybe Version } deriving (Eq, Show)
+data Meta = Meta { _version :: Maybe Version
+                 , _package :: Maybe Package
+                 } deriving (Eq, Show)
 
 data MetaDoc mod id =
   MetaDoc { _meta :: Meta
@@ -60,6 +63,7 @@ overDocF :: Functor f => (DocH a b -> f (DocH c d)) -> MetaDoc a b -> f (MetaDoc
 overDocF f d = (\x -> d { _doc = x }) <$> f (_doc d)
 
 type Version = [Int]
+type Package = String
 
 data Hyperlink = Hyperlink
   { hyperlinkUrl   :: String
@@ -81,6 +85,21 @@ data Example = Example
   , exampleResult     :: [String]
   } deriving (Eq, Show)
 
+data TableCell id = TableCell
+  { tableCellColspan  :: Int
+  , tableCellRowspan  :: Int
+  , tableCellContents :: id
+  } deriving (Eq, Show, Functor, Foldable, Traversable)
+
+newtype TableRow id = TableRow
+  { tableRowCells :: [TableCell id]
+  } deriving (Eq, Show, Functor, Foldable, Traversable)
+
+data Table id = Table
+  { tableHeaderRows :: [TableRow id]
+  , tableBodyRows   :: [TableRow id]
+  } deriving (Eq, Show, Functor, Foldable, Traversable)
+
 data DocH mod id
   = DocEmpty
   | DocAppend (DocH mod id) (DocH mod id)
@@ -88,8 +107,10 @@ data DocH mod id
   | DocParagraph (DocH mod id)
   | DocIdentifier id
   | DocIdentifierUnchecked mod
+  -- ^ A qualified identifier that couldn't be resolved.
   | DocModule String
   | DocWarning (DocH mod id)
+  -- ^ This constructor has no counterpart in Haddock markup.
   | DocEmphasis (DocH mod id)
   | DocMonospaced (DocH mod id)
   | DocBold (DocH mod id)
@@ -102,9 +123,11 @@ data DocH mod id
   | DocMathInline String
   | DocMathDisplay String
   | DocAName String
+  -- ^ A (HTML) anchor.
   | DocProperty String
   | DocExamples [Example]
   | DocHeader (Header (DocH mod id))
+  | DocTable (Table (DocH mod id))
   deriving (Eq, Show, Functor, Foldable, Traversable)
 
 #if MIN_VERSION_base(4,8,0)
@@ -132,6 +155,7 @@ instance Bifunctor DocH where
   bimap _ _ (DocProperty s) = DocProperty s
   bimap _ _ (DocExamples examples) = DocExamples examples
   bimap f g (DocHeader (Header level title)) = DocHeader (Header level (bimap f g title))
+  bimap f g (DocTable (Table header body)) = DocTable (Table (map (fmap (bimap f g)) header) (map (fmap (bimap f g)) body))
 #endif
 
 #if MIN_VERSION_base(4,10,0)
@@ -149,6 +173,7 @@ instance Bifoldable DocH where
   bifoldr f g z (DocDefList docs) = foldr (\(l, r) acc -> bifoldr f g (bifoldr f g acc l) r) z docs
   bifoldr f g z (DocCodeBlock doc) = bifoldr f g z doc
   bifoldr f g z (DocHeader (Header _ title)) = bifoldr f g z title
+  bifoldr f g z (DocTable (Table header body)) = foldr (\r acc -> foldr (flip (bifoldr f g)) acc r) (foldr (\r acc -> foldr (flip (bifoldr f g)) acc r) z body) header
   bifoldr _ _ z _ = z
 
 instance Bitraversable DocH where
@@ -175,6 +200,7 @@ instance Bitraversable DocH where
   bitraverse _ _ (DocProperty s) = pure (DocProperty s)
   bitraverse _ _ (DocExamples examples) = pure (DocExamples examples)
   bitraverse f g (DocHeader (Header level title)) = (DocHeader . Header level) <$> bitraverse f g title
+  bitraverse f g (DocTable (Table header body)) = (\h b -> DocTable (Table h b)) <$> traverse (traverse (bitraverse f g)) header <*> traverse (traverse (bitraverse f g)) body
 #endif
 
 -- | 'DocMarkupH' is a set of instructions for marking up documentation.
@@ -209,4 +235,5 @@ data DocMarkupH mod id a = Markup
   , markupProperty             :: String -> a
   , markupExample              :: [Example] -> a
   , markupHeader               :: Header a -> a
+  , markupTable                :: Table a -> a
   }
author	Alexander Biehl <alexbiehl@gmail.com>	2018-06-14 15:28:52 +0200
committer	GitHub <noreply@github.com>	2018-06-14 15:28:52 +0200
commit	6247ec8b5a5bc8145ce851dce11eb617a380381c (patch)
tree	7856c0dd1ddd0c1f3eef0422b0cd8e8a5a6b71cb /haddock-library/src/Documentation/Haddock
parent	9a7f539d0c20654ff394f2ff99836412a6844df1 (diff)
parent	095fa970b32c818ed4c06cefc00ba98aaff756fa (diff)