diff options
Diffstat (limited to 'haddock-library/src')
6 files changed, 581 insertions, 360 deletions
| diff --git a/haddock-library/src/Documentation/Haddock/Doc.hs b/haddock-library/src/Documentation/Haddock/Doc.hs index 66bd1c97..297d30d6 100644 --- a/haddock-library/src/Documentation/Haddock/Doc.hs +++ b/haddock-library/src/Documentation/Haddock/Doc.hs @@ -27,16 +27,16 @@ metaDocAppend (MetaDoc { _meta = m, _doc = d })                (MetaDoc { _meta = m', _doc = d' }) =    MetaDoc { _meta = m' `metaAppend` m, _doc = d `docAppend` d' } --- | This is not a monoidal append, it uses '<|>' for the '_version'. +-- | This is not a monoidal append, it uses '<|>' for the '_version' and +-- '_package'.  metaAppend :: Meta -> Meta -> Meta -metaAppend (Meta { _version = v }) (Meta { _version = v' }) = -  Meta { _version = v <|> v' } +metaAppend (Meta v1 p1) (Meta v2 p2) = Meta (v1 <|> v2) (p1 <|> p2)  emptyMetaDoc :: MetaDoc mod id  emptyMetaDoc = MetaDoc { _meta = emptyMeta, _doc = DocEmpty }  emptyMeta :: Meta -emptyMeta = Meta { _version = empty } +emptyMeta = Meta empty empty  docAppend :: DocH mod id -> DocH mod id -> DocH mod id  docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2) diff --git a/haddock-library/src/Documentation/Haddock/Markup.hs b/haddock-library/src/Documentation/Haddock/Markup.hs index 1bf6c084..da8edcd4 100644 --- a/haddock-library/src/Documentation/Haddock/Markup.hs +++ b/haddock-library/src/Documentation/Haddock/Markup.hs @@ -30,6 +30,7 @@ markup m (DocMathDisplay mathjax)    = markupMathDisplay m mathjax  markup m (DocProperty p)             = markupProperty m p  markup m (DocExamples e)             = markupExample m e  markup m (DocHeader (Header l t))    = markupHeader m (Header l (markup m t)) +markup m (DocTable (Table h b))      = markupTable m (Table (map (fmap (markup m)) h) (map (fmap (markup m)) b))  markupPair :: DocMarkupH mod id a -> (DocH mod id, DocH mod id) -> (a, a)  markupPair m (a,b) = (markup m a, markup m b) @@ -59,5 +60,6 @@ idMarkup = Markup {    markupMathDisplay          = DocMathDisplay,    markupProperty             = DocProperty,    markupExample              = DocExamples, -  markupHeader               = DocHeader +  markupHeader               = DocHeader, +  markupTable                = DocTable    } diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index 8dc2a801..d79da40b 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -1,5 +1,6 @@ +{-# LANGUAGE CPP               #-}  {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE ViewPatterns      #-}  -- |  -- Module      :  Documentation.Haddock.Parser  -- Copyright   :  (c) Mateusz Kowalczyk 2013-2014, @@ -15,28 +16,63 @@  -- to be  --  -- @'toRegular' . '_doc' . 'parseParas'@ -module Documentation.Haddock.Parser ( parseString, parseParas -                                    , overIdentifier, toRegular, Identifier -                                    ) where +module Documentation.Haddock.Parser ( +  parseString, +  parseParas, +  overIdentifier, +  toRegular, +  Identifier +) where  import           Control.Applicative  import           Control.Arrow (first)  import           Control.Monad -import qualified Data.ByteString.Char8 as BS -import           Data.Char (chr, isAsciiUpper) -import           Data.List (stripPrefix, intercalate, unfoldr) -import           Data.Maybe (fromMaybe) +import           Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace) +import           Data.List (intercalate, unfoldr, elemIndex, notElem) +import           Data.Maybe (fromMaybe, mapMaybe)  import           Data.Monoid +import qualified Data.Set as Set  import           Documentation.Haddock.Doc -import           Documentation.Haddock.Parser.Monad hiding (take, endOfLine) +import           Documentation.Haddock.Parser.Monad  import           Documentation.Haddock.Parser.Util  import           Documentation.Haddock.Types -import           Documentation.Haddock.Utf8  import           Prelude hiding (takeWhile) +import qualified Prelude as P + +import qualified Text.Parsec as Parsec +import           Text.Parsec (try) + +import qualified Data.Text as T +import           Data.Text (Text) + +#if MIN_VERSION_base(4,9,0) +import           Text.Read.Lex                      (isSymbolChar) +#else +import           Data.Char                          (GeneralCategory (..), +                                                     generalCategory) +#endif  -- $setup  -- >>> :set -XOverloadedStrings +#if !MIN_VERSION_base(4,9,0) +-- inlined from base-4.10.0.0 +isSymbolChar :: Char -> Bool +isSymbolChar c = not (isPuncChar c) && case generalCategory c of +    MathSymbol           -> True +    CurrencySymbol       -> True +    ModifierSymbol       -> True +    OtherSymbol          -> True +    DashPunctuation      -> True +    OtherPunctuation     -> c `notElem` ("'\"" :: String) +    ConnectorPunctuation -> c /= '_' +    _                    -> False +  where +    -- | The @special@ character class as defined in the Haskell Report. +    isPuncChar :: Char -> Bool +    isPuncChar = (`elem` (",;()[]{}`" :: String)) +#endif +  -- | Identifier string surrounded with opening and closing quotes/backticks.  type Identifier = (Char, String, Char) @@ -79,47 +115,72 @@ overIdentifier f d = g d      g (DocProperty x) = DocProperty x      g (DocExamples x) = DocExamples x      g (DocHeader (Header l x)) = DocHeader . Header l $ g x +    g (DocTable (Table h b)) = DocTable (Table (map (fmap g) h) (map (fmap g) b)) + -parse :: Parser a -> BS.ByteString -> (ParserState, a) -parse p = either err id . parseOnly (p <* endOfInput) +choice' :: [Parser a] -> Parser a +choice' [] = empty +choice' [p] = p +choice' (p : ps) = try p <|> choice' ps + +parse :: Parser a -> Text -> (ParserState, a) +parse p = either err id . parseOnly (p <* Parsec.eof)    where      err = error . ("Haddock.Parser.parse: " ++)  -- | Main entry point to the parser. Appends the newline character  -- to the input string. -parseParas :: String -- ^ String to parse +parseParas :: Maybe Package +           -> String -- ^ String to parse             -> MetaDoc mod Identifier -parseParas input = case parseParasState input of -  (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state } +parseParas pkg input = case parseParasState input of +  (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state +                                       , _package = pkg +                                       }                          , _doc = a                          }  parseParasState :: String -> (ParserState, DocH mod Identifier) -parseParasState = -    parse (p <* skipSpace) . encodeUtf8 . (++ "\n") . filter (/= '\r') +parseParasState = parse (emptyLines *> p) . T.pack . (++ "\n") . filter (/= '\r')    where      p :: Parser (DocH mod Identifier) -    p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") +    p = docConcat <$> many (paragraph <* emptyLines) + +    emptyLines :: Parser () +    emptyLines = void $ many (try (skipHorizontalSpace *> "\n"))  parseParagraphs :: String -> Parser (DocH mod Identifier)  parseParagraphs input = case parseParasState input of -  (state, a) -> setParserState state >> return a +  (state, a) -> Parsec.putState state *> pure a --- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which --- drops leading whitespace and encodes the string to UTF8 first. +-- | Variant of 'parseText' for 'String' instead of 'Text'  parseString :: String -> DocH mod Identifier -parseString = parseStringBS . encodeUtf8 . dropWhile isSpace . filter (/= '\r') +parseString = parseText . T.pack + +-- | Parse a text paragraph. Actually just a wrapper over 'parseParagraph' which +-- drops leading whitespace. +parseText :: Text -> DocH mod Identifier +parseText = parseParagraph . T.dropWhile isSpace . T.filter (/= '\r') -parseStringBS :: BS.ByteString -> DocH mod Identifier -parseStringBS = snd . parse p +parseParagraph :: Text -> DocH mod Identifier +parseParagraph = snd . parse p    where      p :: Parser (DocH mod Identifier) -    p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName -                            <|> picture <|> mathDisplay <|> mathInline -                            <|> markdownImage -                            <|> hyperlink <|> bold -                            <|> emphasis <|> encodedChar <|> string' -                            <|> skipSpecialChar) +    p = docConcat <$> many (choice' [ monospace +                                    , anchor +                                    , identifier +                                    , moduleName +                                    , picture +                                    , mathDisplay +                                    , mathInline +                                    , markdownImage +                                    , hyperlink +                                    , bold +                                    , emphasis +                                    , encodedChar +                                    , string' +                                    , skipSpecialChar +                                    ])  -- | Parses and processes  -- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> @@ -143,7 +204,7 @@ specialChar = "_/<@\"&'`# "  -- to ensure that we have already given a chance to more meaningful parsers  -- before capturing their characers.  string' :: Parser (DocH mod a) -string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialChar) +string' = DocString . unescape . T.unpack <$> takeWhile1_ (`notElem` specialChar)    where      unescape "" = ""      unescape ('\\':x:xs) = x : unescape xs @@ -153,45 +214,45 @@ string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialC  -- This is done to skip over any special characters belonging to other  -- elements but which were not deemed meaningful at their positions.  skipSpecialChar :: Parser (DocH mod a) -skipSpecialChar = DocString . return <$> satisfy (inClass specialChar) +skipSpecialChar = DocString . return <$> Parsec.oneOf specialChar  -- | Emphasis parser.  --  -- >>> parseString "/Hello world/"  -- DocEmphasis (DocString "Hello world")  emphasis :: Parser (DocH mod Identifier) -emphasis = DocEmphasis . parseStringBS <$> -  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") +emphasis = DocEmphasis . parseParagraph <$> +  disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")  -- | Bold parser.  --  -- >>> parseString "__Hello world__"  -- DocBold (DocString "Hello world")  bold :: Parser (DocH mod Identifier) -bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__") +bold = DocBold . parseParagraph <$> disallowNewline ("__" *> takeUntil "__") -disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString -disallowNewline = mfilter ('\n' `BS.notElem`) +disallowNewline :: Parser Text -> Parser Text +disallowNewline = mfilter (T.all (/= '\n'))  -- | Like `takeWhile`, but unconditionally take escaped characters. -takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString -takeWhile_ p = scan False p_ +takeWhile_ :: (Char -> Bool) -> Parser Text +takeWhile_ p = scan p_ False    where      p_ escaped c        | escaped = Just False        | not $ p c = Nothing        | otherwise = Just (c == '\\') --- | Like `takeWhile1`, but unconditionally take escaped characters. -takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString -takeWhile1_ = mfilter (not . BS.null) . takeWhile_ +-- | Like 'takeWhile1', but unconditionally take escaped characters. +takeWhile1_ :: (Char -> Bool) -> Parser Text +takeWhile1_ = mfilter (not . T.null) . takeWhile_  -- | Text anchors to allow for jumping around the generated documentation.  --  -- >>> parseString "#Hello world#"  -- DocAName "Hello world"  anchor :: Parser (DocH mod a) -anchor = DocAName . decodeUtf8 <$> +anchor = DocAName . T.unpack <$>           disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#")  -- | Monospaced strings. @@ -199,23 +260,22 @@ anchor = DocAName . decodeUtf8 <$>  -- >>> parseString "@cruel@"  -- DocMonospaced (DocString "cruel")  monospace :: Parser (DocH mod Identifier) -monospace = DocMonospaced . parseStringBS +monospace = DocMonospaced . parseParagraph              <$> ("@" *> takeWhile1_ (/= '@') <* "@") --- | Module names: we try our reasonable best to only allow valid --- Haskell module names, with caveat about not matching on technically --- valid unicode symbols. +-- | Module names. +-- +-- Note that we allow '#' and '\' to support anchors (old style anchors are of +-- the form "SomeModule\#anchor").  moduleName :: Parser (DocH mod a) -moduleName = DocModule <$> (char '"' *> modid <* char '"') +moduleName = DocModule <$> ("\"" *> modid <* "\"")    where -    modid = intercalate "." <$> conid `sepBy1` "." +    modid = intercalate "." <$> conid `Parsec.sepBy1` "."      conid = (:) -      <$> satisfy isAsciiUpper -      -- NOTE: According to Haskell 2010 we should actually only -      -- accept {small | large | digit | ' } here.  But as we can't -      -- match on unicode characters, this is currently not possible. -      -- Note that we allow ‘#’ to suport anchors. -      <*> (decodeUtf8 <$> takeWhile (notInClass " .&[{}(=*)+]!|@/;,^?\"\n")) +      <$> Parsec.satisfy (\c -> isAlpha c && isUpper c) +      <*> many (conChar <|> Parsec.oneOf "\\#") + +    conChar = Parsec.alphaNum <|> Parsec.char '_'  -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify  -- a title for the picture. @@ -225,7 +285,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')  -- >>> parseString "<<hello.png world>>"  -- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"})  picture :: Parser (DocH mod a) -picture = DocPic . makeLabeled Picture . decodeUtf8 +picture = DocPic . makeLabeled Picture            <$> disallowNewline ("<<" *> takeUntil ">>")  -- | Inline math parser, surrounded by \\( and \\). @@ -233,7 +293,7 @@ picture = DocPic . makeLabeled Picture . decodeUtf8  -- >>> parseString "\\(\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\)"  -- DocMathInline "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"  mathInline :: Parser (DocH mod a) -mathInline = DocMathInline . decodeUtf8 +mathInline = DocMathInline . T.unpack                <$> disallowNewline  ("\\(" *> takeUntil "\\)")  -- | Display math parser, surrounded by \\[ and \\]. @@ -241,7 +301,7 @@ mathInline = DocMathInline . decodeUtf8  -- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]"  -- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"  mathDisplay :: Parser (DocH mod a) -mathDisplay = DocMathDisplay . decodeUtf8 +mathDisplay = DocMathDisplay . T.unpack                 <$> ("\\[" *> takeUntil "\\]")  markdownImage :: Parser (DocH mod a) @@ -251,25 +311,213 @@ markdownImage = fromHyperlink <$> ("!" *> linkParser)  -- | Paragraph parser, called by 'parseParas'.  paragraph :: Parser (DocH mod Identifier) -paragraph = examples <|> do -  indent <- takeIndent -  choice -    [ since -    , unorderedList indent -    , orderedList indent -    , birdtracks -    , codeblock -    , property -    , header -    , textParagraphThatStartsWithMarkdownLink -    , definitionList indent -    , docParagraph <$> textParagraph -    ] +paragraph = choice' [ examples +                    , table +                    , do indent <- takeIndent +                         choice' [ since +                                 , unorderedList indent +                                 , orderedList indent +                                 , birdtracks +                                 , codeblock +                                 , property +                                 , header +                                 , textParagraphThatStartsWithMarkdownLink +                                 , definitionList indent +                                 , docParagraph <$> textParagraph +                                 ] +                    ] + +-- | Provides support for grid tables. +-- +-- Tables are composed by an optional header and body. The header is composed by +-- a single row. The body is composed by a non-empty list of rows. +-- +-- Example table with header: +-- +-- > +----------+----------+ +-- > | /32bit/  |   64bit  | +-- > +==========+==========+ +-- > |  0x0000  | @0x0000@ | +-- > +----------+----------+ +-- +-- Algorithms loosely follows ideas in +-- http://docutils.sourceforge.net/docutils/parsers/rst/tableparser.py +-- +table :: Parser (DocH mod Identifier) +table = do +    -- first we parse the first row, which determines the width of the table +    firstRow <- parseFirstRow +    let len = T.length firstRow + +    -- then we parse all consequtive rows starting and ending with + or |, +    -- of the width `len`. +    restRows <- many (try (parseRestRows len)) + +    -- Now we gathered the table block, the next step is to split the block +    -- into cells. +    DocTable <$> tableStepTwo len (firstRow : restRows) +  where +    parseFirstRow :: Parser Text +    parseFirstRow = do +        skipHorizontalSpace +        -- upper-left corner is + +        c <- Parsec.char '+' +        cs <- some (Parsec.char '-' <|> Parsec.char '+') + +        -- upper right corner is + too +        guard (last cs == '+') + +        -- trailing space +        skipHorizontalSpace +        _ <- Parsec.newline + +        return (T.cons c $ T.pack cs) + +    parseRestRows :: Int -> Parser Text +    parseRestRows l = do +        skipHorizontalSpace +        c <- Parsec.char '|' <|> Parsec.char '+' +        bs <- scan predicate (l - 2) +        c2 <- Parsec.char '|' <|> Parsec.char '+' + +        -- trailing space +        skipHorizontalSpace +        _ <- Parsec.newline + +        return (T.cons c (T.snoc bs c2)) +      where +        predicate n c +            | n <= 0    = Nothing +            | c == '\n' = Nothing +            | otherwise = Just (n - 1) + +-- Second step searchs for row of '+' and '=' characters, records it's index +-- and changes to '=' to '-'. +tableStepTwo +    :: Int              -- ^ width +    -> [Text]           -- ^ rows +    -> Parser (Table (DocH mod Identifier)) +tableStepTwo width = go 0 [] where +    go _ left [] = tableStepThree width (reverse left) Nothing +    go n left (r : rs) +        | T.all (`elem` ['+', '=']) r = +            tableStepThree width (reverse left ++ r' : rs) (Just n) +        | otherwise = +            go (n + 1) (r :  left) rs +      where +        r' = T.map (\c -> if c == '=' then '-' else c) r + +-- Third step recognises cells in the table area, returning a list of TC, cells. +tableStepThree +    :: Int              -- ^ width +    -> [Text]           -- ^ rows +    -> Maybe Int        -- ^ index of header separator +    -> Parser (Table (DocH mod Identifier)) +tableStepThree width rs hdrIndex = do +    cells <- loop (Set.singleton (0, 0)) +    tableStepFour rs hdrIndex cells +  where +    height = length rs + +    loop :: Set.Set (Int, Int) -> Parser [TC] +    loop queue = case Set.minView queue of +        Nothing -> return [] +        Just ((y, x), queue') +            | y + 1 >= height || x + 1 >= width -> loop queue' +            | otherwise -> case scanRight x y of +                Nothing -> loop queue' +                Just (x2, y2) -> do +                    let tc = TC y x y2 x2 +                    fmap (tc :) $ loop $ queue' `Set.union` Set.fromList +                        [(y, x2), (y2, x), (y2, x2)] + +    -- scan right looking for +, then try scan down +    -- +    -- do we need to record + saw on the way left and down? +    scanRight :: Int -> Int -> Maybe (Int, Int) +    scanRight x y = go (x + 1) where +        bs = rs !! y +        go x' | x' >= width          = fail "overflow right " +              | T.index bs x' == '+' = scanDown x y x' <|> go (x' + 1) +              | T.index bs x' == '-' = go (x' + 1) +              | otherwise            = fail $ "not a border (right) " ++ show (x,y,x') + +    -- scan down looking for + +    scanDown :: Int -> Int -> Int -> Maybe (Int, Int) +    scanDown x y x2 = go (y + 1) where +        go y' | y' >= height                 = fail "overflow down" +              | T.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1) +              | T.index (rs !! y') x2 == '|' = go (y' + 1) +              | otherwise                    = fail $ "not a border (down) " ++ show (x,y,x2,y') + +    -- check that at y2 x..x2 characters are '+' or '-' +    scanLeft :: Int -> Int -> Int -> Int -> Maybe (Int, Int) +    scanLeft x y x2 y2 +        | all (\x' -> T.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2 +        | otherwise                                            = fail $ "not a border (left) " ++ show (x,y,x2,y2) +      where +        bs = rs !! y2 + +    -- check that at y2 x..x2 characters are '+' or '-' +    scanUp :: Int -> Int -> Int -> Int -> Maybe (Int, Int) +    scanUp x y x2 y2 +        | all (\y' -> T.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2) +        | otherwise                                                   = fail $ "not a border (up) " ++ show (x,y,x2,y2) + +-- | table cell: top left bottom right +data TC = TC !Int !Int !Int !Int +  deriving Show + +tcXS :: TC -> [Int] +tcXS (TC _ x _ x2) = [x, x2] + +tcYS :: TC -> [Int] +tcYS (TC y _ y2 _) = [y, y2] + +-- | Fourth step. Given the locations of cells, forms 'Table' structure. +tableStepFour :: [Text] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier)) +tableStepFour rs hdrIndex cells =  case hdrIndex of +    Nothing -> return $ Table [] rowsDoc +    Just i  -> case elemIndex i yTabStops of +        Nothing -> return $ Table [] rowsDoc +        Just i' -> return $ uncurry Table $ splitAt i' rowsDoc +  where +    xTabStops = sortNub $ concatMap tcXS cells +    yTabStops = sortNub $ concatMap tcYS cells + +    sortNub :: Ord a => [a] -> [a] +    sortNub = Set.toList . Set.fromList + +    init' :: [a] -> [a] +    init' []       = [] +    init' [_]      = [] +    init' (x : xs) = x : init' xs + +    rowsDoc = (fmap . fmap) parseParagraph rows + +    rows = map makeRow (init' yTabStops) +      where +        makeRow y = TableRow $ mapMaybe (makeCell y) cells +        makeCell y (TC y' x y2 x2) +            | y /= y' = Nothing +            | otherwise = Just $ TableCell xts yts (extract (x + 1) (y + 1) (x2 - 1) (y2 - 1)) +          where +            xts = length $ P.takeWhile (< x2) $ dropWhile (< x) xTabStops +            yts = length $ P.takeWhile (< y2) $ dropWhile (< y) yTabStops + +    -- extract cell contents given boundaries +    extract :: Int -> Int -> Int -> Int -> Text +    extract x y x2 y2 = T.intercalate "\n" +        [ T.take (x2 - x + 1) $ T.drop x $ rs !! y' +        | y' <- [y .. y2] +        ] + +-- | Parse \@since annotations.  since :: Parser (DocH mod a)  since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty    where -    version = decimal `sepBy1'` "." +    version = decimal `Parsec.sepBy1` "."  -- | Headers inside the comment denoted with @=@ signs, up to 6 levels  -- deep. @@ -280,38 +528,39 @@ since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince  -- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"}))  header :: Parser (DocH mod Identifier)  header = do -  let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1] -      pser = foldl1 (<|>) psers -  delim <- decodeUtf8 <$> pser -  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString -  rest <- paragraph <|> return DocEmpty +  let psers = map (string . flip T.replicate "=") [6, 5 .. 1] +      pser = choice' psers +  delim <- T.unpack <$> pser +  line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseText +  rest <- try paragraph <|> return DocEmpty    return $ DocHeader (Header (length delim) line) `docAppend` rest  textParagraph :: Parser (DocH mod Identifier) -textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine +textParagraph = parseText . T.intercalate "\n" <$> some nonEmptyLine  textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier)  textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph)    where      optionalTextParagraph :: Parser (DocH mod Identifier) -    optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty +    optionalTextParagraph = choice' [ docAppend <$> whitespace <*> textParagraph +                                    , pure DocEmpty ]      whitespace :: Parser (DocH mod a)      whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n")        where -        f :: BS.ByteString -> Maybe BS.ByteString -> String +        f :: Text -> Maybe Text -> String          f xs (fromMaybe "" -> x) -          | BS.null (xs <> x) = "" +          | T.null (xs <> x) = ""            | otherwise = " "  -- | Parses unordered (bullet) lists. -unorderedList :: BS.ByteString -> Parser (DocH mod Identifier) +unorderedList :: Text -> Parser (DocH mod Identifier)  unorderedList indent = DocUnorderedList <$> p    where      p = ("*" <|> "-") *> innerList indent p  -- | Parses ordered lists (numbered or dashed). -orderedList :: BS.ByteString -> Parser (DocH mod Identifier) +orderedList :: Text -> Parser (DocH mod Identifier)  orderedList indent = DocOrderedList <$> p    where      p = (paren <|> dot) *> innerList indent p @@ -323,104 +572,110 @@ orderedList indent = DocOrderedList <$> p  -- same paragraph. Usually used as  --  -- > someListFunction = listBeginning *> innerList someListFunction -innerList :: BS.ByteString -> Parser [DocH mod Identifier] +innerList :: Text -> Parser [DocH mod Identifier]            -> Parser [DocH mod Identifier]  innerList indent item = do    c <- takeLine    (cs, items) <- more indent item -  let contents = docParagraph . parseString . dropNLs . unlines $ c : cs +  let contents = docParagraph . parseText . dropNLs . T.unlines $ c : cs    return $ case items of      Left p -> [contents `docAppend` p]      Right i -> contents : i  -- | Parses definition lists. -definitionList :: BS.ByteString -> Parser (DocH mod Identifier) +definitionList :: Text -> Parser (DocH mod Identifier)  definitionList indent = DocDefList <$> p    where      p = do -      label <- "[" *> (parseStringBS <$> takeWhile1 (notInClass "]\n")) <* ("]" <* optional ":") +      label <- "[" *> (parseParagraph <$> takeWhile1_ (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")        c <- takeLine        (cs, items) <- more indent p -      let contents = parseString . dropNLs . unlines $ c : cs +      let contents = parseText . dropNLs . T.unlines $ c : cs        return $ case items of          Left x -> [(label, contents `docAppend` x)]          Right i -> (label, contents) : i  -- | Drops all trailing newlines. -dropNLs :: String -> String -dropNLs = reverse . dropWhile (== '\n') . reverse +dropNLs :: Text -> Text  +dropNLs = T.dropWhileEnd (== '\n')  -- | Main worker for 'innerList' and 'definitionList'.  -- We need the 'Either' here to be able to tell in the respective functions  -- whether we're dealing with the next list or a nested paragraph. -more :: Monoid a => BS.ByteString -> Parser a -     -> Parser ([String], Either (DocH mod Identifier) a) -more indent item = innerParagraphs indent -               <|> moreListItems indent item -               <|> moreContent indent item -               <|> pure ([], Right mempty) +more :: Monoid a => Text -> Parser a +     -> Parser ([Text], Either (DocH mod Identifier) a) +more indent item = choice' [ innerParagraphs indent +                           , moreListItems indent item +                           , moreContent indent item +                           , pure ([], Right mempty) +                           ]  -- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs. -innerParagraphs :: BS.ByteString -                -> Parser ([String], Either (DocH mod Identifier) a) +innerParagraphs :: Text +                -> Parser ([Text], Either (DocH mod Identifier) a)  innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent)  -- | Attempts to fetch the next list if possibly. Used by 'innerList' and  -- 'definitionList' to recursively grab lists that aren't separated by a whole  -- paragraph. -moreListItems :: BS.ByteString -> Parser a -              -> Parser ([String], Either (DocH mod Identifier) a) +moreListItems :: Text -> Parser a +              -> Parser ([Text], Either (DocH mod Identifier) a)  moreListItems indent item = (,) [] . Right <$> indentedItem    where -    indentedItem = string indent *> skipSpace *> item +    indentedItem = string indent *> Parsec.spaces *> item  -- | Helper for 'innerList' and 'definitionList' which simply takes  -- a line of text and attempts to parse more list content with 'more'. -moreContent :: Monoid a => BS.ByteString -> Parser a -            -> Parser ([String], Either (DocH mod Identifier) a) +moreContent :: Monoid a => Text -> Parser a +            -> Parser ([Text], Either (DocH mod Identifier) a)  moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item  -- | Parses an indented paragraph.  -- The indentation is 4 spaces. -indentedParagraphs :: BS.ByteString -> Parser (DocH mod Identifier) +indentedParagraphs :: Text -> Parser (DocH mod Identifier)  indentedParagraphs indent = -    (concat <$> dropFrontOfPara indent') >>= parseParagraphs +    (T.unpack . T.concat <$> dropFrontOfPara indent') >>= parseParagraphs    where -    indent' = string $ BS.append indent "    " +    indent' = string $ indent <> "    "  -- | Grab as many fully indented paragraphs as we can. -dropFrontOfPara :: Parser BS.ByteString -> Parser [String] +dropFrontOfPara :: Parser Text -> Parser [Text]  dropFrontOfPara sp = do -  currentParagraph <- some (sp *> takeNonEmptyLine) +  currentParagraph <- some (try (sp *> takeNonEmptyLine))    followingParagraphs <- -    skipHorizontalSpace *> nextPar -- we have more paragraphs to take -    <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline -    <|> endOfInput *> return [] -- nothing more to take at all +    choice' [ skipHorizontalSpace *> nextPar -- we have more paragraphs to take +            , skipHorizontalSpace *> nlList -- end of the ride, remember the newline +            , Parsec.eof *> return []       -- nothing more to take at all +            ]    return (currentParagraph ++ followingParagraphs)    where      nextPar = (++) <$> nlList <*> dropFrontOfPara sp      nlList = "\n" *> return ["\n"] -nonSpace :: BS.ByteString -> Parser BS.ByteString +nonSpace :: Text -> Parser Text  nonSpace xs -  | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line" +  | T.all isSpace xs = fail "empty line"    | otherwise = return xs  -- | Takes a non-empty, not fully whitespace line.  --  --  Doesn't discard the trailing newline. -takeNonEmptyLine :: Parser String +takeNonEmptyLine :: Parser Text  takeNonEmptyLine = do -    (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" +    l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace +    _ <- "\n" +    pure (l <> "\n")  -- | Takes indentation of first non-empty line.  --  -- More precisely: skips all whitespace-only lines and returns indentation  -- (horizontal space, might be empty) of that non-empty line. -takeIndent :: Parser BS.ByteString +takeIndent :: Parser Text   takeIndent = do    indent <- takeHorizontalSpace -  "\n" *> takeIndent <|> return indent +  choice' [ "\n" *> takeIndent +          , return indent +          ]  -- | Blocks of text of the form:  -- @@ -429,97 +684,98 @@ takeIndent = do  -- >> baz  --  birdtracks :: Parser (DocH mod a) -birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line +birdtracks = DocCodeBlock . DocString . T.unpack . T.intercalate "\n" . stripSpace <$> some line    where -    line = skipHorizontalSpace *> ">" *> takeLine +    line = try (skipHorizontalSpace *> ">" *> takeLine) -stripSpace :: [String] -> [String] +stripSpace :: [Text] -> [Text]  stripSpace = fromMaybe <*> mapM strip'    where -    strip' (' ':xs') = Just xs' -    strip' "" = Just "" -    strip' _  = Nothing +    strip' t = case T.uncons t of +                 Nothing -> Just "" +                 Just (' ',t') -> Just t' +                 _ -> Nothing  -- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).  -- Consecutive examples are accepted.  examples :: Parser (DocH mod a) -examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go) +examples = DocExamples <$> (many (try (skipHorizontalSpace *> "\n")) *> go)    where      go :: Parser [Example]      go = do -      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>" +      prefix <- takeHorizontalSpace <* ">>>"        expr <- takeLine        (rs, es) <- resultAndMoreExamples        return (makeExample prefix expr rs : es)        where -        resultAndMoreExamples :: Parser ([String], [Example]) -        resultAndMoreExamples = moreExamples <|> result <|> pure ([], []) +        resultAndMoreExamples :: Parser ([Text], [Example]) +        resultAndMoreExamples = choice' [ moreExamples, result, pure ([], []) ]            where -            moreExamples :: Parser ([String], [Example]) +            moreExamples :: Parser ([Text], [Example])              moreExamples = (,) [] <$> go -            result :: Parser ([String], [Example]) +            result :: Parser ([Text], [Example])              result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples -    makeExample :: String -> String -> [String] -> Example +    makeExample :: Text -> Text -> [Text] -> Example      makeExample prefix expression res = -      Example (strip expression) result +      Example (T.unpack (T.strip expression)) result        where -        result = map (substituteBlankLine . tryStripPrefix) res +        result = map (T.unpack . substituteBlankLine . tryStripPrefix) res -        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs) +        tryStripPrefix xs = fromMaybe xs (T.stripPrefix prefix xs)          substituteBlankLine "<BLANKLINE>" = ""          substituteBlankLine xs = xs -nonEmptyLine :: Parser String -nonEmptyLine = mfilter (any (not . isSpace)) takeLine +nonEmptyLine :: Parser Text  +nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine) -takeLine :: Parser String -takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine +takeLine :: Parser Text +takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine)  endOfLine :: Parser () -endOfLine = void "\n" <|> endOfInput +endOfLine = void "\n" <|> Parsec.eof   -- | Property parser.  --  -- >>> snd <$> parseOnly property "prop> hello world"  -- Right (DocProperty "hello world")  property :: Parser (DocH mod a) -property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) +property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n"))  -- |  -- Paragraph level codeblock. Anything between the two delimiting \@ is parsed  -- for markup.  codeblock :: Parser (DocH mod Identifier)  codeblock = -  DocCodeBlock . parseStringBS . dropSpaces +  DocCodeBlock . parseParagraph . dropSpaces    <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")    where      dropSpaces xs = -      let rs = decodeUtf8 xs -      in case splitByNl rs of +      case splitByNl xs of          [] -> xs -        ys -> case last ys of -          ' ':_ -> case mapM dropSpace ys of -            Nothing -> xs -            Just zs -> encodeUtf8 $ intercalate "\n" zs +        ys -> case T.uncons (last ys) of +          Just (' ',_) -> case mapM dropSpace ys of +                            Nothing -> xs +                            Just zs -> T.intercalate "\n" zs            _ -> xs      -- This is necessary because ‘lines’ swallows up a trailing newline      -- and we lose information about whether the last line belongs to @ or to      -- text which we need to decide whether we actually want to be dropping      -- anything at all. -    splitByNl = unfoldr (\x -> case x of -                                 '\n':s -> Just (span (/= '\n') s) -                                 _      -> Nothing) -                . ('\n' :) +    splitByNl = unfoldr (\x -> case T.uncons x of +                                 Just ('\n',x') -> Just (T.span (/= '\n') x') +                                 _ -> Nothing) +                . ("\n" <>) -    dropSpace "" = Just "" -    dropSpace (' ':xs) = Just xs -    dropSpace _ = Nothing +    dropSpace t = case T.uncons t of +                    Nothing -> Just "" +                    Just (' ',t') -> Just t' +                    _ -> Nothing -    block' = scan False p +    block' = scan p False        where          p isNewline c            | isNewline && c == '@' = Nothing @@ -527,10 +783,12 @@ codeblock =            | otherwise = Just $ c == '\n'  hyperlink :: Parser (DocH mod a) -hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 -              <$> disallowNewline ("<" *> takeUntil ">") -            <|> autoUrl -            <|> markdownLink +hyperlink = choice' [ angleBracketLink, markdownLink, autoUrl ] + +angleBracketLink :: Parser (DocH mod a) +angleBracketLink = +    DocHyperlink . makeLabeled Hyperlink  +    <$> disallowNewline ("<" *> takeUntil ">")  markdownLink :: Parser (DocH mod a)  markdownLink = DocHyperlink <$> linkParser @@ -539,7 +797,7 @@ linkParser :: Parser Hyperlink  linkParser = flip Hyperlink <$> label <*> (whitespace *> url)    where      label :: Parser (Maybe String) -    label = Just . strip . decode <$> ("[" *> takeUntil "]") +    label = Just . decode . T.strip <$> ("[" *> takeUntil "]")      whitespace :: Parser ()      whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace) @@ -550,19 +808,25 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url)      rejectWhitespace :: MonadPlus m => m String -> m String      rejectWhitespace = mfilter (all (not . isSpace)) -    decode :: BS.ByteString -> String -    decode = removeEscapes . decodeUtf8 +    decode :: Text -> String +    decode = T.unpack . removeEscapes  -- | Looks for URL-like things to automatically hyperlink even if they  -- weren't marked as links.  autoUrl :: Parser (DocH mod a)  autoUrl = mkLink <$> url    where -    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) -    mkLink :: BS.ByteString -> DocH mod a -    mkLink s = case unsnoc s of -      Just (xs, x) | inClass ",.!?" x -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x] -      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) +    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace)) +     +    mkLink :: Text -> DocH mod a +    mkLink s = case T.unsnoc s of +      Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x] +      _ -> DocHyperlink (mkHyperlink s) + +    mkHyperlink :: Text -> Hyperlink +    mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing + +  -- | Parses strings between identifier delimiters. Consumes all input that it  -- deems to be valid in an identifier. Note that it simply blindly consumes @@ -570,26 +834,16 @@ autoUrl = mkLink <$> url  parseValid :: Parser String  parseValid = p some    where -    idChar = -      satisfy (\c -> isAlpha_ascii c -                     || isDigit c -                     -- N.B. '-' is placed first otherwise attoparsec thinks -                     -- it belongs to a character class -                     || inClass "-_.!#$%&*+/<=>?@\\|~:^" c) +    idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_')      p p' = do -      vs' <- p' $ utf8String "⋆" <|> return <$> idChar -      let vs = concat vs' +      vs <- p' idChar        c <- peekChar'        case c of          '`' -> return vs -        '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs +        '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ]          _ -> fail "outofvalid" --- | Parses UTF8 strings from ByteString streams. -utf8String :: String -> Parser String -utf8String x = decodeUtf8 <$> string (encodeUtf8 x) -  -- | Parses identifiers with help of 'parseValid'. Asks GHC for  -- 'String' from the string it deems valid.  identifier :: Parser (DocH mod Identifier) @@ -599,4 +853,4 @@ identifier = do    e <- idDelim    return $ DocIdentifier (o, vid, e)    where -    idDelim = satisfy (\c -> c == '\'' || c == '`') +    idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`') diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index 3f7d60f8..585c76bb 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -1,149 +1,91 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-} -module Documentation.Haddock.Parser.Monad ( -  module Documentation.Haddock.Parser.Monad -, Attoparsec.isDigit -, Attoparsec.isDigit_w8 -, Attoparsec.isAlpha_iso8859_15 -, Attoparsec.isAlpha_ascii -, Attoparsec.isSpace -, Attoparsec.isSpace_w8 -, Attoparsec.inClass -, Attoparsec.notInClass -, Attoparsec.isEndOfLine -, Attoparsec.isHorizontalSpace -, Attoparsec.choice -, Attoparsec.count -, Attoparsec.option -, Attoparsec.many' -, Attoparsec.many1 -, Attoparsec.many1' -, Attoparsec.manyTill -, Attoparsec.manyTill' -, Attoparsec.sepBy -, Attoparsec.sepBy' -, Attoparsec.sepBy1 -, Attoparsec.sepBy1' -, Attoparsec.skipMany -, Attoparsec.skipMany1 -, Attoparsec.eitherP -) where - -import           Control.Applicative -import           Control.Monad -import           Data.String -import           Data.ByteString (ByteString) -import qualified Data.ByteString.Lazy as LB -import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec -import           Control.Monad.Trans.State -import qualified Control.Monad.Trans.Class as Trans -import           Data.Word -import           Data.Bits -import           Data.Tuple - -import           Documentation.Haddock.Types (Version) +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TypeSynonymInstances #-} -newtype ParserState = ParserState { -  parserStateSince :: Maybe Version -} deriving (Eq, Show) +module Documentation.Haddock.Parser.Monad where -initialParserState :: ParserState -initialParserState = ParserState Nothing +import qualified Text.Parsec.Char as Parsec +import qualified Text.Parsec as Parsec -newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a) -  deriving (Functor, Applicative, Alternative, Monad, MonadPlus) +import qualified Data.Text as T +import           Data.Text                   ( Text ) -instance (a ~ ByteString) => IsString (Parser a) where -  fromString = lift . fromString +import           Data.String                 ( IsString(..) ) +import           Data.Bits                   ( Bits(..) ) +import           Data.Char                   ( ord ) +import           Data.List                   ( foldl' ) -parseOnly :: Parser a -> ByteString -> Either String (ParserState, a) -parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState) +import           Documentation.Haddock.Types ( Version ) -lift :: Attoparsec.Parser a -> Parser a -lift = Parser . Trans.lift +newtype ParserState = ParserState { +  parserStateSince :: Maybe Version +} deriving (Eq, Show) -setParserState :: ParserState -> Parser () -setParserState = Parser . put +initialParserState :: ParserState +initialParserState = ParserState Nothing  setSince :: Version -> Parser () -setSince since = Parser $ modify (\st -> st {parserStateSince = Just since}) - -char :: Char -> Parser Char -char = lift . Attoparsec.char - -char8 :: Char -> Parser Word8 -char8 = lift . Attoparsec.char8 +setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since}) -anyChar :: Parser Char -anyChar = lift Attoparsec.anyChar +type Parser = Parsec.Parsec Text ParserState -notChar :: Char -> Parser Char -notChar = lift . Attoparsec.notChar +instance (a ~ Text) => IsString (Parser a) where +  fromString = fmap T.pack . Parsec.string -satisfy :: (Char -> Bool) -> Parser Char -satisfy = lift . Attoparsec.satisfy +parseOnly :: Parser a -> Text -> Either String (ParserState, a) +parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of +                  Left e -> Left (show e) +                  Right (x,s) -> Right (s,x) +  where p' = (,) <$> p <*> Parsec.getState +-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not +-- consume input.  peekChar :: Parser (Maybe Char) -peekChar = lift Attoparsec.peekChar +peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar +-- | Fails if at the end of input. Does not consume input.  peekChar' :: Parser Char -peekChar' = lift Attoparsec.peekChar' - -digit :: Parser Char -digit = lift Attoparsec.digit - -letter_iso8859_15 :: Parser Char -letter_iso8859_15 = lift Attoparsec.letter_iso8859_15 - -letter_ascii :: Parser Char -letter_ascii = lift Attoparsec.letter_ascii - -space :: Parser Char -space = lift Attoparsec.space - -string :: ByteString -> Parser ByteString -string = lift . Attoparsec.string - -stringCI :: ByteString -> Parser ByteString -stringCI = lift . Attoparsec.stringCI - -skipSpace :: Parser () -skipSpace = lift Attoparsec.skipSpace - -skipWhile :: (Char -> Bool) -> Parser () -skipWhile = lift . Attoparsec.skipWhile - -take :: Int -> Parser ByteString -take = lift . Attoparsec.take - -scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString -scan s = lift . Attoparsec.scan s - -takeWhile :: (Char -> Bool) -> Parser ByteString -takeWhile = lift . Attoparsec.takeWhile - -takeWhile1 :: (Char -> Bool) -> Parser ByteString -takeWhile1 = lift . Attoparsec.takeWhile1 - -takeTill :: (Char -> Bool) -> Parser ByteString -takeTill = lift . Attoparsec.takeTill - -takeByteString :: Parser ByteString -takeByteString = lift Attoparsec.takeByteString - -takeLazyByteString :: Parser LB.ByteString -takeLazyByteString = lift Attoparsec.takeLazyByteString - -endOfLine :: Parser () -endOfLine = lift Attoparsec.endOfLine - +peekChar' = Parsec.lookAhead Parsec.anyChar  + +-- | Parses the given string. Returns the parsed string. +string :: Text -> Parser Text +string t = Parsec.string (T.unpack t) *> pure t + +-- | Scan the input text, accumulating characters as long as the scanning +-- function returns true. +scan :: (s -> Char -> Maybe s) -- ^ scan function +     -> s                      -- ^ initial state +     -> Parser Text  +scan f = fmap T.pack . go +  where go s1 = do { cOpt <- peekChar +                   ; case cOpt >>= f s1 of +                       Nothing -> pure "" +                       Just s2 -> (:) <$> Parsec.anyChar <*> go s2 +                   } + +-- | Apply a parser for a character zero or more times and collect the result in +-- a string. +takeWhile :: Parser Char -> Parser Text +takeWhile = fmap T.pack . Parsec.many + +-- | Apply a parser for a character one or more times and collect the result in +-- a string. +takeWhile1 :: Parser Char -> Parser Text +takeWhile1 =  fmap T.pack . Parsec.many1 + +-- | Parse a decimal number.  decimal :: Integral a => Parser a -decimal = lift Attoparsec.decimal +decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit +  where step a c = a * 10 + fromIntegral (ord c - 48) +-- | Parse a hexadecimal number.  hexadecimal :: (Integral a, Bits a) => Parser a -hexadecimal = lift Attoparsec.hexadecimal - -endOfInput :: Parser () -endOfInput = lift Attoparsec.endOfInput - -atEnd :: Parser Bool -atEnd = lift Attoparsec.atEnd +hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit  +  where +  step a c | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48) +           | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87) +           | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55) +    where w = ord c diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index ab5e5e9e..ffa91b09 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-}  -- |  -- Module      :  Documentation.Haddock.Parser.Util  -- Copyright   :  (c) Mateusz Kowalczyk 2013-2014, @@ -11,62 +11,59 @@  --  -- Various utility functions used by the parser.  module Documentation.Haddock.Parser.Util ( -  unsnoc -, strip -, takeUntil -, removeEscapes -, makeLabeled -, takeHorizontalSpace -, skipHorizontalSpace +  takeUntil, +  removeEscapes, +  makeLabeled, +  takeHorizontalSpace, +  skipHorizontalSpace,  ) where +import qualified Text.Parsec as Parsec + +import qualified Data.Text as T +import           Data.Text (Text) +  import           Control.Applicative  import           Control.Monad (mfilter) -import           Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace) -import           Data.ByteString.Char8 (ByteString) -import qualified Data.ByteString.Char8 as BS +import           Documentation.Haddock.Parser.Monad  import           Prelude hiding (takeWhile) -#if MIN_VERSION_bytestring(0,10,2) -import           Data.ByteString.Char8 (unsnoc) -#else -unsnoc :: ByteString -> Maybe (ByteString, Char) -unsnoc bs -  | BS.null bs = Nothing -  | otherwise = Just (BS.init bs, BS.last bs) -#endif +import           Data.Char (isSpace) --- | Remove all leading and trailing whitespace -strip :: String -> String -strip = (\f -> f . f) $ dropWhile isSpace . reverse - -isHorizontalSpace :: Char -> Bool -isHorizontalSpace = inClass " \t\f\v\r" +-- | Characters that count as horizontal space +horizontalSpace :: [Char] +horizontalSpace = " \t\f\v\r" +-- | Skip and ignore leading horizontal space  skipHorizontalSpace :: Parser () -skipHorizontalSpace = skipWhile isHorizontalSpace +skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace) -takeHorizontalSpace :: Parser BS.ByteString -takeHorizontalSpace = takeWhile isHorizontalSpace +-- | Take leading horizontal space +takeHorizontalSpace :: Parser Text  +takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace) -makeLabeled :: (String -> Maybe String -> a) -> String -> a -makeLabeled f input = case break isSpace $ removeEscapes $ strip input of -  (uri, "")    -> f uri Nothing -  (uri, label) -> f uri (Just $ dropWhile isSpace label) +makeLabeled :: (String -> Maybe String -> a) -> Text -> a +makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of +  (uri, "")    -> f (T.unpack uri) Nothing +  (uri, label) -> f (T.unpack uri) (Just . T.unpack $ T.stripStart label)  -- | Remove escapes from given string.  --  -- Only do this if you do not process (read: parse) the input any further. -removeEscapes :: String -> String -removeEscapes "" = "" -removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs -removeEscapes ('\\':xs) = removeEscapes xs -removeEscapes (x:xs) = x : removeEscapes xs +removeEscapes :: Text -> Text +removeEscapes = T.unfoldr go +  where +  go :: Text -> Maybe (Char, Text) +  go xs = case T.uncons xs of +            Just ('\\',ys) -> T.uncons ys +            unconsed -> unconsed -takeUntil :: ByteString -> Parser ByteString -takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome +-- | Consume characters from the input up to and including the given pattern. +-- Return everything consumed except for the end pattern itself. +takeUntil :: Text -> Parser Text  +takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome    where -    end = BS.unpack end_ +    end = T.unpack end_       p :: (Bool, String) -> Char -> Maybe (Bool, String)      p acc c = case acc of @@ -75,9 +72,8 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome        (_, x:xs) | x == c -> Just (False, xs)        _ -> Just (c == '\\', end) -    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse -    requireEnd = mfilter (BS.isSuffixOf end_) +    requireEnd = mfilter (T.isSuffixOf end_)      gotSome xs -      | BS.null xs = fail "didn't get any content" +      | T.null xs = fail "didn't get any content"        | otherwise = return xs diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs index 1e76c631..b5dea3d4 100644 --- a/haddock-library/src/Documentation/Haddock/Types.hs +++ b/haddock-library/src/Documentation/Haddock/Types.hs @@ -15,6 +15,7 @@  module Documentation.Haddock.Types where  #if !MIN_VERSION_base(4,8,0) +import Control.Applicative  import Data.Foldable  import Data.Traversable  #endif @@ -33,7 +34,9 @@ import Data.Bitraversable  -- meta-data to comments. We make a structure for this ahead of time  -- so we don't have to gut half the core each time we want to add such  -- info. -newtype Meta = Meta { _version :: Maybe Version } deriving (Eq, Show) +data Meta = Meta { _version :: Maybe Version +                 , _package :: Maybe Package +                 } deriving (Eq, Show)  data MetaDoc mod id =    MetaDoc { _meta :: Meta @@ -60,6 +63,7 @@ overDocF :: Functor f => (DocH a b -> f (DocH c d)) -> MetaDoc a b -> f (MetaDoc  overDocF f d = (\x -> d { _doc = x }) <$> f (_doc d)  type Version = [Int] +type Package = String  data Hyperlink = Hyperlink    { hyperlinkUrl   :: String @@ -81,6 +85,21 @@ data Example = Example    , exampleResult     :: [String]    } deriving (Eq, Show) +data TableCell id = TableCell +  { tableCellColspan  :: Int +  , tableCellRowspan  :: Int +  , tableCellContents :: id +  } deriving (Eq, Show, Functor, Foldable, Traversable) + +newtype TableRow id = TableRow +  { tableRowCells :: [TableCell id] +  } deriving (Eq, Show, Functor, Foldable, Traversable) + +data Table id = Table +  { tableHeaderRows :: [TableRow id] +  , tableBodyRows   :: [TableRow id] +  } deriving (Eq, Show, Functor, Foldable, Traversable) +  data DocH mod id    = DocEmpty    | DocAppend (DocH mod id) (DocH mod id) @@ -88,8 +107,10 @@ data DocH mod id    | DocParagraph (DocH mod id)    | DocIdentifier id    | DocIdentifierUnchecked mod +  -- ^ A qualified identifier that couldn't be resolved.    | DocModule String    | DocWarning (DocH mod id) +  -- ^ This constructor has no counterpart in Haddock markup.    | DocEmphasis (DocH mod id)    | DocMonospaced (DocH mod id)    | DocBold (DocH mod id) @@ -102,9 +123,11 @@ data DocH mod id    | DocMathInline String    | DocMathDisplay String    | DocAName String +  -- ^ A (HTML) anchor.    | DocProperty String    | DocExamples [Example]    | DocHeader (Header (DocH mod id)) +  | DocTable (Table (DocH mod id))    deriving (Eq, Show, Functor, Foldable, Traversable)  #if MIN_VERSION_base(4,8,0) @@ -132,6 +155,7 @@ instance Bifunctor DocH where    bimap _ _ (DocProperty s) = DocProperty s    bimap _ _ (DocExamples examples) = DocExamples examples    bimap f g (DocHeader (Header level title)) = DocHeader (Header level (bimap f g title)) +  bimap f g (DocTable (Table header body)) = DocTable (Table (map (fmap (bimap f g)) header) (map (fmap (bimap f g)) body))  #endif  #if MIN_VERSION_base(4,10,0) @@ -149,6 +173,7 @@ instance Bifoldable DocH where    bifoldr f g z (DocDefList docs) = foldr (\(l, r) acc -> bifoldr f g (bifoldr f g acc l) r) z docs    bifoldr f g z (DocCodeBlock doc) = bifoldr f g z doc    bifoldr f g z (DocHeader (Header _ title)) = bifoldr f g z title +  bifoldr f g z (DocTable (Table header body)) = foldr (\r acc -> foldr (flip (bifoldr f g)) acc r) (foldr (\r acc -> foldr (flip (bifoldr f g)) acc r) z body) header    bifoldr _ _ z _ = z  instance Bitraversable DocH where @@ -175,6 +200,7 @@ instance Bitraversable DocH where    bitraverse _ _ (DocProperty s) = pure (DocProperty s)    bitraverse _ _ (DocExamples examples) = pure (DocExamples examples)    bitraverse f g (DocHeader (Header level title)) = (DocHeader . Header level) <$> bitraverse f g title +  bitraverse f g (DocTable (Table header body)) = (\h b -> DocTable (Table h b)) <$> traverse (traverse (bitraverse f g)) header <*> traverse (traverse (bitraverse f g)) body  #endif  -- | 'DocMarkupH' is a set of instructions for marking up documentation. @@ -209,4 +235,5 @@ data DocMarkupH mod id a = Markup    , markupProperty             :: String -> a    , markupExample              :: [Example] -> a    , markupHeader               :: Header a -> a +  , markupTable                :: Table a -> a    } | 
