diff options
Diffstat (limited to 'haddock-library/src/Documentation')
7 files changed, 384 insertions, 211 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Markup.hs b/haddock-library/src/Documentation/Haddock/Markup.hs index da8edcd4..365041ee 100644 --- a/haddock-library/src/Documentation/Haddock/Markup.hs +++ b/haddock-library/src/Documentation/Haddock/Markup.hs @@ -2,35 +2,38 @@ module Documentation.Haddock.Markup ( markup , idMarkup + , plainMarkup ) where import Documentation.Haddock.Types +import Data.Maybe ( fromMaybe ) + markup :: DocMarkupH mod id a -> DocH mod id -> a -markup m DocEmpty = markupEmpty m -markup m (DocAppend d1 d2) = markupAppend m (markup m d1) (markup m d2) -markup m (DocString s) = markupString m s -markup m (DocParagraph d) = markupParagraph m (markup m d) -markup m (DocIdentifier x) = markupIdentifier m x -markup m (DocIdentifierUnchecked x) = markupIdentifierUnchecked m x -markup m (DocModule mod0) = markupModule m mod0 -markup m (DocWarning d) = markupWarning m (markup m d) -markup m (DocEmphasis d) = markupEmphasis m (markup m d) -markup m (DocBold d) = markupBold m (markup m d) -markup m (DocMonospaced d) = markupMonospaced m (markup m d) -markup m (DocUnorderedList ds) = markupUnorderedList m (map (markup m) ds) -markup m (DocOrderedList ds) = markupOrderedList m (map (markup m) ds) -markup m (DocDefList ds) = markupDefList m (map (markupPair m) ds) -markup m (DocCodeBlock d) = markupCodeBlock m (markup m d) -markup m (DocHyperlink l) = markupHyperlink m l -markup m (DocAName ref) = markupAName m ref -markup m (DocPic img) = markupPic m img -markup m (DocMathInline mathjax) = markupMathInline m mathjax -markup m (DocMathDisplay mathjax) = markupMathDisplay m mathjax -markup m (DocProperty p) = markupProperty m p -markup m (DocExamples e) = markupExample m e -markup m (DocHeader (Header l t)) = markupHeader m (Header l (markup m t)) -markup m (DocTable (Table h b)) = markupTable m (Table (map (fmap (markup m)) h) (map (fmap (markup m)) b)) +markup m DocEmpty = markupEmpty m +markup m (DocAppend d1 d2) = markupAppend m (markup m d1) (markup m d2) +markup m (DocString s) = markupString m s +markup m (DocParagraph d) = markupParagraph m (markup m d) +markup m (DocIdentifier x) = markupIdentifier m x +markup m (DocIdentifierUnchecked x) = markupIdentifierUnchecked m x +markup m (DocModule mod0) = markupModule m mod0 +markup m (DocWarning d) = markupWarning m (markup m d) +markup m (DocEmphasis d) = markupEmphasis m (markup m d) +markup m (DocBold d) = markupBold m (markup m d) +markup m (DocMonospaced d) = markupMonospaced m (markup m d) +markup m (DocUnorderedList ds) = markupUnorderedList m (map (markup m) ds) +markup m (DocOrderedList ds) = markupOrderedList m (map (markup m) ds) +markup m (DocDefList ds) = markupDefList m (map (markupPair m) ds) +markup m (DocCodeBlock d) = markupCodeBlock m (markup m d) +markup m (DocHyperlink (Hyperlink u l)) = markupHyperlink m (Hyperlink u (fmap (markup m) l)) +markup m (DocAName ref) = markupAName m ref +markup m (DocPic img) = markupPic m img +markup m (DocMathInline mathjax) = markupMathInline m mathjax +markup m (DocMathDisplay mathjax) = markupMathDisplay m mathjax +markup m (DocProperty p) = markupProperty m p +markup m (DocExamples e) = markupExample m e +markup m (DocHeader (Header l t)) = markupHeader m (Header l (markup m t)) +markup m (DocTable (Table h b)) = markupTable m (Table (map (fmap (markup m)) h) (map (fmap (markup m)) b)) markupPair :: DocMarkupH mod id a -> (DocH mod id, DocH mod id) -> (a, a) markupPair m (a,b) = (markup m a, markup m b) @@ -63,3 +66,34 @@ idMarkup = Markup { markupHeader = DocHeader, markupTable = DocTable } + +-- | Map a 'DocH' into a best estimate of an alternate string. The idea is to +-- strip away any formatting while preserving as much of the actual text as +-- possible. +plainMarkup :: (mod -> String) -> (id -> String) -> DocMarkupH mod id String +plainMarkup plainMod plainIdent = Markup { + markupEmpty = "", + markupString = id, + markupParagraph = id, + markupAppend = (++), + markupIdentifier = plainIdent, + markupIdentifierUnchecked = plainMod, + markupModule = id, + markupWarning = id, + markupEmphasis = id, + markupBold = id, + markupMonospaced = id, + markupUnorderedList = const "", + markupOrderedList = const "", + markupDefList = const "", + markupCodeBlock = id, + markupHyperlink = \(Hyperlink url lbl) -> fromMaybe url lbl, + markupAName = id, + markupPic = \(Picture uri title) -> fromMaybe uri title, + markupMathInline = id, + markupMathDisplay = id, + markupProperty = id, + markupExample = const "", + markupHeader = \(Header _ title) -> title, + markupTable = const "" + } diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index d79da40b..36c8bb5b 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -27,14 +27,16 @@ module Documentation.Haddock.Parser ( import Control.Applicative import Control.Arrow (first) import Control.Monad -import Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace) -import Data.List (intercalate, unfoldr, elemIndex, notElem) +import Data.Char (chr, isUpper, isAlpha, isSpace) +import Data.List (intercalate, unfoldr, elemIndex) import Data.Maybe (fromMaybe, mapMaybe) import Data.Monoid import qualified Data.Set as Set import Documentation.Haddock.Doc +import Documentation.Haddock.Markup ( markup, plainMarkup ) import Documentation.Haddock.Parser.Monad import Documentation.Haddock.Parser.Util +import Documentation.Haddock.Parser.Identifier import Documentation.Haddock.Types import Prelude hiding (takeWhile) import qualified Prelude as P @@ -45,53 +47,26 @@ import Text.Parsec (try) import qualified Data.Text as T import Data.Text (Text) -#if MIN_VERSION_base(4,9,0) -import Text.Read.Lex (isSymbolChar) -#else -import Data.Char (GeneralCategory (..), - generalCategory) -#endif -- $setup -- >>> :set -XOverloadedStrings -#if !MIN_VERSION_base(4,9,0) --- inlined from base-4.10.0.0 -isSymbolChar :: Char -> Bool -isSymbolChar c = not (isPuncChar c) && case generalCategory c of - MathSymbol -> True - CurrencySymbol -> True - ModifierSymbol -> True - OtherSymbol -> True - DashPunctuation -> True - OtherPunctuation -> c `notElem` ("'\"" :: String) - ConnectorPunctuation -> c /= '_' - _ -> False - where - -- | The @special@ character class as defined in the Haskell Report. - isPuncChar :: Char -> Bool - isPuncChar = (`elem` (",;()[]{}`" :: String)) -#endif - --- | Identifier string surrounded with opening and closing quotes/backticks. -type Identifier = (Char, String, Char) - -- | Drops the quotes/backticks around all identifiers, as if they -- were valid but still 'String's. toRegular :: DocH mod Identifier -> DocH mod String -toRegular = fmap (\(_, x, _) -> x) +toRegular = fmap (\(Identifier _ _ x _) -> x) -- | Maps over 'DocIdentifier's over 'String' with potentially failing -- conversion using user-supplied function. If the conversion fails, -- the identifier is deemed to not be valid and is treated as a -- regular string. -overIdentifier :: (String -> Maybe a) +overIdentifier :: (Namespace -> String -> Maybe a) -> DocH mod Identifier -> DocH mod a overIdentifier f d = g d where - g (DocIdentifier (o, x, e)) = case f x of - Nothing -> DocString $ o : x ++ [e] + g (DocIdentifier (Identifier ns o x e)) = case f ns x of + Nothing -> DocString $ renderNs ns ++ [o] ++ x ++ [e] Just x' -> DocIdentifier x' g DocEmpty = DocEmpty g (DocAppend x x') = DocAppend (g x) (g x') @@ -107,7 +82,7 @@ overIdentifier f d = g d g (DocOrderedList x) = DocOrderedList $ fmap g x g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x g (DocCodeBlock x) = DocCodeBlock $ g x - g (DocHyperlink x) = DocHyperlink x + g (DocHyperlink (Hyperlink u x)) = DocHyperlink (Hyperlink u (fmap g x)) g (DocPic x) = DocPic x g (DocMathInline x) = DocMathInline x g (DocMathDisplay x) = DocMathDisplay x @@ -301,13 +276,20 @@ mathInline = DocMathInline . T.unpack -- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]" -- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}" mathDisplay :: Parser (DocH mod a) -mathDisplay = DocMathDisplay . T.unpack +mathDisplay = DocMathDisplay . T.unpack <$> ("\\[" *> takeUntil "\\]") -markdownImage :: Parser (DocH mod a) -markdownImage = fromHyperlink <$> ("!" *> linkParser) +-- | Markdown image parser. As per the commonmark reference recommendation, the +-- description text for an image converted to its a plain string representation. +-- +-- >>> parseString "![some /emphasis/ in a description](www.site.com)" +-- DocPic (Picture "www.site.com" (Just "some emphasis in a description")) +markdownImage :: Parser (DocH mod Identifier) +markdownImage = DocPic . fromHyperlink <$> ("!" *> linkParser) where - fromHyperlink (Hyperlink url label) = DocPic (Picture url label) + fromHyperlink (Hyperlink u l) = Picture u (fmap (markup stringMarkup) l) + stringMarkup = plainMarkup (const "") renderIdent + renderIdent (Identifier ns l c r) = renderNs ns <> [l] <> c <> [r] -- | Paragraph parser, called by 'parseParas'. paragraph :: Parser (DocH mod Identifier) @@ -360,32 +342,34 @@ table = do parseFirstRow :: Parser Text parseFirstRow = do skipHorizontalSpace - -- upper-left corner is + - c <- Parsec.char '+' - cs <- some (Parsec.char '-' <|> Parsec.char '+') + cs <- takeWhile (\c -> c == '-' || c == '+') - -- upper right corner is + too - guard (last cs == '+') + -- upper-left and upper-right corners are `+` + guard (T.length cs >= 2 && + T.head cs == '+' && + T.last cs == '+') -- trailing space skipHorizontalSpace _ <- Parsec.newline - return (T.cons c $ T.pack cs) + return cs parseRestRows :: Int -> Parser Text parseRestRows l = do skipHorizontalSpace + bs <- scan predicate l - c <- Parsec.char '|' <|> Parsec.char '+' - bs <- scan predicate (l - 2) - c2 <- Parsec.char '|' <|> Parsec.char '+' + -- Left and right edges are `|` or `+` + guard (T.length bs >= 2 && + (T.head bs == '|' || T.head bs == '+') && + (T.last bs == '|' || T.last bs == '+')) -- trailing space skipHorizontalSpace _ <- Parsec.newline - return (T.cons c (T.snoc bs c2)) + return bs where predicate n c | n <= 0 = Nothing @@ -662,7 +646,7 @@ nonSpace xs -- Doesn't discard the trailing newline. takeNonEmptyLine :: Parser Text takeNonEmptyLine = do - l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace + l <- takeWhile1 (/= '\n') >>= nonSpace _ <- "\n" pure (l <> "\n") @@ -732,7 +716,7 @@ nonEmptyLine :: Parser Text nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine) takeLine :: Parser Text -takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine) +takeLine = try (takeWhile (/= '\n') <* endOfLine) endOfLine :: Parser () endOfLine = void "\n" <|> Parsec.eof @@ -742,7 +726,7 @@ endOfLine = void "\n" <|> Parsec.eof -- >>> snd <$> parseOnly property "prop> hello world" -- Right (DocProperty "hello world") property :: Parser (DocH mod a) -property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n")) +property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (/= '\n')) -- | -- Paragraph level codeblock. Anything between the two delimiting \@ is parsed @@ -782,22 +766,22 @@ codeblock = | isNewline && isSpace c = Just isNewline | otherwise = Just $ c == '\n' -hyperlink :: Parser (DocH mod a) +hyperlink :: Parser (DocH mod Identifier) hyperlink = choice' [ angleBracketLink, markdownLink, autoUrl ] angleBracketLink :: Parser (DocH mod a) angleBracketLink = - DocHyperlink . makeLabeled Hyperlink + DocHyperlink . makeLabeled (\s -> Hyperlink s . fmap DocString) <$> disallowNewline ("<" *> takeUntil ">") -markdownLink :: Parser (DocH mod a) +markdownLink :: Parser (DocH mod Identifier) markdownLink = DocHyperlink <$> linkParser -linkParser :: Parser Hyperlink +linkParser :: Parser (Hyperlink (DocH mod Identifier)) linkParser = flip Hyperlink <$> label <*> (whitespace *> url) where - label :: Parser (Maybe String) - label = Just . decode . T.strip <$> ("[" *> takeUntil "]") + label :: Parser (Maybe (DocH mod Identifier)) + label = Just . parseParagraph . T.strip <$> ("[" *> takeUntil "]") whitespace :: Parser () whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace) @@ -816,41 +800,17 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url) autoUrl :: Parser (DocH mod a) autoUrl = mkLink <$> url where - url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace)) + url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (not . isSpace) mkLink :: Text -> DocH mod a mkLink s = case T.unsnoc s of Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x] _ -> DocHyperlink (mkHyperlink s) - mkHyperlink :: Text -> Hyperlink + mkHyperlink :: Text -> Hyperlink (DocH mod a) mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing - --- | Parses strings between identifier delimiters. Consumes all input that it --- deems to be valid in an identifier. Note that it simply blindly consumes --- characters and does no actual validation itself. -parseValid :: Parser String -parseValid = p some - where - idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_') - - p p' = do - vs <- p' idChar - c <- peekChar' - case c of - '`' -> return vs - '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ] - _ -> fail "outofvalid" - --- | Parses identifiers with help of 'parseValid'. Asks GHC for --- 'String' from the string it deems valid. +-- | Parses identifiers with help of 'parseValid'. identifier :: Parser (DocH mod Identifier) -identifier = do - o <- idDelim - vid <- parseValid - e <- idDelim - return $ DocIdentifier (o, vid, e) - where - idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`') +identifier = DocIdentifier <$> parseValid diff --git a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs new file mode 100644 index 00000000..7bc98b62 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ViewPatterns #-} +-- | +-- Module : Documentation.Haddock.Parser.Identifier +-- Copyright : (c) Alec Theriault 2019, +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Functionality for parsing identifiers and operators + +module Documentation.Haddock.Parser.Identifier ( + Identifier(..), + parseValid, +) where + +import Documentation.Haddock.Types ( Namespace(..) ) +import Documentation.Haddock.Parser.Monad +import qualified Text.Parsec as Parsec +import Text.Parsec.Pos ( updatePosChar ) +import Text.Parsec ( State(..) + , getParserState, setParserState ) + +import Data.Text (Text) +import qualified Data.Text as T + +import Data.Char (isAlpha, isAlphaNum) +import Control.Monad (guard) +import Data.Functor (($>)) +#if MIN_VERSION_base(4,9,0) +import Text.Read.Lex (isSymbolChar) +#else +import Data.Char (GeneralCategory (..), + generalCategory) +#endif + +import Data.Maybe + +-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks. +data Identifier = Identifier !Namespace !Char String !Char + deriving (Show, Eq) + +parseValid :: Parser Identifier +parseValid = do + s@State{ stateInput = inp, statePos = pos } <- getParserState + + case takeIdentifier inp of + Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier" + Just (ns, op, ident, cl, inp') -> + let posOp = updatePosChar pos op + posIdent = T.foldl updatePosChar posOp ident + posCl = updatePosChar posIdent cl + s' = s{ stateInput = inp', statePos = posCl } + in setParserState s' $> Identifier ns op (T.unpack ident) cl + + +#if !MIN_VERSION_base(4,9,0) +-- inlined from base-4.10.0.0 +isSymbolChar :: Char -> Bool +isSymbolChar c = not (isPuncChar c) && case generalCategory c of + MathSymbol -> True + CurrencySymbol -> True + ModifierSymbol -> True + OtherSymbol -> True + DashPunctuation -> True + OtherPunctuation -> c `notElem` "'\"" + ConnectorPunctuation -> c /= '_' + _ -> False + where + -- | The @special@ character class as defined in the Haskell Report. + isPuncChar :: Char -> Bool + isPuncChar = (`elem` (",;()[]{}`" :: String)) +#endif + +-- | Try to parse a delimited identifier off the front of the given input. +-- +-- This tries to match as many valid Haskell identifiers/operators as possible, +-- to the point of sometimes accepting invalid things (ex: keywords). Some +-- considerations: +-- +-- - operators and identifiers can have module qualifications +-- - operators can be wrapped in parens (for prefix) +-- - identifiers can be wrapped in backticks (for infix) +-- - delimiters are backticks or regular ticks +-- - since regular ticks are also valid in identifiers, we opt for the +-- longest successful parse +-- +-- This function should make /O(1)/ allocations +takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text) +takeIdentifier input = listToMaybe $ do + + -- Optional namespace + let (ns, input') = case T.uncons input of + Just ('v', i) -> (Value, i) + Just ('t', i) -> (Type, i) + _ -> (None, input) + + -- Opening tick + (op, input'') <- maybeToList (T.uncons input') + guard (op == '\'' || op == '`') + + -- Identifier/operator + (ident, input''') <- wrapped input'' + + -- Closing tick + (cl, input'''') <- maybeToList (T.uncons input''') + guard (cl == '\'' || cl == '`') + + pure (ns, op, ident, cl, input'''') + + where + + -- | Parse out a wrapped, possibly qualified, operator or identifier + wrapped t = do + (c, t' ) <- maybeToList (T.uncons t) + -- Tuples + case c of + '(' | Just (c', _) <- T.uncons t' + , c' == ',' || c' == ')' + -> do let (commas, t'') = T.span (== ',') t' + (')', t''') <- maybeToList (T.uncons t'') + pure (T.take (T.length commas + 2) t, t''') + + -- Parenthesized + '(' -> do (n, t'' ) <- general False 0 [] t' + (')', t''') <- maybeToList (T.uncons t'') + pure (T.take (n + 2) t, t''') + + -- Backticked + '`' -> do (n, t'' ) <- general False 0 [] t' + ('`', t''') <- maybeToList (T.uncons t'') + pure (T.take (n + 2) t, t''') + + -- Unadorned + _ -> do (n, t'' ) <- general False 0 [] t + pure (T.take n t, t'') + + -- | Parse out a possibly qualified operator or identifier + general :: Bool -- ^ refuse inputs starting with operators + -> Int -- ^ total characters \"consumed\" so far + -> [(Int, Text)] -- ^ accumulated results + -> Text -- ^ current input + -> [(Int, Text)] -- ^ total characters parsed & what remains + general !identOnly !i acc t + -- Starts with an identifier (either just an identifier, or a module qual) + | Just (n, rest) <- identLike t + = if T.null rest + then acc + else case T.head rest of + '`' -> (n + i, rest) : acc + ')' -> (n + i, rest) : acc + '.' -> general False (n + i + 1) acc (T.tail rest) + '\'' -> let (m, rest') = quotes rest + in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (T.tail rest') + _ -> acc + + -- An operator + | Just (n, rest) <- optr t + , not identOnly + = (n + i, rest) : acc + + -- Anything else + | otherwise + = acc + + -- | Parse an identifier off the front of the input + identLike t + | T.null t = Nothing + | isAlpha (T.head t) || '_' == T.head t + = let !(idt, rest) = T.span (\c -> isAlphaNum c || c == '_') t + !(octos, rest') = T.span (== '#') rest + in Just (T.length idt + T.length octos, rest') + | otherwise = Nothing + + -- | Parse all but the last quote off the front of the input + -- PRECONDITION: T.head t == '\'' + quotes :: Text -> (Int, Text) + quotes t = let !n = T.length (T.takeWhile (== '\'') t) - 1 + in (n, T.drop n t) + + -- | Parse an operator off the front of the input + optr t = let !(op, rest) = T.span isSymbolChar t + in if T.null op then Nothing else Just (T.length op, rest) diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index a5664aa8..fa46f536 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -4,15 +4,32 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeSynonymInstances #-} +-- | +-- Module : Documentation.Haddock.Parser.Monad +-- Copyright : (c) Alec Theriault 2018-2019, +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Defines the Parsec monad over which all parsing is done and also provides +-- more efficient versions of the usual parsec combinator functions (but +-- specialized to 'Text'). module Documentation.Haddock.Parser.Monad where import qualified Text.Parsec.Char as Parsec import qualified Text.Parsec as Parsec +import Text.Parsec.Pos ( updatePosChar ) +import Text.Parsec ( State(..) + , getParserState, setParserState ) import qualified Data.Text as T import Data.Text ( Text ) +import Control.Monad ( mfilter ) +import Data.Functor ( ($>) ) import Data.String ( IsString(..) ) import Data.Bits ( Bits(..) ) import Data.Char ( ord ) @@ -20,7 +37,11 @@ import Data.List ( foldl' ) import Control.Applicative as App import Documentation.Haddock.Types ( Version ) +import Prelude hiding (takeWhile) +-- | The only bit of information we really care about truding along with us +-- through parsing is the version attached to a @\@since@ annotation - if +-- the doc even contained one. newtype ParserState = ParserState { parserStateSince :: Maybe Version } deriving (Eq, Show) @@ -29,7 +50,7 @@ initialParserState :: ParserState initialParserState = ParserState Nothing setSince :: Version -> Parser () -setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since}) +setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since }) type Parser = Parsec.Parsec Text ParserState @@ -44,38 +65,74 @@ parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of -- | Always succeeds, but returns 'Nothing' if at the end of input. Does not -- consume input. +-- +-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but +-- more efficient. peekChar :: Parser (Maybe Char) -peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar +peekChar = headOpt . stateInput <$> getParserState + where headOpt t | T.null t = Nothing + | otherwise = Just (T.head t) +{-# INLINE peekChar #-} -- | Fails if at the end of input. Does not consume input. +-- +-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient. peekChar' :: Parser Char -peekChar' = Parsec.lookAhead Parsec.anyChar +peekChar' = headFail . stateInput =<< getParserState + where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF" + | otherwise = App.pure (T.head t) +{-# INLINE peekChar' #-} -- | Parses the given string. Returns the parsed string. +-- +-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient. string :: Text -> Parser Text -string t = Parsec.string (T.unpack t) *> App.pure t +string t = do + s@State{ stateInput = inp, statePos = pos } <- getParserState + case T.stripPrefix t inp of + Nothing -> Parsec.parserFail "string: Failed to match the input string" + Just inp' -> + let pos' = T.foldl updatePosChar pos t + s' = s{ stateInput = inp', statePos = pos' } + in setParserState s' $> t + +-- | Keep matching characters as long as the predicate function holds (and +-- return them). +-- +-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient. +takeWhile :: (Char -> Bool) -> Parser Text +takeWhile f = do + s@State{ stateInput = inp, statePos = pos } <- getParserState + let (t, inp') = T.span f inp + pos' = T.foldl updatePosChar pos t + s' = s{ stateInput = inp', statePos = pos' } + setParserState s' $> t + +-- | Like 'takeWhile', but fails if no characters matched. +-- +-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient. +takeWhile1 :: (Char -> Bool) -> Parser Text +takeWhile1 = mfilter (not . T.null) . takeWhile -- | Scan the input text, accumulating characters as long as the scanning -- function returns true. scan :: (s -> Char -> Maybe s) -- ^ scan function -> s -- ^ initial state -> Parser Text -scan f = fmap T.pack . go - where go s1 = do { cOpt <- peekChar - ; case cOpt >>= f s1 of - Nothing -> pure "" - Just s2 -> (:) <$> Parsec.anyChar <*> go s2 - } - --- | Apply a parser for a character zero or more times and collect the result in --- a string. -takeWhile :: Parser Char -> Parser Text -takeWhile = fmap T.pack . Parsec.many - --- | Apply a parser for a character one or more times and collect the result in --- a string. -takeWhile1 :: Parser Char -> Parser Text -takeWhile1 = fmap T.pack . Parsec.many1 +scan f st = do + s@State{ stateInput = inp, statePos = pos } <- getParserState + go inp st pos 0 $ \inp' pos' n -> + let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' } + in setParserState s' $> T.take n inp + where + go inp s !pos !n cont + = case T.uncons inp of + Nothing -> cont inp pos n -- ran out of input + Just (c, inp') -> + case f s c of + Nothing -> cont inp pos n -- scan function failed + Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont + -- | Parse a decimal number. decimal :: Integral a => Parser a diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index ffa91b09..98570c22 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -40,7 +40,7 @@ skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace) -- | Take leading horizontal space takeHorizontalSpace :: Parser Text -takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace) +takeHorizontalSpace = takeWhile (`elem` horizontalSpace) makeLabeled :: (String -> Maybe String -> a) -> Text -> a makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs index b5dea3d4..ba2f873c 100644 --- a/haddock-library/src/Documentation/Haddock/Types.hs +++ b/haddock-library/src/Documentation/Haddock/Types.hs @@ -65,10 +65,10 @@ overDocF f d = (\x -> d { _doc = x }) <$> f (_doc d) type Version = [Int] type Package = String -data Hyperlink = Hyperlink +data Hyperlink id = Hyperlink { hyperlinkUrl :: String - , hyperlinkLabel :: Maybe String - } deriving (Eq, Show) + , hyperlinkLabel :: Maybe id + } deriving (Eq, Show, Functor, Foldable, Traversable) data Picture = Picture { pictureUri :: String @@ -118,7 +118,7 @@ data DocH mod id | DocOrderedList [DocH mod id] | DocDefList [(DocH mod id, DocH mod id)] | DocCodeBlock (DocH mod id) - | DocHyperlink Hyperlink + | DocHyperlink (Hyperlink (DocH mod id)) | DocPic Picture | DocMathInline String | DocMathDisplay String @@ -147,7 +147,7 @@ instance Bifunctor DocH where bimap f g (DocOrderedList docs) = DocOrderedList (map (bimap f g) docs) bimap f g (DocDefList docs) = DocDefList (map (bimap f g *** bimap f g) docs) bimap f g (DocCodeBlock doc) = DocCodeBlock (bimap f g doc) - bimap _ _ (DocHyperlink hyperlink) = DocHyperlink hyperlink + bimap f g (DocHyperlink (Hyperlink url lbl)) = DocHyperlink (Hyperlink url (fmap (bimap f g) lbl)) bimap _ _ (DocPic picture) = DocPic picture bimap _ _ (DocMathInline s) = DocMathInline s bimap _ _ (DocMathDisplay s) = DocMathDisplay s @@ -192,7 +192,7 @@ instance Bitraversable DocH where bitraverse f g (DocOrderedList docs) = DocOrderedList <$> traverse (bitraverse f g) docs bitraverse f g (DocDefList docs) = DocDefList <$> traverse (bitraverse (bitraverse f g) (bitraverse f g)) docs bitraverse f g (DocCodeBlock doc) = DocCodeBlock <$> bitraverse f g doc - bitraverse _ _ (DocHyperlink hyperlink) = pure (DocHyperlink hyperlink) + bitraverse f g (DocHyperlink (Hyperlink url lbl)) = DocHyperlink <$> (Hyperlink url <$> traverse (bitraverse f g) lbl) bitraverse _ _ (DocPic picture) = pure (DocPic picture) bitraverse _ _ (DocMathInline s) = pure (DocMathInline s) bitraverse _ _ (DocMathDisplay s) = pure (DocMathDisplay s) @@ -203,6 +203,16 @@ instance Bitraversable DocH where bitraverse f g (DocTable (Table header body)) = (\h b -> DocTable (Table h b)) <$> traverse (traverse (bitraverse f g)) header <*> traverse (traverse (bitraverse f g)) body #endif +-- | The namespace qualification for an identifier. +data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show) + +-- | Render the a namespace into the same format it was initially parsed. +renderNs :: Namespace -> String +renderNs Value = "v" +renderNs Type = "t" +renderNs None = "" + + -- | 'DocMarkupH' is a set of instructions for marking up documentation. -- In fact, it's really just a mapping from 'Doc' to some other -- type [a], where [a] is usually the type of the output (HTML, say). @@ -227,7 +237,7 @@ data DocMarkupH mod id a = Markup , markupOrderedList :: [a] -> a , markupDefList :: [(a,a)] -> a , markupCodeBlock :: a -> a - , markupHyperlink :: Hyperlink -> a + , markupHyperlink :: Hyperlink a -> a , markupAName :: String -> a , markupPic :: Picture -> a , markupMathInline :: String -> a diff --git a/haddock-library/src/Documentation/Haddock/Utf8.hs b/haddock-library/src/Documentation/Haddock/Utf8.hs deleted file mode 100644 index 3f75e53b..00000000 --- a/haddock-library/src/Documentation/Haddock/Utf8.hs +++ /dev/null @@ -1,74 +0,0 @@ -module Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8) where -import Data.Bits ((.|.), (.&.), shiftL, shiftR) -import qualified Data.ByteString as BS -import Data.Char (chr, ord) -import Data.Word (Word8) - --- | Helper that encodes and packs a 'String' into a 'BS.ByteString' -encodeUtf8 :: String -> BS.ByteString -encodeUtf8 = BS.pack . encode - --- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String' -decodeUtf8 :: BS.ByteString -> String -decodeUtf8 = decode . BS.unpack - --- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding --- | Character to use when 'encode' or 'decode' fail for a byte. -replacementCharacter :: Char -replacementCharacter = '\xfffd' - --- | Encode a Haskell String to a list of Word8 values, in UTF8 format. -encode :: String -> [Word8] -encode = concatMap (map fromIntegral . go . ord) - where - go oc - | oc <= 0x7f = [oc] - - | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6) - , 0x80 + oc .&. 0x3f - ] - - | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12) - , 0x80 + ((oc `shiftR` 6) .&. 0x3f) - , 0x80 + oc .&. 0x3f - ] - | otherwise = [ 0xf0 + (oc `shiftR` 18) - , 0x80 + ((oc `shiftR` 12) .&. 0x3f) - , 0x80 + ((oc `shiftR` 6) .&. 0x3f) - , 0x80 + oc .&. 0x3f - ] - --- | Decode a UTF8 string packed into a list of Word8 values, directly to String -decode :: [Word8] -> String -decode [ ] = "" -decode (c:cs) - | c < 0x80 = chr (fromEnum c) : decode cs - | c < 0xc0 = replacementCharacter : decode cs - | c < 0xe0 = multi1 - | c < 0xf0 = multi_byte 2 0xf 0x800 - | c < 0xf8 = multi_byte 3 0x7 0x10000 - | c < 0xfc = multi_byte 4 0x3 0x200000 - | c < 0xfe = multi_byte 5 0x1 0x4000000 - | otherwise = replacementCharacter : decode cs - where - multi1 = case cs of - c1 : ds | c1 .&. 0xc0 == 0x80 -> - let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f) - in if d >= 0x000080 then toEnum d : decode ds - else replacementCharacter : decode ds - _ -> replacementCharacter : decode cs - - multi_byte :: Int -> Word8 -> Int -> String - multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) - where - aux 0 rs acc - | overlong <= acc && acc <= 0x10ffff && - (acc < 0xd800 || 0xdfff < acc) && - (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs - | otherwise = replacementCharacter : decode rs - - aux n (r:rs) acc - | r .&. 0xc0 == 0x80 = aux (n-1) rs - $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) - - aux _ rs _ = replacementCharacter : decode rs |