diff options
Diffstat (limited to 'haddock-library/src')
-rw-r--r-- | haddock-library/src/CompatPrelude.hs | 52 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser.hs | 70 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser/Identifier.hs | 160 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser/Monad.hs | 16 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Types.hs | 16 |
5 files changed, 252 insertions, 62 deletions
diff --git a/haddock-library/src/CompatPrelude.hs b/haddock-library/src/CompatPrelude.hs new file mode 100644 index 00000000..60fa94d9 --- /dev/null +++ b/haddock-library/src/CompatPrelude.hs @@ -0,0 +1,52 @@ +{-# LANGUAGE CPP #-} + +#if !MIN_VERSION_base(4,5,0) +# error This module doesn't provide compat-shims for versions prior to base-4.5 +#endif + +-- | Bridge impedance mismatch of different @base@ versions back till @base-4.5@ (GHC 7.4.2) +module CompatPrelude + ( ($>) + , isSymbolChar + ) where + +#if MIN_VERSION_base(4,7,0) +import Data.Functor ( ($>) ) +#else +import Data.Functor ( (<$) ) +#endif + +#if MIN_VERSION_base(4,9,0) +import Text.Read.Lex (isSymbolChar) +#else +import Data.Char (GeneralCategory(..), generalCategory) +#endif + + +#if !MIN_VERSION_base(4,7,0) +infixl 4 $> + +-- | Flipped version of '<$'. +-- +-- @since 4.7.0.0 +($>) :: Functor f => f a -> b -> f b +($>) = flip (<$) +#endif + +#if !MIN_VERSION_base(4,9,0) +-- inlined from base-4.10.0.0 +isSymbolChar :: Char -> Bool +isSymbolChar c = not (isPuncChar c) && case generalCategory c of + MathSymbol -> True + CurrencySymbol -> True + ModifierSymbol -> True + OtherSymbol -> True + DashPunctuation -> True + OtherPunctuation -> c `notElem` "'\"" + ConnectorPunctuation -> c /= '_' + _ -> False + where + -- | The @special@ character class as defined in the Haskell Report. + isPuncChar :: Char -> Bool + isPuncChar = (`elem` (",;()[]{}`" :: String)) +#endif diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index 82d65a0a..028422a7 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} -- | @@ -27,7 +26,7 @@ module Documentation.Haddock.Parser ( import Control.Applicative import Control.Arrow (first) import Control.Monad -import Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace) +import Data.Char (chr, isUpper, isAlpha, isSpace) import Data.List (intercalate, unfoldr, elemIndex) import Data.Maybe (fromMaybe, mapMaybe) import Data.Monoid @@ -36,6 +35,7 @@ import Documentation.Haddock.Doc import Documentation.Haddock.Markup ( markup, plainMarkup ) import Documentation.Haddock.Parser.Monad import Documentation.Haddock.Parser.Util +import Documentation.Haddock.Parser.Identifier import Documentation.Haddock.Types import Prelude hiding (takeWhile) import qualified Prelude as P @@ -46,53 +46,26 @@ import Text.Parsec (try) import qualified Data.Text as T import Data.Text (Text) -#if MIN_VERSION_base(4,9,0) -import Text.Read.Lex (isSymbolChar) -#else -import Data.Char (GeneralCategory (..), - generalCategory) -#endif -- $setup -- >>> :set -XOverloadedStrings -#if !MIN_VERSION_base(4,9,0) --- inlined from base-4.10.0.0 -isSymbolChar :: Char -> Bool -isSymbolChar c = not (isPuncChar c) && case generalCategory c of - MathSymbol -> True - CurrencySymbol -> True - ModifierSymbol -> True - OtherSymbol -> True - DashPunctuation -> True - OtherPunctuation -> c `notElem` ("'\"" :: String) - ConnectorPunctuation -> c /= '_' - _ -> False - where - -- | The @special@ character class as defined in the Haskell Report. - isPuncChar :: Char -> Bool - isPuncChar = (`elem` (",;()[]{}`" :: String)) -#endif - --- | Identifier string surrounded with opening and closing quotes/backticks. -type Identifier = (Char, String, Char) - -- | Drops the quotes/backticks around all identifiers, as if they -- were valid but still 'String's. toRegular :: DocH mod Identifier -> DocH mod String -toRegular = fmap (\(_, x, _) -> x) +toRegular = fmap (\(Identifier _ _ x _) -> x) -- | Maps over 'DocIdentifier's over 'String' with potentially failing -- conversion using user-supplied function. If the conversion fails, -- the identifier is deemed to not be valid and is treated as a -- regular string. -overIdentifier :: (String -> Maybe a) +overIdentifier :: (Namespace -> String -> Maybe a) -> DocH mod Identifier -> DocH mod a overIdentifier f d = g d where - g (DocIdentifier (o, x, e)) = case f x of - Nothing -> DocString $ o : x ++ [e] + g (DocIdentifier (Identifier ns o x e)) = case f ns x of + Nothing -> DocString $ renderNs ns ++ [o] ++ x ++ [e] Just x' -> DocIdentifier x' g DocEmpty = DocEmpty g (DocAppend x x') = DocAppend (g x) (g x') @@ -314,7 +287,8 @@ markdownImage :: Parser (DocH mod Identifier) markdownImage = DocPic . fromHyperlink <$> ("!" *> linkParser) where fromHyperlink (Hyperlink u l) = Picture u (fmap (markup stringMarkup) l) - stringMarkup = plainMarkup (const "") (\(l,c,r) -> [l] <> c <> [r]) + stringMarkup = plainMarkup (const "") renderIdent + renderIdent (Identifier ns l c r) = renderNs ns <> [l] <> c <> [r] -- | Paragraph parser, called by 'parseParas'. paragraph :: Parser (DocH mod Identifier) @@ -836,30 +810,6 @@ autoUrl = mkLink <$> url mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing - --- | Parses strings between identifier delimiters. Consumes all input that it --- deems to be valid in an identifier. Note that it simply blindly consumes --- characters and does no actual validation itself. -parseValid :: Parser String -parseValid = p some - where - idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_') - - p p' = do - vs <- p' idChar - c <- peekChar' - case c of - '`' -> return vs - '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ] - _ -> fail "outofvalid" - --- | Parses identifiers with help of 'parseValid'. Asks GHC for --- 'String' from the string it deems valid. +-- | Parses identifiers with help of 'parseValid'. identifier :: Parser (DocH mod Identifier) -identifier = do - o <- idDelim - vid <- parseValid - e <- idDelim - return $ DocIdentifier (o, vid, e) - where - idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`') +identifier = DocIdentifier <$> parseValid diff --git a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs new file mode 100644 index 00000000..b8afb951 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs @@ -0,0 +1,160 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ViewPatterns #-} +-- | +-- Module : Documentation.Haddock.Parser.Identifier +-- Copyright : (c) Alec Theriault 2019, +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Functionality for parsing identifiers and operators + +module Documentation.Haddock.Parser.Identifier ( + Identifier(..), + parseValid, +) where + +import Documentation.Haddock.Types ( Namespace(..) ) +import Documentation.Haddock.Parser.Monad +import qualified Text.Parsec as Parsec +import Text.Parsec.Pos ( updatePosChar ) +import Text.Parsec ( State(..) + , getParserState, setParserState ) + +import Data.Text (Text) +import qualified Data.Text as T + +import Data.Char (isAlpha, isAlphaNum) +import Control.Monad (guard) +import Data.Maybe +import CompatPrelude + +-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks. +data Identifier = Identifier !Namespace !Char String !Char + deriving (Show, Eq) + +parseValid :: Parser Identifier +parseValid = do + s@State{ stateInput = inp, statePos = pos } <- getParserState + + case takeIdentifier inp of + Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier" + Just (ns, op, ident, cl, inp') -> + let posOp = updatePosChar pos op + posIdent = T.foldl updatePosChar posOp ident + posCl = updatePosChar posIdent cl + s' = s{ stateInput = inp', statePos = posCl } + in setParserState s' $> Identifier ns op (T.unpack ident) cl + + +-- | Try to parse a delimited identifier off the front of the given input. +-- +-- This tries to match as many valid Haskell identifiers/operators as possible, +-- to the point of sometimes accepting invalid things (ex: keywords). Some +-- considerations: +-- +-- - operators and identifiers can have module qualifications +-- - operators can be wrapped in parens (for prefix) +-- - identifiers can be wrapped in backticks (for infix) +-- - delimiters are backticks or regular ticks +-- - since regular ticks are also valid in identifiers, we opt for the +-- longest successful parse +-- +-- This function should make /O(1)/ allocations +takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text) +takeIdentifier input = listToMaybe $ do + + -- Optional namespace + let (ns, input') = case T.uncons input of + Just ('v', i) -> (Value, i) + Just ('t', i) -> (Type, i) + _ -> (None, input) + + -- Opening tick + (op, input'') <- maybeToList (T.uncons input') + guard (op == '\'' || op == '`') + + -- Identifier/operator + (ident, input''') <- wrapped input'' + + -- Closing tick + (cl, input'''') <- maybeToList (T.uncons input''') + guard (cl == '\'' || cl == '`') + + return (ns, op, ident, cl, input'''') + + where + + -- | Parse out a wrapped, possibly qualified, operator or identifier + wrapped t = do + (c, t' ) <- maybeToList (T.uncons t) + -- Tuples + case c of + '(' | Just (c', _) <- T.uncons t' + , c' == ',' || c' == ')' + -> do let (commas, t'') = T.span (== ',') t' + (')', t''') <- maybeToList (T.uncons t'') + return (T.take (T.length commas + 2) t, t''') + + -- Parenthesized + '(' -> do (n, t'' ) <- general False 0 [] t' + (')', t''') <- maybeToList (T.uncons t'') + return (T.take (n + 2) t, t''') + + -- Backticked + '`' -> do (n, t'' ) <- general False 0 [] t' + ('`', t''') <- maybeToList (T.uncons t'') + return (T.take (n + 2) t, t''') + + -- Unadorned + _ -> do (n, t'' ) <- general False 0 [] t + return (T.take n t, t'') + + -- | Parse out a possibly qualified operator or identifier + general :: Bool -- ^ refuse inputs starting with operators + -> Int -- ^ total characters \"consumed\" so far + -> [(Int, Text)] -- ^ accumulated results + -> Text -- ^ current input + -> [(Int, Text)] -- ^ total characters parsed & what remains + general !identOnly !i acc t + -- Starts with an identifier (either just an identifier, or a module qual) + | Just (n, rest) <- identLike t + = if T.null rest + then acc + else case T.head rest of + '`' -> (n + i, rest) : acc + ')' -> (n + i, rest) : acc + '.' -> general False (n + i + 1) acc (T.tail rest) + '\'' -> let (m, rest') = quotes rest + in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (T.tail rest') + _ -> acc + + -- An operator + | Just (n, rest) <- optr t + , not identOnly + = (n + i, rest) : acc + + -- Anything else + | otherwise + = acc + + -- | Parse an identifier off the front of the input + identLike t + | T.null t = Nothing + | isAlpha (T.head t) || '_' == T.head t + = let !(idt, rest) = T.span (\c -> isAlphaNum c || c == '_') t + !(octos, rest') = T.span (== '#') rest + in Just (T.length idt + T.length octos, rest') + | otherwise = Nothing + + -- | Parse all but the last quote off the front of the input + -- PRECONDITION: T.head t == '\'' + quotes :: Text -> (Int, Text) + quotes t = let !n = T.length (T.takeWhile (== '\'') t) - 1 + in (n, T.drop n t) + + -- | Parse an operator off the front of the input + optr t = let !(op, rest) = T.span isSymbolChar t + in if T.null op then Nothing else Just (T.length op, rest) diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index 8f5bd217..7c73a168 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -4,6 +4,18 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeSynonymInstances #-} +-- | +-- Module : Documentation.Haddock.Parser.Monad +-- Copyright : (c) Alec Theriault 2018-2019, +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Defines the Parsec monad over which all parsing is done and also provides +-- more efficient versions of the usual parsec combinator functions (but +-- specialized to 'Text'). module Documentation.Haddock.Parser.Monad where @@ -17,7 +29,6 @@ import qualified Data.Text as T import Data.Text ( Text ) import Control.Monad ( mfilter ) -import Data.Functor ( ($>) ) import Data.String ( IsString(..) ) import Data.Bits ( Bits(..) ) import Data.Char ( ord ) @@ -25,7 +36,9 @@ import Data.List ( foldl' ) import Control.Applicative as App import Documentation.Haddock.Types ( Version ) + import Prelude hiding (takeWhile) +import CompatPrelude -- | The only bit of information we really care about truding along with us -- through parsing is the version attached to a @\@since@ annotation - if @@ -96,7 +109,6 @@ takeWhile f = do s' = s{ stateInput = inp', statePos = pos' } setParserState s' $> t - -- | Like 'takeWhile', but fails if no characters matched. -- -- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient. diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs index f8f7d353..4cf61fee 100644 --- a/haddock-library/src/Documentation/Haddock/Types.hs +++ b/haddock-library/src/Documentation/Haddock/Types.hs @@ -44,14 +44,17 @@ data MetaDoc mod id = } deriving (Eq, Show, Functor, Foldable, Traversable) #if MIN_VERSION_base(4,8,0) +-- | __NOTE__: Only defined for @base >= 4.8.0@ instance Bifunctor MetaDoc where bimap f g (MetaDoc m d) = MetaDoc m (bimap f g d) #endif #if MIN_VERSION_base(4,10,0) +-- | __NOTE__: Only defined for @base >= 4.10.0@ instance Bifoldable MetaDoc where bifoldr f g z d = bifoldr f g z (_doc d) +-- | __NOTE__: Only defined for @base >= 4.10.0@ instance Bitraversable MetaDoc where bitraverse f g (MetaDoc m d) = MetaDoc m <$> bitraverse f g d #endif @@ -131,6 +134,7 @@ data DocH mod id deriving (Eq, Show, Functor, Foldable, Traversable) #if MIN_VERSION_base(4,8,0) +-- | __NOTE__: Only defined for @base >= 4.8.0@ instance Bifunctor DocH where bimap _ _ DocEmpty = DocEmpty bimap f g (DocAppend docA docB) = DocAppend (bimap f g docA) (bimap f g docB) @@ -159,6 +163,7 @@ instance Bifunctor DocH where #endif #if MIN_VERSION_base(4,10,0) +-- | __NOTE__: Only defined for @base >= 4.10.0@ instance Bifoldable DocH where bifoldr f g z (DocAppend docA docB) = bifoldr f g (bifoldr f g z docA) docB bifoldr f g z (DocParagraph doc) = bifoldr f g z doc @@ -176,6 +181,7 @@ instance Bifoldable DocH where bifoldr f g z (DocTable (Table header body)) = foldr (\r acc -> foldr (flip (bifoldr f g)) acc r) (foldr (\r acc -> foldr (flip (bifoldr f g)) acc r) z body) header bifoldr _ _ z _ = z +-- | __NOTE__: Only defined for @base >= 4.10.0@ instance Bitraversable DocH where bitraverse _ _ DocEmpty = pure DocEmpty bitraverse f g (DocAppend docA docB) = DocAppend <$> bitraverse f g docA <*> bitraverse f g docB @@ -203,6 +209,16 @@ instance Bitraversable DocH where bitraverse f g (DocTable (Table header body)) = (\h b -> DocTable (Table h b)) <$> traverse (traverse (bitraverse f g)) header <*> traverse (traverse (bitraverse f g)) body #endif +-- | The namespace qualification for an identifier. +data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show) + +-- | Render the a namespace into the same format it was initially parsed. +renderNs :: Namespace -> String +renderNs Value = "v" +renderNs Type = "t" +renderNs None = "" + + -- | 'DocMarkupH' is a set of instructions for marking up documentation. -- In fact, it's really just a mapping from 'Doc' to some other -- type [a], where [a] is usually the type of the output (HTML, say). |