diff options
Diffstat (limited to 'haddock-library/src/Documentation/Haddock/Parser.hs')
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser.hs | 104 |
1 files changed, 30 insertions, 74 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index 82d65a0a..a3bba38a 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} -- | @@ -27,7 +26,7 @@ module Documentation.Haddock.Parser ( import Control.Applicative import Control.Arrow (first) import Control.Monad -import Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace) +import Data.Char (chr, isUpper, isAlpha, isSpace) import Data.List (intercalate, unfoldr, elemIndex) import Data.Maybe (fromMaybe, mapMaybe) import Data.Monoid @@ -36,6 +35,7 @@ import Documentation.Haddock.Doc import Documentation.Haddock.Markup ( markup, plainMarkup ) import Documentation.Haddock.Parser.Monad import Documentation.Haddock.Parser.Util +import Documentation.Haddock.Parser.Identifier import Documentation.Haddock.Types import Prelude hiding (takeWhile) import qualified Prelude as P @@ -46,53 +46,26 @@ import Text.Parsec (try) import qualified Data.Text as T import Data.Text (Text) -#if MIN_VERSION_base(4,9,0) -import Text.Read.Lex (isSymbolChar) -#else -import Data.Char (GeneralCategory (..), - generalCategory) -#endif -- $setup -- >>> :set -XOverloadedStrings -#if !MIN_VERSION_base(4,9,0) --- inlined from base-4.10.0.0 -isSymbolChar :: Char -> Bool -isSymbolChar c = not (isPuncChar c) && case generalCategory c of - MathSymbol -> True - CurrencySymbol -> True - ModifierSymbol -> True - OtherSymbol -> True - DashPunctuation -> True - OtherPunctuation -> c `notElem` ("'\"" :: String) - ConnectorPunctuation -> c /= '_' - _ -> False - where - -- | The @special@ character class as defined in the Haskell Report. - isPuncChar :: Char -> Bool - isPuncChar = (`elem` (",;()[]{}`" :: String)) -#endif - --- | Identifier string surrounded with opening and closing quotes/backticks. -type Identifier = (Char, String, Char) - -- | Drops the quotes/backticks around all identifiers, as if they -- were valid but still 'String's. toRegular :: DocH mod Identifier -> DocH mod String -toRegular = fmap (\(_, x, _) -> x) +toRegular = fmap (\(Identifier _ _ x _) -> x) -- | Maps over 'DocIdentifier's over 'String' with potentially failing -- conversion using user-supplied function. If the conversion fails, -- the identifier is deemed to not be valid and is treated as a -- regular string. -overIdentifier :: (String -> Maybe a) +overIdentifier :: (Namespace -> String -> Maybe a) -> DocH mod Identifier -> DocH mod a overIdentifier f d = g d where - g (DocIdentifier (o, x, e)) = case f x of - Nothing -> DocString $ o : x ++ [e] + g (DocIdentifier (Identifier ns o x e)) = case f ns x of + Nothing -> DocString $ renderNs ns ++ [o] ++ x ++ [e] Just x' -> DocIdentifier x' g DocEmpty = DocEmpty g (DocAppend x x') = DocAppend (g x) (g x') @@ -254,7 +227,7 @@ takeWhile1_ = mfilter (not . T.null) . takeWhile_ -- DocAName "Hello world" anchor :: Parser (DocH mod a) anchor = DocAName . T.unpack <$> - disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#") + ("#" *> takeWhile1_ (\x -> x /= '#' && not (isSpace x)) <* "#") -- | Monospaced strings. -- @@ -269,12 +242,18 @@ monospace = DocMonospaced . parseParagraph -- Note that we allow '#' and '\' to support anchors (old style anchors are of -- the form "SomeModule\#anchor"). moduleName :: Parser (DocH mod a) -moduleName = DocModule <$> ("\"" *> modid <* "\"") +moduleName = DocModule <$> ("\"" *> (modid `maybeFollowedBy` anchor_) <* "\"") where modid = intercalate "." <$> conid `Parsec.sepBy1` "." + anchor_ = (++) + <$> (Parsec.string "#" <|> Parsec.string "\\#") + <*> many (Parsec.satisfy (\c -> c /= '"' && not (isSpace c))) + + maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf + conid = (:) <$> Parsec.satisfy (\c -> isAlpha c && isUpper c) - <*> many (conChar <|> Parsec.oneOf "\\#") + <*> many conChar conChar = Parsec.alphaNum <|> Parsec.char '_' @@ -294,7 +273,7 @@ picture = DocPic . makeLabeled Picture -- >>> parseString "\\(\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\)" -- DocMathInline "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}" mathInline :: Parser (DocH mod a) -mathInline = DocMathInline . T.unpack +mathInline = DocMathInline . T.unpack <$> disallowNewline ("\\(" *> takeUntil "\\)") -- | Display math parser, surrounded by \\[ and \\]. @@ -314,7 +293,8 @@ markdownImage :: Parser (DocH mod Identifier) markdownImage = DocPic . fromHyperlink <$> ("!" *> linkParser) where fromHyperlink (Hyperlink u l) = Picture u (fmap (markup stringMarkup) l) - stringMarkup = plainMarkup (const "") (\(l,c,r) -> [l] <> c <> [r]) + stringMarkup = plainMarkup (const "") renderIdent + renderIdent (Identifier ns l c r) = renderNs ns <> [l] <> c <> [r] -- | Paragraph parser, called by 'parseParas'. paragraph :: Parser (DocH mod Identifier) @@ -518,7 +498,7 @@ tableStepFour rs hdrIndex cells = case hdrIndex of -- extract cell contents given boundaries extract :: Int -> Int -> Int -> Int -> Text extract x y x2 y2 = T.intercalate "\n" - [ T.take (x2 - x + 1) $ T.drop x $ rs !! y' + [ T.stripEnd $ T.stripStart $ T.take (x2 - x + 1) $ T.drop x $ rs !! y' | y' <- [y .. y2] ] @@ -538,11 +518,11 @@ since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince header :: Parser (DocH mod Identifier) header = do let psers = map (string . flip T.replicate "=") [6, 5 .. 1] - pser = choice' psers - delim <- T.unpack <$> pser - line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseText + pser = Parsec.choice psers + depth <- T.length <$> pser + line <- parseText <$> (skipHorizontalSpace *> nonEmptyLine) rest <- try paragraph <|> return DocEmpty - return $ DocHeader (Header (length delim) line) `docAppend` rest + return $ DocHeader (Header depth line) `docAppend` rest textParagraph :: Parser (DocH mod Identifier) textParagraph = parseText . T.intercalate "\n" <$> some nonEmptyLine @@ -605,7 +585,7 @@ definitionList indent = DocDefList <$> p Right i -> (label, contents) : i -- | Drops all trailing newlines. -dropNLs :: Text -> Text +dropNLs :: Text -> Text dropNLs = T.dropWhileEnd (== '\n') -- | Main worker for 'innerList' and 'definitionList'. @@ -679,7 +659,7 @@ takeNonEmptyLine = do -- -- More precisely: skips all whitespace-only lines and returns indentation -- (horizontal space, might be empty) of that non-empty line. -takeIndent :: Parser Text +takeIndent :: Parser Text takeIndent = do indent <- takeHorizontalSpace choice' [ "\n" *> takeIndent @@ -737,14 +717,14 @@ examples = DocExamples <$> (many (try (skipHorizontalSpace *> "\n")) *> go) substituteBlankLine "<BLANKLINE>" = "" substituteBlankLine xs = xs -nonEmptyLine :: Parser Text +nonEmptyLine :: Parser Text nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine) takeLine :: Parser Text takeLine = try (takeWhile (/= '\n') <* endOfLine) endOfLine :: Parser () -endOfLine = void "\n" <|> Parsec.eof +endOfLine = void "\n" <|> Parsec.eof -- | Property parser. -- @@ -826,7 +806,7 @@ autoUrl :: Parser (DocH mod a) autoUrl = mkLink <$> url where url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (not . isSpace) - + mkLink :: Text -> DocH mod a mkLink s = case T.unsnoc s of Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x] @@ -836,30 +816,6 @@ autoUrl = mkLink <$> url mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing - --- | Parses strings between identifier delimiters. Consumes all input that it --- deems to be valid in an identifier. Note that it simply blindly consumes --- characters and does no actual validation itself. -parseValid :: Parser String -parseValid = p some - where - idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_') - - p p' = do - vs <- p' idChar - c <- peekChar' - case c of - '`' -> return vs - '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ] - _ -> fail "outofvalid" - --- | Parses identifiers with help of 'parseValid'. Asks GHC for --- 'String' from the string it deems valid. +-- | Parses identifiers with help of 'parseValid'. identifier :: Parser (DocH mod Identifier) -identifier = do - o <- idDelim - vid <- parseValid - e <- idDelim - return $ DocIdentifier (o, vid, e) - where - idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`') +identifier = DocIdentifier <$> parseValid |