diff options
Diffstat (limited to 'haddock-library/src/Documentation/Haddock/Parser.hs')
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser.hs | 216 |
1 files changed, 140 insertions, 76 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index e8bc2761..b7ab85b0 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -1,8 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE UndecidableInstances #-} -{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE ViewPatterns #-} -- | -- Module : Documentation.Haddock.Parser -- Copyright : (c) Mateusz Kowalczyk 2013-2014, @@ -24,14 +21,14 @@ module Documentation.Haddock.Parser ( parseString, parseParas import Control.Applicative import Control.Arrow (first) -import Control.Monad (void, mfilter) -import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import Control.Monad import qualified Data.ByteString.Char8 as BS import Data.Char (chr, isAsciiUpper) import Data.List (stripPrefix, intercalate, unfoldr) import Data.Maybe (fromMaybe) import Data.Monoid import Documentation.Haddock.Doc +import Documentation.Haddock.Parser.Monad hiding (take, endOfLine) import Documentation.Haddock.Parser.Util import Documentation.Haddock.Types import Documentation.Haddock.Utf8 @@ -81,7 +78,7 @@ overIdentifier f d = g d g (DocExamples x) = DocExamples x g (DocHeader (Header l x)) = DocHeader . Header l $ g x -parse :: Parser a -> BS.ByteString -> a +parse :: Parser a -> BS.ByteString -> (ParserState, a) parse p = either err id . parseOnly (p <* endOfInput) where err = error . ("Haddock.Parser.parse: " ++) @@ -89,11 +86,21 @@ parse p = either err id . parseOnly (p <* endOfInput) -- | Main entry point to the parser. Appends the newline character -- to the input string. parseParas :: String -- ^ String to parse - -> DocH mod Identifier -parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") + -> MetaDoc mod Identifier +parseParas input = case parseParasState input of + (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state } + , _doc = a + } + +parseParasState :: String -> (ParserState, DocH mod Identifier) +parseParasState = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") where p :: Parser (DocH mod Identifier) - p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") + p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") + +parseParagraphs :: String -> Parser (DocH mod Identifier) +parseParagraphs input = case parseParasState input of + (state, a) -> setParserState state >> return a -- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which -- drops leading whitespace and encodes the string to UTF8 first. @@ -101,19 +108,19 @@ parseString :: String -> DocH mod Identifier parseString = parseStringBS . encodeUtf8 . dropWhile isSpace parseStringBS :: BS.ByteString -> DocH mod Identifier -parseStringBS = parse p +parseStringBS = snd . parse p where p :: Parser (DocH mod Identifier) - p = mconcat <$> many (monospace <|> anchor <|> identifier <|> moduleName - <|> picture <|> hyperlink <|> bold - <|> emphasis <|> encodedChar <|> string' - <|> skipSpecialChar) + p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName + <|> picture <|> markdownImage <|> hyperlink <|> bold + <|> emphasis <|> encodedChar <|> string' + <|> skipSpecialChar) -- | Parses and processes -- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> -- --- >>> parseOnly encodedChar "A" --- Right (DocString "A") +-- >>> parseString "A" +-- DocString "A" encodedChar :: Parser (DocH mod a) encodedChar = "&#" *> c <* ";" where @@ -145,16 +152,16 @@ skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar) -- | Emphasis parser. -- --- >>> parseOnly emphasis "/Hello world/" --- Right (DocEmphasis (DocString "Hello world")) +-- >>> parseString "/Hello world/" +-- DocEmphasis (DocString "Hello world") emphasis :: Parser (DocH mod Identifier) emphasis = DocEmphasis . parseStringBS <$> mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") -- | Bold parser. -- --- >>> parseOnly bold "__Hello world__" --- Right (DocBold (DocString "Hello world")) +-- >>> parseString "__Hello world__" +-- DocBold (DocString "Hello world") bold :: Parser (DocH mod Identifier) bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__") @@ -176,19 +183,23 @@ takeWhile1_ = mfilter (not . BS.null) . takeWhile_ -- | Text anchors to allow for jumping around the generated documentation. -- --- >>> parseOnly anchor "#Hello world#" --- Right (DocAName "Hello world") +-- >>> parseString "#Hello world#" +-- DocAName "Hello world" anchor :: Parser (DocH mod a) anchor = DocAName . decodeUtf8 <$> disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#") -- | Monospaced strings. -- --- >>> parseOnly monospace "@cruel@" --- Right (DocMonospaced (DocString "cruel")) +-- >>> parseString "@cruel@" +-- DocMonospaced (DocString "cruel") monospace :: Parser (DocH mod Identifier) -monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@") +monospace = DocMonospaced . parseStringBS + <$> ("@" *> takeWhile1_ (/= '@') <* "@") +-- | Module names: we try our reasonable best to only allow valid +-- Haskell module names, with caveat about not matching on technically +-- valid unicode symbols. moduleName :: Parser (DocH mod a) moduleName = DocModule <$> (char '"' *> modid <* char '"') where @@ -204,26 +215,45 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"') -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify -- a title for the picture. -- --- >>> parseOnly picture "<<hello.png>>" --- Right (DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing})) --- >>> parseOnly picture "<<hello.png world>>" --- Right (DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"})) +-- >>> parseString "<<hello.png>>" +-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing}) +-- >>> parseString "<<hello.png world>>" +-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"}) picture :: Parser (DocH mod a) picture = DocPic . makeLabeled Picture . decodeUtf8 <$> disallowNewline ("<<" *> takeUntil ">>") +markdownImage :: Parser (DocH mod a) +markdownImage = fromHyperlink <$> ("!" *> linkParser) + where + fromHyperlink (Hyperlink url label) = DocPic (Picture url label) + -- | Paragraph parser, called by 'parseParas'. paragraph :: Parser (DocH mod Identifier) -paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock - <|> property <|> header - <|> textParagraph) +paragraph = examples <|> skipSpace *> ( + since + <|> unorderedList + <|> orderedList + <|> birdtracks + <|> codeblock + <|> property + <|> header + <|> textParagraphThatStartsWithMarkdownLink + <|> definitionList + <|> docParagraph <$> textParagraph + ) + +since :: Parser (DocH mod a) +since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty + where + version = decimal `sepBy1'` "." -- | Headers inside the comment denoted with @=@ signs, up to 6 levels -- deep. -- --- >>> parseOnly header "= Hello" +-- >>> snd <$> parseOnly header "= Hello" -- Right (DocHeader (Header {headerLevel = 1, headerTitle = DocString "Hello"})) --- >>> parseOnly header "== World" +-- >>> snd <$> parseOnly header "== World" -- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"})) header :: Parser (DocH mod Identifier) header = do @@ -231,26 +261,37 @@ header = do pser = foldl1 (<|>) psers delim <- decodeUtf8 <$> pser line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString - rest <- paragraph <|> return mempty - return $ DocHeader (Header (length delim) line) <> rest + rest <- paragraph <|> return DocEmpty + return $ DocHeader (Header (length delim) line) `docAppend` rest textParagraph :: Parser (DocH mod Identifier) -textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine +textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine --- | List parser, called by 'paragraph'. -list :: Parser (DocH mod Identifier) -list = DocUnorderedList <$> unorderedList - <|> DocOrderedList <$> orderedList - <|> DocDefList <$> definitionList +textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier) +textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph) + where + optionalTextParagraph :: Parser (DocH mod Identifier) + optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty + + whitespace :: Parser (DocH mod a) + whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n") + where + f :: BS.ByteString -> Maybe BS.ByteString -> String + f xs (fromMaybe "" -> x) + | BS.null (xs <> x) = "" + | otherwise = " " -- | Parses unordered (bullet) lists. -unorderedList :: Parser [DocH mod Identifier] -unorderedList = ("*" <|> "-") *> innerList unorderedList +unorderedList :: Parser (DocH mod Identifier) +unorderedList = DocUnorderedList <$> p + where + p = ("*" <|> "-") *> innerList p -- | Parses ordered lists (numbered or dashed). -orderedList :: Parser [DocH mod Identifier] -orderedList = (paren <|> dot) *> innerList orderedList +orderedList :: Parser (DocH mod Identifier) +orderedList = DocOrderedList <$> p where + p = (paren <|> dot) *> innerList p dot = (decimal :: Parser Int) <* "." paren = "(" *> decimal <* ")" @@ -265,19 +306,21 @@ innerList item = do (cs, items) <- more item let contents = docParagraph . parseString . dropNLs . unlines $ c : cs return $ case items of - Left p -> [contents <> p] + Left p -> [contents `docAppend` p] Right i -> contents : i -- | Parses definition lists. -definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)] -definitionList = do - label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n"::String))) <* "]" - c <- takeLine - (cs, items) <- more definitionList - let contents = parseString . dropNLs . unlines $ c : cs - return $ case items of - Left p -> [(label, contents <> p)] - Right i -> (label, contents) : i +definitionList :: Parser (DocH mod Identifier) +definitionList = DocDefList <$> p + where + p = do + label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":") + c <- takeLine + (cs, items) <- more p + let contents = parseString . dropNLs . unlines $ c : cs + return $ case items of + Left x -> [(label, contents `docAppend` x)] + Right i -> (label, contents) : i -- | Drops all trailing newlines. dropNLs :: String -> String @@ -291,12 +334,12 @@ more :: Monoid a => Parser a more item = innerParagraphs <|> moreListItems item <|> moreContent item <|> pure ([], Right mempty) --- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs. +-- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs. innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a) innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs) --- | Attemps to fetch the next list if possibly. Used by 'innerList' and --- 'definitionList' to recursivly grab lists that aren't separated by a whole +-- | Attempts to fetch the next list if possibly. Used by 'innerList' and +-- 'definitionList' to recursively grab lists that aren't separated by a whole -- paragraph. moreListItems :: Parser a -> Parser ([String], Either (DocH mod Identifier) a) @@ -308,10 +351,10 @@ moreContent :: Monoid a => Parser a -> Parser ([String], Either (DocH mod Identifier) a) moreContent item = first . (:) <$> nonEmptyLine <*> more item --- | Runs the 'parseParas' parser on an indented paragraph. +-- | Parses an indented paragraph. -- The indentation is 4 spaces. indentedParagraphs :: Parser (DocH mod Identifier) -indentedParagraphs = parseParas . concat <$> dropFrontOfPara " " +indentedParagraphs = (concat <$> dropFrontOfPara " ") >>= parseParagraphs -- | Grab as many fully indented paragraphs as we can. dropFrontOfPara :: Parser BS.ByteString -> Parser [String] @@ -399,7 +442,7 @@ endOfLine = void "\n" <|> endOfInput -- | Property parser. -- --- >>> parseOnly property "prop> hello world" +-- >>> snd <$> parseOnly property "prop> hello world" -- Right (DocProperty "hello world") property :: Parser (DocH mod a) property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) @@ -442,11 +485,32 @@ codeblock = | isNewline && isSpace c = Just isNewline | otherwise = Just $ c == '\n' --- | Parses links that were specifically marked as such. hyperlink :: Parser (DocH mod a) hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> disallowNewline ("<" *> takeUntil ">") <|> autoUrl + <|> markdownLink + +markdownLink :: Parser (DocH mod a) +markdownLink = DocHyperlink <$> linkParser + +linkParser :: Parser Hyperlink +linkParser = flip Hyperlink <$> label <*> (whitespace *> url) + where + label :: Parser (Maybe String) + label = Just . strip . decode <$> ("[" *> takeUntil "]") + + whitespace :: Parser () + whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace) + + url :: Parser String + url = rejectWhitespace (decode <$> ("(" *> takeUntil ")")) + + rejectWhitespace :: MonadPlus m => m String -> m String + rejectWhitespace = mfilter (all (not . isSpace)) + + decode :: BS.ByteString -> String + decode = removeEscapes . decodeUtf8 -- | Looks for URL-like things to automatically hyperlink even if they -- weren't marked as links. @@ -456,32 +520,32 @@ autoUrl = mkLink <$> url url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) mkLink :: BS.ByteString -> DocH mod a mkLink s = case unsnoc s of - Just (xs, x) | x `elem` (",.!?"::String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x] + Just (xs, x) | x `elem` (",.!?" :: String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x] _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) -- | Parses strings between identifier delimiters. Consumes all input that it -- deems to be valid in an identifier. Note that it simply blindly consumes -- characters and does no actual validation itself. parseValid :: Parser String -parseValid = do - vs' <- many' $ utf8String "⋆" <|> return <$> idChar - let vs = concat vs' - c <- peekChar - case c of - Just '`' -> return vs - Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid) - <|> return vs - _ -> fail "outofvalid" +parseValid = p some where idChar = satisfy (`elem` ("_.!#$%&*+/<=>?@\\|-~:^"::String)) <|> digit <|> letter_ascii + p p' = do + vs' <- p' $ utf8String "⋆" <|> return <$> idChar + let vs = concat vs' + c <- peekChar' + case c of + '`' -> return vs + '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs + _ -> fail "outofvalid" -- | Parses UTF8 strings from ByteString streams. utf8String :: String -> Parser String utf8String x = decodeUtf8 <$> string (encodeUtf8 x) --- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the --- string it deems valid. +-- | Parses identifiers with help of 'parseValid'. Asks GHC for +-- 'String' from the string it deems valid. identifier :: Parser (DocH mod Identifier) identifier = do o <- idDelim |