aboutsummaryrefslogtreecommitdiff
path: root/haddock-library/src/Documentation/Haddock/Parser.hs
diff options
context:
space:
mode:
Diffstat (limited to 'haddock-library/src/Documentation/Haddock/Parser.hs')
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser.hs216
1 files changed, 140 insertions, 76 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
index e8bc2761..b7ab85b0 100644
--- a/haddock-library/src/Documentation/Haddock/Parser.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -1,8 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE StandaloneDeriving #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE UndecidableInstances #-}
-{-# LANGUAGE IncoherentInstances #-}
+{-# LANGUAGE ViewPatterns #-}
-- |
-- Module : Documentation.Haddock.Parser
-- Copyright : (c) Mateusz Kowalczyk 2013-2014,
@@ -24,14 +21,14 @@ module Documentation.Haddock.Parser ( parseString, parseParas
import Control.Applicative
import Control.Arrow (first)
-import Control.Monad (void, mfilter)
-import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
+import Control.Monad
import qualified Data.ByteString.Char8 as BS
import Data.Char (chr, isAsciiUpper)
import Data.List (stripPrefix, intercalate, unfoldr)
import Data.Maybe (fromMaybe)
import Data.Monoid
import Documentation.Haddock.Doc
+import Documentation.Haddock.Parser.Monad hiding (take, endOfLine)
import Documentation.Haddock.Parser.Util
import Documentation.Haddock.Types
import Documentation.Haddock.Utf8
@@ -81,7 +78,7 @@ overIdentifier f d = g d
g (DocExamples x) = DocExamples x
g (DocHeader (Header l x)) = DocHeader . Header l $ g x
-parse :: Parser a -> BS.ByteString -> a
+parse :: Parser a -> BS.ByteString -> (ParserState, a)
parse p = either err id . parseOnly (p <* endOfInput)
where
err = error . ("Haddock.Parser.parse: " ++)
@@ -89,11 +86,21 @@ parse p = either err id . parseOnly (p <* endOfInput)
-- | Main entry point to the parser. Appends the newline character
-- to the input string.
parseParas :: String -- ^ String to parse
- -> DocH mod Identifier
-parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
+ -> MetaDoc mod Identifier
+parseParas input = case parseParasState input of
+ (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state }
+ , _doc = a
+ }
+
+parseParasState :: String -> (ParserState, DocH mod Identifier)
+parseParasState = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
where
p :: Parser (DocH mod Identifier)
- p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
+ p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
+
+parseParagraphs :: String -> Parser (DocH mod Identifier)
+parseParagraphs input = case parseParasState input of
+ (state, a) -> setParserState state >> return a
-- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
-- drops leading whitespace and encodes the string to UTF8 first.
@@ -101,19 +108,19 @@ parseString :: String -> DocH mod Identifier
parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
parseStringBS :: BS.ByteString -> DocH mod Identifier
-parseStringBS = parse p
+parseStringBS = snd . parse p
where
p :: Parser (DocH mod Identifier)
- p = mconcat <$> many (monospace <|> anchor <|> identifier <|> moduleName
- <|> picture <|> hyperlink <|> bold
- <|> emphasis <|> encodedChar <|> string'
- <|> skipSpecialChar)
+ p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName
+ <|> picture <|> markdownImage <|> hyperlink <|> bold
+ <|> emphasis <|> encodedChar <|> string'
+ <|> skipSpecialChar)
-- | Parses and processes
-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
--
--- >>> parseOnly encodedChar "&#65;"
--- Right (DocString "A")
+-- >>> parseString "&#65;"
+-- DocString "A"
encodedChar :: Parser (DocH mod a)
encodedChar = "&#" *> c <* ";"
where
@@ -145,16 +152,16 @@ skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
-- | Emphasis parser.
--
--- >>> parseOnly emphasis "/Hello world/"
--- Right (DocEmphasis (DocString "Hello world"))
+-- >>> parseString "/Hello world/"
+-- DocEmphasis (DocString "Hello world")
emphasis :: Parser (DocH mod Identifier)
emphasis = DocEmphasis . parseStringBS <$>
mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
-- | Bold parser.
--
--- >>> parseOnly bold "__Hello world__"
--- Right (DocBold (DocString "Hello world"))
+-- >>> parseString "__Hello world__"
+-- DocBold (DocString "Hello world")
bold :: Parser (DocH mod Identifier)
bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
@@ -176,19 +183,23 @@ takeWhile1_ = mfilter (not . BS.null) . takeWhile_
-- | Text anchors to allow for jumping around the generated documentation.
--
--- >>> parseOnly anchor "#Hello world#"
--- Right (DocAName "Hello world")
+-- >>> parseString "#Hello world#"
+-- DocAName "Hello world"
anchor :: Parser (DocH mod a)
anchor = DocAName . decodeUtf8 <$>
disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#")
-- | Monospaced strings.
--
--- >>> parseOnly monospace "@cruel@"
--- Right (DocMonospaced (DocString "cruel"))
+-- >>> parseString "@cruel@"
+-- DocMonospaced (DocString "cruel")
monospace :: Parser (DocH mod Identifier)
-monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+monospace = DocMonospaced . parseStringBS
+ <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+-- | Module names: we try our reasonable best to only allow valid
+-- Haskell module names, with caveat about not matching on technically
+-- valid unicode symbols.
moduleName :: Parser (DocH mod a)
moduleName = DocModule <$> (char '"' *> modid <* char '"')
where
@@ -204,26 +215,45 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
-- a title for the picture.
--
--- >>> parseOnly picture "<<hello.png>>"
--- Right (DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing}))
--- >>> parseOnly picture "<<hello.png world>>"
--- Right (DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"}))
+-- >>> parseString "<<hello.png>>"
+-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing})
+-- >>> parseString "<<hello.png world>>"
+-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"})
picture :: Parser (DocH mod a)
picture = DocPic . makeLabeled Picture . decodeUtf8
<$> disallowNewline ("<<" *> takeUntil ">>")
+markdownImage :: Parser (DocH mod a)
+markdownImage = fromHyperlink <$> ("!" *> linkParser)
+ where
+ fromHyperlink (Hyperlink url label) = DocPic (Picture url label)
+
-- | Paragraph parser, called by 'parseParas'.
paragraph :: Parser (DocH mod Identifier)
-paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
- <|> property <|> header
- <|> textParagraph)
+paragraph = examples <|> skipSpace *> (
+ since
+ <|> unorderedList
+ <|> orderedList
+ <|> birdtracks
+ <|> codeblock
+ <|> property
+ <|> header
+ <|> textParagraphThatStartsWithMarkdownLink
+ <|> definitionList
+ <|> docParagraph <$> textParagraph
+ )
+
+since :: Parser (DocH mod a)
+since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty
+ where
+ version = decimal `sepBy1'` "."
-- | Headers inside the comment denoted with @=@ signs, up to 6 levels
-- deep.
--
--- >>> parseOnly header "= Hello"
+-- >>> snd <$> parseOnly header "= Hello"
-- Right (DocHeader (Header {headerLevel = 1, headerTitle = DocString "Hello"}))
--- >>> parseOnly header "== World"
+-- >>> snd <$> parseOnly header "== World"
-- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"}))
header :: Parser (DocH mod Identifier)
header = do
@@ -231,26 +261,37 @@ header = do
pser = foldl1 (<|>) psers
delim <- decodeUtf8 <$> pser
line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
- rest <- paragraph <|> return mempty
- return $ DocHeader (Header (length delim) line) <> rest
+ rest <- paragraph <|> return DocEmpty
+ return $ DocHeader (Header (length delim) line) `docAppend` rest
textParagraph :: Parser (DocH mod Identifier)
-textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
+textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine
--- | List parser, called by 'paragraph'.
-list :: Parser (DocH mod Identifier)
-list = DocUnorderedList <$> unorderedList
- <|> DocOrderedList <$> orderedList
- <|> DocDefList <$> definitionList
+textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier)
+textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph)
+ where
+ optionalTextParagraph :: Parser (DocH mod Identifier)
+ optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty
+
+ whitespace :: Parser (DocH mod a)
+ whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n")
+ where
+ f :: BS.ByteString -> Maybe BS.ByteString -> String
+ f xs (fromMaybe "" -> x)
+ | BS.null (xs <> x) = ""
+ | otherwise = " "
-- | Parses unordered (bullet) lists.
-unorderedList :: Parser [DocH mod Identifier]
-unorderedList = ("*" <|> "-") *> innerList unorderedList
+unorderedList :: Parser (DocH mod Identifier)
+unorderedList = DocUnorderedList <$> p
+ where
+ p = ("*" <|> "-") *> innerList p
-- | Parses ordered lists (numbered or dashed).
-orderedList :: Parser [DocH mod Identifier]
-orderedList = (paren <|> dot) *> innerList orderedList
+orderedList :: Parser (DocH mod Identifier)
+orderedList = DocOrderedList <$> p
where
+ p = (paren <|> dot) *> innerList p
dot = (decimal :: Parser Int) <* "."
paren = "(" *> decimal <* ")"
@@ -265,19 +306,21 @@ innerList item = do
(cs, items) <- more item
let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
return $ case items of
- Left p -> [contents <> p]
+ Left p -> [contents `docAppend` p]
Right i -> contents : i
-- | Parses definition lists.
-definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)]
-definitionList = do
- label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n"::String))) <* "]"
- c <- takeLine
- (cs, items) <- more definitionList
- let contents = parseString . dropNLs . unlines $ c : cs
- return $ case items of
- Left p -> [(label, contents <> p)]
- Right i -> (label, contents) : i
+definitionList :: Parser (DocH mod Identifier)
+definitionList = DocDefList <$> p
+ where
+ p = do
+ label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
+ c <- takeLine
+ (cs, items) <- more p
+ let contents = parseString . dropNLs . unlines $ c : cs
+ return $ case items of
+ Left x -> [(label, contents `docAppend` x)]
+ Right i -> (label, contents) : i
-- | Drops all trailing newlines.
dropNLs :: String -> String
@@ -291,12 +334,12 @@ more :: Monoid a => Parser a
more item = innerParagraphs <|> moreListItems item
<|> moreContent item <|> pure ([], Right mempty)
--- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
+-- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs.
innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a)
innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
--- | Attemps to fetch the next list if possibly. Used by 'innerList' and
--- 'definitionList' to recursivly grab lists that aren't separated by a whole
+-- | Attempts to fetch the next list if possibly. Used by 'innerList' and
+-- 'definitionList' to recursively grab lists that aren't separated by a whole
-- paragraph.
moreListItems :: Parser a
-> Parser ([String], Either (DocH mod Identifier) a)
@@ -308,10 +351,10 @@ moreContent :: Monoid a => Parser a
-> Parser ([String], Either (DocH mod Identifier) a)
moreContent item = first . (:) <$> nonEmptyLine <*> more item
--- | Runs the 'parseParas' parser on an indented paragraph.
+-- | Parses an indented paragraph.
-- The indentation is 4 spaces.
indentedParagraphs :: Parser (DocH mod Identifier)
-indentedParagraphs = parseParas . concat <$> dropFrontOfPara " "
+indentedParagraphs = (concat <$> dropFrontOfPara " ") >>= parseParagraphs
-- | Grab as many fully indented paragraphs as we can.
dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
@@ -399,7 +442,7 @@ endOfLine = void "\n" <|> endOfInput
-- | Property parser.
--
--- >>> parseOnly property "prop> hello world"
+-- >>> snd <$> parseOnly property "prop> hello world"
-- Right (DocProperty "hello world")
property :: Parser (DocH mod a)
property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
@@ -442,11 +485,32 @@ codeblock =
| isNewline && isSpace c = Just isNewline
| otherwise = Just $ c == '\n'
--- | Parses links that were specifically marked as such.
hyperlink :: Parser (DocH mod a)
hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
<$> disallowNewline ("<" *> takeUntil ">")
<|> autoUrl
+ <|> markdownLink
+
+markdownLink :: Parser (DocH mod a)
+markdownLink = DocHyperlink <$> linkParser
+
+linkParser :: Parser Hyperlink
+linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
+ where
+ label :: Parser (Maybe String)
+ label = Just . strip . decode <$> ("[" *> takeUntil "]")
+
+ whitespace :: Parser ()
+ whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace)
+
+ url :: Parser String
+ url = rejectWhitespace (decode <$> ("(" *> takeUntil ")"))
+
+ rejectWhitespace :: MonadPlus m => m String -> m String
+ rejectWhitespace = mfilter (all (not . isSpace))
+
+ decode :: BS.ByteString -> String
+ decode = removeEscapes . decodeUtf8
-- | Looks for URL-like things to automatically hyperlink even if they
-- weren't marked as links.
@@ -456,32 +520,32 @@ autoUrl = mkLink <$> url
url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
mkLink :: BS.ByteString -> DocH mod a
mkLink s = case unsnoc s of
- Just (xs, x) | x `elem` (",.!?"::String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
+ Just (xs, x) | x `elem` (",.!?" :: String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
_ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
-- | Parses strings between identifier delimiters. Consumes all input that it
-- deems to be valid in an identifier. Note that it simply blindly consumes
-- characters and does no actual validation itself.
parseValid :: Parser String
-parseValid = do
- vs' <- many' $ utf8String "⋆" <|> return <$> idChar
- let vs = concat vs'
- c <- peekChar
- case c of
- Just '`' -> return vs
- Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
- <|> return vs
- _ -> fail "outofvalid"
+parseValid = p some
where
idChar = satisfy (`elem` ("_.!#$%&*+/<=>?@\\|-~:^"::String))
<|> digit <|> letter_ascii
+ p p' = do
+ vs' <- p' $ utf8String "⋆" <|> return <$> idChar
+ let vs = concat vs'
+ c <- peekChar'
+ case c of
+ '`' -> return vs
+ '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs
+ _ -> fail "outofvalid"
-- | Parses UTF8 strings from ByteString streams.
utf8String :: String -> Parser String
utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
--- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
--- string it deems valid.
+-- | Parses identifiers with help of 'parseValid'. Asks GHC for
+-- 'String' from the string it deems valid.
identifier :: Parser (DocH mod Identifier)
identifier = do
o <- idDelim