1 files changed, 140 insertions, 76 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
index e8bc2761..b7ab85b0 100644
--- a/haddock-library/src/Documentation/Haddock/Parser.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -1,8 +1,5 @@
 {-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE StandaloneDeriving #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE UndecidableInstances #-}
-{-# LANGUAGE IncoherentInstances #-}
+{-# LANGUAGE ViewPatterns #-}
 -- |
 -- Module      :  Documentation.Haddock.Parser
 -- Copyright   :  (c) Mateusz Kowalczyk 2013-2014,
@@ -24,14 +21,14 @@ module Documentation.Haddock.Parser ( parseString, parseParas
 
 import           Control.Applicative
 import           Control.Arrow (first)
-import           Control.Monad (void, mfilter)
-import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine)
+import           Control.Monad
 import qualified Data.ByteString.Char8 as BS
 import           Data.Char (chr, isAsciiUpper)
 import           Data.List (stripPrefix, intercalate, unfoldr)
 import           Data.Maybe (fromMaybe)
 import           Data.Monoid
 import           Documentation.Haddock.Doc
+import           Documentation.Haddock.Parser.Monad hiding (take, endOfLine)
 import           Documentation.Haddock.Parser.Util
 import           Documentation.Haddock.Types
 import           Documentation.Haddock.Utf8
@@ -81,7 +78,7 @@ overIdentifier f d = g d
     g (DocExamples x) = DocExamples x
     g (DocHeader (Header l x)) = DocHeader . Header l $ g x
 
-parse :: Parser a -> BS.ByteString -> a
+parse :: Parser a -> BS.ByteString -> (ParserState, a)
 parse p = either err id . parseOnly (p <* endOfInput)
   where
     err = error . ("Haddock.Parser.parse: " ++)
@@ -89,11 +86,21 @@ parse p = either err id . parseOnly (p <* endOfInput)
 -- | Main entry point to the parser. Appends the newline character
 -- to the input string.
 parseParas :: String -- ^ String to parse
-           -> DocH mod Identifier
-parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
+           -> MetaDoc mod Identifier
+parseParas input = case parseParasState input of
+  (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state }
+                        , _doc = a
+                        }
+
+parseParasState :: String -> (ParserState, DocH mod Identifier)
+parseParasState = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
   where
     p :: Parser (DocH mod Identifier)
-    p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
+    p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
+
+parseParagraphs :: String -> Parser (DocH mod Identifier)
+parseParagraphs input = case parseParasState input of
+  (state, a) -> setParserState state >> return a
 
 -- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
 -- drops leading whitespace and encodes the string to UTF8 first.
@@ -101,19 +108,19 @@ parseString :: String -> DocH mod Identifier
 parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
 
 parseStringBS :: BS.ByteString -> DocH mod Identifier
-parseStringBS = parse p
+parseStringBS = snd . parse p
   where
     p :: Parser (DocH mod Identifier)
-    p = mconcat <$> many (monospace <|> anchor <|> identifier <|> moduleName
-                          <|> picture <|> hyperlink <|> bold
-                          <|> emphasis <|> encodedChar <|> string'
-                          <|> skipSpecialChar)
+    p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName
+                            <|> picture <|> markdownImage <|> hyperlink <|> bold
+                            <|> emphasis <|> encodedChar <|> string'
+                            <|> skipSpecialChar)
 
 -- | Parses and processes
 -- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
 --
--- >>> parseOnly encodedChar "&#65;"
--- Right (DocString "A")
+-- >>> parseString "&#65;"
+-- DocString "A"
 encodedChar :: Parser (DocH mod a)
 encodedChar = "&#" *> c <* ";"
   where
@@ -145,16 +152,16 @@ skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
 
 -- | Emphasis parser.
 --
--- >>> parseOnly emphasis "/Hello world/"
--- Right (DocEmphasis (DocString "Hello world"))
+-- >>> parseString "/Hello world/"
+-- DocEmphasis (DocString "Hello world")
 emphasis :: Parser (DocH mod Identifier)
 emphasis = DocEmphasis . parseStringBS <$>
   mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
 
 -- | Bold parser.
 --
--- >>> parseOnly bold "__Hello world__"
--- Right (DocBold (DocString "Hello world"))
+-- >>> parseString "__Hello world__"
+-- DocBold (DocString "Hello world")
 bold :: Parser (DocH mod Identifier)
 bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
 
@@ -176,19 +183,23 @@ takeWhile1_ = mfilter (not . BS.null) . takeWhile_
 
 -- | Text anchors to allow for jumping around the generated documentation.
 --
--- >>> parseOnly anchor "#Hello world#"
--- Right (DocAName "Hello world")
+-- >>> parseString "#Hello world#"
+-- DocAName "Hello world"
 anchor :: Parser (DocH mod a)
 anchor = DocAName . decodeUtf8 <$>
          disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#")
 
 -- | Monospaced strings.
 --
--- >>> parseOnly monospace "@cruel@"
--- Right (DocMonospaced (DocString "cruel"))
+-- >>> parseString "@cruel@"
+-- DocMonospaced (DocString "cruel")
 monospace :: Parser (DocH mod Identifier)
-monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+monospace = DocMonospaced . parseStringBS
+            <$> ("@" *> takeWhile1_ (/= '@') <* "@")
 
+-- | Module names: we try our reasonable best to only allow valid
+-- Haskell module names, with caveat about not matching on technically
+-- valid unicode symbols.
 moduleName :: Parser (DocH mod a)
 moduleName = DocModule <$> (char '"' *> modid <* char '"')
   where
@@ -204,26 +215,45 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
 -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
 -- a title for the picture.
 --
--- >>> parseOnly picture "<<hello.png>>"
--- Right (DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing}))
--- >>> parseOnly picture "<<hello.png world>>"
--- Right (DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"}))
+-- >>> parseString "<<hello.png>>"
+-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing})
+-- >>> parseString "<<hello.png world>>"
+-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"})
 picture :: Parser (DocH mod a)
 picture = DocPic . makeLabeled Picture . decodeUtf8
           <$> disallowNewline ("<<" *> takeUntil ">>")
 
+markdownImage :: Parser (DocH mod a)
+markdownImage = fromHyperlink <$> ("!" *> linkParser)
+  where
+    fromHyperlink (Hyperlink url label) = DocPic (Picture url label)
+
 -- | Paragraph parser, called by 'parseParas'.
 paragraph :: Parser (DocH mod Identifier)
-paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock
-                                       <|> property <|> header
-                                       <|> textParagraph)
+paragraph = examples <|> skipSpace *> (
+      since
+  <|> unorderedList
+  <|> orderedList
+  <|> birdtracks
+  <|> codeblock
+  <|> property
+  <|> header
+  <|> textParagraphThatStartsWithMarkdownLink
+  <|> definitionList
+  <|> docParagraph <$> textParagraph
+  )
+
+since :: Parser (DocH mod a)
+since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty
+  where
+    version = decimal `sepBy1'` "."
 
 -- | Headers inside the comment denoted with @=@ signs, up to 6 levels
 -- deep.
 --
--- >>> parseOnly header "= Hello"
+-- >>> snd <$> parseOnly header "= Hello"
 -- Right (DocHeader (Header {headerLevel = 1, headerTitle = DocString "Hello"}))
--- >>> parseOnly header "== World"
+-- >>> snd <$> parseOnly header "== World"
 -- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"}))
 header :: Parser (DocH mod Identifier)
 header = do
@@ -231,26 +261,37 @@ header = do
       pser = foldl1 (<|>) psers
   delim <- decodeUtf8 <$> pser
   line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
-  rest <- paragraph <|> return mempty
-  return $ DocHeader (Header (length delim) line) <> rest
+  rest <- paragraph <|> return DocEmpty
+  return $ DocHeader (Header (length delim) line) `docAppend` rest
 
 textParagraph :: Parser (DocH mod Identifier)
-textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine
+textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine
 
--- | List parser, called by 'paragraph'.
-list :: Parser (DocH mod Identifier)
-list = DocUnorderedList <$> unorderedList
-       <|> DocOrderedList <$> orderedList
-       <|> DocDefList <$> definitionList
+textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier)
+textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph)
+  where
+    optionalTextParagraph :: Parser (DocH mod Identifier)
+    optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty
+
+    whitespace :: Parser (DocH mod a)
+    whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n")
+      where
+        f :: BS.ByteString -> Maybe BS.ByteString -> String
+        f xs (fromMaybe "" -> x)
+          | BS.null (xs <> x) = ""
+          | otherwise = " "
 
 -- | Parses unordered (bullet) lists.
-unorderedList :: Parser [DocH mod Identifier]
-unorderedList = ("*" <|> "-") *> innerList unorderedList
+unorderedList :: Parser (DocH mod Identifier)
+unorderedList = DocUnorderedList <$> p
+  where
+    p = ("*" <|> "-") *> innerList p
 
 -- | Parses ordered lists (numbered or dashed).
-orderedList :: Parser [DocH mod Identifier]
-orderedList = (paren <|> dot) *> innerList orderedList
+orderedList :: Parser (DocH mod Identifier)
+orderedList = DocOrderedList <$> p
   where
+    p = (paren <|> dot) *> innerList p
     dot = (decimal :: Parser Int) <* "."
     paren = "(" *> decimal <* ")"
 
@@ -265,19 +306,21 @@ innerList item = do
   (cs, items) <- more item
   let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
   return $ case items of
-    Left p -> [contents <> p]
+    Left p -> [contents `docAppend` p]
     Right i -> contents : i
 
 -- | Parses definition lists.
-definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)]
-definitionList = do
-  label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n"::String))) <* "]"
-  c <- takeLine
-  (cs, items) <- more definitionList
-  let contents = parseString . dropNLs . unlines $ c : cs
-  return $ case items of
-    Left p -> [(label, contents <> p)]
-    Right i -> (label, contents) : i
+definitionList :: Parser (DocH mod Identifier)
+definitionList = DocDefList <$> p
+  where
+    p = do
+      label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
+      c <- takeLine
+      (cs, items) <- more p
+      let contents = parseString . dropNLs . unlines $ c : cs
+      return $ case items of
+        Left x -> [(label, contents `docAppend` x)]
+        Right i -> (label, contents) : i
 
 -- | Drops all trailing newlines.
 dropNLs :: String -> String
@@ -291,12 +334,12 @@ more :: Monoid a => Parser a
 more item = innerParagraphs <|> moreListItems item
             <|> moreContent item <|> pure ([], Right mempty)
 
--- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs.
+-- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs.
 innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a)
 innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
 
--- | Attemps to fetch the next list if possibly. Used by 'innerList' and
--- 'definitionList' to recursivly grab lists that aren't separated by a whole
+-- | Attempts to fetch the next list if possibly. Used by 'innerList' and
+-- 'definitionList' to recursively grab lists that aren't separated by a whole
 -- paragraph.
 moreListItems :: Parser a
               -> Parser ([String], Either (DocH mod Identifier) a)
@@ -308,10 +351,10 @@ moreContent :: Monoid a => Parser a
             -> Parser ([String], Either (DocH mod Identifier) a)
 moreContent item = first . (:) <$> nonEmptyLine <*> more item
 
--- | Runs the 'parseParas' parser on an indented paragraph.
+-- | Parses an indented paragraph.
 -- The indentation is 4 spaces.
 indentedParagraphs :: Parser (DocH mod Identifier)
-indentedParagraphs = parseParas . concat <$> dropFrontOfPara "    "
+indentedParagraphs = (concat <$> dropFrontOfPara "    ") >>= parseParagraphs
 
 -- | Grab as many fully indented paragraphs as we can.
 dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
@@ -399,7 +442,7 @@ endOfLine = void "\n" <|> endOfInput
 
 -- | Property parser.
 --
--- >>> parseOnly property "prop> hello world"
+-- >>> snd <$> parseOnly property "prop> hello world"
 -- Right (DocProperty "hello world")
 property :: Parser (DocH mod a)
 property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
@@ -442,11 +485,32 @@ codeblock =
           | isNewline && isSpace c = Just isNewline
           | otherwise = Just $ c == '\n'
 
--- | Parses links that were specifically marked as such.
 hyperlink :: Parser (DocH mod a)
 hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8
               <$> disallowNewline ("<" *> takeUntil ">")
             <|> autoUrl
+            <|> markdownLink
+
+markdownLink :: Parser (DocH mod a)
+markdownLink = DocHyperlink <$> linkParser
+
+linkParser :: Parser Hyperlink
+linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
+  where
+    label :: Parser (Maybe String)
+    label = Just . strip . decode <$> ("[" *> takeUntil "]")
+
+    whitespace :: Parser ()
+    whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace)
+
+    url :: Parser String
+    url = rejectWhitespace (decode <$> ("(" *> takeUntil ")"))
+
+    rejectWhitespace :: MonadPlus m => m String -> m String
+    rejectWhitespace = mfilter (all (not . isSpace))
+
+    decode :: BS.ByteString -> String
+    decode = removeEscapes . decodeUtf8
 
 -- | Looks for URL-like things to automatically hyperlink even if they
 -- weren't marked as links.
@@ -456,32 +520,32 @@ autoUrl = mkLink <$> url
     url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
     mkLink :: BS.ByteString -> DocH mod a
     mkLink s = case unsnoc s of
-      Just (xs, x) | x `elem` (",.!?"::String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
+      Just (xs, x) | x `elem` (",.!?" :: String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
       _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
 
 -- | Parses strings between identifier delimiters. Consumes all input that it
 -- deems to be valid in an identifier. Note that it simply blindly consumes
 -- characters and does no actual validation itself.
 parseValid :: Parser String
-parseValid = do
-  vs' <- many' $ utf8String "⋆" <|> return <$> idChar
-  let vs = concat vs'
-  c <- peekChar
-  case c of
-    Just '`' -> return vs
-    Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
-                 <|> return vs
-    _ -> fail "outofvalid"
+parseValid = p some
   where
     idChar = satisfy (`elem` ("_.!#$%&*+/<=>?@\\|-~:^"::String))
              <|> digit <|> letter_ascii
+    p p' = do
+      vs' <- p' $ utf8String "⋆" <|> return <$> idChar
+      let vs = concat vs'
+      c <- peekChar'
+      case c of
+        '`' -> return vs
+        '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs
+        _ -> fail "outofvalid"
 
 -- | Parses UTF8 strings from ByteString streams.
 utf8String :: String -> Parser String
 utf8String x = decodeUtf8 <$> string (encodeUtf8 x)
 
--- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the
--- string it deems valid.
+-- | Parses identifiers with help of 'parseValid'. Asks GHC for
+-- 'String' from the string it deems valid.
 identifier :: Parser (DocH mod Identifier)
 identifier = do
   o <- idDelim