Merge pull request #1 from haskell/ghc-head

Ghc head
author: idontgetoutmuch <dominic@steinitz.org> 2015-12-20 21:01:47 +0000
committer: idontgetoutmuch <dominic@steinitz.org> 2015-12-20 21:01:47 +0000
commit: 2bdfda1fb2e0de696ca8c6f7a152b2f85a541be9 (patch)
tree: cc29895f7d69f051cfec172bb0f8c2ef03552789 /haddock-library/src/Documentation/Haddock/Parser.hs
parent: 5a57a24c44e06e964c4ea2276c842c722c4e93d9 (diff)
parent: fa03f80d76f1511a811a0209ea7a6a8b6c58704f (diff)
1 files changed, 58 insertions, 37 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
index b7ab85b0..ca9e9d8d 100644
--- a/haddock-library/src/Documentation/Haddock/Parser.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -93,7 +93,8 @@ parseParas input = case parseParasState input of
                         }
 
 parseParasState :: String -> (ParserState, DocH mod Identifier)
-parseParasState = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
+parseParasState =
+    parse (p <* skipSpace) . encodeUtf8 . (++ "\n") . filter (/= '\r')
   where
     p :: Parser (DocH mod Identifier)
     p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
@@ -105,7 +106,7 @@ parseParagraphs input = case parseParasState input of
 -- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
 -- drops leading whitespace and encodes the string to UTF8 first.
 parseString :: String -> DocH mod Identifier
-parseString = parseStringBS . encodeUtf8 . dropWhile isSpace
+parseString = parseStringBS . encodeUtf8 . dropWhile isSpace . filter (/= '\r')
 
 parseStringBS :: BS.ByteString -> DocH mod Identifier
 parseStringBS = snd . parse p
@@ -230,18 +231,20 @@ markdownImage = fromHyperlink <$> ("!" *> linkParser)
 
 -- | Paragraph parser, called by 'parseParas'.
 paragraph :: Parser (DocH mod Identifier)
-paragraph = examples <|> skipSpace *> (
-      since
-  <|> unorderedList
-  <|> orderedList
-  <|> birdtracks
-  <|> codeblock
-  <|> property
-  <|> header
-  <|> textParagraphThatStartsWithMarkdownLink
-  <|> definitionList
-  <|> docParagraph <$> textParagraph
-  )
+paragraph = examples <|> do
+  indent <- takeIndent
+  choice
+    [ since
+    , unorderedList indent
+    , orderedList indent
+    , birdtracks
+    , codeblock
+    , property
+    , header
+    , textParagraphThatStartsWithMarkdownLink
+    , definitionList indent
+    , docParagraph <$> textParagraph
+    ]
 
 since :: Parser (DocH mod a)
 since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty
@@ -282,16 +285,16 @@ textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdo
           | otherwise = " "
 
 -- | Parses unordered (bullet) lists.
-unorderedList :: Parser (DocH mod Identifier)
-unorderedList = DocUnorderedList <$> p
+unorderedList :: BS.ByteString -> Parser (DocH mod Identifier)
+unorderedList indent = DocUnorderedList <$> p
   where
-    p = ("*" <|> "-") *> innerList p
+    p = ("*" <|> "-") *> innerList indent p
 
 -- | Parses ordered lists (numbered or dashed).
-orderedList :: Parser (DocH mod Identifier)
-orderedList = DocOrderedList <$> p
+orderedList :: BS.ByteString -> Parser (DocH mod Identifier)
+orderedList indent = DocOrderedList <$> p
   where
-    p = (paren <|> dot) *> innerList p
+    p = (paren <|> dot) *> innerList indent p
     dot = (decimal :: Parser Int) <* "."
     paren = "(" *> decimal <* ")"
 
@@ -300,23 +303,24 @@ orderedList = DocOrderedList <$> p
 -- same paragraph. Usually used as
 --
 -- > someListFunction = listBeginning *> innerList someListFunction
-innerList :: Parser [DocH mod Identifier] -> Parser [DocH mod Identifier]
-innerList item = do
+innerList :: BS.ByteString -> Parser [DocH mod Identifier]
+          -> Parser [DocH mod Identifier]
+innerList indent item = do
   c <- takeLine
-  (cs, items) <- more item
+  (cs, items) <- more indent item
   let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
   return $ case items of
     Left p -> [contents `docAppend` p]
     Right i -> contents : i
 
 -- | Parses definition lists.
-definitionList :: Parser (DocH mod Identifier)
-definitionList = DocDefList <$> p
+definitionList :: BS.ByteString -> Parser (DocH mod Identifier)
+definitionList indent = DocDefList <$> p
   where
     p = do
       label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
       c <- takeLine
-      (cs, items) <- more p
+      (cs, items) <- more indent p
       let contents = parseString . dropNLs . unlines $ c : cs
       return $ case items of
         Left x -> [(label, contents `docAppend` x)]
@@ -329,32 +333,40 @@ dropNLs = reverse . dropWhile (== '\n') . reverse
 -- | Main worker for 'innerList' and 'definitionList'.
 -- We need the 'Either' here to be able to tell in the respective functions
 -- whether we're dealing with the next list or a nested paragraph.
-more :: Monoid a => Parser a
+more :: Monoid a => BS.ByteString -> Parser a
      -> Parser ([String], Either (DocH mod Identifier) a)
-more item = innerParagraphs <|> moreListItems item
-            <|> moreContent item <|> pure ([], Right mempty)
+more indent item = innerParagraphs indent
+               <|> moreListItems indent item
+               <|> moreContent indent item
+               <|> pure ([], Right mempty)
 
 -- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs.
-innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a)
-innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs)
+innerParagraphs :: BS.ByteString
+                -> Parser ([String], Either (DocH mod Identifier) a)
+innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent)
 
 -- | Attempts to fetch the next list if possibly. Used by 'innerList' and
 -- 'definitionList' to recursively grab lists that aren't separated by a whole
 -- paragraph.
-moreListItems :: Parser a
+moreListItems :: BS.ByteString -> Parser a
               -> Parser ([String], Either (DocH mod Identifier) a)
-moreListItems item = (,) [] . Right <$> (skipSpace *> item)
+moreListItems indent item = (,) [] . Right <$> indentedItem
+  where
+    indentedItem = string indent *> skipSpace *> item
 
 -- | Helper for 'innerList' and 'definitionList' which simply takes
 -- a line of text and attempts to parse more list content with 'more'.
-moreContent :: Monoid a => Parser a
+moreContent :: Monoid a => BS.ByteString -> Parser a
             -> Parser ([String], Either (DocH mod Identifier) a)
-moreContent item = first . (:) <$> nonEmptyLine <*> more item
+moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item
 
 -- | Parses an indented paragraph.
 -- The indentation is 4 spaces.
-indentedParagraphs :: Parser (DocH mod Identifier)
-indentedParagraphs = (concat <$> dropFrontOfPara "    ") >>= parseParagraphs
+indentedParagraphs :: BS.ByteString -> Parser (DocH mod Identifier)
+indentedParagraphs indent =
+    (concat <$> dropFrontOfPara indent') >>= parseParagraphs
+  where
+    indent' = string $ BS.append indent "    "
 
 -- | Grab as many fully indented paragraphs as we can.
 dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
@@ -381,6 +393,15 @@ takeNonEmptyLine :: Parser String
 takeNonEmptyLine = do
     (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
 
+-- | Takes indentation of first non-empty line.
+--
+-- More precisely: skips all whitespace-only lines and returns indentation
+-- (horizontal space, might be empty) of that non-empty line.
+takeIndent :: Parser BS.ByteString
+takeIndent = do
+  indent <- takeHorizontalSpace
+  "\n" *> takeIndent <|> return indent
+
 -- | Blocks of text of the form:
 --
 -- >> foo
author	idontgetoutmuch <dominic@steinitz.org>	2015-12-20 21:01:47 +0000
committer	idontgetoutmuch <dominic@steinitz.org>	2015-12-20 21:01:47 +0000
commit	2bdfda1fb2e0de696ca8c6f7a152b2f85a541be9 (patch)
tree	cc29895f7d69f051cfec172bb0f8c2ef03552789 /haddock-library/src/Documentation/Haddock/Parser.hs
parent	5a57a24c44e06e964c4ea2276c842c722c4e93d9 (diff)
parent	fa03f80d76f1511a811a0209ea7a6a8b6c58704f (diff)