aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSimon Hengel <sol@typeful.net>2013-09-08 10:33:38 +0200
committerAustin Seipp <austin@well-typed.com>2014-01-12 14:48:35 -0600
commit2448bd71609688be7b8bfe362a8534959531cd79 (patch)
tree66f23e3cc5fd6c97da832e8704f8f633e508b64b /src
parent27876dc77ff259e27a71ea6f30662a668adfd134 (diff)
Fix totality, unicode, examples, paragraph parsing
Also simplify specs and parsers while we're at it. Some parsers were made more generic. This commit is a part of GHC pre-merge squash, email fuuzetsu@fuuzetsu.co.uk if you need the full commit history.
Diffstat (limited to 'src')
-rw-r--r--src/Haddock.hs2
-rw-r--r--src/Haddock/Doc.hs57
-rw-r--r--src/Haddock/Interface/LexParseRn.hs4
-rw-r--r--src/Haddock/Interface/ParseModuleHeader.hs4
-rw-r--r--src/Haddock/Parser.hs482
5 files changed, 217 insertions, 332 deletions
diff --git a/src/Haddock.hs b/src/Haddock.hs
index b741f5f1..cc7e7842 100644
--- a/src/Haddock.hs
+++ b/src/Haddock.hs
@@ -446,7 +446,7 @@ getPrologue dflags flags =
[] -> return Nothing
[filename] -> do
str <- readFile filename
- case parseParas dflags str of
+ case parseParasMaybe dflags str of
Nothing -> throwE $ "failed to parse haddock prologue from file: " ++ filename
Just doc -> return (Just doc)
_otherwise -> throwE "multiple -p/--prologue options"
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index 4d68c554..69b2dd6f 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,16 +1,14 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Haddock.Doc (
- docAppend,
- docParagraph,
- combineStringNodes,
- combineDocumentation
- ) where
+ docAppend
+, docParagraph
+, combineDocumentation
+) where
import Data.Maybe
import Data.Monoid
import Haddock.Types
import Data.Char (isSpace)
-import Control.Arrow ((***))
-- We put it here so that we can avoid a circular import
-- anything relevant imports this module anyway
@@ -22,25 +20,15 @@ combineDocumentation :: Documentation name -> Maybe (Doc name)
combineDocumentation (Documentation Nothing Nothing) = Nothing
combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
--- used to make parsing easier; we group the list items later
docAppend :: Doc id -> Doc id -> Doc id
-docAppend (DocUnorderedList ds1) (DocUnorderedList ds2)
- = DocUnorderedList (ds1++ds2)
-docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d)
- = DocAppend (DocUnorderedList (ds1++ds2)) d
-docAppend (DocOrderedList ds1) (DocOrderedList ds2)
- = DocOrderedList (ds1++ds2)
-docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d)
- = DocAppend (DocOrderedList (ds1++ds2)) d
-docAppend (DocDefList ds1) (DocDefList ds2)
- = DocDefList (ds1++ds2)
-docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d)
- = DocAppend (DocDefList (ds1++ds2)) d
+docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
+docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
docAppend DocEmpty d = d
docAppend d DocEmpty = d
-docAppend d1 d2
- = DocAppend d1 d2
-
+docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
+docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
+docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
+docAppend d1 d2 = DocAppend d1 d2
-- again to make parsing easier - we spot a paragraph whose only item
-- is a DocMonospaced and make it into a DocCodeBlock
@@ -77,28 +65,3 @@ docCodeBlock (DocString s)
docCodeBlock (DocAppend l r)
= DocAppend l (docCodeBlock r)
docCodeBlock d = d
-
--- | This is a hack that joins neighbouring 'DocString's into a single one.
--- This is done to ease up the testing and doesn't change the final result
--- as this would be done later anyway.
-combineStringNodes :: Doc id -> Doc id
-combineStringNodes (DocAppend (DocString x) (DocString y)) = DocString (x ++ y)
-combineStringNodes (DocAppend (DocString x) (DocAppend (DocString y) z)) =
- tryjoin (DocAppend (DocString (x ++ y)) (combineStringNodes z))
-combineStringNodes (DocAppend x y) = tryjoin (DocAppend (combineStringNodes x) (combineStringNodes y))
-combineStringNodes (DocParagraph x) = DocParagraph (combineStringNodes x)
-combineStringNodes (DocWarning x) = DocWarning (combineStringNodes x)
-combineStringNodes (DocEmphasis x) = DocEmphasis (combineStringNodes x)
-combineStringNodes (DocMonospaced x) = DocMonospaced (combineStringNodes x)
-combineStringNodes (DocUnorderedList xs) = DocUnorderedList (map combineStringNodes xs)
-combineStringNodes (DocOrderedList x) = DocOrderedList (map combineStringNodes x)
-combineStringNodes (DocDefList xs) = DocDefList (map (combineStringNodes *** combineStringNodes) xs)
-combineStringNodes (DocCodeBlock x) = DocCodeBlock (combineStringNodes x)
-combineStringNodes x = x
-
-tryjoin :: Doc id -> Doc id
-tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y)
-tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z
-tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z))
- = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z))
-tryjoin x = x
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 13563532..8c33ade6 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -43,11 +43,11 @@ processDocStrings dflags gre strs = do
processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
-processDocStringParas = process parseParas
+processDocStringParas = process parseParasMaybe
processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
-processDocString = process parseString
+processDocString = process parseStringMaybe
process :: (DynFlags -> String -> Maybe (Doc RdrName))
-> DynFlags
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index 2e4fe73b..ade28728 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -46,13 +46,13 @@ parseModuleHeader dflags str0 =
description1 :: Either String (Maybe (Doc RdrName))
description1 = case descriptionOpt of
Nothing -> Right Nothing
- Just description -> case parseString dflags description of
+ Just description -> case parseStringMaybe dflags description of
Nothing -> Left ("Cannot parse Description: " ++ description)
Just doc -> Right (Just doc)
in
case description1 of
Left mess -> Left mess
- Right docOpt -> case parseParas dflags str8 of
+ Right docOpt -> case parseParasMaybe dflags str8 of
Nothing -> Left "Cannot parse header documentation paragraphs"
Just doc -> Right (HaddockModInfo {
hmi_description = docOpt,
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index 43a2b169..fe8904d4 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -9,15 +9,15 @@
-- Stability : experimental
-- Portability : portable
-module Haddock.Parser (parseString, parseParas) where
+module Haddock.Parser (parseString, parseParas, parseStringMaybe, parseParasMaybe) where
+import Prelude hiding (takeWhile)
+import Control.Monad (void, mfilter)
import Control.Applicative
-import Data.Attoparsec.ByteString hiding (parse, takeWhile1, take, inClass)
-import qualified Data.Attoparsec.ByteString.Char8 as A8
-import Data.Attoparsec.ByteString.Char8 hiding (parse, take, string)
-import qualified Data.ByteString as BS
-import Data.Char (chr)
-import Data.List (stripPrefix)
+import Data.Attoparsec.ByteString.Char8 hiding (parse, take, string, endOfLine)
+import qualified Data.ByteString.Char8 as BS
+import Data.Char (chr, isAsciiUpper)
+import Data.List (stripPrefix, intercalate)
import Data.Maybe (fromMaybe)
import Data.Monoid
import DynFlags
@@ -31,157 +31,117 @@ import SrcLoc (mkRealSrcLoc, unLoc)
import StringBuffer (stringToStringBuffer)
import Haddock.Utf8
-parse :: Parser a -> String -> Maybe a
-parse p = either (const Nothing) Just . parseOnly (p <* endOfInput) . encodeUtf8
+{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
+parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
+parseParasMaybe d = Just . parseParas d
+
+{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
+parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
+parseStringMaybe d = Just . parseString d
+
+parse :: Parser a -> BS.ByteString -> a
+parse p = either err id . parseOnly (p <* endOfInput)
+ where
+ err = error . ("Haddock.Parser.parse: " ++)
-- | Main entry point to the parser. Appends the newline character
-- to the input string.
parseParas :: DynFlags
-> String -- ^ String to parse
- -> Maybe (Doc RdrName)
-parseParas d = fmap combineStringNodes . parse (p <* skipSpace) . (++ "\n")
+ -> Doc RdrName
+parseParas d = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
where
p :: Parser (Doc RdrName)
- -- make sure that we don't swallow up whitespace belonging to next paragraph
- p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n")
-
--- | A parser that parsers separate lines of the comments. Eventually
--- called by 'parseParas'. Appends a newline character to the input string.
--- Drops any whitespace in front of the input string. It's dropped for the sake of
--- section headings.
-parseString :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString d = parseString' d . dropWhile isSpace
-
--- | A parser that parsers separate lines of the comments. Eventually
--- called by 'parseParas'. Appends a newline character to the input string.
--- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use.
-parseString'' :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString'' d = parseString' d . (++ "\n")
-
--- | An internal use function. Split from the 'parseString' is useful
--- as we can specify separately when we want the newline to be appended.
-parseString' :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString' d = fmap combineStringNodes . parse p
+ p = mconcat <$> paragraph d `sepBy` many (skipHorizontalSpace *> "\n")
+
+-- | Parse a text paragraph.
+parseString :: DynFlags -> String -> Doc RdrName
+parseString d = parseStringBS d . encodeUtf8 . dropWhile isSpace
+
+parseStringBS :: DynFlags -> BS.ByteString -> Doc RdrName
+parseStringBS d = parse p
where
p :: Parser (Doc RdrName)
- p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d
- <|> moduleName <|> picture <|> url
- <|> emphasis d <|> encodedChar <|> string' <|> skipChar)
+ p = mconcat <$> many (monospace d <|> anchor <|> identifier d
+ <|> moduleName <|> picture <|> hyperlink <|> autoUrl
+ <|> emphasis d <|> encodedChar <|> string' <|> skipSpecialChar)
-- | Parses and processes
-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
--
-- >>> parseOnly encodedChar "&#65;&#66;&#67;"
-- Right (DocString "ABC")
-encodedChar :: Parser (Doc RdrName)
+encodedChar :: Parser (Doc a)
encodedChar = "&#" *> c <* ";"
where
c = DocString . return . chr <$> num
num = hex <|> decimal
hex = ("x" <|> "X") *> hexadecimal
+specialChar :: [Char]
+specialChar = "/<@\"&'`"
+
-- | Plain, regular parser for text. Called as one of the last parsers
-- to ensure that we have already given a chance to more meaningful parsers
-- before capturing their characers.
-string' :: Parser (Doc RdrName)
-string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\")
+string' :: Parser (Doc a)
+string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
+ where
+ unescape "" = ""
+ unescape ('\\':x:xs) = x : unescape xs
+ unescape (x:xs) = x : unescape xs
+
+-- | Skips a single special character and treats it as a plain string.
+-- This is done to skip over any special characters belonging to other
+-- elements but which were not deemed meaningful at their positions.
+skipSpecialChar :: Parser (Doc a)
+skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
-- | Emphasis parser.
--
-- >>> parseOnly emphasis "/Hello world/"
-- Right (DocEmphasis (DocString "Hello world"))
emphasis :: DynFlags -> Parser (Doc RdrName)
-emphasis d = DocEmphasis <$> stringBlock d "/" "/" "\n"
+emphasis d = DocEmphasis . parseStringBS d <$>
+ mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
--- | Skips a single character and treats it as a plain string.
--- This is done to skip over any special characters belonging to other
--- elements but which were not deemed meaningful at their positions.
--- Note that this can only be used in places where we're absolutely certain
--- no unicode is present, such as to skip a 100% certain ASCII delimeter.
-skipChar :: Parser (Doc RdrName)
-skipChar = DocString . return <$> anyChar
+-- | Like `takeWhile`, but unconditionally take escaped characters.
+takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile_ p = scan False p_
+ where
+ p_ escaped c
+ | escaped = Just False
+ | not $ p c = Nothing
+ | otherwise = Just (c == '\\')
--- | Treats the next character as a regular string, even if it's normally
--- used for markup.
-charEscape :: Parser (Doc RdrName)
-charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n'))
+-- | Like `takeWhile1`, but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile1_ = mfilter (not . BS.null) . takeWhile_
-- | Text anchors to allow for jumping around the generated documentation.
--
-- >>> parseOnly anchor "#Hello world#"
-- Right (DocAName "Hello world")
-anchor :: Parser (Doc RdrName)
+anchor :: Parser (Doc a)
anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
--- | Helper for markup structures surrounded with delimiters.
-stringBlock
- :: DynFlags
- -> String -- ^ Opening delimiter
- -> String -- ^ Closing delimiter
- -> String -- ^ Additional characters to terminate parsing on
- -> Parser (Doc RdrName)
-stringBlock d op ed n = do
- inner <- block op ed n
- case parseString' d inner of
- Just r -> return r
- _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’"
-
--- | Returns sections of text delimited by specified text.
-block :: String -> String -> String -> Parser String
-block op ed n = reverse . drop (length ed) . reverse <$> block' op ed
- where
- block' op' ed' = string (encodeUtf8 op') *> mid
- where
- mid :: Parser String
- mid = decodeUtf8 <$> string (encodeUtf8 ed')
- <|> do
- inner <- takeWithSkip (head ed') n
- more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed')
- <|> block' "" ed' -- not full ending, take more
- return $ inner ++ more
-
-
--- | Takes all characters until the specified one. Unconditionally
--- takes a character if it's escaped. Fails if it doesn't find the character or
--- when the input string is empty.
-takeWithSkip :: Char -> String -> Parser String
-takeWithSkip s n = do
- content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome
- if or (map (`elem` content) n) || last content /= s
- then fail "failed in takeWithSkip"
- else return content
- where
- gotSome [] = fail "EOF in takeWithSkip"
- gotSome xs = return xs
- -- Apparently ‘scan’ is so magical that it doesn't mangle unicode.
- p (escaped, terminate) c
- | terminate = Nothing -- swallows up that extra character
- | escaped = Just (False, False)
- | c == s = Just (False, True)
- | otherwise = Just (c == '\\', False)
-
-- | Monospaced strings.
--
-- >>> parseOnly (monospace dynflags) "@cruel@"
-- Right (DocMonospaced (DocString "cruel"))
monospace :: DynFlags -> Parser (Doc RdrName)
-monospace d = DocMonospaced <$> stringBlock d "@" "@" ""
-
--- | Module name parser, surrounded by double quotes. This does a very primitive and
--- purely syntactic checking so that obviously invalid names are not treated as valid
--- and blindly hyperlinked (not starting with a capital letter or including spaces).
-moduleName :: Parser (Doc RdrName)
-moduleName = DocModule <$> ("\"" *> legalModule <* "\"")
- where legalModule = do
- n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z'])
- <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n"))
-
- if any (`elem` n) " &[{}(=*)+]!#|@/;,^?"
- then fail "invalid characters in module name"
- else case n of
- [] -> return []
- _ -> if last n == '.' then fail "trailing dot in module name" else return n
+monospace d = DocMonospaced . parseStringBS d <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+moduleName :: Parser (Doc a)
+moduleName = DocModule <$> (char '"' *> modid <* char '"')
+ where
+ modid = intercalate "." <$> conid `sepBy1` "."
+ conid = (:)
+ <$> satisfy isAsciiUpper
+ -- NOTE: According to Haskell 2010 we shouldd actually only
+ -- accept {small | large | digit | ' } here. But as we can't
+ -- match on unicode characters, this is currently not possible.
+ <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
-- a title for the picture.
@@ -190,181 +150,166 @@ moduleName = DocModule <$> ("\"" *> legalModule <* "\"")
-- Right (DocPic (Picture "hello.png" Nothing))
-- >>> parseOnly picture "<<hello.png world>>"
-- Right (DocPic (Picture "hello.png" (Just "world")))
-picture :: Parser (Doc RdrName)
-picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
+picture :: Parser (Doc a)
+picture = DocPic . makeLabeled Picture . decodeUtf8
+ <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
-- | Paragraph parser, called by 'parseParas'.
paragraph :: DynFlags -> Parser (Doc RdrName)
paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d
<|> property <|> textParagraph d)
+textParagraph :: DynFlags -> Parser (Doc RdrName)
+textParagraph d = docParagraph . parseString d . intercalate "\n" <$> many1 nonEmptyLine
+
-- | List parser, called by 'paragraph'.
list :: DynFlags -> Parser (Doc RdrName)
list d = DocUnorderedList <$> unorderedList d
<|> DocOrderedList <$> orderedList d
<|> DocDefList <$> definitionList d
--- | Parse given text with a provided parser, casting
--- Nothing to a failure
-parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use
- -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result
- -> BS.ByteString -- ^ Text to parse
- -> Parser a
-parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str)
-
-- | Parses unordered (bullet) lists.
unorderedList :: DynFlags -> Parser [Doc RdrName]
-unorderedList d = ("*" <|> "-") *> innerList unorderedList d
+unorderedList d = ("*" <|> "-") *> innerList (unorderedList d) d
-- | Parses ordered lists (numbered or dashed).
orderedList :: DynFlags -> Parser [Doc RdrName]
-orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d
+orderedList d = (paren <|> dot) *> innerList (orderedList d) d
where
- dot = decimal <* "."
- paren = "(" *> (decimal :: Parser Int) <* ")"
+ dot = (decimal :: Parser Int) <* "."
+ paren = "(" *> decimal <* ")"
-- | Generic function collecting any further lines belonging to the
-- list entry and recursively collecting any further lists in the
-- same paragraph. Usually used as
--
-- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags
-innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function
- -> DynFlags
- -> Parser [Doc RdrName]
-innerList p d = do
- cl <- do
- content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty
- parseLine (parseString'' d) id content
- ulcs <- many ulc
- let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs]
- unLists = mconcat [x | Left x <- ulcs]
- return $ contents : unLists
+innerList :: Parser [Doc RdrName] -> DynFlags -> Parser [Doc RdrName]
+innerList item d = do
+ c <- takeLine
+ (cs, items) <- more
+ let contents = (docParagraph . parseString d . unlines) (c : cs)
+ return (contents : items)
where
- ulc :: Parser (Either [Doc RdrName] (Doc RdrName))
- ulc = Left <$> (optWs *> p d)
- <|> Right <$> nonEmptyLine d
-
--- | Takes the remained of the line until the newline character
--- and calls 'parseLine' using 'parseString'. Fails if it's made
--- up strictly of whitespace.
-nonEmptyLine :: DynFlags -> Parser (Doc RdrName)
-nonEmptyLine d = do
- s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
- parseLine (parseString'' d) id s
- where
- nonSpace xs
- | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line"
- | otherwise = return xs
+ more :: Parser ([String], [Doc RdrName])
+ more = moreListItems <|> moreContent <|> pure ([], [])
+
+ moreListItems :: Parser ([String], [Doc RdrName])
+ moreListItems = (,) [] <$> (skipSpace *> item)
+
+ moreContent :: Parser ([String], [Doc RdrName])
+ moreContent = mapFst . (:) <$> nonEmptyLine <*> more
-- | Parses definition lists.
definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)]
definitionList d = do
- _ <- "["
- inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n")
- _ <- "]"
- outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n")
- ulcs <- many ulc
- let contents = mconcat $ outer : [x | Right x <- ulcs]
- unLists = map mconcat [x | Left x <- ulcs]
- return $ (inner, contents) : unLists
+ label <- parseStringBS d <$> ("[" *> takeWhile1 (`notElem` "]\n") <* "]")
+ c <- takeLine
+ (cs, items) <- more
+ let contents = (parseString d . unlines) (c : cs)
+ return ((label, contents) : items)
where
- ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName))
- ulc = Left <$> (optWs *> definitionList d)
- <|> Right <$> nonEmptyLine d
-
--- | Parses birdtracks. No further markup is parsed after the birdtrack.
--- Consecutive birdtracks are allowed.
-birdtracks :: Parser (Doc RdrName)
-birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
+ more :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+ more = moreListItems <|> moreContent <|> pure ([], [])
+
+ moreListItems :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+ moreListItems = (,) [] <$> (skipSpace *> definitionList d)
+
+ moreContent :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+ moreContent = mapFst . (:) <$> nonEmptyLine <*> more
+
+birdtracks :: Parser (Doc a)
+birdtracks = DocCodeBlock . DocString . intercalate "\n" <$> many1 line
where
- line = optWs *> ">" *> A8.takeWhile (/= '\n')
+ line = skipHorizontalSpace *> ">" *> takeLine
-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
-- Consecutive examples are accepted.
-examples :: Parser (Doc RdrName)
-examples = DocExamples <$> example
-
--- | Collects consecutive examples and their results.
-example :: Parser [Example]
-example = do
- ws <- optWs
- prompt <- decodeUtf8 <$> string ">>>"
- expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n")
- results <- many result
- let exs = concat [ e | Left e <- results ]
- res = filter (not . null) [ r | Right r <- results ]
- return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs
+examples :: Parser (Doc a)
+examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
where
- result = Left <$> example
- <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n"
+ go :: Parser [Example]
+ go = do
+ prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
+ expr <- takeLine
+ (rs, es) <- resultAndMoreExamples
+ return (makeExample prefix expr rs : es)
+ where
+ resultAndMoreExamples :: Parser ([String], [Example])
+ resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
+ where
+ moreExamples :: Parser ([String], [Example])
+ moreExamples = (,) [] <$> go
+
+ result :: Parser ([String], [Example])
+ result = mapFst . (:) <$> nonEmptyLine <*> resultAndMoreExamples
+
+ makeExample :: String -> String -> [String] -> Example
+ makeExample prefix expression res =
+ Example (strip expression) result
+ where
+ result = map (substituteBlankLine . tryStripPrefix) res
+
+ tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
+
+ substituteBlankLine "<BLANKLINE>" = ""
+ substituteBlankLine xs = xs
+
+nonEmptyLine :: Parser String
+nonEmptyLine = mfilter (any (not . isSpace)) takeLine
--- | Propery parser.
+takeLine :: Parser String
+takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
+
+endOfLine :: Parser ()
+endOfLine = void "\n" <|> endOfInput
+
+mapFst :: (a -> b) -> (a, c) -> (b, c)
+mapFst f (a, b) = (f a, b)
+
+-- | Property parser.
--
-- >>> parseOnly property "prop> hello world"
-- Right (DocProperty "hello world")
-property :: Parser (Doc RdrName)
-property = do
- _ <- skipSpace
- s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n'))
- return $ makeProperty ("prop>" ++ s)
-
--- | Paragraph level codeblock. Anything between the two delimiting @
--- is parsed for markup.
+property :: Parser (Doc a)
+property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
+
+-- |
+-- Paragraph level codeblock. Anything between the two delimiting @ is parsed
+-- for markup.
codeblock :: DynFlags -> Parser (Doc RdrName)
-codeblock d = do
- -- Note that we don't need to use optWs here because in cases where
- -- we don't see a \n immediatelly after the opening @, this parser
- -- fails but we still have a chance to get a codeblock by getting
- -- a monospaced doc on its own in the paragraph. With that, the cases
- -- are covered. This should be updated if the implementation ever changes.
- s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@")
- maybe (fail "codeblock") (return . DocCodeBlock) s
+codeblock d =
+ DocCodeBlock . parseStringBS d <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
where
- block' = A8.scan False p
+ block' = scan False p
where
p isNewline c
| isNewline && c == '@' = Nothing
| otherwise = Just $ c == '\n'
--- | Calls 'parseString'' on each line of a paragraph
-textParagraph :: DynFlags -> Parser (Doc RdrName)
-textParagraph d = do
- s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
- maybe (fail "textParagraph") (return . docParagraph) s
- where
- line = takeWhile1 (/= '\n')
-
--- | See 'picture' for adding a page title.
-url :: Parser (Doc RdrName)
-url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
- <|> autoUrl
-
--- | Naive implementation of auto-linking. Will link everything after
--- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space.
--- Single trailing punctuation character (.!?,) is split off.
-autoUrl :: Parser (Doc RdrName)
-autoUrl = do
- link <- decodeUtf8 <$> urlLone
- return $ formatLink link
+hyperlink :: Parser (Doc a)
+hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
+
+autoUrl :: Parser (Doc a)
+autoUrl = mkLink <$> url
where
- urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace)
- prefixes = [ "http://", "https://", "ftp://"
- , "ssh://", "gopher://" ]
- formatLink :: String -> Doc RdrName
- formatLink s = if last s `elem` ".!?,"
- then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s])
- else DocHyperlink $ Hyperlink s Nothing
+ url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
+ mkLink :: BS.ByteString -> Doc a
+ mkLink s = case BS.unsnoc s of
+ Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
+ _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
-- | Parses strings between identifier delimiters. Consumes all input that it
-- deems to be valid in an identifier. Note that it simply blindly consumes
-- characters and does no actual validation itself.
parseValid :: Parser String
parseValid = do
- vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii)
+ vs <- many' $ satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:") <|> digit <|> letter_ascii
c <- peekChar
case c of
Just '`' -> return vs
- Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs
+ Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
+ <|> return vs
_ -> fail "outofvalid"
-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the
@@ -374,56 +319,33 @@ identifier dflags = do
o <- idDelim
vid <- parseValid
e <- idDelim
- return $ validIdentifier $ o : (vid ++ [e])
- where idDelim = char '\'' <|> char '`'
- validIdentifier str = case parseIdent (tail $ init str) of
- Just identName -> DocIdentifier identName
- Nothing -> DocString str
- parseIdent :: String -> Maybe RdrName
- parseIdent str0 =
- let buffer = stringToStringBuffer str0
- realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
- pstate = mkPState dflags buffer realSrcLc
- in case unP parseIdentifier pstate of
- POk _ name -> Just (unLoc name)
- _ -> Nothing
+ return $ validIdentifier o vid e
+ where
+ idDelim = char '\'' <|> char '`'
+ validIdentifier o ident e = case parseIdent ident of
+ Just identName -> DocIdentifier identName
+ Nothing -> DocString $ o : ident ++ [e]
+
+ parseIdent :: String -> Maybe RdrName
+ parseIdent str0 =
+ let buffer = stringToStringBuffer str0
+ realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0
+ pstate = mkPState dflags buffer realSrcLc
+ in case unP parseIdentifier pstate of
+ POk _ name -> Just (unLoc name)
+ _ -> Nothing
-- | Remove all leading and trailing whitespace
strip :: String -> String
strip = (\f -> f . f) $ dropWhile isSpace . reverse
--- | Consumes whitespace, excluding a newline.
-optWs :: Parser BS.ByteString
-optWs = A8.takeWhile (`elem` " \t\f\v\r")
-
--- | Create an 'Example', stripping superfluous characters as appropriate.
--- Remembers the amount of indentation used for the prompt.
-makeExample :: String -> String -> [String] -> Example
-makeExample prompt expression res =
- Example (strip expression) result' -- drop whitespace in expressions
- where (prefix, _) = span isSpace prompt
- result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res
- where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
- substituteBlankLine "<BLANKLINE>" = ""
- substituteBlankLine line = line
-
--- | Creates a 'Picture' with an optional title. Called by 'picture'.
-makePicture :: String -> Picture
-makePicture input = case break isSpace $ strip input of
- (uri, "") -> Picture uri Nothing
- (uri, label) -> Picture uri (Just $ dropWhile isSpace label)
-
--- | Creates a 'Hyperlink' with an optional title. Called by 'example'.
-makeHyperlink :: String -> Hyperlink
-makeHyperlink input = case break isSpace $ strip input of
- (u, "") -> Hyperlink u Nothing
- (u, label) -> Hyperlink u (Just $ dropWhile isSpace label)
-
--- | Makes a property that can be used by other programs for assertions.
--- Drops whitespace around the property. Called by 'property'
-makeProperty :: String -> Doc RdrName
-makeProperty s = case strip s of
- 'p':'r':'o':'p':'>':xs ->
- DocProperty (dropWhile isSpace xs)
- xs ->
- error $ "makeProperty: invalid input " ++ show xs
+skipHorizontalSpace :: Parser ()
+skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
+
+takeHorizontalSpace :: Parser BS.ByteString
+takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
+
+makeLabeled :: (String -> Maybe String -> a) -> String -> a
+makeLabeled f input = case break isSpace $ strip input of
+ (uri, "") -> f uri Nothing
+ (uri, label) -> f uri (Just $ dropWhile isSpace label)