diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Haddock.hs | 2 | ||||
| -rw-r--r-- | src/Haddock/Doc.hs | 57 | ||||
| -rw-r--r-- | src/Haddock/Interface/LexParseRn.hs | 4 | ||||
| -rw-r--r-- | src/Haddock/Interface/ParseModuleHeader.hs | 4 | ||||
| -rw-r--r-- | src/Haddock/Parser.hs | 482 | 
5 files changed, 217 insertions, 332 deletions
| diff --git a/src/Haddock.hs b/src/Haddock.hs index b741f5f1..cc7e7842 100644 --- a/src/Haddock.hs +++ b/src/Haddock.hs @@ -446,7 +446,7 @@ getPrologue dflags flags =      [] -> return Nothing      [filename] -> do        str <- readFile filename -      case parseParas dflags str of +      case parseParasMaybe dflags str of          Nothing -> throwE $ "failed to parse haddock prologue from file: " ++ filename          Just doc -> return (Just doc)      _otherwise -> throwE "multiple -p/--prologue options" diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs index 4d68c554..69b2dd6f 100644 --- a/src/Haddock/Doc.hs +++ b/src/Haddock/Doc.hs @@ -1,16 +1,14 @@  {-# OPTIONS_GHC -fno-warn-orphans #-}  module Haddock.Doc ( -  docAppend, -  docParagraph, -  combineStringNodes, -  combineDocumentation -  ) where +  docAppend +, docParagraph +, combineDocumentation +) where  import Data.Maybe  import Data.Monoid  import Haddock.Types  import Data.Char (isSpace) -import Control.Arrow ((***))  -- We put it here so that we can avoid a circular import  -- anything relevant imports this module anyway @@ -22,25 +20,15 @@ combineDocumentation :: Documentation name -> Maybe (Doc name)  combineDocumentation (Documentation Nothing Nothing) = Nothing  combineDocumentation (Documentation mDoc mWarning)   = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc) --- used to make parsing easier; we group the list items later  docAppend :: Doc id -> Doc id -> Doc id -docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) -  = DocUnorderedList (ds1++ds2) -docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) -  = DocAppend (DocUnorderedList (ds1++ds2)) d -docAppend (DocOrderedList ds1) (DocOrderedList ds2) -  = DocOrderedList (ds1++ds2) -docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) -  = DocAppend (DocOrderedList (ds1++ds2)) d -docAppend (DocDefList ds1) (DocDefList ds2) -  = DocDefList (ds1++ds2) -docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) -  = DocAppend (DocDefList (ds1++ds2)) d +docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2) +docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d  docAppend DocEmpty d = d  docAppend d DocEmpty = d -docAppend d1 d2 -  = DocAppend d1 d2 - +docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2) +docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2)) +docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d +docAppend d1 d2 = DocAppend d1 d2  -- again to make parsing easier - we spot a paragraph whose only item  -- is a DocMonospaced and make it into a DocCodeBlock @@ -77,28 +65,3 @@ docCodeBlock (DocString s)  docCodeBlock (DocAppend l r)    = DocAppend l (docCodeBlock r)  docCodeBlock d = d - --- | This is a hack that joins neighbouring 'DocString's into a single one. --- This is done to ease up the testing and doesn't change the final result --- as this would be done later anyway. -combineStringNodes :: Doc id -> Doc id -combineStringNodes (DocAppend (DocString x) (DocString y)) = DocString (x ++ y) -combineStringNodes (DocAppend (DocString x) (DocAppend (DocString y) z)) = -  tryjoin (DocAppend (DocString (x ++ y)) (combineStringNodes z)) -combineStringNodes (DocAppend x y) = tryjoin (DocAppend (combineStringNodes x) (combineStringNodes y)) -combineStringNodes (DocParagraph x) = DocParagraph (combineStringNodes x) -combineStringNodes (DocWarning x) = DocWarning (combineStringNodes x) -combineStringNodes (DocEmphasis x) = DocEmphasis (combineStringNodes x) -combineStringNodes (DocMonospaced x) = DocMonospaced (combineStringNodes x) -combineStringNodes (DocUnorderedList xs) = DocUnorderedList (map combineStringNodes xs) -combineStringNodes (DocOrderedList x) = DocOrderedList (map combineStringNodes x) -combineStringNodes (DocDefList xs) = DocDefList (map (combineStringNodes *** combineStringNodes) xs) -combineStringNodes (DocCodeBlock x) = DocCodeBlock (combineStringNodes x) -combineStringNodes x = x - -tryjoin :: Doc id -> Doc id -tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y) -tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z -tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z)) -  = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z)) -tryjoin x = x diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs index 13563532..8c33ade6 100644 --- a/src/Haddock/Interface/LexParseRn.hs +++ b/src/Haddock/Interface/LexParseRn.hs @@ -43,11 +43,11 @@ processDocStrings dflags gre strs = do  processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name)) -processDocStringParas = process parseParas +processDocStringParas = process parseParasMaybe  processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name)) -processDocString = process parseString +processDocString = process parseStringMaybe  process :: (DynFlags -> String -> Maybe (Doc RdrName))          -> DynFlags diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs index 2e4fe73b..ade28728 100644 --- a/src/Haddock/Interface/ParseModuleHeader.hs +++ b/src/Haddock/Interface/ParseModuleHeader.hs @@ -46,13 +46,13 @@ parseModuleHeader dflags str0 =        description1 :: Either String (Maybe (Doc RdrName))        description1 = case descriptionOpt of           Nothing -> Right Nothing -         Just description -> case parseString dflags description of +         Just description -> case parseStringMaybe dflags description of              Nothing -> Left ("Cannot parse Description: " ++ description)              Just doc -> Right (Just doc)     in        case description1 of           Left mess -> Left mess -         Right docOpt -> case parseParas dflags str8 of +         Right docOpt -> case parseParasMaybe dflags str8 of             Nothing -> Left "Cannot parse header documentation paragraphs"             Just doc -> Right (HaddockModInfo {              hmi_description = docOpt, diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs index 43a2b169..fe8904d4 100644 --- a/src/Haddock/Parser.hs +++ b/src/Haddock/Parser.hs @@ -9,15 +9,15 @@  -- Stability   :  experimental  -- Portability :  portable -module Haddock.Parser (parseString, parseParas) where +module Haddock.Parser (parseString, parseParas, parseStringMaybe, parseParasMaybe) where +import           Prelude hiding (takeWhile) +import           Control.Monad (void, mfilter)  import           Control.Applicative -import           Data.Attoparsec.ByteString hiding (parse, takeWhile1, take, inClass) -import qualified Data.Attoparsec.ByteString.Char8 as A8 -import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, string) -import qualified Data.ByteString as BS -import           Data.Char (chr) -import           Data.List (stripPrefix) +import           Data.Attoparsec.ByteString.Char8 hiding (parse, take, string, endOfLine) +import qualified Data.ByteString.Char8 as BS +import           Data.Char (chr, isAsciiUpper) +import           Data.List (stripPrefix, intercalate)  import           Data.Maybe (fromMaybe)  import           Data.Monoid  import           DynFlags @@ -31,157 +31,117 @@ import           SrcLoc (mkRealSrcLoc, unLoc)  import           StringBuffer (stringToStringBuffer)  import           Haddock.Utf8 -parse :: Parser a -> String -> Maybe a -parse p = either (const Nothing) Just . parseOnly (p <* endOfInput) . encodeUtf8 +{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-} +parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName) +parseParasMaybe d = Just . parseParas d + +{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-} +parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName) +parseStringMaybe d = Just . parseString d + +parse :: Parser a -> BS.ByteString -> a +parse p = either err id . parseOnly (p <* endOfInput) +  where +    err = error . ("Haddock.Parser.parse: " ++)  -- | Main entry point to the parser. Appends the newline character  -- to the input string.  parseParas :: DynFlags                -> String -- ^ String to parse -              -> Maybe (Doc RdrName) -parseParas d = fmap combineStringNodes . parse (p <* skipSpace) . (++ "\n") +              -> Doc RdrName +parseParas d = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")    where      p :: Parser (Doc RdrName) -    -- make sure that we don't swallow up whitespace belonging to next paragraph -    p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n") - --- | A parser that parsers separate lines of the comments. Eventually --- called by 'parseParas'. Appends a newline character to the input string. --- Drops any whitespace in front of the input string. It's dropped for the sake of --- section headings. -parseString :: DynFlags -> String -> Maybe (Doc RdrName) -parseString d = parseString' d . dropWhile isSpace - --- | A parser that parsers separate lines of the comments. Eventually --- called by 'parseParas'. Appends a newline character to the input string. --- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use. -parseString'' :: DynFlags -> String -> Maybe (Doc RdrName) -parseString'' d = parseString' d . (++ "\n") - --- | An internal use function. Split from the 'parseString' is useful --- as we can specify separately when we want the newline to be appended. -parseString' :: DynFlags -> String -> Maybe (Doc RdrName) -parseString' d = fmap combineStringNodes . parse p +    p = mconcat <$> paragraph d `sepBy` many (skipHorizontalSpace *> "\n") + +-- | Parse a text paragraph. +parseString :: DynFlags -> String -> Doc RdrName +parseString d = parseStringBS d . encodeUtf8 . dropWhile isSpace + +parseStringBS :: DynFlags -> BS.ByteString -> Doc RdrName +parseStringBS d = parse p    where      p :: Parser (Doc RdrName) -    p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d -                          <|> moduleName <|> picture <|> url -                          <|> emphasis d <|> encodedChar <|> string' <|> skipChar) +    p = mconcat <$> many (monospace d <|> anchor <|> identifier d +                          <|> moduleName <|> picture <|> hyperlink <|> autoUrl +                          <|> emphasis d <|> encodedChar <|> string' <|> skipSpecialChar)  -- | Parses and processes  -- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>  --  -- >>> parseOnly encodedChar "ABC"  -- Right (DocString "ABC") -encodedChar :: Parser (Doc RdrName) +encodedChar :: Parser (Doc a)  encodedChar = "&#" *> c <* ";"    where      c = DocString . return . chr <$> num      num = hex <|> decimal      hex = ("x" <|> "X") *> hexadecimal +specialChar :: [Char] +specialChar = "/<@\"&'`" +  -- | Plain, regular parser for text. Called as one of the last parsers  -- to ensure that we have already given a chance to more meaningful parsers  -- before capturing their characers. -string' :: Parser (Doc RdrName) -string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\") +string' :: Parser (Doc a) +string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar) +  where +    unescape "" = "" +    unescape ('\\':x:xs) = x : unescape xs +    unescape (x:xs) = x : unescape xs + +-- | Skips a single special character and treats it as a plain string. +-- This is done to skip over any special characters belonging to other +-- elements but which were not deemed meaningful at their positions. +skipSpecialChar :: Parser (Doc a) +skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)  -- | Emphasis parser.  --  -- >>> parseOnly emphasis "/Hello world/"  -- Right (DocEmphasis (DocString "Hello world"))  emphasis :: DynFlags -> Parser (Doc RdrName) -emphasis d = DocEmphasis <$> stringBlock d "/" "/" "\n" +emphasis d = DocEmphasis . parseStringBS d <$> +  mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") --- | Skips a single character and treats it as a plain string. --- This is done to skip over any special characters belonging to other --- elements but which were not deemed meaningful at their positions. --- Note that this can only be used in places where we're absolutely certain --- no unicode is present, such as to skip a 100% certain ASCII delimeter. -skipChar :: Parser (Doc RdrName) -skipChar = DocString . return <$> anyChar +-- | Like `takeWhile`, but unconditionally take escaped characters. +takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile_ p = scan False p_ +  where +    p_ escaped c +      | escaped = Just False +      | not $ p c = Nothing +      | otherwise = Just (c == '\\') --- | Treats the next character as a regular string, even if it's normally --- used for markup. -charEscape :: Parser (Doc RdrName) -charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n')) +-- | Like `takeWhile1`, but unconditionally take escaped characters. +takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString +takeWhile1_ = mfilter (not . BS.null) . takeWhile_  -- | Text anchors to allow for jumping around the generated documentation.  --  -- >>> parseOnly anchor "#Hello world#"  -- Right (DocAName "Hello world") -anchor :: Parser (Doc RdrName) +anchor :: Parser (Doc a)  anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#") --- | Helper for markup structures surrounded with delimiters. -stringBlock -  :: DynFlags -     -> String -- ^ Opening delimiter -     -> String -- ^ Closing delimiter -     -> String -- ^ Additional characters to terminate parsing on -     -> Parser (Doc RdrName) -stringBlock d op ed n = do -  inner <- block op ed n -  case parseString' d inner of -    Just r -> return r -    _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’" - --- | Returns sections of text delimited by specified text. -block :: String -> String -> String -> Parser String -block op ed n = reverse . drop (length ed) . reverse <$> block' op ed -  where -    block' op' ed' = string (encodeUtf8 op') *> mid -      where -        mid :: Parser String -        mid = decodeUtf8 <$> string (encodeUtf8 ed') -              <|> do -                inner <- takeWithSkip (head ed') n -                more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed') -                        <|> block' "" ed'  -- not full ending, take more -                return $ inner ++ more - - --- | Takes all characters until the specified one. Unconditionally --- takes a character if it's escaped. Fails if it doesn't find the character or --- when the input string is empty. -takeWithSkip :: Char -> String -> Parser String -takeWithSkip s n = do -  content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome -  if or (map (`elem` content) n) || last content /= s -    then fail "failed in takeWithSkip" -    else return content -  where -    gotSome [] = fail "EOF in takeWithSkip" -    gotSome xs = return xs -    -- Apparently ‘scan’ is so magical that it doesn't mangle unicode. -    p (escaped, terminate) c -      | terminate = Nothing -- swallows up that extra character -      | escaped = Just (False, False) -      | c == s = Just (False, True) -      | otherwise = Just (c == '\\', False) -  -- | Monospaced strings.  --  -- >>> parseOnly (monospace dynflags) "@cruel@"  -- Right (DocMonospaced (DocString "cruel"))  monospace :: DynFlags -> Parser (Doc RdrName) -monospace d = DocMonospaced <$> stringBlock d "@" "@" "" - --- | Module name parser, surrounded by double quotes. This does a very primitive and --- purely syntactic checking so that obviously invalid names are not treated as valid --- and blindly hyperlinked (not starting with a capital letter or including spaces). -moduleName :: Parser (Doc RdrName) -moduleName = DocModule <$> ("\"" *> legalModule <* "\"") -  where legalModule = do -          n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z']) -               <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n")) - -          if any (`elem` n) " &[{}(=*)+]!#|@/;,^?" -            then fail "invalid characters in module name" -            else case n of -            [] -> return [] -            _ -> if last n == '.' then fail "trailing dot in module name" else return n +monospace d = DocMonospaced . parseStringBS d <$> ("@" *> takeWhile1_ (/= '@') <* "@") +moduleName :: Parser (Doc a) +moduleName = DocModule <$> (char '"' *> modid <* char '"') +  where +    modid = intercalate "." <$> conid `sepBy1` "." +    conid = (:) +      <$> satisfy isAsciiUpper +      -- NOTE: According to Haskell 2010 we shouldd actually only +      -- accept {small | large | digit | ' } here.  But as we can't +      -- match on unicode characters, this is currently not possible. +      <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))  -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify  -- a title for the picture. @@ -190,181 +150,166 @@ moduleName = DocModule <$> ("\"" *> legalModule <* "\"")  -- Right (DocPic (Picture "hello.png" Nothing))  -- >>> parseOnly picture "<<hello.png world>>"  -- Right (DocPic (Picture "hello.png" (Just "world"))) -picture :: Parser (Doc RdrName) -picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>") +picture :: Parser (Doc a) +picture = DocPic . makeLabeled Picture . decodeUtf8 +          <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")  -- | Paragraph parser, called by 'parseParas'.  paragraph :: DynFlags -> Parser (Doc RdrName)  paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d                                           <|> property <|> textParagraph d) +textParagraph :: DynFlags -> Parser (Doc RdrName) +textParagraph d = docParagraph . parseString d . intercalate "\n" <$> many1 nonEmptyLine +  -- | List parser, called by 'paragraph'.  list :: DynFlags -> Parser (Doc RdrName)  list d = DocUnorderedList <$> unorderedList d           <|> DocOrderedList <$> orderedList d           <|> DocDefList <$> definitionList d --- | Parse given text with a provided parser, casting --- Nothing to a failure -parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use -             -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result -             -> BS.ByteString -- ^ Text to parse -             -> Parser a -parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str) -  -- | Parses unordered (bullet) lists.  unorderedList :: DynFlags -> Parser [Doc RdrName] -unorderedList d = ("*" <|> "-") *> innerList unorderedList d +unorderedList d = ("*" <|> "-") *> innerList (unorderedList d) d  -- | Parses ordered lists (numbered or dashed).  orderedList :: DynFlags -> Parser [Doc RdrName] -orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d +orderedList d = (paren <|> dot) *> innerList (orderedList d) d    where -    dot = decimal <* "." -    paren = "(" *> (decimal :: Parser Int) <* ")" +    dot = (decimal :: Parser Int) <* "." +    paren = "(" *> decimal <* ")"  -- | Generic function collecting any further lines belonging to the  -- list entry and recursively collecting any further lists in the  -- same paragraph. Usually used as  --  -- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags -innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function -             -> DynFlags -             -> Parser [Doc RdrName] -innerList p d = do -  cl <- do -    content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty -    parseLine (parseString'' d) id content -  ulcs <- many ulc -  let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs] -      unLists = mconcat [x | Left x <- ulcs] -  return $ contents : unLists +innerList :: Parser [Doc RdrName] -> DynFlags -> Parser [Doc RdrName] +innerList item d = do +  c <- takeLine +  (cs, items) <- more +  let contents = (docParagraph . parseString d . unlines) (c : cs) +  return (contents : items)    where -    ulc :: Parser (Either [Doc RdrName] (Doc RdrName)) -    ulc = Left <$> (optWs *> p d) -          <|> Right <$> nonEmptyLine d - --- | Takes the remained of the line until the newline character --- and calls 'parseLine' using 'parseString'. Fails if it's made --- up strictly of whitespace. -nonEmptyLine :: DynFlags -> Parser (Doc RdrName) -nonEmptyLine d = do -  s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" -  parseLine (parseString'' d) id s -  where -    nonSpace xs -      | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line" -      | otherwise = return xs +    more :: Parser ([String], [Doc RdrName]) +    more = moreListItems <|> moreContent <|> pure ([], []) + +    moreListItems :: Parser ([String], [Doc RdrName]) +    moreListItems = (,) [] <$> (skipSpace *> item) + +    moreContent :: Parser ([String], [Doc RdrName]) +    moreContent = mapFst . (:) <$> nonEmptyLine <*> more  -- | Parses definition lists.  definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)]  definitionList d = do -  _ <- "[" -  inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n") -  _ <- "]" -  outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n") -  ulcs <- many ulc -  let contents = mconcat $ outer : [x | Right x <- ulcs] -      unLists = map mconcat [x | Left x <- ulcs] -  return $ (inner, contents) : unLists +  label <- parseStringBS d <$> ("[" *> takeWhile1 (`notElem` "]\n") <* "]") +  c <- takeLine +  (cs, items) <- more +  let contents = (parseString d . unlines) (c : cs) +  return ((label, contents) : items)    where -    ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName)) -    ulc = Left <$> (optWs *> definitionList d) -          <|> Right <$> nonEmptyLine d - --- | Parses birdtracks. No further markup is parsed after the birdtrack. --- Consecutive birdtracks are allowed. -birdtracks :: Parser (Doc RdrName) -birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n" +    more :: Parser ([String], [(Doc RdrName, Doc RdrName)]) +    more = moreListItems <|> moreContent <|> pure ([], []) + +    moreListItems :: Parser ([String], [(Doc RdrName, Doc RdrName)]) +    moreListItems = (,) [] <$> (skipSpace *> definitionList d) + +    moreContent :: Parser ([String], [(Doc RdrName, Doc RdrName)]) +    moreContent = mapFst . (:) <$> nonEmptyLine <*> more + +birdtracks :: Parser (Doc a) +birdtracks = DocCodeBlock . DocString . intercalate "\n" <$> many1 line    where -    line = optWs *> ">" *> A8.takeWhile (/= '\n') +    line = skipHorizontalSpace *> ">" *> takeLine  -- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).  -- Consecutive examples are accepted. -examples :: Parser (Doc RdrName) -examples = DocExamples <$> example - --- | Collects consecutive examples and their results. -example :: Parser [Example] -example = do -  ws <- optWs -  prompt <- decodeUtf8 <$> string ">>>" -  expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n") -  results <- many result -  let exs = concat [ e | Left e <- results ] -      res = filter (not . null) [ r | Right r <- results ] -  return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs +examples :: Parser (Doc a) +examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)    where -    result = Left <$> example -             <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n" +    go :: Parser [Example] +    go = do +      prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>" +      expr <- takeLine +      (rs, es) <- resultAndMoreExamples +      return (makeExample prefix expr rs : es) +      where +        resultAndMoreExamples :: Parser ([String], [Example]) +        resultAndMoreExamples = moreExamples <|> result <|> pure ([], []) +          where +            moreExamples :: Parser ([String], [Example]) +            moreExamples = (,) [] <$> go + +            result :: Parser ([String], [Example]) +            result = mapFst . (:) <$> nonEmptyLine <*> resultAndMoreExamples + +    makeExample :: String -> String -> [String] -> Example +    makeExample prefix expression res = +      Example (strip expression) result +      where +        result = map (substituteBlankLine . tryStripPrefix) res + +        tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs) + +        substituteBlankLine "<BLANKLINE>" = "" +        substituteBlankLine xs = xs + +nonEmptyLine :: Parser String +nonEmptyLine = mfilter (any (not . isSpace)) takeLine --- | Propery parser. +takeLine :: Parser String +takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine + +endOfLine :: Parser () +endOfLine = void "\n" <|> endOfInput + +mapFst :: (a -> b) -> (a, c) -> (b, c) +mapFst f (a, b) = (f a, b) + +-- | Property parser.  --  -- >>> parseOnly property "prop> hello world"  -- Right (DocProperty "hello world") -property :: Parser (Doc RdrName) -property = do -    _ <- skipSpace -    s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n')) -    return $ makeProperty ("prop>" ++ s) - --- | Paragraph level codeblock. Anything between the two delimiting @ --- is parsed for markup. +property :: Parser (Doc a) +property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) + +-- | +-- Paragraph level codeblock. Anything between the two delimiting @ is parsed +-- for markup.  codeblock :: DynFlags -> Parser (Doc RdrName) -codeblock d = do -  -- Note that we don't need to use optWs here because in cases where -  -- we don't see a \n immediatelly after the opening @, this parser -  -- fails but we still have a chance to get a codeblock by getting -  -- a monospaced doc on its own in the paragraph. With that, the cases -  -- are covered. This should be updated if the implementation ever changes. -  s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@") -  maybe (fail "codeblock") (return . DocCodeBlock) s +codeblock d = +  DocCodeBlock . parseStringBS d <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")    where -    block' = A8.scan False p +    block' = scan False p        where          p isNewline c            | isNewline && c == '@' = Nothing            | otherwise = Just $ c == '\n' --- | Calls 'parseString'' on each line of a paragraph -textParagraph :: DynFlags -> Parser (Doc RdrName) -textParagraph d = do -  s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n" -  maybe (fail "textParagraph") (return . docParagraph) s -  where -    line = takeWhile1 (/= '\n') - --- | See 'picture' for adding a page title. -url :: Parser (Doc RdrName) -url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">") -      <|> autoUrl - --- | Naive implementation of auto-linking. Will link everything after --- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space. --- Single trailing punctuation character (.!?,) is split off. -autoUrl :: Parser (Doc RdrName) -autoUrl = do -  link <- decodeUtf8 <$> urlLone -  return $ formatLink link +hyperlink :: Parser (Doc a) +hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">") + +autoUrl :: Parser (Doc a) +autoUrl = mkLink <$> url    where -    urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace) -    prefixes = [ "http://", "https://", "ftp://" -               , "ssh://", "gopher://" ] -    formatLink :: String -> Doc RdrName -    formatLink s = if last s `elem` ".!?," -                   then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s]) -                   else DocHyperlink $ Hyperlink s Nothing +    url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) +    mkLink :: BS.ByteString -> Doc a +    mkLink s = case BS.unsnoc s of +      Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x] +      _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)  -- | Parses strings between identifier delimiters. Consumes all input that it  -- deems to be valid in an identifier. Note that it simply blindly consumes  -- characters and does no actual validation itself.  parseValid :: Parser String  parseValid = do -  vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii) +  vs <- many' $ satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:") <|> digit <|> letter_ascii    c <- peekChar    case c of      Just '`' -> return vs -    Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs +    Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid) +                 <|> return vs      _ -> fail "outofvalid"  -- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the @@ -374,56 +319,33 @@ identifier dflags = do    o <- idDelim    vid <- parseValid    e <- idDelim -  return $ validIdentifier $ o : (vid ++ [e]) -  where idDelim = char '\'' <|> char '`' -        validIdentifier str = case parseIdent (tail $ init str) of -          Just identName -> DocIdentifier identName -          Nothing -> DocString str -        parseIdent :: String -> Maybe RdrName -        parseIdent str0 = -          let buffer = stringToStringBuffer str0 -              realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0 -              pstate = mkPState dflags buffer realSrcLc -          in case unP parseIdentifier pstate of -            POk _ name -> Just (unLoc name) -            _ -> Nothing +  return $ validIdentifier o vid e +  where +    idDelim = char '\'' <|> char '`' +    validIdentifier o ident e = case parseIdent ident of +      Just identName -> DocIdentifier identName +      Nothing -> DocString $ o : ident ++ [e] + +    parseIdent :: String -> Maybe RdrName +    parseIdent str0 = +      let buffer = stringToStringBuffer str0 +          realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0 +          pstate = mkPState dflags buffer realSrcLc +      in case unP parseIdentifier pstate of +        POk _ name -> Just (unLoc name) +        _ -> Nothing  -- | Remove all leading and trailing whitespace  strip :: String -> String  strip = (\f -> f . f) $ dropWhile isSpace . reverse --- | Consumes whitespace, excluding a newline. -optWs :: Parser BS.ByteString -optWs = A8.takeWhile (`elem` " \t\f\v\r") - --- | Create an 'Example', stripping superfluous characters as appropriate. --- Remembers the amount of indentation used for the prompt. -makeExample :: String -> String -> [String] -> Example -makeExample prompt expression res = -  Example (strip expression) result'       -- drop whitespace in expressions -  where (prefix, _) = span isSpace prompt -        result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res -          where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys -                substituteBlankLine "<BLANKLINE>" = "" -                substituteBlankLine line          = line - --- | Creates a 'Picture' with an optional title. Called by 'picture'. -makePicture :: String -> Picture -makePicture input = case break isSpace $ strip input of -  (uri, "")    -> Picture uri Nothing -  (uri, label) -> Picture uri (Just $ dropWhile isSpace label) - --- | Creates a 'Hyperlink' with an optional title. Called by 'example'. -makeHyperlink :: String -> Hyperlink -makeHyperlink input = case break isSpace $ strip input of -  (u, "")    -> Hyperlink u Nothing -  (u, label) -> Hyperlink u (Just $ dropWhile isSpace label) - --- | Makes a property that can be used by other programs for assertions. --- Drops whitespace around the property. Called by 'property' -makeProperty :: String -> Doc RdrName -makeProperty s = case strip s of -  'p':'r':'o':'p':'>':xs -> -    DocProperty (dropWhile isSpace xs) -  xs -> -    error $ "makeProperty: invalid input " ++ show xs +skipHorizontalSpace :: Parser () +skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r") + +takeHorizontalSpace :: Parser BS.ByteString +takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r") + +makeLabeled :: (String -> Maybe String -> a) -> String -> a +makeLabeled f input = case break isSpace $ strip input of +  (uri, "")    -> f uri Nothing +  (uri, label) -> f uri (Just $ dropWhile isSpace label) | 
