diff options
Diffstat (limited to 'haddock-library')
-rw-r--r-- | haddock-library/.ghci | 2 | ||||
-rw-r--r-- | haddock-library/haddock-library.cabal | 16 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Doc.hs | 43 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser.hs | 216 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser/Monad.hs | 149 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser/Util.hs | 20 | ||||
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Types.hs | 28 | ||||
-rw-r--r-- | haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs | 9 | ||||
-rw-r--r-- | haddock-library/test/Documentation/Haddock/ParserSpec.hs | 194 |
9 files changed, 525 insertions, 152 deletions
diff --git a/haddock-library/.ghci b/haddock-library/.ghci index f0bc9104..78950a5b 100644 --- a/haddock-library/.ghci +++ b/haddock-library/.ghci @@ -1 +1 @@ -:set -isrc -ivendor/attoparsec-0.12.1.1 -itest -idist/build -idist/build/autogen -optP-include -optPdist/build/autogen/cabal_macros.h -fobject-code +:set -isrc -ivendor/attoparsec-0.12.1.1 -itest -idist/build -idist/build/autogen -optP-include -optPdist/build/autogen/cabal_macros.h diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal index 30db3e8a..b0f886cd 100644 --- a/haddock-library/haddock-library.cabal +++ b/haddock-library/haddock-library.cabal @@ -1,5 +1,5 @@ name: haddock-library -version: 1.1.0 +version: 1.2.0 synopsis: Library exposing some functionality of Haddock. description: Haddock is a documentation-generation tool for Haskell libraries. These modules expose some functionality of it @@ -21,15 +21,17 @@ library default-language: Haskell2010 build-depends: - base >= 4.3 && < 4.8, - bytestring, - deepseq + base >= 4.3 && < 4.9 + , bytestring + , transformers + , deepseq hs-source-dirs: src, vendor/attoparsec-0.12.1.1 ghc-options: -funbox-strict-fields -Wall -fwarn-tabs -O2 exposed-modules: Documentation.Haddock.Parser + Documentation.Haddock.Parser.Monad Documentation.Haddock.Types Documentation.Haddock.Doc @@ -68,10 +70,12 @@ test-suite spec build-depends: base - , base-compat - , hspec , bytestring + , transformers , deepseq + + , base-compat + , hspec , QuickCheck == 2.* source-repository head diff --git a/haddock-library/src/Documentation/Haddock/Doc.hs b/haddock-library/src/Documentation/Haddock/Doc.hs index 4d6c10a4..66bd1c97 100644 --- a/haddock-library/src/Documentation/Haddock/Doc.hs +++ b/haddock-library/src/Documentation/Haddock/Doc.hs @@ -1,21 +1,50 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} -module Documentation.Haddock.Doc (docParagraph) where +module Documentation.Haddock.Doc (docParagraph, docAppend, + docConcat, metaDocConcat, + metaDocAppend, emptyMetaDoc, + metaAppend, metaConcat) where -import Data.Monoid +import Control.Applicative ((<|>), empty) import Documentation.Haddock.Types import Data.Char (isSpace) --- We put it here so that we can avoid a circular import --- anything relevant imports this module anyway -instance Monoid (DocH mod id) where - mempty = DocEmpty - mappend = docAppend +docConcat :: [DocH mod id] -> DocH mod id +docConcat = foldr docAppend DocEmpty + +-- | Concat using 'metaAppend'. +metaConcat :: [Meta] -> Meta +metaConcat = foldr metaAppend emptyMeta + +-- | Like 'docConcat' but also joins the 'Meta' info. +metaDocConcat :: [MetaDoc mod id] -> MetaDoc mod id +metaDocConcat = foldr metaDocAppend emptyMetaDoc + +-- | We do something perhaps unexpected here and join the meta info +-- in ‘reverse’: this results in the metadata from the ‘latest’ +-- paragraphs taking precedence. +metaDocAppend :: MetaDoc mod id -> MetaDoc mod id -> MetaDoc mod id +metaDocAppend (MetaDoc { _meta = m, _doc = d }) + (MetaDoc { _meta = m', _doc = d' }) = + MetaDoc { _meta = m' `metaAppend` m, _doc = d `docAppend` d' } + +-- | This is not a monoidal append, it uses '<|>' for the '_version'. +metaAppend :: Meta -> Meta -> Meta +metaAppend (Meta { _version = v }) (Meta { _version = v' }) = + Meta { _version = v <|> v' } + +emptyMetaDoc :: MetaDoc mod id +emptyMetaDoc = MetaDoc { _meta = emptyMeta, _doc = DocEmpty } + +emptyMeta :: Meta +emptyMeta = Meta { _version = empty } docAppend :: DocH mod id -> DocH mod id -> DocH mod id docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2) docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2) +docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1++ds2)) d docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2) +docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1++ds2)) d docAppend DocEmpty d = d docAppend d DocEmpty = d docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2) diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index e8bc2761..b7ab85b0 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -1,8 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE UndecidableInstances #-} -{-# LANGUAGE IncoherentInstances #-} +{-# LANGUAGE ViewPatterns #-} -- | -- Module : Documentation.Haddock.Parser -- Copyright : (c) Mateusz Kowalczyk 2013-2014, @@ -24,14 +21,14 @@ module Documentation.Haddock.Parser ( parseString, parseParas import Control.Applicative import Control.Arrow (first) -import Control.Monad (void, mfilter) -import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import Control.Monad import qualified Data.ByteString.Char8 as BS import Data.Char (chr, isAsciiUpper) import Data.List (stripPrefix, intercalate, unfoldr) import Data.Maybe (fromMaybe) import Data.Monoid import Documentation.Haddock.Doc +import Documentation.Haddock.Parser.Monad hiding (take, endOfLine) import Documentation.Haddock.Parser.Util import Documentation.Haddock.Types import Documentation.Haddock.Utf8 @@ -81,7 +78,7 @@ overIdentifier f d = g d g (DocExamples x) = DocExamples x g (DocHeader (Header l x)) = DocHeader . Header l $ g x -parse :: Parser a -> BS.ByteString -> a +parse :: Parser a -> BS.ByteString -> (ParserState, a) parse p = either err id . parseOnly (p <* endOfInput) where err = error . ("Haddock.Parser.parse: " ++) @@ -89,11 +86,21 @@ parse p = either err id . parseOnly (p <* endOfInput) -- | Main entry point to the parser. Appends the newline character -- to the input string. parseParas :: String -- ^ String to parse - -> DocH mod Identifier -parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") + -> MetaDoc mod Identifier +parseParas input = case parseParasState input of + (state, a) -> MetaDoc { _meta = Meta { _version = parserStateSince state } + , _doc = a + } + +parseParasState :: String -> (ParserState, DocH mod Identifier) +parseParasState = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") where p :: Parser (DocH mod Identifier) - p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") + p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") + +parseParagraphs :: String -> Parser (DocH mod Identifier) +parseParagraphs input = case parseParasState input of + (state, a) -> setParserState state >> return a -- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which -- drops leading whitespace and encodes the string to UTF8 first. @@ -101,19 +108,19 @@ parseString :: String -> DocH mod Identifier parseString = parseStringBS . encodeUtf8 . dropWhile isSpace parseStringBS :: BS.ByteString -> DocH mod Identifier -parseStringBS = parse p +parseStringBS = snd . parse p where p :: Parser (DocH mod Identifier) - p = mconcat <$> many (monospace <|> anchor <|> identifier <|> moduleName - <|> picture <|> hyperlink <|> bold - <|> emphasis <|> encodedChar <|> string' - <|> skipSpecialChar) + p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName + <|> picture <|> markdownImage <|> hyperlink <|> bold + <|> emphasis <|> encodedChar <|> string' + <|> skipSpecialChar) -- | Parses and processes -- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> -- --- >>> parseOnly encodedChar "A" --- Right (DocString "A") +-- >>> parseString "A" +-- DocString "A" encodedChar :: Parser (DocH mod a) encodedChar = "&#" *> c <* ";" where @@ -145,16 +152,16 @@ skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar) -- | Emphasis parser. -- --- >>> parseOnly emphasis "/Hello world/" --- Right (DocEmphasis (DocString "Hello world")) +-- >>> parseString "/Hello world/" +-- DocEmphasis (DocString "Hello world") emphasis :: Parser (DocH mod Identifier) emphasis = DocEmphasis . parseStringBS <$> mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") -- | Bold parser. -- --- >>> parseOnly bold "__Hello world__" --- Right (DocBold (DocString "Hello world")) +-- >>> parseString "__Hello world__" +-- DocBold (DocString "Hello world") bold :: Parser (DocH mod Identifier) bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__") @@ -176,19 +183,23 @@ takeWhile1_ = mfilter (not . BS.null) . takeWhile_ -- | Text anchors to allow for jumping around the generated documentation. -- --- >>> parseOnly anchor "#Hello world#" --- Right (DocAName "Hello world") +-- >>> parseString "#Hello world#" +-- DocAName "Hello world" anchor :: Parser (DocH mod a) anchor = DocAName . decodeUtf8 <$> disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#") -- | Monospaced strings. -- --- >>> parseOnly monospace "@cruel@" --- Right (DocMonospaced (DocString "cruel")) +-- >>> parseString "@cruel@" +-- DocMonospaced (DocString "cruel") monospace :: Parser (DocH mod Identifier) -monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@") +monospace = DocMonospaced . parseStringBS + <$> ("@" *> takeWhile1_ (/= '@') <* "@") +-- | Module names: we try our reasonable best to only allow valid +-- Haskell module names, with caveat about not matching on technically +-- valid unicode symbols. moduleName :: Parser (DocH mod a) moduleName = DocModule <$> (char '"' *> modid <* char '"') where @@ -204,26 +215,45 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"') -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify -- a title for the picture. -- --- >>> parseOnly picture "<<hello.png>>" --- Right (DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing})) --- >>> parseOnly picture "<<hello.png world>>" --- Right (DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"})) +-- >>> parseString "<<hello.png>>" +-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Nothing}) +-- >>> parseString "<<hello.png world>>" +-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"}) picture :: Parser (DocH mod a) picture = DocPic . makeLabeled Picture . decodeUtf8 <$> disallowNewline ("<<" *> takeUntil ">>") +markdownImage :: Parser (DocH mod a) +markdownImage = fromHyperlink <$> ("!" *> linkParser) + where + fromHyperlink (Hyperlink url label) = DocPic (Picture url label) + -- | Paragraph parser, called by 'parseParas'. paragraph :: Parser (DocH mod Identifier) -paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock - <|> property <|> header - <|> textParagraph) +paragraph = examples <|> skipSpace *> ( + since + <|> unorderedList + <|> orderedList + <|> birdtracks + <|> codeblock + <|> property + <|> header + <|> textParagraphThatStartsWithMarkdownLink + <|> definitionList + <|> docParagraph <$> textParagraph + ) + +since :: Parser (DocH mod a) +since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty + where + version = decimal `sepBy1'` "." -- | Headers inside the comment denoted with @=@ signs, up to 6 levels -- deep. -- --- >>> parseOnly header "= Hello" +-- >>> snd <$> parseOnly header "= Hello" -- Right (DocHeader (Header {headerLevel = 1, headerTitle = DocString "Hello"})) --- >>> parseOnly header "== World" +-- >>> snd <$> parseOnly header "== World" -- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"})) header :: Parser (DocH mod Identifier) header = do @@ -231,26 +261,37 @@ header = do pser = foldl1 (<|>) psers delim <- decodeUtf8 <$> pser line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString - rest <- paragraph <|> return mempty - return $ DocHeader (Header (length delim) line) <> rest + rest <- paragraph <|> return DocEmpty + return $ DocHeader (Header (length delim) line) `docAppend` rest textParagraph :: Parser (DocH mod Identifier) -textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine +textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine --- | List parser, called by 'paragraph'. -list :: Parser (DocH mod Identifier) -list = DocUnorderedList <$> unorderedList - <|> DocOrderedList <$> orderedList - <|> DocDefList <$> definitionList +textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier) +textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph) + where + optionalTextParagraph :: Parser (DocH mod Identifier) + optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty + + whitespace :: Parser (DocH mod a) + whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n") + where + f :: BS.ByteString -> Maybe BS.ByteString -> String + f xs (fromMaybe "" -> x) + | BS.null (xs <> x) = "" + | otherwise = " " -- | Parses unordered (bullet) lists. -unorderedList :: Parser [DocH mod Identifier] -unorderedList = ("*" <|> "-") *> innerList unorderedList +unorderedList :: Parser (DocH mod Identifier) +unorderedList = DocUnorderedList <$> p + where + p = ("*" <|> "-") *> innerList p -- | Parses ordered lists (numbered or dashed). -orderedList :: Parser [DocH mod Identifier] -orderedList = (paren <|> dot) *> innerList orderedList +orderedList :: Parser (DocH mod Identifier) +orderedList = DocOrderedList <$> p where + p = (paren <|> dot) *> innerList p dot = (decimal :: Parser Int) <* "." paren = "(" *> decimal <* ")" @@ -265,19 +306,21 @@ innerList item = do (cs, items) <- more item let contents = docParagraph . parseString . dropNLs . unlines $ c : cs return $ case items of - Left p -> [contents <> p] + Left p -> [contents `docAppend` p] Right i -> contents : i -- | Parses definition lists. -definitionList :: Parser [(DocH mod Identifier, DocH mod Identifier)] -definitionList = do - label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n"::String))) <* "]" - c <- takeLine - (cs, items) <- more definitionList - let contents = parseString . dropNLs . unlines $ c : cs - return $ case items of - Left p -> [(label, contents <> p)] - Right i -> (label, contents) : i +definitionList :: Parser (DocH mod Identifier) +definitionList = DocDefList <$> p + where + p = do + label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":") + c <- takeLine + (cs, items) <- more p + let contents = parseString . dropNLs . unlines $ c : cs + return $ case items of + Left x -> [(label, contents `docAppend` x)] + Right i -> (label, contents) : i -- | Drops all trailing newlines. dropNLs :: String -> String @@ -291,12 +334,12 @@ more :: Monoid a => Parser a more item = innerParagraphs <|> moreListItems item <|> moreContent item <|> pure ([], Right mempty) --- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs. +-- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs. innerParagraphs :: Parser ([String], Either (DocH mod Identifier) a) innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs) --- | Attemps to fetch the next list if possibly. Used by 'innerList' and --- 'definitionList' to recursivly grab lists that aren't separated by a whole +-- | Attempts to fetch the next list if possibly. Used by 'innerList' and +-- 'definitionList' to recursively grab lists that aren't separated by a whole -- paragraph. moreListItems :: Parser a -> Parser ([String], Either (DocH mod Identifier) a) @@ -308,10 +351,10 @@ moreContent :: Monoid a => Parser a -> Parser ([String], Either (DocH mod Identifier) a) moreContent item = first . (:) <$> nonEmptyLine <*> more item --- | Runs the 'parseParas' parser on an indented paragraph. +-- | Parses an indented paragraph. -- The indentation is 4 spaces. indentedParagraphs :: Parser (DocH mod Identifier) -indentedParagraphs = parseParas . concat <$> dropFrontOfPara " " +indentedParagraphs = (concat <$> dropFrontOfPara " ") >>= parseParagraphs -- | Grab as many fully indented paragraphs as we can. dropFrontOfPara :: Parser BS.ByteString -> Parser [String] @@ -399,7 +442,7 @@ endOfLine = void "\n" <|> endOfInput -- | Property parser. -- --- >>> parseOnly property "prop> hello world" +-- >>> snd <$> parseOnly property "prop> hello world" -- Right (DocProperty "hello world") property :: Parser (DocH mod a) property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) @@ -442,11 +485,32 @@ codeblock = | isNewline && isSpace c = Just isNewline | otherwise = Just $ c == '\n' --- | Parses links that were specifically marked as such. hyperlink :: Parser (DocH mod a) hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> disallowNewline ("<" *> takeUntil ">") <|> autoUrl + <|> markdownLink + +markdownLink :: Parser (DocH mod a) +markdownLink = DocHyperlink <$> linkParser + +linkParser :: Parser Hyperlink +linkParser = flip Hyperlink <$> label <*> (whitespace *> url) + where + label :: Parser (Maybe String) + label = Just . strip . decode <$> ("[" *> takeUntil "]") + + whitespace :: Parser () + whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace) + + url :: Parser String + url = rejectWhitespace (decode <$> ("(" *> takeUntil ")")) + + rejectWhitespace :: MonadPlus m => m String -> m String + rejectWhitespace = mfilter (all (not . isSpace)) + + decode :: BS.ByteString -> String + decode = removeEscapes . decodeUtf8 -- | Looks for URL-like things to automatically hyperlink even if they -- weren't marked as links. @@ -456,32 +520,32 @@ autoUrl = mkLink <$> url url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) mkLink :: BS.ByteString -> DocH mod a mkLink s = case unsnoc s of - Just (xs, x) | x `elem` (",.!?"::String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x] + Just (xs, x) | x `elem` (",.!?" :: String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x] _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) -- | Parses strings between identifier delimiters. Consumes all input that it -- deems to be valid in an identifier. Note that it simply blindly consumes -- characters and does no actual validation itself. parseValid :: Parser String -parseValid = do - vs' <- many' $ utf8String "⋆" <|> return <$> idChar - let vs = concat vs' - c <- peekChar - case c of - Just '`' -> return vs - Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid) - <|> return vs - _ -> fail "outofvalid" +parseValid = p some where idChar = satisfy (`elem` ("_.!#$%&*+/<=>?@\\|-~:^"::String)) <|> digit <|> letter_ascii + p p' = do + vs' <- p' $ utf8String "⋆" <|> return <$> idChar + let vs = concat vs' + c <- peekChar' + case c of + '`' -> return vs + '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs + _ -> fail "outofvalid" -- | Parses UTF8 strings from ByteString streams. utf8String :: String -> Parser String utf8String x = decodeUtf8 <$> string (encodeUtf8 x) --- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the --- string it deems valid. +-- | Parses identifiers with help of 'parseValid'. Asks GHC for +-- 'String' from the string it deems valid. identifier :: Parser (DocH mod Identifier) identifier = do o <- idDelim diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs new file mode 100644 index 00000000..a421c58c --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -0,0 +1,149 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-} +module Documentation.Haddock.Parser.Monad ( + module Documentation.Haddock.Parser.Monad +, Attoparsec.isDigit +, Attoparsec.isDigit_w8 +, Attoparsec.isAlpha_iso8859_15 +, Attoparsec.isAlpha_ascii +, Attoparsec.isSpace +, Attoparsec.isSpace_w8 +, Attoparsec.inClass +, Attoparsec.notInClass +, Attoparsec.isEndOfLine +, Attoparsec.isHorizontalSpace +, Attoparsec.choice +, Attoparsec.count +, Attoparsec.option +, Attoparsec.many' +, Attoparsec.many1 +, Attoparsec.many1' +, Attoparsec.manyTill +, Attoparsec.manyTill' +, Attoparsec.sepBy +, Attoparsec.sepBy' +, Attoparsec.sepBy1 +, Attoparsec.sepBy1' +, Attoparsec.skipMany +, Attoparsec.skipMany1 +, Attoparsec.eitherP +) where + +import Control.Applicative +import Control.Monad +import Data.String +import Data.ByteString (ByteString) +import qualified Data.ByteString.Lazy as LB +import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec +import Control.Monad.Trans.State +import qualified Control.Monad.Trans.Class as Trans +import Data.Word +import Data.Bits +import Data.Tuple + +import Documentation.Haddock.Types (Version) + +data ParserState = ParserState { + parserStateSince :: Maybe Version +} deriving (Eq, Show) + +initialParserState :: ParserState +initialParserState = ParserState Nothing + +newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a) + deriving (Functor, Applicative, Alternative, Monad, MonadPlus) + +instance (a ~ ByteString) => IsString (Parser a) where + fromString = lift . fromString + +parseOnly :: Parser a -> ByteString -> Either String (ParserState, a) +parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState) + +lift :: Attoparsec.Parser a -> Parser a +lift = Parser . Trans.lift + +setParserState :: ParserState -> Parser () +setParserState = Parser . put + +setSince :: Version -> Parser () +setSince since = Parser $ modify (\st -> st {parserStateSince = Just since}) + +char :: Char -> Parser Char +char = lift . Attoparsec.char + +char8 :: Char -> Parser Word8 +char8 = lift . Attoparsec.char8 + +anyChar :: Parser Char +anyChar = lift Attoparsec.anyChar + +notChar :: Char -> Parser Char +notChar = lift . Attoparsec.notChar + +satisfy :: (Char -> Bool) -> Parser Char +satisfy = lift . Attoparsec.satisfy + +peekChar :: Parser (Maybe Char) +peekChar = lift Attoparsec.peekChar + +peekChar' :: Parser Char +peekChar' = lift Attoparsec.peekChar' + +digit :: Parser Char +digit = lift Attoparsec.digit + +letter_iso8859_15 :: Parser Char +letter_iso8859_15 = lift Attoparsec.letter_iso8859_15 + +letter_ascii :: Parser Char +letter_ascii = lift Attoparsec.letter_ascii + +space :: Parser Char +space = lift Attoparsec.space + +string :: ByteString -> Parser ByteString +string = lift . Attoparsec.string + +stringCI :: ByteString -> Parser ByteString +stringCI = lift . Attoparsec.stringCI + +skipSpace :: Parser () +skipSpace = lift Attoparsec.skipSpace + +skipWhile :: (Char -> Bool) -> Parser () +skipWhile = lift . Attoparsec.skipWhile + +take :: Int -> Parser ByteString +take = lift . Attoparsec.take + +scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString +scan s = lift . Attoparsec.scan s + +takeWhile :: (Char -> Bool) -> Parser ByteString +takeWhile = lift . Attoparsec.takeWhile + +takeWhile1 :: (Char -> Bool) -> Parser ByteString +takeWhile1 = lift . Attoparsec.takeWhile1 + +takeTill :: (Char -> Bool) -> Parser ByteString +takeTill = lift . Attoparsec.takeTill + +takeByteString :: Parser ByteString +takeByteString = lift Attoparsec.takeByteString + +takeLazyByteString :: Parser LB.ByteString +takeLazyByteString = lift Attoparsec.takeLazyByteString + +endOfLine :: Parser () +endOfLine = lift Attoparsec.endOfLine + +decimal :: Integral a => Parser a +decimal = lift Attoparsec.decimal + +hexadecimal :: (Integral a, Bits a) => Parser a +hexadecimal = lift Attoparsec.hexadecimal + +endOfInput :: Parser () +endOfInput = lift Attoparsec.endOfInput + +atEnd :: Parser Bool +atEnd = lift Attoparsec.atEnd diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index ef2af140..d908ce18 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -14,6 +14,7 @@ module Documentation.Haddock.Parser.Util ( unsnoc , strip , takeUntil +, removeEscapes , makeLabeled , takeHorizontalSpace , skipHorizontalSpace @@ -21,7 +22,7 @@ module Documentation.Haddock.Parser.Util ( import Control.Applicative import Control.Monad (mfilter) -import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) +import Documentation.Haddock.Parser.Monad import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS import Prelude hiding (takeWhile) @@ -49,14 +50,15 @@ makeLabeled :: (String -> Maybe String -> a) -> String -> a makeLabeled f input = case break isSpace $ removeEscapes $ strip input of (uri, "") -> f uri Nothing (uri, label) -> f uri (Just $ dropWhile isSpace label) - where - -- As we don't parse these any further, we don't do any processing to the - -- string so we at least remove escape character here. Perhaps we should - -- actually be parsing the label at the very least? - removeEscapes "" = "" - removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs - removeEscapes ('\\':xs) = removeEscapes xs - removeEscapes (x:xs) = x : removeEscapes xs + +-- | Remove escapes from given string. +-- +-- Only do this if you do not process (read: parse) the input any further. +removeEscapes :: String -> String +removeEscapes "" = "" +removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs +removeEscapes ('\\':xs) = removeEscapes xs +removeEscapes (x:xs) = x : removeEscapes xs takeUntil :: ByteString -> Parser ByteString takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome diff --git a/haddock-library/src/Documentation/Haddock/Types.hs b/haddock-library/src/Documentation/Haddock/Types.hs index b3118cc6..4ef89658 100644 --- a/haddock-library/src/Documentation/Haddock/Types.hs +++ b/haddock-library/src/Documentation/Haddock/Types.hs @@ -1,5 +1,4 @@ -{-# LANGUAGE DeriveDataTypeable, DeriveFunctor, DeriveFoldable #-} -{-# LANGUAGE DeriveTraversable, StandaloneDeriving #-} +{-# LANGUAGE DeriveFunctor, DeriveFoldable, DeriveTraversable #-} -- | -- Module : Documentation.Haddock.Types @@ -18,24 +17,27 @@ module Documentation.Haddock.Types where import Data.Foldable import Data.Traversable -instance Foldable Header where - foldMap f (Header _ a) = f a +-- | With the advent of 'Version', we may want to start attaching more +-- meta-data to comments. We make a structure for this ahead of time +-- so we don't have to gut half the core each time we want to add such +-- info. +newtype Meta = Meta { _version :: Maybe Version } deriving (Eq, Show) -instance Traversable Header where - traverse f (Header l a) = Header l `fmap` f a +data MetaDoc mod id = + MetaDoc { _meta :: Meta + , _doc :: DocH mod id + } deriving (Eq, Show, Functor, Foldable, Traversable) +overDoc :: (DocH a b -> DocH c d) -> MetaDoc a b -> MetaDoc c d +overDoc f d = d { _doc = f $ _doc d } -deriving instance Show a => Show (Header a) -deriving instance (Show a, Show b) => Show (DocH a b) -deriving instance Eq a => Eq (Header a) -deriving instance (Eq a, Eq b) => Eq (DocH a b) +type Version = [Int] data Hyperlink = Hyperlink { hyperlinkUrl :: String , hyperlinkLabel :: Maybe String } deriving (Eq, Show) - data Picture = Picture { pictureUri :: String , pictureTitle :: Maybe String @@ -44,7 +46,7 @@ data Picture = Picture data Header id = Header { headerLevel :: Int , headerTitle :: id - } deriving Functor + } deriving (Eq, Show, Functor, Foldable, Traversable) data Example = Example { exampleExpression :: String @@ -73,4 +75,4 @@ data DocH mod id | DocProperty String | DocExamples [Example] | DocHeader (Header (DocH mod id)) - deriving (Functor, Foldable, Traversable) + deriving (Eq, Show, Functor, Foldable, Traversable) diff --git a/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs index a6ac49ee..10c701c7 100644 --- a/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs +++ b/haddock-library/test/Documentation/Haddock/Parser/UtilSpec.hs @@ -1,10 +1,11 @@ {-# LANGUAGE OverloadedStrings #-} module Documentation.Haddock.Parser.UtilSpec (main, spec) where -import Data.Attoparsec.ByteString.Char8 +import Documentation.Haddock.Parser.Monad import Documentation.Haddock.Parser.Util import Data.Either.Compat (isLeft) import Test.Hspec +import Control.Applicative main :: IO () main = hspec spec @@ -13,10 +14,10 @@ spec :: Spec spec = do describe "takeUntil" $ do it "takes everything until a specified byte sequence" $ do - parseOnly (takeUntil "end") "someend" `shouldBe` Right "some" + snd <$> parseOnly (takeUntil "end") "someend" `shouldBe` Right "some" it "requires the end sequence" $ do - parseOnly (takeUntil "end") "someen" `shouldSatisfy` isLeft + snd <$> parseOnly (takeUntil "end") "someen" `shouldSatisfy` isLeft it "takes escaped bytes unconditionally" $ do - parseOnly (takeUntil "end") "some\\endend" `shouldBe` Right "some\\end" + snd <$> parseOnly (takeUntil "end") "some\\endend" `shouldBe` Right "some\\end" diff --git a/haddock-library/test/Documentation/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs index 5181a3f3..44ec2988 100644 --- a/haddock-library/test/Documentation/Haddock/ParserSpec.hs +++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs @@ -1,16 +1,19 @@ {-# LANGUAGE OverloadedStrings, FlexibleInstances #-} -{-# LANGUAGE IncoherentInstances, UndecidableInstances #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Documentation.Haddock.ParserSpec (main, spec) where -import Data.Monoid import Data.String import qualified Documentation.Haddock.Parser as Parse import Documentation.Haddock.Types +import Documentation.Haddock.Doc (docAppend) import Test.Hspec import Test.QuickCheck +infixr 6 <> +(<>) :: Doc id -> Doc id -> Doc id +(<>) = docAppend + type Doc id = DocH () id instance IsString (Doc String) where @@ -19,12 +22,15 @@ instance IsString (Doc String) where instance IsString a => IsString (Maybe a) where fromString = Just . fromString -parseParas :: String -> Doc String -parseParas = Parse.toRegular . Parse.parseParas +parseParas :: String -> MetaDoc () String +parseParas = overDoc Parse.toRegular . Parse.parseParas parseString :: String -> Doc String parseString = Parse.toRegular . Parse.parseString +hyperlink :: String -> Maybe String -> Doc String +hyperlink url = DocHyperlink . Hyperlink url + main :: IO () main = hspec spec @@ -79,10 +85,13 @@ spec = do " don't use apostrophe's in the wrong place's" `shouldParseTo` "don't use apostrophe's in the wrong place's" - context "when parsing URLs" $ do - let hyperlink :: String -> Maybe String -> Doc String - hyperlink url = DocHyperlink . Hyperlink url + it "doesn't parse empty identifiers" $ do + "``" `shouldParseTo` "``" + it "can parse infix identifiers" $ do + "``infix``" `shouldParseTo` "`" <> DocIdentifier "infix" <> "`" + + context "when parsing URLs" $ do it "parses a URL" $ do "<http://example.com/>" `shouldParseTo` hyperlink "http://example.com/" Nothing @@ -111,6 +120,45 @@ spec = do it "doesn't allow for multi-line link tags" $ do "<ba\nz aar>" `shouldParseTo` "<ba\nz aar>" + context "when parsing markdown links" $ do + it "parses a simple link" $ do + "[some label](url)" `shouldParseTo` + hyperlink "url" "some label" + + it "allows whitespace between label and URL" $ do + "[some label] \t (url)" `shouldParseTo` + hyperlink "url" "some label" + + it "allows newlines in label" $ do + "[some\n\nlabel](url)" `shouldParseTo` + hyperlink "url" "some\n\nlabel" + + it "allows escaping in label" $ do + "[some\\] label](url)" `shouldParseTo` + hyperlink "url" "some] label" + + it "strips leading and trailing whitespace from label" $ do + "[ some label ](url)" `shouldParseTo` + hyperlink "url" "some label" + + it "rejects whitespace in URL" $ do + "[some label]( url)" `shouldParseTo` + "[some label]( url)" + + context "when URL is on a separate line" $ do + it "allows URL to be on a separate line" $ do + "[some label]\n(url)" `shouldParseTo` + hyperlink "url" "some label" + + it "allows leading whitespace" $ do + "[some label]\n \t (url)" `shouldParseTo` + hyperlink "url" "some label" + + it "rejects additional newlines" $ do + "[some label]\n\n(url)" `shouldParseTo` + "[some label]\n\n(url)" + + context "when autolinking URLs" $ do it "autolinks HTTP URLs" $ do "http://example.com/" `shouldParseTo` hyperlink "http://example.com/" Nothing @@ -141,24 +189,22 @@ spec = do "foo https://example.com/example bar" `shouldParseTo` "foo " <> hyperlink "https://example.com/example" Nothing <> " bar" - context "when parsing pictures" $ do - let picture :: String -> Maybe String -> Doc String - picture uri = DocPic . Picture uri + context "when parsing images" $ do + let image :: String -> Maybe String -> Doc String + image uri = DocPic . Picture uri - it "parses a simple picture" $ do - "<<baz>>" `shouldParseTo` picture "baz" Nothing + it "accepts markdown syntax for images" $ do + "![label](url)" `shouldParseTo` image "url" "label" - it "parses a picture with a title" $ do - "<<b a z>>" `shouldParseTo` picture "b" (Just "a z") + it "accepts Unicode" $ do + "![灼眼のシャナ](url)" `shouldParseTo` image "url" "灼眼のシャナ" - it "parses a picture with unicode" $ do - "<<灼眼のシャナ>>" `shouldParseTo` picture "灼眼のシャナ" Nothing + it "supports deprecated picture syntax" $ do + "<<baz>>" `shouldParseTo` image "baz" Nothing - it "allows for escaping of the closing tags" $ do - "<<ba\\>>z>>" `shouldParseTo` picture "ba>>z" Nothing + it "supports title for deprecated picture syntax" $ do + "<<b a z>>" `shouldParseTo` image "b" "a z" - it "doesn't allow for multi-line picture tags" $ do - "<<ba\nz aar>>" `shouldParseTo` "<<ba\nz aar>>" context "when parsing anchors" $ do it "parses a single word anchor" $ do @@ -312,12 +358,39 @@ spec = do describe "parseParas" $ do let infix 1 `shouldParseTo` shouldParseTo :: String -> Doc String -> Expectation - shouldParseTo input ast = parseParas input `shouldBe` ast + shouldParseTo input ast = _doc (parseParas input) `shouldBe` ast it "is total" $ do property $ \xs -> (length . show . parseParas) xs `shouldSatisfy` (> 0) + context "when parsing @since" $ do + it "adds specified version to the result" $ do + parseParas "@since 0.5.0" `shouldBe` + MetaDoc { _meta = Meta { _version = Just [0,5,0] } + , _doc = DocEmpty } + + it "ignores trailing whitespace" $ do + parseParas "@since 0.5.0 \t " `shouldBe` + MetaDoc { _meta = Meta { _version = Just [0,5,0] } + , _doc = DocEmpty } + + it "does not allow trailing input" $ do + parseParas "@since 0.5.0 foo" `shouldBe` + MetaDoc { _meta = Meta { _version = Nothing } + , _doc = DocParagraph "@since 0.5.0 foo" } + + + context "when given multiple times" $ do + it "gives last occurrence precedence" $ do + (parseParas . unlines) [ + "@since 0.5.0" + , "@since 0.6.0" + , "@since 0.7.0" + ] `shouldBe` MetaDoc { _meta = Meta { _version = Just [0,7,0] } + , _doc = DocEmpty } + + context "when parsing text paragraphs" $ do let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String)) @@ -345,6 +418,28 @@ spec = do it "turns it into a code block" $ do "@foo@" `shouldParseTo` DocCodeBlock "foo" + context "when a paragraph starts with a markdown link" $ do + it "correctly parses it as a text paragraph (not a definition list)" $ do + "[label](url)" `shouldParseTo` + DocParagraph (hyperlink "url" "label") + + it "can be followed by an other paragraph" $ do + "[label](url)\n\nfoobar" `shouldParseTo` + DocParagraph (hyperlink "url" "label") <> DocParagraph "foobar" + + context "when paragraph contains additional text" $ do + it "accepts more text after the link" $ do + "[label](url) foo bar baz" `shouldParseTo` + DocParagraph (hyperlink "url" "label" <> " foo bar baz") + + it "accepts a newline right after the markdown link" $ do + "[label](url)\nfoo bar baz" `shouldParseTo` + DocParagraph (hyperlink "url" "label" <> " foo bar baz") + + it "can be followed by an other paragraph" $ do + "[label](url)foo\n\nbar" `shouldParseTo` + DocParagraph (hyperlink "url" "label" <> "foo") <> DocParagraph "bar" + context "when parsing birdtracks" $ do it "parses them as a code block" $ do unlines [ @@ -584,7 +679,7 @@ spec = do it "can nest definition lists" $ do - "[a] foo\n\n [b] bar\n\n [c] baz\n qux" `shouldParseTo` + "[a]: foo\n\n [b]: bar\n\n [c]: baz\n qux" `shouldParseTo` DocDefList [ ("a", "foo" <> DocDefList [ ("b", "bar" <> DocDefList [("c", "baz\nqux")]) @@ -599,12 +694,27 @@ spec = do <> DocOrderedList [ DocParagraph "baz" ] it "definition lists can come back to top level with a different list" $ do - "[foo] foov\n\n [bar] barv\n\n1. baz" `shouldParseTo` + "[foo]: foov\n\n [bar]: barv\n\n1. baz" `shouldParseTo` DocDefList [ ("foo", "foov" <> DocDefList [ ("bar", "barv") ]) ] <> DocOrderedList [ DocParagraph "baz" ] + it "list order is preserved in presence of nesting + extra text" $ do + "1. Foo\n\n > Some code\n\n2. Bar\n\nSome text" + `shouldParseTo` + DocOrderedList [ DocParagraph "Foo" <> DocCodeBlock "Some code" + , DocParagraph "Bar" + ] + <> DocParagraph (DocString "Some text") + + "1. Foo\n\n2. Bar\n\nSome text" + `shouldParseTo` + DocOrderedList [ DocParagraph "Foo" + , DocParagraph "Bar" + ] + <> DocParagraph (DocString "Some text") + context "when parsing properties" $ do it "can parse a single property" $ do "prop> 23 == 23" `shouldParseTo` DocProperty "23 == 23" @@ -732,9 +842,9 @@ spec = do context "when parsing definition lists" $ do it "parses a simple list" $ do unlines [ - " [foo] one" - , " [bar] two" - , " [baz] three" + " [foo]: one" + , " [bar]: two" + , " [baz]: three" ] `shouldParseTo` DocDefList [ ("foo", "one") @@ -744,9 +854,9 @@ spec = do it "ignores empty lines between list items" $ do unlines [ - "[foo] one" + "[foo]: one" , "" - , "[bar] two" + , "[bar]: two" ] `shouldParseTo` DocDefList [ ("foo", "one") @@ -754,13 +864,13 @@ spec = do ] it "accepts an empty list item" $ do - "[foo]" `shouldParseTo` DocDefList [("foo", DocEmpty)] + "[foo]:" `shouldParseTo` DocDefList [("foo", DocEmpty)] it "accepts multi-line list items" $ do unlines [ - "[foo] point one" + "[foo]: point one" , " more one" - , "[bar] point two" + , "[bar]: point two" , "more two" ] `shouldParseTo` DocDefList [ @@ -769,21 +879,33 @@ spec = do ] it "accepts markup in list items" $ do - "[foo] /foo/" `shouldParseTo` DocDefList [("foo", DocEmphasis "foo")] + "[foo]: /foo/" `shouldParseTo` DocDefList [("foo", DocEmphasis "foo")] it "accepts markup for the label" $ do - "[/foo/] bar" `shouldParseTo` DocDefList [(DocEmphasis "foo", "bar")] + "[/foo/]: bar" `shouldParseTo` DocDefList [(DocEmphasis "foo", "bar")] it "requires empty lines between list and other paragraphs" $ do unlines [ "foo" , "" - , "[foo] bar" + , "[foo]: bar" , "" , "baz" ] `shouldParseTo` DocParagraph "foo" <> DocDefList [("foo", "bar")] <> DocParagraph "baz" + it "dose not require the colon (deprecated - this will be removed in a future release)" $ do + unlines [ + " [foo] one" + , " [bar] two" + , " [baz] three" + ] + `shouldParseTo` DocDefList [ + ("foo", "one") + , ("bar", "two") + , ("baz", "three") + ] + context "when parsing consecutive paragraphs" $ do it "will not capture irrelevant consecutive lists" $ do unlines [ " * bullet" @@ -796,9 +918,9 @@ spec = do , " " , " 2. different bullet" , " " - , " [cat] kitten" + , " [cat]: kitten" , " " - , " [pineapple] fruit" + , " [pineapple]: fruit" ] `shouldParseTo` DocUnorderedList [ DocParagraph "bullet" , DocParagraph "different bullet"] |