diff options
author | Mateusz Kowalczyk <fuuzetsu@fuuzetsu.co.uk> | 2013-09-16 03:01:29 +0100 |
---|---|---|
committer | Austin Seipp <austin@well-typed.com> | 2014-01-12 14:48:36 -0600 |
commit | deb106d8250b7582e01f78e88c88ca7465fc0bc1 (patch) | |
tree | fef76ea6a8c491359becf8ee5d620b34558ffb85 | |
parent | a03c93524ba2ca4143c10770a2fa0dd134b57a83 (diff) |
Allow escaping in URLs and pictures.
Some tests were moved under parseString as they weren't about paragraph
level markup.
Conflicts:
src/Haddock/Parser.hs
test/Haddock/ParserSpec.hs
-rw-r--r-- | src/Haddock/Parser.hs | 19 | ||||
-rw-r--r-- | src/Haddock/Parser/Util.hs | 6 | ||||
-rw-r--r-- | test/Haddock/ParserSpec.hs | 308 |
3 files changed, 190 insertions, 143 deletions
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs index b8aa9cb4..6370eecb 100644 --- a/src/Haddock/Parser.hs +++ b/src/Haddock/Parser.hs @@ -167,7 +167,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"') -- Right (DocPic (Picture "hello.png" (Just "world"))) picture :: Parser (Doc a) picture = DocPic . makeLabeled Picture . decodeUtf8 - <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>") + <$> disallowNewline ("<<" *> takeUntil ">>") -- | Paragraph parser, called by 'parseParas'. paragraph :: DynFlags -> Parser (Doc RdrName) @@ -271,7 +271,8 @@ moreContent :: Monoid a => Parser a -> DynFlags -> Parser ([String], Either (Doc RdrName) a) moreContent item d = first . (:) <$> nonEmptyLine <*> more item d --- | Collects and parses the result of 'dropFrontOfPara' +-- | Runs the 'parseParas' parser on an indented paragraph. +-- The indentation is 4 spaces. indentedParagraphs :: DynFlags -> Parser (Doc RdrName) indentedParagraphs d = parseParas d . concat <$> dropFrontOfPara " " @@ -367,7 +368,9 @@ codeblock d = | otherwise = Just $ c == '\n' hyperlink :: Parser (Doc a) -hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">") +hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 + <$> disallowNewline ("<" *> takeUntil ">") + <|> autoUrl autoUrl :: Parser (Doc a) autoUrl = mkLink <$> url @@ -425,6 +428,14 @@ takeHorizontalSpace :: Parser BS.ByteString takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r") makeLabeled :: (String -> Maybe String -> a) -> String -> a -makeLabeled f input = case break isSpace $ strip input of +makeLabeled f input = case break isSpace $ removeEscapes $ strip input of (uri, "") -> f uri Nothing (uri, label) -> f uri (Just $ dropWhile isSpace label) + where + -- As we don't parse these any further, we don't do any processing to the + -- string so we at least remove escape character here. Perhaps we should + -- actually be parsing the label at the very least? + removeEscapes "" = "" + removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs + removeEscapes ('\\':xs) = removeEscapes xs + removeEscapes (x:xs) = x : removeEscapes xs diff --git a/src/Haddock/Parser/Util.hs b/src/Haddock/Parser/Util.hs index ea682601..92fa7448 100644 --- a/src/Haddock/Parser/Util.hs +++ b/src/Haddock/Parser/Util.hs @@ -7,7 +7,7 @@ import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as BS takeUntil :: ByteString -> Parser ByteString -takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) +takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome where end = BS.unpack end_ @@ -20,3 +20,7 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) dropEnd = BS.reverse . BS.drop (length end) . BS.reverse requireEnd = mfilter (BS.isSuffixOf end_) + + gotSome xs + | BS.null xs = fail "didn't get any content" + | otherwise = return xs diff --git a/test/Haddock/ParserSpec.hs b/test/Haddock/ParserSpec.hs index 4679661f..9bca745f 100644 --- a/test/Haddock/ParserSpec.hs +++ b/test/Haddock/ParserSpec.hs @@ -68,6 +68,16 @@ spec = before initStaticOpts $ do it "allows to backslash-escape characters" $ do property $ \x -> ['\\', x] `shouldParseTo` DocString [x] + context "when parsing strings contaning numeric character references" $ do + it "will implicitly convert digits to characters" $ do + "AAAA" `shouldParseTo` "AAAA" + + "灼眼のシャナ" + `shouldParseTo` "灼眼のシャナ" + + it "will implicitly convert hex encoded characters" $ do + "eeee" `shouldParseTo` "eeee" + context "when parsing identifiers" $ do it "parses identifiers enclosed within single ticks" $ do "'foo'" `shouldParseTo` DocIdentifier "foo" @@ -75,8 +85,15 @@ spec = before initStaticOpts $ do it "parses identifiers enclosed within backticks" $ do "`foo`" `shouldParseTo` DocIdentifier "foo" - it "parses a word with one of the delimiters in it as ordinary string" $ do - "don't use apostrophe's in the wrong place's" `shouldParseTo` "don't use apostrophe's in the wrong place's" + it "parses a word with an one of the delimiters in it as DocString" $ do + "don't" `shouldParseTo` "don't" + + it "doesn't pass pairs of delimiters with spaces between them" $ do + "hel'lo w'orld" `shouldParseTo` "hel'lo w'orld" + + it "don't use apostrophe's in the wrong place's" $ do + " don't use apostrophe's in the wrong place's" `shouldParseTo` + "don't use apostrophe's in the wrong place's" context "when parsing URLs" $ do let hyperlink :: String -> Maybe String -> Doc RdrName @@ -91,13 +108,25 @@ spec = before initStaticOpts $ do it "does not accept newlines in label" $ do "<foo bar\nbaz>" `shouldParseTo` "<foo bar\nbaz>" - it "does not allow to escap >" $ do - "<http://examp\\>le.com" `shouldParseTo` hyperlink "http://examp\\" Nothing <> "le.com" + -- new behaviour test, this will be now consistent with other markup + it "allows us to escape > inside the URL" $ do + "<http://examp\\>le.com>" `shouldParseTo` + hyperlink "http://examp>le.com" Nothing + + "<http://exa\\>mp\\>le.com>" `shouldParseTo` + hyperlink "http://exa>mp>le.com" Nothing + + -- Likewise in label + "<http://example.com f\\>oo>" `shouldParseTo` + hyperlink "http://example.com" "f>oo" it "parses inline URLs" $ do "foo <http://example.com/> bar" `shouldParseTo` "foo " <> hyperlink "http://example.com/" Nothing <> " bar" + it "doesn't allow for multi-line link tags" $ do + "<ba\nz aar>" `shouldParseTo` "<ba\nz aar>" + context "when autolinking URLs" $ do it "autolinks HTTP URLs" $ do "http://example.com/" `shouldParseTo` hyperlink "http://example.com/" Nothing @@ -129,19 +158,19 @@ spec = before initStaticOpts $ do picture uri = DocPic . Picture uri it "parses a simple picture" $ do - "<<foo>>" `shouldParseTo` picture "foo" Nothing - - it "accepts an optional title" $ do - "<<foo bar baz>>" `shouldParseTo` picture "foo" (Just "bar baz") + "<<baz>>" `shouldParseTo` picture "baz" Nothing - it "does not accept newlines in title" $ do - "<<foo bar\nbaz>>" `shouldParseTo` "<<foo bar\nbaz>>" + it "parses a picture with a title" $ do + "<<b a z>>" `shouldParseTo` picture "b" (Just "a z") it "parses a picture with unicode" $ do - "<<灼眼 のシャナ>>" `shouldParseTo` picture "灼眼" (Just "のシャナ") + "<<灼眼のシャナ>>" `shouldParseTo` picture "灼眼のシャナ" Nothing - it "doesn't allow for escaping of the closing tags" $ do -- bug? - "<<ba\\>>z>>" `shouldParseTo` picture "ba\\" Nothing <> "z>>" + it "allows for escaping of the closing tags" $ do + "<<ba\\>>z>>" `shouldParseTo` picture "ba>>z" Nothing + + it "doesn't allow for multi-line picture tags" $ do + "<<ba\nz aar>>" `shouldParseTo` "<<ba\nz aar>>" context "when parsing anchors" $ do it "parses a single word anchor" $ do @@ -191,6 +220,25 @@ spec = before initStaticOpts $ do it "allows to escape the emphasis delimiter inside of emphasis" $ do "/empha\\/sis/" `shouldParseTo` DocEmphasis "empha/sis" + context "when parsing monospaced text" $ do + it "parses simple monospaced text" $ do + "@foo@" `shouldParseTo` DocMonospaced "foo" + + it "parses inline monospaced text" $ do + "foo @bar@ baz" `shouldParseTo` "foo " <> DocMonospaced "bar" <> " baz" + + it "allows to escape @" $ do + "@foo \\@ bar@" `shouldParseTo` DocMonospaced "foo @ bar" + + it "accepts unicode" $ do + "@foo 灼眼のシャナ bar@" `shouldParseTo` DocMonospaced "foo 灼眼のシャナ bar" + + it "accepts other markup in monospaced text" $ do + "@/foo/@" `shouldParseTo` DocMonospaced (DocEmphasis "foo") + + it "requires the closing @" $ do + "@foo /bar/ baz" `shouldParseTo` "@foo " <> DocEmphasis "bar" <> " baz" + context "when parsing bold strings" $ do it "allows for a bold string on its own" $ do "__bold string__" `shouldParseTo` @@ -224,50 +272,42 @@ spec = before initStaticOpts $ do "__bo\\__ld__" `shouldParseTo` DocBold "bo__ld" - context "when parsing monospaced text" $ do - it "parses simple monospaced text" $ do - "@foo@" `shouldParseTo` DocMonospaced "foo" + it "doesn't allow for empty bold" $ do + "____" `shouldParseTo` "____" - it "parses inline monospaced text" $ do - "foo @bar@ baz" `shouldParseTo` "foo " <> DocMonospaced "bar" <> " baz" + context "when parsing module strings" $ do + it "should parse a module on its own" $ do + "\"Module\"" `shouldParseTo` + DocModule "Module" - it "allows to escape @" $ do - "@foo \\@ bar@" `shouldParseTo` DocMonospaced "foo @ bar" + it "should parse a module inline" $ do + "This is a \"Module\"." `shouldParseTo` + "This is a " <> DocModule "Module" <> "." - it "accepts unicode" $ do - "@foo 灼眼のシャナ bar@" `shouldParseTo` DocMonospaced "foo 灼眼のシャナ bar" + it "can accept a simple module name" $ do + "\"Hello\"" `shouldParseTo` DocModule "Hello" - it "accepts other markup in monospaced text" $ do - "@/foo/@" `shouldParseTo` DocMonospaced (DocEmphasis "foo") + it "can accept a module name with dots" $ do + "\"Hello.World\"" `shouldParseTo` DocModule "Hello.World" - it "requires the closing @" $ do - "@foo /bar/ baz" `shouldParseTo` "@foo " <> DocEmphasis "bar" <> " baz" + it "can accept a module name with unicode" $ do + "\"Hello.Worldλ\"" `shouldParseTo` DocModule "Hello.Worldλ" - context "when parsing module names" $ do - it "accepts a simple module name" $ do - "\"Foo\"" `shouldParseTo` DocModule "Foo" + it "parses a module name with a trailing dot as regular quoted string" $ do + "\"Hello.\"" `shouldParseTo` "\"Hello.\"" - it "accepts a module name with dots" $ do - "\"Foo.Bar.Baz\"" `shouldParseTo` DocModule "Foo.Bar.Baz" + it "parses a module name with a space as regular quoted string" $ do + "\"Hello World\"" `shouldParseTo` "\"Hello World\"" + + it "parses a module name with invalid characters as regular quoted string" $ do + "\"Hello&[{}(=*)+]!\"" `shouldParseTo` "\"Hello&[{}(=*)+]!\"" it "accepts a module name with unicode" $ do "\"Foo.Barλ\"" `shouldParseTo` DocModule "Foo.Barλ" - it "parses a module inline" $ do - "This is a \"Module\"." `shouldParseTo` ("This is a " <> (DocModule "Module" <> ".")) - - it "rejects empty module name" $ do + it "treats empty module name as regular double quotes" $ do "\"\"" `shouldParseTo` "\"\"" - it "rejects a module name with a trailing dot" $ do - "\"Foo.\"" `shouldParseTo` "\"Foo.\"" - - it "rejects a module name with a space" $ do - "\"Foo Bar\"" `shouldParseTo` "\"Foo Bar\"" - - it "rejects a module name with invalid characters" $ do - "\"Foo&[{}(=*)+]!\"" `shouldParseTo` "\"Foo&[{}(=*)+]!\"" - describe "parseParas" $ do let infix 1 `shouldParseTo` shouldParseTo :: String -> Doc RdrName -> Expectation @@ -370,6 +410,14 @@ spec = before initStaticOpts $ do , "@" ] `shouldParseTo` DocCodeBlock "foo\n@\nbar\n" + it "accepts unicode" $ do + "@foo 灼眼のシャナ bar@" `shouldParseTo` DocCodeBlock "foo 灼眼のシャナ bar" + + it "requires the closing @" $ do + "@foo /bar/ baz" + `shouldParseTo` DocParagraph ("@foo " <> DocEmphasis "bar" <> " baz") + + context "when parsing examples" $ do it "parses a simple example" $ do ">>> foo" `shouldParseTo` DocExamples [Example "foo" []] @@ -392,6 +440,61 @@ spec = before initStaticOpts $ do , Example "fib 10" ["55"] ] + it ("requires an example to be separated" + ++ " from a previous paragraph by an empty line") $ do + "foobar\n\n>>> fib 10\n55" `shouldParseTo` + DocParagraph "foobar" + <> DocExamples [Example "fib 10" ["55"]] + + it "parses bird-tracks inside of paragraphs as plain strings" $ do + let xs = "foo\n>>> bar" + xs `shouldParseTo` DocParagraph (DocString xs) + + it "skips empty lines in front of an example" $ do + "\n \n\n>>> foo" `shouldParseTo` DocExamples [Example "foo" []] + + it "terminates example on empty line" $ do + unlines [ + ">>> foo" + , "bar" + , " " + , "baz" + ] + `shouldParseTo` + DocExamples [Example "foo" ["bar"]] <> DocParagraph "baz" + + it "parses a <BLANKLINE> result as an empty result" $ do + unlines [ + ">>> foo" + , "bar" + , "<BLANKLINE>" + , "baz" + ] + `shouldParseTo` DocExamples [Example "foo" ["bar", "", "baz"]] + + it "accepts unicode in examples" $ do + ">>> 灼眼\nシャナ" `shouldParseTo` DocExamples [Example "灼眼" ["シャナ"]] + + context "when prompt is prefixed by whitespace" $ do + it "strips the exact same amount of whitespace from result lines" $ do + unlines [ + " >>> foo" + , " bar" + , " baz" + ] `shouldParseTo` DocExamples [Example "foo" ["bar", "baz"]] + + it "preserves additional whitespace" $ do + unlines [ + " >>> foo" + , " bar" + ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] + + it "keeps original if stripping is not possible" $ do + unlines [ + " >>> foo" + , " bar" + ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] + context "when parsing paragraphs nested in lists" $ do it "can nest the same type of list" $ do @@ -450,77 +553,6 @@ spec = before initStaticOpts $ do ] <> DocOrderedList [ DocParagraph "baz" ] - - context "when parsing consecutive paragraphs" $ do - it "will not capture irrelevant consecutive lists" $ do - " * bullet\n\n - different bullet\n\n (1) ordered\n \n " - ++ "2. different bullet\n \n [cat] kitten\n \n [pineapple] fruit" - `shouldParseTo` - DocUnorderedList [ DocParagraph "bullet" - , DocParagraph "different bullet"] - <> DocOrderedList [ DocParagraph "ordered" - , DocParagraph "different bullet" - ] - <> DocDefList [ ("cat", "kitten") - , ("pineapple", "fruit") - ] - - context "when parsing an example" $ do - it ("requires an example to be separated" - ++ " from a previous paragraph by an empty line") $ do - "foobar\n\n>>> fib 10\n55" `shouldParseTo` - DocParagraph "foobar" - <> DocExamples [Example "fib 10" ["55"]] - - it "parses bird-tracks inside of paragraphs as plain strings" $ do - let xs = "foo\n>>> bar" - xs `shouldParseTo` DocParagraph (DocString xs) - - it "skips empty lines in front of an example" $ do - "\n \n\n>>> foo" `shouldParseTo` DocExamples [Example "foo" []] - - it "terminates example on empty line" $ do - unlines [ - ">>> foo" - , "bar" - , " " - , "baz" - ] - `shouldParseTo` - DocExamples [Example "foo" ["bar"]] <> DocParagraph "baz" - - it "parses a <BLANKLINE> result as an empty result" $ do - unlines [ - ">>> foo" - , "bar" - , "<BLANKLINE>" - , "baz" - ] - `shouldParseTo` DocExamples [Example "foo" ["bar", "", "baz"]] - - it "accepts unicode in examples" $ do - ">>> 灼眼\nシャナ" `shouldParseTo` DocExamples [Example "灼眼" ["シャナ"]] - - context "when prompt is prefixed by whitespace" $ do - it "strips the exact same amount of whitespace from result lines" $ do - unlines [ - " >>> foo" - , " bar" - , " baz" - ] `shouldParseTo` DocExamples [Example "foo" ["bar", "baz"]] - - it "preserves additional whitespace" $ do - unlines [ - " >>> foo" - , " bar" - ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] - - it "keeps original if stripping is not possible" $ do - unlines [ - " >>> foo" - , " bar" - ] `shouldParseTo` DocExamples [Example "foo" [" bar"]] - context "when parsing properties" $ do it "can parse a single property" $ do "prop> 23 == 23" `shouldParseTo` DocProperty "23 == 23" @@ -701,29 +733,29 @@ spec = before initStaticOpts $ do `shouldParseTo` DocParagraph "foo" <> DocDefList [("foo", "bar")] <> DocParagraph "baz" context "when parsing consecutive paragraphs" $ do - it "accepts consecutive lists" $ do - unlines [ - " * foo" - , "" - , " - bar" - , "" - , " (1) ordered foo" - , " " - , " 2. ordered bar" - , " " - , " [cat] kitten" - , " " - , " [pineapple] fruit" - ] `shouldParseTo` DocUnorderedList [ - DocParagraph "foo" - , DocParagraph "bar" - ] <> DocOrderedList [ - DocParagraph "ordered foo" - , DocParagraph "ordered bar" - ] <> DocDefList [ - ("cat", "kitten") - , ("pineapple", "fruit") - ] + it "will not capture irrelevant consecutive lists" $ do + unlines [ " * bullet" + , "" + , "" + , " - different bullet" + , "" + , "" + , " (1) ordered" + , " " + , " 2. different bullet" + , " " + , " [cat] kitten" + , " " + , " [pineapple] fruit" + ] `shouldParseTo` + DocUnorderedList [ DocParagraph "bullet" + , DocParagraph "different bullet"] + <> DocOrderedList [ DocParagraph "ordered" + , DocParagraph "different bullet" + ] + <> DocDefList [ ("cat", "kitten") + , ("pineapple", "fruit") + ] context "when parsing function documentation headers" $ do it "can parse a simple header" $ do |