aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlec Theriault <alec.theriault@gmail.com>2020-03-27 20:16:51 -0400
committerAlec Theriault <alec.theriault@gmail.com>2020-03-27 20:34:33 -0400
commitb0514cc5d53bb37424177d2ba986216a914f8b1c (patch)
treee67847ed5e4ecc907c710cabc0397ce39df7d589
parent8c6532636e6bd572455dfcf0b0d6e05f7033d110 (diff)
Fix crash in `haddock-library` on unicode space
Our quickcheck tests for `haddock-library` stumbled across an edge case input that was causing Haddock to crash: it was a unicode space character. The root cause of the crash is that we were implicitly assuming that if a space character was not " \t\f\v\r", it would have to be "\n". We fix this by instead defining horizontal space as: any space character that is not '\n'. Fixes #1142
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser/Util.hs14
-rw-r--r--haddock-library/test/Documentation/Haddock/ParserSpec.hs8
2 files changed, 14 insertions, 8 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
index 98570c22..eef744d8 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
@@ -31,16 +31,16 @@ import Prelude hiding (takeWhile)
import Data.Char (isSpace)
-- | Characters that count as horizontal space
-horizontalSpace :: [Char]
-horizontalSpace = " \t\f\v\r"
+horizontalSpace :: Char -> Bool
+horizontalSpace c = isSpace c && c /= '\n'
-- | Skip and ignore leading horizontal space
skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace)
+skipHorizontalSpace = Parsec.skipMany (Parsec.satisfy horizontalSpace)
-- | Take leading horizontal space
-takeHorizontalSpace :: Parser Text
-takeHorizontalSpace = takeWhile (`elem` horizontalSpace)
+takeHorizontalSpace :: Parser Text
+takeHorizontalSpace = takeWhile horizontalSpace
makeLabeled :: (String -> Maybe String -> a) -> Text -> a
makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of
@@ -60,10 +60,10 @@ removeEscapes = T.unfoldr go
-- | Consume characters from the input up to and including the given pattern.
-- Return everything consumed except for the end pattern itself.
-takeUntil :: Text -> Parser Text
+takeUntil :: Text -> Parser Text
takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
where
- end = T.unpack end_
+ end = T.unpack end_
p :: (Bool, String) -> Char -> Maybe (Bool, String)
p acc c = case acc of
diff --git a/haddock-library/test/Documentation/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs
index bc40a0a2..8b59b560 100644
--- a/haddock-library/test/Documentation/Haddock/ParserSpec.hs
+++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs
@@ -3,6 +3,7 @@
module Documentation.Haddock.ParserSpec (main, spec) where
+import Data.Char (isSpace)
import Data.String
import qualified Documentation.Haddock.Parser as Parse
import Documentation.Haddock.Types
@@ -442,6 +443,10 @@ spec = do
property $ \xs ->
(length . show . parseParas) xs `shouldSatisfy` (> 0)
+ -- See <https://github.com/haskell/haddock/issues/1142>
+ it "doesn't crash on unicode whitespace" $ do
+ "\8197" `shouldParseTo` DocEmpty
+
context "when parsing @since" $ do
it "adds specified version to the result" $ do
parseParas "@since 0.5.0" `shouldBe`
@@ -470,7 +475,8 @@ spec = do
context "when parsing text paragraphs" $ do
- let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String))
+ let isSpecial c = isSpace c || c `elem` (".(=#-[*`\\\"'_/@<>" :: String)
+ filterSpecial = filter (not . isSpecial)
it "parses an empty paragraph" $ do
"" `shouldParseTo` DocEmpty