From b0514cc5d53bb37424177d2ba986216a914f8b1c Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Fri, 27 Mar 2020 20:16:51 -0400 Subject: Fix crash in `haddock-library` on unicode space Our quickcheck tests for `haddock-library` stumbled across an edge case input that was causing Haddock to crash: it was a unicode space character. The root cause of the crash is that we were implicitly assuming that if a space character was not " \t\f\v\r", it would have to be "\n". We fix this by instead defining horizontal space as: any space character that is not '\n'. Fixes #1142 --- haddock-library/src/Documentation/Haddock/Parser/Util.hs | 14 +++++++------- haddock-library/test/Documentation/Haddock/ParserSpec.hs | 8 +++++++- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'haddock-library') diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index 98570c22..eef744d8 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -31,16 +31,16 @@ import Prelude hiding (takeWhile) import Data.Char (isSpace) -- | Characters that count as horizontal space -horizontalSpace :: [Char] -horizontalSpace = " \t\f\v\r" +horizontalSpace :: Char -> Bool +horizontalSpace c = isSpace c && c /= '\n' -- | Skip and ignore leading horizontal space skipHorizontalSpace :: Parser () -skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace) +skipHorizontalSpace = Parsec.skipMany (Parsec.satisfy horizontalSpace) -- | Take leading horizontal space -takeHorizontalSpace :: Parser Text -takeHorizontalSpace = takeWhile (`elem` horizontalSpace) +takeHorizontalSpace :: Parser Text +takeHorizontalSpace = takeWhile horizontalSpace makeLabeled :: (String -> Maybe String -> a) -> Text -> a makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of @@ -60,10 +60,10 @@ removeEscapes = T.unfoldr go -- | Consume characters from the input up to and including the given pattern. -- Return everything consumed except for the end pattern itself. -takeUntil :: Text -> Parser Text +takeUntil :: Text -> Parser Text takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome where - end = T.unpack end_ + end = T.unpack end_ p :: (Bool, String) -> Char -> Maybe (Bool, String) p acc c = case acc of diff --git a/haddock-library/test/Documentation/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs index bc40a0a2..8b59b560 100644 --- a/haddock-library/test/Documentation/Haddock/ParserSpec.hs +++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs @@ -3,6 +3,7 @@ module Documentation.Haddock.ParserSpec (main, spec) where +import Data.Char (isSpace) import Data.String import qualified Documentation.Haddock.Parser as Parse import Documentation.Haddock.Types @@ -442,6 +443,10 @@ spec = do property $ \xs -> (length . show . parseParas) xs `shouldSatisfy` (> 0) + -- See + it "doesn't crash on unicode whitespace" $ do + "\8197" `shouldParseTo` DocEmpty + context "when parsing @since" $ do it "adds specified version to the result" $ do parseParas "@since 0.5.0" `shouldBe` @@ -470,7 +475,8 @@ spec = do context "when parsing text paragraphs" $ do - let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String)) + let isSpecial c = isSpace c || c `elem` (".(=#-[*`\\\"'_/@<>" :: String) + filterSpecial = filter (not . isSpecial) it "parses an empty paragraph" $ do "" `shouldParseTo` DocEmpty -- cgit v1.2.3