From a5199600c39d25d7b71dcb2328000c1c49ad95a2 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Wed, 6 Feb 2019 01:01:41 -0800 Subject: Better identifier parsing * '(<|>)' and '`elem`' now get parsed and rendered properly as links * 'DbModule'/'DbUnitId' now properly get split apart into two links * tuple names now get parsed properly * some more small niceties... The identifier parsing code is more precise and more efficient (although to be fair: it is also longer and in its own module). On the rendering side, we need to pipe through information about backticks/parens/neither all the way through from renaming to the backends. In terms of impact: a total of 35 modules in the entirety of the bootlib + ghc lib docs change. The only "regression" is things like '\0'. These should be changed to @\\0@ (the path by which this previously worked seems accidental). --- .../src/Documentation/Haddock/Parser/Identifier.hs | 186 +++++++++++++++++++++ .../src/Documentation/Haddock/Parser/Monad.hs | 13 +- 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 haddock-library/src/Documentation/Haddock/Parser/Identifier.hs (limited to 'haddock-library/src/Documentation/Haddock/Parser') diff --git a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs new file mode 100644 index 00000000..7bc98b62 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ViewPatterns #-} +-- | +-- Module : Documentation.Haddock.Parser.Identifier +-- Copyright : (c) Alec Theriault 2019, +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Functionality for parsing identifiers and operators + +module Documentation.Haddock.Parser.Identifier ( + Identifier(..), + parseValid, +) where + +import Documentation.Haddock.Types ( Namespace(..) ) +import Documentation.Haddock.Parser.Monad +import qualified Text.Parsec as Parsec +import Text.Parsec.Pos ( updatePosChar ) +import Text.Parsec ( State(..) + , getParserState, setParserState ) + +import Data.Text (Text) +import qualified Data.Text as T + +import Data.Char (isAlpha, isAlphaNum) +import Control.Monad (guard) +import Data.Functor (($>)) +#if MIN_VERSION_base(4,9,0) +import Text.Read.Lex (isSymbolChar) +#else +import Data.Char (GeneralCategory (..), + generalCategory) +#endif + +import Data.Maybe + +-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks. +data Identifier = Identifier !Namespace !Char String !Char + deriving (Show, Eq) + +parseValid :: Parser Identifier +parseValid = do + s@State{ stateInput = inp, statePos = pos } <- getParserState + + case takeIdentifier inp of + Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier" + Just (ns, op, ident, cl, inp') -> + let posOp = updatePosChar pos op + posIdent = T.foldl updatePosChar posOp ident + posCl = updatePosChar posIdent cl + s' = s{ stateInput = inp', statePos = posCl } + in setParserState s' $> Identifier ns op (T.unpack ident) cl + + +#if !MIN_VERSION_base(4,9,0) +-- inlined from base-4.10.0.0 +isSymbolChar :: Char -> Bool +isSymbolChar c = not (isPuncChar c) && case generalCategory c of + MathSymbol -> True + CurrencySymbol -> True + ModifierSymbol -> True + OtherSymbol -> True + DashPunctuation -> True + OtherPunctuation -> c `notElem` "'\"" + ConnectorPunctuation -> c /= '_' + _ -> False + where + -- | The @special@ character class as defined in the Haskell Report. + isPuncChar :: Char -> Bool + isPuncChar = (`elem` (",;()[]{}`" :: String)) +#endif + +-- | Try to parse a delimited identifier off the front of the given input. +-- +-- This tries to match as many valid Haskell identifiers/operators as possible, +-- to the point of sometimes accepting invalid things (ex: keywords). Some +-- considerations: +-- +-- - operators and identifiers can have module qualifications +-- - operators can be wrapped in parens (for prefix) +-- - identifiers can be wrapped in backticks (for infix) +-- - delimiters are backticks or regular ticks +-- - since regular ticks are also valid in identifiers, we opt for the +-- longest successful parse +-- +-- This function should make /O(1)/ allocations +takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text) +takeIdentifier input = listToMaybe $ do + + -- Optional namespace + let (ns, input') = case T.uncons input of + Just ('v', i) -> (Value, i) + Just ('t', i) -> (Type, i) + _ -> (None, input) + + -- Opening tick + (op, input'') <- maybeToList (T.uncons input') + guard (op == '\'' || op == '`') + + -- Identifier/operator + (ident, input''') <- wrapped input'' + + -- Closing tick + (cl, input'''') <- maybeToList (T.uncons input''') + guard (cl == '\'' || cl == '`') + + pure (ns, op, ident, cl, input'''') + + where + + -- | Parse out a wrapped, possibly qualified, operator or identifier + wrapped t = do + (c, t' ) <- maybeToList (T.uncons t) + -- Tuples + case c of + '(' | Just (c', _) <- T.uncons t' + , c' == ',' || c' == ')' + -> do let (commas, t'') = T.span (== ',') t' + (')', t''') <- maybeToList (T.uncons t'') + pure (T.take (T.length commas + 2) t, t''') + + -- Parenthesized + '(' -> do (n, t'' ) <- general False 0 [] t' + (')', t''') <- maybeToList (T.uncons t'') + pure (T.take (n + 2) t, t''') + + -- Backticked + '`' -> do (n, t'' ) <- general False 0 [] t' + ('`', t''') <- maybeToList (T.uncons t'') + pure (T.take (n + 2) t, t''') + + -- Unadorned + _ -> do (n, t'' ) <- general False 0 [] t + pure (T.take n t, t'') + + -- | Parse out a possibly qualified operator or identifier + general :: Bool -- ^ refuse inputs starting with operators + -> Int -- ^ total characters \"consumed\" so far + -> [(Int, Text)] -- ^ accumulated results + -> Text -- ^ current input + -> [(Int, Text)] -- ^ total characters parsed & what remains + general !identOnly !i acc t + -- Starts with an identifier (either just an identifier, or a module qual) + | Just (n, rest) <- identLike t + = if T.null rest + then acc + else case T.head rest of + '`' -> (n + i, rest) : acc + ')' -> (n + i, rest) : acc + '.' -> general False (n + i + 1) acc (T.tail rest) + '\'' -> let (m, rest') = quotes rest + in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (T.tail rest') + _ -> acc + + -- An operator + | Just (n, rest) <- optr t + , not identOnly + = (n + i, rest) : acc + + -- Anything else + | otherwise + = acc + + -- | Parse an identifier off the front of the input + identLike t + | T.null t = Nothing + | isAlpha (T.head t) || '_' == T.head t + = let !(idt, rest) = T.span (\c -> isAlphaNum c || c == '_') t + !(octos, rest') = T.span (== '#') rest + in Just (T.length idt + T.length octos, rest') + | otherwise = Nothing + + -- | Parse all but the last quote off the front of the input + -- PRECONDITION: T.head t == '\'' + quotes :: Text -> (Int, Text) + quotes t = let !n = T.length (T.takeWhile (== '\'') t) - 1 + in (n, T.drop n t) + + -- | Parse an operator off the front of the input + optr t = let !(op, rest) = T.span isSymbolChar t + in if T.null op then Nothing else Just (T.length op, rest) diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index 8f5bd217..fa46f536 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -4,6 +4,18 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeSynonymInstances #-} +-- | +-- Module : Documentation.Haddock.Parser.Monad +-- Copyright : (c) Alec Theriault 2018-2019, +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Defines the Parsec monad over which all parsing is done and also provides +-- more efficient versions of the usual parsec combinator functions (but +-- specialized to 'Text'). module Documentation.Haddock.Parser.Monad where @@ -96,7 +108,6 @@ takeWhile f = do s' = s{ stateInput = inp', statePos = pos' } setParserState s' $> t - -- | Like 'takeWhile', but fails if no characters matched. -- -- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient. -- cgit v1.2.3 From 91f55209065497c8cd0d0a23e5ed5561410b4df0 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Sun, 26 May 2019 15:19:27 -0400 Subject: Release haddock-2.23, haddock-library-1.8.0 Tentatively adjust bounds and changelogs for the release to be bundled with GHC 8.8.1. --- CHANGES.md | 4 +++- haddock-api/haddock-api.cabal | 6 +++--- haddock-library/haddock-library.cabal | 2 +- .../src/Documentation/Haddock/Parser/Identifier.hs | 10 +++++----- haddock-test/haddock-test.cabal | 2 +- haddock.cabal | 11 +++++------ 6 files changed, 18 insertions(+), 17 deletions(-) (limited to 'haddock-library/src/Documentation/Haddock/Parser') diff --git a/CHANGES.md b/CHANGES.md index a6d96fed..88656da4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,4 @@ -## Changes in TBA +## Changes in 2.23.0 * "Linuwial" is the new default theme (#721, #782, #949) @@ -29,6 +29,8 @@ * Many fixes to the LaTeX backend, mostly focused on not crashing as well as generating LaTeX source that compiles + * More flexible parsing of the module header + ## Changes in version 2.22.0 * Make `--package-version` optional for `--hoogle` (#899) diff --git a/haddock-api/haddock-api.cabal b/haddock-api/haddock-api.cabal index c427e752..9a120f5d 100644 --- a/haddock-api/haddock-api.cabal +++ b/haddock-api/haddock-api.cabal @@ -1,6 +1,6 @@ cabal-version: 2.0 name: haddock-api -version: 2.22.0 +version: 2.23.0 synopsis: A documentation-generation tool for Haskell libraries description: Haddock is a documentation-generation tool for Haskell libraries @@ -42,7 +42,7 @@ library default-language: Haskell2010 -- this package typically supports only single major versions - build-depends: base ^>= 4.12.0 + build-depends: base ^>= 4.13.0 , ghc ^>= 8.8 , ghc-paths ^>= 0.1.0.9 , haddock-library ^>= 1.8.0 @@ -65,7 +65,7 @@ library ghc-options: -funbox-strict-fields -Wall -fwarn-tabs -O2 ghc-options: -Wall if impl(ghc >= 8.0) - ghc-options: -Wcompat -Wnoncanonical-monad-instances -Wnoncanonical-monadfail-instances + ghc-options: -Wcompat -Wnoncanonical-monad-instances exposed-modules: Documentation.Haddock diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal index 5c744082..99773475 100644 --- a/haddock-library/haddock-library.cabal +++ b/haddock-library/haddock-library.cabal @@ -33,7 +33,7 @@ common lib-defaults ghc-options: -funbox-strict-fields -Wall -fwarn-tabs if impl(ghc >= 8.0) - ghc-options: -Wcompat -Wnoncanonical-monad-instances -Wnoncanonical-monadfail-instances + ghc-options: -Wcompat -Wnoncanonical-monad-instances library import: lib-defaults diff --git a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs index 7bc98b62..a83e5abf 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs @@ -109,7 +109,7 @@ takeIdentifier input = listToMaybe $ do (cl, input'''') <- maybeToList (T.uncons input''') guard (cl == '\'' || cl == '`') - pure (ns, op, ident, cl, input'''') + return (ns, op, ident, cl, input'''') where @@ -122,21 +122,21 @@ takeIdentifier input = listToMaybe $ do , c' == ',' || c' == ')' -> do let (commas, t'') = T.span (== ',') t' (')', t''') <- maybeToList (T.uncons t'') - pure (T.take (T.length commas + 2) t, t''') + return (T.take (T.length commas + 2) t, t''') -- Parenthesized '(' -> do (n, t'' ) <- general False 0 [] t' (')', t''') <- maybeToList (T.uncons t'') - pure (T.take (n + 2) t, t''') + return (T.take (n + 2) t, t''') -- Backticked '`' -> do (n, t'' ) <- general False 0 [] t' ('`', t''') <- maybeToList (T.uncons t'') - pure (T.take (n + 2) t, t''') + return (T.take (n + 2) t, t''') -- Unadorned _ -> do (n, t'' ) <- general False 0 [] t - pure (T.take n t, t'') + return (T.take n t, t'') -- | Parse out a possibly qualified operator or identifier general :: Bool -- ^ refuse inputs starting with operators diff --git a/haddock-test/haddock-test.cabal b/haddock-test/haddock-test.cabal index 23b5953c..ed174e4f 100644 --- a/haddock-test/haddock-test.cabal +++ b/haddock-test/haddock-test.cabal @@ -16,7 +16,7 @@ library default-language: Haskell2010 ghc-options: -Wall hs-source-dirs: src - build-depends: base >= 4.3 && < 4.13, bytestring, directory, process, filepath, Cabal, xml, xhtml + build-depends: base >= 4.3 && < 4.14, bytestring, directory, process, filepath, Cabal, xml, xhtml exposed-modules: Test.Haddock diff --git a/haddock.cabal b/haddock.cabal index 078955fb..563955b9 100644 --- a/haddock.cabal +++ b/haddock.cabal @@ -1,6 +1,6 @@ cabal-version: 2.0 name: haddock -version: 2.22.0 +version: 2.23.0 synopsis: A documentation-generation tool for Haskell libraries description: This is Haddock, a tool for automatically generating documentation @@ -23,7 +23,7 @@ description: without any documentation annotations, Haddock can generate useful documentation from your source code. . - <> + <> license: BSD3 license-file: LICENSE author: Simon Marlow, David Waern @@ -33,7 +33,7 @@ bug-reports: https://github.com/haskell/haddock/issues copyright: (c) Simon Marlow, David Waern category: Documentation build-type: Simple -tested-with: GHC==8.6.* +tested-with: GHC==8.8.* extra-source-files: CHANGES.md @@ -64,8 +64,7 @@ executable haddock -- haddock typically only supports a single GHC major version build-depends: - -- FIXME: drop 4.12.0.0 once GHC HEAD updates to 4.13.0.0 - base ^>= 4.12.0.0 || ^>= 4.13.0.0 + base ^>= 4.13.0.0 if flag(in-ghc-tree) hs-source-dirs: haddock-api/src, haddock-library/src @@ -141,7 +140,7 @@ executable haddock else -- in order for haddock's advertised version number to have proper meaning, -- we pin down to a single haddock-api version. - build-depends: haddock-api == 2.22.0 + build-depends: haddock-api == 2.23.0 test-suite html-test type: exitcode-stdio-1.0 -- cgit v1.2.3 From be8b02c4e3cffe7d45b3dad0a0f071d35a274d65 Mon Sep 17 00:00:00 2001 From: Herbert Valerio Riedel Date: Sun, 8 Dec 2019 11:53:39 +0100 Subject: Fix build-failure regression for base < 4.7 The `$>` operator definition is available only since base-4.7 which unfortunately wasn't caught before release to Hackage (but has been fixed up by a metadata-revision) This commit introduces a `CompatPrelude` module which allows to reduce the amount of CPP by ousting it to a central location, i.e. the new `CompatPrelude` module. This pattern also tends to reduce the tricks needed to silence unused import warnings. Addresses #1119 --- haddock-library/CHANGES.md | 4 ++ haddock-library/haddock-library.cabal | 3 +- haddock-library/src/CompatPrelude.hs | 52 ++++++++++++++++++++++ .../src/Documentation/Haddock/Parser.hs | 1 - .../src/Documentation/Haddock/Parser/Identifier.hs | 28 +----------- .../src/Documentation/Haddock/Parser/Monad.hs | 3 +- 6 files changed, 61 insertions(+), 30 deletions(-) create mode 100644 haddock-library/src/CompatPrelude.hs (limited to 'haddock-library/src/Documentation/Haddock/Parser') diff --git a/haddock-library/CHANGES.md b/haddock-library/CHANGES.md index 265579ca..d112db45 100644 --- a/haddock-library/CHANGES.md +++ b/haddock-library/CHANGES.md @@ -1,3 +1,7 @@ +## Changes in version 1.8.0.1 + + * Fix build-time regression for `base < 4.7` (#1119) + ## Changes in version 1.8.0 * Support inline markup in markdown-style links (#875) diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal index fe6aeede..7f91fd19 100644 --- a/haddock-library/haddock-library.cabal +++ b/haddock-library/haddock-library.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: haddock-library -version: 1.8.0 +version: 1.8.0.1 synopsis: Library exposing some functionality of Haddock. description: Haddock is a documentation-generation tool for Haskell @@ -49,6 +49,7 @@ library Documentation.Haddock.Types other-modules: + CompatPrelude Documentation.Haddock.Parser.Util Documentation.Haddock.Parser.Monad Documentation.Haddock.Parser.Identifier diff --git a/haddock-library/src/CompatPrelude.hs b/haddock-library/src/CompatPrelude.hs new file mode 100644 index 00000000..60fa94d9 --- /dev/null +++ b/haddock-library/src/CompatPrelude.hs @@ -0,0 +1,52 @@ +{-# LANGUAGE CPP #-} + +#if !MIN_VERSION_base(4,5,0) +# error This module doesn't provide compat-shims for versions prior to base-4.5 +#endif + +-- | Bridge impedance mismatch of different @base@ versions back till @base-4.5@ (GHC 7.4.2) +module CompatPrelude + ( ($>) + , isSymbolChar + ) where + +#if MIN_VERSION_base(4,7,0) +import Data.Functor ( ($>) ) +#else +import Data.Functor ( (<$) ) +#endif + +#if MIN_VERSION_base(4,9,0) +import Text.Read.Lex (isSymbolChar) +#else +import Data.Char (GeneralCategory(..), generalCategory) +#endif + + +#if !MIN_VERSION_base(4,7,0) +infixl 4 $> + +-- | Flipped version of '<$'. +-- +-- @since 4.7.0.0 +($>) :: Functor f => f a -> b -> f b +($>) = flip (<$) +#endif + +#if !MIN_VERSION_base(4,9,0) +-- inlined from base-4.10.0.0 +isSymbolChar :: Char -> Bool +isSymbolChar c = not (isPuncChar c) && case generalCategory c of + MathSymbol -> True + CurrencySymbol -> True + ModifierSymbol -> True + OtherSymbol -> True + DashPunctuation -> True + OtherPunctuation -> c `notElem` "'\"" + ConnectorPunctuation -> c /= '_' + _ -> False + where + -- | The @special@ character class as defined in the Haskell Report. + isPuncChar :: Char -> Bool + isPuncChar = (`elem` (",;()[]{}`" :: String)) +#endif diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index 36c8bb5b..028422a7 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} -- | diff --git a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs index a83e5abf..b8afb951 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE ViewPatterns #-} -- | @@ -29,15 +28,8 @@ import qualified Data.Text as T import Data.Char (isAlpha, isAlphaNum) import Control.Monad (guard) -import Data.Functor (($>)) -#if MIN_VERSION_base(4,9,0) -import Text.Read.Lex (isSymbolChar) -#else -import Data.Char (GeneralCategory (..), - generalCategory) -#endif - import Data.Maybe +import CompatPrelude -- | Identifier string surrounded with namespace, opening, and closing quotes/backticks. data Identifier = Identifier !Namespace !Char String !Char @@ -57,24 +49,6 @@ parseValid = do in setParserState s' $> Identifier ns op (T.unpack ident) cl -#if !MIN_VERSION_base(4,9,0) --- inlined from base-4.10.0.0 -isSymbolChar :: Char -> Bool -isSymbolChar c = not (isPuncChar c) && case generalCategory c of - MathSymbol -> True - CurrencySymbol -> True - ModifierSymbol -> True - OtherSymbol -> True - DashPunctuation -> True - OtherPunctuation -> c `notElem` "'\"" - ConnectorPunctuation -> c /= '_' - _ -> False - where - -- | The @special@ character class as defined in the Haskell Report. - isPuncChar :: Char -> Bool - isPuncChar = (`elem` (",;()[]{}`" :: String)) -#endif - -- | Try to parse a delimited identifier off the front of the given input. -- -- This tries to match as many valid Haskell identifiers/operators as possible, diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index fa46f536..7c73a168 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -29,7 +29,6 @@ import qualified Data.Text as T import Data.Text ( Text ) import Control.Monad ( mfilter ) -import Data.Functor ( ($>) ) import Data.String ( IsString(..) ) import Data.Bits ( Bits(..) ) import Data.Char ( ord ) @@ -37,7 +36,9 @@ import Data.List ( foldl' ) import Control.Applicative as App import Documentation.Haddock.Types ( Version ) + import Prelude hiding (takeWhile) +import CompatPrelude -- | The only bit of information we really care about truding along with us -- through parsing is the version attached to a @\@since@ annotation - if -- cgit v1.2.3 From b0514cc5d53bb37424177d2ba986216a914f8b1c Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Fri, 27 Mar 2020 20:16:51 -0400 Subject: Fix crash in `haddock-library` on unicode space Our quickcheck tests for `haddock-library` stumbled across an edge case input that was causing Haddock to crash: it was a unicode space character. The root cause of the crash is that we were implicitly assuming that if a space character was not " \t\f\v\r", it would have to be "\n". We fix this by instead defining horizontal space as: any space character that is not '\n'. Fixes #1142 --- haddock-library/src/Documentation/Haddock/Parser/Util.hs | 14 +++++++------- haddock-library/test/Documentation/Haddock/ParserSpec.hs | 8 +++++++- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'haddock-library/src/Documentation/Haddock/Parser') diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index 98570c22..eef744d8 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -31,16 +31,16 @@ import Prelude hiding (takeWhile) import Data.Char (isSpace) -- | Characters that count as horizontal space -horizontalSpace :: [Char] -horizontalSpace = " \t\f\v\r" +horizontalSpace :: Char -> Bool +horizontalSpace c = isSpace c && c /= '\n' -- | Skip and ignore leading horizontal space skipHorizontalSpace :: Parser () -skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace) +skipHorizontalSpace = Parsec.skipMany (Parsec.satisfy horizontalSpace) -- | Take leading horizontal space -takeHorizontalSpace :: Parser Text -takeHorizontalSpace = takeWhile (`elem` horizontalSpace) +takeHorizontalSpace :: Parser Text +takeHorizontalSpace = takeWhile horizontalSpace makeLabeled :: (String -> Maybe String -> a) -> Text -> a makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of @@ -60,10 +60,10 @@ removeEscapes = T.unfoldr go -- | Consume characters from the input up to and including the given pattern. -- Return everything consumed except for the end pattern itself. -takeUntil :: Text -> Parser Text +takeUntil :: Text -> Parser Text takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome where - end = T.unpack end_ + end = T.unpack end_ p :: (Bool, String) -> Char -> Maybe (Bool, String) p acc c = case acc of diff --git a/haddock-library/test/Documentation/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs index bc40a0a2..8b59b560 100644 --- a/haddock-library/test/Documentation/Haddock/ParserSpec.hs +++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs @@ -3,6 +3,7 @@ module Documentation.Haddock.ParserSpec (main, spec) where +import Data.Char (isSpace) import Data.String import qualified Documentation.Haddock.Parser as Parse import Documentation.Haddock.Types @@ -442,6 +443,10 @@ spec = do property $ \xs -> (length . show . parseParas) xs `shouldSatisfy` (> 0) + -- See + it "doesn't crash on unicode whitespace" $ do + "\8197" `shouldParseTo` DocEmpty + context "when parsing @since" $ do it "adds specified version to the result" $ do parseParas "@since 0.5.0" `shouldBe` @@ -470,7 +475,8 @@ spec = do context "when parsing text paragraphs" $ do - let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String)) + let isSpecial c = isSpace c || c `elem` (".(=#-[*`\\\"'_/@<>" :: String) + filterSpecial = filter (not . isSpecial) it "parses an empty paragraph" $ do "" `shouldParseTo` DocEmpty -- cgit v1.2.3