From 79c7159101c03bbbc7350e07963896ca2bb97c02 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Wed, 25 Apr 2018 11:24:07 -0700 Subject: Replace 'attoparsec' with 'parsec' (#799) * Remove attoparsec with parsec and start fixing failed parses * Make tests pass * Fix encoding issues The Haddock parser no longer needs to worry about bytestrings. All the internal parsing work in haddock-library happens over 'Text'. * Remove attoparsec vendor * Fix stuff broken in 'attoparsec' -> 'parsec' * hyperlinks * codeblocks * examples Pretty much all issues are due to attoparsec's backtracking failure behaviour vs. parsec's non-backtracking failure behaviour. * Fix small TODOs * Missing quote + Haddocks * Better handle spaces before/after paragraphs * Address review comments --- .../src/Documentation/Haddock/Parser/Util.hs | 82 ++++++++++------------ 1 file changed, 39 insertions(+), 43 deletions(-) (limited to 'haddock-library/src/Documentation/Haddock/Parser/Util.hs') diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index ab5e5e9e..ffa91b09 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} -- | -- Module : Documentation.Haddock.Parser.Util -- Copyright : (c) Mateusz Kowalczyk 2013-2014, @@ -11,62 +11,59 @@ -- -- Various utility functions used by the parser. module Documentation.Haddock.Parser.Util ( - unsnoc -, strip -, takeUntil -, removeEscapes -, makeLabeled -, takeHorizontalSpace -, skipHorizontalSpace + takeUntil, + removeEscapes, + makeLabeled, + takeHorizontalSpace, + skipHorizontalSpace, ) where +import qualified Text.Parsec as Parsec + +import qualified Data.Text as T +import Data.Text (Text) + import Control.Applicative import Control.Monad (mfilter) -import Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace) -import Data.ByteString.Char8 (ByteString) -import qualified Data.ByteString.Char8 as BS +import Documentation.Haddock.Parser.Monad import Prelude hiding (takeWhile) -#if MIN_VERSION_bytestring(0,10,2) -import Data.ByteString.Char8 (unsnoc) -#else -unsnoc :: ByteString -> Maybe (ByteString, Char) -unsnoc bs - | BS.null bs = Nothing - | otherwise = Just (BS.init bs, BS.last bs) -#endif +import Data.Char (isSpace) --- | Remove all leading and trailing whitespace -strip :: String -> String -strip = (\f -> f . f) $ dropWhile isSpace . reverse - -isHorizontalSpace :: Char -> Bool -isHorizontalSpace = inClass " \t\f\v\r" +-- | Characters that count as horizontal space +horizontalSpace :: [Char] +horizontalSpace = " \t\f\v\r" +-- | Skip and ignore leading horizontal space skipHorizontalSpace :: Parser () -skipHorizontalSpace = skipWhile isHorizontalSpace +skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace) -takeHorizontalSpace :: Parser BS.ByteString -takeHorizontalSpace = takeWhile isHorizontalSpace +-- | Take leading horizontal space +takeHorizontalSpace :: Parser Text +takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace) -makeLabeled :: (String -> Maybe String -> a) -> String -> a -makeLabeled f input = case break isSpace $ removeEscapes $ strip input of - (uri, "") -> f uri Nothing - (uri, label) -> f uri (Just $ dropWhile isSpace label) +makeLabeled :: (String -> Maybe String -> a) -> Text -> a +makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of + (uri, "") -> f (T.unpack uri) Nothing + (uri, label) -> f (T.unpack uri) (Just . T.unpack $ T.stripStart label) -- | Remove escapes from given string. -- -- Only do this if you do not process (read: parse) the input any further. -removeEscapes :: String -> String -removeEscapes "" = "" -removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs -removeEscapes ('\\':xs) = removeEscapes xs -removeEscapes (x:xs) = x : removeEscapes xs +removeEscapes :: Text -> Text +removeEscapes = T.unfoldr go + where + go :: Text -> Maybe (Char, Text) + go xs = case T.uncons xs of + Just ('\\',ys) -> T.uncons ys + unconsed -> unconsed -takeUntil :: ByteString -> Parser ByteString -takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome +-- | Consume characters from the input up to and including the given pattern. +-- Return everything consumed except for the end pattern itself. +takeUntil :: Text -> Parser Text +takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome where - end = BS.unpack end_ + end = T.unpack end_ p :: (Bool, String) -> Char -> Maybe (Bool, String) p acc c = case acc of @@ -75,9 +72,8 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome (_, x:xs) | x == c -> Just (False, xs) _ -> Just (c == '\\', end) - dropEnd = BS.reverse . BS.drop (length end) . BS.reverse - requireEnd = mfilter (BS.isSuffixOf end_) + requireEnd = mfilter (T.isSuffixOf end_) gotSome xs - | BS.null xs = fail "didn't get any content" + | T.null xs = fail "didn't get any content" | otherwise = return xs -- cgit v1.2.3