diff options
| author | Alec Theriault <alec.theriault@gmail.com> | 2018-08-29 04:52:42 -0700 | 
|---|---|---|
| committer | Alec Theriault <alec.theriault@gmail.com> | 2018-11-05 15:54:22 -0800 | 
| commit | 82b8f491e18d707f67857bcb170b2147fa275ccc (patch) | |
| tree | bea7fedeab9a8e6bfd5e811369f1b76781621309 /haddock-library | |
| parent | d9c87e47afda04fd2bda380b81afca8e337a517c (diff) | |
Faster 'Text' driven parser combinators
Perf only change:
  * use 'getParserState'/'setParserState' to make 'Text'-optimized
    parser combinators
  * minimize uses of 'Data.Text.{pack,unpack,cons,snoc}'
Diffstat (limited to 'haddock-library')
3 files changed, 83 insertions, 35 deletions
| diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index d79da40b..46b7ad3e 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -360,32 +360,34 @@ table = do      parseFirstRow :: Parser Text      parseFirstRow = do          skipHorizontalSpace -        -- upper-left corner is + -        c <- Parsec.char '+' -        cs <- some (Parsec.char '-' <|> Parsec.char '+') +        cs <- takeWhile (\c -> c == '-' || c == '+') -        -- upper right corner is + too -        guard (last cs == '+') +        -- upper-left and upper-right corners are `+` +        guard (T.length cs >= 2 && +               T.head cs == '+' && +               T.last cs == '+')          -- trailing space          skipHorizontalSpace          _ <- Parsec.newline -        return (T.cons c $ T.pack cs) +        return cs      parseRestRows :: Int -> Parser Text      parseRestRows l = do          skipHorizontalSpace +        bs <- scan predicate l -        c <- Parsec.char '|' <|> Parsec.char '+' -        bs <- scan predicate (l - 2) -        c2 <- Parsec.char '|' <|> Parsec.char '+' +        -- Left and right edges are `|` or `+` +        guard (T.length bs >= 2 && +               (T.head bs == '|' || T.head bs == '+') && +               (T.last bs == '|' || T.last bs == '+'))          -- trailing space          skipHorizontalSpace          _ <- Parsec.newline -        return (T.cons c (T.snoc bs c2)) +        return bs        where          predicate n c              | n <= 0    = Nothing @@ -662,7 +664,7 @@ nonSpace xs  --  Doesn't discard the trailing newline.  takeNonEmptyLine :: Parser Text  takeNonEmptyLine = do -    l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace +    l <- takeWhile1 (/= '\n') >>= nonSpace      _ <- "\n"      pure (l <> "\n") @@ -732,7 +734,7 @@ nonEmptyLine :: Parser Text  nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine)  takeLine :: Parser Text -takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine) +takeLine = try (takeWhile (/= '\n') <* endOfLine)  endOfLine :: Parser ()  endOfLine = void "\n" <|> Parsec.eof  @@ -742,7 +744,7 @@ endOfLine = void "\n" <|> Parsec.eof  -- >>> snd <$> parseOnly property "prop> hello world"  -- Right (DocProperty "hello world")  property :: Parser (DocH mod a) -property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n")) +property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (/= '\n'))  -- |  -- Paragraph level codeblock. Anything between the two delimiting \@ is parsed @@ -816,7 +818,7 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url)  autoUrl :: Parser (DocH mod a)  autoUrl = mkLink <$> url    where -    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace)) +    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (not . isSpace)      mkLink :: Text -> DocH mod a      mkLink s = case T.unsnoc s of diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index a5664aa8..8f5bd217 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -9,10 +9,15 @@ module Documentation.Haddock.Parser.Monad where  import qualified Text.Parsec.Char as Parsec  import qualified Text.Parsec as Parsec +import           Text.Parsec.Pos             ( updatePosChar ) +import           Text.Parsec                 ( State(..) +                                             , getParserState, setParserState )  import qualified Data.Text as T  import           Data.Text                   ( Text ) +import           Control.Monad               ( mfilter ) +import           Data.Functor                ( ($>) )  import           Data.String                 ( IsString(..) )  import           Data.Bits                   ( Bits(..) )  import           Data.Char                   ( ord ) @@ -20,7 +25,11 @@ import           Data.List                   ( foldl' )  import           Control.Applicative as App  import           Documentation.Haddock.Types ( Version ) +import           Prelude hiding (takeWhile) +-- | The only bit of information we really care about truding along with us +-- through parsing is the version attached to a @\@since@ annotation - if +-- the doc even contained one.  newtype ParserState = ParserState {    parserStateSince :: Maybe Version  } deriving (Eq, Show) @@ -29,7 +38,7 @@ initialParserState :: ParserState  initialParserState = ParserState Nothing  setSince :: Version -> Parser () -setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since}) +setSince since = Parsec.modifyState (\st -> st{ parserStateSince = Just since })  type Parser = Parsec.Parsec Text ParserState @@ -44,38 +53,75 @@ parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of  -- | Always succeeds, but returns 'Nothing' if at the end of input. Does not  -- consume input. +-- +-- Equivalent to @Parsec.optionMaybe . Parsec.lookAhead $ Parsec.anyChar@, but +-- more efficient.  peekChar :: Parser (Maybe Char) -peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar +peekChar = headOpt . stateInput <$> getParserState +  where headOpt t | T.null t = Nothing +                  | otherwise = Just (T.head t) +{-# INLINE peekChar #-}  -- | Fails if at the end of input. Does not consume input. +-- +-- Equivalent to @Parsec.lookAhead Parsec.anyChar@, but more efficient.  peekChar' :: Parser Char -peekChar' = Parsec.lookAhead Parsec.anyChar  +peekChar' = headFail . stateInput =<< getParserState +  where headFail t | T.null t = Parsec.parserFail "peekChar': reached EOF" +                   | otherwise = App.pure (T.head t) +{-# INLINE peekChar' #-}  -- | Parses the given string. Returns the parsed string. +-- +-- Equivalent to @Parsec.string (T.unpack t) $> t@, but more efficient.  string :: Text -> Parser Text -string t = Parsec.string (T.unpack t) *> App.pure t +string t = do +  s@State{ stateInput = inp, statePos = pos } <- getParserState +  case T.stripPrefix t inp of +    Nothing -> Parsec.parserFail "string: Failed to match the input string" +    Just inp' -> +      let pos' = T.foldl updatePosChar pos t +          s' = s{ stateInput = inp', statePos = pos' } +      in setParserState s' $> t + +-- | Keep matching characters as long as the predicate function holds (and +-- return them). +-- +-- Equivalent to @fmap T.pack . Parsec.many@, but more efficient. +takeWhile :: (Char -> Bool) -> Parser Text +takeWhile f = do +  s@State{ stateInput = inp, statePos = pos } <- getParserState +  let (t, inp') = T.span f inp +      pos' = T.foldl updatePosChar pos t +      s' = s{ stateInput = inp', statePos = pos' } +  setParserState s' $> t + + +-- | Like 'takeWhile', but fails if no characters matched. +-- +-- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient. +takeWhile1 :: (Char -> Bool) -> Parser Text +takeWhile1 = mfilter (not . T.null) . takeWhile  -- | Scan the input text, accumulating characters as long as the scanning  -- function returns true.  scan :: (s -> Char -> Maybe s) -- ^ scan function       -> s                      -- ^ initial state       -> Parser Text  -scan f = fmap T.pack . go -  where go s1 = do { cOpt <- peekChar -                   ; case cOpt >>= f s1 of -                       Nothing -> pure "" -                       Just s2 -> (:) <$> Parsec.anyChar <*> go s2 -                   } - --- | Apply a parser for a character zero or more times and collect the result in --- a string. -takeWhile :: Parser Char -> Parser Text -takeWhile = fmap T.pack . Parsec.many - --- | Apply a parser for a character one or more times and collect the result in --- a string. -takeWhile1 :: Parser Char -> Parser Text -takeWhile1 =  fmap T.pack . Parsec.many1 +scan f st = do +  s@State{ stateInput = inp, statePos = pos } <- getParserState +  go inp st pos 0 $ \inp' pos' n -> +    let s' = s{ Parsec.stateInput = inp', Parsec.statePos = pos' } +    in setParserState s' $> T.take n inp +  where +    go inp s !pos !n cont +      = case T.uncons inp of +          Nothing -> cont inp pos n        -- ran out of input +          Just (c, inp') -> +             case f s c of +               Nothing -> cont inp pos n   -- scan function failed +               Just s' -> go inp' s' (updatePosChar pos c) (n+1) cont +  -- | Parse a decimal number.  decimal :: Integral a => Parser a diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index ffa91b09..98570c22 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -40,7 +40,7 @@ skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace)  -- | Take leading horizontal space  takeHorizontalSpace :: Parser Text  -takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace) +takeHorizontalSpace = takeWhile (`elem` horizontalSpace)  makeLabeled :: (String -> Maybe String -> a) -> Text -> a  makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of | 
