From c836dd4cb47d457b066b51b61a08f583a8c4466e Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Sat, 13 May 2017 12:48:10 +0200 Subject: Consequently use inClass and notInClass in haddock-library (#617) These allow attoparsec to do some clever lookup optimization --- haddock-library/src/Documentation/Haddock/Parser.hs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'haddock-library/src/Documentation/Haddock/Parser.hs') diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index 123f5612..ddea2b9b 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -143,7 +143,7 @@ specialChar = "_/<@\"&'`# " -- to ensure that we have already given a chance to more meaningful parsers -- before capturing their characers. string' :: Parser (DocH mod a) -string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar) +string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialChar) where unescape "" = "" unescape ('\\':x:xs) = x : unescape xs @@ -153,7 +153,7 @@ string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialCh -- This is done to skip over any special characters belonging to other -- elements but which were not deemed meaningful at their positions. skipSpecialChar :: Parser (DocH mod a) -skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar) +skipSpecialChar = DocString . return <$> satisfy (inClass specialChar) -- | Emphasis parser. -- @@ -215,7 +215,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"') -- accept {small | large | digit | ' } here. But as we can't -- match on unicode characters, this is currently not possible. -- Note that we allow ‘#’ to suport anchors. - <*> (decodeUtf8 <$> takeWhile (`notElem` (" .&[{}(=*)+]!|@/;,^?\"\n"::String))) + <*> (decodeUtf8 <$> takeWhile (notInClass " .&[{}(=*)+]!|@/;,^?\"\n")) -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify -- a title for the picture. @@ -338,7 +338,7 @@ definitionList :: BS.ByteString -> Parser (DocH mod Identifier) definitionList indent = DocDefList <$> p where p = do - label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":") + label <- "[" *> (parseStringBS <$> takeWhile1 (notInClass "]\n")) <* ("]" <* optional ":") c <- takeLine (cs, items) <- more indent p let contents = parseString . dropNLs . unlines $ c : cs @@ -561,7 +561,7 @@ autoUrl = mkLink <$> url url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) mkLink :: BS.ByteString -> DocH mod a mkLink s = case unsnoc s of - Just (xs, x) | x `elem` (",.!?" :: String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x] + Just (xs, x) | inClass ",.!?" x -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x] _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) -- | Parses strings between identifier delimiters. Consumes all input that it @@ -570,8 +570,13 @@ autoUrl = mkLink <$> url parseValid :: Parser String parseValid = p some where - idChar = satisfy (`elem` ("_.!#$%&*+/<=>?@\\|-~:^"::String)) - <|> digit <|> letter_ascii + idChar = + satisfy (\c -> isAlpha_ascii c + || isDigit c + -- N.B. '-' is placed first otherwise attoparsec thinks + -- it belongs to a character class + || inClass "-_.!#$%&*+/<=>?@\\|~:^" c) + p p' = do vs' <- p' $ utf8String "⋆" <|> return <$> idChar let vs = concat vs' @@ -594,4 +599,4 @@ identifier = do e <- idDelim return $ DocIdentifier (o, vid, e) where - idDelim = char '\'' <|> char '`' + idDelim = satisfy (\c -> c == '\'' || c == '`') -- cgit v1.2.3 From 1ca3ff62bc3be0d9ad03eb7f531197c69182d3a0 Mon Sep 17 00:00:00 2001 From: Alexander Biehl Date: Mon, 31 Jul 2017 20:35:49 +0200 Subject: Fixup haddock --- haddock-library/src/Documentation/Haddock/Parser.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'haddock-library/src/Documentation/Haddock/Parser.hs') diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index ddea2b9b..8dc2a801 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -14,7 +14,7 @@ -- library, the most commonly used combination of functions is going -- to be -- --- @'toRegular' . 'parseParas'@ +-- @'toRegular' . '_doc' . 'parseParas'@ module Documentation.Haddock.Parser ( parseString, parseParas , overIdentifier, toRegular, Identifier ) where -- cgit v1.2.3