Consequently use inClass and notInClass in haddock-library (#617)

These allow attoparsec to do some clever lookup optimization
author: Alex Biehl <alexbiehl@gmail.com> 2017-05-13 12:48:10 +0200
committer: GitHub <noreply@github.com> 2017-05-13 12:48:10 +0200
commit: c836dd4cb47d457b066b51b61a08f583a8c4466e (patch)
tree: 90326b235fc19cadddf53ebfe2cef46cde94a96c /haddock-library/src/Documentation/Haddock
parent: b35eed2a9f1c82131f51f55c771ac2372127520d (diff)
2 files changed, 19 insertions, 11 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
index 123f5612..ddea2b9b 100644
--- a/haddock-library/src/Documentation/Haddock/Parser.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -143,7 +143,7 @@ specialChar = "_/<@\"&'`# "
 -- to ensure that we have already given a chance to more meaningful parsers
 -- before capturing their characers.
 string' :: Parser (DocH mod a)
-string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
+string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialChar)
   where
     unescape "" = ""
     unescape ('\\':x:xs) = x : unescape xs
@@ -153,7 +153,7 @@ string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialCh
 -- This is done to skip over any special characters belonging to other
 -- elements but which were not deemed meaningful at their positions.
 skipSpecialChar :: Parser (DocH mod a)
-skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
+skipSpecialChar = DocString . return <$> satisfy (inClass specialChar)
 
 -- | Emphasis parser.
 --
@@ -215,7 +215,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
       -- accept {small | large | digit | ' } here.  But as we can't
       -- match on unicode characters, this is currently not possible.
       -- Note that we allow ‘#’ to suport anchors.
-      <*> (decodeUtf8 <$> takeWhile (`notElem` (" .&[{}(=*)+]!|@/;,^?\"\n"::String)))
+      <*> (decodeUtf8 <$> takeWhile (notInClass " .&[{}(=*)+]!|@/;,^?\"\n"))
 
 -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
 -- a title for the picture.
@@ -338,7 +338,7 @@ definitionList :: BS.ByteString -> Parser (DocH mod Identifier)
 definitionList indent = DocDefList <$> p
   where
     p = do
-      label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
+      label <- "[" *> (parseStringBS <$> takeWhile1 (notInClass "]\n")) <* ("]" <* optional ":")
       c <- takeLine
       (cs, items) <- more indent p
       let contents = parseString . dropNLs . unlines $ c : cs
@@ -561,7 +561,7 @@ autoUrl = mkLink <$> url
     url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
     mkLink :: BS.ByteString -> DocH mod a
     mkLink s = case unsnoc s of
-      Just (xs, x) | x `elem` (",.!?" :: String) -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
+      Just (xs, x) | inClass ",.!?" x -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
       _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
 
 -- | Parses strings between identifier delimiters. Consumes all input that it
@@ -570,8 +570,13 @@ autoUrl = mkLink <$> url
 parseValid :: Parser String
 parseValid = p some
   where
-    idChar = satisfy (`elem` ("_.!#$%&*+/<=>?@\\|-~:^"::String))
-             <|> digit <|> letter_ascii
+    idChar =
+      satisfy (\c -> isAlpha_ascii c
+                     || isDigit c
+                     -- N.B. '-' is placed first otherwise attoparsec thinks
+                     -- it belongs to a character class
+                     || inClass "-_.!#$%&*+/<=>?@\\|~:^" c)
+
     p p' = do
       vs' <- p' $ utf8String "⋆" <|> return <$> idChar
       let vs = concat vs'
@@ -594,4 +599,4 @@ identifier = do
   e <- idDelim
   return $ DocIdentifier (o, vid, e)
   where
-    idDelim = char '\'' <|> char '`'
+    idDelim = satisfy (\c -> c == '\'' || c == '`')
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
index d908ce18..ab5e5e9e 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
@@ -22,7 +22,7 @@ module Documentation.Haddock.Parser.Util (
 
 import           Control.Applicative
 import           Control.Monad (mfilter)
-import           Documentation.Haddock.Parser.Monad
+import           Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace)
 import           Data.ByteString.Char8 (ByteString)
 import qualified Data.ByteString.Char8 as BS
 import           Prelude hiding (takeWhile)
@@ -40,11 +40,14 @@ unsnoc bs
 strip :: String -> String
 strip = (\f -> f . f) $ dropWhile isSpace . reverse
 
+isHorizontalSpace :: Char -> Bool
+isHorizontalSpace = inClass " \t\f\v\r"
+
 skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
+skipHorizontalSpace = skipWhile isHorizontalSpace
 
 takeHorizontalSpace :: Parser BS.ByteString
-takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
+takeHorizontalSpace = takeWhile isHorizontalSpace
 
 makeLabeled :: (String -> Maybe String -> a) -> String -> a
 makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
author	Alex Biehl <alexbiehl@gmail.com>	2017-05-13 12:48:10 +0200
committer	GitHub <noreply@github.com>	2017-05-13 12:48:10 +0200
commit	c836dd4cb47d457b066b51b61a08f583a8c4466e (patch)
tree	90326b235fc19cadddf53ebfe2cef46cde94a96c /haddock-library/src/Documentation/Haddock
parent	b35eed2a9f1c82131f51f55c771ac2372127520d (diff)