aboutsummaryrefslogtreecommitdiff
path: root/haddock-library/src/Documentation/Haddock/Parser
diff options
context:
space:
mode:
Diffstat (limited to 'haddock-library/src/Documentation/Haddock/Parser')
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser/Monad.hs202
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser/Util.hs82
2 files changed, 111 insertions, 173 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
index 3f7d60f8..585c76bb 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
@@ -1,149 +1,91 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-}
-module Documentation.Haddock.Parser.Monad (
- module Documentation.Haddock.Parser.Monad
-, Attoparsec.isDigit
-, Attoparsec.isDigit_w8
-, Attoparsec.isAlpha_iso8859_15
-, Attoparsec.isAlpha_ascii
-, Attoparsec.isSpace
-, Attoparsec.isSpace_w8
-, Attoparsec.inClass
-, Attoparsec.notInClass
-, Attoparsec.isEndOfLine
-, Attoparsec.isHorizontalSpace
-, Attoparsec.choice
-, Attoparsec.count
-, Attoparsec.option
-, Attoparsec.many'
-, Attoparsec.many1
-, Attoparsec.many1'
-, Attoparsec.manyTill
-, Attoparsec.manyTill'
-, Attoparsec.sepBy
-, Attoparsec.sepBy'
-, Attoparsec.sepBy1
-, Attoparsec.sepBy1'
-, Attoparsec.skipMany
-, Attoparsec.skipMany1
-, Attoparsec.eitherP
-) where
-
-import Control.Applicative
-import Control.Monad
-import Data.String
-import Data.ByteString (ByteString)
-import qualified Data.ByteString.Lazy as LB
-import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec
-import Control.Monad.Trans.State
-import qualified Control.Monad.Trans.Class as Trans
-import Data.Word
-import Data.Bits
-import Data.Tuple
-
-import Documentation.Haddock.Types (Version)
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeSynonymInstances #-}
-newtype ParserState = ParserState {
- parserStateSince :: Maybe Version
-} deriving (Eq, Show)
+module Documentation.Haddock.Parser.Monad where
-initialParserState :: ParserState
-initialParserState = ParserState Nothing
+import qualified Text.Parsec.Char as Parsec
+import qualified Text.Parsec as Parsec
-newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a)
- deriving (Functor, Applicative, Alternative, Monad, MonadPlus)
+import qualified Data.Text as T
+import Data.Text ( Text )
-instance (a ~ ByteString) => IsString (Parser a) where
- fromString = lift . fromString
+import Data.String ( IsString(..) )
+import Data.Bits ( Bits(..) )
+import Data.Char ( ord )
+import Data.List ( foldl' )
-parseOnly :: Parser a -> ByteString -> Either String (ParserState, a)
-parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState)
+import Documentation.Haddock.Types ( Version )
-lift :: Attoparsec.Parser a -> Parser a
-lift = Parser . Trans.lift
+newtype ParserState = ParserState {
+ parserStateSince :: Maybe Version
+} deriving (Eq, Show)
-setParserState :: ParserState -> Parser ()
-setParserState = Parser . put
+initialParserState :: ParserState
+initialParserState = ParserState Nothing
setSince :: Version -> Parser ()
-setSince since = Parser $ modify (\st -> st {parserStateSince = Just since})
-
-char :: Char -> Parser Char
-char = lift . Attoparsec.char
-
-char8 :: Char -> Parser Word8
-char8 = lift . Attoparsec.char8
+setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since})
-anyChar :: Parser Char
-anyChar = lift Attoparsec.anyChar
+type Parser = Parsec.Parsec Text ParserState
-notChar :: Char -> Parser Char
-notChar = lift . Attoparsec.notChar
+instance (a ~ Text) => IsString (Parser a) where
+ fromString = fmap T.pack . Parsec.string
-satisfy :: (Char -> Bool) -> Parser Char
-satisfy = lift . Attoparsec.satisfy
+parseOnly :: Parser a -> Text -> Either String (ParserState, a)
+parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
+ Left e -> Left (show e)
+ Right (x,s) -> Right (s,x)
+ where p' = (,) <$> p <*> Parsec.getState
+-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
+-- consume input.
peekChar :: Parser (Maybe Char)
-peekChar = lift Attoparsec.peekChar
+peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar
+-- | Fails if at the end of input. Does not consume input.
peekChar' :: Parser Char
-peekChar' = lift Attoparsec.peekChar'
-
-digit :: Parser Char
-digit = lift Attoparsec.digit
-
-letter_iso8859_15 :: Parser Char
-letter_iso8859_15 = lift Attoparsec.letter_iso8859_15
-
-letter_ascii :: Parser Char
-letter_ascii = lift Attoparsec.letter_ascii
-
-space :: Parser Char
-space = lift Attoparsec.space
-
-string :: ByteString -> Parser ByteString
-string = lift . Attoparsec.string
-
-stringCI :: ByteString -> Parser ByteString
-stringCI = lift . Attoparsec.stringCI
-
-skipSpace :: Parser ()
-skipSpace = lift Attoparsec.skipSpace
-
-skipWhile :: (Char -> Bool) -> Parser ()
-skipWhile = lift . Attoparsec.skipWhile
-
-take :: Int -> Parser ByteString
-take = lift . Attoparsec.take
-
-scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString
-scan s = lift . Attoparsec.scan s
-
-takeWhile :: (Char -> Bool) -> Parser ByteString
-takeWhile = lift . Attoparsec.takeWhile
-
-takeWhile1 :: (Char -> Bool) -> Parser ByteString
-takeWhile1 = lift . Attoparsec.takeWhile1
-
-takeTill :: (Char -> Bool) -> Parser ByteString
-takeTill = lift . Attoparsec.takeTill
-
-takeByteString :: Parser ByteString
-takeByteString = lift Attoparsec.takeByteString
-
-takeLazyByteString :: Parser LB.ByteString
-takeLazyByteString = lift Attoparsec.takeLazyByteString
-
-endOfLine :: Parser ()
-endOfLine = lift Attoparsec.endOfLine
-
+peekChar' = Parsec.lookAhead Parsec.anyChar
+
+-- | Parses the given string. Returns the parsed string.
+string :: Text -> Parser Text
+string t = Parsec.string (T.unpack t) *> pure t
+
+-- | Scan the input text, accumulating characters as long as the scanning
+-- function returns true.
+scan :: (s -> Char -> Maybe s) -- ^ scan function
+ -> s -- ^ initial state
+ -> Parser Text
+scan f = fmap T.pack . go
+ where go s1 = do { cOpt <- peekChar
+ ; case cOpt >>= f s1 of
+ Nothing -> pure ""
+ Just s2 -> (:) <$> Parsec.anyChar <*> go s2
+ }
+
+-- | Apply a parser for a character zero or more times and collect the result in
+-- a string.
+takeWhile :: Parser Char -> Parser Text
+takeWhile = fmap T.pack . Parsec.many
+
+-- | Apply a parser for a character one or more times and collect the result in
+-- a string.
+takeWhile1 :: Parser Char -> Parser Text
+takeWhile1 = fmap T.pack . Parsec.many1
+
+-- | Parse a decimal number.
decimal :: Integral a => Parser a
-decimal = lift Attoparsec.decimal
+decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit
+ where step a c = a * 10 + fromIntegral (ord c - 48)
+-- | Parse a hexadecimal number.
hexadecimal :: (Integral a, Bits a) => Parser a
-hexadecimal = lift Attoparsec.hexadecimal
-
-endOfInput :: Parser ()
-endOfInput = lift Attoparsec.endOfInput
-
-atEnd :: Parser Bool
-atEnd = lift Attoparsec.atEnd
+hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit
+ where
+ step a c | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48)
+ | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87)
+ | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55)
+ where w = ord c
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
index ab5e5e9e..ffa91b09 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
-- |
-- Module : Documentation.Haddock.Parser.Util
-- Copyright : (c) Mateusz Kowalczyk 2013-2014,
@@ -11,62 +11,59 @@
--
-- Various utility functions used by the parser.
module Documentation.Haddock.Parser.Util (
- unsnoc
-, strip
-, takeUntil
-, removeEscapes
-, makeLabeled
-, takeHorizontalSpace
-, skipHorizontalSpace
+ takeUntil,
+ removeEscapes,
+ makeLabeled,
+ takeHorizontalSpace,
+ skipHorizontalSpace,
) where
+import qualified Text.Parsec as Parsec
+
+import qualified Data.Text as T
+import Data.Text (Text)
+
import Control.Applicative
import Control.Monad (mfilter)
-import Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace)
-import Data.ByteString.Char8 (ByteString)
-import qualified Data.ByteString.Char8 as BS
+import Documentation.Haddock.Parser.Monad
import Prelude hiding (takeWhile)
-#if MIN_VERSION_bytestring(0,10,2)
-import Data.ByteString.Char8 (unsnoc)
-#else
-unsnoc :: ByteString -> Maybe (ByteString, Char)
-unsnoc bs
- | BS.null bs = Nothing
- | otherwise = Just (BS.init bs, BS.last bs)
-#endif
+import Data.Char (isSpace)
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = (\f -> f . f) $ dropWhile isSpace . reverse
-
-isHorizontalSpace :: Char -> Bool
-isHorizontalSpace = inClass " \t\f\v\r"
+-- | Characters that count as horizontal space
+horizontalSpace :: [Char]
+horizontalSpace = " \t\f\v\r"
+-- | Skip and ignore leading horizontal space
skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = skipWhile isHorizontalSpace
+skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace)
-takeHorizontalSpace :: Parser BS.ByteString
-takeHorizontalSpace = takeWhile isHorizontalSpace
+-- | Take leading horizontal space
+takeHorizontalSpace :: Parser Text
+takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace)
-makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
- (uri, "") -> f uri Nothing
- (uri, label) -> f uri (Just $ dropWhile isSpace label)
+makeLabeled :: (String -> Maybe String -> a) -> Text -> a
+makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of
+ (uri, "") -> f (T.unpack uri) Nothing
+ (uri, label) -> f (T.unpack uri) (Just . T.unpack $ T.stripStart label)
-- | Remove escapes from given string.
--
-- Only do this if you do not process (read: parse) the input any further.
-removeEscapes :: String -> String
-removeEscapes "" = ""
-removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
-removeEscapes ('\\':xs) = removeEscapes xs
-removeEscapes (x:xs) = x : removeEscapes xs
+removeEscapes :: Text -> Text
+removeEscapes = T.unfoldr go
+ where
+ go :: Text -> Maybe (Char, Text)
+ go xs = case T.uncons xs of
+ Just ('\\',ys) -> T.uncons ys
+ unconsed -> unconsed
-takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
+-- | Consume characters from the input up to and including the given pattern.
+-- Return everything consumed except for the end pattern itself.
+takeUntil :: Text -> Parser Text
+takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
where
- end = BS.unpack end_
+ end = T.unpack end_
p :: (Bool, String) -> Char -> Maybe (Bool, String)
p acc c = case acc of
@@ -75,9 +72,8 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
(_, x:xs) | x == c -> Just (False, xs)
_ -> Just (c == '\\', end)
- dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
- requireEnd = mfilter (BS.isSuffixOf end_)
+ requireEnd = mfilter (T.isSuffixOf end_)
gotSome xs
- | BS.null xs = fail "didn't get any content"
+ | T.null xs = fail "didn't get any content"
| otherwise = return xs