diff options
Diffstat (limited to 'haddock-library/src/Documentation/Haddock/Parser')
| -rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser/Monad.hs | 202 | ||||
| -rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser/Util.hs | 82 | 
2 files changed, 111 insertions, 173 deletions
| diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index 3f7d60f8..585c76bb 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -1,149 +1,91 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-} -module Documentation.Haddock.Parser.Monad ( -  module Documentation.Haddock.Parser.Monad -, Attoparsec.isDigit -, Attoparsec.isDigit_w8 -, Attoparsec.isAlpha_iso8859_15 -, Attoparsec.isAlpha_ascii -, Attoparsec.isSpace -, Attoparsec.isSpace_w8 -, Attoparsec.inClass -, Attoparsec.notInClass -, Attoparsec.isEndOfLine -, Attoparsec.isHorizontalSpace -, Attoparsec.choice -, Attoparsec.count -, Attoparsec.option -, Attoparsec.many' -, Attoparsec.many1 -, Attoparsec.many1' -, Attoparsec.manyTill -, Attoparsec.manyTill' -, Attoparsec.sepBy -, Attoparsec.sepBy' -, Attoparsec.sepBy1 -, Attoparsec.sepBy1' -, Attoparsec.skipMany -, Attoparsec.skipMany1 -, Attoparsec.eitherP -) where - -import           Control.Applicative -import           Control.Monad -import           Data.String -import           Data.ByteString (ByteString) -import qualified Data.ByteString.Lazy as LB -import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec -import           Control.Monad.Trans.State -import qualified Control.Monad.Trans.Class as Trans -import           Data.Word -import           Data.Bits -import           Data.Tuple - -import           Documentation.Haddock.Types (Version) +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TypeSynonymInstances #-} -newtype ParserState = ParserState { -  parserStateSince :: Maybe Version -} deriving (Eq, Show) +module Documentation.Haddock.Parser.Monad where -initialParserState :: ParserState -initialParserState = ParserState Nothing +import qualified Text.Parsec.Char as Parsec +import qualified Text.Parsec as Parsec -newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a) -  deriving (Functor, Applicative, Alternative, Monad, MonadPlus) +import qualified Data.Text as T +import           Data.Text                   ( Text ) -instance (a ~ ByteString) => IsString (Parser a) where -  fromString = lift . fromString +import           Data.String                 ( IsString(..) ) +import           Data.Bits                   ( Bits(..) ) +import           Data.Char                   ( ord ) +import           Data.List                   ( foldl' ) -parseOnly :: Parser a -> ByteString -> Either String (ParserState, a) -parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState) +import           Documentation.Haddock.Types ( Version ) -lift :: Attoparsec.Parser a -> Parser a -lift = Parser . Trans.lift +newtype ParserState = ParserState { +  parserStateSince :: Maybe Version +} deriving (Eq, Show) -setParserState :: ParserState -> Parser () -setParserState = Parser . put +initialParserState :: ParserState +initialParserState = ParserState Nothing  setSince :: Version -> Parser () -setSince since = Parser $ modify (\st -> st {parserStateSince = Just since}) - -char :: Char -> Parser Char -char = lift . Attoparsec.char - -char8 :: Char -> Parser Word8 -char8 = lift . Attoparsec.char8 +setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since}) -anyChar :: Parser Char -anyChar = lift Attoparsec.anyChar +type Parser = Parsec.Parsec Text ParserState -notChar :: Char -> Parser Char -notChar = lift . Attoparsec.notChar +instance (a ~ Text) => IsString (Parser a) where +  fromString = fmap T.pack . Parsec.string -satisfy :: (Char -> Bool) -> Parser Char -satisfy = lift . Attoparsec.satisfy +parseOnly :: Parser a -> Text -> Either String (ParserState, a) +parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of +                  Left e -> Left (show e) +                  Right (x,s) -> Right (s,x) +  where p' = (,) <$> p <*> Parsec.getState +-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not +-- consume input.  peekChar :: Parser (Maybe Char) -peekChar = lift Attoparsec.peekChar +peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar +-- | Fails if at the end of input. Does not consume input.  peekChar' :: Parser Char -peekChar' = lift Attoparsec.peekChar' - -digit :: Parser Char -digit = lift Attoparsec.digit - -letter_iso8859_15 :: Parser Char -letter_iso8859_15 = lift Attoparsec.letter_iso8859_15 - -letter_ascii :: Parser Char -letter_ascii = lift Attoparsec.letter_ascii - -space :: Parser Char -space = lift Attoparsec.space - -string :: ByteString -> Parser ByteString -string = lift . Attoparsec.string - -stringCI :: ByteString -> Parser ByteString -stringCI = lift . Attoparsec.stringCI - -skipSpace :: Parser () -skipSpace = lift Attoparsec.skipSpace - -skipWhile :: (Char -> Bool) -> Parser () -skipWhile = lift . Attoparsec.skipWhile - -take :: Int -> Parser ByteString -take = lift . Attoparsec.take - -scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString -scan s = lift . Attoparsec.scan s - -takeWhile :: (Char -> Bool) -> Parser ByteString -takeWhile = lift . Attoparsec.takeWhile - -takeWhile1 :: (Char -> Bool) -> Parser ByteString -takeWhile1 = lift . Attoparsec.takeWhile1 - -takeTill :: (Char -> Bool) -> Parser ByteString -takeTill = lift . Attoparsec.takeTill - -takeByteString :: Parser ByteString -takeByteString = lift Attoparsec.takeByteString - -takeLazyByteString :: Parser LB.ByteString -takeLazyByteString = lift Attoparsec.takeLazyByteString - -endOfLine :: Parser () -endOfLine = lift Attoparsec.endOfLine - +peekChar' = Parsec.lookAhead Parsec.anyChar  + +-- | Parses the given string. Returns the parsed string. +string :: Text -> Parser Text +string t = Parsec.string (T.unpack t) *> pure t + +-- | Scan the input text, accumulating characters as long as the scanning +-- function returns true. +scan :: (s -> Char -> Maybe s) -- ^ scan function +     -> s                      -- ^ initial state +     -> Parser Text  +scan f = fmap T.pack . go +  where go s1 = do { cOpt <- peekChar +                   ; case cOpt >>= f s1 of +                       Nothing -> pure "" +                       Just s2 -> (:) <$> Parsec.anyChar <*> go s2 +                   } + +-- | Apply a parser for a character zero or more times and collect the result in +-- a string. +takeWhile :: Parser Char -> Parser Text +takeWhile = fmap T.pack . Parsec.many + +-- | Apply a parser for a character one or more times and collect the result in +-- a string. +takeWhile1 :: Parser Char -> Parser Text +takeWhile1 =  fmap T.pack . Parsec.many1 + +-- | Parse a decimal number.  decimal :: Integral a => Parser a -decimal = lift Attoparsec.decimal +decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit +  where step a c = a * 10 + fromIntegral (ord c - 48) +-- | Parse a hexadecimal number.  hexadecimal :: (Integral a, Bits a) => Parser a -hexadecimal = lift Attoparsec.hexadecimal - -endOfInput :: Parser () -endOfInput = lift Attoparsec.endOfInput - -atEnd :: Parser Bool -atEnd = lift Attoparsec.atEnd +hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit  +  where +  step a c | w >= 48 && w <= 57  = (a `shiftL` 4) .|. fromIntegral (w - 48) +           | w >= 97             = (a `shiftL` 4) .|. fromIntegral (w - 87) +           | otherwise           = (a `shiftL` 4) .|. fromIntegral (w - 55) +    where w = ord c diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs index ab5e5e9e..ffa91b09 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-}  -- |  -- Module      :  Documentation.Haddock.Parser.Util  -- Copyright   :  (c) Mateusz Kowalczyk 2013-2014, @@ -11,62 +11,59 @@  --  -- Various utility functions used by the parser.  module Documentation.Haddock.Parser.Util ( -  unsnoc -, strip -, takeUntil -, removeEscapes -, makeLabeled -, takeHorizontalSpace -, skipHorizontalSpace +  takeUntil, +  removeEscapes, +  makeLabeled, +  takeHorizontalSpace, +  skipHorizontalSpace,  ) where +import qualified Text.Parsec as Parsec + +import qualified Data.Text as T +import           Data.Text (Text) +  import           Control.Applicative  import           Control.Monad (mfilter) -import           Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace) -import           Data.ByteString.Char8 (ByteString) -import qualified Data.ByteString.Char8 as BS +import           Documentation.Haddock.Parser.Monad  import           Prelude hiding (takeWhile) -#if MIN_VERSION_bytestring(0,10,2) -import           Data.ByteString.Char8 (unsnoc) -#else -unsnoc :: ByteString -> Maybe (ByteString, Char) -unsnoc bs -  | BS.null bs = Nothing -  | otherwise = Just (BS.init bs, BS.last bs) -#endif +import           Data.Char (isSpace) --- | Remove all leading and trailing whitespace -strip :: String -> String -strip = (\f -> f . f) $ dropWhile isSpace . reverse - -isHorizontalSpace :: Char -> Bool -isHorizontalSpace = inClass " \t\f\v\r" +-- | Characters that count as horizontal space +horizontalSpace :: [Char] +horizontalSpace = " \t\f\v\r" +-- | Skip and ignore leading horizontal space  skipHorizontalSpace :: Parser () -skipHorizontalSpace = skipWhile isHorizontalSpace +skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace) -takeHorizontalSpace :: Parser BS.ByteString -takeHorizontalSpace = takeWhile isHorizontalSpace +-- | Take leading horizontal space +takeHorizontalSpace :: Parser Text  +takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace) -makeLabeled :: (String -> Maybe String -> a) -> String -> a -makeLabeled f input = case break isSpace $ removeEscapes $ strip input of -  (uri, "")    -> f uri Nothing -  (uri, label) -> f uri (Just $ dropWhile isSpace label) +makeLabeled :: (String -> Maybe String -> a) -> Text -> a +makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of +  (uri, "")    -> f (T.unpack uri) Nothing +  (uri, label) -> f (T.unpack uri) (Just . T.unpack $ T.stripStart label)  -- | Remove escapes from given string.  --  -- Only do this if you do not process (read: parse) the input any further. -removeEscapes :: String -> String -removeEscapes "" = "" -removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs -removeEscapes ('\\':xs) = removeEscapes xs -removeEscapes (x:xs) = x : removeEscapes xs +removeEscapes :: Text -> Text +removeEscapes = T.unfoldr go +  where +  go :: Text -> Maybe (Char, Text) +  go xs = case T.uncons xs of +            Just ('\\',ys) -> T.uncons ys +            unconsed -> unconsed -takeUntil :: ByteString -> Parser ByteString -takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome +-- | Consume characters from the input up to and including the given pattern. +-- Return everything consumed except for the end pattern itself. +takeUntil :: Text -> Parser Text  +takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome    where -    end = BS.unpack end_ +    end = T.unpack end_       p :: (Bool, String) -> Char -> Maybe (Bool, String)      p acc c = case acc of @@ -75,9 +72,8 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome        (_, x:xs) | x == c -> Just (False, xs)        _ -> Just (c == '\\', end) -    dropEnd = BS.reverse . BS.drop (length end) . BS.reverse -    requireEnd = mfilter (BS.isSuffixOf end_) +    requireEnd = mfilter (T.isSuffixOf end_)      gotSome xs -      | BS.null xs = fail "didn't get any content" +      | T.null xs = fail "didn't get any content"        | otherwise = return xs | 
