aboutsummaryrefslogtreecommitdiff
path: root/haddock-library/src
diff options
context:
space:
mode:
authorAlec Theriault <alec.theriault@gmail.com>2018-04-25 11:24:07 -0700
committerAlexander Biehl <alexbiehl@gmail.com>2018-04-25 11:24:07 -0700
commit79c7159101c03bbbc7350e07963896ca2bb97c02 (patch)
treec754c425ed9d4ff8755dbe67589fa3c9dbbce10c /haddock-library/src
parent979c7338cfcdc59f0b0dda562a53558c416cc362 (diff)
Replace 'attoparsec' with 'parsec' (#799)
* Remove attoparsec with parsec and start fixing failed parses * Make tests pass * Fix encoding issues The Haddock parser no longer needs to worry about bytestrings. All the internal parsing work in haddock-library happens over 'Text'. * Remove attoparsec vendor * Fix stuff broken in 'attoparsec' -> 'parsec' * hyperlinks * codeblocks * examples Pretty much all issues are due to attoparsec's backtracking failure behaviour vs. parsec's non-backtracking failure behaviour. * Fix small TODOs * Missing quote + Haddocks * Better handle spaces before/after paragraphs * Address review comments
Diffstat (limited to 'haddock-library/src')
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser.hs428
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser/Monad.hs225
-rw-r--r--haddock-library/src/Documentation/Haddock/Parser/Util.hs82
3 files changed, 348 insertions, 387 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs
index 4921b3a7..d79da40b 100644
--- a/haddock-library/src/Documentation/Haddock/Parser.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser.hs
@@ -16,27 +16,35 @@
-- to be
--
-- @'toRegular' . '_doc' . 'parseParas'@
-module Documentation.Haddock.Parser ( parseString, parseParas
- , overIdentifier, toRegular, Identifier
- ) where
+module Documentation.Haddock.Parser (
+ parseString,
+ parseParas,
+ overIdentifier,
+ toRegular,
+ Identifier
+) where
import Control.Applicative
import Control.Arrow (first)
import Control.Monad
-import qualified Data.ByteString.Char8 as BS
-import Data.Char (chr, isUpper, isAlpha, isAlphaNum)
-import Data.List (stripPrefix, intercalate, unfoldr, elemIndex)
+import Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace)
+import Data.List (intercalate, unfoldr, elemIndex, notElem)
import Data.Maybe (fromMaybe, mapMaybe)
import Data.Monoid
import qualified Data.Set as Set
import Documentation.Haddock.Doc
-import Documentation.Haddock.Parser.Monad hiding (take, endOfLine)
+import Documentation.Haddock.Parser.Monad
import Documentation.Haddock.Parser.Util
import Documentation.Haddock.Types
-import Documentation.Haddock.Utf8
import Prelude hiding (takeWhile)
import qualified Prelude as P
+import qualified Text.Parsec as Parsec
+import Text.Parsec (try)
+
+import qualified Data.Text as T
+import Data.Text (Text)
+
#if MIN_VERSION_base(4,9,0)
import Text.Read.Lex (isSymbolChar)
#else
@@ -56,7 +64,7 @@ isSymbolChar c = not (isPuncChar c) && case generalCategory c of
ModifierSymbol -> True
OtherSymbol -> True
DashPunctuation -> True
- OtherPunctuation -> not (c `elem` ("'\"" :: String))
+ OtherPunctuation -> c `notElem` ("'\"" :: String)
ConnectorPunctuation -> c /= '_'
_ -> False
where
@@ -109,8 +117,14 @@ overIdentifier f d = g d
g (DocHeader (Header l x)) = DocHeader . Header l $ g x
g (DocTable (Table h b)) = DocTable (Table (map (fmap g) h) (map (fmap g) b))
-parse :: Parser a -> BS.ByteString -> (ParserState, a)
-parse p = either err id . parseOnly (p <* endOfInput)
+
+choice' :: [Parser a] -> Parser a
+choice' [] = empty
+choice' [p] = p
+choice' (p : ps) = try p <|> choice' ps
+
+parse :: Parser a -> Text -> (ParserState, a)
+parse p = either err id . parseOnly (p <* Parsec.eof)
where
err = error . ("Haddock.Parser.parse: " ++)
@@ -127,31 +141,46 @@ parseParas pkg input = case parseParasState input of
}
parseParasState :: String -> (ParserState, DocH mod Identifier)
-parseParasState =
- parse (p <* skipSpace) . encodeUtf8 . (++ "\n") . filter (/= '\r')
+parseParasState = parse (emptyLines *> p) . T.pack . (++ "\n") . filter (/= '\r')
where
p :: Parser (DocH mod Identifier)
- p = docConcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n")
+ p = docConcat <$> many (paragraph <* emptyLines)
+
+ emptyLines :: Parser ()
+ emptyLines = void $ many (try (skipHorizontalSpace *> "\n"))
parseParagraphs :: String -> Parser (DocH mod Identifier)
parseParagraphs input = case parseParasState input of
- (state, a) -> setParserState state >> return a
+ (state, a) -> Parsec.putState state *> pure a
--- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which
--- drops leading whitespace and encodes the string to UTF8 first.
+-- | Variant of 'parseText' for 'String' instead of 'Text'
parseString :: String -> DocH mod Identifier
-parseString = parseStringBS . encodeUtf8 . dropWhile isSpace . filter (/= '\r')
+parseString = parseText . T.pack
-parseStringBS :: BS.ByteString -> DocH mod Identifier
-parseStringBS = snd . parse p
+-- | Parse a text paragraph. Actually just a wrapper over 'parseParagraph' which
+-- drops leading whitespace.
+parseText :: Text -> DocH mod Identifier
+parseText = parseParagraph . T.dropWhile isSpace . T.filter (/= '\r')
+
+parseParagraph :: Text -> DocH mod Identifier
+parseParagraph = snd . parse p
where
p :: Parser (DocH mod Identifier)
- p = docConcat <$> many (monospace <|> anchor <|> identifier <|> moduleName
- <|> picture <|> mathDisplay <|> mathInline
- <|> markdownImage
- <|> hyperlink <|> bold
- <|> emphasis <|> encodedChar <|> string'
- <|> skipSpecialChar)
+ p = docConcat <$> many (choice' [ monospace
+ , anchor
+ , identifier
+ , moduleName
+ , picture
+ , mathDisplay
+ , mathInline
+ , markdownImage
+ , hyperlink
+ , bold
+ , emphasis
+ , encodedChar
+ , string'
+ , skipSpecialChar
+ ])
-- | Parses and processes
-- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references>
@@ -175,7 +204,7 @@ specialChar = "_/<@\"&'`# "
-- to ensure that we have already given a chance to more meaningful parsers
-- before capturing their characers.
string' :: Parser (DocH mod a)
-string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialChar)
+string' = DocString . unescape . T.unpack <$> takeWhile1_ (`notElem` specialChar)
where
unescape "" = ""
unescape ('\\':x:xs) = x : unescape xs
@@ -185,45 +214,45 @@ string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (notInClass specialC
-- This is done to skip over any special characters belonging to other
-- elements but which were not deemed meaningful at their positions.
skipSpecialChar :: Parser (DocH mod a)
-skipSpecialChar = DocString . return <$> satisfy (inClass specialChar)
+skipSpecialChar = DocString . return <$> Parsec.oneOf specialChar
-- | Emphasis parser.
--
-- >>> parseString "/Hello world/"
-- DocEmphasis (DocString "Hello world")
emphasis :: Parser (DocH mod Identifier)
-emphasis = DocEmphasis . parseStringBS <$>
- mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
+emphasis = DocEmphasis . parseParagraph <$>
+ disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
-- | Bold parser.
--
-- >>> parseString "__Hello world__"
-- DocBold (DocString "Hello world")
bold :: Parser (DocH mod Identifier)
-bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__")
+bold = DocBold . parseParagraph <$> disallowNewline ("__" *> takeUntil "__")
-disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString
-disallowNewline = mfilter ('\n' `BS.notElem`)
+disallowNewline :: Parser Text -> Parser Text
+disallowNewline = mfilter (T.all (/= '\n'))
-- | Like `takeWhile`, but unconditionally take escaped characters.
-takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile_ p = scan False p_
+takeWhile_ :: (Char -> Bool) -> Parser Text
+takeWhile_ p = scan p_ False
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
--- | Like `takeWhile1`, but unconditionally take escaped characters.
-takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
-takeWhile1_ = mfilter (not . BS.null) . takeWhile_
+-- | Like 'takeWhile1', but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser Text
+takeWhile1_ = mfilter (not . T.null) . takeWhile_
-- | Text anchors to allow for jumping around the generated documentation.
--
-- >>> parseString "#Hello world#"
-- DocAName "Hello world"
anchor :: Parser (DocH mod a)
-anchor = DocAName . decodeUtf8 <$>
+anchor = DocAName . T.unpack <$>
disallowNewline ("#" *> takeWhile1_ (/= '#') <* "#")
-- | Monospaced strings.
@@ -231,7 +260,7 @@ anchor = DocAName . decodeUtf8 <$>
-- >>> parseString "@cruel@"
-- DocMonospaced (DocString "cruel")
monospace :: Parser (DocH mod Identifier)
-monospace = DocMonospaced . parseStringBS
+monospace = DocMonospaced . parseParagraph
<$> ("@" *> takeWhile1_ (/= '@') <* "@")
-- | Module names.
@@ -239,14 +268,14 @@ monospace = DocMonospaced . parseStringBS
-- Note that we allow '#' and '\' to support anchors (old style anchors are of
-- the form "SomeModule\#anchor").
moduleName :: Parser (DocH mod a)
-moduleName = DocModule <$> (char '"' *> modid <* char '"')
+moduleName = DocModule <$> ("\"" *> modid <* "\"")
where
- modid = intercalate "." <$> conid `sepBy1` "."
+ modid = intercalate "." <$> conid `Parsec.sepBy1` "."
conid = (:)
- <$> satisfyUnicode (\c -> isAlpha c && isUpper c)
- <*> many (satisfyUnicode conChar <|> char '\\' <|> char '#')
+ <$> Parsec.satisfy (\c -> isAlpha c && isUpper c)
+ <*> many (conChar <|> Parsec.oneOf "\\#")
- conChar c = isAlphaNum c || c == '_'
+ conChar = Parsec.alphaNum <|> Parsec.char '_'
-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
-- a title for the picture.
@@ -256,7 +285,7 @@ moduleName = DocModule <$> (char '"' *> modid <* char '"')
-- >>> parseString "<<hello.png world>>"
-- DocPic (Picture {pictureUri = "hello.png", pictureTitle = Just "world"})
picture :: Parser (DocH mod a)
-picture = DocPic . makeLabeled Picture . decodeUtf8
+picture = DocPic . makeLabeled Picture
<$> disallowNewline ("<<" *> takeUntil ">>")
-- | Inline math parser, surrounded by \\( and \\).
@@ -264,7 +293,7 @@ picture = DocPic . makeLabeled Picture . decodeUtf8
-- >>> parseString "\\(\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\)"
-- DocMathInline "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"
mathInline :: Parser (DocH mod a)
-mathInline = DocMathInline . decodeUtf8
+mathInline = DocMathInline . T.unpack
<$> disallowNewline ("\\(" *> takeUntil "\\)")
-- | Display math parser, surrounded by \\[ and \\].
@@ -272,7 +301,7 @@ mathInline = DocMathInline . decodeUtf8
-- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]"
-- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"
mathDisplay :: Parser (DocH mod a)
-mathDisplay = DocMathDisplay . decodeUtf8
+mathDisplay = DocMathDisplay . T.unpack
<$> ("\\[" *> takeUntil "\\]")
markdownImage :: Parser (DocH mod a)
@@ -282,20 +311,21 @@ markdownImage = fromHyperlink <$> ("!" *> linkParser)
-- | Paragraph parser, called by 'parseParas'.
paragraph :: Parser (DocH mod Identifier)
-paragraph = examples <|> table <|> do
- indent <- takeIndent
- choice
- [ since
- , unorderedList indent
- , orderedList indent
- , birdtracks
- , codeblock
- , property
- , header
- , textParagraphThatStartsWithMarkdownLink
- , definitionList indent
- , docParagraph <$> textParagraph
- ]
+paragraph = choice' [ examples
+ , table
+ , do indent <- takeIndent
+ choice' [ since
+ , unorderedList indent
+ , orderedList indent
+ , birdtracks
+ , codeblock
+ , property
+ , header
+ , textParagraphThatStartsWithMarkdownLink
+ , definitionList indent
+ , docParagraph <$> textParagraph
+ ]
+ ]
-- | Provides support for grid tables.
--
@@ -317,45 +347,45 @@ table :: Parser (DocH mod Identifier)
table = do
-- first we parse the first row, which determines the width of the table
firstRow <- parseFirstRow
- let len = BS.length firstRow
+ let len = T.length firstRow
-- then we parse all consequtive rows starting and ending with + or |,
-- of the width `len`.
- restRows <- many (parseRestRows len)
+ restRows <- many (try (parseRestRows len))
-- Now we gathered the table block, the next step is to split the block
-- into cells.
DocTable <$> tableStepTwo len (firstRow : restRows)
where
- parseFirstRow :: Parser BS.ByteString
+ parseFirstRow :: Parser Text
parseFirstRow = do
skipHorizontalSpace
-- upper-left corner is +
- c <- char '+'
- cs <- many1 (char '-' <|> char '+')
+ c <- Parsec.char '+'
+ cs <- some (Parsec.char '-' <|> Parsec.char '+')
-- upper right corner is + too
guard (last cs == '+')
-- trailing space
skipHorizontalSpace
- _ <- char '\n'
+ _ <- Parsec.newline
- return (BS.cons c $ BS.pack cs)
+ return (T.cons c $ T.pack cs)
- parseRestRows :: Int -> Parser BS.ByteString
+ parseRestRows :: Int -> Parser Text
parseRestRows l = do
skipHorizontalSpace
- c <- char '|' <|> char '+'
- bs <- scan (l - 2) predicate
- c2 <- char '|' <|> char '+'
+ c <- Parsec.char '|' <|> Parsec.char '+'
+ bs <- scan predicate (l - 2)
+ c2 <- Parsec.char '|' <|> Parsec.char '+'
-- trailing space
skipHorizontalSpace
- _ <- char '\n'
+ _ <- Parsec.newline
- return (BS.cons c (BS.snoc bs c2))
+ return (T.cons c (T.snoc bs c2))
where
predicate n c
| n <= 0 = Nothing
@@ -366,22 +396,22 @@ table = do
-- and changes to '=' to '-'.
tableStepTwo
:: Int -- ^ width
- -> [BS.ByteString] -- ^ rows
+ -> [Text] -- ^ rows
-> Parser (Table (DocH mod Identifier))
tableStepTwo width = go 0 [] where
go _ left [] = tableStepThree width (reverse left) Nothing
go n left (r : rs)
- | BS.all (`elem` ['+', '=']) r =
+ | T.all (`elem` ['+', '=']) r =
tableStepThree width (reverse left ++ r' : rs) (Just n)
| otherwise =
go (n + 1) (r : left) rs
where
- r' = BS.map (\c -> if c == '=' then '-' else c) r
+ r' = T.map (\c -> if c == '=' then '-' else c) r
-- Third step recognises cells in the table area, returning a list of TC, cells.
tableStepThree
:: Int -- ^ width
- -> [BS.ByteString] -- ^ rows
+ -> [Text] -- ^ rows
-> Maybe Int -- ^ index of header separator
-> Parser (Table (DocH mod Identifier))
tableStepThree width rs hdrIndex = do
@@ -408,32 +438,32 @@ tableStepThree width rs hdrIndex = do
scanRight :: Int -> Int -> Maybe (Int, Int)
scanRight x y = go (x + 1) where
bs = rs !! y
- go x' | x' >= width = fail "overflow right "
- | BS.index bs x' == '+' = scanDown x y x' <|> go (x' + 1)
- | BS.index bs x' == '-' = go (x' + 1)
- | otherwise = fail $ "not a border (right) " ++ show (x,y,x')
+ go x' | x' >= width = fail "overflow right "
+ | T.index bs x' == '+' = scanDown x y x' <|> go (x' + 1)
+ | T.index bs x' == '-' = go (x' + 1)
+ | otherwise = fail $ "not a border (right) " ++ show (x,y,x')
-- scan down looking for +
scanDown :: Int -> Int -> Int -> Maybe (Int, Int)
scanDown x y x2 = go (y + 1) where
- go y' | y' >= height = fail "overflow down"
- | BS.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1)
- | BS.index (rs !! y') x2 == '|' = go (y' + 1)
- | otherwise = fail $ "not a border (down) " ++ show (x,y,x2,y')
+ go y' | y' >= height = fail "overflow down"
+ | T.index (rs !! y') x2 == '+' = scanLeft x y x2 y' <|> go (y' + 1)
+ | T.index (rs !! y') x2 == '|' = go (y' + 1)
+ | otherwise = fail $ "not a border (down) " ++ show (x,y,x2,y')
-- check that at y2 x..x2 characters are '+' or '-'
scanLeft :: Int -> Int -> Int -> Int -> Maybe (Int, Int)
scanLeft x y x2 y2
- | all (\x' -> BS.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2
- | otherwise = fail $ "not a border (left) " ++ show (x,y,x2,y2)
+ | all (\x' -> T.index bs x' `elem` ['+', '-']) [x..x2] = scanUp x y x2 y2
+ | otherwise = fail $ "not a border (left) " ++ show (x,y,x2,y2)
where
bs = rs !! y2
-- check that at y2 x..x2 characters are '+' or '-'
scanUp :: Int -> Int -> Int -> Int -> Maybe (Int, Int)
scanUp x y x2 y2
- | all (\y' -> BS.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2)
- | otherwise = fail $ "not a border (up) " ++ show (x,y,x2,y2)
+ | all (\y' -> T.index (rs !! y') x `elem` ['+', '|']) [y..y2] = return (x2, y2)
+ | otherwise = fail $ "not a border (up) " ++ show (x,y,x2,y2)
-- | table cell: top left bottom right
data TC = TC !Int !Int !Int !Int
@@ -446,7 +476,7 @@ tcYS :: TC -> [Int]
tcYS (TC y _ y2 _) = [y, y2]
-- | Fourth step. Given the locations of cells, forms 'Table' structure.
-tableStepFour :: [BS.ByteString] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier))
+tableStepFour :: [Text] -> Maybe Int -> [TC] -> Parser (Table (DocH mod Identifier))
tableStepFour rs hdrIndex cells = case hdrIndex of
Nothing -> return $ Table [] rowsDoc
Just i -> case elemIndex i yTabStops of
@@ -464,7 +494,7 @@ tableStepFour rs hdrIndex cells = case hdrIndex of
init' [_] = []
init' (x : xs) = x : init' xs
- rowsDoc = (fmap . fmap) parseStringBS rows
+ rowsDoc = (fmap . fmap) parseParagraph rows
rows = map makeRow (init' yTabStops)
where
@@ -477,9 +507,9 @@ tableStepFour rs hdrIndex cells = case hdrIndex of
yts = length $ P.takeWhile (< y2) $ dropWhile (< y) yTabStops
-- extract cell contents given boundaries
- extract :: Int -> Int -> Int -> Int -> BS.ByteString
- extract x y x2 y2 = BS.intercalate "\n"
- [ BS.take (x2 - x + 1) $ BS.drop x $ rs !! y'
+ extract :: Int -> Int -> Int -> Int -> Text
+ extract x y x2 y2 = T.intercalate "\n"
+ [ T.take (x2 - x + 1) $ T.drop x $ rs !! y'
| y' <- [y .. y2]
]
@@ -487,7 +517,7 @@ tableStepFour rs hdrIndex cells = case hdrIndex of
since :: Parser (DocH mod a)
since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince >> return DocEmpty
where
- version = decimal `sepBy1'` "."
+ version = decimal `Parsec.sepBy1` "."
-- | Headers inside the comment denoted with @=@ signs, up to 6 levels
-- deep.
@@ -498,38 +528,39 @@ since = ("@since " *> version <* skipHorizontalSpace <* endOfLine) >>= setSince
-- Right (DocHeader (Header {headerLevel = 2, headerTitle = DocString "World"}))
header :: Parser (DocH mod Identifier)
header = do
- let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1]
- pser = foldl1 (<|>) psers
- delim <- decodeUtf8 <$> pser
- line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString
- rest <- paragraph <|> return DocEmpty
+ let psers = map (string . flip T.replicate "=") [6, 5 .. 1]
+ pser = choice' psers
+ delim <- T.unpack <$> pser
+ line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseText
+ rest <- try paragraph <|> return DocEmpty
return $ DocHeader (Header (length delim) line) `docAppend` rest
textParagraph :: Parser (DocH mod Identifier)
-textParagraph = parseString . intercalate "\n" <$> many1 nonEmptyLine
+textParagraph = parseText . T.intercalate "\n" <$> some nonEmptyLine
textParagraphThatStartsWithMarkdownLink :: Parser (DocH mod Identifier)
textParagraphThatStartsWithMarkdownLink = docParagraph <$> (docAppend <$> markdownLink <*> optionalTextParagraph)
where
optionalTextParagraph :: Parser (DocH mod Identifier)
- optionalTextParagraph = (docAppend <$> whitespace <*> textParagraph) <|> pure DocEmpty
+ optionalTextParagraph = choice' [ docAppend <$> whitespace <*> textParagraph
+ , pure DocEmpty ]
whitespace :: Parser (DocH mod a)
whitespace = DocString <$> (f <$> takeHorizontalSpace <*> optional "\n")
where
- f :: BS.ByteString -> Maybe BS.ByteString -> String
+ f :: Text -> Maybe Text -> String
f xs (fromMaybe "" -> x)
- | BS.null (xs <> x) = ""
+ | T.null (xs <> x) = ""
| otherwise = " "
-- | Parses unordered (bullet) lists.
-unorderedList :: BS.ByteString -> Parser (DocH mod Identifier)
+unorderedList :: Text -> Parser (DocH mod Identifier)
unorderedList indent = DocUnorderedList <$> p
where
p = ("*" <|> "-") *> innerList indent p
-- | Parses ordered lists (numbered or dashed).
-orderedList :: BS.ByteString -> Parser (DocH mod Identifier)
+orderedList :: Text -> Parser (DocH mod Identifier)
orderedList indent = DocOrderedList <$> p
where
p = (paren <|> dot) *> innerList indent p
@@ -541,104 +572,110 @@ orderedList indent = DocOrderedList <$> p
-- same paragraph. Usually used as
--
-- > someListFunction = listBeginning *> innerList someListFunction
-innerList :: BS.ByteString -> Parser [DocH mod Identifier]
+innerList :: Text -> Parser [DocH mod Identifier]
-> Parser [DocH mod Identifier]
innerList indent item = do
c <- takeLine
(cs, items) <- more indent item
- let contents = docParagraph . parseString . dropNLs . unlines $ c : cs
+ let contents = docParagraph . parseText . dropNLs . T.unlines $ c : cs
return $ case items of
Left p -> [contents `docAppend` p]
Right i -> contents : i
-- | Parses definition lists.
-definitionList :: BS.ByteString -> Parser (DocH mod Identifier)
+definitionList :: Text -> Parser (DocH mod Identifier)
definitionList indent = DocDefList <$> p
where
p = do
- label <- "[" *> (parseStringBS <$> takeWhile1_ (notInClass "]\n")) <* ("]" <* optional ":")
+ label <- "[" *> (parseParagraph <$> takeWhile1_ (`notElem` ("]\n" :: String))) <* ("]" <* optional ":")
c <- takeLine
(cs, items) <- more indent p
- let contents = parseString . dropNLs . unlines $ c : cs
+ let contents = parseText . dropNLs . T.unlines $ c : cs
return $ case items of
Left x -> [(label, contents `docAppend` x)]
Right i -> (label, contents) : i
-- | Drops all trailing newlines.
-dropNLs :: String -> String
-dropNLs = reverse . dropWhile (== '\n') . reverse
+dropNLs :: Text -> Text
+dropNLs = T.dropWhileEnd (== '\n')
-- | Main worker for 'innerList' and 'definitionList'.
-- We need the 'Either' here to be able to tell in the respective functions
-- whether we're dealing with the next list or a nested paragraph.
-more :: Monoid a => BS.ByteString -> Parser a
- -> Parser ([String], Either (DocH mod Identifier) a)
-more indent item = innerParagraphs indent
- <|> moreListItems indent item
- <|> moreContent indent item
- <|> pure ([], Right mempty)
+more :: Monoid a => Text -> Parser a
+ -> Parser ([Text], Either (DocH mod Identifier) a)
+more indent item = choice' [ innerParagraphs indent
+ , moreListItems indent item
+ , moreContent indent item
+ , pure ([], Right mempty)
+ ]
-- | Used by 'innerList' and 'definitionList' to parse any nested paragraphs.
-innerParagraphs :: BS.ByteString
- -> Parser ([String], Either (DocH mod Identifier) a)
+innerParagraphs :: Text
+ -> Parser ([Text], Either (DocH mod Identifier) a)
innerParagraphs indent = (,) [] . Left <$> ("\n" *> indentedParagraphs indent)
-- | Attempts to fetch the next list if possibly. Used by 'innerList' and
-- 'definitionList' to recursively grab lists that aren't separated by a whole
-- paragraph.
-moreListItems :: BS.ByteString -> Parser a
- -> Parser ([String], Either (DocH mod Identifier) a)
+moreListItems :: Text -> Parser a
+ -> Parser ([Text], Either (DocH mod Identifier) a)
moreListItems indent item = (,) [] . Right <$> indentedItem
where
- indentedItem = string indent *> skipSpace *> item
+ indentedItem = string indent *> Parsec.spaces *> item
-- | Helper for 'innerList' and 'definitionList' which simply takes
-- a line of text and attempts to parse more list content with 'more'.
-moreContent :: Monoid a => BS.ByteString -> Parser a
- -> Parser ([String], Either (DocH mod Identifier) a)
+moreContent :: Monoid a => Text -> Parser a
+ -> Parser ([Text], Either (DocH mod Identifier) a)
moreContent indent item = first . (:) <$> nonEmptyLine <*> more indent item
-- | Parses an indented paragraph.
-- The indentation is 4 spaces.
-indentedParagraphs :: BS.ByteString -> Parser (DocH mod Identifier)
+indentedParagraphs :: Text -> Parser (DocH mod Identifier)
indentedParagraphs indent =
- (concat <$> dropFrontOfPara indent') >>= parseParagraphs
+ (T.unpack . T.concat <$> dropFrontOfPara indent') >>= parseParagraphs
where
- indent' = string $ BS.append indent " "
+ indent' = string $ indent <> " "
-- | Grab as many fully indented paragraphs as we can.
-dropFrontOfPara :: Parser BS.ByteString -> Parser [String]
+dropFrontOfPara :: Parser Text -> Parser [Text]
dropFrontOfPara sp = do
- currentParagraph <- some (sp *> takeNonEmptyLine)
+ currentParagraph <- some (try (sp *> takeNonEmptyLine))
followingParagraphs <-
- skipHorizontalSpace *> nextPar -- we have more paragraphs to take
- <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline
- <|> endOfInput *> return [] -- nothing more to take at all
+ choice' [ skipHorizontalSpace *> nextPar -- we have more paragraphs to take
+ , skipHorizontalSpace *> nlList -- end of the ride, remember the newline
+ , Parsec.eof *> return [] -- nothing more to take at all
+ ]
return (currentParagraph ++ followingParagraphs)
where
nextPar = (++) <$> nlList <*> dropFrontOfPara sp
nlList = "\n" *> return ["\n"]
-nonSpace :: BS.ByteString -> Parser BS.ByteString
+nonSpace :: Text -> Parser Text
nonSpace xs
- | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line"
+ | T.all isSpace xs = fail "empty line"
| otherwise = return xs
-- | Takes a non-empty, not fully whitespace line.
--
-- Doesn't discard the trailing newline.
-takeNonEmptyLine :: Parser String
+takeNonEmptyLine :: Parser Text
takeNonEmptyLine = do
- (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
+ l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace
+ _ <- "\n"
+ pure (l <> "\n")
-- | Takes indentation of first non-empty line.
--
-- More precisely: skips all whitespace-only lines and returns indentation
-- (horizontal space, might be empty) of that non-empty line.
-takeIndent :: Parser BS.ByteString
+takeIndent :: Parser Text
takeIndent = do
indent <- takeHorizontalSpace
- "\n" *> takeIndent <|> return indent
+ choice' [ "\n" *> takeIndent
+ , return indent
+ ]
-- | Blocks of text of the form:
--
@@ -647,97 +684,98 @@ takeIndent = do
-- >> baz
--
birdtracks :: Parser (DocH mod a)
-birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line
+birdtracks = DocCodeBlock . DocString . T.unpack . T.intercalate "\n" . stripSpace <$> some line
where
- line = skipHorizontalSpace *> ">" *> takeLine
+ line = try (skipHorizontalSpace *> ">" *> takeLine)
-stripSpace :: [String] -> [String]
+stripSpace :: [Text] -> [Text]
stripSpace = fromMaybe <*> mapM strip'
where
- strip' (' ':xs') = Just xs'
- strip' "" = Just ""
- strip' _ = Nothing
+ strip' t = case T.uncons t of
+ Nothing -> Just ""
+ Just (' ',t') -> Just t'
+ _ -> Nothing
-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
-- Consecutive examples are accepted.
examples :: Parser (DocH mod a)
-examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
+examples = DocExamples <$> (many (try (skipHorizontalSpace *> "\n")) *> go)
where
go :: Parser [Example]
go = do
- prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
+ prefix <- takeHorizontalSpace <* ">>>"
expr <- takeLine
(rs, es) <- resultAndMoreExamples
return (makeExample prefix expr rs : es)
where
- resultAndMoreExamples :: Parser ([String], [Example])
- resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
+ resultAndMoreExamples :: Parser ([Text], [Example])
+ resultAndMoreExamples = choice' [ moreExamples, result, pure ([], []) ]
where
- moreExamples :: Parser ([String], [Example])
+ moreExamples :: Parser ([Text], [Example])
moreExamples = (,) [] <$> go
- result :: Parser ([String], [Example])
+ result :: Parser ([Text], [Example])
result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples
- makeExample :: String -> String -> [String] -> Example
+ makeExample :: Text -> Text -> [Text] -> Example
makeExample prefix expression res =
- Example (strip expression) result
+ Example (T.unpack (T.strip expression)) result
where
- result = map (substituteBlankLine . tryStripPrefix) res
+ result = map (T.unpack . substituteBlankLine . tryStripPrefix) res
- tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
+ tryStripPrefix xs = fromMaybe xs (T.stripPrefix prefix xs)
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine xs = xs
-nonEmptyLine :: Parser String
-nonEmptyLine = mfilter (any (not . isSpace)) takeLine
+nonEmptyLine :: Parser Text
+nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine)
-takeLine :: Parser String
-takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
+takeLine :: Parser Text
+takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine)
endOfLine :: Parser ()
-endOfLine = void "\n" <|> endOfInput
+endOfLine = void "\n" <|> Parsec.eof
-- | Property parser.
--
-- >>> snd <$> parseOnly property "prop> hello world"
-- Right (DocProperty "hello world")
property :: Parser (DocH mod a)
-property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
+property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n"))
-- |
-- Paragraph level codeblock. Anything between the two delimiting \@ is parsed
-- for markup.
codeblock :: Parser (DocH mod Identifier)
codeblock =
- DocCodeBlock . parseStringBS . dropSpaces
+ DocCodeBlock . parseParagraph . dropSpaces
<$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
where
dropSpaces xs =
- let rs = decodeUtf8 xs
- in case splitByNl rs of
+ case splitByNl xs of
[] -> xs
- ys -> case last ys of
- ' ':_ -> case mapM dropSpace ys of
- Nothing -> xs
- Just zs -> encodeUtf8 $ intercalate "\n" zs
+ ys -> case T.uncons (last ys) of
+ Just (' ',_) -> case mapM dropSpace ys of
+ Nothing -> xs
+ Just zs -> T.intercalate "\n" zs
_ -> xs
-- This is necessary because ‘lines’ swallows up a trailing newline
-- and we lose information about whether the last line belongs to @ or to
-- text which we need to decide whether we actually want to be dropping
-- anything at all.
- splitByNl = unfoldr (\x -> case x of
- '\n':s -> Just (span (/= '\n') s)
- _ -> Nothing)
- . ('\n' :)
+ splitByNl = unfoldr (\x -> case T.uncons x of
+ Just ('\n',x') -> Just (T.span (/= '\n') x')
+ _ -> Nothing)
+ . ("\n" <>)
- dropSpace "" = Just ""
- dropSpace (' ':xs) = Just xs
- dropSpace _ = Nothing
+ dropSpace t = case T.uncons t of
+ Nothing -> Just ""
+ Just (' ',t') -> Just t'
+ _ -> Nothing
- block' = scan False p
+ block' = scan p False
where
p isNewline c
| isNewline && c == '@' = Nothing
@@ -745,11 +783,11 @@ codeblock =
| otherwise = Just $ c == '\n'
hyperlink :: Parser (DocH mod a)
-hyperlink = angleBracketLink <|> markdownLink <|> autoUrl
+hyperlink = choice' [ angleBracketLink, markdownLink, autoUrl ]
angleBracketLink :: Parser (DocH mod a)
angleBracketLink =
- DocHyperlink . makeLabeled Hyperlink . decodeUtf8
+ DocHyperlink . makeLabeled Hyperlink
<$> disallowNewline ("<" *> takeUntil ">")
markdownLink :: Parser (DocH mod a)
@@ -759,7 +797,7 @@ linkParser :: Parser Hyperlink
linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
where
label :: Parser (Maybe String)
- label = Just . strip . decode <$> ("[" *> takeUntil "]")
+ label = Just . decode . T.strip <$> ("[" *> takeUntil "]")
whitespace :: Parser ()
whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace)
@@ -770,19 +808,25 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url)
rejectWhitespace :: MonadPlus m => m String -> m String
rejectWhitespace = mfilter (all (not . isSpace))
- decode :: BS.ByteString -> String
- decode = removeEscapes . decodeUtf8
+ decode :: Text -> String
+ decode = T.unpack . removeEscapes
-- | Looks for URL-like things to automatically hyperlink even if they
-- weren't marked as links.
autoUrl :: Parser (DocH mod a)
autoUrl = mkLink <$> url
where
- url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
- mkLink :: BS.ByteString -> DocH mod a
- mkLink s = case unsnoc s of
- Just (xs, x) | inClass ",.!?" x -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) `docAppend` DocString [x]
- _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
+ url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace))
+
+ mkLink :: Text -> DocH mod a
+ mkLink s = case T.unsnoc s of
+ Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x]
+ _ -> DocHyperlink (mkHyperlink s)
+
+ mkHyperlink :: Text -> Hyperlink
+ mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing
+
+
-- | Parses strings between identifier delimiters. Consumes all input that it
-- deems to be valid in an identifier. Note that it simply blindly consumes
@@ -790,14 +834,14 @@ autoUrl = mkLink <$> url
parseValid :: Parser String
parseValid = p some
where
- idChar = satisfyUnicode (\c -> isAlphaNum c || isSymbolChar c || c == '_')
+ idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_')
p p' = do
vs <- p' idChar
c <- peekChar'
case c of
'`' -> return vs
- '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs
+ '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ]
_ -> fail "outofvalid"
-- | Parses identifiers with help of 'parseValid'. Asks GHC for
@@ -809,4 +853,4 @@ identifier = do
e <- idDelim
return $ DocIdentifier (o, vid, e)
where
- idDelim = satisfy (\c -> c == '\'' || c == '`')
+ idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`')
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
index ff6101da..585c76bb 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs
@@ -1,48 +1,24 @@
-{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-}
-module Documentation.Haddock.Parser.Monad (
- module Documentation.Haddock.Parser.Monad
-, Attoparsec.isDigit
-, Attoparsec.isDigit_w8
-, Attoparsec.isAlpha_iso8859_15
-, Attoparsec.isAlpha_ascii
-, Attoparsec.isSpace
-, Attoparsec.isSpace_w8
-, Attoparsec.inClass
-, Attoparsec.notInClass
-, Attoparsec.isEndOfLine
-, Attoparsec.isHorizontalSpace
-, Attoparsec.choice
-, Attoparsec.count
-, Attoparsec.option
-, Attoparsec.many'
-, Attoparsec.many1
-, Attoparsec.many1'
-, Attoparsec.manyTill
-, Attoparsec.manyTill'
-, Attoparsec.sepBy
-, Attoparsec.sepBy'
-, Attoparsec.sepBy1
-, Attoparsec.sepBy1'
-, Attoparsec.skipMany
-, Attoparsec.skipMany1
-, Attoparsec.eitherP
-) where
-
-import Control.Applicative
-import Control.Monad
-import Data.String
-import Data.ByteString (ByteString, length)
-import qualified Data.ByteString.Lazy as LB
-import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec
-import qualified Data.Attoparsec.Combinator as Attoparsec
-import Control.Monad.Trans.State
-import qualified Control.Monad.Trans.Class as Trans
-import Data.Word
-import Data.Bits
-import Data.Tuple
-
-import Documentation.Haddock.Types (Version)
-import Documentation.Haddock.Utf8 (encodeUtf8, decodeUtf8)
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+
+module Documentation.Haddock.Parser.Monad where
+
+import qualified Text.Parsec.Char as Parsec
+import qualified Text.Parsec as Parsec
+
+import qualified Data.Text as T
+import Data.Text ( Text )
+
+import Data.String ( IsString(..) )
+import Data.Bits ( Bits(..) )
+import Data.Char ( ord )
+import Data.List ( foldl' )
+
+import Documentation.Haddock.Types ( Version )
newtype ParserState = ParserState {
parserStateSince :: Maybe Version
@@ -51,120 +27,65 @@ newtype ParserState = ParserState {
initialParserState :: ParserState
initialParserState = ParserState Nothing
-newtype Parser a = Parser (StateT ParserState Attoparsec.Parser a)
- deriving (Functor, Applicative, Alternative, Monad, MonadPlus)
-
-instance (a ~ ByteString) => IsString (Parser a) where
- fromString = lift . fromString
-
-parseOnly :: Parser a -> ByteString -> Either String (ParserState, a)
-parseOnly (Parser p) = fmap swap . Attoparsec.parseOnly (runStateT p initialParserState)
+setSince :: Version -> Parser ()
+setSince since = Parsec.modifyState (\st -> st {parserStateSince = Just since})
-lift :: Attoparsec.Parser a -> Parser a
-lift = Parser . Trans.lift
+type Parser = Parsec.Parsec Text ParserState
-setParserState :: ParserState -> Parser ()
-setParserState = Parser . put
+instance (a ~ Text) => IsString (Parser a) where
+ fromString = fmap T.pack . Parsec.string
-setSince :: Version -> Parser ()
-setSince since = Parser $ modify (\st -> st {parserStateSince = Just since})
-
-char :: Char -> Parser Char
-char = lift . Attoparsec.char
-
-char8 :: Char -> Parser Word8
-char8 = lift . Attoparsec.char8
-
--- | Peek a unicode character and return the number of bytes that it took up
-peekUnicode :: Parser (Char, Int)
-peekUnicode = lift $ Attoparsec.lookAhead $ do
-
- -- attoparsec's take fails on shorter inputs rather than truncate
- bs <- Attoparsec.choice (map Attoparsec.take [4,3,2,1])
-
- let c = head . decodeUtf8 $ bs
- n = Data.ByteString.length . encodeUtf8 $ [c]
- pure (c, fromIntegral n)
-
--- | Like 'satisfy', but consuming a unicode character
-satisfyUnicode :: (Char -> Bool) -> Parser Char
-satisfyUnicode predicate = do
- (c,n) <- peekUnicode
- if predicate c
- then Documentation.Haddock.Parser.Monad.take n *> pure c
- else fail "satsifyUnicode"
-
-anyChar :: Parser Char
-anyChar = lift Attoparsec.anyChar
-
-notChar :: Char -> Parser Char
-notChar = lift . Attoparsec.notChar
-
-satisfy :: (Char -> Bool) -> Parser Char
-satisfy = lift . Attoparsec.satisfy
+parseOnly :: Parser a -> Text -> Either String (ParserState, a)
+parseOnly p t = case Parsec.runParser p' initialParserState "<haddock>" t of
+ Left e -> Left (show e)
+ Right (x,s) -> Right (s,x)
+ where p' = (,) <$> p <*> Parsec.getState
+-- | Always succeeds, but returns 'Nothing' if at the end of input. Does not
+-- consume input.
peekChar :: Parser (Maybe Char)
-peekChar = lift Attoparsec.peekChar
+peekChar = Parsec.optionMaybe . Parsec.try . Parsec.lookAhead $ Parsec.anyChar
+-- | Fails if at the end of input. Does not consume input.
peekChar' :: Parser Char
-peekChar' = lift Attoparsec.peekChar'
-
-digit :: Parser Char
-digit = lift Attoparsec.digit
-
-letter_iso8859_15 :: Parser Char
-letter_iso8859_15 = lift Attoparsec.letter_iso8859_15
-
-letter_ascii :: Parser Char
-letter_ascii = lift Attoparsec.letter_ascii
-
-space :: Parser Char
-space = lift Attoparsec.space
-
-string :: ByteString -> Parser ByteString
-string = lift . Attoparsec.string
-
-stringCI :: ByteString -> Parser ByteString
-stringCI = lift . Attoparsec.stringCI
-
-skipSpace :: Parser ()
-skipSpace = lift Attoparsec.skipSpace
-
-skipWhile :: (Char -> Bool) -> Parser ()
-skipWhile = lift . Attoparsec.skipWhile
-
-take :: Int -> Parser ByteString
-take = lift . Attoparsec.take
-
-scan :: s -> (s -> Char -> Maybe s) -> Parser ByteString
-scan s = lift . Attoparsec.scan s
-
-takeWhile :: (Char -> Bool) -> Parser ByteString
-takeWhile = lift . Attoparsec.takeWhile
-
-takeWhile1 :: (Char -> Bool) -> Parser ByteString
-takeWhile1 = lift . Attoparsec.takeWhile1
-
-takeTill :: (Char -> Bool) -> Parser ByteString
-takeTill = lift . Attoparsec.takeTill
-
-takeByteString :: Parser ByteString
-takeByteString = lift Attoparsec.takeByteString
-
-takeLazyByteString :: Parser LB.ByteString
-takeLazyByteString = lift Attoparsec.takeLazyByteString
-
-endOfLine :: Parser ()
-endOfLine = lift Attoparsec.endOfLine
-
+peekChar' = Parsec.lookAhead Parsec.anyChar
+
+-- | Parses the given string. Returns the parsed string.
+string :: Text -> Parser Text
+string t = Parsec.string (T.unpack t) *> pure t
+
+-- | Scan the input text, accumulating characters as long as the scanning
+-- function returns true.
+scan :: (s -> Char -> Maybe s) -- ^ scan function
+ -> s -- ^ initial state
+ -> Parser Text
+scan f = fmap T.pack . go
+ where go s1 = do { cOpt <- peekChar
+ ; case cOpt >>= f s1 of
+ Nothing -> pure ""
+ Just s2 -> (:) <$> Parsec.anyChar <*> go s2
+ }
+
+-- | Apply a parser for a character zero or more times and collect the result in
+-- a string.
+takeWhile :: Parser Char -> Parser Text
+takeWhile = fmap T.pack . Parsec.many
+
+-- | Apply a parser for a character one or more times and collect the result in
+-- a string.
+takeWhile1 :: Parser Char -> Parser Text
+takeWhile1 = fmap T.pack . Parsec.many1
+
+-- | Parse a decimal number.
decimal :: Integral a => Parser a
-decimal = lift Attoparsec.decimal
+decimal = foldl' step 0 `fmap` Parsec.many1 Parsec.digit
+ where step a c = a * 10 + fromIntegral (ord c - 48)
+-- | Parse a hexadecimal number.
hexadecimal :: (Integral a, Bits a) => Parser a
-hexadecimal = lift Attoparsec.hexadecimal
-
-endOfInput :: Parser ()
-endOfInput = lift Attoparsec.endOfInput
-
-atEnd :: Parser Bool
-atEnd = lift Attoparsec.atEnd
+hexadecimal = foldl' step 0 `fmap` Parsec.many1 Parsec.hexDigit
+ where
+ step a c | w >= 48 && w <= 57 = (a `shiftL` 4) .|. fromIntegral (w - 48)
+ | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87)
+ | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55)
+ where w = ord c
diff --git a/haddock-library/src/Documentation/Haddock/Parser/Util.hs b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
index ab5e5e9e..ffa91b09 100644
--- a/haddock-library/src/Documentation/Haddock/Parser/Util.hs
+++ b/haddock-library/src/Documentation/Haddock/Parser/Util.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
-- |
-- Module : Documentation.Haddock.Parser.Util
-- Copyright : (c) Mateusz Kowalczyk 2013-2014,
@@ -11,62 +11,59 @@
--
-- Various utility functions used by the parser.
module Documentation.Haddock.Parser.Util (
- unsnoc
-, strip
-, takeUntil
-, removeEscapes
-, makeLabeled
-, takeHorizontalSpace
-, skipHorizontalSpace
+ takeUntil,
+ removeEscapes,
+ makeLabeled,
+ takeHorizontalSpace,
+ skipHorizontalSpace,
) where
+import qualified Text.Parsec as Parsec
+
+import qualified Data.Text as T
+import Data.Text (Text)
+
import Control.Applicative
import Control.Monad (mfilter)
-import Documentation.Haddock.Parser.Monad hiding (isHorizontalSpace)
-import Data.ByteString.Char8 (ByteString)
-import qualified Data.ByteString.Char8 as BS
+import Documentation.Haddock.Parser.Monad
import Prelude hiding (takeWhile)
-#if MIN_VERSION_bytestring(0,10,2)
-import Data.ByteString.Char8 (unsnoc)
-#else
-unsnoc :: ByteString -> Maybe (ByteString, Char)
-unsnoc bs
- | BS.null bs = Nothing
- | otherwise = Just (BS.init bs, BS.last bs)
-#endif
+import Data.Char (isSpace)
--- | Remove all leading and trailing whitespace
-strip :: String -> String
-strip = (\f -> f . f) $ dropWhile isSpace . reverse
-
-isHorizontalSpace :: Char -> Bool
-isHorizontalSpace = inClass " \t\f\v\r"
+-- | Characters that count as horizontal space
+horizontalSpace :: [Char]
+horizontalSpace = " \t\f\v\r"
+-- | Skip and ignore leading horizontal space
skipHorizontalSpace :: Parser ()
-skipHorizontalSpace = skipWhile isHorizontalSpace
+skipHorizontalSpace = Parsec.skipMany (Parsec.oneOf horizontalSpace)
-takeHorizontalSpace :: Parser BS.ByteString
-takeHorizontalSpace = takeWhile isHorizontalSpace
+-- | Take leading horizontal space
+takeHorizontalSpace :: Parser Text
+takeHorizontalSpace = takeWhile (Parsec.oneOf horizontalSpace)
-makeLabeled :: (String -> Maybe String -> a) -> String -> a
-makeLabeled f input = case break isSpace $ removeEscapes $ strip input of
- (uri, "") -> f uri Nothing
- (uri, label) -> f uri (Just $ dropWhile isSpace label)
+makeLabeled :: (String -> Maybe String -> a) -> Text -> a
+makeLabeled f input = case T.break isSpace $ removeEscapes $ T.strip input of
+ (uri, "") -> f (T.unpack uri) Nothing
+ (uri, label) -> f (T.unpack uri) (Just . T.unpack $ T.stripStart label)
-- | Remove escapes from given string.
--
-- Only do this if you do not process (read: parse) the input any further.
-removeEscapes :: String -> String
-removeEscapes "" = ""
-removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs
-removeEscapes ('\\':xs) = removeEscapes xs
-removeEscapes (x:xs) = x : removeEscapes xs
+removeEscapes :: Text -> Text
+removeEscapes = T.unfoldr go
+ where
+ go :: Text -> Maybe (Char, Text)
+ go xs = case T.uncons xs of
+ Just ('\\',ys) -> T.uncons ys
+ unconsed -> unconsed
-takeUntil :: ByteString -> Parser ByteString
-takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
+-- | Consume characters from the input up to and including the given pattern.
+-- Return everything consumed except for the end pattern itself.
+takeUntil :: Text -> Parser Text
+takeUntil end_ = T.dropEnd (T.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
where
- end = BS.unpack end_
+ end = T.unpack end_
p :: (Bool, String) -> Char -> Maybe (Bool, String)
p acc c = case acc of
@@ -75,9 +72,8 @@ takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome
(_, x:xs) | x == c -> Just (False, xs)
_ -> Just (c == '\\', end)
- dropEnd = BS.reverse . BS.drop (length end) . BS.reverse
- requireEnd = mfilter (BS.isSuffixOf end_)
+ requireEnd = mfilter (T.isSuffixOf end_)
gotSome xs
- | BS.null xs = fail "didn't get any content"
+ | T.null xs = fail "didn't get any content"
| otherwise = return xs