diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Documentation/Haddock.hs | 3 | ||||
-rw-r--r-- | src/Haddock.hs | 2 | ||||
-rw-r--r-- | src/Haddock/Doc.hs | 51 | ||||
-rw-r--r-- | src/Haddock/Interface/LexParseRn.hs | 20 | ||||
-rw-r--r-- | src/Haddock/Interface/ParseModuleHeader.hs | 2 | ||||
-rw-r--r-- | src/Haddock/Parser.hs | 529 | ||||
-rw-r--r-- | src/Haddock/Parser/Util.hs | 28 | ||||
-rw-r--r-- | src/Haddock/Types.hs | 58 | ||||
-rw-r--r-- | src/Haddock/Utf8.hs | 74 |
9 files changed, 52 insertions, 715 deletions
diff --git a/src/Documentation/Haddock.hs b/src/Documentation/Haddock.hs index 36115a2a..655a9723 100644 --- a/src/Documentation/Haddock.hs +++ b/src/Documentation/Haddock.hs @@ -33,7 +33,8 @@ module Documentation.Haddock ( InstHead, -- * Documentation comments - Doc(..), + Doc, + DocH(..), Example(..), Hyperlink(..), DocMarkup(..), diff --git a/src/Haddock.hs b/src/Haddock.hs index 66dfb168..78844c96 100644 --- a/src/Haddock.hs +++ b/src/Haddock.hs @@ -25,7 +25,7 @@ import Haddock.Backends.Xhtml.Themes (getThemes) import Haddock.Backends.LaTeX import Haddock.Backends.Hoogle import Haddock.Interface -import Haddock.Parser.Util +import Haddock.Parser import Haddock.Types import Haddock.Version import Haddock.InterfaceFile diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs index d812aee2..79a59ac2 100644 --- a/src/Haddock/Doc.hs +++ b/src/Haddock/Doc.hs @@ -1,55 +1,18 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} -module Haddock.Doc ( - docAppend -, docParagraph -, combineDocumentation -) where +module Haddock.Doc ( module Documentation.Haddock.Doc + , docCodeBlock + , combineDocumentation + ) where import Data.Maybe import Data.Monoid +import Documentation.Haddock.Doc import Haddock.Types -import Data.Char (isSpace) - --- We put it here so that we can avoid a circular import --- anything relevant imports this module anyway -instance Monoid (DocH mod id) where - mempty = DocEmpty - mappend = docAppend combineDocumentation :: Documentation name -> Maybe (Doc name) combineDocumentation (Documentation Nothing Nothing) = Nothing -combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc) - -docAppend :: DocH mod id -> DocH mod id -> DocH mod id -docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2) -docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d -docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 ++ ds2) -docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 ++ ds2) -docAppend DocEmpty d = d -docAppend d DocEmpty = d -docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2) -docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2)) -docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d -docAppend d1 d2 = DocAppend d1 d2 - --- again to make parsing easier - we spot a paragraph whose only item --- is a DocMonospaced and make it into a DocCodeBlock -docParagraph :: DocH mod id -> DocH mod id -docParagraph (DocMonospaced p) - = DocCodeBlock (docCodeBlock p) -docParagraph (DocAppend (DocString s1) (DocMonospaced p)) - | all isSpace s1 - = DocCodeBlock (docCodeBlock p) -docParagraph (DocAppend (DocString s1) - (DocAppend (DocMonospaced p) (DocString s2))) - | all isSpace s1 && all isSpace s2 - = DocCodeBlock (docCodeBlock p) -docParagraph (DocAppend (DocMonospaced p) (DocString s2)) - | all isSpace s2 - = DocCodeBlock (docCodeBlock p) -docParagraph p - = DocParagraph p - +combineDocumentation (Documentation mDoc mWarning) = + Just (fromMaybe mempty mWarning <> fromMaybe mempty mDoc) -- Drop trailing whitespace from @..@ code blocks. Otherwise this: -- diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs index 01276310..d9d4ae58 100644 --- a/src/Haddock/Interface/LexParseRn.hs +++ b/src/Haddock/Interface/LexParseRn.hs @@ -18,31 +18,29 @@ module Haddock.Interface.LexParseRn , processModuleHeader ) where -import qualified Data.IntSet as IS -import Haddock.Types -import Haddock.Parser.Util -import Haddock.Interface.ParseModuleHeader -import Haddock.Doc - import Control.Applicative +import Data.IntSet (toList) import Data.List import Data.Maybe +import Data.Monoid ((<>)) +import DynFlags (ExtensionFlag(..), languageExtensions) import FastString import GHC -import DynFlags (ExtensionFlag(..), languageExtensions) +import Haddock.Interface.ParseModuleHeader +import Haddock.Parser +import Haddock.Types import Name -import Outputable +import Outputable (showPpr) import RdrName processDocStrings :: DynFlags -> GlobalRdrEnv -> [HsDocString] -> ErrMsgM (Maybe (Doc Name)) processDocStrings dflags gre strs = do docs <- catMaybes <$> mapM (processDocStringParas dflags gre) strs - let doc = foldl' docAppend DocEmpty docs + let doc = foldl' (<>) DocEmpty docs case doc of DocEmpty -> return Nothing _ -> return (Just doc) - processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name)) processDocStringParas = process parseParasMaybe @@ -86,7 +84,7 @@ processModuleHeader dflags gre safety mayStr = do let flags :: [ExtensionFlag] -- We remove the flags implied by the language setting and we display the language instead - flags = map toEnum (IS.toList $ extensionFlags dflags) \\ languageExtensions (language dflags) + flags = map toEnum (toList $ extensionFlags dflags) \\ languageExtensions (language dflags) return (hmi { hmi_safety = Just $ showPpr dflags safety , hmi_language = language dflags , hmi_extensions = flags diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs index c155a83b..0be2511f 100644 --- a/src/Haddock/Interface/ParseModuleHeader.hs +++ b/src/Haddock/Interface/ParseModuleHeader.hs @@ -12,7 +12,7 @@ module Haddock.Interface.ParseModuleHeader (parseModuleHeader) where import Haddock.Types -import Haddock.Parser.Util +import Haddock.Parser import RdrName import DynFlags diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs index 1489ae84..720f442b 100644 --- a/src/Haddock/Parser.hs +++ b/src/Haddock/Parser.hs @@ -13,504 +13,35 @@ -- Stability : experimental -- Portability : portable -module Haddock.Parser ( parseString, parseParas - , overIdentifier, toRegular - , takeUntil +module Haddock.Parser ( module Documentation.Haddock.Parser + , parseParasMaybe + , parseStringMaybe + , parseIdent ) where -import Prelude hiding (takeWhile) -import Control.Arrow (first) -import Control.Monad (void, mfilter) -import Control.Applicative -import Data.Attoparsec.ByteString.Char8 hiding (parse, take, endOfLine) -import qualified Data.ByteString.Char8 as BS -import Data.Char (chr, isAsciiUpper) -import Data.List (stripPrefix, intercalate, unfoldr) -import Data.Maybe (fromMaybe) -import Data.Monoid -import Haddock.Doc -import Haddock.Types hiding (Doc) -import Haddock.Utf8 -import Data.ByteString.Char8 (ByteString) - --- | Identifier string surrounded with opening and closing quotes/backticks. -type IString = (Char, String, Char) - --- | Drops the quotes/backticks around all identifiers, as if they --- were valid but still 'String's. -toRegular :: DocH mod IString -> DocH mod String -toRegular = fmap (\(_, x, _) -> x) - --- | Maps over 'DocIdentifier's over 'String' with potentially failing --- conversion using user-supplied function. If the conversion fails, --- the identifier is deemed to not be valid and is treated as a --- regular string. -overIdentifier :: (String -> Maybe a) - -> DocH mod IString - -> DocH mod a -overIdentifier f d = g d - where - g (DocIdentifier (o, x, e)) = case f x of - Nothing -> DocString $ o : x ++ [e] - Just x' -> DocIdentifier x' - g DocEmpty = DocEmpty - g (DocAppend x x') = DocAppend (g x) (g x') - g (DocString x) = DocString x - g (DocParagraph x) = DocParagraph $ g x - g (DocIdentifierUnchecked x) = DocIdentifierUnchecked x - g (DocModule x) = DocModule x - g (DocWarning x) = DocWarning $ g x - g (DocEmphasis x) = DocEmphasis $ g x - g (DocMonospaced x) = DocMonospaced $ g x - g (DocBold x) = DocBold $ g x - g (DocUnorderedList x) = DocUnorderedList $ fmap g x - g (DocOrderedList x) = DocOrderedList $ fmap g x - g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x - g (DocCodeBlock x) = DocCodeBlock $ g x - g (DocHyperlink x) = DocHyperlink x - g (DocPic x) = DocPic x - g (DocAName x) = DocAName x - g (DocProperty x) = DocProperty x - g (DocExamples x) = DocExamples x - g (DocHeader (Header l x)) = DocHeader . Header l $ g x - -parse :: Parser a -> BS.ByteString -> a -parse p = either err id . parseOnly (p <* endOfInput) - where - err = error . ("Haddock.Parser.parse: " ++) - --- | Main entry point to the parser. Appends the newline character --- to the input string. -parseParas :: String -- ^ String to parse - -> DocH mod IString -parseParas = parse (p <* skipSpace) . encodeUtf8 . (++ "\n") - where - p :: Parser (DocH mod IString) - p = mconcat <$> paragraph `sepBy` many (skipHorizontalSpace *> "\n") - --- | Parse a text paragraph. Actually just a wrapper over 'parseStringBS' which --- drops leading whitespace and encodes the string to UTF8 first. -parseString :: String -> DocH mod IString -parseString = parseStringBS . encodeUtf8 . dropWhile isSpace - -parseStringBS :: BS.ByteString -> DocH mod IString -parseStringBS = parse p - where - p :: Parser (DocH mod IString) - p = mconcat <$> many (monospace <|> anchor <|> identifier - <|> moduleName <|> picture <|> hyperlink <|> autoUrl <|> bold - <|> emphasis <|> encodedChar <|> string' <|> skipSpecialChar) - --- | Parses and processes --- <https://en.wikipedia.org/wiki/Numeric_character_reference Numeric character references> --- --- >>> parseOnly encodedChar "ABC" --- Right (DocString "ABC") -encodedChar :: Parser (DocH mod a) -encodedChar = "&#" *> c <* ";" - where - c = DocString . return . chr <$> num - num = hex <|> decimal - hex = ("x" <|> "X") *> hexadecimal - -specialChar :: [Char] -specialChar = "_/<@\"&'`" - --- | Plain, regular parser for text. Called as one of the last parsers --- to ensure that we have already given a chance to more meaningful parsers --- before capturing their characers. -string' :: Parser (DocH mod a) -string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar) - where - unescape "" = "" - unescape ('\\':x:xs) = x : unescape xs - unescape (x:xs) = x : unescape xs - --- | Skips a single special character and treats it as a plain string. --- This is done to skip over any special characters belonging to other --- elements but which were not deemed meaningful at their positions. -skipSpecialChar :: Parser (DocH mod a) -skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar) - --- | Emphasis parser. --- --- >>> parseOnly emphasis "/Hello world/" --- Right (DocEmphasis (DocString "Hello world")) -emphasis :: Parser (DocH mod IString) -emphasis = DocEmphasis . parseStringBS <$> - mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/") - --- | Bold parser. --- --- >>> parseOnly bold "__Hello world__" --- Right (DocBold (DocString "Hello world")) -bold :: Parser (DocH mod IString) -bold = DocBold . parseStringBS <$> disallowNewline ("__" *> takeUntil "__") - -disallowNewline :: Parser BS.ByteString -> Parser BS.ByteString -disallowNewline = mfilter ('\n' `BS.notElem`) - --- | Like `takeWhile`, but unconditionally take escaped characters. -takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString -takeWhile_ p = scan False p_ - where - p_ escaped c - | escaped = Just False - | not $ p c = Nothing - | otherwise = Just (c == '\\') - --- | Like `takeWhile1`, but unconditionally take escaped characters. -takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString -takeWhile1_ = mfilter (not . BS.null) . takeWhile_ - --- | Text anchors to allow for jumping around the generated documentation. --- --- >>> parseOnly anchor "#Hello world#" --- Right (DocAName "Hello world") -anchor :: Parser (DocH mod a) -anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#") - --- | Monospaced strings. --- --- >>> parseOnly monospace "@cruel@" --- Right (DocMonospaced (DocString "cruel")) -monospace :: Parser (DocH mod IString) -monospace = DocMonospaced . parseStringBS <$> ("@" *> takeWhile1_ (/= '@') <* "@") - -moduleName :: Parser (DocH mod a) -moduleName = DocModule <$> (char '"' *> modid <* char '"') - where - modid = intercalate "." <$> conid `sepBy1` "." - conid = (:) - <$> satisfy isAsciiUpper - -- NOTE: According to Haskell 2010 we shouldd actually only - -- accept {small | large | digit | ' } here. But as we can't - -- match on unicode characters, this is currently not possible. - <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n")) - --- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify --- a title for the picture. --- --- >>> parseOnly picture "<<hello.png>>" --- Right (DocPic (Picture "hello.png" Nothing)) --- >>> parseOnly picture "<<hello.png world>>" --- Right (DocPic (Picture "hello.png" (Just "world"))) -picture :: Parser (DocH mod a) -picture = DocPic . makeLabeled Picture . decodeUtf8 - <$> disallowNewline ("<<" *> takeUntil ">>") - --- | Paragraph parser, called by 'parseParas'. -paragraph :: Parser (DocH mod IString) -paragraph = examples <|> skipSpace *> (list <|> birdtracks <|> codeblock - <|> property <|> header - <|> textParagraph) - -header :: Parser (DocH mod IString) -header = do - let psers = map (string . encodeUtf8 . concat . flip replicate "=") [6, 5 .. 1] - pser = foldl1 (<|>) psers - delim <- decodeUtf8 <$> pser - line <- skipHorizontalSpace *> nonEmptyLine >>= return . parseString - rest <- paragraph <|> return mempty - return $ docAppend (DocParagraph (DocHeader (Header (length delim) line))) rest - -textParagraph :: Parser (DocH mod IString) -textParagraph = docParagraph . parseString . intercalate "\n" <$> many1 nonEmptyLine - --- | List parser, called by 'paragraph'. -list :: Parser (DocH mod IString) -list = DocUnorderedList <$> unorderedList - <|> DocOrderedList <$> orderedList - <|> DocDefList <$> definitionList - --- | Parses unordered (bullet) lists. -unorderedList :: Parser [DocH mod IString] -unorderedList = ("*" <|> "-") *> innerList unorderedList - --- | Parses ordered lists (numbered or dashed). -orderedList :: Parser [DocH mod IString] -orderedList = (paren <|> dot) *> innerList orderedList - where - dot = (decimal :: Parser Int) <* "." - paren = "(" *> decimal <* ")" - --- | Generic function collecting any further lines belonging to the --- list entry and recursively collecting any further lists in the --- same paragraph. Usually used as --- --- > someListFunction = listBeginning *> innerList someListFunction -innerList :: Parser [DocH mod IString] -> Parser [DocH mod IString] -innerList item = do - c <- takeLine - (cs, items) <- more item - let contents = docParagraph . parseString . dropNLs . unlines $ c : cs - return $ case items of - Left p -> [contents `joinPara` p] - Right i -> contents : i - --- | Parses definition lists. -definitionList :: Parser [(DocH mod IString, DocH mod IString)] -definitionList = do - label <- "[" *> (parseStringBS <$> takeWhile1 (`notElem` "]\n")) <* "]" - c <- takeLine - (cs, items) <- more definitionList - let contents = parseString . dropNLs . unlines $ c : cs - return $ case items of - Left p -> [(label, contents `joinPara` p)] - Right i -> (label, contents) : i - --- | If possible, appends two 'Doc's under a 'DocParagraph' rather than --- outside of it. This allows to get structures like --- --- @DocParagraph (DocAppend … …)@ --- --- rather than --- --- @DocAppend (DocParagraph …) …@ -joinPara :: DocH mod id -> DocH mod id -> DocH mod id -joinPara (DocParagraph p) c = docParagraph $ docAppend p c -joinPara d p = docAppend d p - --- | Drops all trailing newlines. -dropNLs :: String -> String -dropNLs = reverse . dropWhile (== '\n') . reverse - --- | Main worker for 'innerList' and 'definitionList'. --- We need the 'Either' here to be able to tell in the respective functions --- whether we're dealing with the next list or a nested paragraph. -more :: Monoid a => Parser a - -> Parser ([String], Either (DocH mod IString) a) -more item = innerParagraphs <|> moreListItems item - <|> moreContent item <|> pure ([], Right mempty) - --- | Use by 'innerList' and 'definitionList' to parse any nested paragraphs. -innerParagraphs :: Parser ([String], Either (DocH mod IString) a) -innerParagraphs = (,) [] . Left <$> ("\n" *> indentedParagraphs) - --- | Attemps to fetch the next list if possibly. Used by 'innerList' and --- 'definitionList' to recursivly grab lists that aren't separated by a whole --- paragraph. -moreListItems :: Parser a - -> Parser ([String], Either (DocH mod IString) a) -moreListItems item = (,) [] . Right <$> (skipSpace *> item) - --- | Helper for 'innerList' and 'definitionList' which simply takes --- a line of text and attempts to parse more list content with 'more'. -moreContent :: Monoid a => Parser a - -> Parser ([String], Either (DocH mod IString) a) -moreContent item = first . (:) <$> nonEmptyLine <*> more item - --- | Runs the 'parseParas' parser on an indented paragraph. --- The indentation is 4 spaces. -indentedParagraphs :: Parser (DocH mod IString) -indentedParagraphs = parseParas . concat <$> dropFrontOfPara " " - --- | Grab as many fully indented paragraphs as we can. -dropFrontOfPara :: Parser BS.ByteString -> Parser [String] -dropFrontOfPara sp = do - currentParagraph <- some (sp *> takeNonEmptyLine) - followingParagraphs <- - skipHorizontalSpace *> nextPar -- we have more paragraphs to take - <|> skipHorizontalSpace *> nlList -- end of the ride, remember the newline - <|> endOfInput *> return [] -- nothing more to take at all - return (currentParagraph ++ followingParagraphs) - where - nextPar = (++) <$> nlList <*> dropFrontOfPara sp - nlList = "\n" *> return ["\n"] - -nonSpace :: BS.ByteString -> Parser BS.ByteString -nonSpace xs - | not $ any (not . isSpace) $ decodeUtf8 xs = fail "empty line" - | otherwise = return xs - --- | Takes a non-empty, not fully whitespace line. --- --- Doesn't discard the trailing newline. -takeNonEmptyLine :: Parser String -takeNonEmptyLine = do - (++ "\n") . decodeUtf8 <$> (takeWhile1 (/= '\n') >>= nonSpace) <* "\n" - -birdtracks :: Parser (DocH mod a) -birdtracks = DocCodeBlock . DocString . intercalate "\n" . stripSpace <$> many1 line - where - line = skipHorizontalSpace *> ">" *> takeLine - -stripSpace :: [String] -> [String] -stripSpace = fromMaybe <*> mapM strip' - where - strip' (' ':xs') = Just xs' - strip' "" = Just "" - strip' _ = Nothing - --- | Parses examples. Examples are a paragraph level entitity (separated by an empty line). --- Consecutive examples are accepted. -examples :: Parser (DocH mod a) -examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go) - where - go :: Parser [Example] - go = do - prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>" - expr <- takeLine - (rs, es) <- resultAndMoreExamples - return (makeExample prefix expr rs : es) - where - resultAndMoreExamples :: Parser ([String], [Example]) - resultAndMoreExamples = moreExamples <|> result <|> pure ([], []) - where - moreExamples :: Parser ([String], [Example]) - moreExamples = (,) [] <$> go - - result :: Parser ([String], [Example]) - result = first . (:) <$> nonEmptyLine <*> resultAndMoreExamples - - makeExample :: String -> String -> [String] -> Example - makeExample prefix expression res = - Example (strip expression) result - where - result = map (substituteBlankLine . tryStripPrefix) res - - tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs) - - substituteBlankLine "<BLANKLINE>" = "" - substituteBlankLine xs = xs - -nonEmptyLine :: Parser String -nonEmptyLine = mfilter (any (not . isSpace)) takeLine - -takeLine :: Parser String -takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine - -endOfLine :: Parser () -endOfLine = void "\n" <|> endOfInput - --- | Property parser. --- --- >>> parseOnly property "prop> hello world" --- Right (DocProperty "hello world") -property :: Parser (DocH mod a) -property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n')) - --- | --- Paragraph level codeblock. Anything between the two delimiting @ is parsed --- for markup. -codeblock :: Parser (DocH mod IString) -codeblock = - DocCodeBlock . parseStringBS . dropSpaces - <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@") - where - dropSpaces xs = - let rs = decodeUtf8 xs - in case splitByNl rs of - [] -> xs - ys -> case last ys of - ' ':_ -> case mapM dropSpace ys of - Nothing -> xs - Just zs -> encodeUtf8 $ intercalate "\n" zs - _ -> xs - - -- This is necessary because ‘lines’ swallows up a trailing newline - -- and we lose information about whether the last line belongs to @ or to - -- text which we need to decide whether we actually want to be dropping - -- anything at all. - splitByNl = unfoldr (\case '\n':s -> Just (span (/= '\n') s) - _ -> Nothing) - . ('\n' :) - - dropSpace "" = Just "" - dropSpace (' ':xs) = Just xs - dropSpace _ = Nothing - - block' = scan False p - where - p isNewline c - | isNewline && c == '@' = Nothing - | isNewline && isSpace c = Just isNewline - | otherwise = Just $ c == '\n' - -hyperlink :: Parser (DocH mod a) -hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 - <$> disallowNewline ("<" *> takeUntil ">") - <|> autoUrl - -autoUrl :: Parser (DocH mod a) -autoUrl = mkLink <$> url - where - url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace) - mkLink :: BS.ByteString -> DocH mod a - mkLink s = case BS.unsnoc s of - Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x] - _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing) - --- | Parses strings between identifier delimiters. Consumes all input that it --- deems to be valid in an identifier. Note that it simply blindly consumes --- characters and does no actual validation itself. -parseValid :: Parser String -parseValid = do - vs' <- many' $ utf8String "⋆" <|> return <$> idChar - let vs = concat vs' - c <- peekChar - case c of - Just '`' -> return vs - Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid) - <|> return vs - _ -> fail "outofvalid" - where - idChar = satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:^") - <|> digit <|> letter_ascii - --- | Parses UTF8 strings from ByteString streams. -utf8String :: String -> Parser String -utf8String x = decodeUtf8 <$> string (encodeUtf8 x) - --- | Parses identifiers with help of 'parseValid'. Asks GHC for 'String' from the --- string it deems valid. -identifier :: Parser (DocH mod IString) -identifier = do - o <- idDelim - vid <- parseValid - e <- idDelim - return $ DocIdentifier (o, vid, e) - where - idDelim = char '\'' <|> char '`' - --- | Remove all leading and trailing whitespace -strip :: String -> String -strip = (\f -> f . f) $ dropWhile isSpace . reverse - -skipHorizontalSpace :: Parser () -skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r") - -takeHorizontalSpace :: Parser BS.ByteString -takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r") - -makeLabeled :: (String -> Maybe String -> a) -> String -> a -makeLabeled f input = case break isSpace $ removeEscapes $ strip input of - (uri, "") -> f uri Nothing - (uri, label) -> f uri (Just $ dropWhile isSpace label) - where - -- As we don't parse these any further, we don't do any processing to the - -- string so we at least remove escape character here. Perhaps we should - -- actually be parsing the label at the very least? - removeEscapes "" = "" - removeEscapes ('\\':'\\':xs) = '\\' : removeEscapes xs - removeEscapes ('\\':xs) = removeEscapes xs - removeEscapes (x:xs) = x : removeEscapes xs - -takeUntil :: ByteString -> Parser ByteString -takeUntil end_ = dropEnd <$> requireEnd (scan (False, end) p) >>= gotSome - where - end = BS.unpack end_ - - p :: (Bool, String) -> Char -> Maybe (Bool, String) - p acc c = case acc of - (True, _) -> Just (False, end) - (_, []) -> Nothing - (_, x:xs) | x == c -> Just (False, xs) - _ -> Just (c == '\\', end) - - dropEnd = BS.reverse . BS.drop (length end) . BS.reverse - requireEnd = mfilter (BS.isSuffixOf end_) - - gotSome xs - | BS.null xs = fail "didn't get any content" - | otherwise = return xs +import Documentation.Haddock.Parser +import DynFlags (DynFlags) +import FastString (mkFastString) +import Documentation.Haddock.Types +import Lexer (mkPState, unP, ParseResult(POk)) +import Parser (parseIdentifier) +import RdrName (RdrName) +import SrcLoc (mkRealSrcLoc, unLoc) +import StringBuffer (stringToStringBuffer) + +{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-} +parseParasMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName) +parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas + +{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-} +parseStringMaybe :: DynFlags -> String -> Maybe (DocH mod RdrName) +parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString + +parseIdent :: DynFlags -> String -> Maybe RdrName +parseIdent dflags str0 = + let buffer = stringToStringBuffer str0 + realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0 + pstate = mkPState dflags buffer realSrcLc + in case unP parseIdentifier pstate of + POk _ name -> Just (unLoc name) + _ -> Nothing diff --git a/src/Haddock/Parser/Util.hs b/src/Haddock/Parser/Util.hs deleted file mode 100644 index 29da91c0..00000000 --- a/src/Haddock/Parser/Util.hs +++ /dev/null @@ -1,28 +0,0 @@ -module Haddock.Parser.Util where - -import DynFlags (DynFlags) -import FastString (mkFastString) -import Haddock.Types -import Haddock.Parser -import Lexer (mkPState, unP, ParseResult(POk)) -import Parser (parseIdentifier) -import RdrName (RdrName) -import SrcLoc (mkRealSrcLoc, unLoc) -import StringBuffer (stringToStringBuffer) - -{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-} -parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName) -parseParasMaybe d = Just . overIdentifier (parseIdent d) . parseParas - -{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-} -parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName) -parseStringMaybe d = Just . overIdentifier (parseIdent d) . parseString - -parseIdent :: DynFlags -> String -> Maybe RdrName -parseIdent dflags str0 = - let buffer = stringToStringBuffer str0 - realSrcLc = mkRealSrcLoc (mkFastString "<unknown file>") 0 0 - pstate = mkPState dflags buffer realSrcLc - in case unP parseIdentifier pstate of - POk _ name -> Just (unLoc name) - _ -> Nothing diff --git a/src/Haddock/Types.hs b/src/Haddock/Types.hs index cd615bf4..85b3a592 100644 --- a/src/Haddock/Types.hs +++ b/src/Haddock/Types.hs @@ -19,16 +19,16 @@ module Haddock.Types ( module Haddock.Types , HsDocString, LHsDocString , Fixity(..) + , module Documentation.Haddock.Types ) where -import Data.Foldable -import Data.Traversable import Control.Exception import Control.Arrow hiding ((<+>)) import Control.DeepSeq import Data.Typeable import Data.Map (Map) import qualified Data.Map as Map +import Documentation.Haddock.Types import BasicTypes (Fixity(..)) import GHC hiding (NoLink) import DynFlags (ExtensionFlag, Language) @@ -316,36 +316,6 @@ type LDoc id = Located (Doc id) type Doc id = DocH (ModuleName, OccName) id -data DocH mod id - = DocEmpty - | DocAppend (DocH mod id) (DocH mod id) - | DocString String - | DocParagraph (DocH mod id) - | DocIdentifier id - | DocIdentifierUnchecked mod - | DocModule String - | DocWarning (DocH mod id) - | DocEmphasis (DocH mod id) - | DocMonospaced (DocH mod id) - | DocBold (DocH mod id) - | DocUnorderedList [DocH mod id] - | DocOrderedList [DocH mod id] - | DocDefList [(DocH mod id, DocH mod id)] - | DocCodeBlock (DocH mod id) - | DocHyperlink Hyperlink - | DocPic Picture - | DocAName String - | DocProperty String - | DocExamples [Example] - | DocHeader (Header (DocH mod id)) - deriving (Functor, Foldable, Traversable) - -instance Foldable Header where - foldMap f (Header _ a) = f a - -instance Traversable Header where - traverse f (Header l a) = Header l `fmap` f a - instance (NFData a, NFData mod) => NFData (DocH mod a) where rnf doc = case doc of @@ -376,23 +346,6 @@ instance NFData Name instance NFData OccName instance NFData ModuleName - -data Hyperlink = Hyperlink - { hyperlinkUrl :: String - , hyperlinkLabel :: Maybe String - } deriving (Eq, Show) - - -data Picture = Picture - { pictureUri :: String - , pictureTitle :: Maybe String - } deriving (Eq, Show) - -data Header id = Header - { headerLevel :: Int - , headerTitle :: id - } deriving Functor - instance NFData id => NFData (Header id) where rnf (Header a b) = a `deepseq` b `deepseq` () @@ -402,13 +355,6 @@ instance NFData Hyperlink where instance NFData Picture where rnf (Picture a b) = a `deepseq` b `deepseq` () - -data Example = Example - { exampleExpression :: String - , exampleResult :: [String] - } deriving (Eq, Show) - - instance NFData Example where rnf (Example a b) = a `deepseq` b `deepseq` () diff --git a/src/Haddock/Utf8.hs b/src/Haddock/Utf8.hs deleted file mode 100644 index 1fb0e818..00000000 --- a/src/Haddock/Utf8.hs +++ /dev/null @@ -1,74 +0,0 @@ -module Haddock.Utf8 (encodeUtf8, decodeUtf8) where -import Data.Bits ((.|.), (.&.), shiftL, shiftR) -import qualified Data.ByteString as BS -import Data.Char (chr, ord) -import Data.Word (Word8) - --- | Helper that encodes and packs a 'String' into a 'BS.ByteString' -encodeUtf8 :: String -> BS.ByteString -encodeUtf8 = BS.pack . encode - --- | Helper that unpacks and decodes a 'BS.ByteString' into a 'String' -decodeUtf8 :: BS.ByteString -> String -decodeUtf8 = decode . BS.unpack - --- Copy/pasted functions from Codec.Binary.UTF8.String for encoding/decoding --- | Character to use when 'encode' or 'decode' fail for a byte. -replacementCharacter :: Char -replacementCharacter = '\xfffd' - --- | Encode a Haskell String to a list of Word8 values, in UTF8 format. -encode :: String -> [Word8] -encode = concatMap (map fromIntegral . go . ord) - where - go oc - | oc <= 0x7f = [oc] - - | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6) - , 0x80 + oc .&. 0x3f - ] - - | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12) - , 0x80 + ((oc `shiftR` 6) .&. 0x3f) - , 0x80 + oc .&. 0x3f - ] - | otherwise = [ 0xf0 + (oc `shiftR` 18) - , 0x80 + ((oc `shiftR` 12) .&. 0x3f) - , 0x80 + ((oc `shiftR` 6) .&. 0x3f) - , 0x80 + oc .&. 0x3f - ] - --- | Decode a UTF8 string packed into a list of Word8 values, directly to String -decode :: [Word8] -> String -decode [ ] = "" -decode (c:cs) - | c < 0x80 = chr (fromEnum c) : decode cs - | c < 0xc0 = replacementCharacter : decode cs - | c < 0xe0 = multi1 - | c < 0xf0 = multi_byte 2 0xf 0x800 - | c < 0xf8 = multi_byte 3 0x7 0x10000 - | c < 0xfc = multi_byte 4 0x3 0x200000 - | c < 0xfe = multi_byte 5 0x1 0x4000000 - | otherwise = replacementCharacter : decode cs - where - multi1 = case cs of - c1 : ds | c1 .&. 0xc0 == 0x80 -> - let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f) - in if d >= 0x000080 then toEnum d : decode ds - else replacementCharacter : decode ds - _ -> replacementCharacter : decode cs - - multi_byte :: Int -> Word8 -> Int -> String - multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) - where - aux 0 rs acc - | overlong <= acc && acc <= 0x10ffff && - (acc < 0xd800 || 0xdfff < acc) && - (acc < 0xfffe || 0xffff < acc) = chr acc : decode rs - | otherwise = replacementCharacter : decode rs - - aux n (r:rs) acc - | r .&. 0xc0 == 0x80 = aux (n-1) rs - $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) - - aux _ rs _ = replacementCharacter : decode rs |