diff options
Diffstat (limited to 'haddock-library/src/Documentation/Haddock/Parser.hs')
| -rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser.hs | 130 | 
1 files changed, 45 insertions, 85 deletions
| diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index d79da40b..36c8bb5b 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -27,14 +27,16 @@ module Documentation.Haddock.Parser (  import           Control.Applicative  import           Control.Arrow (first)  import           Control.Monad -import           Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace) -import           Data.List (intercalate, unfoldr, elemIndex, notElem) +import           Data.Char (chr, isUpper, isAlpha, isSpace) +import           Data.List (intercalate, unfoldr, elemIndex)  import           Data.Maybe (fromMaybe, mapMaybe)  import           Data.Monoid  import qualified Data.Set as Set  import           Documentation.Haddock.Doc +import           Documentation.Haddock.Markup ( markup, plainMarkup )  import           Documentation.Haddock.Parser.Monad  import           Documentation.Haddock.Parser.Util +import           Documentation.Haddock.Parser.Identifier  import           Documentation.Haddock.Types  import           Prelude hiding (takeWhile)  import qualified Prelude as P @@ -45,53 +47,26 @@ import           Text.Parsec (try)  import qualified Data.Text as T  import           Data.Text (Text) -#if MIN_VERSION_base(4,9,0) -import           Text.Read.Lex                      (isSymbolChar) -#else -import           Data.Char                          (GeneralCategory (..), -                                                     generalCategory) -#endif  -- $setup  -- >>> :set -XOverloadedStrings -#if !MIN_VERSION_base(4,9,0) --- inlined from base-4.10.0.0 -isSymbolChar :: Char -> Bool -isSymbolChar c = not (isPuncChar c) && case generalCategory c of -    MathSymbol           -> True -    CurrencySymbol       -> True -    ModifierSymbol       -> True -    OtherSymbol          -> True -    DashPunctuation      -> True -    OtherPunctuation     -> c `notElem` ("'\"" :: String) -    ConnectorPunctuation -> c /= '_' -    _                    -> False -  where -    -- | The @special@ character class as defined in the Haskell Report. -    isPuncChar :: Char -> Bool -    isPuncChar = (`elem` (",;()[]{}`" :: String)) -#endif - --- | Identifier string surrounded with opening and closing quotes/backticks. -type Identifier = (Char, String, Char) -  -- | Drops the quotes/backticks around all identifiers, as if they  -- were valid but still 'String's.  toRegular :: DocH mod Identifier -> DocH mod String -toRegular = fmap (\(_, x, _) -> x) +toRegular = fmap (\(Identifier _ _ x _) -> x)  -- | Maps over 'DocIdentifier's over 'String' with potentially failing  -- conversion using user-supplied function. If the conversion fails,  -- the identifier is deemed to not be valid and is treated as a  -- regular string. -overIdentifier :: (String -> Maybe a) +overIdentifier :: (Namespace -> String -> Maybe a)                 -> DocH mod Identifier                 -> DocH mod a  overIdentifier f d = g d    where -    g (DocIdentifier (o, x, e)) = case f x of -      Nothing -> DocString $ o : x ++ [e] +    g (DocIdentifier (Identifier ns o x e)) = case f ns x of +      Nothing -> DocString $ renderNs ns ++ [o] ++ x ++ [e]        Just x' -> DocIdentifier x'      g DocEmpty = DocEmpty      g (DocAppend x x') = DocAppend (g x) (g x') @@ -107,7 +82,7 @@ overIdentifier f d = g d      g (DocOrderedList x) = DocOrderedList $ fmap g x      g (DocDefList x) = DocDefList $ fmap (\(y, z) -> (g y, g z)) x      g (DocCodeBlock x) = DocCodeBlock $ g x -    g (DocHyperlink x) = DocHyperlink x +    g (DocHyperlink (Hyperlink u x)) = DocHyperlink (Hyperlink u (fmap g x))      g (DocPic x) = DocPic x      g (DocMathInline x) = DocMathInline x      g (DocMathDisplay x) = DocMathDisplay x @@ -301,13 +276,20 @@ mathInline = DocMathInline . T.unpack  -- >>> parseString "\\[\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}\\]"  -- DocMathDisplay "\\int_{-\\infty}^{\\infty} e^{-x^2/2} = \\sqrt{2\\pi}"  mathDisplay :: Parser (DocH mod a) -mathDisplay = DocMathDisplay . T.unpack  +mathDisplay = DocMathDisplay . T.unpack                <$> ("\\[" *> takeUntil "\\]") -markdownImage :: Parser (DocH mod a) -markdownImage = fromHyperlink <$> ("!" *> linkParser) +-- | Markdown image parser. As per the commonmark reference recommendation, the +-- description text for an image converted to its a plain string representation. +-- +-- >>> parseString "" +-- DocPic (Picture "www.site.com" (Just "some emphasis in a description")) +markdownImage :: Parser (DocH mod Identifier) +markdownImage = DocPic . fromHyperlink <$> ("!" *> linkParser)    where -    fromHyperlink (Hyperlink url label) = DocPic (Picture url label) +    fromHyperlink (Hyperlink u l) = Picture u (fmap (markup stringMarkup) l) +    stringMarkup = plainMarkup (const "") renderIdent +    renderIdent (Identifier ns l c r) = renderNs ns <> [l] <> c <> [r]  -- | Paragraph parser, called by 'parseParas'.  paragraph :: Parser (DocH mod Identifier) @@ -360,32 +342,34 @@ table = do      parseFirstRow :: Parser Text      parseFirstRow = do          skipHorizontalSpace -        -- upper-left corner is + -        c <- Parsec.char '+' -        cs <- some (Parsec.char '-' <|> Parsec.char '+') +        cs <- takeWhile (\c -> c == '-' || c == '+') -        -- upper right corner is + too -        guard (last cs == '+') +        -- upper-left and upper-right corners are `+` +        guard (T.length cs >= 2 && +               T.head cs == '+' && +               T.last cs == '+')          -- trailing space          skipHorizontalSpace          _ <- Parsec.newline -        return (T.cons c $ T.pack cs) +        return cs      parseRestRows :: Int -> Parser Text      parseRestRows l = do          skipHorizontalSpace +        bs <- scan predicate l -        c <- Parsec.char '|' <|> Parsec.char '+' -        bs <- scan predicate (l - 2) -        c2 <- Parsec.char '|' <|> Parsec.char '+' +        -- Left and right edges are `|` or `+` +        guard (T.length bs >= 2 && +               (T.head bs == '|' || T.head bs == '+') && +               (T.last bs == '|' || T.last bs == '+'))          -- trailing space          skipHorizontalSpace          _ <- Parsec.newline -        return (T.cons c (T.snoc bs c2)) +        return bs        where          predicate n c              | n <= 0    = Nothing @@ -662,7 +646,7 @@ nonSpace xs  --  Doesn't discard the trailing newline.  takeNonEmptyLine :: Parser Text  takeNonEmptyLine = do -    l <- takeWhile1 (Parsec.noneOf "\n") >>= nonSpace +    l <- takeWhile1 (/= '\n') >>= nonSpace      _ <- "\n"      pure (l <> "\n") @@ -732,7 +716,7 @@ nonEmptyLine :: Parser Text  nonEmptyLine = try (mfilter (T.any (not . isSpace)) takeLine)  takeLine :: Parser Text -takeLine = try (takeWhile (Parsec.noneOf "\n") <* endOfLine) +takeLine = try (takeWhile (/= '\n') <* endOfLine)  endOfLine :: Parser ()  endOfLine = void "\n" <|> Parsec.eof  @@ -742,7 +726,7 @@ endOfLine = void "\n" <|> Parsec.eof  -- >>> snd <$> parseOnly property "prop> hello world"  -- Right (DocProperty "hello world")  property :: Parser (DocH mod a) -property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (Parsec.noneOf "\n")) +property = DocProperty . T.unpack . T.strip <$> ("prop>" *> takeWhile1 (/= '\n'))  -- |  -- Paragraph level codeblock. Anything between the two delimiting \@ is parsed @@ -782,22 +766,22 @@ codeblock =            | isNewline && isSpace c = Just isNewline            | otherwise = Just $ c == '\n' -hyperlink :: Parser (DocH mod a) +hyperlink :: Parser (DocH mod Identifier)  hyperlink = choice' [ angleBracketLink, markdownLink, autoUrl ]  angleBracketLink :: Parser (DocH mod a)  angleBracketLink = -    DocHyperlink . makeLabeled Hyperlink  +    DocHyperlink . makeLabeled (\s -> Hyperlink s . fmap DocString)      <$> disallowNewline ("<" *> takeUntil ">") -markdownLink :: Parser (DocH mod a) +markdownLink :: Parser (DocH mod Identifier)  markdownLink = DocHyperlink <$> linkParser -linkParser :: Parser Hyperlink +linkParser :: Parser (Hyperlink (DocH mod Identifier))  linkParser = flip Hyperlink <$> label <*> (whitespace *> url)    where -    label :: Parser (Maybe String) -    label = Just . decode . T.strip <$> ("[" *> takeUntil "]") +    label :: Parser (Maybe (DocH mod Identifier)) +    label = Just . parseParagraph . T.strip <$> ("[" *> takeUntil "]")      whitespace :: Parser ()      whitespace = skipHorizontalSpace <* optional ("\n" *> skipHorizontalSpace) @@ -816,41 +800,17 @@ linkParser = flip Hyperlink <$> label <*> (whitespace *> url)  autoUrl :: Parser (DocH mod a)  autoUrl = mkLink <$> url    where -    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (Parsec.satisfy (not . isSpace)) +    url = mappend <$> choice' [ "http://", "https://", "ftp://"] <*> takeWhile1 (not . isSpace)      mkLink :: Text -> DocH mod a      mkLink s = case T.unsnoc s of        Just (xs,x) | x `elem` (",.!?" :: String) -> DocHyperlink (mkHyperlink xs) `docAppend` DocString [x]        _ -> DocHyperlink (mkHyperlink s) -    mkHyperlink :: Text -> Hyperlink +    mkHyperlink :: Text -> Hyperlink (DocH mod a)      mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing - --- | Parses strings between identifier delimiters. Consumes all input that it --- deems to be valid in an identifier. Note that it simply blindly consumes --- characters and does no actual validation itself. -parseValid :: Parser String -parseValid = p some -  where -    idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_') - -    p p' = do -      vs <- p' idChar -      c <- peekChar' -      case c of -        '`' -> return vs -        '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ] -        _ -> fail "outofvalid" - --- | Parses identifiers with help of 'parseValid'. Asks GHC for --- 'String' from the string it deems valid. +-- | Parses identifiers with help of 'parseValid'.  identifier :: Parser (DocH mod Identifier) -identifier = do -  o <- idDelim -  vid <- parseValid -  e <- idDelim -  return $ DocIdentifier (o, vid, e) -  where -    idDelim = Parsec.satisfy (\c -> c == '\'' || c == '`') +identifier = DocIdentifier <$> parseValid | 
