diff options
author | Alec Theriault <alec.theriault@gmail.com> | 2019-02-06 01:01:41 -0800 |
---|---|---|
committer | Alec Theriault <alec.theriault@gmail.com> | 2019-02-25 00:42:46 -0800 |
commit | a5199600c39d25d7b71dcb2328000c1c49ad95a2 (patch) | |
tree | 787057c0315d1adf98cab3769ad47b63cb3c0a94 /haddock-library/src/Documentation/Haddock/Parser.hs | |
parent | dd47029cb29c80b1ab4db520c9c2ce4dca37f833 (diff) |
Better identifier parsing
* '(<|>)' and '`elem`' now get parsed and rendered properly as links
* 'DbModule'/'DbUnitId' now properly get split apart into two links
* tuple names now get parsed properly
* some more small niceties...
The identifier parsing code is more precise and more efficient (although to be
fair: it is also longer and in its own module). On the rendering side, we need
to pipe through information about backticks/parens/neither all the way through
from renaming to the backends.
In terms of impact: a total of 35 modules in the entirety of the bootlib + ghc
lib docs change. The only "regression" is things like '\0'. These should be
changed to @\\0@ (the path by which this previously worked seems accidental).
Diffstat (limited to 'haddock-library/src/Documentation/Haddock/Parser.hs')
-rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser.hs | 63 |
1 files changed, 4 insertions, 59 deletions
diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index e9b1c496..36c8bb5b 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -27,8 +27,7 @@ module Documentation.Haddock.Parser ( import Control.Applicative import Control.Arrow (first) import Control.Monad -import Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace) -import Data.Foldable (asum) +import Data.Char (chr, isUpper, isAlpha, isSpace) import Data.List (intercalate, unfoldr, elemIndex) import Data.Maybe (fromMaybe, mapMaybe) import Data.Monoid @@ -37,6 +36,7 @@ import Documentation.Haddock.Doc import Documentation.Haddock.Markup ( markup, plainMarkup ) import Documentation.Haddock.Parser.Monad import Documentation.Haddock.Parser.Util +import Documentation.Haddock.Parser.Identifier import Documentation.Haddock.Types import Prelude hiding (takeWhile) import qualified Prelude as P @@ -47,37 +47,10 @@ import Text.Parsec (try) import qualified Data.Text as T import Data.Text (Text) -#if MIN_VERSION_base(4,9,0) -import Text.Read.Lex (isSymbolChar) -#else -import Data.Char (GeneralCategory (..), - generalCategory) -#endif -- $setup -- >>> :set -XOverloadedStrings -#if !MIN_VERSION_base(4,9,0) --- inlined from base-4.10.0.0 -isSymbolChar :: Char -> Bool -isSymbolChar c = not (isPuncChar c) && case generalCategory c of - MathSymbol -> True - CurrencySymbol -> True - ModifierSymbol -> True - OtherSymbol -> True - DashPunctuation -> True - OtherPunctuation -> c `notElem` ("'\"" :: String) - ConnectorPunctuation -> c /= '_' - _ -> False - where - -- | The @special@ character class as defined in the Haskell Report. - isPuncChar :: Char -> Bool - isPuncChar = (`elem` (",;()[]{}`" :: String)) -#endif - --- | Identifier string surrounded with opening and closing quotes/backticks. -data Identifier = Identifier !Namespace !Char String !Char - -- | Drops the quotes/backticks around all identifiers, as if they -- were valid but still 'String's. toRegular :: DocH mod Identifier -> DocH mod String @@ -838,34 +811,6 @@ autoUrl = mkLink <$> url mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing - --- | Parses strings between identifier delimiters. Consumes all input that it --- deems to be valid in an identifier. Note that it simply blindly consumes --- characters and does no actual validation itself. -parseValid :: Parser String -parseValid = p some - where - idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_') - - p p' = do - vs <- p' idChar - c <- peekChar' - case c of - '`' -> return vs - '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ] - _ -> fail "outofvalid" - --- | Parses identifiers with help of 'parseValid'. Asks GHC for --- 'String' from the string it deems valid. +-- | Parses identifiers with help of 'parseValid'. identifier :: Parser (DocH mod Identifier) -identifier = do - ns <- asum [ Value <$ Parsec.char 'v' - , Type <$ Parsec.char 't' - , pure None - ] - o <- idDelim - vid <- parseValid - e <- idDelim - return $ DocIdentifier (Identifier ns o vid e) - where - idDelim = Parsec.oneOf "'`" +identifier = DocIdentifier <$> parseValid |