diff options
author | Alec Theriault <alec.theriault@gmail.com> | 2019-02-06 01:01:41 -0800 |
---|---|---|
committer | Alec Theriault <alec.theriault@gmail.com> | 2019-02-25 00:42:46 -0800 |
commit | a5199600c39d25d7b71dcb2328000c1c49ad95a2 (patch) | |
tree | 787057c0315d1adf98cab3769ad47b63cb3c0a94 | |
parent | dd47029cb29c80b1ab4db520c9c2ce4dca37f833 (diff) |
Better identifier parsing
* '(<|>)' and '`elem`' now get parsed and rendered properly as links
* 'DbModule'/'DbUnitId' now properly get split apart into two links
* tuple names now get parsed properly
* some more small niceties...
The identifier parsing code is more precise and more efficient (although to be
fair: it is also longer and in its own module). On the rendering side, we need
to pipe through information about backticks/parens/neither all the way through
from renaming to the backends.
In terms of impact: a total of 35 modules in the entirety of the bootlib + ghc
lib docs change. The only "regression" is things like '\0'. These should be
changed to @\\0@ (the path by which this previously worked seems accidental).
21 files changed, 679 insertions, 135 deletions
diff --git a/doc/markup.rst b/doc/markup.rst index 48a6f4ad..56238855 100644 --- a/doc/markup.rst +++ b/doc/markup.rst @@ -932,14 +932,9 @@ necessary to escape the single quote when used as an apostrophe: :: Nothing special is needed to hyperlink identifiers which contain apostrophes themselves: to hyperlink ``foo'`` one would simply type -``'foo''``. Hyperlinking operators works in exactly the same way. +``'foo''``. Hyperlinking operators works in exactly the same way. :: -Note that it is not possible to directly hyperlink an identifier in infix -form or an operator in prefix form. The next best thing to do is to wrap -the whole identifier in monospaced text and put the parentheses/backticks -outside of the identifier, but inside the link: :: - - -- | A prefix operator @('++')@ and an infix identifier @\``elem`\`@. + -- | A prefix operator @'(++)'@ and an infix identifier @'`elem`'@. Emphasis, Bold and Monospaced Text ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/haddock-api/src/Haddock.hs b/haddock-api/src/Haddock.hs index 1378c173..3e0332b5 100644 --- a/haddock-api/src/Haddock.hs +++ b/haddock-api/src/Haddock.hs @@ -663,7 +663,7 @@ getPrologue dflags flags = h <- openFile filename ReadMode hSetEncoding h utf8 str <- hGetContents h -- semi-closes the handle - return . Just $! second rdrName $ parseParas dflags Nothing str + return . Just $! second (fmap rdrName) $ parseParas dflags Nothing str _ -> throwE "multiple -p/--prologue options" diff --git a/haddock-api/src/Haddock/Backends/Hoogle.hs b/haddock-api/src/Haddock/Backends/Hoogle.hs index 9e3186e5..f581c01a 100644 --- a/haddock-api/src/Haddock/Backends/Hoogle.hs +++ b/haddock-api/src/Haddock/Backends/Hoogle.hs @@ -334,7 +334,7 @@ markupTag dflags = Markup { markupString = str, markupAppend = (++), markupIdentifier = box (TagInline "a") . str . out dflags, - markupIdentifierUnchecked = box (TagInline "a") . str . out dflags . snd, + markupIdentifierUnchecked = box (TagInline "a") . str . showWrapped (out dflags . snd), markupModule = box (TagInline "a") . str, markupWarning = box (TagInline "i"), markupEmphasis = box (TagInline "i"), diff --git a/haddock-api/src/Haddock/Backends/LaTeX.hs b/haddock-api/src/Haddock/Backends/LaTeX.hs index d0752506..85769b13 100644 --- a/haddock-api/src/Haddock/Backends/LaTeX.hs +++ b/haddock-api/src/Haddock/Backends/LaTeX.hs @@ -1106,8 +1106,8 @@ ppSymName name | otherwise = ppName name -ppVerbOccName :: OccName -> LaTeX -ppVerbOccName = text . latexFilter . occNameString +ppVerbOccName :: Wrap OccName -> LaTeX +ppVerbOccName = text . latexFilter . showWrapped occNameString ppIPName :: HsIPName -> LaTeX ppIPName = text . ('?':) . unpackFS . hsIPNameFS @@ -1115,13 +1115,12 @@ ppIPName = text . ('?':) . unpackFS . hsIPNameFS ppOccName :: OccName -> LaTeX ppOccName = text . occNameString +ppVerbDocName :: Wrap DocName -> LaTeX +ppVerbDocName = text . latexFilter . showWrapped (occNameString . nameOccName . getName) -ppVerbDocName :: DocName -> LaTeX -ppVerbDocName = ppVerbOccName . nameOccName . getName - -ppVerbRdrName :: RdrName -> LaTeX -ppVerbRdrName = ppVerbOccName . rdrNameOcc +ppVerbRdrName :: Wrap RdrName -> LaTeX +ppVerbRdrName = text . latexFilter . showWrapped (occNameString . rdrNameOcc) ppDocName :: DocName -> LaTeX @@ -1182,7 +1181,7 @@ parLatexMarkup ppId = Markup { markupString = \s v -> text (fixString v s), markupAppend = \l r v -> l v <> r v, markupIdentifier = markupId ppId, - markupIdentifierUnchecked = markupId (ppVerbOccName . snd), + markupIdentifierUnchecked = markupId (ppVerbOccName . fmap snd), markupModule = \m _ -> let (mdl,_ref) = break (=='#') m in tt (text mdl), markupWarning = \p v -> emph (p v), markupEmphasis = \p v -> emph (p v), @@ -1239,11 +1238,11 @@ parLatexMarkup ppId = Markup { where theid = ppId_ id -latexMarkup :: DocMarkup DocName (StringContext -> LaTeX) +latexMarkup :: DocMarkup (Wrap DocName) (StringContext -> LaTeX) latexMarkup = parLatexMarkup ppVerbDocName -rdrLatexMarkup :: DocMarkup RdrName (StringContext -> LaTeX) +rdrLatexMarkup :: DocMarkup (Wrap RdrName) (StringContext -> LaTeX) rdrLatexMarkup = parLatexMarkup ppVerbRdrName diff --git a/haddock-api/src/Haddock/Backends/Xhtml/DocMarkup.hs b/haddock-api/src/Haddock/Backends/Xhtml/DocMarkup.hs index 09aabc0c..1901cf05 100644 --- a/haddock-api/src/Haddock/Backends/Xhtml/DocMarkup.hs +++ b/haddock-api/src/Haddock/Backends/Xhtml/DocMarkup.hs @@ -171,12 +171,12 @@ flatten x = [x] -- extract/append the underlying 'Doc' and convert it to 'Html'. For -- 'CollapsingHeader', we attach extra info to the generated 'Html' -- that allows us to expand/collapse the content. -hackMarkup :: DocMarkup id Html -> Maybe Package -> Hack (ModuleName, OccName) id -> Html +hackMarkup :: DocMarkup id Html -> Maybe Package -> Hack (Wrap (ModuleName, OccName)) id -> Html hackMarkup fmt' currPkg h' = let (html, ms) = hackMarkup' fmt' h' in html +++ renderMeta fmt' currPkg (metaConcat ms) where - hackMarkup' :: DocMarkup id Html -> Hack (ModuleName, OccName) id + hackMarkup' :: DocMarkup id Html -> Hack (Wrap (ModuleName, OccName)) id -> (Html, [Meta]) hackMarkup' fmt h = case h of UntouchedDoc d -> (markup fmt $ _doc d, [_meta d]) @@ -206,7 +206,7 @@ renderMeta _ _ _ = noHtml -- | Goes through 'hackMarkup' to generate the 'Html' rather than -- skipping straight to 'markup': this allows us to employ XHtml -- specific hacks to the tree first. -markupHacked :: DocMarkup id Html +markupHacked :: DocMarkup (Wrap id) Html -> Maybe Package -- this package -> Maybe String -> MDoc id @@ -220,7 +220,7 @@ docToHtml :: Maybe String -- ^ Name of the thing this doc is for. See -> Maybe Package -- ^ Current package -> Qualification -> MDoc DocName -> Html docToHtml n pkg qual = markupHacked fmt pkg n . cleanup - where fmt = parHtmlMarkup qual True (ppDocName qual Raw) + where fmt = parHtmlMarkup qual True (ppWrappedDocName qual Raw) -- | Same as 'docToHtml' but it doesn't insert the 'anchor' element -- in links. This is used to generate the Contents box elements. @@ -228,16 +228,16 @@ docToHtmlNoAnchors :: Maybe String -- ^ See 'toHack' -> Maybe Package -- ^ Current package -> Qualification -> MDoc DocName -> Html docToHtmlNoAnchors n pkg qual = markupHacked fmt pkg n . cleanup - where fmt = parHtmlMarkup qual False (ppDocName qual Raw) + where fmt = parHtmlMarkup qual False (ppWrappedDocName qual Raw) origDocToHtml :: Maybe Package -> Qualification -> MDoc Name -> Html origDocToHtml pkg qual = markupHacked fmt pkg Nothing . cleanup - where fmt = parHtmlMarkup qual True (const $ ppName Raw) + where fmt = parHtmlMarkup qual True (const (ppWrappedName Raw)) rdrDocToHtml :: Maybe Package -> Qualification -> MDoc RdrName -> Html rdrDocToHtml pkg qual = markupHacked fmt pkg Nothing . cleanup - where fmt = parHtmlMarkup qual True (const ppRdrName) + where fmt = parHtmlMarkup qual True (const (ppRdrName . unwrap)) docElement :: (Html -> Html) -> Html -> Html @@ -273,7 +273,7 @@ cleanup = overDoc (markup fmtUnParagraphLists) unParagraph (DocParagraph d) = d unParagraph doc = doc - fmtUnParagraphLists :: DocMarkup a (Doc a) + fmtUnParagraphLists :: DocMarkup (Wrap a) (Doc a) fmtUnParagraphLists = idMarkup { markupUnorderedList = DocUnorderedList . map unParagraph, markupOrderedList = DocOrderedList . map unParagraph diff --git a/haddock-api/src/Haddock/Backends/Xhtml/Names.hs b/haddock-api/src/Haddock/Backends/Xhtml/Names.hs index 574045e0..6a047747 100644 --- a/haddock-api/src/Haddock/Backends/Xhtml/Names.hs +++ b/haddock-api/src/Haddock/Backends/Xhtml/Names.hs @@ -13,7 +13,8 @@ module Haddock.Backends.Xhtml.Names ( ppName, ppDocName, ppLDocName, ppRdrName, ppUncheckedLink, ppBinder, ppBinderInfix, ppBinder', - ppModule, ppModuleRef, ppIPName, linkId, Notation(..) + ppModule, ppModuleRef, ppIPName, linkId, Notation(..), + ppWrappedDocName, ppWrappedName, ) where @@ -24,7 +25,7 @@ import Haddock.Utils import Text.XHtml hiding ( name, p, quote ) import qualified Data.Map as M -import qualified Data.List as List +import Data.List ( stripPrefix ) import GHC hiding (LexicalFixity(..)) import Name @@ -49,9 +50,11 @@ ppIPName :: HsIPName -> Html ppIPName = toHtml . ('?':) . unpackFS . hsIPNameFS -ppUncheckedLink :: Qualification -> (ModuleName, OccName) -> Html -ppUncheckedLink _ (mdl, occ) = linkIdOcc' mdl (Just occ) << ppOccName occ -- TODO: apply ppQualifyName - +ppUncheckedLink :: Qualification -> Wrap (ModuleName, OccName) -> Html +ppUncheckedLink _ x = linkIdOcc' mdl (Just occ) << occHtml + where + (mdl, occ) = unwrap x + occHtml = toHtml (showWrapped (occNameString . snd) x) -- TODO: apply ppQualifyName -- The Bool indicates if it is to be rendered in infix notation ppLDocName :: Qualification -> Notation -> Located DocName -> Html @@ -68,6 +71,19 @@ ppDocName qual notation insertAnchors docName = ppQualifyName qual notation name (nameModule name) | otherwise -> ppName notation name + +ppWrappedDocName :: Qualification -> Notation -> Bool -> Wrap DocName -> Html +ppWrappedDocName qual notation insertAnchors docName = case docName of + Unadorned n -> ppDocName qual notation insertAnchors n + Parenthesized n -> ppDocName qual Prefix insertAnchors n + Backticked n -> ppDocName qual Infix insertAnchors n + +ppWrappedName :: Notation -> Wrap Name -> Html +ppWrappedName notation docName = case docName of + Unadorned n -> ppName notation n + Parenthesized n -> ppName Prefix n + Backticked n -> ppName Infix n + -- | Render a name depending on the selected qualification mode ppQualifyName :: Qualification -> Notation -> Name -> Module -> Html ppQualifyName qual notation name mdl = @@ -79,7 +95,7 @@ ppQualifyName qual notation name mdl = then ppName notation name else ppFullQualName notation mdl name RelativeQual localmdl -> - case List.stripPrefix (moduleString localmdl) (moduleString mdl) of + case stripPrefix (moduleString localmdl) (moduleString mdl) of -- local, A.x -> x Just [] -> ppName notation name -- sub-module, A.B.x -> B.x diff --git a/haddock-api/src/Haddock/Interface/Json.hs b/haddock-api/src/Haddock/Interface/Json.hs index 636d3e19..a9834fa0 100644 --- a/haddock-api/src/Haddock/Interface/Json.hs +++ b/haddock-api/src/Haddock/Interface/Json.hs @@ -62,7 +62,10 @@ jsonMDoc MetaDoc{..} = ] jsonDoc :: Doc Name -> JsonDoc -jsonDoc doc = jsonString (show (bimap (moduleNameString . fst) nameStableString doc)) +jsonDoc doc = jsonString (show (bimap showModName showName doc)) + where + showModName = showWrapped (moduleNameString . fst) + showName = showWrapped nameStableString jsonModule :: Module -> JsonDoc jsonModule = JSString . moduleStableString diff --git a/haddock-api/src/Haddock/Interface/LexParseRn.hs b/haddock-api/src/Haddock/Interface/LexParseRn.hs index 66083cf5..faf23728 100644 --- a/haddock-api/src/Haddock/Interface/LexParseRn.hs +++ b/haddock-api/src/Haddock/Interface/LexParseRn.hs @@ -22,6 +22,7 @@ module Haddock.Interface.LexParseRn import Avail import Control.Arrow import Control.Monad +import Data.Functor (($>)) import Data.List import Data.Ord import Documentation.Haddock.Doc (metaDocConcat) @@ -95,8 +96,9 @@ rename dflags gre = rn rn d = case d of DocAppend a b -> DocAppend <$> rn a <*> rn b DocParagraph doc -> DocParagraph <$> rn doc - DocIdentifier (NsRdrName ns x) -> do - let occ = rdrNameOcc x + DocIdentifier i -> do + let NsRdrName ns x = unwrap i + occ = rdrNameOcc x isValueName = isDataOcc occ || isVarOcc occ let valueNsChoices | isValueName = [x] @@ -119,7 +121,7 @@ rename dflags gre = rn case choices of -- The only way this can happen is if a value namespace was -- specified on something that cannot be a value. - [] -> invalidValue dflags x + [] -> invalidValue dflags i -- There was nothing in the environment so we need to -- pick some default from what's available to us. We @@ -129,14 +131,14 @@ rename dflags gre = rn -- type constructor names (such as in #253). So now we -- only get type constructor links if they are actually -- in scope. - a:_ -> outOfScope dflags ns a + a:_ -> outOfScope dflags ns (i $> a) -- There is only one name in the environment that matches so -- use it. - [a] -> pure (DocIdentifier (gre_name a)) + [a] -> pure (DocIdentifier (i $> gre_name a)) -- There are multiple names available. - gres -> ambiguous dflags x gres + gres -> ambiguous dflags i gres DocWarning doc -> DocWarning <$> rn doc DocEmphasis doc -> DocEmphasis <$> rn doc @@ -168,13 +170,13 @@ rename dflags gre = rn -- users shouldn't rely on this doing the right thing. See tickets -- #253 and #375 on the confusion this causes depending on which -- default we pick in 'rename'. -outOfScope :: DynFlags -> Namespace -> RdrName -> ErrMsgM (Doc a) +outOfScope :: DynFlags -> Namespace -> Wrap RdrName -> ErrMsgM (Doc a) outOfScope dflags ns x = - case x of - Unqual occ -> warnAndMonospace occ - Qual mdl occ -> pure (DocIdentifierUnchecked (mdl, occ)) - Orig _ occ -> warnAndMonospace occ - Exact name -> warnAndMonospace name -- Shouldn't happen since x is out of scope + case unwrap x of + Unqual occ -> warnAndMonospace (x $> occ) + Qual mdl occ -> pure (DocIdentifierUnchecked (x $> (mdl, occ))) + Orig _ occ -> warnAndMonospace (x $> occ) + Exact name -> warnAndMonospace (x $> name) -- Shouldn't happen since x is out of scope where prefix = case ns of Value -> "the value " @@ -182,11 +184,11 @@ outOfScope dflags ns x = None -> "" warnAndMonospace a = do - tell ["Warning: " ++ prefix ++ "'" ++ showPpr dflags a ++ "' is out of scope.\n" ++ - " If you qualify the identifier, haddock can try to link it\n" ++ - " it anyway."] - pure (monospaced a) - monospaced a = DocMonospaced (DocString (showPpr dflags a)) + let a' = showWrapped (showPpr dflags) a + tell ["Warning: " ++ prefix ++ "'" ++ a' ++ "' is out of scope.\n" ++ + " If you qualify the identifier, haddock can try to link it anyway."] + pure (monospaced a') + monospaced = DocMonospaced . DocString -- | Handle ambiguous identifiers. -- @@ -194,36 +196,42 @@ outOfScope dflags ns x = -- -- Emits a warning if the 'GlobalRdrElts's don't belong to the same type or class. ambiguous :: DynFlags - -> RdrName + -> Wrap NsRdrName -> [GlobalRdrElt] -- ^ More than one @gre@s sharing the same `RdrName` above. -> ErrMsgM (Doc Name) ambiguous dflags x gres = do let noChildren = map availName (gresToAvailInfo gres) dflt = maximumBy (comparing (isLocalName &&& isTyConName)) noChildren - msg = "Warning: " ++ x_str ++ " is ambiguous. It is defined\n" ++ + msg = "Warning: " ++ showNsRdrName dflags x ++ " is ambiguous. It is defined\n" ++ concatMap (\n -> " * " ++ defnLoc n ++ "\n") (map gre_name gres) ++ " You may be able to disambiguate the identifier by qualifying it or\n" ++ " by specifying the type/value namespace explicitly.\n" ++ - " Defaulting to " ++ x_str ++ " defined " ++ defnLoc dflt + " Defaulting to the one defined " ++ defnLoc dflt -- TODO: Once we have a syntax for namespace qualification (#667) we may also -- want to emit a warning when an identifier is a data constructor for a type -- of the same name, but not the only constructor. -- For example, for @data D = C | D@, someone may want to reference the @D@ -- constructor. when (length noChildren > 1) $ tell [msg] - pure (DocIdentifier dflt) + pure (DocIdentifier (x $> dflt)) where isLocalName (nameSrcLoc -> RealSrcLoc {}) = True isLocalName _ = False - x_str = '\'' : showPpr dflags x ++ "'" defnLoc = showSDoc dflags . pprNameDefnLoc -- | Handle value-namespaced names that cannot be for values. -- -- Emits a warning that the value-namespace is invalid on a non-value identifier. -invalidValue :: DynFlags -> RdrName -> ErrMsgM (Doc a) +invalidValue :: DynFlags -> Wrap NsRdrName -> ErrMsgM (Doc a) invalidValue dflags x = do - tell ["Warning: '" ++ showPpr dflags x ++ "' cannot be value, yet it is\n" ++ + tell ["Warning: " ++ showNsRdrName dflags x ++ " cannot be value, yet it is\n" ++ " namespaced as such. Did you mean to specify a type namespace\n" ++ " instead?"] - pure (DocMonospaced (DocString (showPpr dflags x))) + pure (DocMonospaced (DocString (showNsRdrName dflags x))) + +-- | Printable representation of a wrapped and namespaced name +showNsRdrName :: DynFlags -> Wrap NsRdrName -> String +showNsRdrName dflags = (\p i -> p ++ "'" ++ i ++ "'") <$> prefix <*> ident + where + ident = showWrapped (showPpr dflags . rdrName) + prefix = renderNs . namespace . unwrap diff --git a/haddock-api/src/Haddock/Interface/Rename.hs b/haddock-api/src/Haddock/Interface/Rename.hs index 57e6d699..88238f04 100644 --- a/haddock-api/src/Haddock/Interface/Rename.hs +++ b/haddock-api/src/Haddock/Interface/Rename.hs @@ -173,8 +173,8 @@ renameLDocHsSyn :: LHsDocString -> RnM LHsDocString renameLDocHsSyn = return -renameDoc :: Traversable t => t Name -> RnM (t DocName) -renameDoc = traverse rename +renameDoc :: Traversable t => t (Wrap Name) -> RnM (t (Wrap DocName)) +renameDoc = traverse (traverse rename) renameFnArgsDoc :: FnArgsDoc Name -> RnM (FnArgsDoc DocName) renameFnArgsDoc = mapM renameDoc diff --git a/haddock-api/src/Haddock/InterfaceFile.hs b/haddock-api/src/Haddock/InterfaceFile.hs index e1d8dbe1..7645b1bb 100644 --- a/haddock-api/src/Haddock/InterfaceFile.hs +++ b/haddock-api/src/Haddock/InterfaceFile.hs @@ -83,7 +83,7 @@ binaryInterfaceMagic = 0xD0Cface -- binaryInterfaceVersion :: Word16 #if (__GLASGOW_HASKELL__ >= 807) && (__GLASGOW_HASKELL__ < 809) -binaryInterfaceVersion = 34 +binaryInterfaceVersion = 35 binaryInterfaceVersionCompatibility :: [Word16] binaryInterfaceVersionCompatibility = [binaryInterfaceVersion] @@ -701,3 +701,28 @@ instance Binary DocName where name <- get bh return (Undocumented name) _ -> error "get DocName: Bad h" + +instance Binary n => Binary (Wrap n) where + put_ bh (Unadorned n) = do + putByte bh 0 + put_ bh n + put_ bh (Parenthesized n) = do + putByte bh 1 + put_ bh n + put_ bh (Backticked n) = do + putByte bh 2 + put_ bh n + + get bh = do + h <- getByte bh + case h of + 0 -> do + name <- get bh + return (Unadorned name) + 1 -> do + name <- get bh + return (Parenthesized name) + 2 -> do + name <- get bh + return (Backticked name) + _ -> error "get Wrap: Bad h" diff --git a/haddock-api/src/Haddock/Parser.hs b/haddock-api/src/Haddock/Parser.hs index 8b7dda7c..6d5dc103 100644 --- a/haddock-api/src/Haddock/Parser.hs +++ b/haddock-api/src/Haddock/Parser.hs @@ -15,27 +15,32 @@ module Haddock.Parser ( parseParas import qualified Documentation.Haddock.Parser as P import Documentation.Haddock.Types -import Haddock.Types (NsRdrName(..)) +import Haddock.Types import DynFlags ( DynFlags ) import FastString ( fsLit ) import Lexer ( mkPState, unP, ParseResult(POk) ) import Parser ( parseIdentifier ) -import RdrName ( RdrName ) import SrcLoc ( mkRealSrcLoc, GenLocated(..) ) import StringBuffer ( stringToStringBuffer ) -parseParas :: DynFlags -> Maybe Package -> String -> MetaDoc mod NsRdrName + +parseParas :: DynFlags -> Maybe Package -> String -> MetaDoc mod (Wrap NsRdrName) parseParas d p = overDoc (P.overIdentifier (parseIdent d)) . P.parseParas p -parseString :: DynFlags -> String -> DocH mod NsRdrName +parseString :: DynFlags -> String -> DocH mod (Wrap NsRdrName) parseString d = P.overIdentifier (parseIdent d) . P.parseString -parseIdent :: DynFlags -> Namespace -> String -> Maybe NsRdrName +parseIdent :: DynFlags -> Namespace -> String -> Maybe (Wrap NsRdrName) parseIdent dflags ns str0 = - let buffer = stringToStringBuffer str0 + let buffer = stringToStringBuffer str1 realSrcLc = mkRealSrcLoc (fsLit "<unknown file>") 0 0 pstate = mkPState dflags buffer realSrcLc + (wrap,str1) = case str0 of + '(' : s@(c : _) | c /= ',', c /= ')' -- rule out tuple names + -> (Parenthesized, init s) + '`' : s@(_ : _) -> (Backticked, init s) + _ -> (Unadorned, str0) in case unP parseIdentifier pstate of - POk _ (L _ name) -> Just (NsRdrName ns name) + POk _ (L _ name) -> Just (wrap (NsRdrName ns name)) _ -> Nothing diff --git a/haddock-api/src/Haddock/Types.hs b/haddock-api/src/Haddock/Types.hs index e8da4120..cd4ac1a1 100644 --- a/haddock-api/src/Haddock/Types.hs +++ b/haddock-api/src/Haddock/Types.hs @@ -42,7 +42,7 @@ import GHC import DynFlags (Language) import qualified GHC.LanguageExtensions as LangExt import OccName -import Outputable +import Outputable hiding ((<>)) ----------------------------------------------------------------------------- -- * Convenient synonyms @@ -334,6 +334,26 @@ instance SetName DocName where setName name' (Documented _ mdl) = Documented name' mdl setName name' (Undocumented _) = Undocumented name' +-- | Adds extra "wrapper" information to a name. +-- +-- This is to work around the fact that most name types in GHC ('Name', 'RdrName', +-- 'OccName', ...) don't include backticks or parens. +data Wrap n + = Unadorned { unwrap :: n } -- ^ don't do anything to the name + | Parenthesized { unwrap :: n } -- ^ add parentheses around the name + | Backticked { unwrap :: n } -- ^ add backticks around the name + deriving (Show, Functor, Foldable, Traversable) + +-- | Useful for debugging +instance Outputable n => Outputable (Wrap n) where + ppr (Unadorned n) = ppr n + ppr (Parenthesized n) = hcat [ char '(', ppr n, char ')' ] + ppr (Backticked n) = hcat [ char '`', ppr n, char '`' ] + +showWrapped :: (a -> String) -> Wrap a -> String +showWrapped f (Unadorned n) = f n +showWrapped f (Parenthesized n) = "(" ++ f n ++ ")" +showWrapped f (Backticked n) = "`" ++ f n ++ "`" ----------------------------------------------------------------------------- @@ -429,10 +449,10 @@ instance NamedThing name => NamedThing (InstOrigin name) where type LDoc id = Located (Doc id) -type Doc id = DocH (ModuleName, OccName) id -type MDoc id = MetaDoc (ModuleName, OccName) id +type Doc id = DocH (Wrap (ModuleName, OccName)) (Wrap id) +type MDoc id = MetaDoc (Wrap (ModuleName, OccName)) (Wrap id) -type DocMarkup id a = DocMarkupH (ModuleName, OccName) id a +type DocMarkup id a = DocMarkupH (Wrap (ModuleName, OccName)) id a instance (NFData a, NFData mod) => NFData (DocH mod a) where diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal index b24db5d4..5475d61b 100644 --- a/haddock-library/haddock-library.cabal +++ b/haddock-library/haddock-library.cabal @@ -49,6 +49,7 @@ library other-modules: Documentation.Haddock.Parser.Util Documentation.Haddock.Parser.Monad + Documentation.Haddock.Parser.Identifier test-suite spec import: lib-defaults @@ -70,6 +71,7 @@ test-suite spec Documentation.Haddock.Parser.UtilSpec Documentation.Haddock.ParserSpec Documentation.Haddock.Types + Documentation.Haddock.Parser.Identifier build-depends: , base-compat ^>= 0.9.3 || ^>= 0.10.0 diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index e9b1c496..36c8bb5b 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -27,8 +27,7 @@ module Documentation.Haddock.Parser ( import Control.Applicative import Control.Arrow (first) import Control.Monad -import Data.Char (chr, isUpper, isAlpha, isAlphaNum, isSpace) -import Data.Foldable (asum) +import Data.Char (chr, isUpper, isAlpha, isSpace) import Data.List (intercalate, unfoldr, elemIndex) import Data.Maybe (fromMaybe, mapMaybe) import Data.Monoid @@ -37,6 +36,7 @@ import Documentation.Haddock.Doc import Documentation.Haddock.Markup ( markup, plainMarkup ) import Documentation.Haddock.Parser.Monad import Documentation.Haddock.Parser.Util +import Documentation.Haddock.Parser.Identifier import Documentation.Haddock.Types import Prelude hiding (takeWhile) import qualified Prelude as P @@ -47,37 +47,10 @@ import Text.Parsec (try) import qualified Data.Text as T import Data.Text (Text) -#if MIN_VERSION_base(4,9,0) -import Text.Read.Lex (isSymbolChar) -#else -import Data.Char (GeneralCategory (..), - generalCategory) -#endif -- $setup -- >>> :set -XOverloadedStrings -#if !MIN_VERSION_base(4,9,0) --- inlined from base-4.10.0.0 -isSymbolChar :: Char -> Bool -isSymbolChar c = not (isPuncChar c) && case generalCategory c of - MathSymbol -> True - CurrencySymbol -> True - ModifierSymbol -> True - OtherSymbol -> True - DashPunctuation -> True - OtherPunctuation -> c `notElem` ("'\"" :: String) - ConnectorPunctuation -> c /= '_' - _ -> False - where - -- | The @special@ character class as defined in the Haskell Report. - isPuncChar :: Char -> Bool - isPuncChar = (`elem` (",;()[]{}`" :: String)) -#endif - --- | Identifier string surrounded with opening and closing quotes/backticks. -data Identifier = Identifier !Namespace !Char String !Char - -- | Drops the quotes/backticks around all identifiers, as if they -- were valid but still 'String's. toRegular :: DocH mod Identifier -> DocH mod String @@ -838,34 +811,6 @@ autoUrl = mkLink <$> url mkHyperlink lnk = Hyperlink (T.unpack lnk) Nothing - --- | Parses strings between identifier delimiters. Consumes all input that it --- deems to be valid in an identifier. Note that it simply blindly consumes --- characters and does no actual validation itself. -parseValid :: Parser String -parseValid = p some - where - idChar = Parsec.satisfy (\c -> isAlphaNum c || isSymbolChar c || c == '_') - - p p' = do - vs <- p' idChar - c <- peekChar' - case c of - '`' -> return vs - '\'' -> choice' [ (\x -> vs ++ "'" ++ x) <$> ("'" *> p many), return vs ] - _ -> fail "outofvalid" - --- | Parses identifiers with help of 'parseValid'. Asks GHC for --- 'String' from the string it deems valid. +-- | Parses identifiers with help of 'parseValid'. identifier :: Parser (DocH mod Identifier) -identifier = do - ns <- asum [ Value <$ Parsec.char 'v' - , Type <$ Parsec.char 't' - , pure None - ] - o <- idDelim - vid <- parseValid - e <- idDelim - return $ DocIdentifier (Identifier ns o vid e) - where - idDelim = Parsec.oneOf "'`" +identifier = DocIdentifier <$> parseValid diff --git a/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs new file mode 100644 index 00000000..7bc98b62 --- /dev/null +++ b/haddock-library/src/Documentation/Haddock/Parser/Identifier.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ViewPatterns #-} +-- | +-- Module : Documentation.Haddock.Parser.Identifier +-- Copyright : (c) Alec Theriault 2019, +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Functionality for parsing identifiers and operators + +module Documentation.Haddock.Parser.Identifier ( + Identifier(..), + parseValid, +) where + +import Documentation.Haddock.Types ( Namespace(..) ) +import Documentation.Haddock.Parser.Monad +import qualified Text.Parsec as Parsec +import Text.Parsec.Pos ( updatePosChar ) +import Text.Parsec ( State(..) + , getParserState, setParserState ) + +import Data.Text (Text) +import qualified Data.Text as T + +import Data.Char (isAlpha, isAlphaNum) +import Control.Monad (guard) +import Data.Functor (($>)) +#if MIN_VERSION_base(4,9,0) +import Text.Read.Lex (isSymbolChar) +#else +import Data.Char (GeneralCategory (..), + generalCategory) +#endif + +import Data.Maybe + +-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks. +data Identifier = Identifier !Namespace !Char String !Char + deriving (Show, Eq) + +parseValid :: Parser Identifier +parseValid = do + s@State{ stateInput = inp, statePos = pos } <- getParserState + + case takeIdentifier inp of + Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier" + Just (ns, op, ident, cl, inp') -> + let posOp = updatePosChar pos op + posIdent = T.foldl updatePosChar posOp ident + posCl = updatePosChar posIdent cl + s' = s{ stateInput = inp', statePos = posCl } + in setParserState s' $> Identifier ns op (T.unpack ident) cl + + +#if !MIN_VERSION_base(4,9,0) +-- inlined from base-4.10.0.0 +isSymbolChar :: Char -> Bool +isSymbolChar c = not (isPuncChar c) && case generalCategory c of + MathSymbol -> True + CurrencySymbol -> True + ModifierSymbol -> True + OtherSymbol -> True + DashPunctuation -> True + OtherPunctuation -> c `notElem` "'\"" + ConnectorPunctuation -> c /= '_' + _ -> False + where + -- | The @special@ character class as defined in the Haskell Report. + isPuncChar :: Char -> Bool + isPuncChar = (`elem` (",;()[]{}`" :: String)) +#endif + +-- | Try to parse a delimited identifier off the front of the given input. +-- +-- This tries to match as many valid Haskell identifiers/operators as possible, +-- to the point of sometimes accepting invalid things (ex: keywords). Some +-- considerations: +-- +-- - operators and identifiers can have module qualifications +-- - operators can be wrapped in parens (for prefix) +-- - identifiers can be wrapped in backticks (for infix) +-- - delimiters are backticks or regular ticks +-- - since regular ticks are also valid in identifiers, we opt for the +-- longest successful parse +-- +-- This function should make /O(1)/ allocations +takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text) +takeIdentifier input = listToMaybe $ do + + -- Optional namespace + let (ns, input') = case T.uncons input of + Just ('v', i) -> (Value, i) + Just ('t', i) -> (Type, i) + _ -> (None, input) + + -- Opening tick + (op, input'') <- maybeToList (T.uncons input') + guard (op == '\'' || op == '`') + + -- Identifier/operator + (ident, input''') <- wrapped input'' + + -- Closing tick + (cl, input'''') <- maybeToList (T.uncons input''') + guard (cl == '\'' || cl == '`') + + pure (ns, op, ident, cl, input'''') + + where + + -- | Parse out a wrapped, possibly qualified, operator or identifier + wrapped t = do + (c, t' ) <- maybeToList (T.uncons t) + -- Tuples + case c of + '(' | Just (c', _) <- T.uncons t' + , c' == ',' || c' == ')' + -> do let (commas, t'') = T.span (== ',') t' + (')', t''') <- maybeToList (T.uncons t'') + pure (T.take (T.length commas + 2) t, t''') + + -- Parenthesized + '(' -> do (n, t'' ) <- general False 0 [] t' + (')', t''') <- maybeToList (T.uncons t'') + pure (T.take (n + 2) t, t''') + + -- Backticked + '`' -> do (n, t'' ) <- general False 0 [] t' + ('`', t''') <- maybeToList (T.uncons t'') + pure (T.take (n + 2) t, t''') + + -- Unadorned + _ -> do (n, t'' ) <- general False 0 [] t + pure (T.take n t, t'') + + -- | Parse out a possibly qualified operator or identifier + general :: Bool -- ^ refuse inputs starting with operators + -> Int -- ^ total characters \"consumed\" so far + -> [(Int, Text)] -- ^ accumulated results + -> Text -- ^ current input + -> [(Int, Text)] -- ^ total characters parsed & what remains + general !identOnly !i acc t + -- Starts with an identifier (either just an identifier, or a module qual) + | Just (n, rest) <- identLike t + = if T.null rest + then acc + else case T.head rest of + '`' -> (n + i, rest) : acc + ')' -> (n + i, rest) : acc + '.' -> general False (n + i + 1) acc (T.tail rest) + '\'' -> let (m, rest') = quotes rest + in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (T.tail rest') + _ -> acc + + -- An operator + | Just (n, rest) <- optr t + , not identOnly + = (n + i, rest) : acc + + -- Anything else + | otherwise + = acc + + -- | Parse an identifier off the front of the input + identLike t + | T.null t = Nothing + | isAlpha (T.head t) || '_' == T.head t + = let !(idt, rest) = T.span (\c -> isAlphaNum c || c == '_') t + !(octos, rest') = T.span (== '#') rest + in Just (T.length idt + T.length octos, rest') + | otherwise = Nothing + + -- | Parse all but the last quote off the front of the input + -- PRECONDITION: T.head t == '\'' + quotes :: Text -> (Int, Text) + quotes t = let !n = T.length (T.takeWhile (== '\'') t) - 1 + in (n, T.drop n t) + + -- | Parse an operator off the front of the input + optr t = let !(op, rest) = T.span isSymbolChar t + in if T.null op then Nothing else Just (T.length op, rest) diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index 8f5bd217..fa46f536 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -4,6 +4,18 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE TypeSynonymInstances #-} +-- | +-- Module : Documentation.Haddock.Parser.Monad +-- Copyright : (c) Alec Theriault 2018-2019, +-- License : BSD-like +-- +-- Maintainer : haddock@projects.haskell.org +-- Stability : experimental +-- Portability : portable +-- +-- Defines the Parsec monad over which all parsing is done and also provides +-- more efficient versions of the usual parsec combinator functions (but +-- specialized to 'Text'). module Documentation.Haddock.Parser.Monad where @@ -96,7 +108,6 @@ takeWhile f = do s' = s{ stateInput = inp', statePos = pos' } setParserState s' $> t - -- | Like 'takeWhile', but fails if no characters matched. -- -- Equivalent to @fmap T.pack . Parsec.many1@, but more efficient. diff --git a/haddock-library/test/Documentation/Haddock/ParserSpec.hs b/haddock-library/test/Documentation/Haddock/ParserSpec.hs index e186a5cf..bc40a0a2 100644 --- a/haddock-library/test/Documentation/Haddock/ParserSpec.hs +++ b/haddock-library/test/Documentation/Haddock/ParserSpec.hs @@ -112,7 +112,7 @@ spec = do "``" `shouldParseTo` "``" it "can parse an identifier in infix notation enclosed within backticks" $ do - "``infix``" `shouldParseTo` "`" <> DocIdentifier "infix" <> "`" + "``infix``" `shouldParseTo` DocIdentifier "`infix`" it "can parse identifiers containing a single quote" $ do "'don't'" `shouldParseTo` DocIdentifier "don't" @@ -138,6 +138,13 @@ spec = do it "can parse type-namespaced identifiers" $ do "t'foo'" `shouldParseTo` DocIdentifier "foo" + it "can parse parenthesized operators and backticked identifiers" $ do + "'(<|>)'" `shouldParseTo` DocIdentifier "(<|>)" + "'`elem`'" `shouldParseTo` DocIdentifier "`elem`" + + it "can properly figure out the end of identifiers" $ do + "'DbModule'/'DbUnitId'" `shouldParseTo` DocIdentifier "DbModule" <> "/" <> DocIdentifier "DbUnitId" + context "when parsing operators" $ do it "can parse an operator enclosed within single quotes" $ do "'.='" `shouldParseTo` DocIdentifier ".=" diff --git a/haddock.cabal b/haddock.cabal index 2b8ee6ff..91a5ea3d 100644 --- a/haddock.cabal +++ b/haddock.cabal @@ -89,6 +89,7 @@ executable haddock other-modules: Documentation.Haddock.Parser Documentation.Haddock.Parser.Monad + Documentation.Haddock.Parser.Identifier Documentation.Haddock.Types Documentation.Haddock.Doc Documentation.Haddock.Parser.Util diff --git a/html-test/ref/Identifiers.html b/html-test/ref/Identifiers.html new file mode 100644 index 00000000..1a0a18a5 --- /dev/null +++ b/html-test/ref/Identifiers.html @@ -0,0 +1,286 @@ +<html xmlns="http://www.w3.org/1999/xhtml" +><head + ><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" + /><meta name="viewport" content="width=device-width, initial-scale=1" + /><title + >Identifiers</title + ><link href="#" rel="stylesheet" type="text/css" title="NewOcean" + /><link rel="stylesheet" type="text/css" href="#" + /><link rel="stylesheet" type="text/css" href="#" + /><script src="haddock-bundle.min.js" async="async" type="text/javascript" + ></script + ><script type="text/x-mathjax-config" + >MathJax.Hub.Config({ tex2jax: { processClass: "mathjax", ignoreClass: ".*" } });</script + ><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript" + ></script + ></head + ><body + ><div id="package-header" + ><span class="caption empty" + > </span + ><ul class="links" id="page-menu" + ><li + ><a href="#" + >Contents</a + ></li + ><li + ><a href="#" + >Index</a + ></li + ></ul + ></div + ><div id="content" + ><div id="module-header" + ><table class="info" + ><tr + ><th + >Safe Haskell</th + ><td + >Safe</td + ></tr + ></table + ><p class="caption" + >Identifiers</p + ></div + ><div id="synopsis" + ><details id="syn" + ><summary + >Synopsis</summary + ><ul class="details-toggle" data-details-id="syn" + ><li class="src short" + ><span class="keyword" + >data</span + > <a href="#" + >Id</a + > = <a href="#" + >Id</a + ></li + ><li class="src short" + ><span class="keyword" + >data</span + > a <a href="#" + >:*</a + > b = a <a href="#" + >:*</a + > b</li + ><li class="src short" + ><a href="#" + >foo</a + > :: ()</li + ></ul + ></details + ></div + ><div id="interface" + ><h1 + >Documentation</h1 + ><div class="top" + ><p class="src" + ><span class="keyword" + >data</span + > <a id="t:Id" class="def" + >Id</a + > <a href="#" class="selflink" + >#</a + ></p + ><div class="subs constructors" + ><p class="caption" + >Constructors</p + ><table + ><tr + ><td class="src" + ><a id="v:Id" class="def" + >Id</a + ></td + ><td class="doc empty" + > </td + ></tr + ></table + ></div + ></div + ><div class="top" + ><p class="src" + ><span class="keyword" + >data</span + > a <a id="t::-42-" class="def" + >:*</a + > b <a href="#" class="selflink" + >#</a + ></p + ><div class="subs constructors" + ><p class="caption" + >Constructors</p + ><table + ><tr + ><td class="src" + >a <a id="v::-42-" class="def" + >:*</a + > b</td + ><td class="doc empty" + > </td + ></tr + ></table + ></div + ></div + ><div class="top" + ><p class="src" + ><a id="v:foo" class="def" + >foo</a + > :: () <a href="#" class="selflink" + >#</a + ></p + ><div class="doc" + ><ul + ><li + ><p + >Unadorned:</p + ><ul + ><li + >Unqualified: <code + ><a href="#" title="GHC.List" + >++</a + ></code + >, <code + ><a href="#" title="Data.Foldable" + >elem</a + ></code + ></li + ><li + >Qualified: <code + ><a href="#" title="GHC.List" + >++</a + ></code + >, <code + ><a href="#" title="Data.Lis" + >elem</a + ></code + ></li + ><li + >Namespaced: <code + ><a href="#" title="GHC.List" + >++</a + ></code + >, <code + >++</code + >, <code + ><a href="#" title="Data.Foldable" + >elem</a + ></code + >, <code + >elem</code + >, <code + ><a href="#" title="Identifiers" + >Id</a + ></code + >, <code + ><a href="#" title="Identifiers" + >Id</a + ></code + >, <code + ><a href="#" title="Identifiers" + >:*</a + ></code + >, <code + ><a href="#" title="Identifiers" + >:*</a + ></code + ></li + ></ul + ></li + ><li + ><p + >Parenthesized:</p + ><ul + ><li + >Unqualified: <code + ><code + ><a href="#" title="GHC.List" + >(++)</a + ></code + > [1,2,3] [4,5,6]</code + ></li + ><li + >Qualified: <code + ><code + ><a href="#" title="GHC.List" + >(++)</a + ></code + > [1,2,3] [4,5,6]</code + ></li + ><li + >Namespaced: <code + ><a href="#" title="GHC.List" + >(++)</a + ></code + >, <code + >++</code + >, <code + ><a href="#" title="Identifiers" + >(:*)</a + ></code + >, <code + ><a href="#" title="Identifiers" + >(:*)</a + ></code + ></li + ></ul + ></li + ><li + ><p + >Backticked:</p + ><ul + ><li + >Unqualified: <code + >1 <code + ><a href="#" title="Data.Foldable" + >`elem`</a + ></code + > [-3..3]</code + ></li + ><li + >Qualified: <code + >1 <code + ><a href="#" title="Data.Foldable" + >`elem`</a + ></code + > [-3..3]</code + ></li + ><li + >Namespaced: <code + ><a href="#" title="Data.Foldable" + >`elem`</a + ></code + >, <code + >`elem`</code + >, <code + ><a href="#" title="Identifiers" + >`Id`</a + ></code + >, <code + ><a href="#" title="Identifiers" + >`Id`</a + ></code + ></li + ></ul + ></li + ><li + ><p + >Edge cases:</p + ><ul + ><li + >Tuples: <code + >()</code + >, <code + >(,,,)</code + ></li + ></ul + ></li + ></ul + ></div + ></div + ></div + ></div + ><div id="footer" + ></div + ></body + ></html +> diff --git a/html-test/ref/Test.html b/html-test/ref/Test.html index b76622e7..aefc4d14 100644 --- a/html-test/ref/Test.html +++ b/html-test/ref/Test.html @@ -2364,7 +2364,7 @@ is at the beginning of the line).</pre >f'</a ></code > - but f' doesn't get link'd 'f\''</p + but f' doesn't get link'd 'f''</p ></div ></div ><div class="top" diff --git a/html-test/src/Identifiers.hs b/html-test/src/Identifiers.hs new file mode 100644 index 00000000..75f12109 --- /dev/null +++ b/html-test/src/Identifiers.hs @@ -0,0 +1,35 @@ +{-# LANGUAGE TypeOperators #-} +module Identifiers where + +import Data.List (elem, (++)) + +data Id = Id +data a :* b = a :* b + +{-| + + * Unadorned: + + - Unqualified: '++', 'elem' + - Qualified: 'Data.List.++', 'Data.Lis.elem' + - Namespaced: v'++', t'++', v'elem', t'elem', v'Id', t'Id', v':*', t':*' + + * Parenthesized: + + - Unqualified: @'(++)' [1,2,3] [4,5,6]@ + - Qualified: @'(Data.List.++)' [1,2,3] [4,5,6]@ + - Namespaced: v'(++)', t'++', v'(:*)', t'(:*)' + + * Backticked: + + - Unqualified: @1 '`elem`' [-3..3]@ + - Qualified: @1 '`Data.List.elem`' [-3..3]@ + - Namespaced: v'`elem`', t'`elem`', v'`Id`', t'`Id`' + + * Edge cases: + + - Tuples: '()', '(,,,)' + +-} +foo :: () +foo = () |