aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlec Theriault <alec.theriault@gmail.com>2017-12-10 12:22:21 -0800
committerAlexander Biehl <alexbiehl@gmail.com>2018-02-01 14:58:18 +0100
commit4f75be94f45a0e92553eccefe56230c554333ce7 (patch)
treeb88a2dd52d4bcd001f423c490c14b4c3cbaaee0e
parent60e10eb876899165e9644013508361bf72048bdb (diff)
Use the GHC lexer for the Hyperlinker backend (#714)
* Start changing to use GHC lexer * better cpp * Change SrcSpan to RealSrcSpan * Remove error * Try to stop too many open files * wip * wip * Revert "wip" This reverts commit b605510a195f26315e3d8ca90e6d95a6737553e1. Conflicts: haddock-api/haddock-api.cabal haddock-api/src/Haddock/Interface.hs * Remove pointless 'caching' * Use dlist rather than lists when finding vars * Use a map rather than list * Delete bogus comment * Rebase followup Things now run using the GHC lexer. There are still - stray debug statements - unnecessary changes w.r.t. master * Cleaned up differences w.r.t. current Haddock HEAD Things are looking good. quasiquotes in particular look beautiful: the TH ones (with Haskell source inside) colour/link their contents too! Haven't yet begun to check for possible performance problems. * Support CPP and top-level pragmas The support for these is hackier - but no more hacky than the existing support. * Tests pass, CPP is better recognized The tests were in some cases altered: I consider the new output to be more correct than the old one.... * Fix shrinking of source without tabs in test * Replace 'Position'/'Span' with GHC counterparts Replaces 'Position' -> 'GHC.RealSrcLoc' and 'Span' -> 'GHC.RealSrcSpan'. * Nits * Forgot entry in .cabal * Update changelog
-rw-r--r--CHANGES.md3
-rw-r--r--haddock-api/haddock-api.cabal62
-rw-r--r--haddock-api/src/Haddock.hs2
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs19
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs534
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs14
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs33
-rw-r--r--haddock-api/src/Haddock/Interface/Create.hs24
-rw-r--r--haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs135
-rw-r--r--hypsrc-test/ref/src/Identifiers.html8
10 files changed, 522 insertions, 312 deletions
diff --git a/CHANGES.md b/CHANGES.md
index 19417d12..b4d69ce4 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -19,6 +19,9 @@
* Fix: Generate constraint signatures for constructors exported as pattern
synonyms (#663)
+ * The hyperlinker backend now uses the GHC lexer instead of a custom one.
+ This notably fixes rendering of quasiquotes.
+
* Overhaul Haddock's rendering of kind signatures so that invisible kind
parameters are not printed (#681)
diff --git a/haddock-api/haddock-api.cabal b/haddock-api/haddock-api.cabal
index a91afd36..95af5ef5 100644
--- a/haddock-api/haddock-api.cabal
+++ b/haddock-api/haddock-api.cabal
@@ -124,24 +124,68 @@ test-suite spec
test
, src
- -- NB: We only use a small subset of lib:haddock-api here, which
- -- explains why this component has a smaller build-depends set
other-modules:
+ Haddock
+ Haddock.Backends.Hoogle
+ Haddock.Backends.Hyperlinker
+ Haddock.Backends.Hyperlinker.Ast
+ Haddock.Backends.Hyperlinker.Renderer
+ Haddock.Backends.Hyperlinker.Utils
+ Haddock.Backends.LaTeX
+ Haddock.Backends.Xhtml
+ Haddock.Backends.Xhtml.Decl
+ Haddock.Backends.Xhtml.DocMarkup
+ Haddock.Backends.Xhtml.Layout
+ Haddock.Backends.Xhtml.Meta
+ Haddock.Backends.Xhtml.Names
+ Haddock.Backends.Xhtml.Themes
+ Haddock.Backends.Xhtml.Types
+ Haddock.Backends.Xhtml.Utils
+ Haddock.Convert
+ Haddock.Doc
+ Haddock.GhcUtils
+ Haddock.Interface
+ Haddock.Interface.AttachInstances
+ Haddock.Interface.Create
+ Haddock.Interface.Json
+ Haddock.Interface.LexParseRn
+ Haddock.Interface.ParseModuleHeader
+ Haddock.Interface.Rename
+ Haddock.Interface.Specialize
+ Haddock.InterfaceFile
+ Haddock.ModuleTree
+ Haddock.Options
+ Haddock.Parser
+ Haddock.Syb
+ Haddock.Types
+ Haddock.Utils
+ Haddock.Utils.Json
+ Haddock.Version
+ Paths_haddock_api
Haddock.Backends.Hyperlinker.ParserSpec
Haddock.Backends.Hyperlinker.Parser
Haddock.Backends.Hyperlinker.Types
- build-depends:
- ghc ^>= 8.4
- , hspec ^>= 2.4.4
- , QuickCheck ^>= 2.10
+ build-depends: Cabal ^>= 2.0.0
+ , ghc ^>= 8.4
+ , ghc-paths ^>= 0.1.0.9
+ , haddock-library ^>= 1.4.6
+ , xhtml ^>= 3000.2.2
+ , hspec ^>= 2.4.4
+ , QuickCheck ^>= 2.10
-- Versions for the dependencies below are transitively pinned by
-- the non-reinstallable `ghc` package and hence need no version
-- bounds
- build-depends:
- base
- , containers
+ build-depends: base
+ , array
+ , bytestring
+ , containers
+ , deepseq
+ , directory
+ , filepath
+ , ghc-boot
+ , transformers
build-tool-depends:
hspec-discover:hspec-discover ^>= 2.4.4
diff --git a/haddock-api/src/Haddock.hs b/haddock-api/src/Haddock.hs
index f7fa52b3..3f5e5298 100644
--- a/haddock-api/src/Haddock.hs
+++ b/haddock-api/src/Haddock.hs
@@ -277,7 +277,7 @@ render dflags flags qual ifaces installedIfaces extSrcMap = do
| Flag_HyperlinkedSource `elem` flags = Just hypSrcModuleUrlFormat
| otherwise = srcModule
- srcMap = mkSrcMap $ Map.union
+ srcMap = Map.union
(Map.map SrcExternal extSrcMap)
(Map.fromList [ (ifaceMod iface, SrcLocal) | iface <- ifaces ])
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
index 57ff72ff..361bc15d 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs
@@ -12,6 +12,7 @@ import qualified Haddock.Syb as Syb
import Haddock.Backends.Hyperlinker.Types
import qualified GHC
+import qualified SrcLoc
import Control.Applicative
import Control.Monad (guard)
@@ -51,10 +52,10 @@ type DetailsMap = Map.Map Position (Span, TokenDetails)
mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap
mkDetailsMap xs =
- Map.fromListWith select_details [ (start, (token_span, token_details))
+ Map.fromListWith select_details [ (start, (span, token_details))
| (ghc_span, token_details) <- xs
- , Just !token_span <- [ghcSrcSpanToSpan ghc_span]
- , let start = spStart token_span
+ , GHC.RealSrcSpan span <- [ghc_span]
+ , let start = SrcLoc.realSrcSpanStart span
]
where
-- favour token details which appear earlier in the list
@@ -62,17 +63,11 @@ mkDetailsMap xs =
lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
lookupBySpan span details = do
- (_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details
- guard (tok_span `containsSpan` span )
+ let pos = SrcLoc.realSrcSpanStart span
+ (_, (tok_span, tok_details)) <- Map.lookupLE pos details
+ guard (tok_span `SrcLoc.containsSpan` span)
return tok_details
-ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span
-ghcSrcSpanToSpan (GHC.RealSrcSpan span) =
- Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span)
- , spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span)
- })
-ghcSrcSpanToSpan _ = Nothing
-
enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
enrichToken (Token typ _ spn) dm
| typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
index e4345602..da1555b8 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -1,11 +1,17 @@
module Haddock.Backends.Hyperlinker.Parser (parse) where
+import Data.Either ( isRight, isLeft )
+import Data.List ( foldl', isPrefixOf, isSuffixOf )
+import Data.Maybe ( maybeToList )
-import Data.Char
-import Data.List
-import Data.Maybe
+import GHC ( DynFlags, addSourceToTokens )
+import SrcLoc
+import FastString ( mkFastString )
+import StringBuffer ( stringToStringBuffer )
+import Lexer ( Token(..) )
+import qualified Lexer as L
-import Haddock.Backends.Hyperlinker.Types
+import Haddock.Backends.Hyperlinker.Types as T
-- | Turn source code string into a stream of more descriptive tokens.
@@ -13,202 +19,350 @@ import Haddock.Backends.Hyperlinker.Types
-- Result should retain original file layout (including comments, whitespace,
-- etc.), i.e. the following "law" should hold:
--
--- @concat . map 'tkValue' . 'parse' = id@
-parse :: String -> [Token]
-parse = tokenize . tag . chunk
+-- prop> concat . map tkValue . parse = id
+--
+-- (In reality, this only holds for input not containing '\r', '\t', '\f', '\v',
+-- characters, since GHC transforms those into ' ' and '\n')
+parse :: DynFlags -> FilePath -> String -> [T.Token]
+parse dflags fp s = ghcToks (processCPP dflags fp s)
+
--- | Split raw source string to more meaningful chunks.
+-- | Parse the source into tokens using the GHC lexer.
+--
+-- * CPP lines are removed and reinserted as line-comments
+-- * top-level file pragmas are parsed as block comments (see the
+-- 'ITblockComment' case of 'classify' for more details)
--
--- This is the initial stage of tokenization process. Each chunk is either
--- a comment (including comment delimiters), a whitespace string, preprocessor
--- macro (and all its content until the end of a line) or valid Haskell lexeme.
-chunk :: String -> [String]
-chunk [] = []
-chunk str@(c:_)
- | isSpace c =
- let (space, mcpp, rest) = spanSpaceOrCpp str
- in [space] ++ maybeToList mcpp ++ chunk rest
-chunk str
- | "--" `isPrefixOf` str = chunk' $ spanToNewline str
- | "{-" `isPrefixOf` str = chunk' $ chunkComment 0 str
- | otherwise = case lex' str of
- (tok:_) -> chunk' tok
- [] -> [str]
+processCPP :: DynFlags -- ^ GHC's flags
+ -> FilePath -- ^ source file name (for position information)
+ -> String -- ^ source file contents
+ -> [(Located L.Token, String)]
+processCPP dflags fpath s = addSrc . go start . splitCPP $ s
where
- chunk' (c, rest) = c:(chunk rest)
+ start = mkRealSrcLoc (mkFastString fpath) 1 1
+ addSrc = addSourceToTokens start (stringToStringBuffer s)
--- | A bit better lexer then the default, i.e. handles DataKinds quotes
-lex' :: ReadS String
-lex' ('\'' : '\'' : rest) = [("''", rest)]
-lex' str@('\'' : '\\' : _ : '\'' : _) = lex str
-lex' str@('\'' : _ : '\'' : _) = lex str
-lex' ('\'' : rest) = [("'", rest)]
-lex' str = lex str
+ -- Transform a list of Haskell/CPP lines into a list of tokens
+ go :: RealSrcLoc -> [Either String String] -> [Located L.Token]
+ go _ [] = []
+ go pos ls =
+ let (hLinesRight, ls') = span isRight ls
+ (cppLinesLeft, rest) = span isLeft ls'
+
+ hSrc = concat [ hLine | Right hLine <- hLinesRight ]
+ cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ]
+
+ in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of
--- | Split input to "first line" string and the rest of it.
---
--- Ideally, this should be done simply with @'break' (== '\n')@. However,
--- Haskell also allows line-unbreaking (or whatever it is called) so things
--- are not as simple and this function deals with that.
-spanToNewline :: String -> (String, String)
-spanToNewline [] = ([], [])
-spanToNewline ('\\':'\n':str) =
- let (str', rest) = spanToNewline str
- in ('\\':'\n':str', rest)
-spanToNewline str@('\n':_) = ("", str)
-spanToNewline (c:str) =
- let (str', rest) = spanToNewline str
- in (c:str', rest)
+ -- Stuff that fails to lex gets turned into comments
+ L.PFailed _ss _msg ->
+ let (src_pos, failed) = mkToken ITunknown pos hSrc
+ (new_pos, cpp) = mkToken ITlineComment src_pos cppSrc
+ in failed : cpp : go new_pos rest
--- | Split input to whitespace string, (optional) preprocessor directive and
--- the rest of it.
+ -- Successfully lexed
+ L.POk ss toks ->
+ let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc
+ in toks ++ [cpp] ++ go new_pos rest
+
+ -- Manually make a token from a 'String', advancing the cursor position
+ mkToken tok start' str =
+ let end = foldl' advanceSrcLoc start' str
+ in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str))
+
+
+-- | Split apart the initial file into Haskell source lines ('Left' entries) and
+-- CPP lines ('Right' entries).
--
--- Again, using something like @'span' 'isSpace'@ would be nice to chunk input
--- to whitespace. The problem is with /#/ symbol - if it is placed at the very
--- beginning of a line, it should be recognized as preprocessor macro. In any
--- other case, it is ordinary Haskell symbol and can be used to declare
--- operators. Hence, while dealing with whitespace we also check whether there
--- happens to be /#/ symbol just after a newline character - if that is the
--- case, we begin treating the whole line as preprocessor macro.
-spanSpaceOrCpp :: String -> (String, Maybe String, String)
-spanSpaceOrCpp ('\n':'#':str) =
- let (str', rest) = spanToNewline str
- in ("\n", Just $ '#':str', rest)
-spanSpaceOrCpp (c:str')
- | isSpace c =
- let (space, mcpp, rest) = spanSpaceOrCpp str'
- in (c:space, mcpp, rest)
-spanSpaceOrCpp str = ("", Nothing, str)
-
--- | Split input to comment content (including delimiters) and the rest.
+-- All characters in the input are present in the output:
--
--- Again, some more logic than simple 'span' is required because of Haskell
--- comment nesting policy.
-chunkComment :: Int -> String -> (String, String)
-chunkComment _ [] = ("", "")
-chunkComment depth ('{':'-':str) =
- let (c, rest) = chunkComment (depth + 1) str
- in ("{-" ++ c, rest)
-chunkComment depth ('-':'}':str)
- | depth == 1 = ("-}", str)
- | otherwise =
- let (c, rest) = chunkComment (depth - 1) str
- in ("-}" ++ c, rest)
-chunkComment depth (e:str) =
- let (c, rest) = chunkComment depth str
- in (e:c, rest)
-
--- | Assign source location for each chunk in given stream.
-tag :: [String] -> [(Span, String)]
-tag =
- reverse . snd . foldl aux (Position 1 1, [])
- where
- aux (pos, cs) str =
- let pos' = foldl move pos str
- in (pos', (Span pos pos', str):cs)
- move pos '\n' = pos { posRow = posRow pos + 1, posCol = 1 }
- move pos _ = pos { posCol = posCol pos + 1 }
-
--- | Turn unrecognised chunk stream to more descriptive token stream.
-tokenize :: [(Span, String)] -> [Token]
-tokenize =
- map aux
+-- prop> concat . map (either id id) . splitCPP = id
+splitCPP :: String -> [Either String String]
+splitCPP "" = []
+splitCPP s | isCPPline s = Left l : splitCPP rest
+ | otherwise = Right l : splitCPP rest
where
- aux (sp, str) = Token
- { tkType = classify str
- , tkValue = str
- , tkSpan = sp
- }
+ ~(l, rest) = spanToNewline 0 s
--- | Classify given string as appropriate Haskell token.
+
+-- | Heuristic to decide if a line is going to be a CPP line. This should be a
+-- cheap operation since it is going to be run on every line being processed.
--
--- This method is based on Haskell 98 Report lexical structure description:
--- https://www.haskell.org/onlinereport/lexemes.html
+-- Right now it just checks if the first non-whitespace character in the first
+-- five characters of the line is a '#':
--
--- However, this is probably far from being perfect and most probably does not
--- handle correctly all corner cases.
-classify :: String -> TokenType
-classify str
- | "--" `isPrefixOf` str = TkComment
- | "{-#" `isPrefixOf` str = TkPragma
- | "{-" `isPrefixOf` str = TkComment
-classify "''" = TkSpecial
-classify "'" = TkSpecial
-classify str@(c:_)
- | isSpace c = TkSpace
- | isDigit c = TkNumber
- | c `elem` special = TkSpecial
- | str `elem` glyphs = TkGlyph
- | all (`elem` symbols) str = TkOperator
- | c == '#' = TkCpp
- | c == '"' = TkString
- | c == '\'' = TkChar
-classify str
- | str `elem` keywords = TkKeyword
- | isIdentifier str = TkIdentifier
- | otherwise = TkUnknown
-
-keywords :: [String]
-keywords =
- [ "as"
- , "case"
- , "class"
- , "data"
- , "default"
- , "deriving"
- , "do"
- , "else"
- , "hiding"
- , "if"
- , "import"
- , "in"
- , "infix"
- , "infixl"
- , "infixr"
- , "instance"
- , "let"
- , "module"
- , "newtype"
- , "of"
- , "qualified"
- , "then"
- , "type"
- , "where"
- , "forall"
- , "family"
- , "mdo"
- ]
-
-glyphs :: [String]
-glyphs =
- [ ".."
- , ":"
- , "::"
- , "="
- , "\\"
- , "|"
- , "<-"
- , "->"
- , "@"
- , "~"
- , "~#"
- , "=>"
- , "-"
- , "!"
- ]
-
-special :: [Char]
-special = "()[]{},;`"
-
--- TODO: Add support for any Unicode symbol or punctuation.
--- source: http://stackoverflow.com/questions/10548170/what-characters-are-permitted-for-haskell-operators
-symbols :: [Char]
-symbols = "!#$%&*+./<=>?@\\^|-~:"
-
-isIdentifier :: String -> Bool
-isIdentifier (s:str)
- | (isLower' s || isUpper s) && all isAlphaNum' str = True
+-- >>> isCPPline "#define FOO 1"
+-- True
+--
+-- >>> isCPPline "\t\t #ifdef GHC"
+-- True
+--
+-- >>> isCPPline " #endif"
+-- False
+--
+isCPPline :: String -> Bool
+isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5
+
+
+-- | Split a "line" off the front of a string, supporting newline escapes.
+--
+-- By "line", we understand: the shortest substring ending in a '\n' that is not
+--
+-- 1. immediately preceded by a '\\'
+-- 2. not inside some (possibly nested) block comment
+--
+-- All characters in the input are present in the output:
+--
+-- prop> curry (++) . spanToNewLine 0 = id
+spanToNewline :: Int -- ^ open '{-'
+ -> String -- ^ input
+ -> (String, String)
+spanToNewline _ [] = ([], [])
+spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
+spanToNewline n ('\\':'\n':str) =
+ let (str', rest) = spanToNewline n str
+ in ('\\':'\n':str', rest)
+spanToNewline n ('{':'-':str) =
+ let (str', rest) = spanToNewline (n+1) str
+ in ('{':'-':str', rest)
+spanToNewline n ('-':'}':str) =
+ let (str', rest) = spanToNewline (n-1) str
+ in ('-':'}':str', rest)
+spanToNewline n (c:str) =
+ let (str', rest) = spanToNewline n str
+ in (c:str', rest)
+
+
+-- | Turn a list of GHC's 'L.Token' (and their source 'String') into a list of
+-- Haddock's 'T.Token'.
+ghcToks :: [(Located L.Token, String)] -> [T.Token]
+ghcToks = reverse . snd . foldl' go (start, [])
where
- isLower' c = isLower c || c == '_'
- isAlphaNum' c = isAlphaNum c || c == '_' || c == '\''
-isIdentifier _ = False
+ start = mkRealSrcLoc (mkFastString "lexing") 1 1
+
+ go :: (RealSrcLoc, [T.Token]) -- ^ current position, tokens accumulated
+ -> (Located L.Token, String) -- ^ next token, its content
+ -> (RealSrcLoc, [T.Token]) -- ^ new position, new tokens accumulated
+ go (pos, toks) (L l tok, raw) = ( next_pos
+ , classifiedTok ++ maybeToList white ++ toks
+ )
+ where
+ (next_pos, white) = mkWhitespace pos l
+ classifiedTok = [ Token (classify tok) raw rss
+ | RealSrcSpan rss <- [l]
+ , not (null raw)
+ ]
+
+-- | Find the correct amount of whitespace between tokens.
+mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token)
+mkWhitespace prev spn =
+ case spn of
+ UnhelpfulSpan _ -> (prev,Nothing)
+ RealSrcSpan s | null wsstring -> (end, Nothing)
+ | otherwise -> (end, Just (Token TkSpace wsstring wsspan))
+ where
+ start = realSrcSpanStart s
+ end = realSrcSpanEnd s
+ wsspan = mkRealSrcSpan prev start
+ nls = srcLocLine start - srcLocLine prev
+ spaces = if nls == 0 then srcLocCol start - srcLocCol prev
+ else srcLocCol start - 1
+ wsstring = replicate nls '\n' ++ replicate spaces ' '
+
+
+-- | Classify given string as appropriate Haskell token.
+classify :: L.Token -> TokenType
+classify tok =
+ case tok of
+ ITas -> TkKeyword
+ ITcase -> TkKeyword
+ ITclass -> TkKeyword
+ ITdata -> TkKeyword
+ ITdefault -> TkKeyword
+ ITderiving -> TkKeyword
+ ITdo -> TkKeyword
+ ITelse -> TkKeyword
+ IThiding -> TkKeyword
+ ITforeign -> TkKeyword
+ ITif -> TkKeyword
+ ITimport -> TkKeyword
+ ITin -> TkKeyword
+ ITinfix -> TkKeyword
+ ITinfixl -> TkKeyword
+ ITinfixr -> TkKeyword
+ ITinstance -> TkKeyword
+ ITlet -> TkKeyword
+ ITmodule -> TkKeyword
+ ITnewtype -> TkKeyword
+ ITof -> TkKeyword
+ ITqualified -> TkKeyword
+ ITthen -> TkKeyword
+ ITtype -> TkKeyword
+ ITwhere -> TkKeyword
+
+ ITforall {} -> TkKeyword
+ ITexport -> TkKeyword
+ ITlabel -> TkKeyword
+ ITdynamic -> TkKeyword
+ ITsafe -> TkKeyword
+ ITinterruptible -> TkKeyword
+ ITunsafe -> TkKeyword
+ ITstdcallconv -> TkKeyword
+ ITccallconv -> TkKeyword
+ ITcapiconv -> TkKeyword
+ ITprimcallconv -> TkKeyword
+ ITjavascriptcallconv -> TkKeyword
+ ITmdo -> TkKeyword
+ ITfamily -> TkKeyword
+ ITrole -> TkKeyword
+ ITgroup -> TkKeyword
+ ITby -> TkKeyword
+ ITusing -> TkKeyword
+ ITpattern -> TkKeyword
+ ITstatic -> TkKeyword
+ ITstock -> TkKeyword
+ ITanyclass -> TkKeyword
+
+ ITunit -> TkKeyword
+ ITsignature -> TkKeyword
+ ITdependency -> TkKeyword
+ ITrequires -> TkKeyword
+
+ ITinline_prag {} -> TkPragma
+ ITspec_prag {} -> TkPragma
+ ITspec_inline_prag {} -> TkPragma
+ ITsource_prag {} -> TkPragma
+ ITrules_prag {} -> TkPragma
+ ITwarning_prag {} -> TkPragma
+ ITdeprecated_prag {} -> TkPragma
+ ITline_prag -> TkPragma
+ ITscc_prag {} -> TkPragma
+ ITgenerated_prag {} -> TkPragma
+ ITcore_prag {} -> TkPragma
+ ITunpack_prag {} -> TkPragma
+ ITnounpack_prag {} -> TkPragma
+ ITann_prag {} -> TkPragma
+ ITcomplete_prag {} -> TkPragma
+ ITclose_prag -> TkPragma
+ IToptions_prag {} -> TkPragma
+ ITinclude_prag {} -> TkPragma
+ ITlanguage_prag -> TkPragma
+ ITvect_prag {} -> TkPragma
+ ITvect_scalar_prag {} -> TkPragma
+ ITnovect_prag {} -> TkPragma
+ ITminimal_prag {} -> TkPragma
+ IToverlappable_prag {} -> TkPragma
+ IToverlapping_prag {} -> TkPragma
+ IToverlaps_prag {} -> TkPragma
+ ITincoherent_prag {} -> TkPragma
+ ITctype {} -> TkPragma
+
+ ITdotdot -> TkGlyph
+ ITcolon -> TkGlyph
+ ITdcolon {} -> TkGlyph
+ ITequal -> TkGlyph
+ ITlam -> TkGlyph
+ ITlcase -> TkGlyph
+ ITvbar -> TkGlyph
+ ITlarrow {} -> TkGlyph
+ ITrarrow {} -> TkGlyph
+ ITat -> TkGlyph
+ ITtilde -> TkGlyph
+ ITtildehsh -> TkGlyph
+ ITdarrow {} -> TkGlyph
+ ITminus -> TkGlyph
+ ITbang -> TkGlyph
+ ITdot -> TkOperator
+ ITtypeApp -> TkGlyph
+
+ ITbiglam -> TkGlyph
+
+ ITocurly -> TkSpecial
+ ITccurly -> TkSpecial
+ ITvocurly -> TkSpecial
+ ITvccurly -> TkSpecial
+ ITobrack -> TkSpecial
+ ITopabrack -> TkSpecial
+ ITcpabrack -> TkSpecial
+ ITcbrack -> TkSpecial
+ IToparen -> TkSpecial
+ ITcparen -> TkSpecial
+ IToubxparen -> TkSpecial
+ ITcubxparen -> TkSpecial
+ ITsemi -> TkSpecial
+ ITcomma -> TkSpecial
+ ITunderscore -> TkIdentifier
+ ITbackquote -> TkSpecial
+ ITsimpleQuote -> TkSpecial
+
+ ITvarid {} -> TkIdentifier
+ ITconid {} -> TkIdentifier
+ ITvarsym {} -> TkOperator
+ ITconsym {} -> TkOperator
+ ITqvarid {} -> TkIdentifier
+ ITqconid {} -> TkIdentifier
+ ITqvarsym {} -> TkOperator
+ ITqconsym {} -> TkOperator
+
+ ITdupipvarid {} -> TkUnknown
+ ITlabelvarid {} -> TkUnknown
+
+ ITchar {} -> TkChar
+ ITstring {} -> TkString
+ ITinteger {} -> TkNumber
+ ITrational {} -> TkNumber
+
+ ITprimchar {} -> TkChar
+ ITprimstring {} -> TkString
+ ITprimint {} -> TkNumber
+ ITprimword {} -> TkNumber
+ ITprimfloat {} -> TkNumber
+ ITprimdouble {} -> TkNumber
+
+ ITopenExpQuote {} -> TkSpecial
+ ITopenPatQuote -> TkSpecial
+ ITopenDecQuote -> TkSpecial
+ ITopenTypQuote -> TkSpecial
+ ITcloseQuote {} -> TkSpecial
+ ITopenTExpQuote {} -> TkSpecial
+ ITcloseTExpQuote -> TkSpecial
+ ITidEscape {} -> TkUnknown
+ ITparenEscape -> TkSpecial
+ ITidTyEscape {} -> TkUnknown
+ ITparenTyEscape -> TkSpecial
+ ITtyQuote -> TkSpecial
+ ITquasiQuote {} -> TkUnknown
+ ITqQuasiQuote {} -> TkUnknown
+
+ ITproc -> TkKeyword
+ ITrec -> TkKeyword
+ IToparenbar {} -> TkGlyph
+ ITcparenbar {} -> TkGlyph
+ ITlarrowtail {} -> TkGlyph
+ ITrarrowtail {} -> TkGlyph
+ ITLarrowtail {} -> TkGlyph
+ ITRarrowtail {} -> TkGlyph
+
+ ITunknown {} -> TkUnknown
+ ITeof -> TkUnknown
+
+ -- Line comments are only supposed to start with '--'. Starting with '#'
+ -- means that this was probably a CPP.
+ ITlineComment s
+ | isCPPline s -> TkCpp
+ | otherwise -> TkComment
+
+ ITdocCommentNext {} -> TkComment
+ ITdocCommentPrev {} -> TkComment
+ ITdocCommentNamed {} -> TkComment
+ ITdocSection {} -> TkComment
+ ITdocOptions {} -> TkComment
+
+ -- The lexer considers top-level pragmas as comments (see `pragState` in
+ -- the GHC lexer for more), so we have to manually reverse this. The
+ -- following is a hammer: it smashes _all_ pragma-like block comments into
+ -- pragmas.
+ ITblockComment c
+ | isPrefixOf "{-#" c
+ , isSuffixOf "#-}" c -> TkPragma
+ | otherwise -> TkComment
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
index 15793f0c..27bf7605 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Renderer.hs
@@ -82,7 +82,7 @@ header mcss mjs =
tokenGroup :: SrcMap -> TokenGroup -> Html
tokenGroup _ (GrpNormal tok@(Token { .. }))
- | tkType == TkSpace = renderSpace (posRow . spStart $ tkSpan) tkValue
+ | tkType == TkSpace = renderSpace (GHC.srcSpanStartLine tkSpan) tkValue
| otherwise = tokenSpan tok ! attrs
where
attrs = [ multiclass . tokenStyle $ tkType ]
@@ -155,7 +155,7 @@ internalHyperlink name content =
Html.anchor content ! [ Html.href $ "#" ++ internalAnchorIdent name ]
externalNameHyperlink :: SrcMap -> GHC.Name -> Html -> Html
-externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of
+externalNameHyperlink srcs name content = case Map.lookup mdl srcs of
Just SrcLocal -> Html.anchor content !
[ Html.href $ hypSrcModuleNameUrl mdl name ]
Just (SrcExternal path) -> Html.anchor content !
@@ -165,12 +165,14 @@ externalNameHyperlink (srcs, _) name content = case Map.lookup mdl srcs of
mdl = GHC.nameModule name
externalModHyperlink :: SrcMap -> GHC.ModuleName -> Html -> Html
-externalModHyperlink (_, srcs) name content = case Map.lookup name srcs of
- Just SrcLocal -> Html.anchor content !
+externalModHyperlink srcs name content =
+ let srcs' = Map.mapKeys GHC.moduleName srcs in
+ case Map.lookup name srcs' of
+ Just SrcLocal -> Html.anchor content !
[ Html.href $ hypSrcModuleUrl' name ]
- Just (SrcExternal path) -> Html.anchor content !
+ Just (SrcExternal path) -> Html.anchor content !
[ Html.href $ path </> hypSrcModuleUrl' name ]
- Nothing -> content
+ Nothing -> content
renderSpace :: Int -> String -> Html
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
index d8ae89e4..e377471e 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs
@@ -4,8 +4,6 @@ module Haddock.Backends.Hyperlinker.Types where
import qualified GHC
import Data.Map (Map)
-import qualified Data.Map as Map
-
data Token = Token
{ tkType :: TokenType
@@ -14,23 +12,8 @@ data Token = Token
}
deriving (Show)
-data Position = Position
- { posRow :: !Int
- , posCol :: !Int
- }
- deriving (Eq, Ord, Show)
-
-data Span = Span
- { spStart :: !Position
- , spEnd :: !Position
- }
- deriving (Show)
-
--- | Tests whether the first span "contains" the other span, meaning
--- that it covers at least as much source code. True where spans are equal.
-containsSpan :: Span -> Span -> Bool
-containsSpan s1 s2 =
- spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2
+type Position = GHC.RealSrcLoc
+type Span = GHC.RealSrcSpan
data TokenType
= TkIdentifier
@@ -80,15 +63,5 @@ data SrcPath
| SrcLocal
-- | Mapping from modules to cross-package source paths.
---
--- This mapping is actually a pair of maps instead of just one map. The reason
--- for this is because when hyperlinking modules in import lists we have no
--- 'GHC.Module' available. On the other hand, we can't just use map with
--- 'GHC.ModuleName' as indices because certain modules may have common name
--- but originate in different packages. Hence, we use both /rich/ and /poor/
--- versions, where the /poor/ is just projection of /rich/ one cached in pair
--- for better performance.
-type SrcMap = (Map GHC.Module SrcPath, Map GHC.ModuleName SrcPath)
+type SrcMap = Map GHC.Module SrcPath
-mkSrcMap :: Map GHC.Module SrcPath -> SrcMap
-mkSrcMap srcs = (srcs, Map.mapKeys GHC.moduleName srcs)
diff --git a/haddock-api/src/Haddock/Interface/Create.hs b/haddock-api/src/Haddock/Interface/Create.hs
index 2a56e87a..4309163f 100644
--- a/haddock-api/src/Haddock/Interface/Create.hs
+++ b/haddock-api/src/Haddock/Interface/Create.hs
@@ -20,7 +20,6 @@
module Haddock.Interface.Create (createInterface) where
import Documentation.Haddock.Doc (metaDocAppend)
-import Documentation.Haddock.Utf8 as Utf8
import Haddock.Types
import Haddock.Options
import Haddock.GhcUtils
@@ -33,15 +32,14 @@ import Haddock.Backends.Hyperlinker.Parser as Hyperlinker
import Data.Bifunctor
import Data.Bitraversable
-import qualified Data.ByteString as BS
import qualified Data.Map as M
import Data.Map (Map)
import Data.List
import Data.Maybe
import Data.Ord
import Control.Applicative
-import Control.Exception (evaluate)
import Control.Monad
+import Control.DeepSeq
import Data.Traversable
import Avail hiding (avail)
@@ -160,7 +158,7 @@ createInterface tm flags modMap instIfaceMap = do
modWarn <- liftErrMsg (moduleWarning dflags gre warnings)
- tokenizedSrc <- mkMaybeTokenizedSrc flags tm
+ tokenizedSrc <- mkMaybeTokenizedSrc dflags flags tm
return $! Interface {
ifaceMod = mdl
@@ -1137,12 +1135,12 @@ seqList :: [a] -> ()
seqList [] = ()
seqList (x : xs) = x `seq` seqList xs
-mkMaybeTokenizedSrc :: [Flag] -> TypecheckedModule
+mkMaybeTokenizedSrc :: DynFlags -> [Flag] -> TypecheckedModule
-> ErrMsgGhc (Maybe [RichToken])
-mkMaybeTokenizedSrc flags tm
+mkMaybeTokenizedSrc dflags flags tm
| Flag_HyperlinkedSource `elem` flags = case renamedSource tm of
Just src -> do
- tokens <- liftGhcToErrMsgGhc . liftIO $ mkTokenizedSrc summary src
+ tokens <- liftGhcToErrMsgGhc (liftIO (mkTokenizedSrc dflags summary src))
return $ Just tokens
Nothing -> do
liftErrMsg . tell . pure $ concat
@@ -1155,12 +1153,14 @@ mkMaybeTokenizedSrc flags tm
where
summary = pm_mod_summary . tm_parsed_module $ tm
-mkTokenizedSrc :: ModSummary -> RenamedSource -> IO [RichToken]
-mkTokenizedSrc ms src = do
- -- make sure to read the whole file at once otherwise
+mkTokenizedSrc :: DynFlags -> ModSummary -> RenamedSource -> IO [RichToken]
+mkTokenizedSrc dflags ms src = do
+ -- make sure to read the whole file at once otherwise
-- we run out of file descriptors (see #495)
- rawSrc <- BS.readFile (msHsFilePath ms) >>= evaluate
- return $ Hyperlinker.enrich src (Hyperlinker.parse (decodeUtf8 rawSrc))
+ file <- force <$> readFile (filepath)
+ return $ Hyperlinker.enrich src (Hyperlinker.parse dflags filepath file)
+ where
+ filepath = msHsFilePath ms
-- | Find a stand-alone documentation comment by its name.
findNamedDoc :: String -> [HsDecl GhcRn] -> ErrMsgM (Maybe HsDocString)
diff --git a/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs b/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs
index 8cd2690e..dcb30e41 100644
--- a/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs
+++ b/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs
@@ -4,95 +4,138 @@ module Haddock.Backends.Hyperlinker.ParserSpec (main, spec) where
import Test.Hspec
import Test.QuickCheck
+import qualified GHC
+import Control.Monad.IO.Class
+
+import Haddock (getGhcDirs)
import Haddock.Backends.Hyperlinker.Parser
import Haddock.Backends.Hyperlinker.Types
+withDynFlags :: (GHC.DynFlags -> IO ()) -> IO ()
+withDynFlags cont = do
+ libDir <- fmap snd (getGhcDirs [])
+ GHC.runGhc (Just libDir) $ do
+ dflags <- GHC.getSessionDynFlags
+ liftIO $ cont dflags
+
main :: IO ()
main = hspec spec
spec :: Spec
-spec = do
- describe "parse" parseSpec
+spec = describe "parse" parseSpec
-parseSpec :: Spec
-parseSpec = do
+-- | Defined for its instance of 'Arbitrary'
+newtype NoTabs = NoTabs String deriving (Show, Eq)
- it "is total" $
- property $ \src -> length (parse src) `shouldSatisfy` (>= 0)
+noTabs :: String -> Bool
+noTabs = all (\c -> c `notElem` "\r\t\f\v")
- it "retains file layout" $
- property $ \src -> concatMap tkValue (parse src) == src
+-- | Does not generate content with space characters other than ' ' and '\n'
+instance Arbitrary NoTabs where
+ arbitrary = fmap NoTabs (arbitrary `suchThat` noTabs)
+ shrink (NoTabs src) = [ NoTabs shrunk | shrunk <- shrink src, noTabs shrunk ]
- context "when parsing single-line comments" $ do
- it "should ignore content until the end of line" $
- "-- some very simple comment\nidentifier"
- `shouldParseTo`
- [TkComment, TkSpace, TkIdentifier]
+parseSpec :: Spec
+parseSpec = around withDynFlags $ do
- it "should allow endline escaping" $
- "-- first line\\\nsecond line\\\nand another one"
- `shouldParseTo`
- [TkComment]
+ it "is total" $ \dflags ->
+ property $ \src -> length (parse dflags "" src) `shouldSatisfy` (>= 0)
- context "when parsing multi-line comments" $ do
+ it "retains file layout" $ \dflags ->
+ property $ \(NoTabs src) -> concatMap tkValue (parse dflags "" src) == src
- it "should support nested comments" $
- "{- comment {- nested -} still comment -} {- next comment -}"
- `shouldParseTo`
- [TkComment, TkSpace, TkComment]
+ context "when parsing single-line comments" $ do
+
+ it "should ignore content until the end of line" $ \dflags ->
+ shouldParseTo
+ "-- some very simple comment\nidentifier"
+ [TkComment, TkSpace, TkIdentifier]
+ dflags
- it "should distinguish compiler pragma" $
- "{- comment -}{-# LANGUAGE GADTs #-}{- comment -}"
- `shouldParseTo`
- [TkComment, TkPragma, TkComment]
+ it "should allow endline escaping" $ \dflags ->
+ shouldParseTo
+ "#define first line\\\nsecond line\\\nand another one"
+ [TkCpp]
+ dflags
- it "should recognize preprocessor directives" $ do
- "\n#define foo bar" `shouldParseTo` [TkSpace, TkCpp]
- "x # y" `shouldParseTo`
- [TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier]
+ context "when parsing multi-line comments" $ do
- it "should distinguish basic language constructs" $ do
- "(* 2) <$> (\"abc\", foo)" `shouldParseTo`
+ it "should support nested comments" $ \dflags ->
+ shouldParseTo
+ "{- comment {- nested -} still comment -} {- next comment -}"
+ [TkComment, TkSpace, TkComment]
+ dflags
+
+ it "should distinguish compiler pragma" $ \dflags ->
+ shouldParseTo
+ "{- comment -}{-# LANGUAGE GADTs #-}{- comment -}"
+ [TkComment, TkPragma, TkComment]
+ dflags
+
+ it "should recognize preprocessor directives" $ \dflags -> do
+ shouldParseTo
+ "\n#define foo bar"
+ [TkSpace, TkCpp]
+ dflags
+ shouldParseTo
+ "x # y"
+ [TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier]
+ dflags
+
+ it "should distinguish basic language constructs" $ \dflags -> do
+
+ shouldParseTo
+ "(* 2) <$> (\"abc\", foo)"
[ TkSpecial, TkOperator, TkSpace, TkNumber, TkSpecial
, TkSpace, TkOperator, TkSpace
, TkSpecial, TkString, TkSpecial, TkSpace, TkIdentifier, TkSpecial
]
- "let foo' = foo in foo' + foo'" `shouldParseTo`
+ dflags
+
+ shouldParseTo
+ "let foo' = foo in foo' + foo'"
[ TkKeyword, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkSpace, TkKeyword, TkSpace
, TkIdentifier, TkSpace, TkOperator, TkSpace, TkIdentifier
]
- "square x = y^2 where y = x" `shouldParseTo`
+ dflags
+
+ shouldParseTo
+ "square x = y^2 where y = x"
[ TkIdentifier, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkOperator, TkNumber
, TkSpace, TkKeyword, TkSpace
, TkIdentifier, TkSpace, TkGlyph, TkSpace, TkIdentifier
]
+ dflags
- it "should parse do-notation syntax" $ do
- "do { foo <- getLine; putStrLn foo }" `shouldParseTo`
+ it "should parse do-notation syntax" $ \dflags -> do
+ shouldParseTo
+ "do { foo <- getLine; putStrLn foo }"
[ TkKeyword, TkSpace, TkSpecial, TkSpace
, TkIdentifier, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkSpecial, TkSpace
, TkIdentifier, TkSpace, TkIdentifier, TkSpace, TkSpecial
]
-
- unlines
- [ "do"
- , " foo <- getLine"
- , " putStrLn foo"
- ] `shouldParseTo`
+ dflags
+
+ shouldParseTo
+ (unlines
+ [ "do"
+ , " foo <- getLine"
+ , " putStrLn foo"
+ ])
[ TkKeyword, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace, TkIdentifier, TkSpace
, TkIdentifier, TkSpace, TkIdentifier, TkSpace
]
-
-
-shouldParseTo :: String -> [TokenType] -> Expectation
-str `shouldParseTo` tokens = map tkType (parse str) `shouldBe` tokens
+ dflags
+ where
+ shouldParseTo :: String -> [TokenType] -> GHC.DynFlags -> Expectation
+ shouldParseTo str tokens dflags = map tkType (parse dflags "" str) `shouldBe` tokens
diff --git a/hypsrc-test/ref/src/Identifiers.html b/hypsrc-test/ref/src/Identifiers.html
index f52db4ab..ce69ad37 100644
--- a/hypsrc-test/ref/src/Identifiers.html
+++ b/hypsrc-test/ref/src/Identifiers.html
@@ -827,11 +827,7 @@
> </span
><a href="Identifiers.html#norf"
><span class="hs-identifier hs-var"
- >Identifiers</span
- ><span class="hs-operator hs-var"
- >.</span
- ><span class="hs-identifier hs-var"
- >norf</span
+ >Identifiers.norf</span
></a
><span
> </span
@@ -931,4 +927,4 @@
></pre
></body
></html
-> \ No newline at end of file
+>