aboutsummaryrefslogtreecommitdiff
path: root/haddock-api/src/Haddock/GhcUtils.hs
diff options
context:
space:
mode:
authorAlec Theriault <alec.theriault@gmail.com>2019-01-31 01:37:25 -0800
committerGitHub <noreply@github.com>2019-01-31 01:37:25 -0800
commit1b26460fb3b5df5215cc1e6715661cbc7c950085 (patch)
tree0b5ac6285ecf432021e8365567d48eeb6249f44b /haddock-api/src/Haddock/GhcUtils.hs
parent4c02498a24e6ceb775528bed043b66340296ad06 (diff)
Use `.hie` files for the Hyperlinker backend (#977)
# Summary This is a large architectural change to the Hyperlinker. * extract link (and now also type) information from `.hie` instead of doing ad-hoc SYB traversals of the `RenamedSource`. Also adds a superb type-on-hover feature (#715). * re-engineer the lexer to avoid needless string conversions. By going directly through GHC's `P` monad and taking bytestring slices, we avoid a ton of allocation and have better handling of position pragmas and CPP. In terms of performance, the Haddock side of things has gotten _much_ more efficient. Unfortunately, much of this is cancelled out by the increased GHC workload for generating `.hie` files. For the full set of boot libs (including `ghc`-the-library) * the sum of total time went down by 9-10% overall * the sum of total allocations went down by 6-7% # Motivation Haddock is moving towards working entirely over `.hi` and `.hie` files. This change means we no longer need the `RenamedSource` from `TypecheckedModule` (something which is _not_ in `.hi` files). # Details Along the way a bunch of things were fixed: * Cross package (and other) links are now more reliable (#496) * The lexer tries to recover from errors on every line (instead of at CPP boundaries) * `LINE`/`COLUMN` pragmas are taken into account * filter out zero length tokens before rendering * avoid recomputing the `ModuleName`-based `SrcMap` * remove the last use of `Documentation.Haddock.Utf8` (see #998) * restructure temporary folder logic for `.hi`/`.hie` model
Diffstat (limited to 'haddock-api/src/Haddock/GhcUtils.hs')
-rw-r--r--haddock-api/src/Haddock/GhcUtils.hs134
1 files changed, 131 insertions, 3 deletions
diff --git a/haddock-api/src/Haddock/GhcUtils.hs b/haddock-api/src/Haddock/GhcUtils.hs
index cdaf6ae4..a342de00 100644
--- a/haddock-api/src/Haddock/GhcUtils.hs
+++ b/haddock-api/src/Haddock/GhcUtils.hs
@@ -19,10 +19,12 @@ module Haddock.GhcUtils where
import Control.Arrow
+import Data.Char ( isSpace )
+
import Haddock.Types( DocNameI )
import Exception
-import Outputable
+import Outputable ( Outputable, panic, showPpr )
import Name
import NameSet
import Module
@@ -30,6 +32,14 @@ import HscTypes
import GHC
import Class
import DynFlags
+import SrcLoc ( advanceSrcLoc )
+
+import StringBuffer ( StringBuffer )
+import qualified StringBuffer as S
+
+import Data.ByteString ( ByteString )
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Internal as BS
moduleString :: Module -> String
@@ -413,11 +423,129 @@ minimalDef n = do
-------------------------------------------------------------------------------
-setObjectDir, setHiDir, setStubDir, setOutputDir :: String -> DynFlags -> DynFlags
+setObjectDir, setHiDir, setHieDir, setStubDir, setOutputDir :: String -> DynFlags -> DynFlags
setObjectDir f d = d{ objectDir = Just f}
setHiDir f d = d{ hiDir = Just f}
+setHieDir f d = d{ hieDir = Just f}
setStubDir f d = d{ stubDir = Just f
, includePaths = addGlobalInclude (includePaths d) [f] }
-- -stubdir D adds an implicit -I D, so that gcc can find the _stub.h file
-- \#included from the .hc file when compiling with -fvia-C.
-setOutputDir f = setObjectDir f . setHiDir f . setStubDir f
+setOutputDir f = setObjectDir f . setHiDir f . setHieDir f . setStubDir f
+
+
+-------------------------------------------------------------------------------
+-- * 'StringBuffer' and 'ByteString'
+-------------------------------------------------------------------------------
+-- We get away with a bunch of these functions because 'StringBuffer' and
+-- 'ByteString' have almost exactly the same structure.
+
+-- | Convert a UTF-8 encoded 'ByteString' into a 'StringBuffer. This really
+-- relies on the internals of both 'ByteString' and 'StringBuffer'.
+--
+-- /O(n)/ (but optimized into a @memcpy@ by @bytestring@ under the hood)
+stringBufferFromByteString :: ByteString -> StringBuffer
+stringBufferFromByteString bs =
+ let BS.PS fp off len = bs <> BS.pack [0,0,0]
+ in S.StringBuffer { S.buf = fp, S.len = len - 3, S.cur = off }
+
+-- | Take the first @n@ /bytes/ of the 'StringBuffer' and put them in a
+-- 'ByteString'.
+--
+-- /O(1)/
+takeStringBuffer :: Int -> StringBuffer -> ByteString
+takeStringBuffer !n !(S.StringBuffer fp _ cur) = BS.PS fp cur n
+
+-- | Return the prefix of the first 'StringBuffer' that /isn't/ in the second
+-- 'StringBuffer'. **The behavior is undefined if the 'StringBuffers' use
+-- separate buffers.**
+--
+-- /O(1)/
+splitStringBuffer :: StringBuffer -> StringBuffer -> ByteString
+splitStringBuffer buf1 buf2 = takeStringBuffer n buf1
+ where n = S.byteDiff buf1 buf2
+
+-- | Split the 'StringBuffer' at the next newline (or the end of the buffer).
+-- Also: initial position is passed in and the updated position is returned.
+--
+-- /O(n)/ (but /O(1)/ space)
+spanLine :: RealSrcLoc -> StringBuffer -> (ByteString, RealSrcLoc, StringBuffer)
+spanLine !loc !buf = go loc buf
+ where
+
+ go !l !b
+ | not (S.atEnd b)
+ = case S.nextChar b of
+ ('\n', b') -> (splitStringBuffer buf b', advanceSrcLoc l '\n', b')
+ (c, b') -> go (advanceSrcLoc l c) b'
+ | otherwise
+ = (splitStringBuffer buf b, advanceSrcLoc l '\n', b)
+
+-- | Given a start position and a buffer with that start position, split the
+-- buffer at an end position.
+--
+-- /O(n)/ (but /O(1)/ space)
+spanPosition :: RealSrcLoc -- ^ start of buffeer
+ -> RealSrcLoc -- ^ position until which to take
+ -> StringBuffer -- ^ buffer from which to take
+ -> (ByteString, StringBuffer)
+spanPosition !start !end !buf = go start buf
+ where
+
+ go !l !b
+ | l < end
+ , not (S.atEnd b)
+ , (c, b') <- S.nextChar b
+ = go (advanceSrcLoc l c) b'
+ | otherwise
+ = (splitStringBuffer buf b, b)
+
+-- | Try to parse a line of CPP from the from of the buffer. A \"line\" of CPP
+-- consists of
+--
+-- * at most 10 whitespace characters, including at least one newline
+-- * a @#@ character
+-- * keep parsing lines until you find a line not ending in @\\@.
+--
+-- This is chock full of heuristics about what a line of CPP is.
+--
+-- /O(n)/ (but /O(1)/ space)
+tryCppLine :: RealSrcLoc -> StringBuffer -> Maybe (ByteString, RealSrcLoc, StringBuffer)
+tryCppLine !loc !buf = spanSpace (S.prevChar buf '\n' == '\n') loc buf
+ where
+
+ -- Keep consuming space characters until we hit either a @#@ or something
+ -- else. If we hit a @#@, start parsing CPP
+ spanSpace !seenNl !l !b
+ | S.atEnd b
+ = Nothing
+ | otherwise
+ = case S.nextChar b of
+ ('#' , b') | not (S.atEnd b')
+ , ('-', b'') <- S.nextChar b'
+ , ('}', _) <- S.nextChar b''
+ -> Nothing -- Edge case exception for @#-}@
+ | seenNl
+ -> Just (spanCppLine (advanceSrcLoc l '#') b') -- parse CPP
+ | otherwise
+ -> Nothing -- We didn't see a newline, so this can't be CPP!
+
+ (c , b') | isSpace c -> spanSpace (seenNl || c == '\n')
+ (advanceSrcLoc l c) b'
+ | otherwise -> Nothing
+
+ -- Consume a CPP line to its "end" (basically the first line that ends not
+ -- with a @\@ character)
+ spanCppLine !l !b
+ | S.atEnd b
+ = (splitStringBuffer buf b, l, b)
+ | otherwise
+ = case S.nextChar b of
+ ('\\', b') | not (S.atEnd b')
+ , ('\n', b'') <- S.nextChar b'
+ -> spanCppLine (advanceSrcLoc (advanceSrcLoc l '\\') '\n') b''
+
+ ('\n', b') -> (splitStringBuffer buf b', advanceSrcLoc l '\n', b')
+
+ (c , b') -> spanCppLine (advanceSrcLoc l c) b'
+