{-# LANGUAGE TupleSections #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE Rank2Types #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE DuplicateRecordFields #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module HaskellCodeExplorer.PackageInfo ( createPackageInfo , testCreatePkgInfo , ghcVersion ) where import Control.DeepSeq ( deepseq ) import Control.Exception ( IOException , SomeAsyncException , SomeException , fromException , throw , try ) import Control.Monad ( foldM , unless , when ) import Control.Monad.Catch ( handle ) import Control.Monad.Extra ( anyM , findM ) import Control.Monad.Logger ( LoggingT(..) , MonadLogger(..) , MonadLoggerIO(..) , logDebugN , logErrorN , logInfoN , logWarnN , runStdoutLoggingT ) import qualified Data.ByteString as BS import qualified Data.HashMap.Strict as HM import Data.IORef ( readIORef ) import qualified Data.IntMap.Strict as IM import qualified Data.List as L import qualified Data.List.NonEmpty as NE import qualified Data.Map as Map import Data.Maybe ( fromMaybe , isJust , mapMaybe , maybeToList ) import qualified Data.Set as S import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Data.Version ( Version(..) , makeVersion , showVersion ) import Distribution.Helper ( ChComponentInfo(..) , ChComponentName(..) , ChEntrypoint(..) , ChModuleName(..) , DistDir(..) , ProjLoc(..) , SCabalProjType(..) , UnitInfo(..) , allUnits , mkQueryEnv , pPackageName , pSourceDir , pUnits , projectPackages , runQuery , uComponentName ) import GHC ( Backend(..) , DynFlags(..) , GeneralFlag(..) , GhcLink(..) , GhcMode(..) , LoadHowMuch(..) , ModLocation(..) , ModSummary(..) , getModuleGraph , getSession , getSessionDynFlags , guessTarget , load , moduleName , moduleNameString , noLoc , parseModule , runGhcT , setSessionDynFlags , setTargets , topSortModuleGraph , typecheckModule ) import GHC.Data.Graph.Directed ( flattenSCCs ) import GHC.Driver.Env ( hsc_EPS , hsc_HPT , hsc_units ) import GHC.Driver.Monad ( GhcT(..) , liftIO ) import GHC.Driver.Session ( gopt_set , parseDynamicFlagsCmdLine ) import GHC.Paths ( libdir ) import GHC.Unit.Module.Graph ( filterToposortToModules ) import GHC.Utils.Exception ( ExceptionMonad ) import HaskellCodeExplorer.GhcUtils ( isHsBoot , toText ) import HaskellCodeExplorer.ModuleInfo ( ModuleDependencies , createModuleInfo ) import qualified HaskellCodeExplorer.Types as HCE import Prelude hiding ( id ) import System.Directory ( canonicalizePath , doesFileExist , findExecutable , getCurrentDirectory , getDirectoryContents , makeAbsolute , setCurrentDirectory ) import qualified System.Directory.Tree as DT import System.Exit ( exitFailure ) import System.FilePath ( () , addTrailingPathSeparator , joinPath , normalise , replaceExtension , splitDirectories , splitPath , takeBaseName , takeDirectory , takeExtension , takeFileName ) import System.FilePath.Find ( (==?) , always , fileName , find ) import System.Process ( readProcess ) testCreatePkgInfo :: FilePath -> IO (HCE.PackageInfo HCE.ModuleInfo) testCreatePkgInfo pkgPath = runStdoutLoggingT $ createPackageInfo pkgPath Nothing HCE.AfterPreprocessing [] [] createPackageInfo :: FilePath -- ^ Path to a Cabal package -> Maybe FilePath -- ^ Relative path to a dist directory -> HCE.SourceCodePreprocessing -- ^ Before or after preprocessor -> [String] -- ^ Options for GHC -> [String] -- ^ Directories to ignore -> LoggingT IO (HCE.PackageInfo HCE.ModuleInfo) createPackageInfo packageDirectoryPath mbDistDirRelativePath sourceCodePreprocessing additionalGhcOptions ignoreDirectories = do packageDirectoryAbsPath <- liftIO $ makeAbsolute packageDirectoryPath currentDirectory <- liftIO getCurrentDirectory liftIO $ setCurrentDirectory packageDirectoryAbsPath distDir <- case mbDistDirRelativePath of Just path -> return $ packageDirectoryAbsPath path Nothing -> return $ packageDirectoryAbsPath "dist-newstyle" cabalFiles <- liftIO $ length . filter (\path -> takeFileName path /= ".cabal" && takeExtension path == ".cabal" ) <$> getDirectoryContents packageDirectoryAbsPath _ <- if cabalFiles == 0 then do logErrorN $ T.concat ["No .cabal file found in ", T.pack packageDirectoryAbsPath] liftIO exitFailure else when (cabalFiles >= 2) $ do logErrorN $ T.concat [ "Found more than one .cabal file in " , T.pack packageDirectoryAbsPath ] liftIO exitFailure cabalHelperQueryEnv <- liftIO $ mkQueryEnv (ProjLocV2Dir packageDirectoryAbsPath) (DistDirCabal SCV2 distDir) packages <- liftIO $ NE.toList <$> runQuery projectPackages cabalHelperQueryEnv logDebugN $ "packages: " <> (T.pack $ show $ zip3 (pPackageName <$> packages) (pSourceDir <$> packages) ((mapMaybe uComponentName . NE.toList . pUnits) <$> packages) ) mbPackage <- liftIO $ findM (\pkg -> do dir1 <- (canonicalizePath . pSourceDir) pkg dir2 <- canonicalizePath packageDirectoryAbsPath return $ dir1 == dir2 ) packages package <- case mbPackage of Just package' -> return package' Nothing -> do logWarnN $ "Cannot find a package with sourceDir in the same directory (" <> T.pack (packageDirectoryAbsPath "") <> "), indexing the first package by default." <> "Alternatively, try using absolute path for -p." return $ head packages units <- liftIO $ (filter (\((pkgName, _), _, _) -> pkgName == pPackageName package)) . NE.toList <$> runQuery (allUnits (\unit -> ( uiPackageId unit , uiCompilerId unit , map (\comp -> ( (ciGhcOptions comp , ciComponentName comp) , (ciEntrypoints comp, ciComponentName comp) , (ciSourceDirs comp , ciComponentName comp) ) ) $ (Map.elems . uiComponents) unit ) ) ) cabalHelperQueryEnv -- TODO: we are assuming all pakcageVersion and packageCompilerVersion are the same let ((packageName, packageVersion), (_, packageCompilerVersion), _) = head units compInfo = concatMap (\(_, _, comp) -> comp) units currentPackageId = HCE.PackageId (T.pack packageName) packageVersion logDebugN $ "compinfo: " <> (T.pack $ show compInfo) unless ( take 3 (versionBranch packageCompilerVersion) == take 3 (versionBranch ghcVersion) ) $ do logErrorN $ T.concat [ "GHC version mismatch. haskell-code-indexer: " , T.pack $ showVersion ghcVersion , ", package: " , T.pack $ showVersion packageCompilerVersion ] liftIO exitFailure logInfoN $ T.append "Indexing " $ HCE.packageIdToText currentPackageId let buildComponents = L.map (\((options, compName), (entrypoint, _), (srcDirs, _)) -> ( chComponentNameToComponentId compName , options , chEntrypointsToModules entrypoint , srcDirs , chComponentNameToComponentType compName ) ) . L.sortBy (\((_, compName1), _, _) ((_, compName2), _, _) -> compare compName1 compName2 ) $ compInfo libSrcDirs = concatMap (\(_, _, _, srcDirs, _) -> srcDirs) . filter (\(_, _, _, _, compType) -> HCE.isLibrary compType) $ buildComponents (indexedModules, (_fileMapResult, _defSiteMapResult, modNameMapResult)) <- foldM (\(modules, (fileMap, defSiteMap, modNameMap)) (compId, options, (mbMain, moduleNames), srcDirs, _) -> do mbMainPath <- case mbMain of Just mainPath -> liftIO $ findM doesFileExist $ mainPath : map (\srcDir -> normalise $ srcDir mainPath) srcDirs Nothing -> return Nothing (modules', (fileMap', defSiteMap', modNameMap')) <- indexBuildComponent sourceCodePreprocessing currentPackageId compId (fileMap, defSiteMap, modNameMap) srcDirs libSrcDirs (options ++ additionalGhcOptions) (maybe moduleNames (: moduleNames) mbMainPath) return (modules ++ modules', (fileMap', defSiteMap', modNameMap')) ) ([], (HM.empty, HM.empty, HM.empty)) buildComponents let modId = HCE.id :: HCE.ModuleInfo -> HCE.HaskellModulePath moduleMap = HM.fromList . map (\modInfo -> (modId modInfo, modInfo)) $ indexedModules references = L.foldl' addReferencesFromModule HM.empty indexedModules moduleId = HCE.id :: HCE.ModuleInfo -> HCE.HaskellModulePath topLevelIdentifiersTrie = L.foldl' addTopLevelIdentifiersFromModule HCE.emptyTrie . L.filter (not . isHsBoot . moduleId) $ indexedModules directoryTree <- liftIO $ buildDirectoryTree packageDirectoryAbsPath ignoreDirectories (\path -> HM.member (HCE.HaskellModulePath . T.pack $ path) moduleMap) liftIO $ setCurrentDirectory currentDirectory return HCE.PackageInfo { id = currentPackageId , moduleMap = moduleMap , moduleNameMap = modNameMapResult , directoryTree = directoryTree , externalIdOccMap = references , externalIdInfoMap = topLevelIdentifiersTrie } where chEntrypointsToModules :: ChEntrypoint -> (Maybe String, [String]) chEntrypointsToModules (ChLibEntrypoint modules otherModules signatures) = ( Nothing , L.map chModuleToString modules ++ L.map chModuleToString otherModules ++ L.map chModuleToString signatures ) chEntrypointsToModules (ChExeEntrypoint mainModule _otherModules) = (Just mainModule, []) chEntrypointsToModules (ChSetupEntrypoint _) = (Nothing, []) chModuleToString :: ChModuleName -> String chModuleToString (ChModuleName n) = n chComponentNameToComponentType :: ChComponentName -> HCE.ComponentType chComponentNameToComponentType ChSetupHsName = HCE.Setup chComponentNameToComponentType (ChLibName _ ) = HCE.Lib chComponentNameToComponentType (ChFLibName name) = HCE.FLib $ T.pack name chComponentNameToComponentType (ChExeName name) = HCE.Exe $ T.pack name chComponentNameToComponentType (ChTestName name) = HCE.Test $ T.pack name chComponentNameToComponentType (ChBenchName name) = HCE.Bench $ T.pack name chComponentNameToComponentId :: ChComponentName -> HCE.ComponentId chComponentNameToComponentId (ChLibName _) = HCE.ComponentId "lib" chComponentNameToComponentId (ChFLibName name) = HCE.ComponentId . T.append "flib-" . T.pack $ name chComponentNameToComponentId (ChExeName name) = HCE.ComponentId . T.append "exe-" . T.pack $ name chComponentNameToComponentId (ChTestName name) = HCE.ComponentId . T.append "test-" . T.pack $ name chComponentNameToComponentId (ChBenchName name) = HCE.ComponentId . T.append "bench-" . T.pack $ name chComponentNameToComponentId ChSetupHsName = HCE.ComponentId "setup" ghcVersion :: Version ghcVersion = makeVersion [9, 2, 2, 0] buildDirectoryTree :: FilePath -> [FilePath] -> (FilePath -> Bool) -> IO HCE.DirTree buildDirectoryTree path ignoreDirectories isHaskellModule = do (_dir DT.:/ tree) <- DT.readDirectoryWith (const . return $ ()) path -- Tuple up the complete file path with the file contents, by building up the path, -- trie-style, from the root. The filepath will be relative to "anchored" directory. let treeWithPaths = DT.zipPaths ("" DT.:/ DT.filterDir (not . ignore) tree) return $ toDirTree (removeTopDir . fst <$> treeWithPaths) where ignore :: DT.DirTree a -> Bool ignore (DT.Dir dirName _) | "." `L.isPrefixOf` dirName = True | dirName == "dist" = True | dirName == "dist-newstyle" = True | dirName == "tmp" = True | otherwise = dirName `elem` ignoreDirectories ignore (DT.Failed _ _) = True ignore _ = False removeTopDir :: FilePath -> FilePath removeTopDir p = case splitPath p of _x : xs -> joinPath xs [] -> "" toDirTree :: DT.DirTree FilePath -> HCE.DirTree toDirTree (DT.Dir name contents) = HCE.Dir name (map toDirTree . filter (not . DT.failed) $ contents) toDirTree (DT.File name filePath) = HCE.File name filePath (isHaskellModule filePath) toDirTree (DT.Failed name err) = HCE.File (name ++ " : " ++ show err) "" False addTopLevelIdentifiersFromModule :: HCE.Trie Char HCE.ExternalIdentifierInfo -> HCE.ModuleInfo -> HCE.Trie Char HCE.ExternalIdentifierInfo addTopLevelIdentifiersFromModule trieIdInfo HCE.ModuleInfo {..} = L.foldl' (\trie idInfo@(HCE.ExternalIdentifierInfo HCE.IdentifierInfo {..}) -> HCE.insertToTrie S.insert (T.unpack demangledOccName) idInfo trie ) trieIdInfo externalIds addReferencesFromModule :: HM.HashMap HCE.ExternalId (S.Set HCE.IdentifierSrcSpan) -> HCE.ModuleInfo -> HM.HashMap HCE.ExternalId (S.Set HCE.IdentifierSrcSpan) addReferencesFromModule references modInfo@HCE.ModuleInfo {..} = eachIdentifierOccurrence references modInfo (\occMap lineNumber startCol endCol occ -> let mbIdExternalId = HCE.externalId =<< maybe Nothing (`HM.lookup` idInfoMap) (HCE.internalId (occ :: HCE.IdentifierOccurrence)) idSrcSpan = HCE.IdentifierSrcSpan { modulePath = id , line = lineNumber , startColumn = startCol , endColumn = endCol } in case mbIdExternalId of Just externalId -> HM.insertWith S.union externalId (S.singleton idSrcSpan) occMap Nothing -> occMap ) eachIdentifierOccurrence :: forall a . a -> HCE.ModuleInfo -> (a -> IM.Key -> Int -> Int -> HCE.IdentifierOccurrence -> a) -> a eachIdentifierOccurrence accumulator HCE.ModuleInfo {..} f = IM.foldlWithKey' (\acc lineNumber occurences -> L.foldl' (\a ((startCol, endCol), occ) -> f a lineNumber startCol endCol occ) acc occurences ) accumulator idOccMap instance MonadLoggerIO (GhcT (LoggingT IO)) where askLoggerIO = GhcT $ const askLoggerIO instance MonadLogger (GhcT (LoggingT IO)) where monadLoggerLog loc source level = GhcT . const . monadLoggerLog loc source level gtrySync :: (ExceptionMonad m) => m a -> m (Either SomeException a) gtrySync action = ghandleSync (return . Left) (fmap Right action) ghandleSync :: (ExceptionMonad m) => (SomeException -> m a) -> m a -> m a ghandleSync onError = handle (\ex -> case fromException ex of Just (asyncEx :: SomeAsyncException) -> throw asyncEx _ -> onError ex ) indexBuildComponent :: HCE.SourceCodePreprocessing -- ^ Before or after preprocessor -> HCE.PackageId -- ^ Current package id -> HCE.ComponentId -- ^ Current component id -> ModuleDependencies -- ^ Already indexed modules -> [FilePath] -- ^ Src dirs -> [FilePath] -- ^ Src dirs of libraries -> [String] -- ^ Command-line options for GHC -> [String] -- ^ Modules to compile -> LoggingT IO ([HCE.ModuleInfo], ModuleDependencies) indexBuildComponent sourceCodePreprocessing currentPackageId componentId deps@(fileMap, defSiteMap, modNameMap) srcDirs libSrcDirs options modules = do let onError ex = do logErrorN $ T.concat [ "Error while indexing component " , HCE.getComponentId componentId , " : " , T.pack . show $ ex ] return ([], deps) ghandleSync onError $ runGhcT (Just libdir) $ do logDebugN (T.append "Component id : " $ HCE.getComponentId componentId) logDebugN (T.append "Modules : " $ T.pack $ show modules) logDebugN (T.append "GHC command line options : " $ T.pack $ L.unwords (options ++ modules) ) flags <- getSessionDynFlags (flags', _, _) <- parseDynamicFlagsCmdLine flags (L.map noLoc . L.filter ("-Werror" /=) $ options) -- -Werror flag makes warnings fatal let mbTmpDir = case hiDir flags' of Just buildDir -> Just $ buildDir (takeBaseName buildDir ++ "-tmp") Nothing -> Nothing -- initUnits happens here _ <- setSessionDynFlags $ L.foldl' gopt_set (flags' { backend = NCG , ghcLink = LinkInMemory , ghcMode = CompManager , importPaths = importPaths flags' ++ maybeToList mbTmpDir } ) [Opt_Haddock] targets <- mapM (`guessTarget` Nothing) modules setTargets targets _ <- load LoadAllTargets modGraph <- getModuleGraph let topSortMods = flattenSCCs $ filterToposortToModules (topSortModuleGraph False modGraph Nothing) buildDir = addTrailingPathSeparator . normalise . fromMaybe "" . hiDir $ flags' pathsModuleName = "Paths_" ++ map (\c -> if c == '-' then '_' else c) (T.unpack (HCE.name (currentPackageId :: HCE.PackageId))) (modSumWithPath, modulesNotFound) <- (\(mods, notFound) -> ( L.reverse . L.foldl' (\acc (mbPath, modSum) -> case mbPath of Just path | not $ HM.member path defSiteMap -> (path, modSum) : acc _ -> acc ) [] $ mods , map snd notFound ) ) . L.partition (\(mbPath, _) -> isJust mbPath) <$> mapM (\modSum -> liftIO $ (, modSum) <$> findHaskellModulePath buildDir (srcDirs ++ libSrcDirs) modSum ) (filter (\modSum -> pathsModuleName /= (moduleNameString . moduleName $ ms_mod modSum) ) topSortMods ) unless (null modulesNotFound) $ logErrorN $ T.append "Cannot find module path : " (toText flags' $ map ms_mod modulesNotFound) foldM (\(indexedModules, (fileMap', defSiteMap', modNameMap')) (modulePath, modSum) -> do result <- indexModule sourceCodePreprocessing componentId currentPackageId flags' (fileMap', defSiteMap', modNameMap') (modulePath, modSum) case result of Right (modInfo, (fileMap'', defSiteMap'', modNameMap'')) -> return ( modInfo : indexedModules , (fileMap'', defSiteMap'', modNameMap'') ) Left exception -> do logErrorN $ T.concat [ "Error while indexing " , T.pack . show $ modulePath , " : " , T.pack . show $ exception ] return (indexedModules, (fileMap', defSiteMap', modNameMap')) ) ([], (fileMap, defSiteMap, modNameMap)) modSumWithPath findHaskellModulePath :: FilePath -> [FilePath] -> ModSummary -> IO (Maybe HCE.HaskellModulePath) findHaskellModulePath buildDir srcDirs modSum = case normalise <$> (ml_hs_file . ms_location $ modSum) of Just modulePath -> let toHaskellModulePath = return . Just . HCE.HaskellModulePath . T.pack removeTmpDir path = case splitDirectories path of parent : rest -> if "-tmp" `L.isSuffixOf` parent then joinPath rest else path _ -> path in case removeTmpDir <$> L.stripPrefix buildDir modulePath of -- File is in the build directory Just path | takeExtension path == ".hs-boot" -> do let possiblePaths = path : map ( path) srcDirs mbFoundPath <- findM doesFileExist possiblePaths case mbFoundPath of Just p -> toHaskellModulePath p _ -> return Nothing | takeExtension path == ".hs" -> do let paths = map (replaceExtension path) HCE.haskellPreprocessorExtensions possiblePaths = paths ++ concatMap (\srcDir -> map (srcDir ) paths) srcDirs mbFoundPath <- findM doesFileExist possiblePaths case mbFoundPath of Just p -> toHaskellModulePath p _ -> return Nothing | otherwise -> return Nothing Nothing -> toHaskellModulePath modulePath Nothing -> return Nothing indexModule :: HCE.SourceCodePreprocessing -> HCE.ComponentId -> HCE.PackageId -> DynFlags -> ModuleDependencies -> (HCE.HaskellModulePath, ModSummary) -> GhcT (LoggingT IO) ( Either SomeException (HCE.ModuleInfo, ModuleDependencies) ) indexModule sourceCodePreprocessing componentId currentPackageId flags deps (modulePath, modSum) = gtrySync $ do logDebugN (T.append "Indexing " $ HCE.getHaskellModulePath modulePath) parsedModule <- parseModule modSum typecheckedModule <- typecheckModule parsedModule hscEnv <- getSession externalPackageState <- liftIO . readIORef . hsc_EPS $ hscEnv originalSourceCode <- liftIO $ T.replace "\t" " " . TE.decodeUtf8 <$> BS.readFile (T.unpack . HCE.getHaskellModulePath $ modulePath) let (modInfo, (fileMap', exportMap', moduleNameMap'), typeErrors) = createModuleInfo deps ( flags , hsc_units hscEnv , typecheckedModule , hsc_HPT hscEnv , externalPackageState , modSum ) modulePath currentPackageId componentId (originalSourceCode, sourceCodePreprocessing) unless (null typeErrors) $ logInfoN $ T.append "Type errors : " $ T.pack $ show typeErrors deepseq modInfo $ return (modInfo, (fileMap', exportMap', moduleNameMap'))