From 0f84e1f004e29663b51e550d5bf7abe6188d3dca Mon Sep 17 00:00:00 2001 From: alexwl Date: Wed, 23 Jan 2019 17:18:29 +0300 Subject: Optimize memory usage of haskell-code-server This commit adds an option to create an on-disk key-value store that contains all the data from PackageInfo of each indexed package in a queriable form. The store can be used by haskell-code-server to respond to API requests. The main benefit of using the store, compared to deserializing and loading PackageInfo of each package into memory, is reduced memory usage (approximately 7 times for a set of Haskell packages). The key-value store on disk consists of two files: 'index' and 'values'. 'index' is a small file that contains a map from strings to locations in a 'values' file. 'index' file should be deserialized and loaded into memory. 'values' is a large file that contains serialized Haskell data structures. 'values' file can be either read directly (without deserializing) or memory-mapped. --- app/Server.hs | 1099 ++++++++++++++++++++++++++++++++++++++++++++------------- app/Store.hs | 125 +++++++ 2 files changed, 974 insertions(+), 250 deletions(-) create mode 100644 app/Store.hs (limited to 'app') diff --git a/app/Server.hs b/app/Server.hs index 3bfebd8..ced90d9 100644 --- a/app/Server.hs +++ b/app/Server.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE FlexibleContexts #-} @@ -17,6 +18,7 @@ module Main where +import Control.DeepSeq (NFData, force) import Control.Exception ( SomeAsyncException , SomeException @@ -26,23 +28,28 @@ import Control.Exception , throwIO , try ) -import Control.Monad (unless) +import Control.Monad (foldM, unless) import Control.Monad.Except (ExceptT(..)) +import Control.Monad.State.Strict (StateT(..)) import Control.Monad.Reader (MonadIO, MonadReader, ReaderT(..), asks, liftIO) import qualified Data.Aeson as A import qualified Data.ByteString as BS +import qualified Data.ByteString.Short as BSS +import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Lazy as BSL import Data.Default (def) import Data.Either (lefts, rights) import qualified Data.HashMap.Strict as HM import Data.Hashable (Hashable) import qualified Data.IntervalMap.Strict as IVM +import Data.IntervalMap.Interval (Interval(..), subsumes) import qualified Data.List as L import qualified Data.Map.Strict as M import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.Vector as V #if MIN_VERSION_GLASGOW_HASKELL(8,4,3,0) import qualified GHC.Compact as C +#else import Data.Functor.Identity(Identity(..)) #endif import Data.Pagination @@ -66,21 +73,16 @@ import Data.Version (Version(..)) import GHC.Exts (Down(..), groupWith) import GHC.Generics (Generic) import qualified HaskellCodeExplorer.Types as HCE -import Network.HTTP.Types - ( hContentEncoding - , hContentType - , status200 - , status404 - ) +import Network.HTTP.Types (hContentEncoding, hContentType, status200, status404) import Network.Mime (defaultMimeLookup) import Network.Wai ( Application , Middleware , Response , pathInfo + , requestHeaders , responseFile , responseLBS - , requestHeaders ) import Network.Wai.Handler.Warp (run) import Network.Wai.Middleware.RequestLogger @@ -100,13 +102,13 @@ import Options.Applicative , help , helper , info - , long - , some + , long , metavar , option , optional , progDesc , short + , some , strOption , switch ) @@ -124,6 +126,7 @@ import Servant , ToHttpApiData(..) , addHeader , err404 + , err500 , errBody , serve ) @@ -131,6 +134,7 @@ import Servant.API.ContentTypes (AllCTRender(..), JSON) import Servant.Server (Handler(..), hoistServer) import Servant.Utils.Links (safeLink) import System.Directory (doesFileExist) +import System.Exit (exitFailure, exitSuccess) import System.FilePath.Find ( FileType(..) , (&&?) @@ -142,7 +146,7 @@ import System.FilePath.Find , fileType , find ) -import System.FilePath.Posix ((),takeFileName) +import System.FilePath.Posix ((), takeFileName) import System.Log.FastLogger ( LoggerSet , defaultBufSize @@ -152,6 +156,8 @@ import System.Log.FastLogger import Text.Blaze.Html.Renderer.Text (renderHtml) import qualified Text.Blaze.Html5 as Html hiding (html, source) import Data.FileEmbed (embedDir, embedFile) +import Data.Bifunctor (second) +import qualified Store -------------------------------------------------------------------------------- -- Server config @@ -167,6 +173,7 @@ data ServerConfig = ServerConfig , configStaticFilesUrlPrefix :: !String , configJsDistDirectory :: !(Maybe String) , configMaxPerPage :: !Int + , configStore :: !(Maybe Store) } deriving (Show, Eq) data PackagesPath @@ -174,6 +181,12 @@ data PackagesPath | Directories [FilePath] deriving (Show, Eq) +data Store + = CreateStore FilePath + | UseStore FilePath + | UseStoreMmap FilePath + deriving (Show, Eq) + configParser :: Parser ServerConfig configParser = ServerConfig <$> @@ -219,7 +232,23 @@ configParser = option auto (long "max-per-page" <> metavar "INTEGER" <> - help "Maximum number of items per page (default is 50)")) + help "Maximum number of items per page (default is 50)")) <*> + optional + (CreateStore <$> + strOption + (long "create-store" <> + help "Create a key-value store from PackageInfo of each indexed package" <> + metavar "PATH_TO_DATA_DIRECTORY") <|> + (UseStore <$> + strOption + (long "use-store" <> + help "Use existing key-value store. Read 'values' file into memory." <> + metavar "PATH_TO_DATA_DIRECTORY")) <|> + (UseStoreMmap <$> + strOption + (long "use-store-mmap" <> + help "Use existing key-value store. mmap 'values' file." <> + metavar "PATH_TO_DATA_DIRECTORY"))) -------------------------------------------------------------------------------- -- Loading packages @@ -230,42 +259,461 @@ data PackageVersions = PackageVersions , versions :: [Version] } deriving (Show, Ord, Eq, Generic) -type PackageMap - = HM.HashMap PackageName (M.Map Version (HCE.PackageInfo HCE.CompactModuleInfo)) - +instance Serialize.Serialize PackageVersions + +data PackageMap + = PackageMap (HM.HashMap PackageName (M.Map Version (HCE.PackageInfo HCE.CompactModuleInfo))) + | PackageMapStore { store :: Store.Store + , packageMap :: HM.HashMap PackageName (M.Map Version HCE.PackageId) } + type PackagePathMap = HM.HashMap PackageId FilePath -newtype AllPackages = - AllPackages BSL.ByteString +newtype AllPackages = AllPackages BSL.ByteString newtype PackageId = PackageId { getPackageId :: T.Text - } deriving (Show, Eq, Hashable) + } deriving (Show, Eq, Hashable, Generic, NFData) + +instance Serialize.Serialize PackageId newtype PackageName = PackageName { getPackageName :: T.Text - } deriving (Show, Eq, Hashable) - + } deriving (Show, Eq, Hashable, Generic, NFData) + +instance Serialize.Serialize PackageName instance A.ToJSON PackageVersions -type GlobalReferenceMap = HM.HashMap HCE.ExternalId (S.Set GlobalReferences) +type GlobalReferenceMap = HM.HashMap HCE.ExternalId (S.Set GlobalReferences) + data GlobalReferences = GlobalReferences { count :: Int , packageId :: T.Text } deriving (Show, Eq, Ord, Generic) +instance NFData GlobalReferences + +instance Serialize.Serialize GlobalReferences + instance A.ToJSON GlobalReferences +instance Store.StoreItem (HCE.Trie Char HCE.ExternalIdentifierInfo) where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs (HCE.Trie Char HCE.ExternalIdentifierInfo) = + ( HCE.PackageId + , String + , Proxy (HCE.Trie Char HCE.ExternalIdentifierInfo)) + itemKey (packageId, prefix, _) = + BSS.toShort $ BS.concat + [ "externalIdInfoMap" + , "|" + , TE.encodeUtf8 $ HCE.packageIdToText packageId + , "|" + , BSC.pack prefix + ] + +instance Store.StoreItem [HCE.ExternalIdentifierInfo] where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs [HCE.ExternalIdentifierInfo] = ( HCE.PackageId + , String + , Proxy [HCE.ExternalIdentifierInfo]) + itemKey (packageId, prefix, _) = + BSS.toShort $ BS.concat + [ "externalIdInfo" + , "|" + , TE.encodeUtf8 $ HCE.packageIdToText packageId + , "|" + , BSC.pack prefix + ] + +instance Store.StoreItem (S.Set HCE.IdentifierSrcSpan) where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs (S.Set HCE.IdentifierSrcSpan) = ( HCE.PackageId + , HCE.ExternalId + , Proxy (S.Set HCE.IdentifierSrcSpan)) + itemKey (packageId, HCE.ExternalId extId, _) = + BSS.toShort $ BS.concat + [ "externalIdOcc" + , "|" + , TE.encodeUtf8 $ HCE.packageIdToText packageId + , "|" + , TE.encodeUtf8 extId + ] + +instance Store.StoreItem (HM.HashMap HCE.HaskellModuleName (HM.HashMap HCE.ComponentId HCE.HaskellModulePath)) where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs (HM.HashMap HCE.HaskellModuleName (HM.HashMap HCE.ComponentId HCE.HaskellModulePath)) = + ( HCE.PackageId + , Proxy (HM.HashMap HCE.HaskellModuleName (HM.HashMap HCE.ComponentId HCE.HaskellModulePath))) + itemKey (packageId,_) = + BSS.toShort $ BS.append "moduleNameMap|" $ TE.encodeUtf8 $ HCE.packageIdToText packageId + +instance (Serialize.Serialize modInfo) => + Store.StoreItem (HM.HashMap HCE.HaskellModulePath modInfo) where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs (HM.HashMap HCE.HaskellModulePath modInfo) = + (HCE.PackageId,Proxy (HM.HashMap HCE.HaskellModulePath modInfo)) + itemKey (packageId, _) = + BSS.toShort $ BS.append "moduleMap|" $ TE.encodeUtf8 $ HCE.packageIdToText packageId + +instance Store.StoreItem HCE.ExpressionInfoMap where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs HCE.ExpressionInfoMap = ( HCE.PackageId + , HCE.HaskellModulePath + , BS.ByteString + , Proxy HCE.ExpressionInfoMap) + itemKey (packageId, HCE.HaskellModulePath modulePath, topLevelExprKey, _) = + BSS.toShort $ BS.concat + [ "exprInfoMap" + , "|" + , TE.encodeUtf8 $ HCE.packageIdToText packageId + , "|" + , TE.encodeUtf8 modulePath + , "|" + , topLevelExprKey + ] + +instance Store.StoreItem (IVM.IntervalMap (Int, Int) BS.ByteString) where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs (IVM.IntervalMap (Int, Int) BS.ByteString) = + ( HCE.PackageId + , HCE.HaskellModulePath + , Proxy (IVM.IntervalMap (Int, Int) BS.ByteString)) + itemKey (packageId, HCE.HaskellModulePath modulePath, _) = + BSS.toShort $ BS.concat + [ "topLevelExpr" + , "|" + , TE.encodeUtf8 $ HCE.packageIdToText packageId + , "|" + , TE.encodeUtf8 modulePath + ] + +instance Store.StoreItem HCE.DefinitionSiteMap where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs HCE.DefinitionSiteMap = ( HCE.PackageId + , HCE.HaskellModulePath + , Proxy HCE.DefinitionSiteMap) + itemKey (packageId, HCE.HaskellModulePath modulePath, _) = + BSS.toShort $ BS.concat + [ "definitionSiteMap" + , "|" + , TE.encodeUtf8 $ HCE.packageIdToText packageId + , "|" + , TE.encodeUtf8 modulePath + ] + +instance Store.StoreItem (V.Vector T.Text) where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs (V.Vector T.Text) = ( HCE.PackageId + , HCE.HaskellModulePath + , Proxy (V.Vector T.Text)) + itemKey (packageId, HCE.HaskellModulePath modulePath, _) = + BSS.toShort $ BS.concat + [ "source" + , "|" + , TE.encodeUtf8 $ HCE.packageIdToText packageId + , "|" + , TE.encodeUtf8 modulePath + ] + +instance Store.StoreItem (HM.HashMap PackageName (M.Map Version HCE.PackageId)) where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs (HM.HashMap PackageName (M.Map Version HCE.PackageId)) = + Proxy (HM.HashMap PackageName (M.Map Version HCE.PackageId)) + itemKey _ = "packageMap" + +instance Store.StoreItem PackagePathMap where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs PackagePathMap = Proxy PackagePathMap + itemKey _ = "packagePathMap" + +instance Store.StoreItem GlobalReferenceMap where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs GlobalReferenceMap = Proxy GlobalReferenceMap + itemKey _ = "globalReferenceMap" + +instance Store.StoreItem [PackageVersions] where + toByteString = Serialize.encode + fromByteString = Serialize.decode + type KeyArgs [PackageVersions] = Proxy [PackageVersions] + itemKey _ = "packageVersions" + +findTopLevelExpressions :: (Ord k) => IVM.IntervalMap k v -> [(Interval k, v)] +findTopLevelExpressions = + L.foldl' + (\topLevel interval -> + case topLevel of + [] -> [interval] + topLevelIntervals@(currentTopLevelInterval:rest) + | subsumes (fst currentTopLevelInterval) (fst interval) -> topLevelIntervals + | subsumes (fst interval) (fst currentTopLevelInterval) -> + interval : rest + | otherwise -> interval : topLevelIntervals) + [] . + IVM.assocs + +splitIntervalMap :: + (Show k, Ord k) + => IVM.IntervalMap k v + -> (IVM.IntervalMap k BS.ByteString, [(BS.ByteString, IVM.IntervalMap k v)]) +splitIntervalMap ivmap = + let topLevelExprs = findTopLevelExpressions ivmap + in L.foldl' + (\(index, ivMaps) (interval, _) -> + let topLevelExpressionKey = BSC.pack $ show interval + in ( IVM.insert interval topLevelExpressionKey index + , (topLevelExpressionKey, IVM.within ivmap interval) : ivMaps)) + (IVM.empty, []) + topLevelExprs + +createStore :: FilePath -> ServerConfig -> IO () +createStore storePath config = do + packageDirectories <- findDirectories (configPackagesPath config) + Store.createStore storePath $ \fileHandle -> do + (errors, packageMap', packagePathMap', packageVersions', globalReferenceMap', index'') <- + foldM + (\(errors, packageMap, packagePathMap, packageVersions, globalReferenceMap, index) path -> do + eitherPackageInfo <- loadPackageInfo config path + case eitherPackageInfo of + Right (packageInfo, packagePath) -> do + let packageId = + HCE.id + (packageInfo :: HCE.PackageInfo HCE.CompactModuleInfo) + addPackageInfo :: StateT Store.State IO () + addPackageInfo = do + Store.add + (HCE.moduleNameMap packageInfo) + ( packageId + , Proxy :: Proxy (HM.HashMap HCE.HaskellModuleName (HM.HashMap HCE.ComponentId HCE.HaskellModulePath))) + addExternalIdInfo packageId packageInfo + mapM_ + (\(extId, occs) -> + Store.add + occs + ( packageId + , extId + , Proxy :: Proxy (S.Set HCE.IdentifierSrcSpan))) + (HM.toList $ HCE.externalIdOccMap packageInfo) + mapM_ + (\(modulePath, moduleInfo) -> do + addExpressionInfo + packageId + modulePath + (HCE.exprInfoMap + (moduleInfo :: HCE.CompactModuleInfo)) + Store.add + (HCE.definitionSiteMap + (moduleInfo :: HCE.CompactModuleInfo)) + ( packageId + , modulePath + , Proxy :: Proxy HCE.DefinitionSiteMap) + Store.add + (HCE.source (moduleInfo :: HCE.CompactModuleInfo)) + ( packageId + , modulePath + , Proxy :: Proxy (V.Vector T.Text))) . + HM.toList $ + HCE.moduleMap packageInfo + index' <- Store.writeValues fileHandle index addPackageInfo + print $ T.unpack (HCE.packageIdToText packageId) + return $ + force + ( errors + , let packageVersion = HCE.version packageId + val = M.singleton packageVersion packageId + in HM.insertWith + M.union + (PackageName $ HCE.packageName packageInfo) + val + packageMap + , let key = PackageId $ HCE.packageIdToText packageId + in HM.insert key packagePath packagePathMap + , (\(HCE.PackageId name version) -> (name, [version])) + packageId : + packageVersions + , let references = + HM.map + (\spans -> + S.singleton + (GlobalReferences + (S.size spans) + (HCE.packageIdToText packageId))) . + HCE.externalIdOccMap $ + packageInfo + in HM.unionWith S.union references globalReferenceMap + , index') + Left (errorMessage, path') -> + return $ + force + ( (errorMessage, path') : errors + , packageMap + , packagePathMap + , packageVersions + , globalReferenceMap + , index)) + ([], HM.empty, HM.empty, [], HM.empty, M.empty) + packageDirectories + let versions = + L.sortOn (T.toLower . (name :: PackageVersions -> T.Text)) . + map (\(name, vers) -> PackageVersions name (L.sortOn Down vers)) . + HM.toList . HM.fromListWith (++) $ + packageVersions' + indexFinal <- + Store.writeValues fileHandle index'' $ do + Store.add packagePathMap' (Proxy :: Proxy PackagePathMap) + Store.add versions (Proxy :: Proxy [PackageVersions]) + Store.add globalReferenceMap' (Proxy :: Proxy GlobalReferenceMap) + Store.add + packageMap' + (Proxy :: Proxy (HM.HashMap PackageName (M.Map Version HCE.PackageId))) + unless (null errors) $ do + putStrLn "Package loading errors : " + mapM_ (\(err, path) -> putStrLn $ path ++ " : " ++ err) errors + return indexFinal + +addExpressionInfo :: + ( Show k + , Ord k + , Store.StoreItem (IVM.IntervalMap k v) + , Store.StoreItem (IVM.IntervalMap k BSC.ByteString) + , Store.KeyArgs (IVM.IntervalMap k BSC.ByteString) ~ ( a + , b + , Proxy (IVM.IntervalMap ( Int + , Int) BSC.ByteString)) + , Store.KeyArgs (IVM.IntervalMap k v) ~ ( a + , b + , BSC.ByteString + , Proxy HCE.ExpressionInfoMap) + ) + => a + -> b + -> IVM.IntervalMap k v + -> StateT Store.State IO () +addExpressionInfo packageId modulePath ivMap = do + let (index, ivMaps) = splitIntervalMap ivMap + Store.add + index + ( packageId + , modulePath + , Proxy :: Proxy (IVM.IntervalMap (Int, Int) BS.ByteString)) + mapM_ + (\(topLevelExprKey, ivMap') -> + Store.add + ivMap' + ( packageId + , modulePath + , topLevelExprKey + , Proxy :: Proxy HCE.ExpressionInfoMap)) + ivMaps + +addExternalIdInfo :: + HCE.PackageId + -> HCE.PackageInfo HCE.CompactModuleInfo + -> StateT Store.State IO () +addExternalIdInfo packageId packageInfo = do + let addTrieValues :: + HCE.Trie Char HCE.ExternalIdentifierInfo + -> String + -> StateT Store.State IO () + addTrieValues trie name = + let len = L.length name + in Store.add + (let ids = + S.toAscList $ + HCE.match + name + (trie :: HCE.Trie Char HCE.ExternalIdentifierInfo) + (exactMatches, rest) = + L.span + (\(HCE.ExternalIdentifierInfo HCE.IdentifierInfo {..}) -> + T.length demangledOccName == len) + ids + maxIds = 10 + exactMatchesCount = L.length exactMatches + in if exactMatchesCount >= maxIds + then exactMatches + else exactMatches ++ + L.take (maxIds - exactMatchesCount) rest) + (packageId, name, Proxy :: Proxy [HCE.ExternalIdentifierInfo]) + let fullTrie = HCE.externalIdInfoMap packageInfo + mapM_ + (\(firstLetter, trie) -> do + mapM_ + (\(secondLetter, trie') -> do + mapM_ + (\(thirdLetter, trie'') -> do + mapM_ + (\(fourthLetter, trie''') -> + Store.add + trie''' + ( packageId + , [firstLetter, secondLetter, thirdLetter, fourthLetter] + , Proxy :: Proxy (HCE.Trie Char HCE.ExternalIdentifierInfo))) + (HM.toList . HCE.children $ trie'') + addTrieValues fullTrie [firstLetter, secondLetter, thirdLetter]) + (HM.toList . HCE.children $ trie') + addTrieValues fullTrie [firstLetter, secondLetter]) + (HM.toList . HCE.children $ trie) + addTrieValues fullTrie [firstLetter]) + (HM.toList . HCE.children $ fullTrie) + +findDirectories :: PackagesPath -> IO [FilePath] +findDirectories p = + case p of + DirectoryWithPackages dir -> + find (depth ==? 0) (fileType ==? Directory &&? filePath /=? dir) dir + Directories dirs -> return dirs + loadPackages :: ServerConfig - -> IO (Maybe (PackageMap, PackagePathMap, [PackageVersions], GlobalReferenceMap)) -loadPackages config = do - packageDirectories <- - case configPackagesPath config of - DirectoryWithPackages dir -> - find (depth ==? 0) (fileType ==? Directory &&? filePath /=? dir) dir - Directories dirs -> return dirs + -> Maybe Store.Store + -> IO (Maybe ( PackageMap + , PackagePathMap + , [PackageVersions] + , GlobalReferenceMap)) +loadPackages _config mbStore + | (Just store) <- mbStore = do + let eitherPackagePathMap = + Store.lookup (Proxy :: Proxy PackagePathMap) store + eitherGlobalReferenceMap = + Store.lookup (Proxy :: Proxy GlobalReferenceMap) store + eitherPackageVersions = + Store.lookup (Proxy :: Proxy [PackageVersions]) store + eitherPackageMap = + Store.lookup + (Proxy :: Proxy (HM.HashMap PackageName (M.Map Version HCE.PackageId))) + store + case (,,,) <$> (PackageMapStore store <$> eitherPackageMap) <*> + eitherPackagePathMap <*> + eitherPackageVersions <*> + eitherGlobalReferenceMap of + Right res -> return $ Just res + Left _ -> do + putStrLn "Store lookup errors : " + let ignoreRight :: Either a b -> Either a () + ignoreRight = second (const ()) + print $ + lefts + [ ignoreRight eitherGlobalReferenceMap + , ignoreRight eitherPackageMap + , ignoreRight eitherPackageVersions + , ignoreRight eitherGlobalReferenceMap + ] + return Nothing +loadPackages config _ = do + packageDirectories <- findDirectories (configPackagesPath config) result <- mapM (loadPackageInfo config) packageDirectories let loadedPackages = rights result packageLoadErrors = lefts result @@ -290,6 +738,7 @@ loadPackages config = do map (\HCE.PackageId {..} -> (name, [version])) $ packageIds packageMap = + PackageMap $ L.foldl' (\hMap packageInfo -> let val = M.singleton (packageVersion packageInfo) packageInfo @@ -517,38 +966,9 @@ instance A.ToJSON SourceFile getAllPackages :: ReaderT Environment IO AllPackages getAllPackages = asks envPackageVersions - -getPackageInfoAndModulePath :: - PackageId - -> HCE.ComponentId - -> HCE.HaskellModuleName - -> ReaderT Environment IO ( HCE.PackageInfo HCE.CompactModuleInfo - , HCE.HaskellModulePath) -getPackageInfoAndModulePath packageId componentId moduleName = - withPackageInfo packageId $ \packageInfo -> - case HM.lookup moduleName (HCE.moduleNameMap packageInfo) of - Just modulePathMap -> - case HM.lookup componentId modulePathMap of - Just path -> return (packageInfo, path) - Nothing -> - error404 $ - BSL.concat - [ "Module " - , toLazyBS $ HCE.getHaskellModuleName moduleName - , " not found in component " - , toLazyBS $ HCE.getComponentId componentId - ] - Nothing -> - error404 $ - BSL.concat - [ "Module " - , toLazyBS $ HCE.getHaskellModuleName moduleName - , " not found in package " - , toLazyBS $ getPackageId packageId - ] - + getExpressions :: - PackageId + PackageId -> HCE.HaskellModulePath -> Int -- ^ Start line -> Int -- ^ Start column @@ -559,19 +979,48 @@ getExpressions packageId modulePath startLine startColumn endLine endColumn = do enableExpressionInfo <- asks (configEnableExpressionInfo . envConfig) if not enableExpressionInfo then error404 "Expression queries are disabled" - else withPackageInfo packageId $ \packageInfo -> - withModuleInfo packageInfo modulePath $ \modInfo -> do - maxPerPage <- asks (configMaxPerPage . envConfig) - let exprInfoMap = - HCE.exprInfoMap (modInfo :: HCE.CompactModuleInfo) - requestedInterval = - IVM.ClosedInterval - (startLine, startColumn) - (endLine, endColumn) - return . - map (uncurry Expression) . - L.take maxPerPage . IVM.toList . IVM.within exprInfoMap $ - requestedInterval + else withPackageInfo packageId $ \packageInfo' -> do + maxPerPage <- asks (configMaxPerPage . envConfig) + let requestedInterval = + IVM.ClosedInterval + (startLine, startColumn) + (endLine, endColumn) + findInterval :: + HCE.ExpressionInfoMap -> ReaderT Environment IO [Expression] + findInterval exprInfoMap = + return . + map (uncurry Expression) . + L.take maxPerPage . IVM.toList . IVM.within exprInfoMap $ + requestedInterval + in case packageInfo' of + PackageInfo packageInfo -> + withModuleInfo packageInfo modulePath $ \modInfo -> do + let exprInfoMap = + HCE.exprInfoMap (modInfo :: HCE.CompactModuleInfo) + findInterval exprInfoMap + PackageInfoStore pId store -> do + let topLevelExprKey = + ( pId + , modulePath + , Proxy :: Proxy (IVM.IntervalMap (Int, Int) BS.ByteString)) + eitherTopLevelExprMap = + Store.lookup topLevelExprKey store + case eitherTopLevelExprMap of + Right topLevelExprMap -> + case map snd . IVM.toList $ + IVM.intersecting topLevelExprMap requestedInterval of + exprKey:_ -> do + let key = + ( pId + , modulePath + , exprKey + , Proxy :: Proxy HCE.ExpressionInfoMap) + eitherExprMap = Store.lookup key store + case eitherExprMap of + Right exprMap -> findInterval exprMap + Left e -> error500 $ BSL.fromStrict $ BSC.pack e + _ -> return [] + Left e -> error500 $ BSL.fromStrict $ BSC.pack e getDefinitionSite :: PackageId @@ -581,49 +1030,71 @@ getDefinitionSite :: -> T.Text -> ReaderT Environment IO HCE.DefinitionSite getDefinitionSite packageId componentId modName entity name = - withPackageInfo packageId $ \packageInfo -> - withModulePath packageInfo componentId modName $ \modPath -> - case entity of - HCE.Mod -> - return $ - HCE.DefinitionSite - (HCE.ExactLocation - (HCE.id (packageInfo :: HCE.PackageInfo HCE.CompactModuleInfo)) - modPath - modName - 1 - 1 - 1 - 1) - Nothing - _ -> - withModuleInfo packageInfo modPath $ \modInfo -> do - let defSites = - HCE.definitionSiteMap (modInfo :: HCE.CompactModuleInfo) - mbDefinitionSite = - case entity of - HCE.Typ -> - HM.lookup (HCE.OccName name) $ - HCE.types (defSites :: HCE.DefinitionSiteMap) - HCE.Val -> - HM.lookup (HCE.OccName name) $ - HCE.values (defSites :: HCE.DefinitionSiteMap) - HCE.Inst -> - HM.lookup name $ - HCE.instances (defSites :: HCE.DefinitionSiteMap) - _ -> Nothing - case mbDefinitionSite of - Just definitionSite -> return definitionSite - Nothing -> - error404 $ - BSL.concat - [ toLazyBS . T.pack $ show entity - , " " - , toLazyBS name - , " " - , " not found in a module " - , toLazyBS $ HCE.getHaskellModulePath modPath - ] + withPackageInfo packageId $ \packageInfo' -> + withModulePath packageInfo' componentId modName $ \modPath -> + let findDefSite :: + HCE.PackageId + -> HCE.DefinitionSiteMap + -> ReaderT Environment IO HCE.DefinitionSite + findDefSite pId defSiteMap = + case entity of + HCE.Mod -> + return $ + HCE.DefinitionSite + (HCE.ExactLocation pId modPath modName 1 1 1 1) + Nothing + _ -> do + let mbDefinitionSite = + case entity of + HCE.Typ -> + HM.lookup (HCE.OccName name) $ + HCE.types (defSiteMap :: HCE.DefinitionSiteMap) + HCE.Val -> + HM.lookup (HCE.OccName name) $ + HCE.values (defSiteMap :: HCE.DefinitionSiteMap) + HCE.Inst -> + HM.lookup name $ + HCE.instances (defSiteMap :: HCE.DefinitionSiteMap) + _ -> Nothing + case mbDefinitionSite of + Just definitionSite -> return definitionSite + Nothing -> + error404 $ + BSL.concat + [ toLazyBS . T.pack $ show entity + , " " + , toLazyBS name + , " " + , " not found in module " + , toLazyBS $ HCE.getHaskellModulePath modPath + ] + in case packageInfo' of + PackageInfo packageInfo -> + let pId = + HCE.id + (packageInfo :: HCE.PackageInfo HCE.CompactModuleInfo) + in case HM.lookup + modPath + (HCE.moduleMap + (packageInfo :: HCE.PackageInfo HCE.CompactModuleInfo)) of + Just HCE.CompactModuleInfo {definitionSiteMap = defSiteMap} -> + findDefSite pId defSiteMap + Nothing -> + error404 $ + BSL.concat + [ "Module " + , toLazyBS $ HCE.getHaskellModulePath modPath + , " is not found in package " + , toLazyBS $ HCE.packageIdToText pId + ] + PackageInfoStore pId store -> do + let eitherDefinitionSiteMap = + Store.lookup + (pId, modPath, Proxy :: Proxy HCE.DefinitionSiteMap) + store + case eitherDefinitionSiteMap of + Right definitionSiteMap -> findDefSite pId definitionSiteMap + Left e -> error500 (BSL.fromStrict $ BSC.pack e) buildLinkHeader :: T.Text -> Paginated a -> Natural -> Natural -> T.Text buildLinkHeader url paginated currentPage perPage = @@ -692,76 +1163,110 @@ getReferences :: -> Maybe Int -- ^ Items per page -> ReaderT Environment IO (Headers '[ Header "Link" T.Text, Header "X-Total-Count" Int] [SourceFile]) getReferences packageId externalId mbPage mbPerPage = - withPackageInfo packageId $ \packageInfo -> - case S.toList <$> HM.lookup externalId (HCE.externalIdOccMap packageInfo) of - Just references -> do - (paginatedReferences, page, perPage, totalCount) <- - paginateItems mbPage mbPerPage references - let url = - T.append "/" $ - toUrlPiece $ - safeLink - (Proxy :: Proxy API) - (Proxy :: Proxy GetReferences) - packageId - externalId - Nothing - Nothing - linkHeader = buildLinkHeader url paginatedReferences page perPage - addHeaders :: - forall a. - a - -> Headers '[ Header "Link" T.Text, Header "X-Total-Count" Int] a - addHeaders = addHeader linkHeader . addHeader totalCount - refModulePath :: ReferenceWithSource -> HCE.HaskellModulePath - refModulePath = - (HCE.modulePath :: HCE.IdentifierSrcSpan -> HCE.HaskellModulePath) . - idSrcSpan - return $ - addHeaders $ - concatMap - (\refs -> - case refs of - ref:_ -> - let path = - HCE.getHaskellModulePath . - (HCE.modulePath :: HCE.IdentifierSrcSpan -> HCE.HaskellModulePath) . - idSrcSpan $ - ref - in [SourceFile path refs] - _ -> []) $ - groupWith refModulePath $ - mapMaybe - (mkReferenceWithSource packageInfo) - (L.groupBy (\span1 span2 -> HCE.line span1 == HCE.line span2) $ - paginatedItems paginatedReferences) - Nothing -> - error404 $ - BSL.concat - [ "Cannot find references to " - , toLazyBS $ HCE.getExternalId externalId - ] + withPackageInfo packageId $ \packageInfo' -> + let mkRefsWithSource :: + Maybe [HCE.IdentifierSrcSpan] + -> ReaderT Environment IO (Headers '[ Header "Link" T.Text, Header "X-Total-Count" Int] [SourceFile]) + mkRefsWithSource mbReferences = + case mbReferences of + Just references -> do + (paginatedReferences, page, perPage, totalCount) <- + paginateItems mbPage mbPerPage references + let url = + T.append "/" $ + toUrlPiece $ + safeLink + (Proxy :: Proxy API) + (Proxy :: Proxy GetReferences) + packageId + externalId + Nothing + Nothing + linkHeader = + buildLinkHeader url paginatedReferences page perPage + addHeaders :: + forall a. + a + -> Headers '[ Header "Link" T.Text, Header "X-Total-Count" Int] a + addHeaders = addHeader linkHeader . addHeader totalCount + refModulePath :: ReferenceWithSource -> HCE.HaskellModulePath + refModulePath = + (HCE.modulePath :: HCE.IdentifierSrcSpan -> HCE.HaskellModulePath) . + idSrcSpan + return $ + addHeaders $ + concatMap + (\refs -> + case refs of + ref:_ -> + let path = + HCE.getHaskellModulePath . + (HCE.modulePath :: HCE.IdentifierSrcSpan -> HCE.HaskellModulePath) . + idSrcSpan $ + ref + in [SourceFile path refs] + _ -> []) $ + groupWith refModulePath $ + mapMaybe + (mkReferenceWithSource packageInfo') + (L.groupBy (\span1 span2 -> HCE.line span1 == HCE.line span2) $ + paginatedItems paginatedReferences) + Nothing -> + error404 $ + BSL.concat + [ "Cannot find references to " + , toLazyBS $ HCE.getExternalId externalId + ] + in case packageInfo' of + PackageInfo packageInfo -> + mkRefsWithSource $ S.toList <$> HM.lookup externalId (HCE.externalIdOccMap packageInfo) + PackageInfoStore pId store -> do + let eitherOccurrences = + Store.lookup + ( pId + , externalId + , Proxy :: Proxy (S.Set HCE.IdentifierSrcSpan)) + store + case eitherOccurrences of + Right occurrences -> + mkRefsWithSource (Just $ S.toList occurrences) + Left e -> error500 $ BSL.fromStrict $ BSC.pack e mkReferenceWithSource :: - HCE.PackageInfo HCE.CompactModuleInfo - -> [HCE.IdentifierSrcSpan] - -> Maybe ReferenceWithSource -mkReferenceWithSource packageInfo spans@(span:_) = - let mbModule = - HM.lookup - (HCE.modulePath (span :: HCE.IdentifierSrcSpan)) - (HCE.moduleMap (packageInfo :: HCE.PackageInfo HCE.CompactModuleInfo)) - in case mbModule of - Just modInfo -> - let sourceCodeHtml = - buildHtmlCodeSnippet - (HCE.source (modInfo :: HCE.CompactModuleInfo)) - (HCE.line (span :: HCE.IdentifierSrcSpan)) - (map - (\HCE.IdentifierSrcSpan {..} -> (startColumn, endColumn)) - spans) - in Just $ ReferenceWithSource sourceCodeHtml span - _ -> Just $ ReferenceWithSource "" span + PackageInfo -> [HCE.IdentifierSrcSpan] -> Maybe ReferenceWithSource +mkReferenceWithSource packageInfo' spans@(srcSpan:_) = + let mkRef :: Maybe (V.Vector T.Text) -> Maybe ReferenceWithSource + mkRef mbSource = + case mbSource of + Just source -> + let sourceCodeHtml = + buildHtmlCodeSnippet + source + (HCE.line (srcSpan :: HCE.IdentifierSrcSpan)) + (map + (\HCE.IdentifierSrcSpan {..} -> (startColumn, endColumn)) + spans) + in Just $ ReferenceWithSource sourceCodeHtml srcSpan + _ -> Just $ ReferenceWithSource "" srcSpan + in case packageInfo' of + PackageInfo packageInfo -> do + let mbSource = + (HCE.source :: HCE.CompactModuleInfo -> V.Vector T.Text) <$> + HM.lookup + (HCE.modulePath (srcSpan :: HCE.IdentifierSrcSpan)) + (HCE.moduleMap + (packageInfo :: HCE.PackageInfo HCE.CompactModuleInfo)) + mkRef mbSource + PackageInfoStore packageId store -> do + let eitherSourceCode = + Store.lookup + ( packageId + , HCE.modulePath (srcSpan :: HCE.IdentifierSrcSpan) + , Proxy :: Proxy (V.Vector T.Text)) + store + case eitherSourceCode of + Right source -> mkRef (Just source) + Left _ -> mkRef Nothing mkReferenceWithSource _ _ = Nothing buildHtmlCodeSnippet :: V.Vector T.Text -> Int -> [(Int, Int)] -> T.Text @@ -793,34 +1298,69 @@ findIdentifiers :: -> T.Text -> Maybe Int -> Maybe Int - -> ReaderT Environment IO (Headers '[ Header "Link" T.Text, Header "X-Total-Count" Int] - [HCE.ExternalIdentifierInfo]) + -> ReaderT Environment IO (Headers '[ Header "Link" T.Text, Header "X-Total-Count" Int] [HCE.ExternalIdentifierInfo]) findIdentifiers packageId query mbPage mbPerPage = - withPackageInfo packageId $ \packageInfo -> do - let identifiers - | not $ T.null query = - S.toList $ - HCE.match (T.unpack query) (HCE.externalIdInfoMap packageInfo) - | otherwise = [] - (paginatedIdentifiers, page, perPage, totalCount) <- - paginateItems mbPage mbPerPage identifiers - let url = - T.append "/" $ - toUrlPiece $ - safeLink - (Proxy :: Proxy API) - (Proxy :: Proxy GetIdentifiers) - packageId - query - Nothing - Nothing - linkHeader = buildLinkHeader url paginatedIdentifiers page perPage - addHeaders :: - forall a. - a - -> Headers '[ Header "Link" T.Text, Header "X-Total-Count" Int] a - addHeaders = addHeader linkHeader . addHeader totalCount - return . addHeaders . paginatedItems $ paginatedIdentifiers + withPackageInfo packageId $ \packageInfo' -> + let respond identifiers = do + (paginatedIdentifiers, page, perPage, totalCount) <- + paginateItems mbPage mbPerPage identifiers + let url = + T.append "/" $ + toUrlPiece $ + safeLink + (Proxy :: Proxy API) + (Proxy :: Proxy GetIdentifiers) + packageId + query + Nothing + Nothing + linkHeader = buildLinkHeader url paginatedIdentifiers page perPage + addHeaders :: + forall a. + a + -> Headers '[ Header "Link" T.Text, Header "X-Total-Count" Int] a + addHeaders = addHeader linkHeader . addHeader totalCount + return . addHeaders . paginatedItems $ paginatedIdentifiers + in case packageInfo' of + PackageInfo packageInfo -> do + let identifiers + | not $ T.null query = + S.toList $ + HCE.match + (T.unpack query) + (HCE.externalIdInfoMap packageInfo) + | otherwise = [] + respond identifiers + PackageInfoStore pId store -> + let findIds :: T.Text -> [HCE.ExternalIdentifierInfo] + findIds q = + let eitherIdInfo = + Store.lookup + ( pId + , T.unpack q + , Proxy :: Proxy [HCE.ExternalIdentifierInfo]) + store + in case eitherIdInfo of + Right ids -> ids + Left _ -> [] + in case T.length query of + 0 -> respond [] + 1 -> respond $ findIds query + 2 -> respond $ findIds query + 3 -> respond $ findIds query + _ -> + let eitherIdInfoMap = + Store.lookup + ( pId + , T.unpack $ T.take 4 query + , Proxy :: Proxy (HCE.Trie Char HCE.ExternalIdentifierInfo)) + store + in case eitherIdInfoMap of + Right trie -> + respond $ + S.toList $ + HCE.match (T.unpack $ T.drop 4 query) trie + Left _ -> respond [] paginateItems :: Maybe Int @@ -841,25 +1381,47 @@ paginateItems mbPage mbPerPage items = do error404 :: BSL.ByteString -> ReaderT Environment IO a error404 body = throwServantError $ err404 {errBody = body} +error500 :: BSL.ByteString -> ReaderT Environment IO a +error500 body = throwServantError $ err500 {errBody = body} + toLazyBS :: T.Text -> BSL.ByteString toLazyBS = BSL.fromStrict . TE.encodeUtf8 + +data PackageInfo + = PackageInfo (HCE.PackageInfo HCE.CompactModuleInfo) + | PackageInfoStore HCE.PackageId + Store.Store withPackageInfo :: PackageId - -> (HCE.PackageInfo HCE.CompactModuleInfo -> ReaderT Environment IO a) + -> (PackageInfo -> ReaderT Environment IO a) -> ReaderT Environment IO a withPackageInfo packageId action | Just (packageName, mbVersion) <- parsePackageId packageId = do packageMap <- asks envPackageMap - let mbPackageInfo = - HM.lookup packageName packageMap >>= + let findPackage :: + (Hashable k, Eq k, Ord k1) + => k + -> Maybe k1 + -> HM.HashMap k (M.Map k1 v) + -> Maybe v + findPackage name mbVer pMap = + HM.lookup name pMap >>= (\packages -> let findLastVersion :: M.Map k v -> Maybe v findLastVersion = fmap (snd . fst) . L.uncons . M.toDescList - in case mbVersion of + in case mbVer of Just version -> M.lookup version packages <|> findLastVersion packages Nothing -> findLastVersion packages) + mbPackageInfo = + case packageMap of + PackageMap pMap -> + PackageInfo <$> findPackage packageName mbVersion pMap + PackageMapStore store pMap -> + case findPackage packageName mbVersion pMap of + Just pId -> Just $ PackageInfoStore pId store + Nothing -> Nothing case mbPackageInfo of Just p -> action p Nothing -> packageNotFound packageId @@ -892,50 +1454,72 @@ withModuleInfo packageInfo path action = ] withModulePath :: - HCE.PackageInfo HCE.CompactModuleInfo + PackageInfo -> HCE.ComponentId -> HCE.HaskellModuleName -> (HCE.HaskellModulePath -> ReaderT Environment IO a) -> ReaderT Environment IO a -withModulePath packageInfo componentId moduleName action = - case HM.lookup - (ghcPrimHack packageInfo moduleName) - (HCE.moduleNameMap packageInfo) of - Just modulePathMap -> - case HM.lookup componentId modulePathMap of - Just path -> action path - Nothing -> - case HM.lookup (HCE.ComponentId "lib") modulePathMap of - Just path -> action path +withModulePath packageInfo' componentId moduleName action = + case packageInfo' of + PackageInfo packageInfo -> + case HM.lookup + (ghcPrimHack packageInfo' moduleName) + (HCE.moduleNameMap packageInfo) of + Just modulePathMap -> + case HM.lookup componentId modulePathMap of + Just modulePath -> action modulePath Nothing -> - error404 $ - BSL.concat - [ "Module " - , toLazyBS $ HCE.getHaskellModuleName moduleName - , " is not found in component " - , toLazyBS $ HCE.getComponentId componentId - ] - Nothing -> + case HM.lookup (HCE.ComponentId "lib") modulePathMap of + Just path -> action path + Nothing -> notFoundInComponent + Nothing -> notFoundInPackage (HCE.id (packageInfo :: HCE.PackageInfo HCE.CompactModuleInfo)) + PackageInfoStore packageId store -> do + let eitherModNameMap = + Store.lookup + ( packageId + , Proxy :: Proxy (HM.HashMap HCE.HaskellModuleName (HM.HashMap HCE.ComponentId HCE.HaskellModulePath))) + store + case eitherModNameMap of + Right modNameMap -> + case HM.lookup (ghcPrimHack packageInfo' moduleName) modNameMap of + Just componentMap -> case HM.lookup componentId componentMap of + Just modulePath -> action modulePath + Nothing -> case HM.lookup (HCE.ComponentId "lib") componentMap of + Just modulePath -> action modulePath + Nothing -> notFoundInComponent + Nothing -> notFoundInPackage packageId + Left e -> error500 (BSL.fromStrict $ BSC.pack e) + where + notFoundInComponent = + error404 $ + BSL.concat + [ "Module " + , toLazyBS $ HCE.getHaskellModuleName moduleName + , " is not found in component " + , toLazyBS $ HCE.getComponentId componentId + ] + notFoundInPackage packageId = error404 $ BSL.concat [ "Module " , toLazyBS $ HCE.getHaskellModuleName moduleName , " is not found in package " - , toLazyBS $ - HCE.packageIdToText $ - HCE.id (packageInfo :: HCE.PackageInfo HCE.CompactModuleInfo) + , toLazyBS $ HCE.packageIdToText packageId ] -- | Workaround for : -- https://github.com/ghc/ghc/blob/ghc-8.2.2-release/compiler/main/Finder.hs#L310-L315 -ghcPrimHack :: - HCE.PackageInfo HCE.CompactModuleInfo - -> HCE.HaskellModuleName - -> HCE.HaskellModuleName -ghcPrimHack packageInfo (HCE.HaskellModuleName modName) - | HCE.packageName packageInfo == "ghc-prim" && modName == "GHC.Prim" = - HCE.HaskellModuleName "GHC.Prim_" - | otherwise = HCE.HaskellModuleName modName +ghcPrimHack :: PackageInfo -> HCE.HaskellModuleName -> HCE.HaskellModuleName +ghcPrimHack packageInfo' modName@(HCE.HaskellModuleName name) = + case packageInfo' of + PackageInfo packageInfo + | HCE.packageName packageInfo == "ghc-prim" && name == "GHC.Prim" -> + HCE.HaskellModuleName "GHC.Prim_" + | otherwise -> modName + PackageInfoStore (HCE.PackageId packageName _) _ + | packageName == "ghc-prim" && name == "GHC.Prim" -> + HCE.HaskellModuleName "GHC.Prim_" + | otherwise -> modName parsePackageId :: PackageId -> Maybe (PackageName, Maybe Version) parsePackageId (PackageId text) = @@ -959,9 +1543,10 @@ staticMiddleware staticFilesPrefix packagePathMap _ app req callback Just basePath | ".." `notElem` rest -> do let clientAcceptsEncoding = - fromMaybe [] $ - map T.strip . T.splitOn "," . TE.decodeUtf8 <$> - lookup "Accept-Encoding" (requestHeaders req) + maybe + [] + (map T.strip . T.splitOn "," . TE.decodeUtf8) + (lookup "Accept-Encoding" (requestHeaders req)) clientAcceptsGzip = "gzip" `elem` clientAcceptsEncoding path = basePath T.unpack (T.intercalate "/" rest) gzPath = path ++ ".gz" @@ -1055,11 +1640,25 @@ main = do (configParser <**> helper) (fullDesc <> progDesc - "haskell-code-server provides an HTTP API for Haskell code explorer")) + "haskell-code-server provides an HTTP API for Haskell Code Explorer")) print config - packages <- loadPackages config + mbStore <- + let loadStore :: FilePath -> Store.ReadMode -> IO (Maybe Store.Store) + loadStore path readOrMmap = do + eitherStore <- Store.load path readOrMmap + case eitherStore of + Right store -> return $ Just store + Left e -> putStrLn e >> exitFailure + in case configStore config of + Just (UseStore path) -> loadStore path Store.ReadEntireFile + Just (UseStoreMmap path) -> loadStore path Store.MemoryMapFile + Just (CreateStore path) -> do + createStore path config + exitSuccess + Nothing -> return Nothing + packages <- loadPackages config mbStore case packages of - Just (packageMap, packagePathMap, packageVersions,globalReferenceMap) -> do + Just (packageMap, packagePathMap, packageVersions, globalReferenceMap) -> do loggerSet <- case configLog config of HCE.ToFile logfile -> newFileLoggerSet defaultBufSize logfile diff --git a/app/Store.hs b/app/Store.hs new file mode 100644 index 0000000..956359d --- /dev/null +++ b/app/Store.hs @@ -0,0 +1,125 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilyDependencies #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE StrictData #-} +{-# OPTIONS_GHC -Wno-orphans #-} + +-- | Read-only on-disk key-value store + +module Store where + +import Control.DeepSeq (NFData) +import Control.Monad.IO.Class (liftIO) +import qualified Control.Monad.State.Strict as S +import qualified Data.ByteString as BS +import qualified Data.ByteString.Short as BSS +import Data.Either (Either) +import qualified Data.Map.Strict as M +import Data.Serialize (Serialize, decode, encode, get, put) +import GHC.Generics (Generic) +import Prelude hiding (lookup) +import System.Directory (doesFileExist) +import System.FilePath (()) +import System.IO (Handle, IOMode(..), hTell, withFile) +import System.IO.MMap (mmapFileByteString) + +data Store = Store + { index :: M.Map BSS.ShortByteString Location + , values :: BS.ByteString + } + +data Location = Location + { offset :: Int + , length :: Int + } deriving (Show, Eq, Ord, Generic, NFData) + +instance Serialize Location +instance Serialize BSS.ShortByteString where + put = put . BSS.fromShort + get = BSS.toShort <$> get + +class StoreItem item where + toByteString :: item -> BS.ByteString + fromByteString :: BS.ByteString -> Either String item + type KeyArgs item = arg | arg -> item + itemKey :: KeyArgs item -> BSS.ShortByteString + +indexFileName :: FilePath +indexFileName = "index" + +valuesFileName :: FilePath +valuesFileName = "values" + +data ReadMode + = ReadEntireFile + | MemoryMapFile + deriving (Show, Eq) + +load :: FilePath -> ReadMode -> IO (Either String Store) +load directoryPath readMode = do + let valuesFilePath = directoryPath valuesFileName + indexFilePath = directoryPath indexFileName + (valuesFileExists, indexFileExists) <- + (,) <$> doesFileExist indexFilePath <*> doesFileExist valuesFilePath + case (valuesFileExists, indexFileExists) of + (True, True) -> do + indexFile <- BS.readFile indexFilePath + valuesFile <- + case readMode of + ReadEntireFile -> BS.readFile valuesFilePath + MemoryMapFile -> mmapFileByteString valuesFilePath Nothing + let eitherIndex = decode @(M.Map BSS.ShortByteString Location) indexFile + case eitherIndex of + Right locMap -> return . Right $ Store {index = locMap, values = valuesFile} + Left err -> return . Left $ "Error while decoding index : " ++ err + (False, False) -> + return . Left $ "Cannot find index and values in " ++ directoryPath + (True, False) -> return . Left $ "Cannot find index in " ++ directoryPath + (False, True) -> return . Left $ "Cannot find values in " ++ directoryPath + +lookup :: (StoreItem item) => KeyArgs item -> Store -> Either String item +lookup keyArgs = lookupByteString (itemKey keyArgs) + +lookupByteString :: + (StoreItem item) => BSS.ShortByteString -> Store -> Either String item +lookupByteString key store = + case M.lookup key (index store) of + Just (Location off len) -> + fromByteString . BS.take len . BS.drop off $ values store + Nothing -> Left $ "Cannot find item with key " ++ show key + +data State = + State (M.Map BSS.ShortByteString Location) + Handle + +add :: (StoreItem item) => item -> KeyArgs item -> S.StateT State IO () +add item keyArgs = do + let bs = toByteString item + len = BS.length bs + State locMap handle <- S.get + off <- liftIO $ fromIntegral <$> hTell handle + _ <- liftIO $ BS.hPut handle bs + S.put $ State (M.insert (itemKey keyArgs) (Location off len) locMap) handle + +createStore :: + FilePath -> (Handle -> IO (M.Map BSS.ShortByteString Location)) -> IO () +createStore directoryPath action = + withFile (directoryPath valuesFileName) WriteMode $ \valuesHandle -> do + locMap <- action valuesHandle + BS.writeFile (directoryPath indexFileName) (encode locMap) + +writeValues :: + Handle + -> M.Map BSS.ShortByteString Location + -> S.StateT State IO () + -> IO (M.Map BSS.ShortByteString Location) +writeValues handle locMap action = do + State locMap' _ <- S.execStateT action (State locMap handle) + return locMap' -- cgit v1.2.3