From 4f75be94f45a0e92553eccefe56230c554333ce7 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Sun, 10 Dec 2017 12:22:21 -0800 Subject: Use the GHC lexer for the Hyperlinker backend (#714) * Start changing to use GHC lexer * better cpp * Change SrcSpan to RealSrcSpan * Remove error * Try to stop too many open files * wip * wip * Revert "wip" This reverts commit b605510a195f26315e3d8ca90e6d95a6737553e1. Conflicts: haddock-api/haddock-api.cabal haddock-api/src/Haddock/Interface.hs * Remove pointless 'caching' * Use dlist rather than lists when finding vars * Use a map rather than list * Delete bogus comment * Rebase followup Things now run using the GHC lexer. There are still - stray debug statements - unnecessary changes w.r.t. master * Cleaned up differences w.r.t. current Haddock HEAD Things are looking good. quasiquotes in particular look beautiful: the TH ones (with Haskell source inside) colour/link their contents too! Haven't yet begun to check for possible performance problems. * Support CPP and top-level pragmas The support for these is hackier - but no more hacky than the existing support. * Tests pass, CPP is better recognized The tests were in some cases altered: I consider the new output to be more correct than the old one.... * Fix shrinking of source without tabs in test * Replace 'Position'/'Span' with GHC counterparts Replaces 'Position' -> 'GHC.RealSrcLoc' and 'Span' -> 'GHC.RealSrcSpan'. * Nits * Forgot entry in .cabal * Update changelog --- .../Haddock/Backends/Hyperlinker/ParserSpec.hs | 135 ++++++++++++++------- 1 file changed, 89 insertions(+), 46 deletions(-) (limited to 'haddock-api/test/Haddock/Backends') diff --git a/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs b/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs index 8cd2690e..dcb30e41 100644 --- a/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs +++ b/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs @@ -4,95 +4,138 @@ module Haddock.Backends.Hyperlinker.ParserSpec (main, spec) where import Test.Hspec import Test.QuickCheck +import qualified GHC +import Control.Monad.IO.Class + +import Haddock (getGhcDirs) import Haddock.Backends.Hyperlinker.Parser import Haddock.Backends.Hyperlinker.Types +withDynFlags :: (GHC.DynFlags -> IO ()) -> IO () +withDynFlags cont = do + libDir <- fmap snd (getGhcDirs []) + GHC.runGhc (Just libDir) $ do + dflags <- GHC.getSessionDynFlags + liftIO $ cont dflags + main :: IO () main = hspec spec spec :: Spec -spec = do - describe "parse" parseSpec +spec = describe "parse" parseSpec -parseSpec :: Spec -parseSpec = do +-- | Defined for its instance of 'Arbitrary' +newtype NoTabs = NoTabs String deriving (Show, Eq) - it "is total" $ - property $ \src -> length (parse src) `shouldSatisfy` (>= 0) +noTabs :: String -> Bool +noTabs = all (\c -> c `notElem` "\r\t\f\v") - it "retains file layout" $ - property $ \src -> concatMap tkValue (parse src) == src +-- | Does not generate content with space characters other than ' ' and '\n' +instance Arbitrary NoTabs where + arbitrary = fmap NoTabs (arbitrary `suchThat` noTabs) + shrink (NoTabs src) = [ NoTabs shrunk | shrunk <- shrink src, noTabs shrunk ] - context "when parsing single-line comments" $ do - it "should ignore content until the end of line" $ - "-- some very simple comment\nidentifier" - `shouldParseTo` - [TkComment, TkSpace, TkIdentifier] +parseSpec :: Spec +parseSpec = around withDynFlags $ do - it "should allow endline escaping" $ - "-- first line\\\nsecond line\\\nand another one" - `shouldParseTo` - [TkComment] + it "is total" $ \dflags -> + property $ \src -> length (parse dflags "" src) `shouldSatisfy` (>= 0) - context "when parsing multi-line comments" $ do + it "retains file layout" $ \dflags -> + property $ \(NoTabs src) -> concatMap tkValue (parse dflags "" src) == src - it "should support nested comments" $ - "{- comment {- nested -} still comment -} {- next comment -}" - `shouldParseTo` - [TkComment, TkSpace, TkComment] + context "when parsing single-line comments" $ do + + it "should ignore content until the end of line" $ \dflags -> + shouldParseTo + "-- some very simple comment\nidentifier" + [TkComment, TkSpace, TkIdentifier] + dflags - it "should distinguish compiler pragma" $ - "{- comment -}{-# LANGUAGE GADTs #-}{- comment -}" - `shouldParseTo` - [TkComment, TkPragma, TkComment] + it "should allow endline escaping" $ \dflags -> + shouldParseTo + "#define first line\\\nsecond line\\\nand another one" + [TkCpp] + dflags - it "should recognize preprocessor directives" $ do - "\n#define foo bar" `shouldParseTo` [TkSpace, TkCpp] - "x # y" `shouldParseTo` - [TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier] + context "when parsing multi-line comments" $ do - it "should distinguish basic language constructs" $ do - "(* 2) <$> (\"abc\", foo)" `shouldParseTo` + it "should support nested comments" $ \dflags -> + shouldParseTo + "{- comment {- nested -} still comment -} {- next comment -}" + [TkComment, TkSpace, TkComment] + dflags + + it "should distinguish compiler pragma" $ \dflags -> + shouldParseTo + "{- comment -}{-# LANGUAGE GADTs #-}{- comment -}" + [TkComment, TkPragma, TkComment] + dflags + + it "should recognize preprocessor directives" $ \dflags -> do + shouldParseTo + "\n#define foo bar" + [TkSpace, TkCpp] + dflags + shouldParseTo + "x # y" + [TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier] + dflags + + it "should distinguish basic language constructs" $ \dflags -> do + + shouldParseTo + "(* 2) <$> (\"abc\", foo)" [ TkSpecial, TkOperator, TkSpace, TkNumber, TkSpecial , TkSpace, TkOperator, TkSpace , TkSpecial, TkString, TkSpecial, TkSpace, TkIdentifier, TkSpecial ] - "let foo' = foo in foo' + foo'" `shouldParseTo` + dflags + + shouldParseTo + "let foo' = foo in foo' + foo'" [ TkKeyword, TkSpace, TkIdentifier , TkSpace, TkGlyph, TkSpace , TkIdentifier, TkSpace, TkKeyword, TkSpace , TkIdentifier, TkSpace, TkOperator, TkSpace, TkIdentifier ] - "square x = y^2 where y = x" `shouldParseTo` + dflags + + shouldParseTo + "square x = y^2 where y = x" [ TkIdentifier, TkSpace, TkIdentifier , TkSpace, TkGlyph, TkSpace , TkIdentifier, TkOperator, TkNumber , TkSpace, TkKeyword, TkSpace , TkIdentifier, TkSpace, TkGlyph, TkSpace, TkIdentifier ] + dflags - it "should parse do-notation syntax" $ do - "do { foo <- getLine; putStrLn foo }" `shouldParseTo` + it "should parse do-notation syntax" $ \dflags -> do + shouldParseTo + "do { foo <- getLine; putStrLn foo }" [ TkKeyword, TkSpace, TkSpecial, TkSpace , TkIdentifier, TkSpace, TkGlyph, TkSpace , TkIdentifier, TkSpecial, TkSpace , TkIdentifier, TkSpace, TkIdentifier, TkSpace, TkSpecial ] - - unlines - [ "do" - , " foo <- getLine" - , " putStrLn foo" - ] `shouldParseTo` + dflags + + shouldParseTo + (unlines + [ "do" + , " foo <- getLine" + , " putStrLn foo" + ]) [ TkKeyword, TkSpace, TkIdentifier , TkSpace, TkGlyph, TkSpace, TkIdentifier, TkSpace , TkIdentifier, TkSpace, TkIdentifier, TkSpace ] - - -shouldParseTo :: String -> [TokenType] -> Expectation -str `shouldParseTo` tokens = map tkType (parse str) `shouldBe` tokens + dflags + where + shouldParseTo :: String -> [TokenType] -> GHC.DynFlags -> Expectation + shouldParseTo str tokens dflags = map tkType (parse dflags "" str) `shouldBe` tokens -- cgit v1.2.3