From 4f75be94f45a0e92553eccefe56230c554333ce7 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Sun, 10 Dec 2017 12:22:21 -0800 Subject: Use the GHC lexer for the Hyperlinker backend (#714) * Start changing to use GHC lexer * better cpp * Change SrcSpan to RealSrcSpan * Remove error * Try to stop too many open files * wip * wip * Revert "wip" This reverts commit b605510a195f26315e3d8ca90e6d95a6737553e1. Conflicts: haddock-api/haddock-api.cabal haddock-api/src/Haddock/Interface.hs * Remove pointless 'caching' * Use dlist rather than lists when finding vars * Use a map rather than list * Delete bogus comment * Rebase followup Things now run using the GHC lexer. There are still - stray debug statements - unnecessary changes w.r.t. master * Cleaned up differences w.r.t. current Haddock HEAD Things are looking good. quasiquotes in particular look beautiful: the TH ones (with Haskell source inside) colour/link their contents too! Haven't yet begun to check for possible performance problems. * Support CPP and top-level pragmas The support for these is hackier - but no more hacky than the existing support. * Tests pass, CPP is better recognized The tests were in some cases altered: I consider the new output to be more correct than the old one.... * Fix shrinking of source without tabs in test * Replace 'Position'/'Span' with GHC counterparts Replaces 'Position' -> 'GHC.RealSrcLoc' and 'Span' -> 'GHC.RealSrcSpan'. * Nits * Forgot entry in .cabal * Update changelog --- .../Haddock/Backends/Hyperlinker/ParserSpec.hs | 135 ++++++++++++++------- 1 file changed, 89 insertions(+), 46 deletions(-) (limited to 'haddock-api/test') diff --git a/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs b/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs index 8cd2690e..dcb30e41 100644 --- a/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs +++ b/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs @@ -4,95 +4,138 @@ module Haddock.Backends.Hyperlinker.ParserSpec (main, spec) where import Test.Hspec import Test.QuickCheck +import qualified GHC +import Control.Monad.IO.Class + +import Haddock (getGhcDirs) import Haddock.Backends.Hyperlinker.Parser import Haddock.Backends.Hyperlinker.Types +withDynFlags :: (GHC.DynFlags -> IO ()) -> IO () +withDynFlags cont = do + libDir <- fmap snd (getGhcDirs []) + GHC.runGhc (Just libDir) $ do + dflags <- GHC.getSessionDynFlags + liftIO $ cont dflags + main :: IO () main = hspec spec spec :: Spec -spec = do - describe "parse" parseSpec +spec = describe "parse" parseSpec -parseSpec :: Spec -parseSpec = do +-- | Defined for its instance of 'Arbitrary' +newtype NoTabs = NoTabs String deriving (Show, Eq) - it "is total" $ - property $ \src -> length (parse src) `shouldSatisfy` (>= 0) +noTabs :: String -> Bool +noTabs = all (\c -> c `notElem` "\r\t\f\v") - it "retains file layout" $ - property $ \src -> concatMap tkValue (parse src) == src +-- | Does not generate content with space characters other than ' ' and '\n' +instance Arbitrary NoTabs where + arbitrary = fmap NoTabs (arbitrary `suchThat` noTabs) + shrink (NoTabs src) = [ NoTabs shrunk | shrunk <- shrink src, noTabs shrunk ] - context "when parsing single-line comments" $ do - it "should ignore content until the end of line" $ - "-- some very simple comment\nidentifier" - `shouldParseTo` - [TkComment, TkSpace, TkIdentifier] +parseSpec :: Spec +parseSpec = around withDynFlags $ do - it "should allow endline escaping" $ - "-- first line\\\nsecond line\\\nand another one" - `shouldParseTo` - [TkComment] + it "is total" $ \dflags -> + property $ \src -> length (parse dflags "" src) `shouldSatisfy` (>= 0) - context "when parsing multi-line comments" $ do + it "retains file layout" $ \dflags -> + property $ \(NoTabs src) -> concatMap tkValue (parse dflags "" src) == src - it "should support nested comments" $ - "{- comment {- nested -} still comment -} {- next comment -}" - `shouldParseTo` - [TkComment, TkSpace, TkComment] + context "when parsing single-line comments" $ do + + it "should ignore content until the end of line" $ \dflags -> + shouldParseTo + "-- some very simple comment\nidentifier" + [TkComment, TkSpace, TkIdentifier] + dflags - it "should distinguish compiler pragma" $ - "{- comment -}{-# LANGUAGE GADTs #-}{- comment -}" - `shouldParseTo` - [TkComment, TkPragma, TkComment] + it "should allow endline escaping" $ \dflags -> + shouldParseTo + "#define first line\\\nsecond line\\\nand another one" + [TkCpp] + dflags - it "should recognize preprocessor directives" $ do - "\n#define foo bar" `shouldParseTo` [TkSpace, TkCpp] - "x # y" `shouldParseTo` - [TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier] + context "when parsing multi-line comments" $ do - it "should distinguish basic language constructs" $ do - "(* 2) <$> (\"abc\", foo)" `shouldParseTo` + it "should support nested comments" $ \dflags -> + shouldParseTo + "{- comment {- nested -} still comment -} {- next comment -}" + [TkComment, TkSpace, TkComment] + dflags + + it "should distinguish compiler pragma" $ \dflags -> + shouldParseTo + "{- comment -}{-# LANGUAGE GADTs #-}{- comment -}" + [TkComment, TkPragma, TkComment] + dflags + + it "should recognize preprocessor directives" $ \dflags -> do + shouldParseTo + "\n#define foo bar" + [TkSpace, TkCpp] + dflags + shouldParseTo + "x # y" + [TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier] + dflags + + it "should distinguish basic language constructs" $ \dflags -> do + + shouldParseTo + "(* 2) <$> (\"abc\", foo)" [ TkSpecial, TkOperator, TkSpace, TkNumber, TkSpecial , TkSpace, TkOperator, TkSpace , TkSpecial, TkString, TkSpecial, TkSpace, TkIdentifier, TkSpecial ] - "let foo' = foo in foo' + foo'" `shouldParseTo` + dflags + + shouldParseTo + "let foo' = foo in foo' + foo'" [ TkKeyword, TkSpace, TkIdentifier , TkSpace, TkGlyph, TkSpace , TkIdentifier, TkSpace, TkKeyword, TkSpace , TkIdentifier, TkSpace, TkOperator, TkSpace, TkIdentifier ] - "square x = y^2 where y = x" `shouldParseTo` + dflags + + shouldParseTo + "square x = y^2 where y = x" [ TkIdentifier, TkSpace, TkIdentifier , TkSpace, TkGlyph, TkSpace , TkIdentifier, TkOperator, TkNumber , TkSpace, TkKeyword, TkSpace , TkIdentifier, TkSpace, TkGlyph, TkSpace, TkIdentifier ] + dflags - it "should parse do-notation syntax" $ do - "do { foo <- getLine; putStrLn foo }" `shouldParseTo` + it "should parse do-notation syntax" $ \dflags -> do + shouldParseTo + "do { foo <- getLine; putStrLn foo }" [ TkKeyword, TkSpace, TkSpecial, TkSpace , TkIdentifier, TkSpace, TkGlyph, TkSpace , TkIdentifier, TkSpecial, TkSpace , TkIdentifier, TkSpace, TkIdentifier, TkSpace, TkSpecial ] - - unlines - [ "do" - , " foo <- getLine" - , " putStrLn foo" - ] `shouldParseTo` + dflags + + shouldParseTo + (unlines + [ "do" + , " foo <- getLine" + , " putStrLn foo" + ]) [ TkKeyword, TkSpace, TkIdentifier , TkSpace, TkGlyph, TkSpace, TkIdentifier, TkSpace , TkIdentifier, TkSpace, TkIdentifier, TkSpace ] - - -shouldParseTo :: String -> [TokenType] -> Expectation -str `shouldParseTo` tokens = map tkType (parse str) `shouldBe` tokens + dflags + where + shouldParseTo :: String -> [TokenType] -> GHC.DynFlags -> Expectation + shouldParseTo str tokens dflags = map tkType (parse dflags "" str) `shouldBe` tokens -- cgit v1.2.3 From a8f7b19f7cc3c5ac02cd15b4270cad5c869dc0c0 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Sun, 24 Dec 2017 09:49:47 -0800 Subject: Patch flaky parser test (#720) * Patch flaky parser test This test was a great idea, but it doesn't port over too well to using the GHC lexer. GHC rewrites its input a bit - nothing surprising, but we need to guard against those cases for the test. * Change instance head * Change use site --- .../Haddock/Backends/Hyperlinker/ParserSpec.hs | 33 ++++++++++++++-------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'haddock-api/test') diff --git a/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs b/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs index dcb30e41..4639253c 100644 --- a/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs +++ b/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs @@ -27,16 +27,27 @@ spec :: Spec spec = describe "parse" parseSpec --- | Defined for its instance of 'Arbitrary' -newtype NoTabs = NoTabs String deriving (Show, Eq) - -noTabs :: String -> Bool -noTabs = all (\c -> c `notElem` "\r\t\f\v") - --- | Does not generate content with space characters other than ' ' and '\n' -instance Arbitrary NoTabs where - arbitrary = fmap NoTabs (arbitrary `suchThat` noTabs) - shrink (NoTabs src) = [ NoTabs shrunk | shrunk <- shrink src, noTabs shrunk ] +-- | Defined for its instance of 'Arbitrary'. Represents strings that, when +-- considered as GHC source, won't be rewritten. +newtype NoGhcRewrite = NoGhcRewrite String deriving (Show, Eq) + +-- | Filter out strings where GHC would replace/remove some characters during +-- lexing. +noGhcRewrite :: String -> Bool +noGhcRewrite ('\t':_) = False -- GHC replaces tabs with 8 spaces +noGhcRewrite ('\r':_) = False +noGhcRewrite ('\f':_) = False +noGhcRewrite ('\v':_) = False +noGhcRewrite (' ':'\n':_) = False -- GHC strips whitespace on empty lines +noGhcRewrite (_:s) = noGhcRewrite s +noGhcRewrite "" = True + +instance Arbitrary NoGhcRewrite where + arbitrary = fmap NoGhcRewrite (arbitrary `suchThat` noGhcRewrite) + shrink (NoGhcRewrite src) = [ NoGhcRewrite shrunk + | shrunk <- shrink src + , noGhcRewrite shrunk + ] parseSpec :: Spec @@ -46,7 +57,7 @@ parseSpec = around withDynFlags $ do property $ \src -> length (parse dflags "" src) `shouldSatisfy` (>= 0) it "retains file layout" $ \dflags -> - property $ \(NoTabs src) -> concatMap tkValue (parse dflags "" src) == src + property $ \(NoGhcRewrite src) -> concatMap tkValue (parse dflags "" src) == src context "when parsing single-line comments" $ do -- cgit v1.2.3