1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
module Haddock.Backends.Hyperlinker.ParserSpec (main, spec) where
import Test.Hspec
import Test.QuickCheck
import qualified GHC
import Control.Monad.IO.Class
import Haddock (getGhcDirs)
import Haddock.Backends.Hyperlinker.Parser
import Haddock.Backends.Hyperlinker.Types
withDynFlags :: (GHC.DynFlags -> IO ()) -> IO ()
withDynFlags cont = do
libDir <- fmap snd (getGhcDirs [])
GHC.runGhc (Just libDir) $ do
dflags <- GHC.getSessionDynFlags
liftIO $ cont dflags
main :: IO ()
main = hspec spec
spec :: Spec
spec = describe "parse" parseSpec
-- | Defined for its instance of 'Arbitrary'. Represents strings that, when
-- considered as GHC source, won't be rewritten.
newtype NoGhcRewrite = NoGhcRewrite String deriving (Show, Eq)
-- | Filter out strings where GHC would replace/remove some characters during
-- lexing.
noGhcRewrite :: String -> Bool
noGhcRewrite ('\t':_) = False -- GHC replaces tabs with 8 spaces
noGhcRewrite ('\r':_) = False
noGhcRewrite ('\f':_) = False
noGhcRewrite ('\v':_) = False
noGhcRewrite (' ':'\n':_) = False -- GHC strips whitespace on empty lines
noGhcRewrite (_:s) = noGhcRewrite s
noGhcRewrite "" = True
instance Arbitrary NoGhcRewrite where
arbitrary = fmap NoGhcRewrite (arbitrary `suchThat` noGhcRewrite)
shrink (NoGhcRewrite src) = [ NoGhcRewrite shrunk
| shrunk <- shrink src
, noGhcRewrite shrunk
]
parseSpec :: Spec
parseSpec = around withDynFlags $ do
it "is total" $ \dflags ->
property $ \src -> length (parse dflags "" src) `shouldSatisfy` (>= 0)
it "retains file layout" $ \dflags ->
property $ \(NoGhcRewrite src) -> concatMap tkValue (parse dflags "" src) == src
context "when parsing single-line comments" $ do
it "should ignore content until the end of line" $ \dflags ->
shouldParseTo
"-- some very simple comment\nidentifier"
[TkComment, TkSpace, TkIdentifier]
dflags
it "should allow endline escaping" $ \dflags ->
shouldParseTo
"#define first line\\\nsecond line\\\nand another one"
[TkCpp]
dflags
context "when parsing multi-line comments" $ do
it "should support nested comments" $ \dflags ->
shouldParseTo
"{- comment {- nested -} still comment -} {- next comment -}"
[TkComment, TkSpace, TkComment]
dflags
it "should distinguish compiler pragma" $ \dflags ->
shouldParseTo
"{- comment -}{-# LANGUAGE GADTs #-}{- comment -}"
[TkComment, TkPragma, TkComment]
dflags
it "should recognize preprocessor directives" $ \dflags -> do
shouldParseTo
"\n#define foo bar"
[TkSpace, TkCpp]
dflags
shouldParseTo
"x # y"
[TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier]
dflags
it "should distinguish basic language constructs" $ \dflags -> do
shouldParseTo
"(* 2) <$> (\"abc\", foo)"
[ TkSpecial, TkOperator, TkSpace, TkNumber, TkSpecial
, TkSpace, TkOperator, TkSpace
, TkSpecial, TkString, TkSpecial, TkSpace, TkIdentifier, TkSpecial
]
dflags
shouldParseTo
"let foo' = foo in foo' + foo'"
[ TkKeyword, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkSpace, TkKeyword, TkSpace
, TkIdentifier, TkSpace, TkOperator, TkSpace, TkIdentifier
]
dflags
shouldParseTo
"square x = y^2 where y = x"
[ TkIdentifier, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkOperator, TkNumber
, TkSpace, TkKeyword, TkSpace
, TkIdentifier, TkSpace, TkGlyph, TkSpace, TkIdentifier
]
dflags
it "should parse do-notation syntax" $ \dflags -> do
shouldParseTo
"do { foo <- getLine; putStrLn foo }"
[ TkKeyword, TkSpace, TkSpecial, TkSpace
, TkIdentifier, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkSpecial, TkSpace
, TkIdentifier, TkSpace, TkIdentifier, TkSpace, TkSpecial
]
dflags
shouldParseTo
(unlines
[ "do"
, " foo <- getLine"
, " putStrLn foo"
])
[ TkKeyword, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace, TkIdentifier, TkSpace
, TkIdentifier, TkSpace, TkIdentifier, TkSpace
]
dflags
where
shouldParseTo :: String -> [TokenType] -> GHC.DynFlags -> Expectation
shouldParseTo str tokens dflags = map tkType (parse dflags "" str) `shouldBe` tokens
|