aboutsummaryrefslogtreecommitdiff
path: root/haddock-api/test/Haddock/Backends/Hyperlinker/ParserSpec.hs
blob: ff18cb401ea84f9abaa977569bee10c53bde4ffd (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
{-# LANGUAGE OverloadedStrings #-}
module Haddock.Backends.Hyperlinker.ParserSpec (main, spec) where

import Test.Hspec
import Test.QuickCheck

import GHC           ( runGhc, getSessionDynFlags )
import DynFlags      ( CompilerInfo, DynFlags )
import SysTools.Info ( getCompilerInfo' )
import Control.Monad.IO.Class

import Data.String     ( fromString )
import Data.ByteString ( ByteString )
import qualified Data.ByteString as BS 

import Haddock (getGhcDirs)
import Haddock.Backends.Hyperlinker.Parser
import Haddock.Backends.Hyperlinker.Types

withDynFlags :: ((DynFlags, CompilerInfo) -> IO ()) -> IO ()
withDynFlags cont = do
  libDir <- fmap snd (getGhcDirs [])
  runGhc (Just libDir) $ do
    dflags <- getSessionDynFlags
    cinfo <- liftIO $ getCompilerInfo' dflags
    liftIO $ cont (dflags, cinfo)


main :: IO ()
main = hspec spec


spec :: Spec
spec = describe "parse" parseSpec


-- | Defined for its instance of 'Arbitrary'. Represents strings that, when
-- considered as GHC source, won't be rewritten.
newtype NoGhcRewrite = NoGhcRewrite String deriving (Show, Eq)

-- | Filter out strings where GHC would replace/remove some characters during
-- lexing.
noGhcRewrite :: String -> Bool
noGhcRewrite ('\t':_) = False        -- GHC replaces tabs with 8 spaces
noGhcRewrite ('\r':_) = False
noGhcRewrite ('\f':_) = False
noGhcRewrite ('\v':_) = False
noGhcRewrite (' ':'\n':_) = False    -- GHC strips whitespace on empty lines
noGhcRewrite (_:s) = noGhcRewrite s
noGhcRewrite "" = True

instance Arbitrary NoGhcRewrite where
  arbitrary = fmap NoGhcRewrite (arbitrary `suchThat` noGhcRewrite)
  shrink (NoGhcRewrite src) = [ NoGhcRewrite shrunk
                              | shrunk <- shrink src
                              , noGhcRewrite shrunk
                              ]


parseSpec :: Spec
parseSpec = around withDynFlags $ do

    it "is total" $ \(dflags, cinfo) ->
        property $ \src -> length (parse cinfo dflags "" (fromString src)) `shouldSatisfy` (>= 0)

    it "retains file layout" $ \(dflags, cinfo) ->
        property $ \(NoGhcRewrite src) ->
          let orig = fromString src
              lexed = BS.concat (map tkValue (parse cinfo dflags "" orig))
          in lexed == orig 

    context "when parsing single-line comments" $ do

        it "should ignore content until the end of line" $ \(dflags, cinfo) ->
            shouldParseTo
                "-- some very simple comment\nidentifier"
                [TkComment, TkSpace, TkIdentifier]
                cinfo
                dflags

        it "should allow endline escaping" $ \(dflags, cinfo) ->
            shouldParseTo
                "#define first line\\\nsecond line\\\nand another one"
                [TkCpp]
                cinfo
                dflags

    context "when parsing multi-line comments" $ do

        it "should support nested comments" $ \(dflags, cinfo) ->
            shouldParseTo
                "{- comment {- nested -} still comment -} {- next comment -}"
                [TkComment, TkSpace, TkComment]
                cinfo
                dflags

        it "should distinguish compiler pragma" $ \(dflags, cinfo) ->
            shouldParseTo
                "{- comment -}{-# LANGUAGE GADTs #-}{- comment -}"
                [TkComment, TkPragma, TkComment]
                cinfo
                dflags

    it "should recognize preprocessor directives" $ \(dflags, cinfo) -> do
            shouldParseTo
                "\n#define foo bar"
                [TkCpp]
                cinfo
                dflags
            shouldParseTo
                "x # y"
                [TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier]
                cinfo
                dflags

    it "should distinguish basic language constructs" $ \(dflags, cinfo) -> do
        
        shouldParseTo
            "(* 2) <$> (\"abc\", foo)"
            [ TkSpecial, TkOperator, TkSpace, TkNumber, TkSpecial
            , TkSpace, TkOperator, TkSpace
            , TkSpecial, TkString, TkSpecial, TkSpace, TkIdentifier, TkSpecial
            ]
            cinfo
            dflags
            
        shouldParseTo
            "let foo' = foo in foo' + foo'"
            [ TkKeyword, TkSpace, TkIdentifier
            , TkSpace, TkGlyph, TkSpace
            , TkIdentifier, TkSpace, TkKeyword, TkSpace
            , TkIdentifier, TkSpace, TkOperator, TkSpace, TkIdentifier
            ]
            cinfo
            dflags
        
        shouldParseTo
            "square x = y^2 where y = x"
            [ TkIdentifier, TkSpace, TkIdentifier
            , TkSpace, TkGlyph, TkSpace
            , TkIdentifier, TkOperator, TkNumber
            , TkSpace, TkKeyword, TkSpace
            , TkIdentifier, TkSpace, TkGlyph, TkSpace, TkIdentifier
            ]
            cinfo
            dflags

    it "should parse do-notation syntax" $ \(dflags, cinfo) -> do
        shouldParseTo
            "do { foo <- getLine; putStrLn foo }"
            [ TkKeyword, TkSpace, TkSpecial, TkSpace
            , TkIdentifier, TkSpace, TkGlyph, TkSpace
            , TkIdentifier, TkSpecial, TkSpace
            , TkIdentifier, TkSpace, TkIdentifier, TkSpace, TkSpecial
            ]
            cinfo
            dflags

        shouldParseTo
            (fromString $ unlines
                [ "do"
                , "    foo <- getLine"
                , "    putStrLn foo"
                ])
            [ TkKeyword, TkSpace, TkIdentifier
            , TkSpace, TkGlyph, TkSpace, TkIdentifier, TkSpace
            , TkIdentifier, TkSpace, TkIdentifier, TkSpace
            ]
            cinfo
            dflags
  where
    shouldParseTo :: ByteString -> [TokenType] -> CompilerInfo -> DynFlags -> Expectation
    shouldParseTo str tokens cinfo dflags = [ tkType tok
                                            | tok <- parse cinfo dflags "" str
                                            , not (BS.null (tkValue tok)) ] `shouldBe` tokens