{-# LANGUAGE OverloadedStrings #-}
module Haddock.Backends.Hyperlinker.ParserSpec (main, spec) where
import Test.Hspec
import Test.QuickCheck
import GHC ( runGhc, getSessionDynFlags )
import GHC.Driver.Session ( DynFlags )
import Control.Monad.IO.Class
import Data.String ( fromString )
import Data.ByteString ( ByteString )
import qualified Data.ByteString as BS
import Haddock (getGhcDirs)
import Haddock.Backends.Hyperlinker.Parser
import Haddock.Backends.Hyperlinker.Types
withDynFlags :: (DynFlags -> IO ()) -> IO ()
withDynFlags cont = do
libDir <- fmap snd (getGhcDirs [])
runGhc libDir $ do
dflags <- getSessionDynFlags
liftIO $ cont dflags
main :: IO ()
main = hspec spec
spec :: Spec
spec = describe "parse" parseSpec
-- | Defined for its instance of 'Arbitrary'. Represents strings that, when
-- considered as GHC source, won't be rewritten.
newtype NoGhcRewrite = NoGhcRewrite String deriving (Show, Eq)
-- | Filter out strings where GHC would replace/remove some characters during
-- lexing.
noGhcRewrite :: String -> Bool
noGhcRewrite ('\t':_) = False -- GHC replaces tabs with 8 spaces
noGhcRewrite ('\r':_) = False
noGhcRewrite ('\f':_) = False
noGhcRewrite ('\v':_) = False
noGhcRewrite (' ':'\n':_) = False -- GHC strips whitespace on empty lines
noGhcRewrite (_:s) = noGhcRewrite s
noGhcRewrite "" = True
instance Arbitrary NoGhcRewrite where
arbitrary = fmap NoGhcRewrite (arbitrary `suchThat` noGhcRewrite)
shrink (NoGhcRewrite src) = [ NoGhcRewrite shrunk
| shrunk <- shrink src
, noGhcRewrite shrunk
]
parseSpec :: Spec
parseSpec = around withDynFlags $ do
it "is total" $ \dflags ->
property $ \src -> length (parse dflags "" (fromString src)) `shouldSatisfy` (>= 0)
it "retains file layout" $ \dflags ->
property $ \(NoGhcRewrite src) ->
let orig = fromString src
lexed = BS.concat (map tkValue (parse dflags "" orig))
in lexed == orig
context "when parsing single-line comments" $ do
it "should ignore content until the end of line" $ \dflags ->
shouldParseTo
"-- some very simple comment\nidentifier"
[TkComment, TkSpace, TkIdentifier]
dflags
it "should allow endline escaping" $ \dflags ->
shouldParseTo
"#define first line\\\nsecond line\\\nand another one"
[TkCpp]
dflags
context "when parsing multi-line comments" $ do
it "should support nested comments" $ \dflags ->
shouldParseTo
"{- comment {- nested -} still comment -} {- next comment -}"
[TkComment, TkSpace, TkComment]
dflags
it "should distinguish compiler pragma" $ \dflags ->
shouldParseTo
"{- comment -}{-# LANGUAGE GADTs #-}{- comment -}"
[TkComment, TkPragma, TkComment]
dflags
it "should recognize preprocessor directives" $ \dflags -> do
shouldParseTo
"\n#define foo bar"
[TkCpp]
dflags
shouldParseTo
"x # y"
[TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier]
dflags
it "should distinguish basic language constructs" $ \dflags -> do
shouldParseTo
"(* 2) <$> (\"abc\", foo)"
[ TkSpecial, TkOperator, TkSpace, TkNumber, TkSpecial
, TkSpace, TkOperator, TkSpace
, TkSpecial, TkString, TkSpecial, TkSpace, TkIdentifier, TkSpecial
]
dflags
shouldParseTo
"let foo' = foo in foo' + foo'"
[ TkKeyword, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkSpace, TkKeyword, TkSpace
, TkIdentifier, TkSpace, TkOperator, TkSpace, TkIdentifier
]
dflags
shouldParseTo
"square x = y^2 where y = x"
[ TkIdentifier, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkOperator, TkNumber
, TkSpace, TkKeyword, TkSpace
, TkIdentifier, TkSpace, TkGlyph, TkSpace, TkIdentifier
]
dflags
it "should parse do-notation syntax" $ \dflags -> do
shouldParseTo
"do { foo <- getLine; putStrLn foo }"
[ TkKeyword, TkSpace, TkSpecial, TkSpace
, TkIdentifier, TkSpace, TkGlyph, TkSpace
, TkIdentifier, TkSpecial, TkSpace
, TkIdentifier, TkSpace, TkIdentifier, TkSpace, TkSpecial
]
dflags
shouldParseTo
(fromString $ unlines
[ "do"
, " foo <- getLine"
, " putStrLn foo"
])
[ TkKeyword, TkSpace, TkIdentifier
, TkSpace, TkGlyph, TkSpace, TkIdentifier, TkSpace
, TkIdentifier, TkSpace, TkIdentifier, TkSpace
]
dflags
where
shouldParseTo :: ByteString -> [TokenType] -> DynFlags -> Expectation
shouldParseTo str tokens dflags = [ tkType tok
| tok <- parse dflags "" str
, not (BS.null (tkValue tok)) ] `shouldBe` tokens