{-# LANGUAGE OverloadedStrings #-}
module Haddock.Backends.Hyperlinker.ParserSpec (main, spec) where

import Test.Hspec
import Test.QuickCheck

import GHC           ( runGhc, getSessionDynFlags )
import DynFlags      ( CompilerInfo, DynFlags )
import SysTools.Info ( getCompilerInfo' )
import Control.Monad.IO.Class

import Data.String     ( fromString )
import Data.ByteString ( ByteString )
import qualified Data.ByteString as BS 

import Haddock (getGhcDirs)
import Haddock.Backends.Hyperlinker.Parser
import Haddock.Backends.Hyperlinker.Types

withDynFlags :: ((DynFlags, CompilerInfo) -> IO ()) -> IO ()
withDynFlags cont = do
  libDir <- fmap snd (getGhcDirs [])
  runGhc (Just libDir) $ do
    dflags <- getSessionDynFlags
    cinfo <- liftIO $ getCompilerInfo' dflags
    liftIO $ cont (dflags, cinfo)


main :: IO ()
main = hspec spec


spec :: Spec
spec = describe "parse" parseSpec


-- | Defined for its instance of 'Arbitrary'. Represents strings that, when
-- considered as GHC source, won't be rewritten.
newtype NoGhcRewrite = NoGhcRewrite String deriving (Show, Eq)

-- | Filter out strings where GHC would replace/remove some characters during
-- lexing.
noGhcRewrite :: String -> Bool
noGhcRewrite ('\t':_) = False        -- GHC replaces tabs with 8 spaces
noGhcRewrite ('\r':_) = False
noGhcRewrite ('\f':_) = False
noGhcRewrite ('\v':_) = False
noGhcRewrite (' ':'\n':_) = False    -- GHC strips whitespace on empty lines
noGhcRewrite (_:s) = noGhcRewrite s
noGhcRewrite "" = True

instance Arbitrary NoGhcRewrite where
  arbitrary = fmap NoGhcRewrite (arbitrary `suchThat` noGhcRewrite)
  shrink (NoGhcRewrite src) = [ NoGhcRewrite shrunk
                              | shrunk <- shrink src
                              , noGhcRewrite shrunk
                              ]


parseSpec :: Spec
parseSpec = around withDynFlags $ do

    it "is total" $ \(dflags, cinfo) ->
        property $ \src -> length (parse cinfo dflags "" (fromString src)) `shouldSatisfy` (>= 0)

    it "retains file layout" $ \(dflags, cinfo) ->
        property $ \(NoGhcRewrite src) ->
          let orig = fromString src
              lexed = BS.concat (map tkValue (parse cinfo dflags "" orig))
          in lexed == orig 

    context "when parsing single-line comments" $ do

        it "should ignore content until the end of line" $ \(dflags, cinfo) ->
            shouldParseTo
                "-- some very simple comment\nidentifier"
                [TkComment, TkSpace, TkIdentifier]
                cinfo
                dflags

        it "should allow endline escaping" $ \(dflags, cinfo) ->
            shouldParseTo
                "#define first line\\\nsecond line\\\nand another one"
                [TkCpp]
                cinfo
                dflags

    context "when parsing multi-line comments" $ do

        it "should support nested comments" $ \(dflags, cinfo) ->
            shouldParseTo
                "{- comment {- nested -} still comment -} {- next comment -}"
                [TkComment, TkSpace, TkComment]
                cinfo
                dflags

        it "should distinguish compiler pragma" $ \(dflags, cinfo) ->
            shouldParseTo
                "{- comment -}{-# LANGUAGE GADTs #-}{- comment -}"
                [TkComment, TkPragma, TkComment]
                cinfo
                dflags

    it "should recognize preprocessor directives" $ \(dflags, cinfo) -> do
            shouldParseTo
                "\n#define foo bar"
                [TkCpp]
                cinfo
                dflags
            shouldParseTo
                "x # y"
                [TkIdentifier, TkSpace, TkOperator, TkSpace,TkIdentifier]
                cinfo
                dflags

    it "should distinguish basic language constructs" $ \(dflags, cinfo) -> do
        
        shouldParseTo
            "(* 2) <$> (\"abc\", foo)"
            [ TkSpecial, TkOperator, TkSpace, TkNumber, TkSpecial
            , TkSpace, TkOperator, TkSpace
            , TkSpecial, TkString, TkSpecial, TkSpace, TkIdentifier, TkSpecial
            ]
            cinfo
            dflags
            
        shouldParseTo
            "let foo' = foo in foo' + foo'"
            [ TkKeyword, TkSpace, TkIdentifier
            , TkSpace, TkGlyph, TkSpace
            , TkIdentifier, TkSpace, TkKeyword, TkSpace
            , TkIdentifier, TkSpace, TkOperator, TkSpace, TkIdentifier
            ]
            cinfo
            dflags
        
        shouldParseTo
            "square x = y^2 where y = x"
            [ TkIdentifier, TkSpace, TkIdentifier
            , TkSpace, TkGlyph, TkSpace
            , TkIdentifier, TkOperator, TkNumber
            , TkSpace, TkKeyword, TkSpace
            , TkIdentifier, TkSpace, TkGlyph, TkSpace, TkIdentifier
            ]
            cinfo
            dflags

    it "should parse do-notation syntax" $ \(dflags, cinfo) -> do
        shouldParseTo
            "do { foo <- getLine; putStrLn foo }"
            [ TkKeyword, TkSpace, TkSpecial, TkSpace
            , TkIdentifier, TkSpace, TkGlyph, TkSpace
            , TkIdentifier, TkSpecial, TkSpace
            , TkIdentifier, TkSpace, TkIdentifier, TkSpace, TkSpecial
            ]
            cinfo
            dflags

        shouldParseTo
            (fromString $ unlines
                [ "do"
                , "    foo <- getLine"
                , "    putStrLn foo"
                ])
            [ TkKeyword, TkSpace, TkIdentifier
            , TkSpace, TkGlyph, TkSpace, TkIdentifier, TkSpace
            , TkIdentifier, TkSpace, TkIdentifier, TkSpace
            ]
            cinfo
            dflags
  where
    shouldParseTo :: ByteString -> [TokenType] -> CompilerInfo -> DynFlags -> Expectation
    shouldParseTo str tokens cinfo dflags = [ tkType tok
                                            | tok <- parse cinfo dflags "" str
                                            , not (BS.null (tkValue tok)) ] `shouldBe` tokens