aboutsummaryrefslogtreecommitdiff
path: root/haddock-test
diff options
context:
space:
mode:
Diffstat (limited to 'haddock-test')
-rw-r--r--haddock-test/haddock-test.cabal2
-rw-r--r--haddock-test/src/Test/Haddock/Xhtml.hs187
2 files changed, 104 insertions, 85 deletions
diff --git a/haddock-test/haddock-test.cabal b/haddock-test/haddock-test.cabal
index 8d6f986a..a656d707 100644
--- a/haddock-test/haddock-test.cabal
+++ b/haddock-test/haddock-test.cabal
@@ -16,7 +16,7 @@ library
default-language: Haskell2010
ghc-options: -Wall
hs-source-dirs: src
- build-depends: base >= 4.3 && < 4.13, bytestring, directory, process, filepath, Cabal, xml, xhtml
+ build-depends: base >= 4.3 && < 4.13, bytestring, directory, process, filepath, Cabal
exposed-modules:
Test.Haddock
diff --git a/haddock-test/src/Test/Haddock/Xhtml.hs b/haddock-test/src/Test/Haddock/Xhtml.hs
index 6c19dbca..bca2c4cc 100644
--- a/haddock-test/src/Test/Haddock/Xhtml.hs
+++ b/haddock-test/src/Test/Haddock/Xhtml.hs
@@ -1,107 +1,126 @@
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE StandaloneDeriving #-}
-{-# LANGUAGE TypeApplications #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE GADTs #-}
-{-# OPTIONS_GHC -fno-warn-orphans #-}
-
module Test.Haddock.Xhtml
- ( Xml(..)
+ ( Xml
, parseXml, dumpXml
, stripLinks, stripLinksWhen, stripAnchorsWhen, stripIdsWhen, stripFooter
) where
-import Data.Data ( Data(..), Typeable, eqT, (:~:)(..) )
-import Text.XML.Light
-import Text.XHtml (Html, HtmlAttr, (!))
-import qualified Text.XHtml as Xhtml
-
-
-newtype Xml = Xml
- { xmlElement :: Element
- } deriving Eq
+{-
+This module used to actually parse the HTML (using the `xml` parsing library)
+which made it was possible to do more proper normalization of things like ids or
+names.
+However, in the interests of being able to run this from within the GHC
+testsuite (where non-bootlib dependencies are a liability), this was swapped
+out for some simple string manipulation. Since the test cases aren't very
+and since the `xhtml` library already handles the pretty-printing aspect,
+this would appear to be a reasonable compromise for now.
+-}
-deriving instance Eq Element
-deriving instance Eq Content
-deriving instance Eq CData
-
--- | Similar to @everywhere (mkT f) x@ from SYB.
-gmapEverywhere :: forall a b. (Data a, Typeable b) => (b -> b) -> a -> a
-gmapEverywhere f x = gmapT (gmapEverywhere f) $ case eqT @a @b of
- Nothing -> x
- Just Refl -> f x
+import Data.List ( stripPrefix, isPrefixOf )
+import Data.Char ( isSpace )
+-- | Simple wrapper around the pretty-printed HTML source
+newtype Xml = Xml { unXml :: String }
+-- | Part of parsing involves dropping the @DOCTYPE@ line
parseXml :: String -> Maybe Xml
-parseXml = fmap Xml . parseXMLDoc
-
+parseXml = Just . Xml . dropDocTypeLine
+ where
+ dropDocTypeLine bs
+ | "<!DOCTYPE" `isPrefixOf` bs
+ = drop 1 (dropWhile (/= '\n') bs)
+ | otherwise
+ = bs
dumpXml :: Xml -> String
-dumpXml = Xhtml.renderHtmlFragment. xmlElementToXhtml . xmlElement
-
-
+dumpXml = unXml
+
+type Attr = String
+type Value = String
+
+-- | Almost all sanitization operations take the form of:
+--
+-- * match an attribute key
+-- * check something about the value
+-- * if the check succeeded, replace the value with a dummy value
+--
+stripAttrValueWhen
+ :: Attr -- ^ attribute key
+ -> Value -- ^ dummy attribute value
+ -> (Value -> Bool) -- ^ determine whether we should modify the attribute
+ -> Xml -- ^ input XML
+ -> Xml -- ^ output XML
+stripAttrValueWhen key fallback p (Xml body) = Xml (filterAttrs body)
+ where
+ keyEq = key ++ "=\""
+
+ filterAttrs "" = ""
+ filterAttrs b@(c:cs)
+ | Just valRest <- stripPrefix keyEq b
+ , Just (val,rest) <- spanToEndOfString valRest
+ = if p val
+ then keyEq ++ fallback ++ "\"" ++ filterAttrs rest
+ else keyEq ++ val ++ "\"" ++ filterAttrs rest
+
+ | otherwise
+ = c : filterAttrs cs
+
+-- | Spans to the next (unescaped) @\"@ character.
+--
+-- >>> spanToEndOfString "no closing quotation"
+-- Nothing
+-- >>> spanToEndOfString "foo\" bar \"baz\""
+-- Just ("foo", " bar \"baz\"")
+-- >>> spanToEndOfString "foo\\\" bar \"baz\""
+-- Just ("foo\\\" bar ", "baz\"")
+--
+spanToEndOfString :: String -> Maybe (String, String)
+spanToEndOfString ('"':rest) = Just ("", rest)
+spanToEndOfString ('\\':c:rest)
+ | Just (str, rest') <- spanToEndOfString rest
+ = Just ('\\':c:str, rest')
+spanToEndOfString (c:rest)
+ | Just (str, rest') <- spanToEndOfString rest
+ = Just (c:str, rest')
+spanToEndOfString _ = Nothing
+
+
+-- | Replace hyperlink targets with @\"#\"@ if they match a predicate
+stripLinksWhen :: (Value -> Bool) -> Xml -> Xml
+stripLinksWhen = stripAttrValueWhen "href" "#"
+
+-- | Replace all hyperlink targets with @\"#\"@
stripLinks :: Xml -> Xml
stripLinks = stripLinksWhen (const True)
+-- | Replace id's with @\"\"@ if they match a predicate
+stripIdsWhen :: (Value -> Bool) -> Xml -> Xml
+stripIdsWhen = stripAttrValueWhen "id" ""
-stripLinksWhen :: (String -> Bool) -> Xml -> Xml
-stripLinksWhen p =
- processAnchors unlink
- where
- unlink attr@(Attr { attrKey = key, attrVal = val })
- | qName key == "href" && p val = attr { attrVal = "#" }
- | otherwise = attr
-
-
-stripAnchorsWhen :: (String -> Bool) -> Xml -> Xml
-stripAnchorsWhen p =
- processAnchors unname
- where
- unname attr@(Attr { attrKey = key, attrVal = val })
- | qName key == "name" && p val = attr { attrVal = "" }
- | otherwise = attr
-
-stripIdsWhen :: (String -> Bool) -> Xml -> Xml
-stripIdsWhen p =
- processAnchors unname
- where
- unname attr@(Attr { attrKey = key, attrVal = val })
- | qName key == "id" && p val = attr { attrVal = "" }
- | otherwise = attr
-
-
-processAnchors :: (Attr -> Attr) -> Xml -> Xml
-processAnchors f = Xml . gmapEverywhere f . xmlElement
-
+-- | Replace names's with @\"\"@ if they match a predicate
+stripAnchorsWhen :: (Value -> Bool) -> Xml -> Xml
+stripAnchorsWhen = stripAttrValueWhen "name" ""
+-- | Remove the @div@ which has @id=\"footer\"@
stripFooter :: Xml -> Xml
-stripFooter =
- Xml . gmapEverywhere defoot . xmlElement
- where
- defoot el
- | isFooter el = el { elContent = [] }
- | otherwise = el
- isFooter el = any isFooterAttr $ elAttribs el
- isFooterAttr (Attr { .. }) = and
- [ qName attrKey == "id"
- , attrVal == "footer"
- ]
-
-
-xmlElementToXhtml :: Element -> Html
-xmlElementToXhtml (Element { .. }) =
- Xhtml.tag (qName elName) contents ! attrs
+stripFooter (Xml body) = Xml (findDiv body)
where
- contents = mconcat $ map xmlContentToXhtml elContent
- attrs = map xmlAttrToXhtml elAttribs
+ findDiv "" = ""
+ findDiv b@(c:cs)
+ | Just divRest <- stripPrefix "<div id=\"footer\"" b
+ , Just rest <- dropToDiv divRest
+ = rest
+ | otherwise
+ = c : findDiv cs
-xmlContentToXhtml :: Content -> Html
-xmlContentToXhtml (Elem el) = xmlElementToXhtml el
-xmlContentToXhtml (Text text) = Xhtml.toHtml $ cdData text
-xmlContentToXhtml (CRef _) = Xhtml.noHtml
+ dropToDiv "" = Nothing
+ dropToDiv b@(_:cs)
+ | Just valRest <- stripPrefix "</div" b
+ , valRest' <- dropWhile isSpace valRest
+ , Just valRest'' <- stripPrefix ">" valRest'
+ = Just valRest''
+ | otherwise
+ = dropToDiv cs
-xmlAttrToXhtml :: Attr -> HtmlAttr
-xmlAttrToXhtml (Attr { .. }) = Xhtml.strAttr (qName attrKey) attrVal