diff options
Diffstat (limited to 'haddock-test/src')
-rw-r--r-- | haddock-test/src/Test/Haddock/Xhtml.hs | 187 |
1 files changed, 103 insertions, 84 deletions
diff --git a/haddock-test/src/Test/Haddock/Xhtml.hs b/haddock-test/src/Test/Haddock/Xhtml.hs index 6c19dbca..bca2c4cc 100644 --- a/haddock-test/src/Test/Haddock/Xhtml.hs +++ b/haddock-test/src/Test/Haddock/Xhtml.hs @@ -1,107 +1,126 @@ -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE StandaloneDeriving #-} -{-# LANGUAGE TypeApplications #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE GADTs #-} -{-# OPTIONS_GHC -fno-warn-orphans #-} - module Test.Haddock.Xhtml - ( Xml(..) + ( Xml , parseXml, dumpXml , stripLinks, stripLinksWhen, stripAnchorsWhen, stripIdsWhen, stripFooter ) where -import Data.Data ( Data(..), Typeable, eqT, (:~:)(..) ) -import Text.XML.Light -import Text.XHtml (Html, HtmlAttr, (!)) -import qualified Text.XHtml as Xhtml - - -newtype Xml = Xml - { xmlElement :: Element - } deriving Eq +{- +This module used to actually parse the HTML (using the `xml` parsing library) +which made it was possible to do more proper normalization of things like ids or +names. +However, in the interests of being able to run this from within the GHC +testsuite (where non-bootlib dependencies are a liability), this was swapped +out for some simple string manipulation. Since the test cases aren't very +and since the `xhtml` library already handles the pretty-printing aspect, +this would appear to be a reasonable compromise for now. +-} -deriving instance Eq Element -deriving instance Eq Content -deriving instance Eq CData - --- | Similar to @everywhere (mkT f) x@ from SYB. -gmapEverywhere :: forall a b. (Data a, Typeable b) => (b -> b) -> a -> a -gmapEverywhere f x = gmapT (gmapEverywhere f) $ case eqT @a @b of - Nothing -> x - Just Refl -> f x +import Data.List ( stripPrefix, isPrefixOf ) +import Data.Char ( isSpace ) +-- | Simple wrapper around the pretty-printed HTML source +newtype Xml = Xml { unXml :: String } +-- | Part of parsing involves dropping the @DOCTYPE@ line parseXml :: String -> Maybe Xml -parseXml = fmap Xml . parseXMLDoc - +parseXml = Just . Xml . dropDocTypeLine + where + dropDocTypeLine bs + | "<!DOCTYPE" `isPrefixOf` bs + = drop 1 (dropWhile (/= '\n') bs) + | otherwise + = bs dumpXml :: Xml -> String -dumpXml = Xhtml.renderHtmlFragment. xmlElementToXhtml . xmlElement - - +dumpXml = unXml + +type Attr = String +type Value = String + +-- | Almost all sanitization operations take the form of: +-- +-- * match an attribute key +-- * check something about the value +-- * if the check succeeded, replace the value with a dummy value +-- +stripAttrValueWhen + :: Attr -- ^ attribute key + -> Value -- ^ dummy attribute value + -> (Value -> Bool) -- ^ determine whether we should modify the attribute + -> Xml -- ^ input XML + -> Xml -- ^ output XML +stripAttrValueWhen key fallback p (Xml body) = Xml (filterAttrs body) + where + keyEq = key ++ "=\"" + + filterAttrs "" = "" + filterAttrs b@(c:cs) + | Just valRest <- stripPrefix keyEq b + , Just (val,rest) <- spanToEndOfString valRest + = if p val + then keyEq ++ fallback ++ "\"" ++ filterAttrs rest + else keyEq ++ val ++ "\"" ++ filterAttrs rest + + | otherwise + = c : filterAttrs cs + +-- | Spans to the next (unescaped) @\"@ character. +-- +-- >>> spanToEndOfString "no closing quotation" +-- Nothing +-- >>> spanToEndOfString "foo\" bar \"baz\"" +-- Just ("foo", " bar \"baz\"") +-- >>> spanToEndOfString "foo\\\" bar \"baz\"" +-- Just ("foo\\\" bar ", "baz\"") +-- +spanToEndOfString :: String -> Maybe (String, String) +spanToEndOfString ('"':rest) = Just ("", rest) +spanToEndOfString ('\\':c:rest) + | Just (str, rest') <- spanToEndOfString rest + = Just ('\\':c:str, rest') +spanToEndOfString (c:rest) + | Just (str, rest') <- spanToEndOfString rest + = Just (c:str, rest') +spanToEndOfString _ = Nothing + + +-- | Replace hyperlink targets with @\"#\"@ if they match a predicate +stripLinksWhen :: (Value -> Bool) -> Xml -> Xml +stripLinksWhen = stripAttrValueWhen "href" "#" + +-- | Replace all hyperlink targets with @\"#\"@ stripLinks :: Xml -> Xml stripLinks = stripLinksWhen (const True) +-- | Replace id's with @\"\"@ if they match a predicate +stripIdsWhen :: (Value -> Bool) -> Xml -> Xml +stripIdsWhen = stripAttrValueWhen "id" "" -stripLinksWhen :: (String -> Bool) -> Xml -> Xml -stripLinksWhen p = - processAnchors unlink - where - unlink attr@(Attr { attrKey = key, attrVal = val }) - | qName key == "href" && p val = attr { attrVal = "#" } - | otherwise = attr - - -stripAnchorsWhen :: (String -> Bool) -> Xml -> Xml -stripAnchorsWhen p = - processAnchors unname - where - unname attr@(Attr { attrKey = key, attrVal = val }) - | qName key == "name" && p val = attr { attrVal = "" } - | otherwise = attr - -stripIdsWhen :: (String -> Bool) -> Xml -> Xml -stripIdsWhen p = - processAnchors unname - where - unname attr@(Attr { attrKey = key, attrVal = val }) - | qName key == "id" && p val = attr { attrVal = "" } - | otherwise = attr - - -processAnchors :: (Attr -> Attr) -> Xml -> Xml -processAnchors f = Xml . gmapEverywhere f . xmlElement - +-- | Replace names's with @\"\"@ if they match a predicate +stripAnchorsWhen :: (Value -> Bool) -> Xml -> Xml +stripAnchorsWhen = stripAttrValueWhen "name" "" +-- | Remove the @div@ which has @id=\"footer\"@ stripFooter :: Xml -> Xml -stripFooter = - Xml . gmapEverywhere defoot . xmlElement - where - defoot el - | isFooter el = el { elContent = [] } - | otherwise = el - isFooter el = any isFooterAttr $ elAttribs el - isFooterAttr (Attr { .. }) = and - [ qName attrKey == "id" - , attrVal == "footer" - ] - - -xmlElementToXhtml :: Element -> Html -xmlElementToXhtml (Element { .. }) = - Xhtml.tag (qName elName) contents ! attrs +stripFooter (Xml body) = Xml (findDiv body) where - contents = mconcat $ map xmlContentToXhtml elContent - attrs = map xmlAttrToXhtml elAttribs + findDiv "" = "" + findDiv b@(c:cs) + | Just divRest <- stripPrefix "<div id=\"footer\"" b + , Just rest <- dropToDiv divRest + = rest + | otherwise + = c : findDiv cs -xmlContentToXhtml :: Content -> Html -xmlContentToXhtml (Elem el) = xmlElementToXhtml el -xmlContentToXhtml (Text text) = Xhtml.toHtml $ cdData text -xmlContentToXhtml (CRef _) = Xhtml.noHtml + dropToDiv "" = Nothing + dropToDiv b@(_:cs) + | Just valRest <- stripPrefix "</div" b + , valRest' <- dropWhile isSpace valRest + , Just valRest'' <- stripPrefix ">" valRest' + = Just valRest'' + | otherwise + = dropToDiv cs -xmlAttrToXhtml :: Attr -> HtmlAttr -xmlAttrToXhtml (Attr { .. }) = Xhtml.strAttr (qName attrKey) attrVal |