diff options
| -rw-r--r-- | haddock-library/haddock-library.cabal | 2 | ||||
| -rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser.hs | 34 | ||||
| -rw-r--r-- | haddock-library/src/Documentation/Haddock/Parser/Monad.hs | 25 | ||||
| -rw-r--r-- | html-test/ref/Bug458.html | 80 | ||||
| -rw-r--r-- | html-test/src/Bug458.hs | 6 | 
5 files changed, 122 insertions, 25 deletions
| diff --git a/haddock-library/haddock-library.cabal b/haddock-library/haddock-library.cabal index 3d069f07..d7935747 100644 --- a/haddock-library/haddock-library.cabal +++ b/haddock-library/haddock-library.cabal @@ -64,13 +64,13 @@ library attoparsec    exposed-modules:      Data.Attoparsec.ByteString      Data.Attoparsec.ByteString.Char8 +    Data.Attoparsec.Combinator    other-modules:      Data.Attoparsec      Data.Attoparsec.ByteString.Buffer      Data.Attoparsec.ByteString.FastSet      Data.Attoparsec.ByteString.Internal -    Data.Attoparsec.Combinator      Data.Attoparsec.Internal      Data.Attoparsec.Internal.Fhthagn      Data.Attoparsec.Internal.Types diff --git a/haddock-library/src/Documentation/Haddock/Parser.hs b/haddock-library/src/Documentation/Haddock/Parser.hs index a1349c95..82515ab4 100644 --- a/haddock-library/src/Documentation/Haddock/Parser.hs +++ b/haddock-library/src/Documentation/Haddock/Parser.hs @@ -23,7 +23,7 @@ import           Control.Applicative  import           Control.Arrow (first)  import           Control.Monad  import qualified Data.ByteString.Char8 as BS -import           Data.Char (chr, isAsciiUpper) +import           Data.Char (chr, isUpper, isAlpha, isAlphaNum)  import           Data.List (stripPrefix, intercalate, unfoldr, elemIndex)  import           Data.Maybe (fromMaybe, mapMaybe)  import           Data.Monoid @@ -35,6 +35,7 @@ import           Documentation.Haddock.Types  import           Documentation.Haddock.Utf8  import           Prelude hiding (takeWhile)  import qualified Prelude as P +import           Text.Read.Lex (isSymbolChar)  -- $setup  -- >>> :set -XOverloadedStrings @@ -205,20 +206,19 @@ monospace :: Parser (DocH mod Identifier)  monospace = DocMonospaced . parseStringBS              <$> ("@" *> takeWhile1_ (/= '@') <* "@") --- | Module names: we try our reasonable best to only allow valid --- Haskell module names, with caveat about not matching on technically --- valid unicode symbols. +-- | Module names. +-- +-- Note that we allow '#' and '\' to support anchors (old style anchors are of +-- the form "SomeModule\#anchor").  moduleName :: Parser (DocH mod a)  moduleName = DocModule <$> (char '"' *> modid <* char '"')    where      modid = intercalate "." <$> conid `sepBy1` "."      conid = (:) -      <$> satisfy isAsciiUpper -      -- NOTE: According to Haskell 2010 we should actually only -      -- accept {small | large | digit | ' } here.  But as we can't -      -- match on unicode characters, this is currently not possible. -      -- Note that we allow ‘#’ to suport anchors. -      <*> (decodeUtf8 <$> takeWhile (notInClass " .&[{}(=*)+]!|@/;,^?\"\n")) +      <$> satisfyUnicode (\c -> isAlpha c && isUpper c) +      <*> many (satisfyUnicode conChar <|> char '\\' <|> char '#') + +    conChar c = isAlphaNum c || c == '_'  -- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify  -- a title for the picture. @@ -760,26 +760,16 @@ autoUrl = mkLink <$> url  parseValid :: Parser String  parseValid = p some    where -    idChar = -      satisfy (\c -> isAlpha_ascii c -                     || isDigit c -                     -- N.B. '-' is placed first otherwise attoparsec thinks -                     -- it belongs to a character class -                     || inClass "-_.!#$%&*+/<=>?@\\|~:^" c) +    idChar = satisfyUnicode (\c -> isAlphaNum c || isSymbolChar c || c == '_')      p p' = do -      vs' <- p' $ utf8String "⋆" <|> return <$> idChar -      let vs = concat vs' +      vs <- p' idChar        c <- peekChar'        case c of          '`' -> return vs          '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> p many') <|> return vs          _ -> fail "outofvalid" --- | Parses UTF8 strings from ByteString streams. -utf8String :: String -> Parser String -utf8String x = decodeUtf8 <$> string (encodeUtf8 x) -  -- | Parses identifiers with help of 'parseValid'. Asks GHC for  -- 'String' from the string it deems valid.  identifier :: Parser (DocH mod Identifier) diff --git a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs index 3f7d60f8..3430ef8a 100644 --- a/haddock-library/src/Documentation/Haddock/Parser/Monad.hs +++ b/haddock-library/src/Documentation/Haddock/Parser/Monad.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies, BangPatterns #-}  module Documentation.Haddock.Parser.Monad (    module Documentation.Haddock.Parser.Monad  , Attoparsec.isDigit @@ -31,9 +31,10 @@ module Documentation.Haddock.Parser.Monad (  import           Control.Applicative  import           Control.Monad  import           Data.String -import           Data.ByteString (ByteString) +import           Data.ByteString (ByteString, length)  import qualified Data.ByteString.Lazy as LB  import qualified Data.Attoparsec.ByteString.Char8 as Attoparsec +import qualified Data.Attoparsec.Combinator as Attoparsec  import           Control.Monad.Trans.State  import qualified Control.Monad.Trans.Class as Trans  import           Data.Word @@ -41,6 +42,7 @@ import           Data.Bits  import           Data.Tuple  import           Documentation.Haddock.Types (Version) +import           Documentation.Haddock.Utf8  (encodeUtf8, decodeUtf8)  newtype ParserState = ParserState {    parserStateSince :: Maybe Version @@ -73,6 +75,25 @@ char = lift . Attoparsec.char  char8 :: Char -> Parser Word8  char8 = lift . Attoparsec.char8 +-- | Peek a unicode character and return the number of bytes that it took up +peekUnicode :: Parser (Char, Int) +peekUnicode = lift $ Attoparsec.lookAhead $ do +   +  -- attoparsec's take fails on shorter inputs rather than truncate +  bs <- Attoparsec.choice (map Attoparsec.take [4,3,2,1]) +   +  let !c = head . decodeUtf8 $ bs +      !n = Data.ByteString.length . encodeUtf8 $ [c] +  pure (c, fromIntegral n) + +-- | Like 'satisfy', but consuming a unicode character +satisfyUnicode :: (Char -> Bool) -> Parser Char +satisfyUnicode predicate = do +  (c,n) <- peekUnicode +  if predicate c +    then Documentation.Haddock.Parser.Monad.take n *> pure c +    else fail "satsifyUnicode" +  anyChar :: Parser Char  anyChar = lift Attoparsec.anyChar diff --git a/html-test/ref/Bug458.html b/html-test/ref/Bug458.html new file mode 100644 index 00000000..aa99e719 --- /dev/null +++ b/html-test/ref/Bug458.html @@ -0,0 +1,80 @@ +<html xmlns="http://www.w3.org/1999/xhtml" +><head +  ><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" +     /><title +    >Bug458</title +    ><link href="#" rel="stylesheet" type="text/css" title="Ocean" +     /><link rel="stylesheet" type="text/css" href="#" +     /><script src="haddock-bundle.min.js" async="async" type="text/javascript" +    ></script +    ><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML" type="text/javascript" +    ></script +    ></head +  ><body +  ><div id="package-header" +    ><ul class="links" id="page-menu" +      ><li +	><a href="#" +	  >Contents</a +	  ></li +	><li +	><a href="#" +	  >Index</a +	  ></li +	></ul +      ><p class="caption empty" +      ></p +      ></div +    ><div id="content" +    ><div id="module-header" +      ><table class="info" +	><tr +	  ><th +	    >Safe Haskell</th +	    ><td +	    >Safe</td +	    ></tr +	  ></table +	><p class="caption" +	>Bug458</p +	></div +      ><div id="synopsis" +      ><details id="syn" +	><summary +	  >Synopsis</summary +	  ><ul class="details-toggle" data-details-id="syn" +	  ><li class="src short" +	    ><a href="#" +	      >(⊆)</a +	      > :: () -> () -> ()</li +	    ></ul +	  ></details +	></div +      ><div id="interface" +      ><h1 +	>Documentation</h1 +	><div class="top" +	><p class="src" +	  ><a id="v:-8838-" class="def" +	    >(⊆)</a +	    > :: () -> () -> () <a href="#" class="selflink" +	    >#</a +	    ></p +	  ><div class="doc" +	  ><p +	    >See the defn of <code +	      ><code +		><a href="#" +		  >⊆</a +		  ></code +		></code +	      >.</p +	    ></div +	  ></div +	></div +      ></div +    ><div id="footer" +    ></div +    ></body +  ></html +>
\ No newline at end of file diff --git a/html-test/src/Bug458.hs b/html-test/src/Bug458.hs new file mode 100644 index 00000000..6a3ac9a4 --- /dev/null +++ b/html-test/src/Bug458.hs @@ -0,0 +1,6 @@ +module Bug458 where + +-- | See the defn of @'⊆'@. +(⊆) :: () -> () -> () +_ ⊆ _ = () + | 
