From 2448bd71609688be7b8bfe362a8534959531cd79 Mon Sep 17 00:00:00 2001
From: Simon Hengel
Date: Sun, 8 Sep 2013 10:33:38 +0200
Subject: Fix totality, unicode, examples, paragraph parsing
Also simplify specs and parsers while we're at it. Some parsers were
made more generic.
This commit is a part of GHC pre-merge squash, email
fuuzetsu@fuuzetsu.co.uk if you need the full commit history.
---
html-test/ref/A.html | 12 +-
html-test/ref/B.html | 12 +-
html-test/ref/Bug1.html | 3 +-
html-test/ref/Bug3.html | 3 +-
html-test/ref/Bug4.html | 3 +-
html-test/ref/Bug6.html | 18 +-
html-test/ref/Bug7.html | 15 +-
html-test/ref/BugDeprecated.html | 9 +-
html-test/ref/DeprecatedClass.html | 6 +-
html-test/ref/DeprecatedData.html | 9 +-
html-test/ref/DeprecatedFunction.html | 6 +-
html-test/ref/DeprecatedModule.html | 3 +-
html-test/ref/DeprecatedNewtype.html | 6 +-
html-test/ref/DeprecatedReExport.html | 11 +-
html-test/ref/DeprecatedRecord.html | 9 +-
html-test/ref/DeprecatedTypeFamily.html | 3 +-
html-test/ref/DeprecatedTypeSynonym.html | 3 +-
html-test/ref/Examples.html | 15 +-
html-test/ref/FunArgs.html | 27 +-
html-test/ref/GADTRecords.html | 9 +-
html-test/ref/Hash.html | 21 +-
html-test/ref/HiddenInstances.html | 18 +-
html-test/ref/HiddenInstancesB.html | 12 +-
html-test/ref/Hyperlinks.html | 6 +-
html-test/ref/IgnoreExports.html | 6 +-
html-test/ref/ModuleWithWarning.html | 3 +-
html-test/ref/NamedDoc.html | 3 +-
html-test/ref/NoLayout.html | 3 +-
html-test/ref/NonGreedy.html | 3 +-
html-test/ref/Properties.html | 3 +-
html-test/ref/PruneWithWarning.html | 5 +-
html-test/ref/SpuriousSuperclassConstraints.html | 21 +-
html-test/ref/Test.html | 264 +++----
html-test/ref/Ticket112.html | 3 +-
html-test/ref/Ticket253_1.html | 6 +-
html-test/ref/Ticket253_2.html | 3 +-
html-test/ref/Ticket61.html | 3 +-
html-test/ref/Ticket75.html | 3 +-
html-test/ref/TitledPicture.html | 6 +-
html-test/ref/TypeFamilies.html | 15 +-
html-test/ref/Unicode.html | 3 +-
html-test/ref/mini_Test.html | 2 +-
src/Haddock.hs | 2 +-
src/Haddock/Doc.hs | 57 +-
src/Haddock/Interface/LexParseRn.hs | 4 +-
src/Haddock/Interface/ParseModuleHeader.hs | 4 +-
src/Haddock/Parser.hs | 482 +++++-------
test/Haddock/ParserSpec.hs | 918 +++++++++++------------
48 files changed, 871 insertions(+), 1190 deletions(-)
diff --git a/html-test/ref/A.html b/html-test/ref/A.html
index 4d55ba16..0346574f 100644
--- a/html-test/ref/A.html
+++ b/html-test/ref/A.html
@@ -123,8 +123,7 @@ window.onload = function () {pageLoad();setSynopsis("mini_A.html");};
>
Doc for test2
-
Doc for test2
Should show up on the page for both modules A and B
-
Should show up on the page for both modules A and B
Doc for consructor
- Doc for consructor | Should show up on the page for both modules A and B
-
Should show up on the page for both modules A and B
.
Module link: Prelude.
-.Should show up on the page for both modules A and B
-
Should show up on the page for both modules A and B
Should show up on the page for both modules A and B
-
Should show up on the page for both modules A and B
Doc for consructor
- Doc for consructor | T.
-.
/multi-line
- emphasis/
-
don't use apostrophe's in the wrong place's
-
don't use apostrophe's in the wrong place's
DescriptionExporting records.
-
Exporting records.
This record is exported without its field
-
This record is exported without its field
.. with its field, but the field is named separately in the export list
- (the field isn't documented separately since it is already documented here)
-
.. with fields names as subordinate names in the export
-
.. with fields names as subordinate names in the export
.. with only some of the fields exported (we can't handle this one -
- how do we render the declaration?)
-
a newtype with a field
-
a newtype with a field
This module caused a duplicate instance in the documentation for the Foo
- type.
-
The Foo datatype
-
The Foo datatype
Just one instance
- Just one instance | x y
The Bar class
-
The Bar class
Just one instance
- Just one instance | Deprecated: for one
some documentation for one, two and three
-
some documentation for one, two and three
Deprecated: for three
some documentation for one, two and three
-
some documentation for one, two and three
Deprecated: for two
some documentation for one, two and three
-
some documentation for one, two and three
Deprecated: SomeClasssome class
-
some classdocumentation for foo
-
documentation for fooDeprecated: Footype Foo
-
type Fooconstructor Foo
-
constructor FooDeprecated: Barconstructor Bar
-
constructor Bar
insteadsome documentation for foo
-
some documentation for foosome documentation for bar
-
some documentation for bar
Documentation for DeprecatedModule.
-
.Deprecated: SomeNewType
some documentation
-
some documentationDeprecated: SomeNewTypeConst
constructor docu
-
constructor docu DescriptionWhat is tested here:
-
What is tested here:
- Deprecation messages are shown for re-exported items.
+ >Deprecation messages are shown for re-exported items.
insteadsome documentation for foo
-
some documentation for foosome value
-
some valueDeprecated: SomeTypeFamily
some documentation
-
some documentationDeprecated: TypeSyn
some documentation
-
some documentationInteger.
-.
Examples:
-
Examples:
>>>
55
One more Example:
-
One more Example:
>>>
5
One more Example:
-
One more Example:
>>>
5
Example with an import:
-
Example with an import:
>>>
First argument
- First argument | -> aSecond argument
- Second argument |
Third argument
- Third argument |
-> (a -> a)Fourth argument
- Fourth argument |
-> ()Result
- Result |
:: aFirst argument
- First argument | -> bSecond argument
- Second argument |
-> cThird argument
- Third argument |
-> dResult
- Result |
hello docs
-
hello docs :: a
hello2 docs
-
hello2 docsImplementation of fixed-size hash tables, with a type
- class for constructing hash values for structured types.
-
key val)Builds a new hash table with a given size
-
Builds a new hash table with a given size
()
Inserts a new element into the hash table
-
Inserts a new element into the hash table
A class of types which can be hashed.
-
A class of types which can be hashed.
a
Should be visible
-
Should be visible
Should be visible
- Should be visible | Should be visible
- Should be visible |
Should be visible
-
Should be visible
Should be visible
- Should be visible | Should be visible
- Should be visible |
a
Should be visible
-
Should be visible
Should be visible
- Should be visible | Should be visible
-
Should be visible
documentation for foo
-
documentation for foo
documentation for bar
-
documentation for bar
Documentation for ModuleWithWarning.
-
.g
-url1 url2
-Integer.
-.fib n <= fib (n + 1)
DescriptionWhat is tested here:
-
What is tested here:
- If a binding has a deprecation message but no documentation, it is pruned
+ >If a binding has a deprecation message but no documentation, it is pruned
when
OPTIONS_HADDOCK prune
is used.
diff --git a/html-test/ref/SpuriousSuperclassConstraints.html b/html-test/ref/SpuriousSuperclassConstraints.html
index 9b9d8087..566eafd6 100644
--- a/html-test/ref/SpuriousSuperclassConstraints.html
+++ b/html-test/ref/SpuriousSuperclassConstraints.html
@@ -46,32 +46,25 @@ window.onload = function () {pageLoad();setSynopsis("mini_SpuriousSuperclassCons
>Description
This comment applies to the following declaration
- and it continues until the next non-comment line
-
A constructor
-
constructor
B constructor
- constructor
a b
An abstract data declaration
-
An abstract data declaration
a b
A data declaration with no documentation annotations on the constructors
-
A data declaration with no documentation annotations on the constructors
A3
-
B3
-
Testing alternative comment styles
-
Testing alternative comment styles
A4
-
B4
-
C4
-
a
a b
A newtype with a fieldname
-
A newtype with a fieldname
a b
A newtype with a fieldname, documentation on the field
-
A newtype with a fieldname, documentation on the field
n3 field
-
field
An abstract newtype - we show this one as data rather than newtype because
- the difference isn't visible to the programmer for an abstract type.
-
:: a b
no docs on the datatype or the constructor
-
no docs on the datatype or the constructordocs on the constructor only
- docs on the constructor only | a b docs on the newtype and the constructor
-
docs on the newtype and the constructor
N7 constructor
-
constructor
s.
-.C1 record constructor, with the following fields:
-
record constructor, with the following fields:
p field
- fieldq field
- fields
-s
-C2 record constructor, also with some fields:
- record constructor, also with some fields:
Testing different record commenting styles
-
Testing different record commenting styles
C3 record constructor
-
record constructor
s1 record selector
- record selectors2 record selector
- record selectors3 record selector
- record selectortest that we can export record selectors on their own:
-
test that we can export record selectors on their own:
Class declarations
C class)
- class)b method
-
methodThis is a class declaration with no separate docs for the methods
-
This is a class declaration with no separate docs for the methods
a
This is a class declaration with no methods (or no methods exported)
-
This is a class declaration with no methods (or no methods exported)
Test that we can export a class method on its own:
-
Test that we can export a class method on its own:
Function types
Foo. We can add emphasis like this.
-.- This is a bulleted list
+ >This is a bulleted list
- This is the next item (different kind of bullet)
+ >This is the next item (different kind of bullet)
- This is an ordered list
+ >This is an ordered list
- This is the next item (different kind of bullet)
+ >This is the next item (different kind of bullet)
- cat
- a small, furry, domesticated mammal
+ >a small, furry, domesticated mammal
- pineapple
- a fruit grown in the tropics
+ >a fruit grown in the tropics
- This is a block of code, which can include other markup: This is a block of code, which can include other markup: R
this is another block of code
-
this is another block of code
We can also include URLs in documentation: http://www.haskell.org/.
-
. CInt
we can export foreign declarations too
-
we can export foreign declarations too
This is some documentation that is attached to a name ($aux1)
rather than a source declaration. The documentation may be
- referred to in the export list using its name.
-
code block in named doc
This is some documentation that is attached to a name ($aux2)
-
This is some documentation that is attached to a name ($aux2)
a nested, named doc comment
-
a nested, named doc comment
with a paragraph,
-
with a paragraph,
and a code block
test
-test1
-
test2
test3
@@ -1781,70 +1731,57 @@ test1
>
-test1
+ >test1
test2
test3
-test4
-
test3
-test4
-
-test1
+ >test1
test2
aux11:
-
aux11:
test3
-test4
-
-test1
+ >test1
test2
This is some inline documentation in the export list
-
This is some inline documentation in the export list
a code block using bird-tracks
each line must begin with > (which isn't significant unless it
- is at the beginning of the line).
-
A hidden module
nested-style doc comments
-
nested-style doc comments
Existential / Universal types
a A data-type using existential/universal types
-
A data-type using existential/universal types
This argument has type 'T2 Int Int'
- This argument has type 'T2 Int Int' |
This argument has type T3 Bool Bool -> T4 Float Float
-
|
This argument has a very long description that should
hopefully cause some wrapping to happen when it is finally
- rendered by Haddock in the generated HTML page.
-
|
()This is the result type
- This is the result type |
This is a function with documentation for each argument
-
This is a function with documentation for each argument
)
takes a triple
- takes a triple | Int
-
()
one of the arguments
- one of the arguments | and the return value
- and the return value |
This function has some arg docs
-
This function has some arg docs
The input float
- The input float | The output float
- The output float |
A foreign import with argument docs
-
A foreign import with argument docs
A subsection a literal line
-
a literal line
$ a non literal line $
-
line $
f'
- but f' doesn't get link'd 'f\''
-
Comment on a definition with type signature
-
Comment on a definition with type signature
:: t
Comment on a definition without type signature
-
Comment on a definition without type signature
Addr# to the string, and the length of the string.
- to the string, and the length of the string.
bar.
-.Also see Baz
-
:: a
A comment about f
-
A comment about f
:-
-foo without a title
-bar with title
-
a :: *Type family G
-
Type family G
A class with an associated type
-
A class with an associated type
a :: * -> *
An associated type
-
An associated type
a
Doc for family
-
Doc for family
A section
A subsection
A subsection return Nothing
[filename] -> do
str <- readFile filename
- case parseParas dflags str of
+ case parseParasMaybe dflags str of
Nothing -> throwE $ "failed to parse haddock prologue from file: " ++ filename
Just doc -> return (Just doc)
_otherwise -> throwE "multiple -p/--prologue options"
diff --git a/src/Haddock/Doc.hs b/src/Haddock/Doc.hs
index 4d68c554..69b2dd6f 100644
--- a/src/Haddock/Doc.hs
+++ b/src/Haddock/Doc.hs
@@ -1,16 +1,14 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Haddock.Doc (
- docAppend,
- docParagraph,
- combineStringNodes,
- combineDocumentation
- ) where
+ docAppend
+, docParagraph
+, combineDocumentation
+) where
import Data.Maybe
import Data.Monoid
import Haddock.Types
import Data.Char (isSpace)
-import Control.Arrow ((***))
-- We put it here so that we can avoid a circular import
-- anything relevant imports this module anyway
@@ -22,25 +20,15 @@ combineDocumentation :: Documentation name -> Maybe (Doc name)
combineDocumentation (Documentation Nothing Nothing) = Nothing
combineDocumentation (Documentation mDoc mWarning) = Just (fromMaybe mempty mWarning `mappend` fromMaybe mempty mDoc)
--- used to make parsing easier; we group the list items later
docAppend :: Doc id -> Doc id -> Doc id
-docAppend (DocUnorderedList ds1) (DocUnorderedList ds2)
- = DocUnorderedList (ds1++ds2)
-docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d)
- = DocAppend (DocUnorderedList (ds1++ds2)) d
-docAppend (DocOrderedList ds1) (DocOrderedList ds2)
- = DocOrderedList (ds1++ds2)
-docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d)
- = DocAppend (DocOrderedList (ds1++ds2)) d
-docAppend (DocDefList ds1) (DocDefList ds2)
- = DocDefList (ds1++ds2)
-docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d)
- = DocAppend (DocDefList (ds1++ds2)) d
+docAppend (DocDefList ds1) (DocDefList ds2) = DocDefList (ds1++ds2)
+docAppend (DocDefList ds1) (DocAppend (DocDefList ds2) d) = DocAppend (DocDefList (ds1++ds2)) d
docAppend DocEmpty d = d
docAppend d DocEmpty = d
-docAppend d1 d2
- = DocAppend d1 d2
-
+docAppend (DocString s1) (DocString s2) = DocString (s1 ++ s2)
+docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 ++ s2))
+docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 ++ s2)) d
+docAppend d1 d2 = DocAppend d1 d2
-- again to make parsing easier - we spot a paragraph whose only item
-- is a DocMonospaced and make it into a DocCodeBlock
@@ -77,28 +65,3 @@ docCodeBlock (DocString s)
docCodeBlock (DocAppend l r)
= DocAppend l (docCodeBlock r)
docCodeBlock d = d
-
--- | This is a hack that joins neighbouring 'DocString's into a single one.
--- This is done to ease up the testing and doesn't change the final result
--- as this would be done later anyway.
-combineStringNodes :: Doc id -> Doc id
-combineStringNodes (DocAppend (DocString x) (DocString y)) = DocString (x ++ y)
-combineStringNodes (DocAppend (DocString x) (DocAppend (DocString y) z)) =
- tryjoin (DocAppend (DocString (x ++ y)) (combineStringNodes z))
-combineStringNodes (DocAppend x y) = tryjoin (DocAppend (combineStringNodes x) (combineStringNodes y))
-combineStringNodes (DocParagraph x) = DocParagraph (combineStringNodes x)
-combineStringNodes (DocWarning x) = DocWarning (combineStringNodes x)
-combineStringNodes (DocEmphasis x) = DocEmphasis (combineStringNodes x)
-combineStringNodes (DocMonospaced x) = DocMonospaced (combineStringNodes x)
-combineStringNodes (DocUnorderedList xs) = DocUnorderedList (map combineStringNodes xs)
-combineStringNodes (DocOrderedList x) = DocOrderedList (map combineStringNodes x)
-combineStringNodes (DocDefList xs) = DocDefList (map (combineStringNodes *** combineStringNodes) xs)
-combineStringNodes (DocCodeBlock x) = DocCodeBlock (combineStringNodes x)
-combineStringNodes x = x
-
-tryjoin :: Doc id -> Doc id
-tryjoin (DocAppend (DocString x) (DocString y)) = DocString (x ++ y)
-tryjoin (DocAppend (DocString x) (DocAppend (DocString y) z)) = DocAppend (DocString (x ++ y)) z
-tryjoin (DocAppend (DocAppend x (DocString y)) (DocString z))
- = tryjoin (DocAppend (combineStringNodes x) (DocString $ y ++ z))
-tryjoin x = x
diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs
index 13563532..8c33ade6 100644
--- a/src/Haddock/Interface/LexParseRn.hs
+++ b/src/Haddock/Interface/LexParseRn.hs
@@ -43,11 +43,11 @@ processDocStrings dflags gre strs = do
processDocStringParas :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
-processDocStringParas = process parseParas
+processDocStringParas = process parseParasMaybe
processDocString :: DynFlags -> GlobalRdrEnv -> HsDocString -> ErrMsgM (Maybe (Doc Name))
-processDocString = process parseString
+processDocString = process parseStringMaybe
process :: (DynFlags -> String -> Maybe (Doc RdrName))
-> DynFlags
diff --git a/src/Haddock/Interface/ParseModuleHeader.hs b/src/Haddock/Interface/ParseModuleHeader.hs
index 2e4fe73b..ade28728 100644
--- a/src/Haddock/Interface/ParseModuleHeader.hs
+++ b/src/Haddock/Interface/ParseModuleHeader.hs
@@ -46,13 +46,13 @@ parseModuleHeader dflags str0 =
description1 :: Either String (Maybe (Doc RdrName))
description1 = case descriptionOpt of
Nothing -> Right Nothing
- Just description -> case parseString dflags description of
+ Just description -> case parseStringMaybe dflags description of
Nothing -> Left ("Cannot parse Description: " ++ description)
Just doc -> Right (Just doc)
in
case description1 of
Left mess -> Left mess
- Right docOpt -> case parseParas dflags str8 of
+ Right docOpt -> case parseParasMaybe dflags str8 of
Nothing -> Left "Cannot parse header documentation paragraphs"
Just doc -> Right (HaddockModInfo {
hmi_description = docOpt,
diff --git a/src/Haddock/Parser.hs b/src/Haddock/Parser.hs
index 43a2b169..fe8904d4 100644
--- a/src/Haddock/Parser.hs
+++ b/src/Haddock/Parser.hs
@@ -9,15 +9,15 @@
-- Stability : experimental
-- Portability : portable
-module Haddock.Parser (parseString, parseParas) where
+module Haddock.Parser (parseString, parseParas, parseStringMaybe, parseParasMaybe) where
+import Prelude hiding (takeWhile)
+import Control.Monad (void, mfilter)
import Control.Applicative
-import Data.Attoparsec.ByteString hiding (parse, takeWhile1, take, inClass)
-import qualified Data.Attoparsec.ByteString.Char8 as A8
-import Data.Attoparsec.ByteString.Char8 hiding (parse, take, string)
-import qualified Data.ByteString as BS
-import Data.Char (chr)
-import Data.List (stripPrefix)
+import Data.Attoparsec.ByteString.Char8 hiding (parse, take, string, endOfLine)
+import qualified Data.ByteString.Char8 as BS
+import Data.Char (chr, isAsciiUpper)
+import Data.List (stripPrefix, intercalate)
import Data.Maybe (fromMaybe)
import Data.Monoid
import DynFlags
@@ -31,157 +31,117 @@ import SrcLoc (mkRealSrcLoc, unLoc)
import StringBuffer (stringToStringBuffer)
import Haddock.Utf8
-parse :: Parser a -> String -> Maybe a
-parse p = either (const Nothing) Just . parseOnly (p <* endOfInput) . encodeUtf8
+{-# DEPRECATED parseParasMaybe "use `parseParas` instead" #-}
+parseParasMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
+parseParasMaybe d = Just . parseParas d
+
+{-# DEPRECATED parseStringMaybe "use `parseString` instead" #-}
+parseStringMaybe :: DynFlags -> String -> Maybe (Doc RdrName)
+parseStringMaybe d = Just . parseString d
+
+parse :: Parser a -> BS.ByteString -> a
+parse p = either err id . parseOnly (p <* endOfInput)
+ where
+ err = error . ("Haddock.Parser.parse: " ++)
-- | Main entry point to the parser. Appends the newline character
-- to the input string.
parseParas :: DynFlags
-> String -- ^ String to parse
- -> Maybe (Doc RdrName)
-parseParas d = fmap combineStringNodes . parse (p <* skipSpace) . (++ "\n")
+ -> Doc RdrName
+parseParas d = parse (p <* skipSpace) . encodeUtf8 . (++ "\n")
where
p :: Parser (Doc RdrName)
- -- make sure that we don't swallow up whitespace belonging to next paragraph
- p = mconcat <$> paragraph d `sepBy` some (optWs *> "\n")
-
--- | A parser that parsers separate lines of the comments. Eventually
--- called by 'parseParas'. Appends a newline character to the input string.
--- Drops any whitespace in front of the input string. It's dropped for the sake of
--- section headings.
-parseString :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString d = parseString' d . dropWhile isSpace
-
--- | A parser that parsers separate lines of the comments. Eventually
--- called by 'parseParas'. Appends a newline character to the input string.
--- Unlike 'parseString', doesn't drop the preceding whitespace. Internal use.
-parseString'' :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString'' d = parseString' d . (++ "\n")
-
--- | An internal use function. Split from the 'parseString' is useful
--- as we can specify separately when we want the newline to be appended.
-parseString' :: DynFlags -> String -> Maybe (Doc RdrName)
-parseString' d = fmap combineStringNodes . parse p
+ p = mconcat <$> paragraph d `sepBy` many (skipHorizontalSpace *> "\n")
+
+-- | Parse a text paragraph.
+parseString :: DynFlags -> String -> Doc RdrName
+parseString d = parseStringBS d . encodeUtf8 . dropWhile isSpace
+
+parseStringBS :: DynFlags -> BS.ByteString -> Doc RdrName
+parseStringBS d = parse p
where
p :: Parser (Doc RdrName)
- p = mconcat <$> some (charEscape <|> monospace d <|> anchor <|> identifier d
- <|> moduleName <|> picture <|> url
- <|> emphasis d <|> encodedChar <|> string' <|> skipChar)
+ p = mconcat <$> many (monospace d <|> anchor <|> identifier d
+ <|> moduleName <|> picture <|> hyperlink <|> autoUrl
+ <|> emphasis d <|> encodedChar <|> string' <|> skipSpecialChar)
-- | Parses and processes
--
--
-- >>> parseOnly encodedChar "ABC"
-- Right (DocString "ABC")
-encodedChar :: Parser (Doc RdrName)
+encodedChar :: Parser (Doc a)
encodedChar = "" *> c <* ";"
where
c = DocString . return . chr <$> num
num = hex <|> decimal
hex = ("x" <|> "X") *> hexadecimal
+specialChar :: [Char]
+specialChar = "/<@\"&'`"
+
-- | Plain, regular parser for text. Called as one of the last parsers
-- to ensure that we have already given a chance to more meaningful parsers
-- before capturing their characers.
-string' :: Parser (Doc RdrName)
-string' = DocString . decodeUtf8 <$> takeWhile1 (`notElem` "/<@\" &'`\\")
+string' :: Parser (Doc a)
+string' = DocString . unescape . decodeUtf8 <$> takeWhile1_ (`notElem` specialChar)
+ where
+ unescape "" = ""
+ unescape ('\\':x:xs) = x : unescape xs
+ unescape (x:xs) = x : unescape xs
+
+-- | Skips a single special character and treats it as a plain string.
+-- This is done to skip over any special characters belonging to other
+-- elements but which were not deemed meaningful at their positions.
+skipSpecialChar :: Parser (Doc a)
+skipSpecialChar = DocString . return <$> satisfy (`elem` specialChar)
-- | Emphasis parser.
--
-- >>> parseOnly emphasis "/Hello world/"
-- Right (DocEmphasis (DocString "Hello world"))
emphasis :: DynFlags -> Parser (Doc RdrName)
-emphasis d = DocEmphasis <$> stringBlock d "/" "/" "\n"
+emphasis d = DocEmphasis . parseStringBS d <$>
+ mfilter ('\n' `BS.notElem`) ("/" *> takeWhile1_ (/= '/') <* "/")
--- | Skips a single character and treats it as a plain string.
--- This is done to skip over any special characters belonging to other
--- elements but which were not deemed meaningful at their positions.
--- Note that this can only be used in places where we're absolutely certain
--- no unicode is present, such as to skip a 100% certain ASCII delimeter.
-skipChar :: Parser (Doc RdrName)
-skipChar = DocString . return <$> anyChar
+-- | Like `takeWhile`, but unconditionally take escaped characters.
+takeWhile_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile_ p = scan False p_
+ where
+ p_ escaped c
+ | escaped = Just False
+ | not $ p c = Nothing
+ | otherwise = Just (c == '\\')
--- | Treats the next character as a regular string, even if it's normally
--- used for markup.
-charEscape :: Parser (Doc RdrName)
-charEscape = "\\" *> (DocString . return <$> A8.satisfy (/= '\n'))
+-- | Like `takeWhile1`, but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser BS.ByteString
+takeWhile1_ = mfilter (not . BS.null) . takeWhile_
-- | Text anchors to allow for jumping around the generated documentation.
--
-- >>> parseOnly anchor "#Hello world#"
-- Right (DocAName "Hello world")
-anchor :: Parser (Doc RdrName)
+anchor :: Parser (Doc a)
anchor = DocAName . decodeUtf8 <$> ("#" *> takeWhile1 (`notElem` "#\n") <* "#")
--- | Helper for markup structures surrounded with delimiters.
-stringBlock
- :: DynFlags
- -> String -- ^ Opening delimiter
- -> String -- ^ Closing delimiter
- -> String -- ^ Additional characters to terminate parsing on
- -> Parser (Doc RdrName)
-stringBlock d op ed n = do
- inner <- block op ed n
- case parseString' d inner of
- Just r -> return r
- _ -> fail $ "inner parse fail with op: ‘" ++ op ++ "’, ed: ‘" ++ ed ++ "’"
-
--- | Returns sections of text delimited by specified text.
-block :: String -> String -> String -> Parser String
-block op ed n = reverse . drop (length ed) . reverse <$> block' op ed
- where
- block' op' ed' = string (encodeUtf8 op') *> mid
- where
- mid :: Parser String
- mid = decodeUtf8 <$> string (encodeUtf8 ed')
- <|> do
- inner <- takeWithSkip (head ed') n
- more <- decodeUtf8 <$> string (encodeUtf8 $ tail ed')
- <|> block' "" ed' -- not full ending, take more
- return $ inner ++ more
-
-
--- | Takes all characters until the specified one. Unconditionally
--- takes a character if it's escaped. Fails if it doesn't find the character or
--- when the input string is empty.
-takeWithSkip :: Char -> String -> Parser String
-takeWithSkip s n = do
- content <- decodeUtf8 <$> A8.scan (False, False) p >>= gotSome
- if or (map (`elem` content) n) || last content /= s
- then fail "failed in takeWithSkip"
- else return content
- where
- gotSome [] = fail "EOF in takeWithSkip"
- gotSome xs = return xs
- -- Apparently ‘scan’ is so magical that it doesn't mangle unicode.
- p (escaped, terminate) c
- | terminate = Nothing -- swallows up that extra character
- | escaped = Just (False, False)
- | c == s = Just (False, True)
- | otherwise = Just (c == '\\', False)
-
-- | Monospaced strings.
--
-- >>> parseOnly (monospace dynflags) "@cruel@"
-- Right (DocMonospaced (DocString "cruel"))
monospace :: DynFlags -> Parser (Doc RdrName)
-monospace d = DocMonospaced <$> stringBlock d "@" "@" ""
-
--- | Module name parser, surrounded by double quotes. This does a very primitive and
--- purely syntactic checking so that obviously invalid names are not treated as valid
--- and blindly hyperlinked (not starting with a capital letter or including spaces).
-moduleName :: Parser (Doc RdrName)
-moduleName = DocModule <$> ("\"" *> legalModule <* "\"")
- where legalModule = do
- n <- (:) <$> A8.satisfy (`elem` ['A' .. 'Z'])
- <*> (decodeUtf8 <$> A8.takeWhile (`notElem` "\"\n"))
-
- if any (`elem` n) " &[{}(=*)+]!#|@/;,^?"
- then fail "invalid characters in module name"
- else case n of
- [] -> return []
- _ -> if last n == '.' then fail "trailing dot in module name" else return n
+monospace d = DocMonospaced . parseStringBS d <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+moduleName :: Parser (Doc a)
+moduleName = DocModule <$> (char '"' *> modid <* char '"')
+ where
+ modid = intercalate "." <$> conid `sepBy1` "."
+ conid = (:)
+ <$> satisfy isAsciiUpper
+ -- NOTE: According to Haskell 2010 we shouldd actually only
+ -- accept {small | large | digit | ' } here. But as we can't
+ -- match on unicode characters, this is currently not possible.
+ <*> (decodeUtf8 <$> takeWhile (`notElem` " .&[{}(=*)+]!#|@/;,^?\"\n"))
-- | Picture parser, surrounded by \<\< and \>\>. It's possible to specify
-- a title for the picture.
@@ -190,181 +150,166 @@ moduleName = DocModule <$> ("\"" *> legalModule <* "\"")
-- Right (DocPic (Picture "hello.png" Nothing))
-- >>> parseOnly picture "<>"
-- Right (DocPic (Picture "hello.png" (Just "world")))
-picture :: Parser (Doc RdrName)
-picture = DocPic . makePicture . decodeUtf8 <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
+picture :: Parser (Doc a)
+picture = DocPic . makeLabeled Picture . decodeUtf8
+ <$> ("<<" *> takeWhile1 (`notElem` ">\n") <* ">>")
-- | Paragraph parser, called by 'parseParas'.
paragraph :: DynFlags -> Parser (Doc RdrName)
paragraph d = examples <|> skipSpace *> (list d <|> birdtracks <|> codeblock d
<|> property <|> textParagraph d)
+textParagraph :: DynFlags -> Parser (Doc RdrName)
+textParagraph d = docParagraph . parseString d . intercalate "\n" <$> many1 nonEmptyLine
+
-- | List parser, called by 'paragraph'.
list :: DynFlags -> Parser (Doc RdrName)
list d = DocUnorderedList <$> unorderedList d
<|> DocOrderedList <$> orderedList d
<|> DocDefList <$> definitionList d
--- | Parse given text with a provided parser, casting
--- Nothing to a failure
-parseLine :: (String -> Maybe (Doc RdrName)) -- ^ Parser to use
- -> (Doc RdrName -> a) -- ^ Doc function to wrap around the result
- -> BS.ByteString -- ^ Text to parse
- -> Parser a
-parseLine f doc str = maybe (fail "invalid string") (return . doc) (f $ decodeUtf8 str)
-
-- | Parses unordered (bullet) lists.
unorderedList :: DynFlags -> Parser [Doc RdrName]
-unorderedList d = ("*" <|> "-") *> innerList unorderedList d
+unorderedList d = ("*" <|> "-") *> innerList (unorderedList d) d
-- | Parses ordered lists (numbered or dashed).
orderedList :: DynFlags -> Parser [Doc RdrName]
-orderedList d = skipSpace *> (paren <|> dot) *> innerList orderedList d
+orderedList d = (paren <|> dot) *> innerList (orderedList d) d
where
- dot = decimal <* "."
- paren = "(" *> (decimal :: Parser Int) <* ")"
+ dot = (decimal :: Parser Int) <* "."
+ paren = "(" *> decimal <* ")"
-- | Generic function collecting any further lines belonging to the
-- list entry and recursively collecting any further lists in the
-- same paragraph. Usually used as
--
-- > someListFunction dynflags = listBeginning *> innerList someListFunction dynflags
-innerList :: (DynFlags -> Parser [Doc RdrName]) -- ^ parser calling this function
- -> DynFlags
- -> Parser [Doc RdrName]
-innerList p d = do
- cl <- do
- content <- A8.takeWhile (/= '\n') <* "\n" -- allow empty
- parseLine (parseString'' d) id content
- ulcs <- many ulc
- let contents = docParagraph $ mconcat $ cl : [x | Right x <- ulcs]
- unLists = mconcat [x | Left x <- ulcs]
- return $ contents : unLists
+innerList :: Parser [Doc RdrName] -> DynFlags -> Parser [Doc RdrName]
+innerList item d = do
+ c <- takeLine
+ (cs, items) <- more
+ let contents = (docParagraph . parseString d . unlines) (c : cs)
+ return (contents : items)
where
- ulc :: Parser (Either [Doc RdrName] (Doc RdrName))
- ulc = Left <$> (optWs *> p d)
- <|> Right <$> nonEmptyLine d
-
--- | Takes the remained of the line until the newline character
--- and calls 'parseLine' using 'parseString'. Fails if it's made
--- up strictly of whitespace.
-nonEmptyLine :: DynFlags -> Parser (Doc RdrName)
-nonEmptyLine d = do
- s <- (takeWhile1 (/= '\n') >>= nonSpace) <* "\n"
- parseLine (parseString'' d) id s
- where
- nonSpace xs
- | not (any (not . isSpace) (decodeUtf8 xs)) = fail "empty line"
- | otherwise = return xs
+ more :: Parser ([String], [Doc RdrName])
+ more = moreListItems <|> moreContent <|> pure ([], [])
+
+ moreListItems :: Parser ([String], [Doc RdrName])
+ moreListItems = (,) [] <$> (skipSpace *> item)
+
+ moreContent :: Parser ([String], [Doc RdrName])
+ moreContent = mapFst . (:) <$> nonEmptyLine <*> more
-- | Parses definition lists.
definitionList :: DynFlags -> Parser [(Doc RdrName, Doc RdrName)]
definitionList d = do
- _ <- "["
- inner <- parseLine (parseString' d) id =<< takeWhile1 (`notElem` "]\n")
- _ <- "]"
- outer <- parseLine (parseString'' d) id =<< (A8.takeWhile (/= '\n') <* "\n")
- ulcs <- many ulc
- let contents = mconcat $ outer : [x | Right x <- ulcs]
- unLists = map mconcat [x | Left x <- ulcs]
- return $ (inner, contents) : unLists
+ label <- parseStringBS d <$> ("[" *> takeWhile1 (`notElem` "]\n") <* "]")
+ c <- takeLine
+ (cs, items) <- more
+ let contents = (parseString d . unlines) (c : cs)
+ return ((label, contents) : items)
where
- ulc :: Parser (Either [(Doc RdrName, Doc RdrName)] (Doc RdrName))
- ulc = Left <$> (optWs *> definitionList d)
- <|> Right <$> nonEmptyLine d
-
--- | Parses birdtracks. No further markup is parsed after the birdtrack.
--- Consecutive birdtracks are allowed.
-birdtracks :: Parser (Doc RdrName)
-birdtracks = DocCodeBlock . mconcat . map (DocString . (++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
+ more :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+ more = moreListItems <|> moreContent <|> pure ([], [])
+
+ moreListItems :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+ moreListItems = (,) [] <$> (skipSpace *> definitionList d)
+
+ moreContent :: Parser ([String], [(Doc RdrName, Doc RdrName)])
+ moreContent = mapFst . (:) <$> nonEmptyLine <*> more
+
+birdtracks :: Parser (Doc a)
+birdtracks = DocCodeBlock . DocString . intercalate "\n" <$> many1 line
where
- line = optWs *> ">" *> A8.takeWhile (/= '\n')
+ line = skipHorizontalSpace *> ">" *> takeLine
-- | Parses examples. Examples are a paragraph level entitity (separated by an empty line).
-- Consecutive examples are accepted.
-examples :: Parser (Doc RdrName)
-examples = DocExamples <$> example
-
--- | Collects consecutive examples and their results.
-example :: Parser [Example]
-example = do
- ws <- optWs
- prompt <- decodeUtf8 <$> string ">>>"
- expr <- (++ "\n") . decodeUtf8 <$> (A8.takeWhile (/= '\n') <* "\n")
- results <- many result
- let exs = concat [ e | Left e <- results ]
- res = filter (not . null) [ r | Right r <- results ]
- return $ makeExample (decodeUtf8 ws ++ prompt) expr res : exs
+examples :: Parser (Doc a)
+examples = DocExamples <$> (many (skipHorizontalSpace *> "\n") *> go)
where
- result = Left <$> example
- <|> Right . decodeUtf8 <$> takeWhile1 (/= '\n') <* "\n"
+ go :: Parser [Example]
+ go = do
+ prefix <- decodeUtf8 <$> takeHorizontalSpace <* ">>>"
+ expr <- takeLine
+ (rs, es) <- resultAndMoreExamples
+ return (makeExample prefix expr rs : es)
+ where
+ resultAndMoreExamples :: Parser ([String], [Example])
+ resultAndMoreExamples = moreExamples <|> result <|> pure ([], [])
+ where
+ moreExamples :: Parser ([String], [Example])
+ moreExamples = (,) [] <$> go
+
+ result :: Parser ([String], [Example])
+ result = mapFst . (:) <$> nonEmptyLine <*> resultAndMoreExamples
+
+ makeExample :: String -> String -> [String] -> Example
+ makeExample prefix expression res =
+ Example (strip expression) result
+ where
+ result = map (substituteBlankLine . tryStripPrefix) res
+
+ tryStripPrefix xs = fromMaybe xs (stripPrefix prefix xs)
+
+ substituteBlankLine "" = ""
+ substituteBlankLine xs = xs
+
+nonEmptyLine :: Parser String
+nonEmptyLine = mfilter (any (not . isSpace)) takeLine
--- | Propery parser.
+takeLine :: Parser String
+takeLine = decodeUtf8 <$> takeWhile (/= '\n') <* endOfLine
+
+endOfLine :: Parser ()
+endOfLine = void "\n" <|> endOfInput
+
+mapFst :: (a -> b) -> (a, c) -> (b, c)
+mapFst f (a, b) = (f a, b)
+
+-- | Property parser.
--
-- >>> parseOnly property "prop> hello world"
-- Right (DocProperty "hello world")
-property :: Parser (Doc RdrName)
-property = do
- _ <- skipSpace
- s <- decodeUtf8 <$> (string "prop>" *> takeWhile1 (/= '\n'))
- return $ makeProperty ("prop>" ++ s)
-
--- | Paragraph level codeblock. Anything between the two delimiting @
--- is parsed for markup.
+property :: Parser (Doc a)
+property = DocProperty . strip . decodeUtf8 <$> ("prop>" *> takeWhile1 (/= '\n'))
+
+-- |
+-- Paragraph level codeblock. Anything between the two delimiting @ is parsed
+-- for markup.
codeblock :: DynFlags -> Parser (Doc RdrName)
-codeblock d = do
- -- Note that we don't need to use optWs here because in cases where
- -- we don't see a \n immediatelly after the opening @, this parser
- -- fails but we still have a chance to get a codeblock by getting
- -- a monospaced doc on its own in the paragraph. With that, the cases
- -- are covered. This should be updated if the implementation ever changes.
- s <- parseString' d . ('\n':) . decodeUtf8 <$> ("@\n" *> block' <* "@")
- maybe (fail "codeblock") (return . DocCodeBlock) s
+codeblock d =
+ DocCodeBlock . parseStringBS d <$> ("@" *> skipHorizontalSpace *> "\n" *> block' <* "@")
where
- block' = A8.scan False p
+ block' = scan False p
where
p isNewline c
| isNewline && c == '@' = Nothing
| otherwise = Just $ c == '\n'
--- | Calls 'parseString'' on each line of a paragraph
-textParagraph :: DynFlags -> Parser (Doc RdrName)
-textParagraph d = do
- s <- parseString' d . concatMap ((++ "\n") . decodeUtf8) <$> line `sepBy1` "\n"
- maybe (fail "textParagraph") (return . docParagraph) s
- where
- line = takeWhile1 (/= '\n')
-
--- | See 'picture' for adding a page title.
-url :: Parser (Doc RdrName)
-url = DocHyperlink . makeHyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
- <|> autoUrl
-
--- | Naive implementation of auto-linking. Will link everything after
--- @http://@, @https://@, @ftp://@, @ssh://@, @gopher://@ until a space.
--- Single trailing punctuation character (.!?,) is split off.
-autoUrl :: Parser (Doc RdrName)
-autoUrl = do
- link <- decodeUtf8 <$> urlLone
- return $ formatLink link
+hyperlink :: Parser (Doc a)
+hyperlink = DocHyperlink . makeLabeled Hyperlink . decodeUtf8 <$> ("<" *> takeWhile1 (`notElem` ">\n") <* ">")
+
+autoUrl :: Parser (Doc a)
+autoUrl = mkLink <$> url
where
- urlLone = mappend <$> choice prefixes <*> takeWhile1 (not . isSpace)
- prefixes = [ "http://", "https://", "ftp://"
- , "ssh://", "gopher://" ]
- formatLink :: String -> Doc RdrName
- formatLink s = if last s `elem` ".!?,"
- then docAppend (DocHyperlink $ Hyperlink (init s) Nothing) (DocString [last s])
- else DocHyperlink $ Hyperlink s Nothing
+ url = mappend <$> ("http://" <|> "https://" <|> "ftp://") <*> takeWhile1 (not . isSpace)
+ mkLink :: BS.ByteString -> Doc a
+ mkLink s = case BS.unsnoc s of
+ Just (xs, x) | x `elem` ",.!?" -> DocHyperlink (Hyperlink (decodeUtf8 xs) Nothing) <> DocString [x]
+ _ -> DocHyperlink (Hyperlink (decodeUtf8 s) Nothing)
-- | Parses strings between identifier delimiters. Consumes all input that it
-- deems to be valid in an identifier. Note that it simply blindly consumes
-- characters and does no actual validation itself.
parseValid :: Parser String
parseValid = do
- vs <- many' (A8.satisfy (`elem` "_.!#$%&*+/<=>?@\\?|-~:") <|> digit <|> letter_ascii)
+ vs <- many' $ satisfy (`elem` "_.!#$%&*+/<=>?@\\|-~:") <|> digit <|> letter_ascii
c <- peekChar
case c of
Just '`' -> return vs
- Just '\'' -> (do {c'' <- char '\''; y'' <- parseValid; return $ vs ++ [c''] ++ y''}) <|> return vs
+ Just '\'' -> (\x -> vs ++ "'" ++ x) <$> ("'" *> parseValid)
+ <|> return vs
_ -> fail "outofvalid"
-- | Parses identifiers with help of 'parseValid'. Asks GHC for 'RdrName' from the
@@ -374,56 +319,33 @@ identifier dflags = do
o <- idDelim
vid <- parseValid
e <- idDelim
- return $ validIdentifier $ o : (vid ++ [e])
- where idDelim = char '\'' <|> char '`'
- validIdentifier str = case parseIdent (tail $ init str) of
- Just identName -> DocIdentifier identName
- Nothing -> DocString str
- parseIdent :: String -> Maybe RdrName
- parseIdent str0 =
- let buffer = stringToStringBuffer str0
- realSrcLc = mkRealSrcLoc (mkFastString "") 0 0
- pstate = mkPState dflags buffer realSrcLc
- in case unP parseIdentifier pstate of
- POk _ name -> Just (unLoc name)
- _ -> Nothing
+ return $ validIdentifier o vid e
+ where
+ idDelim = char '\'' <|> char '`'
+ validIdentifier o ident e = case parseIdent ident of
+ Just identName -> DocIdentifier identName
+ Nothing -> DocString $ o : ident ++ [e]
+
+ parseIdent :: String -> Maybe RdrName
+ parseIdent str0 =
+ let buffer = stringToStringBuffer str0
+ realSrcLc = mkRealSrcLoc (mkFastString "") 0 0
+ pstate = mkPState dflags buffer realSrcLc
+ in case unP parseIdentifier pstate of
+ POk _ name -> Just (unLoc name)
+ _ -> Nothing
-- | Remove all leading and trailing whitespace
strip :: String -> String
strip = (\f -> f . f) $ dropWhile isSpace . reverse
--- | Consumes whitespace, excluding a newline.
-optWs :: Parser BS.ByteString
-optWs = A8.takeWhile (`elem` " \t\f\v\r")
-
--- | Create an 'Example', stripping superfluous characters as appropriate.
--- Remembers the amount of indentation used for the prompt.
-makeExample :: String -> String -> [String] -> Example
-makeExample prompt expression res =
- Example (strip expression) result' -- drop whitespace in expressions
- where (prefix, _) = span isSpace prompt
- result' = map substituteBlankLine $ filter (not . null) $ map (tryStripPrefix prefix) res
- where tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
- substituteBlankLine "" = ""
- substituteBlankLine line = line
-
--- | Creates a 'Picture' with an optional title. Called by 'picture'.
-makePicture :: String -> Picture
-makePicture input = case break isSpace $ strip input of
- (uri, "") -> Picture uri Nothing
- (uri, label) -> Picture uri (Just $ dropWhile isSpace label)
-
--- | Creates a 'Hyperlink' with an optional title. Called by 'example'.
-makeHyperlink :: String -> Hyperlink
-makeHyperlink input = case break isSpace $ strip input of
- (u, "") -> Hyperlink u Nothing
- (u, label) -> Hyperlink u (Just $ dropWhile isSpace label)
-
--- | Makes a property that can be used by other programs for assertions.
--- Drops whitespace around the property. Called by 'property'
-makeProperty :: String -> Doc RdrName
-makeProperty s = case strip s of
- 'p':'r':'o':'p':'>':xs ->
- DocProperty (dropWhile isSpace xs)
- xs ->
- error $ "makeProperty: invalid input " ++ show xs
+skipHorizontalSpace :: Parser ()
+skipHorizontalSpace = skipWhile (`elem` " \t\f\v\r")
+
+takeHorizontalSpace :: Parser BS.ByteString
+takeHorizontalSpace = takeWhile (`elem` " \t\f\v\r")
+
+makeLabeled :: (String -> Maybe String -> a) -> String -> a
+makeLabeled f input = case break isSpace $ strip input of
+ (uri, "") -> f uri Nothing
+ (uri, label) -> f uri (Just $ dropWhile isSpace label)
diff --git a/test/Haddock/ParserSpec.hs b/test/Haddock/ParserSpec.hs
index b0a6e41b..42f19c96 100644
--- a/test/Haddock/ParserSpec.hs
+++ b/test/Haddock/ParserSpec.hs
@@ -5,16 +5,16 @@
module Haddock.ParserSpec (main, spec) where
-import Control.Applicative
import Data.Monoid
import Data.String
-import Haddock.Doc (combineStringNodes)
import qualified Haddock.Parser as Parse
import Haddock.Types
import Outputable (Outputable, showSDoc, ppr)
-import RdrName (RdrName)
+import RdrName (RdrName, mkVarUnqual)
+import FastString (fsLit)
+import StaticFlags (initStaticOpts)
import Test.Hspec
-import Test.QuickCheck (property)
+import Test.QuickCheck
import Helper
@@ -24,6 +24,8 @@ instance Outputable a => Show a where
deriving instance Show a => Show (Doc a)
deriving instance Eq a => Eq (Doc a)
+instance IsString RdrName where
+ fromString = mkVarUnqual . fsLit
instance IsString (Doc RdrName) where
fromString = DocString
@@ -31,70 +33,78 @@ instance IsString (Doc RdrName) where
instance IsString a => IsString (Maybe a) where
fromString = Just . fromString
-parseParas :: String -> Maybe (Doc RdrName)
+parseParas :: String -> Doc RdrName
parseParas = Parse.parseParas dynFlags
-parseString :: String -> Maybe (Doc RdrName)
+parseString :: String -> Doc RdrName
parseString = Parse.parseString dynFlags
main :: IO ()
main = hspec spec
spec :: Spec
-spec = do
- let filterSpecial = filter (`notElem` (".(=#-[*`\v\f\n\t\r\\\"'_/@<> " :: String))
-
+spec = before initStaticOpts $ do
describe "parseString" $ do
let infix 1 `shouldParseTo`
shouldParseTo :: String -> Doc RdrName -> Expectation
- shouldParseTo input ast = parseString input `shouldBe` Just ast
+ shouldParseTo input ast = parseString input `shouldBe` ast
it "is total" $ do
property $ \xs ->
(length . show . parseString) xs `shouldSatisfy` (> 0)
+ context "when parsing text" $ do
+ it "can handle unicode" $ do
+ "灼眼のシャナ" `shouldParseTo` "灼眼のシャナ"
+
+ it "accepts numeric character references" $ do
+ "foo bar baz λ" `shouldParseTo` "foo bar baz λ"
+
+ it "accepts hexadecimal character references" $ do
+ "e" `shouldParseTo` "e"
+
+ it "allows to backslash-escape characters" $ do
+ property $ \x -> ['\\', x] `shouldParseTo` DocString [x]
+
+ context "when parsing identifiers" $ do
+ it "parses identifiers enclosed within single ticks" $ do
+ "'foo'" `shouldParseTo` DocIdentifier "foo"
+
+ it "parses identifiers enclosed within backticks" $ do
+ "`foo`" `shouldParseTo` DocIdentifier "foo"
+
+ it "parses a word with one of the delimiters in it as ordinary string" $ do
+ "don't use apostrophe's in the wrong place's" `shouldParseTo` "don't use apostrophe's in the wrong place's"
+
context "when parsing URLs" $ do
+ let hyperlink :: String -> Maybe String -> Doc RdrName
+ hyperlink url = DocHyperlink . Hyperlink url
+
it "parses a URL" $ do
- "" `shouldParseTo`
- hyperlink "http://example.com/" Nothing
+ "" `shouldParseTo` hyperlink "http://example.com/" Nothing
it "accepts an optional label" $ do
- "" `shouldParseTo`
- hyperlink "http://example.com/" "some link"
-
- it "finishes URL parsing as soon as it sees >, even if it's escaped" $ do
- "le.com" `shouldParseTo`
- hyperlink "http://examp\\" Nothing <> "le.com"
+ "" `shouldParseTo` hyperlink "http://example.com/" "some link"
- "mp\\>le.com>" `shouldParseTo`
- hyperlink "http://exa\\" Nothing <> "mp>le.com>"
+ it "does not accept newlines in label" $ do
+ "" `shouldParseTo` ""
- -- Likewise in label
- "oo>" `shouldParseTo`
- hyperlink "http://example.com" "f\\" <> "oo>"
+ it "does not allow to escap >" $ do
+ "le.com" `shouldParseTo` hyperlink "http://examp\\" Nothing <> "le.com"
it "parses inline URLs" $ do
- "Not yet working, see \n , isEmptyChan" `shouldParseTo`
- "Not yet working, see "
- <> hyperlink "http://trac.haskell.org/haddock/ticket/223" Nothing
- <> "\n , isEmptyChan"
+ "foo bar" `shouldParseTo`
+ "foo " <> hyperlink "http://example.com/" Nothing <> " bar"
context "when autolinking URLs" $ do
it "autolinks HTTP URLs" $ do
- "http://example.com/" `shouldParseTo`
- hyperlink "http://example.com/" Nothing
+ "http://example.com/" `shouldParseTo` hyperlink "http://example.com/" Nothing
it "autolinks HTTPS URLs" $ do
- "https://www.example.com/" `shouldParseTo`
- hyperlink "https://www.example.com/" Nothing
+ "https://www.example.com/" `shouldParseTo` hyperlink "https://www.example.com/" Nothing
it "autolinks FTP URLs" $ do
- "ftp://example.com/" `shouldParseTo`
- hyperlink "ftp://example.com/" Nothing
-
- it "does not include a trailing exclamation mark" $ do
- "http://example.com/! Some other sentence." `shouldParseTo`
- hyperlink "http://example.com/" Nothing <> "! Some other sentence."
+ "ftp://example.com/" `shouldParseTo` hyperlink "ftp://example.com/" Nothing
it "does not include a trailing comma" $ do
"http://example.com/, Some other sentence." `shouldParseTo`
@@ -104,10 +114,46 @@ spec = do
"http://example.com/. Some other sentence." `shouldParseTo`
hyperlink "http://example.com/" Nothing <> ". Some other sentence."
+ it "does not include a trailing exclamation mark" $ do
+ "http://example.com/! Some other sentence." `shouldParseTo`
+ hyperlink "http://example.com/" Nothing <> "! Some other sentence."
+
it "does not include a trailing question mark" $ do
"http://example.com/? Some other sentence." `shouldParseTo`
hyperlink "http://example.com/" Nothing <> "? Some other sentence."
+ context "when parsing pictures" $ do
+ let picture :: String -> Maybe String -> Doc RdrName
+ picture uri = DocPic . Picture uri
+
+ it "parses a simple picture" $ do
+ "<>" `shouldParseTo` picture "foo" Nothing
+
+ it "accepts an optional title" $ do
+ "<>" `shouldParseTo` picture "foo" (Just "bar baz")
+
+ it "does not accept newlines in title" $ do
+ "<>" `shouldParseTo` "<>"
+
+ it "parses a picture with unicode" $ do
+ "<<灼眼 のシャナ>>" `shouldParseTo` picture "灼眼" (Just "のシャナ")
+
+ it "doesn't allow for escaping of the closing tags" $ do -- bug?
+ "<>z>>" `shouldParseTo` picture "ba\\" Nothing <> "z>>"
+
+ context "when parsing anchors" $ do
+ it "parses a single word anchor" $ do
+ "#foo#" `shouldParseTo` DocAName "foo"
+
+ it "parses a multi word anchor" $ do
+ "#foo bar#" `shouldParseTo` DocAName "foo bar"
+
+ it "parses a unicode anchor" $ do
+ "#灼眼のシャナ#" `shouldParseTo` DocAName "灼眼のシャナ"
+
+ it "does not accept newlines in anchors" $ do
+ "#foo\nbar#" `shouldParseTo` "#foo\nbar#"
+
context "when parsing emphasised text" $ do
it "emphasises a word on its own" $ do
"/foo/" `shouldParseTo` DocEmphasis "foo"
@@ -130,363 +176,231 @@ spec = do
it "recognizes other markup constructs within emphasised text" $ do
"/foo @bar@ baz/" `shouldParseTo` DocEmphasis ("foo " <> DocMonospaced "bar" <> " baz")
- describe "parseParas" $ do
- let infix 1 `shouldParseTo`
- shouldParseTo :: String -> Doc RdrName -> Expectation
- shouldParseTo input ast = (combineStringNodes <$> parseParas input)
- `shouldBe` Just ast
+ context "when parsing monospaced text" $ do
+ it "parses simple monospaced text" $ do
+ "@foo@" `shouldParseTo` DocMonospaced "foo"
- it "is total" $ do
- property $ \xs ->
- (length . show . parseParas) xs `shouldSatisfy` (> 0)
+ it "parses inline monospaced text" $ do
+ "foo @bar@ baz" `shouldParseTo` "foo " <> DocMonospaced "bar" <> " baz"
- it "parses a paragraph" $ do
- "foobar" `shouldParseTo` DocParagraph "foobar\n"
+ it "allows to escape @" $ do
+ "@foo \\@ bar@" `shouldParseTo` DocMonospaced "foo @ bar"
- it "empty input produces DocEmpty" $ do
- "" `shouldParseTo` DocEmpty
-
- it "should preserve all regular characters" $ do
- property $ \xs ->
- let input = filterSpecial xs
- in case input of
- [] -> input `shouldParseTo` DocEmpty
- _ -> input `shouldParseTo` DocParagraph (DocString $ input ++ "\n")
+ it "accepts unicode" $ do
+ "@foo 灼眼のシャナ bar@" `shouldParseTo` DocMonospaced "foo 灼眼のシャナ bar"
- context "when parsing a simple string" $ do
- it "] should be made into a DocString" $ do
- "hell]o" `shouldParseTo` DocParagraph "hell]o\n"
+ it "accepts other markup in monospaced text" $ do
+ "@/foo/@" `shouldParseTo` DocMonospaced (DocEmphasis "foo")
- it "can handle unicode" $ do
- "灼眼のシャナ" `shouldParseTo` DocParagraph "灼眼のシャナ\n"
+ it "requires the closing @" $ do
+ "@foo /bar/ baz" `shouldParseTo` "@foo " <> DocEmphasis "bar" <> " baz"
- context "when parsing module strings" $ do
- it "should parse a module on its own" $ do
- "\"Module\"" `shouldParseTo`
- (DocParagraph $ DocModule "Module" <> "\n")
+ context "when parsing module names" $ do
+ it "accepts a simple module name" $ do
+ "\"Foo\"" `shouldParseTo` DocModule "Foo"
- it "should parse a module inline" $ do
- "This is a \"Module\"." `shouldParseTo`
- DocParagraph ("This is a " <> (DocModule "Module" <> ".\n"))
+ it "accepts a module name with dots" $ do
+ "\"Foo.Bar.Baz\"" `shouldParseTo` DocModule "Foo.Bar.Baz"
- context "when parsing codeblocks" $ do
- it "codeblock a word on its own" $ do
- "@quux@" `shouldParseTo` DocCodeBlock "quux"
+ it "accepts a module name with unicode" $ do
+ "\"Foo.Barλ\"" `shouldParseTo` DocModule "Foo.Barλ"
- it "codeblocks unicode" $ do
- "@灼眼のシャナ@" `shouldParseTo` DocCodeBlock "灼眼のシャナ"
+ it "parses a module inline" $ do
+ "This is a \"Module\"." `shouldParseTo` ("This is a " <> (DocModule "Module" <> "."))
+ it "rejects empty module name" $ do
+ "\"\"" `shouldParseTo` "\"\""
- it "does @multi-line\\n codeblocks@" $ do
- "@multi-line\n codeblocks@" `shouldParseTo`
- DocCodeBlock "multi-line\n codeblocks"
+ it "rejects a module name with a trailing dot" $ do
+ "\"Foo.\"" `shouldParseTo` "\"Foo.\""
- it "accepts other elements in a codeblock" $ do
- "@/emphasis/ \"Module\" <