aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlec Theriault <alec.theriault@gmail.com>2018-06-05 10:47:16 -0700
committerAlexander Biehl <alexbiehl@gmail.com>2018-06-05 19:47:16 +0200
commitbea565ec5a029b8c19965aa22f34c23a112c0a7f (patch)
treea0a93b524acf85676c4e46206f73ce2a7a2aaa0f
parentee1ce11ceaf41d05973bb2c9ca7abd41a2ad078c (diff)
Improve hyperlinker's 'spanToNewline' (#846)
'spanToNewline' is used to help break apart the source into lines which can then be partioned into CPP and non-CPP chunks. It is important that 'spanToNewline' not break apart tokens, so it needs to properly handle things like * block comments, possibly nested * string literals, possibly multi-line * CPP macros, possibly multi-line String literals in particular were not being properly handled. The fix is to to fall back in 'Text.Read.lex' to help lex things that are not comments. Fixes #837.
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs32
-rw-r--r--hypsrc-test/ref/src/CPP.html216
-rw-r--r--hypsrc-test/src/CPP.hs26
3 files changed, 267 insertions, 7 deletions
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
index 277634b9..456050d1 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -3,6 +3,8 @@ module Haddock.Backends.Hyperlinker.Parser (parse) where
import Data.Either ( isRight, isLeft )
import Data.List ( foldl', isPrefixOf, isSuffixOf )
import Data.Maybe ( maybeToList )
+import Data.Char ( isSpace )
+import qualified Text.Read as R
import GHC ( DynFlags, addSourceToTokens )
import SrcLoc
@@ -109,12 +111,9 @@ isCPPline :: String -> Bool
isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5
--- | Split a "line" off the front of a string, supporting newline escapes.
---
--- By "line", we understand: the shortest substring ending in a '\n' that is not
---
--- 1. immediately preceded by a '\\'
--- 2. not inside some (possibly nested) block comment
+-- | Split a "line" off the front of a string, hopefully without cutting tokens
+-- in half. I say "hopefully" because knowing what a token is requires lexing,
+-- yet lexing depends on this function.
--
-- All characters in the input are present in the output:
--
@@ -122,17 +121,36 @@ isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5
spanToNewline :: Int -- ^ open '{-'
-> String -- ^ input
-> (String, String)
-spanToNewline _ [] = ([], [])
+
+-- Base case and space characters
+spanToNewline _ "" = ("", "")
+spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
spanToNewline n ('\\':'\n':str) =
let (str', rest) = spanToNewline n str
in ('\\':'\n':str', rest)
+
+-- Block comments
spanToNewline n ('{':'-':str) =
let (str', rest) = spanToNewline (n+1) str
in ('{':'-':str', rest)
spanToNewline n ('-':'}':str) =
let (str', rest) = spanToNewline (n-1) str
in ('-':'}':str', rest)
+
+-- When not in a block comment, try to lex a Haskell token
+spanToNewline 0 str@(c:_) | ((lexed, str') : _) <- R.lex str, not (isSpace c) =
+ if all (== '-') lexed && length lexed >= 2
+ -- A Haskell line comment
+ then case span (/= '\n') str' of
+ (str'', '\n':rest) -> (lexed ++ str'' ++ "\n", rest)
+ (_, _) -> (str, "")
+
+ -- An actual Haskell token
+ else let (str'', rest) = spanToNewline 0 str'
+ in (lexed ++ str'', rest)
+
+-- In all other cases, advance one character at a time
spanToNewline n (c:str) =
let (str', rest) = spanToNewline n str
in (c:str', rest)
diff --git a/hypsrc-test/ref/src/CPP.html b/hypsrc-test/ref/src/CPP.html
new file mode 100644
index 00000000..fb85bd2f
--- /dev/null
+++ b/hypsrc-test/ref/src/CPP.html
@@ -0,0 +1,216 @@
+<html xmlns="http://www.w3.org/1999/xhtml"
+><head
+ ><link rel="stylesheet" type="text/css" href="style.css"
+ /><script type="text/javascript" src="highlight.js"
+ ></script
+ ></head
+ ><body
+ ><pre
+ ><span class="hs-pragma"
+ >{-# LANGUAGE CPP #-}</span
+ ><span
+ >
+</span
+ ><a name="line-2"
+ ></a
+ ><span class="hs-keyword"
+ >module</span
+ ><span
+ > </span
+ ><span class="hs-identifier"
+ >CPP</span
+ ><span
+ > </span
+ ><span class="hs-keyword"
+ >where</span
+ ><span
+ >
+</span
+ ><a name="line-3"
+ ></a
+ ><span
+ >
+</span
+ ><a name="line-4"
+ ></a
+ ><span class="hs-cpp"
+ >#define SOMETHING1
+</span
+ ><span
+ >
+</span
+ ><a name="line-6"
+ ></a
+ ><span class="hs-identifier"
+ >foo</span
+ ><span
+ > </span
+ ><span class="hs-glyph"
+ >::</span
+ ><span
+ > </span
+ ><span class="hs-identifier hs-type"
+ >String</span
+ ><span
+ >
+</span
+ ><a name="line-7"
+ ></a
+ ><a name="foo"
+ ><a href="CPP.html#foo"
+ ><span class="hs-identifier"
+ >foo</span
+ ></a
+ ></a
+ ><span
+ > </span
+ ><span class="hs-glyph"
+ >=</span
+ ><span
+ > </span
+ ><span class="hs-comment"
+ >{- &quot; single quotes are fine in block comments
+ {- nested block comments are fine -}
+ -}</span
+ ><span
+ > </span
+ ><span class="hs-string"
+ >&quot;foo&quot;</span
+ ><span
+ >
+</span
+ ><a name="line-10"
+ ></a
+ ><span
+ >
+</span
+ ><a name="line-11"
+ ></a
+ ><span class="hs-cpp"
+ >#define SOMETHING2
+</span
+ ><span
+ >
+</span
+ ><a name="line-13"
+ ></a
+ ><span class="hs-identifier"
+ >bar</span
+ ><span
+ > </span
+ ><span class="hs-glyph"
+ >::</span
+ ><span
+ > </span
+ ><span class="hs-identifier hs-type"
+ >String</span
+ ><span
+ >
+</span
+ ><a name="line-14"
+ ></a
+ ><a name="bar"
+ ><a href="CPP.html#bar"
+ ><span class="hs-identifier"
+ >bar</span
+ ></a
+ ></a
+ ><span
+ > </span
+ ><span class="hs-glyph"
+ >=</span
+ ><span
+ > </span
+ ><span class="hs-string"
+ >&quot;block comment in a string is not a comment {- &quot;</span
+ ><span
+ >
+</span
+ ><a name="line-15"
+ ></a
+ ><span
+ >
+</span
+ ><a name="line-16"
+ ></a
+ ><span class="hs-cpp"
+ >#define SOMETHING3
+</span
+ ><span
+ >
+</span
+ ><a name="line-18"
+ ></a
+ ><span class="hs-comment"
+ >-- &quot; single quotes are fine in line comments</span
+ ><span
+ >
+</span
+ ><a name="line-19"
+ ></a
+ ><span class="hs-comment"
+ >-- {- unclosed block comments are fine in line comments</span
+ ><span
+ >
+</span
+ ><a name="line-20"
+ ></a
+ ><span
+ >
+</span
+ ><a name="line-21"
+ ></a
+ ><span class="hs-comment"
+ >-- Multiline CPP is also fine</span
+ ><span
+ >
+</span
+ ><a name="line-22"
+ ></a
+ ><span class="hs-cpp"
+ >#define FOO\
+ 1
+</span
+ ><span
+ >
+</span
+ ><a name="line-25"
+ ></a
+ ><span class="hs-identifier"
+ >baz</span
+ ><span
+ > </span
+ ><span class="hs-glyph"
+ >::</span
+ ><span
+ > </span
+ ><span class="hs-identifier hs-type"
+ >String</span
+ ><span
+ >
+</span
+ ><a name="line-26"
+ ></a
+ ><a name="baz"
+ ><a href="CPP.html#baz"
+ ><span class="hs-identifier"
+ >baz</span
+ ></a
+ ></a
+ ><span
+ > </span
+ ><span class="hs-glyph"
+ >=</span
+ ><span
+ > </span
+ ><span class="hs-string"
+ >&quot;line comment in a string is not a comment --&quot;</span
+ ><span
+ >
+</span
+ ><a name="line-27"
+ ></a
+ ></pre
+ ></body
+ ></html
+> \ No newline at end of file
diff --git a/hypsrc-test/src/CPP.hs b/hypsrc-test/src/CPP.hs
new file mode 100644
index 00000000..f00ce031
--- /dev/null
+++ b/hypsrc-test/src/CPP.hs
@@ -0,0 +1,26 @@
+{-# LANGUAGE CPP #-}
+module CPP where
+
+#define SOMETHING1
+
+foo :: String
+foo = {- " single quotes are fine in block comments
+ {- nested block comments are fine -}
+ -} "foo"
+
+#define SOMETHING2
+
+bar :: String
+bar = "block comment in a string is not a comment {- "
+
+#define SOMETHING3
+
+-- " single quotes are fine in line comments
+-- {- unclosed block comments are fine in line comments
+
+-- Multiline CPP is also fine
+#define FOO\
+ 1
+
+baz :: String
+baz = "line comment in a string is not a comment --"