From 295a27b7558cf4fae144e3cf01e488e54ca961f2 Mon Sep 17 00:00:00 2001 From: David Waern Date: Sat, 28 Nov 2009 22:16:16 +0000 Subject: Move H.Interface.Parse/Lex to H.Parse/Lex These are not just used to build Interfaces. --- src/Haddock/Interface/Lex.x | 171 ----------------------------------- src/Haddock/Interface/LexParseRn.hs | 8 +- src/Haddock/Interface/Parse.y | 106 ---------------------- src/Haddock/Lex.x | 172 ++++++++++++++++++++++++++++++++++++ src/Haddock/Parse.y | 103 +++++++++++++++++++++ src/Main.hs | 6 +- 6 files changed, 282 insertions(+), 284 deletions(-) delete mode 100644 src/Haddock/Interface/Lex.x delete mode 100644 src/Haddock/Interface/Parse.y create mode 100644 src/Haddock/Lex.x create mode 100644 src/Haddock/Parse.y (limited to 'src') diff --git a/src/Haddock/Interface/Lex.x b/src/Haddock/Interface/Lex.x deleted file mode 100644 index 5f93084a..00000000 --- a/src/Haddock/Interface/Lex.x +++ /dev/null @@ -1,171 +0,0 @@ --- --- Haddock - A Haskell Documentation Tool --- --- (c) Simon Marlow 2002 --- --- This file was modified and integrated into GHC by David Waern 2006 --- - -{ -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Interface.Lex ( - Token(..), - tokenise - ) where - -import Lexer hiding (Token) -import Parser ( parseIdentifier ) -import StringBuffer -import RdrName -import SrcLoc -import DynFlags - -import Data.Char -import Numeric -import System.IO.Unsafe -} - -$ws = $white # \n -$digit = [0-9] -$hexdigit = [0-9a-fA-F] -$special = [\"\@] -$alphanum = [A-Za-z0-9] -$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~] - -:- - --- beginning of a paragraph -<0,para> { - $ws* \n ; - $ws* \> { begin birdtrack } - $ws* [\*\-] { token TokBullet `andBegin` string } - $ws* \[ { token TokDefStart `andBegin` def } - $ws* \( $digit+ \) { token TokNumber `andBegin` string } - $ws* { begin string } -} - --- beginning of a line - { - $ws* \> { begin birdtrack } - $ws* \n { token TokPara `andBegin` para } - -- Here, we really want to be able to say - -- $ws* (\n | ) { token TokPara `andBegin` para} - -- because otherwise a trailing line of whitespace will result in - -- a spurious TokString at the end of a docstring. We don't have , - -- though (NOW I realise what it was for :-). To get around this, we always - -- append \n to the end of a docstring. - () { begin string } -} - - .* \n? { strtokenNL TokBirdTrack `andBegin` line } - - { - $special { strtoken $ \s -> TokSpecial (head s) } - \<\<.*\>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } - \<.*\> { strtoken $ \s -> TokURL (init (tail s)) } - \#.*\# { strtoken $ \s -> TokAName (init (tail s)) } - \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) } - [\'\`] $ident+ [\'\`] { ident } - \\ . { strtoken (TokString . tail) } - "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } - "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } - -- allow special characters through if they don't fit one of the previous - -- patterns. 
- [\/\'\`\<\#\&\\] { strtoken TokString } - [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } - [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString } -} - - { - \] { token TokDefEnd `andBegin` string } -} - --- ']' doesn't have any special meaning outside of the [...] at the beginning --- of a definition paragraph. - { - \] { strtoken TokString } -} - -{ -data Token - = TokPara - | TokNumber - | TokBullet - | TokDefStart - | TokDefEnd - | TokSpecial Char - | TokIdent [RdrName] - | TokString String - | TokURL String - | TokPic String - | TokEmphasis String - | TokAName String - | TokBirdTrack String --- deriving Show - --- ----------------------------------------------------------------------------- --- Alex support stuff - -type StartCode = Int -type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token] - -type AlexInput = (Char,String) - -alexGetChar (_, []) = Nothing -alexGetChar (_, c:cs) = Just (c, (c,cs)) - -alexInputPrevChar (c,_) = c - -tokenise :: String -> [Token] -tokenise str = let toks = go ('\n', eofHack str) para in {-trace (show toks)-} toks - where go inp@(_,str) sc = - case alexScan inp sc of - AlexEOF -> [] - AlexError _ -> error "lexical error" - AlexSkip inp' _ -> go inp' sc - AlexToken inp' len act -> act (take len str) sc (\sc -> go inp' sc) - --- NB. we add a final \n to the string, (see comment in the beginning of line --- production above). -eofHack str = str++"\n" - -andBegin :: Action -> StartCode -> Action -andBegin act new_sc = \str _ cont -> act str new_sc cont - -token :: Token -> Action -token t = \_ sc cont -> t : cont sc - -strtoken, strtokenNL :: (String -> Token) -> Action -strtoken t = \str sc cont -> t str : cont sc -strtokenNL t = \str sc cont -> t (filter (/= '\r') str) : cont sc --- ^ We only want LF line endings in our internal doc string format, so we --- filter out all CRs. 
- -begin :: StartCode -> Action -begin sc = \_ _ cont -> cont sc - --- ----------------------------------------------------------------------------- --- Lex a string as a Haskell identifier - -ident :: Action -ident str sc cont = - case strToHsQNames id of - Just names -> TokIdent names : cont sc - Nothing -> TokString str : cont sc - where id = init (tail str) - -strToHsQNames :: String -> Maybe [RdrName] -strToHsQNames str0 = - let buffer = unsafePerformIO (stringToStringBuffer str0) - pstate = mkPState buffer noSrcLoc defaultDynFlags - result = unP parseIdentifier pstate - in case result of - POk _ name -> Just [unLoc name] - _ -> Nothing -} diff --git a/src/Haddock/Interface/LexParseRn.hs b/src/Haddock/Interface/LexParseRn.hs index fc44cedf..89440139 100644 --- a/src/Haddock/Interface/LexParseRn.hs +++ b/src/Haddock/Interface/LexParseRn.hs @@ -19,8 +19,8 @@ module Haddock.Interface.LexParseRn ( ) where import Haddock.Types -import Haddock.Interface.Lex -import Haddock.Interface.Parse +import Haddock.Lex +import Haddock.Parse import Haddock.Interface.Rn import Haddock.Interface.ParseModuleHeader import Haddock.Doc @@ -46,8 +46,8 @@ lexParseRnHaddockComment hty gre (HsDocString fs) = do let str = unpackFS fs let toks = tokenise str let parse = case hty of - NormalHaddockComment -> parseHaddockParagraphs - DocSectionComment -> parseHaddockString + NormalHaddockComment -> parseParas + DocSectionComment -> parseString case parse toks of Nothing -> do tell ["doc comment parse failed: "++str] diff --git a/src/Haddock/Interface/Parse.y b/src/Haddock/Interface/Parse.y deleted file mode 100644 index a5175ddc..00000000 --- a/src/Haddock/Interface/Parse.y +++ /dev/null @@ -1,106 +0,0 @@ -{ -{-# OPTIONS -Wwarn -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - -module Haddock.Interface.Parse ( - parseHaddockParagraphs, - parseHaddockString -) where - -import Haddock.Interface.Lex -import Haddock.Types (Doc(..)) -import Haddock.Doc -import HsSyn -import RdrName -} - -%expect 0 - -%tokentype { Token } - -%token '/' { TokSpecial '/' } - '@' { TokSpecial '@' } - '[' { TokDefStart } - ']' { TokDefEnd } - DQUO { TokSpecial '\"' } - URL { TokURL $$ } - PIC { TokPic $$ } - ANAME { TokAName $$ } - '/../' { TokEmphasis $$ } - '-' { TokBullet } - '(n)' { TokNumber } - '>..' { TokBirdTrack $$ } - IDENT { TokIdent $$ } - PARA { TokPara } - STRING { TokString $$ } - -%monad { Maybe } - -%name parseHaddockParagraphs doc -%name parseHaddockString seq - -%% - -doc :: { Doc RdrName } - : apara PARA doc { docAppend $1 $3 } - | PARA doc { $2 } - | apara { $1 } - | {- empty -} { DocEmpty } - -apara :: { Doc RdrName } - : ulpara { DocUnorderedList [$1] } - | olpara { DocOrderedList [$1] } - | defpara { DocDefList [$1] } - | para { $1 } - -ulpara :: { Doc RdrName } - : '-' para { $2 } - -olpara :: { Doc RdrName } - : '(n)' para { $2 } - -defpara :: { (Doc RdrName, Doc RdrName) } - : '[' seq ']' seq { ($2, $4) } - -para :: { Doc RdrName } - : seq { docParagraph $1 } - | codepara { DocCodeBlock $1 } - -codepara :: { Doc RdrName } - : '>..' codepara { docAppend (DocString $1) $2 } - | '>..' 
{ DocString $1 } - -seq :: { Doc RdrName } - : elem seq { docAppend $1 $2 } - | elem { $1 } - -elem :: { Doc RdrName } - : elem1 { $1 } - | '@' seq1 '@' { DocMonospaced $2 } - -seq1 :: { Doc RdrName } - : PARA seq1 { docAppend (DocString "\n") $2 } - | elem1 seq1 { docAppend $1 $2 } - | elem1 { $1 } - -elem1 :: { Doc RdrName } - : STRING { DocString $1 } - | '/../' { DocEmphasis (DocString $1) } - | URL { DocURL $1 } - | PIC { DocPic $1 } - | ANAME { DocAName $1 } - | IDENT { DocIdentifier $1 } - | DQUO strings DQUO { DocModule $2 } - -strings :: { String } - : STRING { $1 } - | STRING strings { $1 ++ $2 } - -{ -happyError :: [Token] -> Maybe a -happyError toks = Nothing -} diff --git a/src/Haddock/Lex.x b/src/Haddock/Lex.x new file mode 100644 index 00000000..04b9f0c3 --- /dev/null +++ b/src/Haddock/Lex.x @@ -0,0 +1,172 @@ +-- +-- Haddock - A Haskell Documentation Tool +-- +-- (c) Simon Marlow 2002 +-- +-- This file was modified and integrated into GHC by David Waern 2006. +-- Then moved back into Haddock by Isaac Dupree in 2009 :-) +-- + +{ +{-# OPTIONS -Wwarn -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module Haddock.Lex ( + Token(..), + tokenise + ) where + +import Lexer hiding (Token) +import Parser ( parseIdentifier ) +import StringBuffer +import RdrName +import SrcLoc +import DynFlags + +import Data.Char +import Numeric +import System.IO.Unsafe +} + +$ws = $white # \n +$digit = [0-9] +$hexdigit = [0-9a-fA-F] +$special = [\"\@] +$alphanum = [A-Za-z0-9] +$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~] + +:- + +-- beginning of a paragraph +<0,para> { + $ws* \n ; + $ws* \> { begin birdtrack } + $ws* [\*\-] { token TokBullet `andBegin` string } + $ws* \[ { token TokDefStart `andBegin` def } + $ws* \( $digit+ \) { token TokNumber `andBegin` string } + $ws* { begin string } +} + +-- beginning of a line + { + $ws* \> { begin birdtrack } + $ws* \n { token TokPara `andBegin` para } + -- Here, we really want to be able to say + -- $ws* (\n | ) { token TokPara `andBegin` para} + -- because otherwise a trailing line of whitespace will result in + -- a spurious TokString at the end of a docstring. We don't have , + -- though (NOW I realise what it was for :-). To get around this, we always + -- append \n to the end of a docstring. + () { begin string } +} + + .* \n? { strtokenNL TokBirdTrack `andBegin` line } + + { + $special { strtoken $ \s -> TokSpecial (head s) } + \<\<.*\>\> { strtoken $ \s -> TokPic (init $ init $ tail $ tail s) } + \<.*\> { strtoken $ \s -> TokURL (init (tail s)) } + \#.*\# { strtoken $ \s -> TokAName (init (tail s)) } + \/ [^\/]* \/ { strtoken $ \s -> TokEmphasis (init (tail s)) } + [\'\`] $ident+ [\'\`] { ident } + \\ . { strtoken (TokString . tail) } + "&#" $digit+ \; { strtoken $ \s -> TokString [chr (read (init (drop 2 s)))] } + "&#" [xX] $hexdigit+ \; { strtoken $ \s -> case readHex (init (drop 3 s)) of [(n,_)] -> TokString [chr n] } + -- allow special characters through if they don't fit one of the previous + -- patterns. 
+ [\/\'\`\<\#\&\\] { strtoken TokString } + [^ $special \/ \< \# \n \'\` \& \\ \]]* \n { strtokenNL TokString `andBegin` line } + [^ $special \/ \< \# \n \'\` \& \\ \]]+ { strtoken TokString } +} + + { + \] { token TokDefEnd `andBegin` string } +} + +-- ']' doesn't have any special meaning outside of the [...] at the beginning +-- of a definition paragraph. + { + \] { strtoken TokString } +} + +{ +data Token + = TokPara + | TokNumber + | TokBullet + | TokDefStart + | TokDefEnd + | TokSpecial Char + | TokIdent [RdrName] + | TokString String + | TokURL String + | TokPic String + | TokEmphasis String + | TokAName String + | TokBirdTrack String +-- deriving Show + +-- ----------------------------------------------------------------------------- +-- Alex support stuff + +type StartCode = Int +type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token] + +type AlexInput = (Char,String) + +alexGetChar (_, []) = Nothing +alexGetChar (_, c:cs) = Just (c, (c,cs)) + +alexInputPrevChar (c,_) = c + +tokenise :: String -> [Token] +tokenise str = let toks = go ('\n', eofHack str) para in {-trace (show toks)-} toks + where go inp@(_,str) sc = + case alexScan inp sc of + AlexEOF -> [] + AlexError _ -> error "lexical error" + AlexSkip inp' _ -> go inp' sc + AlexToken inp' len act -> act (take len str) sc (\sc -> go inp' sc) + +-- NB. we add a final \n to the string, (see comment in the beginning of line +-- production above). +eofHack str = str++"\n" + +andBegin :: Action -> StartCode -> Action +andBegin act new_sc = \str _ cont -> act str new_sc cont + +token :: Token -> Action +token t = \_ sc cont -> t : cont sc + +strtoken, strtokenNL :: (String -> Token) -> Action +strtoken t = \str sc cont -> t str : cont sc +strtokenNL t = \str sc cont -> t (filter (/= '\r') str) : cont sc +-- ^ We only want LF line endings in our internal doc string format, so we +-- filter out all CRs. + +begin :: StartCode -> Action +begin sc = \_ _ cont -> cont sc + +-- ----------------------------------------------------------------------------- +-- Lex a string as a Haskell identifier + +ident :: Action +ident str sc cont = + case strToHsQNames id of + Just names -> TokIdent names : cont sc + Nothing -> TokString str : cont sc + where id = init (tail str) + +strToHsQNames :: String -> Maybe [RdrName] +strToHsQNames str0 = + let buffer = unsafePerformIO (stringToStringBuffer str0) + pstate = mkPState buffer noSrcLoc defaultDynFlags + result = unP parseIdentifier pstate + in case result of + POk _ name -> Just [unLoc name] + _ -> Nothing +} diff --git a/src/Haddock/Parse.y b/src/Haddock/Parse.y new file mode 100644 index 00000000..42553343 --- /dev/null +++ b/src/Haddock/Parse.y @@ -0,0 +1,103 @@ +{ +{-# OPTIONS -Wwarn -w #-} +-- The above warning supression flag is a temporary kludge. +-- While working on this module you are encouraged to remove it and fix +-- any warnings in the module. See +-- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings +-- for details + +module Haddock.Parse where + +import Haddock.Lex +import Haddock.Types (Doc(..)) +import Haddock.Doc +import HsSyn +import RdrName +} + +%expect 0 + +%tokentype { Token } + +%token '/' { TokSpecial '/' } + '@' { TokSpecial '@' } + '[' { TokDefStart } + ']' { TokDefEnd } + DQUO { TokSpecial '\"' } + URL { TokURL $$ } + PIC { TokPic $$ } + ANAME { TokAName $$ } + '/../' { TokEmphasis $$ } + '-' { TokBullet } + '(n)' { TokNumber } + '>..' 
{ TokBirdTrack $$ }
+ IDENT { TokIdent $$ }
+ PARA { TokPara }
+ STRING { TokString $$ }
+
+%monad { Maybe }
+
+%name parseParas doc
+%name parseString seq
+
+%%
+
+doc :: { Doc RdrName }
+ : apara PARA doc { docAppend $1 $3 }
+ | PARA doc { $2 }
+ | apara { $1 }
+ | {- empty -} { DocEmpty }
+
+apara :: { Doc RdrName }
+ : ulpara { DocUnorderedList [$1] }
+ | olpara { DocOrderedList [$1] }
+ | defpara { DocDefList [$1] }
+ | para { $1 }
+
+ulpara :: { Doc RdrName }
+ : '-' para { $2 }
+
+olpara :: { Doc RdrName }
+ : '(n)' para { $2 }
+
+defpara :: { (Doc RdrName, Doc RdrName) }
+ : '[' seq ']' seq { ($2, $4) }
+
+para :: { Doc RdrName }
+ : seq { docParagraph $1 }
+ | codepara { DocCodeBlock $1 }
+
+codepara :: { Doc RdrName }
+ : '>..' codepara { docAppend (DocString $1) $2 }
+ | '>..' { DocString $1 }
+
+seq :: { Doc RdrName }
+ : elem seq { docAppend $1 $2 }
+ | elem { $1 }
+
+elem :: { Doc RdrName }
+ : elem1 { $1 }
+ | '@' seq1 '@' { DocMonospaced $2 }
+
+seq1 :: { Doc RdrName }
+ : PARA seq1 { docAppend (DocString "\n") $2 }
+ | elem1 seq1 { docAppend $1 $2 }
+ | elem1 { $1 }
+
+elem1 :: { Doc RdrName }
+ : STRING { DocString $1 }
+ | '/../' { DocEmphasis (DocString $1) }
+ | URL { DocURL $1 }
+ | PIC { DocPic $1 }
+ | ANAME { DocAName $1 }
+ | IDENT { DocIdentifier $1 }
+ | DQUO strings DQUO { DocModule $2 }
+
+strings :: { String }
+ : STRING { $1 }
+ | STRING strings { $1 ++ $2 }
+
+{
+happyError :: [Token] -> Maybe a
+happyError toks = Nothing
+}
diff --git a/src/Main.hs b/src/Main.hs
index f09dcd29..1f4867e4 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -22,8 +22,8 @@ module Main (main) where
 import Haddock.Backends.Html
 import Haddock.Backends.Hoogle
 import Haddock.Interface
-import Haddock.Interface.Lex
-import Haddock.Interface.Parse
+import Haddock.Lex
+import Haddock.Parse
 import Haddock.Types
 import Haddock.Version
 import Haddock.InterfaceFile
@@ -400,7 +400,7 @@ getPrologue flags =
 [] -> return Nothing
 [filename] -> do
 str <- readFile filename
- case parseHaddockParagraphs (tokenise str) of
+ case parseParas (tokenise str) of
 Nothing -> throwE "parsing haddock prologue failed"
 Just doc -> return (Just doc)
 _otherwise -> throwE "multiple -p/--prologue options"
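After this move the comment lexer and parser are ordinary library modules that any part of Haddock can import, which is what the getPrologue hunk above relies on. The sketch below is not part of the patch; it is a minimal illustration of how the two relocated modules compose, assuming a build against this Haddock source tree and the GHC API, with the wrapper names invented for the example.

module DocParseSketch where  -- hypothetical module, not in the patch

import Haddock.Lex   (tokenise)
import Haddock.Parse (parseParas, parseString)
import Haddock.Types (Doc)
import RdrName       (RdrName)

-- Lex a raw comment into tokens, then parse the token stream into a Doc
-- tree; Nothing signals a parse failure, as in getPrologue above.
parseDocString :: String -> Maybe (Doc RdrName)
parseDocString = parseParas . tokenise

-- One-line section headings use the simpler string grammar instead,
-- mirroring the DocSectionComment branch in Haddock.Interface.LexParseRn.
parseSectionHeading :: String -> Maybe (Doc RdrName)
parseSectionHeading = parseString . tokenise

The same tokenise-then-parse pipeline is what the patch now calls from Main.hs for --prologue files and from Haddock.Interface.LexParseRn for documentation comments, so callers outside the interface-building code can parse doc markup directly.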