aboutsummaryrefslogtreecommitdiff
path: root/src/HaddockLex.hs
diff options
context:
space:
mode:
author: simonmar <unknown> 2002-04-04 16:23:43 +0000
committer: simonmar <unknown> 2002-04-04 16:23:43 +0000
commit: 2b39cd941c80d2603f2480684c45dd31f9256831 (patch)
tree: 87a4fdb2752c8a99e54e50e45c1bfa8c2bf80577 /src/HaddockLex.hs
[haddock @ 2002-04-04 16:23:43 by simonmar]
This is Haddock, my stab at a Haskell documentation tool. It's not quite ready for release yet, but I'm putting it in the repository so others can take a look. It uses a locally modified version of the hssource parser, extended with support for GHC extensions and documentation annotations.
Diffstat (limited to 'src/HaddockLex.hs')
-rw-r--r-- src/HaddockLex.hs 67
1 files changed, 67 insertions, 0 deletions
diff --git a/src/HaddockLex.hs b/src/HaddockLex.hs
new file mode 100644
index 00000000..9b224455
--- /dev/null
+++ b/src/HaddockLex.hs
@@ -0,0 +1,67 @@
+--
+-- Haddock - A Haskell Documentation Tool
+--
+-- (c) Simon Marlow 2002
+--
+
+module HaddockLex (
+ Token(..),
+ tokenise
+ ) where
+
+import IOExts --tmp
+import Char
+
+-- Characters the tokeniser treats specially.  Unless preceded by a
+-- backslash, each one ends the current run of text and is emitted as
+-- its own TokSpecial token.
+special :: String
+special = "\'\"/[]"
+
+-- The tokens produced by 'tokenise'.
+data Token
+ -- paragraph break: a newline followed by a blank line
+ = TokPara
+ -- enumerated-item marker at the start of a paragraph: "1." or "(1)";
+ -- the digits themselves are not recorded
+ | TokNumber
+ -- bullet marker at the start of a paragraph: '*' or '-'
+ | TokBullet
+ -- one unescaped character from 'special'
+ | TokSpecial Char
+ -- a run of ordinary text, including any escaped characters and any
+ -- newlines that did not form a paragraph break
+ | TokString String
+ deriving Show
+
+-- A small hand-written finite-state machine that turns a doc string
+-- into tokens.  This is the entry state: it inspects only the first
+-- character and either emits a token or hands over to another state.
+tokenise :: String -> [Token]
+tokenise [] = []
+tokenise input@(c:rest)
+  -- special characters always stand alone
+  | c `elem` special = TokSpecial c : tokenise rest
+  -- a newline may turn out to be the start of a paragraph break
+  | c == '\n'        = tokenise_newline rest
+  -- anything else opens a run of ordinary text
+  | otherwise        = tokenise_string "" input
+
+-- State entered after a newline that was seen at the start of a token.
+-- Blanks following the newline are skipped while looking ahead: if a
+-- second newline comes next, the line was blank and a paragraph break
+-- is emitted; otherwise the newline becomes the first character of a
+-- run of text.
+tokenise_newline :: String -> [Token]
+tokenise_newline rest =
+  case dropWhile nonNewlineSpace rest of
+    '\n':afterBlank -> TokPara : tokenise_para afterBlank -- paragraph break
+    -- Not a blank line: continue from the untrimmed input so that the
+    -- whitespace that was only peeked at stays part of the text.
+    _other          -> tokenise_string "\n" rest
+
+-- State entered immediately after a paragraph break.  Leading blanks
+-- are skipped and the start of the paragraph is checked for a list
+-- marker:
+--
+--   '*' or '-'                               gives TokBullet
+--   digits followed by '.', or "(digits)"    gives TokNumber
+--
+-- A recognised marker is consumed.  If there is no marker, tokenising
+-- resumes from the untrimmed input, so the skipped blanks are kept.
+tokenise_para :: String -> [Token]
+tokenise_para input =
+  case dropWhile nonNewlineSpace input of
+    -- bullet: '*'
+    '*':rest -> TokBullet : tokenise rest
+    -- bullet: '-'
+    '-':rest -> TokBullet : tokenise rest
+    -- enumerated item: '1.'
+    str | (ds, '.':rest) <- span isDigit str, not (null ds)
+        -> TokNumber : tokenise rest
+    -- enumerated item: '(1)'
+    '(':str | (ds, ')':rest) <- span isDigit str, not (null ds)
+        -> TokNumber : tokenise rest
+    -- no list marker: the paragraph starts with ordinary content
+    _other -> tokenise input
+
+-- True for any whitespace character except the newline itself, i.e.
+-- the blanks that can pad a line without ending it.
+nonNewlineSpace :: Char -> Bool
+nonNewlineSpace c = isSpace c && c /= '\n'
+
+-- State that accumulates a run of ordinary text.
+--
+-- The first argument is the text collected so far, held in reverse so
+-- that each new character can be consed onto the front; it is reversed
+-- once, when the TokString is emitted.  The second argument is the
+-- remaining input.
+tokenise_string :: String -> String -> [Token]
+tokenise_string acc input =
+  case input of
+    -- end of input: flush what has been collected
+    [] -> [TokString (reverse acc)]
+    -- backslash escape: the following character is taken literally,
+    -- whatever it is (a lone trailing backslash falls through to the
+    -- ordinary-character case below and is kept as-is)
+    '\\':c:rest -> tokenise_string (c:acc) rest
+    -- a newline might be the start of a paragraph break
+    '\n':rest -> tokenise_string_newline acc rest
+    c:rest
+      -- a special character ends the run and is handed back to
+      -- 'tokenise', which emits it as a TokSpecial
+      | c `elem` special -> TokString (reverse acc) : tokenise (c:rest)
+      | otherwise        -> tokenise_string (c:acc) rest
+
+-- State entered when a newline is met inside a run of text.  The first
+-- argument is the reversed text collected so far; the second is the
+-- input that follows the newline.
+tokenise_string_newline :: String -> String -> [Token]
+tokenise_string_newline acc rest =
+  case dropWhile nonNewlineSpace rest of
+    -- blank line: finish the text run and start a new paragraph
+    '\n':afterBlank ->
+      TokString (reverse acc) : TokPara : tokenise_para afterBlank
+    -- ordinary line break: keep the newline and carry on from the
+    -- untrimmed input (don't throw away whitespace)
+    _other -> tokenise_string ('\n':acc) rest
+