1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
|
--
-- Haddock - A Haskell Documentation Tool
--
-- (c) Simon Marlow 2002
--
{
module HaddockLex (
Token(..),
tokenise
) where
import Char
import Numeric
import HsSyn
import HsLexer hiding (Token)
import HsParseMonad
--import Debug.Trace
}
-- Character-set macros used by the rules below.
-- Horizontal whitespace: any whitespace character except newline.
$ws = $white # \n
-- Digits, used by the "(n)" paragraph marker and by the numeric character
-- references "&#...;" and "&#x...;".
$digit = [0-9]
$hexdigit = [0-9a-fA-F]
-- Characters that the <string,def> rules emit individually as TokSpecial.
$special = [\"\@\/]
$alphanum = [A-Za-z0-9]
-- Characters accepted between the quotes of a quoted identifier: letters,
-- digits, and a set of punctuation and operator symbols.
$ident = [$alphanum \'\_\.\!\#\$\%\&\*\+\/\<\=\>\?\@\\\\\^\|\-\~]
:-
-- Beginning of a paragraph.  These rules apply in start codes 0 and para.
-- They look at the first non-blank characters to decide which token (if any)
-- to emit and which start code lexes the rest of the paragraph.
<0,para> {
-- A blank line is skipped without producing a token.
$ws* \n ;
-- A leading '>' switches to the birdtrack start code; no token is emitted
-- here, the line itself is lexed by the <birdtrack> rule.
$ws* \> { begin birdtrack }
-- '*' or '-': emit TokBullet, then lex the rest as string.
$ws* [\*\-] { token TokBullet `andBegin` string }
-- '[': emit TokDefStart, then lex the bracketed part in the def start code.
$ws* \[ { token TokDefStart `andBegin` def }
-- '(digits)': emit TokNumber, then lex the rest as string.  The digits
-- themselves are not kept in the token.
$ws* \( $digit+ \) { token TokNumber `andBegin` string }
-- Anything else: discard the leading whitespace and switch to string.
$ws* { begin string }
}
-- Beginning of a line inside a paragraph.
<line> {
-- A leading '>' switches to the birdtrack start code.
$ws* \> { begin birdtrack }
-- A blank line ends the paragraph: emit TokPara and go back to para.
$ws* \n { token TokPara `andBegin` para }
-- Here, we really want to be able to say
-- $ws* (\n | <eof>) { token TokPara `andBegin` para}
-- because otherwise a trailing line of whitespace will result in
-- a spurious TokString at the end of a docstring.  We don't have <eof>,
-- though.  To get around this, tokenise always appends \n to the end of a
-- docstring.
-- Otherwise consume nothing (empty pattern) and lex the line as string.
() { begin string }
}
-- A bird-track line: the rest of the line, including its newline if there is
-- one, becomes a single TokBirdTrack; lexing then resumes in the line start
-- code.
<birdtrack> .* \n? { strtoken TokBirdTrack `andBegin` line }
-- Ordinary text, shared by the string and def start codes.  Alex prefers the
-- longest match and, between matches of equal length, the earliest rule, so
-- the single-character fallback near the end only fires when none of the
-- more specific patterns above it applies.
<string,def> {
-- Markup characters are emitted one at a time as TokSpecial.
$special { strtoken $ \s -> TokSpecial (head s) }
-- <...> and #...#: drop the surrounding delimiters and keep the contents.
\<.*\> { strtoken $ \s -> TokURL (init (tail s)) }
\#.*\# { strtoken $ \s -> TokAName (init (tail s)) }
-- Quoted identifier; see the ident action.
[\'\`] $ident+ [\'\`] { ident }
-- Backslash escape: drop the backslash and keep the escaped character.
\\ . { strtoken (TokString . tail) }
-- Numeric character references.  The value is parsed as an Integer and
-- checked against the largest Char before calling chr, so that an
-- out-of-range or overflowing reference cannot crash the lexer with a chr
-- error.  Such a reference is passed through as literal text instead.
"&#" $digit+ \; { strtoken $ \s ->
    let n = read (init (drop 2 s)) :: Integer
    in if n <= toInteger (ord maxBound)
         then TokString [chr (fromInteger n)]
         else TokString s }
"&#x" $hexdigit+ \; { strtoken $ \s ->
    case readHex (init (drop 3 s)) :: [(Integer, String)] of
      [(n, _)] | n <= toInteger (ord maxBound) -> TokString [chr (fromInteger n)]
      _ -> TokString s }
-- allow special characters through if they don't fit one of the previous
-- patterns.
[\'\`\<\#\&\\] { strtoken TokString }
-- A run of plain characters.  If it reaches the end of the line, the newline
-- is included in the token and lexing continues in the line start code.
[^ $special \< \# \n \'\` \& \\ \]]* \n { strtoken TokString `andBegin` line }
[^ $special \< \# \n \'\` \& \\ \]]+ { strtoken TokString }
}
-- In the def start code (entered after TokDefStart), ']' emits TokDefEnd and
-- switches to string for the rest of the paragraph.
<def> {
\] { token TokDefEnd `andBegin` string }
}
-- ']' doesn't have any special meaning outside of the [...] at the beginning
-- of a definition paragraph, so in the string start code it is emitted as
-- ordinary text.
<string> {
\] { strtoken TokString }
}
{
-- | Tokens produced by 'tokenise' from the text of a documentation comment.
data Token
  = TokPara                 -- ^ blank line seen at the start of a line
  | TokNumber               -- ^ @(digits)@ at the start of a paragraph
  | TokBullet               -- ^ @*@ or @-@ at the start of a paragraph
  | TokDefStart             -- ^ @[@ at the start of a paragraph
  | TokDefEnd               -- ^ the @]@ that closes a 'TokDefStart'
  | TokSpecial Char         -- ^ one of the markup characters in @$special@
  | TokIdent [HsQName]      -- ^ quoted identifier, as its candidate names
  | TokString String        -- ^ ordinary text
  | TokURL String           -- ^ contents of @\<...\>@
  | TokAName String         -- ^ contents of @#...#@
  | TokBirdTrack String     -- ^ text of a line that started with @>@
  deriving (Show)
-- -----------------------------------------------------------------------------
-- Alex support stuff
-- | An Alex start code, selecting which group of rules is active.
type StartCode = Int
-- | A lexer action.  It is given the matched text, the current start code,
-- and a continuation that lexes the rest of the input from a given start
-- code; it returns the complete token list.
type Action = String -> StartCode -> (StartCode -> [Token]) -> [Token]
-- | Lexer input: the most recently consumed character paired with the text
-- still to be lexed.
type AlexInput = (Char,String)
-- | Take the next character off the input.  Returns 'Nothing' when no text
-- remains; otherwise returns the character together with the new input,
-- whose previous-character slot now holds that character.
alexGetChar (_, remaining) =
  case remaining of
    []       -> Nothing
    ch : chs -> Just (ch, (ch, chs))
-- | The character consumed immediately before the current input position.
alexInputPrevChar input = fst input
-- | Lex the text of a documentation comment into a list of 'Token's.
--
-- Scanning starts in the @para@ start code with @'\n'@ as the previous
-- character, and runs over the input with a newline appended by 'eofHack'.
-- Each action is handed the matched text, the current start code and a
-- continuation that resumes scanning after the match.  Calls 'error' with
-- @"lexical error"@ if the scanner reports an error.
tokenise :: String -> [Token]
tokenise input = scanFrom ('\n', eofHack input) para
  where
    scanFrom here@(_, remaining) code =
      case alexScan here code of
        AlexEOF -> []
        AlexError _ -> error "lexical error"
        AlexSkip next _ -> scanFrom next code
        AlexToken next matchLen action ->
          action (take matchLen remaining) code (scanFrom next)
-- | Append a final newline to a docstring.  The lexer has no end-of-input
-- pattern, so the beginning-of-line rules rely on every line, including the
-- last one, being newline-terminated.
eofHack docstring = docstring ++ "\n"
-- | Run an action as if the lexer were already in the given start code: the
-- current start code is ignored and @new_sc@ is passed to the action
-- instead, so scanning continues in @new_sc@ unless the action itself
-- chooses another start code.
andBegin :: Action -> StartCode -> Action
andBegin act new_sc matched _current continue = act matched new_sc continue
-- | Action that emits a fixed token, ignoring the matched text, and carries
-- on in the current start code.
token :: Token -> Action
token t _matched current continue = t : continue current
-- | Action that builds a token from the matched text and carries on in the
-- current start code.
strtoken :: (String -> Token) -> Action
strtoken t matched current continue = t matched : continue current
-- | Action that emits no token and continues scanning in the given start
-- code; the matched text and the current start code are discarded.
begin :: StartCode -> Action
begin sc _matched _current continue = continue sc
-- -----------------------------------------------------------------------------
-- Lex a string as a Haskell identifier
-- | Action for a quoted identifier.  The text between the quote characters
-- is passed to 'strToHsQNames'; on success the resulting names are emitted
-- as a 'TokIdent', otherwise the whole match (quotes included) is emitted
-- as a 'TokString'.  The start code is left unchanged.
ident :: Action
ident str sc cont = emit (strToHsQNames unquoted)
  where
    -- The match always begins and ends with a quote character.
    unquoted = init (tail str)
    emit (Just names) = TokIdent names : rest
    emit Nothing      = TokString str : rest
    rest = cont sc
-- | Interpret a string as a Haskell name by running the Haskell lexer on it
-- and inspecting the first token.
--
-- A variable identifier or variable symbol yields a single name built with
-- 'HsVarName'.  A constructor identifier or constructor symbol yields two
-- candidates, one built with 'HsTyClsName' and one with 'HsVarName', in that
-- order.  Qualified tokens give 'Qual' names, unqualified ones 'UnQual'.
-- Any other token, or a lexer failure, gives 'Nothing'.
strToHsQNames :: String -> Maybe [HsQName]
strToHsQNames str0 =
  case lexer (\t -> returnP t) str0 (SrcLoc 1 1) 1 1 [] of
    Ok _ tok -> namesOf tok
    _failed  -> Nothing
  where
    namesOf tok = case tok of
      VarId s        -> Just (varOnly UnQual (HsIdent s))
      QVarId (m, s)  -> Just (varOnly (Qual (Module m)) (HsIdent s))
      ConId s        -> Just (tyOrVar UnQual (HsIdent s))
      QConId (m, s)  -> Just (tyOrVar (Qual (Module m)) (HsIdent s))
      VarSym s       -> Just (varOnly UnQual (HsSymbol s))
      ConSym s       -> Just (tyOrVar UnQual (HsSymbol s))
      QVarSym (m, s) -> Just (varOnly (Qual (Module m)) (HsSymbol s))
      QConSym (m, s) -> Just (tyOrVar (Qual (Module m)) (HsSymbol s))
      _other         -> Nothing

    -- A single candidate in the variable namespace.
    varOnly qualify n = [qualify (HsVarName n)]

    -- Two candidates: type/class namespace first, then variable namespace.
    tyOrVar qualify n = [qualify (HsTyClsName n), qualify (HsVarName n)]
}
|