diff options
author | Duncan Coutts <duncan.coutts@worc.ox.ac.uk> | 2006-01-21 17:15:27 +0000 |
---|---|---|
committer | Duncan Coutts <duncan.coutts@worc.ox.ac.uk> | 2006-01-21 17:15:27 +0000 |
commit | 43bb89fa9667162f3f4a0e024a3f926696c173b9 (patch) | |
tree | 92d67daf703a0b5acb50c7dd502b0ee163b52f2e /src/HsLexer.lhs | |
parent | f52324bb86a403f41ad9fc2050bc350fd7635714 (diff) |
Teach haddock about line pragmas and add accurate source code links
Teach haddock about C and Haskell style line pragmas. Extend the lexer/parser's
source location tracking to include the file name as well as line/column. This
way each AST item that is tagged with a SrcLoc gets the original file name too.
Use this original file name to add source links to each exported item, in the
same visual style as the wiki links. Note that the per-export source links are
to the defining module rather than whichever module haddock pretends it is
exported from. This is what we want for source code links. The source code link
URL can also contain the name of the export so one could implement jumping to
the actual location of the function in the file if it were linked to an html
version of the source rather than just plain text. The name can be selected
with the %N wild card.
So for linking to the raw source code one might use:
--source=http://darcs/haskell.org/foo/%F
Or for linking to html syntax highlighted code:
--source=http://darcs/haskell.org/foo/%M.html#%N
Diffstat (limited to 'src/HsLexer.lhs')
-rw-r--r-- | src/HsLexer.lhs | 209 |
1 files changed, 120 insertions, 89 deletions
diff --git a/src/HsLexer.lhs b/src/HsLexer.lhs index d9c4635f..47ee75f5 100644 --- a/src/HsLexer.lhs +++ b/src/HsLexer.lhs @@ -218,45 +218,49 @@ tAB_LENGTH = 8 lexer :: (Token -> P a) -> P a -lexer cont input (SrcLoc _ x0) y0 col = +lexer cont input (SrcLoc _ x0 _) y0 col f = if col == 0 - then tab y0 x0 True input - else tab y0 col False input -- throw away old x + then tab y0 x0 f True input + else tab y0 col f False input -- throw away old x where -- move past whitespace and comments - tab y x _ [] = - cont EOF [] (SrcLoc y x) col y - tab y x bol ('\t':s) = - tab y (nextTab x) bol s - tab y _ _ ('\n':s) = - newLine cont s y - - tab y _ True ('#':s) + tab y x f _ [] = + cont EOF [] (SrcLoc y x f) y col f + tab y x f bol ('\t':s) = + tab y (nextTab x) f bol s + tab y _ f _ ('\n':s) = + newLine cont s y f + + tab y _ f True ('#':s) | "pragma GCC set_debug_pwd" `isPrefixOf` s - = newLine cont (tail $ dropWhile (/= '\n') s) y + = newLine cont (tail $ dropWhile (/= '\n') s) y f + + tab y x f True ('#':' ':s@(d:_)) + | isDigit d = parseLinePragma tab y f s -- single-line comments - tab y x bol s@('-':'-':' ':c:_) | doc c = - is_a_token bol s y x - tab y _ _ ('-':'-':s) | null s || not (isSymbol (head (dropWhile (== '-') s))) = - newLine cont (drop 1 (dropWhile (/= '\n') s)) y + tab y x f bol s@('-':'-':' ':c:_) | doc c = + is_a_token bol s y x f + tab y _ f _ ('-':'-':s) | null s || not (isSymbol (head (dropWhile (== '-') s))) = + newLine cont (drop 1 (dropWhile (/= '\n') s)) y f - -- multi-line nested comments - tab y x bol s@('{':'-':c:_) | doc c && c /= '#' = - is_a_token bol s y x - tab y x bol s@('{':'-':' ':c:_) | doc c = - is_a_token bol s y x - tab y x bol ('{':'-':s) = nestedComment tab y x bol s + -- multi-line nested comments and pragmas + tab y x f bol ('{':'-':'#':s) = pragma tab y (x+3) f bol s + tab y x f bol s@('{':'-':c:_) | doc c = + is_a_token bol s y x f + tab y x f bol s@('{':'-':' ':c:_) | doc c = + is_a_token bol s y x f + tab y x f bol ('{':'-':s) = nestedComment (\y x -> tab y x f) y (x+2) bol s - tab y x bol (c:s) - | isWhite c = tab y (x+1) bol s - | otherwise = is_a_token bol (c:s) y x + tab y x f bol (c:s) + | isWhite c = tab y (x+1) f bol s + | otherwise = is_a_token bol (c:s) y x f - is_a_token bol s y x - | bol = lexBOL cont s (SrcLoc y x) y x - | otherwise = lexToken cont s (SrcLoc y x) y x + is_a_token bol s y x f + | bol = lexBOL cont s (SrcLoc y x f) y x f + | otherwise = lexToken cont s (SrcLoc y x f) y x f - newLine _ s y = tab (y+1) 1 True s + newLine _ s y f = tab (y+1) 1 f True s doc '|' = True doc '/' = True @@ -273,19 +277,19 @@ nextTab x = x + (tAB_LENGTH - (x-1) `mod` tAB_LENGTH) -- insert virtual semicolons or close braces due to layout. lexBOL :: (Token -> P a) -> P a -lexBOL cont s loc y x context = +lexBOL cont s loc y x f context = if need_close_curly then -- trace "layout: inserting '}'\n" $ -- Set col to 0, indicating that we're still at the -- beginning of the line, in case we need a semi-colon too. -- Also pop the context here, so that we don't insert -- another close brace before the parser can pop it. - cont VRightCurly s loc y 0 (tail context) + cont VRightCurly s loc y 0 f (tail context) else if need_semi_colon then --trace "layout: inserting ';'\n" $ - cont SemiColon s loc y x context + cont SemiColon s loc y x f context else - lexToken cont s loc y x context + lexToken cont s loc y x f context where need_close_curly = case context of @@ -303,7 +307,7 @@ lexBOL cont s loc y x context = lexToken :: (Token -> P a) -> P a --lexToken _ [] loc _ _ = -- error $ "Internal error: empty input in lexToken at " ++ show loc -lexToken cont s0 loc y x = +lexToken cont s0 loc y x f = -- trace ("lexer: y="++show y++" x="++show x++"\n") $ case s0 of [] -> error $ "Internal error: empty input in lexToken at " @@ -330,20 +334,20 @@ lexToken cont s0 loc y x = [] -> error "Internal error: empty context in lexToken" '?':s:ss - | isIdentInitial s -> lexToken ( \ (VarId x) -> cont (IPVarId x)) (s:ss) loc y x - '\'':s -> lexChar cont s loc y (x+1) - '\"':s{-"-} -> lexString cont s loc y (x+1) + | isIdentInitial s -> lexToken ( \ (VarId x) -> cont (IPVarId x)) (s:ss) loc y x f + '\'':s -> lexChar cont s loc y (x+1) f + '\"':s{-"-} -> lexString cont s loc y (x+1) f '0':'x':c:s | isHexDigit c -> let (num, rest) = span isHexDigit (c:s) [(i,_)] = readHex num in - afterNum cont i rest loc y (x+length num) + afterNum cont i rest loc y (x+length num) f '0':'o':c:s | isOctDigit c -> let (num, rest) = span isOctDigit (c:s) [(i,_)] = readOct num in - afterNum cont i rest loc y (x+length num) + afterNum cont i rest loc y (x+length num) f c:s | isIdentInitial c -> let @@ -355,7 +359,7 @@ lexToken cont s0 loc y x = Just keyword -> forward l_id keyword rest Nothing -> forward l_id (VarId id0) rest - | isUpper c -> lexCon "" cont (c:s) loc y x + | isUpper c -> lexCon "" cont (c:s) loc y x f | isSymbol c -> let (symtail, rest) = span isSymbol s @@ -368,42 +372,42 @@ lexToken cont s0 loc y x = ':' -> forward l_sym (ConSym sym) rest _ -> forward l_sym (VarSym sym) rest - | isDigit c -> lexNum cont c s loc y x + | isDigit c -> lexNum cont c s loc y x f | otherwise -> parseError ("illegal character \'" ++ show c ++ "\'\n") - s loc y x + s loc y x f - where forward n t str = cont t str loc y (x+n) + where forward n t str = cont t str loc y (x+n) f -- this is all terribly ugly, sorry :( - do_doc ('|':s) nested = multi nested DocCommentNext cont s loc y x - do_doc ('/':s) nested = multi nested DocCommentNext cont s loc y x - do_doc ('^':s) nested = multi nested DocCommentPrev cont s loc y x - do_doc ('$':s) nested = multi nested DocCommentNamed cont s loc y x - do_doc ('#':s) nested = multi nested DocOptions cont s loc y x + do_doc ('|':s) nested = multi nested DocCommentNext cont s loc y x f + do_doc ('/':s) nested = multi nested DocCommentNext cont s loc y x f + do_doc ('^':s) nested = multi nested DocCommentPrev cont s loc y x f + do_doc ('$':s) nested = multi nested DocCommentNamed cont s loc y x f + do_doc ('#':s) nested = multi nested DocOptions cont s loc y x f do_doc ('*':s) nested = section 1 s where section n ('*':s1) = section (n+1) s1 section n s1 - | nested = nestedDocComment (DocSection n) cont s1 loc y x - | otherwise = oneLineDocComment (DocSection n) cont s1 loc y x + | nested = nestedDocComment (DocSection n) cont s1 loc y x f + | otherwise = oneLineDocComment (DocSection n) cont s1 loc y x f do_doc _ _ = error "Internal error: HsLexer.do_doc" -multi :: Num a => Bool -> ([Char] -> b) - -> (b -> [Char] -> c -> a -> Int -> d) - -> [Char] -> c -> a -> Int -> d +multi :: Bool -> ([Char] -> b) + -> (b -> [Char] -> c -> Int -> Int -> d) + -> [Char] -> c -> Int -> Int -> d multi True = nestedDocComment multi False = multiLineDocComment -afterNum :: Num a => (Token -> [Char] -> b -> c -> a -> d) - -> Integer -> [Char] -> b -> c -> a -> d -afterNum cont i ('#':s) loc y x = cont (PrimInt i) s loc y (x+1) -afterNum cont i s loc y x = cont (IntTok i) s loc y x +afterNum :: Num a => (Token -> [Char] -> b -> c -> a -> d -> e) + -> Integer -> [Char] -> b -> c -> a -> d -> e +afterNum cont i ('#':s) loc y x f = cont (PrimInt i) s loc y (x+1) f +afterNum cont i s loc y x f = cont (IntTok i) s loc y x f -lexNum :: (Token -> [Char] -> a -> b -> Int -> c) - -> Char -> [Char] -> a -> b -> Int -> c -lexNum cont c0 s0 loc y x = +lexNum :: (Token -> [Char] -> a -> b -> Int -> c -> d) + -> Char -> [Char] -> a -> b -> Int -> c -> d +lexNum cont c0 s0 loc y x fname = let (num, after_num) = span isDigit (c0:s0) in case after_num of @@ -433,11 +437,11 @@ lexNum cont c0 s0 loc y x = x' = x + length f in case after_exp of -- glasgow exts only - '#':'#':s -> cont (PrimDouble f) s loc y x' - '#':s -> cont (PrimFloat f) s loc y x' - s -> cont (FloatTok f) s loc y x' + '#':'#':s -> cont (PrimDouble f) s loc y x' fname + '#':s -> cont (PrimFloat f) s loc y x' fname + s -> cont (FloatTok f) s loc y x' fname - _ -> afterNum cont (parseInteger 10 num) after_num loc y (x + length num) + _ -> afterNum cont (parseInteger 10 num) after_num loc y (x + length num) fname -- GHC extension: allow trailing '#'s in an identifier. @@ -455,11 +459,11 @@ slurphashes [] i = (reverse i, []) slurphashes ('#':cs) i = slurphashes cs ('#':i) slurphashes s i = (reverse i, s) -lexCon :: [Char] -> (Token -> String -> a -> b -> Int -> c) - -> String -> a -> b -> Int -> c -lexCon qual cont s0 loc y x = +lexCon :: [Char] -> (Token -> String -> a -> b -> Int -> c -> d) + -> String -> a -> b -> Int -> c -> d +lexCon qual cont s0 loc y x f = let - forward n t s = cont t s loc y (x+n) + forward n t s = cont t s loc y (x+n) f (con, rest) = slurpIdent s0 l_con = length con @@ -485,7 +489,7 @@ lexCon qual cont s0 loc y x = _ -> forward (l_con+1+l_id) (QVarId (qual', id0)) rest1 | isUpper c1 -> -- qualified conid? - lexCon qual' cont (c1:s1) loc y (x+l_con+1) + lexCon qual' cont (c1:s1) loc y (x+l_con+1) f | isSymbol c1 -> -- qualified symbol? let @@ -505,28 +509,28 @@ lexCon qual cont s0 loc y x = lexChar :: (Token -> P a) -> P a -lexChar cont s0 loc0 y x = case s0 of - '\\':s1 -> (escapeChar s1 `thenP` \(e,s,i) _ _ _ _ -> - charEnd e s loc0 y (x+i)) s1 loc0 y x - c:s -> charEnd c s loc0 y (x+1) +lexChar cont s0 loc0 y x f = case s0 of + '\\':s1 -> (escapeChar s1 `thenP` \(e,s,i) _ _ _ _ _ -> + charEnd e s loc0 y (x+i) f) s1 loc0 y x f + c:s -> charEnd c s loc0 y (x+1) f [] -> error "Internal error: lexChar" - where charEnd c ('\'':'#':s) = \loc y0 x0 -> cont (PrimChar c) s loc y0 (x0+2) - charEnd c ('\'':s) = \loc y0 x0 -> cont (Character c) s loc y0 (x0+1) + where charEnd c ('\'':'#':s) = \loc y0 x0 f0 -> cont (PrimChar c) s loc y0 (x0+2) f0 + charEnd c ('\'':s) = \loc y0 x0 f0 -> cont (Character c) s loc y0 (x0+1) f0 charEnd _ s = parseError "Improperly terminated character constant" s lexString :: (Token -> P a) -> P a -lexString cont s0 loc y0 x0 = loop "" s0 x0 y0 +lexString cont s0 loc y0 x0 f0 = loop "" s0 x0 y0 f0 where - loop e s1 x y = case s1 of - '\\':'&':s -> loop e s (x+2) y - '\\':c:s | isSpace c -> stringGap e s (x+2) y + loop e s1 x y f = case s1 of + '\\':'&':s -> loop e s (x+2) y f + '\\':c:s | isSpace c -> stringGap e s (x+2) y f | otherwise -> (escapeChar (c:s) `thenP` \(e',s2,i) _ _ _ _ -> - loop (e':e) s2 (x+i) y) s loc y x - '\"':'#':s -> cont (PrimString (reverse e)) s loc y (x+2) - '\"':s{-"-} -> cont (StringTok (reverse e)) s loc y (x+1) - c:s -> loop (c:e) s (x+1) y - [] -> parseError "Improperly terminated string" s1 loc y x + loop (e':e) s2 (x+i) y) s loc y x f + '\"':'#':s -> cont (PrimString (reverse e)) s loc y (x+2) f + '\"':s{-"-} -> cont (StringTok (reverse e)) s loc y (x+1) f + c:s -> loop (c:e) s (x+1) y f + [] -> parseError "Improperly terminated string" s1 loc y x f stringGap e s1 x y = case s1 of '\n':s -> stringGap e s 1 (y+1) @@ -617,8 +621,35 @@ cntrl :: String -> P (Char,String,Int) cntrl (c:s) | c >= '@' && c <= '_' = returnP (chr (ord c - ord '@'), s,2) cntrl _ = parseError "Illegal control character" -nestedComment :: Num a => (a -> Int -> Bool -> [Char] -> b) - -> a -> Int -> Bool -> [Char] -> b + +pragma :: (Int -> Int -> FilePath -> Bool -> [Char] -> b) + -> Int -> Int -> FilePath -> Bool -> [Char] -> b +pragma cont y x f bol s0 = + case span (==' ') s0 of + (_, 'L':'I':'N':'E':' ':s) -> parseLinePragma cont y f s + (_, 'l':'i':'n':'e':' ':s) -> parseLinePragma cont y f s + (sp,s) -> nestedComment (\y x -> cont y x f) y (x+length sp) bol s + +parseLinePragma :: (Int -> Int -> FilePath -> Bool -> [Char] -> b) + -> Int -> FilePath -> [Char] -> b +parseLinePragma cont y fname s0 = + cont y' 1 fname' True (drop 1 (dropWhile (/= '\n') s0)) + + where s1 = dropWhite s0 + (lineStr, s2) = span isDigit s1 + y' = case reads lineStr of + ((y',_):_) -> y' + _ -> y + s3 = dropWhite s2 + fnameStr = takeWhile (\c -> c /= '"') (tail s3) + fname' | null s3 || head s3 /= '"' = fname + -- try and get more sharing of file name strings + | fnameStr == fname = fname + | otherwise = fnameStr + dropWhite = dropWhile (\c -> c == ' ' || c == '\t') + +nestedComment :: (Int -> Int -> Bool -> [Char] -> b) + -> Int -> Int -> Bool -> [Char] -> b nestedComment cont y x bol s0 = case s0 of '-':'}':s -> cont y (x+2) bol s @@ -628,9 +659,9 @@ nestedComment cont y x bol s0 = _:s -> nestedComment cont y (x+1) bol s [] -> error "Internal error: nestedComment" -nestedDocComment :: Num a => ([Char] -> b) - -> (b -> [Char] -> c -> a -> Int -> d) - -> [Char] -> c -> a -> Int -> d +nestedDocComment :: ([Char] -> b) + -> (b -> [Char] -> c -> Int -> Int -> d) + -> [Char] -> c -> Int -> Int -> d nestedDocComment f0 cont0 s0 loc y0 x0 = go f0 cont0 "" y0 x0 s0 where go f cont acc y1 x1 s1 = |