aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorŁukasz Hanuszczak <lukasz.hanuszczak@gmail.com>2015-06-04 23:21:17 +0200
committerŁukasz Hanuszczak <lukasz.hanuszczak@gmail.com>2015-06-30 22:37:48 +0200
commit6fb8d5abbcc92f5155fdc9596ca1c87fe87f6187 (patch)
tree895a08b6cf4129fbc6294899f2279146d0ed6879
parent413f7f322cd174e2ba4116dbf53c1b3c0d6a4f77 (diff)
Create basic token classification method.
-rw-r--r--haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs104
1 files changed, 98 insertions, 6 deletions
diff --git a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
index 4e0d7382..be6b7ce5 100644
--- a/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
+++ b/haddock-api/src/Haddock/Backends/Hyperlinker/Parser.hs
@@ -20,11 +20,18 @@ data Span = Span
}
data TokenType
- = Identifier
- | Comment
- | Whitespace
- | Operator
- | Symbol
+ = TkIdentifier
+ | TkKeyword
+ | TkString
+ | TkChar
+ | TkNumber
+ | TkOperator
+ | TkGlyph
+ | TkSpecial
+ | TkSpace
+ | TkComment
+ | TkCpp
+ | TkUnknown
parse :: String -> [Token]
parse = tokenize . tag . chunk
@@ -66,4 +73,89 @@ tag =
in (pos', (Span pos pos', c):cs)
tokenize :: [(Span, String)] -> [Token]
-tokenize = undefined
+tokenize =
+ map aux
+ where
+ aux (sp, str) = Token
+ { tkType = classify str
+ , tkValue = str
+ , tkSpan = sp
+ }
+
+classify :: String -> TokenType
+classify (c:_)
+ | isSpace c = TkSpace
+ | isDigit c = TkNumber
+ | c `elem` special = TkSpecial
+ | c == '#' = TkCpp
+ | c == '"' = TkString
+ | c == '\'' = TkChar
+classify str
+ | str `elem` keywords = TkKeyword
+ | str `elem` glyphs = TkGlyph
+ | all (`elem` symbols) str = TkOperator
+ | "--" `isPrefixOf` str = TkComment
+ | "{-" `isPrefixOf` str = TkComment
+ | isIdentifier str = TkIdentifier
+ | otherwise = TkUnknown
+
+keywords :: [String]
+keywords =
+ [ "as"
+ , "case"
+ , "class"
+ , "data"
+ , "default"
+ , "deriving"
+ , "do"
+ , "else"
+ , "hiding"
+ , "if"
+ , "import"
+ , "in"
+ , "infix"
+ , "infixl"
+ , "infixr"
+ , "instance"
+ , "let"
+ , "module"
+ , "newtype"
+ , "of"
+ , "qualified"
+ , "then"
+ , "type"
+ , "where"
+ , "forall"
+ , "mdo"
+ ]
+
+glyphs :: [String]
+glyphs =
+ [ ".."
+ , ":"
+ , "::"
+ , "="
+ , "\\"
+ , "|"
+ , "<-"
+ , "->"
+ , "@"
+ , "~"
+ , "~#"
+ , "=>"
+ , "-"
+ , "!"
+ ]
+
+special :: [Char]
+special = "()[]{},;`"
+
+-- TODO: Add support for any Unicode symbol or punctuation.
+-- source: http://stackoverflow.com/questions/10548170/what-characters-are-permitted-for-haskell-operators
+symbols :: [Char]
+symbols = "!#$%&*+./<=>?@\\^|-~:"
+
+isIdentifier :: String -> Bool
+isIdentifier (c:str)
+ | isLetter c = all (\c' -> isAlphaNum c' || c == '\'') str
+isIdentifier _ = False