1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
from enum import Enum
import sys
import pprint
class LogType(Enum):
    """Kinds of irc log that this module tells apart."""

    # Log whose text starts with '['.
    QUASSEL = 1
    # Log whose text starts with '<'.
    ERC = 2
class Message:
    """One parsed irc message plus its links to earlier messages."""

    def __init__(self):
        # Nick of the sender and the text they sent; both start out empty.
        self.sender = self.message = ""
        # Ids of the messages this one is considered a reply to.
        self.reply_to_msgs = list()
        # Id of an earlier message by the same sender; -1 when there is none.
        self.previous_msg = -1
def to_dict(message):
    """Convert a message into a plain dict.

    args:
        message: an object with sender, message, reply_to_msgs and
            previous_msg attributes.
    returns:
        a dict mapping each of those attribute names to its value.
    """
    field_names = ('sender', 'message', 'reply_to_msgs', 'previous_msg')
    return {name: getattr(message, name) for name in field_names}
def get_log_type(log):
    """Determine the type of irc log.

    Only the first character of the log is inspected.

    args:
        log: a string of irc log.
    returns:
        a LogType.
    raises:
        ValueError: if the log is empty or starts with neither '[' nor '<'.
    """
    # startswith() is safe on an empty string, whereas log[0] would raise
    # IndexError before the ValueError below could be reached.
    if log.startswith('['):
        return LogType.QUASSEL
    if log.startswith('<'):
        return LogType.ERC
    raise ValueError('Unknown irc log type.')
def parse_log_quassel(log):
    """Parse a quassel log.

    A quassel message is in the form of
        [hh:mm:ss] <sender-nick> blahblah.
    or
        [hh:mm:ss] -*- sender-nick does something.

    The id of a message is its position in the returned list. For every
    message, previous_msg is the id of the sender's most recent earlier
    message (left at -1 when there is none), and reply_to_msgs holds the id
    of the most recent earlier message of every known participant whose
    nick occurs in the text (plain substring match).

    Lines that do not contain a timestamp, a nick and a payload (for
    example blank lines) are skipped and get no id.

    args:
        log: a string of quassel log.
    return:
        a list of Messages
    """
    last_messages = {}  # nick -> id of that nick's most recent message
    messages = []
    for line in log.splitlines():
        fields = line.split(' ', maxsplit=2)
        if len(fields) < 3:
            # Blank or truncated line: nothing to attribute to a sender.
            continue
        _, nick, payload = fields

        is_me = nick == '-*-'
        if is_me:
            # "/me" action: the nick is the first word after the marker.
            action = payload.split(' ', maxsplit=1)
            if len(action) < 2:
                continue
            nick, payload = action
        else:
            nick = nick[1:-1]  # removes <>
            # Remove the leading op/half-op symbol. Slicing keeps this
            # safe for an empty nick.
            if nick[:1] in ('@', '%'):
                nick = nick[1:]

        message = Message()
        message.sender = nick
        message.message = '/me ' + payload if is_me else payload

        previous = last_messages.get(nick)
        if previous is not None:
            message.previous_msg = previous

        # Look for mentions before recording this message, so that a
        # message can never be listed as a reply to itself.
        for participant, msg_id in last_messages.items():
            if participant in payload:
                message.reply_to_msgs.append(msg_id)

        # Record this message as the sender's latest one.
        last_messages[nick] = len(messages)
        messages.append(message)
    return messages
def parse_log_erc(log):
    """Parse an erc log.

    An erc message is in the form of
        <nick> blahblah
        blahblah blah [hh:mm]
    where the timestamp is optional.

    Parsing is not implemented here: the log is ignored and an empty list
    is always returned.

    args:
        log: a string of erc log.
    return:
        a list of Messages (currently always empty)
    """
    no_messages = []
    return no_messages
def parse_log(log):
    """Parse an irc log with the parser matching its type.

    args:
        log: a string of irc log.
    returns:
        a list of Messages
    raises:
        ValueError: if the detected log type has no parser.
    """
    detected = get_log_type(log)
    parsers = (
        (LogType.QUASSEL, parse_log_quassel),
        (LogType.ERC, parse_log_erc),
    )
    for candidate, parser in parsers:
        if detected == candidate:
            return parser(log)
    raise ValueError('Unknown irc log type.')
def main():
    """Parse the irc log named on the command line and pretty-print it.

    Reads the file at sys.argv[1], parses it with parse_log and prints
    every message as a dict. Exits with a usage message when no file name
    was given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: python <script> <irc-log-file>')
    # The context manager closes the file even if reading fails.
    with open(sys.argv[1]) as log_file:
        log = log_file.read()
    meeting = parse_log(log)
    # graph = gen_graph(meeting)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint([to_dict(msg) for msg in meeting])
# Script entry point: run main() only when executed directly, not on import.
if __name__ == '__main__':
    main()
|