"""Parse IRC logs into a flat list of messages linked by nick mentions."""

from enum import Enum
import pprint
import sys


class LogType(Enum):
    """IRC log formats recognised by ``get_log_type``."""

    QUASSEL = 1
    ERC = 2


class Message:
    """A single chat message and its links to earlier messages."""

    def __init__(self):
        self.sender = ""
        self.message = ""
        # Indices of the latest messages of the participants mentioned here.
        self.reply_to_msgs = []
        # Index of the sender's previous message, or -1 when there is none.
        self.previous_msg = -1

    def to_dict(self):
        """Return the message as a plain dict (handy for pretty-printing)."""
        return {
            'sender': self.sender,
            'message': self.message,
            'reply_to_msgs': self.reply_to_msgs,
            'previous_msg': self.previous_msg,
        }


def get_log_type(log: str) -> LogType:
    """Determine the type of an irc log from its first character.

    args:
      log: a string of irc log.
    returns:
      a LogType.
    raises:
      ValueError: if the log is empty or starts with an unknown character.
    """
    # startswith() is False for an empty string, so an empty log falls
    # through to the ValueError instead of raising IndexError.
    if log.startswith('['):
        return LogType.QUASSEL
    if log.startswith('<'):
        return LogType.ERC
    raise ValueError('Unknown irc log type.')


def parse_log_quassel(log: str) -> list:
    """Parse a quassel log.

    Three kinds of lines are handled:
      [hh:mm:ss] <nick> text                  -- a regular message
      [hh:mm:ss] -*- nick does something      -- an action, stored as "/me ..."
      [hh:mm:ss] *** Mode #fsf +o zoe by ...  -- a server notice, ignored

    Lines that do not have enough space-separated fields (for instance blank
    lines) or that carry an empty nick are skipped.

    args:
      log: a string of quassel log.
    returns:
      a list of Messages; indices stored in ``previous_msg`` and
      ``reply_to_msgs`` refer to positions in that list.
    """
    # nick -> index of the most recent message sent by that nick
    last_messages = {}
    messages = []
    for line in log.splitlines():
        fields = line.split(' ', maxsplit=2)
        if len(fields) < 3:
            continue
        _, nick, payload = fields
        if nick == '***':
            continue

        is_me = nick == '-*-'
        if is_me:
            # For actions the nick is the third field, not the second.
            action_fields = line.split(' ', maxsplit=3)
            if len(action_fields) < 4:
                continue
            nick, payload = action_fields[2], action_fields[3]
        else:
            nick = nick[1:-1]  # removes <>

        # Remove special symbol for op
        if nick[:1] in ('@', '%'):
            nick = nick[1:]
        if not nick:
            # An empty nick would be a substring of every later payload.
            continue

        index = len(messages)
        message = Message()
        message.sender = nick
        message.previous_msg = last_messages.get(nick, -1)
        last_messages[nick] = index
        # A mention is a plain substring match of a known nick.  The sender
        # is excluded so that a message never lists itself as a reply target.
        message.reply_to_msgs = [
            msg_id
            for participant, msg_id in last_messages.items()
            if participant != nick and participant in payload
        ]
        message.message = '/me ' + payload if is_me else payload
        messages.append(message)
    return messages


def parse_log_erc(log: str) -> list:
    """Parse an erc log (not implemented yet).

    ``get_log_type`` classifies logs starting with '<' as erc; the original
    notes say a message may be followed by an optional [hh:mm] timestamp.

    args:
      log: a string of erc log.
    returns:
      a list of Messages; currently always empty.
    """
    return []


def parse_log(log: str) -> list:
    """Parse a log with the parser matching its detected type.

    args:
      log: a string of irc log.
    returns:
      a list of Messages
    raises:
      ValueError: if the log type cannot be determined.
    """
    log_type = get_log_type(log)
    if log_type == LogType.QUASSEL:
        return parse_log_quassel(log)
    if log_type == LogType.ERC:
        return parse_log_erc(log)
    raise ValueError('Unknown irc log type.')


def main():
    """Read the log file named on the command line and pretty-print it."""
    if len(sys.argv) < 2:
        sys.exit('usage: {} LOGFILE'.format(sys.argv[0]))
    # The context manager closes the file even if reading fails.
    with open(sys.argv[1]) as log_file:
        log = log_file.read()
    meeting = parse_log(log)
    # TODO: build a graph from the parsed meeting (gen_graph).
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint([msg.to_dict() for msg in meeting])


if __name__ == '__main__':
    main()