"""Parse IRC meeting logs into a list of linked messages.

Each parsed message records its sender, its text, the index of the sender's
previous message, and the indices of the messages it appears to reply to.

Usage: python gentree.py LOGFILE

Recovered from the whitespace-collapsed git patch "first commit"
(11dd7c0, Yuchen Pei, 2021-08-25).  That patch also added an editor backup
file, gentree.py~, which held only an unfinished ``main`` stub.
"""

from enum import Enum
import sys
import pprint


class LogType(Enum):
    """IRC log formats distinguished by get_log_type."""
    QUASSEL = 1
    ERC = 2


class Message:
    """One message of an IRC log."""

    def __init__(self):
        # Nick of the speaker, without the surrounding <> or an op prefix.
        self.sender = ""
        # Text of the message; actions are stored as '/me ' + action.
        self.message = ""
        # Indices (into the parsed message list) of the messages this one
        # appears to reply to.
        self.reply_to_msgs = []
        # Index of the same sender's previous message, or -1 if there is none.
        self.previous_msg = -1


def to_dict(message):
    """convert a Message to a plain dict (e.g. for pretty-printing).
    args:
        message: a Message.
    returns:
        a dict with keys 'sender', 'message', 'reply_to_msgs' and
        'previous_msg'.
    """
    return {'sender': message.sender,
            'message': message.message,
            'reply_to_msgs': message.reply_to_msgs,
            'previous_msg': message.previous_msg}


def get_log_type(log):
    """determine the type of irc log.
    args:
        log: a string of irc log.
    returns:
        a LogType.
    raises:
        ValueError: if the log is empty or starts with neither '[' nor '<'.
    """
    # startswith() rather than log[0] so that an empty log is reported as an
    # unknown type instead of failing with IndexError.
    if log.startswith('['):
        return LogType.QUASSEL
    elif log.startswith('<'):
        return LogType.ERC
    else:
        raise ValueError('Unknown irc log type.')


def parse_log_quassel(log):
    """parse a quassel log.
    A quassel message is in the form of
        [hh:mm:ss] <sender-nick> blahblah.
    or
        [hh:mm:ss] -*- sender-nick does something.
    Blank lines are skipped.
    args:
        log: a string of quassel log.
    return:
        a list of Messages
    raises:
        ValueError: if a non-blank line has neither of the forms above.
    """
    malformed = 'Malformed quassel log line {}: {!r}'
    # Maps every nick seen so far to the index of its most recent message.
    last_messages = dict()
    messages = []
    for line_no, line in enumerate(log.splitlines(), start=1):
        if not line.strip():
            continue
        fields = line.split(' ', maxsplit=2)
        if len(fields) < 3:
            raise ValueError(malformed.format(line_no, line))
        _, nick, payload = fields
        is_me = nick == '-*-'
        if is_me:
            # Action line: the nick is the first word after the '-*-' marker.
            nick, sep, payload = payload.partition(' ')
            if not sep:
                raise ValueError(malformed.format(line_no, line))
        else:
            nick = nick[1:-1]  # removes <>
        # Remove special symbol for op (it is a prefix of the nick).
        if nick.startswith(('@', '%')):
            nick = nick[1:]
        if not nick:
            raise ValueError(malformed.format(line_no, line))

        message = Message()
        message.sender = nick
        message.message = '/me ' + payload if is_me else payload
        if nick in last_messages:
            message.previous_msg = last_messages[nick]
        # Scan for mentions before registering this message, so a message
        # never lists itself as a reply target.
        # NOTE: this is a plain substring match, so a short nick also matches
        # inside longer words.
        for participant, msg_id in last_messages.items():
            if participant in payload:
                message.reply_to_msgs.append(msg_id)
        # Record this message as the sender's latest one.
        last_messages[nick] = len(messages)
        messages.append(message)
    return messages


def parse_log_erc(log):
    """parse an erc log.
    An erc message is in the form of
        <sender-nick> blahblah
        <sender-nick> blahblah blah [hh:mm]
    And the timestamp is optional
    args:
        log: a string of erc log.
    return:
        a list of Messages
    """
    # TODO: not implemented yet; every erc log currently parses to no messages.
    return []


def parse_log(log):
    """parse log.
    args:
        log: a string of irc log.
    returns:
        a list of Messages
    raises:
        ValueError: if the log type cannot be determined.
    """
    log_type = get_log_type(log)
    if log_type == LogType.QUASSEL:
        return parse_log_quassel(log)
    elif log_type == LogType.ERC:
        return parse_log_erc(log)
    else:
        # Only reachable if LogType gains a member without a parser here.
        raise ValueError('Unknown irc log type.')


def main():
    """parse the log file named on the command line and pretty-print it."""
    if len(sys.argv) < 2:
        sys.exit('usage: {} LOGFILE'.format(sys.argv[0]))
    # Context manager so the file is closed as soon as it has been read.
    with open(sys.argv[1]) as log_file:
        log = log_file.read()
    meeting = parse_log(log)
    # TODO: generate a reply graph from `meeting`.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint([to_dict(msg) for msg in meeting])


if __name__ == '__main__':
    main()