summaryrefslogtreecommitdiff
path: root/gentree.py
blob: 995fa1ad001c9c988309d15ee05152075450c7db (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from enum import Enum
import sys
import pprint

class LogType(Enum):
    """kinds of irc log that get_log_type can report."""

    # Reported for logs whose first character is '['.
    QUASSEL = 1
    # Reported for logs whose first character is '<'.
    ERC = 2


class Message:
    """one message of an irc log.

    attributes:
      sender: nick of the participant who sent the message.
      message: text of the message.
      reply_to_msgs: list of indices of the messages this one replies to.
      previous_msg: index of an earlier message from the same sender,
        or -1 when there is none.
    """

    def __init__(self, sender="", message="", reply_to_msgs=None,
                 previous_msg=-1):
        """create a message; every argument is optional.
        args:
          sender: nick of the sender.
          message: text of the message.
          reply_to_msgs: iterable of message indices, copied into a new list.
          previous_msg: index of the sender's earlier message, -1 for none.
        """
        self.sender = sender
        self.message = message
        # Always build a fresh list so instances never share (or alias the
        # caller's) reply list.
        self.reply_to_msgs = [] if reply_to_msgs is None else list(reply_to_msgs)
        self.previous_msg = previous_msg

    def __repr__(self):
        return ('Message(sender={!r}, message={!r}, reply_to_msgs={!r}, '
                'previous_msg={!r})').format(self.sender, self.message,
                                             self.reply_to_msgs,
                                             self.previous_msg)

def to_dict(message):
    """convert a message to a plain dict.
    args:
      message: an object with sender, message, reply_to_msgs and
        previous_msg attributes.
    returns:
      a dict mapping each of those attribute names to its value. The
      reply_to_msgs list is the same object, not a copy.
    """
    field_names = ('sender', 'message', 'reply_to_msgs', 'previous_msg')
    return {name: getattr(message, name) for name in field_names}


def get_log_type(log):
    """determine the type of irc log.
    The type is decided by the first character of the log: '[' means
    quassel, '<' means erc.
    args:
      log: a string of irc log.
    returns:
      a LogType.
    raises:
      ValueError: if the log is empty or starts with any other character.
    """
    # Slicing, unlike log[0], never raises IndexError, so an empty log
    # falls through to the ValueError below.
    first_char = log[:1]
    if first_char == '[':
        return LogType.QUASSEL
    if first_char == '<':
        return LogType.ERC
    raise ValueError('Unknown irc log type.')
    

def _split_quassel_line(line):
    """split one quassel log line into its sender and text.
    args:
      line: a single line of quassel log.
    returns:
      a (nick, payload, is_me) tuple, or None when the line has too few
      fields or no sender nick.
    """
    fields = line.split(' ', maxsplit=2)
    if len(fields) < 3:
        return None
    _, nick, payload = fields
    is_me = nick == '-*-'
    if is_me:
        # Action line: the nick is the first word after the '-*-' marker.
        nick, _, payload = payload.partition(' ')
    else:
        nick = nick[1:-1]  # removes <>
    # Remove special symbol for op; the symbol comes before the nick.
    if nick[:1] in ('@', '%'):
        nick = nick[1:]
    if not nick:
        return None
    return nick, payload, is_me


def parse_log_quassel(log):
    """parse a quassel log.
    A quassel message is in the form of
    [hh:mm:ss] <sender-nick> blahblah.
    or
    [hh:mm:ss] -*- sender-nick does something.
    Lines that are blank, have too few fields or carry no sender nick are
    skipped. Each message records the index of the sender's latest earlier
    message in previous_msg, and in reply_to_msgs the index of the latest
    earlier message of every participant whose nick occurs in the text
    (plain substring match).
    args:
      log: a string of quassel log.
    return:
      a list of Messages
    """
    # Maps each nick to the index of the latest message it sent.
    last_messages = dict()
    messages = []
    for line in log.splitlines():
        parsed = _split_quassel_line(line)
        if parsed is None:
            continue
        nick, payload, is_me = parsed
        # Skipped lines do not consume an index, so indices always match
        # positions in the returned list.
        index = len(messages)
        message = Message()
        message.sender = nick
        if nick in last_messages:
            message.previous_msg = last_messages[nick]
        message.reply_to_msgs.extend(
            msg_id for participant, msg_id in last_messages.items()
            if participant in payload)
        message.message = ('/me ' + payload) if is_me else payload
        messages.append(message)
        # Record only after the scan above so a message never lists
        # itself as a reply.
        last_messages[nick] = index
    return messages

def parse_log_erc(log):
    """parse an erc log (not implemented yet).
    An erc message is in the form of
    <nick> blahblah
           blahblah blah  [hh:mm]
    where the timestamp is optional.
    This is currently a stub: the log is not inspected and the result
    is always empty.
    args:
      log: a string of erc log.
    return:
      a list of Messages, always empty for now.
    """
    parsed_messages = []
    return parsed_messages

def parse_log(log):
    """parse an irc log with the parser matching its type.
    args:
      log: a string of irc log.
    returns:
      a list of Messages
    raises:
      ValueError: if no parser exists for the detected log type.
    """
    log_type = get_log_type(log)
    # One parser per supported log type.
    parsers = {
        LogType.QUASSEL: parse_log_quassel,
        LogType.ERC: parse_log_erc,
    }
    parser = parsers.get(log_type)
    if parser is None:
        raise ValueError('Unknown irc log type.')
    return parser(log)


def main():
    """parse the irc log named on the command line and pretty-print it.
    Reads the file given as the first command-line argument, parses it
    and prints every message as a dict. Exits with a usage message when
    no file is given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: {} LOGFILE'.format(sys.argv[0]))
    # The with block closes the file even if reading fails.
    with open(sys.argv[1]) as log_file:
        log = log_file.read()
    meeting = parse_log(log)
    # graph = gen_graph(meeting)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint([to_dict(msg) for msg in meeting])

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()