#!/usr/bin/env python
# Copyright (c) 2009-2010, Michael Gorven, Stefano Rivera
# Released under terms of the MIT/X/Expat Licence. See COPYING for details.

from os.path import dirname
from sys import argv, path, exit, stderr, stdout

from chardet import detect
from dateutil.tz import tzlocal, tzutc
from sqlalchemy import create_engine, Column, Integer, String, DateTime, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Search the current working directory first, so that the ``ibid`` package
# imported below is picked up from wherever this script is run.
path.insert(0, '.')

import ibid
from ibid.config import FileConfig
from ibid.db.models import Identity
from ibid.plugins.seen import Sighting
from ibid.plugins.factoid import Factoid, FactoidName, FactoidValue
from ibid.plugins.karma import Karma
from ibid.plugins.urlgrab import URL

def decode(string):
    """Convert a string read from the Knab database to unicode.

    NUL characters are removed. Byte strings are tried as UTF-8 first; when
    that fails the charset is guessed with chardet and the conversion is
    reported on stderr so that it can be checked by hand.

    Returns None when *string* is None, otherwise a unicode object.
    """
    if string is None:
        return None

    # unicode() raises TypeError when asked to decode something that is
    # already unicode, so such values only need their NULs removed.
    if isinstance(string, unicode):
        return string.replace(u'\x00', u'')

    string = string.replace('\x00', '')
    try:
        decoded = unicode(string, 'utf-8')
    except UnicodeDecodeError:
        guessed = detect(string)
        charset = guessed['encoding']
        # A missing or low-confidence guess is not worth trusting.
        if not charset or guessed['confidence'] < 0.5:
            charset = 'ISO-8859-15'
        # Common guessing mistake: 0x92 is a typographic apostrophe in
        # windows-1252 but only a control code in the ISO-8859 family.
        if charset.startswith('ISO-8859') and '\x92' in string:
            charset = 'windows-1252'
        try:
            decoded = unicode(string, charset, errors='replace')
        except LookupError:
            # In case the guess names an encoding Python has no codec for.
            charset = 'ISO-8859-15'
            decoded = unicode(string, charset, errors='replace')
        print >> stderr, "Decoded '%s' as %s to '%s'\n" \
                % (string, charset, decoded.encode('utf-8'))

    return decoded

def to_utc(date):
    """Reinterpret a naive local-time datetime as naive UTC.

    The value is tagged with the local timezone, shifted to UTC and then
    stripped of its tzinfo again.
    """
    local = date.replace(tzinfo=tzlocal())
    return local.astimezone(tzutc()).replace(tzinfo=None)

# Declarative base for the Knab* classes below, which map the tables of the
# Knab database that is being imported from.
Base = declarative_base()

class KnabFactoid(Base):
    """A row of the Knab ``factoid`` table."""
    __tablename__ = 'factoid'

    fact = Column(String(250), primary_key=True)
    value = Column(Text, primary_key=True)
    verb = Column(String(20), primary_key=True)
    who = Column(String(30))
    flags = Column(Integer)
    time = Column(DateTime)

    def decode(self):
        """Store unicode copies of the text columns and the time in UTC."""
        # ``decode`` called here is the module-level function, not this method.
        for column in ('fact', 'verb', 'value'):
            setattr(self, 'decoded_' + column, decode(getattr(self, column)))
        self.utc_time = to_utc(self.time)

    def __repr__(self):
        fields = (self.fact, self.verb, self.value)
        return u'<KnabFactoid %s %s %s>' % fields

class KnabSighting(Base):
    """A row of the Knab ``seen`` table."""
    __tablename__ = 'seen'

    nick = Column(String(30), primary_key=True)
    time = Column(DateTime)
    channel = Column(String(30))
    message = Column(Text)

    def decode(self):
        """Store unicode copies of channel and message and the time in UTC."""
        # ``decode`` called here is the module-level function, not this method.
        for column in ('channel', 'message'):
            setattr(self, 'decoded_' + column, decode(getattr(self, column)))
        self.utc_time = to_utc(self.time)

    def __repr__(self):
        nick = self.nick
        return u'<KnabSighting %s>' % nick

class KnabKarma(Base):
    """A row of the Knab ``karma`` table."""
    __tablename__ = 'karma'

    person = Column(String(100), primary_key=True)
    karma = Column(Integer)
    time = Column(DateTime)

    def decode(self):
        """Store a unicode copy of the person and the time in UTC."""
        # ``decode`` called here is the module-level function, not this method.
        person = decode(self.person)
        when = to_utc(self.time)
        self.decoded_person = person
        self.utc_time = when

    def __repr__(self):
        fields = (self.person, self.karma)
        return u'<KnabKarma %s %s>' % fields

class KnabURL(Base):
    """A row of the Knab ``urlgrab`` table."""
    __tablename__ = 'urlgrab'

    nick = Column(String(30))
    channel = Column(String(30))
    url = Column(String(250))
    time = Column(DateTime, primary_key=True)

    def decode(self):
        """Store unicode copies of channel and url and the time in UTC."""
        # ``decode`` called here is the module-level function, not this method.
        for column in ('channel', 'url'):
            setattr(self, 'decoded_' + column, decode(getattr(self, column)))
        self.utc_time = to_utc(self.time)

    def __repr__(self):
        url = self.url
        return u'<KnabURL %s>' % url

def identify(session, user, source, created, create=True):
    """Return the id of the ibid Identity for *user* on *source*.

    session -- session on the ibid database
    user    -- name as stored by Knab; it is passed through decode()
    source  -- ibid source name the identity belongs to
    created -- timestamp of the record being imported; an existing identity
               whose ``created`` is later than this is moved back to it
    create  -- when true (the default) a missing identity is created and
               flushed; when false None is returned instead

    Returns None when *user* is empty.
    """
    if not user:
        return None
    user = decode(user)

    identity = session.query(Identity) \
            .filter_by(identity=user, source=source).first()
    if not identity:
        if not create:
            return None
        identity = Identity(source, user)
        identity.created = created
        session.save(identity)
        # Flush so that the new row has its id assigned before it is returned.
        session.flush()
    elif identity.created > created:
        identity.created = created
        session.update(identity)

    return identity.id

def too_long(value, max_length, name):
    """Tell whether *value* is longer than *max_length*, warning if it is.

    value      -- string to check; None is never too long
    max_length -- maximum number of characters allowed
    name       -- plain label for the value, used in the warning

    Returns True (after writing a warning to stderr) when the value is too
    long, False otherwise.
    """
    if value is None or len(value) <= max_length:
        return False

    # Write bytes rather than unicode, as decode() does for its own message:
    # stderr may have no encoding (e.g. when redirected) and would then fail
    # on non-ASCII text.
    shown = value
    if not isinstance(shown, str):
        shown = shown.encode('utf-8')
    stderr.write("%s '%s' (%i) is too long" % (name, shown, len(value)) + '\n')
    return True

def import_factoids(knab, ibid, source):
    ibid.begin()

    print u'\nImporting factoids'
    position = 0
    total = knab.query(KnabFactoid).count()
    for index, kfactoid in enumerate(knab.query(KnabFactoid)
                                         .order_by(KnabFactoid.time.asc())
                                         .all()):
        if not kfactoid:
            continue
        kfactoid.decode()

        if index % 100 == 0:
            stdout.write('\r%s/%s' % (index, total))
            stdout.flush()

        fname = ibid.query(FactoidName) \
                    .filter_by(name=kfactoid.decoded_fact).first()
        if not fname:
            factoid = Factoid()
            try:
                fname = FactoidName(kfactoid.decoded_fact,
                        identify(ibid, kfactoid.who, source, kfactoid.utc_time))
            except TypeError:
                print index, kfactoid
                raise
            fname.time = factoid.time = kfactoid.utc_time
            factoid.names.append(fname)
        else:
            factoid = fname.factoid

        lvalue = kfactoid.decoded_value.strip().lower()
        if lvalue.startswith('<reply>') or lvalue.startswith('<action>'):
            value = kfactoid.decoded_value
        else:
            value = kfactoid.decoded_verb + u' ' + kfactoid.decoded_value

        fvalue = FactoidValue(value,
                 identify(ibid, kfactoid.who, source, kfactoid.utc_time))
        fvalue.time = kfactoid.utc_time

        factoid.values.append(fvalue)
        ibid.save_or_update(factoid)

    ibid.commit()

def import_sightings(knab, ibid, source):
    ibid.begin()

    print u'\nImporting sightings'
    position = 0
    total = knab.query(KnabSighting).count()
    for index, ksighting in enumerate(knab.query(KnabSighting)
                                          .order_by(KnabSighting.time.asc())
                                          .all()):
        if not ksighting:
            continue
        ksighting.decode()

        if index % 100 == 0:
            stdout.write('\r%s/%s' % (index, total))
            stdout.flush()

        if too_long(ksighting.decoded_channel,
                    Sighting.__table__.c.channel.type.length, 'Channel name'):
            continue

        sighting = Sighting(
                identify(ibid, ksighting.nick, source, ksighting.utc_time),
                u'message', ksighting.decoded_channel,
                ksighting.decoded_message)
        sighting.time = ksighting.utc_time

        ibid.save(sighting)
        ibid.flush()

    ibid.commit()

def import_karma(knab, ibid, source):
    ibid.begin()

    print u'\nImporting karma'
    position = 0
    total = knab.query(KnabKarma).count()
    for index, kkarma in enumerate(knab.query(KnabKarma)
                                       .order_by(KnabKarma.time.asc())
                                       .all()):
        kkarma.decode()

        if index % 100 == 0:
            stdout.write('\r%s/%s' % (index, total))
            stdout.flush()

        if too_long(kkarma.decoded_person,
                    Karma.__table__.c.subject.type.length, 'Karma subject'):
            continue

        karma = Karma(kkarma.decoded_person)
        karma.value = kkarma.karma
        karma.changes = abs(karma.value)
        karma.time = kkarma.utc_time

        ibid.save(karma)

    ibid.commit()

def import_urls(knab, ibid, source):
    ibid.begin()

    print u'\nImporting URLs'
    position = 0
    total = knab.query(KnabURL).count()
    for index, kurl in enumerate(knab.query(KnabURL)
                                     .order_by(KnabURL.time.asc())
                                     .all()):
        kurl.decode()

        if index % 100 == 0:
            stdout.write('\r%s/%s' % (index, total))
            stdout.flush()

        if too_long(kurl.decoded_channel, URL.__table__.c.channel.type.length,
                    'Channel name'):
            continue

        url = URL(kurl.decoded_url, kurl.decoded_channel,
                  identify(ibid, kurl.nick, source, kurl.utc_time))
        url.time = kurl.utc_time

        ibid.save(url)

    ibid.commit()

if __name__ == '__main__':
    if len(argv) < 3:
        print u'Usage: %s <knab database> <source> [<ibid config>]' % argv[0]
        exit(1)

    knabengine = create_engine(argv[1])
    KnabSession = sessionmaker(bind=knabengine)
    knab = KnabSession()

    source = unicode(argv[2])

    if len(argv) > 3:
        ibid.options['base'] = dirname(argv[3])
        config = FileConfig(argv[3])
    else:
        config = FileConfig('ibid.ini')
        config.merge(FileConfig('local.ini'))

    ibidengine = create_engine(config.databases['ibid'], encoding='utf-8')
    IbidSession = sessionmaker(bind=ibidengine, transactional=False)
    ibid = IbidSession()

    import_factoids(knab, ibid, source)
    import_urls(knab, ibid, source)
    import_sightings(knab, ibid, source)
    import_karma(knab, ibid, source)

    print u'\nDone'

    knab.close()
    ibid.close()

# vi: set et sta sw=4 ts=4:
