From 11fc8e122f91562de722132df76a10af017741ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Kuzn=C3=ADk?= Date: Mon, 16 Mar 2020 16:41:48 +0000 Subject: [PATCH] jb2bz.py as used by the OpenLDAP project Improvements: - ported to Python 3 - using postgresql - extended to deal with audit log - handles much more fine grained control over what data is saved in the database --- contrib/jb2bz.py | 676 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 503 insertions(+), 173 deletions(-) diff --git a/contrib/jb2bz.py b/contrib/jb2bz.py index 170e82d704..13dd8e6c43 100755 --- a/contrib/jb2bz.py +++ b/contrib/jb2bz.py @@ -1,37 +1,60 @@ -#!/usr/local/bin/python +#!/usr/bin/env python3 # -*- mode: python -*- """ jb2bz.py - a nonce script to import bugs from JitterBug to Bugzilla -Written by Tom Emerson, tree@basistech.com +Written by Tom Emerson, tree@basistech.com and extended by Ondřej Kuzník, +ondra@mistotebe.net. This script is provided in the hopes that it will be useful. No rights reserved. No guarantees expressed or implied. Use at your own risk. May be dangerous if swallowed. If it doesn't work for you, don't blame me. It did what I needed it to do. -This code requires a recent version of Andy Dustman's MySQLdb interface, +This code requires a recent version of psycopg interface: - http://sourceforge.net/projects/mysql-python + https://www.psycopg.org/ + +Porting back to MySQL or making this backend agnostic is left as an exercise +for the reader. Share and enjoy. + +All servers must use UTC or GMT as their timezone for this script to function +properly """ -import email, mimetypes, email.utils -import sys, re, glob, os, stat, time -import MySQLdb, getopt +import argparse +from datetime import datetime +import email +import email.utils +import glob +import mimetypes +import os +import os.path +import psycopg2 +import pytz +import re +import time + +from ast import literal_eval # a safe way of parsing python expressions +from psycopg2.extras import execute_values # mimetypes doesn't include everything we might encounter, yet. -if not mimetypes.types_map.has_key('.doc'): - mimetypes.types_map['.doc'] = 'application/msword' +mimetypes.types_map.setdefault('.doc', 'application/msword') +mimetypes.types_map.setdefault('.log', 'text/plain') +mimetypes.add_type("text/plain", '.dif') -if not mimetypes.encodings_map.has_key('.bz2'): - mimetypes.encodings_map['.bz2'] = "bzip2" +mimetypes.encodings_map.setdefault('.bz2', "bzip2") -bug_status='CONFIRMED' -component="default" -version="unspecified" -product="" # this is required, the rest of these are defaulted as above +IGNORED_MIMETYPES = { + 'application/ms-tnef', + 'application/pgp-signature', + 'application/pkcs7-signature', + 'application/x-pkcs7-signature', + 'message/rfc822', + 'text/x-vcard', +} """ Each bug in JitterBug is stored as a text file named by the bug number. @@ -39,8 +62,10 @@ .followup.* -.reply.* +.reply.* (with email taken from .audit) .notes +.state +.audit The dates on the files represent the respective dates they were created/added. @@ -70,26 +95,63 @@ - """ -def process_notes_file(current, fname): +def get_timestamp(t): + if isinstance(t, str): + timestamp = email.utils.parsedate_to_datetime(t) + if timestamp is None: + return timestamp + if timestamp.tzinfo is None: + timestamp = timestamp.replace(tzinfo=options.tzinfo) + elif abs(timestamp.tzinfo.utcoffset(timestamp)).total_seconds() > 12 * 3600: + timestamp = timestamp.astimezone(None) + elif isinstance(t, float): + timestamp = datetime.fromtimestamp(t, options.tzinfo) + else: + raise TypeError('Unkown type %r' % type(t)) + + return timestamp + + +def decode_text_payload(msgpart): + "Handles base64 encoded payloads which get_payload doesn't by default" + + binary_payload = msgpart.get_payload(decode=True) + charset = msgpart.get_param('charset', 'ascii') + try: - new_note = {} - notes = open(fname, "r") - s = os.fstat(notes.fileno()) + payload = binary_payload.decode(charset, 'replace') + except LookupError: + payload = binary_payload.decode('ascii', 'replace') - new_note['text'] = notes.read() - new_note['timestamp'] = time.gmtime(s[stat.ST_MTIME]) + return payload - notes.close() - current['notes'].append(new_note) +def process_notes_file(current, fname): + if os.path.isfile(fname): + with open(fname, 'r') as notes: + new_note = {} + + s = os.fstat(notes.fileno()) + timestamp = get_timestamp(s.st_mtime) + + text = notes.read() + for keyword, name in options.keywords: + if name.lower() in text.lower(): + current['keywords'].add(keyword) + + new_note['text'] = text + new_note['timestamp'] = timestamp + new_note['from'] = options.reporter - except IOError: - pass + current['notes'].append(new_note) + if timestamp > current['last_change']: + current['last_change'] = timestamp -def process_reply_file(current, fname): + +def process_reply_file(current, fname, meta=None): new_note = {} - reply = open(fname, "r") - msg = email.message_from_file(reply) + with open(fname, 'rb') as reply: + msg = email.message_from_binary_file(reply) # Add any attachments that may have been in a followup or reply msgtype = msg.get_content_maintype() @@ -98,28 +160,55 @@ def process_reply_file(current, fname): new_note = {} if part.get_filename() is None: if part.get_content_type() == "text/plain": - new_note['timestamp'] = time.gmtime(email.utils.mktime_tz(email.utils.parsedate_tz(msg['Date']))) - new_note['text'] = "%s\n%s" % (msg['From'], part.get_payload()) + timestamp = get_timestamp(msg['Date']) + new_note['timestamp'] = timestamp + if timestamp > current['last_change']: + current['last_change'] = timestamp + + new_note['text'] = decode_text_payload(part) + + user = meta and meta['from'] or msg['From'] + if user is not None: + new_note['from'] = user + else: + raise SystemExit("Error: Missing from address") current["notes"].append(new_note) else: - maybe_add_attachment(part, current) + maybe_add_attachment(part, current, msg['From'], msg['Date']) else: - new_note['text'] = "%s\n%s" % (msg['From'], msg.get_payload()) - new_note['timestamp'] = time.gmtime(email.utils.mktime_tz(email.utils.parsedate_tz(msg['Date']))) + timestamp = get_timestamp(msg['Date']) + new_note['timestamp'] = timestamp + if timestamp > current['last_change']: + current['last_change'] = timestamp + + new_note['text'] = decode_text_payload(msg) + + user = meta and meta['from'] or msg['From'] + if user is not None: + new_note['from'] = user + else: + raise SystemExit("Error: Missing from address") current["notes"].append(new_note) + def add_notes(current): """Add any notes that have been recorded for the current bug.""" - process_notes_file(current, "%d.notes" % current['number']) + process_notes_file(current, current['path']+".notes") + process_audit(current, current["path"]+".audit") - for f in glob.glob("%d.reply.*" % current['number']): - process_reply_file(current, f) + for f in glob.glob(current["path"]+".reply.*"): + reply_id = int(f.split('.')[-1]) + meta = current['replies'].get(reply_id) + current['notes'].remove(meta) + process_reply_file(current, f, meta) - for f in glob.glob("%d.followup.*" % current['number']): + for f in glob.glob(current["path"]+".followup.*"): process_reply_file(current, f) -def maybe_add_attachment(submsg, current): + +def maybe_add_attachment(submsg, current, fromaddr, date): """Adds the attachment to the current record""" + attachment_filename = submsg.get_filename() if attachment_filename is None: return @@ -127,66 +216,204 @@ def maybe_add_attachment(submsg, current): if (submsg.get_content_type() == 'application/octet-stream'): # try get a more specific content-type for this attachment mtype, encoding = mimetypes.guess_type(attachment_filename) - if mtype == None: + if not mtype: mtype = submsg.get_content_type() else: mtype = submsg.get_content_type() - if mtype == 'application/x-pkcs7-signature': - return - - if mtype == 'application/pkcs7-signature': - return - - if mtype == 'application/pgp-signature': - return - - if mtype == 'message/rfc822': + if mtype in IGNORED_MIMETYPES: return try: data = submsg.get_payload(decode=True) except: + print("Failed to decode payload for bug", current['path']) return - current['attachments'].append( ( attachment_filename, mtype, data ) ) + timestamp = get_timestamp(date) + if timestamp > current['last_change']: + current['last_change'] = timestamp + + print("Added attachment %s with type %s" % (attachment_filename, mtype)) + current['attachments'].append((attachment_filename, mtype, data, timestamp, fromaddr)) + def process_text_plain(msg, current): - current['description'] = msg.get_payload() + current['description'] = decode_text_payload(msg) + def process_multi_part(msg, current): for part in msg.walk(): if part.get_filename() is None: - process_text_plain(part, current) + if part.get_content_type() == "text/plain": + process_text_plain(part, current) else: - maybe_add_attachment(part, current) + maybe_add_attachment(part, current, msg['From'], msg['Date']) + + +def process_state(current, fname): + if os.path.isfile(fname): + try: + with open(fname, 'r') as state: + bug_state = int(state.read()) + + if bug_state == 0: + current['bug_status'] = 'VERIFIED' + + if bug_state == 1: + current['bug_status'] = 'UNCONFIRMED' + + if bug_state == 2: + current['bug_status'] = 'RESOLVED' + current['resolution'] = 'SUSPENDED' + + if bug_state == 3: + current['bug_status'] = 'RESOLVED' + current['resolution'] = 'FEEDBACK' + + if bug_state == 4: + current['bug_status'] = 'RESOLVED' + current['resolution'] = 'TEST' + + if bug_state == 5: + current['bug_status'] = 'RESOLVED' + + if bug_state == 6: + current['bug_status'] = 'IN_PROGRESS' + + if bug_state == 7: + current['bug_status'] = 'RESOLVED' + current['resolution'] = 'PARTIAL' + + except IOError: + current['bug_status'] = 'UNCONFIRMED' + + else: + current['bug_status'] = 'UNCONFIRMED' + + +def process_audit(current, fname): + if os.path.isfile(fname): + with open(fname, 'r') as f: + new_note = None + for line in f: + line = line.strip() + if not line: + continue + + date, user, note = line.split('\t') + timestamp = get_timestamp(date) + + if timestamp > current['last_change']: + current['last_change'] = timestamp + + if new_note and timestamp == new_note['timestamp'] \ + and new_note['from'] == user: + new_note['text'] += '\n' + note + continue + + if new_note: + current['notes'].append(new_note) + new_note = { + 'timestamp': timestamp, + 'from': user, + 'text': note, + } + + if note.startswith("sent reply "): + current['replies'][int(note.split()[-1])] = new_note + + if new_note: + current['notes'].append(new_note) + + +def get_real_address(addr): + addr = addr.strip() + if not addr: + raise ValueError + + if '@' not in addr: + addr += '@' + options.domain + addr = addr.lower() + + return options.mapping.get(addr, addr) + + +def get_userid(eaddr): + try: + name, addr = email.utils.parseaddr(eaddr) + except TypeError: + name, addr = email.utils.parseaddr(str(eaddr)) + addr = get_real_address(addr) + if not name: + name = addr + + with conn.cursor() as cursor: + if options.bz_version == (5, 1): + cursor.execute("select userid from profiles where email=%s", + [addr]) + for uid in cursor: + return uid + + cursor.execute("INSERT INTO profiles (login_name, email, realname) " + "VALUES %s RETURNING userid", + [(addr, addr, name)]) + return cursor.fetchone() + else: + cursor.execute("select userid from profiles where login_name=%s", + [addr]) + for uid in cursor: + return uid + + cursor.execute("INSERT INTO profiles (login_name, realname) " + "VALUES %s RETURNING userid", + [(addr, name)]) + return cursor.fetchone() + def process_jitterbug(filename): current = {} - current['number'] = int(filename) + current['path'] = filename + current['number'] = int(os.path.basename(filename)) current['notes'] = [] current['attachments'] = [] current['description'] = '' current['date-reported'] = () current['short-description'] = '' + current['bug_status'] = '' + current['resolution'] = '' + current['keywords'] = set() + current['private'] = os.path.isfile(filename+'.private') + current['replies'] = {} - print "Processing: %d" % current['number'] + cursor = conn.cursor() + + cursor.execute('select bug_id from bugs where bug_id = %s', [current['number']]) + if cursor.fetchall(): + print("Bug", current['number'], "exists") + return - mfile = open(filename, "r") - create_date = os.fstat(mfile.fileno()) - msg = email.message_from_file(mfile) + with open(filename, 'rb') as mfile: + create_date = os.fstat(mfile.fileno()) + process_state(current, filename+".state") + msg = email.message_from_binary_file(mfile) - current['date-reported'] = time.gmtime(email.utils.mktime_tz(email.utils.parsedate_tz(msg['Date']))) - if current['date-reported'] is None: - current['date-reported'] = time.gmtime(create_date[stat.ST_MTIME]) + timestamp = get_timestamp(msg['Date']) + if timestamp is None or timestamp.year < 1900: + current['date-reported'] = get_timestamp(create_date.st_mtime) - if current['date-reported'][0] < 1900: - current['date-reported'] = time.gmtime(create_date[stat.ST_MTIME]) + current['last_change'] = current['date-reported'] = timestamp - if msg.has_key('Subject') is not False: - current['short-description'] = msg['Subject'] + if 'Subject' in msg: + current['short-description'] = str(msg['Subject']) else: current['short-description'] = "Unknown" + print('Setting short description to Unknown') + + if msg['From']: + current['from'] = msg['From'] + else: + raise SystemExit("Error: Missing from address") msgtype = msg.get_content_maintype() if msgtype == 'text': @@ -195,8 +422,19 @@ def process_jitterbug(filename): process_multi_part(msg, current) else: # Huh? This should never happen. - print "Unknown content-type: %s" % msgtype - sys.exit(1) + raise SystemExit("Unknown content-type: %s" % msgtype) + + # set reported version + desc_lines = current['description'].split('\n') + version_line = desc_lines and len(desc_lines) > 1 and desc_lines[1] + if version_line and version_line.startswith('Version:'): + version = version_line[8:].strip() + cursor.execute("select value from versions where value = %s and product_id = %s", + [version, options.product_id]) + result = cursor.fetchall() + current['version'] = result[0][0] if result else options.version + else: + current['version'] = options.version add_notes(current) @@ -208,142 +446,234 @@ def process_jitterbug(filename): # # The following fields need to be provided by the user: # - # bug_status # product # version # reporter # component # resolution + # assignee - # change this to the user_id of the Bugzilla user who is blessed with the - # imported defects - reporter=6 + if current['bug_status'] == 'RESOLVED': + if current['resolution'] == '': + current['resolution'] = 'FIXED' - # the resolution will need to be set manually - resolution="" + if current['bug_status'] == 'VERIFIED': + current['resolution'] = 'FIXED' - db = MySQLdb.connect(db='bugs',user='root',host='localhost',passwd='password') - cursor = db.cursor() + uid = get_userid(current['from']) try: - cursor.execute( "INSERT INTO bugs SET " \ - "bug_id=%s," \ - "priority='---'," \ - "bug_severity='normal'," \ - "op_sys='All'," \ - "bug_status=%s," \ - "creation_ts=%s," \ - "delta_ts=%s," \ - "short_desc=%s," \ - "product_id=%s," \ - "rep_platform='All'," \ - "assigned_to=%s," \ - "reporter=%s," \ - "version=%s," \ - "component_id=%s," \ - "resolution=%s", - [ current['number'], - bug_status, - time.strftime("%Y-%m-%d %H:%M:%S", current['date-reported'][:9]), - time.strftime("%Y-%m-%d %H:%M:%S", current['date-reported'][:9]), - current['short-description'], - product, - reporter, - reporter, - version, - component, - resolution] ) + if not current['private']: + for prefix in ('SECURITY:', 'PRIVATE:'): + if current['short-description'].startswith(prefix): + current['short-description'] = current['short-description'][len(prefix):].strip() + + cursor.execute( + "INSERT INTO bugs " + "(bug_id, assigned_to, bug_severity, bug_status, creation_ts, " + "delta_ts, lastdiffed, short_desc, op_sys, priority, product_id, " + "rep_platform, reporter, version, component_id, resolution, " + "everconfirmed) VALUES %s", + [(current['number'], options.assignee, 'normal', current['bug_status'], current['date-reported'], + current['last_change'], current['last_change'], current['short-description'], 'All', '---', options.product_id, + 'All', uid, current['version'], options.component_id, current['resolution'], + 0 if current['bug_status'] == 'UNCONFIRMED' else 1)] + ) + + # Set keywords + if current['keywords']: + execute_values( + cursor, + "INSERT INTO keywords " + "VALUES %s", + [(current['number'], keyword) for keyword in current['keywords']] + ) + + # if private, assign to group immediately + if current['private']: + cursor.execute( + "INSERT INTO bug_group_map " + "(bug_id, group_id) VALUES %s", + [(current['number'], options.group_id)] + ) # This is the initial long description associated with the bug report - cursor.execute( "INSERT INTO longdescs SET " \ - "bug_id=%s," \ - "who=%s," \ - "bug_when=%s," \ - "thetext=%s", - [ current['number'], - reporter, - time.strftime("%Y-%m-%d %H:%M:%S", current['date-reported'][:9]), - current['description'] ] ) + cursor.execute( + "INSERT INTO longdescs " + "(bug_id, who, bug_when, thetext, isprivate) " + "VALUES %s", + [(current['number'], uid, current['date-reported'], current['description'], int(current['private']))] + ) + + if current['private']: + current['fulltext'] = '' + else: + current['fulltext'] = current['description'] + current['fulltext_private'] = current['description'] # Add whatever notes are associated with this defect - for n in current['notes']: - cursor.execute( "INSERT INTO longdescs SET " \ - "bug_id=%s," \ - "who=%s," \ - "bug_when=%s," \ - "thetext=%s", - [current['number'], - reporter, - time.strftime("%Y-%m-%d %H:%M:%S", n['timestamp'][:9]), - n['text']]) + for n in sorted(current['notes'], key=lambda x: x['timestamp']): + note_userid = get_userid(n['from']) + cursor.execute( + "INSERT INTO longdescs " + "(bug_id, who, bug_when, thetext, isprivate) " + "VALUES %s", + [(current['number'], note_userid, n['timestamp'], n['text'], int(current['private']))] + ) + if not current['private']: + current['fulltext'] += n['text'] + current['fulltext_private'] += n['text'] + + cursor.execute( + "INSERT INTO bugs_fulltext " + "(bug_id, short_desc, comments, comments_noprivate) " + "VALUES %s", + [(current['number'], current['short-description'], + current['fulltext_private'], current['fulltext'])] + ) # add attachments associated with this defect for a in current['attachments']: - cursor.execute( "INSERT INTO attachments SET " \ - "bug_id=%s, creation_ts=%s, description=%s, mimetype=%s," \ - "filename=%s, submitter_id=%s", - [ current['number'], - time.strftime("%Y-%m-%d %H:%M:%S", current['date-reported'][:9]), - a[0], a[1], a[0], reporter ]) - cursor.execute( "INSERT INTO attach_data SET " \ - "id=LAST_INSERT_ID(), thedata=%s", - [ a[2] ]) - - except MySQLdb.IntegrityError, message: - errorcode = message[0] - if errorcode == 1062: # duplicate - return - else: - raise + ispatch = a[1] in ('text/x-diff', 'text/x-patch') + cursor.execute( + "INSERT INTO attachments " + "(bug_id, creation_ts, modification_time, description, " + "mimetype, ispatch, filename, submitter_id) " + "VALUES %s RETURNING attach_id", + [(current['number'], a[3], a[3], a[0], + a[1], int(ispatch), a[0], get_userid(a[4]))] + ) + insert_id = cursor.fetchone() + cursor.execute( + "INSERT INTO attach_data " + "(id, thedata) VALUES %s", + [(insert_id, a[2])] + ) + + except psycopg2.IntegrityError as message: + print(message) + raise + + conn.commit() - cursor.execute("COMMIT") - cursor.close() - db.close() def usage(): - print """Usage: jb2bz.py [OPTIONS] Product + raise SystemExit("""Usage: jb2bz.py [OPTIONS] Product Where OPTIONS are one or more of the following: -h This help information. - -s STATUS One of UNCONFIRMED, CONFIRMED, IN_PROGRESS, RESOLVED, VERIFIED - (default is CONFIRMED) - -c COMPONENT The component to attach to each bug as it is important. This should be - valid component for the Product. + -c COMPONENT The component to attach to each bug as it is important. + This should be valid component for the Product. -v VERSION Version to assign to these defects. Product is the Product to assign these defects to. -All of the JitterBugs in the current directory are imported, including replies, notes, -attachments, and similar noise. -""" - sys.exit(1) +All of the JitterBugs in the current directory are imported, including replies, +notes, attachments, and similar noise. +""") def main(): - global bug_status, component, version, product - opts, args = getopt.getopt(sys.argv[1:], "hs:c:v:") - - for o,a in opts: - if o == "-s": - if a in ('UNCONFIRMED','CONFIRMED','IN_PROGRESS','RESOLVED','VERIFIED'): - bug_status = a - elif o == '-c': - component = a - elif o == '-v': - version = a - elif o == '-h': - usage() - - if len(args) != 1: - sys.stderr.write("Must specify the Product.\n") - sys.exit(1) - - product = args[0] - - for bug in filter(lambda x: re.match(r"\d+$", x), glob.glob("*")): - process_jitterbug(bug) - + global conn, options + + parser = argparse.ArgumentParser() + + parser.add_argument('-a', '--assignee', required=True, + help='Email of the default assignee') + parser.add_argument('-r', '--reporter', required=True, + help='Email of user who should be marked as reporting notes') + parser.add_argument('-g', '--group', required=True, + help='Group to assign private tickets to') + parser.add_argument('-d', '--domain', required=True, + help='Domain for internal users') + + parser.add_argument('-p', '--product', required=True, + help='Product to assign these defects to.') + parser.add_argument('-c', '--component', + help='The component to attach to each bug, required if there are more than one component on the product') + parser.add_argument('-v', '--version', default='unspecified', + help='Version to assign to these defects.') + + parser.add_argument('--email-mapping', type=argparse.FileType('r'), + help='Email address mapping file (should contain a Python dict)') + parser.add_argument('-z', '--timezone', default=time.localtime().tm_zone, + help='Server timezone') + parser.add_argument('--bugzilla-version', default='5.2', + help='Bugzilla version') + + parser.add_argument('directory', nargs='+', + help='List of directories with bugs to import') + + options = parser.parse_args() + options.tzinfo = pytz.timezone(options.timezone) + options.bz_version = options.bugzilla_version.split('.') + options.mapping = {} + if options.email_mapping: + options.mapping = literal_eval(options.email_mapping.read()) + + conn = psycopg2.connect(database='bugs', user='bugs') + + # change this to the numeric userid of the user who should be the default + # assignee + options.assignee = get_userid(options.assignee) + + with conn.cursor() as cursor: + cursor.execute('select id from products where name = %s', + [options.product]) + products = cursor.fetchall() + if not products: + raise SystemExit("No product found: %r" % options.product) + options.product_id = products[0][0] + + if options.component: + cursor.execute( + 'SELECT id from components where name = %s and product_id = %s', + (options.component, options.product_id)) + components = cursor.fetchall() + if not components: + raise SystemExit("No such component in product %r: %r" % + [options.product, options.component]) + else: + cursor.execute( + 'SELECT id, name from components where product_id = %s', + (options.product_id)) + components = cursor.fetchall() + if len(components) != 1: + raise SystemExit("Cannot pick default component in product %r: need to choose from %r" % + [options.product, [item[1] for item in components]]) + + options.component_id = components[0][0] + + cursor.execute('select id from groups where name = %s', + [options.group]) + groups = cursor.fetchall() + if not groups: + raise SystemExit("No group found: %r" % options.group) + options.group_id = groups[0][0] + + cursor.execute('select id, name from keyworddefs') + options.keywords = cursor.fetchall() + + # Bug entries are just a number, other files are ancillary + for directory in options.directory: + dir_fd = os.open(directory, os.O_RDONLY) + total = len([name for name in os.listdir(dir_fd) if name.isdigit()]) + index = 0 + for name in os.listdir(dir_fd): + if not name.isdigit(): + continue + + index += 1 + print("[%d/%d]" % (index, total), "Processing", name, "in directory", directory) + with conn: + process_jitterbug(os.path.join(directory, name)) + + with conn.cursor() as cursor: + cursor.execute("SELECT setval('bugs_bug_id_seq', (select max(bug_id) from bugs), true)") + conn.commit() + conn.close() + if __name__ == "__main__": main()