From b1b64d98e5971437abb3fe793f6fb58eab1ef4fd Mon Sep 17 00:00:00 2001 From: Valerie R Young Date: Thu, 11 Aug 2016 09:17:07 -0400 Subject: reproducible debian: switch python database backend to SQLAlchemy Signed-off-by: Holger Levsen --- bin/reproducible_common.py | 48 ++++++++++++--- bin/reproducible_notes.py | 87 ++++++++++++++++---------- bin/reproducible_remote_scheduler.py | 62 ++++++++++++++----- bin/reproducible_scheduler.py | 116 ++++++++++++++++++++--------------- bin/reproducible_setup_notify.py | 15 ++--- update_jdn.sh | 5 +- 6 files changed, 221 insertions(+), 112 deletions(-) diff --git a/bin/reproducible_common.py b/bin/reproducible_common.py index eeb4ffb6..dcbe05e8 100755 --- a/bin/reproducible_common.py +++ b/bin/reproducible_common.py @@ -29,6 +29,7 @@ from traceback import print_exception from subprocess import call, check_call from tempfile import NamedTemporaryFile from datetime import datetime, timedelta +from sqlalchemy import MetaData, Table, sql, create_engine DEBUG = False QUIET = False @@ -92,6 +93,11 @@ with open(os.path.join(BIN_PATH, './meta_pkgset.csv'), newline='') as f: for line in csv.reader(f): META_PKGSET[int(line[0])] = (line[2], line[1]) +# init the database data and connection +DB_ENGINE = create_engine("sqlite:///" + REPRODUCIBLE_DB) +DB_METADATA = MetaData(DB_ENGINE) # Get all table definitions +conn_db = DB_ENGINE.connect() # the local sqlite3 reproducible db + parser = argparse.ArgumentParser() group = parser.add_mutually_exclusive_group() group.add_argument("-d", "--debug", action="store_true") @@ -349,18 +355,46 @@ def write_html_page(title, body, destfile, no_header=False, style_note=False, with open(destfile, 'w', encoding='UTF-8') as fd: fd.write(html) -def start_db_connection(): - return sqlite3.connect(REPRODUCIBLE_DB, timeout=60) + +def db_table(table_name): + """Returns a SQLAlchemy Table objects to be used in queries + using SQLAlchemy's Expressive Language. + + Arguments: + table_name: a string corrosponding to an existing table name + """ + try: + return Table(table_name, DB_METADATA, autoload=True) + except sqlalchemy.exc.NoSuchTableError: + log.error("Table %s does not exist or schema for %s could not be loaded", + table_name, REPRODUCIBLE_DB) + raise + def query_db(query): - cursor = conn_db.cursor() + """Excutes a raw SQL query. Return depends on query type. + + Returns: + select: + list of tuples + update or delete: + the number of rows affected + insert: + None + """ try: - cursor.execute(query) + result = conn_db.execute(query) except: print_critical_message('Error executing this query:\n' + query) raise - conn_db.commit() - return cursor.fetchall() + + if result.returns_rows: + return result.fetchall() + elif result.supports_sane_rowcount() and result.rowcount > -1: + return result.rowcount + else: + return None + def start_udd_connection(): username = "public-udd-mirror" @@ -797,8 +831,6 @@ class Package: return False -# init the databases connections -conn_db = start_db_connection() # the local sqlite3 reproducible db # get_bugs() is the only user of this, let it initialize the connection itself, # during it's first call to speed up things when unneeded # also "share" the bugs, to avoid collecting them multiple times per run diff --git a/bin/reproducible_notes.py b/bin/reproducible_notes.py index 85c32ce5..5528d93c 100755 --- a/bin/reproducible_notes.py +++ b/bin/reproducible_notes.py @@ -8,12 +8,14 @@ # # Import the content of the notes.git repository into the reproducible database +from reproducible_common import * + +import os import apt -import json import yaml +import json +from sqlalchemy import sql from apt_pkg import version_compare -from reproducible_common import * -import os NOTES = 'packages.yml' ISSUES = 'issues.yml' @@ -100,53 +102,74 @@ def load_issues(): def store_issues(): - query = 'REPLACE INTO issues (name, url, description) ' + \ - 'VALUES (?, ?, ?)' - cursor = conn_db.cursor() - to_add = [] - for issue in sorted(issues): - name = issue + issues_table = db_table('issues') + # Get existing issues + results = conn_db.execute(sql.select([issues_table.c.name])) + existing_issues = set([row[0] for row in results]) + to_insert = [] + to_update = [] + for name in issues: url = issues[name]['url'] if 'url' in issues[name] else '' desc = issues[name]['description'] - to_add.append((name, url, desc)) - cursor.executemany(query, to_add) - conn_db.commit() - log.debug('Issues saved in the database') + if name in existing_issues: + to_update.append({ + 'issuename': name, + 'url': url, + 'description': desc + }) + # remove this package from the set, to know who to delete later + existing_issues.remove(name) + else: + to_insert.append({ + 'name': name, + 'url': url, + 'description': desc + }) + if to_update: + update_query = issues_table.update().\ + where(issues_table.c.name == sql.bindparam('issuename')) + conn_db.execute(update_query, to_update) + log.debug('Issues updated in the database') + if to_insert: + conn_db.execute(issues_table.insert(), to_insert) + log.debug('Issues added to the database') -def drop_old_issues(): - old = [x[0] for x in query_db('SELECT name FROM issues')] - to_drop = [x for x in old if x not in issues] - if to_drop: - log.info("I'm about to remove the following issues: " + str(to_drop)) - for issue in to_drop: - query_db('DELETE FROM issues WHERE name="{}"'.format(issue)) + # if there are any existing issues left, delete them. + if existing_issues: + to_delete = [{'issuename': name} for name in existing_issues] + delete_query = issues_table.delete().\ + where(issues_table.c.name == sql.bindparam('issuename')) + conn_db.execute(delete_query, to_delete) + log.info("Removed the following issues: " + str(existing_issues)) def store_notes(): log.debug('Removing all notes') - query_db('DELETE FROM notes') - query = 'REPLACE INTO notes ' + \ - '(package_id, version, issues, bugs, comments) ' + \ - 'VALUES (?, ?, ?, ?, ?)' - to_add = [] + notes_table = db_table('notes') + conn_db.execute(notes_table.delete()) + to_insert = [] for entry in [x for y in sorted(notes) for x in notes[y]]: pkg_id = entry['id'] pkg_version = entry['version'] pkg_issues = json.dumps(entry['issues']) pkg_bugs = json.dumps(entry['bugs']) pkg_comments = entry['comments'] - pkg = (pkg_id, pkg_version, pkg_issues, pkg_bugs, pkg_comments) - to_add.append(pkg) - cursor = conn_db.cursor() - cursor.executemany(query, to_add) - conn_db.commit() - log.info('Saved ' + str(len(to_add)) + ' notes in the database') + to_insert.append({ + 'id': pkg_id, + 'version': pkg_version, + 'issues': pkg_issues, + 'bugs': pkg_bugs, + 'comments': pkg_comments + }) + + if (len(to_insert)): + conn_db.execute(notes_table.insert(), to_insert) + log.info('Saved ' + str(len(to_insert)) + ' notes in the database') if __name__ == '__main__': notes = load_notes() issues = load_issues() store_issues() - drop_old_issues() store_notes() diff --git a/bin/reproducible_remote_scheduler.py b/bin/reproducible_remote_scheduler.py index a7179f6a..7f846c8b 100755 --- a/bin/reproducible_remote_scheduler.py +++ b/bin/reproducible_remote_scheduler.py @@ -11,7 +11,7 @@ import sys import time import argparse - +from sqlalchemy import sql parser = argparse.ArgumentParser( description='Reschedule packages to re-test their reproducibility', @@ -229,7 +229,8 @@ if amount + len(ids) > 200 and not local: # do the actual scheduling -to_schedule = [] +add_to_schedule = [] +update_schedule = [] save_schedule = [] artifacts_value = 1 if artifacts else 0 if notify_on_start: @@ -238,23 +239,54 @@ elif notify or artifacts: do_notify = 1 else: do_notify = 0 + +schedule_table = db_table('schedule') +existing_pkg_ids = dict(query_db(sql.select([ + schedule_table.c.package_id, + schedule_table.c.id, + ]).where(schedule_table.c.package_id.in_(ids)))) + for id in ids: - to_schedule.append((id, date, artifacts_value, str(do_notify), requester, - None)) - save_schedule.append((id, requester, epoch)) -log.debug('Packages about to be scheduled: ' + str(to_schedule)) + if id in existing_pkg_ids: + update_schedule.append({ + 'update_id': existing_pkg_ids[id], + 'package_id': id, + 'date_scheduled': date, + 'save_artifacts': artifacts_value, + 'notify': str(do_notify), + 'scheduler': requester, + }) + else: + add_to_schedule.append({ + 'package_id': id, + 'date_scheduled': date, + 'save_artifacts': artifacts_value, + 'notify': str(do_notify), + 'scheduler': requester, + }) + + save_schedule.append({ + 'package_id': id, + 'requester': requester, + 'date_request': epoch, + }) + +log.debug('Packages about to be scheduled: ' + str(add_to_schedule) + + str(update_schedule)) -query1 = '''REPLACE INTO schedule - (package_id, date_scheduled, save_artifacts, notify, scheduler, message) - VALUES (?, ?, ?, ?, ?, ?)''' -query2 = '''INSERT INTO manual_scheduler - (package_id, requester, date_request) VALUES (?, ?, ?)''' +update_schedule_query = schedule_table.update().\ + where(schedule_table.c.id == sql.bindparam('update_id')) +insert_schedule_query = schedule_table.insert() +insert_manual_query = db_table('manual_scheduler').insert() if not dry_run: - cursor = conn_db.cursor() - cursor.executemany(query1, to_schedule) - cursor.executemany(query2, save_schedule) - conn_db.commit() + transaction = conn_db.begin() + if len(add_to_schedule): + conn_db.execute(insert_schedule_query, add_to_schedule) + if len(update_schedule): + conn_db.execute(update_schedule_query, update_schedule) + conn_db.execute(insert_manual_query, save_schedule) + transaction.commit() else: log.info('Ran with --dry-run, scheduled nothing') diff --git a/bin/reproducible_scheduler.py b/bin/reproducible_scheduler.py index 557c002b..29769d60 100755 --- a/bin/reproducible_scheduler.py +++ b/bin/reproducible_scheduler.py @@ -18,6 +18,7 @@ import random from subprocess import call from apt_pkg import version_compare from urllib.request import urlopen +from sqlalchemy import sql from reproducible_common import * from reproducible_html_live_status import generate_schedule @@ -228,8 +229,8 @@ def update_sources_db(suite, arch, sources): pkgs_to_add = [] updated_pkgs = [] different_pkgs = [x for x in new_pkgs if x not in cur_pkgs] - log.debug('Packages different in the archive and in the db: ' + - str(different_pkgs)) + log.debug('Packages different in the archive and in the db: %s', + different_pkgs) for pkg in different_pkgs: # pkg: (name, version, suite, arch) query = 'SELECT id, version, notify_maintainer FROM sources ' + \ @@ -238,7 +239,12 @@ def update_sources_db(suite, arch, sources): try: result = query_db(query)[0] except IndexError: # new package - pkgs_to_add.append(pkg) + pkgs_to_add.append({ + 'name': pkg[0], + 'version': pkg[1], + 'suite': pkg[2], + 'architecture': pkg[3], + }) continue pkg_id = result[0] old_version = result[1] @@ -246,53 +252,71 @@ def update_sources_db(suite, arch, sources): if version_compare(pkg[1], old_version) > 0: log.debug('New version: ' + str(pkg) + ' (we had ' + old_version + ')') - updated_pkgs.append( - (pkg_id, pkg[0], pkg[1], pkg[2], pkg[3], notify_maint)) + updated_pkgs.append({ + 'update_id': pkg_id, + 'name': pkg[0], + 'version': pkg[1], + 'suite': pkg[2], + 'architecture': pkg[3], + 'notify_maintainer': notify_maint, + }) # Now actually update the database: - cursor = conn_db.cursor() + sources_table = db_table('sources') # updated packages log.info('Pushing ' + str(len(updated_pkgs)) + ' updated packages to the database...') - cursor.executemany( - 'REPLACE INTO sources ' + - '(id, name, version, suite, architecture, notify_maintainer) ' + - 'VALUES (?, ?, ?, ?, ?, ?)', - updated_pkgs) - conn_db.commit() + if updated_pkgs: + transaction = conn_db.begin() + update_query = sources_table.update().\ + where(sources_table.c.id == sql.bindparam('update_id')) + conn_db.execute(update_query, updated_pkgs) + transaction.commit() + # new packages - log.info('Now inserting ' + str(len(pkgs_to_add)) + - ' new sources in the database: ' + - str(pkgs_to_add)) - cursor.executemany('INSERT INTO sources ' + - '(name, version, suite, architecture) ' + - 'VALUES (?, ?, ?, ?)', pkgs_to_add) - conn_db.commit() + if pkgs_to_add: + log.info('Now inserting %i new sources in the database: %s', + len(pkgs_to_add), pkgs_to_add) + transaction = conn_db.begin() + conn_db.execute(sources_table.insert(), pkgs_to_add) + transaction.commit() + # RM'ed packages cur_pkgs_name = [x[0] for x in cur_pkgs] new_pkgs_name = [x[0] for x in new_pkgs] rmed_pkgs = [x for x in cur_pkgs_name if x not in new_pkgs_name] - log.info('Now deleting ' + str(len(rmed_pkgs)) + - ' removed packages: ' + str(rmed_pkgs)) + log.info('Now deleting %i removed packages: %s', len(rmed_pkgs), + rmed_pkgs) rmed_pkgs_id = [] pkgs_to_rm = [] query = 'SELECT id FROM sources WHERE name="{}" AND suite="{}" ' + \ 'AND architecture="{}"' for pkg in rmed_pkgs: result = query_db(query.format(pkg, suite, arch)) - rmed_pkgs_id.extend(result) - pkgs_to_rm.append((pkg, suite, arch)) - log.debug('removed packages ID: ' + str([str(x[0]) for x in rmed_pkgs_id])) - log.debug('removed packages: ' + str(pkgs_to_rm)) - cursor.executemany('DELETE FROM sources ' - 'WHERE id=?', rmed_pkgs_id) - cursor.executemany('DELETE FROM results ' - 'WHERE package_id=?', rmed_pkgs_id) - cursor.executemany('DELETE FROM schedule ' - 'WHERE package_id=?', rmed_pkgs_id) - cursor.executemany('INSERT INTO removed_packages ' - '(name, suite, architecture) ' - 'VALUES (?, ?, ?)', pkgs_to_rm) - conn_db.commit() + rmed_pkgs_id.append({'deleteid': result[0][0]}) + pkgs_to_rm.append({'name': pkg, 'suite': suite, 'architecture': arch}) + log.debug('removed packages ID: %s', + [str(x['deleteid']) for x in rmed_pkgs_id]) + log.debug('removed packages: %s', pkgs_to_rm) + + if rmed_pkgs_id: + transaction = conn_db.begin() + results_table = db_table('results') + schedule_table = db_table('schedule') + removed_packages_table = db_table('removed_packages') + + delete_sources_query = sources_table.delete().\ + where(sources_table.c.id == sql.bindparam('deleteid')) + delete_results_query = results_table.delete().\ + where(results_table.c.package_id == sql.bindparam('deleteid')) + delete_schedule_query = schedule_table.delete().\ + where(schedule_table.c.package_id == sql.bindparam('deleteid')) + + conn_db.execute(delete_sources_query, rmed_pkgs_id) + conn_db.execute(delete_results_query, rmed_pkgs_id) + conn_db.execute(delete_schedule_query, rmed_pkgs_id) + conn_db.execute(removed_packages_table.insert(), pkgs_to_rm) + transaction.commit() + # finally check whether the db has the correct number of packages query = 'SELECT count(*) FROM sources WHERE suite="{}" ' + \ 'AND architecture="{}"' @@ -301,14 +325,14 @@ def update_sources_db(suite, arch, sources): if int(pkgs_end[0][0]) != count_new_pkgs: print_critical_message('AH! The number of source in the Sources file' + ' is different than the one in the DB!') - log.critical('source in the debian archive for the ' + suite + - ' suite:' + str(count_new_pkgs)) - log.critical('source in the reproducible db for the ' + suite + - ' suite:' + str(pkgs_end[0][0])) + log.critical('source in the debian archive for the %s suite: %s', + suite, str(count_new_pkgs)) + log.critical('source in the reproducible db for the %s suite: %s', + suite, str(pkgs_end[0][0])) sys.exit(1) if pkgs_to_add: log.info('Building pages for the new packages') - gen_packages_html([Package(x[0]) for x in pkgs_to_add], no_clean=True) + gen_packages_html([Package(x['name']) for x in pkgs_to_add], no_clean=True) def print_schedule_result(suite, arch, criteria, packages): @@ -334,14 +358,10 @@ def queue_packages(all_pkgs, packages, date): def schedule_packages(packages): - pkgs = ((x, packages[x]) for x in packages) - log.debug('IDs about to be scheduled: ' + str(packages.keys())) - query = 'INSERT INTO schedule ' + \ - '(package_id, date_scheduled) ' + \ - 'VALUES (?, ?)' - cursor = conn_db.cursor() - cursor.executemany(query, pkgs) - conn_db.commit() + pkgs = [{'package_id': x, 'date_scheduled': packages[x]} for x in packages.keys()] + log.debug('IDs about to be scheduled: %s', packages.keys()) + if pkgs: + conn_db.execute(db_table('schedule').insert(), pkgs) def add_up_numbers(packages, arch): diff --git a/bin/reproducible_setup_notify.py b/bin/reproducible_setup_notify.py index 8daee9ed..f7d4be9d 100755 --- a/bin/reproducible_setup_notify.py +++ b/bin/reproducible_setup_notify.py @@ -29,7 +29,6 @@ from reproducible_common import * from reproducible_html_packages import gen_packages_html from reproducible_html_indexes import build_page - class bcolors: BOLD = '\033[1m' UNDERLINE = '\033[4m' @@ -51,14 +50,18 @@ def _good(text): def process_pkg(package, deactivate): if deactivate: - _good('Deactovating notification for package ' + str(package)) + _good('Deactivating notification for package ' + str(package)) flag = 0 else: _good('Activating notification for package ' + str(package)) flag = 1 - rows = c.execute(('UPDATE OR FAIL sources SET notify_maintainer="{}" ' + - 'WHERE name="{}"').format(flag, package)).rowcount - conn_db.commit() + + sources_table = db_table('sources') + update_query = sources_table.update().\ + where(sources_table.c.name == package).\ + values(notify_maintainer=flag) + rows = conn_db.execute(update_query).rowcount + if rows == 0: log.error(bcolors.FAIL + str(package) + ' does not exists') sys.exit(1) @@ -86,8 +89,6 @@ if maintainer: log.info('\t' + ', '.join(pkgs)) packages.extend(pkgs) - -c = conn_db.cursor() for package in packages: process_pkg(package, local_args.deactivate) diff --git a/update_jdn.sh b/update_jdn.sh index b7a87d87..841b19c5 100755 --- a/update_jdn.sh +++ b/update_jdn.sh @@ -388,9 +388,10 @@ if [ -f /etc/debian_version ] ; then poxml procmail python3-debian - python3-xdg - python3-yaml python3-pystache + python3-sqlalchemy + python3-xdg + python3-yaml python-arpy python-hachoir-metadata python-imaging -- cgit v1.2.3-70-g09d2