summaryrefslogtreecommitdiffstats
path: root/bin
diff options
context:
space:
mode:
authorValerie R Young <spectranaut@riseup.net>2016-08-11 09:17:07 -0400
committerHolger Levsen <holger@layer-acht.org>2016-08-19 10:40:58 +0200
commitb1b64d98e5971437abb3fe793f6fb58eab1ef4fd (patch)
tree4c0e66e181bf0711d69f347f2426651cfccf6937 /bin
parentda31c830940b18986d5890d1ec856475ad645fd5 (diff)
downloadjenkins.debian.net-b1b64d98e5971437abb3fe793f6fb58eab1ef4fd.tar.xz
reproducible debian: switch python database backend to SQLAlchemy
Signed-off-by: Holger Levsen <holger@layer-acht.org>
Diffstat (limited to 'bin')
-rwxr-xr-xbin/reproducible_common.py48
-rwxr-xr-xbin/reproducible_notes.py87
-rwxr-xr-xbin/reproducible_remote_scheduler.py62
-rwxr-xr-xbin/reproducible_scheduler.py116
-rwxr-xr-xbin/reproducible_setup_notify.py15
5 files changed, 218 insertions, 110 deletions
diff --git a/bin/reproducible_common.py b/bin/reproducible_common.py
index eeb4ffb6..dcbe05e8 100755
--- a/bin/reproducible_common.py
+++ b/bin/reproducible_common.py
@@ -29,6 +29,7 @@ from traceback import print_exception
from subprocess import call, check_call
from tempfile import NamedTemporaryFile
from datetime import datetime, timedelta
+from sqlalchemy import MetaData, Table, sql, create_engine
DEBUG = False
QUIET = False
@@ -92,6 +93,11 @@ with open(os.path.join(BIN_PATH, './meta_pkgset.csv'), newline='') as f:
for line in csv.reader(f):
META_PKGSET[int(line[0])] = (line[2], line[1])
+# init the database data and connection
+DB_ENGINE = create_engine("sqlite:///" + REPRODUCIBLE_DB)
+DB_METADATA = MetaData(DB_ENGINE) # Get all table definitions
+conn_db = DB_ENGINE.connect() # the local sqlite3 reproducible db
+
parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group()
group.add_argument("-d", "--debug", action="store_true")
@@ -349,18 +355,46 @@ def write_html_page(title, body, destfile, no_header=False, style_note=False,
with open(destfile, 'w', encoding='UTF-8') as fd:
fd.write(html)
-def start_db_connection():
- return sqlite3.connect(REPRODUCIBLE_DB, timeout=60)
+
+def db_table(table_name):
+ """Returns a SQLAlchemy Table objects to be used in queries
+ using SQLAlchemy's Expressive Language.
+
+ Arguments:
+ table_name: a string corrosponding to an existing table name
+ """
+ try:
+ return Table(table_name, DB_METADATA, autoload=True)
+ except sqlalchemy.exc.NoSuchTableError:
+ log.error("Table %s does not exist or schema for %s could not be loaded",
+ table_name, REPRODUCIBLE_DB)
+ raise
+
def query_db(query):
- cursor = conn_db.cursor()
+ """Excutes a raw SQL query. Return depends on query type.
+
+ Returns:
+ select:
+ list of tuples
+ update or delete:
+ the number of rows affected
+ insert:
+ None
+ """
try:
- cursor.execute(query)
+ result = conn_db.execute(query)
except:
print_critical_message('Error executing this query:\n' + query)
raise
- conn_db.commit()
- return cursor.fetchall()
+
+ if result.returns_rows:
+ return result.fetchall()
+ elif result.supports_sane_rowcount() and result.rowcount > -1:
+ return result.rowcount
+ else:
+ return None
+
def start_udd_connection():
username = "public-udd-mirror"
@@ -797,8 +831,6 @@ class Package:
return False
-# init the databases connections
-conn_db = start_db_connection() # the local sqlite3 reproducible db
# get_bugs() is the only user of this, let it initialize the connection itself,
# during it's first call to speed up things when unneeded
# also "share" the bugs, to avoid collecting them multiple times per run
diff --git a/bin/reproducible_notes.py b/bin/reproducible_notes.py
index 85c32ce5..5528d93c 100755
--- a/bin/reproducible_notes.py
+++ b/bin/reproducible_notes.py
@@ -8,12 +8,14 @@
#
# Import the content of the notes.git repository into the reproducible database
+from reproducible_common import *
+
+import os
import apt
-import json
import yaml
+import json
+from sqlalchemy import sql
from apt_pkg import version_compare
-from reproducible_common import *
-import os
NOTES = 'packages.yml'
ISSUES = 'issues.yml'
@@ -100,53 +102,74 @@ def load_issues():
def store_issues():
- query = 'REPLACE INTO issues (name, url, description) ' + \
- 'VALUES (?, ?, ?)'
- cursor = conn_db.cursor()
- to_add = []
- for issue in sorted(issues):
- name = issue
+ issues_table = db_table('issues')
+ # Get existing issues
+ results = conn_db.execute(sql.select([issues_table.c.name]))
+ existing_issues = set([row[0] for row in results])
+ to_insert = []
+ to_update = []
+ for name in issues:
url = issues[name]['url'] if 'url' in issues[name] else ''
desc = issues[name]['description']
- to_add.append((name, url, desc))
- cursor.executemany(query, to_add)
- conn_db.commit()
- log.debug('Issues saved in the database')
+ if name in existing_issues:
+ to_update.append({
+ 'issuename': name,
+ 'url': url,
+ 'description': desc
+ })
+ # remove this package from the set, to know who to delete later
+ existing_issues.remove(name)
+ else:
+ to_insert.append({
+ 'name': name,
+ 'url': url,
+ 'description': desc
+ })
+ if to_update:
+ update_query = issues_table.update().\
+ where(issues_table.c.name == sql.bindparam('issuename'))
+ conn_db.execute(update_query, to_update)
+ log.debug('Issues updated in the database')
+ if to_insert:
+ conn_db.execute(issues_table.insert(), to_insert)
+ log.debug('Issues added to the database')
-def drop_old_issues():
- old = [x[0] for x in query_db('SELECT name FROM issues')]
- to_drop = [x for x in old if x not in issues]
- if to_drop:
- log.info("I'm about to remove the following issues: " + str(to_drop))
- for issue in to_drop:
- query_db('DELETE FROM issues WHERE name="{}"'.format(issue))
+ # if there are any existing issues left, delete them.
+ if existing_issues:
+ to_delete = [{'issuename': name} for name in existing_issues]
+ delete_query = issues_table.delete().\
+ where(issues_table.c.name == sql.bindparam('issuename'))
+ conn_db.execute(delete_query, to_delete)
+ log.info("Removed the following issues: " + str(existing_issues))
def store_notes():
log.debug('Removing all notes')
- query_db('DELETE FROM notes')
- query = 'REPLACE INTO notes ' + \
- '(package_id, version, issues, bugs, comments) ' + \
- 'VALUES (?, ?, ?, ?, ?)'
- to_add = []
+ notes_table = db_table('notes')
+ conn_db.execute(notes_table.delete())
+ to_insert = []
for entry in [x for y in sorted(notes) for x in notes[y]]:
pkg_id = entry['id']
pkg_version = entry['version']
pkg_issues = json.dumps(entry['issues'])
pkg_bugs = json.dumps(entry['bugs'])
pkg_comments = entry['comments']
- pkg = (pkg_id, pkg_version, pkg_issues, pkg_bugs, pkg_comments)
- to_add.append(pkg)
- cursor = conn_db.cursor()
- cursor.executemany(query, to_add)
- conn_db.commit()
- log.info('Saved ' + str(len(to_add)) + ' notes in the database')
+ to_insert.append({
+ 'id': pkg_id,
+ 'version': pkg_version,
+ 'issues': pkg_issues,
+ 'bugs': pkg_bugs,
+ 'comments': pkg_comments
+ })
+
+ if (len(to_insert)):
+ conn_db.execute(notes_table.insert(), to_insert)
+ log.info('Saved ' + str(len(to_insert)) + ' notes in the database')
if __name__ == '__main__':
notes = load_notes()
issues = load_issues()
store_issues()
- drop_old_issues()
store_notes()
diff --git a/bin/reproducible_remote_scheduler.py b/bin/reproducible_remote_scheduler.py
index a7179f6a..7f846c8b 100755
--- a/bin/reproducible_remote_scheduler.py
+++ b/bin/reproducible_remote_scheduler.py
@@ -11,7 +11,7 @@
import sys
import time
import argparse
-
+from sqlalchemy import sql
parser = argparse.ArgumentParser(
description='Reschedule packages to re-test their reproducibility',
@@ -229,7 +229,8 @@ if amount + len(ids) > 200 and not local:
# do the actual scheduling
-to_schedule = []
+add_to_schedule = []
+update_schedule = []
save_schedule = []
artifacts_value = 1 if artifacts else 0
if notify_on_start:
@@ -238,23 +239,54 @@ elif notify or artifacts:
do_notify = 1
else:
do_notify = 0
+
+schedule_table = db_table('schedule')
+existing_pkg_ids = dict(query_db(sql.select([
+ schedule_table.c.package_id,
+ schedule_table.c.id,
+ ]).where(schedule_table.c.package_id.in_(ids))))
+
for id in ids:
- to_schedule.append((id, date, artifacts_value, str(do_notify), requester,
- None))
- save_schedule.append((id, requester, epoch))
-log.debug('Packages about to be scheduled: ' + str(to_schedule))
+ if id in existing_pkg_ids:
+ update_schedule.append({
+ 'update_id': existing_pkg_ids[id],
+ 'package_id': id,
+ 'date_scheduled': date,
+ 'save_artifacts': artifacts_value,
+ 'notify': str(do_notify),
+ 'scheduler': requester,
+ })
+ else:
+ add_to_schedule.append({
+ 'package_id': id,
+ 'date_scheduled': date,
+ 'save_artifacts': artifacts_value,
+ 'notify': str(do_notify),
+ 'scheduler': requester,
+ })
+
+ save_schedule.append({
+ 'package_id': id,
+ 'requester': requester,
+ 'date_request': epoch,
+ })
+
+log.debug('Packages about to be scheduled: ' + str(add_to_schedule)
+ + str(update_schedule))
-query1 = '''REPLACE INTO schedule
- (package_id, date_scheduled, save_artifacts, notify, scheduler, message)
- VALUES (?, ?, ?, ?, ?, ?)'''
-query2 = '''INSERT INTO manual_scheduler
- (package_id, requester, date_request) VALUES (?, ?, ?)'''
+update_schedule_query = schedule_table.update().\
+ where(schedule_table.c.id == sql.bindparam('update_id'))
+insert_schedule_query = schedule_table.insert()
+insert_manual_query = db_table('manual_scheduler').insert()
if not dry_run:
- cursor = conn_db.cursor()
- cursor.executemany(query1, to_schedule)
- cursor.executemany(query2, save_schedule)
- conn_db.commit()
+ transaction = conn_db.begin()
+ if len(add_to_schedule):
+ conn_db.execute(insert_schedule_query, add_to_schedule)
+ if len(update_schedule):
+ conn_db.execute(update_schedule_query, update_schedule)
+ conn_db.execute(insert_manual_query, save_schedule)
+ transaction.commit()
else:
log.info('Ran with --dry-run, scheduled nothing')
diff --git a/bin/reproducible_scheduler.py b/bin/reproducible_scheduler.py
index 557c002b..29769d60 100755
--- a/bin/reproducible_scheduler.py
+++ b/bin/reproducible_scheduler.py
@@ -18,6 +18,7 @@ import random
from subprocess import call
from apt_pkg import version_compare
from urllib.request import urlopen
+from sqlalchemy import sql
from reproducible_common import *
from reproducible_html_live_status import generate_schedule
@@ -228,8 +229,8 @@ def update_sources_db(suite, arch, sources):
pkgs_to_add = []
updated_pkgs = []
different_pkgs = [x for x in new_pkgs if x not in cur_pkgs]
- log.debug('Packages different in the archive and in the db: ' +
- str(different_pkgs))
+ log.debug('Packages different in the archive and in the db: %s',
+ different_pkgs)
for pkg in different_pkgs:
# pkg: (name, version, suite, arch)
query = 'SELECT id, version, notify_maintainer FROM sources ' + \
@@ -238,7 +239,12 @@ def update_sources_db(suite, arch, sources):
try:
result = query_db(query)[0]
except IndexError: # new package
- pkgs_to_add.append(pkg)
+ pkgs_to_add.append({
+ 'name': pkg[0],
+ 'version': pkg[1],
+ 'suite': pkg[2],
+ 'architecture': pkg[3],
+ })
continue
pkg_id = result[0]
old_version = result[1]
@@ -246,53 +252,71 @@ def update_sources_db(suite, arch, sources):
if version_compare(pkg[1], old_version) > 0:
log.debug('New version: ' + str(pkg) + ' (we had ' +
old_version + ')')
- updated_pkgs.append(
- (pkg_id, pkg[0], pkg[1], pkg[2], pkg[3], notify_maint))
+ updated_pkgs.append({
+ 'update_id': pkg_id,
+ 'name': pkg[0],
+ 'version': pkg[1],
+ 'suite': pkg[2],
+ 'architecture': pkg[3],
+ 'notify_maintainer': notify_maint,
+ })
# Now actually update the database:
- cursor = conn_db.cursor()
+ sources_table = db_table('sources')
# updated packages
log.info('Pushing ' + str(len(updated_pkgs)) +
' updated packages to the database...')
- cursor.executemany(
- 'REPLACE INTO sources ' +
- '(id, name, version, suite, architecture, notify_maintainer) ' +
- 'VALUES (?, ?, ?, ?, ?, ?)',
- updated_pkgs)
- conn_db.commit()
+ if updated_pkgs:
+ transaction = conn_db.begin()
+ update_query = sources_table.update().\
+ where(sources_table.c.id == sql.bindparam('update_id'))
+ conn_db.execute(update_query, updated_pkgs)
+ transaction.commit()
+
# new packages
- log.info('Now inserting ' + str(len(pkgs_to_add)) +
- ' new sources in the database: ' +
- str(pkgs_to_add))
- cursor.executemany('INSERT INTO sources ' +
- '(name, version, suite, architecture) ' +
- 'VALUES (?, ?, ?, ?)', pkgs_to_add)
- conn_db.commit()
+ if pkgs_to_add:
+ log.info('Now inserting %i new sources in the database: %s',
+ len(pkgs_to_add), pkgs_to_add)
+ transaction = conn_db.begin()
+ conn_db.execute(sources_table.insert(), pkgs_to_add)
+ transaction.commit()
+
# RM'ed packages
cur_pkgs_name = [x[0] for x in cur_pkgs]
new_pkgs_name = [x[0] for x in new_pkgs]
rmed_pkgs = [x for x in cur_pkgs_name if x not in new_pkgs_name]
- log.info('Now deleting ' + str(len(rmed_pkgs)) +
- ' removed packages: ' + str(rmed_pkgs))
+ log.info('Now deleting %i removed packages: %s', len(rmed_pkgs),
+ rmed_pkgs)
rmed_pkgs_id = []
pkgs_to_rm = []
query = 'SELECT id FROM sources WHERE name="{}" AND suite="{}" ' + \
'AND architecture="{}"'
for pkg in rmed_pkgs:
result = query_db(query.format(pkg, suite, arch))
- rmed_pkgs_id.extend(result)
- pkgs_to_rm.append((pkg, suite, arch))
- log.debug('removed packages ID: ' + str([str(x[0]) for x in rmed_pkgs_id]))
- log.debug('removed packages: ' + str(pkgs_to_rm))
- cursor.executemany('DELETE FROM sources '
- 'WHERE id=?', rmed_pkgs_id)
- cursor.executemany('DELETE FROM results '
- 'WHERE package_id=?', rmed_pkgs_id)
- cursor.executemany('DELETE FROM schedule '
- 'WHERE package_id=?', rmed_pkgs_id)
- cursor.executemany('INSERT INTO removed_packages '
- '(name, suite, architecture) '
- 'VALUES (?, ?, ?)', pkgs_to_rm)
- conn_db.commit()
+ rmed_pkgs_id.append({'deleteid': result[0][0]})
+ pkgs_to_rm.append({'name': pkg, 'suite': suite, 'architecture': arch})
+ log.debug('removed packages ID: %s',
+ [str(x['deleteid']) for x in rmed_pkgs_id])
+ log.debug('removed packages: %s', pkgs_to_rm)
+
+ if rmed_pkgs_id:
+ transaction = conn_db.begin()
+ results_table = db_table('results')
+ schedule_table = db_table('schedule')
+ removed_packages_table = db_table('removed_packages')
+
+ delete_sources_query = sources_table.delete().\
+ where(sources_table.c.id == sql.bindparam('deleteid'))
+ delete_results_query = results_table.delete().\
+ where(results_table.c.package_id == sql.bindparam('deleteid'))
+ delete_schedule_query = schedule_table.delete().\
+ where(schedule_table.c.package_id == sql.bindparam('deleteid'))
+
+ conn_db.execute(delete_sources_query, rmed_pkgs_id)
+ conn_db.execute(delete_results_query, rmed_pkgs_id)
+ conn_db.execute(delete_schedule_query, rmed_pkgs_id)
+ conn_db.execute(removed_packages_table.insert(), pkgs_to_rm)
+ transaction.commit()
+
# finally check whether the db has the correct number of packages
query = 'SELECT count(*) FROM sources WHERE suite="{}" ' + \
'AND architecture="{}"'
@@ -301,14 +325,14 @@ def update_sources_db(suite, arch, sources):
if int(pkgs_end[0][0]) != count_new_pkgs:
print_critical_message('AH! The number of source in the Sources file' +
' is different than the one in the DB!')
- log.critical('source in the debian archive for the ' + suite +
- ' suite:' + str(count_new_pkgs))
- log.critical('source in the reproducible db for the ' + suite +
- ' suite:' + str(pkgs_end[0][0]))
+ log.critical('source in the debian archive for the %s suite: %s',
+ suite, str(count_new_pkgs))
+ log.critical('source in the reproducible db for the %s suite: %s',
+ suite, str(pkgs_end[0][0]))
sys.exit(1)
if pkgs_to_add:
log.info('Building pages for the new packages')
- gen_packages_html([Package(x[0]) for x in pkgs_to_add], no_clean=True)
+ gen_packages_html([Package(x['name']) for x in pkgs_to_add], no_clean=True)
def print_schedule_result(suite, arch, criteria, packages):
@@ -334,14 +358,10 @@ def queue_packages(all_pkgs, packages, date):
def schedule_packages(packages):
- pkgs = ((x, packages[x]) for x in packages)
- log.debug('IDs about to be scheduled: ' + str(packages.keys()))
- query = 'INSERT INTO schedule ' + \
- '(package_id, date_scheduled) ' + \
- 'VALUES (?, ?)'
- cursor = conn_db.cursor()
- cursor.executemany(query, pkgs)
- conn_db.commit()
+ pkgs = [{'package_id': x, 'date_scheduled': packages[x]} for x in packages.keys()]
+ log.debug('IDs about to be scheduled: %s', packages.keys())
+ if pkgs:
+ conn_db.execute(db_table('schedule').insert(), pkgs)
def add_up_numbers(packages, arch):
diff --git a/bin/reproducible_setup_notify.py b/bin/reproducible_setup_notify.py
index 8daee9ed..f7d4be9d 100755
--- a/bin/reproducible_setup_notify.py
+++ b/bin/reproducible_setup_notify.py
@@ -29,7 +29,6 @@ from reproducible_common import *
from reproducible_html_packages import gen_packages_html
from reproducible_html_indexes import build_page
-
class bcolors:
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
@@ -51,14 +50,18 @@ def _good(text):
def process_pkg(package, deactivate):
if deactivate:
- _good('Deactovating notification for package ' + str(package))
+ _good('Deactivating notification for package ' + str(package))
flag = 0
else:
_good('Activating notification for package ' + str(package))
flag = 1
- rows = c.execute(('UPDATE OR FAIL sources SET notify_maintainer="{}" ' +
- 'WHERE name="{}"').format(flag, package)).rowcount
- conn_db.commit()
+
+ sources_table = db_table('sources')
+ update_query = sources_table.update().\
+ where(sources_table.c.name == package).\
+ values(notify_maintainer=flag)
+ rows = conn_db.execute(update_query).rowcount
+
if rows == 0:
log.error(bcolors.FAIL + str(package) + ' does not exists')
sys.exit(1)
@@ -86,8 +89,6 @@ if maintainer:
log.info('\t' + ', '.join(pkgs))
packages.extend(pkgs)
-
-c = conn_db.cursor()
for package in packages:
process_pkg(package, local_args.deactivate)