From 8633c9e784489e23012866086ecc4cff30b45190 Mon Sep 17 00:00:00 2001 From: Mattia Rizzolo Date: Wed, 18 Feb 2015 02:46:02 +0100 Subject: reproducible: and finally commit the updated schema. + a new simple script to manage the db schema and handle+track updates (so remove the schema from _common.sh and call this instead) --- bin/reproducible_common.sh | 98 +------------- bin/reproducible_db_maintenance.py | 268 +++++++++++++++++++++++++++++++++++++ 2 files changed, 269 insertions(+), 97 deletions(-) create mode 100755 bin/reproducible_db_maintenance.py diff --git a/bin/reproducible_common.sh b/bin/reproducible_common.sh index 173c863e..f467cba3 100755 --- a/bin/reproducible_common.sh +++ b/bin/reproducible_common.sh @@ -25,103 +25,7 @@ if [ -f $PACKAGES_DB ] && [ -f $INIT ] ; then fi elif [ ! -f ${PACKAGES_DB} ] ; then echo "Warning: $PACKAGES_DB doesn't exist, creating it now." - echo - # create sqlite db if needed - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE source_packages - (name TEXT NOT NULL, - version TEXT NOT NULL, - status TEXT NOT NULL - CHECK (status IN ("blacklisted", "FTBFS","reproducible","unreproducible","404", "not for us")), - build_date TEXT NOT NULL, - PRIMARY KEY (name))' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE sources_scheduled - (name TEXT NOT NULL, - date_scheduled TEXT NOT NULL, - date_build_started TEXT NOT NULL, - PRIMARY KEY (name))' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE sources - (name TEXT NOT NULL, - version TEXT NOT NULL)' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE stats_pkg_state - (datum TEXT NOT NULL, - suite TEXT NOT NULL, - untested INTEGER, - reproducible INTEGER, - unreproducible INTEGER, - FTBFS INTEGER, - other INTEGER, - PRIMARY KEY (datum))' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE stats_builds_per_day - (datum TEXT NOT NULL, - suite TEXT NOT NULL, - reproducible INTEGER, - unreproducible INTEGER, - FTBFS INTEGER, - other INTEGER, - PRIMARY KEY (datum))' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE stats_builds_age - 
(datum TEXT NOT NULL, - suite TEXT NOT NULL, - oldest_reproducible REAL, - oldest_unreproducible REAL, - oldest_FTBFS REAL, - PRIMARY KEY (datum))' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE stats_bugs - (datum TEXT NOT NULL, - open_toolchain INTEGER, - done_toolchain INTEGER, - open_infrastructure INTEGER, - done_infrastructure INTEGER, - open_timestamps INTEGER, - done_timestamps INTEGER, - open_fileordering INTEGER, - done_fileordering INTEGER, - open_buildpath INTEGER, - done_buildpath INTEGER, - open_username INTEGER, - done_username INTEGER, - open_hostname INTEGER, - done_hostname INTEGER, - open_uname INTEGER, - done_uname INTEGER, - open_randomness INTEGER, - done_randomness INTEGER, - open_buildinfo INTEGER, - done_buildinfo INTEGER, - open_cpu INTEGER, - done_cpu INTEGER, - open_signatures INTEGER, - done_signatures INTEGER, - open_environment INTEGER, - done_environment INTEGER, - PRIMARY KEY (datum))' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE stats_notes - (datum TEXT NOT NULL, - packages_with_notes INTEGER, - PRIMARY KEY (datum))' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE stats_issues - (datum TEXT NOT NULL, - known_issues INTEGER, - PRIMARY KEY (datum))' - sqlite3 ${PACKAGES_DB} ' - CREATE TABLE stats_meta_pkg_state - (datum TEXT NOT NULL, - suite TEXT NOT NULL, - meta_pkg TEXT NOT NULL, - reproducible INTEGER, - unreproducible INTEGER, - FTBFS INTEGER, - other INTEGER, - PRIMARY KEY (datum, suite, meta_pkg))' + /srv/jenkins/bin/reproducible_db_maintenance.py # 60 seconds timeout when trying to get a lock cat > $INIT <<-EOF .timeout 60000 diff --git a/bin/reproducible_db_maintenance.py b/bin/reproducible_db_maintenance.py new file mode 100755 index 00000000..87968cc6 --- /dev/null +++ b/bin/reproducible_db_maintenance.py @@ -0,0 +1,268 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +# +# Copyright © 2015 Mattia Rizzolo +# Based on variuos reproducible_* files © 2014 Holger Levsen +# Licensed under GPL-2 +# +# Depends: python3 +# +# Track the database 
schema and changes to it. plus allow a simple creation +# and migration of it. + +from reproducible_common import * + +now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + +# the original schema is here +db_schema = [ + { + 'name': 'rb_schema', + 'query': ['''CREATE TABLE rb_schema + (version INTEGER NOT NULL, + date TEXT NOT NULL, + PRIMARY KEY (version))''', + 'INSERT INTO rb_schema VALUES ("1", "''' + now + '")'] + }, + { + 'name': 'source_packages', + 'query': ['''CREATE TABLE source_packages + (name TEXT NOT NULL, + version TEXT NOT NULL, + status TEXT NOT NULL + CHECK + (status IN + ("blacklisted", "FTBFS", "reproducible", + "unreproducible", "404", "not for us") + ), + build_date TEXT NOT NULL, + PRIMARY KEY (name))'''] + }, + { + 'name': 'sources_scheduled', + 'query': ['''CREATE TABLE sources_scheduled + (name TEXT NOT NULL, + date_scheduled TEXT NOT NULL, + date_build_started TEXT NOT NULL, + PRIMARY KEY (name))'''] + }, + { + 'name': 'sources', + 'query': ['''CREATE TABLE sources + (name TEXT NOT NULL, + version TEXT NOT NULL)'''] + }, + { + 'name': 'stats_pkg_state', + 'query': ['''CREATE TABLE stats_pkg_state + (datum TEXT NOT NULL, + suite TEXT NOT NULL, + untested INTEGER, + reproducible INTEGER, + unreproducible INTEGER, + FTBFS INTEGER, + other INTEGER, + PRIMARY KEY (datum))'''] + }, + { + 'name': 'stats_builds_per_day', + 'query': ['''CREATE TABLE stats_builds_per_day + (datum TEXT NOT NULL, + suite TEXT NOT NULL, + reproducible INTEGER, + unreproducible INTEGER, + FTBFS INTEGER, + other INTEGER, + PRIMARY KEY (datum))'''] + }, + { + 'name': 'stats_builds_age', + 'query': ['''CREATE TABLE stats_builds_age + (datum TEXT NOT NULL, + suite TEXT NOT NULL, + oldest_reproducible REAL, + oldest_unreproducible REAL, + oldest_FTBFS REAL, + PRIMARY KEY (datum))'''] + }, + { + 'name': 'stats_bugs', + 'query': ['''CREATE TABLE stats_bugs + (datum TEXT NOT NULL, + open_toolchain INTEGER, + done_toolchain INTEGER, + open_infrastructure INTEGER, + 
done_infrastructure INTEGER, + open_timestamps INTEGER, + done_timestamps INTEGER, + open_fileordering INTEGER, + done_fileordering INTEGER, + open_buildpath INTEGER, + done_buildpath INTEGER, + open_username INTEGER, + done_username INTEGER, + open_hostname INTEGER, + done_hostname INTEGER, + open_uname INTEGER, + done_uname INTEGER, + open_randomness INTEGER, + done_randomness INTEGER, + open_buildinfo INTEGER, + done_buildinfo INTEGER, + open_cpu INTEGER, + done_cpu INTEGER, + PRIMARY KEY (datum))'''] + }, + { + 'name': 'stats_notes', + 'query': ['''CREATE TABLE stats_notes + (datum TEXT NOT NULL, + packages_with_notes INTEGER, + PRIMARY KEY (datum))'''] + }, + { + 'name': 'stats_issues', + 'query': ['''CREATE TABLE stats_issues + (datum TEXT NOT NULL, + known_issues INTEGER, + PRIMARY KEY (datum))'''] + }, + { + 'name': 'stats_meta_pkg_state', + 'query': ['''CREATE TABLE stats_meta_pkg_state + (datum TEXT NOT NULL, + suite TEXT NOT NULL, + meta_pkg TEXT NOT NULL, + reproducible INTEGER, + unreproducible INTEGER, + FTBFS INTEGER, + other INTEGER, + PRIMARY KEY (datum, suite, meta_pkg))'''] + } +] + +# and here there is a list of queries, split by update, that can be used to +# update the live schema +schema_updates = { + 1: ['INSERT INTO rb_schema VALUES ("1", "' + now + '")'], + 2: [ # do a funny dance to add an id, suite and architecture values to + # the `sources` table + '''CREATE TABLE sources_new_tmp + (id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + version TEXT NOT NULL, + suite TEXT, + architecture TEXT, + UNIQUE (name, suite, architecture) + ON CONFLICT REPLACE)''', + '''CREATE TABLE sources_new + (id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + version TEXT NOT NULL, + suite TEXT NOT NULL, + architecture TEXT NOT NULL, + UNIQUE (name, suite, architecture) + ON CONFLICT REPLACE)''', + 'INSERT INTO sources_new_tmp (name, version) SELECT * FROM sources', + 'UPDATE sources_new_tmp SET suite="sid", architecture="amd64"', + 'INSERT INTO sources_new SELECT *
FROM sources_new_tmp', + 'DROP TABLE sources_new_tmp', + 'DROP TABLE sources', + 'ALTER TABLE sources_new RENAME TO sources', + # now that we have an id in `sources` rework all tables to join + # against this table, and avoid duplicating data + # `schedule`: + '''CREATE TABLE schedule + (id INTEGER PRIMARY KEY, + package_id INTEGER NOT NULL, + date_scheduled TEXT NOT NULL, + date_build_started TEXT NOT NULL, + save_artifacts INTEGER DEFAULT 0, + UNIQUE (package_id), + FOREIGN KEY(package_id) REFERENCES sources(id))''', + '''INSERT INTO schedule (package_id, date_scheduled, date_build_started) + SELECT s.id, p.date_scheduled, p.date_build_started + FROM sources AS s JOIN sources_scheduled AS p ON s.name = p.name''', + 'DROP TABLE sources_scheduled', + # `results`: + '''CREATE TABLE results + (id INTEGER PRIMARY KEY, + package_id INTEGER NOT NULL, + version TEXT NOT NULL, + status TEXT, + build_date TEXT, + build_duration TEXT DEFAULT '0', + UNIQUE (package_id) + FOREIGN KEY(package_id) REFERENCES sources(id))''', + '''INSERT INTO results (package_id, version, status, build_date) + SELECT s.id, r.version, r.status, r.build_date + FROM sources AS s JOIN source_packages as r ON s.name = r.name''', + 'DROP TABLE source_packages', + # `stats_builds`: (completely new table where we save every build) + '''CREATE TABLE stats_build + (id INTEGER PRIMARY KEY, + name TEXT NOT NULL, + version TEXT NOT NULL, + suite TEXT NOT NULL, + architecture TEXT NOT NULL, + status TEXT NOT NULL, + build_date TEXT NOT NULL, + build_duration TEXT NOT NULL, + UNIQUE (name, version, suite, architecture, build_date))''', + 'INSERT INTO rb_schema VALUES ("2", "' + now + '")'] +} + + +def db_create_tables(): + """ + Check whether all tables are present, and, if not, create them. + The check is done against sqlite_master, a reserved sqlite table + containing all database metadata.
+ """ + for table in db_schema: + query = 'SELECT name FROM sqlite_master WHERE name="{}"' + query = query.format(table['name']) + if not query_db(query): + log.warning(table['name'] + ' does not exists. Creating...') + for query in table['query']: + log.info('\t' + re.sub(' +', ' ', query.replace('\n', ' '))) + query_db(query) + + +def db_update(): + """ + Update the database schema. + It get a list of query to perform from schema_updates, and (if needed) + some python code from the above run_update_code(). + The need for an update is detect by checking the biggest value in the + rb_schema table against the biggest value in the schema_updates dictionary + """ + current = query_db('SELECT MAX(version) FROM rb_schema')[0][0] + if not current: + log.warning('This is probably a new database, there are no ' + + 'previous updates noted') + current = 0 + last = max(schema_updates.keys()) + if current == last: + return + if current > last: + print_critiacal_message('The active database schema is higher than' + + ' the last update available.\nPlease check!') + sys.exit(1) + log.info('Found schema updates.') + for update in range(current+1, last+1): + log.info('Applying database update #' + str(update) + '. Queries:') + for query in schema_updates[update]: + log.info('\t' + query) + query_db(query) + + +if __name__ == '__main__': + try: + if not query_db('SELECT * FROM rb_schema'): + db_create_tables() + except: + log.error('There is no rb_schema table in the database.') + log.error('Will run a full db_create_tables().') + db_create_tables() + db_update() -- cgit v1.2.3-70-g09d2