#!/usr/bin/python3
# -*- coding: utf-8 -*-
#
# Copyright © 2016 Valerie Young
# Licensed under GPL-2
#
# Depends: sqlalchemy, reproducible_common

from sqlalchemy import *
from sqlalchemy.schema import *
import sqlalchemy.util

from pprint import pprint
import subprocess
import sys
import os
import csv

# We skip the database connection because the database may not exist yet,
# but we would like to use the constants available in reproducible_common.py
sys.argv.append('--skip-database-connection')
from reproducible_common import *

# Database defined in reproducible_common.py
DEST_DB_NAME = PGDATABASE
DEST_DB_USER = os.environ['USER']

# Old SQLite database
SOURCE_DB_NAME = '/var/lib/jenkins/reproducible.db'

try:
    # Check if the postgres role exists; if not, create it.
    log.info("Checking if postgres role exists...")
    query = "SELECT 1 FROM pg_roles WHERE rolname='%s';" % DEST_DB_USER
    command = ['sudo', '-u', 'postgres', 'psql', '-tAc', query]
    result = subprocess.check_output(command).decode("utf-8").strip()
    if result == '1':
        log.info("User exists.")
    else:
        log.info("Postgres role %s does not exist. Creating role."
                 % DEST_DB_USER)
        subprocess.check_call(['sudo', '-u', 'postgres', 'createuser',
                               '-w', DEST_DB_USER])

    # Check if the database exists; if not, create it.
    log.info("Checking if postgres database exists...")
    query = "SELECT 1 FROM pg_database WHERE datname='%s'" % DEST_DB_NAME
    command = ['sudo', '-u', 'postgres', 'psql', '-tAc', query]
    result = subprocess.check_output(command).decode("utf-8").strip()
    if result == '1':
        log.info("Database exists.")
    else:
        log.info("Postgres database %s does not exist. Creating database."
                 % DEST_DB_NAME)
        subprocess.check_call(['sudo', '-u', 'postgres', 'createdb',
                               '-O', DEST_DB_USER, '-w', DEST_DB_NAME])

except FileNotFoundError:
    print_critical_message("Postgres is not installed. "
                           "Install postgres before continuing.")
    sys.exit(1)

# Run reproducible_db_maintenance.py. This will create the appropriate schema.
db_maintenance = os.path.join(BIN_PATH, "reproducible_db_maintenance.py")
subprocess.check_call([db_maintenance])

# Connect to both databases.
dest_engine = create_engine("postgresql:///%s" % DEST_DB_NAME)
dest_conn = dest_engine.connect()
source_engine = create_engine("sqlite:///%s" % SOURCE_DB_NAME)
source_conn = source_engine.connect()

# Load all table definitions for both databases. They should be identical
# (both up to date according to reproducible_db_maintenance.py).
source_metadata = MetaData(source_engine)
source_metadata.reflect()
dest_metadata = MetaData(dest_engine)
dest_metadata.reflect()

# The order in which we will copy the tables: 'sources' must come before
# 'notes' and 'results', which reference it.
all_tables = ['sources', 'issues', 'notes', 'removed_packages', 'results',
              'stats_bugs', 'stats_build', 'stats_builds_age', 'stats_notes',
              'stats_issues', 'stats_meta_pkg_state', 'stats_builds_per_day',
              'stats_pkg_state']
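# A minimal sanity check added here (not part of the original script): make
# sure reflection actually found every table we plan to copy, so a missing
# table aborts with a clear message rather than failing midway through the
# copy. Assumes both schemas were just brought up to date by
# reproducible_db_maintenance.py.
for t in all_tables:
    for schema, metadata in [('sqlite source', source_metadata),
                             ('postgres destination', dest_metadata)]:
        if t not in metadata.tables:
            log.critical("Table %s missing from the %s." % (t, schema))
            sys.exit(1)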
# Get all table definitions in source and destination. If a table doesn't
# exist in one of these two places, an error will occur.
dest_tables = {t: Table(t, dest_metadata, autoload=True)
               for t in all_tables}
source_tables = {t: Table(t, source_metadata, autoload=True)
                 for t in all_tables}

for table in all_tables:
    log.info("Copying table: " + table)
    dest_table = dest_tables[table]
    dest_columns = dest_table.columns.keys()
    source_table = source_tables[table]
    source_columns = source_table.columns.keys()

    if table in ['notes', 'results']:
        sources = Table('sources', source_metadata, autoload=True)
        # Only select rows with correct foreign references to the
        # 'sources' table.
        query = select(source_table.c).select_from(
            source_table.join(sources))
        # Save rows with incorrect foreign references to the 'sources'
        # table.
        ignored_query = select(source_table.c).select_from(
            source_table).where(
            source_table.c.package_id.notin_(select([sources.c.id])))
        ignored_results = source_conn.execute(ignored_query).fetchall()
        if len(ignored_results):
            log.info('Ignoring bad foreign keys in %s. Dumping rows to '
                     'ignored_rows_%s.csv' % (table, table))
            with open('ignored_rows_' + table + '.csv', 'w') as f:
                writer = csv.DictWriter(
                    f, fieldnames=ignored_results[0].keys())
                writer.writeheader()
                for row in ignored_results:
                    writer.writerow(dict(row))
    else:
        query = "select * from %s" % table

    # Perform each table copy in a single transaction.
    transaction = dest_conn.begin()
    try:
        for record in source_conn.execute(query):
            data = {}
            for c in source_columns:
                col = c.lower()
                value = getattr(record, c)
                if str(dest_table.c[col].type) == 'INTEGER' and value == "":
                    # The SQLite database contains empty string values in
                    # integer columns. Writing these to postgres would fail,
                    # so leave the column unset and let it become NULL.
                    log.info("column %s has empty string/null value" % col)
                else:
                    data[col] = value
            try:
                dest_conn.execute(dest_table.insert(), [data])
            except Exception:
                log.critical("Could not insert: %s" % data)
                raise
        # Commit the whole table at once.
        transaction.commit()
    except Exception:
        transaction.rollback()
        log.error("Transaction rolled back")
        raise

# For the autoincrementing columns to work correctly, we must set the next
# value of each sequence to the next highest available ID.
table_sequences = {t: t + "_id_seq" for t in ['sources', 'results']}
for table in table_sequences:
    query = "select setval('%s', (select max(id)+1 from %s))"
    dest_conn.execute(query % (table_sequences[table], table))
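# A small verification sketch, added as a final spot check (not part of the
# original migration): compare row counts between the two databases. Counts
# may legitimately differ for 'notes' and 'results', where rows with bad
# foreign references were skipped and dumped to CSV above.
for table in all_tables:
    src_count = source_conn.execute(
        "select count(*) from %s" % table).scalar()
    dest_count = dest_conn.execute(
        "select count(*) from %s" % table).scalar()
    if src_count != dest_count:
        log.warning("Row count mismatch for %s: sqlite has %s rows, "
                    "postgres has %s" % (table, src_count, dest_count))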