summaryrefslogtreecommitdiffstats
path: root/bin/reproducible_migrate_db.py
diff options
context:
space:
mode:
authorMattia Rizzolo <mattia@debian.org>2016-12-19 15:11:19 +0100
committerHolger Levsen <holger@layer-acht.org>2016-12-19 15:18:31 +0100
commitd3144f6f93e4886338f23ab49fb37fa30f163858 (patch)
tree59b948e6e61729b96cb5f4128a85e971c8166888 /bin/reproducible_migrate_db.py
parent815e79b226529bdabfeef124a06fdd3a3ec8d5e3 (diff)
downloadjenkins.debian.net-d3144f6f93e4886338f23ab49fb37fa30f163858.tar.xz
reproducibe debian: delete now unneeded reproducible_migrate_db script
Signed-off-by: Holger Levsen <holger@layer-acht.org>
Diffstat (limited to 'bin/reproducible_migrate_db.py')
-rwxr-xr-xbin/reproducible_migrate_db.py154
1 files changed, 0 insertions, 154 deletions
diff --git a/bin/reproducible_migrate_db.py b/bin/reproducible_migrate_db.py
deleted file mode 100755
index 575b04ba..00000000
--- a/bin/reproducible_migrate_db.py
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-#
-# Copyright © 2016 Valerie Young <spectranaut@riseup.net>
-# Licensed under GPL-2
-#
-# Depends: sqlalchemy, reproducible_common
-
-from sqlalchemy import *
-from sqlalchemy.schema import *
-import sqlalchemy.util
-from pprint import pprint
-import subprocess
-import sys
-import os
-import csv
-
-# We skip the database connection because the database
-# may not exist yet, but we would like to use the constants
-# available in reproducible_common.py
-sys.argv.append('--skip-database-connection')
-from reproducible_common import *
-
-# Database defined in reproducible_common.py
-DEST_DB_NAME = PGDATABASE
-DEST_DB_USER = os.environ['USER']
-
-# Old SQLite database
-SOURCE_DB_NAME = '/var/lib/jenkins/reproducible.db'
-
-try:
- # check is the user exists, if not created it
- log.info("Checking if postgres role exists...")
- query = "SELECT 1 FROM pg_roles WHERE rolname='%s';" % DEST_DB_USER
- command = ['sudo', '-u', 'postgres', 'psql', '-tAc', query]
- result = subprocess.check_output(command).decode("utf-8").strip()
-
- if result == '1':
- log.info("user exists.")
- else:
- log.info("Postgres role %s does not exist. Creating role."
- % DEST_DB_USER)
- check_call(['sudo', '-u', 'postgres', 'createuser', '-w', DEST_DB_USER])
-
- # check is the database exists, if not created it
- log.info("Checking if postgres database exists...")
- query = "SELECT 1 FROM pg_database WHERE datname='%s'" % DEST_DB_NAME
- command = ['sudo', '-u', 'postgres', 'psql', '-tAc', query]
- result = subprocess.check_output(command).decode("utf-8").strip()
-
- if result == '1':
- log.info("database exists.")
- else:
- log.info("Postgres database %s does not exist. Creating database."
- % DEST_DB_NAME)
- check_call(['sudo', '-u', 'postgres', 'createdb', '-O', DEST_DB_USER,
- '-w', DEST_DB_NAME])
-
-except FileNotFoundError:
- print_critical_message("Postgres is not installed. Install postgres before continuing.")
- sys.exit(1)
-
-# Run reproducible_db_maintenance. This will create the appropriate schema.
-db_maintenance = os.path.join(BIN_PATH, "reproducible_db_maintenance.py")
-check_call([db_maintenance])
-
-# Connect to both databases
-dest_engine = create_engine("postgresql:///%s" % DEST_DB_NAME)
-dest_conn = dest_engine.connect()
-source_engine = create_engine("sqlite:///%s" % SOURCE_DB_NAME)
-source_conn = source_engine.connect()
-
-# Load all table definitions for both databases. They should be identical
-# (both up to date according to reproducible_db_maintenance.py)
-source_metadata = MetaData(source_engine)
-source_metadata.reflect()
-dest_metadata = MetaData(dest_engine)
-dest_metadata.reflect()
-
-# The order in which we will copy the table
-all_tables = ['sources', 'issues', 'notes', 'removed_packages', 'results',
- 'stats_bugs', 'stats_build', 'stats_builds_age', 'stats_notes',
- 'stats_issues', 'stats_meta_pkg_state', 'stats_builds_per_day',
- 'stats_pkg_state']
-
-# Get all table definitions in source and destination. If the table doesn't
-# exist in one of these two places, an error will occur.
-dest_tables = {t: Table(t, dest_metadata, autoload=True) for t in all_tables}
-source_tables = {t: Table(t, source_metadata, autoload=True) for t in all_tables}
-
-for table in all_tables:
- log.info("Copying table: " + table)
- dest_table = dest_tables[table]
- dest_columns = dest_table.columns.keys()
- source_table = source_tables[table]
- source_columns = source_table.columns.keys()
-
- if table in ['notes', 'results']:
- sources = Table('sources', source_metadata, autoload=True)
- # only select rows with correct foreign references to the SOURCES table
- query = sql.select(source_table.c).select_from(
- source_table.join(sources))
-
- # save rows with incorrect foreign references to the SOURCES table
- ignored_query = select(source_table.c).select_from(source_table).where(
- source_table.c.package_id.notin_(select([sources.c.id])))
- ignored_results = source_conn.execute(ignored_query).fetchall()
- if len(ignored_results):
- log.info('Ignoring bad foreign keys in %s. Dumping rows to '
- 'ignored_rows_%s.csv' % (table, table))
- with open('ignored_rows_' + table + '.csv', 'w') as f:
- writer = csv.DictWriter(f, fieldnames=ignored_results[0].keys())
- writer.writeheader()
- for row in ignored_results:
- writer.writerow(dict(row))
- else:
- query = "select * from %s" % table
-
- # Perform each table copy in a single transaction
- transaction = dest_conn.begin()
- try:
- for record in source_conn.execute(query):
- data = {}
- for c in source_columns:
- col = c.lower()
- value = getattr(record, c)
- if str(dest_table.c[col].type) == 'INTEGER' and value == "":
- # there exist empty string values in the sqlite database
- # for integers. This will result in an error if we try to
- # write these values to the postgres db.
- log.info("column %s has empty string/null value" % col)
- else:
- data[col] = value
-
- try:
- dest_conn.execute(dest_table.insert(), [data])
- except:
- log.critical("Could not insert: %s" % data)
- raise
-
- # Commit the whole table at once
- transaction.commit()
- except:
- transaction.rollback()
- log.error("Transaction rolled back")
- raise
-
-# For the autoincrementing columns to work correctly, we must set the next
-# value of the sequence to be the next highest available ID.
-table_sequences = {t : t + "_id_seq" for t in ['sources', 'results']}
-
-for table in table_sequences:
- query = "select setval('%s', (select max(id)+1 from %s))"
- dest_conn.execute(query % (table_sequences[table], table))