From 68ff1feb17c9f206f6e1081b0ab27d317741c838 Mon Sep 17 00:00:00 2001 From: Valerie R Young Date: Fri, 9 Sep 2016 15:16:38 -0400 Subject: reproducible Debian: partial conversion to sqlalchemy expression language This commit partially converts two scripts to use the sqlachemy expression language to be sqlite/postgres agnostic. Signed-off-by: Mattia Rizzolo Signed-off-by: Holger Levsen --- bin/reproducible_html_indexes.py | 305 ++++++++++++++++++++++++++++++----- bin/reproducible_html_live_status.py | 114 ++++++++++--- 2 files changed, 358 insertions(+), 61 deletions(-) diff --git a/bin/reproducible_html_indexes.py b/bin/reproducible_html_indexes.py index 3163da54..ea586780 100755 --- a/bin/reproducible_html_indexes.py +++ b/bin/reproducible_html_indexes.py @@ -11,6 +11,7 @@ # Build quite all index_* pages from reproducible_common import * +from sqlalchemy import select, and_, or_, func, bindparam, desc """ Reference doc for the folowing lists: @@ -51,35 +52,255 @@ Technically speaking, a page can be empty (we all love nonsense) but every section must have at least a `query` defining what to file in. """ -# filter_query is defined in reproducible_common.py and excludes some FTBFS issues +timespan_date_map = {} +timespan_date_map[24] = (datetime.now()-timedelta(hours=24)).strftime('%Y-%m-%d %H:%M') +timespan_date_map[48] = (datetime.now()-timedelta(hours=48)).strftime('%Y-%m-%d %H:%M') + +# sqlalchemy table definitions needed for queries +results = db_table('results') +sources = db_table('sources') +notes = db_table('notes') + +# filtered_issues is defined in reproducible_common.py and +# can be used to excludes some FTBFS issues +filter_issues_list = [] +for issue in filtered_issues: + filter_issues_list.append(notes.c.issues.contains(issue)) +if not filtered_issues: + filter_issues_list = [None] + +count_results = select( + [func.count(results.c.id)] +).select_from( + results.join(sources) +).where( + and_( + sources.c.suite == bindparam('suite'), + sources.c.architecture == bindparam('arch') + ) +) + +select_sources = select( + [sources.c.name] +).select_from( + results.join(sources) +).where( + and_( + sources.c.suite == bindparam('suite'), + sources.c.architecture == bindparam('arch') + ) +) + queries = { - 'count_total': 'SELECT COUNT(*) FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}"', - 'count_timespan': 'SELECT COUNT(*) FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND build_date > datetime("now", "-{timespan} hours")', - 'reproducible_all': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status="reproducible" ORDER BY r.build_date DESC', - 'reproducible_last24h': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status="reproducible" AND r.build_date > datetime("now", "-24 hours") ORDER BY r.build_date DESC', - 'reproducible_last48h': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status="reproducible" AND r.build_date > datetime("now", "-48 hours") ORDER BY r.build_date DESC', - 'reproducible_all_abc': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status="reproducible" ORDER BY name', - 'FTBR_all': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "unreproducible" ORDER BY build_date DESC', - 'FTBR_last24h': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "unreproducible" AND build_date > datetime("now", "-24 hours") ORDER BY build_date DESC', - 'FTBR_last48h': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "unreproducible" AND build_date > datetime("now", "-48 hours") ORDER BY build_date DESC', - 'FTBR_all_abc': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "unreproducible" ORDER BY name', - 'FTBFS_all': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "FTBFS" ORDER BY build_date DESC', - 'FTBFS_last24h': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "FTBFS" AND build_date > datetime("now", "-24 hours") ORDER BY build_date DESC', - 'FTBFS_last48h': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "FTBFS" AND build_date > datetime("now", "-48 hours") ORDER BY build_date DESC', - 'FTBFS_all_abc': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "FTBFS" ORDER BY s.name', - 'FTBFS_filtered': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status = "FTBFS" AND r.package_id NOT IN (SELECT n.package_id FROM NOTES AS n WHERE ' + filter_query + ' ) ORDER BY s.name', - 'FTBFS_caused_by_us': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status = "FTBFS" AND r.package_id IN (SELECT n.package_id FROM NOTES AS n WHERE ' + filter_query + ' ) ORDER BY s.name', - '404_all': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "404" ORDER BY build_date DESC', - '404_all_abc': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "404" ORDER BY name', - 'depwait_all': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "depwait" ORDER BY build_date DESC', - 'depwait_all_abc': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "depwait" ORDER BY name', - 'depwait_last24h': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "depwait" AND build_date > datetime("now", "-24 hours") ORDER BY build_date DESC', - 'depwait_last48h': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "depwait" AND build_date > datetime("now", "-48 hours") ORDER BY build_date DESC', - 'not_for_us_all': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "not for us" ORDER BY name', - 'blacklisted_all': 'SELECT s.name FROM results AS r JOIN sources AS s ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND status = "blacklisted" ORDER BY name', - 'notes': 'SELECT s.name FROM sources AS s JOIN notes AS n ON n.package_id=s.id JOIN results AS r ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status="{status}" ORDER BY r.build_date DESC', - 'no_notes': 'SELECT s.name FROM sources AS s JOIN results AS r ON r.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status="{status}" AND s.id NOT IN (SELECT package_id FROM notes) ORDER BY r.build_date DESC', - 'notification': 'SELECT s.name FROM sources AS s JOIN results AS r ON s.id=r.package_id WHERE s.suite="{suite}" AND s.architecture="{arch}" AND r.status="{status}" AND s.notify_maintainer = 1', + "count_total": count_results, + "count_timespan": + count_results.where( + results.c.build_date > bindparam('timespan_date'), + ), + "reproducible_all": + select_sources.where( + results.c.status == 'reproducible', + ).order_by( + desc(results.c.build_date) + ), + "reproducible_last24h": + select_sources.where( + results.c.build_date > timespan_date_map[24] + ), + "reproducible_last48h": + select_sources.where( + results.c.build_date > timespan_date_map[48], + ), + "reproducible_all_abc": + select_sources.where( + results.c.status == 'reproducible', + ).order_by( + sources.c.name + ), + "FTBR_all": + select_sources.where( + results.c.status == 'unreproducible', + ).order_by( + desc(results.c.build_date) + ), + "FTBR_last24h": + select_sources.where( + and_( + results.c.status == 'unreproducible', + results.c.build_date > timespan_date_map[24], + ) + ).order_by( + desc(results.c.build_date) + ), + "FTBR_last48h": + select_sources.where( + and_( + results.c.status == 'unreproducible', + results.c.build_date > timespan_date_map[48], + ) + ).order_by( + desc(results.c.build_date) + ), + "FTBR_all_abc": + select_sources.where( + results.c.status == 'unreproducible', + ).order_by( + sources.c.name + ), + "FTBFS_all": + select_sources.where( + results.c.status == 'FTBFS', + ).order_by( + desc(results.c.build_date) + ), + "FTBFS_last24h": + select_sources.where( + and_( + results.c.status == 'FTBFS', + results.c.build_date > timespan_date_map[24], + ) + ).order_by( + desc(results.c.build_date) + ), + "FTBFS_last48h": + select_sources.where( + and_( + results.c.status == 'FTBFS', + results.c.build_date > timespan_date_map[48], + ) + ).order_by( + desc(results.c.build_date) + ), + "FTBFS_all_abc": + select_sources.where( + results.c.status == 'FTBFS', + ).order_by( + sources.c.name + ), + "FTBFS_filtered": + select_sources.where( + and_( + results.c.status == 'FTBFS', + sources.c.id.notin_( + select( + [notes.c.package_id] + ).select_from( + notes + ).where( + or_(*filter_issues_list) + ) + ) + ) + ).order_by( + desc(results.c.build_date) + ), + "FTBFS_caused_by_us": + select_sources.where( + and_( + results.c.status == 'FTBFS', + sources.c.id.in_( + select( + [notes.c.package_id] + ).select_from( + notes + ).where( + or_(*filter_issues_list) + ) + ) + ) + ).order_by( + desc(results.c.build_date) + ), + "404_all": + select_sources.where( + results.c.status == '404', + ).order_by( + desc(results.c.build_date) + ), + "404_all_abc": + select_sources.where( + results.c.status == '404', + ).order_by( + sources.c.name + ), + "depwait_all": + select_sources.where( + results.c.status == 'depwait', + ).order_by( + desc(results.c.build_date) + ), + "depwait_all_abc": + select_sources.where( + results.c.status == 'depwait', + ).order_by( + sources.c.name + ), + "depwait_last24h": + select_sources.where( + and_( + results.c.status == 'depwait', + results.c.build_date > timespan_date_map[24], + + ) + ).order_by( + desc(results.c.build_date) + ), + "depwait_last48h": + select_sources.where( + and_( + results.c.status == 'depwait', + results.c.build_date > timespan_date_map[48], + ) + ).order_by( + desc(results.c.build_date) + ), + "not_for_us_all": + select_sources.where( + and_( + results.c.status == 'not for us', + + ) + ).order_by( + sources.c.name + ), + "blacklisted_all": + select_sources.where( + results.c.status == 'blacklisted', + ).order_by( + sources.c.name + ), + "notes": + select( + [sources.c.name] + ).select_from( + sources.join(results).join(notes) + ).where( + and_( + results.c.status == bindparam('status'), + sources.c.suite == bindparam('suite'), + sources.c.architecture == bindparam('arch') + ) + ).order_by( + desc(results.c.build_date) + ), + "no_notes": + select_sources.where( + and_( + results.c.status == bindparam('status'), + sources.c.id.notin_(select([notes.c.package_id]).select_from(notes)) + ) + ).order_by( + desc(results.c.build_date) + ), + "notification": + select_sources.where( + and_( + results.c.status == bindparam('status'), + sources.c.notify_maintainer == 1 + ) + ).order_by( + desc(results.c.build_date) + ), } pages = { @@ -292,7 +513,7 @@ pages = { 'notes': True, 'title': 'Packages with notes', 'header': '

There are {tot} packages with notes in {suite}/{arch}.

', - 'header_query': 'SELECT count(*) FROM (SELECT * FROM sources AS s JOIN notes AS n ON n.package_id=s.id WHERE s.suite="{suite}" AND s.architecture="{arch}" GROUP BY s.name) AS tmp', + 'header_query': "SELECT count(*) FROM (SELECT s.name FROM sources AS s JOIN notes AS n ON n.package_id=s.id WHERE s.suite='{suite}' AND s.architecture='{arch}' GROUP BY s.name) AS tmp", 'body': [ { 'icon_status': 'FTBR', @@ -348,7 +569,7 @@ pages = { 'notes_hint': True, 'title': 'Packages without notes', 'header': '

There are {tot} faulty packages without notes in {suite}/{arch}.{hint}

', - 'header_query': 'SELECT COUNT(*) FROM (SELECT s.id FROM sources AS s JOIN results AS r ON r.package_id=s.id WHERE r.status IN ("unreproducible", "FTBFS", "blacklisted") AND s.id NOT IN (SELECT package_id FROM notes) AND s.suite="{suite}" AND s.architecture="{arch}")', + 'header_query': "SELECT COUNT(*) FROM (SELECT s.id FROM sources AS s JOIN results AS r ON r.package_id=s.id WHERE r.status IN ('unreproducible', 'FTBFS', 'blacklisted') AND s.id NOT IN (SELECT package_id FROM notes) AND s.suite='{suite}' AND s.architecture='{arch}') AS tmp", 'body': [ { 'icon_status': 'FTBR', @@ -379,7 +600,7 @@ pages = { 'nosuite': True, 'title': 'Packages with notification enabled', 'header': '

The following {tot} packages have notifications enabled. (This page only shows packages in {suite}/{arch} though notifications are send for these packages in unstable and experimental in all tested architectures.) On status changes (e.g. reproducible → unreproducible) the system notifies the maintainer and relevant parties via an email to $srcpackage@packages.debian.org. Notifications are collected and send once a day to avoid flooding.
Please ask us to enable notifications for your package(s) in our IRC channel #debian-reproducible or via mail - but ask your fellow team members first if they want to receive such notifications.

', - 'header_query': 'SELECT COUNT(*) FROM sources WHERE suite="{suite}" AND architecture="{arch}" AND notify_maintainer = 1', + 'header_query': "SELECT COUNT(*) FROM sources WHERE suite='{suite}' AND architecture='{arch}' AND notify_maintainer = 1", 'body': [ { 'icon_status': 'FTBR', @@ -413,7 +634,7 @@ pages = { def build_leading_text_section(section, rows, suite, arch): html = '

\n' + tab total = len(rows) - count_total = int(query_db(queries['count_total'].format(suite=suite, arch=arch))[0][0]) + count_total = int(query_db(queries['count_total'].params({'suite': suite, 'arch': arch}))[0][0]) try: percent = round(((total/count_total)*100), 1) except ZeroDivisionError: @@ -433,10 +654,13 @@ def build_leading_text_section(section, rows, suite, arch): html += '' html += '\n' + tab if section.get('text') and section.get('timespan'): - count = len(query_db(queries[section['query2']].format(suite=suite, arch=arch))) + count = len(query_db(queries[section['query2']].params( + {'suite': suite, 'arch': arch}))) percent = round(((count/count_total)*100), 1) timespan = section['timespan'] - timespan_count = int(query_db(queries['count_timespan'].format(suite=suite, arch=arch, timespan=timespan))[0][0]) + timespan_date = timespan_date_map[timespan] + timespan_count = int(query_db(queries['count_timespan'].params( + {'suite': suite, 'arch': arch, 'timespan_date': timespan_date}))[0][0]) try: timespan_percent = round(((total/timespan_count)*100), 1) except ZeroDivisionError: @@ -445,7 +669,8 @@ def build_leading_text_section(section, rows, suite, arch): timespan_percent = 0 html += section['text'].substitute(tot=total, percent=percent, - timespan_percent=timespan_percent, timespan_count=timespan_count, + timespan_percent=timespan_percent, + timespan_count=timespan_count, count_total=count_total, count=count, suite=suite, arch=arch) elif section.get('text'): @@ -463,18 +688,18 @@ def build_page_section(page, section, suite, arch): suite = defaultsuite arch = defaultarch if pages[page].get('notes') and pages[page]['notes']: - query = queries[section['query']].format( - status=section['db_status'], suite=suite, arch=arch) + query = queries[section['query']].params({ + 'status': section['db_status'], 'suite': suite, 'arch': arch}) else: - query = queries[section['query']].format(suite=suite, arch=arch) + query = queries[section['query']].params({'suite': suite, 'arch': arch}) rows = query_db(query) except: - print_critical_message('A query failed: ' + query) + print_critical_message('A query failed: %s' % query) raise html = '' footnote = True if rows else False if not rows: # there are no package in this set - log.debug('empty query: ' + query) # do not output anything. + log.debug('empty query: %s' % query) # do not output anything. return (html, footnote) html += build_leading_text_section(section, rows, suite, arch) html += '

\n' + tab + '\n' diff --git a/bin/reproducible_html_live_status.py b/bin/reproducible_html_live_status.py index 6db9aa7d..5681fe43 100755 --- a/bin/reproducible_html_live_status.py +++ b/bin/reproducible_html_live_status.py @@ -10,10 +10,17 @@ from reproducible_common import * from reproducible_html_indexes import build_leading_text_section +from sqlalchemy import select, func, cast, Integer, and_, bindparam import glob bugs = get_bugs() +# sqlalchemy table definitions needed for queries +results = db_table('results') +sources = db_table('sources') +schedule = db_table('schedule') +stats_build = db_table('stats_build') + def convert_into_status_html(status): if status != 'None': status, icon, spokenstatus = get_status_icon(status) @@ -26,15 +33,43 @@ def generate_schedule(arch): """ the schedule pages are very different than others index pages """ log.info('Building the schedule index page for ' + arch + '...') title = 'Packages currently scheduled on ' + arch + ' for testing for build reproducibility' - query = 'SELECT sch.date_scheduled, s.suite, s.architecture, s.name, ' + \ - 'r.status, r.build_duration, ' + \ - '(SELECT coalesce(AVG(h.build_duration), 0) FROM stats_build AS h WHERE h.status IN ("reproducible", "unreproducible") AND h.name=s.name AND h.suite=s.suite AND h.architecture=s.architecture) ' + \ - 'FROM schedule AS sch JOIN sources AS s ON sch.package_id=s.id LEFT JOIN results AS r ON s.id=r.package_id ' + \ - 'WHERE sch.date_build_started IS NULL AND s.architecture="{arch}" ORDER BY sch.date_scheduled' - # 'AND h.name=s.name AND h.suite=s.suite AND h.architecture=s.architecture' in this query and the query below is needed due to not using package_id in the stats_build table, which should be fixed... + + # 'AND h.name=s.name AND h.suite=s.suite AND h.architecture=s.architecture' + # in this query and the query below is needed due to not using package_id + # in the stats_build table, which should be fixed... + averagesql = select([ + func.coalesce(func.avg(cast(stats_build.c.build_duration, Integer)), 0) + ]).where( + and_( + stats_build.c.status.in_(('reproducible', 'unreproducible')), + stats_build.c.name == sources.c.name, + stats_build.c.suite == sources.c.suite, + stats_build.c.architecture == sources.c.architecture, + ) + ).as_scalar() + + query = select([ + schedule.c.date_scheduled, + sources.c.suite, + sources.c.architecture, + sources.c.name, + results.c.status, + results.c.build_duration, + averagesql + ]).select_from( + sources.join(schedule).join(results, isouter=True) + ).where( + and_( + schedule.c.date_build_started == None, + sources.c.architecture == bindparam('arch'), + ) + ).order_by( + schedule.c.date_scheduled + ) + text = Template('$tot packages are currently scheduled for testing on $arch:') html = '' - rows = query_db(query.format(arch=arch)) + rows = query_db(query.params({'arch': arch})) html += build_leading_text_section({'text': text}, rows, defaultsuite, arch) html += generate_live_status_table(arch) html += '

\n' + tab @@ -61,15 +96,40 @@ def generate_schedule(arch): def generate_live_status_table(arch): - query = 'SELECT s.id, s.suite, s.architecture, s.name, s.version, ' + \ - 'p.date_build_started, r.status, r.build_duration, ' + \ - '(SELECT coalesce(AVG(h.build_duration), 0) FROM stats_build AS h WHERE h.status IN ("reproducible", "unreproducible") AND h.name=s.name AND h.suite=s.suite AND h.architecture=s.architecture) ' + \ - ', p.job ' + \ - 'FROM sources AS s JOIN schedule AS p ON p.package_id=s.id LEFT JOIN results AS r ON s.id=r.package_id ' + \ - 'WHERE p.date_build_started IS NOT NULL AND s.architecture="{arch}" ' + \ - 'ORDER BY p.date_build_started DESC' + averagesql = select([ + func.coalesce(func.avg(cast(stats_build.c.build_duration, Integer)), 0) + ]).where( + and_( + stats_build.c.status.in_(('reproducible', 'unreproducible')), + stats_build.c.name == sources.c.name, + stats_build.c.suite == sources.c.suite, + stats_build.c.architecture == sources.c.architecture, + ) + ).as_scalar() + + query = select([ + sources.c.id, + sources.c.suite, + sources.c.architecture, + sources.c.name, + sources.c.version, + schedule.c.date_build_started, + results.c.status, + results.c.build_duration, + averagesql, + schedule.c.job, + ]).select_from( + sources.join(schedule).join(results, isouter=True) + ).where( + and_( + schedule.c.date_build_started != None, + sources.c.architecture == bindparam('arch'), + ) + ).order_by( + schedule.c.date_scheduled + ) html = '' - rows = query_db(query.format(arch=arch)) + rows = query_db(query.params({'arch': arch})) html += '

\n' + tab html += '' html += '' @@ -105,13 +165,25 @@ def generate_oldies(arch): title = 'Oldest results on ' + arch html = '' for suite in SUITES: - query = 'SELECT s.suite, s.architecture, s.name, r.status, r.build_date ' + \ - 'FROM results AS r JOIN sources AS s ON r.package_id=s.id ' + \ - 'WHERE s.suite="{suite}" AND s.architecture="{arch}" ' + \ - 'AND r.status != "blacklisted" ' + \ - 'ORDER BY r.build_date LIMIT 15' + query = select([ + sources.c.suite, + sources.c.architecture, + sources.c.name, + results.c.status, + results.c.build_date + ]).select_from( + results.join(sources) + ).where( + and_( + sources.c.suite == bindparam('suite'), + sources.c.architecture == bindparam('arch'), + results.c.status != 'blacklisted' + ) + ).order_by( + results.c.build_date + ).limit(15) text = Template('Oldest results on $suite/$arch:') - rows = query_db(query.format(arch=arch,suite=suite)) + rows = query_db(query.params({'arch': arch, 'suite': suite})) html += build_leading_text_section({'text': text}, rows, suite, arch) html += '

#src pkg idsuitearchsource packageversion
\n' + tab html += '' -- cgit v1.2.3-70-g09d2
#suitearch