summaryrefslogtreecommitdiffstats
path: root/bin/reproducible_scheduler.sh
blob: 9688bd42c6b08be6cb0ddef4dbcd387a8c0b55b8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
#!/bin/bash

# Copyright 2014-2015 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2

# DEBUG is set before the shared helper library is sourced.
# NOTE(review): presumably common_init (not defined in this file) reads DEBUG
# to decide how verbose to be -- confirm in common-functions.sh.
DEBUG=false
. /srv/jenkins/bin/common-functions.sh
# all command line arguments of this script are handed to common_init
common_init "$@"

# common code defining db access
# NOTE(review): PACKAGES_DB, INIT and MIRROR are used below but never assigned
# in this file; presumably they come from one of the two sourced files.
. /srv/jenkins/bin/reproducible_common.sh

#
# functions, see below for main
#
# Refresh the apt package lists.
#
# First prints the deb-src lines mentioning sid from /etc/apt/sources.list,
# then runs "sudo apt-get update" up to three times, pausing a random
# 30-99 seconds before every retry.
#
# Returns: 0 as soon as one attempt succeeds, 1 after three failed attempts.
update_apt() {
	# this needs sid entries in sources.list:
	grep deb-src /etc/apt/sources.list | grep sid
	# try apt-get update three times, else fail
	local attempts_left=3
	until sudo apt-get update ; do
		attempts_left=$(( attempts_left - 1 ))
		if [ "$attempts_left" -eq 0 ] ; then
			return 1
		fi
		sleep $(( RANDOM % 70 + 30 ))
	done
	return 0
}

# Remove the lock file "${PACKAGES_DB}.lock".
# Used both directly and as a signal/EXIT trap handler by
# update_sources_table. Succeeds even if the lock file does not exist.
cleanup_lock() {
	# quoted, so that a database path containing whitespace still works
	rm -f -- "${PACKAGES_DB}.lock"
}

# update sources table in db
#
# Downloads dists/sid/main/source/Sources.xz from $MIRROR, replaces the
# content of the "sources" table in ${PACKAGES_DB} with the name,version
# pairs listed there and then deletes all but the highest version of every
# package. "${PACKAGES_DB}.lock" exists while the table is being modified.
#
# Globals read:    MIRROR, INIT, PACKAGES_DB
# Globals written: RESULT
#   0 - the table holds exactly one row per package listed in Sources.xz
#   1 - anything else, including a failed or unusable download (in which
#       case the table is left untouched)
# The outcome is reported through RESULT, not through the exit status, so
# that the caller can retry.
update_sources_table() {
	local TMPFILE CSVFILE P_IN_TMPFILE P_IN_SOURCES PKG VERSION BET
	RESULT=1
	trap cleanup_lock INT TERM EXIT
	touch "${PACKAGES_DB}.lock"
	TMPFILE=$(mktemp)
	CSVFILE=$(mktemp)
	P_IN_TMPFILE=0
	# --fail: treat HTTP errors as errors instead of saving the error page
	if curl --fail "$MIRROR/dists/sid/main/source/Sources.xz" > "$TMPFILE" ; then
		# one "name,version" line per stanza (Package: precedes Version:)
		xzcat "$TMPFILE" | awk '/^Package:/ { pkg = $2 } /^Version:/ { print pkg "," $2 }' > "$CSVFILE"
		# count unique packages for later comparison
		P_IN_TMPFILE=$(xzcat "$TMPFILE" | grep "^Package:" | cut -d " " -f2 | sort -u | wc -l)
	fi
	# Never wipe the table when there is nothing to replace it with: an
	# empty table would otherwise pass the final 0 == 0 comparison.
	if [ "$P_IN_TMPFILE" -eq 0 ] ; then
		echo "$(date) Could not get a usable Sources.xz from $MIRROR, leaving the sources table untouched."
		rm -f "$CSVFILE" "$TMPFILE"
		cleanup_lock
		trap - INT TERM EXIT
		return 0
	fi
	sqlite3 -csv -init "$INIT" "${PACKAGES_DB}" "DELETE from sources"
	echo ".import $CSVFILE sources" | sqlite3 -csv -init "$INIT" "${PACKAGES_DB}"
	# cleanup files already
	rm "$CSVFILE" "$TMPFILE"
	# cleanup db
	echo "============================================================================="
	echo "$(date) Removing duplicate versions from sources db..."
	# package names and versions contain no whitespace, so word splitting of
	# the query results is what is wanted here
	for PKG in $(sqlite3 "${PACKAGES_DB}" 'SELECT name FROM sources GROUP BY name HAVING count(name) > 1') ; do
		# BET ends up holding the highest version seen for this package
		BET=""
		# single quotes: a double-quoted SQL token may be resolved as a
		# column name instead of a string
		for VERSION in $(sqlite3 "${PACKAGES_DB}" "SELECT version FROM sources where name = '$PKG'") ; do
			if [ -z "$BET" ] || dpkg --compare-versions "$BET" lt "$VERSION" ; then
				BET=$VERSION
			fi
		done
		sqlite3 -init "$INIT" "${PACKAGES_DB}" "DELETE FROM sources WHERE name = '$PKG' AND version != '$BET'"
	done
	echo "$(date) Done removing duplicate versions from sources db..."
	echo "============================================================================="
	cleanup_lock
	trap - INT TERM EXIT
	# verify duplicate entries have been removed correctly from the db
	P_IN_SOURCES=$(sqlite3 "${PACKAGES_DB}" 'SELECT count(name) FROM sources')
	# an empty or non-numeric count must count as a failure, too
	if [ -n "$P_IN_SOURCES" ] && [ "$P_IN_TMPFILE" -eq "$P_IN_SOURCES" ] ; then
		RESULT=0
	else
		echo "DEBUG: P_IN_SOURCES = $P_IN_SOURCES"
		echo "DEBUG: P_IN_TMPFILE = $P_IN_TMPFILE"
		RESULT=1
	fi
}

# Run $QUERY against ${PACKAGES_DB} and print a short report.
#
# Arguments:       $1 - human readable description of the selection criteria
# Globals read:    QUERY, INIT, PACKAGES_DB
# Globals written: PACKAGES - the result rows joined by single spaces
#                  AMOUNT   - the number of result rows (0 if none)
# Outputs:         the criteria, the amount, the packages and a separator
do_sql_query() {
	local -a rows
	PACKAGES=$(sqlite3 -init "$INIT" "${PACKAGES_DB}" "$QUERY")
	if [ -n "$PACKAGES" ] ; then
		# one row per line: count the lines, then turn the newlines into
		# spaces (without letting the shell glob-expand the names)
		mapfile -t rows <<< "$PACKAGES"
		AMOUNT=${#rows[@]}
		PACKAGES=${PACKAGES//$'\n'/ }
	else
		AMOUNT=0
	fi
	echo "Criteria: $1"
	echo "Amount:   $AMOUNT"
	echo "Packages: $PACKAGES"
	echo "============================================================================="
}

# Select packages that have never been tested.
#
# Builds a query for names in "sources" that appear neither in
# "sources_scheduled" nor in "source_packages", in random order, and hands
# it to do_sql_query.
#
# Arguments:       $1 - maximum number of names to select
# Globals written: QUERY (plus whatever do_sql_query sets)
select_unknown_packages() {
	local limit="$1"
	local criteria="not tested before, randomly sorted"
	QUERY="
		SELECT DISTINCT sources.name FROM sources
			WHERE sources.name NOT IN
			(SELECT sources.name FROM sources,sources_scheduled
				WHERE sources.name=sources_scheduled.name)
			AND sources.name NOT IN
			(SELECT sources.name FROM sources,source_packages
				WHERE sources.name=source_packages.name)
			ORDER BY random()
		LIMIT $limit"
	do_sql_query "$criteria"
}

# Select already tested packages for which a different version is available.
#
# Builds a query for names that are not in "sources_scheduled", whose
# version in "sources" differs from the one in "source_packages" and whose
# status is not 'blacklisted', ordered by build_date, and hands it to
# do_sql_query.
#
# Arguments:       $1 - maximum number of names to select
# Globals written: QUERY (plus whatever do_sql_query sets)
select_new_versions() {
	local limit="$1"
	local criteria="tested before, new version available, sorted by last test date"
	QUERY="
		SELECT DISTINCT sources.name FROM sources,source_packages
			WHERE sources.name NOT IN
			(SELECT sources.name FROM sources,sources_scheduled
				WHERE sources.name=sources_scheduled.name)
			AND sources.name IN
			(SELECT sources.name FROM sources,source_packages
				WHERE sources.name=source_packages.name
				AND sources.version!=source_packages.version
				AND source_packages.status!='blacklisted')
			AND sources.name=source_packages.name
			ORDER BY source_packages.build_date
		LIMIT $limit"
	do_sql_query "$criteria"
}

# Select packages whose current version was last tested over two weeks ago.
#
# Builds a query for names that are not in "sources_scheduled", have the
# same version in "sources" and "source_packages", are not 'blacklisted'
# and have a build_date more than 14 days in the past, ordered by
# build_date, and hands it to do_sql_query.
#
# Arguments:       $1 - maximum number of names to select
# Globals written: QUERY (plus whatever do_sql_query sets)
select_old_versions() {
	local limit="$1"
	local criteria="tested at least two weeks ago, no new version available, sorted by last test date"
	QUERY="
		SELECT DISTINCT sources.name FROM sources,source_packages
			WHERE sources.name NOT IN
			(SELECT sources.name FROM sources,sources_scheduled
				WHERE sources.name=sources_scheduled.name)
			AND sources.name IN
			(SELECT sources.name FROM sources,source_packages
				WHERE sources.name=source_packages.name
				AND sources.version=source_packages.version
				AND source_packages.status!='blacklisted')
			AND sources.name=source_packages.name
			AND source_packages.build_date < datetime('now', '-14 day')
			ORDER BY source_packages.build_date
		LIMIT $limit"
	do_sql_query "$criteria"
}

# Select packages whose current version failed over two days ago.
#
# Builds a query for names that are not in "sources_scheduled", have the
# same version in "sources" and "source_packages", whose status is neither
# 'blacklisted' nor 'reproducible' and whose build_date is more than 2 days
# in the past, ordered by build_date, and hands it to do_sql_query.
#
# Arguments:       $1 - maximum number of names to select
# Globals written: QUERY (plus whatever do_sql_query sets)
select_old_failures() {
	local limit="$1"
	local criteria="tested at least two days ago, not reproducible, no new version available, sorted by last test date"
	QUERY="
		SELECT DISTINCT sources.name FROM sources,source_packages
			WHERE sources.name NOT IN
			(SELECT sources.name FROM sources,sources_scheduled
				WHERE sources.name=sources_scheduled.name)
			AND sources.name IN
			(SELECT sources.name FROM sources,source_packages
				WHERE sources.name=source_packages.name
				AND sources.version=source_packages.version
				AND source_packages.status!='blacklisted'
				AND source_packages.status!='reproducible')
			AND sources.name=source_packages.name
			AND source_packages.build_date < datetime('now', '-2 day')
			ORDER BY source_packages.build_date
		LIMIT $limit"
	do_sql_query "$criteria"
}

# Insert every package listed in $ALL_PACKAGES into the "sources_scheduled"
# table, stamped with the current date and time, and print a summary.
#
# Globals read:    ALL_PACKAGES (whitespace separated names), TOTAL, INIT,
#                  PACKAGES_DB
# Globals written: DATE (format: YYYY-MM-DD HH:MM)
# Returns:         the exit status of the final echo; a failing sqlite3 is
#                  only fatal if the script runs with "set -e"
schedule_packages() {
	local PKG
	DATE=$(date +'%Y-%m-%d %H:%M')
	# The statements are piped straight into sqlite3, so no temporary file
	# is left behind if something fails. $ALL_PACKAGES is deliberately
	# unquoted: it is split into the individual package names.
	for PKG in $ALL_PACKAGES ; do
		echo "INSERT INTO sources_scheduled VALUES ('$PKG','$DATE','');"
	done | sqlite3 -init "$INIT" "${PACKAGES_DB}"
	echo "============================================================================="
	echo "The following $TOTAL source packages have been scheduled: $ALL_PACKAGES"
	echo "============================================================================="
	echo
}

# Drop the packages for which a .buildinfo file already exists.
#
# Arguments:       the package names to check, one per argument
# Globals read:    SCHEDULER_BUILDINFO_DIR (optional) - directory to look
#                  in, defaults to /var/lib/jenkins/userContent/buildinfo
# Globals written: PACKAGES - the names without a .buildinfo file, each one
#                             preceded by a space
#                  AMOUNT   - decremented once for every dropped package
#
# NOTE(review): a package counts as having a .buildinfo file if any regular
# file named "<package>_*.buildinfo" exists. This assumes the usual
# "<package>_<version>_<arch>.buildinfo" naming -- confirm against the job
# that writes these files. The literal "<package>_.buildinfo" tested before
# is still matched by the pattern.
deselect_old_with_buildinfo() {
	local buildinfo_dir="${SCHEDULER_BUILDINFO_DIR:-/var/lib/jenkins/userContent/buildinfo}"
	local PKG candidate has_buildinfo
	PACKAGES=""
	for PKG in "$@" ; do
		has_buildinfo=false
		# "_" cannot be part of a package name, so this never matches the
		# files of another package sharing the same prefix. Without a
		# match the pattern stays literal and fails the -f test.
		for candidate in "$buildinfo_dir/${PKG}"_*.buildinfo ; do
			if [ -f "$candidate" ] ; then
				has_buildinfo=true
				break
			fi
		done
		if $has_buildinfo ; then
			# unlike "let", this assignment does not return non-zero
			# when the result is 0
			AMOUNT=$(( AMOUNT - 1 ))
		else
			PACKAGES="$PACKAGES $PKG"
		fi
	done
}

#
# main
#
# Overview:
#  1. refresh apt and, if more than 250 packages are already scheduled,
#     only update the schedule page and stop
#  2. refresh the sources table (up to five attempts)
#  3. select unknown packages, packages with new versions and old failures
#     without .buildinfo file, then schedule them and announce the result
update_apt
init_html
COUNT_SCHEDULED=$(sqlite3 "${PACKAGES_DB}" 'SELECT count(name) FROM sources_scheduled')
if [ "$COUNT_SCHEDULED" -gt 250 ] ; then
	update_html_schedule
	echo "$COUNT_SCHEDULED packages scheduled, nothing to do."
	exit 0
else
	echo "$COUNT_SCHEDULED packages currently scheduled, scheduling some more..."
fi

# update_sources_table reports its outcome through RESULT
RESULT=0
for i in 1 2 3 4 5 ; do
	# try five times, before failing the job
	update_sources_table
	if [ "$RESULT" -eq 0 ] ; then
		break
	fi
	# no point in waiting once the last attempt has failed
	if [ "$i" -lt 5 ] ; then
		sleep 2m
	fi
done
if [ "$RESULT" -ne 0 ] ; then
	echo "failure to update sources table"
	exit 1
fi

# Note on the arithmetic below: "let" returns a non-zero status when the
# result is 0, which aborts a script running under "set -e"; plain
# arithmetic expansion in an assignment does not.
echo "Requesting 200 unknown packages..."
select_unknown_packages 200
TOTAL=$(( COUNT_SCHEDULED + AMOUNT ))
echo "So in total now $TOTAL packages about to be scheduled."
ALL_PACKAGES="$PACKAGES"
MESSAGE="Scheduled $AMOUNT unknown packages"

# the fuller the queue already is, the fewer new versions are requested
if [ "$TOTAL" -le 250 ] ; then
	NEW=50
elif [ "$TOTAL" -le 450 ] ; then
	NEW=25
else
	# not reachable with the limits used above (at most 250 + 200), but
	# never leave NEW unset: an empty LIMIT is an SQL syntax error
	NEW=0
fi
echo "Requesting $NEW new versions..."
select_new_versions $NEW
TOTAL=$(( TOTAL + AMOUNT ))
echo "So in total now $TOTAL packages about to be scheduled."
ALL_PACKAGES="$ALL_PACKAGES $PACKAGES"
MESSAGE="$MESSAGE, $AMOUNT packages with new versions"

if [ "$TOTAL" -lt 150 ] ; then
	OLD=500 # used not to be needed...
elif [ "$TOTAL" -lt 250 ] ; then
	OLD=400	# used to be 200
elif [ "$TOTAL" -le 350 ] ; then
	OLD=200 # used to be 100
else
	OLD=1
fi
# pointless atm
#echo "Requesting $OLD old packages..."
#select_old_versions $OLD
#echo -n "Found $AMOUNT old packages, "
#deselect_old_with_buildinfo $PACKAGES
#echo "kept $AMOUNT old packages without .buildinfo files."
#let "TOTAL=$TOTAL+$AMOUNT"
#ALL_PACKAGES="$ALL_PACKAGES $PACKAGES"

echo "Requesting $OLD old+failed packages..."
select_old_failures $OLD
echo -n "Found $AMOUNT old+failed packages, "
# Trace only the deselection step, then restore the previous tracing state,
# so that the per-package loop in schedule_packages does not flood the log.
case "$-" in
	*x*) XTRACE_WAS_ON=true ;;
	*) XTRACE_WAS_ON=false ;;
esac
set -x
# $PACKAGES is deliberately unquoted: one argument per package name
deselect_old_with_buildinfo $PACKAGES
if ! $XTRACE_WAS_ON ; then
	set +x
fi
echo "kept $AMOUNT old packages without .buildinfo files."
TOTAL=$(( TOTAL + AMOUNT ))
ALL_PACKAGES="$ALL_PACKAGES $PACKAGES"

echo "So in total now $TOTAL packages about to be scheduled."
MESSAGE="$MESSAGE and $AMOUNT packages with the same version (but without .buildinfo files) again, for a total of $TOTAL scheduled packages."

# finally
schedule_packages
update_html_schedule
echo
echo "$MESSAGE"
kgb-client --conf /srv/jenkins/kgb/debian-reproducible.conf --relay-msg "$MESSAGE"
echo