summaryrefslogtreecommitdiffstats
path: root/bin/reproducible_scheduler.sh
blob: 5cbfaad9aee51aa0e5b74b145451c2a5386ad4c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/bin/bash

# Copyright 2014 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2

. /srv/jenkins/bin/common-functions.sh
common_init "$@"

set +x

#
# define db
#
# Location of the sqlite database and of the init file passed to sqlite3 via
# -init by the functions below.
PACKAGES_DB=/var/lib/jenkins/reproducible.db
INIT=/var/lib/jenkins/reproducible.init
if [ ! -f "$PACKAGES_DB" ] ; then
	echo "$PACKAGES_DB doesn't exist, no builds possible."
	exit 1
elif [ -f "$PACKAGES_DB.lock" ] ; then
	# The db is locked: poll every 15 seconds, at most 101 times, for the
	# lock file to disappear.
	for i in {0..100} ; do
		sleep 15
		[ -f "$PACKAGES_DB.lock" ] || break
	done
	# Only give up if the lock is really still there; when it vanished while
	# we were waiting the script simply carries on.
	if [ -f "$PACKAGES_DB.lock" ] ; then
		echo "$PACKAGES_DB.lock still exist, exiting."
		exit 1
	fi
fi

#
# functions, see below for main
#
# Print the sid deb-src entries of /etc/apt/sources.list and refresh the apt
# package lists.
#
# "apt-get update" is attempted up to three times, sleeping a random 30-99
# seconds before each retry.
# Returns: 0 as soon as one attempt succeeds.
# Exits:   the script with status 1 when all three attempts failed. This is
#          done in the current shell on purpose: an "exit" inside a ( ... )
#          subshell would only terminate that subshell.
update_apt() {
	local attempt
	# this needs sid entries in sources.list:
	grep deb-src /etc/apt/sources.list | grep sid
	# try apt-get update three times, else fail
	for attempt in 1 2 3 ; do
		if sudo apt-get update ; then
			return 0
		fi
		# back off before the next attempt, but don't wait after the last one
		if [ "$attempt" -lt 3 ] ; then
			sleep $(( RANDOM % 70 + 30 ))
		fi
	done
	echo "apt-get update failed three times, giving up." >&2
	exit 1
}

# update sources table in db
# Replace the content of the "sources" table with the (package, version)
# pairs found in the sid Sources index downloaded from $MIRROR, then reduce
# packages listed with several versions to their highest version.
#
# ${PACKAGES_DB}.lock exists while the table is being rewritten.
#
# Globals read:    MIRROR, INIT, PACKAGES_DB
# Globals written: TMPFILE, CSVFILE, PKG, VERSION, BET, P_IN_TMPFILE,
#                  P_IN_SOURCES
# Exits with status 1 when the index can't be downloaded or contains no
# packages (the table is left untouched in that case), or when the number of
# rows afterwards differs from the number of unique package names in the index.
update_sources_table() {
	touch "${PACKAGES_DB}.lock"
	TMPFILE=$(mktemp)
	CSVFILE=$(mktemp)
	# Download and convert the index *before* emptying the table, so that a
	# failed download can't leave us with an empty sources table.
	if ! curl --fail "$MIRROR/dists/sid/main/source/Sources.xz" > "$TMPFILE" ; then
		echo "Failed to download $MIRROR/dists/sid/main/source/Sources.xz, sources table not updated."
		rm -f "$CSVFILE" "$TMPFILE" "${PACKAGES_DB}.lock"
		exit 1
	fi
	# Each "Package:" line is paired with the "Version:" line following it
	# and written as one "name,version" csv row.
	xzcat "$TMPFILE" \
		| grep -E "^(Package|Version):" \
		| sed "s#^Version: ##; s#^Package: ##" \
		| while read -r PKG ; do read -r VERSION ; echo "$PKG,$VERSION" ; done > "$CSVFILE"
	if [ ! -s "$CSVFILE" ] ; then
		echo "No packages found in the downloaded Sources.xz, sources table not updated."
		rm -f "$CSVFILE" "$TMPFILE" "${PACKAGES_DB}.lock"
		exit 1
	fi
	sqlite3 -csv -init "$INIT" "${PACKAGES_DB}" "DELETE from sources"
	echo ".import $CSVFILE sources" | sqlite3 -csv -init "$INIT" "${PACKAGES_DB}"
	# count unique packages for later comparison
	P_IN_TMPFILE=$(xzcat "$TMPFILE" | grep "^Package:" | cut -d " " -f2 | sort -u | wc -l)
	# cleanup files already
	rm "$CSVFILE" "$TMPFILE"
	# cleanup db
	echo "============================================================================="
	echo "$(date) Removing duplicate versions from sources db..."
	for PKG in $(sqlite3 "${PACKAGES_DB}" 'SELECT name FROM sources GROUP BY name HAVING count(name) > 1') ; do
		# BET ends up holding the highest version seen for $PKG
		BET=""
		# The name is passed as a single-quoted SQL string literal: a
		# double-quoted token is resolved as an identifier first by sqlite.
		for VERSION in $(sqlite3 "${PACKAGES_DB}" "SELECT version FROM sources where name = '$PKG'") ; do
			if [ "$BET" = "" ] ; then
				BET=$VERSION
				continue
			elif dpkg --compare-versions "$BET" lt "$VERSION"  ; then
				BET=$VERSION
			fi
		done
		sqlite3 -init "$INIT" "${PACKAGES_DB}" "DELETE FROM sources WHERE name = '$PKG' AND version != '$BET'"
	done
	echo "$(date) Done removing duplicate versions from sources db..."
	echo "============================================================================="
	rm "${PACKAGES_DB}.lock"
	# verify duplicate entries have been removed correctly from the db
	P_IN_SOURCES=$(sqlite3 "${PACKAGES_DB}" 'SELECT count(name) FROM sources')
	if [ "$P_IN_TMPFILE" -ne "$P_IN_SOURCES" ] ; then
		echo "DEBUG: P_IN_SOURCES = $P_IN_SOURCES"
		echo "DEBUG: P_IN_TMPFILE = $P_IN_TMPFILE"
		exit 1
	fi
}

# Run $QUERY against the packages db and print a short report about the result.
#
# Arguments:       $1 - human readable description of the selection criteria
# Globals read:    QUERY, INIT, PACKAGES_DB
# Globals written: PACKAGES - the result, joined into one space separated line
#                  AMOUNT   - number of lines in the result, 0 if it was empty
do_sql_query() {
	local criteria="$1"
	AMOUNT=0
	PACKAGES=$(sqlite3 -init "$INIT" "${PACKAGES_DB}" "$QUERY")
	if [ -n "$PACKAGES" ] ; then
		AMOUNT=$(printf '%s\n' "$PACKAGES" | wc -l)
		# deliberately unquoted: word splitting turns the newline separated
		# result into a single space separated line
		PACKAGES=$(echo $PACKAGES)
	fi
	printf '%s\n' \
		"Criteria: $criteria" \
		"Amount:   $AMOUNT" \
		"Packages: $PACKAGES" \
		"============================================================================="
}

# Select packages from the sources table which are neither in
# sources_scheduled nor in source_packages, in random order.
#
# Arguments: $1 - maximum number of packages to select
# The query is stored in QUERY and handed to do_sql_query.
select_unknown_packages() {
	local limit="$1"
	local criteria="never tested before, randomly sorted"
	QUERY="
		SELECT DISTINCT sources.name FROM sources
			WHERE sources.name NOT IN
			(SELECT sources.name FROM sources,sources_scheduled
				WHERE sources.name=sources_scheduled.name)
			AND sources.name NOT IN
			(SELECT sources.name FROM sources,source_packages
				WHERE sources.name=source_packages.name)
			ORDER BY random()
		LIMIT $limit"
	do_sql_query "$criteria"
}

# Select packages which are not in sources_scheduled, are not blacklisted and
# whose version in sources differs from the one recorded in source_packages,
# ordered by the build date recorded in source_packages.
#
# Arguments: $1 - maximum number of packages to select
# The query is stored in QUERY and handed to do_sql_query.
select_new_versions() {
	local limit="$1"
	local criteria="tested before, new version available, sorted by last test date"
	QUERY="
		SELECT DISTINCT sources.name FROM sources,source_packages
			WHERE sources.name NOT IN
			(SELECT sources.name FROM sources,sources_scheduled
				WHERE sources.name=sources_scheduled.name)
			AND sources.name IN
			(SELECT sources.name FROM sources,source_packages
				WHERE sources.name=source_packages.name
				AND sources.version!=source_packages.version
				AND source_packages.status!='blacklisted')
			AND sources.name=source_packages.name
			ORDER BY source_packages.build_date
		LIMIT $limit"
	do_sql_query "$criteria"
}

# Select packages which are not in sources_scheduled, are not blacklisted,
# whose version in sources equals the one recorded in source_packages and
# whose recorded build date is older than $min_age_days days, ordered by that
# build date.
#
# Arguments: $1 - maximum number of packages to select
# The query is stored in QUERY and handed to do_sql_query.
select_old_versions() {
	local limit="$1"
	# Minimum age of the last test in days. It feeds both the query and the
	# reported criteria, so the text can't disagree with what is selected.
	local min_age_days=4
	QUERY="
		SELECT DISTINCT sources.name FROM sources,source_packages
			WHERE sources.name NOT IN
			(SELECT sources.name FROM sources,sources_scheduled
				WHERE sources.name=sources_scheduled.name)
			AND sources.name IN
			(SELECT sources.name FROM sources,source_packages
				WHERE sources.name=source_packages.name
				AND sources.version=source_packages.version
				AND source_packages.status!='blacklisted')
			AND sources.name=source_packages.name
			AND source_packages.build_date < datetime('now', '-$min_age_days day')
			ORDER BY source_packages.build_date
		LIMIT $limit"
	do_sql_query "tested at least $min_age_days days ago, no new version available, sorted by last test date"
}


# Insert every package listed in $CANDIDATES into the sources_scheduled table,
# stamped with the current date and time, and report what has been scheduled.
#
# Globals read:    CANDIDATES (whitespace separated package names), INIT,
#                  PACKAGES_DB
# Globals written: DATE, TMPFILE, PKG
schedule_packages() {
	# number of packages actually handed to sqlite3; this is what gets
	# reported, so the count always matches the list printed next to it
	local scheduled=0
	DATE=$(date +'%Y-%m-%d %H:%M')
	TMPFILE=$(mktemp)
	for PKG in $CANDIDATES ; do
		echo "INSERT INTO sources_scheduled VALUES ('$PKG','$DATE','');" >> "$TMPFILE"
		scheduled=$(( scheduled + 1 ))
	done
	sqlite3 -init "$INIT" "${PACKAGES_DB}" < "$TMPFILE"
	rm "$TMPFILE"
	echo "============================================================================="
	echo "The following $scheduled source packages have been scheduled: $CANDIDATES"
	echo "============================================================================="
	echo
}

#
# main
#
update_apt
SCHEDULED=$(sqlite3 "${PACKAGES_DB}" 'SELECT count(name) FROM sources_scheduled')
# An empty or non-numeric result means the count could not be determined;
# don't schedule anything based on a bogus number.
case "$SCHEDULED" in
	''|*[!0-9]*)
		echo "Could not determine the number of scheduled packages (got '$SCHEDULED'), exiting."
		exit 1
		;;
esac
if [ "$SCHEDULED" -gt 250 ] ; then
	echo "$SCHEDULED packages scheduled, nothing to do."
	exit 0
else
	echo "$SCHEDULED packages currently scheduled, scheduling some more..."
fi
update_sources_table

# Each select_* call below leaves its result in PACKAGES and AMOUNT; TOTAL
# accumulates the already scheduled packages plus everything selected so far
# and decides how many packages the next category may contribute.
echo "Requesting 200 unknown packages..."
select_unknown_packages 200
# $(( )) instead of "let": let returns a failure status when the result is 0
TOTAL=$(( SCHEDULED + AMOUNT ))
echo "So in total now $TOTAL packages about to be scheduled."
CANDIDATES=$PACKAGES
MESSAGE="Scheduled $AMOUNT unknown packages"

# default, so that the LIMIT of the query is never empty
NEW=0
if [ "$TOTAL" -le 250 ] ; then
	NEW=50
elif [ "$TOTAL" -le 450 ] ; then
	NEW=25
fi
echo "Requesting $NEW new packages..."
select_new_versions "$NEW"
TOTAL=$(( TOTAL + AMOUNT ))
echo "So in total now $TOTAL packages about to be scheduled."
CANDIDATES="$CANDIDATES $PACKAGES"
MESSAGE="$MESSAGE, $AMOUNT packages with new versions"

# NOTE(review): the second branch can never be taken, as any TOTAL <= 50
# already satisfies TOTAL < 250 above. Its threshold looks like a typo;
# behaviour is left unchanged until the intended value is confirmed.
if [ "$TOTAL" -lt 250 ] ; then
	OLD=100
elif [ "$TOTAL" -le 50 ] ; then
	OLD=5
else
	OLD=1
fi
echo "Requesting $OLD old packages..."
select_old_versions "$OLD"
TOTAL=$(( TOTAL + AMOUNT ))
echo "So in total now $TOTAL packages about to be scheduled."
CANDIDATES="$CANDIDATES $PACKAGES"
MESSAGE="$MESSAGE and $AMOUNT packages with the same version again, for a total of $TOTAL scheduled packages."

# finally
schedule_packages
echo
echo "$MESSAGE"
# relay the summary via kgb-client
kgb-client --conf /srv/jenkins/kgb/debian-reproducible.conf --relay-msg "$MESSAGE"
echo