1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
|
#!/bin/bash
# Copyright 2014-2015 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2
# Debug switch, assigned before the shared helpers are loaded.
# NOTE(review): presumably evaluated by common-functions.sh / common_init - confirm there.
DEBUG=false
# Shared Jenkins job helpers; common_init is called right below with this
# script's own arguments (it is not defined in this file, so presumably here).
. /srv/jenkins/bin/common-functions.sh
common_init "$@"
# common code defining db access
# NOTE(review): PACKAGES_DB, INIT and MIRROR are used further down but never
# assigned in this script, so they are presumably set by this file - confirm.
. /srv/jenkins/bin/reproducible_common.sh
#
# functions, see below for main
#
# Refresh the apt package lists, retrying on failure.
# Outputs: the deb-src lines of /etc/apt/sources.list that mention sid.
# Returns: 0 as soon as one "apt-get update" succeeds, 1 after three failures.
update_apt() {
	# this needs sid entries in sources.list, so show the ones that are there
	# NOTE(review): a missing entry only aborts the job if errexit is enabled
	# by the sourced helper scripts - confirm.
	grep deb-src /etc/apt/sources.list | grep sid
	# one initial attempt plus two retries, each retry preceded by a random
	# pause of 30 to 99 seconds
	local retries_left=2
	until sudo apt-get update ; do
		if [ $retries_left -eq 0 ] ; then
			return 1
		fi
		retries_left=$(( retries_left - 1 ))
		sleep $(( RANDOM % 70 + 30 ))
	done
	return 0
}
# Remove the lock file that sits next to the packages database.
# Globals: PACKAGES_DB (read)
# Safe to call when the lock does not exist (rm -f); also used as trap handler.
cleanup_lock() {
	# quoted so a database path containing whitespace still removes the right file
	rm -f -- "${PACKAGES_DB}.lock"
}
# update sources table in db
#
# Downloads dists/sid/main/source/Sources.xz from $MIRROR, replaces the content
# of the "sources" table with its package,version pairs and keeps only the
# highest version of every package.
# Globals read:    MIRROR, PACKAGES_DB, INIT
# Globals written: RESULT (0 = table row count matches the number of unique
#                  packages in the index, 1 = anything went wrong), plus the
#                  scratch variables TMPFILE, CSVFILE, P_IN_TMPFILE,
#                  P_IN_SOURCES, PKG, VERSION and BET
# Returns: always 0; callers must look at RESULT. (A non-zero return would
#          defeat the caller's retry loop if errexit is enabled.)
update_sources_table() {
	# pessimistic default, only flipped after the final verification
	RESULT=1
	# the lock is held while the table is rewritten and dropped on any exit
	trap cleanup_lock INT TERM EXIT
	touch "${PACKAGES_DB}.lock"
	TMPFILE=$(mktemp)
	CSVFILE=$(mktemp)
	# count unique packages for later comparison; stays 0 when the download
	# fails (--fail turns HTTP errors into a non-zero exit instead of saving
	# the error page) or when the file cannot be decompressed
	P_IN_TMPFILE=0
	if curl --fail "$MIRROR/dists/sid/main/source/Sources.xz" > "$TMPFILE" ; then
		P_IN_TMPFILE=$(xzcat "$TMPFILE" | grep "^Package:" | cut -d " " -f2 | sort -u | wc -l)
	fi
	if [ "$P_IN_TMPFILE" -eq 0 ] ; then
		# never wipe the table when there is nothing to replace it with
		echo "$(date) Could not fetch a usable Sources.xz from $MIRROR, leaving the sources table untouched."
		rm -f "$CSVFILE" "$TMPFILE"
		cleanup_lock
		trap - INT TERM EXIT
		return 0
	fi
	# every Package: line is followed by its Version: line, join them as CSV
	xzcat "$TMPFILE" | grep -E "^(Package|Version):" | sed "s#^Version: ##; s#^Package: ##" | while read -r PKG ; do
		read -r VERSION
		echo "$PKG,$VERSION"
	done > "$CSVFILE"
	sqlite3 -csv -init "$INIT" "${PACKAGES_DB}" "DELETE from sources"
	echo ".import $CSVFILE sources" | sqlite3 -csv -init "$INIT" "${PACKAGES_DB}"
	# cleanup files already
	rm -f "$CSVFILE" "$TMPFILE"
	# cleanup db
	echo "============================================================================="
	echo "$(date) Removing duplicate versions from sources db..."
	# word-splitting of the query output is intended: one package name per row
	for PKG in $(sqlite3 "${PACKAGES_DB}" 'SELECT name FROM sources GROUP BY name HAVING count(name) > 1') ; do
		# BET ends up holding the highest version according to dpkg
		BET=""
		for VERSION in $(sqlite3 "${PACKAGES_DB}" "SELECT version FROM sources WHERE name = '$PKG'") ; do
			if [ -z "$BET" ] || dpkg --compare-versions "$BET" lt "$VERSION" ; then
				BET=$VERSION
			fi
		done
		sqlite3 -init "$INIT" "${PACKAGES_DB}" "DELETE FROM sources WHERE name = '$PKG' AND version != '$BET'"
	done
	echo "$(date) Done removing duplicate versions from sources db..."
	echo "============================================================================="
	cleanup_lock
	trap - INT TERM EXIT
	# verify duplicate entries have been removed correctly from the db
	P_IN_SOURCES=$(sqlite3 "${PACKAGES_DB}" 'SELECT count(name) FROM sources')
	# a non-numeric P_IN_SOURCES (failed query) makes the test fail and thus
	# lands in the failure branch as well
	if [ "$P_IN_TMPFILE" -eq "$P_IN_SOURCES" ] ; then
		RESULT=0
	else
		echo "DEBUG: P_IN_SOURCES = $P_IN_SOURCES"
		echo "DEBUG: P_IN_TMPFILE = $P_IN_TMPFILE"
		RESULT=1
	fi
}
# Run the SQL statement stored in QUERY and report the result.
# Globals read:    QUERY, INIT, PACKAGES_DB
# Globals written: PACKAGES (result rows joined by single spaces),
#                  AMOUNT (number of result rows, 0 for an empty result)
# Arguments: $1 - human readable description of the selection criteria
# Outputs:   a short summary (criteria, amount, packages) on stdout
do_sql_query() {
	PACKAGES=$(sqlite3 -init "$INIT" "${PACKAGES_DB}" "$QUERY")
	if [ -n "$PACKAGES" ] ; then
		AMOUNT=$(printf '%s\n' "$PACKAGES" | wc -l)
		# one row per line becomes one space separated list; done with
		# parameter expansion so the rows are never glob-expanded
		PACKAGES=${PACKAGES//$'\n'/ }
	else
		AMOUNT=0
	fi
	echo "Criteria: $1"
	echo "Amount: $AMOUNT"
	echo "Packages: $PACKAGES"
	echo "============================================================================="
}
# Pick packages from "sources" that are neither scheduled nor present in
# source_packages, in random order.
# Arguments: $1 - maximum number of packages to select
# Globals written: QUERY here; the query itself is run by do_sql_query
select_unknown_packages() {
	local limit="$1"
	local criteria="not tested before, randomly sorted"
	QUERY="
SELECT DISTINCT sources.name FROM sources
WHERE sources.name NOT IN
(SELECT sources.name FROM sources,sources_scheduled
WHERE sources.name=sources_scheduled.name)
AND sources.name NOT IN
(SELECT sources.name FROM sources,source_packages
WHERE sources.name=source_packages.name)
ORDER BY random()
LIMIT $limit"
	do_sql_query "$criteria"
}
# Pick packages that are not scheduled, are not blacklisted and whose version
# in "sources" differs from the one recorded in source_packages, ordered by
# source_packages.build_date.
# Arguments: $1 - maximum number of packages to select
# Globals written: QUERY here; the query itself is run by do_sql_query
select_new_versions() {
	local limit="$1"
	local criteria="tested before, new version available, sorted by last test date"
	QUERY="
SELECT DISTINCT sources.name FROM sources,source_packages
WHERE sources.name NOT IN
(SELECT sources.name FROM sources,sources_scheduled
WHERE sources.name=sources_scheduled.name)
AND sources.name IN
(SELECT sources.name FROM sources,source_packages
WHERE sources.name=source_packages.name
AND sources.version!=source_packages.version
AND source_packages.status!='blacklisted')
AND sources.name=source_packages.name
ORDER BY source_packages.build_date
LIMIT $limit"
	do_sql_query "$criteria"
}
# Pick packages that are not scheduled, are not blacklisted, still have the
# version recorded in source_packages and were built more than 14 days ago,
# ordered by source_packages.build_date.
# Arguments: $1 - maximum number of packages to select
# Globals written: QUERY here; the query itself is run by do_sql_query
select_old_versions() {
	local limit="$1"
	local criteria="tested at least two weeks ago, no new version available, sorted by last test date"
	# old versions older than two weeks only
	QUERY="
SELECT DISTINCT sources.name FROM sources,source_packages
WHERE sources.name NOT IN
(SELECT sources.name FROM sources,sources_scheduled
WHERE sources.name=sources_scheduled.name)
AND sources.name IN
(SELECT sources.name FROM sources,source_packages
WHERE sources.name=source_packages.name
AND sources.version=source_packages.version
AND source_packages.status!='blacklisted')
AND sources.name=source_packages.name
AND source_packages.build_date < datetime('now', '-14 day')
ORDER BY source_packages.build_date
LIMIT $limit"
	do_sql_query "$criteria"
}
# Insert every package listed in ALL_PACKAGES into the sources_scheduled table.
# Globals read:    ALL_PACKAGES (whitespace separated package names), TOTAL,
#                  INIT, PACKAGES_DB
# Globals written: DATE (timestamp stored with every scheduled package)
# Outputs:         a summary of what has been scheduled on stdout
schedule_packages() {
	DATE=$(date +'%Y-%m-%d %H:%M')
	# ALL_PACKAGES is deliberately left unquoted: it is a space separated list.
	# The statements are piped straight into sqlite3, so no temporary file
	# can be left behind.
	for PKG in $ALL_PACKAGES ; do
		printf "INSERT INTO sources_scheduled VALUES ('%s','%s','');\n" "$PKG" "$DATE"
	done | sqlite3 -init "$INIT" "${PACKAGES_DB}"
	echo "============================================================================="
	echo "The following $TOTAL source packages have been scheduled: $ALL_PACKAGES"
	echo "============================================================================="
	echo
}
#
# main
#
# refresh the apt package lists first
update_apt

# nothing to do while more than 250 packages are still waiting to be built
COUNT_SCHEDULED=$(sqlite3 "${PACKAGES_DB}" 'SELECT count(name) FROM sources_scheduled')
case "$COUNT_SCHEDULED" in
	''|*[!0-9]*)
		# without a number all the arithmetic below would be meaningless
		echo "failure to count scheduled packages, got '$COUNT_SCHEDULED'"
		exit 1
		;;
esac
if [ "$COUNT_SCHEDULED" -gt 250 ] ; then
	/srv/jenkins/bin/reproducible_html_indexes.py
	echo "$COUNT_SCHEDULED packages scheduled, nothing to do."
	exit 0
else
	echo "$COUNT_SCHEDULED packages currently scheduled, scheduling some more..."
fi

# update_sources_table reports through RESULT, not through its exit status
RESULT=0
for i in 1 2 3 4 5 ; do
	# try five times, before failing the job
	update_sources_table
	if [ "$RESULT" -eq 0 ] ; then
		break
	fi
	# waiting after the last attempt would only delay the failure
	if [ "$i" -lt 5 ] ; then
		sleep 2m
	fi
done
if [ "$RESULT" -ne 0 ] ; then
	echo "failure to update sources table"
	exit 1
fi

# each select_* call leaves its result in PACKAGES and AMOUNT
echo "Requesting 200 unknown packages..."
select_unknown_packages 200
# $(( )) instead of let: let returns non-zero when the result is 0, which
# would abort the script if errexit is enabled
TOTAL=$(( COUNT_SCHEDULED + AMOUNT ))
echo "So in total now $TOTAL packages about to be scheduled."
ALL_PACKAGES="$PACKAGES"
MESSAGE="Scheduled $AMOUNT unknown packages"

# TOTAL cannot exceed 450 here (at most 250 already scheduled plus at most
# 200 unknown ones), so a plain else keeps NEW defined in every case
if [ "$TOTAL" -le 250 ] ; then
	NEW=50
else
	NEW=25
fi
echo "Requesting $NEW new versions..."
select_new_versions $NEW
TOTAL=$(( TOTAL + AMOUNT ))
echo "So in total now $TOTAL packages about to be scheduled."
ALL_PACKAGES="$ALL_PACKAGES $PACKAGES"
MESSAGE="$MESSAGE, $AMOUNT packages with new versions"

# the fuller the queue, the fewer already tested packages are re-scheduled
if [ "$TOTAL" -lt 250 ] ; then
	OLD=200
elif [ "$TOTAL" -le 350 ] ; then
	OLD=100
else
	OLD=1
fi
echo "Requesting $OLD old packages..."
select_old_versions $OLD
echo -n "Found $AMOUNT old packages, "
TOTAL=$(( TOTAL + AMOUNT ))
ALL_PACKAGES="$ALL_PACKAGES $PACKAGES"
echo "So in total now $TOTAL packages about to be scheduled."
MESSAGE="$MESSAGE and $AMOUNT packages with the same version again, for a total of $TOTAL scheduled packages."

# finally
schedule_packages
/srv/jenkins/bin/reproducible_html_indexes.py
echo
echo "$MESSAGE"
kgb-client --conf /srv/jenkins/kgb/debian-reproducible.conf --relay-msg "$MESSAGE"
echo
|