summaryrefslogtreecommitdiffstats
path: root/bin/reproducible_build.sh
blob: ccb49cceadbfecb5ceddf9c25ba7ae76fe7e7369 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/bin/bash

# Copyright 2014 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2

# pull in the shared jenkins helpers and let them process the job's arguments
source /srv/jenkins/bin/common-functions.sh
common_init "$@"

# sqlite database holding the results, plus the file handed to sqlite3 -init
INIT=/var/lib/jenkins/reproducible.init
PACKAGES_DB=/var/lib/jenkins/reproducible.db
# nothing can be scheduled or recorded without the database
[ -f $PACKAGES_DB ] || {
	echo "$PACKAGES_DB doesn't exist, no builds possible."
	exit 1
}

# result directories published via userContent
mkdir -p /var/lib/jenkins/userContent/{dbd,buildinfo,rbuild}/

# show the deb-src lines for sid; sources.list has to contain some
grep deb-src /etc/apt/sources.list | grep sid
# a failed apt-get update is retried once after a random pause of 30-99
# seconds; a second failure is deliberately ignored
if ! sudo apt-get update ; then
	sleep $(( RANDOM % 70 + 30 ))
	sudo apt-get update || true
fi

# update sources table in db
#
# Downloads Sources.xz for sid from $MIRROR and mirrors it into $PACKAGES_DB:
# the "sources" table is emptied and refilled with one "package,version" row
# per pair of Package:/Version: lines, and "source_stats" receives the number
# of distinct source package names.
#
# Globals read:    MIRROR, INIT, PACKAGES_DB
# Globals written: TMPFILE (the downloaded Sources.xz, intentionally left in
#                  place for the caller), CSVFILE (removed again), P_IN_SOURCES
#
# If the download yields no package data at all, a warning is printed to
# stderr and the database is left untouched.
update_sources_table() {
	TMPFILE=$(mktemp)
	curl "$MIRROR/dists/sid/main/source/Sources.xz" > "$TMPFILE"
	CSVFILE=$(mktemp)
	# Reduce the index to alternating name/version lines and join each pair
	# into one CSV row.
	# NOTE(review): this relies on Package: preceding Version: in every
	# stanza; nothing here enforces that.
	xzcat "$TMPFILE" \
		| grep -E "(^Package:|^Version:)" \
		| sed "s#^Version: ##g; s#Package: ##g" \
		| while read -r PKG ; do read -r VERSION ; echo "$PKG,$VERSION" ; done > "$CSVFILE"
	if [ ! -s "$CSVFILE" ] ; then
		# e.g. a failed download: emptying the table now would leave the
		# database without any sources until the next successful run
		echo "Warning: no package data found in $MIRROR/dists/sid/main/source/Sources.xz, sources table not updated." >&2
		rm "$CSVFILE"
		return 0
	fi
	sqlite3 -csv -init "$INIT" "${PACKAGES_DB}" "DELETE from sources"
	echo ".import $CSVFILE sources" | sqlite3 -csv -init "$INIT" "${PACKAGES_DB}"
	# update amount of available packages (for doing statistics later)
	P_IN_SOURCES=$(xzcat "$TMPFILE" | grep "^Package" | grep -v "^Package-List:" | cut -d " " -f2 | sort -u | wc -l)
	sqlite3 -init "$INIT" "${PACKAGES_DB}" "REPLACE INTO source_stats VALUES (\"sid\", \"${P_IN_SOURCES}\")"
	rm "$CSVFILE" # $TMPFILE is still being used
}

set +x
# Choose the source packages to build; the result is the whitespace separated
# list $PACKAGES. $1 selects the strategy and $2 the number of packages wanted:
#   "unknown"     - random sid packages that have no row in source_packages yet
#   "known"       - packages recorded as unreproducible or FTBFS whose recorded
#                   version compares lower than the one in the sources table,
#                   oldest build first
#   anything else - entries of the CANDIDATES array (set by
#                   reproducible_candidates.sh) that have no row in
#                   source_packages yet
if [ "$1" = "unknown" ] ; then
	update_sources_table
	AMOUNT=$2
	REAL_AMOUNT=0
	# look at three times as many names as requested, since some of them
	# will already be in the database
	GUESSES=$(echo "${AMOUNT}*3" | bc)
	PACKAGES=""
	# FIXME: blacklisted is a valid status in the db which should be used...
	# $TMPFILE is the Sources.xz left behind by update_sources_table
	CANDIDATES=$(xzcat "$TMPFILE" | grep "^Package" | grep -v "^Package-List:" | cut -d " " -f2 | grep -E -v "^(linux|cups|zurl|openclipart)$" | sort -R | head -n "$GUESSES" | xargs echo)
	for PKG in $CANDIDATES ; do
		# stop as soon as the requested number of packages has been found
		if [ "$REAL_AMOUNT" -eq "$AMOUNT" ] ; then
			break
		fi
		RESULT=$(sqlite3 "${PACKAGES_DB}" "SELECT name FROM source_packages WHERE name = \"${PKG}\"")
		if [ "$RESULT" = "" ] ; then
			PACKAGES="${PACKAGES} $PKG"
			REAL_AMOUNT=$(( REAL_AMOUNT + 1 ))
		fi
	done
elif [ "$1" = "known" ] ; then
	update_sources_table
	AMOUNT=$2
	# FIXME: blacklisted is a valid status in the db which should be used...
	PACKAGES=$(sqlite3 -init "$INIT" "${PACKAGES_DB}" "SELECT DISTINCT source_packages.name FROM source_packages,sources WHERE sources.version IN (SELECT version FROM sources WHERE name=source_packages.name ORDER by sources.version DESC LIMIT 1) AND (( source_packages.status = 'unreproducible' OR source_packages.status = 'FTBFS') AND source_packages.name = sources.name AND source_packages.version < sources.version) ORDER BY source_packages.build_date LIMIT $AMOUNT" | grep -E -v "^(linux|cups|zurl|openclipart)$" | xargs -r echo)
else
	# CANDIDATES is defined in that file
	. /srv/jenkins/bin/reproducible_candidates.sh
	PACKAGES=""
	AMOUNT=$2
	REAL_AMOUNT=0
	# walk the array elements themselves; counting indices up to the array
	# length would visit one non-existent (empty) entry past the end
	for PKG in "${CANDIDATES[@]}" ; do
		if [ "$REAL_AMOUNT" -eq "$AMOUNT" ] ; then
			break
		fi
		# FIXME: blacklisted is a valid status in the db which should be used...
		RESULT=$(sqlite3 "${PACKAGES_DB}" "SELECT name FROM source_packages WHERE name = \"${PKG}\"")
		if [ "$RESULT" = "" ] ; then
			PACKAGES="${PACKAGES} $PKG"
			REAL_AMOUNT=$(( REAL_AMOUNT + 1 ))
		fi
	done
fi
# from here on AMOUNT is the number of packages actually selected
AMOUNT=0
for PKG in $PACKAGES ; do
	AMOUNT=$(( AMOUNT + 1 ))
done
echo "============================================================================="
echo "The following $AMOUNT source packages will be build: ${PACKAGES}"
echo "============================================================================="
echo
# TMPFILE only exists if update_sources_table was called above
if [ -n "${TMPFILE:-}" ] ; then
	rm -f -- "$TMPFILE"
fi

# Remove the scratch directory $TMPDIR together with everything in it.
# Used as handler for the INT, TERM and EXIT traps and also called directly
# once all packages have been processed.
cleanup_all() {
	# quoted (and protected by --) so that a path containing whitespace is
	# removed as one operand instead of being split into unrelated ones
	rm -r -- "$TMPDIR"
}

# Delete the published artifacts of earlier builds of $SRCPACKAGE (any
# version): its rbuild logs, debbindiff reports and .buildinfo files below
# /var/lib/jenkins/userContent. Output of rm is discarded.
cleanup_userContent() {
	local entry
	# each entry is "<subdirectory>:<file name suffix>"
	for entry in rbuild:rbuild.log dbd:debbindiff.html buildinfo:buildinfo ; do
		rm -f /var/lib/jenkins/userContent/${entry%%:*}/${SRCPACKAGE}_*.${entry#*:} > /dev/null 2>&1
	done
}

# Give the next package a clean working directory: remove the b1/ and b2/
# result directories and every file named ${SRCPACKAGE}_* from the current
# directory. Output of the second rm is discarded.
cleanup_prebuild() {
	rm -rf b1 b2
	rm -f ${SRCPACKAGE}_* &> /dev/null
}

# Scratch directory below the current directory; the script changes into it
# before the builds start. Quoted so that a working directory containing
# whitespace does not break mktemp or cd.
# NOTE(review): TMPDIR is also the variable mktemp consults for its default
# location, so later plain mktemp calls may end up in here if it is
# exported -- confirm this is intended.
TMPDIR=$(mktemp --tmpdir="$PWD" -d)
# number of "processor" lines in /proc/cpuinfo; "|| true" keeps a count of 0
# from being treated as a failure
NUM_CPU=$(grep -c ^processor /proc/cpuinfo || true)
# counters and package lists for the summary printed at the very end
COUNT_TOTAL=0
COUNT_GOOD=0
COUNT_BAD=0
COUNT_SKIPPED=0
GOOD=""
BAD=""
SOURCELESS=""
SKIPPED=""
# remove the scratch directory on interruption, termination and exit
trap cleanup_all INT TERM EXIT
cd "$TMPDIR"
# Main loop: for every selected source package download the source, build it
# twice with pbuilder, compare the two results with debbindiff and record the
# outcome in the source_packages table of $PACKAGES_DB.
# Statuses written by this loop: "404", "not for us", "reproducible",
# "unreproducible" and "FTBFS".
for SRCPACKAGE in ${PACKAGES} ; do
	set +x
	echo "============================================================================="
	echo "Trying to build ${SRCPACKAGE} reproducibly now."
	echo "============================================================================="
	set -x
	let "COUNT_TOTAL=COUNT_TOTAL+1"
	cleanup_prebuild
	# failing commands must not abort the script from here on; set -e is only
	# switched back on further down, after debbindiff has run
	set +e
	DATE=$(date +'%Y-%m-%d %H:%M')
	# pick one of the versions apt knows about: the first one in reverse
	# string sort order (sort -r, not a Debian version comparison)
	VERSION=$(apt-cache showsrc ${SRCPACKAGE} | grep ^Version | cut -d " " -f2 | sort -r | head -1)
	# check if we tested this version already before...
	STATUS=$(sqlite3 ${PACKAGES_DB} "SELECT status FROM source_packages WHERE name = \"${SRCPACKAGE}\" AND version = \"${VERSION}\"")
	# skip if we know this version and status = reproducible or unreproducible or FTBFS
	if [ "$STATUS" = "reproducible" ] || [ "$STATUS" = "unreproducible" ] || [ "$STATUS" = "FTBFS" ] ; then
		echo "Package ${SRCPACKAGE} (${VERSION}) with status '$STATUS' skipped, no newer version available."
		let "COUNT_SKIPPED=COUNT_SKIPPED+1"
		SKIPPED="${SRCPACKAGE} ${SKIPPED}"
		continue
	fi
	# the log starts out under the placeholder version "None"; it is renamed
	# below once the version has been read from the downloaded .dsc
	RBUILDLOG=/var/lib/jenkins/userContent/rbuild/${SRCPACKAGE}_None.rbuild.log
	# host has only sid in deb-src in sources.list
	apt-get source --download-only --only-source ${SRCPACKAGE} > ${RBUILDLOG} 2>&1
	RESULT=$?
	if [ $RESULT != 0 ] ; then
		# download failed: keep a directory listing in the log and record the
		# package with version "None" and status "404"
		ls -l ${SRCPACKAGE}* >> ${RBUILDLOG}
		SOURCELESS="${SOURCELESS} ${SRCPACKAGE}"
		sqlite3 -init $INIT ${PACKAGES_DB} "REPLACE INTO source_packages VALUES (\"${SRCPACKAGE}\", \"None\", \"404\", \"$DATE\")"
		set +x
		echo "Warning: ${SRCPACKAGE} is not a source package, or was removed or renamed. Please investigate."
		continue
	else
		# from now on VERSION is the version stated in the downloaded .dsc
		VERSION=$(grep "^Version: " ${SRCPACKAGE}_*.dsc| grep -v "GnuPG v" | sort -r | head -1 | cut -d " " -f2-)
		# EPOCH_FREE_VERSION was too long
		# (the part after the first ":", or the whole version if there is none)
		EVERSION=$(echo $VERSION | cut -d ":" -f2)
		# preserve RBUILDLOG as TMPLOG, then cleanup userContent from previous builds,
		# and then access RBUILDLOG with its correct name (=eversion)
		TMPLOG=$(mktemp)
		mv ${RBUILDLOG} ${TMPLOG}
		cleanup_userContent
		RBUILDLOG=/var/lib/jenkins/userContent/rbuild/${SRCPACKAGE}_${EVERSION}.rbuild.log
		mv ${TMPLOG} ${RBUILDLOG}
		# check whether the package is not for us...
		ARCH=$(grep "^Architecture: " ${SRCPACKAGE}_*.dsc| sort -r | head -1 | cut -d " " -f2-)
		# the quoted right-hand sides are matched as literal substrings, so the
		# package is only skipped if none of the four strings occurs anywhere
		# in the Architecture field
		# NOTE(review): this also lets values such as "kfreebsd-any" through,
		# and the "linux-amd64" test is already covered by "amd64" -- confirm
		if [[ ! "$ARCH" =~ "amd64" ]] && [[ ! "$ARCH" =~ "all" ]] && [[ ! "$ARCH" =~ "any" ]] && [[ ! "$ARCH" =~ "linux-amd64" ]]; then
			sqlite3 -init $INIT ${PACKAGES_DB} "REPLACE INTO source_packages VALUES (\"${SRCPACKAGE}\", \"${VERSION}\", \"not for us\", \"$DATE\")"
			echo "Package ${SRCPACKAGE} (${VERSION}) shall only be build on \"$ARCH\" and was thus skipped."
			let "COUNT_SKIPPED=COUNT_SKIPPED+1"
			SKIPPED="${SRCPACKAGE} ${SKIPPED}"
			continue
		fi
		# first build; its output is shown and appended to the log
		sudo DEB_BUILD_OPTIONS="parallel=$NUM_CPU" pbuilder --build --debbuildopts "-b" --basetgz /var/cache/pbuilder/base-reproducible.tgz --distribution sid ${SRCPACKAGE}_*.dsc 2>&1 | tee -a ${RBUILDLOG}
		# the build counts as successful if pbuilder left a .changes file behind
		if [ -f /var/cache/pbuilder/result/${SRCPACKAGE}_${EVERSION}_amd64.changes ] ; then
			mkdir b1 b2
			# keep the results of the first build in b1/
			dcmd cp /var/cache/pbuilder/result/${SRCPACKAGE}_${EVERSION}_amd64.changes b1
			# the .changes file might not contain the original sources archive
			# so first delete files from .dsc, then from .changes file
			# NOTE(review): the .dsc downloaded above is named
			# ${SRCPACKAGE}_${EVERSION}.dsc (without _amd64) -- verify that the
			# _amd64.dsc removed here actually exists
			sudo dcmd rm /var/cache/pbuilder/result/${SRCPACKAGE}_${EVERSION}_amd64.dsc
			sudo dcmd rm /var/cache/pbuilder/result/${SRCPACKAGE}_${EVERSION}_amd64.changes
			# second build; unlike the first one its output is not added to the log
			sudo DEB_BUILD_OPTIONS="parallel=$NUM_CPU" pbuilder --build --debbuildopts "-b" --basetgz /var/cache/pbuilder/base-reproducible.tgz --distribution sid ${SRCPACKAGE}_${EVERSION}.dsc
			# keep the results of the second build in b2/
			dcmd cp /var/cache/pbuilder/result/${SRCPACKAGE}_${EVERSION}_amd64.changes b2
			# and again (see the comment at the first removal above)
			sudo dcmd rm /var/cache/pbuilder/result/${SRCPACKAGE}_${EVERSION}_amd64.dsc
			sudo dcmd rm /var/cache/pbuilder/result/${SRCPACKAGE}_${EVERSION}_amd64.changes
			cat b1/${SRCPACKAGE}_${EVERSION}_amd64.changes | tee -a ${RBUILDLOG}
			# name of the debbindiff report: <name of the .dsc without .dsc>.debbindiff.html
			LOGFILE=$(ls ${SRCPACKAGE}_${EVERSION}.dsc)
			LOGFILE=$(echo ${LOGFILE%.dsc}.debbindiff.html)
			BUILDINFO=${SRCPACKAGE}_${EVERSION}_amd64.buildinfo
			# compare both builds, giving up after 15 minutes
			# NOTE(review): with "2>&1 >>" only stdout is appended to the log,
			# stderr keeps going to the script's stdout; if both were meant to
			# end up in the log the order would have to be ">> file 2>&1"
			( timeout 15m /var/lib/jenkins/debbindiff.git/debbindiff.py --html ./${LOGFILE} b1/${SRCPACKAGE}_${EVERSION}_amd64.changes b2/${SRCPACKAGE}_${EVERSION}_amd64.changes ) 2>&1 >> ${RBUILDLOG}
			RESULT=$?
			set -e
			# 124 is what timeout returns when it had to stop the command;
			# an exit code of 1 is treated as a crash of debbindiff
			if [ $RESULT -eq 124 ] ; then
				echo "$(date) - debbindiff.py was killed after running into timeouot..." >> ${RBUILDLOG}
			elif [ $RESULT -eq 1 ] ; then
				echo "$(date) - debbindiff.py crashed..." >> ${RBUILDLOG}
			fi
			# reproducible means: debbindiff wrote no report and the first build
			# produced a .buildinfo file
			if [ ! -f ./${LOGFILE} ] && [ -f b1/${BUILDINFO} ] ; then
				cp b1/${BUILDINFO} /var/lib/jenkins/userContent/buildinfo/
				figlet ${SRCPACKAGE}
				echo
				echo "${SRCPACKAGE} built successfully and reproducibly."
				sqlite3 -init $INIT ${PACKAGES_DB} "REPLACE INTO source_packages VALUES (\"${SRCPACKAGE}\", \"${VERSION}\", \"reproducible\",  \"$DATE\")"
				let "COUNT_GOOD=COUNT_GOOD+1"
				GOOD="${SRCPACKAGE} ${GOOD}"
			else
				# the .buildinfo file may be missing here, hence "|| true"
				cp b1/${BUILDINFO} /var/lib/jenkins/userContent/buildinfo/ || true
				if [ -f ./${LOGFILE} ] ; then
					# FIXME: work around debbindiff not having external CSS support (#764470)
					# should really be fixed in debbindiff and just moved....
					if grep -q "Generated by debbindiff 3" ./${LOGFILE} ; then
						# drop the inline <style> element and add a Content-Type
						# meta tag plus a link to ../static/style_dbd.css before
						# </head> while publishing the report
						sed '/\<style\>/,/<\/style>/{//!d}' ./${LOGFILE} |grep -v "style>" | sed -s 's#</head>#  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n  <link href="../static/style_dbd.css" type="text/css" rel="stylesheet" />\n</head>#' > /var/lib/jenkins/userContent/dbd/${LOGFILE}
					else
						mv ./${LOGFILE} /var/lib/jenkins/userContent/dbd/
					fi
				fi
				sqlite3 -init $INIT ${PACKAGES_DB} "REPLACE INTO source_packages VALUES (\"${SRCPACKAGE}\", \"${VERSION}\", \"unreproducible\", \"$DATE\")"
				set +x
				echo -n "Warning: ${SRCPACKAGE} failed to build reproducibly."
				if [ ! -f b1/${BUILDINFO} ] ; then
					echo " .buildinfo file is missing."
				else
					echo
				fi
				let "COUNT_BAD=COUNT_BAD+1"
				BAD="${SRCPACKAGE} ${BAD}"
			fi
			set -x
			rm b1 b2 -rf
		else
			# no .changes file after the first build: failed to build from source
			sqlite3 -init $INIT ${PACKAGES_DB} "REPLACE INTO source_packages VALUES (\"${SRCPACKAGE}\", \"${VERSION}\", \"FTBFS\", \"$DATE\")"
			set +x
			echo "Warning: ${SRCPACKAGE} failed to build from source."
		fi
		set -x
		# remove the downloaded source: first the .dsc together with the files
		# it lists, then anything else named after the package
		dcmd rm ${SRCPACKAGE}_${EVERSION}.dsc
		rm -f ${SRCPACKAGE}_* > /dev/null 2>&1
	fi

	# progress report; not reached for packages that were skipped via "continue"
	set +x
	echo "============================================================================="
	echo "$COUNT_TOTAL of $AMOUNT done. Previous package: ${SRCPACKAGE}"
	echo "============================================================================="
	set -x
done
# leave the scratch directory, remove it and put the INT/TERM/EXIT traps back
# to their defaults now that the cleanup has been done
cd ..
cleanup_all
trap - INT TERM EXIT

# final summary, printed with tracing switched off
set +x
printf '%s\n' \
	"" \
	"" \
	"$COUNT_TOTAL packages attempted to build in total." \
	"$COUNT_GOOD packages successfully built reproducibly: ${GOOD}" \
	"$COUNT_SKIPPED packages skipped (either because they were successfully built reproducibly in the past or because they are not Architecture: 'any' nor 'all' nor 'amd64'): ${SKIPPED}" \
	"$COUNT_BAD packages failed to built reproducibly: ${BAD}" \
	"The following source packages doesn't exist in sid: $SOURCELESS"