summaryrefslogtreecommitdiffstats
path: root/bin/reproducible_create_meta_pkg_sets.sh
blob: e8494912876240f9ccfe7efabf5b1ccc21948208 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#!/bin/bash

# Copyright 2015 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2

DEBUG=true
. /srv/jenkins/bin/common-functions.sh
common_init "$@"

# common code defining db access
. /srv/jenkins/bin/reproducible_common.sh

ARCH=amd64

# helper functions
# Rewrite $TMPFILE in place: on input it holds package names and/or
# dependency-style lists (names optionally followed by "(version)",
# "[arch]" restrictions, commas or a ":qualifier"); on output it holds a
# sorted, duplicate-free list of source package names, one per line.
#
# Globals read:    TMPFILE, TMPFILE2 (scratch file), PACKAGES
# Globals written: none (the files named by TMPFILE and TMPFILE2 are modified,
#                  the scratch file is removed again at the end)
# Returns:         0, also when the input list is empty
convert_into_source_packages_only() {
	local all_pkgs pkg src
	# start from an empty scratch file, so that an empty input list simply
	# produces an empty result instead of failing on a missing file below
	: > "${TMPFILE2}"
	# strip everything after a colon, version and architecture restrictions
	# and commas, then put every remaining word on a line of its own
	all_pkgs=$(cut -d ":" -f1 "$TMPFILE" | sed "s#([^()]*)##g ; s#\[[^][]*\]##g ; s#,##g ; s# #\n#g" | sort -u)
	# $all_pkgs is deliberately unquoted: one loop iteration per word
	for pkg in $all_pkgs ; do
		# prefer the Source field of the binary package, fall back to its
		# Package field and finally to the name as given (the latter covers
		# names unknown to $PACKAGES).
		# $PACKAGES is left unquoted as it is the output of an "ls" glob.
		src=$(grep-dctrl -X -n -FPackage -sSource "$pkg" $PACKAGES || true)
		[ -n "$src" ] || src=$(grep-dctrl -X -n -FPackage -sPackage "$pkg" $PACKAGES || true)
		[ -n "$src" ] || src=$pkg
		# deliberately unquoted: multi-line grep-dctrl output is collapsed
		# onto a single line, of which only the first word is kept below
		echo $src >> "${TMPFILE2}"
	done
	# grep-dctrl output might include versions (space separated) and archs (colon separated)
	# and duplicates
	cut -d " " -f1 "${TMPFILE2}" | cut -d ":" -f1 | sort -u > "$TMPFILE"
	rm -f "${TMPFILE2}"
}
# Rewrite $TMPFILE in place: on input it holds package stanzas in deb822
# format, on output a sorted, duplicate-free list of source package names,
# one per line. ${TMPFILE2} is used as scratch file and is left behind
# holding the unsorted list.
convert_from_deb822_into_source_packages_only() {
	# given a Packages file in deb822 format on standard input, the
	# following perl "oneliner" outputs the associated (unversioned)
	# source package names, one per line
	#
	# for every stanza: if there is no Source field, the Package field is
	# printed; otherwise the Source field is printed, cut down to its first
	# word if it is followed by whitespace (eg. a version in parentheses)
	perl -e 'use Dpkg::Control;while(1){$c=Dpkg::Control->new();' \
		-e 'last if not $c->parse(STDIN);$p=$c->{"Package"};' \
		-e '$s=$c->{"Source"};if (not defined $s){print "$p\n"}' \
		-e 'else{$s=~s/\s*([\S]+)\s+.*/\1/;print "$s\n"}}' \
		> ${TMPFILE2} < $TMPFILE
	# several binary packages can share one source package, so de-duplicate
	sort -u ${TMPFILE2} > $TMPFILE
}
# Install the freshly generated list in $TMPFILE as $TPATH/$1.
#
# This is mostly done to not accidentally overwrite the lists with garbage,
# eg. when external services are down:
#  - a new list without any words in it is never installed
#  - if $TPATH/$1 already exists and the word count of the old list is more
#    than 107% or less than 93% of the word count of the new one, the new
#    list is saved as $TPATH/$1.new, a diff is shown and the script is
#    aborted with exit code 1
#
# Arguments:       $1 - file name of the package set below $TPATH
# Globals read:    TPATH, TMPFILE
# Globals written: TARGET
update_if_similar() {
	local old_len new_len percent
	# set this first, it is needed for the messages in every branch
	TARGET="$TPATH/$1"
	new_len=0
	if [ -s "$TMPFILE" ] ; then
		new_len=$(wc -w < "$TMPFILE")
	fi
	# a file consisting of whitespace only is as useless as an empty one
	# (and would cause a division by zero below)
	if [ "$new_len" -eq 0 ] ; then
		echo "$(date) - $TARGET not updated, $TMPFILE is empty."
		return 0
	fi
	if [ -f "$TARGET" ] ; then
		old_len=$(wc -w < "$TARGET")
		# integer percentage, rounded towards zero
		percent=$(( old_len * 100 / new_len ))
		if [ "$percent" -gt 107 ] || [ "$percent" -lt 93 ] ; then
			mv "$TMPFILE" "$TARGET.new"
			echo
			echo "diff $TARGET $TARGET.new"
			# diff exits non-zero when the files differ, this must not
			# prevent the explanation below from being shown
			diff "$TARGET" "$TARGET.new" || true
			echo
			echo "Too much difference, aborting. Please investigate and update manually."
			exit 1
		fi
	fi
	mv "$TMPFILE" "$TARGET"
	echo "$(date) - $TARGET updated."
}

# Succeed if the package set with index $1 in META_PKGSET has to be
# (re)generated: its file below $TPATH either does not exist or is matched by
# "find -mtime +0", ie. it has not been modified within the last day.
_pkgset_needs_update() {
	local pkgset="${META_PKGSET[$1]}.pkgset"
	[ ! -f "$TPATH/$pkgset" ] || [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "$pkgset")" ]
}

# Append the Build-Depends (stripped of version and architecture restrictions
# and commas) of every source package listed in the package set with index $1
# in META_PKGSET to $TMPFILE.
_collect_build_depends() {
	local pkg
	# $SOURCES is left unquoted as it is the output of an "ls" glob
	for pkg in $(cat "$TPATH/${META_PKGSET[$1]}.pkgset") ; do
		grep-dctrl -sBuild-Depends -n -X -FPackage "$pkg" $SOURCES | sed "s#([^()]*)##g ; s#\[[^][]*\]##g ; s#,##g" >> "$TMPFILE"
	done
}

# (Re)generate all meta package sets in $TPATH which are missing or outdated.
# Each set is created in $TMPFILE (using ${TMPFILE2} as scratch file) and
# then installed via update_if_similar, which aborts the script if a new
# list differs too much from the existing one.
#
# Globals read: META_PKGSET, TPATH, CHPATH, DISTNAME, ARCH, PACKAGES, SOURCES,
#               TMPFILE, TMPFILE2
update_pkg_sets() {
	local highest sql_query

	# the essential package set
	if _pkgset_needs_update 1 ; then
		chdist --data-dir="$CHPATH" grep-dctrl-packages "$DISTNAME" -X -FEssential yes > "$TMPFILE"
		convert_from_deb822_into_source_packages_only
		update_if_similar "${META_PKGSET[1]}.pkgset"
	fi

	# the required package set
	if _pkgset_needs_update 2 ; then
		chdist --data-dir="$CHPATH" grep-dctrl-packages "$DISTNAME" -X -FPriority required > "$TMPFILE"
		convert_from_deb822_into_source_packages_only
		update_if_similar "${META_PKGSET[2]}.pkgset"
	fi

	# build-essential
	if _pkgset_needs_update 3 ; then
		chdist --data-dir="$CHPATH" grep-dctrl-packages "$DISTNAME" -X \( -FBuild-Essential yes --or -FPackage build-essential \) > "${TMPFILE2}"
		# here we want the installable set:
		schroot --directory /tmp -c source:jenkins-dpkg-jessie -- dose-deb-coinstall --deb-native-arch="$ARCH" --bg=$PACKAGES --fg="${TMPFILE2}" > "$TMPFILE"
		convert_from_deb822_into_source_packages_only
		update_if_similar "${META_PKGSET[3]}.pkgset"
	fi

	# build-essential-depends
	#
	# This set is created using the following procedure:
	#
	#  1. take the binary package build-essential and put it into set S
	#  2. go over every package in S and
	#      2.1. if it is a binary package
	#          2.1.1 add all its Depends and Pre-Depends to S
	#          2.1.2 add the source package it builds from to S
	#      2.2. if it is a source package add all its Build-Depends,
	#           Build-Depends-Indep and Build-Depends-Arch to S
	#  3. if step 2 added new packages, repeat step 2, otherwise exit
	#
	# This set is important because a package can only be trusted if
	# also all its dependencies, all its build dependencies and
	# recursively their own dependencies and build dependencies can be
	# trusted.
	# So making this set reproducible is required to make anything
	# in the essential or build-essential set trusted.
	if _pkgset_needs_update 4 ; then
		curl http://bootstrap.debian.net/importance_metric_all.txt > "$TMPFILE2"
		# retrieve the highest number in the third column (packages affect)
		highest=$(sort -n -k 3 "$TMPFILE2" | tail -1 | cut -f 3)
		# now get all lines where the third column is equal to this value
		awk '$3 == "'$highest'" { print $1 }' "$TMPFILE2" | cut -d ':' -f 2 > "$TMPFILE"
		update_if_similar "${META_PKGSET[4]}.pkgset"
	fi

	# popcon top 1337 installed sources
	if _pkgset_needs_update 5 ; then
		sql_query="SELECT popcon_src.source FROM popcon_src ORDER BY popcon_src.insts DESC LIMIT 1337;"
		PGPASSWORD=public-udd-mirror \
			psql -U public-udd-mirror \
			-h public-udd-mirror.xvm.mit.edu -p 5432 \
			-t \
			udd -c"${sql_query}" > "$TMPFILE"
		update_if_similar "${META_PKGSET[5]}.pkgset"
	fi

	# installed on one or more .debian.org machines
	if _pkgset_needs_update 6 ; then
		# one day we will get a proper data provider from DSA...
		# (so far it was a manual "dpkg --get-selections" on all machines
		# converted into a list of source packages...)
		cat /srv/jenkins/bin/reproducible_installed_on_debian.org > "$TMPFILE"
		update_if_similar "${META_PKGSET[6]}.pkgset"
	fi

	# packages which had a DSA
	if _pkgset_needs_update 7 ; then
		svn export svn://svn.debian.org/svn/secure-testing/data/DSA/list "${TMPFILE2}"
		grep "^\[" "${TMPFILE2}" | grep "DSA-" | cut -d " " -f5|sort -u > "$TMPFILE"
		convert_into_source_packages_only
		update_if_similar "${META_PKGSET[7]}.pkgset"
	fi

	# gnome and everything it depends on
	if _pkgset_needs_update 8 ; then
		chdist --data-dir="$CHPATH" grep-dctrl-packages "$DISTNAME" -X \( -FPriority required --or -FPackage gnome \) > "${TMPFILE2}"
		schroot --directory /tmp -c source:jenkins-dpkg-jessie -- dose-deb-coinstall --deb-native-arch="$ARCH" --bg=$PACKAGES --fg="${TMPFILE2}" > "$TMPFILE"
		convert_from_deb822_into_source_packages_only
		update_if_similar "${META_PKGSET[8]}.pkgset"
	fi

	# The build-depends of X tasks can be solved once dose-ceve is able to read
	# Debian source packages (possible in dose3 git but needs a new dose3 release
	# and upload to unstable)
	#
	# Ignoring parsing issues, the current method is unable to resolve virtual
	# build dependencies
	#
	# The current method also ignores Build-Depends-Indep and Build-Depends-Arch

	# all build depends of gnome
	# (the list is built by appending, so start without a file)
	rm -f "$TMPFILE"
	if _pkgset_needs_update 9 ; then
		_collect_build_depends 8
		convert_into_source_packages_only
		update_if_similar "${META_PKGSET[9]}.pkgset"
	fi

	# tails
	if _pkgset_needs_update 10 ; then
		curl http://nightly.tails.boum.org/build_Tails_ISO_feature-jessie/latest.iso.binpkgs > "$TMPFILE"
		curl http://nightly.tails.boum.org/build_Tails_ISO_feature-jessie/latest.iso.srcpkgs >> "$TMPFILE"
		convert_into_source_packages_only
		update_if_similar "${META_PKGSET[10]}.pkgset"
	fi

	# all build depends of tails
	rm -f "$TMPFILE"
	if _pkgset_needs_update 11 ; then
		_collect_build_depends 10
		convert_into_source_packages_only
		update_if_similar "${META_PKGSET[11]}.pkgset"
	fi

	# grml
	if _pkgset_needs_update 12 ; then
		curl http://grml.org/files/grml64-full_latest/dpkg.selections | cut -f1 > "$TMPFILE"
		# the download "succeeds" with an error page if the file is gone
		if ! grep '<title>404 Not Found</title>' "$TMPFILE" ; then
			convert_into_source_packages_only
			update_if_similar "${META_PKGSET[12]}.pkgset"
		else
			echo "Warning: could not download grml's latest dpkg.selections file, skipping pkg set..."
		fi
	fi

	# all build depends of grml
	rm -f "$TMPFILE"
	if _pkgset_needs_update 13 ; then
		_collect_build_depends 12
		convert_into_source_packages_only
		update_if_similar "${META_PKGSET[13]}.pkgset"
	fi

	# pkg-perl-maintainers
	if _pkgset_needs_update 14 ; then
		grep-dctrl -sPackage -n -FMaintainer pkg-perl-maintainers@lists.alioth.debian.org $SOURCES > "$TMPFILE"
		update_if_similar "${META_PKGSET[14]}.pkgset"
	fi

}

# main: for every suite in $SUITES (except experimental) set up a fresh
# chdist environment and (re)generate the meta package sets from it.
# TMPFILE and TMPFILE2 are the scratch files shared by all helper functions.
TMPFILE=$(mktemp)
TMPFILE2=$(mktemp)
for SUITE in $SUITES ; do
	if [ "$SUITE" = "experimental" ] ; then
		# no pkg sets in experimental
		continue
	fi
	echo
	echo "$(date) - Creating meta package sets for $SUITE now."
	echo

	DISTNAME="$SUITE-$ARCH"
	TPATH="/srv/reproducible-results/meta_pkgsets-$SUITE"
	CHPATH="/srv/reproducible-results/chdist-$SUITE"
	mkdir -p "$TPATH" "$CHPATH"

	# delete possibly existing dist
	# (using the full path, guarded against empty variables, so that nothing
	# else can be removed by accident)
	rm -rf "${CHPATH:?}/${DISTNAME:?}"

	# the "[arch=$ARCH]" is a workaround until #774685 is fixed
	chdist --data-dir="$CHPATH" --arch="$ARCH" create "$DISTNAME" "[arch=$ARCH]" "$MIRROR" "$SUITE" main
	chdist --data-dir="$CHPATH" --arch="$ARCH" apt-get "$DISTNAME" update

	# the apt list files the package sets are generated from
	PACKAGES=$(ls "$CHPATH/$DISTNAME"/var/lib/apt/lists/*_dists_"${SUITE}"_main_binary-"${ARCH}"_Packages)
	SOURCES=$(ls "$CHPATH/$DISTNAME"/var/lib/apt/lists/*_dists_"${SUITE}"_main_source_Sources)

	# finally
	update_pkg_sets
	echo "$(date) - All meta package sets for $SUITE created successfully."
done

rm -f "$TMPFILE" "${TMPFILE2}"
echo