summaryrefslogtreecommitdiffstats
path: root/bin/reproducible_create_meta_pkg_sets.sh
blob: 5cc66e8b0b74fec41bc37763138a34c39f902827 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#!/bin/bash

# Copyright 2015 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2

# DEBUG is assigned before the shared helpers are sourced (presumably they
# read it - not visible in this file). common_init is handed this script's
# arguments unchanged.
DEBUG=true
. /srv/jenkins/bin/common-functions.sh
common_init "$@"

# common code defining db access
# NOTE(review): MIRROR and the META_PKGSET array used further down are not
# assigned in this file; presumably one of the two sourced files provides them.
. /srv/jenkins/bin/reproducible_common.sh

# TPATH receives the generated *.pkgset files, CHPATH holds the chdist tree
# used to query the archive.
TPATH=/srv/reproducible-results/meta_pkgsets
CHPATH=/srv/reproducible-results/chdist
mkdir -p "$TPATH" "$CHPATH"

ARCH=amd64
SUITE=sid
DISTNAME="$SUITE-$ARCH"


# delete possibly existing dist
# (addressed by its full path instead of cd + relative rm, so a failing cd
# can never make rm -rf act on the wrong directory; ":?" aborts should one
# of the two variables ever be empty)
rm -rf "${CHPATH:?}/${DISTNAME:?}"

# the "[arch=$ARCH]" is a workaround until #774685 is fixed
# $MIRROR is not assigned in this file and stays unquoted so that an empty
# value does not turn into an empty argument.
chdist --data-dir="$CHPATH" --arch="$ARCH" create "$DISTNAME" "[arch=$ARCH]" $MIRROR "$SUITE" main
chdist --data-dir="$CHPATH" --arch="$ARCH" apt-get "$DISTNAME" update

# apt's downloaded index files for this suite; the leading part of the file
# name depends on the mirror, hence the glob.
PACKAGES=$(ls "$CHPATH/$DISTNAME"/var/lib/apt/lists/*_dists_${SUITE}_main_binary-${ARCH}_Packages)
SOURCES=$(ls "$CHPATH/$DISTNAME"/var/lib/apt/lists/*_dists_${SUITE}_main_source_Sources)
# scratch files shared by the helper functions and all sections below
TMPFILE=$(mktemp)
TMPFILE2=$(mktemp)

# helper functions
convert_into_source_packages_only() {
	# Rewrites $TMPFILE in place: the (binary or source) package names it
	# contains are replaced by a sorted, de-duplicated list of source
	# package names, one per line.
	#
	# Reads:  TMPFILE (input/output), TMPFILE2 (scratch), PACKAGES
	# Sets:   ALL_PKGS, PKG, SRC (kept global, as before)
	#
	# Each name is looked up in $PACKAGES: the Source field wins, then the
	# Package field; names unknown to $PACKAGES are passed through as-is.

	# start from an empty scratch file (not a missing one) so that the
	# final cut/rm also work silently when there is nothing to convert
	: > "${TMPFILE2}"
	ALL_PKGS=""
	if [ -s "$TMPFILE" ] ; then
		# drop "(version)" and "[arch]" qualifiers and commas, then put
		# one word per line
		# NOTE(review): the first cut runs per input line, so on a line
		# holding several names everything after the first ":" is lost
		ALL_PKGS=$(cut -d ":" -f1 "$TMPFILE" | sed "s#([^()]*)##g ; s#\[[^][]*\]##g ; s#,##g ; s# #\n#g" | sort -u)
	fi
	# $ALL_PKGS is split on whitespace on purpose: one name per iteration
	for PKG in $ALL_PKGS ; do
		# $PACKAGES stays unquoted: it is filled from a glob and may name
		# more than one list file
		SRC=$(grep-dctrl -X -n -FPackage -sSource "$PKG" $PACKAGES || true)
		if [ -z "$SRC" ] ; then
			SRC=$(grep-dctrl -X -n -FPackage -sPackage "$PKG" $PACKAGES || true)
		fi
		[ -n "$SRC" ] || SRC=$PKG
		# unquoted on purpose: joins multi-line grep-dctrl output into a
		# single line, of which only the first word is kept below
		echo $SRC >> "${TMPFILE2}"
	done
	# grep-dctrl output might include versions (space separated) and archs (colon separated)
	# and duplicates
	cut -d " " -f1 "${TMPFILE2}" | cut -d ":" -f1 | sort -u > "$TMPFILE"
	rm -f "${TMPFILE2}"
}

convert_from_deb822_into_source_packages_only() {
	# Replaces the deb822 stanzas held in $TMPFILE with a sorted,
	# de-duplicated list of source package names. ${TMPFILE2} serves as
	# scratch space and is left behind holding the unsorted list.
	#
	# given a Packages file in deb822 format on standard input, the
	# following perl "oneliner" outputs the associated (unversioned)
	# source package names, one per line
	#
	# Per stanza: when a Source field exists, its first whitespace-delimited
	# word is printed (anything following it on the line is cut off);
	# otherwise the Package field is printed. Parsing stops at the first
	# stanza Dpkg::Control cannot parse, which includes end of input.
	perl -e 'use Dpkg::Control;while(1){$c=Dpkg::Control->new();' \
		-e 'last if not $c->parse(STDIN);$p=$c->{"Package"};' \
		-e '$s=$c->{"Source"};if (not defined $s){print "$p\n"}' \
		-e 'else{$s=~s/\s*([\S]+)\s+.*/\1/;print "$s\n"}}' \
		> ${TMPFILE2} < $TMPFILE
	sort -u ${TMPFILE2} > $TMPFILE
}
update_if_similar() {
	# Installs $TMPFILE as $TPATH/$1, unless that looks like a mistake.
	#
	# $1 - file name of the package set below $TPATH
	#
	# this is mostly done to not accidentally overwrite the lists
	# with garbage, eg. when external services are down:
	# - a new list without any word in it is never installed
	# - if a list already exists and the old word count is not within
	#   93%..107% of the new one, the new list is saved as <target>.new,
	#   a diff is printed and the whole script exits with status 1
	#
	# Reads: TPATH, TMPFILE.  Sets: TARGET, NEWLEN, LENGTH, PERCENT.

	# set up front so that the "not updated" message names the right file
	TARGET=$TPATH/$1
	NEWLEN=0
	if [ -s "$TMPFILE" ] ; then
		NEWLEN=$(wc -w < "$TMPFILE")
	fi
	# also catches files consisting of whitespace only, which would
	# otherwise lead to a division by zero below
	if [ "$NEWLEN" -eq 0 ] ; then
		echo "$(date) - $TARGET not updated, $TMPFILE is empty."
		return 0
	fi
	if [ -f "$TARGET" ] ; then
		LENGTH=$(wc -w < "$TARGET")
		# integer percentage of old size relative to new size
		PERCENT=$(( LENGTH * 100 / NEWLEN ))
		if [ "$PERCENT" -gt 107 ] || [ "$PERCENT" -lt 93 ] ; then
			mv "$TMPFILE" "$TARGET.new"
			echo
			echo diff "$TARGET" "$TARGET.new"
			diff "$TARGET" "$TARGET.new"
			echo
			echo "Too much difference, aborting. Please investigate and update manually."
			exit 1
		fi
	fi
	mv "$TMPFILE" "$TARGET"
	echo "$(date) - $TARGET updated."
}


#
# main
#

# the essential package set
# (regenerated when the stored list is at least a day old or does not exist;
# both checks look at this set's own file, META_PKGSET[1])
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[1]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[1]}.pkgset" ] ; then
	chdist --data-dir="$CHPATH" grep-dctrl-packages "$DISTNAME" -X -FEssential yes > "$TMPFILE"
	convert_from_deb822_into_source_packages_only
	update_if_similar "${META_PKGSET[1]}.pkgset"
fi

# the required package set
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[2]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[2]}.pkgset" ] ; then
	chdist --data-dir="$CHPATH" grep-dctrl-packages "$DISTNAME" -X -FPriority required > "$TMPFILE"
	convert_from_deb822_into_source_packages_only
	update_if_similar "${META_PKGSET[2]}.pkgset"
fi

# build-essential
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[3]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[3]}.pkgset" ] ; then
	chdist --data-dir="$CHPATH" grep-dctrl-packages "$DISTNAME" -X \( -FBuild-Essential yes --or -FPackage build-essential \) > "${TMPFILE2}"
	# here we want the installable set:
	# ($PACKAGES is expanded exactly as before; it is filled from a glob)
	schroot --directory /tmp -c source:jenkins-dpkg-jessie -- dose-deb-coinstall --deb-native-arch="$ARCH" --bg=$PACKAGES --fg="${TMPFILE2}" > "$TMPFILE"
	convert_from_deb822_into_source_packages_only
	update_if_similar "${META_PKGSET[3]}.pkgset"
fi

# popcon top 1337 installed sources
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[4]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[4]}.pkgset" ] ; then
	SQL_QUERY="SELECT popcon_src.source FROM popcon_src ORDER BY popcon_src.insts DESC LIMIT 1337;"
	# -t: print rows only; the result is already a list of source packages,
	# so no conversion step is needed
	PGPASSWORD=public-udd-mirror \
		psql -U public-udd-mirror \
		-h public-udd-mirror.xvm.mit.edu -p 5432 \
		-t \
		udd -c"${SQL_QUERY}" > "$TMPFILE"
	update_if_similar "${META_PKGSET[4]}.pkgset"
fi

# installed on one or more .debian.org machines
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[5]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[5]}.pkgset" ] ; then
	# one day we will get a proper data provider from DSA...
	# (so far it was a manual "dpkg --get-selections" on all machines
	# converted into a list of source packages...)
	cat /srv/jenkins/bin/reproducible_installed_on_debian.org > "$TMPFILE"
	update_if_similar "${META_PKGSET[5]}.pkgset"
fi

# packages which had a DSA
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[6]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[6]}.pkgset" ] ; then
	# ${TMPFILE2} usually exists already (mktemp, earlier sections):
	# empty it so no stale content is parsed if the download fails, and
	# pass --force because svn export does not overwrite an existing
	# destination on its own
	: > "${TMPFILE2}"
	svn export --force svn://svn.debian.org/svn/secure-testing/data/DSA/list "${TMPFILE2}"
	# keep the 5th space-separated word of every "[...] DSA-..." line
	grep "^\[" "${TMPFILE2}" | grep "DSA-" | cut -d " " -f5 | sort -u > "$TMPFILE"
	convert_into_source_packages_only
	update_if_similar "${META_PKGSET[6]}.pkgset"
fi

# gnome and everything it depends on
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[7]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[7]}.pkgset" ] ; then
	chdist --data-dir="$CHPATH" grep-dctrl-packages "$DISTNAME" -X \( -FPriority required --or -FPackage gnome \) > "${TMPFILE2}"
	# co-installable set of the selected packages
	# ($PACKAGES is expanded exactly as before; it is filled from a glob)
	schroot --directory /tmp -c source:jenkins-dpkg-jessie -- dose-deb-coinstall --deb-native-arch="$ARCH" --bg=$PACKAGES --fg="${TMPFILE2}" > "$TMPFILE"
	convert_from_deb822_into_source_packages_only
	update_if_similar "${META_PKGSET[7]}.pkgset"
fi

# The build-depends of X tasks can be solved once dose-ceve is able to read
# Debian source packages (possible in dose3 git but needs a new dose3 release
# and upload to unstable)
#
# Ignoring parsing issues, the current method is unable to resolve virtual
# build dependencies
#
# The current method also ignores Build-Depends-Indep and Build-Depends-Arch

# all build depends of gnome
# start without a scratch file, the loop below only appends to it
rm -f "$TMPFILE"
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[8]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[8]}.pkgset" ] ; then
	# $(cat ...) is split on whitespace on purpose: one package per word
	for PKG in $(cat "$TPATH/${META_PKGSET[7]}.pkgset") ; do
		# collect the Build-Depends field with "(version)", "[arch]" and
		# commas removed; $SOURCES stays unquoted as it is filled from a glob
		grep-dctrl -sBuild-Depends -n -X -FPackage "$PKG" $SOURCES | sed "s#([^()]*)##g ; s#\[[^][]*\]##g ; s#,##g" >> "$TMPFILE"
	done
	convert_into_source_packages_only
	update_if_similar "${META_PKGSET[8]}.pkgset"
fi

# tails
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[9]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[9]}.pkgset" ] ; then
	# binary and source package lists of the latest nightly ISO, concatenated
	curl http://nightly.tails.boum.org/build_Tails_ISO_feature-jessie/latest.iso.binpkgs > "$TMPFILE"
	curl http://nightly.tails.boum.org/build_Tails_ISO_feature-jessie/latest.iso.srcpkgs >> "$TMPFILE"
	convert_into_source_packages_only
	update_if_similar "${META_PKGSET[9]}.pkgset"
fi

# all build depends of tails
# start without a scratch file, the loop below only appends to it
rm -f "$TMPFILE"
# regenerate when the stored list is at least a day old (find prints its
# name, so the string is non-empty) or does not exist
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[10]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[10]}.pkgset" ] ; then
	# $(cat ...) is split on whitespace on purpose: one package per word
	for PKG in $(cat "$TPATH/${META_PKGSET[9]}.pkgset") ; do
		# collect the Build-Depends field with "(version)", "[arch]" and
		# commas removed; $SOURCES stays unquoted as it is filled from a glob
		grep-dctrl -sBuild-Depends -n -X -FPackage "$PKG" $SOURCES | sed "s#([^()]*)##g ; s#\[[^][]*\]##g ; s#,##g" >> "$TMPFILE"
	done
	convert_into_source_packages_only
	update_if_similar "${META_PKGSET[10]}.pkgset"
fi

# grml
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[11]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[11]}.pkgset" ] ; then
	# first tab-separated column of the selections file = package name
	curl http://grml.org/files/grml64-full_latest/dpkg.selections | cut -f1 > "$TMPFILE"
	# a missing file is answered with an HTML error page, which must not
	# be treated as a package list
	if ! grep '<title>404 Not Found</title>' "$TMPFILE" ; then
		convert_into_source_packages_only
		update_if_similar "${META_PKGSET[11]}.pkgset"
	else
		echo "Warning: could not download grml's latest dpkg.selections file, skipping pkg set..."
	fi
fi

# all build depends of grml
# start without a scratch file, the loop below only appends to it
rm -f "$TMPFILE"
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[12]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[12]}.pkgset" ] ; then
	# $(cat ...) is split on whitespace on purpose: one package per word
	for PKG in $(cat "$TPATH/${META_PKGSET[11]}.pkgset") ; do
		# collect the Build-Depends field with "(version)", "[arch]" and
		# commas removed; $SOURCES stays unquoted as it is filled from a glob
		grep-dctrl -sBuild-Depends -n -X -FPackage "$PKG" $SOURCES | sed "s#([^()]*)##g ; s#\[[^][]*\]##g ; s#,##g" >> "$TMPFILE"
	done
	convert_into_source_packages_only
	update_if_similar "${META_PKGSET[12]}.pkgset"
fi

# pkg-perl-maintainers
# (regenerated when the stored list is at least a day old or does not exist)
if [ -n "$(find "$TPATH" -maxdepth 1 -mtime +0 -name "${META_PKGSET[13]}.pkgset")" ] || [ ! -f "$TPATH/${META_PKGSET[13]}.pkgset" ] ; then
	# source packages whose Maintainer field matches the team address;
	# $SOURCES stays unquoted as it is filled from a glob
	grep-dctrl -sPackage -n -FMaintainer pkg-perl-maintainers@lists.alioth.debian.org $SOURCES > "$TMPFILE"
	update_if_similar "${META_PKGSET[13]}.pkgset"
fi


# finally
# remove whatever is left of the two scratch files
rm -f "$TMPFILE" "${TMPFILE2}"
echo "All meta package sets created successfully."