1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
|
#!/bin/bash
# Copyright 2014-2017 Holger Levsen <holger@layer-acht.org>
# © 2015 Mattia Rizzolo <mattia@mapreri.org>
# released under the GPLv2
# Debug switch, set before the shared helpers are sourced.
# NOTE(review): presumably consumed by common-functions.sh / common_init — confirm.
DEBUG=false
# Shared jenkins helper library. common_init is not defined in this file, so it
# presumably comes from here; it receives the script's original arguments.
. /srv/jenkins/bin/common-functions.sh
common_init "$@"
# common code defining db access
. /srv/jenkins/bin/reproducible_common.sh
# some defaults
# DIRTY is flipped to true by every check below that finds a non-fatal problem
# and is evaluated at the very end of the script.
DIRTY=false
# NOTE(review): REP_RESULTS is not referenced in this script itself; presumably
# it is read by the sourced helpers — confirm before removing.
REP_RESULTS=/srv/reproducible-results
# Print diagnostics about the shared-memory mounts of this host: a listing of
# /run/shm and /dev/shm, the content of /etc/fstab and the output of mount,
# each below a banner naming $HOSTNAME. Finally mark the run as dirty.
# Globals: HOSTNAME (read), DIRTY (set to true)
show_fstab_and_mounts() {
	local rule="################################"
	printf '%s\n' "$rule" "/dev/shm and /run/shm on $HOSTNAME" "$rule"
	ls -lartd /run/shm /dev/shm/
	printf '%s\n' "$rule" "/etc/fstab on $HOSTNAME" "$rule"
	cat /etc/fstab
	printf '%s\n' "$rule" "mount output on $HOSTNAME" "$rule"
	mount
	printf '%s\n' "$rule"
	DIRTY=true
}
#
# we fail hard
#
set -e
#
# is the filesystem writable?
#
echo "$(date -u) - testing whether /tmp is writable..."
# mktemp runs inside the condition so that its failure reaches the message
# below instead of silently aborting the script through 'set -e'.
if ! TEST=$(mktemp --tmpdir=/tmp rwtest-XXXXXX) || [ -z "$TEST" ] ; then
	echo "Failure to write a file in /tmp, assuming read-only filesystem."
	exit 1
fi
# the probe file has served its purpose
rm -- "$TEST" > /dev/null
#
# check for /dev/shm being mounted properly
#
echo "$(date -u) - testing whether /dev/shm is mounted correctly..."
# Both checks sit directly in the 'if' conditions: with 'set -e' active a
# failing stand-alone command would abort the script before "$?" could be
# examined, and the warning would never be printed.
if ! mount | grep -Eq "^tmpfs on /dev/shm" ; then
	echo "Warning: /dev/shm is not mounted correctly on $HOSTNAME, it should be a tmpfs, please tell the jenkins admins to fix this."
	show_fstab_and_mounts
fi
# the octal mode of /dev/shm (symlinks followed) has to be 1777
if [ "$(stat -c %a -L /dev/shm)" != 1777 ] ; then
	echo "Warning: /dev/shm is not mounted correctly on $HOSTNAME, it should be mounted with 1777 permissions, please tell the jenkins admins to fix this."
	show_fstab_and_mounts
fi
#
# check for /run/shm being a link to /dev/shm
#
echo "$(date -u) - testing whether /run/shm is a link..."
if [ ! -L /run/shm ] ; then
	# not a symbolic link at all
	echo "Warning: /run/shm is not a link on $HOSTNAME, please tell the jenkins admins to fix this."
	show_fstab_and_mounts
else
	# a symbolic link: its target has to be exactly /dev/shm
	case "$(readlink /run/shm)" in
		/dev/shm)
			:
			;;
		*)
			echo "Warning: /run/shm is a link, but not pointing to /dev/shm on $HOSTNAME, please tell the jenkins admins to fix this."
			show_fstab_and_mounts
			;;
	esac
fi
#
# check for hanging mounts
#
echo "$(date -u) - testing whether running 'mount' takes forever..."
# 'mount' is killed with signal 9 if it has not finished after 15 seconds.
# The status is collected via '||' so that 'set -e' cannot abort the script
# before the message below is printed.
TIMEOUT=0
timeout -s 9 15 mount > /dev/null || TIMEOUT=$?
if [ "$TIMEOUT" -ne 0 ] ; then
	echo "$(date -u) - running 'mount' takes forever, giving up."
	exit 1
fi
#
# check for correct MTU
#
echo "$(date -u) - testing whether the network interfaces MTU is 1500..."
# Collect the distinct MTU values of all interfaces whose 'ip link' line
# mentions neither LOOPBACK nor NOARP; anything other than the single value
# 1500 is treated as an error.
if ! [ "$(ip link | sed -n '/LOOPBACK\|NOARP/!s/.* mtu \([0-9]*\) .*/\1/p' | sort -u)" = "1500" ] ; then
	ip link
	echo "$(date -u) - network interfaces MTU != 1500 - this is wrong. => please \`sudo ifconfig eth0 mtu 1500\`"
	# should probably be turned into a warning if this becomes too annoying
	irc_message debian-reproducible "$HOSTNAME has wrong MTU, please tell the jenkins admins to fix this. (sudo ifconfig eth0 mtu 1500)"
	exit 1
fi
#
# check for correct future
#
# (yes, the years are hardcoded, but meh…)
echo "$(date -u) - testing whether the time is right..."
case "$(date +%Y)" in
	2019)
		# this year counts as a problem and marks the run as dirty
		echo "Warning, today is the wrong future: $(date -u)."
		DIRTY=true
		;;
	2018)
		echo "Good, today is the right future: $(date -u)."
		;;
	*)
		echo "Cherrish today, $(date -u)."
		;;
esac
#
# check for cleaned up kernels
# (on Ubuntu systems only, as those frequently run out of free space on /boot)
#
case "$(lsb_release -si)" in
	Ubuntu)
		echo "$(date -u) - testing whether only one kernel is installed..."
		# anything but exactly one /boot/vmlinuz-* entry is reported
		if ! [ "$(ls /boot/vmlinuz-*|wc -l)" = "1" ] ; then
			echo "Warning, more than one kernel in /boot:"
			ls -lart /boot/vmlinuz-*
			df -h /boot
			DIRTY=true
		fi
		;;
esac
#
# check for haveged running
#
echo "$(date -u) - testing 'haveged' is running..."
# Process list entries mentioning /usr/sbin/haveged, minus the grep itself;
# '|| true' keeps an empty result from tripping 'set -e'.
HAVEGED="$(ps fax | grep '/usr/sbin/haveged' | grep -v grep || true)"
if [ -z "$HAVEGED" ] ; then
	echo "$(date -u) - haveged ain't running, giving up."
	# Purely informational. 'systemctl status' returns non-zero for a unit that
	# is not running, which under 'set -e' would end the script right here with
	# that status instead of reaching the 'exit 1' below.
	systemctl status haveged || true
	exit 1
fi
#
# checks only for the main node
#

# Succeed if a file named jenkins.log that is larger than $1 exists below the
# directory $2.
# Arguments: $1 - size limit in find(1) -size notation without the leading
#                 '+', e.g. 42G
#            $2 - directory to search (default: /var/log/jenkins)
# Returns:   0 if such a file was found, non-zero otherwise
jenkins_log_larger_than() {
	local limit="$1"
	local logdir="${2:-/var/log/jenkins}"
	[ -n "$(find "$logdir" -name jenkins.log -size "+${limit}")" ]
}

if [ "$HOSTNAME" = "$MAINNODE" ] ; then
	#
	# sometimes deleted jobs come back as zombies
	# and we don't know why and when that happens,
	# so just report those zombies here.
	#
	# The job directories are listed by the shell itself (one per line) and
	# filtered for names that must not exist; '|| true' keeps the "no match"
	# status of grep from tripping 'set -e'.
	ZOMBIES=$(printf '%s\n' /var/lib/jenkins/jobs/* | grep -E 'reproducible_(builder_(amd64|i386|armhf|arm64)|setup_(pbuilder|schroot)_testing)|chroot-installation_wheezy|aptdpkg|ff64a|jtk1a|odc2a|stretch_install_education-thin-client-server' || true)
	if [ -n "$ZOMBIES" ] ; then
		echo "Warning, rise of the jenkins job zombies has started again, these jobs should not exist:"
		while IFS= read -r z ; do
			basename -- "$z"
		done <<< "$ZOMBIES"
		DIRTY=true
		echo
	fi
	#
	# /var/log/jenkins/jenkins.log sometimes grows very fast
	# and we don't yet know why, so let's monitor this for now.
	#
	# More than 42G is fatal, more than 23G only marks the run as dirty.
	if jenkins_log_larger_than 42G ; then
		echo "Warning, jenkins.log is larger than 42G, please fix, erroring out now."
		exit 1
	elif jenkins_log_larger_than 23G ; then
		echo "Warning, jenkins.log is larger than 23G, please do something…"
		DIRTY=true
	fi
fi
#
# finally
#
# DIRTY holds the command name 'true' or 'false' and is executed as such:
# only a clean run gets the all-clear message.
if $DIRTY ; then
	:
else
	echo "$(date -u ) - Everything seems to be fine."
	echo
fi
echo "$(date -u) - the end."
|