From bcc697c4ebb0f7ae0098607bd0b38773ee7bdf59 Mon Sep 17 00:00:00 2001 From: Holger Levsen Date: Tue, 31 Mar 2015 13:18:29 +0200 Subject: fix language: s#maintainance#maintenance#g --- bin/g-i-installation.sh | 2 +- bin/maintainance.sh | 208 ------------------------------------ bin/maintenance.sh | 208 ++++++++++++++++++++++++++++++++++++ bin/reproducible_maintainance.sh | 224 --------------------------------------- bin/reproducible_maintenance.sh | 224 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 433 insertions(+), 433 deletions(-) delete mode 100755 bin/maintainance.sh create mode 100755 bin/maintenance.sh delete mode 100755 bin/reproducible_maintainance.sh create mode 100755 bin/reproducible_maintenance.sh (limited to 'bin') diff --git a/bin/g-i-installation.sh b/bin/g-i-installation.sh index 87ea5299..22cd382d 100755 --- a/bin/g-i-installation.sh +++ b/bin/g-i-installation.sh @@ -1080,7 +1080,7 @@ monitor_system() { echo "ERROR: An installation step failed." >> $GOCR exit 1 elif [ ! -z "$ROOT_PROBLEM" ] ; then - echo "ERROR: System is hanging at boot and waiting for root maintainance." >> $GOCR + echo "ERROR: System is hanging at boot and waiting for root maintenance." >> $GOCR exit 1 elif [ ! -z "$BUILD_LTSP_PROBLEM" ] ; then echo "ERROR: The failing step is: Build LTSP chroot." >> $GOCR diff --git a/bin/maintainance.sh b/bin/maintainance.sh deleted file mode 100755 index 89bf5fd1..00000000 --- a/bin/maintainance.sh +++ /dev/null @@ -1,208 +0,0 @@ -#!/bin/bash - -# Copyright 2012-2015 Holger Levsen -# released under the GPLv=2 - -DEBUG=false -. /srv/jenkins/bin/common-functions.sh -common_init "$@" - -check_for_mounted_chroots() { - CHROOT_PATTERN="/chroots/${1}-*" - OUTPUT=$(mktemp) - ls $CHROOT_PATTERN 2>/dev/null > $OUTPUT || true - if [ -s $OUTPUT ] ; then - figlet "Warning:" - echo - echo "Probably manual cleanup needed:" - echo - echo "$ ls -la $CHROOT_PATTERN" - # List the processes using the partition - echo - fuser -mv $CHROOT_PATTERN - cat $OUTPUT - rm $OUTPUT - exit 1 - fi - rm $OUTPUT -} - -chroot_checks() { - check_for_mounted_chroots $1 - report_disk_usage /chroots - report_disk_usage /schroots - echo "WARNING: should remove directories in /(s)chroots which are older than a month." -} - -compress_old_jenkins_logs() { - local COMPRESSED - # compress logs to save space - COMPRESSED=$(find /var/lib/jenkins/jobs/*/builds/ -maxdepth 2 -mindepth 2 -mtime +1 -name log -exec gzip -9 -v {} \;) - if [ ! -z "$COMPRESSED" ] ; then - echo "Logs have been compressed:" - echo - echo "$COMPRESSED" - echo - fi -} - -remove_old_rebootstrap_logs() { - local OLDSTUFF - # delete old html logs to save space - OLDSTUFF=$(find /var/lib/jenkins/jobs/rebootstrap_* -maxdepth 3 -mtime +7 -name log_content.html -exec rm -v {} \;) - if [ ! -z "$OLDSTUFF" ] ; then - echo "Old html logs have been deleted:" - echo - echo "$OLDSTUFF" - echo - fi -} - -report_old_directories() { - # find and warn about old temp directories - if [ -z "$3" ] ; then - OLDSTUFF=$(find $1/* -maxdepth 0 -type d -mtime +$2 -exec ls -lad {} \;) - else - # if $3 is given, ignore it - OLDSTUFF=$(find $1/* -maxdepth 0 -type d -mtime +$2 ! -path "$3*" -exec ls -lad {} \;) - fi - if [ ! -z "$OLDSTUFF" ] ; then - echo "Warning: old temp directories found in $REP_RESULTS" - echo - echo "$OLDSTUFF" - echo "Please cleanup manually." - echo - fi -} - -report_disk_usage() { - if [ -z "$WATCHED_JOBS" ] ; then - echo "File system usage for all ${1} jobs:" - else - echo "File system usage for all ${1} jobs (including those currently running):" - fi - du -schx /var/lib/jenkins/jobs/${1}* |grep total |sed -s "s#total#${1} jobs#" - echo - if [ ! -z "$WATCHED_JOBS" ] ; then - TMPFILE=$(mktemp) - for JOB in $(cat $WATCHED_JOBS) ; do - du -shx --exclude='*/archive/*' $JOB | grep G >> $TMPFILE || true - done - if [ -s $TMPFILE ] ; then - echo - echo "${1} jobs with filesystem usage over 1G, excluding their archives and those currently running:" - cat $TMPFILE - echo - fi - rm $TMPFILE - fi -} - -report_filetype_usage() { - OUTPUT=$(mktemp) - for JOB in $(cat $WATCHED_JOBS) ; do - if [ "$2" != "bak" ] && [ "$2" != "png" ] ; then - find /var/lib/jenkins/jobs/$JOB -type f -name "*.${2}" ! -path "*/archive/*" 2>/dev/null|xargs -r du -sch |grep total |sed -s "s#total#$JOB .$2 files#" >> $OUTPUT - else - # find archived .bak + .png files too - find /var/lib/jenkins/jobs/$JOB -type f -name "*.${2}" 2>/dev/null|xargs -r du -sch |grep total |sed -s "s#total#$JOB .$2 files#" >> $OUTPUT - fi - done - if [ -s $OUTPUT ] ; then - echo "File system use in $1 for $2 files:" - cat $OUTPUT - if [ "$3" = "warn" ] ; then - echo "Warning: there are $2 files and there should not be any." - fi - echo - fi - rm $OUTPUT -} - -report_squid_usage() { - REPORT=/var/www/calamaris/calamaris.txt - if [ -z $1 ] ; then - cat $REPORT - else - head -31 $REPORT - fi -} - -wait4idle() { - echo "Waiting until no $1.sh process runs.... $(date)" - while [ $(ps fax | grep -c $1.sh) -gt 1 ] ; do - sleep 30 - done - echo "Done waiting: $(date)" -} - -general_maintainance() { - uptime - - echo - # ignore unreadable /media fuse mountpoints from guestmount - df -h 2>/dev/null || true - - echo - for DIR in /var/cache/apt/archives/ /var/spool/squid/ /var/cache/pbuilder/build/ /var/lib/jenkins/jobs/ /chroots /schroots ; do - sudo du -shx $DIR 2>/dev/null - done - JOB_PREFIXES=$(ls -1 /var/lib/jenkins/jobs/|cut -d "_" -f1|sort -f -u) - for PREFIX in $JOB_PREFIXES ; do - report_disk_usage $PREFIX - done - - echo - vnstat - - (df 2>/dev/null || true ) | grep tmpfs > /dev/null || ( echo ; echo "Warning: no tmpfs mounts in use. Please investigate the host system." ; exit 1 ) -} - -# -# if $1 is empty, we do general maintainance, else for some subgroup of all jobs -# -if [ -z $1 ] ; then - general_maintainance - compress_old_jenkins_logs - report_squid_usage brief -else - case $1 in - chroot-installation*) wait4idle $1 - report_disk_usage $1 - chroot_checks $1 - ;; - g-i-installation) ACTIVE_JOBS=$(mktemp) - WATCHED_JOBS=$(mktemp) - RUNNING=$(mktemp) - ps fax > $RUNNING - cd /var/lib/jenkins/jobs - for GIJ in g-i-installation_* ; do - if grep -q "$GIJ/workspace" $RUNNING ; then - echo "$GIJ" >> $ACTIVE_JOBS - echo "Ignoring $GIJ job as it's currently running." - else - echo "$GIJ" >> $WATCHED_JOBS - fi - done - echo - report_disk_usage $1 - report_filetype_usage $1 png warn - report_filetype_usage $1 bak warn - report_filetype_usage $1 raw warn - report_filetype_usage $1 iso warn - echo "WARNING: there is no check / handling on stale lvm volumes" - rm $ACTIVE_JOBS $WATCHED_JOBS $RUNNING - ;; - d-i) report_old_directories /srv/d-i 7 /srv/d-i/workspace - ;; - squid) report_squid_usage - ;; - rebootstrap) remove_old_rebootstrap_logs - ;; - *) ;; - esac -fi - -echo -echo "No (big) problems found, all seems good." -figlet "Ok." diff --git a/bin/maintenance.sh b/bin/maintenance.sh new file mode 100755 index 00000000..c123a9bd --- /dev/null +++ b/bin/maintenance.sh @@ -0,0 +1,208 @@ +#!/bin/bash + +# Copyright 2012-2015 Holger Levsen +# released under the GPLv=2 + +DEBUG=false +. /srv/jenkins/bin/common-functions.sh +common_init "$@" + +check_for_mounted_chroots() { + CHROOT_PATTERN="/chroots/${1}-*" + OUTPUT=$(mktemp) + ls $CHROOT_PATTERN 2>/dev/null > $OUTPUT || true + if [ -s $OUTPUT ] ; then + figlet "Warning:" + echo + echo "Probably manual cleanup needed:" + echo + echo "$ ls -la $CHROOT_PATTERN" + # List the processes using the partition + echo + fuser -mv $CHROOT_PATTERN + cat $OUTPUT + rm $OUTPUT + exit 1 + fi + rm $OUTPUT +} + +chroot_checks() { + check_for_mounted_chroots $1 + report_disk_usage /chroots + report_disk_usage /schroots + echo "WARNING: should remove directories in /(s)chroots which are older than a month." +} + +compress_old_jenkins_logs() { + local COMPRESSED + # compress logs to save space + COMPRESSED=$(find /var/lib/jenkins/jobs/*/builds/ -maxdepth 2 -mindepth 2 -mtime +1 -name log -exec gzip -9 -v {} \;) + if [ ! -z "$COMPRESSED" ] ; then + echo "Logs have been compressed:" + echo + echo "$COMPRESSED" + echo + fi +} + +remove_old_rebootstrap_logs() { + local OLDSTUFF + # delete old html logs to save space + OLDSTUFF=$(find /var/lib/jenkins/jobs/rebootstrap_* -maxdepth 3 -mtime +7 -name log_content.html -exec rm -v {} \;) + if [ ! -z "$OLDSTUFF" ] ; then + echo "Old html logs have been deleted:" + echo + echo "$OLDSTUFF" + echo + fi +} + +report_old_directories() { + # find and warn about old temp directories + if [ -z "$3" ] ; then + OLDSTUFF=$(find $1/* -maxdepth 0 -type d -mtime +$2 -exec ls -lad {} \;) + else + # if $3 is given, ignore it + OLDSTUFF=$(find $1/* -maxdepth 0 -type d -mtime +$2 ! -path "$3*" -exec ls -lad {} \;) + fi + if [ ! -z "$OLDSTUFF" ] ; then + echo "Warning: old temp directories found in $REP_RESULTS" + echo + echo "$OLDSTUFF" + echo "Please cleanup manually." + echo + fi +} + +report_disk_usage() { + if [ -z "$WATCHED_JOBS" ] ; then + echo "File system usage for all ${1} jobs:" + else + echo "File system usage for all ${1} jobs (including those currently running):" + fi + du -schx /var/lib/jenkins/jobs/${1}* |grep total |sed -s "s#total#${1} jobs#" + echo + if [ ! -z "$WATCHED_JOBS" ] ; then + TMPFILE=$(mktemp) + for JOB in $(cat $WATCHED_JOBS) ; do + du -shx --exclude='*/archive/*' $JOB | grep G >> $TMPFILE || true + done + if [ -s $TMPFILE ] ; then + echo + echo "${1} jobs with filesystem usage over 1G, excluding their archives and those currently running:" + cat $TMPFILE + echo + fi + rm $TMPFILE + fi +} + +report_filetype_usage() { + OUTPUT=$(mktemp) + for JOB in $(cat $WATCHED_JOBS) ; do + if [ "$2" != "bak" ] && [ "$2" != "png" ] ; then + find /var/lib/jenkins/jobs/$JOB -type f -name "*.${2}" ! -path "*/archive/*" 2>/dev/null|xargs -r du -sch |grep total |sed -s "s#total#$JOB .$2 files#" >> $OUTPUT + else + # find archived .bak + .png files too + find /var/lib/jenkins/jobs/$JOB -type f -name "*.${2}" 2>/dev/null|xargs -r du -sch |grep total |sed -s "s#total#$JOB .$2 files#" >> $OUTPUT + fi + done + if [ -s $OUTPUT ] ; then + echo "File system use in $1 for $2 files:" + cat $OUTPUT + if [ "$3" = "warn" ] ; then + echo "Warning: there are $2 files and there should not be any." + fi + echo + fi + rm $OUTPUT +} + +report_squid_usage() { + REPORT=/var/www/calamaris/calamaris.txt + if [ -z $1 ] ; then + cat $REPORT + else + head -31 $REPORT + fi +} + +wait4idle() { + echo "Waiting until no $1.sh process runs.... $(date)" + while [ $(ps fax | grep -c $1.sh) -gt 1 ] ; do + sleep 30 + done + echo "Done waiting: $(date)" +} + +general_maintenance() { + uptime + + echo + # ignore unreadable /media fuse mountpoints from guestmount + df -h 2>/dev/null || true + + echo + for DIR in /var/cache/apt/archives/ /var/spool/squid/ /var/cache/pbuilder/build/ /var/lib/jenkins/jobs/ /chroots /schroots ; do + sudo du -shx $DIR 2>/dev/null + done + JOB_PREFIXES=$(ls -1 /var/lib/jenkins/jobs/|cut -d "_" -f1|sort -f -u) + for PREFIX in $JOB_PREFIXES ; do + report_disk_usage $PREFIX + done + + echo + vnstat + + (df 2>/dev/null || true ) | grep tmpfs > /dev/null || ( echo ; echo "Warning: no tmpfs mounts in use. Please investigate the host system." ; exit 1 ) +} + +# +# if $1 is empty, we do general maintenance, else for some subgroup of all jobs +# +if [ -z $1 ] ; then + general_maintenance + compress_old_jenkins_logs + report_squid_usage brief +else + case $1 in + chroot-installation*) wait4idle $1 + report_disk_usage $1 + chroot_checks $1 + ;; + g-i-installation) ACTIVE_JOBS=$(mktemp) + WATCHED_JOBS=$(mktemp) + RUNNING=$(mktemp) + ps fax > $RUNNING + cd /var/lib/jenkins/jobs + for GIJ in g-i-installation_* ; do + if grep -q "$GIJ/workspace" $RUNNING ; then + echo "$GIJ" >> $ACTIVE_JOBS + echo "Ignoring $GIJ job as it's currently running." + else + echo "$GIJ" >> $WATCHED_JOBS + fi + done + echo + report_disk_usage $1 + report_filetype_usage $1 png warn + report_filetype_usage $1 bak warn + report_filetype_usage $1 raw warn + report_filetype_usage $1 iso warn + echo "WARNING: there is no check / handling on stale lvm volumes" + rm $ACTIVE_JOBS $WATCHED_JOBS $RUNNING + ;; + d-i) report_old_directories /srv/d-i 7 /srv/d-i/workspace + ;; + squid) report_squid_usage + ;; + rebootstrap) remove_old_rebootstrap_logs + ;; + *) ;; + esac +fi + +echo +echo "No (big) problems found, all seems good." +figlet "Ok." diff --git a/bin/reproducible_maintainance.sh b/bin/reproducible_maintainance.sh deleted file mode 100755 index efdd9985..00000000 --- a/bin/reproducible_maintainance.sh +++ /dev/null @@ -1,224 +0,0 @@ -#!/bin/bash - -# Copyright 2014-2015 Holger Levsen -# released under the GPLv=2 - -DEBUG=false -. /srv/jenkins/bin/common-functions.sh -common_init "$@" - -# common code defining db access -. /srv/jenkins/bin/reproducible_common.sh - -DIRTY=false - -# prepare backup -REP_RESULTS=/srv/reproducible-results -mkdir -p $REP_RESULTS/backup -cd $REP_RESULTS/backup - -# keep 30 days and the 1st of the month -DAY=(date -d "30 day ago" '+%d') -DATE=$(date -d "30 day ago" '+%Y-%m-%d') -if [ "$DAY" != "01" ] && [ -f reproducible_$DATE.db.xz ] ; then - rm -f reproducible_$DATE.db.xz -fi - -# actually do the backup -DATE=$(date '+%Y-%m-%d') -if [ ! -f reproducible_$DATE.db.xz ] ; then - cp -v $PACKAGES_DB . - DATE=$(date '+%Y-%m-%d') - mv -v reproducible.db reproducible_$DATE.db - xz reproducible_$DATE.db -fi - -# provide copy for external backups -cp -v $PACKAGES_DB /var/lib/jenkins/userContent/ - -# delete old temp directories -OLDSTUFF=$(find $REP_RESULTS -maxdepth 1 -type d -name "tmp.*" -mtime +2 -exec ls -lad {} \;) -if [ ! -z "$OLDSTUFF" ] ; then - echo - echo "Warning: old temp directories found in $REP_RESULTS" - find $REP_RESULTS -maxdepth 1 -type d -name "tmp.*" -mtime +2 -exec rm -rv {} \; - echo "These old directories have been deleted." - echo - DIRTY=true -fi - -# find old schroots -OLDSTUFF=$(find /schroots/ -maxdepth 1 -type d -name "reproducible-*-*" -mtime +2 -exec ls -lad {} \;) -if [ ! -z "$OLDSTUFF" ] ; then - echo - echo "Warning: old schroots found in /schroots, which have been deleted:" - find /schroots/ -maxdepth 1 -type d -name "reproducible-*-*" -mtime +2 -exec sudo rm -rf {} \; - echo "$OLDSTUFF" - echo - DIRTY=true -fi - -# find and warn about pbuild leftovers -OLDSTUFF=$(find /var/cache/pbuilder/result/ -mtime +1 -exec ls -lad {} \;) -if [ ! -z "$OLDSTUFF" ] ; then - # delete known files, see #777537 - cd /var/cache/pbuilder/result/ - echo "Attempting file detection..." - for i in $(find . -maxdepth 1 -mtime +1 -type f -exec basename {} \;) ; do - case $i in - stderr|stdout) rm -v $i - ;; - seqan-*.bed) rm -v $i # leftovers reported in #766741 - ;; - bootlogo) rm -v $i - ;; - org.daisy.paper.CustomPaperCollection.obj) rm -v $i - ;; - debian-faq.pdf.gz|debian-faq.ps.gz|debian-faq.txt.gz) rm -v $i - ;; - sumo_doxygen_lastrun.log) rm -v $i - ;; - *) ;; - esac - done - cd - - # report the rest - OLDSTUFF=$(find /var/cache/pbuilder/result/ -mtime +1 -exec ls -lad {} \;) - if [ ! -z "$OLDSTUFF" ] ; then - echo "Warning: old files or directories found in /var/cache/pbuilder/result/" - echo "$OLDSTUFF" - echo "Please cleanup manually." - fi - echo - DIRTY=true -fi - -# find failed builds due to network problems and reschedule them -# only grep through the last 5h (300 minutes) of builds... -# (ignore "*None.rbuild.log" because these are build which were just started) -# this job runs every 4h -FAILED_BUILDS=$(find /var/lib/jenkins/userContent/rbuild -type f ! -name "*None.rbuild.log" ! -mmin +300 -exec egrep -l -e 'E: Failed to fetch.*(Connection failed|Size mismatch|Cannot initiate the connection to)' {} \; || true) -if [ ! -z "$FAILED_BUILDS" ] ; then - echo - echo "Warning: the following failed builds have been found" - echo "$FAILED_BUILDS" - echo - echo "Rescheduling packages: " - for SUITE in $(echo $FAILED_BUILDS | sed "s# #\n#g" | cut -d "/" -f7 | sort -u) ; do - CANDIDATES=$(for PKG in $(echo $FAILED_BUILDS | sed "s# #\n#g" | grep "/$SUITE/" | cut -d "/" -f9 | cut -d "_" -f1) ; do echo -n "$PKG " ; done) - check_candidates - if [ $TOTAL -ne 0 ] ; then - echo " - in $SUITE: $CANDIDATES" - # '0' here means the artifacts will not be saved - schedule_packages 0 $PACKAGE_IDS - fi - done - DIRTY=true -fi - -# find+terminate processes which should not be there -HAYSTACK=$(mktemp) -RESULT=$(mktemp) -PBUIDS="1234 1111 2222" -ps axo pid,user,size,pcpu,cmd > $HAYSTACK -for i in $PBUIDS ; do - for PROCESS in $(pgrep -u $i -P 1 || true) ; do - # faked-sysv comes and goes... - grep ^$PROCESS $HAYSTACK | grep -v faked-sysv >> $RESULT 2> /dev/null || true - done -done -if [ -s $RESULT ] ; then - echo - echo "Warning: processes found which should not be there, maybe killing them now:" - cat $RESULT - echo - for PROCESS in $(cat $RESULT | cut -d " " -f1 | xargs echo) ; do - AGE=$(ps -p $PROCESS -o etimes= || echo 0) - # a single build may only take half a day, so... - if [ $AGE -gt 43200 ] ; then - sudo kill -9 $PROCESS 2>&1 - echo "'kill -9 $PROCESS' done." - else - echo "Did not kill $PROCESS as it is only $AGE seconds old." - fi - done - echo - DIRTY=true -fi -rm $HAYSTACK $RESULT - -# find packages which build didnt end correctly -QUERY=" - SELECT s.id, s.name, p.date_scheduled, p.date_build_started - FROM schedule AS p JOIN sources AS s ON p.package_id=s.id - WHERE p.date_scheduled != '' - AND p.date_build_started != '' - AND p.date_build_started < datetime('now', '-36 hours') - ORDER BY p.date_scheduled - " -PACKAGES=$(mktemp) -sqlite3 -init $INIT ${PACKAGES_DB} "$QUERY" > $PACKAGES 2> /dev/null || echo "Warning: SQL query '$QUERY' failed." -if grep -q '|' $PACKAGES ; then - echo - echo "Warning: packages found where the build was started more than 36h ago:" - echo "pkg_id|name|date_scheduled|date_build_started" - echo - cat $PACKAGES - echo - for PKG in $(cat $PACKAGES | cut -d "|" -f1) ; do - echo "sqlite3 ${PACKAGES_DB} \"DELETE FROM schedule WHERE package_id = '$PKG';\"" - sqlite3 -init $INIT ${PACKAGES_DB} "DELETE FROM schedule WHERE package_id = '$PKG';" - done - echo "Packages have been removed from scheduling." - echo - DIRTY=true -fi -rm $PACKAGES - -# find packages which have been removed from unstable -# commented out for now. This can't be done using the database anymore -QUERY="SELECT source_packages.name FROM source_packages - WHERE source_packages.name NOT IN - (SELECT sources.name FROM sources) - LIMIT 25" -#PACKAGES=$(sqlite3 -init $INIT ${PACKAGES_DB} "$QUERY") -PACKAGES='' -if [ ! -z "$PACKAGES" ] ; then - echo - echo "Removing these removed packages from database:" - echo $PACKAGES - echo - QUERY="DELETE FROM source_packages - WHERE source_packages.name NOT IN - (SELECT sources.name FROM sources) - LIMIT 25" - sqlite3 -init $INIT ${PACKAGES_DB} "$QUERY" - cd /var/lib/jenkins/userContent - for i in PACKAGES ; do - find rb-pkg/ rbuild/ notes/ dbd/ -name "${i}_*" -exec rm -v {} \; - done - cd - -fi - -# delete jenkins html logs from reproducible_builder_* jobs as they are mostly redundant -# (they only provide the extended value of parsed console output, which we dont need here.) -OLDSTUFF=$(find /var/lib/jenkins/jobs/reproducible_builder_* -maxdepth 3 -mtime +0 -name log_content.html -exec rm -v {} \; | wc -l) -if [ ! -z "$OLDSTUFF" ] ; then - echo - echo "Removed $OLDSTUFF jenkins html logs." - echo -fi - -# remove artifacts older than 3 days -ARTIFACTS=$(find /var/lib/jenkins/userContent/artifacts/* -maxdepth 1 -type d -mtime +3 -exec ls -lad {} \; || true) -if [ ! -z "$ARTIFACTS" ] ; then - echo - echo "Removed old artifacts:" - find /var/lib/jenkins/userContent/artifacts/* -maxdepth 1 -type d -mtime +3 -exec rm -rv {} \; - echo -fi - -if ! $DIRTY ; then - echo "Everything seems to be fine." - echo -fi diff --git a/bin/reproducible_maintenance.sh b/bin/reproducible_maintenance.sh new file mode 100755 index 00000000..efdd9985 --- /dev/null +++ b/bin/reproducible_maintenance.sh @@ -0,0 +1,224 @@ +#!/bin/bash + +# Copyright 2014-2015 Holger Levsen +# released under the GPLv=2 + +DEBUG=false +. /srv/jenkins/bin/common-functions.sh +common_init "$@" + +# common code defining db access +. /srv/jenkins/bin/reproducible_common.sh + +DIRTY=false + +# prepare backup +REP_RESULTS=/srv/reproducible-results +mkdir -p $REP_RESULTS/backup +cd $REP_RESULTS/backup + +# keep 30 days and the 1st of the month +DAY=(date -d "30 day ago" '+%d') +DATE=$(date -d "30 day ago" '+%Y-%m-%d') +if [ "$DAY" != "01" ] && [ -f reproducible_$DATE.db.xz ] ; then + rm -f reproducible_$DATE.db.xz +fi + +# actually do the backup +DATE=$(date '+%Y-%m-%d') +if [ ! -f reproducible_$DATE.db.xz ] ; then + cp -v $PACKAGES_DB . + DATE=$(date '+%Y-%m-%d') + mv -v reproducible.db reproducible_$DATE.db + xz reproducible_$DATE.db +fi + +# provide copy for external backups +cp -v $PACKAGES_DB /var/lib/jenkins/userContent/ + +# delete old temp directories +OLDSTUFF=$(find $REP_RESULTS -maxdepth 1 -type d -name "tmp.*" -mtime +2 -exec ls -lad {} \;) +if [ ! -z "$OLDSTUFF" ] ; then + echo + echo "Warning: old temp directories found in $REP_RESULTS" + find $REP_RESULTS -maxdepth 1 -type d -name "tmp.*" -mtime +2 -exec rm -rv {} \; + echo "These old directories have been deleted." + echo + DIRTY=true +fi + +# find old schroots +OLDSTUFF=$(find /schroots/ -maxdepth 1 -type d -name "reproducible-*-*" -mtime +2 -exec ls -lad {} \;) +if [ ! -z "$OLDSTUFF" ] ; then + echo + echo "Warning: old schroots found in /schroots, which have been deleted:" + find /schroots/ -maxdepth 1 -type d -name "reproducible-*-*" -mtime +2 -exec sudo rm -rf {} \; + echo "$OLDSTUFF" + echo + DIRTY=true +fi + +# find and warn about pbuild leftovers +OLDSTUFF=$(find /var/cache/pbuilder/result/ -mtime +1 -exec ls -lad {} \;) +if [ ! -z "$OLDSTUFF" ] ; then + # delete known files, see #777537 + cd /var/cache/pbuilder/result/ + echo "Attempting file detection..." + for i in $(find . -maxdepth 1 -mtime +1 -type f -exec basename {} \;) ; do + case $i in + stderr|stdout) rm -v $i + ;; + seqan-*.bed) rm -v $i # leftovers reported in #766741 + ;; + bootlogo) rm -v $i + ;; + org.daisy.paper.CustomPaperCollection.obj) rm -v $i + ;; + debian-faq.pdf.gz|debian-faq.ps.gz|debian-faq.txt.gz) rm -v $i + ;; + sumo_doxygen_lastrun.log) rm -v $i + ;; + *) ;; + esac + done + cd - + # report the rest + OLDSTUFF=$(find /var/cache/pbuilder/result/ -mtime +1 -exec ls -lad {} \;) + if [ ! -z "$OLDSTUFF" ] ; then + echo "Warning: old files or directories found in /var/cache/pbuilder/result/" + echo "$OLDSTUFF" + echo "Please cleanup manually." + fi + echo + DIRTY=true +fi + +# find failed builds due to network problems and reschedule them +# only grep through the last 5h (300 minutes) of builds... +# (ignore "*None.rbuild.log" because these are build which were just started) +# this job runs every 4h +FAILED_BUILDS=$(find /var/lib/jenkins/userContent/rbuild -type f ! -name "*None.rbuild.log" ! -mmin +300 -exec egrep -l -e 'E: Failed to fetch.*(Connection failed|Size mismatch|Cannot initiate the connection to)' {} \; || true) +if [ ! -z "$FAILED_BUILDS" ] ; then + echo + echo "Warning: the following failed builds have been found" + echo "$FAILED_BUILDS" + echo + echo "Rescheduling packages: " + for SUITE in $(echo $FAILED_BUILDS | sed "s# #\n#g" | cut -d "/" -f7 | sort -u) ; do + CANDIDATES=$(for PKG in $(echo $FAILED_BUILDS | sed "s# #\n#g" | grep "/$SUITE/" | cut -d "/" -f9 | cut -d "_" -f1) ; do echo -n "$PKG " ; done) + check_candidates + if [ $TOTAL -ne 0 ] ; then + echo " - in $SUITE: $CANDIDATES" + # '0' here means the artifacts will not be saved + schedule_packages 0 $PACKAGE_IDS + fi + done + DIRTY=true +fi + +# find+terminate processes which should not be there +HAYSTACK=$(mktemp) +RESULT=$(mktemp) +PBUIDS="1234 1111 2222" +ps axo pid,user,size,pcpu,cmd > $HAYSTACK +for i in $PBUIDS ; do + for PROCESS in $(pgrep -u $i -P 1 || true) ; do + # faked-sysv comes and goes... + grep ^$PROCESS $HAYSTACK | grep -v faked-sysv >> $RESULT 2> /dev/null || true + done +done +if [ -s $RESULT ] ; then + echo + echo "Warning: processes found which should not be there, maybe killing them now:" + cat $RESULT + echo + for PROCESS in $(cat $RESULT | cut -d " " -f1 | xargs echo) ; do + AGE=$(ps -p $PROCESS -o etimes= || echo 0) + # a single build may only take half a day, so... + if [ $AGE -gt 43200 ] ; then + sudo kill -9 $PROCESS 2>&1 + echo "'kill -9 $PROCESS' done." + else + echo "Did not kill $PROCESS as it is only $AGE seconds old." + fi + done + echo + DIRTY=true +fi +rm $HAYSTACK $RESULT + +# find packages which build didnt end correctly +QUERY=" + SELECT s.id, s.name, p.date_scheduled, p.date_build_started + FROM schedule AS p JOIN sources AS s ON p.package_id=s.id + WHERE p.date_scheduled != '' + AND p.date_build_started != '' + AND p.date_build_started < datetime('now', '-36 hours') + ORDER BY p.date_scheduled + " +PACKAGES=$(mktemp) +sqlite3 -init $INIT ${PACKAGES_DB} "$QUERY" > $PACKAGES 2> /dev/null || echo "Warning: SQL query '$QUERY' failed." +if grep -q '|' $PACKAGES ; then + echo + echo "Warning: packages found where the build was started more than 36h ago:" + echo "pkg_id|name|date_scheduled|date_build_started" + echo + cat $PACKAGES + echo + for PKG in $(cat $PACKAGES | cut -d "|" -f1) ; do + echo "sqlite3 ${PACKAGES_DB} \"DELETE FROM schedule WHERE package_id = '$PKG';\"" + sqlite3 -init $INIT ${PACKAGES_DB} "DELETE FROM schedule WHERE package_id = '$PKG';" + done + echo "Packages have been removed from scheduling." + echo + DIRTY=true +fi +rm $PACKAGES + +# find packages which have been removed from unstable +# commented out for now. This can't be done using the database anymore +QUERY="SELECT source_packages.name FROM source_packages + WHERE source_packages.name NOT IN + (SELECT sources.name FROM sources) + LIMIT 25" +#PACKAGES=$(sqlite3 -init $INIT ${PACKAGES_DB} "$QUERY") +PACKAGES='' +if [ ! -z "$PACKAGES" ] ; then + echo + echo "Removing these removed packages from database:" + echo $PACKAGES + echo + QUERY="DELETE FROM source_packages + WHERE source_packages.name NOT IN + (SELECT sources.name FROM sources) + LIMIT 25" + sqlite3 -init $INIT ${PACKAGES_DB} "$QUERY" + cd /var/lib/jenkins/userContent + for i in PACKAGES ; do + find rb-pkg/ rbuild/ notes/ dbd/ -name "${i}_*" -exec rm -v {} \; + done + cd - +fi + +# delete jenkins html logs from reproducible_builder_* jobs as they are mostly redundant +# (they only provide the extended value of parsed console output, which we dont need here.) +OLDSTUFF=$(find /var/lib/jenkins/jobs/reproducible_builder_* -maxdepth 3 -mtime +0 -name log_content.html -exec rm -v {} \; | wc -l) +if [ ! -z "$OLDSTUFF" ] ; then + echo + echo "Removed $OLDSTUFF jenkins html logs." + echo +fi + +# remove artifacts older than 3 days +ARTIFACTS=$(find /var/lib/jenkins/userContent/artifacts/* -maxdepth 1 -type d -mtime +3 -exec ls -lad {} \; || true) +if [ ! -z "$ARTIFACTS" ] ; then + echo + echo "Removed old artifacts:" + find /var/lib/jenkins/userContent/artifacts/* -maxdepth 1 -type d -mtime +3 -exec rm -rv {} \; + echo +fi + +if ! $DIRTY ; then + echo "Everything seems to be fine." + echo +fi -- cgit v1.2.3-70-g09d2