summaryrefslogtreecommitdiffstats
path: root/bin/webcheck_url.sh
blob: fee02ba1a7dba633b3b5d468cedd337ec940bcab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash

# Copyright 2012 Holger Levsen <holger@layer-acht.org>
# released under the GPLv=2

# Refuse to run without a URL: print a two-line usage hint and exit non-zero.
if [[ -z "$1" ]] ; then
	printf '%s\n' "need at least one URL to act on" '# $1 = URL'
	exit 1
fi

#
# convert params to variables
#   $1 = URL to check (handed to webcheck)
#   $2 = optional, whitespace-separated patterns that webcheck should ignore
#
URL=$1
PATTERNS=$2

#
# default settings
#
# trace every command from here on
set -x
# abort as soon as a command fails
set -e
# fixed C locale for this script and everything it starts
export LC_ALL=C
# exported so that programs honouring http_proxy go through localhost:3128
# NOTE(review): presumably a local caching proxy -- confirm one is listening on every host that runs this
export http_proxy="http://localhost:3128"

#
# Don't use --continue on first run
#
# webcheck.dat is looked up in the current working directory; -c -f are
# only passed once it exists.  PARAMS is an array so that each option and
# each pattern reaches webcheck as exactly one argument, untouched by word
# splitting or filename expansion.
#
if [ -e webcheck.dat ] ; then
	PARAMS=(-c -f)
else
	PARAMS=()
fi

#
# if $URL ends with / then run webcheck with -b
#
if [[ "$URL" == */ ]] ; then
	PARAMS+=(-b)
fi

#
# ignore some extra patterns (=all translations) when checking www.debian.org
#
if [[ "$URL" == "http://www.debian.org"* ]] ; then
	# originally was TRANSLATIONS=$(curl www.debian.org 2>/dev/null|grep index|grep lang=|cut -d "." -f2)
	# but then I had to add some and then some more... so I came to the conclusion to hardcode them all
	TRANSLATIONS=(ar bg ca cs da de el es eo fa fr ko hy hr id it he lt hu nl ja nb pl pt ro ru sk fi sv ta tr uk zh-cn zh-hk zh-tw ml vi)
	# the loop variable is deliberately not called LANG: assigning to LANG
	# would overwrite the locale environment variable of the same name
	for lang in "${TRANSLATIONS[@]}" pt_BR zh_CN zh_HK zh_TW ; do
		PARAMS+=(
			-y "\.${lang}\.html" -y "html\.${lang}"
			-y "\.${lang}\.txt" -y "\.txt\.${lang}"
			-y "\.${lang}\.pdf" -y "\.pdf\.${lang}"
		)
	done
fi

#
# $PATTERNS can only be used to ignore patterns atm
#
if [ -n "$PATTERNS" ] ; then
	# split on whitespace only: globbing is switched off for the loop so
	# that regex characters such as * or ? in a pattern are never matched
	# against file names in the current directory
	set -f
	for pattern in $PATTERNS ; do
		PARAMS+=(-y "$pattern")
	done
	set +f
fi

#
# actually run webcheck
#
# quoted expansions: the URL and every collected option are passed verbatim
webcheck "$URL" "${PARAMS[@]}"