summaryrefslogtreecommitdiffstats
path: root/web-export/update.py
diff options
context:
space:
mode:
author: Vincent Untz <vuntz@gnome.org> 2012-01-25 09:18:07 +0100
committer: Vincent Untz <vuntz@gnome.org> 2012-01-25 09:18:07 +0100
commit: b8adc0518bf4a38686f194a3e1b5059311ed78db (patch)
tree: f6b8cb8bca1a8cbbd04678c3a39d235b88de680d /web-export/update.py
parent: 0bc96b8da33cdda7099e66bde6991bc7ad012d90 (diff)
download: xdg-specs-b8adc0518bf4a38686f194a3e1b5059311ed78db.tar.xz
web-export: Completely rewrite update.py
It's real python now. It also fetches specs.idx directly from git, and checks there's no new version of this script before doing anything.
Diffstat (limited to 'web-export/update.py')
-rwxr-xr-x web-export/update.py 401
1 files changed, 263 insertions, 138 deletions
diff --git a/web-export/update.py b/web-export/update.py
index dc15a59..1f43676 100755
--- a/web-export/update.py
+++ b/web-export/update.py
@@ -1,140 +1,265 @@
#!/usr/bin/env python
-import os,re,string
-
-xmlto = "/usr/bin/xmlto"
-cvs = "http://cvs.freedesktop.org/"
-git = "http://cgit.freedesktop.org/"
-specindex = "specs.idx"
-
-try:
- f = open(specindex, 'r')
- lastname = ''
- lastpath = ''
- for line in f.readlines():
- line = line.strip()
- if not line or line.startswith('#'):
- continue
-
- (file, revision, version, path) = string.split(line)
- use_git = False
- if file.startswith("git:"):
- use_git = True
- git_data = file.split(":")
- git_repo = git_data[1]
- file = git_data[2]
- name = os.path.splitext(os.path.split(file)[1])[0]
- # Strip version from name
- if re.search("\d\.\d+$", name):
- name = re.sub("^(.*)-([^/]*)$", "\\1", name)
-
- if use_git:
- url = '%s%s/plain/%s?id=%s' % (git, git_repo, file, revision)
- else:
- url = '%s%s?rev=%s' % (cvs, file, revision)
-
- if re.search("\.xml$", file):
- os.system("mkdir %s 2> /dev/null" % (path))
- if lastpath != path and lastname != name:
- os.system("rm -f %s/%s-latest.html" % (path, name))
- os.system("cd %s; ln -s %s-%s.html %s-latest.html" % (path,name,version,name))
- os.system("rm -f %s/latest" % (path))
- os.system("cd %s; ln -s %s latest" % (path,version))
-
- # if ( lastpath == path and lastname == name and os.path.isfile("%s/%s-%s.xml" % (path, name, version))):
- # print "Updating", file, "Version", version, "rev", revision, "skipped."
- # continue
-
- if os.system("wget -q '%s' -O wget.xml && (diff -q wget.xml %s/%s-%s.xml || mv wget.xml %s/%s-%s.xml)" % (url, path, name, version, path, name, version)):
- print "Updating", file, "Version", version, "rev", revision, "FAILED."
- os.system("chmod g+w wget.xml");
-
- print "Updating", file, "Version", version, "rev", revision, "ok"
-
- elif re.search("\.txt$", file):
- os.system("mkdir %s 2> /dev/null" % (path))
- if lastpath != path and lastname != name:
- os.system("rm -f %s/%s-latest.txt" % (path, name))
- os.system("cd %s; ln -s %s-%s.txt %s-latest.txt" % (path,name,version,name))
-
- if ( lastpath == path and lastname == name and os.path.isfile("%s/%s-%s.txt" % (path, name, version))):
- print "Updating", file, "Version", version, "rev", revision, "skipped."
- continue
-
- if os.system("wget -q '%s' -O wget.txt && (diff -q wget.txt %s/%s-%s.txt || mv wget.txt %s/%s-%s.txt)" % (url, path, name, version, path, name, version)):
- print "Updating", file, "Version", version, "rev", revision, "FAILED."
- os.system("chmod g+w wget.txt");
-
- print "Updating", file, "Version", version, "rev", revision, "ok"
-
- elif re.search("\.dtd$", file):
- os.system("mkdir %s 2> /dev/null" % (path))
- if lastpath != path and lastname != name:
- os.system("rm -f %s/%s-latest.dtd" % (path, name))
- os.system("cd %s; ln -s %s-%s.dtd %s-latest.dtd" % (path,name,version,name))
-
- if ( lastpath == path and lastname == name and os.path.isfile("%s/%s-%s.dtd" % (path, name, version))):
- print "Updating", file, "Version", version, "rev", revision, "skipped."
- continue
-
- if os.system("wget -q '%s' -O wget.dtd && (diff -q wget.dtd %s/%s-%s.dtd || mv wget.dtd %s/%s-%s.dtd)" % (url, path, name, version, path, name, version)):
- print "Updating", file, "Version", version, "rev", revision, "FAILED."
- os.system("chmod g+w wget.dtd");
-
- print "Updating", file, "Version", version, "rev", revision, "ok"
- else:
- print "Skipping", file, ", unknown file."
- continue
-
- lastname = name
- lastpath = path
-
-except IOError:
- print "Can't open", specindex
-
-
-specs = os.listdir(".")
-
-for spec in specs:
- if not os.path.isdir(spec):
- continue
- versions = os.listdir(spec)
- for file in versions:
- if re.search("\.xml$", file):
- tmp = re.sub("(.*)(\.xml)$", "\\1", file)
- name = re.sub("^(.*)-([^/]*)$", "\\1", tmp)
- ver = re.sub("^(.*)-([^/]*)$", "\\2", tmp)
-
- print "Check", os.path.join(spec,ver), os.path.isdir(os.path.join(spec,ver))
- print "Check", os.path.join(spec,name+"-"+ver+".html"), os.path.isfile(os.path.join(spec,name+"-"+ver+".html"))
-
- if ( not os.path.isdir(os.path.join(spec,ver))
- or not os.path.isfile(os.path.join(spec,name+"-"+ver+".html"))
- or os.path.getmtime(os.path.join(spec,file)) > os.path.getmtime(os.path.join(spec,name+"-"+ver+".html"))):
- os.system("rm -fR %s/%s" % (spec,ver))
- os.system("rm -f %s/%s-%s.html" % (spec,name,ver))
- os.system("mkdir %s/%s" % (spec,ver))
- os.system("cd %s/%s; %s html ../%s" % (spec,ver,xmlto,file))
- # os.system("mv index.html %s/%s-%s.html" % (spec,name,ver))
- # os.system("sed -i %s/%s-%s.html -e 's/index.html/%s-%s.html/;'" % (spec,name,ver,name,ver))
- os.system("cd %s;%s html-nochunks %s" % (spec,xmlto,file))
- elif re.search("(?<!latest)\.html$", file) and not os.path.isfile(os.path.join(spec,re.sub("html","xml",file))):
- tmp = re.sub("(.*)(\.html)$", "\\1", file)
- name = re.sub("^(.*)-([^/]*)$", "\\1", tmp)
- ver = re.sub("^(.*)-([^/]*)$", "\\2", tmp)
- os.system("rm -fR %s/%s" % (spec,ver))
- os.system("rm -f %s/%s-%s.html" % (spec,name,ver))
- for file in versions:
- if re.search("-latest\.dtd$", file):
- # Do nothing
- print "Skipping", file
- elif re.search("\.dtd$", file):
- tmp = re.sub("(.*)(\.dtd)$", "\\1", file)
- name = re.sub("^(.*)-([^/]*)$", "\\1", tmp)
- ver = re.sub("^(.*)-([^/]*)$", "\\2", tmp)
-
- print "Check", os.path.join(spec,ver), os.path.isdir(os.path.join(spec,ver))
- print "Check", os.path.join(spec,name+"-"+ver+".html"), os.path.isfile(os.path.join(spec,name+"-"+ver+".html"))
-
- os.system("mkdir %s/%s" % (spec,ver))
- os.system("cp %s/%s-%s.dtd %s/%s/%s.dtd" % (spec,name,ver,spec,ver,name))
+# Dependencies to run this:
+# - xmlto in $PATH
+
+# FIXME:
+# - correctly handle all exceptions
+# - copy dtd files where they should be
+# - new structure for website:
+# specs.fd.o/index.html -- general index
+# specs.fd.o/desktop-entry/index.html -- index of all versions of desktop entry, with all formats
+# specs.fd.o/desktop-entry/1.0/desktop-entry-spec.xml -- docbook version of the spec 1.0
+# specs.fd.o/desktop-entry/1.0/index.html -- one-page html version of the spec 1.0
+# specs.fd.o/desktop-entry/1.0/split/ -- multiple-page html version of the spec 1.0
+# specs.fd.o/desktop-entry/latest/ -- link to directory containing latest version of the spec
+
+import os
+import sys
+
+import errno
+
+import StringIO
+import hashlib
+import shutil
+import subprocess
+import urllib
+import urllib2
+import urlparse
+
+# When True, the script skips the "am I up-to-date?" check and reads a local
+# 'specs.idx' file instead of fetching the index from git.
+DEVELOPMENT = False
+
+# Base URLs of the web front-ends from which CVS and git files are downloaded.
+CVSWEB = 'http://cvs.freedesktop.org'
+GITWEB = 'http://cgit.freedesktop.org'
+# Default algorithm used by the get_hash_from_*() helpers.
+HASH = 'md5'
+
+
+def safe_mkdir(dir):
+    """Create the directory `dir` unless it already exists.
+
+    An empty or None `dir` is ignored. Only the last path component is
+    created (os.mkdir), so parent directories must already exist.
+
+    Raises OSError if the directory cannot be created, including when `dir`
+    already exists but is not a directory.
+    """
+    if not dir:
+        return
+
+    try:
+        os.mkdir(dir)
+    except OSError:
+        # Failing because the directory is already there is fine. Anything
+        # else (e.g. a regular file with that name, or a missing parent) is
+        # re-raised; the bare raise keeps the original traceback.
+        if not os.path.isdir(dir):
+            raise
+
+
+def get_hash_from_fd(fd, algo = HASH, read_blocks = 1024):
+    """Return the raw digest of everything that can be read from `fd`.
+
+    `fd` is read with fd.read(read_blocks) until a read returns nothing.
+    `algo` must be 'md5', the only algorithm accepted here; any other value
+    raises an Exception.
+    """
+    if algo not in ('md5',):
+        raise Exception('Internal error: hash algorithm \'%s\' not planned in code.' % algo)
+
+    hasher = hashlib.new(algo)
+    chunk = fd.read(read_blocks)
+    while chunk:
+        hasher.update(chunk)
+        chunk = fd.read(read_blocks)
+    return hasher.digest()
+
+
+def get_hash_from_url(url, algo = HASH):
+    """Return the raw digest of the content served at `url`.
+
+    The content is opened with urllib2.urlopen() and hashed by
+    get_hash_from_fd(); errors from either are propagated.
+    """
+    fd = urllib2.urlopen(url, None)
+    try:
+        return get_hash_from_fd(fd, algo)
+    finally:
+        # Close the response even if reading or hashing fails.
+        fd.close()
+
+
+def get_hash_from_path(path, algo = HASH):
+    """Return the raw digest of the file at `path`, read in binary mode.
+
+    Errors from open() or from get_hash_from_fd() are propagated.
+    """
+    fd = open(path, 'rb')
+    try:
+        return get_hash_from_fd(fd, algo, read_blocks = 32768)
+    finally:
+        # Close the file even if reading or hashing fails.
+        fd.close()
+
+
+def get_hash_from_data(data, algo = HASH):
+    """Return the raw digest of the in-memory string `data`.
+
+    The string is wrapped in a StringIO so it can go through
+    get_hash_from_fd() like any other source.
+    """
+    buf = StringIO.StringIO(data)
+    try:
+        return get_hash_from_fd(buf, algo, read_blocks = 32768)
+    finally:
+        buf.close()
+
+
+class VcsObject(object):
+    """A file stored in a remote VCS and downloaded through its web front-end.
+
+    Attributes:
+      vcs      -- 'git' or 'cvs'; any other value makes get_url() raise
+      repo     -- repository path, used to build git URLs only
+      file     -- path of the file to download
+      revision -- revision to request; if empty, no revision is put in the URL
+      data     -- content of the file once fetch() has run, None before that
+    """
+
+    def __init__(self, vcs, repo, file, revision = None):
+        self.vcs = vcs
+        self.repo = repo
+        self.file = file
+        self.revision = revision
+        self.data = None
+
+    def get_url(self):
+        """Return the URL from which the raw file can be downloaded.
+
+        Raises Exception if self.vcs is neither 'git' nor 'cvs'.
+        """
+        query = {}
+        if self.vcs == 'git':
+            baseurl = GITWEB
+            path = '/'.join((self.repo, 'plain', self.file))
+            if self.revision:
+                query['id'] = self.revision
+        elif self.vcs == 'cvs':
+            baseurl = CVSWEB
+            path = self.file
+            if self.revision:
+                query['rev'] = self.revision
+        else:
+            raise Exception('Unknown VCS: %s' % self.vcs)
+
+        (scheme, netloc, basepath) = urlparse.urlsplit(baseurl)[0:3]
+        # Drop a trailing '/' so a base URL written with one does not produce
+        # a '//' in the final path.
+        full_path = '/'.join((basepath.rstrip('/'), path))
+
+        query_str = urllib.urlencode(query)
+        return urlparse.urlunsplit((scheme, netloc, full_path, query_str, ''))
+
+    def fetch(self):
+        """Download the file into self.data, unless that was already done."""
+        # Compare against None rather than testing truthiness: an empty file
+        # is a valid result and must not be downloaded again on every call.
+        if self.data is not None:
+            return
+
+        fd = urllib2.urlopen(self.get_url(), None)
+        try:
+            self.data = fd.read()
+        finally:
+            # Close the response even if the read fails.
+            fd.close()
+
+    def get_hash(self):
+        """Return the digest of the file content, fetching it if needed."""
+        self.fetch()
+        return get_hash_from_data(self.data)
+
+
+class SpecObject(object):
+    """One version of a specification, stored as a file inside spec_dir.
+
+    Attributes:
+      vcs             -- VcsObject the spec is downloaded from
+      spec_dir        -- local directory receiving the spec and its HTML
+      version         -- version string, used in file and directory names
+      basename_no_ext -- name of the VCS file without directory or extension
+      ext             -- extension of the VCS file, including the dot
+      filename        -- local file name: <basename_no_ext>-<version><ext>
+      downloaded      -- True once download() has written the file
+      one_chunk       -- True once the single-page HTML has been generated
+      multiple_chunks -- True once the multi-page HTML has been generated
+    """
+
+    def __init__(self, vcs, spec_dir, version):
+        """Raises Exception if the extension of vcs.file is not supported."""
+        self.vcs = vcs
+        self.spec_dir = spec_dir
+        self.version = version
+
+        basename = os.path.basename(self.vcs.file)
+        (self.basename_no_ext, self.ext) = os.path.splitext(basename)
+
+        self.filename = '%s-%s%s' % (self.basename_no_ext, self.version, self.ext)
+
+        if self.ext not in ['.xml', '.sgml', '.txt', '.dtd']:
+            raise Exception('Format \'%s\' not supported for %s' % (self.ext, self.vcs.get_url()))
+
+        self.downloaded = False
+        self.one_chunk = False
+        self.multiple_chunks = False
+
+    def _remove_link(self, path):
+        """Delete `path` if it exists, including when it is a dangling symlink."""
+        # os.path.exists() follows symlinks and reports False for a link whose
+        # target is gone; lexists() looks at the link itself.
+        if os.path.lexists(path):
+            os.unlink(path)
+
+    def download(self):
+        """Write the spec into spec_dir if it is missing or differs from VCS.
+
+        self.downloaded is set to True only when the file is (re)written.
+        """
+        safe_mkdir(self.spec_dir)
+        path = os.path.join(self.spec_dir, self.filename)
+
+        if os.path.exists(path):
+            current_hash = get_hash_from_path(path)
+            vcs_hash = self.vcs.get_hash()
+            if current_hash == vcs_hash:
+                return
+
+        self.vcs.fetch()
+        fd = open(path, 'wb')
+        try:
+            fd.write(self.vcs.data)
+        finally:
+            # Close the file even if the write fails.
+            fd.close()
+
+        self.downloaded = True
+
+    def htmlize(self, force = False):
+        """Generate HTML from a '.xml' spec with xmlto.
+
+        Nothing is done unless download() wrote the file or `force` is true,
+        and nothing is done for extensions other than '.xml'.
+
+        Raises Exception if xmlto exits with a non-zero status.
+        """
+        if not self.downloaded and not force:
+            return
+
+        if self.ext != '.xml':
+            return
+
+        path = os.path.join(self.spec_dir, self.filename)
+        path_no_ext = os.path.splitext(path)[0]
+
+        # One-chunk HTML
+        html_path = '%s%s' % (path_no_ext, '.html')
+        if os.path.exists(html_path):
+            os.unlink(html_path)
+
+        retcode = subprocess.call(['xmlto', '-o', self.spec_dir, 'html-nochunks', path])
+
+        if retcode != 0:
+            raise Exception('Cannot convert \'%s\' to HTML.' % path)
+        self.one_chunk = True
+
+        # Multiple chunks
+        html_dir = os.path.join(self.spec_dir, self.version)
+        if os.path.exists(html_dir):
+            shutil.rmtree(html_dir)
+        safe_mkdir(html_dir)
+
+        retcode = subprocess.call(['xmlto', '-o', html_dir, 'html', path])
+
+        if retcode != 0:
+            raise Exception('Cannot convert \'%s\' to multiple-chunks HTML.' % path)
+        self.multiple_chunks = True
+
+    def latestize(self):
+        """Point the "latest" symlinks of this spec at this version.
+
+        Always (re)creates <basename_no_ext>-latest<ext>. For '.xml' specs,
+        also replaces the HTML link and the 'latest' directory link, each
+        only if its target exists.
+        """
+        filename_latest = '%s-latest%s' % (self.basename_no_ext, self.ext)
+
+        path_latest = os.path.join(self.spec_dir, filename_latest)
+        self._remove_link(path_latest)
+        os.symlink(self.filename, path_latest)
+
+        if self.ext != '.xml':
+            return
+
+        # One-chunk HTML
+        # NOTE(review): unlike the link above, this name has no '-latest'
+        # suffix -- confirm that this is intended.
+        html_path_latest = os.path.join(self.spec_dir, '%s%s' % (self.basename_no_ext, '.html'))
+        self._remove_link(html_path_latest)
+
+        html_filename = '%s%s' % (os.path.splitext(self.filename)[0], '.html')
+        html_path = os.path.join(self.spec_dir, html_filename)
+        if os.path.exists(html_path):
+            os.symlink(html_filename, html_path_latest)
+
+        # Multiple chunks
+        html_dir_latest = os.path.join(self.spec_dir, 'latest')
+        self._remove_link(html_dir_latest)
+
+        html_dir = os.path.join(self.spec_dir, self.version)
+        if os.path.exists(html_dir):
+            os.symlink(self.version, html_dir_latest)
+
+
+# This script as published in git; its hash is compared with the local copy
+# to detect an outdated script.
+SCRIPT = VcsObject('git', 'xdg/xdg-specs', 'web-export/update.py')
+# The index of specs to export, fetched from git when not in DEVELOPMENT mode.
+SPECS_INDEX = VcsObject('git', 'xdg/xdg-specs', 'web-export/specs.idx')
+
+
+def is_up_to_date():
+    """Tell whether this script is identical to the copy published in git.
+
+    Compares the hash of the file being run (__file__) with the hash of
+    SCRIPT, which is downloaded if it has not been fetched yet.
+    """
+    return get_hash_from_path(__file__) == SCRIPT.get_hash()
+
+
+if not DEVELOPMENT:
+    # Refuse to run when the copy in git differs from this file.
+    if not is_up_to_date():
+        print >>sys.stderr, 'Script is not up-to-date, please download %s' % SCRIPT.get_url()
+        sys.exit(1)
+
+    SPECS_INDEX.fetch()
+    lines = SPECS_INDEX.data.split('\n')
+else:
+    # Development mode: read the index from the current directory and close
+    # the file explicitly instead of leaving that to garbage collection.
+    index_fd = open('specs.idx')
+    try:
+        lines = index_fd.readlines()
+    finally:
+        index_fd.close()
+
+
+# (spec_dir, basename_no_ext) pairs whose "latest" links were already created.
+# A set, since it is only used for membership tests.
+latests = set()
+
+for line in lines:
+    line = line.strip()
+    if not line or line.startswith('#'):
+        continue
+
+    # Each entry has four whitespace-separated fields.
+    fields = line.split()
+    if len(fields) != 4:
+        raise ValueError('Malformed line in specs.idx (expected 4 fields): %r' % line)
+    (data, revision, version, path) = fields
+
+    if data.startswith("git:"):
+        # Format is git:<repository>:<file>. Split at most twice so that a
+        # ':' inside <file> is kept instead of truncating the path.
+        git_data = data.split(":", 2)
+        if len(git_data) != 3:
+            raise ValueError('Malformed git entry in specs.idx: %r' % data)
+        vcs = VcsObject('git', git_data[1], git_data[2], revision)
+    else:
+        vcs = VcsObject('cvs', None, data, revision)
+
+    spec = SpecObject(vcs, path, version)
+
+    spec.download()
+    spec.htmlize()
+
+    # Create latest links if it's the first time we see this spec
+    key = (spec.spec_dir, spec.basename_no_ext)
+    if key not in latests:
+        latests.add(key)
+        spec.latestize()