diff options
author | Vincent Untz <vuntz@gnome.org> | 2012-01-25 09:18:07 +0100 |
---|---|---|
committer | Vincent Untz <vuntz@gnome.org> | 2012-01-25 09:18:07 +0100 |
commit | b8adc0518bf4a38686f194a3e1b5059311ed78db (patch) | |
tree | f6b8cb8bca1a8cbbd04678c3a39d235b88de680d /web-export/update.py | |
parent | 0bc96b8da33cdda7099e66bde6991bc7ad012d90 (diff) | |
download | xdg-specs-b8adc0518bf4a38686f194a3e1b5059311ed78db.tar.xz |
web-export: Completely rewrite update.py
It's real Python now. It also fetches specs.idx directly from git, and
checks that there is no newer version of this script before doing anything.
Diffstat (limited to 'web-export/update.py')
-rwxr-xr-x | web-export/update.py | 401 |
1 files changed, 263 insertions, 138 deletions
diff --git a/web-export/update.py b/web-export/update.py index dc15a59..1f43676 100755 --- a/web-export/update.py +++ b/web-export/update.py @@ -1,140 +1,265 @@ #!/usr/bin/env python -import os,re,string - -xmlto = "/usr/bin/xmlto" -cvs = "http://cvs.freedesktop.org/" -git = "http://cgit.freedesktop.org/" -specindex = "specs.idx" - -try: - f = open(specindex, 'r') - lastname = '' - lastpath = '' - for line in f.readlines(): - line = line.strip() - if not line or line.startswith('#'): - continue - - (file, revision, version, path) = string.split(line) - use_git = False - if file.startswith("git:"): - use_git = True - git_data = file.split(":") - git_repo = git_data[1] - file = git_data[2] - name = os.path.splitext(os.path.split(file)[1])[0] - # Strip version from name - if re.search("\d\.\d+$", name): - name = re.sub("^(.*)-([^/]*)$", "\\1", name) - - if use_git: - url = '%s%s/plain/%s?id=%s' % (git, git_repo, file, revision) - else: - url = '%s%s?rev=%s' % (cvs, file, revision) - - if re.search("\.xml$", file): - os.system("mkdir %s 2> /dev/null" % (path)) - if lastpath != path and lastname != name: - os.system("rm -f %s/%s-latest.html" % (path, name)) - os.system("cd %s; ln -s %s-%s.html %s-latest.html" % (path,name,version,name)) - os.system("rm -f %s/latest" % (path)) - os.system("cd %s; ln -s %s latest" % (path,version)) - - # if ( lastpath == path and lastname == name and os.path.isfile("%s/%s-%s.xml" % (path, name, version))): - # print "Updating", file, "Version", version, "rev", revision, "skipped." - # continue - - if os.system("wget -q '%s' -O wget.xml && (diff -q wget.xml %s/%s-%s.xml || mv wget.xml %s/%s-%s.xml)" % (url, path, name, version, path, name, version)): - print "Updating", file, "Version", version, "rev", revision, "FAILED." 
- os.system("chmod g+w wget.xml"); - - print "Updating", file, "Version", version, "rev", revision, "ok" - - elif re.search("\.txt$", file): - os.system("mkdir %s 2> /dev/null" % (path)) - if lastpath != path and lastname != name: - os.system("rm -f %s/%s-latest.txt" % (path, name)) - os.system("cd %s; ln -s %s-%s.txt %s-latest.txt" % (path,name,version,name)) - - if ( lastpath == path and lastname == name and os.path.isfile("%s/%s-%s.txt" % (path, name, version))): - print "Updating", file, "Version", version, "rev", revision, "skipped." - continue - - if os.system("wget -q '%s' -O wget.txt && (diff -q wget.txt %s/%s-%s.txt || mv wget.txt %s/%s-%s.txt)" % (url, path, name, version, path, name, version)): - print "Updating", file, "Version", version, "rev", revision, "FAILED." - os.system("chmod g+w wget.txt"); - - print "Updating", file, "Version", version, "rev", revision, "ok" - - elif re.search("\.dtd$", file): - os.system("mkdir %s 2> /dev/null" % (path)) - if lastpath != path and lastname != name: - os.system("rm -f %s/%s-latest.dtd" % (path, name)) - os.system("cd %s; ln -s %s-%s.dtd %s-latest.dtd" % (path,name,version,name)) - - if ( lastpath == path and lastname == name and os.path.isfile("%s/%s-%s.dtd" % (path, name, version))): - print "Updating", file, "Version", version, "rev", revision, "skipped." - continue - - if os.system("wget -q '%s' -O wget.dtd && (diff -q wget.dtd %s/%s-%s.dtd || mv wget.dtd %s/%s-%s.dtd)" % (url, path, name, version, path, name, version)): - print "Updating", file, "Version", version, "rev", revision, "FAILED." - os.system("chmod g+w wget.dtd"); - - print "Updating", file, "Version", version, "rev", revision, "ok" - else: - print "Skipping", file, ", unknown file." 
- continue - - lastname = name - lastpath = path - -except IOError: - print "Can't open", specindex - - -specs = os.listdir(".") - -for spec in specs: - if not os.path.isdir(spec): - continue - versions = os.listdir(spec) - for file in versions: - if re.search("\.xml$", file): - tmp = re.sub("(.*)(\.xml)$", "\\1", file) - name = re.sub("^(.*)-([^/]*)$", "\\1", tmp) - ver = re.sub("^(.*)-([^/]*)$", "\\2", tmp) - - print "Check", os.path.join(spec,ver), os.path.isdir(os.path.join(spec,ver)) - print "Check", os.path.join(spec,name+"-"+ver+".html"), os.path.isfile(os.path.join(spec,name+"-"+ver+".html")) - - if ( not os.path.isdir(os.path.join(spec,ver)) - or not os.path.isfile(os.path.join(spec,name+"-"+ver+".html")) - or os.path.getmtime(os.path.join(spec,file)) > os.path.getmtime(os.path.join(spec,name+"-"+ver+".html"))): - os.system("rm -fR %s/%s" % (spec,ver)) - os.system("rm -f %s/%s-%s.html" % (spec,name,ver)) - os.system("mkdir %s/%s" % (spec,ver)) - os.system("cd %s/%s; %s html ../%s" % (spec,ver,xmlto,file)) - # os.system("mv index.html %s/%s-%s.html" % (spec,name,ver)) - # os.system("sed -i %s/%s-%s.html -e 's/index.html/%s-%s.html/;'" % (spec,name,ver,name,ver)) - os.system("cd %s;%s html-nochunks %s" % (spec,xmlto,file)) - elif re.search("(?<!latest)\.html$", file) and not os.path.isfile(os.path.join(spec,re.sub("html","xml",file))): - tmp = re.sub("(.*)(\.html)$", "\\1", file) - name = re.sub("^(.*)-([^/]*)$", "\\1", tmp) - ver = re.sub("^(.*)-([^/]*)$", "\\2", tmp) - os.system("rm -fR %s/%s" % (spec,ver)) - os.system("rm -f %s/%s-%s.html" % (spec,name,ver)) - for file in versions: - if re.search("-latest\.dtd$", file): - # Do nothing - print "Skipping", file - elif re.search("\.dtd$", file): - tmp = re.sub("(.*)(\.dtd)$", "\\1", file) - name = re.sub("^(.*)-([^/]*)$", "\\1", tmp) - ver = re.sub("^(.*)-([^/]*)$", "\\2", tmp) - - print "Check", os.path.join(spec,ver), os.path.isdir(os.path.join(spec,ver)) - print "Check", 
os.path.join(spec,name+"-"+ver+".html"), os.path.isfile(os.path.join(spec,name+"-"+ver+".html")) - - os.system("mkdir %s/%s" % (spec,ver)) - os.system("cp %s/%s-%s.dtd %s/%s/%s.dtd" % (spec,name,ver,spec,ver,name)) +# Dependencies to run this: +# - xmlto in $PATH + +# FIXME: +# - correctly handle all exceptions +# - copy dtd files where they should be +# - new structure for website: +# specs.fd.o/index.html -- general index +# specs.fd.o/desktop-entry/index.html -- index of all versions of desktop entry, with all formats +# specs.fd.o/desktop-entry/1.0/desktop-entry-spec.xml -- docbook version of the spec 1.0 +# specs.fd.o/desktop-entry/1.0/index.html -- one-page html version of the spec 1.0 +# specs.fd.o/desktop-entry/1.0/split/ -- multiple-page html version of the spec 1.0 +# specs.fd.o/desktop-entry/latest/ -- link to directory containing latest version of the spec + +import os +import sys + +import errno + +import StringIO +import hashlib +import shutil +import subprocess +import urllib +import urllib2 +import urlparse + +DEVELOPMENT = False + +CVSWEB = 'http://cvs.freedesktop.org' +GITWEB = 'http://cgit.freedesktop.org' +HASH = 'md5' + + +def safe_mkdir(dir): + if not dir: + return + + try: + os.mkdir(dir) + except OSError, e: + if e.errno != errno.EEXIST: + raise e + + +def get_hash_from_fd(fd, algo = HASH, read_blocks = 1024): + if algo not in [ 'md5' ]: + raise Exception('Internal error: hash algorithm \'%s\' not planned in code.' 
% algo) + + hash = hashlib.new(algo) + while True: + data = fd.read(read_blocks) + if not data: + break + hash.update(data) + return hash.digest() + + +def get_hash_from_url(url, algo = HASH): + fd = urllib2.urlopen(url, None) + digest = get_hash_from_fd(fd, algo) + fd.close() + return digest + + +def get_hash_from_path(path, algo = HASH): + fd = open(path, 'rb') + digest = get_hash_from_fd(fd, algo, read_blocks = 32768) + fd.close() + return digest + + +def get_hash_from_data(data, algo = HASH): + fd = StringIO.StringIO(data) + digest = get_hash_from_fd(fd, algo, read_blocks = 32768) + fd.close() + return digest + + +class VcsObject: + def __init__(self, vcs, repo, file, revision = None): + self.vcs = vcs + self.repo = repo + self.file = file + self.revision = revision + self.data = None + + def get_url(self): + query = {} + if self.vcs == 'git': + baseurl = GITWEB + path = '/'.join((self.repo, 'plain', self.file)) + if self.revision: + query['id'] = self.revision + elif self.vcs == 'cvs': + baseurl = CVSWEB + path = self.file + if self.revision: + query['rev'] = self.revision + else: + raise Exception('Unknown VCS: %s' % self.vcs) + + (scheme, netloc, basepath) = urlparse.urlsplit(baseurl)[0:3] + full_path = '/'.join((basepath, path)) + + query_str = urllib.urlencode(query) + return urlparse.urlunsplit((scheme, netloc, full_path, query_str, '')) + + def fetch(self): + if self.data: + return + + url = self.get_url() + fd = urllib2.urlopen(url, None) + self.data = fd.read() + fd.close() + + def get_hash(self): + self.fetch() + return get_hash_from_data(self.data) + + +class SpecObject(): + def __init__(self, vcs, spec_dir, version): + self.vcs = vcs + self.spec_dir = spec_dir + self.version = version + + basename = os.path.basename(self.vcs.file) + (self.basename_no_ext, self.ext) = os.path.splitext(basename) + + self.filename = '%s-%s%s' % (self.basename_no_ext, self.version, self.ext) + + if self.ext not in ['.xml', '.sgml', '.txt', '.dtd']: + raise 
Exception('Format \'%s\' not supported for %s' % (self.ext, self.vcs.get_url())) + + self.downloaded = False + self.one_chunk = False + self.multiple_chunks = False + + def download(self): + safe_mkdir(self.spec_dir) + path = os.path.join(self.spec_dir, self.filename) + + if os.path.exists(path): + current_hash = get_hash_from_path(path) + vcs_hash = self.vcs.get_hash() + if current_hash == vcs_hash: + return + + self.vcs.fetch() + fd = open(path, 'wb') + fd.write(self.vcs.data) + fd.close() + + self.downloaded = True + + def htmlize(self, force = False): + if not self.downloaded and not force: + return + + path = os.path.join(self.spec_dir, self.filename) + (path_no_ext, ext) = os.path.splitext(path) + + if self.ext == '.xml': + # One-chunk HTML + html_path = '%s%s' % (path_no_ext, '.html') + if os.path.exists(html_path): + os.unlink(html_path) + + retcode = subprocess.call(['xmlto', '-o', self.spec_dir, 'html-nochunks', path]) + + if retcode != 0: + raise Exception('Cannot convert \'%s\' to HTML.' % path) + self.one_chunk = True + + # Multiple chunks + html_dir = os.path.join(self.spec_dir, self.version) + if os.path.exists(html_dir): + shutil.rmtree(html_dir) + safe_mkdir(html_dir) + + retcode = subprocess.call(['xmlto', '-o', html_dir, 'html', path]) + + if retcode != 0: + raise Exception('Cannot convert \'%s\' to multiple-chunks HTML.' 
% path) + self.multiple_chunks = True + + def latestize(self): + filename_latest = '%s-latest%s' % (self.basename_no_ext, self.ext) + + path_latest = os.path.join(self.spec_dir, filename_latest) + if os.path.exists(path_latest): + os.unlink(path_latest) + os.symlink(self.filename, path_latest) + + if self.ext == '.xml': + # One-chunk HTML + html_path_latest = os.path.join(self.spec_dir, '%s%s' % (self.basename_no_ext, '.html')) + if os.path.exists(html_path_latest): + os.unlink(html_path_latest) + + (filename_no_ext, ext) = os.path.splitext(self.filename) + html_filename = '%s%s' % (filename_no_ext, '.html') + html_path = os.path.join(self.spec_dir, html_filename) + if os.path.exists(html_path): + os.symlink(html_filename, html_path_latest) + + # Multiple chunks + html_dir_latest = os.path.join(self.spec_dir, 'latest') + if os.path.exists(html_dir_latest): + os.unlink(html_dir_latest) + + html_dir = os.path.join(self.spec_dir, self.version) + if os.path.exists(html_dir): + os.symlink(self.version, html_dir_latest) + + +SCRIPT = VcsObject('git', 'xdg/xdg-specs', 'web-export/update.py') +SPECS_INDEX = VcsObject('git', 'xdg/xdg-specs', 'web-export/specs.idx') + + +def is_up_to_date(): + current_hash = get_hash_from_path(__file__) + vcs_hash = SCRIPT.get_hash() + + return current_hash == vcs_hash + + +if not DEVELOPMENT: + if not is_up_to_date(): + print >>sys.stderr, 'Script is not up-to-date, please download %s' % SCRIPT.get_url() + sys.exit(1) + + SPECS_INDEX.fetch() + lines = SPECS_INDEX.data.split('\n') +else: + lines = open('specs.idx').readlines() + + +latests = [] + +for line in lines: + line = line.strip() + if not line or line.startswith('#'): + continue + + (data, revision, version, path) = line.split() + if data.startswith("git:"): + git_data = data.split(":") + vcs = VcsObject('git', git_data[1], git_data[2], revision) + else: + vcs = VcsObject('cvs', None, data, revision) + + spec = SpecObject(vcs, path, version) + + spec.download() + spec.htmlize() + + # 
Create latest links if it's the first time we see this spec + if (spec.spec_dir, spec.basename_no_ext) not in latests: + latests.append((spec.spec_dir, spec.basename_no_ext)) + spec.latestize() |