Diffstat (limited to 'web-export/update.py')
-rwxr-xr-x  web-export/update.py  274
1 files changed, 274 insertions, 0 deletions
diff --git a/web-export/update.py b/web-export/update.py
new file mode 100755
index 0000000..cd54f28
--- /dev/null
+++ b/web-export/update.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python
+
+# Dependencies to run this:
+# - xmlto in $PATH
+
+# FIXME:
+# - correctly handle all exceptions
+# - copy dtd files where they should be
+# - new structure for website:
+#   specs.fd.o/index.html -- general index
+#   specs.fd.o/desktop-entry/index.html -- index of all versions of desktop entry, with all formats
+#   specs.fd.o/desktop-entry/1.0/desktop-entry-spec.xml -- docbook version of the spec 1.0
+#   specs.fd.o/desktop-entry/1.0/index.html -- one-page html version of the spec 1.0
+#   specs.fd.o/desktop-entry/1.0/split/ -- multiple-page html version of the spec 1.0
+#   specs.fd.o/desktop-entry/latest/ -- link to directory containing latest version of the spec
+
+import os
+import sys
+
+import errno
+
+import StringIO
+import hashlib
+import shutil
+import subprocess
+import urllib
+import urllib2
+import urlparse
+
+DEVELOPMENT = False
+
+CVSWEB = 'http://cvs.freedesktop.org'
+GITWEB = 'http://cgit.freedesktop.org'
+HASH = 'md5'
+
+
+def safe_mkdir(dir):
+    if not dir:
+        return
+
+    try:
+        os.mkdir(dir)
+    except OSError, e:
+        if e.errno != errno.EEXIST:
+            raise e
+
+
+def get_hash_from_fd(fd, algo = HASH, read_blocks = 1024):
+    if algo not in [ 'md5' ]:
+        raise Exception('Internal error: hash algorithm \'%s\' not planned in code.' % algo)
+
+    hash = hashlib.new(algo)
+    while True:
+        data = fd.read(read_blocks)
+        if not data:
+            break
+        hash.update(data)
+    return hash.digest()
+
+
+def get_hash_from_url(url, algo = HASH):
+    fd = urllib2.urlopen(url, None)
+    digest = get_hash_from_fd(fd, algo)
+    fd.close()
+    return digest
+
+
+def get_hash_from_path(path, algo = HASH):
+    fd = open(path, 'rb')
+    digest = get_hash_from_fd(fd, algo, read_blocks = 32768)
+    fd.close()
+    return digest
+
+
+def get_hash_from_data(data, algo = HASH):
+    fd = StringIO.StringIO(data)
+    digest = get_hash_from_fd(fd, algo, read_blocks = 32768)
+    fd.close()
+    return digest
+
+
+class VcsObject:
+    def __init__(self, vcs, repo, file, revision = None):
+        self.vcs = vcs
+        self.repo = repo
+        self.file = file
+        self.revision = revision
+        self.data = None
+
+    def get_url(self):
+        query = {}
+        if self.vcs == 'git':
+            baseurl = GITWEB
+            path = '/'.join((self.repo, 'plain', self.file))
+            if self.revision:
+                query['id'] = self.revision
+        elif self.vcs == 'cvs':
+            baseurl = CVSWEB
+            path = self.file
+            if self.revision:
+                query['rev'] = self.revision
+        else:
+            raise Exception('Unknown VCS: %s' % self.vcs)
+
+        (scheme, netloc, basepath) = urlparse.urlsplit(baseurl)[0:3]
+        full_path = '/'.join((basepath, path))
+
+        query_str = urllib.urlencode(query)
+        return urlparse.urlunsplit((scheme, netloc, full_path, query_str, ''))
+
+    def fetch(self):
+        if self.data:
+            return
+
+        url = self.get_url()
+        fd = urllib2.urlopen(url, None)
+        self.data = fd.read()
+        fd.close()
+
+    def get_hash(self):
+        self.fetch()
+        return get_hash_from_data(self.data)
+
+
+class SpecObject():
+    def __init__(self, vcs, spec_dir, version):
+        self.vcs = vcs
+        self.spec_dir = spec_dir
+        self.version = version
+
+        basename = os.path.basename(self.vcs.file)
+        (self.basename_no_ext, self.ext) = os.path.splitext(basename)
+
+        self.filename = '%s-%s%s' % (self.basename_no_ext, self.version, self.ext)
+
+        if self.ext not in ['.xml', '.sgml', '.txt', '.dtd']:
+            raise Exception('Format \'%s\' not supported for %s' % (self.ext, self.vcs.get_url()))
+
+        self.downloaded = False
+        self.one_chunk = False
+        self.multiple_chunks = False
+
+    def download(self):
+        safe_mkdir(self.spec_dir)
+        path = os.path.join(self.spec_dir, self.filename)
+
+        if os.path.exists(path):
+            current_hash = get_hash_from_path(path)
+            vcs_hash = self.vcs.get_hash()
+            if current_hash == vcs_hash:
+                return
+
+        self.vcs.fetch()
+        fd = open(path, 'wb')
+        fd.write(self.vcs.data)
+        fd.close()
+
+        self.downloaded = True
+
+    def htmlize(self, force = False):
+        if not self.downloaded and not force:
+            return
+
+        path = os.path.join(self.spec_dir, self.filename)
+        (path_no_ext, ext) = os.path.splitext(path)
+
+        # One-chunk HTML
+        html_path = '%s%s' % (path_no_ext, '.html')
+        if os.path.exists(html_path):
+            os.unlink(html_path)
+
+        # Multiple chunks
+        html_dir = os.path.join(self.spec_dir, self.version)
+        if os.path.exists(html_dir):
+            shutil.rmtree(html_dir)
+
+        one_chunk_command = None
+        multiple_chunks_command = None
+
+        if self.ext == '.xml':
+            one_chunk_command = ['xmlto', '-o', self.spec_dir, 'html-nochunks', path]
+            multiple_chunks_command = ['xmlto', '-o', html_dir, 'html', path]
+        elif self.ext == '.sgml':
+            one_chunk_command = ['docbook2html', '-o', self.spec_dir, '--nochunks', path]
+            multiple_chunks_command = ['docbook2html', '-o', html_dir, path]
+
+        if one_chunk_command:
+            retcode = subprocess.call(one_chunk_command)
+            if retcode != 0:
+                raise Exception('Cannot convert \'%s\' to HTML.' % path)
+            self.one_chunk = True
+
+        if multiple_chunks_command:
+            safe_mkdir(html_dir)
+            retcode = subprocess.call(multiple_chunks_command)
+            if retcode != 0:
+                raise Exception('Cannot convert \'%s\' to multiple-chunks HTML.' % path)
+            self.multiple_chunks = True
+
+    def latestize(self):
+        filename_latest = '%s-latest%s' % (self.basename_no_ext, self.ext)
+
+        path_latest = os.path.join(self.spec_dir, filename_latest)
+        if os.path.exists(path_latest):
+            os.unlink(path_latest)
+        os.symlink(self.filename, path_latest)
+
+        if self.ext in ['.xml', '.sgml']:
+            # One-chunk HTML
+            html_path_latest = os.path.join(self.spec_dir, '%s-latest%s' % (self.basename_no_ext, '.html'))
+            if os.path.exists(html_path_latest):
+                os.unlink(html_path_latest)
+
+            (filename_no_ext, ext) = os.path.splitext(self.filename)
+            html_filename = '%s%s' % (filename_no_ext, '.html')
+            html_path = os.path.join(self.spec_dir, html_filename)
+            if os.path.exists(html_path):
+                os.symlink(html_filename, html_path_latest)
+
+            # Multiple chunks
+            html_dir_latest = os.path.join(self.spec_dir, 'latest')
+            if os.path.exists(html_dir_latest):
+                os.unlink(html_dir_latest)
+
+            html_dir = os.path.join(self.spec_dir, self.version)
+            if os.path.exists(html_dir):
+                os.symlink(self.version, html_dir_latest)
+
+
+SCRIPT = VcsObject('git', 'xdg/xdg-specs', 'web-export/update.py')
+SPECS_INDEX = VcsObject('git', 'xdg/xdg-specs', 'web-export/specs.idx')
+
+
+def is_up_to_date():
+    current_hash = get_hash_from_path(__file__)
+    vcs_hash = SCRIPT.get_hash()
+
+    return current_hash == vcs_hash
+
+
+if not DEVELOPMENT:
+    if not is_up_to_date():
+        print >>sys.stderr, 'Script is not up-to-date, please download %s' % SCRIPT.get_url()
+        sys.exit(1)
+
+    SPECS_INDEX.fetch()
+    lines = SPECS_INDEX.data.split('\n')
+else:
+    lines = open('specs.idx').readlines()
+
+
+latests = []
+
+for line in lines:
+    line = line.strip()
+    if not line or line.startswith('#'):
+        continue
+
+    (data, revision, version, path) = line.split()
+    if data.startswith("git:"):
+        git_data = data.split(":")
+        vcs = VcsObject('git', git_data[1], git_data[2], revision)
+    else:
+        vcs = VcsObject('cvs', None, data, revision)
+
+    spec = SpecObject(vcs, path, version)
+
+    spec.download()
+    spec.htmlize()
+
+    # Create latest links if it's the first time we see this spec
+    if (spec.spec_dir, spec.basename_no_ext) not in latests:
+        latests.append((spec.spec_dir, spec.basename_no_ext))
+        spec.latestize()
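The main loop at the bottom expects each non-comment line of specs.idx to carry four whitespace-separated fields: a source (either a git:<repo>:<file> triplet or a plain CVS path), a revision, a version, and the directory the spec is published under. The actual index file is not part of this diff, so the entry below is purely hypothetical and only illustrates the field layout the parser assumes:

    # source                                                  revision  version  directory
    git:xdg/xdg-specs:desktop-entry/desktop-entry-spec.xml    1.1       1.1      desktop-entry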
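To make the URL construction in VcsObject.get_url() concrete, here is a trace for a git-hosted file, in the script's own Python 2 dialect. The repository and file names are borrowed from the SCRIPT constant above; the revision is a made-up example. For git, the method builds a cgit "plain" URL and passes the revision as the id query parameter:

    vcs = VcsObject('git', 'xdg/xdg-specs', 'web-export/update.py', 'deadbee')
    print vcs.get_url()
    # http://cgit.freedesktop.org/xdg/xdg-specs/plain/web-export/update.py?id=deadbee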
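Likewise, for a spec that goes through download(), htmlize() and latestize(), the code produces a per-spec directory shaped roughly as follows. All names here are hypothetical (a DocBook XML spec "foo-spec", version 1.0, published under "foo"), and the one-chunk HTML filename assumes xmlto names its output after the input file:

    foo/
        foo-spec-1.0.xml          downloaded DocBook source (download)
        foo-spec-1.0.html         one-chunk HTML (htmlize, xmlto html-nochunks)
        1.0/                      multiple-chunk HTML (htmlize, xmlto html)
        foo-spec-latest.xml       symlink to foo-spec-1.0.xml (latestize)
        foo-spec-latest.html      symlink to foo-spec-1.0.html (latestize)
        latest                    symlink to 1.0/ (latestize)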