Diffstat (limited to 'web-export/update.py')
-rwxr-xr-x  web-export/update.py  274
1 file changed, 274 insertions, 0 deletions
diff --git a/web-export/update.py b/web-export/update.py
new file mode 100755
index 0000000..cd54f28
--- /dev/null
+++ b/web-export/update.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python
+
+# Dependencies to run this:
+# - xmlto in $PATH (for DocBook XML specs)
+# - docbook2html in $PATH (for DocBook SGML specs)
+
+# FIXME:
+# - correctly handle all exceptions
+# - copy dtd files where they should be
+# - new structure for website:
+# specs.fd.o/index.html -- general index
+# specs.fd.o/desktop-entry/index.html -- index of all versions of desktop entry, with all formats
+# specs.fd.o/desktop-entry/1.0/desktop-entry-spec.xml -- docbook version of the spec 1.0
+# specs.fd.o/desktop-entry/1.0/index.html -- one-page html version of the spec 1.0
+# specs.fd.o/desktop-entry/1.0/split/ -- multiple-page html version of the spec 1.0
+# specs.fd.o/desktop-entry/latest/ -- link to directory containing latest version of the spec
+
+import os
+import sys
+
+import errno
+
+import StringIO
+import hashlib
+import shutil
+import subprocess
+import urllib
+import urllib2
+import urlparse
+
+DEVELOPMENT = False
+
+CVSWEB = 'http://cvs.freedesktop.org'
+GITWEB = 'http://cgit.freedesktop.org'
+HASH = 'md5'
+
+
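+# Create a directory, ignoring the error when it already exists.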
+def safe_mkdir(dir):
+    if not dir:
+        return
+
+    try:
+        os.mkdir(dir)
+    except OSError, e:
+        if e.errno != errno.EEXIST:
+            raise
+
+
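+# Hash helpers: digest the content of a file object, reading it in blocks;
+# the from_url/from_path/from_data variants below wrap this for convenience.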
+def get_hash_from_fd(fd, algo = HASH, read_blocks = 1024):
+    if algo not in [ 'md5' ]:
+        raise Exception('Internal error: hash algorithm \'%s\' is not supported.' % algo)
+
+    hash = hashlib.new(algo)
+    while True:
+        data = fd.read(read_blocks)
+        if not data:
+            break
+        hash.update(data)
+    return hash.digest()
+
+
+def get_hash_from_url(url, algo = HASH):
+    fd = urllib2.urlopen(url, None)
+    digest = get_hash_from_fd(fd, algo)
+    fd.close()
+    return digest
+
+
+def get_hash_from_path(path, algo = HASH):
+    fd = open(path, 'rb')
+    digest = get_hash_from_fd(fd, algo, read_blocks = 32768)
+    fd.close()
+    return digest
+
+
+def get_hash_from_data(data, algo = HASH):
+    fd = StringIO.StringIO(data)
+    digest = get_hash_from_fd(fd, algo, read_blocks = 32768)
+    fd.close()
+    return digest
+
+
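+# A file tracked in a VCS (git via cgit, or cvs via cvsweb), identified by
+# repository, path and optional revision; it can be fetched over HTTP and
+# hashed to compare against local copies.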
+class VcsObject:
+    def __init__(self, vcs, repo, file, revision = None):
+        self.vcs = vcs
+        self.repo = repo
+        self.file = file
+        self.revision = revision
+        self.data = None
+
+    def get_url(self):
+        query = {}
+        if self.vcs == 'git':
+            baseurl = GITWEB
+            path = '/'.join((self.repo, 'plain', self.file))
+            if self.revision:
+                query['id'] = self.revision
+        elif self.vcs == 'cvs':
+            baseurl = CVSWEB
+            path = self.file
+            if self.revision:
+                query['rev'] = self.revision
+        else:
+            raise Exception('Unknown VCS: %s' % self.vcs)
+
+        (scheme, netloc, basepath) = urlparse.urlsplit(baseurl)[0:3]
+        full_path = '/'.join((basepath, path))
+
+        query_str = urllib.urlencode(query)
+        return urlparse.urlunsplit((scheme, netloc, full_path, query_str, ''))
+
+    def fetch(self):
+        if self.data:
+            return
+
+        url = self.get_url()
+        fd = urllib2.urlopen(url, None)
+        self.data = fd.read()
+        fd.close()
+
+    def get_hash(self):
+        self.fetch()
+        return get_hash_from_data(self.data)
+
+
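+# One version of one specification: knows its source file in the VCS, the
+# directory it is published in, and how to generate its HTML renderings.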
+class SpecObject:
+    def __init__(self, vcs, spec_dir, version):
+        self.vcs = vcs
+        self.spec_dir = spec_dir
+        self.version = version
+
+        basename = os.path.basename(self.vcs.file)
+        (self.basename_no_ext, self.ext) = os.path.splitext(basename)
+
+        self.filename = '%s-%s%s' % (self.basename_no_ext, self.version, self.ext)
+
+        if self.ext not in ['.xml', '.sgml', '.txt', '.dtd']:
+            raise Exception('Format \'%s\' not supported for %s' % (self.ext, self.vcs.get_url()))
+
+        self.downloaded = False
+        self.one_chunk = False
+        self.multiple_chunks = False
+
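+    # Download the spec from the VCS into spec_dir, skipping the write when
+    # the local copy already has the same hash as the remote content.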
+    def download(self):
+        safe_mkdir(self.spec_dir)
+        path = os.path.join(self.spec_dir, self.filename)
+
+        if os.path.exists(path):
+            current_hash = get_hash_from_path(path)
+            vcs_hash = self.vcs.get_hash()
+            if current_hash == vcs_hash:
+                return
+
+        self.vcs.fetch()
+        fd = open(path, 'wb')
+        fd.write(self.vcs.data)
+        fd.close()
+
+        self.downloaded = True
+
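+    # Generate the HTML renderings of a freshly-downloaded spec: a one-page
+    # version next to the source, and a chunked version in a per-version
+    # directory; xmlto handles DocBook XML, docbook2html handles SGML.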
+    def htmlize(self, force = False):
+        if not self.downloaded and not force:
+            return
+
+        path = os.path.join(self.spec_dir, self.filename)
+        (path_no_ext, ext) = os.path.splitext(path)
+
+        # One-chunk HTML
+        html_path = '%s%s' % (path_no_ext, '.html')
+        if os.path.exists(html_path):
+            os.unlink(html_path)
+
+        # Multiple chunks
+        html_dir = os.path.join(self.spec_dir, self.version)
+        if os.path.exists(html_dir):
+            shutil.rmtree(html_dir)
+
+        one_chunk_command = None
+        multiple_chunks_command = None
+
+        if self.ext == '.xml':
+            one_chunk_command = ['xmlto', '-o', self.spec_dir, 'html-nochunks', path]
+            multiple_chunks_command = ['xmlto', '-o', html_dir, 'html', path]
+        elif self.ext == '.sgml':
+            one_chunk_command = ['docbook2html', '-o', self.spec_dir, '--nochunks', path]
+            multiple_chunks_command = ['docbook2html', '-o', html_dir, path]
+
+        if one_chunk_command:
+            retcode = subprocess.call(one_chunk_command)
+            if retcode != 0:
+                raise Exception('Cannot convert \'%s\' to HTML.' % path)
+            self.one_chunk = True
+
+        if multiple_chunks_command:
+            safe_mkdir(html_dir)
+            retcode = subprocess.call(multiple_chunks_command)
+            if retcode != 0:
+                raise Exception('Cannot convert \'%s\' to multiple-chunks HTML.' % path)
+            self.multiple_chunks = True
+
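+    # Update the '-latest' symlinks (source file, one-page HTML, and the
+    # 'latest' directory for chunked HTML) to point to this version.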
+    def latestize(self):
+        filename_latest = '%s-latest%s' % (self.basename_no_ext, self.ext)
+
+        path_latest = os.path.join(self.spec_dir, filename_latest)
+        if os.path.lexists(path_latest):
+            os.unlink(path_latest)
+        os.symlink(self.filename, path_latest)
+
+        if self.ext in ['.xml', '.sgml']:
+            # One-chunk HTML
+            html_path_latest = os.path.join(self.spec_dir, '%s-latest%s' % (self.basename_no_ext, '.html'))
+            if os.path.lexists(html_path_latest):
+                os.unlink(html_path_latest)
+
+            (filename_no_ext, ext) = os.path.splitext(self.filename)
+            html_filename = '%s%s' % (filename_no_ext, '.html')
+            html_path = os.path.join(self.spec_dir, html_filename)
+            if os.path.exists(html_path):
+                os.symlink(html_filename, html_path_latest)
+
+            # Multiple chunks
+            html_dir_latest = os.path.join(self.spec_dir, 'latest')
+            if os.path.lexists(html_dir_latest):
+                os.unlink(html_dir_latest)
+
+            html_dir = os.path.join(self.spec_dir, self.version)
+            if os.path.exists(html_dir):
+                os.symlink(self.version, html_dir_latest)
+
+
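+# Canonical locations of this script and of the spec index in git; the
+# script location is used for the self-update check below.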
+SCRIPT = VcsObject('git', 'xdg/xdg-specs', 'web-export/update.py')
+SPECS_INDEX = VcsObject('git', 'xdg/xdg-specs', 'web-export/specs.idx')
+
+
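+# True when the running copy of this script matches the latest one in git.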
+def is_up_to_date():
+    current_hash = get_hash_from_path(__file__)
+    vcs_hash = SCRIPT.get_hash()
+
+    return current_hash == vcs_hash
+
+
+if not DEVELOPMENT:
+    if not is_up_to_date():
+        print >>sys.stderr, 'Script is not up-to-date, please download %s' % SCRIPT.get_url()
+        sys.exit(1)
+
+    SPECS_INDEX.fetch()
+    lines = SPECS_INDEX.data.split('\n')
+else:
+    lines = open('specs.idx').readlines()
+
+
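+# Each non-comment line of specs.idx has four whitespace-separated fields:
+#   <source> <revision> <version> <spec_dir>
+# where <source> is 'git:<repo>:<path>' for git, or a cvsweb path for cvs.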
+latests = []
+
+for line in lines:
+    line = line.strip()
+    if not line or line.startswith('#'):
+        continue
+
+    (data, revision, version, path) = line.split()
+    if data.startswith("git:"):
+        git_data = data.split(":")
+        vcs = VcsObject('git', git_data[1], git_data[2], revision)
+    else:
+        vcs = VcsObject('cvs', None, data, revision)
+
+    spec = SpecObject(vcs, path, version)
+
+    spec.download()
+    spec.htmlize()
+
+    # Create latest links if it's the first time we see this spec
+    if (spec.spec_dir, spec.basename_no_ext) not in latests:
+        latests.append((spec.spec_dir, spec.basename_no_ext))
+        spec.latestize()