New Translation Structure

author: simo <simo> 2005-06-10 19:30:49 +0000
committer: simo <simo> 2005-06-10 19:30:49 +0000
commit: 7044610e32b37c079e03cab8bf9cfa8a8f381ec9 (patch)
tree: 666a7c8c77a274b0c9cb0ba6b7fd20a535d418cf /web/utils/translation_tool
parent: f1889a9e3d968f194e4b6955915f0e0c3c43622c (diff)
download: aurweb-7044610e32b37c079e03cab8bf9cfa8a8f381ec9.tar.xz
1 files changed, 279 insertions, 0 deletions
diff --git a/web/utils/translation_tool b/web/utils/translation_tool
new file mode 100755
index 0000000..0341da3
--- /dev/null
+++ b/web/utils/translation_tool
@@ -0,0 +1,279 @@
+#! /usr/bin/python -O
+# -*- coding: iso-8859-1 -*-
+
+# This script iterates through the 'html' and 'lib' directories
+# looking for php scripts that contain an include_once("xxx_po.inc")
+# line and __() functions.  It creates/appends to the corresponding
+# "xxx_po.inc" file in the 'lang/<abbreviation>' subdirectory and places
+# the translated i18n strings into the file in the proper format.
+#
+# usage: translation_tool [-v] [-f]
+#        -v: verbose, print duplicate terms that could be moved to common_po
+#        -f: force, overwrite existing translated files, otherwise append
+#
+
+translator_name=raw_input("What is your full name? ")
+translator_email=raw_input("What is your email address? ")
+trans_native=raw_input("What is the native name of the language? ")
+trans_eng=raw_input("What is the English name of the language? ")
+trans_abbrv=raw_input("What is the ISO 639-1 Alpha-2 abbreviation for the language? ")
+if len(trans_abbrv) != 2:
+	print "Must use 2 character abbreviation"
+	raise SystemExit
+
+
+INC_HEADER = """\
+<?
+# %s (%s) translation
+# Translator: %s <%s>
+
+include_once("translator.inc");
+global $_t;
+""" % (trans_eng, trans_native, translator_name, translator_email)
+
+
+import sys
+print_dupes = '-v' in sys.argv
+force = '-f' in sys.argv
+
+import re, os
+up = re.compile('_\(\s*"(([^"]|(?<=\\\\)["])+)"')
+
+lang = { 'common_po.inc': {} }
+
+current_dir = os.getcwd()
+
+# Find the common_po.list file and load its terms.
+#
+common = {}
+for dir in ['../lang', 'lang']:
+	if os.path.exists(dir):
+		os.chdir(dir)
+		if os.path.exists('common_po.list'):
+			f = open('common_po.list','r')
+			lines = f.readlines()
+			f.close()
+			for line in lines:
+				if line[0] != '#':
+					common[line[:-1]] = 0
+					lang['common_po.inc'][line[:-1]] = 1
+			os.chdir(current_dir)
+			break
+		os.chdir(current_dir)
+else:
+	print "Can't find common_po.list file."
+	raise SystemExit
+
+# Find the lang directory.
+for dir in ['../lang', 'lang']:
+	if os.path.exists(dir):
+		lang_dir = dir
+		break
+else:
+	print "Can't find the lang directory."
+	raise SystemExit
+
+trans_dir = trans_abbrv
+if not os.path.exists(os.path.join(lang_dir,trans_dir)):
+	os.mkdir(os.path.join(lang_dir,trans_dir))
+
+
+# Iterate through various places where the php files might be.
+#
+for dir in ['../html', '../lib', 'html', 'lib']:
+
+	if os.path.exists(dir):
+		# Find all the PHP files in the current directory.
+		#
+		files = [x for x in os.listdir(dir)
+					if (x[-4:] == '.inc' and x[-7:] != '_po.inc')
+					or x[-6:] == '.class'
+					or x[-4:] == '.php'
+					or x[-6:] == '.phtml'
+				]
+		os.chdir(dir)
+
+		for file in files:
+			f = open(file,'r')
+			lines = f.readlines()
+			f.close()
+
+			# Is this file one we need to parse for internationalized strings?
+			#
+			parse_file = 0
+			for line in lines:
+				match = re.search("include(_once|)\s*\(\s*[\"']([A-Za-z_]+_po.inc)[\"']\s*\);",line)
+				if match and match.group(2) != "common_po.inc":
+					po = match.group(2)
+					if not lang.has_key(po):
+						lang[po] = {}
+					parse_file = 1
+					break
+
+			# If we need to parse the file, do so.
+			#
+			if parse_file:
+				print "Parsing %s..." % file
+				for line in lines:
+					match = up.search(line)
+					while match:
+						term = match.group(1).replace('\\"','"')
+						if common.has_key(term):
+							common[term] += 1
+						else:
+							if print_dupes:
+								for key in lang.keys():
+									if key != po and lang[key].has_key(term):
+										print "...Duplicate term: \"%s\" is also in %s." % (term,key)
+							lang[po][term] = 1
+						line = line[match.end(1):]
+						match = up.search(line)
+
+		os.chdir(current_dir)
+
+# Now generate all the .inc files if they don't already exist.
+# If they do exist, only append new stuff to the end.  If the 'force'
+# option is passed, just overwrite the entire thing.
+#
+
+print """
+INSTRUCTIONS:
+****************************************************
+You will now be prompted for all needed translations.
+Please translate the requested lines, hitting [enter]
+goes to the next one. You may stop at any time using
+ctrl+c, and pick up where you left off by running
+translation_tool again.
+
+If there are escapes in the original English, you may
+need to include them in your translation. The
+following is a list of escapes and what they do:
+%h - HTML code inserted at run-time
+%s - Nontranslated string inserted at run-time (such as username)
+\\" - A double quote (")
+
+When you have finished your translation, make a tarball
+of the lang/ directory and send it to Simo (simo@neotuli.net)
+for inclusion in the AUR.
+By submitting a translation, you are implying that you
+are also willing to maintain it. When there are
+new strings to be translated, you will be contacted.
+****************************************************
+"""
+
+os.chdir(lang_dir)
+if force:
+	# just going to overwrite any existing files
+	# NOT RECOMMENDED! OVERWRITES ALL OTHER LANGUAGE SUPPORT
+	#
+	for po in lang.keys():
+		print "Generating %s..." % po
+
+		f = open(po,'w')
+		f.write(INC_HEADER)
+		f.write('\ninclude_once(\"en/%s\");\n' % po)
+		f.write('\n?>')
+		f.close()
+
+		f = open(trans_dir+"/"+po,'w')
+		f.write(INC_HEADER)
+
+		for term in lang[po].keys():
+			f.write("\n")
+			trans = raw_input(term+"  = ")
+			f.write('$_t["%s"]["%s"] = "%s";\n' % (trans_abbrv, term, trans))
+		f.write("\n");
+		f.write("?>");
+		f.close()
+else:
+	# need to leave existing file intact, and only append on terms that are new
+	#
+	incre = re.compile('^include_once\("%s\/(.*)"\);' % trans_abbrv)
+	mapre = re.compile('^\$_t\["%s"\]\["(.*)"\].*$' % trans_abbrv)
+	for po in lang.keys():
+		got_match = False
+		print "Updating %s..." % po
+		try:
+			f = open(po, 'r')
+			new_file = 0
+		except:
+			new_file = 1
+		if not new_file:
+			contents = f.readlines()
+			f.close()
+			# strip off beginning/ending empty lines
+			#
+			while contents[0] == '':
+				del contents[0]
+			while contents[-1] in ['', "\n", "?>", "?>\n", "\n?>"]:
+				del contents[-1]
+						
+			for line in contents:
+				match = incre.search(line)
+				if match:
+					got_match = True
+			if not got_match:
+				f = open(po,'w')
+				f.write("".join(contents))
+				f.write('\ninclude_once(\"%s/%s\");\n' % (trans_abbrv, po))
+				f.write("\n?>");
+				f.close()
+		else:
+			f = open(po,'w')
+			f.write(INC_HEADER)
+			f.write('\ninclude_once(\"%s/%s\");\n' % (trans_abbrv, po))
+			f.write('\n?>')
+			f.close()
+		# first read in file contents so we can hash what already exists
+		#
+		try:
+			f = open(trans_dir+"/"+po, 'r')
+			new_file = 0
+		except:
+			new_file = 1
+
+		existing_terms = []
+		if not new_file:
+			contents = f.readlines()
+			f.close()
+
+			# strip off beginning/ending empty lines
+			#
+			while contents[0] == '':
+				del contents[0]
+			while contents[-1] in ['', "\n", "?>", "?>\n", "\n?>"]:
+				del contents[-1]
+
+			# next, collect existing terms
+			#
+			for line in contents:
+				match = mapre.search(line)
+				if match:
+					existing_terms.append(match.group(1))
+
+		# now append any new terms to EOF
+		#
+		f = open(trans_dir+"/"+po, 'w')
+		if not new_file:
+			f.write("".join(contents))
+		else:
+			f.write(INC_HEADER)
+
+		for term in lang[po].keys():
+			if term not in existing_terms:
+				f.write("\n");
+				trans = raw_input(term+"  = ")
+				f.write('$_t["%s"]["%s"] = "%s";\n' % (trans_abbrv, term, trans))
+		f.write("\n?>");
+		f.close()
+
+# Print out warnings for unused and little-used common entries.
+#
+for key in common.keys():
+	if common[key] == 1:
+		print "Warning: common entry '%s' is only used once." % key
+for key in common.keys():
+	if common[key] == 0:
+		print "Warning: unused common entry '%s'." % key
+
+# vim: ts=2 sw=2 noet ft=python
author	simo <simo>	2005-06-10 19:30:49 +0000
committer	simo <simo>	2005-06-10 19:30:49 +0000
commit	7044610e32b37c079e03cab8bf9cfa8a8f381ec9 (patch)
tree	666a7c8c77a274b0c9cb0ba6b7fd20a535d418cf /web/utils/translation_tool
parent	f1889a9e3d968f194e4b6955915f0e0c3c43622c (diff)
download	aurweb-7044610e32b37c079e03cab8bf9cfa8a8f381ec9.tar.xz