Created
March 2, 2015 19:33
-
-
Save CodeMonkeySteve/0aaf78c3ca996cfaf5c6 to your computer and use it in GitHub Desktop.
XLIFF Translator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'bing_translator' | |
require 'active_support' | |
require 'active_support/core_ext' | |
# Reopens the BingTranslator class (from the `bing_translator` gem required
# above) to add:
#   * a YAML-backed translation cache shared by all instances, and
#   * a batched #translate built on the TranslateArray endpoint.
class BingTranslator
  # Cache layout: { target locale (String) => { source text => translated text } }.
  @@cache = {}
  @@cache_path = __dir__ + '/.xliff_trans_cache'
  # An empty cache file makes YAML.load_file return a falsy value; fall back
  # to an empty Hash so later lookups do not blow up.
  @@cache = (YAML.load_file(@@cache_path) || {}) if File.exist?(@@cache_path)

  # Writes the in-memory cache to the cache file next to this script.
  # The block form of File.open guarantees the handle is flushed and closed.
  def self.save_cache
    File.open(@@cache_path, 'w') { |file| YAML.dump(@@cache, file) }
  end

  TRANSLATE_ARRAY_URI = URI.parse('http://api.microsofttranslator.com/V2/Http.svc/TranslateArray').freeze

  # Keep the pre-existing single-string #translate reachable before it is
  # overridden below.
  alias_method :translate_one, :translate

  # Translates any number of strings, consulting and filling the cache.
  #
  # @param strings [Array<String>] source texts
  # @param to [#to_s] target locale
  # @param from [#to_s] source locale (defaults to 'en')
  # @return [Hash{String => String, nil}] each source text mapped to its
  #   translation; nil where no translation was obtained. An empty Hash is
  #   returned when no strings are given.
  def translate(*strings, to:, from: 'en')
    return {} if strings.empty?

    cache = (@@cache[to.to_s] ||= {})
    pending = strings.reject { |str| cache.include?(str) }.uniq

    # Strings that contain something tag-like are sent one at a time through
    # the original method with an explicit HTML content type, then dropped
    # from the batch.
    pending.reject! do |str|
      next false unless str.match(/<[\w-]+>/)

      single = translate_one(str, from: from, to: to, contentType: 'text/html')
      cache[str] = single if single.present?
      true
    end

    if pending.present?
      params = { texts: pending, from: CGI.escape(from.to_s), to: CGI.escape(to.to_s) }
      doc = Nokogiri.XML(array_result(TRANSLATE_ARRAY_URI, params).body)
      translated = doc.xpath('xmlns:ArrayOfTranslateArrayResponse/xmlns:TranslateArrayResponse/xmlns:TranslatedText').map(&:text)
      # Results are matched to inputs by position; blank results are not cached.
      pending.each_with_index do |src, idx|
        cache[src] = translated[idx] if translated[idx].present?
      end
    end

    strings.map { |str| [str, cache[str]] }.to_h
  end

  # POSTs a TranslateArrayRequest XML document to +uri+.
  #
  # @param uri [URI] endpoint to call
  # @param params [Hash] :from, :to, :texts (Array of strings) and optionally
  #   :content_type (defaults to 'text/html')
  # @param headers [Hash] extra request headers to add
  # @return [Net::HTTPResponse] the response when the status code is 200
  # @raise [Exception] with the text of the response body for any other status
  def array_result(uri, params = {}, headers = {})
    # Not defined in this file; the Authorization header below relies on it
    # having populated @access_token.
    get_access_token

    http = Net::HTTP.new(uri.host, uri.port)
    if uri.scheme == "https"
      http.use_ssl = true
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE if @skip_ssl_verify
    end

    content_type = params.fetch(:content_type, 'text/html')
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.TranslateArrayRequest do
        xml.AppId
        xml.From_ params[:from]
        xml.Options_ do
          # The element was previously misspelled "CotentType", so the option
          # never reached the service under its real name.
          xml.ContentType({xmlns: 'http://schemas.datacontract.org/2004/07/Microsoft.MT.Web.Service.V2'}, content_type)
        end
        xml.Texts do
          params[:texts].each do |text|
            xml.string({xmlns: 'http://schemas.microsoft.com/2003/10/Serialization/Arrays'}, text)
          end
        end
        xml.To_ params[:to]
      end
    end

    request = Net::HTTP::Post.new(uri.path)
    request.add_field 'Content-Type', 'application/xml'
    request.add_field 'Authorization', "Bearer #{@access_token['access_token']}"
    headers.each { |name, value| request.add_field(name, value) }
    request.body = builder.to_xml(indent: 2)

    results = http.request(request)
    return results if results.code.to_i == 200

    # Error bodies come back as markup; surface only their text content.
    html = Nokogiri::HTML(results.body)
    raise Exception, html.xpath("//text()").remove.map(&:to_s).join(' ')
  end
end
# Builds the output path by inserting "-auto" before the first dot of the
# file name (foo.en.xlf -> foo-auto.en.xlf). Only the file name is inspected,
# so dots in the directory part (./foo.xlf, ../x/foo.xlf) can no longer make
# the result equal to the input path. Names without a usable stem/extension
# simply get "-auto" appended.
def auto_output_path(path)
  base = File.basename(path)
  dir_prefix = path[0, path.length - base.length]
  stem, dot, rest = base.partition('.')
  return "#{path}-auto" if stem.empty? || dot.empty?

  "#{dir_prefix}#{stem}-auto.#{rest}"
end

# NOTE(review): credentials are committed in source. They can now be supplied
# through the environment; the committed values remain only as a fallback so
# existing invocations keep working. Consider rotating and removing them.
translator = BingTranslator.new(
  ENV.fetch('BING_TRANSLATOR_CLIENT_ID', 'org-finagle-sandbox'),
  ENV.fetch('BING_TRANSLATOR_CLIENT_SECRET', 'YdNt0qFIsWChIp5uDlcfLq4td8Xn9MCXvEcBGsXEe/I=')
)

begin
  # Each command-line argument is the path of an XLIFF file to translate.
  ARGV.each do |path|
    xliff = Nokogiri.XML(File.read(path))
    files = xliff.xpath('/xmlns:xliff/xmlns:file')
    raise "Missing file tag" if files.empty?

    # Every <file> element carries its own language pair, so handle each one
    # separately instead of applying the first file's languages to all units.
    files.each do |file|
      datatype = file['datatype']
      raise "Unsupported datatype #{datatype}" unless datatype == 'plaintext'

      src_locale, tgt_locale = file['source-language'], file['target-language']
      raise "Missing target-language" unless tgt_locale.present?

      # Map each <target> node to the text of its sibling <source>.
      trans = {}
      file.xpath('.//xmlns:trans-unit').each do |unit|
        src = unit.at_xpath('xmlns:source')
        raise "Missing source translation" unless src.present?
        next unless tgt = unit.at_xpath('xmlns:target')
        trans[tgt] = src.text
      end

      res = translator.translate(*trans.values, to: tgt_locale, from: src_locale)
      trans.each do |tgt, src_text|
        translated = res[src_text]
        # Leave the existing target untouched when no translation came back.
        tgt.content = translated if translated
      end
    end

    File.write(auto_output_path(path), xliff.to_xml(indent: 2))
  end
ensure
  # Persist whatever was translated even if a later file raised.
  BingTranslator.save_cache
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment