All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.zxing.client.j2se.HtmlAssetTranslator Maven / Gradle / Ivy

There is a newer version: 3.5.3
Show newest version
/*
 * Copyright 2011 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.zxing.client.j2se;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.regex.Pattern;

/**
 * 

A utility which auto-translates the English-language text in a directory of HTML documents using * Google Translate.

* *

Pass the Android client assets/ directory as first argument, and the language to translate to second * as a comma-separated list. Specify "all" for language to try to translate for all existing translations. * Each argument after this is the name of a file to translate; if the first one is "all", all files will * be translated.

* *

Usage: {@code HtmlAssetTranslator android/assets/ (all|lang1[,lang2 ...]) (all|file1.html[ file2.html ...])}

* *

{@code android/assets/ es all} will translate .html files in subdirectory html-en to * directory html-es, for example. Note that only text nodes in the HTML document are translated. * Any text that is a child of a node with {@code class="notranslate"} will not be translated. It will * also add a note at the end of the translated page that indicates it was automatically translated.

* * @author Sean Owen */ public final class HtmlAssetTranslator { private static final Pattern COMMA = Pattern.compile(","); private HtmlAssetTranslator() {} public static void main(String[] args) throws IOException { if (args.length < 3) { System.err.println("Usage: HtmlAssetTranslator android/assets/ " + "(all|lang1[,lang2 ...]) (all|file1.html[ file2.html ...])"); return; } Path assetsDir = Paths.get(args[0]); Collection languagesToTranslate = parseLanguagesToTranslate(assetsDir, args[1]); List restOfArgs = Arrays.asList(args).subList(2, args.length); Collection fileNamesToTranslate = parseFileNamesToTranslate(assetsDir, restOfArgs); for (String language : languagesToTranslate) { translateOneLanguage(assetsDir, language, fileNamesToTranslate); } } private static Collection parseLanguagesToTranslate(Path assetsDir, String languageArg) throws IOException { if ("all".equals(languageArg)) { Collection languages = new ArrayList<>(); DirectoryStream.Filter fileFilter = entry -> { String fileName = entry.getFileName().toString(); return Files.isDirectory(entry) && !Files.isSymbolicLink(entry) && fileName.startsWith("html-") && !"html-en".equals(fileName); }; try (DirectoryStream dirs = Files.newDirectoryStream(assetsDir, fileFilter)) { for (Path languageDir : dirs) { languages.add(languageDir.getFileName().toString().substring(5)); } } return languages; } else { return Arrays.asList(COMMA.split(languageArg)); } } private static Collection parseFileNamesToTranslate(Path assetsDir, List restOfArgs) throws IOException { if ("all".equals(restOfArgs.get(0))) { Collection fileNamesToTranslate = new ArrayList<>(); Path htmlEnAssetDir = assetsDir.resolve("html-en"); try (DirectoryStream files = Files.newDirectoryStream(htmlEnAssetDir, "*.html")) { for (Path file : files) { fileNamesToTranslate.add(file.getFileName().toString()); } } return fileNamesToTranslate; } else { return restOfArgs; } } private static void translateOneLanguage(Path assetsDir, String language, final Collection filesToTranslate) throws IOException { Path targetHtmlDir = assetsDir.resolve("html-" + language); Files.createDirectories(targetHtmlDir); Path englishHtmlDir = assetsDir.resolve("html-en"); String translationTextTranslated = StringsResourceTranslator.translateString("Translated by Google Translate.", language); DirectoryStream.Filter filter = entry -> { String name = entry.getFileName().toString(); return name.endsWith(".html") && (filesToTranslate.isEmpty() || filesToTranslate.contains(name)); }; try (DirectoryStream files = Files.newDirectoryStream(englishHtmlDir, filter)) { for (Path sourceFile : files) { translateOneFile(language, targetHtmlDir, sourceFile, translationTextTranslated); } } } private static void translateOneFile(String language, Path targetHtmlDir, Path sourceFile, String translationTextTranslated) throws IOException { Path destFile = targetHtmlDir.resolve(sourceFile.getFileName()); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); Document document; try { DocumentBuilder builder = factory.newDocumentBuilder(); document = builder.parse(sourceFile.toFile()); } catch (ParserConfigurationException pce) { throw new IllegalStateException(pce); } catch (SAXException sae) { throw new IOException(sae); } Element rootElement = document.getDocumentElement(); rootElement.normalize(); Queue nodes = new LinkedList<>(); nodes.add(rootElement); while (!nodes.isEmpty()) { Node node = nodes.poll(); if (shouldTranslate(node)) { NodeList children = node.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { nodes.add(children.item(i)); } } if (node.getNodeType() == Node.TEXT_NODE) { String text = node.getTextContent(); if (!text.trim().isEmpty()) { text = StringsResourceTranslator.translateString(text, language); node.setTextContent(' ' + text + ' '); } } } Node translateText = document.createTextNode(translationTextTranslated); Node paragraph = document.createElement("p"); paragraph.appendChild(translateText); Node body = rootElement.getElementsByTagName("body").item(0); body.appendChild(paragraph); DOMImplementationRegistry registry; try { registry = DOMImplementationRegistry.newInstance(); } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { throw new IllegalStateException(e); } DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS"); LSSerializer writer = impl.createLSSerializer(); String fileAsString = writer.writeToString(document); // Replace default XML header with HTML DOCTYPE fileAsString = fileAsString.replaceAll("<\\?xml[^>]+>", ""); Files.write(destFile, Collections.singleton(fileAsString), StandardCharsets.UTF_8); } private static boolean shouldTranslate(Node node) { // Ignore "notranslate" nodes NamedNodeMap attributes = node.getAttributes(); if (attributes != null) { Node classAttribute = attributes.getNamedItem("class"); if (classAttribute != null) { String textContent = classAttribute.getTextContent(); if (textContent != null && textContent.contains("notranslate")) { return false; } } } String nodeName = node.getNodeName(); if ("script".equalsIgnoreCase(nodeName)) { return false; } // Ignore non-text snippets String textContent = node.getTextContent(); if (textContent != null) { for (int i = 0; i < textContent.length(); i++) { if (Character.isLetter(textContent.charAt(i))) { return true; } } } return false; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy