All downloads are free. The search and download features use the official Maven repository.

org.languagetool.synthesis.Soros Maven / Gradle / Ivy

Go to download

LanguageTool is open-source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect, such as mixing up there/their, and it also detects some grammar problems.

There is a newer version: 6.4
Show newest version
package org.languagetool.synthesis;

/* Soros interpreter (see numbertext.org)
 * 2009-2010 (c) László Németh
 * License: LGPL/BSD dual license */

import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.ArrayList;

/**
 * Interpreter for programs written in the Soros rule language.
 *
 * <p>A program is a list of rules separated by newlines or semicolons. Each
 * rule is a regular expression (optionally double-quoted) followed by a
 * replacement. The constructor compiles the rules; {@link #run(String)}
 * applies the first rule whose pattern matches the whole input and then
 * recursively evaluates the function calls {@code $(...)} contained in the
 * produced text.
 *
 * <p>Characters from the Unicode private use area (U+E000..U+E00A) are used
 * internally as placeholders while parsing and evaluating.
 */
public class Soros {
  // Compiled rules, kept as four parallel lists indexed by rule number.
  private final ArrayList<Pattern> patterns = new ArrayList<Pattern>();
  private final ArrayList<String> values = new ArrayList<String>();
  // Whether the rule was written with a leading "^" / trailing "$" anchor.
  private final ArrayList<Boolean> begins = new ArrayList<Boolean>();
  private final ArrayList<Boolean> ends = new ArrayList<Boolean>();

  private static final String m = "\\\";#";
  private static final String m2 = "$()|[]";
  private static final String c = "\uE000\uE001\uE002\uE003";
  private static final String c2 = "\uE004\uE005\uE006\uE007\uE008\uE009";
  private static final String slash = "\uE000";
  private static final String pipe = "\uE003";

  // pattern to recognize function calls in the replacement string

  private static final Pattern func = Pattern.compile(translate(
        "(?:\\|?(?:\\$\\()+)?" +                // optional nested calls
        "(\\|?\\$\\(([^\\(\\)]*)\\)\\|?)" +     // inner call (2 subgroups)
        "(?:\\)+\\|?)?",                        // optional nested calls
        m2.substring(0, c.length()), c, "\\")); // \$, \(, \), \| -> \uE000..\uE003

  // splits one rule into its pattern (group 1) and optional replacement (group 2)
  private static final Pattern RULE = Pattern.compile("^\\s*(\"[^\"]*\"|[^\\s]*)\\s*(.*[^\\s])?\\s*$");

  // macro header "== name ==": the name is prefixed to the patterns of the following rules
  private static final Pattern MACRO = Pattern.compile("== *(.*[^ ]?) ==");

  /**
   * Compiles a Soros program.
   *
   * @param source program text
   * @param lang   language code; rules whose comment carries a
   *               {@code [:code:]} marker are enabled only when the marker
   *               equals this code (with {@code '_'} replaced by {@code '-'}).
   *               The code is inserted into a regular expression unquoted.
   */
  public Soros(String source, String lang) {
    source = translate(source, m, c, "\\");      // \\, \", \;, \# -> \uE000..\uE003
    // switch off all country-dependent lines, and switch on the requested ones
    source = source.replaceAll("(^|[\n;])([^\n;#]*#[^\n]*\\[:[^\n:\\]]*:][^\n]*)", "$1#$2")
        .replaceAll("(^|[\n;])#([^\n;#]*#[^\n]*\\[:" + lang.replace('_', '-') + ":][^\n]*)", "$1$2")
        .replaceAll("(#[^\n]*)?(\n|$)", ";");   // remove comments
    if (source.indexOf("__numbertext__") == -1) {
        source = "__numbertext__;" + source;
    }
    source = source.replace("__numbertext__",
        // default left zero deletion
        "\"([a-z][-a-z]* )?0+(0|[1-9]\\d*)\" $(\\1\\2);" +
        // separator function
        "\"\uE00A(.*)\uE00A(.+)\uE00A(.*)\" \\1\\2\\3;" +
        // no separation, if subcall returns with empty string
        "\"\uE00A.*\uE00A\uE00A.*\"");

    String prefix = "";
    for (String s : source.split(";")) {
        Matcher matchmacro = MACRO.matcher(s);
        if (matchmacro.matches()) {
            prefix = matchmacro.group(1);
            continue;
        }
        Matcher sp = RULE.matcher(s);
        if (!prefix.equals("") && !s.equals("") && sp.matches()) {
            // A rule may have no replacement; without this guard the text
            // "null" would be concatenated and become the rule's output.
            String replacement = sp.group(2) == null ? "" : sp.group(2);
            s = sp.group(1).replaceFirst("^\"", "").replaceFirst("\"$","");
            s = "\"" + (s.startsWith("^") ? "^" : "") + prefix + (s.equals("") ? "" : " ") +
                 s.replaceFirst("^\\^", "") + "\" " + replacement;
            sp = RULE.matcher(s);
        }
        if (!s.equals("") && sp.matches()) {
            s = translate(sp.group(1).replaceFirst("^\"", "").replaceFirst("\"$",""),
                c.substring(1), m.substring(1), "");
            s = s.replace(slash, "\\\\"); // -> \\, ", ;, #
            String s2 = "";
            if (sp.group(2) != null) {
                s2 = sp.group(2).replaceFirst("^\"", "").replaceFirst("\"$","");
            }
            s2 = translate(s2, m2, c2, "\\");   // \$, \(, \), \|, \[, \] -> \uE004..\uE009
            // call inner separator: [ ... $1 ... ] -> $(\uE00A ... \uE00A$1\uE00A ... )
            s2 = s2.replaceAll("^\\[[$](\\d\\d?|\\([^\\)]+\\))", "\\$(\uE00A\uE00A|\\$$1\uE00A") // add "|"
                .replaceAll("\\[([^$\\[\\\\]*)[$](\\d\\d?|\\([^\\)]+\\))", "\\$(\uE00A$1\uE00A\\$$2\uE00A")
                .replaceAll("\uE00A\\]$","|\uE00A)") // add "|" in terminating position
                .replaceAll("\\]", ")")
                .replaceAll("(\\$\\d|\\))\\|\\$", "$1||\\$"); // $()|$() -> $()||$()
            s2 = translate(s2, c, m, "");       // \uE000..\uE003-> \, ", ;, #
            s2 = translate(s2, m2.substring(0, c.length()), c, "");      // $, (, ), | -> \uE000..\uE003
            s2 = translate(s2, c2, m2, "");     // \uE004..\uE009 -> $, (, ), |, [, ]
            // Every "$" left at this point is a literal dollar. It must be
            // escaped because s2 is later used as a Matcher.replaceAll
            // replacement string. A literal String.replace is required here:
            // replaceAll("[$]", "\\$") would replace "$" with "$" (a no-op).
            s2 = s2.replace("$", "\\$")         // $ -> \$
                .replaceAll("\uE000(\\d)", "\uE000\uE001\\$$1\uE002") // $n -> $(\n)
                .replaceAll("\\\\(\\d)", "\\$$1") // \[n] -> $[n]
                .replace("\\n", "\n");            // \n -> [new line]
            patterns.add(Pattern.compile("^" + s.replaceFirst("^\\^", "")
                .replaceFirst("\\$$", "") + "$"));
            begins.add(s.startsWith("^"));
            ends.add(s.endsWith("$"));
            values.add(s2);
        }
    }
  }

  /**
   * Evaluates the program on the given input.
   *
   * @param input text to convert
   * @return the output of the first matching rule with all function calls
   *         evaluated, or an empty string if no rule matches
   */
  public String run(String input) {
    return run(input, true, true);
  }

  /**
   * Evaluates {@code input}; rules anchored with "^" are skipped unless
   * {@code begin} is set, and rules anchored with "$" are skipped unless
   * {@code end} is set.
   */
  private String run(String input, boolean begin, boolean end) {
    for (int i = 0; i < patterns.size(); i++) {
        if ((!begin && begins.get(i)) || (!end && ends.get(i))) {
            continue;
        }
        Matcher rule = patterns.get(i).matcher(input);
        if (!rule.matches()) {
            continue;
        }
        String s = rule.replaceAll(values.get(i));
        // Evaluate the function calls one at a time, re-scanning the text
        // after every substitution.
        Matcher n = func.matcher(s);
        while (n.find()) {
            boolean b = false;
            boolean e = false;
            // a "|" next to the call forces the begin/end flag; otherwise the
            // flag is inherited when the call touches that edge of the text
            if (n.group(1).startsWith(pipe) || n.group().startsWith(pipe)) {
                b = true;
            } else if (n.start() == 0) {
                b = begin;
            }
            if (n.group(1).endsWith(pipe) || n.group().endsWith(pipe)) {
                e = true;
            } else if (n.end() == s.length()) {
                e = end;
            }
            s = s.substring(0, n.start(1)) + run(n.group(2), b, e) + s.substring(n.end(1));
            n = func.matcher(s);
        }
        return s;
    }
    return "";
  }

  /**
   * Replaces every occurrence of {@code delim + chars.charAt(i)} in
   * {@code s} with {@code chars2.charAt(i)}, for each index of
   * {@code chars} in order.
   */
  private static String translate(String s, String chars, String chars2, String delim) {
    for (int i = 0; i < chars.length(); i++) {
        s = s.replace(delim + chars.charAt(i), "" + chars2.charAt(i));
    }
    return s;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy