All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.codemodder.codemods.AddMissingI18nCodemod Maven / Gradle / Ivy

There is a newer version: 0.97.3
Show newest version
package io.codemodder.codemods;

import com.google.common.annotations.VisibleForTesting;
import io.codemodder.*;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.inject.Inject;
import org.apache.commons.io.FilenameUtils;
import org.mozilla.universalchardet.UniversalDetector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.services.translate.TranslateClient;
import software.amazon.awssdk.services.translate.model.*;

/**
 * This codemod finds missing i18n keys in property files and adds them to the file, using a machine
 * translation service ({@link TranslateClient}) to generate the missing values from the values
 * found in sibling property files.
 */
@Codemod(
    id = "pixee:java/missing-i18n",
    importance = Importance.MEDIUM,
    reviewGuidance = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW)
public final class AddMissingI18nCodemod extends RawFileChanger {

  private final TranslateClient translateClient;

  /** Languages offered by the translation service, preferred translation sources first. */
  private final List<Language> languagesAvailable;

  /**
   * Creates the codemod and eagerly asks the translation service which languages it supports.
   *
   * @param translateClient the client used to list languages and translate text
   */
  @Inject
  public AddMissingI18nCodemod(final TranslateClient translateClient) {
    this.translateClient = Objects.requireNonNull(translateClient);
    ListLanguagesResponse languagesResponse =
        translateClient.listLanguages(ListLanguagesRequest.builder().build());
    this.languagesAvailable = putInPreferredOrder(languagesResponse.languages());
  }

  /**
   * Returns the given languages with the {@link #preferredTranslationSources} first (in the order
   * that list declares them) followed by every other language in its original order. Preferred
   * codes that the service did not report are skipped.
   */
  private static List<Language> putInPreferredOrder(final List<Language> languages) {
    List<Language> orderedLanguages = new ArrayList<>(languages.size());
    for (String preferredCode : preferredTranslationSources) {
      languages.stream()
          .filter(language -> preferredCode.equals(language.languageCode()))
          .findFirst()
          .ifPresent(orderedLanguages::add);
    }
    for (Language language : languages) {
      if (!preferredTranslationSources.contains(language.languageCode())) {
        orderedLanguages.add(language);
      }
    }
    return orderedLanguages;
  }

  @Override
  public boolean supports(final Path file) {
    return getPropertyFilePrefix(file.getFileName().toString()).isPresent();
  }

  @Override
  public CodemodFileScanningResult visitFile(final CodemodInvocationContext context)
      throws IOException {
    Path path = context.path();
    String fileName = path.getFileName().toString();
    // The supports check will guarantee that this won't be empty
    String prefix = getPropertyFilePrefix(fileName).orElseThrow();
    return doVisitFile(context, path, prefix);
  }

  /** If it's a property file, return the prefix, otherwise return {@link Optional#empty()}. */
  @VisibleForTesting
  static Optional<String> getPropertyFilePrefix(final String fileName) {
    // the country form must be tried first, otherwise "a_en_US.properties" would yield "a_en"
    for (Pattern pattern : List.of(PROPERTY_FILE_WITH_COUNTRY, PROPERTY_FILE_WITHOUT_COUNTRY)) {
      Matcher matcher = pattern.matcher(fileName);
      if (matcher.matches()) {
        return Optional.of(matcher.group(1));
      }
    }
    return Optional.empty();
  }

  /**
   * Perform the actual inspection and changes.
   *
   * @param context the context
   * @param path the path to the file being checked
   * @param filePrefix the prefix of the file name, e.g. "messages" for "messages_en.properties"
   * @return the changes made, or an empty result if nothing was missing or nothing could be done
   * @throws IOException if the file, its siblings or the project files can't be read or written
   */
  private CodemodFileScanningResult doVisitFile(
      final CodemodInvocationContext context, final Path path, final String filePrefix)
      throws IOException {
    // try to load it as a properties file and make sure that works
    Charset charset = charsetOf(path);
    Properties properties = loadProperties(path, charset);

    List<Path> siblings = getSiblings(path, filePrefix);

    // if there's no siblings, there's nothing to compare our keys against
    if (siblings.isEmpty()) {
      LOG.trace("Have no baseline files to compare against, exiting");
      return CodemodFileScanningResult.none();
    }

    // find all the keys that are present in other sibling files, but not our file
    Set<PossiblyMissingKey> ourMissingOrEmptyKeys =
        findMissingOrEmptyKeys(path, properties, filePrefix, siblings);

    // of the keys we appear to be missing, filter down to those for which we can find text
    // references in the project
    List<MissingKey> missingKeys = findUsedKeys(context, ourMissingOrEmptyKeys, siblings);

    // if we can't find any reference to the keys, maybe it's the _other_ properties files who have
    // it wrong
    if (missingKeys.isEmpty()) {
      LOG.debug("Missing keys in {} weren't discovered in the project", path);
      return CodemodFileScanningResult.none();
    }

    // we now have a set of keys that are referenced in the project and missing from our file
    List<KeyReplacement> keyReplacements = translateMissingKeys(missingKeys);

    // one pattern per replacement (index-aligned), compiled once instead of once per line
    List<Pattern> keyLinePatterns =
        keyReplacements.stream()
            .map(r -> Pattern.compile("^\\s*" + Pattern.quote(r.key) + "\\s*=.*$"))
            .toList();

    // now we have a list of keys and their new values, we can apply them to the file
    List<CodemodChange> changes = new ArrayList<>();
    List<String> newLines = new ArrayList<>();
    List<KeyReplacement> replacementsToAddAtEnd = new ArrayList<>(keyReplacements);
    int lineNumber = 1;
    // read with the same charset the properties were parsed with, so both views of the file agree
    for (String line : Files.readAllLines(path, charset)) {
      KeyReplacement replacement = null;
      for (int i = 0; i < keyReplacements.size() && replacement == null; i++) {
        if (keyLinePatterns.get(i).matcher(line).matches()) {
          replacement = keyReplacements.get(i);
        }
      }
      if (replacement == null) {
        // every untouched line is carried over exactly once
        newLines.add(line);
      } else {
        // we've identified the line we should replace, it must have been empty
        LOG.debug("Replacing {} with {}", replacement.key, replacement.newValue);
        newLines.add(replacement.key + "=" + replacement.newValue);
        String description = createChangeDescription(replacement.key, replacement.usageReference);
        changes.add(CodemodChange.from(lineNumber, description));
        replacementsToAddAtEnd.remove(replacement);
      }
      lineNumber++;
    }

    // these keys were missing entirely, so we should add them to the end
    for (KeyReplacement keyReplacement : replacementsToAddAtEnd) {
      newLines.add(keyReplacement.key + "=" + keyReplacement.newValue);
      String description =
          createChangeDescription(keyReplacement.key, keyReplacement.usageReference);
      changes.add(CodemodChange.from(lineNumber, description));
      // keep incrementing this not-yet-real line number so each change gets a distinct line
      lineNumber++;
    }

    if (!changes.isEmpty()) {
      Files.write(path, newLines, outputCharsetFor(charset, newLines));
    }

    return CodemodFileScanningResult.withOnlyChanges(changes);
  }

  /** Asks the translation service for a value for every missing key. */
  private List<KeyReplacement> translateMissingKeys(final List<MissingKey> missingKeys) {
    List<KeyReplacement> keyReplacements = new ArrayList<>(missingKeys.size());
    for (MissingKey missingKey : missingKeys) {
      DefinitionReference definitionReference =
          getPreferredDefinitionForTranslation(missingKey.definitionReferences);
      TranslateTextRequest request =
          TranslateTextRequest.builder()
              .sourceLanguageCode(definitionReference.languageCode)
              .targetLanguageCode(missingKey.languageCode)
              .text(definitionReference.value)
              .build();
      TranslateTextResponse translateTextResponse = translateClient.translateText(request);
      String translatedText = translateTextResponse.translatedText();
      keyReplacements.add(
          new KeyReplacement(missingKey.key, translatedText, missingKey.usageReference));
    }
    return keyReplacements;
  }

  /**
   * Detects the charset of the given file, falling back to UTF-8 when the detector can't decide or
   * names a charset this JVM doesn't support.
   */
  private static Charset charsetOf(final Path file) throws IOException {
    String detectedName = UniversalDetector.detectCharset(file);
    if (detectedName == null) {
      return StandardCharsets.UTF_8;
    }
    try {
      return Charset.forName(detectedName);
    } catch (IllegalArgumentException e) {
      // covers both illegal and unsupported charset names
      LOG.debug("Unsupported charset {} detected for {}, assuming UTF-8", detectedName, file);
      return StandardCharsets.UTF_8;
    }
  }

  /** Loads a properties file with the given charset, closing the underlying stream afterwards. */
  private static Properties loadProperties(final Path file, final Charset charset)
      throws IOException {
    Properties properties = new Properties();
    try (var in = Files.newInputStream(file);
        InputStreamReader reader = new InputStreamReader(in, charset)) {
      properties.load(reader);
    }
    return properties;
  }

  /**
   * Picks the charset to write the updated file with: the detected one if it can represent every
   * line (so the file's encoding is preserved), otherwise UTF-8 so translated text isn't lost.
   */
  private static Charset outputCharsetFor(final Charset detected, final List<String> lines) {
    if (detected.canEncode()) {
      var encoder = detected.newEncoder();
      if (lines.stream().allMatch(line -> encoder.canEncode(line))) {
        return detected;
      }
    }
    return StandardCharsets.UTF_8;
  }

  /** Extracts the two-letter language code that follows "prefix_" in a property file name. */
  private static String languageCodeOf(final String fileName, final String filePrefix) {
    int start = filePrefix.length() + 1;
    return fileName.substring(start, start + 2);
  }

  /**
   * Builds the human-readable description of a change, listing up to three places where the key is
   * used.
   */
  private static String createChangeDescription(
      final String key, final List<UsageReference> usageReferences) {
    String descriptionTemplate =
        """
        Added missing i18n key value for "%s". The new value was based on other property files that had values for the key.
        This key was confirmed to be in use in %d place(s), including:

        %s
        """;
    String usages =
        usageReferences.stream()
            .limit(3)
            .map(ref -> "  " + ref.path + ": " + ref.line)
            .collect(Collectors.joining("\n"));

    return String.format(descriptionTemplate, key, usageReferences.size(), usages);
  }

  /**
   * For the source translation, prefer one of the most common languages spoken in the world (see
   * {@link #preferredTranslationSources}). The idea is that the most common languages may have the
   * most reliable translations, rather than a more obscure language, which may itself be a
   * translation of a translation of a translation. If no definition is in a language the service
   * reported, the first definition is used.
   */
  private DefinitionReference getPreferredDefinitionForTranslation(
      final List<DefinitionReference> definitions) {
    for (Language language : languagesAvailable) {
      for (DefinitionReference definition : definitions) {
        if (definition.languageCode.equals(language.languageCode())) {
          return definition;
        }
      }
    }
    return definitions.get(0);
  }

  /** A key together with the value to write and the places the key is used. */
  private record KeyReplacement(String key, String newValue, List<UsageReference> usageReference) {
    private KeyReplacement {
      Objects.requireNonNull(key);
      Objects.requireNonNull(newValue);
      Objects.requireNonNull(usageReference);
    }
  }

  /** A key that sibling files define but this file doesn't; not yet confirmed to be in use. */
  private record PossiblyMissingKey(
      String languageCode, String key, List<DefinitionReference> definitionReferences) {
    private PossiblyMissingKey {
      Objects.requireNonNull(languageCode);
      Objects.requireNonNull(key);
      Objects.requireNonNull(definitionReferences);
    }
  }

  /** A missing key for which at least one textual reference was found in the project. */
  private record MissingKey(
      String languageCode,
      String key,
      List<DefinitionReference> definitionReferences,
      List<UsageReference> usageReference) {
    private MissingKey {
      Objects.requireNonNull(languageCode);
      Objects.requireNonNull(key);
      Objects.requireNonNull(definitionReferences);
      Objects.requireNonNull(usageReference);
    }
  }

  /**
   * Find all the keys that are defined (with a non-empty value) in other sibling files, but are
   * absent or empty in this file. The result is ordered by key so repeated runs behave the same.
   */
  private Set<PossiblyMissingKey> findMissingOrEmptyKeys(
      final Path path,
      final Properties myProperties,
      final String filePrefix,
      final List<Path> siblings)
      throws IOException {
    Map<String, List<DefinitionReference>> missingKeyDefinitions = new TreeMap<>();
    for (Path sibling : siblings) {
      Properties siblingProperties = loadProperties(sibling, charsetOf(sibling));
      String siblingFilename = sibling.getFileName().toString();
      String sourceLanguageCode = languageCodeOf(siblingFilename, filePrefix);
      for (String siblingKey : siblingProperties.stringPropertyNames()) {
        String ourValue = myProperties.getProperty(siblingKey);
        if (ourValue != null && !ourValue.isEmpty()) {
          continue; // we already have a value for this key
        }
        String siblingValue = siblingProperties.getProperty(siblingKey);
        if (siblingValue.isEmpty()) {
          continue; // the sibling has nothing to translate from either
        }
        missingKeyDefinitions
            .computeIfAbsent(siblingKey, k -> new ArrayList<>())
            .add(new DefinitionReference(sourceLanguageCode, siblingFilename, siblingValue));
      }
    }

    String targetLanguageCode = languageCodeOf(path.getFileName().toString(), filePrefix);
    return missingKeyDefinitions.entrySet().stream()
        .map(e -> new PossiblyMissingKey(targetLanguageCode, e.getKey(), e.getValue()))
        .collect(Collectors.toCollection(LinkedHashSet::new));
  }

  /** A definition of a key from another properties file. */
  private record DefinitionReference(String languageCode, String path, String value) {
    private DefinitionReference {
      Objects.requireNonNull(languageCode);
      Objects.requireNonNull(path);
      Objects.requireNonNull(value);
    }
  }

  /** A reference found in the wider project to key that is missing. */
  private record UsageReference(String path, String line) {
    private UsageReference {
      Objects.requireNonNull(path);
      Objects.requireNonNull(line);
    }
  }

  /**
   * Walks every regular, not-obviously-binary file of the project (except this file and its
   * siblings) and collects the lines that mention one of the possibly missing keys.
   *
   * @return one {@link MissingKey} per key that is mentioned at least once, carrying all of the
   *     usages found for it
   * @throws IOException if the project can't be walked or a file can't be read
   */
  private List<MissingKey> findUsedKeys(
      final CodemodInvocationContext context,
      final Set<PossiblyMissingKey> ourMissingOrEmptyKeys,
      final List<Path> siblings)
      throws IOException {
    if (ourMissingOrEmptyKeys.isEmpty()) {
      return List.of(); // nothing to look for, so don't bother walking the project
    }

    Path projectDir = context.codeDirectory().asPath();
    List<Path> candidates;
    try (Stream<Path> paths = Files.walk(projectDir)) {
      candidates =
          paths
              .filter(p -> !siblings.contains(p))
              .filter(p -> !context.path().equals(p))
              .filter(Files::isRegularFile)
              .filter(p -> !isObviouslyBinaryFile(p))
              .toList();
    } catch (UncheckedIOException e) {
      // unwrap stream's unchecked IOException
      throw e.getCause();
    }

    // gather usages per key, so a key used in many files still yields a single MissingKey
    Map<String, List<UsageReference>> usagesByKey = new HashMap<>();
    for (Path candidate : candidates) {
      for (String line : readLinesIfText(candidate)) {
        for (PossiblyMissingKey key : ourMissingOrEmptyKeys) {
          if (line.contains(key.key)) {
            usagesByKey
                .computeIfAbsent(key.key, k -> new ArrayList<>())
                .add(new UsageReference(candidate.toString(), line));
          }
        }
      }
    }

    return ourMissingOrEmptyKeys.stream()
        .filter(key -> usagesByKey.containsKey(key.key))
        .map(
            key ->
                new MissingKey(
                    key.languageCode,
                    key.key,
                    key.definitionReferences,
                    List.copyOf(usagesByKey.get(key.key))))
        .toList();
  }

  /**
   * Reads the lines of a file, or returns no lines when its bytes can't be decoded as text, so that
   * a single binary or oddly-encoded file doesn't abort the whole project scan.
   */
  private static List<String> readLinesIfText(final Path file) throws IOException {
    try {
      return Files.readString(file, charsetOf(file)).lines().toList();
    } catch (CharacterCodingException e) {
      LOG.debug("Skipping {} because it could not be decoded as text", file, e);
      return List.of();
    }
  }

  /**
   * Lists the other locale-specific property files in the same directory that share the given
   * prefix, e.g. "messages_fr.properties" for "messages_en.properties". Files that merely start
   * with the prefix (backups, the un-suffixed default bundle, ...) are ignored because no language
   * code can be derived from their name.
   */
  private List<Path> getSiblings(final Path path, final String prefix) throws IOException {
    Path parent = path.getParent();
    try (Stream<Path> paths = Files.list(parent)) {
      return paths
          .filter(Files::isRegularFile)
          .filter(Files::isReadable)
          .filter(p -> !p.equals(path))
          .filter(
              p ->
                  getPropertyFilePrefix(p.getFileName().toString())
                      .filter(prefix::equals)
                      .isPresent())
          .toList();
    }
  }

  /**
   * Returns true if the file at path is obviously a binary file. This is a heuristic based on the
   * file extension, and is not guaranteed to be correct, but should help massively in the
   * performance of 99% of cases. We could improve this further by doing some content-sniffing
   * checks.
   */
  private static boolean isObviouslyBinaryFile(final Path path) {
    String fileName = path.getFileName().toString();
    // Locale.ROOT keeps the lower-casing independent of the machine's default locale
    String extension = FilenameUtils.getExtension(fileName).toLowerCase(Locale.ROOT);
    return knownBinaryExtensions.contains(extension);
  }

  private static final Set<String> knownBinaryExtensions =
      Set.of(
          "ico", "jpg", "jpeg", "png", "gif", "svg", "tiff", "tif", "pdf", "bmp", "eps", "raw",
          "mp3", "mp4", "zip", "avi", "docx", "xlsx", "pptx", "jar", "dll", "com", "exe");

  /** Language codes to prefer as the translation source, most preferred first. */
  private static final List<String> preferredTranslationSources =
      List.of("en", "de", "es", "fr", "it", "ja", "ko", "pt", "zh", "zh-TW");

  // create a pattern for matching property file names
  private static final Pattern PROPERTY_FILE_WITH_COUNTRY =
      Pattern.compile("(.*)_\\w{2}_\\w{2}\\.properties");
  private static final Pattern PROPERTY_FILE_WITHOUT_COUNTRY =
      Pattern.compile("(.*)_\\w{2}\\.properties");
  private static final Logger LOG = LoggerFactory.getLogger(AddMissingI18nCodemod.class);
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy