com.yahoo.config.codegen.NormalizedDefinition Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of configgen Show documentation
Show all versions of configgen Show documentation
Config Java code generation from definition files for Java Vespa components.
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.config.codegen;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
*
* Does normalizing (removing comments, trimming whitespace etc.) and calculation of md5sum
* of config definitions
*
* @author hmusum
*/
/**
 * Normalizes config definition lines (trims whitespace, strips trailing comments,
 * collapses whitespace runs, fills in open-ended numeric ranges) and computes an
 * MD5 checksum over the normalized lines.
 *
 * Instances hold mutable state (the normalized lines, the last checksum and a
 * {@link MessageDigest}); no synchronization is performed.
 */
public class NormalizedDefinition {

    // Patterns used for finding ranges in config definitions
    private static final Pattern INT_RANGE_PATTERN = Pattern.compile(".*int.*range.*");
    private static final Pattern DOUBLE_RANGE_PATTERN = Pattern.compile(".*double.*range.*");

    // Bounds substituted for open-ended ranges ("[,x]" / "[x,]"). They are formatted once,
    // with locale-independent symbols, because the resulting text is fed into the checksum
    // and must not vary with the default locale of the machine (minus sign, digit shapes).
    private static final String INT_MIN = newBoundFormat().format((long) Integer.MIN_VALUE);
    private static final String INT_MAX = newBoundFormat().format((long) Integer.MAX_VALUE);
    private static final String DOUBLE_MIN = newBoundFormat().format(-1e308);
    private static final String DOUBLE_MAX = newBoundFormat().format(1e308);

    private final MessageDigest md5;

    String defMd5 = null;
    List<String> normalizedContent = null;

    /**
     * Creates an empty definition.
     *
     * @throws RuntimeException if no MD5 {@link MessageDigest} implementation is available
     */
    public NormalizedDefinition() {
        try {
            md5 = MessageDigest.getInstance("MD5");
        } catch (java.security.NoSuchAlgorithmException e) {
            throw new RuntimeException("Unable to create MD5 digest", e);
        }
        normalizedContent = new ArrayList<>();
    }

    /**
     * Reads all lines from the reader, normalizes each one with {@link #normalize(String)}
     * and replaces the current content with the non-empty results. The reader is not closed.
     *
     * @param reader source of config definition lines
     * @return this instance
     * @throws IOException if reading from the reader fails
     */
    public NormalizedDefinition normalize(BufferedReader reader) throws IOException {
        List<String> input = new ArrayList<>();
        String s;
        while ((s = reader.readLine()) != null) {
            String normalized = normalize(s);
            if (!normalized.isEmpty()) {
                input.add(normalized);
            }
        }
        normalizedContent = input;
        return this;
    }

    /**
     * Normalizes a single config definition line:
     *
     * - Remove leading and trailing whitespace.
     * - Fill in the missing lower/upper bound of open-ended int and double ranges.
     * - Remove trailing comments, and whitespace before trailing comments.
     * - Keep comment lines (lines starting with #).
     * - Collapse whitespace sequences outside quotes to a single space.
     * - Remove a whitespace character directly before a comma (for enums).
     *
     * A non-empty result is terminated with a newline; a line that normalizes to
     * nothing yields the empty string.
     *
     * @param line A config definition line
     * @return a normalized config definition line
     */
    public static String normalize(String line) {
        line = line.trim();
        if (INT_RANGE_PATTERN.matcher(line).matches()) {
            line = fillOpenRangeBounds(line, INT_MIN, INT_MAX);
        }
        if (DOUBLE_RANGE_PATTERN.matcher(line).matches()) {
            line = fillOpenRangeBounds(line, DOUBLE_MIN, DOUBLE_MAX);
        }
        line = removeComment(line);
        if (!line.isEmpty()) {
            line = stripSpaces(line);
            line = line.replaceAll("\\s,", ","); // Remove space before comma (for enums)
            line += "\n";
        }
        return line;
    }

    // Creates a formatter for range bounds whose symbols do not depend on the default locale.
    private static DecimalFormat newBoundFormat() {
        return new DecimalFormat("#.#", DecimalFormatSymbols.getInstance(Locale.ROOT));
    }

    // Inserts min after the first "[," and max before the first ",]" found in the line.
    private static String fillOpenRangeBounds(String line, String min, String max) {
        line = line.replaceFirst("\\[,", "[" + min + ",");
        return line.replaceFirst(",\\]", "," + max + "]");
    }

    // Removes comment char and text after it, unless comment char is inside a string
    // Keeps comment lines (lines that start with #)
    //
    // NOTE(review): only the first '#' and the first pair of quotes are examined. A '#'
    // located before the first quote, or a trailing comment following a string that itself
    // contains '#', is therefore left in place. This is kept as-is on purpose: the result
    // feeds the checksum, so changing it would change checksums of existing definitions.
    private static String removeComment(String line) {
        int index = line.indexOf("#");
        if (index <= 0) return line; // no comment char, or a pure comment line

        int firstQuote = line.indexOf("\"");
        if (firstQuote > 0) {
            int secondQuote = line.indexOf("\"", firstQuote + 1);
            if (index > secondQuote) {
                line = line.substring(0, index).trim();
            }
        } else {
            line = line.substring(0, index).trim();
        }
        return line;
    }

    /**
     * Appends a line to the content as given; the line is not normalized here.
     *
     * @param line the line to append
     */
    public void addNormalizedLine(String line) {
        normalizedContent.add(line);
    }

    /**
     * Computes the MD5 checksum over the content. Every line is passed through
     * {@link #normalize(String)} again and lines that normalize to nothing are skipped.
     * The result is also stored and available from {@link #getDefMd5()}.
     *
     * @return the checksum as 32 lowercase hexadecimal characters
     */
    public String generateMd5Sum() {
        for (String line : normalizedContent) {
            String s = normalize(line);
            if (!s.isEmpty()) {
                md5.update(toBytes(s));
            }
        }
        // digest() also resets the digest, so a later call starts from scratch
        defMd5 = toHexString(md5.digest());
        return defMd5;
    }

    // The two methods below are copied from vespajlib (com.yahoo.text.Utf8 and com.yahoo.io.HexDump)
    // since configgen cannot depend on any other modules (at least not as it is done now)

    /**
     * Encodes a string as UTF-8.
     *
     * @param str the string to encode
     * @return the UTF-8 bytes of the string
     */
    public static byte[] toBytes(String str) {
        return str.getBytes(StandardCharsets.UTF_8);
    }

    // Renders each byte as two lowercase hexadecimal digits.
    private String toHexString(byte[] bytes) {
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte aByte : bytes) {
            sb.append(String.format("%02x", aByte));
        }
        return sb.toString();
    }

    /**
     * Replaces each sequence of whitespace characters with 1 space, unless inside quotes,
     * where whitespace is kept unchanged. Public for testing.
     *
     * @param str String to strip spaces from
     * @return String with spaces stripped
     */
    public static String stripSpaces(String str) {
        StringBuilder ret = new StringBuilder(str.length());
        boolean inQuotes = false;
        boolean inSpaceSequence = false;
        for (char c : str.toCharArray()) {
            if (Character.isWhitespace(c)) {
                if (inQuotes) {
                    ret.append(c);
                } else if (!inSpaceSequence) {
                    // start of space sequence
                    inSpaceSequence = true;
                    ret.append(' ');
                }
            } else {
                inSpaceSequence = false;
                if (c == '"') {
                    inQuotes = !inQuotes;
                }
                ret.append(c);
            }
        }
        return ret.toString();
    }

    /**
     * Returns the content lines. This is the internal list, not a copy.
     *
     * @return the list of content lines
     */
    public List<String> getNormalizedContent() {
        return normalizedContent;
    }

    /**
     * Renders the content with every double quote preceded by a backslash and every line
     * followed by the characters backslash, 'n', backslash and a line feed.
     *
     * @return the escaped content
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (String line : normalizedContent) {
            builder.append(line.replace("\"", "\\\""));
            builder.append("\\n\\\n");
        }
        return builder.toString();
    }

    /**
     * Returns the checksum computed by the last call to {@link #generateMd5Sum()}.
     *
     * @return the checksum, or null if none has been generated yet
     */
    public String getDefMd5() {
        return defMd5;
    }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy