Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.rcsb.cif.schema.generator.SchemaGenerator Maven / Gradle / Ivy
package org.rcsb.cif.schema.generator;
import org.rcsb.cif.CifIO;
import org.rcsb.cif.model.Block;
import org.rcsb.cif.model.Category;
import org.rcsb.cif.model.CifFile;
import org.rcsb.cif.model.Column;
import org.rcsb.cif.model.FloatColumn;
import org.rcsb.cif.model.IntColumn;
import org.rcsb.cif.model.StrColumn;
import org.rcsb.cif.schema.DelegatingColumn;
import org.rcsb.cif.schema.DelegatingFloatColumn;
import org.rcsb.cif.schema.DelegatingIntColumn;
import org.rcsb.cif.schema.DelegatingStrColumn;
import org.rcsb.cif.schema.StandardSchemata;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.lang.reflect.Field;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.StringJoiner;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
/**
* Creates a type-safe data model using a CIF dictionary.
*/
@SuppressWarnings("ALL")
public class SchemaGenerator {
private static final String BASE_PACKAGE = "org.rcsb.cif.schema.";
private static final String RE_MATRIX_FIELD = "\\[[1-3]]\\[[1-3]]";
private static final String RE_VECTOR_FIELD = "\\[[1-3]]";
private static final List FORCE_INT_FIELDS =
Stream.of("_atom_site.id",
"_atom_site.auth_seq_id",
"_pdbx_struct_mod_residue.auth_seq_id",
"_struct_conf.beg_auth_seq_id",
"_struct_conf.end_auth_seq_id",
"_struct_conn.ptnr1_auth_seq_id",
"_struct_conn.ptnr2_auth_seq_id",
"_struct_sheet_range.beg_auth_seq_id",
"_struct_sheet_range.end_auth_seq_id").collect(Collectors.toList());
private static final String FILE = loadTemplate("File.tpl");
private static final String FILE_BUILDER = loadTemplate("FileBuilder.tpl");
private static final String BLOCK = loadTemplate("Block.tpl");
private static final String BLOCK_FLAT = loadTemplate("BlockFlat.tpl");
private static final String CASE = loadTemplate("Case.tpl");
private static final String BLOCK_GETTER = loadTemplate("BlockGetter.tpl");
private static final String BLOCK_GETTER_FLAT = loadTemplate("BlockGetterFlat.tpl");
private static final String CATEGORY = loadTemplate("Category.tpl");
private static final String CATEGORY_FLAT = loadTemplate("CategoryFlat.tpl");
private static final String CATEGORY_GETTER = loadTemplate("CategoryGetter.tpl");
private static final String CATEGORY_GETTER_FLAT = loadTemplate("CategoryGetterFlat.tpl");
private static final String BLOCK_BUILDER = loadTemplate("BlockBuilder.tpl");
private static final String BLOCK_BUILDER_FLAT = loadTemplate("BlockBuilderFlat.tpl");
private static final String CATEGORY_BUILDER = loadTemplate("CategoryBuilder.tpl");
private static final String CATEGORY_BUILDER_FLAT = loadTemplate("CategoryBuilderFlat.tpl");
private static final String CATEGORY_BUILDER_ENTER = loadTemplate("CategoryBuilderEnter.tpl");
private static final String COLUMN_BUILDER = loadTemplate("ColumnBuilder.tpl");
private static final String COLUMN_BUILDER_ENTER = loadTemplate("ColumnBuilderEnter.tpl");
private static String loadTemplate(String name) {
return new BufferedReader(new InputStreamReader(Thread.currentThread().getContextClassLoader().getResourceAsStream("templates/" + name)))
.lines()
.collect(Collectors.joining(System.lineSeparator()));
}
private final String schemaName;
private final String schemaEnum;
private final String packageName;
private final boolean flat;
private final Map schema;
private final Map categories;
private final Map links;
private final Map> imports;
private final Map> rawAliases;
private final List> aliases;
public static void main(String[] args) throws IOException {
new SchemaGenerator("MmCif", "MMCIF", "mm", false,
"https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic",
"https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic",
"https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/entity_branch-extension.dic",
"https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/chem_comp-extension.dic",
"https://raw.githubusercontent.com/ihmwg/ModelCIF/master/dist/mmcif_ma.dic"); // model-extension for predicted models
new SchemaGenerator("CifCore", "CIF_CORE", "core", true,
"https://raw.githubusercontent.com/COMCIFS/cif_core/master/templ_enum.cif",
"https://raw.githubusercontent.com/COMCIFS/cif_core/master/templ_attr.cif",
"https://raw.githubusercontent.com/COMCIFS/cif_core/master/cif_core.dic"); // has to be last
// NMR Exchange Format - TODO special usage of save-frames
// new SchemaGenerator("Nef", "NEF", "nef", false,
// "https://raw.githubusercontent.com/NMRExchangeFormat/NEF/master/specification/mmcif_nef.dic");
}
static String toClassName(String rawName) {
String name = Pattern.compile("_").splitAsStream(rawName)
.map(s -> s.substring(0, 1).toUpperCase() + s.substring(1))
.collect(Collectors.joining(""))
// remove invalid characters
.replaceAll("[/\\\\\\- \t`~!@#$%^&*()=+{}|;:'\",<.>?]", "_")
.replaceAll("_+", "_")
.replace("[", "")
.replace("]", "");
if (name.endsWith("_")) {
name = name.substring(0, name.length() - 1);
}
if (name.equals("Class")) {
return "Clazz";
} else if (Character.isDigit(name.charAt(0))) {
return "_" + name;
}
return name;
}
private void writeClasses() throws IOException {
// create or clear out destination directory
// be careful with this and point to temp directory when in trouble - the impl must be in a healthy state to
// bootstrap itself and generate schema-related code
Path projectPath = Paths.get(new File("").getAbsolutePath());
String basePackagePath = BASE_PACKAGE.substring(0, BASE_PACKAGE.length() - 1).replace(".", "/");
Path packagePath = projectPath.resolve("src").resolve("main").resolve("java").resolve(basePackagePath).resolve(packageName);
if (Files.exists(packagePath)) {
try (Stream paths = Files.list(packagePath)) {
paths.filter(p -> !Files.isDirectory(p))
.forEach(p -> {
try {
Files.delete(p);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
});
}
} else {
Files.createDirectories(packagePath);
}
writeFiles(schema, packagePath);
}
private void writeFiles(Map content, Path path) throws IOException {
Set alreadyWritten = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
String blockName = schemaName + "Block";
String file = FILE.replace("{packageName}", packageName)
.replace("{schemaName}", schemaName);
String fileBuilder = FILE_BUILDER.replace("{packageName}", packageName)
.replace("{schemaName}", schemaName)
.replace("{schemaEnum}", schemaEnum);
String block = (flat ? BLOCK_FLAT : BLOCK).replace("{packageName}", packageName)
.replace("{schemaName}", schemaName);
String blockBuilder = (flat ? BLOCK_BUILDER_FLAT : BLOCK_BUILDER).replace("{packageName}", packageName)
.replace("{schemaName}", schemaName);
String categoryBuilder = (flat ? CATEGORY_BUILDER_FLAT : CATEGORY_BUILDER).replace("{packageName}", packageName)
.replace("{schemaName}", schemaName);
StringJoiner getters = new StringJoiner("\n");
StringJoiner cases = new StringJoiner("\n");
StringJoiner enters = new StringJoiner("\n");
StringJoiner categoryEnters = new StringJoiner("\n");
for (Map.Entry entry : content.entrySet()) {
String categoryName = entry.getKey();
Table category = entry.getValue();
if (!alreadyWritten.add(categoryName)) {
System.err.println("skipping " + categoryName);
continue;
}
String categoryClassName = toClassName(categoryName);
String description = prepareDescription(category.getDescription(), " * ");
if (flat) {
getters.add(BLOCK_GETTER_FLAT.replace("{categoryDescription}", description)
.replace("{categoryClassName}", categoryClassName)
.replace("{categoryName}", categoryName));
} else {
getters.add(BLOCK_GETTER.replace("{categoryDescription}", description)
.replace("{categoryClassName}", categoryClassName)
.replace("{categoryName}", categoryName));
}
writeCategory(category.getDescription(), categoryClassName, entry.getValue(), path, categoryName, categoryClassName, categoryEnters);
// delegation function
cases.add(CASE.replace("{name}", categoryName)
.replace("{className}", categoryClassName));
// builder
String enter = CATEGORY_BUILDER_ENTER.replace("{schemaName}", schemaName)
.replace("{categoryClassName}", categoryClassName);
enters.add(enter);
}
block = block.replace("{cases}", cases.toString())
.replace("{getters}", getters.toString());
blockBuilder = blockBuilder.replace("{enters}", enters.toString());
categoryBuilder = categoryBuilder.replace("{enters}", categoryEnters.toString());
Files.write(path.resolve(schemaName + "File.java"), file.getBytes());
Files.write(path.resolve(schemaName + "FileBuilder.java"), fileBuilder.getBytes());
Files.write(path.resolve(schemaName + "BlockBuilder.java"), blockBuilder.getBytes());
Files.write(path.resolve(schemaName + "CategoryBuilder.java"), categoryBuilder.getBytes());
Files.write(path.resolve(blockName + ".java"), block.getBytes());
}
private String prepareDescription(String description, String prefix) {
return Pattern.compile("\n").splitAsStream(description.trim())
.map(s -> prefix + s)
.collect(Collectors.joining("\n"))
.replace("TODO", ""); // remove TODOs from description
}
private void writeCategory(String categoryDescription, String className, Table content, Path path, String categoryName,
String categoryClassName, StringJoiner categoryEnters) throws IOException {
if (!Files.exists(path)) {
Files.createDirectory(path);
}
categoryDescription = prepareDescription(categoryDescription, " * ");
String category = (flat ? CATEGORY_FLAT : CATEGORY).replace("{packageName}", packageName)
.replace("{schemaName}", schemaName)
.replace("{categoryDescription}", categoryDescription)
.replace("{categoryClassName}", categoryClassName)
.replace("{categoryName}", categoryName);
StringJoiner getters = new StringJoiner("\n");
StringJoiner cases = new StringJoiner("\n");
StringJoiner enters = new StringJoiner("\n");
for (Map.Entry entry : content.getColumns().entrySet()) {
String columnName = entry.getKey();
String flatName = categoryName + "_" + columnName;
Col column = (Col) entry.getValue();
// check if there is a alias in place here - if so handled specifically lateron
if (aliases.stream()
.anyMatch(list -> list.contains(categoryName + "." + columnName))) {
continue;
}
String columnClassName = toClassName(columnName);
Class baseClass = getBaseClass(column.getType());
String baseClassName = baseClass.getSimpleName();
String description = prepareDescription(column.getDescription(), " * ");
getters.add((flat ? CATEGORY_GETTER_FLAT : CATEGORY_GETTER).replace("{columnDescription}", description)
.replace("{baseClassName}", baseClassName)
.replace("{columnClassName}", columnClassName)
.replace("{columnName}", columnName)
.replace("{modifier}", "")
.replace("{aliases}", "\"" + flatName + "\""));
cases.add(CASE.replace("{name}", columnName)
.replace("{className}", columnClassName));
enters.add(COLUMN_BUILDER_ENTER.replace("{schemaName}", schemaName)
.replace("{baseClassName}", baseClassName)
.replace("{categoryClassName}", categoryClassName)
.replace("{columnClassName}", columnClassName)
.replace("{columnName}", columnName));
}
// aliases
Set processed = new HashSet<>();
aliases.stream()
.filter(set -> set.stream().anyMatch(n -> n.split("\\.")[0].equals(categoryName)))
.forEach(set -> {
set.stream()
.filter(n -> n.startsWith(categoryName))
.forEach(cn -> {
String as = set.stream()
.map(n -> n.replace(".", "_"))
.distinct()
.map(n -> "\"" + n + "\"")
.collect(Collectors.joining(", "));
boolean multiple = as.split(",").length > 1;
Col column = (Col) set.stream()
.map(n -> n.split("\\."))
.filter(s -> schema.containsKey(s[0]) && schema.get(s[0]).getColumns().containsKey(s[1]))
.findFirst()
.map(s -> schema.get(s[0]).getColumns().get(s[1]))
.orElseThrow(() -> new NoSuchElementException());
String columnClassName = toClassName(cn.split("\\.")[1]);
if (processed.contains(columnClassName)) {
return;
}
processed.add(columnClassName);
Class baseClass = getBaseClass(column.getType());
Class delegatingBaseClass = getDelegatingBaseClass(column.getType());
String baseClassName = baseClass.getSimpleName();
String description = prepareDescription(column.getDescription(), " * ");
getters.add(CATEGORY_GETTER_FLAT.replace("{columnDescription}", description)
.replace("{baseClassName}", baseClassName)
.replace("{columnClassName}", columnClassName)
.replace("{modifier}", multiple ? "Aliased" : "")
.replace("{aliases}", as));
enters.add(COLUMN_BUILDER_ENTER.replace("{schemaName}", schemaName)
.replace("{baseClassName}", baseClassName)
.replace("{categoryClassName}", categoryClassName)
.replace("{columnClassName}", columnClassName)
.replace("{columnName}", cn.split("\\.")[1]));
});
});
category = category.replace("{cases}", cases.toString())
.replace("{getters}", getters.toString());
categoryEnters.add(COLUMN_BUILDER.replace("{schemaName}", schemaName)
.replace("{categoryClassName}", categoryClassName)
.replace("{categoryName}", categoryName)
.replace("{columnEnters}", enters.toString()));
Files.write(path.resolve(className + ".java"), category.getBytes());
}
private Class getBaseClass(String type) {
// TODO enums, lists, matrix, and vector would be nice to have
switch (type) {
case "coord":
return FloatColumn.class;
case "enum":
return StrColumn.class;
case "float":
return FloatColumn.class;
case "int":
return IntColumn.class;
case "list":
return StrColumn.class;
case "matrix":
return FloatColumn.class;
case "str":
return StrColumn.class;
case "vector":
return FloatColumn.class;
default:
throw new IllegalArgumentException("Unknown type " + type);
}
}
private Class getDelegatingBaseClass(String type) {
switch (type) {
case "coord":
return DelegatingFloatColumn.class;
case "enum":
return DelegatingStrColumn.class;
case "float":
return DelegatingFloatColumn.class;
case "int":
return DelegatingIntColumn.class;
case "list":
return DelegatingStrColumn.class;
case "matrix":
return DelegatingFloatColumn.class;
case "str":
return DelegatingStrColumn.class;
case "vector":
return DelegatingFloatColumn.class;
default:
throw new IllegalArgumentException("Unknown type " + type);
}
}
private SchemaGenerator(String schemaName, String schemaEnum, String packageName, boolean flat, String... resource) throws IOException {
this.schemaName = schemaName;
this.schemaEnum = schemaEnum;
this.packageName = packageName;
this.flat = flat;
this.schema = new LinkedHashMap<>();
this.categories = new LinkedHashMap<>();
this.links = new LinkedHashMap<>();
this.imports = new LinkedHashMap<>();
this.rawAliases = new LinkedHashMap<>();
this.aliases = new ArrayList<>();
for (String res : resource) {
System.out.println("Loading dictionary from: " + res);
CifFile cifFile = CifIO.readFromInputStream(preprocess(res));
if (schemaName.equals("MmCif")) {
getCategoryMetadataMmcif(cifFile);
} else if (schemaName.equals("CifCore")) {
getCategoryMetadataCifCore(cifFile);
}
// acquire metadata
Category dictionary = cifFile.getBlocks().get(0).getCategory("dictionary");
String title = dictionary.isDefined() ? dictionary.getColumn("title").getStringData(0) : res.substring(res.lastIndexOf("/") + 1);
String version = dictionary.isDefined() ? dictionary.getColumn("version").getStringData(0) : "draft";
System.out.println(title + " with version " + version);
buildListOfLinksBetweenCategories(cifFile);
}
getFieldData();
if (flat) { // TODO alias support in mmCIF
prepareAliases();
}
writeClasses();
System.out.println("Finished file generation");
try {
Field field = StandardSchemata.class.getField(schemaEnum);
} catch (Exception e) {
System.err.println("Schema with name '" + schemaEnum + "' must be explicitly added to StandardSchemata.java!");
}
System.out.println();
}
private InputStream preprocess(String res) throws IOException {
try (InputStream inputStream = new URL(res).openStream()) {
String content = new String(readAllBytes(inputStream), StandardCharsets.UTF_8);
// this is needed for https://raw.githubusercontent.com/COMCIFS/cif_core/master/cif_core.dic
// TODO proper CIF 2.0 (or at least list support, or at the very least don't hard-code this here...)
content = content.replace("[translucent pale green]", "'[translucent pale green]'");
return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
}
}
private byte[] readAllBytes(InputStream inputStream) throws IOException {
final int bufLen = 4 * 0x400; // 4KB
byte[] buf = new byte[bufLen];
int readLen;
IOException exception = null;
try {
try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
while ((readLen = inputStream.read(buf, 0, bufLen)) != -1)
outputStream.write(buf, 0, readLen);
return outputStream.toByteArray();
}
} catch (IOException e) {
exception = e;
throw e;
} finally {
if (exception == null) {
inputStream.close();
} else {
try {
inputStream.close();
} catch (IOException e) {
exception.addSuppressed(e);
}
}
}
}
private void getFieldData() {
categories.forEach((fullName, saveFrame) -> {
String header = saveFrame.getBlockHeader();
String categoryName = header.substring(header.startsWith("_") ? 1 : 0, header.contains(".") ? header.indexOf(".") : header.length());
String itemName = header.substring(header.indexOf(".") + 1);
Map fields = new LinkedHashMap<>();
// handle imports
if (saveFrame.getCategories().containsKey("import")) {
parseImportGet(saveFrame.getCategory("import").getColumn("get").getStringData(0))
.filter(Import::isValid)
.filter(i -> imports.containsKey(i.save) && !imports.get(i.save).isEmpty())
.map(i -> imports.get(i.save))
.forEach(i -> saveFrame.getCategories().putAll(i));
}
if (schema.containsKey(categoryName)) {
fields = schema.get(categoryName).getColumns();
schema.get(categoryName).getCategoryKeyNames().add(itemName);
} else if (schema.containsKey(categoryName.toLowerCase())) {
fields = schema.get(categoryName.toLowerCase()).getColumns();
// take case from category name in 'field' data as it is better if data is from cif dictionaries
schema.put(categoryName, schema.get(categoryName.toLowerCase()));
} else {
System.err.println("category " + categoryName + " has no metadata");
fields = new LinkedHashMap<>();
schema.put(categoryName, new Table("", new HashSet<>(), fields));
}
List itemAliases = getAliases(saveFrame);
if (!itemAliases.isEmpty()) {
rawAliases.put(categoryName + "." + itemName, itemAliases);
}
String description = getDescription(saveFrame);
// need to use regex to check for matrix or vector items
// as sub_category assignment is missing for some entries
String subCategory = getSubCategory(saveFrame);
if ("cartesian_coordinate".equals(subCategory) || "fractional_coordinate".equals(subCategory)) {
fields.put(itemName, new CoordCol(description));
} else if (FORCE_INT_FIELDS.contains(header)) {
fields.put(itemName, new IntCol(description));
} else if ("matrix".equals(subCategory)) {
fields.put(itemName, new MatrixCol(description));
} else if ("vector".equals(subCategory)) {
fields.put(itemName, new VectorCol(description));
} else {
if (itemName.matches(RE_MATRIX_FIELD)) {
fields.put(itemName, new MatrixCol(description));
} else if (itemName.matches(RE_VECTOR_FIELD)) {
fields.put(itemName, new VectorCol(description));
} else {
List code = getCode(saveFrame);
if (!code.isEmpty()) {
Col fieldType = getFieldType(code.get(0), description, code.subList(1, code.size()));
fields.put(itemName, fieldType);
}
}
}
});
}
private List getAliases(Block saveFrame) {
Column field = getField("item_aliases", "alias_name", saveFrame);
if (field == null || !field.isDefined()) {
field = getField("alias", "definition_id", saveFrame);
}
Column column = field;
if (column == null) {
return Collections.emptyList();
}
return IntStream.range(0, field.getRowCount())
.mapToObj(column::getStringData)
.map(s -> s.substring(1))
.collect(Collectors.toList());
}
private Col getFieldType(String type, String description, List values) {
switch (type) {
// mmCIF
case "code":
case "ucode":
case "line":
case "uline":
case "text":
case "char":
case "uchar3":
case "uchar1":
case "boolean":
return values.isEmpty() ? new StrCol(description) : new EnumCol(values, "str", description);
case "aliasname":
case "name":
case "idname":
case "any":
case "atcode":
case "fax":
case "phone":
case "email":
case "code30":
case "seq-one-letter-code":
case "author":
case "orcid_id":
case "sequence_dep":
case "pdb_id":
case "emd_id":
// TODO consider adding specialised fields
case "yyyy-mm-dd":
case "yyyy-mm-dd:hh:mm":
case "yyyy-mm-dd:hh:mm-flex":
case "int-range":
case "float-range":
case "binary":
case "operation_expression":
case "point_symmetry":
case "4x3_matrix":
case "3x4_matrices":
case "point_group":
case "point_group_helical":
case "symmetry_operation":
case "date_dep":
case "url":
case "symop":
case "exp_data_doi":
case "asym_id":
return new StrCol(description);
case "int":
case "non_negative_int":
case "positive_int":
return values.isEmpty() ? new IntCol(description) : new EnumCol(values, "int", description);
case "float":
return new FloatCol(description);
case "ec-type":
case "ucode-alphanum-csv":
case "id_list":
return new ListCol("str", ",", description);
case "id_list_spc":
return new ListCol("str", " ", description);
// cif
case "Text":
case "Code":
case "Complex":
case "Symop":
case "List":
case "List(Real,Real)":
case "List(Real,Real,Real,Real)":
case "Date":
case "Datetime":
case "Tag":
case "Implied":
// return wrapContainer('str', ',', description, container);
return new StrCol(description);
case "Real":
// return wrapContainer('float', ',', description, container);
return new FloatCol(description);
case "Integer":
// return wrapContainer('int', ',', description, container);
return new IntCol(description);
default:
return new StrCol(description);
}
}
private List getCode(Block saveFrame) {
Column code = getField("item_type", "code", saveFrame);
if (code == null || !code.isDefined()) {
code = getField("type", "contents", saveFrame);
}
if (code != null && code.getRowCount() > 0) {
return Stream.concat(Stream.of(code.getStringData(0)), getEnums(saveFrame)).collect(Collectors.toList());
} else {
return Collections.emptyList();
}
}
private Stream getEnums(Block saveFrame) {
Column value = getField("item_enumeration", "value", saveFrame);
if (value != null) {
return IntStream.range(0, value.getRowCount())
.mapToObj(value::getStringData);
} else {
return Stream.empty();
}
}
private String getSubCategory(Block saveFrame) {
Column value = getField("item_sub_category", "id", saveFrame);
if (value == null) {
return "";
}
return value.getStringData(0);
}
private String getDescription(Block saveFrame) {
Column value = getField("item_description", "description", saveFrame);
if (value == null || !value.isDefined()) {
value = getField("description", "text", saveFrame);
}
if (value == null) {
return null;
}
String escapedDescription = escape(value.getStringData(0));
return Pattern.compile("\n").splitAsStream(escapedDescription)
.map(String::trim)
.collect(Collectors.joining("\n"))
.replaceAll("(\\[[1-3]])+ element", "elements")
.replaceAll("(\\[[1-3]])+", "");
}
private Column getField(String category, String field, Block saveFrame) {
Category cat = saveFrame.getCategory(category);
if (cat.isDefined()) {
return cat.getColumn(field);
} else if (links.containsKey(saveFrame.getBlockHeader())) {
String linkName = links.get(saveFrame.getBlockHeader());
Block block = categories.get(linkName);
if (block != null) {
return getField(category, field, block);
} else {
System.err.println("link " + linkName + "not found");
return null;
}
} else {
return null;
}
}
private void buildListOfLinksBetweenCategories(CifFile cifFile) {
cifFile.getBlocks()
.get(0)
.getSaveFrames()
.stream()
.filter(saveFrame -> saveFrame.getBlockHeader().startsWith("_") || saveFrame.getBlockHeader().contains("."))
.forEach(saveFrame -> {
categories.put(saveFrame.getBlockHeader(), saveFrame);
Category item_linked = saveFrame.getCategory("item_linked");
if (item_linked == null) {
return;
}
Column child_name = item_linked.getColumn("child_name");
Column parent_name = item_linked.getColumn("parent_name");
for (int i = 0; i < item_linked.getRowCount(); i++) {
String childName = child_name.getStringData(i);
String parentName = parent_name.getStringData(i);
links.put(childName, parentName);
}
});
}
private void getCategoryMetadataMmcif(CifFile cifFile) {
cifFile.getBlocks()
.get(0)
.getSaveFrames()
.stream()
.filter(saveFrame -> !saveFrame.getBlockHeader().startsWith("_"))
.forEach(saveFrame -> {
Set categoryKeyNames = new HashSet<>();
Column cifColumn = saveFrame.getCategory("category_key").getColumn("name");
for (int i = 0; i < cifColumn.getRowCount(); i++) {
categoryKeyNames.add(cifColumn.getStringData(i));
}
String rawDescription = saveFrame.getCategory("category")
.getColumn("description")
.getStringData(0);
String escapedDescription = escape(rawDescription);
String description = Pattern.compile("\n")
.splitAsStream(escapedDescription)
.map(String::trim)
.collect(Collectors.joining("\n"));
schema.put(saveFrame.getBlockHeader(), new Table(description, categoryKeyNames,
new LinkedHashMap<>()));
});
}
private void getCategoryMetadataCifCore(CifFile cifFile) {
Block block = cifFile.getBlocks().get(0);
final String cifCoreDicVersion = block.getCategory("dictionary").getColumn("version").getStringData(0);
System.out.println("Dictionary versions: CifCore " + cifCoreDicVersion);
if ("CORE_DIC".equals(cifFile.getBlocks().get(0).getBlockHeader())) {
block.getSaveFrames()
.stream()
// category definitions in cif don't include a '.'
.filter(saveFrame -> !saveFrame.getBlockHeader().contains("."))
.forEach(saveFrame -> {
Set categoryKeyNames = new HashSet<>();
String rawDescription = saveFrame.getCategory("description")
.getColumn("text")
.getStringData(0);
String escapedDescription = escape(rawDescription);
String description = Pattern.compile("\n")
.splitAsStream(escapedDescription)
.map(String::trim)
.collect(Collectors.joining("\n"));
schema.put(saveFrame.getBlockHeader().toLowerCase(), new Table(description, categoryKeyNames,
new LinkedHashMap<>()));
});
} else {
// resolve imports
block.getSaveFrames()
.forEach(b -> {
Map map = imports.computeIfAbsent(b.getBlockHeader(), e -> new LinkedHashMap<>());
map.putAll(b.getCategories());
});
}
}
private static final Pattern savePattern = Pattern.compile("('save'|\"save\"):([^ \t\n]+)");
private static final Pattern filePattern = Pattern.compile("('file'|\"file\"):([^ \t\n]+)");
private Stream parseImportGet(String s) {
// [{'save':hi_ang_Fox_coeffs 'file':templ_attr.cif} {'save':hi_ang_Fox_c0 'file':templ_enum.cif}]
// [{"file":'templ_enum.cif' "save":'H_M_ref'}]
// get rid of surrounding brackets
s = s.trim().substring(2, s.length() - 2);
return Pattern.compile("}\\s+\\{").splitAsStream(s)
.map(split -> {
Matcher save = savePattern.matcher(split);
Matcher file = filePattern.matcher(split);
return new Import(save, file);
});
}
private String escape(String description) {
return description.replace("&", "&")
.replace(">", ">")
.replace("<", "<");
}
static class Import {
final String save;
final String file;
public Import(Matcher save, Matcher file) {
this.save = save.find() ? save.group(0).substring(7).replaceAll("['\"]", "") : null;
this.file = file.find() ? file.group(0).substring(7).replaceAll("['\"]", "") : null;
}
public boolean isValid() {
return save != null && file != null;
}
@Override
public String toString() {
return "Import{" +
"save='" + save + '\'' +
", file='" + file + '\'' +
'}';
}
}
private void prepareAliases() {
// filter and flip aliases
rawAliases.entrySet()
.stream()
.map(entry -> {
String target = entry.getKey();
String flatTarget = target.replace(".", "_");
List sources = entry.getValue().stream()
// mappings handled by data model
.filter(s -> !s.equals(flatTarget))
.filter(s -> s.contains("."))
.filter(s -> !target.equals(s))
.distinct()
.collect(Collectors.toList());
// most will be empty as they just map between name with . and flat name
if (sources.isEmpty()) {
return Collections.emptyList();
}
// System.out.println("alias: " + sources + " -> " + target);
sources.add(target);
return sources;
})
.filter(list -> !list.isEmpty())
.forEach(list -> {
List alias = (List) list;
Optional> optional = aliases.stream()
// find sets of name referencing this
.filter(set -> alias.stream().anyMatch(set::contains))
.findFirst();
if (optional.isPresent()) {
optional.get().addAll(alias);
} else {
aliases.add(alias);
}
});
// ensure new categories
aliases.stream()
// map to individual names
.flatMap(Collection::stream)
.map(name -> name.split("\\.")[0])
.filter(categoryName -> !schema.containsKey(categoryName))
.forEach(categoryName -> {
// System.out.println("additional category: " + categoryName);
schema.computeIfAbsent(categoryName, e -> new Table("", new HashSet<>(), new LinkedHashMap<>()));
});
}
}