// Source listing of org.dspace.app.bulkedit.DSpaceCSV from the dspace-api artifact
// (DSpace core data model and service APIs).
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.bulkedit;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.dspace.authority.AuthorityValue;
import org.dspace.authority.factory.AuthorityServiceFactory;
import org.dspace.authority.service.AuthorityValueService;
import org.dspace.content.Collection;
import org.dspace.content.Item;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataSchema;
import org.dspace.content.MetadataSchemaEnum;
import org.dspace.content.MetadataValue;
import org.dspace.content.authority.Choices;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.content.service.MetadataFieldService;
import org.dspace.content.service.MetadataSchemaService;
import org.dspace.core.Context;
import org.dspace.services.factory.DSpaceServicesFactory;
/**
* Utility class to read and write CSV files
*
* **************
* Important Note
* **************
*
* This class has been made serializable, as it is stored in a Session.
* Is it wise to:
* a) be putting this into a user's session?
* b) holding an entire CSV upload in memory?
*
* @author Stuart Lewis
*/
public class DSpaceCSV implements Serializable {
/**
 * The headings of the CSV file. The 'id' column is not stored here; it is
 * always written first when the CSV is rendered.
 */
protected List<String> headings;
/**
 * An array list of CSV lines
 */
protected List<DSpaceCSVLine> lines;
/**
 * A counter of how many CSV lines this object holds
 */
protected int counter;
/**
 * The value separator (defaults to double pipe '||')
 */
protected String valueSeparator;
/**
 * The value separator in an escaped form for using in regexes
 */
protected String escapedValueSeparator;
/**
 * The field separator (defaults to comma)
 */
protected String fieldSeparator;
/**
 * The field separator in an escaped form for using in regexes
 */
protected String escapedFieldSeparator;
/**
 * The authority separator (defaults to double colon '::')
 */
protected String authoritySeparator;
/**
 * The authority separator in an escaped form for using in regexes
 */
protected String escapedAuthoritySeparator;
// Service handles are transient so that they are not written out with a serialized instance.
// NOTE(review): Java deserialization does not re-run field initialisers of a Serializable class,
// so these will be null on an instance restored from a session - confirm that restored instances
// never call addItem(Item) or the CSV-reading constructor logic.
protected transient final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
protected transient final MetadataSchemaService metadataSchemaService =
    ContentServiceFactory.getInstance().getMetadataSchemaService();
protected transient final MetadataFieldService metadataFieldService =
    ContentServiceFactory.getInstance().getMetadataFieldService();
protected transient final AuthorityValueService authorityValueService =
    AuthorityServiceFactory.getInstance().getAuthorityValueService();
/**
 * Whether to export all metadata such as handles and provenance information
 */
protected boolean exportAll;
/**
 * The metadata elements to ignore on export. Used as a set: each entry maps
 * the field name (schema.element[.qualifier]) to itself.
 */
protected Map<String, String> ignore;
/**
 * Create a new, empty instance of a CSV line holder, typically to be filled
 * through {@link #addItem(Item)}.
 *
 * @param exportAll Whether to export all metadata such as handles and provenance information
 */
public DSpaceCSV(boolean exportAll) {
// Initialise the class (separators, empty headings/lines, ignore list)
init();
// Store the exportAll setting
this.exportAll = exportAll;
}
/**
 * Create a new instance, reading the lines in from an input stream.
 *
 * The first line is treated as the heading line; every metadata heading is
 * checked against the metadata registry. Each following record is handed to
 * {@link #addItem(String)}. A record may span several physical lines when a
 * quoted value contains line breaks: lines are accumulated until the number
 * of double quotes seen is even.
 *
 * @param inputStream the input stream to read from (decoded as UTF-8, closed before returning)
 * @param c           The DSpace Context
 * @throws Exception thrown if there is an error reading or processing the file
 * @throws IllegalArgumentException if the stream does not contain a heading line
 */
public DSpaceCSV(InputStream inputStream, Context c) throws Exception {
    // Initialise the class
    init();

    // try-with-resources closes the reader (and the wrapped stream) on every exit path
    try (BufferedReader input =
             new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
        // Read the heading line; readLine() returns null for an empty stream
        String head = input.readLine();
        if (head == null) {
            throw new IllegalArgumentException("The CSV input is empty: no heading line was found");
        }

        int columnCounter = 0;
        for (String element : head.split(escapedFieldSeparator)) {
            columnCounter++;
            addHeading(element, columnCounter, c);
        }

        // Read each subsequent line, joining physical lines that belong to one quoted record
        StringBuilder pending = new StringBuilder();
        String lineRead;
        while ((lineRead = input.readLine()) != null) {
            if (pending.length() > 0) {
                // Already have a previously read value - add this line
                pending.append("\n").append(lineRead);
                if (hasBalancedQuotes(pending)) {
                    addItem(pending.toString());
                    pending.setLength(0);
                }
            } else if (hasBalancedQuotes(lineRead)) {
                // Complete record on a single line (this includes lines without any quotes)
                addItem(lineRead);
            } else {
                // Uneven quotes - add to the buffer and leave for later
                pending.append(lineRead);
            }
        }
    }
}

/**
 * Validate one element of the heading line and store it in {@link #headings}.
 *
 * The headings 'collection', 'rowName' and 'action' are stored as they are,
 * 'id' is skipped, and anything else must name an existing metadata field
 * (schema.element[.qualifier], optionally followed by [language]).
 *
 * @param element       the raw heading text, possibly surrounded by double quotes
 * @param columnCounter the 1-based column position, used for error reporting
 * @param c             The DSpace Context
 * @throws Exception if the heading is malformed or not in the metadata registry
 */
private void addHeading(String element, int columnCounter, Context c) throws Exception {
    // Remove surrounding quotes if there are any (a lone quote character has nothing to strip)
    if (element.length() >= 2 && element.startsWith("\"") && element.endsWith("\"")) {
        element = element.substring(1, element.length() - 1);
    }

    if ("collection".equals(element) || "rowName".equals(element) || "action".equals(element)) {
        headings.add(element);
        return;
    }
    if ("id".equals(element)) {
        // The id column is implicit and never stored as a heading
        return;
    }

    String authorityPrefix = "";
    if (StringUtils.startsWith(element, "[authority]")) {
        element = StringUtils.substringAfter(element, "[authority]");
        AuthorityValue authorityValueType = authorityValueService.getAuthorityValueType(element);
        if (authorityValueType != null) {
            String authorityType = authorityValueType.getAuthorityType();
            // The prefix is the authority type plus the one character that follows it
            authorityPrefix = element.substring(0, authorityType.length() + 1);
            element = element.substring(authorityPrefix.length());
        }
    }

    // Verify that the heading is valid in the metadata registry.
    // split() drops trailing empty strings, so a heading such as "[" yields an empty array.
    String[] clean = element.split("\\[");
    String fieldName = clean.length > 0 ? clean[0] : "";
    String[] parts = fieldName.split("\\.");
    if (parts.length < 2) {
        throw new MetadataImportInvalidHeadingException(element,
                                                        MetadataImportInvalidHeadingException.ENTRY,
                                                        columnCounter);
    }
    String metadataSchema = parts[0];
    String metadataElement = parts[1];
    String metadataQualifier = parts.length > 2 ? parts[2] : null;

    // Headings in the relation schema are not looked up in the registry
    if (!StringUtils.equals(metadataSchema, MetadataSchemaEnum.RELATION.getName())) {
        // Check that the scheme exists
        MetadataSchema foundSchema = metadataSchemaService.find(c, metadataSchema);
        if (foundSchema == null) {
            throw new MetadataImportInvalidHeadingException(fieldName,
                                                            MetadataImportInvalidHeadingException.SCHEMA,
                                                            columnCounter);
        }
        // Check that the metadata element exists in the schema
        MetadataField foundField = metadataFieldService
            .findByElement(c, foundSchema, metadataElement, metadataQualifier);
        if (foundField == null) {
            throw new MetadataImportInvalidHeadingException(fieldName,
                                                            MetadataImportInvalidHeadingException.ELEMENT,
                                                            columnCounter);
        }
    }

    // Store the heading
    headings.add(authorityPrefix + element);
}

/**
 * Tell whether the text contains an even number of double quote characters.
 *
 * @param text the text to inspect
 * @return true if the number of '"' characters is a multiple of 2 (including zero)
 */
private static boolean hasBalancedQuotes(CharSequence text) {
    int quoteCount = 0;
    for (int pos = 0; pos < text.length(); pos++) {
        if (text.charAt(pos) == '"') {
            quoteCount++;
        }
    }
    return quoteCount % 2 == 0;
}
/**
 * Reset this object to an empty CSV and load its settings from the configuration:
 * the three separators and the list of metadata fields that are skipped on export
 * (property {@code bulkedit.ignore-on-export}).
 */
protected void init() {
    // Separators first: value, field, authority (each also stores its regex-escaped form)
    setValueSeparator();
    setFieldSeparator();
    setAuthoritySeparator();

    // Start with no headings, no lines and a zeroed line counter
    headings = new ArrayList<>();
    lines = new ArrayList<>();
    counter = 0;

    // Fields to leave out of an export, unless the configuration says otherwise
    ignore = new HashMap<>();
    String[] fallbackIgnored = {
        "dc.date.accessioned", "dc.date.available", "dc.date.updated", "dc.description.provenance"
    };
    String[] configuredIgnored = DSpaceServicesFactory.getInstance()
                                                      .getConfigurationService()
                                                      .getArrayProperty("bulkedit.ignore-on-export",
                                                                        fallbackIgnored);
    for (String rawName : configuredIgnored) {
        String fieldName = rawName.trim();
        // Blank entries are skipped
        if (!fieldName.isEmpty()) {
            ignore.put(fieldName, fieldName);
        }
    }
}
/**
 * Decide if this CSV file has an 'action' (case-dependent!) header.
 *
 * @return Whether or not there is an 'action' header
 */
public boolean hasActions() {
    // List.contains performs the same exact-match search and tolerates null entries
    return headings.contains("action");
}
/**
 * Set the value separator for multiple values stored in one csv value.
 *
 * Read from the configuration property {@code bulkedit.valueseparator}.
 *
 * If not set, defaults to double pipe '||'.
 */
private void setValueSeparator() {
    valueSeparator = readSeparator("bulkedit.valueseparator", "||");
    // Now store the escaped version
    escapedValueSeparator = escapeForRegex(valueSeparator);
}

/**
 * Set the field separator use to separate fields in the csv.
 *
 * Read from the configuration property {@code bulkedit.fieldseparator}.
 *
 * If not set, defaults to comma ','.
 *
 * Special values are 'tab', 'hash' and 'semicolon' which will
 * get substituted from the text to the value.
 */
private void setFieldSeparator() {
    String configured = readSeparator("bulkedit.fieldseparator", ",");
    if ("tab".equals(configured)) {
        fieldSeparator = "\t";
    } else if ("semicolon".equals(configured)) {
        fieldSeparator = ";";
    } else if ("hash".equals(configured)) {
        fieldSeparator = "#";
    } else {
        fieldSeparator = configured;
    }
    // Now store the escaped version
    escapedFieldSeparator = escapeForRegex(fieldSeparator);
}

/**
 * Set the authority separator for value with authority data.
 *
 * Read from the configuration property {@code bulkedit.authorityseparator}.
 *
 * If not set, defaults to double colon '::'.
 */
private void setAuthoritySeparator() {
    authoritySeparator = readSeparator("bulkedit.authorityseparator", "::");
    // Now store the escaped version
    escapedAuthoritySeparator = escapeForRegex(authoritySeparator);
}

/**
 * Read a separator from the configuration service.
 *
 * @param propertyKey  the configuration property to read
 * @param defaultValue the separator to use when the property is missing or blank
 * @return the trimmed configured value, or {@code defaultValue}
 */
private static String readSeparator(String propertyKey, String defaultValue) {
    String configured = DSpaceServicesFactory.getInstance().getConfigurationService()
                                             .getProperty(propertyKey);
    if ((configured != null) && !configured.trim().isEmpty()) {
        return configured.trim();
    }
    return defaultValue;
}

/**
 * Escape a separator so that it can be used literally inside a regular expression.
 *
 * @param separator the separator text
 * @return the separator with every regex special character prefixed by a backslash
 */
private static String escapeForRegex(String separator) {
    Pattern spchars = Pattern.compile("([\\\\*+\\[\\](){}\\$.?\\^|])");
    Matcher match = spchars.matcher(separator);
    return match.replaceAll("\\\\$1");
}
/**
 * Add a DSpace item to the CSV file.
 *
 * The line gets the owning collection's handle first, then the handles of any
 * other collections the item is in, then one value per exported metadata value.
 * Values with an authority are written as value, authority and confidence joined
 * by the authority separator. Items without an owning collection are skipped.
 *
 * @param i The DSpace item
 * @throws Exception if something goes wrong with adding the Item
 */
public final void addItem(Item i) throws Exception {
    // If the item does not have an "owningCollection" then the "getHandle()" call below would fail.
    // This should not happen but is here for safety.
    Collection owningCollection = i.getOwningCollection();
    if (owningCollection == null) {
        return;
    }

    // Create the CSV line
    DSpaceCSVLine line = new DSpaceCSVLine(i.getID());

    // Add in owning collection
    String owningCollectionHandle = owningCollection.getHandle();
    line.add("collection", owningCollectionHandle);

    // Add in any mapped collections
    List<Collection> collections = i.getCollections();
    for (Collection c : collections) {
        // Only add if it is not the owning collection
        if (!c.getHandle().equals(owningCollectionHandle)) {
            line.add("collection", c.getHandle());
        }
    }

    // Populate it
    List<MetadataValue> md = itemService.getMetadata(i, Item.ANY, Item.ANY, Item.ANY, Item.ANY);
    for (MetadataValue value : md) {
        MetadataField metadataField = value.getMetadataField();
        if (!exportAll && !okToExport(metadataField)) {
            // This field is on the ignore list
            continue;
        }

        // Get the key (schema.element)
        String key = metadataField.getMetadataSchema().getName() + "." + metadataField.getElement();
        // Add the qualifier if there is one (schema.element.qualifier)
        if (metadataField.getQualifier() != null) {
            key = key + "." + metadataField.getQualifier();
        }
        // Add the language if there is one (schema.element.qualifier[language])
        if (value.getLanguage() != null) {
            key = key + "[" + value.getLanguage() + "]";
        }

        // Add authority and confidence if there is an authority
        String mdValue = value.getValue();
        if (StringUtils.isNotEmpty(value.getAuthority())) {
            // A confidence of -1 is written out as Choices.CF_ACCEPTED
            mdValue += authoritySeparator + value.getAuthority() + authoritySeparator + (value
                .getConfidence() != -1 ? value.getConfidence() : Choices.CF_ACCEPTED);
        }
        line.add(key, mdValue);
        if (!headings.contains(key)) {
            headings.add(key);
        }
    }
    lines.add(line);
    counter++;
}
/**
 * Add an item to the CSV file, from a CSV line of elements.
 *
 * The first element is the item id: a UUID for an existing item, or '+' for a
 * new item. The remaining elements are matched by position against the headings;
 * each may hold several values joined by the value separator.
 *
 * @param line The line of elements
 * @throws Exception Thrown if an error occurs when adding the item
 * @throws IllegalArgumentException if the id is neither '+' nor a valid UUID
 */
public final void addItem(String line) throws Exception {
    // Check to see if the last character is a field separator, which hides the last empty column
    // (String.split drops trailing empty strings)
    boolean last = false;
    if (line.endsWith(fieldSeparator)) {
        // Add a space to the end, then remove it later
        last = true;
        line += " ";
    }

    // Split up on field separator
    String[] parts = line.split(escapedFieldSeparator);
    List<String> bits = new ArrayList<>(Arrays.asList(parts));

    // Merge parts with embedded separators: a part that opens a quote without closing it
    // is glued to the part that follows, and then examined again
    int index = 0;
    while (index < bits.size()) {
        String part = bits.get(index);
        int quoteCount = part.length() - part.replace("\"", "").length();
        boolean opensUnclosedQuote =
            part.startsWith("\"") && (!part.endsWith("\"") || ((quoteCount & 1) == 1));
        if (opensUnclosedQuote) {
            bits.set(index, part + fieldSeparator + bits.get(index + 1));
            bits.remove(index + 1);
        } else {
            index++;
        }
    }

    // Deal with quotes around the elements, then turn doubled (escaped) quotes into single ones
    for (int pos = 0; pos < bits.size(); pos++) {
        String part = bits.get(pos);
        if (part.startsWith("\"") && part.endsWith("\"")) {
            part = part.substring(1, part.length() - 1);
        }
        bits.set(pos, part.replace("\"\"", "\""));
    }

    // Add elements to a DSpaceCSVLine
    String id = parts[0].replace("\"", "");
    DSpaceCSVLine csvLine;

    // Is this an existing item, or a new item (where id = '+')
    if ("+".equals(id)) {
        csvLine = new DSpaceCSVLine();
    } else {
        UUID itemId;
        try {
            itemId = UUID.fromString(id);
        } catch (IllegalArgumentException iae) {
            // UUID.fromString reports malformed input with IllegalArgumentException
            // (NumberFormatException is only one of its subclasses)
            System.err.println("Invalid item identifier: " + id);
            System.err.println("Please check your CSV file for information. " +
                                   "Item id must be a UUID, or a '+' to add a new item");
            throw iae;
        }
        csvLine = new DSpaceCSVLine(itemId);
    }

    // Add the rest of the parts (column 0 is the id)
    for (int column = 1; column < bits.size(); column++) {
        String part = bits.get(column);
        // Is this a last empty item?
        if (last && (column == headings.size())) {
            part = "";
        }
        // There must be a heading for every column
        if (headings.size() < column) {
            throw new MetadataImportInvalidHeadingException("",
                                                            MetadataImportInvalidHeadingException.MISSING,
                                                            column + 1);
        }
        String heading = headings.get(column - 1);
        // Make sure we register that this column was there
        csvLine.add(heading, null);
        for (String element : part.split(escapedValueSeparator)) {
            if ((element != null) && !element.isEmpty()) {
                csvLine.add(heading, element);
            }
        }
    }
    lines.add(csvLine);
    counter++;
}
/**
 * Get the lines in CSV holders
 *
 * @return The lines (the internal list itself, not a copy)
 */
public final List<DSpaceCSVLine> getCSVLines() {
    // Return the lines
    return lines;
}
/**
 * Get the CSV lines as an array of CSV formatted strings.
 *
 * Element 0 is the heading line: 'id', 'collection', then the headings in
 * sorted order. The remaining elements are the lines, in the order they were added.
 *
 * @return the array of CSV formatted Strings
 */
public final String[] getCSVLinesAsStringArray() {
    String[] csvLines = new String[counter + 1];

    // Sort a copy so that the order of the stored headings is left untouched
    List<String> headingsCopy = new ArrayList<>(headings);
    Collections.sort(headingsCopy);

    // Create the headings line
    StringBuilder headingLine = new StringBuilder();
    headingLine.append("id").append(fieldSeparator).append("collection");
    for (String value : headingsCopy) {
        headingLine.append(fieldSeparator).append(value);
    }
    csvLines[0] = headingLine.toString();

    // Render each line against the same sorted headings
    Iterator<DSpaceCSVLine> i = lines.iterator();
    int c = 1;
    while (i.hasNext()) {
        csvLines[c++] = i.next().toCSV(headingsCopy, fieldSeparator, valueSeparator);
    }
    return csvLines;
}
/**
 * Creates and returns an InputStream from the CSV Lines in this DSpaceCSV.
 * The stream holds the UTF-8 bytes of the text produced by {@link #toString()}:
 * every CSV line followed by a newline.
 *
 * @return The InputStream created from the CSVLines in this DSpaceCSV
 */
public InputStream getInputStream() {
    String wholeCsv = toString();
    return IOUtils.toInputStream(wholeCsv, StandardCharsets.UTF_8);
}
/**
 * Is it okay to export this value? When exportAll is set to false, we don't export
 * some of the metadata elements.
 *
 * The field is looked up in the ignore list by its name, schema.element or
 * schema.element.qualifier.
 *
 * @param md The MetadataField to examine
 * @return Whether or not it is OK to export this element
 */
protected boolean okToExport(MetadataField md) {
    String fieldName = md.getMetadataSchema().getName() + "." + md.getElement();
    String qualifier = md.getQualifier();
    String key = (qualifier == null) ? fieldName : fieldName + "." + qualifier;
    // OK to export unless the ignore list has an entry for this field
    return ignore.get(key) == null;
}
/**
 * Get the headings used in this CSV file
 *
 * @return The headings (the internal list itself, not a copy)
 */
public List<String> getHeadings() {
    return headings;
}
/**
 * Return the csv file as one long formatted string: the heading line and
 * every CSV line, each terminated by a newline.
 *
 * @return The formatted String as a csv
 */
@Override
public final String toString() {
    StringBuilder joined = new StringBuilder();
    for (String csvLine : this.getCSVLinesAsStringArray()) {
        joined.append(csvLine);
        joined.append("\n");
    }
    return joined.toString();
}
/**
 * Get the authority separator used between a value, its authority key and its confidence.
 *
 * @return The authority separator
 */
public String getAuthoritySeparator() {
return authoritySeparator;
}
/**
 * Get the authority separator in an escaped form for using in regexes.
 *
 * @return The escaped authority separator
 */
public String getEscapedAuthoritySeparator() {
return escapedAuthoritySeparator;
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy