All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dspace.util.SolrImportExport Maven / Gradle / Ivy

There is a newer version: 8.0
Show newest version
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.util;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.net.URL;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.response.CoreAdminResponse;
import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.RangeFacet;
import org.apache.solr.common.luke.FieldFlag;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.FacetParams;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;

/**
 * Utility class to export, clear and import Solr indexes.
 *
 * @author Andrea Schweer [email protected] for the LCoNZ Institutional Research Repositories
 */
public class SolrImportExport {

    // Date formats are kept in ThreadLocals because SimpleDateFormat is not thread-safe;
    // each thread lazily gets its own instance.
    // (Generic type parameters restored: raw ThreadLocal.get() returns Object, which would
    // not compile against the .format()/.parse() calls made elsewhere in this class.)
    private static final ThreadLocal<DateFormat> SOLR_DATE_FORMAT;
    private static final ThreadLocal<DateFormat> SOLR_DATE_FORMAT_NO_MS;
    private static final ThreadLocal<DateFormat> EXPORT_DATE_FORMAT;
    // Separator between the index name and the date portion of export file names.
    private static final String EXPORT_SEP = "_export_";

    static {
        // Full Solr timestamp with milliseconds, always rendered in UTC.
        SOLR_DATE_FORMAT = new ThreadLocal<DateFormat>() {
            @Override
            protected DateFormat initialValue() {
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
                simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
                return simpleDateFormat;
            }
        };
        // Solr timestamp without milliseconds (the shape returned by range facet values).
        // NOTE(review): no explicit time zone is set here, so parsing uses the JVM default
        // zone even though the pattern ends in a literal 'Z' -- confirm this is intended.
        SOLR_DATE_FORMAT_NO_MS = new ThreadLocal<DateFormat>() {
            @Override
            protected DateFormat initialValue() {
                return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
            }
        };
        // Year-month stamp used in export file names, in the local (default) time zone.
        EXPORT_DATE_FORMAT = new ThreadLocal<DateFormat>() {
            @Override
            protected DateFormat initialValue() {
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM");
                simpleDateFormat.setTimeZone(TimeZone.getDefault());
                return simpleDateFormat;
            }
        };
    }

    // Short names of the supported command-line options; see makeOptions() for descriptions.
    private static final String ACTION_OPTION = "a";
    private static final String CLEAR_OPTION = "c";
    private static final String OVERWRITE_OPTION = "f";
    private static final String DIRECTORY_OPTION = "d";
    private static final String HELP_OPTION = "h";
    private static final String INDEX_NAME_OPTION = "i";
    private static final String KEEP_OPTION = "k";
    private static final String LAST_OPTION = "l";

    // Maximum number of documents written to a single export CSV file.
    public static final int ROWS_PER_FILE = 10_000;

    // Separator used for multi-valued fields in the exported CSV files.
    private static final String MULTIPLE_VALUES_SPLITTER = ",";

    private static final Logger log = LogManager.getLogger(SolrImportExport.class);
    private static final ConfigurationService configurationService
            = DSpaceServicesFactory.getInstance().getConfigurationService();

    /**
     * Private constructor: this is a static utility class and must not be instantiated.
     */
    private SolrImportExport() { }

    /**
     * Entry point for command-line invocation.
     *
     * <p>Parses the command line, determines the requested action (import, export or
     * reindex) and applies it to each requested index in turn. A failure for one index
     * is reported on stderr but does not abort processing of the remaining indexes.</p>
     *
     * @param args command-line arguments; see help for description
     * @throws ParseException if the command-line arguments cannot be parsed
     */
    public static void main(String[] args) throws ParseException {
        CommandLineParser parser = new DefaultParser();
        Options options = makeOptions();

        try {
            CommandLine line = parser.parse(options, args);
            if (line.hasOption(HELP_OPTION)) {
                printHelpAndExit(options, 0);
            }

            // Default to the statistics core when no index name is given explicitly.
            String[] indexNames = {"statistics"};
            if (line.hasOption(INDEX_NAME_OPTION)) {
                indexNames = line.getOptionValues(INDEX_NAME_OPTION);
            } else {
                System.err.println("No index name provided, defaulting to \"statistics\".");
            }

            String directoryName = makeDirectoryName(line.getOptionValue(DIRECTORY_OPTION));

            String action = line.getOptionValue(ACTION_OPTION, "export");
            if ("import".equals(action)) {
                for (String indexName : indexNames) {
                    File importDir = new File(directoryName);
                    if (!importDir.exists() || !importDir.canRead()) {
                        System.err.println("Import directory " + directoryName
                                               + " doesn't exist or is not readable by the current user. Not " +
                                               "importing index "
                                               + indexName);
                        continue; // skip this index
                    }
                    try {
                        String solrUrl = makeSolrUrl(indexName);
                        boolean clear = line.hasOption(CLEAR_OPTION);
                        importIndex(indexName, importDir, solrUrl, clear);
                    } catch (IOException | SolrServerException | SolrImportExportException e) {
                        System.err.println("Problem encountered while trying to import index " + indexName + ".");
                        e.printStackTrace(System.err);
                    }
                }
            } else if ("export".equals(action)) {
                for (String indexName : indexNames) {
                    String lastValue = line.getOptionValue(LAST_OPTION);
                    File exportDir = new File(directoryName);
                    if (exportDir.exists() && !exportDir.canWrite()) {
                        System.err.println("Export directory " + directoryName
                                               + " is not writable by the current user. Not exporting index "
                                               + indexName);
                        continue;
                    }

                    // Create the export directory if necessary, and only skip this index
                    // when creation actually fails. (Bug fix: previously a successful
                    // mkdirs() also hit an unconditional `continue`, so an export into a
                    // freshly created directory was silently skipped.)
                    if (!exportDir.exists() && !exportDir.mkdirs()) {
                        System.err.println("Export directory " + directoryName
                                               + " could not be created. Not exporting index " + indexName);
                        continue;
                    }

                    try {
                        String solrUrl = makeSolrUrl(indexName);
                        String timeField = makeTimeField(indexName);
                        exportIndex(indexName, exportDir, solrUrl, timeField, lastValue,
                                    line.hasOption(OVERWRITE_OPTION));
                    } catch (SolrServerException | IOException | SolrImportExportException e) {
                        System.err.println("Problem encountered while trying to export index " + indexName + ".");
                        e.printStackTrace(System.err);
                    }
                }
            } else if ("reindex".equals(action)) {
                for (String indexName : indexNames) {
                    try {
                        boolean keepExport = line.hasOption(KEEP_OPTION);
                        boolean overwrite = line.hasOption(OVERWRITE_OPTION);
                        reindex(indexName, directoryName, keepExport, overwrite);
                    } catch (IOException | SolrServerException | SolrImportExportException e) {
                        // Report consistently with the import/export branches.
                        System.err.println("Problem encountered while trying to reindex index " + indexName + ".");
                        e.printStackTrace(System.err);
                    }
                }
            } else {
                System.err.println("Unknown action " + action + "; must be import, export or reindex.");
                printHelpAndExit(options, 1);
            }
        } catch (ParseException e) {
            System.err.println("Cannot read command options");
            printHelpAndExit(options, 1);
        }
    }

    /**
     * Build the command-line options understood by {@link #main(String[])}.
     *
     * @return the populated set of supported options.
     */
    private static Options makeOptions() {
        Options opts = new Options();
        opts.addOption(ACTION_OPTION, "action", true,
                       "The action to perform: import, export or reindex. Default: export.");
        opts.addOption(CLEAR_OPTION, "clear", false,
                       "When importing, also clear the index first. Ignored when action is export or reindex.");
        opts.addOption(OVERWRITE_OPTION, "force-overwrite", false,
                       "When exporting or re-indexing, allow overwrite of existing export files");
        opts.addOption(DIRECTORY_OPTION, "directory", true,
                       "The absolute path for the directory to use for import or export. If omitted, "
                           + "[dspace]/solr-export is used.");
        opts.addOption(HELP_OPTION, "help", false, "Get help on options for this command.");
        opts.addOption(INDEX_NAME_OPTION, "index-name", true,
                       "The names of the indexes to process. At least one is required. Available indexes are: "
                           + "authority, statistics.");
        opts.addOption(KEEP_OPTION, "keep", false,
                       "When reindexing, keep the contents of the data export directory."
                           + " By default, the contents of this directory will be deleted once the reindex has"
                           + " finished. Ignored when action is export or import.");
        opts.addOption(LAST_OPTION, "last", true,
                       "When exporting, export records from the last [timeperiod] only."
                           + " This can be one of: 'd' (beginning of yesterday through to now);"
                           + " 'm' (beginning of the previous month through to end of the previous month);"
                           + " a number, in which case the last [number] of days are exported, through to now"
                           + " (use 0 for today's data)."
                           + " Date calculation is done in UTC. If omitted, all documents are exported.");
        return opts;
    }

    /**
     * Reindexes the specified core
     *
     * <p>Strategy: create a temporary core, swap it with the live one (so incoming documents keep
     * being accepted), export the old data to CSV, clear and re-import into the original data
     * directory, swap back, then drain any documents that arrived in the temporary core while the
     * reindex was running. The statement order below is essential -- do not reorder.</p>
     *
     * @param indexName     the name of the core to reindex
     * @param exportDirName the name of the directory to use for export. If this directory doesn't exist, it will be
     *                      created.
     * @param keepExport    whether to keep the contents of the exportDir after the reindex. If keepExport is false
     *                      and the
     *                      export directory was created by this method, the export directory will be deleted at the
     *                      end of the reimport.
     * @param overwrite     allow export files to be overwritten during re-index
     * @throws IOException               if there is a problem accessing the filesystem or communicating with Solr
     * @throws SolrServerException       if there is a problem communicating with Solr
     * @throws SolrImportExportException if a precondition (configuration directory, export directory) is not met
     */
    private static void reindex(String indexName, String exportDirName, boolean keepExport, boolean overwrite)
        throws IOException, SolrServerException, SolrImportExportException {
        String tempIndexName = indexName + "-temp";

        String origSolrUrl = makeSolrUrl(indexName);
        String baseSolrUrl = StringUtils.substringBeforeLast(origSolrUrl, "/"); // need to get non-core solr URL
        String tempSolrUrl = baseSolrUrl + "/" + tempIndexName;

        //The configuration details for the statistics shards reside within the "statistics" folder
        String instanceIndexName = indexName.startsWith("statistics-") ? "statistics" : indexName;

        String solrInstanceDir = configurationService.getProperty("dspace.dir")
                + File.separator + "solr" + File.separator + instanceIndexName;
        // the [dspace]/solr/[indexName]/conf directory needs to be available on the local machine for this to work
        // -- we need access to the schema.xml and solrconfig.xml file, plus files referenced from there
        // if this directory can't be found, output an error message and skip this index
        File solrInstance = new File(solrInstanceDir);
        if (!solrInstance.exists() || !solrInstance.canRead() || !solrInstance.isDirectory()) {
            throw new SolrImportExportException(
                "Directory " + solrInstanceDir + "/conf/ doesn't exist or isn't readable." +
                    " The reindexing process requires the Solr configuration directory for this index to be present " +
                    "on the local machine" +
                    " even if Solr is running on a different host. Not reindexing index " + indexName);
        }

        String timeField = makeTimeField(indexName);

        // Ensure the export directory exists and is writable
        File exportDir = new File(exportDirName);
        boolean createdExportDir = exportDir.mkdirs();
        if (!createdExportDir && !exportDir.exists()) {
            throw new SolrImportExportException("Could not create export directory " + exportDirName);
        }
        if (!exportDir.canWrite()) {
            throw new SolrImportExportException("Can't write to export directory " + exportDirName);
        }

        try {
            // NOTE(review): this client (and origSolr below) is never closed even though
            // HttpSolrClient is Closeable -- consider try-with-resources in a future change.
            HttpSolrClient adminSolr = new HttpSolrClient.Builder(baseSolrUrl).build();

            // try to find out size of core and compare with free space in export directory
            CoreAdminResponse status = CoreAdminRequest.getStatus(indexName, adminSolr);
            Object coreSizeObj = status.getCoreStatus(indexName).get("sizeInBytes");
            long coreSize = coreSizeObj != null ? Long.valueOf(coreSizeObj.toString()) : -1;
            long usableExportSpace = exportDir.getUsableSpace();
            if (coreSize >= 0 && usableExportSpace < coreSize) {
                System.err.println("Not enough space in export directory " + exportDirName
                                       + "; need at least as much space as the index ("
                                       + FileUtils.byteCountToDisplaySize(coreSize)
                                       + ") but usable space in export directory is only "
                                       + FileUtils.byteCountToDisplaySize(usableExportSpace)
                                       + ". Not continuing with reindex, please use the " + DIRECTORY_OPTION
                                       + " option to specify an alternative export directy with sufficient space.");
                return;
            }

            // Create a temp directory to store temporary core data
            File tempDataDir = new File(configurationService.getProperty(
                "dspace.dir") + File.separator + "temp" + File.separator + "solr-data");
            boolean createdTempDataDir = tempDataDir.mkdirs();
            if (!createdTempDataDir && !tempDataDir.exists()) {
                throw new SolrImportExportException(
                    "Could not create temporary data directory " + tempDataDir.getCanonicalPath());
            }
            if (!tempDataDir.canWrite()) {
                throw new SolrImportExportException(
                    "Can't write to temporary data directory " + tempDataDir.getCanonicalPath());
            }

            try {
                // create a temporary core to hold documents coming in during the reindex
                CoreAdminRequest.Create createRequest = new CoreAdminRequest.Create();
                createRequest.setInstanceDir(solrInstanceDir);
                createRequest.setDataDir(tempDataDir.getCanonicalPath());
                createRequest.setCoreName(tempIndexName);

                createRequest.process(adminSolr).getStatus();
            } catch (SolrServerException e) {
                // try to continue -- it may just be that the core already existed from a previous, failed attempt
                System.err.println("Caught exception when trying to create temporary core: " + e
                    .getMessage() + "; trying to recover.");
                e.printStackTrace(System.err);
            }

            // swap actual core with temporary one
            CoreAdminRequest swapRequest = new CoreAdminRequest();
            swapRequest.setCoreName(indexName);
            swapRequest.setOtherCoreName(tempIndexName);
            swapRequest.setAction(CoreAdminParams.CoreAdminAction.SWAP);
            swapRequest.process(adminSolr);

            try {
                // export from the actual core (from temp core name, actual data dir)
                exportIndex(indexName, exportDir, tempSolrUrl, timeField, overwrite);

                // clear actual core (temp core name, clearing actual data dir) & import
                importIndex(indexName, exportDir, tempSolrUrl, true);
            } catch (IOException | SolrServerException | SolrImportExportException e) {
                // we ran into some problems with the export/import -- keep going to try and restore the solr cores
                System.err.println(
                    "Encountered problem during reindex: " + e.getMessage() + ", will attempt to restore Solr cores");
                e.printStackTrace(System.err);
            }

            // commit changes
            HttpSolrClient origSolr = new HttpSolrClient.Builder(origSolrUrl).build();
            origSolr.commit();

            // swap back (statistics now going to actual core name in actual data dir)
            swapRequest = new CoreAdminRequest();
            swapRequest.setCoreName(tempIndexName);
            swapRequest.setOtherCoreName(indexName);
            swapRequest.setAction(CoreAdminParams.CoreAdminAction.SWAP);
            swapRequest.process(adminSolr);

            // export all docs from now-temp core into export directory -- this won't cause name collisions with the
            // actual export
            // because the core name for the temporary export has -temp in it while the actual core doesn't
            exportIndex(tempIndexName, exportDir, tempSolrUrl, timeField, overwrite);
            // ...and import them into the now-again-actual core *without* clearing
            importIndex(tempIndexName, exportDir, origSolrUrl, false);

            // commit changes
            origSolr.commit();

            // unload now-temp core (temp core name)
            CoreAdminRequest.unloadCore(tempIndexName, false, false, adminSolr);

            // clean up temporary data dir if this method created it
            if (createdTempDataDir && tempDataDir.exists()) {
                FileUtils.deleteDirectory(tempDataDir);
            }
        } finally {
            // clean up export dir if appropriate
            if (!keepExport && createdExportDir && exportDir.exists()) {
                FileUtils.deleteDirectory(exportDir);
            }
        }
    }

    /**
     * Export every document in the given index, in batches of #ROWS_PER_FILE, into the supplied
     * directory. Convenience overload equivalent to calling
     * {@code exportIndex(indexName, toDir, solrUrl, timeField, null, overwrite)}, i.e. with no
     * lower bound on the export date range.
     *
     * @param indexName The index to export.
     * @param toDir     The target directory for the export. Will be created if it doesn't exist yet. The directory
     *                  must be writeable.
     * @param solrUrl   The solr URL for the index to export. Must not be null.
     * @param timeField The time field to use for sorting the export. Must not be null.
     * @param overwrite If set, allow export files to be overwritten
     * @throws SolrServerException       if there is a problem with exporting the index.
     * @throws IOException               if there is a problem creating the files or communicating with Solr.
     * @throws SolrImportExportException if there is a problem in communicating with Solr.
     */
    public static void exportIndex(String indexName, File toDir, String solrUrl, String timeField, boolean overwrite)
        throws SolrServerException, SolrImportExportException, IOException {
        exportIndex(indexName, toDir, solrUrl, timeField, null, overwrite);
    }

    /**
     * Import previously exported documents (or externally created CSV files that have the appropriate structure)
     * into the specified index.
     *
     * @param indexName the index to import.
     * @param fromDir   the source directory. Must exist and be readable.
     *                  The importer will look for files whose name starts with 
indexName
* and ends with .csv (to match what is generated by #makeExportFilename). * @param solrUrl The solr URL for the index to export. Must not be null. * @param clear if true, clear the index before importing. * @throws IOException if there is a problem reading the files or communicating with Solr. * @throws SolrServerException if there is a problem reading the files or communicating with Solr. * @throws SolrImportExportException if there is a problem communicating with Solr. */ public static void importIndex(final String indexName, File fromDir, String solrUrl, boolean clear) throws IOException, SolrServerException, SolrImportExportException { if (StringUtils.isBlank(solrUrl)) { throw new SolrImportExportException( "Could not construct solr URL for index" + indexName + ", aborting export."); } if (!fromDir.exists() || !fromDir.canRead()) { throw new SolrImportExportException("Source directory " + fromDir + " doesn't exist or isn't readable, aborting export of index " + indexName); } HttpSolrClient solr = new HttpSolrClient.Builder(solrUrl).build(); // must get multivalue fields before clearing List multivaluedFields = getMultiValuedFields(solr); if (clear) { clearIndex(solrUrl); } File[] files = fromDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { return name.startsWith(indexName + EXPORT_SEP) && name.endsWith(".csv"); } }); if (files == null || files.length == 0) { log.warn("No export files found in directory " + fromDir.getCanonicalPath() + " for index " + indexName); return; } Arrays.sort(files); for (File file : files) { log.info("Importing file " + file.getCanonicalPath()); ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest("/update/csv"); contentStreamUpdateRequest.setParam("skip", "_version_"); for (String mvField : multivaluedFields) { contentStreamUpdateRequest.setParam("f." + mvField + ".split", "true"); contentStreamUpdateRequest.setParam("f." 
+ mvField + ".separator", MULTIPLE_VALUES_SPLITTER); } contentStreamUpdateRequest.setParam("stream.contentType", "text/csv;charset=utf-8"); contentStreamUpdateRequest.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); contentStreamUpdateRequest.addFile(file, "text/csv;charset=utf-8"); solr.request(contentStreamUpdateRequest); } solr.commit(true, true); } /** * Determine the names of all multi-valued fields from the data in the index. * * @param solr the solr server to query. * @return A list containing all multi-valued fields, or an empty list if none are found / there aren't any. */ private static List getMultiValuedFields(HttpSolrClient solr) { List result = new ArrayList<>(); try { LukeRequest request = new LukeRequest(); // this needs to be a non-schema request, otherwise we'll miss dynamic fields LukeResponse response = request.process(solr); Map fields = response.getFieldInfo(); for (LukeResponse.FieldInfo info : fields.values()) { if (info.getSchema().contains(FieldFlag.MULTI_VALUED.getAbbreviation() + "")) { result.add(info.getName()); } } } catch (IOException | SolrServerException e) { log.fatal("Cannot determine which fields are multi valued: " + e.getMessage(), e); } return result; } /** * Remove all documents from the Solr index with the given URL, then commit and optimise the index. * * @param solrUrl URL of the Solr core to clear. * @throws IOException if there is a problem in communicating with Solr. * @throws SolrServerException if there is a problem in communicating with Solr. */ public static void clearIndex(String solrUrl) throws IOException, SolrServerException { HttpSolrClient solr = new HttpSolrClient.Builder(solrUrl).build(); solr.deleteByQuery("*:*"); solr.commit(); solr.optimize(); } /** * Exports documents from the given index to the specified target directory in batches of #ROWS_PER_FILE, * starting at fromWhen (or all documents). * See #makeExportFilename for the file names that are generated. 
* * @param indexName The index to export. * @param toDir The target directory for the export. Will be created if it doesn't exist yet. The directory * must be writeable. * @param solrUrl The solr URL for the index to export. Must not be null. * @param timeField The time field to use for sorting the export. Must not be null. * @param fromWhen Optionally, from when to export. See options for allowed values. If null or empty, all * documents will be exported. * @param overwrite If set, allow export files to be overwritten * @throws SolrServerException if there is a problem with exporting the index. * @throws IOException if there is a problem creating the files or communicating with Solr. * @throws SolrImportExportException if there is a problem in communicating with Solr. */ public static void exportIndex(String indexName, File toDir, String solrUrl, String timeField, String fromWhen, boolean overwrite) throws SolrServerException, IOException, SolrImportExportException { log.info(String.format("Export Index [%s] to [%s] using [%s] Time Field[%s] FromWhen[%s]", indexName, toDir, solrUrl, timeField, fromWhen)); if (StringUtils.isBlank(solrUrl)) { throw new SolrImportExportException( "Could not construct solr URL for index" + indexName + ", aborting export."); } if (!toDir.exists() || !toDir.canWrite()) { throw new SolrImportExportException("Target directory " + toDir + " doesn't exist or is not writable, aborting export of index " + indexName); } HttpSolrClient solr = new HttpSolrClient.Builder(solrUrl).build(); SolrQuery query = new SolrQuery("*:*"); if (StringUtils.isNotBlank(fromWhen)) { String lastValueFilter = makeFilterQuery(timeField, fromWhen); if (StringUtils.isNotBlank(lastValueFilter)) { query.addFilterQuery(lastValueFilter); } } query.setRows(0); query.setGetFieldStatistics(timeField); Map fieldInfo = solr.query(query).getFieldStatsInfo(); if (fieldInfo == null || !fieldInfo.containsKey(timeField)) { log.warn(String.format( "Queried [%s]. 
No fieldInfo found while exporting index [%s] time field [%s] from [%s]. Export " + "cancelled.", solrUrl, indexName, timeField, fromWhen)); return; } FieldStatsInfo timeFieldInfo = fieldInfo.get(timeField); if (timeFieldInfo == null || timeFieldInfo.getMin() == null) { log.warn(String.format( "Queried [%s]. No earliest date found while exporting index [%s] time field [%s] from [%s]. Export " + "cancelled.", solrUrl, indexName, timeField, fromWhen)); return; } Date earliestTimestamp = (Date) timeFieldInfo.getMin(); query.setGetFieldStatistics(false); query.clearSorts(); query.setRows(0); query.setFacet(true); query.add(FacetParams.FACET_RANGE, timeField); query.add(FacetParams.FACET_RANGE_START, SOLR_DATE_FORMAT.get().format(earliestTimestamp) + "/MONTH"); query.add(FacetParams.FACET_RANGE_END, "NOW/MONTH+1MONTH"); query.add(FacetParams.FACET_RANGE_GAP, "+1MONTH"); query.setFacetMinCount(1); List monthFacets = solr.query(query).getFacetRanges().get(0).getCounts(); for (RangeFacet.Count monthFacet : monthFacets) { Date monthStartDate; String monthStart = monthFacet.getValue(); try { monthStartDate = SOLR_DATE_FORMAT_NO_MS.get().parse(monthStart); } catch (java.text.ParseException e) { throw new SolrImportExportException("Could not read start of month batch as date: " + monthStart, e); } int docsThisMonth = monthFacet.getCount(); SolrQuery monthQuery = new SolrQuery("*:*"); monthQuery.setRows(ROWS_PER_FILE); monthQuery.set("wt", "csv"); monthQuery.set("fl", "*"); monthQuery.setParam("csv.mv.separator", MULTIPLE_VALUES_SPLITTER); monthQuery.addFilterQuery(timeField + ":[" + monthStart + " TO " + monthStart + "+1MONTH]"); for (int i = 0; i < docsThisMonth; i += ROWS_PER_FILE) { monthQuery.setStart(i); URL url = new URL(solrUrl + "/select?" 
+ monthQuery.toString()); File file = new File(toDir.getCanonicalPath(), makeExportFilename(indexName, monthStartDate, docsThisMonth, i)); if (file.createNewFile() || overwrite) { FileUtils.copyURLToFile(url, file); String message = String.format( "Solr export to file [%s] complete. Export for Index [%s] Month [%s] Batch [%d] Num Docs [%d]", file.getCanonicalPath(), indexName, monthStart, i, docsThisMonth); log.info(message); } else if (file.exists()) { String message = String.format( "Solr export file [%s] already exists. Export failed for Index [%s] Month [%s] Batch [%d] " + "Num Docs [%d]", file.getCanonicalPath(), indexName, monthStart, i, docsThisMonth); throw new SolrImportExportException(message); } else { String message = String.format( "Cannot create solr export file [%s]. Export failed for Index [%s] Month [%s] Batch [%d] Num" + " Docs [%d]", file.getCanonicalPath(), indexName, monthStart, i, docsThisMonth); throw new SolrImportExportException(message); } } } } /** * Return a filter query that represents the export date range passed in as lastValue * * @param timeField the time field to use for the date range * @param lastValue the requested date range, see options for acceptable values * @return a filter query representing the date range, or null if no suitable date range can be created. */ private static String makeFilterQuery(String timeField, String lastValue) { if ("m".equals(lastValue)) { // export data from the previous month return timeField + ":[NOW/MONTH-1MONTH TO NOW/MONTH]"; } int days; if ("d".equals(lastValue)) { days = 1; } else { // other acceptable value: a number, specifying how many days back to export days = Integer.valueOf(lastValue); // TODO check value? } return timeField + ":[NOW/DAY-" + days + "DAYS TO " + SOLR_DATE_FORMAT.get().format(new Date()) + "]"; } /** * Return the specified directory name or fall back to a default value. * * @param directoryValue a specific directory name. Optional. 
* @return directoryValue if given as a non-blank string. A default directory otherwise. */ private static String makeDirectoryName(String directoryValue) { if (StringUtils.isNotBlank(directoryValue)) { return directoryValue; } return configurationService.getProperty("dspace.dir") + File.separator + "solr-export" + File.separator; } /** * Creates a filename for the export batch. * * @param indexName The name of the index being exported. * @param exportStart The start timestamp of the export * @param totalRecords The total number of records in the export. * @param index The index of the current batch. * @return A file name that is appropriate to use for exporting the batch of data described by the parameters. */ private static String makeExportFilename(String indexName, Date exportStart, long totalRecords, int index) { String exportFileNumber = ""; if (totalRecords > ROWS_PER_FILE) { exportFileNumber = StringUtils .leftPad("" + (index / ROWS_PER_FILE), (int) Math.ceil(Math.log10(totalRecords / ROWS_PER_FILE)), "0"); } return indexName + EXPORT_SEP + EXPORT_DATE_FORMAT.get().format(exportStart) + (StringUtils.isNotBlank(exportFileNumber) ? "_" + exportFileNumber : "") + ".csv"; } /** * Returns the full URL for the specified index name. * * @param indexName the index name whose Solr URL is required. If the index name starts with * "statistics" or is "authority", the Solr base URL will be looked up * in the corresponding DSpace configuration file. Otherwise, it will fall back to a default. * @return the full URL to the Solr index, as a String. */ private static String makeSolrUrl(String indexName) { if (indexName.startsWith("statistics")) { // TODO account for year shards properly? 
return configurationService.getProperty("solr-statistics.server") + indexName .replaceFirst("statistics", ""); } else if ("authority".equals(indexName)) { return configurationService.getProperty("solr.authority.server"); } return "http://localhost:8080/solr/" + indexName; // TODO better default? } /** * Returns a time field for the specified index name that is suitable for incremental export. * * @param indexName the index name whose Solr URL is required. * @return the name of the time field, or null if no suitable field can be determined. */ private static String makeTimeField(String indexName) { if (indexName.startsWith("statistics")) { return "time"; } else if ("authority".equals(indexName)) { return "last_modified_date"; } return null; // TODO some sort of default? } /** * A utility method to print out all available command-line options and exit given the specified code. * * @param options the supported options. * @param exitCode the exit code to use. The method will call System#exit(int) with the given code. */ private static void printHelpAndExit(Options options, int exitCode) { HelpFormatter myhelp = new HelpFormatter(); myhelp.printHelp(SolrImportExport.class.getSimpleName() + "\n", options); System.out.println("\n\nCommand Defaults"); System.out.println("\tsolr-export-statistics [-a export] [-i statistics]"); System.out.println("\tsolr-import-statistics [-a import] [-i statistics]"); System.out.println("\tsolr-reindex-statistics [-a reindex] [-i statistics]"); System.exit(exitCode); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy