All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.nasa.pds.citool.validate.ReferenceValidator Maven / Gradle / Ivy

Go to download

The Catalog Tool provides functionality for ingesting PDS3 catalog files into the the PDS4 infrastructure including the Registry and Storage Services.

There is a newer version: 2.2.0
Show newest version
// Copyright 2009, by the California Institute of Technology.
// ALL RIGHTS RESERVED. United States Government sponsorship acknowledged.
// Any commercial use must be negotiated with the Office of Technology Transfer
// at the California Institute of Technology.
//
// This software is subject to U. S. export control laws and regulations
// (22 C.F.R. 120-130 and 15 C.F.R. 730-774). To the extent that the software
// is subject to U.S. export control laws and regulations, the recipient has
// the responsibility to obtain export licenses or other export authority as
// may be required before exporting such information to foreign countries or
// providing access to foreign nationals.
//
// $Id$
package gov.nasa.pds.citool.validate;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import gov.nasa.pds.tools.constants.Constants.ProblemType;
import gov.nasa.pds.tools.label.AttributeStatement;
import gov.nasa.pds.tools.label.Label;
import gov.nasa.pds.tools.label.ObjectStatement;
import gov.nasa.pds.tools.label.validate.LabelValidator;

/**
 * Class that performs validation for reference catalog files.
 *
 * @author mcayanan
 *
 */
public class ReferenceValidator implements LabelValidator {
    private final String REFID = "REFERENCE_KEY_ID";
    private final String REFDESC = "REFERENCE_DESC";

    /**
     * Determines whether or not a reference catalog file is valid.
     *
     * @param label A label representation of a reference catalog file.
     *
     * @return A flag indicating whether or not a reference catalog if valid.
     */
    public boolean validate(Label label) {
        boolean passFlag = true;
        boolean hasYearInID = false;
        List refObjects =
            (List) label.getObjects("REFERENCE");
        for (ObjectStatement ref : refObjects) {
            String id = null;
            String description = null;
            AttributeStatement refIdStmt = null;
            AttributeStatement refDescStmt = null;
            if (ref.getAttribute(REFID) != null) {
                refIdStmt = ref.getAttribute(REFID);
                id = refIdStmt.getValue().toString();
            }
            if (ref.getAttribute(REFDESC) != null) {
                refDescStmt = ref.getAttribute(REFDESC);
                description = refDescStmt.getValue().toString();
            }

            //A REFERENCE_KEY_ID can have an author and year field. If it
            //appears that it has both components, do reference checking.
            if (id.matches("[a-zA-Z&]*[0-9]{4}[A-Z]?")) {
                int idYear = -1;
                try {
                    //Parse the year from the REFERENCE_KEY_ID
                    if (Character.isLetter(id.charAt(id.length()-1))) {
                        idYear = Integer.parseInt(
                                id.substring(id.length() - 5,
                                id.length() - 1));
                    } else {
                        idYear = Integer.parseInt(
                                id.substring(id.length() - 4));
                    }
                    hasYearInID = true;
                } catch(Exception e) {
                    hasYearInID = false;
                    passFlag = false;
                }
                //If a year field was found in the REFERENCE_KEY_ID, check to
                //see if it matches the year found at the end of the citation
                if (hasYearInID) {
                    int descriptionYear = -1;
                    String value = "";
                    try {
                        String filteredDescription =
                            description.replaceAll("\\s+", " ").trim();
                        value = filteredDescription.substring(
                                filteredDescription.lastIndexOf(' ') + 1)
                                .trim();
                        String filteredValue = value.replace('.', ' ').trim();
                        descriptionYear = Integer.parseInt(filteredValue);
                    } catch(NumberFormatException n) {
                        label.addProblem(refDescStmt.getSourceURI(),
                                new Integer(refDescStmt.getLineNumber()), null,
                                "referenceDescription.missingYear",
                                ProblemType.MISSING_VALUE, value);
                    }
                    if ( (descriptionYear != -1)
                            && (idYear != descriptionYear) ) {
                        //Product-tools library adds commas in year values.
                        //So the years were converted into strings to resolve
                        //the issue.
                        Object[] arguments = {new Integer(idYear).toString(),
                                new Integer(descriptionYear).toString()};
                        label.addProblem(refIdStmt.getSourceURI(),
                                new Integer(refIdStmt.getLineNumber()), null,
                                "referenceId.misMatchedYear",
                                ProblemType.UNKNOWN_VALUE, arguments);
                    }
                    int end = -1;
                    String firstAuthor = "";
                    if ( ((end = id.indexOf("ETAL")) != -1)
                            || ((end = id.indexOf("&")) != -1) )  {
                        firstAuthor = id.substring(0, end);
                    } else if (hasYearInID) {
                        firstAuthor = id.substring(0, id.indexOf(
                                new Integer(idYear).toString()));
                    }
                    if ("".equals(firstAuthor)) {
                        label.addProblem(refIdStmt.getSourceURI(),
                                new Integer(refIdStmt.getLineNumber()), null,
                                "referenceId.missingAuthor",
                                ProblemType.INVALID_VALUE, id);
                        passFlag = false;
                    }
                    else {
                        //Manipulate the author id so that authors with spaces
                        //or apostrophes in the description will be
                        //a match (A'Hearn, DE Kwok, etc.).
                        StringBuffer manipulatedAuthor = new StringBuffer();
                        for (int i = 0; i < firstAuthor.length(); i++) {
                            manipulatedAuthor.append(firstAuthor.charAt(i));
                            if (i != (firstAuthor.length() - 1)) {
                                manipulatedAuthor.append("[\\s'-]?");
                            }
                        }
                        Pattern pattern = Pattern.compile(manipulatedAuthor
                                + ".*", Pattern.CASE_INSENSITIVE);
                        Matcher matcher = pattern.matcher(description);
                        if (!matcher.find()) {
                            label.addProblem(refDescStmt.getSourceURI(),
                                    new Integer(refDescStmt.getLineNumber()),
                                    null,
                                    "referenceDescription.missingAuthor",
                                    ProblemType.MISSING_VALUE, firstAuthor);
                        }
                    }
                }
            } else {
          /*
                log.log(new ToolsLogRecord(CIToolLevel.INFO,
                "Skipping " + id + ". It does not have author and year fields.",
                        refDescStmt.getFilename(), refDescStmt.getLineNumber()));
          */
            }
        }
        return passFlag;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy