// Source listing: javajs.util.CifDataParser, from the jmol artifact.
// Jmol: an open-source Java viewer for chemical structures in 3D.
// Note from the artifact listing: there is a newer version, 14.31.10.
package javajs.util;

import java.io.BufferedReader;

import java.util.Hashtable;

import java.util.Map;

import javajs.api.GenericCifDataParser;
import javajs.api.GenericLineReader;


// BH 11/21/16 -- adds support for array grouping [...] - used in 2016-format magCIF files

/**
*
* A CIF 1.0 tokenizer class for dealing with quoted strings in CIF files.
* 
* Subclassed by org.jmol.adapters.readers.cif.Cif2DataParser
* 
* Greek letters implemented in Jmol 13.3.9 and only for 
* titles and space groups. All other mark ups ignored.
* 
*
* Regarding the treatment of single quotes vs. primes in
* CIF files, PMR wrote:
*
*
*   * There is a formal grammar for CIF
* (see http://www.iucr.org/iucr-top/cif/index.html)
* which confirms this. The textual explanation is
*

*

* 14. Matching single or double quote characters (' or ") may
* be used to bound a string representing a non-simple data value
* provided the string does not extend over more than one line.
*

*

* 15. Because data values are invariably separated from other
* tokens in the file by white space, such a quote-delimited
* character string may contain instances of the character used
* to delimit the string provided they are not followed by white
* space. For example, the data item
*
*  _example  'a dog's life'
*
* is legal; the data value is a dog's life.
*
*
* [PMR - the terminating character(s) are quote+whitespace.
* That would mean that:
*
*  _example 'Jones' life'
*
* would be an error
*
*
* The CIF format was developed in the late 1980s under the aegis of the
* International Union of Crystallography (I am a consultant to the COMCIFs 
* committee). It was ratified by the Union and there have been several 
* workshops. mmCIF is an extension of CIF which includes a relational 
* structure. The formal publications are:
*
*
* Hall, S. R. (1991). "The STAR File: A New Format for Electronic Data 
* Transfer and Archiving", J. Chem. Inform. Comp. Sci., 31, 326-333.
* Hall, S. R., Allen, F. H. and Brown, I. D. (1991). "The Crystallographic
* Information File (CIF): A New Standard Archive File for Crystallography",
* Acta Cryst., A47, 655-685.
* Hall, S.R. & Spadaccini, N. (1994). "The STAR File: Detailed 
* Specifications," J. Chem. Info. Comp. Sci., 34, 505-508.
*
*/

public class CifDataParser implements GenericCifDataParser {

  /**
   * Reports the CIF version handled by this parser. This class returns 1;
   * getAllCifData() turns on object-valued tokens (asObject) when the
   * reported version is 2 or greater.
   * 
   * @return 1
   */
  protected int getVersion() {
    return 1;
  }

  /**
   * The maximum number of columns (data keys) passed to the parser or found in the file
   * for a given loop_ or category.subkey listing.
   * 
   */
  public static final int KEY_MAX = 100;

  private GenericLineReader reader;
  private BufferedReader br;

  /**
   * from buffered reader
   */
  protected String line;
  
  /**
   * working string (buffer)
   * 
   */
  protected String str;
  
  /**
   * pointer to current character on str
   */
  protected int ich;
  
  /**
   * length of str
   * 
   */
  protected int cch;
  
  /**
   * whether we are processing an unquoted value or key
   */
  protected boolean wasUnquoted;
  
  /**
   * optional token terminator; in CIF 2.0 could be } or ] 
   */
  protected char cterm = '\0';
  
  /**
   * string to return for CIF data value . and ?
   */
  protected String nullString = "\0";

  /**
   * A flag to create and return Java objects, not strings.
   * Used only by Jmol scripting x = getProperty("cifInfo", filename).
   */
  protected boolean asObject;

  
  /**
   * debugging flag passed from reader; unused
   * 
   */
  protected boolean debugging;


  /**
   * private processing fields
   * 
   */
  // NOTE(review): presumably the token and buffer position saved by a
  // look-ahead (peek) operation; the code that uses them is not in this
  // part of the file -- confirm
  private Object strPeeked;
  private int ichPeeked;
  // number of keys in the loop_ being read; assigned in getAllCifLoopData
  // and reported by getColumnCount()
  private int columnCount;
  // backing array for getColumnName(i); no initializer here
  private String[] columnNames;
  // backing array for getColumnData(i); preallocated with KEY_MAX slots
  private Object[] columnData = new Object[KEY_MAX];
  // set to true by getAllCifLoopData before it starts reading loop rows
  private boolean isLoop;
  // NOTE(review): not referenced in the visible code; purpose to be confirmed
  private boolean haveData;
  
  /**
   * comments at the top of a file, including #\#CIF_2.0, for example
   */
  private SB fileHeader = new SB(); 
  private boolean isHeader = true;


  /**
   * Set the string value of what is returned for "." and "?"
   * 
   * Note that getAllCifData() overwrites this setting: it assigns null while
   * it parses and leaves the value at "\0" when it returns, so a custom value
   * must be set again after that call.
   * 
   * @param nullString null here returns "." and "?"; default is "\0"
   * 
   */
  public void setNullValue(String nullString) {
    this.nullString  = nullString;    
  }

  /**
   * A global, static map that contains field information. The assumption is that
   * if we read a set of fields for, say, atom_site, once in a lifetime, then
   * that should be good forever. Those are static lists. Or should be....
   */
  private static Map htFields = new Hashtable();
  
  ////////////////////////////////////////////////////////////////
  // special tokenizer class
  ////////////////////////////////////////////////////////////////

  /**
   * No-argument constructor; does nothing itself. The input source is
   * supplied afterwards through set(reader, br, debugging).
   */
  public CifDataParser() {
    // for reflection
  }
    
  /**
   * Returns the entry at position i of the column data array. No range check
   * is made here, so an index outside the array throws
   * ArrayIndexOutOfBoundsException.
   * 
   * @param i zero-based column index
   * @return the stored value for that column; may be null
   */
  @Override
  public Object getColumnData(int i) {
    return columnData[i];
  }

  /**
   * Returns the current column count. getAllCifLoopData sets this to the
   * number of keys found in a loop_.
   * 
   * @return the current value of columnCount
   */
  @Override
  public int getColumnCount() {
    return columnCount;
  }

  /**
   * Returns the entry at position i of the column name array. No null or
   * range check is made here.
   * 
   * NOTE(review): columnNames has no initializer in its declaration, and the
   * code that assigns it is not in the visible part of the file -- confirm
   * that it is always assigned before this method is called.
   * 
   * @param i zero-based column index
   * @return the name stored for that column
   */
  @Override
  public String getColumnName(int i) {
    return columnNames[i];
  }

  /**
   * Supplies the input source and the debugging flag for this
   * Crystallographic Information File (CIF) data parser.
   * 
   * set() should be called immediately upon construction.
   * 
   * There are two ways to provide input, and one of reader or br should be
   * null; if both are given, reader will be ignored.
   * 
   * @param reader  anything that can deliver a line of text or null
   * @param br      a standard BufferedReader
   * @param debugging debugging flag, stored as given
   * @return this parser, so that the call can be chained
   */
  @Override
  public CifDataParser set(GenericLineReader reader, BufferedReader br, boolean debugging) {
    // plain field stores; their order does not matter
    this.debugging = debugging;
    this.br = br;
    this.reader = reader;
    return this;
  }


  /**
   * Returns the text accumulated in the fileHeader buffer, which holds the
   * comments at the top of a file (including #\#CIF_2.0, for example).
   * 
   * @return commented-out section at the start of a CIF file.
   * 
   */
  @Override
  public String getFileHeader() {
    return fileHeader.toString();
  }
  
  
  /**
   * Parses all CIF data for a reader defined in the constructor
   * into a standard Map structure and close the BufferedReader if
   * it exists. 
   * 
   * @return Hashtable of models Vector of Hashtable data
   */
  @Override
  public Map getAllCifData() {
    line = "";
    String key;
    Map data = null, data0 = null;
    Map allData = new Hashtable();
    Lst> models = new  Lst>();
    allData.put("models", models);
    asObject = (getVersion() >= 2);
    nullString = null;
    Lst> saveFrames = new Lst>();
    try {
      while ((key = getNextToken()) != null) {
        if (key.startsWith("global_") || key.startsWith("data_")) {
          models.addLast(data0 = data = new Hashtable());
          data.put("name", key);
          continue;
        }
        if (key.startsWith("loop_")) {
          getAllCifLoopData(data);
          continue;
        }
        if (key.startsWith("save_")) {
          if (key.equals("save_")) {
            int n = saveFrames.size();
            if (n == 0) {
              System.out.println("CIF ERROR ? save_ without corresponding save_xxxx");
              data = data0;
            } else {
              data = saveFrames.removeItemAt(n - 1);
            }
          } else {
            saveFrames.addLast(data);
            Map d = data;
            data = new Hashtable();
            d.put(key, data);
          }
          continue;
        }
        if (key.charAt(0) != '_') {
          System.out.println("CIF ERROR ? should be an underscore: " + key);
        } else {
          Object value = (asObject ? getNextTokenObject() : getNextToken());
          if (value == null) {
            System.out.println("CIF ERROR ? end of file; data missing: " + key);
          } else {
            data.put(fixKey(key), value);
          }
        }
      }
    } catch (Exception e) {
      // ?
    }
    asObject = false;
    try {
      if (br != null)
        br.close();
    } catch (Exception e) {
      // ?
    }
    nullString = "\0";
    return allData;
  }

  /**
   * create our own list of keywords and for each one create a list
   * of data associated with that keyword. For example, a list of all 
   * x coordinates, then a list of all y coordinates, etc.
   * 
   * @param data
   * @throws Exception
   */
  @SuppressWarnings("unchecked")
  private void getAllCifLoopData(Map data) throws Exception {
    String key;
    Lst keyWords = new  Lst();
    Object o;
    while ((o = peekToken()) != null && o instanceof String &&  ((String) o).charAt(0) == '_') {
      key = fixKey((String) getTokenPeeked());
      keyWords.addLast(key);
      data.put(key, new  Lst());
    }
    columnCount = keyWords.size();
    if (columnCount == 0)
      return;
    isLoop = true;
    while (getData())
      for (int i = 0; i < columnCount; i++)
        ((Lst