All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jxls.util.CellRefUtil Maven / Gradle / Ivy

The newest version!
package org.jxls.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jxls.common.JxlsException;

/**
 * This is a class to convert Excel cell names to (sheet, row, col) representations and vice versa.
 * The current code is taken from Apache POI CellReference class ( http://poi.apache.org/apidocs/org/apache/poi/ss/util/CellReference.html ).
 * 
 * @author Leonid Vysochyn
 */
public class CellRefUtil {
    private static final char DELIMITER = '\'';
    /** Matches a single cell ref with no absolute ('$') markers */
    private static final Pattern CELL_REF_PATTERN = Pattern.compile("([A-Za-z]+)([0-9]+)");
    /** The character (!) that separates sheet names from cell references */
    public static final char SHEET_NAME_DELIMITER = '!';
    /** The character (:) that separates the two cell references in a multi-cell area reference */
    private static final char CELL_DELIMITER = ':';
    /** The character ($) that signifies a row or column value is absolute instead of relative */
    public static final char ABSOLUTE_REFERENCE_MARKER = '$';
    /** The character (') used to quote sheet names when they contain special characters */
    private static final char SPECIAL_NAME_DELIMITER = '\'';

    /**
     * Takes in a 0-based base-10 column and returns a ALPHA-26 representation. e.g. column #3 -> D
     * 
     * @param col -
     * @return -
     */
    public static String convertNumToColString(int col) {
        String colRef = "";
        int excelColNum = col + 1; // Excel counts column A as the 1st column, we treat it as the 0th one.
        int colRemain = excelColNum;
        while (colRemain > 0) {
            int thisPart = colRemain % 26;
            if (thisPart == 0) {
                thisPart = 26;
            }
            colRemain = (colRemain - thisPart) / 26;
            char colChar = (char) (thisPart + 64); // The letter A is at 65
            colRef = colChar + colRef;
        }
        return colRef;
    }

    public static void appendFormat(StringBuilder out, String rawSheetName) {
        boolean needsQuotes = needsDelimiting(rawSheetName);
        if (needsQuotes) {
            out.append(DELIMITER);
            appendAndEscape(out, rawSheetName);
            out.append(DELIMITER);
        } else {
            out.append(rawSheetName);
        }
    }

    static void appendAndEscape(StringBuilder sb, String rawSheetName) {
        int len = rawSheetName.length();
        for (int i = 0; i < len; i++) {
            char ch = rawSheetName.charAt(i);
            if (ch == DELIMITER) {
                // single quotes (') are encoded as ('')
                sb.append(DELIMITER);
            }
            sb.append(ch);
        }
    }

    static boolean needsDelimiting(String rawSheetName) {
        int len = rawSheetName.length();
        if (len < 1) {
            throw new JxlsException("Zero length string is an invalid sheet name");
        }
        if (Character.isDigit(rawSheetName.charAt(0))) {
            // sheet name with digit in the first position always requires delimiting
            return true;
        }
        for (int i = 0; i < len; i++) {
            char ch = rawSheetName.charAt(i);
            if (isSpecialChar(ch)) {
                return true;
            }
        }
        if (Character.isLetter(rawSheetName.charAt(0)) && Character.isDigit(rawSheetName.charAt(len - 1)) // note - values like "A$1:$C$20" don't get this far
                && nameLooksLikePlainCellReference(rawSheetName)) {
            return true;
        }
        return nameLooksLikeBooleanLiteral(rawSheetName);
    }

    /**
     * Note - this method assumes the specified rawSheetName has only letters and digits.  It
     * cannot be used to match absolute or range references (using the dollar or colon char).
     * 
     * 

Some notable cases: *

* * * * * * * * * * *
Input Result Comments
"A1"  true 
"a111"  true 
"AA"  false 
"aa1"  true 
"A1A"  false 
"A1A1"  false 
"A$1:$C$20"  falseNot a plain cell reference
"SALES20080101"  trueStill needs delimiting even though well out of range

* * @return true if there is any possible ambiguity that the specified rawSheetName * could be interpreted as a valid cell name. */ static boolean nameLooksLikePlainCellReference(String rawSheetName) { Matcher matcher = CELL_REF_PATTERN.matcher(rawSheetName); if (!matcher.matches()) { return false; } // rawSheetName == "Sheet1" gets this far. String lettersPrefix = matcher.group(1); String numbersSuffix = matcher.group(2); return cellReferenceIsWithinRange(lettersPrefix, numbersSuffix); } /** * Used to decide whether sheet names like 'AB123' need delimiting due to the fact that they * look like cell references. * *

This code is currently being used for translating formulas represented with Ptg * tokens into human readable text form. In formula expressions, a sheet name always has a * trailing '!' so there is little chance for ambiguity. It doesn't matter too much what this * method returns but it is worth noting the likely consumers of these formula text strings: *

    *
  1. POI's own formula parser
  2. *
  3. Visual reading by human
  4. *
  5. VBA automation entry into Excel cell contents e.g. ActiveCell.Formula = "=c64!A1"
  6. *
  7. Manual entry into Excel cell contents
  8. *
  9. Some third party formula parser
  10. *

* *

At the time of writing, POI's formula parser tolerates cell-like sheet names in formulas * with or without delimiters. The same goes for Excel(2007), both manual and automated entry.

* *

For better or worse this implementation attempts to replicate Excel's formula renderer. * Excel uses range checking on the apparent 'row' and 'column' components. Note however that * the maximum sheet size varies across versions.

*/ private static boolean cellReferenceIsWithinRange(String lettersPrefix, String numbersSuffix) { return cellReferenceIsWithinRange(lettersPrefix, numbersSuffix, 0x0100, 0x10000); } /** * Used to decide whether a name of the form "[A-Z]*[0-9]*" that appears in a formula can be * interpreted as a cell reference. Names of that form can be also used for sheets and/or * named ranges, and in those circumstances, the question of whether the potential cell * reference is valid (in range) becomes important. * *

Note - that the maximum sheet size varies across Excel versions:

* *
* * * * *
Version  File Format  Last Column  Last Row
97-2003BIFF8"IV" (2^8)65536 (2^14)
2007BIFF12"XFD" (2^14)1048576 (2^20)
* POI currently targets BIFF8 (Excel 97-2003), so the following behaviour can be observed for * this method: *
* * * * * * * * * * * *
Input           Result 
"A", "1"true
"a", "111"true
"A", "65536"true
"A", "65537"false
"iv", "1"true
"IW", "1"false
"AAA", "1"false
"a", "111"true
"Sheet", "1"false
* * @param colStr a string of only letter characters * @param rowStr a string of only digit characters * @param lastColumnIndex - * @param lastRowIndex - * @return true if the row and col parameters are within range of a BIFF8 spreadsheet. */ public static boolean cellReferenceIsWithinRange(String colStr, String rowStr, int lastColumnIndex, int lastRowIndex) { if (!isColumnWithnRange(colStr, lastColumnIndex)) { return false; } return isRowWithnRange(rowStr, lastRowIndex); } public static boolean isColumnWithnRange(String colStr, int lastColumnIndex) { String lastCol = convertNumToColString(lastColumnIndex); int lastColLength = lastCol.length(); int numberOfLetters = colStr.length(); if (numberOfLetters > lastColLength) { // "Sheet1" case etc return false; // that was easy } return !(numberOfLetters == lastColLength && colStr.toUpperCase().compareTo(lastCol) > 0); } public static boolean isRowWithnRange(String rowStr, int lastRowIndex) { int rowNum = Integer.parseInt(rowStr); if (rowNum < 0) { throw new IllegalStateException("Invalid rowStr '" + rowStr + "'."); } if (rowNum == 0) { // execution gets here because caller does first pass of discriminating // potential cell references using a simplistic regex pattern. return false; } return rowNum <= lastRowIndex; } static boolean nameLooksLikeBooleanLiteral(String rawSheetName) { switch(rawSheetName.charAt(0)) { case 'T': case 't': return "TRUE".equalsIgnoreCase(rawSheetName); case 'F': case 'f': return "FALSE".equalsIgnoreCase(rawSheetName); } return false; } /** * @return true if the presence of the specified character in a sheet name would * require the sheet name to be delimited in formulas. This includes every non-alphanumeric * character besides underscore '_' and dot '.'. */ static boolean isSpecialChar(char ch) { // note - Character.isJavaIdentifierPart() would allow dollars '$' if (Character.isLetterOrDigit(ch)) { return false; } switch(ch) { case '.': // dot is OK case '_': // underscore is OK return false; case '\n': case '\r': case '\t': throw new JxlsException("Illegal character (0x" + Integer.toHexString(ch) + ") found in sheet name"); default: } return true; } /** * takes in a column reference portion of a CellRef and converts it from * ALPHA-26 number format to 0-based base 10. *
 'A' -> 0
     * 'Z' -> 25
     * 'AA' -> 26
     * 'IV' -> 255
* * @param ref - * @return zero based column index */ public static int convertColStringToIndex(String ref) { int pos = 0; int retval = 0; for (int k = ref.length() - 1; k >= 0; k--) { char thechar = ref.charAt(k); if (thechar == ABSOLUTE_REFERENCE_MARKER) { if (k != 0) { throw new IllegalArgumentException("Bad col ref format '" + ref + "'"); } break; } // Character.getNumericValue() returns the values // 10-35 for the letter A-Z int shift = (int) Math.pow(26, pos); retval += (Character.getNumericValue(thechar) - 9) * shift; pos++; } return retval - 1; } public static String[] separateRefParts(String reference) { int plingPos = reference.lastIndexOf(SHEET_NAME_DELIMITER); String sheetName = parseSheetName(reference, plingPos); int start = plingPos + 1; int length = reference.length(); int loc = start; // skip initial dollars if (reference.charAt(loc) == ABSOLUTE_REFERENCE_MARKER) { loc++; } // step over column name chars until first digit (or dollars) for row number. for (; loc < length; loc++) { char ch = reference.charAt(loc); if (Character.isDigit(ch) || ch == ABSOLUTE_REFERENCE_MARKER) { break; } } return new String[] { sheetName, reference.substring(start,loc), reference.substring(loc), }; } public static String parseSheetName(String reference, int indexOfSheetNameDelimiter) { if (indexOfSheetNameDelimiter < 0) { return null; } boolean isQuoted = reference.charAt(0) == SPECIAL_NAME_DELIMITER; if (!isQuoted) { return reference.substring(0, indexOfSheetNameDelimiter); } int lastQuotePos = indexOfSheetNameDelimiter-1; if (reference.charAt(lastQuotePos) != SPECIAL_NAME_DELIMITER) { throw new JxlsException("Mismatched quotes: (" + reference + ")"); } // TODO - refactor cell reference parsing logic to one place. // Current known incarnations: // FormulaParser.GetName() // CellReference.parseSheetName() (here) // AreaReference.separateAreaRefs() // SheetNameFormatter.format() (inverse) StringBuilder sb = new StringBuilder(indexOfSheetNameDelimiter); for(int i = 1; i < lastQuotePos; i++) { // Note boundaries - skip outer quotes char ch = reference.charAt(i); if (ch != SPECIAL_NAME_DELIMITER) { sb.append(ch); continue; } if (i < lastQuotePos && reference.charAt(i+1) == SPECIAL_NAME_DELIMITER) { // two consecutive quotes is the escape sequence for a single one i++; // skip this and keep parsing the special name sb.append(ch); continue; } throw new JxlsException("Bad sheet name quote escaping: (" + reference + ")"); } return sb.toString(); } /** * Separates Area refs in two parts and returns them as separate elements in a String array, * each qualified with the sheet name (if present) * * @param reference - * @return array with one or two elements. never null */ public static String[] separateAreaRefs(String reference) { int len = reference.length(); int delimiterPos = -1; boolean insideDelimitedName = false; for (int i = 0; i < len; i++) { switch (reference.charAt(i)) { case CELL_DELIMITER: if (!insideDelimitedName) { if (delimiterPos >= 0) { throw new IllegalArgumentException("More than one cell delimiter '" + CELL_DELIMITER + "' appears in area reference '" + reference + "'"); } delimiterPos = i; } default: continue; case SPECIAL_NAME_DELIMITER: // fall through } if (!insideDelimitedName) { insideDelimitedName = true; continue; } if (i >= len - 1) { // reference ends with the delimited name. // Assume names like: "Sheet1!'A1'" are never legal. throw new IllegalArgumentException("Area reference '" + reference + "' ends with special name delimiter '" + SPECIAL_NAME_DELIMITER + "'"); } if (reference.charAt(i + 1) == SPECIAL_NAME_DELIMITER) { // two consecutive quotes is the escape sequence for a single one i++; // skip this and keep parsing the special name } else { // this is the end of the delimited name insideDelimitedName = false; } } if (delimiterPos < 0) { return new String[] { reference }; } String partA = reference.substring(0, delimiterPos); String partB = reference.substring(delimiterPos+1); if (partB.indexOf(SHEET_NAME_DELIMITER) >= 0) { throw new JxlsException("Unexpected " + SHEET_NAME_DELIMITER + " in second cell reference of '" + reference + "'"); } int plingPos = partA.lastIndexOf(SHEET_NAME_DELIMITER); if (plingPos < 0) { return new String[] { partA, partB, }; } String sheetName = partA.substring(0, plingPos + 1); // +1 to include delimiter return new String[] { partA, sheetName + partB, }; } public static boolean isPlainColumn(String refPart) { for (int i = refPart.length() - 1; i >= 0; i--) { int ch = refPart.charAt(i); if (ch == '$' && i == 0) { continue; } if (ch < 'A' || ch > 'Z') { return false; } } return true; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy