All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.legstar.cobol.utils.PictureUtil Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2010 LegSem.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser Public License v2.1
 * which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 * 
 * Contributors:
 *     LegSem - initial API and implementation
 ******************************************************************************/
package com.legstar.cobol.utils;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import com.legstar.cob2xsd.PictureSymbol;

/**
 * Utility class provides methods to introspect COBOL picture clauses.
 *
 */
public final class PictureUtil {
    
    /**
     * Utility class.
     */
    private PictureUtil() {
        
    }
    
    /**
     * Determines how many times a given character occurs in a picture string.
     * A character can appear standalone or as a factored sequence like X(nn).
     * Unlike all other picture symbols, currency symbols are case sensitive.
     * For example, �D� and �d� specify different currency symbols.
     * @param picture the picture string
     * @param currencySymbol the currency symbol
     * @return a map of all characters to search for
     */
    public static Map < Character, Integer > getPictureCharOccurences(
            final String picture,
            final char currencySymbol) {

        Map < Character, Integer > charNum = new HashMap < Character, Integer >();
        charNum.put('A', 0);
        charNum.put('B', 0);
        charNum.put('G', 0);
        charNum.put('N', 0);
        charNum.put('X', 0);
        charNum.put('P', 0);
        charNum.put('Z', 0);
        charNum.put('0', 0);
        charNum.put('/', 0);
        charNum.put('+', 0);
        charNum.put('-', 0);
        charNum.put('*', 0);
        charNum.put('C', 0);
        charNum.put('D', 0);
        charNum.put('.', 0);
        charNum.put(',', 0);
        charNum.put('9', 0);
        charNum.put('E', 0);
        charNum.put('S', 0);
        charNum.put('V', 0);
        charNum.put(currencySymbol, 0);
        
        List < PictureSymbol > pictureSymbols = parsePicture(picture, currencySymbol);
        for (PictureSymbol pictureSymbol : pictureSymbols) {
            Integer number = charNum.get(pictureSymbol.getSymbol());
            if (number != null) {
                number += pictureSymbol.getNumber();
                charNum.put(pictureSymbol.getSymbol(), number);
            }
        }

        return charNum;
    }
    
    /**
     * The COBOL picture clause determines the length, in number of characters,
     * for all alphanumeric and numeric-edited data items.
     * 

* The length evaluated here is either the number of character positions * (which corresponds to the size constraint on the client side) or the * byte size of the storage needed on z/OS for the data item. You select between * one or the other with the calcStorageLength parameter. *

* When the currency sign is more than a single character, then the first * occurrence of the currency symbol counts for more than one byte of storage. * * @param charNum map of all characters in the picture string * @param isSignSeparate if sign occupies a separated position (no overpunch) * @param currencySign the currency sign * @param currencySymbol the currency symbol * @param calcStorageLength when true the length returned is the z/OS storage length * @return the length, in number of characters, of the data item */ public static int calcLengthFromPicture( final Map < Character, Integer > charNum, final boolean isSignSeparate, final String currencySign, final char currencySymbol, final boolean calcStorageLength) { int length = 0; /* character position occupied by each picture symbol */ Map < Character, Integer > charLen = new HashMap < Character, Integer >(); charLen.put('A', 1); charLen.put('B', 1); charLen.put('G', (calcStorageLength) ? 2 : 1); charLen.put('N', (calcStorageLength) ? 2 : 1); charLen.put('X', 1); charLen.put('P', 0); charLen.put('Z', 1); charLen.put('0', 1); charLen.put('/', 1); charLen.put('+', 1); charLen.put('-', 1); charLen.put('*', 1); charLen.put('C', 2); charLen.put('D', 2); charLen.put('.', 1); charLen.put(',', 1); charLen.put('9', 1); charLen.put('E', 1); charLen.put('S', (isSignSeparate) ? 1 : 0); charLen.put('V', 0); charLen.put(currencySymbol, 1); for (Map.Entry < Character, Integer > entry : charNum.entrySet()) { length += entry.getValue() * charLen.get(entry.getKey()); } if (currencySign.length() > 1 && charNum.get(currencySymbol) > 1) { length += currencySign.length() - 1; } return length; } /** * Try to infer a regular expression to match a COBOL picture clause. *

* The objective is to build a string that would fit the internal representation * of a picture edited COBOL field. *

* If a picture is not restrictive, for instance PIC X does not impose * any restriction, then we return null (no pattern). *

* Regular expressions in XML Schema are more like PERL than Java regex. * @param picture the picture clause * @param currencySign the currency sign * @param currencySymbol the currency symbol * @return a regular expression */ public static String getRegexFromPicture( final String picture, final String currencySign, final char currencySymbol) { StringBuilder result = new StringBuilder(); /* Table that associate a picture symbol to a regex atom */ Map < Character, String > charRegex = new HashMap < Character, String >(); charRegex.put('A', "[\\p{L}\\s]"); // any letter or space character charRegex.put('B', "\\s"); // space charRegex.put('G', "."); // TODO does not reflect the double byte nature charRegex.put('N', "."); // TODO does not reflect the double byte nature charRegex.put('X', "."); // Any byte charRegex.put('P', "[\\d\\.]"); // Floating decimal point charRegex.put('Z', "[1-9\\s]"); // Numeric or space charRegex.put('0', "0"); // Zero character charRegex.put('/', "/"); // Forward slash character charRegex.put('+', "[\\+\\-\\d]"); // Position can be a sign or a digit charRegex.put('-', "[\\+\\-\\d]"); // Position can be a sign or a digit charRegex.put('*', "[1-9\\*]"); // Position can be an asterisk or a digit charRegex.put('C', "(CR|\\s\\s)"); // Credit or spaces charRegex.put('D', "(DB|\\s\\s)"); // Debit or spaces charRegex.put('.', "\\."); // Decimal point character charRegex.put(',', ","); // Comma character charRegex.put('9', "\\d"); // A digit charRegex.put('E', "E"); // Exponent charRegex.put('S', "[\\+\\-]"); // A numeric sign charRegex.put('V', ""); // A virtual decimal point charRegex.put(currencySymbol, "(" + currencySign.replace(" ", "\\s") + "|\\d|\\s)"); List < PictureSymbol > pictureSymbols = parsePicture(picture, currencySymbol); /* If there is only one symbol and it is non restrictive, no pattern*/ if (pictureSymbols.size() == 1) { String symbol = charRegex.get(pictureSymbols.get(0).getSymbol()); if (symbol == null || symbol.equals(".")) { return null; } } /* Add quantifiers */ for (PictureSymbol pictureSymbol : pictureSymbols) { String regex = charRegex.get(pictureSymbol.getSymbol()); if (charRegex != null) { result.append(regex); int occurs = pictureSymbol.getNumber(); if (occurs > 1) { result.append("{0," + occurs + "}"); } else { result.append("?"); } } } return result.toString(); } /** * Parse a COBOL picture clause. Character symbols are returned in the order * where they are found in the picture clause. All factoring is resolved and * each character is associated with its occurrence number. *

* For instance: 9(3)V99XX becomes 4 entries in the list for characters 9, V, 9 and X. * First 9 occurs 3 times, V occurs 1 time, 9 occurs 2 and X occurs 2. * @param currencySymbol the currency symbol * @param picture the COBOL picture clause * @return ordered list of symbols appearing in the picture clause with their * number of occurrences. */ public static List < PictureSymbol > parsePicture( final String picture, final char currencySymbol) { int factoredNumber = 0; boolean factorSequence = false; char lastChar = 0; PictureSymbol pictureSymbol = null; List < PictureSymbol > result = new LinkedList < PictureSymbol >(); for (int i = 0; i < picture.length(); i++) { char c = picture.charAt(i); if (c != currencySymbol) { c = Character.toUpperCase(c); } if (factorSequence) { if (c == ')') { pictureSymbol.setNumber(pictureSymbol.getNumber() + factoredNumber - 1); factorSequence = false; } else { if (Character.isDigit(c)) { factoredNumber = factoredNumber * 10 + Character.getNumericValue(c); } } } else { if (c == '(') { factoredNumber = 0; factorSequence = true; } else { /* CR and DB are special cases where we need to ignore, * the second character R or B.*/ if ((c != 'B' || lastChar != 'D') && (c != 'R' || lastChar != 'C')) { if (c == lastChar) { pictureSymbol.setNumber(pictureSymbol.getNumber() + 1); } else { pictureSymbol = new PictureSymbol(c, 1); result.add(pictureSymbol); lastChar = c; } } } } } return result; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy