All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.poi.hssf.record.common.UnicodeString Maven / Gradle / Ivy

There is a newer version: 5.2.5
Show newest version
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

package org.apache.poi.hssf.record.common;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Spliterator;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.poi.common.Duplicatable;
import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.hssf.record.cont.ContinuableRecordInput;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.GenericRecordUtil;

import static org.apache.logging.log4j.util.Unbox.box;

/**
 * Unicode String - just standard fields that are in several records.
 * It is considered more desirable than repeating it in all of them.

* This is often called a XLUnicodeRichExtendedString in MS documentation. */ public class UnicodeString implements Comparable, Duplicatable, GenericRecord { private static final Logger LOG = LogManager.getLogger(UnicodeString.class); private static final BitField highByte = BitFieldFactory.getInstance(0x1); // 0x2 is reserved private static final BitField extBit = BitFieldFactory.getInstance(0x4); private static final BitField richText = BitFieldFactory.getInstance(0x8); private short field_1_charCount; private byte field_2_optionflags; private String field_3_string; private List field_4_format_runs; private ExtRst field_5_ext_rst; private UnicodeString(UnicodeString other) { field_1_charCount = other.field_1_charCount; field_2_optionflags = other.field_2_optionflags; field_3_string = other.field_3_string; field_4_format_runs = (other.field_4_format_runs == null) ? null : other.field_4_format_runs.stream().map(FormatRun::new).collect(Collectors.toList()); field_5_ext_rst = (other.field_5_ext_rst == null) ? null : other.field_5_ext_rst.copy(); } public UnicodeString(String str) { setString(str); } /** * construct a unicode string record and fill its fields, ID is ignored * @param in the RecordInputStream to read the record from */ public UnicodeString(RecordInputStream in) { field_1_charCount = in.readShort(); field_2_optionflags = in.readByte(); int runCount = 0; int extensionLength = 0; //Read the number of rich runs if rich text. if (isRichText()) { runCount = in.readShort(); } //Read the size of extended data if present. if (isExtendedText()) { extensionLength = in.readInt(); } boolean isCompressed = ((field_2_optionflags & 1) == 0); int cc = getCharCount(); field_3_string = (isCompressed) ? in.readCompressedUnicode(cc) : in.readUnicodeLEString(cc); if (isRichText() && (runCount > 0)) { field_4_format_runs = new ArrayList<>(runCount); for (int i=0;i 0)) { field_5_ext_rst = new ExtRst(new ContinuableRecordInput(in), extensionLength); if(field_5_ext_rst.getDataSize()+4 != extensionLength) { LOG.atWarn().log("ExtRst was supposed to be {} bytes long, but seems to actually be {}", box(extensionLength),box(field_5_ext_rst.getDataSize() + 4)); } } } public int hashCode() { return Objects.hash(field_1_charCount, field_3_string); } /** * Our handling of equals is inconsistent with compareTo. The trouble is because we don't truely understand * rich text fields yet it's difficult to make a sound comparison. * * @param o The object to compare. * @return true if the object is actually equal. */ public boolean equals(Object o) { if (!(o instanceof UnicodeString)) { return false; } UnicodeString other = (UnicodeString) o; //OK lets do this in stages to return quickly, first check the actual string if (field_1_charCount != other.field_1_charCount || field_2_optionflags != other.field_2_optionflags || !field_3_string.equals(other.field_3_string)) { return false; } //OK string appears to be equal but now lets compare formatting runs if (field_4_format_runs == null) { // Strings are equal, and there are not formatting runs. return (other.field_4_format_runs == null); } else if (other.field_4_format_runs == null) { // Strings are equal, but one or the other has formatting runs return false; } //Strings are equal, so now compare formatting runs. int size = field_4_format_runs.size(); if (size != other.field_4_format_runs.size()) { return false; } for (int i=0;i 255 ) { useUTF16 = true; break; } } if (useUTF16) { //Set the uncompressed bit field_2_optionflags = highByte.setByte(field_2_optionflags); } else { field_2_optionflags = highByte.clearByte(field_2_optionflags); } } public int getFormatRunCount() { return (field_4_format_runs == null) ? 0 : field_4_format_runs.size(); } public FormatRun getFormatRun(int index) { if (field_4_format_runs == null) { return null; } if (index < 0 || index >= field_4_format_runs.size()) { return null; } return field_4_format_runs.get(index); } private int findFormatRunAt(int characterPos) { int size = field_4_format_runs.size(); for (int i=0;i characterPos) { return -1; } } return -1; } /** Adds a font run to the formatted string. * * If a font run exists at the current charcter location, then it is * replaced with the font run to be added. */ public void addFormatRun(FormatRun r) { if (field_4_format_runs == null) { field_4_format_runs = new ArrayList<>(); } int index = findFormatRunAt(r._character); if (index != -1) { field_4_format_runs.remove(index); } field_4_format_runs.add(r); //Need to sort the font runs to ensure that the font runs appear in //character order Collections.sort(field_4_format_runs); //Make sure that we now say that we are a rich string field_2_optionflags = richText.setByte(field_2_optionflags); } public Iterator formatIterator() { if (field_4_format_runs != null) { return field_4_format_runs.iterator(); } return null; } /** * @since POI 5.2.0 */ public Spliterator formatSpliterator() { if (field_4_format_runs != null) { return field_4_format_runs.spliterator(); } return null; } public void removeFormatRun(FormatRun r) { field_4_format_runs.remove(r); if (field_4_format_runs.isEmpty()) { field_4_format_runs = null; field_2_optionflags = richText.clearByte(field_2_optionflags); } } public void clearFormatting() { field_4_format_runs = null; field_2_optionflags = richText.clearByte(field_2_optionflags); } public ExtRst getExtendedRst() { return this.field_5_ext_rst; } void setExtendedRst(ExtRst ext_rst) { if (ext_rst != null) { field_2_optionflags = extBit.setByte(field_2_optionflags); } else { field_2_optionflags = extBit.clearByte(field_2_optionflags); } this.field_5_ext_rst = ext_rst; } /** * Swaps all use in the string of one font index * for use of a different font index. * Normally only called when fonts have been * removed / re-ordered */ public void swapFontUse(short oldFontIndex, short newFontIndex) { if (field_4_format_runs != null) { for (FormatRun run : field_4_format_runs) { if(run._fontIndex == oldFontIndex) { run._fontIndex = newFontIndex; } } } } /** * unlike the real records we return the same as "getString()" rather than debug info * @see #getDebugInfo() * @return String value of the record */ public String toString() { return getString(); } /** * return a character representation of the fields of this record * * * @return String of output for biffviewer etc. * */ public String getDebugInfo() { StringBuilder buffer = new StringBuilder(); buffer.append("[UNICODESTRING]\n"); buffer.append(" .charcount = ") .append(Integer.toHexString(getCharCount())).append("\n"); buffer.append(" .optionflags = ") .append(Integer.toHexString(getOptionFlags())).append("\n"); buffer.append(" .string = ").append(getString()).append("\n"); if (field_4_format_runs != null) { for (int i = 0; i < field_4_format_runs.size();i++) { FormatRun r = field_4_format_runs.get(i); buffer.append(" .format_run").append(i).append(" = ").append(r).append("\n"); } } if (field_5_ext_rst != null) { buffer.append(" .field_5_ext_rst = ").append("\n"); buffer.append(field_5_ext_rst).append("\n"); } buffer.append("[/UNICODESTRING]\n"); return buffer.toString(); } /** * Serialises out the String. There are special rules * about where we can and can't split onto * Continue records. */ public void serialize(ContinuableRecordOutput out) { int numberOfRichTextRuns = 0; int extendedDataSize = 0; if (isRichText() && field_4_format_runs != null) { numberOfRichTextRuns = field_4_format_runs.size(); } if (isExtendedText() && field_5_ext_rst != null) { extendedDataSize = 4 + field_5_ext_rst.getDataSize(); } // Serialise the bulk of the String // The writeString handles tricky continue stuff for us out.writeString(field_3_string, numberOfRichTextRuns, extendedDataSize); if (numberOfRichTextRuns > 0) { //This will ensure that a run does not split a continue for (int i=0;i 0 && field_5_ext_rst != null) { field_5_ext_rst.serialize(out); } } @Override public int compareTo(UnicodeString str) { int result = getString().compareTo(str.getString()); //As per the equals method lets do this in stages if (result != 0) { return result; } //OK string appears to be equal but now lets compare formatting runs if (field_4_format_runs == null) { //Strings are equal, and there are no formatting runs. -> 0 //Strings are equal, but one or the other has formatting runs -> 1 return (str.field_4_format_runs == null) ? 0 : 1; } else if (str.field_4_format_runs == null) { //Strings are equal, but one or the other has formatting runs return -1; } //Strings are equal, so now compare formatting runs. int size = field_4_format_runs.size(); if (size != str.field_4_format_runs.size()) { return size - str.field_4_format_runs.size(); } for (int i=0;i> getGenericProperties() { return GenericRecordUtil.getGenericProperties( "charCount", this::getCharCount, "optionFlags", this::getOptionFlags, "string", this::getString, "formatRuns", () -> field_4_format_runs, "extendedRst", this::getExtendedRst ); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy