org.apache.pdfbox.encoding.conversion.CJKConverter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pdfbox Show documentation
Show all versions of pdfbox Show documentation
The Apache PDFBox library is an open source Java tool for working with PDF documents.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.encoding.conversion;
import org.apache.fontbox.cmap.CMap;
import java.io.UnsupportedEncodingException;
/**
 * CJKConverter converts encodings defined in CJKEncodings.
 *
 * The Java charset used for decoding is looked up once, at construction
 * time, through CJKEncodings.getCharset. When that lookup yields no charset
 * name, both conversion methods behave exactly as they do for a charset the
 * JVM does not support, instead of failing with a NullPointerException.
 *
 * @author Pin Xue (http://www.pinxue.net), Holly Lee (holly.lee (at) gmail.com)
 * @version $Revision: 1.0 $
 */
public class CJKConverter implements EncodingConverter
{
    // Charset in which convertString expects its input to have been decoded.
    private static final String INTERMEDIATE_CHARSET = "UTF-16BE";

    // The PDF encoding name this converter was created for.
    private final String encodingName;

    // The java charset name; null when the lookup returned no charset.
    private final String charsetName;

    /**
     * Constructs a CJKConverter from a PDF encoding name.
     *
     * @param encoding the encoding to be used
     */
    public CJKConverter(String encoding)
    {
        encodingName = encoding;
        charsetName = CJKEncodings.getCharset(encoding);
    }

    /**
     * Convert a string. It occurs when a cmap lookup returned
     * converted bytes successfully, but we still need to convert its
     * encoding. The parameter s is constructed as one byte or a UTF-16BE
     * encoded string.
     *
     * Note: pdfbox set string to UTF-16BE charset before calling into
     * this.
     *
     * The input is returned unchanged when it is null, when it consists of a
     * single character, when no charset is known for this converter, when
     * the charset already is UTF-16BE, or when the JVM does not support the
     * charset.
     *
     * {@inheritDoc}
     */
    public String convertString(String s)
    {
        // Cases in which there is nothing to re-decode.
        if ( s == null || s.length() == 1 || charsetName == null
                || charsetName.equalsIgnoreCase(INTERMEDIATE_CHARSET) )
        {
            return s;
        }
        try
        {
            // Recover the raw bytes and decode them again with the real charset.
            return new String(s.getBytes(INTERMEDIATE_CHARSET), charsetName);
        }
        catch ( UnsupportedEncodingException ignored )
        {
            // Best effort: without the charset the input is kept as is.
            return s;
        }
    }

    /**
     * Convert bytes to a string. We just convert bytes within
     * coderange defined in CMap.
     *
     * Returns null when cmap is null or when the bytes are not inside the
     * code space ranges of the cmap. If the charset of this converter is
     * unknown or unsupported, the bytes are decoded with the platform
     * default charset instead.
     *
     * {@inheritDoc}
     */
    public String convertBytes(byte [] c, int offset, int length, CMap cmap)
    {
        // No cmap?
        if ( cmap == null )
        {
            return null;
        }
        try
        {
            if ( !cmap.isInCodeSpaceRanges(c, offset, length) )
            {
                return null;
            }
            if ( charsetName != null )
            {
                return new String(c, offset, length, charsetName);
            }
        }
        catch ( UnsupportedEncodingException ignored )
        {
            // Fall through to the platform default decoding below.
        }
        // Fallback shared by "no charset known" and "charset not supported".
        return new String(c, offset, length);
    }
}