Please wait. This may take a few minutes ...
Downloading a project requires significant resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
org.apache.tika.parser.wordperfect.WP5Charsets Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.wordperfect;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * WordPerfect 5.x lookup tables used for mapping WordPerfect charsets to
 * Unicode equivalents when possible.
 *
 * <p>The class is a non-instantiable holder for {@link #EXTENDED_CHARSETS}
 * and the {@link #append(StringBuilder, int, int)} lookup helper.</p>
 *
 * @author Pascal Essiembre
 */
final class WP5Charsets {

    private static final Logger LOG = LoggerFactory.getLogger(WP5Charsets.class);

    /** Number of slots in the user-defined charset (WP charset 12). */
    private static final int USER_DEFINED_CHARSET_SIZE = 255;

    /** Character appended when a charset or character value cannot be mapped. */
    private static final char UNMAPPED = ' ';

    /**
     * Extended character sets used when fixed-length multi-byte functions
     * with a byte value of 192 (0xC0) are found in a WordPerfect document.
     * Those character set codes may be specific to WordPerfect
     * file specifications and may or may not be considered standard
     * outside WordPerfect. Applies to version 5.x.
     *
     * <p>The first index is the WordPerfect charset number, the second index
     * is the character value within that charset. Charsets 0, 1, 3, 4, 6 and 7
     * reuse the corresponding {@code WP6Charsets} tables (same array
     * instances, not copies).</p>
     *
     * <p>Java arrays are mutable even when the field is {@code final};
     * callers should treat these tables as read-only.</p>
     */
    public static final char[][] EXTENDED_CHARSETS = new char[][] {
        // WP Charset 0: ASCII (same as WP6)
        WP6Charsets.EXTENDED_CHARSETS[0],
        // WP Charset 1: Multinational 1 (same as WP6)
        WP6Charsets.EXTENDED_CHARSETS[1],
        // WP Charset 2: Multinational 2 (28 chars)
        {
            '\u0323','\u0324','\u02da','\u0325','\u02bc','\u032d','\u2017','\u005f',
            '\u0138','\u032e','\u033e','\u2018','\u0020','\u02bd','\u02db','\u0327',
            '\u0321','\u0322','\u030d','\u2019','\u0329','\u0020','\u0621','\u02be',
            '\u0306','\u0310','\u2032','\u2034'
        },
        // WP Charset 3: Box Drawing (same as WP6)
        WP6Charsets.EXTENDED_CHARSETS[3],
        // WP Charset 4: Typographic Symbols (same as WP6)
        WP6Charsets.EXTENDED_CHARSETS[4],
        // WP Charset 5: Iconic Symbol (35 chars)
        {
            '\u2665','\u2666','\u2663','\u2660','\u2642','\u2640','\u263c','\u263a',
            '\u263b','\u266a','\u266c','\u25ac','\u2302','\u203c','\u221a','\u21a8',
            '\u2310','\u2319','\u25d8','\u25d9','\u21b5','\u261e','\u261c','\u2713',
            '\u2610','\u2612','\u2639','\u266f','\u266d','\u266e','\u260e','\u231a',
            '\u231b','\u2104','\u23b5'
        },
        // WP Charset 6: Math/Scientific (same as WP6)
        WP6Charsets.EXTENDED_CHARSETS[6],
        // WP Charset 7 Math/Scientific Extended (same as WP6)
        WP6Charsets.EXTENDED_CHARSETS[7],
        // WP Charset 8: Greek (210 chars)
        {
            '\u0391','\u03b1','\u0392','\u03b2','\u0392','\u03d0','\u0393','\u03b3',
            '\u0394','\u03b4','\u0395','\u03b5','\u0396','\u03b6','\u0397','\u03b7',
            '\u0398','\u03b8','\u0399','\u03b9','\u039a','\u03ba','\u039b','\u03bb',
            '\u039c','\u03bc','\u039d','\u03bd','\u039e','\u03be','\u039f','\u03bf',
            '\u03a0','\u03c0','\u03a1','\u03c1','\u03a3','\u03c3','\u03f9','\u03db',
            '\u03a4','\u03c4','\u03a5','\u03c5','\u03a6','\u03d5','\u03a7','\u03c7',
            '\u03a8','\u03c8','\u03a9','\u03c9','\u03ac','\u03ad','\u03ae','\u03af',
            '\u03ca','\u03cc','\u03cd','\u03cb','\u03ce','\u03b5','\u03d1','\u03f0',
            '\u03d6','\u1fe5','\u03d2','\u03c6','\u03c9','\u037e','\u0387','\u0384',
            '\u00a8','\u0385','\u1fed','\u1fef','\u1fc0','\u1fbd','\u1fbf','\u1fbe',
            '\u1fce','\u1fde','\u1fcd','\u1fdd','\u1fcf','\u1fdf','\u0384','\u1fef',
            '\u1fc0','\u1fbd','\u1fbf','\u1fce','\u1fde','\u1fcd','\u1fdd','\u1fcf',
            '\u1fdf','\u1f70','\u1fb6','\u1fb3','\u1fb4','\u1fb7','\u1f00','\u1f04',
            '\u1f02','\u1f06','\u1f80','\u1f84','\u1f86','\u1f01','\u1f05','\u1f03',
            '\u1f07','\u1f81','\u1f85','\u1f87','\u1f72','\u1f10','\u1f14','\u1f13',
            '\u1f11','\u1f15','\u1f13','\u1f74','\u1fc6','\u1fc3','\u1fc4','\u1fc2',
            '\u1fc7','\u1f20','\u1f24','\u1f22','\u1f26','\u1f90','\u1f94','\u1f96',
            '\u1f21','\u1f25','\u1f23','\u1f27','\u1f91','\u1f95','\u1f97','\u1f76',
            '\u1fd6','\u0390','\u1fd2','\u1f30','\u1f34','\u1f32','\u1f36','\u1f31',
            '\u1f35','\u1f33','\u1f37','\u1f78','\u1f40','\u1f44','\u1f42','\u1f41',
            '\u1f45','\u1f43','\u1f7a','\u1fe6','\u03b0','\u1fe3','\u1f50','\u1f54',
            '\u1f52','\u1f56','\u1f51','\u1f55','\u1f53','\u1f57','\u1f7c','\u1ff6',
            '\u1ff3','\u1ff4','\u1ff2','\u1ff7','\u1f60','\u1f64','\u1f62','\u1f66',
            '\u1fa0','\u1fa4','\u1fa6','\u1f61','\u1f65','\u1f63','\u1f67','\u1fa1',
            '\u1fa5','\u1fa7','\u0374','\u0375','\u03db','\u03dd','\u03d9','\u03e1',
            '\u0386','\u0388','\u0389','\u038a','\u038c','\u038e','\u038f','\u03aa',
            '\u03ab','\u1fe5'
        },
        // WP Charset 9: Hebrew (119 chars)
        // NOTE(review): two entries below (U+09AA and U+0544) lie outside the
        // Hebrew Unicode blocks; they are kept as-is here. Possibly
        // transcription errors -- verify against a WordPerfect 5.x charset
        // reference before changing.
        {
            '\u05d0','\u05d1','\u05d2','\u05d3','\u05d4','\u05d5','\u05d6','\u05d7',
            '\u05d8','\u05d9','\u05da','\u05db','\u05dc','\u05dd','\u05de','\u05df',
            '\u05e0','\u05e1','\u05e2','\u05e3','\u05e4','\u05e5','\u05e6','\u05e7',
            '\u05e8','\u05e9','\u05ea','\u05be','\u05c0','\u05c3','\u05f3','\u05f4',
            '\u05b0','\u05b1','\u05b2','\u05b3','\u05b4','\u05b5','\u05b6','\u05b7',
            '\u05b8','\u05b9','\u05ba','\u05bb','\u05bc','\u05bd','\u05bf','\u05b7',
            '\ufbe1','\u05f0','\u05f1','\u05f2','\u0591','\u0596','\u05ad','\u05a4',
            '\u059a','\u059b','\u05a3','\u05a5','\u05a6','\u05a7','\u09aa','\u0592',
            '\u0593','\u0594','\u0595','\u0597','\u0598','\u0599','\u05a8','\u059c',
            '\u059d','\u059e','\u05a1','\u05a9','\u05a0','\u059f','\u05ab','\u05ac',
            '\u05af','\u05c4','\u0544','\u05d0','\ufb31','\ufb32','\ufb33','\ufb34',
            '\ufb35','\ufb4b','\ufb36','\u05d7','\ufb38','\ufb39','\ufb3b','\ufb3a',
            '\u05da','\u05da','\u05da','\u05da','\u05da','\u05da','\ufb3c','\ufb3e',
            '\ufb40','\u05df','\ufb41','\ufb44','\ufb46','\ufb47','\ufb2b','\ufb2d',
            '\ufb2a','\ufb2c','\ufb4a','\ufb4c','\ufb4e','\ufb1f','\ufb1d'
        },
        // WP Charset 10: Cyrillic (150 chars)
        // The NUL (U+0000) entries are presumably positions without a known
        // Unicode mapping -- TODO confirm.
        {
            '\u0410','\u0430','\u0411','\u0431','\u0412','\u0432','\u0413','\u0433',
            '\u0414','\u0434','\u0415','\u0435','\u0401','\u0451','\u0416','\u0436',
            '\u0417','\u0437','\u0418','\u0438','\u0419','\u0439','\u041a','\u043a',
            '\u041b','\u043b','\u041c','\u043c','\u041d','\u043d','\u041e','\u043e',
            '\u041f','\u043f','\u0420','\u0440','\u0421','\u0441','\u0422','\u0442',
            '\u0423','\u0443','\u0424','\u0444','\u0425','\u0445','\u0426','\u0446',
            '\u0427','\u0447','\u0428','\u0448','\u0429','\u0449','\u042a','\u044a',
            '\u042b','\u044b','\u042c','\u044c','\u042d','\u044d','\u042e','\u044e',
            '\u042f','\u044f','\u0490','\u0491','\u0402','\u0452','\u0403','\u0453',
            '\u0404','\u0454','\u0405','\u0455','\u0406','\u0456','\u0407','\u0457',
            '\u0408','\u0458','\u0409','\u0459','\u040a','\u045a','\u040b','\u045b',
            '\u040c','\u045c','\u040e','\u045e','\u040f','\u045f','\u0462','\u0463',
            '\u0472','\u0473','\u0474','\u0475','\u046a','\u046b','\ua640','\ua641',
            '\u0429','\u0449','\u04c0','\u04cf','\u0466','\u0467','\u0000','\u0000',
            '\u0000','\u0000','\u0000','\u0000','\u0000','\u0000','\u0000','\u0000',
            '\u0000','\u0000','\u0000','\u0000','\u0000','\u0000','\u0000','\u0000',
            '\u0000','\u0000','\u0400','\u0450','\u0000','\u0000','\u040d','\u045d',
            '\u0000','\u0000','\u0000','\u0000','\u0000','\u0000','\u0000','\u0000',
            '\u0000','\u0000','\u0000','\u0000','\u0301','\u0300'
        },
        // WP Charset 11: Japanese (185 chars)
        // NOTE(review): one entry below (U+03D0, in the Greek block) lies
        // outside the kana/CJK ranges used by the rest of this table; it is
        // kept as-is here. Possibly a transcription error -- verify against a
        // WordPerfect 5.x charset reference before changing.
        {
            '\u3041','\u3043','\u3045','\u3047','\u3049','\u3053','\u3083','\u3085',
            '\u3087','\u3094','\u3095','\u3096','\u3042','\u3044','\u3046','\u3048',
            '\u304a','\u304b','\u304d','\u3047','\u3051','\u3053','\u304c','\u304e',
            '\u3050','\u3052','\u3054','\u3055','\u3057','\u3059','\u305b','\u305d',
            '\u3056','\u3058','\u305a','\u305c','\u305e','\u305f','\u3051','\u3064',
            '\u3066','\u3068','\u3060','\u3062','\u3065','\u3067','\u3069','\u306a',
            '\u306b','\u306c','\u306d','\u306e','\u306f','\u3072','\u3075','\u3078',
            '\u307b','\u3070','\u3073','\u3076','\u3079','\u307c','\u3071','\u3074',
            '\u3077','\u307a','\u307d','\u307e','\u307f','\u3080','\u3081','\u3082',
            '\u3084','\u3086','\u3088','\u3089','\u308a','\u308b','\u308c','\u308d',
            '\u308e','\u3092','\u3093','\u3014','\u3015','\uff3b','\uff3d','\u300c',
            '\u300d','\u300c','\u300d','\u302a','\u3002','\u3001','\u309d','\u309e',
            '\u3003','\u30fc','\u309b','\u309c','\u30a1','\u30a3','\u30a5','\u30a7',
            '\u30a9','\u30c3','\u30e3','\u30e5','\u3057','\u30f4','\u30f5','\u30f6',
            '\u30a2','\u30a4','\u30a6','\u30a8','\u30aa','\u30ab','\u30ad','\u30af',
            '\u30b1','\u30b3','\u30ac','\u30ae','\u30b0','\u30b2','\u30b4','\u30b5',
            '\u30c4','\u30b9','\u30bb','\u30bd','\u30b6','\u30b8','\u30ba','\u30bc',
            '\u30be','\u30bf','\u30c1','\u30c4','\u30c6','\u30c8','\u30c0','\u30c2',
            '\u30c5','\u30c7','\u30c9','\u30ca','\u30cb','\u30cc','\u30cd','\u30ce',
            '\u30cf','\u30d2','\u30d5','\u30d8','\u03d0','\u30db','\u30d3','\u30d6',
            '\u30d9','\u30dc','\u30d1','\u30d4','\u30d7','\u30da','\u30dd','\u30de',
            '\u30df','\u30e0','\u30e1','\u30e2','\u30e4','\u30e6','\u30e8','\u30e9',
            '\u30ea','\u30ab','\u30ec','\u30ed','\u30ef','\u30f2','\u30f3','\u30fd',
            '\u30fe'
        },
        // WP Charset 12: User-defined (255 chars), every slot maps to a space
        blankCharset(USER_DEFINED_CHARSET_SIZE)
    };

    //TODO map multi-characters

    /**
     * Constructor. Private because this class only exposes static members.
     */
    private WP5Charsets() {
    }

    /**
     * Appends the Unicode character mapped to a WordPerfect 5.x extended
     * character to the given builder.
     *
     * <p>If {@code charset} is not a valid index into
     * {@link #EXTENDED_CHARSETS}, or {@code charval} is not a valid index
     * into the selected charset (including negative values in either case),
     * the problem is logged at debug level and a single space is appended
     * instead, so exactly one character is always appended.</p>
     *
     * @param out     builder receiving the mapped character
     * @param charset WordPerfect charset number (first table index)
     * @param charval character value within the charset (second table index)
     */
    public static void append(StringBuilder out, int charset, int charval) {
        // Negative indexes are rejected here as well; without the lower-bound
        // check they would reach the array access and throw
        // ArrayIndexOutOfBoundsException.
        if (charset < 0 || charset >= EXTENDED_CHARSETS.length) {
            LOG.debug("Unsupported WordPerfect 5.x charset: {}", charset);
            out.append(UNMAPPED);
            return;
        }
        char[] table = EXTENDED_CHARSETS[charset];
        if (charval < 0 || charval >= table.length) {
            LOG.debug("Unsupported WordPerfect 5.x charset ({}) character value: {}", charset, charval);
            out.append(UNMAPPED);
            return;
        }
        out.append(table[charval]);
    }

    /** Builds a charset table of the given size in which every slot is a space. */
    private static char[] blankCharset(int size) {
        char[] blanks = new char[size];
        java.util.Arrays.fill(blanks, ' ');
        return blanks;
    }
}