All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.marc4j.converter.impl.UnicodeToIso6937 Maven / Gradle / Ivy

/**
 * Copyright (C) 2002 Bas Peters
 *
 * This file is part of MARC4J
 *
 * MARC4J is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * MARC4J is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with MARC4J; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.marc4j.converter.impl;

import org.marc4j.converter.CharConverter;

/**
 * 

* A utility to convert UCS/Unicode data to ISO 6937. *

* * @author Bas Peters * @author Yves Pratter */ public class UnicodeToIso6937 extends CharConverter { /** *

* Converts UCS/Unicode data to ISO 6937. *

*

* A question mark (0x3F) is returned if there is no match. *

* * @param data - the UCS/Unicode data in an array of char * @return {@link String}- the ISO 6937 data */ @Override public String convert(final char data[]) { final StringBuffer sb = new StringBuffer(); for (int i = 0; i < data.length; i++) { final char c = data[i]; if (c < 128) { sb.append(c); } else { final int d = convert(c); if (d < 256) { sb.append((char) d); } else { sb.append((char) (d / 256)); sb.append((char) (d % 256)); } } } return sb.toString(); } private int convert(final int i) { switch (i) { case 0x00A0: return 0xA0; // 10/00 NO-BREAK SPACE case 0x00A1: return 0xA1; // 10/01 INVERTED EXCLAMATION MARK case 0x00A2: return 0xA2; // 10/02 CENT SIGN case 0x00A3: return 0xA3; // 10/03 POUND SIGN case 0x00A4: return 0xA8; // 10/08 CURRENCY SIGN case 0x00A5: return 0xA5; // 10/05 YEN SIGN case 0x00A6: return 0xD7; // 13/07 BROKEN BAR case 0x00A7: return 0xA7; // 10/07 SECTION SIGN case 0x00A8: return 0xC820; // DIAERESIS case 0x00A9: return 0xD3; // 13/03 COPYRIGHT SIGN case 0x00AA: return 0xE3; // 14/03 FEMININE ORDINAL INDICATOR case 0x00AB: return 0xAB; // 10/11 LEFT-POINTING DOUBLE ANGLE QUOTATION MARK case 0x00AC: return 0xD6; // 13/06 NOT SIGN case 0x00AD: return 0xFF; // 15/15 SOFT HYPHEN case 0x00AE: return 0xD4; // 13/04 TRADE MARK SIGN case 0x00B0: return 0xB0; // 11/00 DEGREE SIGN case 0x00B1: return 0xB1; // 11/01 PLUS-MINUS SIGN case 0x00B2: return 0xB2; // 11/02 SUPERSCRIPT TWO case 0x00B3: return 0xB3; // 11/03 SUPERSCRIPT THREE case 0x00B4: return 0xC220; // ACUTE ACCENT case 0x00B5: return 0xB5; // 11/05 MICRO SIGN case 0x00B6: return 0xB6; // 11/06 PILCROW SIGN case 0x00B7: return 0xB7; // 11/07 MIDDLE DOT case 0x00B8: return 0xCB20; // CEDILLA case 0x00B9: return 0xD1; // 13/01 SUPERSCRIPT ONE case 0x00BA: return 0xEB; // 14/11 MASCULINE ORDINAL INDICATOR case 0x00BB: return 0xBB; // 11/11 RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK case 0x00BC: return 0xBC; // 11/12 VULGAR FRACTION ONE QUARTER case 0x00BD: return 0xBD; // 11/13 VULGAR FRACTION ONE HALF case 0x00BE: return 0xBE; // 11/14 VULGAR FRACTION THREE QUARTERS case 0x00BF: return 0xBF; // 11/15 INVERTED QUESTION MARK case 0x00C0: return 0xC141; // LATIN CAPITAL LETTER A WITH GRAVE case 0x00C1: return 0xC241; // LATIN CAPITAL LETTER A WITH ACUTE case 0x00C2: return 0xC341; // LATIN CAPITAL LETTER A WITH CIRCUMFLEX case 0x00C3: return 0xC441; // LATIN CAPITAL LETTER A WITH TILDE case 0x00C4: return 0xC841; // LATIN CAPITAL LETTER A WITH DIAERESIS case 0x00C5: return 0xCA41; // LATIN CAPITAL LETTER A WITH RING ABOVE case 0x00C6: return 0xE1; // 14/01 LATIN CAPITAL LETTER AE case 0x00C7: return 0xCB43; // LATIN CAPITAL LETTER C WITH CEDILLA case 0x00C8: return 0xC145; // LATIN CAPITAL LETTER E WITH GRAVE case 0x00C9: return 0xC245; // LATIN CAPITAL LETTER E WITH ACUTE case 0x00CA: return 0xC345; // LATIN CAPITAL LETTER E WITH CIRCUMFLEX case 0x00CB: return 0xC845; // LATIN CAPITAL LETTER E WITH DIAERESIS case 0x00CC: return 0xC149; // LATIN CAPITAL LETTER I WITH GRAVE case 0x00CD: return 0xC249; // LATIN CAPITAL LETTER I WITH ACUTE case 0x00CE: return 0xC349; // LATIN CAPITAL LETTER I WITH CIRCUMFLEX case 0x00CF: return 0xC849; // LATIN CAPITAL LETTER I WITH DIAERESIS case 0x00D1: return 0xC44E; // LATIN CAPITAL LETTER N WITH TILDE case 0x00D2: return 0xC14F; // LATIN CAPITAL LETTER O WITH GRAVE case 0x00D3: return 0xC24F; // LATIN CAPITAL LETTER O WITH ACUTE case 0x00D4: return 0xC34F; // LATIN CAPITAL LETTER O WITH CIRCUMFLEX case 0x00D5: return 0xC44F; // LATIN CAPITAL LETTER O WITH TILDE case 0x00D6: return 0xC84F; // LATIN CAPITAL LETTER O WITH DIAERESIS case 0x00D7: return 0xB4; // 11/04 MULTIPLICATION SIGN case 0x00D8: return 0xE9; // 14/09 LATIN CAPITAL LETTER O WITH STROKE case 0x00D9: return 0xC155; // LATIN CAPITAL LETTER U WITH GRAVE case 0x00DA: return 0xC255; // LATIN CAPITAL LETTER U WITH ACUTE case 0x00DB: return 0xC355; // LATIN CAPITAL LETTER U WITH CIRCUMFLEX case 0x00DC: return 0xC855; // LATIN CAPITAL LETTER U WITH DIAERESIS case 0x00DD: return 0xC259; // LATIN CAPITAL LETTER Y WITH ACUTE case 0x00DE: return 0xEC; // 14/12 LATIN CAPITAL LETTER THORN case 0x00DF: return 0xFB; // 15/11 LATIN SMALL LETTER SHARP S case 0x00E0: return 0xC161; // LATIN SMALL LETTER A WITH GRAVE case 0x00E1: return 0xC261; // LATIN SMALL LETTER A WITH ACUTE case 0x00E2: return 0xC361; // LATIN SMALL LETTER A WITH CIRCUMFLEX case 0x00E3: return 0xC461; // LATIN SMALL LETTER A WITH TILDE case 0x00E4: return 0xC861; // LATIN SMALL LETTER A WITH DIAERESIS case 0x00E5: return 0xCA61; // LATIN SMALL LETTER A WITH RING ABOVE case 0x00E6: return 0xF1; // 15/01 LATIN SMALL LETTER AE case 0x00E7: return 0xCB63; // LATIN SMALL LETTER C WITH CEDILLA case 0x00E8: return 0xC165; // LATIN SMALL LETTER E WITH GRAVE case 0x00E9: return 0xC265; // LATIN SMALL LETTER E WITH ACUTE case 0x00EA: return 0xC365; // LATIN SMALL LETTER E WITH CIRCUMFLEX case 0x00EB: return 0xC865; // LATIN SMALL LETTER E WITH DIAERESIS case 0x00EC: return 0xC169; // LATIN SMALL LETTER I WITH GRAVE case 0x00ED: return 0xC269; // LATIN SMALL LETTER I WITH ACUTE case 0x00EE: return 0xC369; // LATIN SMALL LETTER I WITH CIRCUMFLEX case 0x00EF: return 0xC869; // LATIN SMALL LETTER I WITH DIAERESIS case 0x00F0: return 0xF3; // 15/03 LATIN SMALL LETTER ETH case 0x00F1: return 0xC46E; // LATIN SMALL LETTER N WITH TILDE case 0x00F2: return 0xC16F; // LATIN SMALL LETTER O WITH GRAVE case 0x00F3: return 0xC26F; // LATIN SMALL LETTER O WITH ACUTE case 0x00F4: return 0xC36F; // LATIN SMALL LETTER O WITH CIRCUMFLEX case 0x00F5: return 0xC46F; // LATIN SMALL LETTER O WITH TILDE case 0x00F6: return 0xC86F; // LATIN SMALL LETTER O WITH DIAERESIS case 0x00F7: return 0xB8; // 11/08 DIVISION SIGN case 0x00F8: return 0xF9; // 15/09 LATIN SMALL LETTER O WITH STROKE case 0x00F9: return 0xC175; // LATIN SMALL LETTER U WITH GRAVE case 0x00FA: return 0xC275; // LATIN SMALL LETTER U WITH ACUTE case 0x00FB: return 0xC375; // LATIN SMALL LETTER U WITH CIRCUMFLEX case 0x00FC: return 0xC875; // LATIN SMALL LETTER U WITH DIAERESIS case 0x00FD: return 0xC279; // LATIN SMALL LETTER Y WITH ACUTE case 0x00FE: return 0xFC; // 15/12 LATIN SMALL LETTER THORN case 0x00FF: return 0xC879; // LATIN SMALL LETTER Y WITH DIAERESIS case 0x0100: return 0xC541; // LATIN CAPITAL LETTER A WITH MACRON case 0x0101: return 0xC561; // LATIN SMALL LETTER A WITH MACRON case 0x0102: return 0xC641; // LATIN CAPITAL LETTER A WITH BREVE case 0x0103: return 0xC661; // LATIN SMALL LETTER A WITH BREVE case 0x0104: return 0xCE41; // LATIN CAPITAL LETTER A WITH OGONEK case 0x0105: return 0xCE61; // LATIN SMALL LETTER A WITH OGONEK case 0x0106: return 0xC243; // LATIN CAPITAL LETTER C WITH ACUTE case 0x0107: return 0xC263; // LATIN SMALL LETTER C WITH ACUTE case 0x0108: return 0xC343; // LATIN CAPITAL LETTER C WITH CIRCUMFLEX case 0x0109: return 0xC363; // LATIN SMALL LETTER C WITH CIRCUMFLEX case 0x010A: return 0xC743; // LATIN CAPITAL LETTER C WITH DOT ABOVE case 0x010B: return 0xC763; // LATIN SMALL LETTER C WITH DOT ABOVE case 0x010C: return 0xCF43; // LATIN CAPITAL LETTER C WITH CARON case 0x010D: return 0xCF63; // LATIN SMALL LETTER C WITH CARON case 0x010E: return 0xCF44; // LATIN CAPITAL LETTER D WITH CARON case 0x010F: return 0xCF64; // LATIN SMALL LETTER D WITH CARON case 0x0110: return 0xE2; // 14/02 LATIN CAPITAL LETTER D WITH STROKE case 0x0111: return 0xF2; // 15/02 LATIN SMALL LETTER D WITH STROKE case 0x0112: return 0xC545; // LATIN CAPITAL LETTER E WITH MACRON case 0x0113: return 0xC565; // LATIN SMALL LETTER E WITH MACRON case 0x0116: return 0xC745; // LATIN CAPITAL LETTER E WITH DOT ABOVE case 0x0117: return 0xC765; // LATIN SMALL LETTER E WITH DOT ABOVE case 0x0118: return 0xCE45; // LATIN CAPITAL LETTER E WITH OGONEK case 0x0119: return 0xCE65; // LATIN SMALL LETTER E WITH OGONEK case 0x011A: return 0xCF45; // LATIN CAPITAL LETTER E WITH CARON case 0x011B: return 0xCF65; // LATIN SMALL LETTER E WITH CARON case 0x011C: return 0xC347; // LATIN CAPITAL LETTER G WITH CIRCUMFLEX case 0x011D: return 0xC367; // LATIN SMALL LETTER G WITH CIRCUMFLEX case 0x011E: return 0xC647; // LATIN CAPITAL LETTER G WITH BREVE case 0x011F: return 0xC667; // LATIN SMALL LETTER G WITH BREVE case 0x0120: return 0xC747; // LATIN CAPITAL LETTER G WITH DOT ABOVE case 0x0121: return 0xC767; // LATIN SMALL LETTER G WITH DOT ABOVE case 0x0122: return 0xCB47; // LATIN CAPITAL LETTER G WITH CEDILLA // case 0x0123: return 0xCB67; // small g with cedilla case 0x0124: return 0xC348; // LATIN CAPITAL LETTER H WITH CIRCUMFLEX case 0x0125: return 0xC368; // LATIN SMALL LETTER H WITH CIRCUMFLEX case 0x0126: return 0xE4; // 14/04 LATIN CAPITAL LETTER H WITH STROKE case 0x0127: return 0xF4; // 15/04 LATIN SMALL LETTER H WITH STROKE case 0x0128: return 0xC449; // LATIN CAPITAL LETTER I WITH TILDE case 0x0129: return 0xC469; // LATIN SMALL LETTER I WITH TILDE case 0x012A: return 0xC549; // LATIN CAPITAL LETTER I WITH MACRON case 0x012B: return 0xC569; // LATIN SMALL LETTER I WITH MACRON case 0x012E: return 0xCE49; // LATIN CAPITAL LETTER I WITH OGONEK case 0x012F: return 0xCE69; // LATIN SMALL LETTER I WITH OGONEK case 0x0130: return 0xC749; // LATIN CAPITAL LETTER I WITH DOT ABOVE case 0x0131: return 0xF5; // 15/05 LATIN SMALL LETTER DOTLESS I case 0x0132: return 0xE6; // 14/06 LATIN CAPITAL LIGATURE IJ case 0x0133: return 0xF6; // 15/06 LATIN SMALL LIGATURE IJ case 0x0134: return 0xC34A; // LATIN CAPITAL LETTER J WITH CIRCUMFLEX case 0x0135: return 0xC36A; // LATIN SMALL LETTER J WITH CIRCUMFLEX case 0x0136: return 0xCB4B; // LATIN CAPITAL LETTER K WITH CEDILLA case 0x0137: return 0xCB6B; // LATIN SMALL LETTER K WITH CEDILLA case 0x0138: return 0xF0; // 15/00 LATIN SMALL LETTER KRA case 0x0139: return 0xC24C; // LATIN CAPITAL LETTER L WITH ACUTE case 0x013A: return 0xC26C; // LATIN SMALL LETTER L WITH ACUTE case 0x013B: return 0xCB4C; // LATIN CAPITAL LETTER L WITH CEDILLA case 0x013C: return 0xCB6C; // LATIN SMALL LETTER L WITH CEDILLA case 0x013D: return 0xCF4C; // LATIN CAPITAL LETTER L WITH CARON case 0x013E: return 0xCF6C; // LATIN SMALL LETTER L WITH CARON case 0x013F: return 0xE7; // 14/07 LATIN CAPITAL LETTER L WITH MIDDLE DOT case 0x0140: return 0xF7; // 15/07 LATIN SMALL LETTER L WITH MIDDLE DOT case 0x0141: return 0xE8; // 14/08 LATIN CAPITAL LETTER L WITH STROKE case 0x0142: return 0xF8; // 15/08 LATIN SMALL LETTER L WITH STROKE case 0x0143: return 0xC24E; // LATIN CAPITAL LETTER N WITH ACUTE case 0x0144: return 0xC26E; // LATIN SMALL LETTER N WITH ACUTE case 0x0145: return 0xCB4E; // LATIN CAPITAL LETTER N WITH CEDILLA case 0x0146: return 0xCB6E; // LATIN SMALL LETTER N WITH CEDILLA case 0x0147: return 0xCF4E; // LATIN CAPITAL LETTER N WITH CARON case 0x0148: return 0xCF6E; // LATIN SMALL LETTER N WITH CARON case 0x0149: return 0xEF; // 14/15 LATIN SMALL LETTER N PRECEDED BY // APOSTROPHE case 0x014A: return 0xEE; // 14/14 LATIN CAPITAL LETTER ENG case 0x014B: return 0xFE; // 15/14 LATIN SMALL LETTER ENG case 0x014C: return 0xC54F; // LATIN CAPITAL LETTER O WITH MACRON case 0x014D: return 0xC56F; // LATIN SMALL LETTER O WITH MACRON case 0x0150: return 0xCD4F; // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE case 0x0151: return 0xCD6F; // LATIN SMALL LETTER O WITH DOUBLE ACUTE case 0x0152: return 0xEA; // 14/10 LATIN CAPITAL LIGATURE OE case 0x0153: return 0xFA; // 15/10 LATIN SMALL LIGATURE OE case 0x0154: return 0xC252; // LATIN CAPITAL LETTER R WITH ACUTE case 0x0155: return 0xC272; // LATIN SMALL LETTER R WITH ACUTE case 0x0156: return 0xCB52; // LATIN CAPITAL LETTER R WITH CEDILLA case 0x0157: return 0xCB72; // LATIN SMALL LETTER R WITH CEDILLA case 0x0158: return 0xCF52; // LATIN CAPITAL LETTER R WITH CARON case 0x0159: return 0xCF72; // LATIN SMALL LETTER R WITH CARON case 0x015A: return 0xC253; // LATIN CAPITAL LETTER S WITH ACUTE case 0x015B: return 0xC273; // LATIN SMALL LETTER S WITH ACUTE case 0x015C: return 0xC353; // LATIN CAPITAL LETTER S WITH CIRCUMFLEX case 0x015D: return 0xC373; // LATIN SMALL LETTER S WITH CIRCUMFLEX case 0x015E: return 0xCB53; // LATIN CAPITAL LETTER S WITH CEDILLA case 0x015F: return 0xCB73; // LATIN SMALL LETTER S WITH CEDILLA case 0x0160: return 0xCF53; // LATIN CAPITAL LETTER S WITH CARON case 0x0161: return 0xCF73; // LATIN SMALL LETTER S WITH CARON case 0x0162: return 0xCB54; // LATIN CAPITAL LETTER T WITH CEDILLA case 0x0163: return 0xCB74; // LATIN SMALL LETTER T WITH CEDILLA case 0x0164: return 0xCF54; // LATIN CAPITAL LETTER T WITH CARON case 0x0165: return 0xCF74; // LATIN SMALL LETTER T WITH CARON case 0x0166: return 0xED; // 14/13 LATIN CAPITAL LETTER T WITH STROKE case 0x0167: return 0xFD; // 15/13 LATIN SMALL LETTER T WITH STROKE case 0x0168: return 0xC455; // LATIN CAPITAL LETTER U WITH TILDE case 0x0169: return 0xC475; // LATIN SMALL LETTER U WITH TILDE case 0x016A: return 0xC555; // LATIN CAPITAL LETTER U WITH MACRON case 0x016B: return 0xC575; // LATIN SMALL LETTER U WITH MACRON case 0x016C: return 0xC655; // LATIN CAPITAL LETTER U WITH BREVE case 0x016D: return 0xC675; // LATIN SMALL LETTER U WITH BREVE case 0x016E: return 0xCAAD; // LATIN CAPITAL LETTER U WITH RING ABOVE case 0x016F: return 0xCA75; // LATIN SMALL LETTER U WITH RING ABOVE case 0x0170: return 0xCD55; // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE case 0x0171: return 0xCD75; // LATIN SMALL LETTER U WITH DOUBLE ACUTE case 0x0172: return 0xCE55; // LATIN CAPITAL LETTER U WITH OGONEK case 0x0173: return 0xCE75; // LATIN SMALL LETTER U WITH OGONEK case 0x0174: return 0xC357; // LATIN CAPITAL LETTER W WITH CIRCUMFLEX case 0x0175: return 0xC377; // LATIN SMALL LETTER W WITH CIRCUMFLEX case 0x0176: return 0xC359; // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX case 0x0177: return 0xC379; // LATIN SMALL LETTER Y WITH CIRCUMFLEX case 0x0178: return 0xC859; // LATIN CAPITAL LETTER Y WITH DIAERESIS case 0x0179: return 0xC25A; // LATIN CAPITAL LETTER Z WITH ACUTE case 0x017A: return 0xC27A; // LATIN SMALL LETTER Z WITH ACUTE case 0x017B: return 0xC75A; // LATIN CAPITAL LETTER Z WITH DOT ABOVE case 0x017C: return 0xC77A; // LATIN SMALL LETTER Z WITH DOT ABOVE case 0x017D: return 0xCF5A; // LATIN CAPITAL LETTER Z WITH CARON case 0x017E: return 0xCF7A; // LATIN SMALL LETTER Z WITH CARON case 0x01F5: return 0xC267; // LATIN SMALL LETTER G WITH CEDILLA(4) case 0x02C7: return 0xCF20; // CARON case 0x02D8: return 0xC620; // BREVE case 0x02DA: return 0xCA20; // RING ABOVE case 0x02DB: return 0xCE20; // ogonek case 0x2015: return 0xD0; // 13/00 HORIZONTAL BAR case 0x2018: return 0xA9; // 10/09 LEFT SINGLE QUOTATION MARK case 0x2019: return 0xB9; // 11/09 RIGHT SINGLE QUOTATION MARK case 0x201C: return 0xAA; // 10/10 LEFT DOUBLE QUOTATION MARK case 0x201D: return 0xBA; // 11/10 RIGHT DOUBLE QUOTATION MARK case 0x2117: return 0xD2; // 13/02 REGISTERED SIGN case 0x2126: return 0xE0; // 14/00 OHM SIGN case 0x215B: return 0xDC; // 13/12 VULGAR FRACTION ONE EIGHTH case 0x215E: return 0xDF; // 13/15 VULGAR FRACTION SEVEN EIGHTHS case 0x2190: return 0xAC; // 10/12 LEFTWARDS ARROW case 0x2191: return 0xAD; // 10/13 UPWARDS ARROW case 0x2192: return 0xAE; // 10/14 RIGHTWARDS ARROW case 0x2193: return 0xAF; // 10/15 DOWNWARDS ARROW case 0x266A: return 0xD5; // 13/05 EIGHTH NOTE default: return 0x3F; // if no match, return question mark } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy