org.apache.fop.complexscripts.scripts.ArabicScriptProcessor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.fop Show documentation
Show all versions of org.apache.fop Show documentation
The core maven build properties
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* $Id$ */
package org.apache.fop.complexscripts.scripts;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.fop.complexscripts.bidi.BidiClass;
import org.apache.fop.complexscripts.bidi.BidiConstants;
import org.apache.fop.complexscripts.fonts.GlyphDefinitionTable;
import org.apache.fop.complexscripts.util.CharAssociation;
import org.apache.fop.complexscripts.util.GlyphContextTester;
import org.apache.fop.complexscripts.util.GlyphSequence;
import org.apache.fop.complexscripts.util.ScriptContextTester;
import org.apache.fop.util.CharUtilities;
// CSOFF: LineLengthCheck
/**
* <p>The <code>ArabicScriptProcessor</code> class implements a script processor for
* performing glyph substitution and positioning operations on content associated with the Arabic script.</p>
*
* <p>This work was originally authored by Glenn Adams (gadams@apache.org).</p>
*/
public class ArabicScriptProcessor extends DefaultScriptProcessor {
/** logging instance */
// NOTE(review): 'log' is not referenced anywhere else in this class.
private static final Log log = LogFactory.getLog(ArabicScriptProcessor.class);
/** features to use for substitutions; returned as-is by getSubstitutionFeatures() */
private static final String[] GSUB_FEATURES =
{
"calt", // contextual alternates
"ccmp", // glyph composition/decomposition
"fina", // final (terminal) forms
"init", // initial forms
"isol", // isolated forms
"liga", // standard ligatures
"medi", // medial forms
"rlig" // required ligatures
};
/** features to use for positioning; returned as-is by getPositioningFeatures() */
private static final String[] GPOS_FEATURES =
{
"curs", // cursive positioning
"kern", // kerning
"mark", // mark to base or ligature positioning
"mkmk" // mark to mark positioning
};
private static class SubstitutionScriptContextTester implements ScriptContextTester {
private static Map testerMap = new HashMap();
static {
testerMap.put("fina", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inFinalContext(script, language, feature, gs, index, flags);
}
});
testerMap.put("init", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inInitialContext(script, language, feature, gs, index, flags);
}
});
testerMap.put("isol", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inIsolateContext(script, language, feature, gs, index, flags);
}
});
testerMap.put("liga", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inLigatureContext(script, language, feature, gs, index, flags);
}
});
testerMap.put("medi", new GlyphContextTester() {
public boolean test(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
return inMedialContext(script, language, feature, gs, index, flags);
}
});
}
public GlyphContextTester getTester(String feature) {
return testerMap.get(feature);
}
}
private static class PositioningScriptContextTester implements ScriptContextTester {
private static Map testerMap = new HashMap();
public GlyphContextTester getTester(String feature) {
return testerMap.get(feature);
}
}
/** Context tester returned by {@code getSubstitutionContextTester()}. */
private final ScriptContextTester subContextTester;
/** Context tester returned by {@code getPositioningContextTester()}. */
private final ScriptContextTester posContextTester;

/**
 * Creates a processor and its substitution and positioning context testers.
 *
 * @param script script identifier, passed unchanged to the superclass constructor
 */
ArabicScriptProcessor(String script) {
    super(script);
    subContextTester = new SubstitutionScriptContextTester();
    posContextTester = new PositioningScriptContextTester();
}
/**
 * {@inheritDoc}
 *
 * <p>This implementation returns the shared {@code GSUB_FEATURES} array itself, not a copy,
 * so callers must treat the result as read-only.</p>
 */
@Override
public String[] getSubstitutionFeatures() {
    return GSUB_FEATURES;
}
/**
 * {@inheritDoc}
 *
 * <p>This implementation returns the tester instance created in the constructor.</p>
 */
@Override
public ScriptContextTester getSubstitutionContextTester() {
    return subContextTester;
}
/**
 * {@inheritDoc}
 *
 * <p>This implementation returns the shared {@code GPOS_FEATURES} array itself, not a copy,
 * so callers must treat the result as read-only.</p>
 */
@Override
public String[] getPositioningFeatures() {
    return GPOS_FEATURES;
}
/**
 * {@inheritDoc}
 *
 * <p>This implementation returns the tester instance created in the constructor.</p>
 */
@Override
public ScriptContextTester getPositioningContextTester() {
    return posContextTester;
}
/**
* {@inheritDoc}
*
* <p>This override performs no reordering: it returns <code>gs</code> unchanged and
* ignores all other arguments.</p>
*/
@Override
public GlyphSequence reorderCombiningMarks(GlyphDefinitionTable gdef, GlyphSequence gs, int[] widths, int[][] gpa, String script, String language) {
// a side effect of BIDI reordering is to order combining marks before their base, so we need to override the default here to
// prevent double reordering
return gs;
}
/**
 * Decides whether the glyph at {@code index} is in a context where the {@code fina}
 * (final form) feature applies.
 *
 * <p>The character range {@code [start, end)} of the glyph's association must have a
 * suitable preceding context and a suitable own context; beyond that, either the own
 * context forces a final form or the succeeding context must permit one. The
 * {@code script}, {@code language}, {@code feature} and {@code flags} arguments are not
 * consulted.</p>
 *
 * @param gs glyph sequence supplying the association and the character array
 * @param index index of the glyph whose association is examined
 * @return {@code true} if all required context checks pass; {@code false} otherwise,
 *         including when the sequence reports zero characters
 */
private static boolean inFinalContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
    final CharAssociation assoc = gs.getAssociation(index);
    final int[] chars = gs.getCharacterArray(false);
    final int count = gs.getCharacterCount();
    if (count == 0) {
        return false;
    }
    final int start = assoc.getStart();
    final int end = assoc.getEnd();
    return hasFinalPrecedingContext(chars, count, start, end)
        && hasFinalThisContext(chars, count, start, end)
        && (forceFinalThisContext(chars, count, start, end)
            || hasFinalSucceedingContext(chars, count, start, end));
}
/**
 * Decides whether the glyph at {@code index} is in a context where the {@code init}
 * (initial form) feature applies.
 *
 * <p>The preceding, own and succeeding contexts of the association's character range
 * {@code [start, end)} must all qualify, checked in that order. The {@code script},
 * {@code language}, {@code feature} and {@code flags} arguments are not consulted.</p>
 *
 * @param gs glyph sequence supplying the association and the character array
 * @param index index of the glyph whose association is examined
 * @return {@code true} if all three context checks pass; {@code false} otherwise,
 *         including when the sequence reports zero characters
 */
private static boolean inInitialContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
    final CharAssociation assoc = gs.getAssociation(index);
    final int[] chars = gs.getCharacterArray(false);
    final int count = gs.getCharacterCount();
    if (count == 0) {
        return false;
    }
    final int start = assoc.getStart();
    final int end = assoc.getEnd();
    return hasInitialPrecedingContext(chars, count, start, end)
        && hasInitialThisContext(chars, count, start, end)
        && hasInitialSucceedingContext(chars, count, start, end);
}
/**
 * Decides whether the glyph at {@code index} is in a context where the {@code isol}
 * (isolated form) feature applies.
 *
 * <p>This holds only when the glyph's association starts at 0 and ends at the sequence's
 * character count, i.e. it spans every character of the sequence. The {@code script},
 * {@code language}, {@code feature} and {@code flags} arguments are not consulted.</p>
 *
 * @param gs glyph sequence supplying the association and the character count
 * @param index index of the glyph whose association is examined
 * @return {@code true} if the association covers the whole, non-empty character sequence
 */
private static boolean inIsolateContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
    final CharAssociation assoc = gs.getAssociation(index);
    final int count = gs.getCharacterCount();
    return (count != 0) && (assoc.getStart() == 0) && (assoc.getEnd() == count);
}
/**
 * Decides whether the glyph at {@code index} is in a context where the {@code liga}
 * (standard ligature) feature applies.
 *
 * <p>Both the preceding and the succeeding context of the association's character range
 * must qualify. The {@code script}, {@code language}, {@code feature} and {@code flags}
 * arguments are not consulted.</p>
 *
 * @param gs glyph sequence supplying the association and the character array
 * @param index index of the glyph whose association is examined
 * @return {@code true} if both context checks pass; {@code false} otherwise, including
 *         when the sequence reports zero characters
 */
private static boolean inLigatureContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
    final CharAssociation assoc = gs.getAssociation(index);
    final int[] chars = gs.getCharacterArray(false);
    final int count = gs.getCharacterCount();
    if (count == 0) {
        return false;
    }
    final int start = assoc.getStart();
    final int end = assoc.getEnd();
    return hasLigaturePrecedingContext(chars, count, start, end)
        && hasLigatureSucceedingContext(chars, count, start, end);
}
/**
 * Decides whether the glyph at {@code index} is in a context where the {@code medi}
 * (medial form) feature applies.
 *
 * <p>The preceding, own and succeeding contexts of the association's character range
 * {@code [start, end)} must all qualify, checked in that order. The {@code script},
 * {@code language}, {@code feature} and {@code flags} arguments are not consulted.</p>
 *
 * @param gs glyph sequence supplying the association and the character array
 * @param index index of the glyph whose association is examined
 * @return {@code true} if all three context checks pass; {@code false} otherwise,
 *         including when the sequence reports zero characters
 */
private static boolean inMedialContext(String script, String language, String feature, GlyphSequence gs, int index, int flags) {
    final CharAssociation assoc = gs.getAssociation(index);
    final int[] chars = gs.getCharacterArray(false);
    final int count = gs.getCharacterCount();
    if (count == 0) {
        return false;
    }
    final int start = assoc.getStart();
    final int end = assoc.getEnd();
    return hasMedialPrecedingContext(chars, count, start, end)
        && hasMedialThisContext(chars, count, start, end)
        && hasMedialSucceedingContext(chars, count, start, end);
}
/**
 * Checks whether what precedes position {@code s} allows a final form at {@code [s,e)}.
 *
 * <p>Walks backwards from {@code s - 1} to the nearest character whose bidi class is not
 * NSM. If that character's class is not AL, the result is whether it is a zero width
 * joiner; if it is AL, the result is {@code true} unless the character is listed in
 * {@code ISOLATED_INITIALS}. When no character is visited, character and class both
 * remain 0 and the same rules are applied to those values.</p>
 *
 * @param ca character array
 * @param nc number of characters; indices at or above it are skipped
 * @param s start of the association's character range
 * @param e end of the association's character range (unused here)
 * @return {@code true} if the preceding context qualifies
 */
private static boolean hasFinalPrecedingContext(int[] ca, int nc, int s, int e) {
    int prevChar = 0;
    int prevClass = 0;
    int pos = s;
    while (pos > 0) {
        pos--;
        if (pos >= nc) {
            continue; // index not below nc: skipped
        }
        prevChar = ca[pos];
        prevClass = BidiClass.getBidiClass(prevChar);
        if (prevClass != BidiConstants.NSM) {
            break;
        }
    }
    if (prevClass != BidiConstants.AL) {
        return isZWJ(prevChar);
    }
    return !hasIsolateInitial(prevChar);
}
/**
 * Checks whether the characters in {@code [s,e)} themselves can take a final form.
 *
 * <p>Walks backwards from {@code e - 1} to {@code s} to find the last character that is
 * neither of bidi class NSM nor a zero width joiner. The result is {@code true} only if
 * the class left by the scan is AL and the character is not listed in
 * {@code ISOLATED_FINALS}.</p>
 *
 * @param ca character array
 * @param nc number of characters; indices outside {@code [0,nc)} are skipped
 * @param s start of the association's character range
 * @param e end (exclusive) of the association's character range
 * @return {@code true} if the range's own context qualifies
 */
private static boolean hasFinalThisContext(int[] ca, int nc, int s, int e) {
    int lastChar = 0;
    int lastClass = 0;
    int remaining = e - s;
    while (remaining > 0) {
        remaining--;
        final int pos = s + remaining;
        if ((pos < 0) || (pos >= nc)) {
            continue;
        }
        lastChar = ca[pos];
        lastClass = BidiClass.getBidiClass(lastChar);
        if ((lastClass != BidiConstants.NSM) && !isZWJ(lastChar)) {
            break;
        }
    }
    return (lastClass == BidiConstants.AL) && !hasIsolateFinal(lastChar);
}
/**
 * Checks whether the characters in {@code [s,e)} force a final form regardless of what
 * follows.
 *
 * <p>Walks backwards from {@code e - 1} to {@code s} to find the last character that is
 * neither of bidi class NSM nor a zero width joiner. The result is {@code true} only if
 * the class left by the scan is AL and the character is listed in
 * {@code ISOLATED_INITIALS}.</p>
 *
 * @param ca character array
 * @param nc number of characters; indices outside {@code [0,nc)} are skipped
 * @param s start of the association's character range
 * @param e end (exclusive) of the association's character range
 * @return {@code true} if a final form is forced
 */
private static boolean forceFinalThisContext(int[] ca, int nc, int s, int e) {
    int lastChar = 0;
    int lastClass = 0;
    int remaining = e - s;
    while (remaining > 0) {
        remaining--;
        final int pos = s + remaining;
        if ((pos < 0) || (pos >= nc)) {
            continue;
        }
        lastChar = ca[pos];
        lastClass = BidiClass.getBidiClass(lastChar);
        if ((lastClass != BidiConstants.NSM) && !isZWJ(lastChar)) {
            break;
        }
    }
    return (lastClass == BidiConstants.AL) && hasIsolateInitial(lastChar);
}
/**
 * Checks whether what follows position {@code e} allows a final form at {@code [s,e)}.
 *
 * <p>Walks forward from {@code e} to the nearest character whose bidi class is not NSM.
 * If that character's class is not AL, the result is {@code true} unless it is a zero
 * width joiner; if it is AL, the result is whether the character is listed in
 * {@code ISOLATED_FINALS}. When no character is visited, character and class both remain
 * 0 and the same rules are applied to those values.</p>
 *
 * @param ca character array
 * @param nc number of characters; the scan stops before this index
 * @param s start of the association's character range (unused here)
 * @param e end (exclusive) of the association's character range
 * @return {@code true} if the succeeding context qualifies
 */
private static boolean hasFinalSucceedingContext(int[] ca, int nc, int s, int e) {
    int nextChar = 0;
    int nextClass = 0;
    int pos = e;
    while (pos < nc) {
        nextChar = ca[pos++];
        nextClass = BidiClass.getBidiClass(nextChar);
        if (nextClass != BidiConstants.NSM) {
            break;
        }
    }
    return (nextClass != BidiConstants.AL) ? !isZWJ(nextChar) : hasIsolateFinal(nextChar);
}
/**
 * Checks whether what precedes position {@code s} allows an initial form at {@code [s,e)}.
 *
 * <p>Walks backwards from {@code s - 1} to the nearest character whose bidi class is not
 * NSM. If that character's class is not AL, the result is {@code true} unless it is a
 * zero width joiner; if it is AL, the result is whether the character is listed in
 * {@code ISOLATED_INITIALS}. When no character is visited, character and class both
 * remain 0 and the same rules are applied to those values.</p>
 *
 * @param ca character array
 * @param nc number of characters; indices at or above it are skipped
 * @param s start of the association's character range
 * @param e end of the association's character range (unused here)
 * @return {@code true} if the preceding context qualifies
 */
private static boolean hasInitialPrecedingContext(int[] ca, int nc, int s, int e) {
    int prevChar = 0;
    int prevClass = 0;
    int pos = s;
    while (pos > 0) {
        pos--;
        if (pos >= nc) {
            continue; // index not below nc: skipped
        }
        prevChar = ca[pos];
        prevClass = BidiClass.getBidiClass(prevChar);
        if (prevClass != BidiConstants.NSM) {
            break;
        }
    }
    return (prevClass != BidiConstants.AL) ? !isZWJ(prevChar) : hasIsolateInitial(prevChar);
}
/**
 * Checks whether the characters in {@code [s,e)} themselves can take an initial form.
 *
 * <p>Walks forward from {@code s} to find the first character that is neither of bidi
 * class NSM nor a zero width joiner. The result is {@code true} only if the class left
 * by the scan is AL and the character is not listed in {@code ISOLATED_INITIALS}.</p>
 *
 * @param ca character array
 * @param nc number of characters; indices outside {@code [0,nc)} are skipped
 * @param s start of the association's character range
 * @param e end (exclusive) of the association's character range
 * @return {@code true} if the range's own context qualifies
 */
private static boolean hasInitialThisContext(int[] ca, int nc, int s, int e) {
    int firstChar = 0;
    int firstClass = 0;
    final int length = e - s;
    for (int offset = 0; offset < length; offset++) {
        final int pos = s + offset;
        if ((pos < 0) || (pos >= nc)) {
            continue;
        }
        firstChar = ca[pos];
        firstClass = BidiClass.getBidiClass(firstChar);
        if ((firstClass != BidiConstants.NSM) && !isZWJ(firstChar)) {
            break;
        }
    }
    return (firstClass == BidiConstants.AL) && !hasIsolateInitial(firstChar);
}
/**
 * Checks whether what follows position {@code e} allows an initial form at {@code [s,e)}.
 *
 * <p>Walks forward from {@code e} to the nearest character whose bidi class is not NSM.
 * If that character's class is not AL, the result is whether it is a zero width joiner;
 * if it is AL, the result is {@code true} unless the character is listed in
 * {@code ISOLATED_FINALS}. When no character is visited, character and class both remain
 * 0 and the same rules are applied to those values.</p>
 *
 * @param ca character array
 * @param nc number of characters; the scan stops before this index
 * @param s start of the association's character range (unused here)
 * @param e end (exclusive) of the association's character range
 * @return {@code true} if the succeeding context qualifies
 */
private static boolean hasInitialSucceedingContext(int[] ca, int nc, int s, int e) {
    int nextChar = 0;
    int nextClass = 0;
    int pos = e;
    while (pos < nc) {
        nextChar = ca[pos++];
        nextClass = BidiClass.getBidiClass(nextChar);
        if (nextClass != BidiConstants.NSM) {
            break;
        }
    }
    return (nextClass != BidiConstants.AL) ? isZWJ(nextChar) : !hasIsolateFinal(nextChar);
}
/**
 * Checks whether what precedes position {@code s} allows a medial form at {@code [s,e)}.
 *
 * <p>Walks backwards from {@code s - 1} to the nearest character whose bidi class is not
 * NSM. If that character's class is not AL, the result is whether it is a zero width
 * joiner; if it is AL, the result is {@code true} unless the character is listed in
 * {@code ISOLATED_INITIALS}. When no character is visited, character and class both
 * remain 0 and the same rules are applied to those values.</p>
 *
 * @param ca character array
 * @param nc number of characters; indices at or above it are skipped
 * @param s start of the association's character range
 * @param e end of the association's character range (unused here)
 * @return {@code true} if the preceding context qualifies
 */
private static boolean hasMedialPrecedingContext(int[] ca, int nc, int s, int e) {
    int prevChar = 0;
    int prevClass = 0;
    int pos = s;
    while (pos > 0) {
        pos--;
        if (pos >= nc) {
            continue; // index not below nc: skipped
        }
        prevChar = ca[pos];
        prevClass = BidiClass.getBidiClass(prevChar);
        if (prevClass != BidiConstants.NSM) {
            break;
        }
    }
    if (prevClass != BidiConstants.AL) {
        return isZWJ(prevChar);
    }
    return !hasIsolateInitial(prevChar);
}
/**
 * Checks whether the characters in {@code [s,e)} themselves can take a medial form.
 *
 * <p>Two scans are made over the range, each skipping characters of bidi class NSM and
 * zero width joiners: a forward scan for the first remaining character and a backward
 * scan for the last. The result is {@code true} only if both scans leave class AL, the
 * first character is not listed in {@code ISOLATED_FINALS}, and the last character is
 * not listed in {@code ISOLATED_INITIALS}. The backward scan is skipped when the forward
 * scan already fails the AL test.</p>
 *
 * @param ca character array
 * @param nc number of characters; indices outside {@code [0,nc)} are skipped
 * @param s start of the association's character range
 * @param e end (exclusive) of the association's character range
 * @return {@code true} if the range's own context qualifies
 */
private static boolean hasMedialThisContext(int[] ca, int nc, int s, int e) {
    final int length = e - s;

    // forward scan: first non-{NSM,ZWJ} character in [s,e)
    int firstChar = 0;
    int firstClass = 0;
    for (int offset = 0; offset < length; offset++) {
        final int pos = s + offset;
        if ((pos < 0) || (pos >= nc)) {
            continue;
        }
        firstChar = ca[pos];
        firstClass = BidiClass.getBidiClass(firstChar);
        if ((firstClass != BidiConstants.NSM) && !isZWJ(firstChar)) {
            break;
        }
    }
    if (firstClass != BidiConstants.AL) {
        return false;
    }

    // backward scan: last non-{NSM,ZWJ} character in [s,e)
    int lastChar = 0;
    int lastClass = 0;
    int remaining = length;
    while (remaining > 0) {
        remaining--;
        final int pos = s + remaining;
        if ((pos < 0) || (pos >= nc)) {
            continue;
        }
        lastChar = ca[pos];
        lastClass = BidiClass.getBidiClass(lastChar);
        if ((lastClass != BidiConstants.NSM) && !isZWJ(lastChar)) {
            break;
        }
    }
    if (lastClass != BidiConstants.AL) {
        return false;
    }

    return !hasIsolateFinal(firstChar) && !hasIsolateInitial(lastChar);
}
/**
 * Checks whether what follows position {@code e} allows a medial form at {@code [s,e)}.
 *
 * <p>Walks forward from {@code e} to the nearest character whose bidi class is not NSM.
 * If that character's class is not AL, the result is whether it is a zero width joiner;
 * if it is AL, the result is {@code true} unless the character is listed in
 * {@code ISOLATED_FINALS}. When no character is visited, character and class both remain
 * 0 and the same rules are applied to those values.</p>
 *
 * @param ca character array
 * @param nc number of characters; the scan stops before this index
 * @param s start of the association's character range (unused here)
 * @param e end (exclusive) of the association's character range
 * @return {@code true} if the succeeding context qualifies
 */
private static boolean hasMedialSucceedingContext(int[] ca, int nc, int s, int e) {
    int nextChar = 0;
    int nextClass = 0;
    int pos = e;
    while (pos < nc) {
        nextChar = ca[pos++];
        nextClass = BidiClass.getBidiClass(nextChar);
        if (nextClass != BidiConstants.NSM) {
            break;
        }
    }
    return (nextClass != BidiConstants.AL) ? isZWJ(nextChar) : !hasIsolateFinal(nextChar);
}
/**
* Reports whether the preceding context permits a ligature. This implementation places
* no constraint on the preceding context: it ignores all arguments and always returns
* <code>true</code>.
*/
private static boolean hasLigaturePrecedingContext(int[] ca, int nc, int s, int e) {
return true;
}
/**
 * Checks whether what follows position {@code e} permits a ligature at {@code [s,e)}.
 *
 * <p>Walks forward from {@code e} to the nearest character whose bidi class is not NSM
 * and returns whether the class left by the scan is AL. Zero width joiners receive no
 * special treatment here.</p>
 *
 * @param ca character array
 * @param nc number of characters; the scan stops before this index
 * @param s start of the association's character range (unused here)
 * @param e end (exclusive) of the association's character range
 * @return {@code true} if the succeeding context qualifies
 */
private static boolean hasLigatureSucceedingContext(int[] ca, int nc, int s, int e) {
    int nextClass = 0;
    int pos = e;
    while (pos < nc) {
        nextClass = BidiClass.getBidiClass(ca[pos++]);
        // TBD - does ZWJ have impact here?
        if (nextClass != BidiConstants.NSM) {
            break;
        }
    }
    return nextClass == BidiConstants.AL;
}
/**
* Ordered array of Unicode scalars designating those Arabic (Script) Letters
* which exhibit an isolated form in word initial position.
*
* Entries must remain in ascending order: hasIsolateInitial() looks values up with
* Arrays.binarySearch.
*
* NOTE(review): some letters that the Unicode ArabicShaping.txt data appears to class as
* right-joining (e.g. U+0629 TEH MARBUTA, U+06D2 YEH BARREE) are not listed here --
* confirm whether that omission is intentional.
*/
private static final int[] ISOLATED_INITIALS = {
0x0621, // HAMZA
0x0622, // ALEF WITH MADDA ABOVE
0x0623, // ALEF WITH HAMZA ABOVE
0x0624, // WAW WITH HAMZA ABOVE
0x0625, // ALEF WITH HAMZA BELOW
0x0627, // ALEF
0x062F, // DAL
0x0630, // THAL
0x0631, // REH
0x0632, // ZAIN
0x0648, // WAW
0x0671, // ALEF WASLA
0x0672, // ALEF WITH WAVY HAMZA ABOVE
0x0673, // ALEF WITH WAVY HAMZA BELOW
0x0675, // HIGH HAMZA ALEF
0x0676, // HIGH HAMZA WAW
0x0677, // U WITH HAMZA ABOVE
0x0688, // DDAL
0x0689, // DAL WITH RING
0x068A, // DAL WITH DOT BELOW
0x068B, // DAL WITH DOT BELOW AND SMALL TAH
0x068C, // DAHAL
0x068D, // DDAHAL
0x068E, // DUL
0x068F, // DUL WITH THREE DOTS ABOVE DOWNWARDS
0x0690, // DUL WITH FOUR DOTS ABOVE
0x0691, // RREH
0x0692, // REH WITH SMALL V
0x0693, // REH WITH RING
0x0694, // REH WITH DOT BELOW
0x0695, // REH WITH SMALL V BELOW
0x0696, // REH WITH DOT BELOW AND DOT ABOVE
0x0697, // REH WITH TWO DOTS ABOVE
0x0698, // JEH
0x0699, // REH WITH FOUR DOTS ABOVE
0x06C4, // WAW WITH RING
0x06C5, // KIRGHIZ OE
0x06C6, // OE
0x06C7, // U
0x06C8, // YU
0x06C9, // KIRGHIZ YU
0x06CA, // WAW WITH TWO DOTS ABOVE
0x06CB, // VE
0x06CF, // WAW WITH DOT ABOVE
0x06EE, // DAL WITH INVERTED V
0x06EF // REH WITH INVERTED V
};
/**
 * Tells whether a character is listed in {@code ISOLATED_INITIALS}.
 *
 * @param ch Unicode scalar value to look up
 * @return {@code true} if the binary search of the table finds {@code ch}
 */
private static boolean hasIsolateInitial(int ch) {
    final int found = Arrays.binarySearch(ISOLATED_INITIALS, ch);
    return found >= 0;
}
/**
* Ordered array of Unicode scalars designating those Arabic (Script) Letters
* which exhibit an isolated form in word final position.
*
* Entries must remain in ascending order: hasIsolateFinal() looks values up with
* Arrays.binarySearch.
*/
private static final int[] ISOLATED_FINALS = {
0x0621 // HAMZA
};
/**
 * Tells whether a character is listed in {@code ISOLATED_FINALS}.
 *
 * @param ch Unicode scalar value to look up
 * @return {@code true} if the binary search of the table finds {@code ch}
 */
private static boolean hasIsolateFinal(int ch) {
    final int found = Arrays.binarySearch(ISOLATED_FINALS, ch);
    return found >= 0;
}
/**
 * Tells whether a character equals {@code CharUtilities.ZERO_WIDTH_JOINER}.
 *
 * @param ch Unicode scalar value to test
 * @return {@code true} if {@code ch} is the zero width joiner
 */
private static boolean isZWJ(int ch) {
    return CharUtilities.ZERO_WIDTH_JOINER == ch;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy