com.digitalpebble.stormcrawler.tika.XMLCharacterRecognizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of storm-crawler-tika Show documentation
Show all versions of storm-crawler-tika Show documentation
Tika-based parser bolt for StormCrawler
/**
* Licensed to DigitalPebble Ltd under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* DigitalPebble licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* XXX NUTCH COMMENT
* XXX [email protected]: This class is copied verbatim from Xalan-J 2.6.0
* XXX distribution, org.apache.xml.utils.XMLCharacterRecognizer, in order to
* avoid dependency on Xalan.
*/
package com.digitalpebble.stormcrawler.tika;
/**
* Class used to verify whether the specified ch conforms to the XML
* 1.0 definition of whitespace.
*/
final class XMLCharacterRecognizer {

    /** Not instantiable: this class only hosts static helper methods. */
    private XMLCharacterRecognizer() {
    }

    /**
     * Returns whether the specified character conforms to the XML 1.0
     * definition of whitespace. Refer to
     * <a href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">the
     * definition of <code>S</code></a> for details. The accepted characters
     * are space (0x20), tab (0x09), carriage return (0x0D) and line feed
     * (0x0A); nothing else (e.g. form feed or non-breaking space) matches.
     *
     * @param ch
     *            Character to check as XML whitespace.
     * @return <code>true</code> if <code>ch</code> is XML whitespace;
     *         otherwise <code>false</code>.
     */
    static boolean isWhiteSpace(char ch) {
        return (ch == 0x20) || (ch == 0x09) || (ch == 0xD) || (ch == 0xA);
    }

    /**
     * Tells whether a region of a character array consists solely of XML
     * whitespace. An empty region (<code>length</code> of zero or less)
     * yields <code>true</code> without the array being read.
     *
     * @param ch
     *            Character array to check as XML whitespace.
     * @param start
     *            Index of the first character to check.
     * @param length
     *            Number of characters to check, starting at
     *            <code>start</code>.
     * @return <code>true</code> if every character in the region is XML
     *         whitespace; otherwise <code>false</code>.
     * @throws ArrayIndexOutOfBoundsException
     *             if the scan reaches an index outside the array before a
     *             non-whitespace character is found.
     * @throws NullPointerException
     *             if <code>ch</code> is <code>null</code> and the region is
     *             not empty.
     */
    static boolean isWhiteSpace(char[] ch, int start, int length) {
        int end = start + length;
        for (int s = start; s < end; s++) {
            if (!isWhiteSpace(ch[s])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether a buffer consists solely of XML whitespace.
     *
     * @param buf
     *            StringBuffer to check as XML whitespace; may be
     *            <code>null</code>.
     * @return <code>true</code> if the buffer is <code>null</code>, empty, or
     *         contains only XML whitespace; otherwise <code>false</code>.
     */
    static boolean isWhiteSpace(StringBuffer buf) {
        // A null buffer is treated like an empty one so that this overload
        // agrees with isWhiteSpace(String) instead of throwing.
        return containsOnlyWhiteSpace(buf);
    }

    /**
     * Tells whether a string consists solely of XML whitespace.
     *
     * @param s
     *            String to check as XML whitespace; may be <code>null</code>.
     * @return <code>true</code> if the string is <code>null</code>, empty, or
     *         contains only XML whitespace; otherwise <code>false</code>.
     */
    static boolean isWhiteSpace(String s) {
        return containsOnlyWhiteSpace(s);
    }

    /**
     * Shared scan used by the String and StringBuffer overloads.
     *
     * @param chars
     *            Character sequence to scan; may be <code>null</code>.
     * @return <code>false</code> as soon as a non-whitespace character is
     *         found; <code>true</code> for <code>null</code>, empty, or
     *         all-whitespace input.
     */
    private static boolean containsOnlyWhiteSpace(CharSequence chars) {
        if (chars == null) {
            return true;
        }
        // The length is read once up front, as the original loops did.
        int n = chars.length();
        for (int i = 0; i < n; i++) {
            if (!isWhiteSpace(chars.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy