![JAR search and dependency download from the Maven repository](/logo.png)
eu.fbk.twm.wiki.xmldump.util.ParsedPageLink Maven / Gradle / Ivy
/*
* Copyright (2013) Fondazione Bruno Kessler (http://www.fbk.eu/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.fbk.twm.wiki.xmldump.util;
import de.tudarmstadt.ukp.wikipedia.parser.Link;
import de.tudarmstadt.ukp.wikipedia.parser.Span;
import eu.fbk.twm.utils.AbstractParsedPage;
import eu.fbk.twm.utils.CharacterTable;
import eu.fbk.twm.utils.StringTable;
import org.apache.log4j.Logger;
/**
 * A single link extracted from a parsed wiki page.
 *
 * <p>For a given {@link Link} this class stores:
 * <ul>
 *   <li>the normalized name of the target page ({@code page});</li>
 *   <li>the cleaned surface form of the link ({@code form});</li>
 *   <li>the text of the link's home element that precedes and follows the
 *       link ({@code leftContext} / {@code rightContext}).</li>
 * </ul>
 *
 * <p>{@code page}, {@code form} and {@code normalizePageName} are not declared
 * in this class; presumably they are inherited from {@link AbstractParsedPage}
 * (NOTE(review): confirm against the superclass).
 */
public class ParsedPageLink extends AbstractParsedPage {

    /**
     * Static logger that references the Logger instance named after this class.
     */
    static Logger logger = Logger.getLogger(ParsedPageLink.class.getName());

    /** Text of the home element before the link, or {@code null} when no context is available. */
    private String leftContext;

    /** Text of the home element after the link, or {@code null} when no context is available. */
    private String rightContext;

    /** Character sequence that opens a trailing parenthesised suffix stripped from the form. */
    public final static String START_SUFFIX_PATTERN = " (";

    /** Character that must end the form for a trailing suffix to be stripped. */
    public final static char END_SUFFIX_PATTERN = CharacterTable.RIGHT_PARENTHESIS;

    /**
     * Builds the page name, form and surrounding context from a parsed link.
     *
     * <p>A missing target or text yields an empty string for {@code page} or
     * {@code form} respectively. When the link has no home element, no home
     * element text, or no position, both contexts are left {@code null}.
     *
     * @param link the parsed link; must not be {@code null}
     * @throws StringIndexOutOfBoundsException if the link position lies outside
     *         the text of its home element
     */
    public ParsedPageLink(Link link) {
        String target = link.getTarget();
        if (target != null) {
            page = normalizePageName(target.trim());
        }
        else {
            page = StringTable.EMPTY_STRING;
        }

        String text = link.getText();
        if (text != null) {
            form = removeSuffix(removeQuotes(text.trim()));
        }
        else {
            // Mirror the fallback used for page so that form is never left unassigned.
            form = StringTable.EMPTY_STRING;
        }

        // Guard against links that are not attached to a home element.
        String context = null;
        if (link.getHomeElement() != null) {
            context = link.getHomeElement().getText();
        }
        if (context != null) {
            Span span = link.getPos();
            if (span != null) {
                leftContext = context.substring(0, span.getStart());
                rightContext = context.substring(span.getEnd());
            }
        }
    }

    /**
     * @return the text preceding the link in its home element, possibly {@code null}
     */
    public String getLeftContext() {
        return leftContext;
    }

    /**
     * @param leftContext the text preceding the link
     */
    public void setLeftContext(String leftContext) {
        this.leftContext = leftContext;
    }

    /**
     * @return the text following the link in its home element, possibly {@code null}
     */
    public String getRightContext() {
        return rightContext;
    }

    /**
     * @param rightContext the text following the link
     */
    public void setRightContext(String rightContext) {
        this.rightContext = rightContext;
    }

    /**
     * Repeatedly strips one matching pair of enclosing quotation marks,
     * apostrophes or parentheses from both ends of the string.
     *
     * <p>Stripping stops as soon as the string is shorter than three
     * characters or is no longer enclosed by a matching pair, so at least one
     * character always remains.
     *
     * @param s the string to clean; must not be {@code null}
     * @return the string without its enclosing pairs
     */
    private String removeQuotes(String s) {
        String result = s;
        while (result.length() >= 3) {
            char first = result.charAt(0);
            char last = result.charAt(result.length() - 1);
            boolean quoted = first == CharacterTable.QUOTATION_MARK && last == CharacterTable.QUOTATION_MARK;
            boolean apostrophed = first == CharacterTable.APOSTROPHE && last == CharacterTable.APOSTROPHE;
            boolean parenthesised = first == CharacterTable.LEFT_PARENTHESIS && last == CharacterTable.RIGHT_PARENTHESIS;
            if (!quoted && !apostrophed && !parenthesised) {
                break;
            }
            result = result.substring(1, result.length() - 1);
        }
        return result;
    }

    /**
     * Removes a trailing suffix that starts at the last occurrence of
     * {@link #START_SUFFIX_PATTERN} when the string ends with
     * {@link #END_SUFFIX_PATTERN}; e.g. {@code "Mercury (planet)"} becomes
     * {@code "Mercury"}.
     *
     * @param s the string to clean; must not be {@code null}
     * @return the string without the suffix, or {@code s} itself when it has none
     */
    private String removeSuffix(String s) {
        int start = s.lastIndexOf(START_SUFFIX_PATTERN);
        // start != -1 implies s has at least two characters, so charAt is safe.
        if (start != -1 && s.charAt(s.length() - 1) == END_SUFFIX_PATTERN) {
            return s.substring(0, start);
        }
        return s;
    }

    /**
     * @return a string listing the page, form, left context and right context
     */
    @Override
    public String toString() {
        return "ParsedPageLink{" +
                "page='" + page + '\'' +
                ", form='" + form + '\'' +
                ", leftContext='" + leftContext + '\'' +
                ", rightContext='" + rightContext + '\'' +
                '}';
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy