All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.fbk.twm.wiki.xmldump.util.ParsedPageLink Maven / Gradle / Ivy

/*
 * Copyright (2013) Fondazione Bruno Kessler (http://www.fbk.eu/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eu.fbk.twm.wiki.xmldump.util;

import de.tudarmstadt.ukp.wikipedia.parser.Link;
import de.tudarmstadt.ukp.wikipedia.parser.Span;
import eu.fbk.twm.utils.AbstractParsedPage;
import eu.fbk.twm.utils.CharacterTable;
import eu.fbk.twm.utils.StringTable;
import org.apache.log4j.Logger;

/**
 * A link found in a parsed wiki page, together with the text surrounding it.
 * <p>
 * The link target and the link text are stored in the {@code page} and
 * {@code form} fields inherited from {@link AbstractParsedPage}; this class
 * adds the portions of the home element text that precede and follow the link.
 * <p>
 * Created by giuliano on 1/18/13.
 */
public class ParsedPageLink extends AbstractParsedPage {
	/**
	 * Define a static logger variable so that it references the
	 * Logger instance named ParsedPageLink.
	 */
	static Logger logger = Logger.getLogger(ParsedPageLink.class.getName());

	/** Text of the home element that precedes the link; {@code null} if no context was available. */
	private String leftContext;

	/** Text of the home element that follows the link; {@code null} if no context was available. */
	private String rightContext;

	/** Marks the beginning of a trailing parenthesised suffix, e.g. the {@code " ("} in {@code "Name (suffix)"}. */
	public final static String START_SUFFIX_PATTERN = " (";

	/** Character that must terminate the text for a trailing suffix to be removed. */
	public final static char END_SUFFIX_PATTERN = CharacterTable.RIGHT_PARENTHESIS;

	/**
	 * Builds a parsed link from a parser {@link Link}.
	 * <ul>
	 * <li>{@code page} is the trimmed, normalized link target, or the empty
	 * string when the link has no target.</li>
	 * <li>{@code form} is the trimmed link text with enclosing
	 * quotes/parentheses and a trailing {@code " (...)"} suffix removed. When
	 * the link has no text, a {@code null} form is replaced by the empty
	 * string, consistently with {@code page}.</li>
	 * <li>The left and right contexts are the parts of the home element text
	 * before and after the link span. They stay {@code null} when the home
	 * element, its text, or the span is missing.</li>
	 * </ul>
	 *
	 * @param link the link to convert; must not be {@code null}
	 */
	public ParsedPageLink(Link link) {
		String target = link.getTarget();
		if (target != null) {
			page = normalizePageName(target.trim());
		}
		else {
			page = StringTable.EMPTY_STRING;
		}

		String text = link.getText();
		if (text != null) {
			form = removeSuffix(removeQuotes(text.trim()));
		}
		else if (form == null) {
			// Mirror the handling of page: do not leave the form null when
			// the link carries no text. A non-null value already present in
			// the inherited field is left untouched.
			form = StringTable.EMPTY_STRING;
		}

		// A link without a home element has no context to split.
		String context = null;
		if (link.getHomeElement() != null) {
			context = link.getHomeElement().getText();
		}

		if (context != null) {
			Span span = link.getPos();
			if (span != null) {
				// Clamp the offsets to the context bounds so that an
				// inconsistent span cannot make substring() throw; for a
				// valid span the result is the same as using it directly.
				int length = context.length();
				int start = Math.max(0, Math.min(span.getStart(), length));
				int end = Math.max(start, Math.min(span.getEnd(), length));
				leftContext = context.substring(0, start);
				rightContext = context.substring(end);
			}
		}
	}

	/**
	 * @return the text preceding the link in its home element, or
	 *         {@code null} if it was never set
	 */
	public String getLeftContext() {
		return leftContext;
	}

	/**
	 * @param leftContext the text preceding the link
	 */
	public void setLeftContext(String leftContext) {
		this.leftContext = leftContext;
	}

	/**
	 * @return the text following the link in its home element, or
	 *         {@code null} if it was never set
	 */
	public String getRightContext() {
		return rightContext;
	}

	/**
	 * @param rightContext the text following the link
	 */
	public void setRightContext(String rightContext) {
		this.rightContext = rightContext;
	}

	/**
	 * Strips every layer of matching enclosing delimiters from {@code s}:
	 * a pair of quotation marks, a pair of apostrophes, or a left/right
	 * parenthesis pair. Stripping stops as soon as fewer than three
	 * characters remain or the first and last characters no longer form
	 * one of those pairs.
	 *
	 * @param s the text to clean; must not be {@code null}
	 * @return {@code s} without its enclosing delimiters
	 */
	private static String removeQuotes(String s) {
		int begin = 0;
		int end = s.length();
		// Narrow the window instead of recursing, so only one substring
		// is created regardless of how many layers are stripped.
		while (end - begin >= 3 && isEnclosingPair(s.charAt(begin), s.charAt(end - 1))) {
			begin++;
			end--;
		}
		return s.substring(begin, end);
	}

	/**
	 * Tells whether {@code first} and {@code last} are a delimiter pair
	 * that {@link #removeQuotes(String)} strips.
	 */
	private static boolean isEnclosingPair(char first, char last) {
		if (first == CharacterTable.QUOTATION_MARK) {
			return last == CharacterTable.QUOTATION_MARK;
		}
		if (first == CharacterTable.APOSTROPHE) {
			return last == CharacterTable.APOSTROPHE;
		}
		if (first == CharacterTable.LEFT_PARENTHESIS) {
			return last == CharacterTable.RIGHT_PARENTHESIS;
		}
		return false;
	}

	/**
	 * Removes a trailing parenthesised suffix: when {@code s} ends with
	 * {@link #END_SUFFIX_PATTERN}, everything from the last occurrence of
	 * {@link #START_SUFFIX_PATTERN} onwards is dropped.
	 *
	 * @param s the text to clean; must not be {@code null}
	 * @return {@code s} without the suffix, or {@code s} itself if it has none
	 */
	private static String removeSuffix(String s) {
		int suffixStart = s.lastIndexOf(START_SUFFIX_PATTERN);
		// A match guarantees s is non-empty, so reading the last character is safe.
		if (suffixStart != -1 && s.charAt(s.length() - 1) == END_SUFFIX_PATTERN) {
			return s.substring(0, suffixStart);
		}
		return s;
	}

	@Override
	public String toString() {
		return "ParsedPageLink{" +
				"page='" + page + '\'' +
				", form='" + form + '\'' +
				", leftContext='" + leftContext + '\'' +
				", rightContext='" + rightContext + '\'' +
				'}';
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy