All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datavec.nlp.movingwindow.Window Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta7
Show newest version
/*
 *  * Copyright 2016 Skymind, Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 */

package org.datavec.nlp.movingwindow;

import org.apache.commons.lang3.StringUtils;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;


/**
 * A representation of a sliding window.
 * This is used for creating training examples.
 * @author Adam Gibson
 *
 */
public class Window implements Serializable {
	/**
	 * 
	 */
	private static final long serialVersionUID = 6359906393699230579L;
	private List words;
	private String label = "NONE";
	private boolean beginLabel;
	private boolean endLabel;
  private int median;
	private static String BEGIN_LABEL = "<([A-Z]+|\\d+)>";
	private static String END_LABEL = "";
    private int begin,end;

	/**
	 * Creates a window with a context of size 3
	 * @param words a collection of strings of size 3
	 */
	public Window(Collection words,int begin,int end) {
		this(words,5,begin,end);

	}

	public String asTokens() {
		return StringUtils.join(words, " ");
	}


	/**
	 * Initialize a window with the given size
	 * @param words the words to use 
	 * @param windowSize the size of the window
     * @param begin the begin index for the window
     * @param end the end index for the window
	 */
	public Window(Collection words, int windowSize,int begin,int end) {
		if(words == null)
			throw new IllegalArgumentException("Words must be a list of size 3");

		this.words = new ArrayList<>(words);
    int windowSize1 = windowSize;
        this.begin = begin;
        this.end = end;
		initContext();
	}


	private void initContext() {
		int median = (int) Math.floor(words.size() / 2);
		List begin = words.subList(0, median);
		List after = words.subList(median + 1,words.size());


		for(String s : begin) {
			if(s.matches(BEGIN_LABEL)) {
				this.label = s.replaceAll("(<|>)","").replace("/","");
				beginLabel = true;
			}
			else if(s.matches(END_LABEL)) {
				endLabel = true;
				this.label = s.replaceAll("(<|>|/)","").replace("/","");

			}

		}

		for(String s1 : after) {

            if(s1.matches(BEGIN_LABEL)) {
                this.label = s1.replaceAll("(<|>)","").replace("/","");
                beginLabel = true;
            }

			if(s1.matches(END_LABEL)) {
				endLabel = true;
				this.label = s1.replaceAll("(<|>)","");

			}
		}
		this.median = median;

	}


	
	
	
	@Override
	public String toString() {
		return words.toString();
	}

	public List getWords() {
		return words;
	}

	public void setWords(List words) {
		this.words = words;
	}

	public String getWord(int i) {
		return words.get(i);
	}

	public String getFocusWord() {
		return words.get(median);
	}

	public boolean isBeginLabel() {
		return !label.equals("NONE") && beginLabel;
	}

	public boolean isEndLabel() {
		return !label.equals("NONE") && endLabel;
	}

	public String getLabel() {
		return label.replace("/","");
	}

	public int getWindowSize() {
		return words.size();
	}

	public int getMedian() {
		return median;
	}

	public void setLabel(String label) {
		this.label = label;
	}

    public int getBegin() {
        return begin;
    }

    public void setBegin(int begin) {
        this.begin = begin;
    }

    public int getEnd() {
        return end;
    }

    public void setEnd(int end) {
        this.end = end;
    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy