All downloads are FREE. Search and download functionality uses the official Maven repository.

io.github.repir.Repository.StringLookupFeature Maven / Gradle / Ivy

The newest version!
package io.github.repir.Repository;

import io.github.repir.tools.extract.Content;
import io.github.repir.EntityReader.MapReduce.TermEntityKey;
import io.github.repir.EntityReader.MapReduce.TermEntityValue;
import io.github.repir.tools.io.Datafile;
import io.github.repir.tools.lib.Log;
import io.github.repir.tools.io.struct.StructuredFile;
import java.io.IOException;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Generic class for Features that are stored in the repository. Implementations
 * must declare a StructuredFileIntID file, (usually an extension of
 * RecordBinary that ensures records have a unique ID (int)). For performance,
 * the features that are merged with other features should be stored physically
 * sorted on ID. The second declaration is a data type, which can be complex.
 *
 * @author jeroen
 * @param  FileType that extends StructuredFileIntID
 * @param  Data type of the feature
 */
public abstract class StringLookupFeature extends StoredUnreportableFeature implements ReducibleFeature {

    public static Log log = new Log(StringLookupFeature.class);
    public String key;

    public StringLookupFeature(Repository repository, String field, String key) {
        super(repository, field);
        this.key = key;
        //log.info("StringLookupfeature %s %s %s", getCanonicalName(), field, key);
    }

    @Override
    public String getCanonicalName() {
        return canonicalName(getClass(), this.getField(), key);
    }

    public String extract(Content entity) {
        return entity.get(key).getContentStr();
    }

    @Override
    public TermEntityKey createMapOutputKey(int feature, String docname, Content entity) {
        String keyname = extract(entity);
        TermEntityKey t = TermEntityKey.createTermDocKey(0, feature, 0, keyname);
        t.type = TermEntityKey.Type.LOOKUPFEATURE;
        return t;
    }

    TermEntityKey outkey;
    TermEntityValue outvalue = new TermEntityValue();

    @Override
    public void writeMap(Mapper.Context context, int feature, String docname, Content entity) throws IOException, InterruptedException {
        outkey = createMapOutputKey(feature, docname, entity);
        setMapOutputValue(outvalue, entity);
        context.write(outkey, outvalue);
    }

    abstract public void setMapOutputValue(TermEntityValue writer, Content doc);

    public abstract C get(String term);

    @Override
    public abstract F createFile(Datafile datafile);

    @Override
    public void finishReduce() {
        if (file != null) {
            closeWrite();
            file = null;
        }
    }

    @Override
    public void startReduce(int buffersize) {
        getFile().setBufferSize(buffersize);
        openWrite();
    }

    @Override
    public void openRead() {
        getFile().openRead();
    }

    @Override
    public void closeRead() {
        getFile().closeRead();
        file = null;
    }

    public boolean hasNext() {
        return file.hasNext();
    }

    public boolean next() {
        return file.nextRecord();
    }

    public void skip() {
        file.skipRecord();
    }

    public void openWrite() {
        getFile().openWrite();
    }

    public void closeWrite() {
        getFile().closeWrite();
        file = null;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy