// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download or modify it as often as you want.
/*
* Copyright (c) 2019 The University of Sheffield.
*
* This file is part of gateplugin-Format_Bdoc
* (see https://github.com/GateNLP/gateplugin-Format_Bdoc).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package gate.lib.basicdocument;
import gate.Annotation;
import gate.Document;
import gate.corpora.DocumentImpl;
import gate.lib.basicdocument.docformats.Format;
import gate.lib.basicdocument.docformats.Saver;
import gate.util.GateRuntimeException;
import java.io.File;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
/**
* Class for building a BdocDocument.
*
* This allows building the document from a GATE document or from scratch,
* optionally limiting the parts to add to the document.
* Once all the parts are ready, this can be used to return the BdocDocument
* instance or directly serialise it as JSON to various destinations.
*
* @author Johann Petrak [email protected]
*/
public class BdocDocumentBuilder {
String text;
HashMap> knownSets = new HashMap<>();
Set includedSetNames = new HashSet<>();
HashMap nextAnnotationIds = new HashMap<>();
HashMap includedFeatures = new HashMap<>();
boolean includePlaceholderSets = false;
String offset_type = "j";
String name = "";
int nextAnnId = 1;
/**
 * Initialise this builder from a GATE document.
 *
 * Copies the document text and name, takes over the document's next
 * annotation id, registers the default annotation set (under the empty
 * string) and every named annotation set, and converts the document
 * features into the builder's feature map via BdocUtils.featureMap2Map.
 * After this call all annotation sets and all document features of the
 * GATE document are marked for inclusion.
 *
 * The document type is checked before any builder state is modified, so a
 * rejected document leaves the builder exactly as it was.
 *
 * @param doc the GATE document to build the BdocDocument/JSON from; must be
 * a gate.corpora.DocumentImpl
 * @return this builder, to allow call chaining
 * @throws GateRuntimeException if doc is not a gate.corpora.DocumentImpl
 * (this includes null)
 */
public BdocDocumentBuilder fromGate(Document doc) {
  // Fail fast: validate before touching text/name so a failed call does not
  // leave the builder half-initialised.
  if (!(doc instanceof DocumentImpl)) {
    throw new GateRuntimeException("Cannot build Bdoc document from something that is not a gate.corpora.DocumentImpl");
  }
  this.text = doc.getContent().toString();
  this.name = doc.getName();
  // Only DocumentImpl exposes the next annotation id, hence the cast.
  this.nextAnnId = ((DocumentImpl) doc).getNextAnnotationId();
  // The default annotation set is stored under the empty name.
  knownSets.put("", doc.getAnnotations());
  includedSetNames.add("");
  // setName (not "name") so the loop variable does not shadow the field.
  for (String setName : doc.getNamedAnnotationSets().keySet()) {
    knownSets.put(setName, doc.getAnnotations(setName));
    includedSetNames.add(setName);
  }
  BdocUtils.featureMap2Map(doc.getFeatures(), includedFeatures);
  return this;
}
/**
 * Add an annotation set together with the annotation id to start from.
 *
 * Registers the given annotations under the given set name, marks that set
 * as included and records nextAnnotationId for it. A set already known
 * under the same name is replaced.
 *
 * NOTE(review): earlier documentation states that if the largest annotation
 * id in the set exceeds nextAnnotationId, the larger id is used instead.
 * This method stores nextAnnotationId unchanged; presumably the adjustment
 * happens when the document is built - confirm.
 *
 * @param name the name to store the set under (may differ from the original
 * name if an AnnotationSet is passed). Must not be null; the default set
 * uses the empty string as its name.
 * @param annset the annotations, either a GATE AnnotationSet or any other
 * set of annotations
 * @param nextAnnotationId the annotation id to start from when new
 * annotations are added to this set in the Bdoc document
 * @return this builder, to allow call chaining
 */
public BdocDocumentBuilder addSet(String name, Set<Annotation> annset,
    int nextAnnotationId) {
  knownSets.put(name, annset);
  includedSetNames.add(name);
  nextAnnotationIds.put(name, nextAnnotationId);
  return this;
}
/**
* Add all features from the given (feature) map as document features.
*
* The map can have keys and values of any type, but when adding features,
* the following conversions are carried out: null keys are removed,
* any key that is not a String is converted to String. The value of all
* features should be something that is directly JSON-serialisable, but this
* is not checked. Note that some types can get serialised to JSON but
* will get converted to a different type when read back from JSON!
*
* The user is responsible for making sure that value types work with the
* JSON default serialisation.
*
* If a feature has already been added previously, its old value is
* replaced.
*
* @param fm a map to interpret as a feature map
* @return modified BdocDocumentBuilder
*/
public BdocDocumentBuilder addFeatures(Map