All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.serialization.pipes.JsonFetchEmitTuple Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.serialization.pipes;

import static org.apache.tika.serialization.ParseContextSerializer.PARSE_CONTEXT;

import java.io.IOException;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.pipes.FetchEmitTuple;
import org.apache.tika.pipes.emitter.EmitKey;
import org.apache.tika.pipes.fetcher.FetchKey;
import org.apache.tika.serialization.JsonMetadata;
import org.apache.tika.serialization.ParseContextDeserializer;
import org.apache.tika.serialization.ParseContextSerializer;
import org.apache.tika.utils.StringUtils;

public class JsonFetchEmitTuple {

    public static final String ID = "id";
    public static final String FETCHER = "fetcher";
    public static final String FETCHKEY = "fetchKey";
    public static final String FETCH_RANGE_START = "fetchRangeStart";
    public static final String FETCH_RANGE_END = "fetchRangeEnd";
    public static final String EMITTER = "emitter";
    public static final String EMITKEY = "emitKey";
    public static final String METADATAKEY = "metadata";
    public static final String ON_PARSE_EXCEPTION = "onParseException";

    public static FetchEmitTuple fromJson(Reader reader) throws IOException {
        JsonNode root = new ObjectMapper().readTree(reader);
        return parseFetchEmitTuple(root);
    }


    static FetchEmitTuple parseFetchEmitTuple(JsonNode root) throws IOException {
        String id = readVal(ID, root, null, true);
        String fetcherName = readVal(FETCHER, root, null, true);
        String fetchKey = readVal(FETCHKEY, root, null, true);
        String emitterName = readVal(EMITTER, root, "", false);
        String emitKey = readVal(EMITKEY, root, "", false);
        long fetchRangeStart = readLong(FETCH_RANGE_START, root, -1l, false);
        long fetchRangeEnd = readLong(FETCH_RANGE_END, root, -1l, false);
        Metadata metadata = readMetadata(root);
        JsonNode parseContextNode = root.get(PARSE_CONTEXT);
        ParseContext parseContext = parseContextNode == null ? new ParseContext() : ParseContextDeserializer.readParseContext(parseContextNode);
        FetchEmitTuple.ON_PARSE_EXCEPTION onParseException = readOnParseException(root);

        return new FetchEmitTuple(id, new FetchKey(fetcherName, fetchKey, fetchRangeStart, fetchRangeEnd), new EmitKey(emitterName, emitKey), metadata, parseContext,
                onParseException);
    }

    private static FetchEmitTuple.ON_PARSE_EXCEPTION readOnParseException(JsonNode root) throws IOException {
        JsonNode onParseExNode = root.get(ON_PARSE_EXCEPTION);
        if (onParseExNode == null) {
            return FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT;
        }
        String txt = onParseExNode.asText();
        if ("skip".equalsIgnoreCase(txt)) {
            return FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP;
        } else if ("emit".equalsIgnoreCase(txt)) {
            return FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT;
        } else {
            throw new IOException(ON_PARSE_EXCEPTION + " must be either 'skip' or 'emit'");
        }
    }

    private static Metadata readMetadata(JsonNode root) {
        JsonNode metadataNode = root.get(METADATAKEY);
        if (metadataNode == null) {
            return new Metadata();
        }
        Metadata metadata = new Metadata();
        Iterator> it = metadataNode.fields();
        while (it.hasNext()) {
            Map.Entry e = it.next();
            JsonNode vals = e.getValue();
            String k = e.getKey();
            if (vals.isArray()) {
                Iterator arrIt = vals.iterator();
                while (arrIt.hasNext()) {
                    JsonNode arrVal = arrIt.next();
                    metadata.add(k, arrVal.textValue());
                }
            } else {
                metadata.set(k, vals.asText());
            }
        }
        return metadata;
    }

    private static String readVal(String key, JsonNode jsonObj, String defaultRet, boolean isRequired) throws IOException {
        JsonNode valNode = jsonObj.get(key);
        if (valNode == null) {
            if (isRequired) {
                throw new IOException("required value string, but see: " + key);
            }
            return defaultRet;
        }
        return valNode.asText();
    }

    private static long readLong(String key, JsonNode jsonObj, long defaultVal, boolean isRequired) throws IOException {
        JsonNode val = jsonObj.get(key);
        if (val == null) {
            if (isRequired) {
                throw new IOException("required value long, but see: " + key);
            }
            return defaultVal;
        }
        return val.longValue();
    }

    public static String toJson(FetchEmitTuple t) throws IOException {
        StringWriter writer = new StringWriter();
        toJson(t, writer);
        return writer.toString();
    }

    public static void toJson(FetchEmitTuple t, Writer writer) throws IOException {

        try (JsonGenerator jsonGenerator = new JsonFactory().createGenerator(writer)) {
            writeTuple(t, jsonGenerator);
        }
    }

    static void writeTuple(FetchEmitTuple t, JsonGenerator jsonGenerator) throws IOException {
        jsonGenerator.writeStartObject();
        jsonGenerator.writeStringField(ID, t.getId());
        jsonGenerator.writeStringField(FETCHER, t
                .getFetchKey()
                .getFetcherName());
        jsonGenerator.writeStringField(FETCHKEY, t
                .getFetchKey()
                .getFetchKey());
        if (t
                .getFetchKey()
                .hasRange()) {
            jsonGenerator.writeNumberField(FETCH_RANGE_START, t
                    .getFetchKey()
                    .getRangeStart());
            jsonGenerator.writeNumberField(FETCH_RANGE_END, t
                    .getFetchKey()
                    .getRangeEnd());
        }
        jsonGenerator.writeStringField(EMITTER, t
                .getEmitKey()
                .getEmitterName());
        if (!StringUtils.isBlank(t
                .getEmitKey()
                .getEmitKey())) {
            jsonGenerator.writeStringField(EMITKEY, t
                    .getEmitKey()
                    .getEmitKey());
        }
        if (t
                .getMetadata()
                .size() > 0) {
            jsonGenerator.writeFieldName(METADATAKEY);
            JsonMetadata.writeMetadataObject(t.getMetadata(), jsonGenerator, false);
        }

        jsonGenerator.writeStringField(ON_PARSE_EXCEPTION, t
                .getOnParseException()
                .name()
                .toLowerCase(Locale.US));
        if (!t
                .getParseContext()
                .isEmpty()) {
            ParseContextSerializer s = new ParseContextSerializer();
            s.serialize(t.getParseContext(), jsonGenerator, null);
        }
        jsonGenerator.writeEndObject();

    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy