All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.server.resource.TikaParsers Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.server.resource;

import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.server.HTMLHelper;
import org.eclipse.jetty.util.ajax.JSON;

/**
 * 

Provides details of all the {@link Parser}s registered with * Apache Tika, similar to --list-parsers and * --list-parser-details within the Tika CLI. */ @Path("/parsers") public class TikaParsers { private static final ParseContext EMPTY_PC = new ParseContext(); private HTMLHelper html; public TikaParsers() { this.html = new HTMLHelper(); } @GET @Path("/details") @Produces("text/html") public String getParserDetailsHTML() { return getParsersHTML(true); } @GET @Produces("text/html") public String getParsersHTML() { return getParsersHTML(false); } protected String getParsersHTML(boolean withMimeTypes) { ParserDetails p = new ParserDetails(TikaResource.getConfig().getParser()); StringBuffer h = new StringBuffer(); html.generateHeader(h, "Parsers available to Apache Tika"); parserAsHTML(p, withMimeTypes, h, 2); html.generateFooter(h); return h.toString(); } private void parserAsHTML(ParserDetails p, boolean withMimeTypes, StringBuffer html, int level) { html.append(""); html.append(p.shortName); html.append(""); html.append("

Class: "); html.append(p.className); html.append("

"); if (p.isDecorated) { html.append("

Decorated Parser"); if (p.decoratedBy != null) html.append( " - ").append(p.decoratedBy); html.append("

"); } if (p.isComposite) { html.append("

Composite Parser

"); html.append("
\n"); for (Parser cp : p.childParsers) { parserAsHTML(new ParserDetails(cp), withMimeTypes, html, level + 1); } html.append("
\n"); } else if (withMimeTypes) { html.append("

Mime Types:"); html.append("

    "); for (MediaType mt : p.supportedTypes) { html.append("
  • "); html.append(mt.toString()); html.append("
  • "); } html.append("
"); html.append("

"); } html.append("\n"); } @GET @Path("/details") @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) public String getParserDetailsJSON() { return getParsersJSON(true); } @GET @Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON) public String getParsersJSON() { return getParsersJSON(false); } protected String getParsersJSON(boolean withMimeTypes) { Map details = new HashMap(); parserAsMap(new ParserDetails(TikaResource.getConfig().getParser()), withMimeTypes, details); return JSON.toString(details); } private void parserAsMap(ParserDetails p, boolean withMimeTypes, Map details) { details.put("name", p.className); details.put("composite", p.isComposite); details.put("decorated", p.isDecorated); if (p.isComposite) { List> c = new ArrayList>(); for (Parser cp : p.childParsers) { Map cdet = new HashMap(); parserAsMap(new ParserDetails(cp), withMimeTypes, cdet); c.add(cdet); } details.put("children", c); } else if (withMimeTypes) { List mts = new ArrayList(p.supportedTypes.size()); for (MediaType mt : p.supportedTypes) { mts.add(mt.toString()); } details.put("supportedTypes", mts); } } @GET @Path("/details") @Produces("text/plain") public String getParserDetailssPlain() { return getParsersPlain(true); } @GET @Produces("text/plain") public String getParsersPlain() { return getParsersPlain(false); } protected String getParsersPlain(boolean withMimeTypes) { StringBuffer text = new StringBuffer(); renderParser(new ParserDetails(TikaResource.getConfig().getParser()), withMimeTypes, text, ""); return text.toString(); } private void renderParser(ParserDetails p, boolean withMimeTypes, StringBuffer text, String indent) { String nextIndent = indent + " "; text.append(indent); text.append(p.className); if (p.isDecorated) { text.append(" (Decorated Parser"); if (p.decoratedBy != null) { text.append(" ").append(p.decoratedBy); } text.append(")"); } if (p.isComposite) { text.append(" (Composite Parser):\n"); for (Parser cp : p.childParsers) { renderParser(new 
ParserDetails(cp), withMimeTypes, text, nextIndent); } } else { text.append("\n"); if (withMimeTypes) { for (MediaType mt : p.supportedTypes) { text.append(nextIndent); text.append("Supports: "); text.append(mt.toString()); text.append("\n"); } } } } private static class ParserDetails { private String className; private String shortName; private boolean isComposite; private boolean isDecorated; private String decoratedBy; private Set supportedTypes; private List childParsers; private ParserDetails(Parser p) { if (p instanceof ParserDecorator) { isDecorated = true; decoratedBy = ((ParserDecorator)p).getDecorationName(); p = ((ParserDecorator)p).getWrappedParser(); } className = p.getClass().getName(); shortName = className.substring(className.lastIndexOf('.') + 1); if (p instanceof CompositeParser) { isComposite = true; supportedTypes = Collections.emptySet(); // Get the unique set of child parsers Set children = new HashSet( ((CompositeParser) p).getParsers(EMPTY_PC).values()); // Sort it by class name childParsers = new ArrayList(children); Collections.sort(childParsers, new Comparator() { @Override public int compare(Parser p1, Parser p2) { return p1.getClass().getName().compareTo(p2.getClass().getName()); } }); } else { supportedTypes = p.getSupportedTypes(EMPTY_PC); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy