// org.apache.tika.server.resource.TikaParsers Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.server.resource;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.server.HTMLHelper;
import org.eclipse.jetty.util.ajax.JSON;
/**
* Provides details of all the {@link Parser}s registered with
* Apache Tika, similar to --list-parsers and
* --list-parser-details within the Tika CLI.
*/
@Path("/parsers")
public class TikaParsers {
// Shared, empty parse context. It is not referenced in the visible part of this
// class; presumably it is used when asking parsers for their supported types
// (TODO confirm against the ParserDetails helper).
private static final ParseContext EMPTY_PC = new ParseContext();
// Helper that writes the common header and footer of the HTML responses.
private HTMLHelper html;
/**
 * Creates the resource and the {@link HTMLHelper} it uses to render
 * the header and footer of its HTML responses.
 */
public TikaParsers() {
    html = new HTMLHelper();
}
/**
 * Handles GET requests for the "/details" sub-path when HTML is requested.
 *
 * @return the parser listing rendered as HTML, including the mime types
 *         supported by each non-composite parser
 */
@GET
@Path("/details")
@Produces("text/html")
public String getParserDetailsHTML() {
    final boolean includeMimeTypes = true;
    return getParsersHTML(includeMimeTypes);
}
/**
 * Handles GET requests on the resource root when HTML is requested.
 *
 * @return the parser listing rendered as HTML, without the mime types
 */
@GET
@Produces("text/html")
public String getParsersHTML() {
    final boolean includeMimeTypes = false;
    return getParsersHTML(includeMimeTypes);
}
/**
 * Renders the configured parser, and recursively its children, as one HTML page.
 *
 * @param withMimeTypes whether the supported mime types of each
 *                      non-composite parser should be listed
 * @return the complete page: header, parser listing and footer
 */
protected String getParsersHTML(boolean withMimeTypes) {
    ParserDetails root = new ParserDetails(TikaResource.getConfig().getParser());
    StringBuffer page = new StringBuffer();

    html.generateHeader(page, "Parsers available to Apache Tika");
    // The top-most parser is rendered at level 2; children go one level deeper each.
    parserAsHTML(root, withMimeTypes, page, 2);
    html.generateFooter(page);

    return page.toString();
}
/**
 * Appends a description of one parser to the given buffer and, for a
 * composite parser, recurses into each of its child parsers.
 *
 * The output consists of the short name, the class name, an optional
 * decoration note and then either the child parsers (composite case) or,
 * when requested, the list of supported mime types.
 *
 * NOTE(review): several string literals below are empty or broken across
 * lines. This looks like markup that was lost when the file was extracted;
 * restore the literals from the upstream source before compiling.
 *
 * @param p             details of the parser to describe
 * @param withMimeTypes whether to list the supported mime types of a
 *                      non-composite parser
 * @param html          buffer that receives the output; this parameter
 *                      shadows the HTMLHelper field of the same name
 * @param level         nesting depth, incremented by one for each level of
 *                      child parsers; presumably the heading level in the
 *                      original markup (TODO confirm)
 */
private void parserAsHTML(ParserDetails p, boolean withMimeTypes, StringBuffer html, int level) {
// Title of the entry: the short name of the parser.
html.append("");
html.append(p.shortName);
html.append(" ");
html.append("
Class: ");
html.append(p.className);
html.append("
");
// Decoration note, followed by the decorator description when one is known.
if (p.isDecorated) {
html.append("Decorated Parser");
if (p.decoratedBy != null)
html.append( " - ").append(p.decoratedBy);
html.append("
");
}
if (p.isComposite) {
html.append("Composite Parser
");
html.append("\n");
// Describe every child parser one level deeper.
for (Parser cp : p.childParsers) {
parserAsHTML(new ParserDetails(cp), withMimeTypes, html, level + 1);
}
html.append("\n");
} else if (withMimeTypes) {
// Mime types are only listed for non-composite parsers.
html.append("Mime Types:");
html.append("
");
for (MediaType mt : p.supportedTypes) {
html.append("- ");
html.append(mt.toString());
html.append("
");
}
html.append("
");
html.append("");
}
html.append("\n");
}
/**
 * Handles GET requests for the "/details" sub-path when JSON is requested.
 *
 * @return the parser listing serialized as JSON, with mime types requested
 */
@GET
@Path("/details")
@Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
public String getParserDetailsJSON() {
    final boolean includeMimeTypes = true;
    return getParsersJSON(includeMimeTypes);
}
/**
 * Handles GET requests on the resource root when JSON is requested.
 *
 * @return the parser listing serialized as JSON, without mime types requested
 */
@GET
@Produces(javax.ws.rs.core.MediaType.APPLICATION_JSON)
public String getParsersJSON() {
    final boolean includeMimeTypes = false;
    return getParsersJSON(includeMimeTypes);
}
/**
 * Collects the details of the configured parser into a map and
 * serializes that map as a JSON string.
 *
 * @param withMimeTypes passed through to the map-building helper to
 *                      request mime type information
 * @return the JSON text produced from the collected details
 */
protected String getParsersJSON(boolean withMimeTypes) {
    // Parameterized rather than raw: the keys are property names and the
    // values are mixed (strings, booleans, ...), hence Object.
    Map<String, Object> details = new HashMap<String, Object>();
    ParserDetails root = new ParserDetails(TikaResource.getConfig().getParser());
    parserAsMap(root, withMimeTypes, details);
    return JSON.toString(details);
}
private void parserAsMap(ParserDetails p, boolean withMimeTypes, Map details) {
details.put("name", p.className);
details.put("composite", p.isComposite);
details.put("decorated", p.isDecorated);
if (p.isComposite) {
List