com.jaeksoft.searchlib.template.common.parsers.xml Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class search engine. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you can quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
<?xml version="1.0" encoding="UTF-8"?>
<!-- License Agreement for OpenSearchServer -->
<!-- -->
<!-- Copyright (C) 2008-2014 Emmanuel Keller / Jaeksoft -->
<!-- -->
<!-- This file is part of OpenSearchServer. -->
<!-- http://www.open-search-server.com -->
<!-- -->
<!-- OpenSearchServer is free software: you can -->
<!-- redistribute it and/or modify it under the terms of -->
<!-- the GNU General Public License as published by the -->
<!-- Free Software Foundation, either version 3 of the -->
<!-- License, or (at your option) any later version. -->
<!-- -->
<!-- OpenSearchServer is distributed in the -->
<!-- hope that it will be useful, but WITHOUT ANY -->
<!-- WARRANTY; without even the implied warranty of -->
<!-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -->
<!-- -->
<!-- See the GNU General Public License for more details. -->

<!--
  Parser registry template.

  Each <parser> entry declares:
    name         label of the entry (also referenced by fileCrawlerDefault)
    class        fully qualified name of the implementing parser class
    sizeLimit    optional size cap; NOTE(review): presumably bytes
                 (33554432 = 32 * 1024 * 1024); confirm against the loader
    contentType  MIME type(s) listed for the entry (zero or more)
    extension    file extension(s) listed for the entry (zero or more)
    map          <link source="..." target="..."/> pairs; presumably "source"
                 is a field produced by the parser and "target" an index
                 field; confirm against the loader

  Keep <contentType> and <extension> values on a single line with no padding:
  whitespace inside these elements is part of the text an XML parser delivers
  to the application, so a stray space or line break can change the value.
-->
<parsers fileCrawlerDefault="FileSystem parser">

  <!-- Entry named by fileCrawlerDefault on the root element. -->
  <parser name="FileSystem parser"
          class="com.jaeksoft.searchlib.parser.FileSystemParser"
          sizeLimit="0" />

  <!-- Microsoft Office (binary formats) -->
  <parser name="DOC parser"
          class="com.jaeksoft.searchlib.parser.DocParser"
          sizeLimit="33554432">
    <contentType>application/msword</contentType>
    <extension>doc</extension>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="XLS parser"
          class="com.jaeksoft.searchlib.parser.XlsParser"
          sizeLimit="33554432">
    <contentType>application/vnd.ms-excel</contentType>
    <extension>xls</extension>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- Microsoft Office (Open XML formats) -->
  <parser name="DOCX parser"
          class="com.jaeksoft.searchlib.parser.DocxParser"
          sizeLimit="33554432">
    <contentType>application/vnd.openxmlformats-officedocument.wordprocessingml.document</contentType>
    <extension>docx</extension>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="description" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="XLSX parser"
          class="com.jaeksoft.searchlib.parser.XlsxParser"
          sizeLimit="33554432">
    <contentType>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</contentType>
    <extension>xlsx</extension>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="description" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- Web content -->
  <parser name="HTML parser"
          class="com.jaeksoft.searchlib.parser.HtmlParser"
          url_fragment="remove"
          sizeLimit="33554432"
          defaultCharset="UTF-8">
    <contentType>text/html</contentType>
    <extension>htm</extension>
    <extension>html</extension>
    <extension>xhtml</extension>
    <map>
      <link source="body" target="content" />
      <link source="meta_description" target="metaDescription" />
      <link source="meta_keywords" target="metaKeywords" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- E-mail messages. Neither entry declares a sizeLimit attribute. -->
  <parser name="MSG parser"
          class="com.jaeksoft.searchlib.parser.MapiMsgParser">
    <contentType>application/vnd.ms-outlook</contentType>
    <extension>msg</extension>
    <map>
      <link source="content" target="content" />
      <!-- NOTE(review): removeTag presumably strips markup from htmlSource; confirm. -->
      <link source="htmlSource" target="content">
        <removeTag />
      </link>
      <link source="subject" target="title" />
    </map>
  </parser>

  <parser name="EML parser"
          class="com.jaeksoft.searchlib.parser.EmlParser">
    <contentType>message/rfc822</contentType>
    <extension>eml</extension>
    <map>
      <link source="content" target="content" />
      <link source="email_attachment_content" target="content" />
      <!-- NOTE(review): removeTag presumably strips markup from htmlSource; confirm. -->
      <link source="htmlSource" target="content">
        <removeTag />
      </link>
      <link source="subject" target="title" />
    </map>
  </parser>

  <!-- PDF -->
  <parser name="PDF parser"
          class="com.jaeksoft.searchlib.parser.PdfParser"
          sizeLimit="33554432">
    <extension>pdf</extension>
    <contentType>application/pdf</contentType>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- Presentations, diagrams and desktop publishing -->
  <parser name="PPT parser"
          class="com.jaeksoft.searchlib.parser.PptParser"
          sizeLimit="33554432">
    <extension>ppt</extension>
    <contentType>application/vnd.ms-powerpoint</contentType>
    <map>
      <link source="body" target="content" />
      <link source="content" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="PPTX parser"
          class="com.jaeksoft.searchlib.parser.PptxParser"
          sizeLimit="33554432">
    <extension>pptx</extension>
    <contentType>application/vnd.openxmlformats-officedocument.presentationml.presentation</contentType>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="description" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="Visio parser"
          class="com.jaeksoft.searchlib.parser.VisioParser"
          sizeLimit="33554432">
    <contentType>application/vnd.visio</contentType>
    <extension>vsd</extension>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="Publisher parser"
          class="com.jaeksoft.searchlib.parser.PublisherParser"
          sizeLimit="33554432">
    <contentType>application/x-mspublisher</contentType>
    <extension>pub</extension>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- OpenDocument formats -->
  <parser name="ODT parser"
          class="com.jaeksoft.searchlib.parser.OdtParser"
          sizeLimit="33554432">
    <extension>odt</extension>
    <contentType>application/vnd.oasis.opendocument.text</contentType>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="ODS/ODF parser"
          class="com.jaeksoft.searchlib.parser.OdsParser"
          sizeLimit="33554432">
    <extension>odf</extension>
    <extension>ods</extension>
    <contentType>application/vnd.oasis.opendocument.spreadsheet</contentType>
    <contentType>application/vnd.oasis.opendocument.formula</contentType>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="ODP parser"
          class="com.jaeksoft.searchlib.parser.OdpParser"
          sizeLimit="33554432">
    <extension>odp</extension>
    <contentType>application/vnd.oasis.opendocument.presentation</contentType>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- Plain and lightly structured text -->
  <parser name="RTF parser"
          class="com.jaeksoft.searchlib.parser.RtfParser"
          sizeLimit="33554432">
    <extension>rtf</extension>
    <contentType>application/rtf</contentType>
    <map>
      <link source="content" target="content" />
    </map>
  </parser>

  <parser name="RSS parser"
          class="com.jaeksoft.searchlib.parser.RssParser"
          sizeLimit="33554432">
    <contentType>application/rss+xml</contentType>
    <extension>rss</extension>
    <map>
      <link source="description" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="Text parser"
          class="com.jaeksoft.searchlib.parser.TextParser"
          sizeLimit="33554432">
    <extension>txt</extension>
    <contentType>text/plain</contentType>
    <map>
      <link source="content" target="content" />
    </map>
  </parser>

  <!-- Media and miscellaneous: only the title field is linked. -->
  <parser name="Audio parser"
          class="com.jaeksoft.searchlib.parser.AudioParser"
          sizeLimit="33554432">
    <extension>mp3</extension>
    <extension>mp4</extension>
    <extension>m4a</extension>
    <extension>m4p</extension>
    <extension>wav</extension>
    <extension>ogg</extension>
    <extension>flac</extension>
    <contentType>audio/mpeg</contentType>
    <contentType>audio/wav</contentType>
    <contentType>audio/x-wav</contentType>
    <contentType>audio/ogg</contentType>
    <contentType>audio/vorbis</contentType>
    <contentType>audio/flac</contentType>
    <contentType>audio/mp4</contentType>
    <contentType>audio/mp4a-latm</contentType>
    <contentType>application/ogg</contentType>
    <map>
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="Torrent parser"
          class="com.jaeksoft.searchlib.parser.TorrentParser"
          sizeLimit="33554432">
    <extension>torrent</extension>
    <contentType>application/x-bittorrent</contentType>
    <map>
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- Generic XML; this entry also lists application/xhtml+xml. -->
  <parser name="XML parser"
          class="com.jaeksoft.searchlib.parser.XmlParser"
          sizeLimit="33554432">
    <extension>xml</extension>
    <contentType>text/xml</contentType>
    <contentType>application/xml</contentType>
    <contentType>application/xhtml+xml</contentType>
    <map>
      <link source="content" target="content" />
    </map>
  </parser>

</parsers>