
com.jaeksoft.searchlib.template.common.parsers.xml Maven / Gradle / Ivy
<?xml version="1.0" encoding="UTF-8"?>
<!-- License Agreement for OpenSearchServer -->
<!-- -->
<!-- Copyright (C) 2008-2014 Emmanuel Keller / Jaeksoft -->
<!-- -->
<!-- This file is part of OpenSearchServer. -->
<!-- http://www.open-search-server.com -->
<!-- -->
<!-- OpenSearchServer is free software: you can -->
<!-- redistribute it and/or modify it under the terms of -->
<!-- the GNU General Public License as published by the -->
<!-- Free Software Foundation, either version 3 of the -->
<!-- License, or (at your option) any later version. -->
<!-- -->
<!-- OpenSearchServer is distributed in the -->
<!-- hope that it will be useful, but WITHOUT ANY -->
<!-- WARRANTY; without even the implied warranty of -->
<!-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -->
<!-- -->
<!-- See the GNU General Public License for more details. -->

<!--
  Parser registry.

  Each <parser> entry declares:
    name         label of the entry (fileCrawlerDefault on the root refers
                 to one of these labels)
    class        fully qualified Java class implementing the parser
    sizeLimit    numeric limit; 33554432 = 32 * 1024 * 1024. Presumably a
                 maximum document size in bytes, with 0 meaning "no limit".
                 TODO confirm against the parser implementation.
    <contentType>  MIME type(s) associated with the entry
    <extension>    file extension(s) associated with the entry
    <map>/<link>   pairs a parser output field (source) with a destination
                   field (target); presumably the target is a schema field
                   of the index. Verify against the schema in use.

  Content-type values are written without leading or trailing whitespace:
  whitespace inside the element is part of its text, and a consumer that
  does not trim would fail to match the MIME type.
-->
<parsers fileCrawlerDefault="FileSystem parser">

  <!-- Entry referenced by fileCrawlerDefault; declares no content types, extensions or field map. -->
  <parser name="FileSystem parser"
          class="com.jaeksoft.searchlib.parser.FileSystemParser"
          sizeLimit="0" />

  <!-- Legacy Microsoft Office formats -->
  <parser name="DOC parser"
          class="com.jaeksoft.searchlib.parser.DocParser"
          sizeLimit="33554432">
    <contentType>application/msword</contentType>
    <extension>doc</extension>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="XLS parser"
          class="com.jaeksoft.searchlib.parser.XlsParser"
          sizeLimit="33554432">
    <contentType>application/vnd.ms-excel</contentType>
    <extension>xls</extension>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- Office Open XML formats -->
  <parser name="DOCX parser"
          class="com.jaeksoft.searchlib.parser.DocxParser"
          sizeLimit="33554432">
    <contentType>application/vnd.openxmlformats-officedocument.wordprocessingml.document</contentType>
    <extension>docx</extension>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="description" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="XLSX parser"
          class="com.jaeksoft.searchlib.parser.XlsxParser"
          sizeLimit="33554432">
    <contentType>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</contentType>
    <extension>xlsx</extension>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="description" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- HTML: the only entry carrying url_fragment and defaultCharset attributes. -->
  <parser name="HTML parser"
          class="com.jaeksoft.searchlib.parser.HtmlParser"
          url_fragment="remove"
          sizeLimit="33554432"
          defaultCharset="UTF-8">
    <contentType>text/html</contentType>
    <extension>htm</extension>
    <extension>html</extension>
    <extension>xhtml</extension>
    <map>
      <link source="body" target="content" />
      <link source="meta_description" target="metaDescription" />
      <link source="meta_keywords" target="metaKeywords" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- NOTE(review): this is the only content parser without a sizeLimit attribute; confirm that is intentional. -->
  <parser name="MSG parser"
          class="com.jaeksoft.searchlib.parser.MapiMsgParser">
    <contentType>application/vnd.ms-outlook</contentType>
    <extension>msg</extension>
    <map>
      <link source="content" target="content" />
      <!-- The <removeTag /> child presumably strips markup from htmlSource before it is stored; verify. -->
      <link source="htmlSource" target="content">
        <removeTag />
      </link>
      <link source="subject" target="title" />
    </map>
  </parser>

  <parser name="PDF parser"
          class="com.jaeksoft.searchlib.parser.PdfParser"
          sizeLimit="33554432">
    <extension>pdf</extension>
    <contentType>application/pdf</contentType>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="PPT parser"
          class="com.jaeksoft.searchlib.parser.PptParser"
          sizeLimit="33554432">
    <extension>ppt</extension>
    <contentType>application/vnd.ms-powerpoint</contentType>
    <map>
      <link source="body" target="content" />
      <link source="content" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="PPTX parser"
          class="com.jaeksoft.searchlib.parser.PptxParser"
          sizeLimit="33554432">
    <extension>pptx</extension>
    <contentType>application/vnd.openxmlformats-officedocument.presentationml.presentation</contentType>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="description" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="Visio parser"
          class="com.jaeksoft.searchlib.parser.VisioParser"
          sizeLimit="33554432">
    <contentType>application/vnd.visio</contentType>
    <extension>vsd</extension>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="Publisher parser"
          class="com.jaeksoft.searchlib.parser.PublisherParser"
          sizeLimit="33554432">
    <contentType>application/x-mspublisher</contentType>
    <extension>pub</extension>
    <map>
      <link source="author" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- OpenDocument formats -->
  <parser name="ODT parser"
          class="com.jaeksoft.searchlib.parser.OdtParser"
          sizeLimit="33554432">
    <extension>odt</extension>
    <contentType>application/vnd.oasis.opendocument.text</contentType>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- One entry covers both the spreadsheet and the formula content types. -->
  <parser name="ODS/ODF parser"
          class="com.jaeksoft.searchlib.parser.OdsParser"
          sizeLimit="33554432">
    <extension>odf</extension>
    <extension>ods</extension>
    <contentType>application/vnd.oasis.opendocument.spreadsheet</contentType>
    <contentType>application/vnd.oasis.opendocument.formula</contentType>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="ODP parser"
          class="com.jaeksoft.searchlib.parser.OdpParser"
          sizeLimit="33554432">
    <extension>odp</extension>
    <contentType>application/vnd.oasis.opendocument.presentation</contentType>
    <map>
      <link source="creator" target="content" />
      <link source="content" target="content" />
      <link source="subject" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- Plain and lightly structured text formats -->
  <parser name="RTF parser"
          class="com.jaeksoft.searchlib.parser.RtfParser"
          sizeLimit="33554432">
    <extension>rtf</extension>
    <contentType>application/rtf</contentType>
    <map>
      <link source="content" target="content" />
    </map>
  </parser>

  <parser name="RSS parser"
          class="com.jaeksoft.searchlib.parser.RssParser"
          sizeLimit="33554432">
    <contentType>application/rss+xml</contentType>
    <extension>rss</extension>
    <map>
      <link source="description" target="content" />
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="Text parser"
          class="com.jaeksoft.searchlib.parser.TextParser"
          sizeLimit="33554432">
    <extension>txt</extension>
    <contentType>text/plain</contentType>
    <map>
      <link source="content" target="content" />
    </map>
  </parser>

  <!-- Media and miscellaneous formats: only the title field is mapped. -->
  <parser name="Audio parser"
          class="com.jaeksoft.searchlib.parser.AudioParser"
          sizeLimit="33554432">
    <extension>mp3</extension>
    <extension>mp4</extension>
    <extension>m4a</extension>
    <extension>m4p</extension>
    <extension>wav</extension>
    <extension>ogg</extension>
    <extension>flac</extension>
    <contentType>audio/mpeg</contentType>
    <contentType>audio/wav</contentType>
    <contentType>audio/x-wav</contentType>
    <contentType>audio/ogg</contentType>
    <contentType>audio/vorbis</contentType>
    <contentType>audio/flac</contentType>
    <contentType>audio/mp4</contentType>
    <contentType>audio/mp4a-latm</contentType>
    <contentType>application/ogg</contentType>
    <map>
      <link source="title" target="title" />
    </map>
  </parser>

  <parser name="Torrent parser"
          class="com.jaeksoft.searchlib.parser.TorrentParser"
          sizeLimit="33554432">
    <extension>torrent</extension>
    <contentType>application/x-bittorrent</contentType>
    <map>
      <link source="title" target="title" />
    </map>
  </parser>

  <!-- application/xhtml+xml is bound here, while the .xhtml extension is bound to the HTML parser above. -->
  <parser name="XML parser"
          class="com.jaeksoft.searchlib.parser.XmlParser"
          sizeLimit="33554432">
    <extension>xml</extension>
    <contentType>text/xml</contentType>
    <contentType>application/xml</contentType>
    <contentType>application/xhtml+xml</contentType>
    <map>
      <link source="content" target="content" />
    </map>
  </parser>

</parsers>
<!-- © 2015 - 2025 Weber Informatics LLC | Privacy Policy -->