All downloads are free. Search and download functionality uses the official Maven repository.

com.jaeksoft.searchlib.template.common.parsers.xml Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you will be able to quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
<?xml version="1.0" encoding="UTF-8"?>
<!-- License Agreement for OpenSearchServer -->
<!-- -->
<!-- Copyright (C) 2008-2014 Emmanuel Keller / Jaeksoft -->
<!-- -->
<!-- This file is part of OpenSearchServer. -->
<!-- http://www.open-search-server.com -->
<!-- -->
<!-- OpenSearchServer is free software: you can -->
<!-- redistribute it and/or modify it under the terms of -->
<!-- the GNU General Public License as published by the -->
<!-- Free Software Foundation, either version 3 of the -->
<!-- License, or (at your option) any later version. -->
<!-- -->
<!-- OpenSearchServer is distributed in the -->
<!-- hope that it will be useful, but WITHOUT ANY -->
<!-- WARRANTY; without even the implied warranty of -->
<!-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -->
<!-- -->
<!-- See the GNU General Public License for more details. -->
<parsers fileCrawlerDefault="FileSystem parser">
	<!-- FileSystem parser: the parser named by the root element's
		fileCrawlerDefault attribute. It declares no content types, no
		extensions and no field map.
		NOTE(review): sizeLimit="0" presumably means "no limit"; confirm
		against the parser implementation. -->
	<parser
		name="FileSystem parser"
		class="com.jaeksoft.searchlib.parser.FileSystemParser"
		sizeLimit="0"/>
	<!-- DOC parser: registered for the application/msword content type and
		the "doc" extension. The author, content and subject sources are
		linked to the "content" target; title is linked to "title".
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="DOC parser"
		class="com.jaeksoft.searchlib.parser.DocParser"
		sizeLimit="33554432">
		<contentType>application/msword</contentType>
		<extension>doc</extension>
		<map>
			<link source="author" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- XLS parser: registered for the application/vnd.ms-excel content type
		and the "xls" extension. The author, content and subject sources are
		linked to the "content" target; title is linked to "title".
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="XLS parser"
		class="com.jaeksoft.searchlib.parser.XlsParser"
		sizeLimit="33554432">
		<contentType>application/vnd.ms-excel</contentType>
		<extension>xls</extension>
		<map>
			<link source="author" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- DOCX parser: registered for the WordprocessingML document content
		type and the "docx" extension. The creator, content, subject and
		description sources are linked to the "content" target; title is
		linked to "title".
		The contentType value is kept on a single line on purpose: wrapping
		it would make the line break and indentation part of the element
		text, and a consumer that does not trim would never match the MIME
		type.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="DOCX parser"
		class="com.jaeksoft.searchlib.parser.DocxParser"
		sizeLimit="33554432">
		<contentType>application/vnd.openxmlformats-officedocument.wordprocessingml.document</contentType>
		<extension>docx</extension>
		<map>
			<link source="creator" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="description" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- XLSX parser: registered for the SpreadsheetML sheet content type and
		the "xlsx" extension. The creator, content, subject and description
		sources are linked to the "content" target; title is linked to
		"title".
		The contentType value is kept on a single line on purpose: wrapping
		it would make the line break and indentation part of the element
		text, and a consumer that does not trim would never match the MIME
		type.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="XLSX parser"
		class="com.jaeksoft.searchlib.parser.XlsxParser"
		sizeLimit="33554432">
		<contentType>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</contentType>
		<extension>xlsx</extension>
		<map>
			<link source="creator" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="description" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- HTML parser: registered for the text/html content type and the
		"htm", "html" and "xhtml" extensions. It is configured with
		url_fragment="remove" and defaultCharset="UTF-8". The body source is
		linked to "content", meta_description to "metaDescription",
		meta_keywords to "metaKeywords" and title to "title".
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="HTML parser"
		class="com.jaeksoft.searchlib.parser.HtmlParser"
		url_fragment="remove"
		sizeLimit="33554432"
		defaultCharset="UTF-8">
		<contentType>text/html</contentType>
		<extension>htm</extension>
		<extension>html</extension>
		<extension>xhtml</extension>
		<map>
			<link source="body" target="content"/>
			<link source="meta_description" target="metaDescription"/>
			<link source="meta_keywords" target="metaKeywords"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- MSG parser: registered for the application/vnd.ms-outlook content
		type and the "msg" extension. The content and htmlSource sources are
		linked to the "content" target; subject is linked to "title". The
		htmlSource link carries a removeTag child.
		NOTE(review): removeTag presumably strips markup from the HTML
		source; confirm. No sizeLimit attribute is set on this parser;
		confirm that is intentional. -->
	<parser
		name="MSG parser"
		class="com.jaeksoft.searchlib.parser.MapiMsgParser">
		<contentType>application/vnd.ms-outlook</contentType>
		<extension>msg</extension>
		<map>
			<link source="content" target="content"/>
			<link source="htmlSource" target="content">
				<removeTag/>
			</link>
			<link source="subject" target="title"/>
		</map>
	</parser>
	<!-- EML parser: registered for the message/rfc822 content type and the
		"eml" extension. The content, email_attachment_content and
		htmlSource sources are linked to the "content" target; subject is
		linked to "title". The htmlSource link carries a removeTag child.
		NOTE(review): removeTag presumably strips markup from the HTML
		source; confirm. No sizeLimit attribute is set on this parser;
		confirm that is intentional. -->
	<parser
		name="EML parser"
		class="com.jaeksoft.searchlib.parser.EmlParser">
		<contentType>message/rfc822</contentType>
		<extension>eml</extension>
		<map>
			<link source="content" target="content"/>
			<link source="email_attachment_content" target="content"/>
			<link source="htmlSource" target="content">
				<removeTag/>
			</link>
			<link source="subject" target="title"/>
		</map>
	</parser>
	<!-- PDF parser: registered for the "pdf" extension and the
		application/pdf content type. The author, content and subject
		sources are linked to the "content" target; title is linked to
		"title".
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="PDF parser"
		class="com.jaeksoft.searchlib.parser.PdfParser"
		sizeLimit="33554432">
		<extension>pdf</extension>
		<contentType>application/pdf</contentType>
		<map>
			<link source="author" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- PPT parser: registered for the "ppt" extension and the
		application/vnd.ms-powerpoint content type. The body and content
		sources are linked to the "content" target; title is linked to
		"title".
		The contentType value is kept on a single line on purpose: wrapping
		it would make the line break and indentation part of the element
		text, and a consumer that does not trim would never match the MIME
		type.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="PPT parser"
		class="com.jaeksoft.searchlib.parser.PptParser"
		sizeLimit="33554432">
		<extension>ppt</extension>
		<contentType>application/vnd.ms-powerpoint</contentType>
		<map>
			<link source="body" target="content"/>
			<link source="content" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- PPTX parser: registered for the "pptx" extension and the
		PresentationML presentation content type. The creator, content,
		subject and description sources are linked to the "content" target;
		title is linked to "title".
		The contentType value is kept on a single line on purpose: wrapping
		it would make the line break and indentation part of the element
		text, and a consumer that does not trim would never match the MIME
		type.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="PPTX parser"
		class="com.jaeksoft.searchlib.parser.PptxParser"
		sizeLimit="33554432">
		<extension>pptx</extension>
		<contentType>application/vnd.openxmlformats-officedocument.presentationml.presentation</contentType>
		<map>
			<link source="creator" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="description" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- Visio parser: registered for the application/vnd.visio content type
		and the "vsd" extension. The author, content and subject sources are
		linked to the "content" target; title is linked to "title".
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="Visio parser"
		class="com.jaeksoft.searchlib.parser.VisioParser"
		sizeLimit="33554432">
		<contentType>application/vnd.visio</contentType>
		<extension>vsd</extension>
		<map>
			<link source="author" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- Publisher parser: registered for the application/x-mspublisher
		content type and the "pub" extension. The author, content and
		subject sources are linked to the "content" target; title is linked
		to "title".
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="Publisher parser"
		class="com.jaeksoft.searchlib.parser.PublisherParser"
		sizeLimit="33554432">
		<contentType>application/x-mspublisher</contentType>
		<extension>pub</extension>
		<map>
			<link source="author" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- ODT parser: registered for the "odt" extension and the
		application/vnd.oasis.opendocument.text content type. The creator,
		content and subject sources are linked to the "content" target;
		title is linked to "title".
		The contentType value is kept on a single line on purpose: wrapping
		it would make the line break and indentation part of the element
		text, and a consumer that does not trim would never match the MIME
		type.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="ODT parser"
		class="com.jaeksoft.searchlib.parser.OdtParser"
		sizeLimit="33554432">
		<extension>odt</extension>
		<contentType>application/vnd.oasis.opendocument.text</contentType>
		<map>
			<link source="creator" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- ODS/ODF parser: registered for the "odf" and "ods" extensions and
		for the OpenDocument spreadsheet and formula content types. The
		creator, content and subject sources are linked to the "content"
		target; title is linked to "title".
		Each contentType value is kept on a single line on purpose: wrapping
		it would make the line break and indentation part of the element
		text, and a consumer that does not trim would never match the MIME
		type.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="ODS/ODF parser"
		class="com.jaeksoft.searchlib.parser.OdsParser"
		sizeLimit="33554432">
		<extension>odf</extension>
		<extension>ods</extension>
		<contentType>application/vnd.oasis.opendocument.spreadsheet</contentType>
		<contentType>application/vnd.oasis.opendocument.formula</contentType>
		<map>
			<link source="creator" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- ODP parser: registered for the "odp" extension and the
		application/vnd.oasis.opendocument.presentation content type. The
		creator, content and subject sources are linked to the "content"
		target; title is linked to "title".
		The contentType value is kept on a single line on purpose: wrapping
		it would make the line break and indentation part of the element
		text, and a consumer that does not trim would never match the MIME
		type.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="ODP parser"
		class="com.jaeksoft.searchlib.parser.OdpParser"
		sizeLimit="33554432">
		<extension>odp</extension>
		<contentType>application/vnd.oasis.opendocument.presentation</contentType>
		<map>
			<link source="creator" target="content"/>
			<link source="content" target="content"/>
			<link source="subject" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- RTF parser: registered for the "rtf" extension and the
		application/rtf content type. Only the content source is mapped, to
		the "content" target.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="RTF parser"
		class="com.jaeksoft.searchlib.parser.RtfParser"
		sizeLimit="33554432">
		<extension>rtf</extension>
		<contentType>application/rtf</contentType>
		<map>
			<link source="content" target="content"/>
		</map>
	</parser>
	<!-- RSS parser: registered for the application/rss+xml content type and
		the "rss" extension. The description source is linked to the
		"content" target; title is linked to "title".
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="RSS parser"
		class="com.jaeksoft.searchlib.parser.RssParser"
		sizeLimit="33554432">
		<contentType>application/rss+xml</contentType>
		<extension>rss</extension>
		<map>
			<link source="description" target="content"/>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- Text parser: registered for the "txt" extension and the text/plain
		content type. Only the content source is mapped, to the "content"
		target.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="Text parser"
		class="com.jaeksoft.searchlib.parser.TextParser"
		sizeLimit="33554432">
		<extension>txt</extension>
		<contentType>text/plain</contentType>
		<map>
			<link source="content" target="content"/>
		</map>
	</parser>
	<!-- Audio parser: registered for seven file extensions (mp3, mp4, m4a,
		m4p, wav, ogg, flac) and nine content types, listed below. Only the
		title source is mapped, to the "title" target.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="Audio parser"
		class="com.jaeksoft.searchlib.parser.AudioParser"
		sizeLimit="33554432">
		<!-- File extensions -->
		<extension>mp3</extension>
		<extension>mp4</extension>
		<extension>m4a</extension>
		<extension>m4p</extension>
		<extension>wav</extension>
		<extension>ogg</extension>
		<extension>flac</extension>
		<!-- Content types -->
		<contentType>audio/mpeg</contentType>
		<contentType>audio/wav</contentType>
		<contentType>audio/x-wav</contentType>
		<contentType>audio/ogg</contentType>
		<contentType>audio/vorbis</contentType>
		<contentType>audio/flac</contentType>
		<contentType>audio/mp4</contentType>
		<contentType>audio/mp4a-latm</contentType>
		<contentType>application/ogg</contentType>
		<map>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- Torrent parser: registered for the "torrent" extension and the
		application/x-bittorrent content type. Only the title source is
		mapped, to the "title" target.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="Torrent parser"
		class="com.jaeksoft.searchlib.parser.TorrentParser"
		sizeLimit="33554432">
		<extension>torrent</extension>
		<contentType>application/x-bittorrent</contentType>
		<map>
			<link source="title" target="title"/>
		</map>
	</parser>
	<!-- XML parser: registered for the "xml" extension and the text/xml,
		application/xml and application/xhtml+xml content types. Only the
		content source is mapped, to the "content" target.
		NOTE(review): application/xhtml+xml is claimed here while the HTML
		parser claims the "xhtml" extension; confirm which parser is meant
		to handle XHTML documents.
		NOTE(review): sizeLimit 33554432 equals 32 * 1024 * 1024, presumably
		bytes; confirm the unit. -->
	<parser
		name="XML parser"
		class="com.jaeksoft.searchlib.parser.XmlParser"
		sizeLimit="33554432">
		<extension>xml</extension>
		<contentType>text/xml</contentType>
		<contentType>application/xml</contentType>
		<contentType>application/xhtml+xml</contentType>
		<map>
			<link source="content" target="content"/>
		</map>
	</parser>

</parsers>




© 2015 - 2024 Weber Informatics LLC | Privacy Policy