# org.apache.tika.parser.ocr.TesseractOCRConfig.properties
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Tesseract properties
# The keys in this group configure the OCR step itself.
#
# Location of Tesseract; empty in this file.
# NOTE(review): presumably an empty value means the executable is resolved
# from the system PATH - confirm against the code that reads this file.
tesseractPath=
# Language to use for OCR.
# NOTE(review): "eng" is presumably a Tesseract language code (English) - confirm.
language=eng
# Page segmentation mode.
# NOTE(review): presumably forwarded to Tesseract's page-segmentation option;
# the meaning of mode 1 should be confirmed against the Tesseract documentation.
pageSegMode=1
# Upper and lower file-size bounds for OCR. 2147483647 is 2^31-1, the largest
# signed 32-bit integer.
# NOTE(review): units are presumably bytes, and files outside the range are
# presumably skipped - confirm.
maxFileSizeToOcr=2147483647
minFileSizeToOcr=0
# Time limit for the OCR step.
# NOTE(review): unit is presumably seconds - confirm.
timeout=120
#txt or hocr
outputType=txt
# NOTE(review): presumably maps to Tesseract's preserve_interword_spaces
# setting - confirm.
preserveInterwordSpacing=false
# properties for image processing
# to enable processing, set enableImageProcessing to 1
# Note that this flag is written as 0/1, unlike the true/false flags used
# elsewhere in this file.
enableImageProcessing=0
# Location of ImageMagick; empty in this file.
# NOTE(review): presumably an empty value means the executable is resolved
# from the system PATH - confirm.
ImageMagickPath=
# NOTE(review): the keys below look like ImageMagick command-line options
# (density, depth, colorspace, filter, resize) and presumably only take effect
# when enableImageProcessing is 1 - confirm against the consuming code.
density=300
depth=4
colorspace=gray
filter=triangle
# NOTE(review): presumably a percentage (900 = scale to 9x) rather than a
# pixel size - confirm.
resize=900
# NOTE(review): presumably controls whether a rotation correction is applied
# to the image before OCR - confirm.
applyRotation=false