All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.parser.ocr.TesseractOCRConfig.properties Maven / Gradle / Ivy

#  Licensed to the Apache Software Foundation (ASF) under one or more
#  contributor license agreements.  See the NOTICE file distributed with
#  this work for additional information regarding copyright ownership.
#  The ASF licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

# Tesseract properties
# Default settings for running Tesseract OCR. All values are plain strings;
# how each one is typed and interpreted is up to the class that loads this
# file (presumably org.apache.tika.parser.ocr.TesseractOCRConfig -- confirm).

# Location of the Tesseract installation. Empty by default.
# NOTE(review): presumably an empty value means the executable is resolved
# from the system PATH -- confirm against the loader.
tesseractPath=
# OCR language. NOTE(review): presumably a Tesseract language code matching an
# installed traineddata file -- confirm.
language=eng
# Page segmentation mode. NOTE(review): presumably forwarded to Tesseract's
# page segmentation option; check the Tesseract docs for what mode 1 means.
pageSegMode=1
# Upper bound on the size of a file that will be OCR'd. 2147483647 is
# 2^31 - 1 (the largest signed 32-bit integer), i.e. effectively no limit.
# NOTE(review): unit is presumably bytes -- confirm.
maxFileSizeToOcr=2147483647
# Lower bound on the size of a file that will be OCR'd; 0 excludes nothing.
# NOTE(review): unit is presumably bytes -- confirm.
minFileSizeToOcr=0
# Limit on how long OCR may run.
# NOTE(review): unit is presumably seconds -- confirm.
timeout=120
# Output format: txt or hocr
outputType=txt
# Whether spacing between words is preserved in the output. Off by default.
# NOTE(review): exact effect depends on the loader/Tesseract -- confirm.
preserveInterwordSpacing=false

# properties for image processing
# to enable processing, set enableImageProcessing to 1
# NOTE(review): this flag is numeric (0/1) while applyRotation below uses
# true/false; keep each in its existing form unless the loader is confirmed
# to accept both.
enableImageProcessing=0
# Location of the ImageMagick installation. Empty by default.
# NOTE(review): key starts with an upper-case letter, unlike tesseractPath;
# property keys are typically case-sensitive, so do not "fix" the casing
# without checking the loader. Presumably empty means "resolve from PATH".
ImageMagickPath=
# The settings below share names with ImageMagick command-line options
# (-density, -depth, -colorspace, -filter, -resize).
# NOTE(review): presumably passed through to ImageMagick -- confirm units and
# allowed values against the loader and the ImageMagick documentation.
density=300
depth=4
colorspace=gray
filter=triangle
resize=900
# Whether rotation is applied to the image. Off by default.
# NOTE(review): how the rotation angle is determined is not shown here.
applyRotation=false




© 2015 - 2024 Weber Informatics LLC | Privacy Policy