All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ka.tika-eval.1.24.source-code.lucene-analyzers.json Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
{
  "analyzers": {
    "general": {
      "charfilters": [
        {
          "factory": "mapping",
          "params": {
            "mapping": "/lucene-char-mapping.txt"
          }
        }
      ],
      "tokenizer": {
        "factory": "uax29urlemail",
        "params": {}
      },
      "tokenfilters": [
        {
          "factory": "icufolding",
          "params": {}
        },
        {
          "factory": "cjkbigram",
          "params": {
            "outputUnigrams": "false"
          }
        }
      ]
    },
    "common_tokens": {
      "_comment" : "Use this analyzer for counting common tokens in a corpus.",
      "_comment" : "This isn't used by tika-eval during profiling or comparing",
      "tokenizer": {
        "factory": "uax29urlemail",
        "params": {}
      },
      "tokenfilters": [
        {
          "factory": "urlemailnormalizing",
          "params": {
          }
        },
        {
          "factory": "alphaideograph",
          "params": {}
        },
        {
          "factory": "icufolding",
          "params": {}
        },
        {
          "factory": "cjkbigram",
          "params": {
            "outputUnigrams": "false"
          }
        },
        {
          "factory": "cjkbigramawarelength",
          "params": {
            "min": 3,
            "max": 20
          }
        }
      ]
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy