// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// org.apache.jackrabbit.oak.benchmark.LucenePropertyFullTextTest Maven / Gradle / Ivy

// There is a newer version: 1.72.0
// Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.oak.benchmark;

import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.ImmutableSet.of;
import static org.apache.jackrabbit.oak.api.Type.BOOLEAN;
import static org.apache.jackrabbit.oak.api.Type.LONG;
import static org.apache.jackrabbit.oak.api.Type.NAME;
import static org.apache.jackrabbit.oak.api.Type.STRING;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.ASYNC_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NODE_TYPE;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.REINDEX_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.TYPE_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.COMPAT_MODE;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.INDEX_RULES;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_NAME;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_NODE;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.PROP_PROPERTY_INDEX;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE;

import java.io.File;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import javax.annotation.Nonnull;
import javax.jcr.Repository;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.ValueFactory;
import javax.jcr.query.Query;
import javax.jcr.query.QueryManager;
import javax.jcr.query.RowIterator;

import org.apache.jackrabbit.oak.Oak;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.benchmark.wikipedia.WikipediaImport;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.fixture.JcrCreator;
import org.apache.jackrabbit.oak.fixture.OakRepositoryFixture;
import org.apache.jackrabbit.oak.fixture.RepositoryFixture;
import org.apache.jackrabbit.oak.jcr.Jcr;
import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProvider;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.LuceneInitializerHelper;
import org.apache.jackrabbit.oak.plugins.tree.TreeFactory;
import org.apache.jackrabbit.oak.spi.commit.Observer;
import org.apache.jackrabbit.oak.spi.lifecycle.RepositoryInitializer;
import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * 

* Perform a benchmark on how long it takes for an ingested item to be available in a Lucene * Property index when indexed in conjunction with a Global full-text lucene (same thread). It makes * use of the {@link WikipediaImport} to use a Wikipedia dump for content injestion. *

*

* Suggested dump: * https://dumps.wikimedia.org/enwiki/20150403/enwiki-20150403-pages-articles.xml.bz2 *

*

* Usage example: *

* *
 * java -Druntime=900 -Dlogback.configurationFile=logback-benchmark.xml \
 *      -jar ~/.m2/repository/org/apache/jackrabbit/oak-run/1.4-SNAPSHOT/oak-run-1.4-SNAPSHOT.jar \
 *      benchmark --wikipedia enwiki-20150403-pages-articles.xml.bz2 \
 *      --base ~/tmp/oak/ LucenePropertyFullTextTest Oak-Tar Oak-Mongo
 * 
*

* it will run the benchmark for 15 minutes against TarNS and MongoNS. *

*/ public class LucenePropertyFullTextTest extends AbstractTest { private static final Logger LOG = LoggerFactory.getLogger(LucenePropertyFullTextTest.class); private WikipediaImport importer; private Thread asyncImporter; private boolean benchmarkCompleted, importerCompleted; Boolean storageEnabled; String currentFixture, currentTest; /** * context used across the tests */ class TestContext { final Session session = loginWriter(); final String title; public TestContext(@Nonnull final String title) { this.title = checkNotNull(title); } } /** * helper class to initialise the Lucene Property index definition */ static class LucenePropertyInitialiser implements RepositoryInitializer { private String name; private Set properties; public LucenePropertyInitialiser(@Nonnull final String name, @Nonnull final Set properties) { this.name = checkNotNull(name); this.properties = checkNotNull(properties); } private boolean isAlreadyThere(@Nonnull final NodeBuilder root) { return checkNotNull(root).hasChildNode(INDEX_DEFINITIONS_NAME) && root.getChildNode(INDEX_DEFINITIONS_NAME).hasChildNode(name); } @Override public void initialize(final NodeBuilder builder) { if (!isAlreadyThere(builder)) { Tree t = TreeFactory.createTree(builder.child(INDEX_DEFINITIONS_NAME)); t = t.addChild(name); t.setProperty("jcr:primaryType", INDEX_DEFINITIONS_NODE_TYPE, NAME); t.setProperty(COMPAT_MODE, 2L, LONG); t.setProperty(TYPE_PROPERTY_NAME, TYPE_LUCENE, STRING); t.setProperty(ASYNC_PROPERTY_NAME, "async", STRING); t.setProperty(REINDEX_PROPERTY_NAME, true); t = t.addChild(INDEX_RULES); t.setOrderableChildren(true); t.setProperty("jcr:primaryType", "nt:unstructured", NAME); t = t.addChild("nt:base"); Tree propnode = t.addChild(PROP_NODE); propnode.setOrderableChildren(true); propnode.setProperty("jcr:primaryType", "nt:unstructured", NAME); for (String p : properties) { Tree t1 = propnode.addChild(PathUtils.getName(p)); t1.setProperty(PROP_PROPERTY_INDEX, true, BOOLEAN); t1.setProperty(PROP_NAME, 
p); } } } } /** * reference to the last added title. Used for looking up with queries. */ private AtomicReference lastTitle = new AtomicReference(); public LucenePropertyFullTextTest(final File dump, final boolean flat, final boolean doReport, final Boolean storageEnabled) { this.importer = new WikipediaImport(dump, flat, doReport) { @Override protected void pageAdded(String title, String text) { LOG.trace("Setting title: {}", title); lastTitle.set(title); } }; this.storageEnabled = storageEnabled; this.currentTest = this.getClass().getSimpleName(); } @Override protected Repository[] createRepository(RepositoryFixture fixture) throws Exception { if (fixture instanceof OakRepositoryFixture) { currentFixture = fixture.toString(); return ((OakRepositoryFixture) fixture).setUpCluster(1, new JcrCreator() { @Override public Jcr customize(Oak oak) { LuceneIndexProvider provider = new LuceneIndexProvider(); oak.with((QueryIndexProvider) provider) .with((Observer) provider) .with(new LuceneIndexEditorProvider()) .with((new LuceneInitializerHelper("luceneGlobal", storageEnabled)).async()) // the WikipediaImporter set a property `title` .with(new LucenePropertyInitialiser("luceneTitle", of("title"))) .withAsyncIndexing("async", 5); return new Jcr(oak); } }); } return super.createRepository(fixture); } @Override protected void beforeSuite() throws Exception { LOG.debug("beforeSuite() - {} - {}", currentFixture, currentTest); benchmarkCompleted = false; importerCompleted = false; asyncImporter = new Thread(new Runnable() { @Override public void run() { try { importer.importWikipedia(loginWriter()); } catch (Exception e) { LOG.error("Error while importing the dump. Trying to halt everything.", e); importerCompleted = true; } finally { if (!benchmarkCompleted) { importerCompleted = true; issueHaltRequest("Wikipedia import completed."); } } } }); asyncImporter.start(); // allowing the async index to catch up. 
TimeUnit.SECONDS.sleep(10); } @Override protected void afterSuite() throws Exception { LOG.debug("afterSuite() - {} - {}", currentFixture, currentTest); asyncImporter.join(); } @Override protected void runTest() throws Exception { if (lastTitle.get() == null) { return; } runTest(new TestContext(lastTitle.get())); } @Override protected void runTest(final TestContext ec) throws Exception { if (importerCompleted) { return; } final long maxWait = TimeUnit.MINUTES.toMillis(5); final long waitUnit = 50; long sleptSoFar = 0; while (!performQuery(ec) && sleptSoFar < maxWait) { LOG.trace("title '{}' not found. Waiting and retry. sleptSoFar: {}ms", ec.title, sleptSoFar); sleptSoFar += waitUnit; TimeUnit.MILLISECONDS.sleep(waitUnit); } if (sleptSoFar < maxWait) { // means we exited the loop as we found it. LOG.info("{} - {} - title '{}' found with a wait/try of {}ms", currentFixture, currentTest, ec.title, sleptSoFar); } else { LOG.warn("{} - {} - title '{}' timed out with a way/try of {}ms.", currentFixture, currentTest, ec.title, sleptSoFar); } } private boolean performQuery(@Nonnull final TestContext ec) throws RepositoryException { QueryManager qm = ec.session.getWorkspace().getQueryManager(); ValueFactory vf = ec.session.getValueFactory(); Query q = qm.createQuery("SELECT * FROM [nt:base] WHERE [title] = $title", Query.JCR_SQL2); q.bindValue("title", vf.createValue(ec.title)); LOG.trace("statement: {} - title: {}", q.getStatement(), ec.title); RowIterator rows = q.execute().getRows(); if (rows.hasNext()) { rows.nextRow().getPath(); return true; } else { return false; } } @Override protected void issueHaltChildThreads() { if (!importerCompleted) { LOG.info("benchmark completed. Issuing an halt for the importer"); benchmarkCompleted = true; this.importer.issueHaltImport(); } } }




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy