All downloads are free. The search and download functionality uses the official Maven repository.

org.codelibs.fess.ds.office365.OneNoteDataStore Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2012-2024 CodeLibs Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.codelibs.fess.ds.office365;

import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.function.Function;

import org.codelibs.core.exception.InterruptedRuntimeException;
import org.codelibs.fess.Constants;
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.ds.office365.client.Office365Client;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsKeyObject;
import org.codelibs.fess.util.ComponentUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.microsoft.graph.core.ClientException;
import com.microsoft.graph.http.GraphServiceException;
import com.microsoft.graph.models.Notebook;
import com.microsoft.graph.models.Site;
import com.microsoft.graph.requests.GraphServiceClient;
import com.microsoft.graph.requests.NotebookCollectionPage;
import com.microsoft.graph.requests.OnenoteRequestBuilder;

import okhttp3.Request;

public class OneNoteDataStore extends Office365DataStore {

    private static final Logger logger = LoggerFactory.getLogger(OneNoteDataStore.class);

    // scripts
    protected static final String NOTEBOOK = "notebook";
    protected static final String NOTEBOOK_NAME = "name";
    protected static final String NOTEBOOK_CONTENTS = "contents";
    protected static final String NOTEBOOK_SIZE = "size";
    protected static final String NOTEBOOK_CREATED = "created";
    protected static final String NOTEBOOK_LAST_MODIFIED = "last_modified";
    protected static final String NOTEBOOK_WEB_URL = "web_url";
    protected static final String NOTEBOOK_ROLES = "roles";
    protected static final String NUMBER_OF_THREADS = "number_of_threads";
    protected static final String SITE_NOTE_CRAWLER = "site_note_crawler";
    protected static final String USER_NOTE_CRAWLER = "user_note_crawler";
    protected static final String GROUP_NOTE_CRAWLER = "group_note_crawler";

    @Override
    protected String getName() {
        return this.getClass().getSimpleName();
    }

    @Override
    protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final DataStoreParams paramMap,
            final Map scriptMap, final Map defaultDataMap) {

        final ExecutorService executorService = newFixedThreadPool(Integer.parseInt(paramMap.getAsString(NUMBER_OF_THREADS, "1")));
        try (final Office365Client client = createClient(paramMap)) {
            if (isSiteNoteCrawler(paramMap)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("crawling site notes.");
                }
                storeSiteNotes(dataConfig, callback, paramMap, scriptMap, defaultDataMap, executorService, client);
            }
            if (isUserNoteCrawler(paramMap)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("crawling user notes.");
                }
                storeUsersNotes(dataConfig, callback, paramMap, scriptMap, defaultDataMap, executorService, client);
            }
            if (isGroupNoteCrawler(paramMap)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("crawling group notes.");
                }
                storeGroupsNotes(dataConfig, callback, paramMap, scriptMap, defaultDataMap, executorService, client);
            }
            if (logger.isDebugEnabled()) {
                logger.debug("Shutting down thread executor.");
            }
            executorService.shutdown();
            executorService.awaitTermination(60, TimeUnit.SECONDS);
        } catch (final InterruptedException e) {
            throw new InterruptedRuntimeException(e);
        } finally {
            executorService.shutdownNow();
        }
    }

    protected Office365Client createClient(final DataStoreParams params) {
        return new Office365Client(params);
    }

    protected boolean isGroupNoteCrawler(final DataStoreParams paramMap) {
        return Constants.TRUE.equalsIgnoreCase(paramMap.getAsString(GROUP_NOTE_CRAWLER, Constants.TRUE));
    }

    protected boolean isUserNoteCrawler(final DataStoreParams paramMap) {
        return Constants.TRUE.equalsIgnoreCase(paramMap.getAsString(USER_NOTE_CRAWLER, Constants.TRUE));
    }

    protected boolean isSiteNoteCrawler(final DataStoreParams paramMap) {
        return Constants.TRUE.equalsIgnoreCase(paramMap.getAsString(SITE_NOTE_CRAWLER, Constants.TRUE));
    }

    protected void storeSiteNotes(final DataConfig dataConfig, final IndexUpdateCallback callback, final DataStoreParams paramMap,
            final Map scriptMap, final Map defaultDataMap, final ExecutorService executorService,
            final Office365Client client) {
        final Site root = client.getSite("root");
        final List roles = Collections.emptyList();
        getNotebooks(client, c -> c.sites(root.id).onenote(), notebook -> executorService.execute(() -> processNotebook(dataConfig,
                callback, paramMap, scriptMap, defaultDataMap, client, c -> c.sites(root.id).onenote(), notebook, roles)));
    }

    protected void storeUsersNotes(final DataConfig dataConfig, final IndexUpdateCallback callback, final DataStoreParams paramMap,
            final Map scriptMap, final Map defaultDataMap, final ExecutorService executorService,
            final Office365Client client) {
        getLicensedUsers(client, user -> {
            final List roles = getUserRoles(user);
            try {
                getNotebooks(client, c -> c.users(user.id).onenote(), notebook -> executorService.execute(() -> processNotebook(dataConfig,
                        callback, paramMap, scriptMap, defaultDataMap, client, c -> c.users(user.id).onenote(), notebook, roles)));
            } catch (final GraphServiceException e) {
                logger.warn("Failed to store {}'s Notebooks.", user.displayName, e);
            }
        });
    }

    protected void storeGroupsNotes(final DataConfig dataConfig, final IndexUpdateCallback callback, final DataStoreParams paramMap,
            final Map scriptMap, final Map defaultDataMap, final ExecutorService executorService,
            final Office365Client client) {
        getOffice365Groups(client, group -> {
            final List roles = getGroupRoles(group);
            getNotebooks(client, c -> c.groups(group.id).onenote(), notebook -> executorService.execute(() -> processNotebook(dataConfig,
                    callback, paramMap, scriptMap, defaultDataMap, client, c -> c.groups(group.id).onenote(), notebook, roles)));
        });
    }

    protected void processNotebook(final DataConfig dataConfig, final IndexUpdateCallback callback, final DataStoreParams paramMap,
            final Map scriptMap, final Map defaultDataMap, final Office365Client client,
            final Function, OnenoteRequestBuilder> builder, final Notebook notebook, final List roles) {
        final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
        final Map dataMap = new HashMap<>(defaultDataMap);
        final Map resultMap = new LinkedHashMap<>(paramMap.asMap());
        final Map notebooksMap = new HashMap<>();
        final StatsKeyObject statsKey = new StatsKeyObject(notebook.id);
        paramMap.put(Constants.CRAWLER_STATS_KEY, statsKey);
        try {
            crawlerStatsHelper.begin(statsKey);
            final String url = notebook.links.oneNoteWebUrl.href;
            logger.info("Crawling URL: {}", url);

            final String contents = client.getNotebookContent(builder, notebook.id);
            final long size = contents != null ? contents.length() : 0L;
            notebooksMap.put(NOTEBOOK_NAME, notebook.displayName);
            notebooksMap.put(NOTEBOOK_CONTENTS, contents);
            notebooksMap.put(NOTEBOOK_SIZE, size);
            notebooksMap.put(NOTEBOOK_CREATED, notebook.createdDateTime);
            notebooksMap.put(NOTEBOOK_LAST_MODIFIED, notebook.lastModifiedDateTime);
            notebooksMap.put(NOTEBOOK_WEB_URL, url);
            notebooksMap.put(NOTEBOOK_ROLES, roles);

            resultMap.put(NOTEBOOK, notebooksMap);

            crawlerStatsHelper.record(statsKey, StatsAction.PREPARED);

            if (logger.isDebugEnabled()) {
                logger.debug("notebooksMap: {}", notebooksMap);
            }

            final String scriptType = getScriptType(paramMap);
            for (final Map.Entry entry : scriptMap.entrySet()) {
                final Object convertValue = convertValue(scriptType, entry.getValue(), resultMap);
                if (convertValue != null) {
                    dataMap.put(entry.getKey(), convertValue);
                }
            }

            crawlerStatsHelper.record(statsKey, StatsAction.EVALUATED);

            if (logger.isDebugEnabled()) {
                logger.debug("dataMap: {}", dataMap);
            }

            if (dataMap.get("url") instanceof final String statsUrl) {
                statsKey.setUrl(statsUrl);
            }

            callback.store(paramMap, dataMap);
            crawlerStatsHelper.record(statsKey, StatsAction.FINISHED);
        } catch (final CrawlingAccessException e) {
            logger.warn("Crawling Access Exception at : {}", dataMap, e);

            Throwable target = e;
            if (target instanceof final MultipleCrawlingAccessException ex) {
                final Throwable[] causes = ex.getCauses();
                if (causes.length > 0) {
                    target = causes[causes.length - 1];
                }
            }

            String errorName;
            final Throwable cause = target.getCause();
            if (cause != null) {
                errorName = cause.getClass().getCanonicalName();
            } else {
                errorName = target.getClass().getCanonicalName();
            }

            final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
            failureUrlService.store(dataConfig, errorName, notebook.displayName, target);
            crawlerStatsHelper.record(statsKey, StatsAction.ACCESS_EXCEPTION);
        } catch (final Throwable t) {
            logger.warn("Crawling Access Exception at : {}", dataMap, t);
            final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
            failureUrlService.store(dataConfig, t.getClass().getCanonicalName(), notebook.displayName, t);
            crawlerStatsHelper.record(statsKey, StatsAction.EXCEPTION);
        } finally {
            crawlerStatsHelper.done(statsKey);
        }
    }

    protected void getNotebooks(final Office365Client client, final Function, OnenoteRequestBuilder> builder,
            final Consumer consumer) {
        try {
            NotebookCollectionPage page = client.getNotebookPage(builder);
            page.getCurrentPage().forEach(consumer);
            while (page.getNextPage() != null) {
                page = page.getNextPage().buildRequest().get();
                page.getCurrentPage().forEach(consumer);
            }
        } catch (final GraphServiceException e) {
            if (e.getResponseCode() == 404) {
                logger.debug("Notebook is not found.", e);
            } else {
                logger.warn("Failed to access a notebook.", e);
            }
        } catch (final ClientException e) {
            logger.warn("Failed to access a notebook.", e);
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy