All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.codelibs.fess.ds.office365.OneDriveDataStore Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2012-2024 CodeLibs Project and the Others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */
package org.codelibs.fess.ds.office365;

import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.codelibs.core.exception.InterruptedRuntimeException;
import org.codelibs.core.lang.StringUtil;
import org.codelibs.core.stream.StreamUtil;
import org.codelibs.fess.Constants;
import org.codelibs.fess.app.service.FailureUrlService;
import org.codelibs.fess.crawler.exception.CrawlingAccessException;
import org.codelibs.fess.crawler.exception.MaxLengthExceededException;
import org.codelibs.fess.crawler.exception.MultipleCrawlingAccessException;
import org.codelibs.fess.crawler.filter.UrlFilter;
import org.codelibs.fess.crawler.helper.ContentLengthHelper;
import org.codelibs.fess.ds.callback.IndexUpdateCallback;
import org.codelibs.fess.ds.office365.client.Office365Client;
import org.codelibs.fess.ds.office365.client.Office365Client.UserType;
import org.codelibs.fess.entity.DataStoreParams;
import org.codelibs.fess.es.config.exentity.DataConfig;
import org.codelibs.fess.exception.DataStoreCrawlingException;
import org.codelibs.fess.helper.CrawlerStatsHelper;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsAction;
import org.codelibs.fess.helper.CrawlerStatsHelper.StatsKeyObject;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.helper.PermissionHelper;
import org.codelibs.fess.helper.SystemHelper;
import org.codelibs.fess.util.ComponentUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.microsoft.graph.core.ClientException;
import com.microsoft.graph.http.GraphServiceException;
import com.microsoft.graph.models.Drive;
import com.microsoft.graph.models.DriveItem;
import com.microsoft.graph.models.Hashes;
import com.microsoft.graph.models.Permission;
import com.microsoft.graph.requests.DriveItemCollectionPage;
import com.microsoft.graph.requests.DriveRequestBuilder;
import com.microsoft.graph.requests.GraphServiceClient;
import com.microsoft.graph.requests.PermissionCollectionPage;

import okhttp3.Request;

public class OneDriveDataStore extends Office365DataStore {

    /** Logger shared by all instances of this data store. */
    private static final Logger logger = LoggerFactory.getLogger(OneDriveDataStore.class);

    /**
     * Value returned by {@code getMaxSize} when {@code max_content_length} is blank or not a number.
     * A negative limit makes {@code processDriveItem} ask the {@code contentLengthHelper} component instead.
     */
    protected static final long DEFAULT_MAX_SIZE = -1;

    // Keys and values stored in the per-crawl configMap to describe which drive kind is being crawled.
    // CURRENT_CRAWLER holds one of the CRAWLER_TYPE_* values; getUrl uses it to pick the URL prefix.
    protected static final String CURRENT_CRAWLER = "current_crawler";
    protected static final String CRAWLER_TYPE_GROUP = "group";
    protected static final String CRAWLER_TYPE_USER = "user";
    protected static final String CRAWLER_TYPE_SHARED = "shared";
    protected static final String CRAWLER_TYPE_DRIVE = "drive";
    // configMap key under which storeData stores the result of client.getDrive(driveId).
    protected static final String DRIVE_INFO = "drive_info";

    // Data store parameter names read from DataStoreParams.
    // Several of them (MAX_CONTENT_LENGTH, IGNORE_FOLDER, IGNORE_ERROR, SUPPORTED_MIMETYPES) are reused
    // as configMap keys for the parsed value; URL_FILTER is used only as a configMap key.
    protected static final String MAX_CONTENT_LENGTH = "max_content_length";
    protected static final String IGNORE_FOLDER = "ignore_folder";
    protected static final String IGNORE_ERROR = "ignore_error";
    protected static final String SUPPORTED_MIMETYPES = "supported_mimetypes";
    protected static final String INCLUDE_PATTERN = "include_pattern";
    protected static final String EXCLUDE_PATTERN = "exclude_pattern";
    protected static final String URL_FILTER = "url_filter";
    protected static final String DRIVE_ID = "drive_id";
    protected static final String DEFAULT_PERMISSIONS = "default_permissions";
    protected static final String NUMBER_OF_THREADS = "number_of_threads";
    protected static final String SHARED_DOCUMENTS_DRIVE_CRAWLER = "shared_documents_drive_crawler";
    protected static final String USER_DRIVE_CRAWLER = "user_drive_crawler";
    protected static final String GROUP_DRIVE_CRAWLER = "group_drive_crawler";

    // Names visible to index scripts. FILE is the key of the map that processDriveItem adds to the
    // script context; every other constant below is a key inside that map.
    protected static final String FILE = "file";
    protected static final String FILE_NAME = "name";
    protected static final String FILE_DESCRIPTION = "description";
    protected static final String FILE_CONTENTS = "contents";
    protected static final String FILE_MIMETYPE = "mimetype";
    protected static final String FILE_FILETYPE = "filetype";
    protected static final String FILE_CREATED = "created";
    protected static final String FILE_LAST_MODIFIED = "last_modified";
    protected static final String FILE_SIZE = "size";
    protected static final String FILE_WEB_URL = "web_url";
    protected static final String FILE_URL = "url";
    protected static final String FILE_ROLES = "roles";
    protected static final String FILE_CTAG = "ctag";
    protected static final String FILE_ETAG = "etag";
    protected static final String FILE_ID = "id";
    protected static final String FILE_WEBDAV_URL = "webdav_url";
    protected static final String FILE_LOCATION = "location";
    protected static final String FILE_CREATEDBY_APPLICATION = "createdby_application";
    protected static final String FILE_CREATEDBY_DEVICE = "createdby_device";
    protected static final String FILE_CREATEDBY_USER = "createdby_user";
    protected static final String FILE_DELETED = "deleted";
    protected static final String FILE_HASHES = "hashes";
    protected static final String FILE_LAST_MODIFIEDBY_APPLICATION = "last_modifiedby_application";
    protected static final String FILE_LAST_MODIFIEDBY_DEVICE = "last_modifiedby_device";
    protected static final String FILE_LAST_MODIFIEDBY_USER = "last_modifiedby_user";
    protected static final String FILE_IMAGE = "image";
    protected static final String FILE_PARENT = "parent";
    protected static final String FILE_PARENT_ID = "parent_id";
    protected static final String FILE_PARENT_NAME = "parent_name";
    protected static final String FILE_PARENT_PATH = "parent_path";
    protected static final String FILE_PHOTO = "photo";
    protected static final String FILE_PUBLICATION = "publication";
    protected static final String FILE_SEARCH_RESULT = "search_result";
    protected static final String FILE_SPECIAL_FOLDER = "special_folder";
    protected static final String FILE_VIDEO = "video";

    /** Extractor name passed to the extractor builder in {@code getDriveItemContents}; changeable via the setter. */
    protected String extractorName = "tikaExtractor";

    /**
     * Returns the name of this data store.
     *
     * @return the simple class name of the runtime class
     */
    @Override
    protected String getName() {
        final Class<?> runtimeClass = getClass();
        return runtimeClass.getSimpleName();
    }

    /**
     * Crawls the configured drives and submits every drive item to a worker pool for indexing.
     * <p>
     * Drives are visited in this order, each one only when enabled: the shared documents drive,
     * the drives of licensed users, the drives of Office 365 groups, and finally the drive named
     * by the {@code drive_id} parameter. After all items are submitted the pool is shut down and
     * given 60 seconds to drain; whatever is still running afterwards is cancelled.
     *
     * @param dataConfig data store configuration, used when recording failure URLs
     * @param callback receiver of the generated index documents
     * @param paramMap data store parameters
     * @param scriptMap index field name to script
     * @param defaultDataMap default field values copied into every document
     * @throws InterruptedRuntimeException if the thread is interrupted while waiting for the workers
     */
    @Override
    protected void storeData(final DataConfig dataConfig, final IndexUpdateCallback callback, final DataStoreParams paramMap,
            final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap) {

        // Parsed settings shared (read-only after this point, except CURRENT_CRAWLER/DRIVE_INFO) with the workers.
        final Map<String, Object> configMap = new HashMap<>();
        configMap.put(MAX_CONTENT_LENGTH, getMaxSize(paramMap));
        configMap.put(IGNORE_FOLDER, isIgnoreFolder(paramMap));
        configMap.put(IGNORE_ERROR, isIgnoreError(paramMap));
        configMap.put(SUPPORTED_MIMETYPES, getSupportedMimeTypes(paramMap));
        configMap.put(URL_FILTER, getUrlFilter(paramMap));
        if (logger.isDebugEnabled()) {
            logger.debug("configMap: {}", configMap);
        }

        final ExecutorService executorService = newFixedThreadPool(Integer.parseInt(paramMap.getAsString(NUMBER_OF_THREADS, "1")));
        try (final Office365Client client = createClient(paramMap)) {
            if (isSharedDocumentsDriveCrawler(paramMap)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("crawling shared documents drive.");
                }
                configMap.put(CURRENT_CRAWLER, CRAWLER_TYPE_SHARED);
                storeSharedDocumentsDrive(dataConfig, callback, configMap, paramMap, scriptMap, defaultDataMap, executorService, client,
                        null);
            }

            if (isUserDriveCrawler(paramMap)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("crawling user drive.");
                }
                configMap.put(CURRENT_CRAWLER, CRAWLER_TYPE_USER);
                storeUsersDrive(dataConfig, callback, configMap, paramMap, scriptMap, defaultDataMap, executorService, client);
            }

            if (isGroupDriveCrawler(paramMap)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("crawling group drive.");
                }
                configMap.put(CURRENT_CRAWLER, CRAWLER_TYPE_GROUP);
                storeGroupsDrive(dataConfig, callback, configMap, paramMap, scriptMap, defaultDataMap, executorService, client);
            }

            final String driveId = paramMap.getAsString(DRIVE_ID);
            if (StringUtil.isNotBlank(driveId)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("crawling doclument library drive: {}", driveId);
                }
                configMap.put(CURRENT_CRAWLER, CRAWLER_TYPE_DRIVE);
                configMap.put(DRIVE_INFO, client.getDrive(driveId));
                storeSharedDocumentsDrive(dataConfig, callback, configMap, paramMap, scriptMap, defaultDataMap, executorService, client,
                        driveId);
            }

            if (logger.isDebugEnabled()) {
                logger.debug("Shutting down thread executor.");
            }
            executorService.shutdown();
            // The finally block cancels whatever is left, so make a timeout visible instead of silent.
            if (!executorService.awaitTermination(60, TimeUnit.SECONDS)) {
                logger.warn("Crawling tasks did not finish within 60 seconds. Remaining tasks are cancelled.");
            }
        } catch (final InterruptedException e) {
            throw new InterruptedRuntimeException(e);
        } finally {
            executorService.shutdownNow();
        }
    }

    /**
     * Creates the Office 365 client used for one crawl. Subclasses may override this to supply another client.
     *
     * @param params data store parameters handed to the client constructor
     * @return a new client instance
     */
    protected Office365Client createClient(final DataStoreParams params) {
        final Office365Client office365Client = new Office365Client(params);
        return office365Client;
    }

    /**
     * Obtains a {@link UrlFilter} component and configures it from the {@code include_pattern} and
     * {@code exclude_pattern} parameters.
     *
     * @param paramMap data store parameters
     * @return the initialized filter
     */
    protected UrlFilter getUrlFilter(final DataStoreParams paramMap) {
        final UrlFilter filter = ComponentUtil.getComponent(UrlFilter.class);

        // Blank patterns are skipped so the filter keeps its defaults.
        final String includePattern = paramMap.getAsString(INCLUDE_PATTERN);
        if (!StringUtil.isBlank(includePattern)) {
            filter.addInclude(includePattern);
        }

        final String excludePattern = paramMap.getAsString(EXCLUDE_PATTERN);
        if (!StringUtil.isBlank(excludePattern)) {
            filter.addExclude(excludePattern);
        }

        final String crawlingInfoId = paramMap.getAsString(Constants.CRAWLING_INFO_ID);
        filter.init(crawlingInfoId);

        if (logger.isDebugEnabled()) {
            logger.debug("urlFilter: {}", filter);
        }
        return filter;
    }

    /**
     * Tells whether the shared documents drive is crawled.
     *
     * @param paramMap data store parameters
     * @return {@code true} unless {@code shared_documents_drive_crawler} is set to something other than "true" (case-insensitive)
     */
    protected boolean isSharedDocumentsDriveCrawler(final DataStoreParams paramMap) {
        final String flag = paramMap.getAsString(SHARED_DOCUMENTS_DRIVE_CRAWLER, Constants.TRUE);
        return Constants.TRUE.equalsIgnoreCase(flag);
    }

    /**
     * Tells whether user drives are crawled.
     *
     * @param paramMap data store parameters
     * @return {@code true} unless {@code user_drive_crawler} is set to something other than "true" (case-insensitive)
     */
    protected boolean isUserDriveCrawler(final DataStoreParams paramMap) {
        final String flag = paramMap.getAsString(USER_DRIVE_CRAWLER, Constants.TRUE);
        return Constants.TRUE.equalsIgnoreCase(flag);
    }

    /**
     * Tells whether group drives are crawled.
     *
     * @param paramMap data store parameters
     * @return {@code true} unless {@code group_drive_crawler} is set to something other than "true" (case-insensitive)
     */
    protected boolean isGroupDriveCrawler(final DataStoreParams paramMap) {
        final String flag = paramMap.getAsString(GROUP_DRIVE_CRAWLER, Constants.TRUE);
        return Constants.TRUE.equalsIgnoreCase(flag);
    }

    /**
     * Tells whether folder items are skipped instead of indexed.
     *
     * @param paramMap data store parameters
     * @return {@code true} unless {@code ignore_folder} is set to something other than "true" (case-insensitive)
     */
    protected boolean isIgnoreFolder(final DataStoreParams paramMap) {
        final String flag = paramMap.getAsString(IGNORE_FOLDER, Constants.TRUE);
        return Constants.TRUE.equalsIgnoreCase(flag);
    }

    /**
     * Tells whether content extraction errors are tolerated.
     *
     * @param paramMap data store parameters
     * @return {@code true} unless {@code ignore_error} is set to something other than "true" (case-insensitive)
     */
    protected boolean isIgnoreError(final DataStoreParams paramMap) {
        final String flag = paramMap.getAsString(IGNORE_ERROR, Constants.TRUE);
        return Constants.TRUE.equalsIgnoreCase(flag);
    }

    /**
     * Reads the {@code max_content_length} parameter.
     *
     * @param paramMap data store parameters
     * @return the configured value, or {@link #DEFAULT_MAX_SIZE} when it is blank or not a valid long
     */
    protected long getMaxSize(final DataStoreParams paramMap) {
        final String configured = paramMap.getAsString(MAX_CONTENT_LENGTH);
        if (StringUtil.isBlank(configured)) {
            return DEFAULT_MAX_SIZE;
        }
        try {
            return Long.parseLong(configured);
        } catch (final NumberFormatException e) {
            // An unparsable value is treated like a missing one.
            return DEFAULT_MAX_SIZE;
        }
    }

    /**
     * Reads the comma-separated {@code supported_mimetypes} parameter.
     *
     * @param paramMap data store parameters
     * @return the trimmed entries; a single {@code ".*"} entry when the parameter is not set
     */
    protected String[] getSupportedMimeTypes(final DataStoreParams paramMap) {
        final String configured = paramMap.getAsString(SUPPORTED_MIMETYPES, ".*");
        return StreamUtil.split(configured, ",").get(entries -> entries.map(String::trim).toArray(String[]::new));
    }

    /**
     * Walks one drive and submits each of its items to the executor for indexing.
     * <p>
     * The drive is the one identified by {@code driveId}, or the client's default drive when
     * {@code driveId} is {@code null}. Items are processed without additional roles.
     *
     * @param dataConfig data store configuration
     * @param callback receiver of the generated index documents
     * @param configMap parsed crawl settings
     * @param paramMap data store parameters
     * @param scriptMap index field name to script
     * @param defaultDataMap default field values
     * @param executorService pool that runs {@code processDriveItem}
     * @param client Office 365 client
     * @param driveId drive to crawl, or {@code null} for the default drive
     */
    protected void storeSharedDocumentsDrive(final DataConfig dataConfig, final IndexUpdateCallback callback,
            final Map<String, Object> configMap, final DataStoreParams paramMap, final Map<String, String> scriptMap,
            final Map<String, Object> defaultDataMap, final ExecutorService executorService, final Office365Client client,
            final String driveId) {
        // One builder serves both the listing and the per-item requests.
        final Function<GraphServiceClient<Request>, DriveRequestBuilder> builder = c -> driveId != null ? c.drives(driveId) : c.drive();
        getDriveItemsInDrive(client, builder, item -> executorService.execute(() -> processDriveItem(dataConfig, callback, configMap,
                paramMap, scriptMap, defaultDataMap, client, builder, item, Collections.emptyList())));
    }

    /**
     * Walks the drive of every licensed user and submits each item to the executor for indexing.
     * <p>
     * Items are processed with the roles returned by {@code getUserRoles} for the owning user.
     * A {@link GraphServiceException} raised while listing one user's drive is logged and does not
     * stop the remaining users.
     *
     * @param dataConfig data store configuration
     * @param callback receiver of the generated index documents
     * @param configMap parsed crawl settings
     * @param paramMap data store parameters
     * @param scriptMap index field name to script
     * @param defaultDataMap default field values
     * @param executorService pool that runs {@code processDriveItem}
     * @param client Office 365 client
     */
    protected void storeUsersDrive(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, Object> configMap,
            final DataStoreParams paramMap, final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap,
            final ExecutorService executorService, final Office365Client client) {
        getLicensedUsers(client, user -> {
            final Function<GraphServiceClient<Request>, DriveRequestBuilder> builder = c -> c.users(user.id).drive();
            try {
                getDriveItemsInDrive(client, builder, item -> executorService.execute(() -> processDriveItem(dataConfig, callback,
                        configMap, paramMap, scriptMap, defaultDataMap, client, builder, item, getUserRoles(user))));
            } catch (final GraphServiceException e) {
                logger.warn("Failed to store {}'s Drive, ", user.displayName, e);
            }
        });
    }

    /**
     * Rethrows an {@link InterruptedException} as an {@link InterruptedRuntimeException}; any other
     * exception is ignored.
     *
     * @param e exception to inspect
     * @throws InterruptedRuntimeException if {@code e} is an {@link InterruptedException}
     */
    protected void isInterrupted(final Exception e) {
        if (e instanceof final InterruptedException interrupted) {
            throw new InterruptedRuntimeException(interrupted);
        }
    }

    /**
     * Walks the drive of every Office 365 group and submits each item to the executor for indexing.
     * <p>
     * Items are processed with the roles returned by {@code getGroupRoles} for the owning group.
     *
     * @param dataConfig data store configuration
     * @param callback receiver of the generated index documents
     * @param configMap parsed crawl settings
     * @param paramMap data store parameters
     * @param scriptMap index field name to script
     * @param defaultDataMap default field values
     * @param executorService pool that runs {@code processDriveItem}
     * @param client Office 365 client
     */
    protected void storeGroupsDrive(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map<String, Object> configMap,
            final DataStoreParams paramMap, final Map<String, String> scriptMap, final Map<String, Object> defaultDataMap,
            final ExecutorService executorService, final Office365Client client) {
        getOffice365Groups(client, group -> {
            final Function<GraphServiceClient<Request>, DriveRequestBuilder> builder = c -> c.groups(group.id).drive();
            getDriveItemsInDrive(client, builder, item -> executorService.execute(() -> processDriveItem(dataConfig, callback,
                    configMap, paramMap, scriptMap, defaultDataMap, client, builder, item, getGroupRoles(group))));
        });
    }

    protected void processDriveItem(final DataConfig dataConfig, final IndexUpdateCallback callback, final Map configMap,
            final DataStoreParams paramMap, final Map scriptMap, final Map defaultDataMap,
            final Office365Client client, final Function, DriveRequestBuilder> builder, final DriveItem item,
            final List roles) {
        final CrawlerStatsHelper crawlerStatsHelper = ComponentUtil.getCrawlerStatsHelper();
        final FessConfig fessConfig = ComponentUtil.getFessConfig();
        final String mimetype;
        final Hashes hashes;
        final Map dataMap = new HashMap<>(defaultDataMap);
        final StatsKeyObject statsKey = new StatsKeyObject(item.webUrl);
        paramMap.put(Constants.CRAWLER_STATS_KEY, statsKey);
        try {
            crawlerStatsHelper.begin(statsKey);
            if (item.file != null) {
                mimetype = item.file.mimeType;
                hashes = item.file.hashes;
            } else {
                mimetype = null;
                hashes = null;
            }
            if (((Boolean) configMap.get(IGNORE_FOLDER)).booleanValue() && mimetype == null) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Ignore item: {}", item.webUrl);
                }
                crawlerStatsHelper.discard(statsKey);
                return;
            }

            final String[] supportedMimeTypes = (String[]) configMap.get(SUPPORTED_MIMETYPES);
            if (!Stream.of(supportedMimeTypes).anyMatch(mimetype::matches)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("{} is not an indexing target.", mimetype);
                }
                crawlerStatsHelper.discard(statsKey);
                return;
            }

            final String url = getUrl(configMap, paramMap, item);
            final UrlFilter urlFilter = (UrlFilter) configMap.get(URL_FILTER);
            if (urlFilter != null && !urlFilter.match(url)) {
                if (logger.isDebugEnabled()) {
                    logger.debug("Not matched: {}", url);
                }
                crawlerStatsHelper.discard(statsKey);
                return;
            }

            logger.info("Crawling URL: {}", url);

            final boolean ignoreError = ((Boolean) configMap.get(IGNORE_ERROR));

            final Map resultMap = new LinkedHashMap<>(paramMap.asMap());
            final Map filesMap = new HashMap<>();

            long maxContentLength = ((Long) configMap.get(MAX_CONTENT_LENGTH)).longValue();
            if (maxContentLength < 0) {
                try {
                    final ContentLengthHelper contentLengthHelper = ComponentUtil.getComponent("contentLengthHelper");
                    maxContentLength = contentLengthHelper.getMaxLength(mimetype);
                } catch (Exception e) {
                    logger.warn("Failed to get maxContentLength.", e);
                }
            }
            if (maxContentLength >= 0 && item.size.longValue() > maxContentLength) {
                throw new MaxLengthExceededException(
                        "The content length (" + item.size + " byte) is over " + maxContentLength + " byte. The url is " + item.webUrl);
            }

            final String filetype = ComponentUtil.getFileTypeHelper().get(mimetype);
            filesMap.put(FILE_NAME, item.name);
            filesMap.put(FILE_DESCRIPTION, item.description != null ? item.description : StringUtil.EMPTY);
            filesMap.put(FILE_CONTENTS, getDriveItemContents(client, builder, item, maxContentLength, ignoreError));
            filesMap.put(FILE_MIMETYPE, mimetype);
            filesMap.put(FILE_FILETYPE, filetype);
            filesMap.put(FILE_CREATED, item.createdDateTime);
            filesMap.put(FILE_LAST_MODIFIED, item.lastModifiedDateTime);
            filesMap.put(FILE_SIZE, item.size);
            filesMap.put(FILE_WEB_URL, item.webUrl);
            filesMap.put(FILE_URL, url);
            filesMap.put(FILE_CTAG, item.cTag);
            filesMap.put(FILE_ETAG, item.eTag);
            filesMap.put(FILE_ID, item.id);
            filesMap.put(FILE_WEBDAV_URL, item.webDavUrl);
            filesMap.put(FILE_LOCATION, item.location);
            filesMap.put(FILE_CREATEDBY_APPLICATION, item.createdBy != null ? item.createdBy.application : null);
            filesMap.put(FILE_CREATEDBY_DEVICE, item.createdBy != null ? item.createdBy.device : null);
            filesMap.put(FILE_CREATEDBY_USER, item.createdBy != null ? item.createdBy.user : null);
            filesMap.put(FILE_DELETED, item.deleted);
            filesMap.put(FILE_HASHES, hashes);
            filesMap.put(FILE_LAST_MODIFIEDBY_APPLICATION, item.lastModifiedBy != null ? item.lastModifiedBy.application : null);
            filesMap.put(FILE_LAST_MODIFIEDBY_DEVICE, item.lastModifiedBy != null ? item.lastModifiedBy.device : null);
            filesMap.put(FILE_LAST_MODIFIEDBY_USER, item.lastModifiedBy != null ? item.lastModifiedBy.user : null);
            filesMap.put(FILE_IMAGE, item.image);
            filesMap.put(FILE_PARENT, item.parentReference);
            filesMap.put(FILE_PARENT_ID, item.parentReference != null ? item.parentReference.id : null);
            filesMap.put(FILE_PARENT_NAME, item.parentReference != null ? item.parentReference.name : null);
            filesMap.put(FILE_PARENT_PATH, item.parentReference != null ? item.parentReference.path : null);
            filesMap.put(FILE_PHOTO, item.photo);
            filesMap.put(FILE_PUBLICATION, item.publication);
            filesMap.put(FILE_SEARCH_RESULT, item.searchResult);
            filesMap.put(FILE_SPECIAL_FOLDER, item.specialFolder != null ? item.specialFolder.name : null);
            filesMap.put(FILE_VIDEO, item.video);

            final List permissions = getDriveItemPermissions(client, builder, item);
            roles.forEach(permissions::add);
            final PermissionHelper permissionHelper = ComponentUtil.getPermissionHelper();
            StreamUtil.split(paramMap.getAsString(DEFAULT_PERMISSIONS), ",")
                    .of(stream -> stream.filter(StringUtil::isNotBlank).map(permissionHelper::encode).forEach(permissions::add));
            if (defaultDataMap.get(fessConfig.getIndexFieldRole()) instanceof List roleTypeList) {
                roleTypeList.stream().map(s -> (String) s).forEach(permissions::add);
            }
            filesMap.put(FILE_ROLES, permissions.stream().distinct().collect(Collectors.toList()));

            resultMap.put(FILE, filesMap);

            crawlerStatsHelper.record(statsKey, StatsAction.PREPARED);

            if (logger.isDebugEnabled()) {
                logger.debug("filesMap: {}", filesMap);
            }

            final String scriptType = getScriptType(paramMap);
            for (final Map.Entry entry : scriptMap.entrySet()) {
                final Object convertValue = convertValue(scriptType, entry.getValue(), resultMap);
                if (convertValue != null) {
                    dataMap.put(entry.getKey(), convertValue);
                }
            }

            crawlerStatsHelper.record(statsKey, StatsAction.EVALUATED);

            if (logger.isDebugEnabled()) {
                logger.debug("dataMap: {}", dataMap);
            }

            if (dataMap.get("url") instanceof final String statsUrl) {
                statsKey.setUrl(statsUrl);
            }

            callback.store(paramMap, dataMap);
            crawlerStatsHelper.record(statsKey, StatsAction.FINISHED);
        } catch (final CrawlingAccessException e) {
            logger.warn("Crawling Access Exception at : {}", dataMap, e);

            Throwable target = e;
            if (target instanceof final MultipleCrawlingAccessException ex) {
                final Throwable[] causes = ex.getCauses();
                if (causes.length > 0) {
                    target = causes[causes.length - 1];
                }
            }

            String errorName;
            final Throwable cause = target.getCause();
            if (cause != null) {
                errorName = cause.getClass().getCanonicalName();
            } else {
                errorName = target.getClass().getCanonicalName();
            }

            final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
            failureUrlService.store(dataConfig, errorName, item.webUrl, target);
            crawlerStatsHelper.record(statsKey, StatsAction.ACCESS_EXCEPTION);
        } catch (final Throwable t) {
            logger.warn("Crawling Access Exception at : {}", dataMap, t);
            final FailureUrlService failureUrlService = ComponentUtil.getComponent(FailureUrlService.class);
            failureUrlService.store(dataConfig, t.getClass().getCanonicalName(), item.webUrl, t);
            crawlerStatsHelper.record(statsKey, StatsAction.EXCEPTION);
        } finally {
            crawlerStatsHelper.done(statsKey);
        }
    }

    protected List getDriveItemPermissions(final Office365Client client,
            final Function, DriveRequestBuilder> builder, final DriveItem item) {
        final List permissions = new ArrayList<>();
        PermissionCollectionPage page = client.getDrivePermissions(builder, item.id);
        final Consumer consumer = p -> {
            if (p.grantedToV2 != null && p.grantedToV2.user != null) {
                assignPermission(client, permissions, p);
            }
        };
        page.getCurrentPage().forEach(consumer);
        while (page.getNextPage() != null) {
            page = page.getNextPage().buildRequest().get();
            page.getCurrentPage().forEach(consumer);
        }
        return permissions;
    }

    /**
     * Converts one permission entry into search roles and appends them to {@code permissions}.
     * <p>
     * The entry's {@code grantedToV2.user} supplies an id and, through {@code getUserEmail}, a
     * second identifier. When that identifier is present but the id is blank, ids are looked up
     * with {@code client.getGroupIdsByEmail}. For each id the client reports whether it is a user
     * or a group, and roles of that kind are added; for any other type both user and group roles
     * are added. When nothing identifies the grantee, no role is added.
     *
     * @param client Office 365 client used for id and type lookups
     * @param permissions list the roles are appended to
     * @param permission permission entry; its {@code grantedToV2.user} must not be {@code null}
     */
    protected void assignPermission(final Office365Client client, final List<String> permissions, final Permission permission) {
        final SystemHelper systemHelper = ComponentUtil.getSystemHelper();
        final String id = permission.grantedToV2.user.id;
        final String email = getUserEmail(permission);
        if (StringUtil.isNotBlank(email)) {
            final List<String> idList = new ArrayList<>();
            if (StringUtil.isBlank(id)) {
                Collections.addAll(idList, client.getGroupIdsByEmail(email));
            } else {
                idList.add(id);
            }
            if (idList.isEmpty()) {
                // No id could be resolved: fall back to roles based on the identifier alone.
                permissions.add(systemHelper.getSearchRoleByUser(email));
                permissions.add(systemHelper.getSearchRoleByGroup(email));
            } else {
                for (final String i : idList) {
                    final UserType userType = client.getUserType(i);
                    switch (userType) {
                    case USER:
                        permissions.add(systemHelper.getSearchRoleByUser(email));
                        permissions.add(systemHelper.getSearchRoleByUser(i));
                        break;
                    case GROUP:
                        permissions.add(systemHelper.getSearchRoleByGroup(email));
                        permissions.add(systemHelper.getSearchRoleByGroup(i));
                        break;
                    default:
                        permissions.add(systemHelper.getSearchRoleByUser(email));
                        permissions.add(systemHelper.getSearchRoleByGroup(email));
                        permissions.add(systemHelper.getSearchRoleByUser(i));
                        permissions.add(systemHelper.getSearchRoleByGroup(i));
                        break;
                    }
                }
            }
        } else if (StringUtil.isNotBlank(id)) {
            final UserType userType = client.getUserType(id);
            switch (userType) {
            case USER:
                permissions.add(systemHelper.getSearchRoleByUser(id));
                break;
            case GROUP:
                permissions.add(systemHelper.getSearchRoleByGroup(id));
                break;
            default:
                permissions.add(systemHelper.getSearchRoleByUser(id));
                permissions.add(systemHelper.getSearchRoleByGroup(id));
                break;
            }
        } else if (logger.isDebugEnabled()) {
            logger.debug("No identity for permission.");
        }
    }

    /**
     * Returns the identifier used as the grantee's "email".
     * <p>
     * The value is currently the display name of {@code grantedToV2.user}.
     *
     * @param permission permission entry to read
     * @return the display name, or {@code null} when any part of the chain is missing
     */
    protected String getUserEmail(final Permission permission) {
        if (permission.grantedToV2 == null || permission.grantedToV2.user == null) {
            return null;
        }
        // TODO email?
        return permission.grantedToV2.user.displayName;
    }

    /**
     * Determines the URL under which a drive item is indexed.
     * <p>
     * The item's web URL is used as is unless it contains {@code /_layouts/}. In that case the URL
     * is rebuilt from the part before {@code /_layouts/}, a library segment chosen by the current
     * crawler type, and the URL-encoded parent path and item name:
     * <ul>
     * <li>shared and group crawlers: {@code Shared%20Documents}</li>
     * <li>drive crawler: the name of the drive stored under {@code drive_info} (not encoded)</li>
     * <li>otherwise: {@code Documents}</li>
     * </ul>
     *
     * @param configMap parsed crawl settings
     * @param paramMap data store parameters (not used here)
     * @param item drive item
     * @return the URL, or {@code null} when the item has no web URL
     */
    protected String getUrl(final Map<String, Object> configMap, final DataStoreParams paramMap, final DriveItem item) {
        if (item.webUrl == null) {
            return null;
        }
        final int layoutsPos = item.webUrl.indexOf("/_layouts/");
        if (layoutsPos < 0) {
            return item.webUrl;
        }

        final String baseUrl = item.webUrl.substring(0, layoutsPos);
        final List<String> pathList = new ArrayList<>();
        if (item.parentReference != null && item.parentReference.path != null) {
            // Only the part after the first ':' of the parent path contributes folder segments.
            final String[] values = item.parentReference.path.split(":", 2);
            if (values.length == 2) {
                for (final String s : values[1].split("/")) {
                    pathList.add(encodeUrl(s));
                }
            }
        }
        pathList.add(encodeUrl(item.name));
        final String path = pathList.stream().filter(StringUtil::isNotBlank).collect(Collectors.joining("/"));

        final Object crawlerType = configMap.get(CURRENT_CRAWLER);
        if (CRAWLER_TYPE_SHARED.equals(crawlerType) || CRAWLER_TYPE_GROUP.equals(crawlerType)) {
            return baseUrl + "/Shared%20Documents/" + path;
        }
        if (CRAWLER_TYPE_DRIVE.equals(crawlerType)) {
            final Drive drive = (Drive) configMap.get(DRIVE_INFO);
            return baseUrl + "/" + drive.name + "/" + path;
        }
        return baseUrl + "/Documents/" + path;
    }

    /**
     * URL-encodes one path segment, writing spaces as {@code %20} instead of {@code +}.
     *
     * @param s segment to encode
     * @return the encoded segment; {@code s} itself when it is empty or the encoding is unsupported
     */
    protected String encodeUrl(final String s) {
        if (!StringUtil.isEmpty(s)) {
            try {
                final String formEncoded = URLEncoder.encode(s, Constants.UTF_8);
                return formEncoded.replace("+", "%20");
            } catch (final UnsupportedEncodingException e) {
                // ignore
            }
        }
        return s;
    }

    protected String getDriveItemContents(final Office365Client client,
            final Function, DriveRequestBuilder> builder, final DriveItem item, final long maxContentLength,
            final boolean ignoreError) {
        if (item.file != null) {
            try (final InputStream in = client.getDriveContent(builder, item.id)) {
                return ComponentUtil.getExtractorFactory().builder(in, Collections.emptyMap()).filename(item.name)
                        .maxContentLength(maxContentLength).extractorName(extractorName).extract().getContent();
            } catch (final Exception e) {
                if (!ignoreError && !ComponentUtil.getFessConfig().isCrawlerIgnoreContentException()) {
                    throw new DataStoreCrawlingException(item.webUrl, "Failed to get contents: " + item.name, e);
                }
                if (logger.isDebugEnabled()) {
                    logger.warn("Failed to get contents: {}", item.name, e);
                } else {
                    logger.warn("Failed to get contents: {}. {}", item.name, e.getMessage());
                }
                return StringUtil.EMPTY;
            }
        }
        return StringUtil.EMPTY;
    }

    protected void getDriveItemsInDrive(final Office365Client client,
            final Function, DriveRequestBuilder> builder, final Consumer consumer) {
        getDriveItemChildren(client, builder, consumer, null);
    }

    protected void getDriveItemChildren(final Office365Client client,
            final Function, DriveRequestBuilder> builder, final Consumer consumer,
            final DriveItem item) {
        if (logger.isDebugEnabled()) {
            logger.debug("Current item: {}", item != null ? item.name + " -> " + item.webUrl : "root");
        }
        DriveItemCollectionPage page;
        try {
            if (item != null) {
                consumer.accept(item);
                if (item.folder == null) {
                    return;
                }
            }
            page = client.getDriveItemPage(builder, item != null ? item.id : null);
            page.getCurrentPage().forEach(child -> getDriveItemChildren(client, builder, consumer, child));
            while (page.getNextPage() != null) {
                try {
                    page = page.getNextPage().buildRequest().get();
                    page.getCurrentPage().forEach(child -> getDriveItemChildren(client, builder, consumer, child));
                } catch (final Exception e) {
                    if (logger.isDebugEnabled()) {
                        logger.debug("Failed to process a next page.", e);
                    }
                }
            }
        } catch (final GraphServiceException e) {
            if (e.getResponseCode() == 404) {
                logger.debug("Drive item is not found.", e);
            } else {
                logger.warn("Failed to access a drive item.", e);
            }
        } catch (final ClientException e) {
            logger.warn("Failed to access a drive item.", e);
        }
    }

    /**
     * Sets the extractor name handed to the extractor builder when file contents are extracted.
     *
     * @param extractorName name of the extractor to use
     */
    public void setExtractorName(final String extractorName) {
        this.extractorName = extractorName;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy