
// com.liferay.wiki.internal.importer.MediaWikiImporter (listing header from the artifact browser: Maven / Gradle / Ivy)
/**
* SPDX-FileCopyrightText: (c) 2000 Liferay, Inc. https://liferay.com
* SPDX-License-Identifier: LGPL-2.1-or-later OR LicenseRef-Liferay-DXP-EULA-2.0.0-2023-06
*/
package com.liferay.wiki.internal.importer;
import com.liferay.asset.kernel.model.AssetTag;
import com.liferay.asset.kernel.service.AssetTagLocalService;
import com.liferay.asset.util.AssetHelper;
import com.liferay.document.library.kernel.util.DLValidatorUtil;
import com.liferay.petra.string.CharPool;
import com.liferay.petra.string.StringBundler;
import com.liferay.petra.string.StringPool;
import com.liferay.portal.kernel.exception.PortalException;
import com.liferay.portal.kernel.exception.SystemException;
import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.model.Company;
import com.liferay.portal.kernel.model.User;
import com.liferay.portal.kernel.portletfilerepository.PortletFileRepository;
import com.liferay.portal.kernel.repository.model.FileEntry;
import com.liferay.portal.kernel.service.CompanyLocalService;
import com.liferay.portal.kernel.service.ServiceContext;
import com.liferay.portal.kernel.service.UserLocalService;
import com.liferay.portal.kernel.util.GroupThreadLocal;
import com.liferay.portal.kernel.util.ListUtil;
import com.liferay.portal.kernel.util.MapUtil;
import com.liferay.portal.kernel.util.MimeTypesUtil;
import com.liferay.portal.kernel.util.ObjectValuePair;
import com.liferay.portal.kernel.util.ProgressTracker;
import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
import com.liferay.portal.kernel.util.SetUtil;
import com.liferay.portal.kernel.util.StringUtil;
import com.liferay.portal.kernel.util.Validator;
import com.liferay.portal.kernel.xml.Attribute;
import com.liferay.portal.kernel.xml.Document;
import com.liferay.portal.kernel.xml.DocumentException;
import com.liferay.portal.kernel.xml.Element;
import com.liferay.portal.kernel.xml.SAXReaderUtil;
import com.liferay.portal.kernel.zip.ZipReader;
import com.liferay.portal.kernel.zip.ZipReaderFactory;
import com.liferay.wiki.configuration.WikiGroupServiceConfiguration;
import com.liferay.wiki.constants.WikiPageConstants;
import com.liferay.wiki.constants.WikiWebKeys;
import com.liferay.wiki.exception.ImportFilesException;
import com.liferay.wiki.exception.NoSuchPageException;
import com.liferay.wiki.internal.translator.MediaWikiToCreoleTranslator;
import com.liferay.wiki.model.WikiNode;
import com.liferay.wiki.model.WikiPage;
import com.liferay.wiki.service.WikiPageLocalService;
import com.liferay.wiki.validator.WikiPageTitleValidator;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Alvaro del Castillo
* @author Jorge Ferrer
*/
public class MediaWikiImporter {
/**
 * Creates an importer and stores the collaborators it delegates to.
 *
 * @param assetTagLocalService service used to check/create asset tags
 * @param companyLocalService service used to look up the node's company
 * @param portletFileRepository repository used to resolve attachment file
 *        entries and their URLs
 * @param userLocalService service used to resolve page authors
 * @param wikiGroupServiceConfiguration configuration that supplies the
 *        front page name
 * @param wikiPageLocalService service used to read, add, update and rename
 *        wiki pages and to add attachments
 * @param wikiPageTitleValidator validator used to normalize page titles
 * @param zipReaderFactory factory used to open the images archive
 */
public MediaWikiImporter(
	AssetTagLocalService assetTagLocalService,
	CompanyLocalService companyLocalService,
	PortletFileRepository portletFileRepository,
	UserLocalService userLocalService,
	WikiGroupServiceConfiguration wikiGroupServiceConfiguration,
	WikiPageLocalService wikiPageLocalService,
	WikiPageTitleValidator wikiPageTitleValidator,
	ZipReaderFactory zipReaderFactory) {

	this._assetTagLocalService = assetTagLocalService;
	this._companyLocalService = companyLocalService;
	this._portletFileRepository = portletFileRepository;
	this._userLocalService = userLocalService;
	this._wikiGroupServiceConfiguration = wikiGroupServiceConfiguration;
	this._wikiPageLocalService = wikiPageLocalService;
	this._wikiPageTitleValidator = wikiPageTitleValidator;
	this._zipReaderFactory = zipReaderFactory;
}
/**
 * Imports a MediaWiki export into the given wiki node.
 *
 * <p>
 * The input streams are positional: index 0 is the pages XML file
 * (mandatory), index 1 is the optional users file and index 2 is the
 * optional images archive. Images are processed first, then special pages,
 * then regular pages, and finally the front page is moved if requested in
 * the options.
 * </p>
 *
 * @param  userId the ID of the user performing the import; also used as
 *         the fallback page author
 * @param  node the wiki node that receives the pages
 * @param  inputStreams the pages, users and images streams, in that order
 * @param  options the import options, read through the
 *         <code>WikiWebKeys.OPTIONS_*</code> keys
 * @throws PortalException if the pages file is missing, the XML is invalid,
 *         a file cannot be read, or any other failure occurs
 */
public void importPages(
		long userId, WikiNode node, InputStream[] inputStreams,
		Map<String, ?> options)
	throws PortalException {

	// A null array is treated like an empty one instead of failing with a
	// NullPointerException

	if ((inputStreams == null) || (inputStreams.length < 1) ||
		(inputStreams[0] == null)) {

		throw new PortalException("The pages file is mandatory");
	}

	InputStream pagesInputStream = inputStreams[0];

	InputStream usersInputStream = null;

	if (inputStreams.length > 1) {
		usersInputStream = inputStreams[1];
	}

	InputStream imagesInputStream = null;

	if (inputStreams.length > 2) {
		imagesInputStream = inputStreams[2];
	}

	try {
		Document document = SAXReaderUtil.read(pagesInputStream);

		Map<String, String> usersMap = _readUsersFile(usersInputStream);

		Element rootElement = document.getRootElement();

		List<String> specialNamespaces = _readSpecialNamespaces(
			rootElement);

		_processImages(userId, node, imagesInputStream);
		_processSpecialPages(userId, node, rootElement, specialNamespaces);
		_processRegularPages(
			userId, node, rootElement, specialNamespaces, usersMap,
			imagesInputStream, options);

		_moveFrontPage(userId, node, options);
	}
	catch (DocumentException documentException) {
		throw new ImportFilesException(
			"Invalid XML file provided", documentException);
	}
	catch (IOException ioException) {
		throw new ImportFilesException(
			"Error reading the files provided", ioException);
	}
	catch (PortalException portalException) {

		// Portal exceptions are propagated untouched so they are not
		// wrapped a second time by the generic handler below

		throw portalException;
	}
	catch (Exception exception) {
		throw new PortalException(exception);
	}
}
/**
 * Builds the page body used for a redirect: the redirect title wrapped
 * between the double open and double close bracket constants.
 *
 * @param  redirectTitle the title of the page being redirected to
 * @return the bracketed link text
 */
private String _getCreoleRedirectContent(String redirectTitle) {
	StringBuilder sb = new StringBuilder();

	sb.append(StringPool.DOUBLE_OPEN_BRACKET);
	sb.append(redirectTitle);
	sb.append(StringPool.DOUBLE_CLOSE_BRACKET);

	return sb.toString();
}
/**
 * Resolves the portal user that corresponds to a MediaWiki author.
 *
 * <p>
 * If the users map holds an email address for the author, the user is
 * looked up by that email address; otherwise the lowercased author name is
 * tried as a screen name. When no user is found the importing user is
 * returned.
 * </p>
 *
 * @param  userId the fallback user ID
 * @param  node the wiki node, used for its company ID
 * @param  author the MediaWiki author name
 * @param  usersMap the author name to email address mapping
 * @return the matching user's ID, or <code>userId</code> if none matched
 */
private long _getUserId(
	long userId, WikiNode node, String author,
	Map<String, String> usersMap) {

	long companyId = node.getCompanyId();

	String emailAddress = usersMap.get(author);

	User user = null;

	if (Validator.isNotNull(emailAddress)) {
		user = _userLocalService.fetchUserByEmailAddress(
			companyId, emailAddress);
	}
	else {
		user = _userLocalService.fetchUserByScreenName(
			companyId, StringUtil.toLowerCase(author));
	}

	if (user == null) {
		return userId;
	}

	return user.getUserId();
}
/**
 * Imports one revision of a page: resolves the author, converts the
 * content, creates the page if it does not exist yet and then updates it
 * with the converted content.
 *
 * @param  userId the importing user, used as fallback author and for tag
 *         creation
 * @param  author the MediaWiki author name of the revision
 * @param  node the wiki node that receives the page
 * @param  title the already normalized page title
 * @param  content the revision text in MediaWiki syntax
 * @param  summary the revision comment
 * @param  usersMap the author name to email address mapping
 * @param  strictImportMode the flag forwarded to the MediaWiki translator
 * @throws PortalException if anything fails; the original exception is
 *         kept as the cause
 */
private void _importPage(
		long userId, String author, WikiNode node, String title,
		String content, String summary, Map<String, String> usersMap,
		boolean strictImportMode)
	throws PortalException {

	try {
		long authorUserId = _getUserId(userId, node, author, usersMap);

		// Parent and redirect titles are extracted from the raw content
		// before it is rewritten below

		String parentTitle = _readParentTitle(content);
		String redirectTitle = _readRedirectTitle(content);

		if (Validator.isNotNull(redirectTitle)) {
			content = _getCreoleRedirectContent(redirectTitle);
		}
		else {
			content = _translateMediaWikiToCreole(
				content, strictImportMode);

			content = _translateMediaLinks(node, content);
		}

		ServiceContext serviceContext = new ServiceContext();

		serviceContext.setAddGroupPermissions(true);
		serviceContext.setAddGuestPermissions(true);

		// NOTE(review): tags are read from the rewritten content, not from
		// the raw MediaWiki text; for redirect pages that is only the
		// bracketed redirect link. Confirm this ordering is intended.

		serviceContext.setAssetTagNames(
			_readAssetTagNames(userId, node, content));

		WikiPage page = null;

		try {
			page = _wikiPageLocalService.getPage(node.getNodeId(), title);
		}
		catch (NoSuchPageException noSuchPageException) {
			if (_log.isDebugEnabled()) {
				_log.debug(noSuchPageException);
			}

			// First revision of this title: create the page so that the
			// update below has a version to build on

			page = _wikiPageLocalService.addPage(
				authorUserId, node.getNodeId(), title,
				WikiPageConstants.NEW, null, true, serviceContext);
		}

		_wikiPageLocalService.updatePage(
			authorUserId, node.getNodeId(), title, page.getVersion(),
			content, summary, true, "creole", parentTitle, redirectTitle,
			serviceContext);
	}
	catch (Exception exception) {
		throw new PortalException(
			"Error importing page " + title, exception);
	}
}
/**
 * Returns whether the title belongs to one of the special namespaces, that
 * is, whether it starts with a namespace name followed by a colon.
 *
 * @param  title the raw page title
 * @param  specialNamespaces the namespace names to test against
 * @return <code>true</code> if the title is prefixed by any of the
 *         namespaces; <code>false</code> otherwise
 */
private boolean _isSpecialMediaWikiPage(
	String title, List<String> specialNamespaces) {

	for (String namespace : specialNamespaces) {
		if (title.startsWith(namespace + StringPool.COLON)) {
			return true;
		}
	}

	return false;
}
/**
 * Decides whether an archive entry should be imported as an image
 * attachment.
 *
 * <p>
 * Entries whose first or second path segment is one of the special
 * MediaWiki directories are rejected, as are entries whose file name,
 * extension or size fails the document library validation.
 * </p>
 *
 * @param  paths the entry path split into segments
 * @param  inputStream the entry content, handed to the size validation
 * @return <code>true</code> if the entry passes all checks
 */
private boolean _isValidImage(String[] paths, InputStream inputStream) {
	boolean inSpecialDir = _specialMediaWikiDirs.contains(paths[0]);

	if (!inSpecialDir && (paths.length > 1)) {
		inSpecialDir = _specialMediaWikiDirs.contains(paths[1]);
	}

	if (inSpecialDir) {
		return false;
	}

	String name = paths[paths.length - 1];

	boolean valid = true;

	try {
		DLValidatorUtil.validateFileName(name);
		DLValidatorUtil.validateFileExtension(name);
		DLValidatorUtil.validateFileSize(
			GroupThreadLocal.getGroupId(), name,
			MimeTypesUtil.getContentType(name), inputStream);
	}
	catch (PortalException | SystemException exception) {

		// LPS-52675

		if (_log.isDebugEnabled()) {
			_log.debug(exception);
		}

		valid = false;
	}

	return valid;
}
/**
 * Renames the page named by the front page option to the configured front
 * page name, if such an option was given and at least one matching page
 * exists. Failures are logged as warnings and do not abort the import.
 *
 * @param userId the user performing the rename
 * @param node the wiki node that holds the pages
 * @param options the import options; only
 *        <code>WikiWebKeys.OPTIONS_FRONT_PAGE</code> is read here
 */
private void _moveFrontPage(
	long userId, WikiNode node, Map<String, ?> options) {

	String frontPageTitle = MapUtil.getString(
		options, WikiWebKeys.OPTIONS_FRONT_PAGE);

	if (Validator.isNull(frontPageTitle)) {
		return;
	}

	frontPageTitle = _wikiPageTitleValidator.normalize(frontPageTitle);

	try {
		int count = _wikiPageLocalService.getPagesCount(
			node.getNodeId(), frontPageTitle, true);

		if (count <= 0) {
			return;
		}

		ServiceContext serviceContext = new ServiceContext();

		serviceContext.setAddGroupPermissions(true);
		serviceContext.setAddGuestPermissions(true);

		_wikiPageLocalService.renamePage(
			userId, node.getNodeId(), frontPageTitle,
			_wikiGroupServiceConfiguration.frontPageName(), false,
			serviceContext);
	}
	catch (Exception exception) {

		// Best effort: the pages are already imported at this point, so a
		// failed rename is only reported

		if (_log.isWarnEnabled()) {
			_log.warn(
				StringBundler.concat(
					"Could not move ",
					_wikiGroupServiceConfiguration.frontPageName(),
					" to the title provided: ", frontPageTitle),
				exception);
		}
	}
}
/**
 * Turns a category name into a tag name: trims it, replaces characters
 * that are invalid for asset names with spaces and shortens the result to
 * the given length.
 *
 * @param  categoryName the raw category name
 * @param  length the length passed to the shortening step
 * @return the normalized name
 */
private String _normalize(String categoryName, int length) {
	String trimmedName = categoryName.trim();

	String word = _toWord(trimmedName);

	return StringUtil.shorten(word, length);
}
/**
 * Imports the entries of the images archive as attachments of the shared
 * images page, creating that page first if it does not exist.
 *
 * <p>
 * Attachments are added in batches. Every stream obtained from the archive
 * is closed once it has been handed to the attachment service or rejected,
 * and the zip reader is closed even if the import fails part way through.
 * </p>
 *
 * @param  userId the user that owns the created page and attachments
 * @param  node the wiki node that receives the attachments
 * @param  imagesInputStream the images archive, or <code>null</code> to
 *         skip this step
 * @throws Exception if the archive cannot be listed or an attachment
 *         cannot be added
 */
private void _processImages(
		long userId, WikiNode node, InputStream imagesInputStream)
	throws Exception {

	if (imagesInputStream == null) {
		return;
	}

	int count = 0;

	ZipReader zipReader = _zipReaderFactory.getZipReader(imagesInputStream);

	try {
		List<String> entries = zipReader.getEntries();

		if (entries == null) {
			throw new ImportFilesException();
		}

		int total = entries.size();

		if (total > 0) {
			try {
				_wikiPageLocalService.getPage(
					node.getNodeId(),
					WikiPageConstants.SHARED_IMAGES_TITLE);
			}
			catch (NoSuchPageException noSuchPageException) {
				if (_log.isDebugEnabled()) {
					_log.debug(noSuchPageException);
				}

				ServiceContext serviceContext = new ServiceContext();

				serviceContext.setAddGroupPermissions(true);
				serviceContext.setAddGuestPermissions(true);

				_wikiPageLocalService.addPage(
					userId, node.getNodeId(),
					WikiPageConstants.SHARED_IMAGES_TITLE,
					"See attachments", null, true, serviceContext);
			}
		}

		ProgressTracker progressTracker =
			ProgressTrackerThreadLocal.getProgressTracker();

		List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
			new ArrayList<>();

		try {
			for (int i = 0; i < total; i++) {
				String entry = entries.get(i);

				InputStream inputStream = zipReader.getEntryAsInputStream(
					entry);

				String[] paths = StringUtil.split(entry, CharPool.SLASH);

				if (!_isValidImage(paths, inputStream)) {
					if (_log.isInfoEnabled()) {
						_log.info("Ignoring " + entry);
					}

					// The rejected stream is never handed to anyone else,
					// so release it here

					_closeInputStream(inputStream);

					continue;
				}

				String fileName = StringUtil.toLowerCase(
					paths[paths.length - 1]);

				inputStreamOVPs.add(
					new ObjectValuePair<>(fileName, inputStream));

				count++;

				// Flush whenever the entry index is a multiple of five
				// (index 0 included)

				if ((i % 5) == 0) {
					_wikiPageLocalService.addPageAttachments(
						userId, node.getNodeId(),
						WikiPageConstants.SHARED_IMAGES_TITLE,
						inputStreamOVPs);

					// Close the flushed streams before forgetting them;
					// the finally block only sees what is left in the list

					_closeInputStreams(inputStreamOVPs);

					inputStreamOVPs.clear();

					if (progressTracker != null) {
						int percentage = Math.min(
							50 + ((i * 50) / total), 99);

						progressTracker.setPercent(percentage);
					}
				}
			}

			if (!inputStreamOVPs.isEmpty()) {
				_wikiPageLocalService.addPageAttachments(
					userId, node.getNodeId(),
					WikiPageConstants.SHARED_IMAGES_TITLE, inputStreamOVPs);
			}
		}
		finally {
			_closeInputStreams(inputStreamOVPs);
		}
	}
	finally {
		zipReader.close();
	}

	if (_log.isInfoEnabled()) {
		_log.info(
			StringBundler.concat(
				"Imported ", count, " images into ", node.getName()));
	}
}

/**
 * Closes every stream held by the given pairs, logging failures as
 * warnings.
 */
private void _closeInputStreams(
	List<ObjectValuePair<String, InputStream>> inputStreamOVPs) {

	for (ObjectValuePair<String, InputStream> inputStreamOVP :
			inputStreamOVPs) {

		_closeInputStream(inputStreamOVP.getValue());
	}
}

/**
 * Closes a single stream, tolerating <code>null</code> and logging a
 * failure to close as a warning.
 */
private void _closeInputStream(InputStream inputStream) {
	try (InputStream closeableInputStream = inputStream) {
	}
	catch (IOException ioException) {
		if (_log.isWarnEnabled()) {
			_log.warn(ioException);
		}
	}
}
/**
 * Imports every page of the export that is not in a special namespace.
 *
 * <p>
 * Either all revisions of a page are imported in document order or, when
 * the latest version option is set, only the last one. A revision that
 * fails to import is logged as a warning and does not stop the remaining
 * revisions or pages.
 * </p>
 *
 * @param userId the importing user
 * @param node the wiki node that receives the pages
 * @param rootElement the root element of the pages XML
 * @param specialNamespaces the namespace names whose pages are skipped
 * @param usersMap the author name to email address mapping
 * @param imagesInputStream the images archive; only tested for
 *        <code>null</code> to pick the upper bound of the progress range
 * @param options the import options
 */
private void _processRegularPages(
	long userId, WikiNode node, Element rootElement,
	List<String> specialNamespaces, Map<String, String> usersMap,
	InputStream imagesInputStream, Map<String, ?> options) {

	boolean importLatestVersion = MapUtil.getBoolean(
		options, WikiWebKeys.OPTIONS_IMPORT_LATEST_VERSION);
	boolean strictImportMode = MapUtil.getBoolean(
		options, WikiWebKeys.OPTIONS_STRICT_IMPORT_MODE);

	ProgressTracker progressTracker =
		ProgressTrackerThreadLocal.getProgressTracker();

	// Progress for this step runs from 10 up to 50 when images are also
	// imported, or up to 99 when they are not

	int minPercentage = 10;
	int maxPercentage = 50;

	if (imagesInputStream == null) {
		maxPercentage = 99;
	}

	int count = 0;

	List<Element> pageElements = rootElement.elements("page");

	int total = pageElements.size();

	for (int i = 0; i < total; i++) {
		Element pageElement = pageElements.get(i);

		String title = pageElement.elementText("title");

		if (title == null) {
			if (_log.isWarnEnabled()) {
				_log.warn("Skipping page without a title");
			}

			continue;
		}

		if (_isSpecialMediaWikiPage(title, specialNamespaces)) {
			continue;
		}

		title = _wikiPageTitleValidator.normalize(title);

		// The tracker may be absent; the interpolation always starts from
		// the fixed lower bound so the reported value grows linearly

		if (progressTracker != null) {
			int percentage = Math.min(
				minPercentage +
					((i * (maxPercentage - minPercentage)) / total),
				maxPercentage);

			progressTracker.setPercent(percentage);
		}

		List<Element> revisionElements = pageElement.elements("revision");

		if (importLatestVersion && !revisionElements.isEmpty()) {
			revisionElements = Collections.singletonList(
				revisionElements.get(revisionElements.size() - 1));
		}

		for (Element revisionElement : revisionElements) {
			Element contributorElement = revisionElement.element(
				"contributor");

			// NOTE(review): a revision without a contributor is imported
			// with a null author; presumably the importing user then
			// becomes the page author. Verify against _getUserId.

			String author = null;

			if (contributorElement != null) {
				author = contributorElement.elementText("username");
			}

			String content = revisionElement.elementText("text");
			String summary = revisionElement.elementText("comment");

			try {
				_importPage(
					userId, author, node, title, content, summary, usersMap,
					strictImportMode);
			}
			catch (Exception exception) {
				if (_log.isWarnEnabled()) {
					_log.warn(
						"Page with title " + title +
							" could not be imported",
						exception);
				}
			}
		}

		count++;
	}

	if (_log.isInfoEnabled()) {
		_log.info(
			StringBundler.concat(
				"Imported ", count, " pages into ", node.getName()));
	}
}
/**
 * Handles the pages that live in special namespaces.
 *
 * <p>
 * Pages whose title starts with <code>Category:</code> are turned into
 * asset tags. Other pages in a special namespace are removed from the root
 * element. Pages outside the special namespaces are left untouched.
 * </p>
 *
 * @param  userId the user that owns the created tags
 * @param  node the wiki node, used for its group ID
 * @param  rootElement the root element of the pages XML
 * @param  specialNamespaces the namespace names considered special
 * @throws PortalException if a tag cannot be checked or created
 */
private void _processSpecialPages(
		long userId, WikiNode node, Element rootElement,
		List<String> specialNamespaces)
	throws PortalException {

	ProgressTracker progressTracker =
		ProgressTrackerThreadLocal.getProgressTracker();

	// NOTE(review): the loop removes pages from rootElement while walking
	// this list, which assumes elements() returns a list detached from the
	// tree. Confirm against the Element implementation.

	List<Element> pageElements = rootElement.elements("page");

	int total = pageElements.size();

	for (int i = 0; i < total; i++) {
		Element pageElement = pageElements.get(i);

		String title = pageElement.elementText("title");

		if (title == null) {

			// Nothing to classify without a title

			continue;
		}

		if (!title.startsWith("Category:")) {
			if (_isSpecialMediaWikiPage(title, specialNamespaces)) {
				rootElement.remove(pageElement);
			}

			continue;
		}

		String categoryName = title.substring("Category:".length());

		categoryName = _normalize(categoryName, 75);

		_assetTagLocalService.checkTags(
			userId, node.getGroupId(), new String[] {categoryName});

		// The tracker may be absent, as in _processImages

		if ((progressTracker != null) && ((i % 5) == 0)) {
			progressTracker.setPercent((i * 10) / total);
		}
	}
}
/**
 * Collects the asset tag names for a page from its content.
 *
 * <p>
 * Every match of the categories pattern is normalized, checked through the
 * asset tag service, and the names of the returned tags are collected. If
 * the content contains the work in progress marker, the corresponding tag
 * name is appended as well.
 * </p>
 *
 * @param  userId the user that owns any tag that has to be created
 * @param  node the wiki node, used for its group ID
 * @param  content the page content to scan
 * @return the tag names, possibly empty
 * @throws PortalException if a tag cannot be checked or created
 */
private String[] _readAssetTagNames(
		long userId, WikiNode node, String content)
	throws PortalException {

	List<String> assetTagNames = new ArrayList<>();

	Matcher matcher = _categoriesPattern.matcher(content);

	while (matcher.find()) {
		String categoryName = _normalize(matcher.group(1), 75);

		List<AssetTag> assetTags = _assetTagLocalService.checkTags(
			userId, node.getGroupId(), new String[] {categoryName});

		assetTagNames.addAll(
			ListUtil.toList(assetTags, AssetTag.NAME_ACCESSOR));
	}

	if (content.contains(_WORK_IN_PROGRESS)) {
		assetTagNames.add(_WORK_IN_PROGRESS_TAG);
	}

	return assetTagNames.toArray(new String[0]);
}
/**
 * Extracts the parent page title from the content.
 *
 * <p>
 * The first match of the parent pattern is normalized and suffixed with
 * <code>" (disambiguation)"</code>.
 * </p>
 *
 * @param  content the page content to scan
 * @return the parent title, or a blank string if the pattern does not
 *         match
 */
private String _readParentTitle(String content) {
	Matcher parentMatcher = _parentPattern.matcher(content);

	if (!parentMatcher.find()) {
		return StringPool.BLANK;
	}

	String parentTitle = _wikiPageTitleValidator.normalize(
		parentMatcher.group(1));

	return parentTitle + " (disambiguation)";
}
/**
 * Extracts the redirect target title from the content.
 *
 * @param  content the page content to scan
 * @return the normalized target of the first redirect pattern match, or a
 *         blank string if the pattern does not match
 */
private String _readRedirectTitle(String content) {
	Matcher redirectMatcher = _redirectPattern.matcher(content);

	if (!redirectMatcher.find()) {
		return StringPool.BLANK;
	}

	String target = redirectMatcher.group(1);

	return _wikiPageTitleValidator.normalize(target);
}
/**
 * Reads the names of all namespaces declared in the export's
 * <code>siteinfo</code> section whose <code>key</code> attribute is not
 * <code>"0"</code>.
 *
 * @param  root the root element of the pages XML
 * @return the names of the special namespaces
 * @throws ImportFilesException if the <code>siteinfo</code> or
 *         <code>namespaces</code> element is missing, or a namespace has no
 *         <code>key</code> attribute
 */
private List<String> _readSpecialNamespaces(Element root)
	throws ImportFilesException {

	Element siteinfoElement = root.element("siteinfo");

	if (siteinfoElement == null) {
		throw new ImportFilesException("Invalid pages XML file");
	}

	Element namespacesElement = siteinfoElement.element("namespaces");

	// A structurally incomplete file is reported the same way as a missing
	// siteinfo element instead of failing with a NullPointerException

	if (namespacesElement == null) {
		throw new ImportFilesException("Invalid pages XML file");
	}

	List<String> namespaces = new ArrayList<>();

	List<Element> namespaceElements = namespacesElement.elements(
		"namespace");

	for (Element namespaceElement : namespaceElements) {
		Attribute attribute = namespaceElement.attribute("key");

		if (attribute == null) {
			throw new ImportFilesException("Invalid pages XML file");
		}

		if (!"0".equals(attribute.getValue())) {
			namespaces.add(namespaceElement.getText());
		}
	}

	return namespaces;
}
/**
 * Reads the optional users file into a map.
 *
 * <p>
 * Each line is split into columns; lines with exactly two non-empty
 * columns contribute an entry mapping the first column to the second, and
 * all other lines are logged and ignored. The file is decoded as UTF-8 so
 * that the result does not depend on the platform default charset.
 * </p>
 *
 * @param  usersInputStream the users file, or <code>null</code>
 * @return the mapping read from the file, or an empty map if no stream was
 *         given
 * @throws IOException if the stream cannot be read
 */
private Map<String, String> _readUsersFile(InputStream usersInputStream)
	throws IOException {

	if (usersInputStream == null) {
		return Collections.emptyMap();
	}

	Map<String, String> usersMap = new HashMap<>();

	// The reader is intentionally not closed here: closing it would close
	// the stream, which is owned by the caller

	UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
		new InputStreamReader(
			usersInputStream, java.nio.charset.StandardCharsets.UTF_8));

	String line = null;

	while ((line = unsyncBufferedReader.readLine()) != null) {
		String[] array = StringUtil.split(line);

		if ((array.length == 2) && Validator.isNotNull(array[0]) &&
			Validator.isNotNull(array[1])) {

			usersMap.put(array[0], array[1]);
		}
		else if (_log.isInfoEnabled()) {
			_log.info(
				"Ignoring line " + line +
					" because it does not contain exactly 2 columns");
		}
	}

	return usersMap;
}
/**
 * Replaces every character listed in
 * <code>AssetHelper.INVALID_CHARACTERS</code> with a space.
 *
 * @param  text the text to clean
 * @return the cleaned text, or <code>text</code> itself when the validator
 *         considers it null
 */
private String _toWord(String text) {
	if (Validator.isNull(text)) {
		return text;
	}

	String word = text;

	// Replacing one invalid character at a time yields the same string as
	// checking each position against the whole list, because every
	// replacement is the same space character

	for (char invalidChar : AssetHelper.INVALID_CHARACTERS) {
		word = word.replace(invalidChar, CharPool.SPACE);
	}

	return word;
}
/**
 * Rewrites <code>[[Media:file|label]]</code> links so they point at the
 * matching attachment of the shared images page.
 *
 * <p>
 * Links whose file is not found among the attachments are left exactly as
 * they were. If the shared images page or the company cannot be loaded,
 * the content is returned unchanged and the failure is logged as a
 * warning.
 * </p>
 *
 * @param  node the wiki node that holds the shared images page
 * @param  content the page content to rewrite
 * @return the content with the resolvable media links replaced
 */
private String _translateMediaLinks(WikiNode node, String content) {
	try {

		// Matcher.appendReplacement only accepts a StringBuffer on older
		// Java versions

		StringBuffer sb = new StringBuffer();

		WikiPage sharedImagesPage = _wikiPageLocalService.getPage(
			node.getNodeId(), WikiPageConstants.SHARED_IMAGES_TITLE);

		Company company = _companyLocalService.getCompany(
			node.getCompanyId());

		String portalURL = company.getPortalURL(node.getGroupId());

		Matcher matcher = _mediaLinkPattern.matcher(content);

		while (matcher.find()) {

			// Group 2 is the file name, group 3 the optional "|label" part

			String fileName = matcher.group(2);

			FileEntry fileEntry =
				_portletFileRepository.fetchPortletFileEntry(
					node.getGroupId(),
					sharedImagesPage.getAttachmentsFolderId(), fileName);

			if (fileEntry == null) {

				// Quote the text so that "$" and "\" in the original link
				// are copied literally instead of being read as group
				// references

				matcher.appendReplacement(
					sb, Matcher.quoteReplacement(matcher.group()));

				continue;
			}

			String fileEntryURL =
				_portletFileRepository.getPortletFileEntryURL(
					null, fileEntry, StringPool.BLANK);

			String linkLabel = matcher.group(3);

			if (linkLabel == null) {
				linkLabel = StringPool.PIPE + fileName;
			}

			String link = StringBundler.concat(
				"[[", portalURL, fileEntryURL, linkLabel, "]]");

			matcher.appendReplacement(sb, Matcher.quoteReplacement(link));
		}

		matcher.appendTail(sb);

		return sb.toString();
	}
	catch (PortalException portalException) {
		if (_log.isWarnEnabled()) {
			_log.warn(portalException);
		}

		return content;
	}
}
/**
 * Runs the content through the shared MediaWiki translator after setting
 * its strict import mode flag.
 *
 * @param  content the page content to translate
 * @param  strictImportMode the flag set on the translator before
 *         translating
 * @return the translator's output
 */
private String _translateMediaWikiToCreole(
	String content, boolean strictImportMode) {

	MediaWikiToCreoleTranslator translator = _translator;

	translator.setStrictImportMode(strictImportMode);

	String translatedContent = translator.translate(content);

	return translatedContent;
}
// Marker searched for in page content, and the tag name added when it is
// found

private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";

private static final String _WORK_IN_PROGRESS_TAG = "work in progress";

private static final Log _log = LogFactoryUtil.getLog(
	MediaWikiImporter.class);

// Matches [[Category:name]] / [[category:name]] plus trailing newlines;
// group 1 is the category name

private static final Pattern _categoriesPattern = Pattern.compile(
	"\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");

// Matches [[Media:file]] and [[Media:file|label]]; group 2 is the file
// name and group 3 the optional "|label" part

private static final Pattern _mediaLinkPattern = Pattern.compile(
	"\\[\\[(Media:)([^\\]\\|]*)(\\|[^\\]]*)?\\]\\]", Pattern.DOTALL);

// Matches {{OtherTopics|title}}; group 1 is the title

private static final Pattern _parentPattern = Pattern.compile(
	"\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");

// Matches #REDIRECT [[title]]; group 1 is the title

private static final Pattern _redirectPattern = Pattern.compile(
	"#REDIRECT \\[\\[([^\\]]*)\\]\\]");

// Directory names whose archive entries are not imported as images

private static final Set<String> _specialMediaWikiDirs = SetUtil.fromArray(
	"archive", "temp", "thumb");

private final AssetTagLocalService _assetTagLocalService;
private final CompanyLocalService _companyLocalService;
private final PortletFileRepository _portletFileRepository;

// NOTE(review): a single translator instance is shared and reconfigured
// before each translation; confirm the importer is not used concurrently

private final MediaWikiToCreoleTranslator _translator =
	new MediaWikiToCreoleTranslator();

private final UserLocalService _userLocalService;

// NOTE(review): in the visible code this field is only assigned in the
// constructor; volatile is kept as found

private volatile WikiGroupServiceConfiguration
	_wikiGroupServiceConfiguration;

private final WikiPageLocalService _wikiPageLocalService;
private final WikiPageTitleValidator _wikiPageTitleValidator;
private final ZipReaderFactory _zipReaderFactory;
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (footer of the page this listing was copied from)