All downloads are free. Search and download functionality uses the official Maven repository.

org.sweble.wikitext.engine.utils.LanguageConfigGenerator Maven / Gradle / Ivy

Go to download

A minimal engine using the Sweble Wikitext Parser to process articles in the context of a MediaWiki-like configuration.

There is a newer version: 3.1.9
Show newest version
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.sweble.wikitext.engine.utils;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collection;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.collections.map.MultiValueMap;
import org.sweble.wikitext.engine.config.I18nAliasImpl;
import org.sweble.wikitext.engine.config.InterwikiImpl;
import org.sweble.wikitext.engine.config.NamespaceImpl;
import org.sweble.wikitext.engine.config.WikiConfig;
import org.sweble.wikitext.engine.config.WikiConfigImpl;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;






/**
 * Builds a {@link WikiConfig} for a Wikipedia language edition by querying
 * the {@code siteinfo} module of that edition's web API
 * ({@code <prefix>.wikipedia.org/w/api.php}) and copying the returned
 * namespaces, namespace aliases, interwiki links and magic words into the
 * configuration.
 * <p>
 * Every public method performs network I/O on each call.
 *
 * @author Samy Ateia, [email protected]
 *
 */
public class LanguageConfigGenerator
{

	/**
	 * Creates a wiki configuration for one Wikipedia language edition.
	 * <p>
	 * The engine defaults from {@link DefaultConfigEnWp} are applied first,
	 * then namespaces, interwikis and i18n aliases are downloaded for the
	 * configured interwiki prefix, and finally the parser functions and tag
	 * extensions of {@link DefaultConfigEnWp} are registered.
	 *
	 * @param siteName
	 *            name stored as the site name of the configuration
	 * @param siteURL
	 *            URL stored as the wiki URL of the configuration
	 * @param languagePrefix
	 *            language code; used as content language, as interwiki prefix
	 *            and as the host name prefix of the queried Wikipedia
	 * @return the populated configuration
	 * @throws IOException
	 *             if an API request fails
	 * @throws ParserConfigurationException
	 *             if no XML parser can be created
	 * @throws SAXException
	 *             if an API response is not well-formed XML
	 */
	public static WikiConfig generateWikiConfig(String siteName, String siteURL, String languagePrefix) throws IOException, ParserConfigurationException, SAXException
	{
		WikiConfigImpl wikiConfig = new WikiConfigImpl();
		wikiConfig.setSiteName(siteName);
		wikiConfig.setWikiUrl(siteURL);
		wikiConfig.setContentLang(languagePrefix);
		wikiConfig.setIwPrefix(languagePrefix);

		DefaultConfigEnWp config = new DefaultConfigEnWp();
		config.configureEngine(wikiConfig);

		String iwPrefix = wikiConfig.getInterwikiPrefix();
		addNamespaces(wikiConfig, iwPrefix);
		addInterwikis(wikiConfig, iwPrefix);
		addi18NAliases(wikiConfig, iwPrefix);

		config.addParserFunctions(wikiConfig);
		config.addTagExtensions(wikiConfig);

		return wikiConfig;
	}

	/**
	 * Downloads the magic words of a Wikipedia edition and registers each one
	 * as an i18n alias.
	 * <p>
	 * A magic word is treated as case sensitive when its
	 * {@code case-sensitive} attribute is present. Aliases are read from the
	 * element children of the first element child of each {@code magicword}
	 * element. Registration failures are printed to standard error and the
	 * affected magic word is skipped.
	 *
	 * @param wikiConfig
	 *            configuration that receives the aliases
	 * @param iwPrefix
	 *            host name prefix of the Wikipedia edition, e.g. {@code "en"}
	 * @throws IOException
	 *             if the API request fails
	 * @throws ParserConfigurationException
	 *             if no XML parser can be created
	 * @throws SAXException
	 *             if the API response is not well-formed XML
	 */
	public static void addi18NAliases(WikiConfigImpl wikiConfig, String iwPrefix) throws IOException, ParserConfigurationException, SAXException
	{
		Document document = getXMLFromUlr(siteInfoUrl(iwPrefix, "magicwords"));
		NodeList magicWords = document.getElementsByTagName("magicword");

		for (int i = 0; i < magicWords.getLength(); i++)
		{
			Node magicWord = magicWords.item(i);
			NamedNodeMap attributes = magicWord.getAttributes();

			String name = attributes.getNamedItem("name").getNodeValue();
			// The attribute only has to be present; its value is not inspected.
			boolean caseSensitive = attributes.getNamedItem("case-sensitive") != null;

			ArrayList<String> aliases = new ArrayList<String>();
			// Walk element nodes only, so that a magic word without children
			// or a response containing whitespace text nodes does not break
			// the traversal.
			Node aliasContainer = firstElementChild(magicWord);
			if (aliasContainer != null)
			{
				NodeList aliasNodes = aliasContainer.getChildNodes();
				for (int j = 0; j < aliasNodes.getLength(); j++)
				{
					Node aliasNode = aliasNodes.item(j);
					if (aliasNode.getNodeType() == Node.ELEMENT_NODE)
					{
						aliases.add(aliasNode.getTextContent());
					}
				}
			}

			I18nAliasImpl i18nAlias = new I18nAliasImpl(name, caseSensitive, aliases);
			try
			{
				wikiConfig.addI18nAlias(i18nAlias);
			}
			catch (Exception e)
			{
				//TODO resolve conflicts problem
				// Best effort: report the failure and continue with the next
				// magic word.
				e.printStackTrace();
			}
		}
	}

	/**
	 * Downloads the interwiki map of a Wikipedia edition and registers every
	 * entry with the configuration.
	 * <p>
	 * An entry is treated as local when its {@code local} attribute is
	 * present.
	 *
	 * @param wikiConfig
	 *            configuration that receives the interwiki entries
	 * @param iwPrefix
	 *            host name prefix of the Wikipedia edition, e.g. {@code "en"}
	 * @throws IOException
	 *             if the API request fails
	 * @throws ParserConfigurationException
	 *             if no XML parser can be created
	 * @throws SAXException
	 *             if the API response is not well-formed XML
	 */
	public static void addInterwikis(WikiConfigImpl wikiConfig, String iwPrefix) throws IOException, ParserConfigurationException, SAXException
	{
		Document document = getXMLFromUlr(siteInfoUrl(iwPrefix, "interwikimap"));
		NodeList interwikiNodes = document.getElementsByTagName("iw");

		for (int i = 0; i < interwikiNodes.getLength(); i++)
		{
			NamedNodeMap attributes = interwikiNodes.item(i).getAttributes();

			String prefix = attributes.getNamedItem("prefix").getNodeValue();
			String url = attributes.getNamedItem("url").getNodeValue();
			boolean isLocal = attributes.getNamedItem("local") != null;
			boolean isTrans = false; //TODO always false?

			wikiConfig.addInterwiki(new InterwikiImpl(prefix, url, isLocal, isTrans));
		}
	}

	/**
	 * Downloads the namespaces of a Wikipedia edition, together with their
	 * aliases, and registers them with the configuration.
	 * <p>
	 * The namespace whose canonical name is {@code "Template"} is set as the
	 * template namespace; otherwise the namespace with id {@code 0} is set as
	 * the default namespace. A namespace is flagged as the file namespace
	 * when its canonical name is {@code "File"}, and as supporting subpages
	 * when its {@code subpages} attribute is present.
	 *
	 * @param wikiConfig
	 *            configuration that receives the namespaces
	 * @param iwPrefix
	 *            host name prefix of the Wikipedia edition, e.g. {@code "en"}
	 * @throws IOException
	 *             if an API request fails
	 * @throws ParserConfigurationException
	 *             if no XML parser can be created
	 * @throws SAXException
	 *             if an API response is not well-formed XML
	 */
	public static void addNamespaces(WikiConfigImpl wikiConfig, String iwPrefix) throws IOException,
		ParserConfigurationException, SAXException
	{
		Document document = getXMLFromUlr(siteInfoUrl(iwPrefix, "namespaces"));
		NodeList namespaceNodes = document.getElementsByTagName("ns");
		MultiValueMap namespaceAliases = getNamespaceAliases(iwPrefix);

		for (int i = 0; i < namespaceNodes.getLength(); i++)
		{
			Node namespaceNode = namespaceNodes.item(i);
			String name = namespaceNode.getTextContent();
			NamedNodeMap attributes = namespaceNode.getAttributes();
			Integer id = Integer.valueOf(attributes.getNamedItem("id").getNodeValue());

			// The canonical attribute is optional; fall back to an empty name.
			Node canonicalNode = attributes.getNamedItem("canonical");
			String canonical = (canonicalNode != null) ? canonicalNode.getNodeValue() : "";

			boolean fileNs = canonical.equals("File");
			boolean canHaveSubpages = attributes.getNamedItem("subpages") != null;

			Collection<String> aliases = new ArrayList<String>();
			if (namespaceAliases.containsKey(id))
			{
				// Safe: getNamespaceAliases only stores String values.
				@SuppressWarnings("unchecked")
				Collection<String> knownAliases = (Collection<String>) namespaceAliases.getCollection(id);
				aliases = knownAliases;
			}

			NamespaceImpl namespace = new NamespaceImpl(id.intValue(), name, canonical, canHaveSubpages, fileNs, aliases);
			wikiConfig.addNamespace(namespace);

			if (canonical.equals("Template"))
			{
				wikiConfig.setTemplateNamespace(namespace);
			}
			else if (id.intValue() == 0)
			{
				wikiConfig.setDefaultNamespace(namespace);
			}
		}
	}

	/**
	 * Downloads the namespace aliases of a Wikipedia edition.
	 *
	 * @param iwPrefix
	 *            host name prefix of the Wikipedia edition, e.g. {@code "en"}
	 * @return a multi-map from namespace id ({@link Integer}) to the alias
	 *         names ({@link String}) reported for that namespace
	 * @throws IOException
	 *             if the API request fails
	 * @throws ParserConfigurationException
	 *             if no XML parser can be created
	 * @throws SAXException
	 *             if the API response is not well-formed XML
	 */
	public static MultiValueMap getNamespaceAliases(String iwPrefix) throws IOException, ParserConfigurationException, SAXException
	{
		Document document = getXMLFromUlr(siteInfoUrl(iwPrefix, "namespacealiases"));
		NodeList aliasNodes = document.getElementsByTagName("ns");
		MultiValueMap aliasesById = new MultiValueMap();

		for (int i = 0; i < aliasNodes.getLength(); i++)
		{
			Node aliasNode = aliasNodes.item(i);
			Integer id = Integer.valueOf(aliasNode.getAttributes().getNamedItem("id").getNodeValue());
			aliasesById.put(id, aliasNode.getTextContent());
		}
		return aliasesById;
	}

	/**
	 * Fetches the resource at the given URL and parses it as XML.
	 * <p>
	 * The response stream is always closed before this method returns.
	 *
	 * @param urlString
	 *            URL of the XML resource
	 * @return the parsed DOM document
	 * @throws IOException
	 *             if the URL is malformed or the resource cannot be read
	 * @throws ParserConfigurationException
	 *             if no XML parser can be created
	 * @throws SAXException
	 *             if the resource is not well-formed XML
	 */
	public static Document getXMLFromUlr(String urlString) throws IOException, ParserConfigurationException, SAXException
	{
		URL url = new URL(urlString);
		URLConnection connection = url.openConnection();
		DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();

		// NOTE(review): the document comes from a remote host; consider
		// restricting DTD/external-entity processing on the factory.
		InputStream in = connection.getInputStream();
		try
		{
			return docBuilder.parse(in);
		}
		finally
		{
			in.close();
		}
	}

	/**
	 * Builds the siteinfo API query URL for one property of a Wikipedia
	 * edition.
	 * <p>
	 * HTTPS is used because a plain-HTTP request is answered with a redirect
	 * to HTTPS, which {@link URLConnection} does not follow across protocols.
	 */
	private static String siteInfoUrl(String iwPrefix, String siprop)
	{
		return "https://"
			+ iwPrefix
			+ ".wikipedia.org/w/api.php?action=query&meta=siteinfo&siprop="
			+ siprop
			+ "&format=xml";
	}

	/** Returns the first child of {@code parent} that is an element, or {@code null} if there is none. */
	private static Node firstElementChild(Node parent)
	{
		for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling())
		{
			if (child.getNodeType() == Node.ELEMENT_NODE)
			{
				return child;
			}
		}
		return null;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy