All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.wikipedia.Wiki Maven / Gradle / Ivy

Go to download

Java library to call Mediawiki API described at http://www.mediawiki.org/wiki/API:Main_page

There is a newer version: 0.2.3
Show newest version
/**
 *
 * This file is part of the https://github.com/WolfgangFahl/Mediawiki-Japi open source project
 *
 * Copyright 2015-2019 BITPlan GmbH https://github.com/BITPlan
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 *
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.wikipedia;

import java.io.*;
import java.net.*;
import java.text.Normalizer;
import java.util.*;
import java.util.logging.*;
import java.util.zip.GZIPInputStream;
import javax.security.auth.login.*;

/**
 *  This is a somewhat sketchy bot framework for editing MediaWiki wikis.
 *  Requires JDK 1.7 or greater. Uses the MediaWiki API for most
 *  operations. It is recommended that the server runs the latest version
 *  of MediaWiki (1.23), otherwise some functions may not work.
 *  

 *  <p>
 *  Extended documentation is available in the wiki of the upstream project,
 *  https://github.com/MER-C/wiki-java/ .
 *  All wikilinks are relative to the English Wikipedia and all timestamps are in
 *  your wiki's time zone.
 *

* Please file bug reports here * or at the Github issue tracker. * * @author MER-C and contributors * @version 0.30 */ public class Wiki implements Serializable { // Master TODO list: // *Admin stuff // *More multiqueries // *Generators (hard) // NAMESPACES /** * Denotes the namespace of images and media, such that there is no * description page. Uses the "Media:" prefix. * @see #FILE_NAMESPACE * @since 0.03 */ public static final int MEDIA_NAMESPACE = -2; /** * Denotes the namespace of pages with the "Special:" prefix. Note * that many methods dealing with special pages may spew due to * raw content not being available. * @since 0.03 */ public static final int SPECIAL_NAMESPACE = -1; /** * Denotes the main namespace, with no prefix. * @since 0.03 */ public static final int MAIN_NAMESPACE = 0; /** * Denotes the namespace for talk pages relating to the main * namespace, denoted by the prefix "Talk:". * @since 0.03 */ public static final int TALK_NAMESPACE = 1; /** * Denotes the namespace for user pages, given the prefix "User:". * @since 0.03 */ public static final int USER_NAMESPACE = 2; /** * Denotes the namespace for user talk pages, given the prefix * "User talk:". * @since 0.03 */ public static final int USER_TALK_NAMESPACE = 3; /** * Denotes the namespace for pages relating to the project, * with prefix "Project:". It also goes by the name of whatever * the project name was. * @since 0.03 */ public static final int PROJECT_NAMESPACE = 4; /** * Denotes the namespace for talk pages relating to project * pages, with prefix "Project talk:". It also goes by the name * of whatever the project name was, + "talk:". * @since 0.03 */ public static final int PROJECT_TALK_NAMESPACE = 5; /** * Denotes the namespace for file description pages. Has the prefix * "File:". Do not create these directly, use upload() instead. * @see #MEDIA_NAMESPACE * @since 0.25 */ public static final int FILE_NAMESPACE = 6; /** * Denotes talk pages for file description pages. 
Has the prefix * "File talk:". * @since 0.25 */ public static final int FILE_TALK_NAMESPACE = 7; /** * Denotes the namespace for (wiki) system messages, given the prefix * "MediaWiki:". * @since 0.03 */ public static final int MEDIAWIKI_NAMESPACE = 8; /** * Denotes the namespace for talk pages relating to system messages, * given the prefix "MediaWiki talk:". * @since 0.03 */ public static final int MEDIAWIKI_TALK_NAMESPACE = 9; /** * Denotes the namespace for templates, given the prefix "Template:". * @since 0.03 */ public static final int TEMPLATE_NAMESPACE = 10; /** * Denotes the namespace for talk pages regarding templates, given * the prefix "Template talk:". * @since 0.03 */ public static final int TEMPLATE_TALK_NAMESPACE = 11; /** * Denotes the namespace for help pages, given the prefix "Help:". * @since 0.03 */ public static final int HELP_NAMESPACE = 12; /** * Denotes the namespace for talk pages regarding help pages, given * the prefix "Help talk:". * @since 0.03 */ public static final int HELP_TALK_NAMESPACE = 13; /** * Denotes the namespace for category description pages. Has the * prefix "Category:". * @since 0.03 */ public static final int CATEGORY_NAMESPACE = 14; /** * Denotes the namespace for talk pages regarding categories. Has the * prefix "Category talk:". * @since 0.03 */ public static final int CATEGORY_TALK_NAMESPACE = 15; /** * Denotes all namespaces. * @since 0.03 */ public static final int ALL_NAMESPACES = 0x09f91102; // LOG TYPES /** * Denotes all logs. * @since 0.06 */ public static final String ALL_LOGS = ""; /** * Denotes the user creation log. * @since 0.06 */ public static final String USER_CREATION_LOG = "newusers"; /** * Denotes the upload log. * @since 0.06 */ public static final String UPLOAD_LOG = "upload"; /** * Denotes the deletion log. * @since 0.06 */ public static final String DELETION_LOG = "delete"; /** * Denotes the move log. * @since 0.06 */ public static final String MOVE_LOG = "move"; /** * Denotes the block log. 
* @since 0.06 */ public static final String BLOCK_LOG = "block"; /** * Denotes the protection log. * @since 0.06 */ public static final String PROTECTION_LOG = "protect"; /** * Denotes the user rights log. * @since 0.06 */ public static final String USER_RIGHTS_LOG = "rights"; /** * Denotes the user renaming log. * @since 0.06 */ public static final String USER_RENAME_LOG = "renameuser"; /** * Denotes the page importation log. * @since 0.08 */ public static final String IMPORT_LOG = "import"; /** * Denotes the edit patrol log. * @since 0.08 */ public static final String PATROL_LOG = "patrol"; // PROTECTION LEVELS /** * Denotes a non-protected page. * @since 0.09 */ public static final String NO_PROTECTION = "all"; /** * Denotes semi-protection (i.e. only autoconfirmed users can perform a * particular action). * @since 0.09 */ public static final String SEMI_PROTECTION = "autoconfirmed"; /** * Denotes full protection (i.e. only admins can perfom a particular action). * @since 0.09 */ public static final String FULL_PROTECTION = "sysop"; // ASSERTION MODES /** * Use no assertions (i.e. 0). * @see #setAssertionMode * @since 0.11 */ public static final int ASSERT_NONE = 0; /** * Assert that we are logged in (i.e. 1). This is checked every action. * @see #setAssertionMode * @since 0.30 */ public static final int ASSERT_USER = 1; /** * Assert that we have a bot flag (i.e. 2). This is checked every action. * @see #setAssertionMode * @since 0.11 */ public static final int ASSERT_BOT = 2; /** * Assert that we have no new messages. Not defined officially, but * some bots have this. This is checked intermittently. * @see #setAssertionMode * @since 0.11 */ public static final int ASSERT_NO_MESSAGES = 4; /** * Assert that we have a sysop flag (i.e. 8). This is checked intermittently. 
* @see #setAssertionMode * @since 0.30 */ public static final int ASSERT_SYSOP = 8; // RC OPTIONS /** * In queries against the recent changes table, this would mean we don't * fetch anonymous edits. * @since 0.20 */ public static final int HIDE_ANON = 1; /** * In queries against the recent changes table, this would mean we don't * fetch edits made by bots. * @since 0.20 */ public static final int HIDE_BOT = 2; /** * In queries against the recent changes table, this would mean we don't * fetch by the logged in user. * @since 0.20 */ public static final int HIDE_SELF = 4; /** * In queries against the recent changes table, this would mean we don't * fetch minor edits. * @since 0.20 */ public static final int HIDE_MINOR = 8; /** * In queries against the recent changes table, this would mean we don't * fetch patrolled edits. * @since 0.20 */ public static final int HIDE_PATROLLED = 16; // REVISION OPTIONS /** * In Revision.diff(), denotes the next revision. * @see org.wikipedia.Wiki.Revision#diff(org.wikipedia.Wiki.Revision) * @since 0.21 */ public static final long NEXT_REVISION = -1L; /** * In Revision.diff(), denotes the current revision. * @see org.wikipedia.Wiki.Revision#diff(org.wikipedia.Wiki.Revision) * @since 0.21 */ public static final long CURRENT_REVISION = -2L; /** * In Revision.diff(), denotes the previous revision. * @see org.wikipedia.Wiki.Revision#diff(org.wikipedia.Wiki.Revision) * @since 0.21 */ public static final long PREVIOUS_REVISION = -3L; /** * The list of options the user can specify for his/her gender. * @since 0.24 */ public enum Gender { // These names come from the MW API so we can use valueOf() and // toString() without any fidgets whatsoever. Java naming conventions // aren't worth another 20 lines of code. /** * The user self-identifies as a male. * @since 0.24 */ male, /** * The user self-identifies as a female. * @since 0.24 */ female, /** * The user has not specified a gender in preferences. 
* @since 0.24 */ unknown; } private static final String version = "0.30"; // the domain of the wiki private String domain; protected String query, base, apiUrl; protected String scriptPath = "/w"; private boolean wgCapitalLinks = true; private String timezone = "UTC"; // user management private Map cookies = new HashMap<>(12); User user; private int statuscounter = 0; // various caches private transient LinkedHashMap namespaces = null; private transient List watchlist = null; // preferences private int max = 500; private int slowmax = 50; private int throttle = 10000; // throttle private int maxlag = 5; private int assertion = ASSERT_NONE; // assertion mode private transient int statusinterval = 100; // status check private String useragent = "Wiki.java " + version + "(https://github.com/MER-C/wiki-java/)"; private boolean zipped = false; private boolean markminor = false, markbot = false; private boolean resolveredirect = false; private String protocol = "https://"; private Level loglevel = Level.ALL; private static final Logger logger = Logger.getLogger("wiki"); // retry flag private boolean retry = true; // serial version private static final long serialVersionUID = -8745212681497643456L; // time to open a connection private static final int CONNECTION_CONNECT_TIMEOUT_MSEC = 30000; // 30 seconds // time for the read to take place. (needs to be longer, some connections are slow // and the data volume is large!) private static final int CONNECTION_READ_TIMEOUT_MSEC = 180000; // 180 seconds // log2(upload chunk size). Default = 22 => upload size = 4 MB. Disable // chunked uploads by setting a large value here (50 = 1 PB will do). private static final int LOG2_CHUNK_SIZE = 22; // CONSTRUCTORS AND CONFIGURATION /** * Creates a new connection to the English Wikipedia via HTTPS. * @since 0.02 */ public Wiki() { this("en.wikipedia.org", "/w"); } /** * Creates a new connection to a wiki via HTTPS. 
WARNING: if the wiki uses * a $wgScriptpath other than the default /w, you need to call * getScriptPath() to automatically set it. Alternatively, you * can use the constructor below if you know it in advance. * * @param domain the wiki domain name e.g. en.wikipedia.org (defaults to * en.wikipedia.org) */ public Wiki(String domain) { this(domain, "/w"); } /** * Creates a new connection to a wiki with $wgScriptpath set to * scriptPath via HTTPS. * * @param domain the wiki domain name * @param scriptPath the script path * @since 0.14 */ public Wiki(String domain, String scriptPath) { this(domain, scriptPath, "https://"); } /** * Creates a new connection to a wiki with $wgScriptpath set to * scriptPath via the specified protocol. * * @param domain the wiki domain name * @param scriptPath the script path * @param protocol a protocol e.g. "http://", "https://" or "file:///" * @since 0.31 */ public Wiki(String domain, String scriptPath, String protocol) { if (domain == null || domain.isEmpty()) domain = "en.wikipedia.org"; this.domain = domain; this.scriptPath = scriptPath; this.protocol = protocol; // init variables // This is fine as long as you do not have parameters other than domain // and scriptpath in constructors and do not do anything else than super(x)! // http://stackoverflow.com/questions/3404301/whats-wrong-with-overridable-method-calls-in-constructors // TODO: make this more sane. logger.setLevel(loglevel); log(Level.CONFIG, "", "Using Wiki.java " + version); initVars(); } /** * Edit this if you need to change the API and human interface url * configuration of the wiki. One example use is server-side cache * management (maxage and smaxage API parameters). * *

Contributed by Tedder * @since 0.24 */ protected void initVars() { StringBuilder basegen = new StringBuilder(protocol); basegen.append(domain); basegen.append(scriptPath); StringBuilder apigen = new StringBuilder(basegen); apigen.append("/api.php?format=xml&rawcontinue=1&"); // MediaWiki has inbuilt maxlag functionality, see [[mw:Manual:Maxlag // parameter]]. Let's exploit it. if (maxlag >= 0) { apigen.append("maxlag="); apigen.append(maxlag); apigen.append("&"); basegen.append("/index.php?maxlag="); basegen.append(maxlag); basegen.append("&title="); } else basegen.append("/index.php?title="); base = basegen.toString(); // the native API supports assertions as of MW 1.23 if ((assertion & ASSERT_BOT) == ASSERT_BOT) apigen.append("assert=bot&"); else if ((assertion & ASSERT_USER) == ASSERT_USER) apigen.append("assert=user&"); apiUrl = apigen.toString(); apigen.append("action=query&"); if (resolveredirect) apigen.append("redirects&"); query = apigen.toString(); } /** * Gets the domain of the wiki, as supplied on construction. * @return the domain of the wiki * @since 0.06 */ public String getDomain() { return domain; } /** * Gets the editing throttle. * @return the throttle value in milliseconds * @see #setThrottle * @since 0.09 */ public int getThrottle() { return throttle; } /** * Sets the editing throttle. Read requests are not throttled or restricted * in any way. Default is 10s. * @param throttle the new throttle value in milliseconds * @see #getThrottle * @since 0.09 */ public void setThrottle(int throttle) { this.throttle = throttle; log(Level.CONFIG, "setThrottle", "Throttle set to " + throttle + " milliseconds"); } /** * Detects the $wgScriptpath wiki variable and sets the bot framework up * to use it. You need not call this if you know the script path is * /w. See also [[mw:Manual:$wgScriptpath]]. 
* * @throws IOException if a network error occurs * @deprecated use getSiteInfo * @return the script path, if you have any use for it * @since 0.14 */ @Deprecated public String getScriptPath() throws IOException { return (String)getSiteInfo().get("scriptpath"); } /** * Detects whether a wiki forces upper case for the first character in a * title and sets the bot framework up to use it. Example: en.wikipedia = * true, en.wiktionary = false. Default = true. See [[mw:Manual:$wgCapitalLinks]]. * @return see above * @deprecated use getSiteInfo * @throws IOException if a network error occurs * @since 0.30 */ @Deprecated public boolean isUsingCapitalLinks() throws IOException { return (Boolean)getSiteInfo().get("usingcapitallinks"); } /** * Gets various properties of the wiki and sets the bot framework up to use * them. Returns: *

    *
  • usingcapitallinks: (Boolean) whether a wiki forces upper case * for the title. Example: en.wikipedia = true, en.wiktionary = false. * Default = true. See [[mw:Manual:$wgCapitalLinks]]. *
  • scriptpath: (String) the $wgScriptpath wiki variable. Default * = /w. See [[mw:Manual:$wgScriptpath]]. *
  • version: (String) the MediaWiki version used for this wiki *
  • timezone: (String) the timezone the wiki is in, default = UTC *
* * @return (see above) * @since 0.30 * @throws IOException if a network error occurs */ public Map getSiteInfo() throws IOException { Map ret = new HashMap<>(); String line = fetch(query + "action=query&meta=siteinfo", "getSiteInfo"); wgCapitalLinks = parseAttribute(line, "case", 0).equals("first-letter"); ret.put("usingcapitallinks", wgCapitalLinks); scriptPath = parseAttribute(line, "scriptpath", 0); ret.put("scriptpath", scriptPath); timezone = parseAttribute(line, "timezone", 0); ret.put("timezone", timezone); ret.put("version", parseAttribute(line, "generator", 0)); initVars(); return ret; } /** * Sets the user agent HTTP header to be used for requests. Default is * "Wiki.java " + version. * @param useragent the new user agent * @since 0.22 */ public void setUserAgent(String useragent) { this.useragent = useragent; } /** * Gets the user agent HTTP header to be used for requests. Default is * "Wiki.java " + version. * @return useragent the user agent * @since 0.22 */ public String getUserAgent() { return useragent; } /** * Enables/disables GZip compression for GET requests. Default: true. * @param zipped whether we use GZip compression * @since 0.23 */ public void setUsingCompressedRequests(boolean zipped) { this.zipped = zipped; } /** * Checks whether we are using GZip compression for GET requests. * Default: true. * @return (see above) * @since 0.23 */ public boolean isUsingCompressedRequests() { return zipped; } /** * Checks whether API action=query dependencies automatically resolve * redirects (default = false). * @return (see above) * @since 0.27 */ public boolean isResolvingRedirects() { return resolveredirect; } /** * Sets whether API action=query dependencies automatically resolve * redirects (default = false). * @param b (see above) * @since 0.27 */ public void setResolveRedirects(boolean b) { resolveredirect = b; initVars(); } /** * Sets whether edits are marked as bot by default (may be overridden * specifically by edit()). Default = false. 
Works only if one has the * required permissions. * @param markbot (see above) * @since 0.26 */ public void setMarkBot(boolean markbot) { this.markbot = markbot; } /** * Are edits are marked as bot by default? * @return whether edits are marked as bot by default * @since 0.26 */ public boolean isMarkBot() { return markbot; } /** * Sets whether edits are marked as minor by default (may be overridden * specifically by edit()). Default = false. * @param minor (see above) * @since 0.26 */ public void setMarkMinor(boolean minor) { this.markminor = minor; } /** * Are edits are marked as minor by default? * @return whether edits are marked as minor by default * @since 0.26 */ public boolean isMarkMinor() { return markminor; } /** * Determines whether this wiki is equal to another object. * @param obj the object to compare * @return whether this wiki is equal to such object * @since 0.10 */ @Override public boolean equals(Object obj) { if (!(obj instanceof Wiki)) return false; return domain.equals(((Wiki)obj).domain); } /** * Returns a hash code of this object. * @return a hash code * @since 0.12 */ @Override public int hashCode() { return domain.hashCode() * maxlag - throttle; } /** * Returns a string representation of this Wiki. * @return a string representation of this Wiki. * @since 0.10 */ @Override public String toString() { // domain StringBuilder buffer = new StringBuilder("Wiki[domain="); buffer.append(domain); // user buffer.append(",user="); buffer.append(user != null ? user.toString() : "null"); buffer.append(","); // throttle mechanisms buffer.append("throttle="); buffer.append(throttle); buffer.append(",maxlag="); buffer.append(maxlag); buffer.append(",assertionMode="); buffer.append(assertion); buffer.append(",statusCheckInterval="); buffer.append(statusinterval); buffer.append(",cookies="); buffer.append(cookies); buffer.append("]"); return buffer.toString(); } /** * Gets the maxlag parameter. See [[mw:Manual:Maxlag parameter]]. 
* @return the current maxlag, in seconds * @see #setMaxLag * @see #getCurrentDatabaseLag * @since 0.11 */ public int getMaxLag() { return maxlag; } /** * Sets the maxlag parameter. A value of less than 0s disables this * mechanism. Default is 5s. * @param lag the desired maxlag in seconds * @see #getMaxLag * @see #getCurrentDatabaseLag * @since 0.11 */ public void setMaxLag(int lag) { maxlag = lag; log(Level.CONFIG, "setMaxLag", "Setting maximum allowable database lag to " + lag); initVars(); } /** * Gets the assertion mode. Assertion modes are bitmasks. * @return the current assertion mode * @see #setAssertionMode * @since 0.11 */ public int getAssertionMode() { return assertion; } /** * Sets the assertion mode. Do this AFTER logging in, otherwise the login * will fail. Assertion modes are bitmasks. Default is ASSERT_NONE. * @param mode an assertion mode * @see #getAssertionMode * @since 0.11 */ public void setAssertionMode(int mode) { assertion = mode; log(Level.CONFIG, "setAssertionMode", "Set assertion mode to " + mode); initVars(); } /** * Gets the number of actions (edit, move, block, delete, etc) between * status checks. A status check is where we update user rights, block * status and check for new messages (if the appropriate assertion mode * is set). * * @return the number of edits between status checks * @see #setStatusCheckInterval * @since 0.18 */ public int getStatusCheckInterval() { return statusinterval; } /** * Sets the number of actions (edit, move, block, delete, etc) between * status checks. A status check is where we update user rights, block * status and check for new messages (if the appropriate assertion mode * is set). Default is 100. 
* * @param interval the number of edits between status checks * @see #getStatusCheckInterval * @since 0.18 */ public void setStatusCheckInterval(int interval) { statusinterval = interval; log(Level.CONFIG, "setStatusCheckInterval", "Status check interval set to " + interval); } /** * Set the logging level used by the internal logger. * @param loglevel one of the levels specified in java.util.logging.LEVEL * @since 0.31 */ public void setLogLevel(Level loglevel) { this.loglevel = loglevel; logger.setLevel(loglevel); } // META STUFF /** * Logs in to the wiki. This method is thread-safe. If the specified * username or password is incorrect, the thread blocks for 20 seconds * then throws an exception. * * @param username a username * @param password a password (as a char[] due to JPasswordField) * @throws FailedLoginException if the login failed due to incorrect * username and/or password * @throws IOException if a network error occurs * @see #logout */ public synchronized void login(String username, char[] password) throws IOException, FailedLoginException { // post login request username = normalize(username); StringBuilder buffer = new StringBuilder(500); buffer.append("lgname="); buffer.append(URLEncoder.encode(username, "UTF-8")); // fetch token String response = post(apiUrl + "action=login", buffer.toString(), "login"); String wpLoginToken = parseAttribute(response, "token", 0); buffer.append("&lgpassword="); buffer.append(URLEncoder.encode(new String(password), "UTF-8")); buffer.append("&lgtoken="); buffer.append(URLEncoder.encode(wpLoginToken, "UTF-8")); String line = post(apiUrl + "action=login", buffer.toString(), "login"); buffer = null; // check for success if (line.contains("result=\"Success\"")) { user = new User(username); boolean apihighlimit = user.isAllowedTo("apihighlimits"); if (apihighlimit) { max = 5000; slowmax = 500; } log(Level.INFO, "login", "Successfully logged in as " + username + ", highLimit = " + apihighlimit); } else { log(Level.WARNING, 
"login", "Failed to log in as " + username); try { Thread.sleep(20000); // to prevent brute force } catch (InterruptedException e) { // nobody cares } // test for some common failure reasons if (line.contains("WrongPass") || line.contains("WrongPluginPass")) throw new FailedLoginException("Login failed: incorrect password."); else if (line.contains("NotExists")) throw new FailedLoginException("Login failed: user does not exist."); throw new FailedLoginException("Login failed: unknown reason."); } } //Enables login while using a string password public synchronized void login(String username, String password) throws IOException, FailedLoginException { login(username,password.toCharArray()); } /** * Logs out of the wiki. This method is thread safe (so that we don't log * out during an edit). All operations are conducted offline, so you can * serialize this Wiki first. * @see #login * @see #logoutServerSide */ public synchronized void logout() { cookies.clear(); user = null; max = 500; slowmax = 50; log(Level.INFO, "logout", "Logged out"); } /** * Logs out of the wiki and destroys the session on the server. You will * need to log in again instead of just reading in a serialized wiki. * Equivalent to [[Special:Userlogout]]. This method is thread safe * (so that we don't log out during an edit). WARNING: kills all * concurrent sessions - if you are logged in with a browser this will log * you out there as well. * * @throws IOException if a network error occurs * @since 0.14 * @see #login * @see #logout */ public synchronized void logoutServerSide() throws IOException { fetch(apiUrl + "action=logout", "logoutServerSide"); logout(); // destroy local cookies } /** * Determines whether the current user has new messages. (A human would * notice a yellow bar at the top of the page). 
* @return whether the user has new messages * @throws IOException if a network error occurs * @since 0.11 */ public boolean hasNewMessages() throws IOException { String url = query + "meta=userinfo&uiprop=hasmsg"; return fetch(url, "hasNewMessages").contains("messages=\"\""); } /** * Determines the current database replication lag. * @return the current database replication lag * @throws IOException if a network error occurs * @see #setMaxLag * @see #getMaxLag * @since 0.10 */ public int getCurrentDatabaseLag() throws IOException { String line = fetch(query + "meta=siteinfo&siprop=dbrepllag", "getCurrentDatabaseLag"); String lag = parseAttribute(line, "lag", 0); log(Level.INFO, "getCurrentDatabaseLag", "Current database replication lag is " + lag + " seconds"); return Integer.parseInt(lag); } /** * Fetches some site statistics, namely the number of articles, pages, * files, edits, users and admins. Equivalent to [[Special:Statistics]]. * * @return a map containing the stats. Use "articles", "pages", "files" * "edits", "users", "activeusers", "admins" or "jobs" to retrieve the * respective value * @throws IOException if a network error occurs * @since 0.14 */ public Map getSiteStatistics() throws IOException { String text = fetch(query + "meta=siteinfo&siprop=statistics", "getSiteStatistics"); Map ret = new HashMap<>(20); ret.put("pages", Integer.parseInt(parseAttribute(text, "pages", 0))); ret.put("articles", Integer.parseInt(parseAttribute(text, "articles", 0))); ret.put("files", Integer.parseInt(parseAttribute(text, "images", 0))); ret.put("users", Integer.parseInt(parseAttribute(text, "users", 0))); ret.put("activeusers", Integer.parseInt(parseAttribute(text, "activeusers", 0))); ret.put("admins", Integer.parseInt(parseAttribute(text, "admins", 0))); ret.put("jobs", Integer.parseInt(parseAttribute(text, "jobs", 0))); // job queue length return ret; } /** * Gets the version of MediaWiki this wiki runs e.g. 1.20wmf5 (54b4fcb). 
* See also https://gerrit.wikimedia.org/ . * @return the version of MediaWiki used * @throws IOException if a network error occurs * @deprecated use getSiteInfo * @since 0.14 */ @Deprecated public String version() throws IOException { return (String)getSiteInfo().get("version"); } /** * Renders the specified wiki markup by passing it to the MediaWiki * parser through the API. (Note: this isn't implemented locally because * I can't be stuffed porting Parser.php). One use of this method is to * emulate the previewing functionality of the MediaWiki software. * * @param markup the markup to parse * @return the parsed markup as HTML * @throws IOException if a network error occurs * @since 0.13 */ public String parse(String markup) throws IOException { // This is POST because markup can be arbitrarily large, as in the size // of an article (over 10kb). String response = post(apiUrl + "action=parse", "prop=text&text=" + URLEncoder.encode(markup, "UTF-8"), "parse"); int y = response.indexOf('>', response.indexOf(""); return decode(response.substring(y, z)); } /** * Same as parse(), but also strips out unwanted crap. This might * be useful to subclasses. * * @param in the string to parse * @return that string without the crap * @throws IOException if a network error occurs * @since 0.14 */ protected String parseAndCleanup(String in) throws IOException { String output = parse(in); output = output.replace("

", "").replace("

", ""); // remove paragraph tags output = output.replace("\n", ""); // remove new lines // strip out the parser report, which comes at the end int a = output.indexOf("