org.dkpro.jwpl.mwdumper.importer.SqlWriter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of dkpro-jwpl-mwdumper Show documentation
Show all versions of dkpro-jwpl-mwdumper Show documentation
A fork of MWDumper (http://www.mediawiki.org/wiki/Mwdumper)
The newest version!
/*
* MediaWiki import/export processing tools
* Copyright 2005 by Brion Vibber
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* $Id: SqlWriter.java 20338 2007-03-11 16:59:31Z river $
*/
package org.dkpro.jwpl.mwdumper.importer;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
public abstract class SqlWriter
implements DumpWriter
{
/**
 * Describes the SQL dialect of the target database. The writer consults
 * these settings wherever the generated SQL differs between backends.
 */
public abstract static class Traits
{
    /**
     * @return name of the table that holds page text in this schema
     */
    public abstract String getTextTable();

    /**
     * @return {@code true} if one INSERT statement may carry several
     *         comma-separated value tuples
     */
    public abstract boolean supportsMultiRowInsert();

    /**
     * @return SQL expression that the database evaluates to the current time
     */
    public abstract SqlLiteral getCurrentTime();

    /**
     * @return SQL expression that the database evaluates to a random number
     */
    public abstract SqlLiteral getRandom();

    /**
     * @return formatter that is fed six numbers (year, month, day, hour,
     *         minute, second) and renders them as a timestamp string
     */
    public abstract MessageFormat getTimestampFormatter();

    /**
     * Extra SQL written after the opening {@code BEGIN} of the dump.
     *
     * @return the statement text, or {@code null} (the default) for none
     */
    public String getWikiPrologue()
    {
        return null;
    }

    /**
     * Extra SQL written before the final {@code COMMIT} of the dump.
     *
     * @return the statement text, or {@code null} (the default) for none
     */
    public String getWikiEpilogue()
    {
        return null;
    }
}
public static class MySQLTraits
extends Traits
{
// UTC_TIMESTAMP() is new in MySQL 4.1 or 5.0, so using this
// godawful hack found in documentation comments:
public SqlLiteral getCurrentTime()
{
return new SqlLiteral("DATE_ADD('1970-01-01', INTERVAL UNIX_TIMESTAMP() SECOND)+0");
}
public SqlLiteral getRandom()
{
return new SqlLiteral("RAND()");
}
public boolean supportsMultiRowInsert()
{
return true;
}
public String getTextTable()
{
return "text";
}
private static final MessageFormat timestampFormatter = new MessageFormat(
"{0,number,0000}{1,number,00}{2,number,00}{3,number,00}{4,number,00}{5,number,00}");
public MessageFormat getTimestampFormatter()
{
return timestampFormatter;
}
}
public static class PostgresTraits
extends Traits
{
public SqlLiteral getCurrentTime()
{
return new SqlLiteral("current_timestamp AT TIME ZONE 'UTC'");
}
public SqlLiteral getRandom()
{
return new SqlLiteral("RANDOM()");
}
public boolean supportsMultiRowInsert()
{
return false;
}
public String getTextTable()
{
return "pagecontent";
}
private static final MessageFormat timestampFormatter = new MessageFormat(
"{0,number,0000}-{1,number,00}-{2,number,00} {3,number,00}:{4,number,00}:{5,number,00}");
public MessageFormat getTimestampFormatter()
{
return timestampFormatter;
}
public String getWikiPrologue()
{
return "ALTER TABLE revision DISABLE TRIGGER ALL;"
+ "ALTER TABLE page DISABLE TRIGGER ALL;";
}
public String getWikiEpilogue()
{
return "ALTER TABLE revision ENABLE TRIGGER ALL;"
+ "ALTER TABLE page ENABLE TRIGGER ALL;";
}
}
/** Sink that receives every generated comment and statement. */
private final SqlStream stream;
/** Dialect settings of the target database. */
protected final Traits traits;
/** Text placed directly in front of each table name; empty unless a prefix is supplied. */
private String tablePrefix = "";
/** Boxed constant 0, presumably for subclasses to use as a column value. */
protected static final Integer ZERO = Integer.valueOf(0);
/** Boxed constant 1, presumably for subclasses to use as a column value. */
protected static final Integer ONE = Integer.valueOf(1);
/**
 * Creates a writer whose table names carry no prefix.
 *
 * @param tr     dialect settings of the target database
 * @param output sink for the generated SQL
 */
public SqlWriter(Traits tr, SqlStream output)
{
    this(tr, output, "");
}
/**
 * Creates a writer that puts {@code prefix} in front of every table name.
 *
 * @param tr     dialect settings of the target database
 * @param output sink for the generated SQL
 * @param prefix table name prefix; {@code null} is treated as no prefix
 */
public SqlWriter(Traits tr, SqlStream output, String prefix)
{
    stream = output;
    traits = tr;
    // Appending a null String to a StringBuffer yields the text "null",
    // which would silently produce table names such as "nullpage".
    tablePrefix = (prefix == null) ? "" : prefix;
}
/**
 * Closes the underlying SQL stream.
 *
 * @throws IOException if the stream fails to close
 */
public void close() throws IOException
{
    this.stream.close();
}
/**
 * Emits the dump header: a banner comment, the opening {@code BEGIN}
 * and, when the dialect supplies one, its prologue statement.
 *
 * @throws IOException if the stream cannot be written
 */
public void writeStartWiki() throws IOException
{
    stream.writeComment("-- MediaWiki XML dump converted to SQL by mwdumper");
    stream.writeStatement("BEGIN");

    final String dialectPrologue = traits.getWikiPrologue();
    if (dialectPrologue == null) {
        return;
    }
    stream.writeStatement(dialectPrologue);
}
/**
 * Emits the dump trailer: pending buffered inserts first, then the
 * dialect's epilogue statement (if any), the final {@code COMMIT} and a
 * closing comment.
 *
 * @throws IOException if the stream cannot be written
 */
public void writeEndWiki() throws IOException
{
    flushInsertBuffers();

    final String dialectEpilogue = traits.getWikiEpilogue();
    if (null != dialectEpilogue) {
        stream.writeStatement(dialectEpilogue);
    }

    stream.writeStatement("COMMIT");
    stream.writeComment("-- DONE");
}
/**
 * Writes the site metadata of the dump as SQL comments: site name, base
 * URL, generator, case setting and one line per namespace.
 *
 * @param info site information read from the dump
 * @throws IOException if the stream cannot be written
 */
public void writeSiteinfo(Siteinfo info) throws IOException
{
    stream.writeComment("");
    stream.writeComment("-- Site: " + commentSafe(info.Sitename));
    stream.writeComment("-- URL: " + commentSafe(info.Base));
    stream.writeComment("-- Generator: " + commentSafe(info.Generator));
    stream.writeComment("-- Case: " + commentSafe(info.Case));
    stream.writeComment("--");
    stream.writeComment("-- Namespaces:");
    // Wildcard types are used because the concrete key/value types of the
    // namespace entries are declared outside this file.
    for (Iterator<? extends Map.Entry<?, ?>> i = info.Namespaces.orderedEntries(); i
            .hasNext();) {
        Map.Entry<?, ?> e = i.next();
        // Namespace names come from the dump as well, so they get the same
        // comment sanitising as the other site fields.
        stream.writeComment(
                "-- " + e.getKey() + ": " + commentSafe(String.valueOf(e.getValue())));
    }
    stream.writeComment("");
}
/**
 * Handles the start of a page. Implemented by subclasses.
 *
 * @param page the page being started
 * @throws IOException if output fails
 */
public abstract void writeStartPage(Page page) throws IOException;
/**
 * Handles the end of the page most recently started. Implemented by
 * subclasses.
 *
 * @throws IOException if output fails
 */
public abstract void writeEndPage() throws IOException;
/**
 * Handles one revision. Implemented by subclasses.
 *
 * @param revision the revision to write
 * @throws IOException if output fails
 */
public abstract void writeRevision(Revision revision) throws IOException;
/**
 * Makes dump-supplied text safe to embed in a single-line {@code --} SQL
 * comment by replacing carriage returns and line feeds with spaces.
 * Without this, text containing a line break would end the comment early
 * and the remainder would be read as SQL.
 *
 * @param text text to embed in a comment; may be {@code null}
 * @return the text with every CR and LF replaced by a space, or
 *         {@code null} if {@code text} is {@code null}
 */
protected String commentSafe(String text)
{
    if (text == null) {
        return null;
    }
    return text.replace('\r', ' ').replace('\n', ' ');
}
/**
 * Pending INSERT statements, keyed by the (unprefixed) table name passed
 * to {@code bufferInsertRow}. The type arguments are required: the
 * buffering methods read the values back as {@code StringBuffer}.
 */
private final Map<String, StringBuffer> insertBuffers = new HashMap<>();
/**
 * Length threshold, in characters (512K), above which a buffered INSERT
 * is flushed instead of being extended with another row.
 */
private static final int blockSize = 1024 * 512;
/**
 * Queues one row for insertion into {@code table}. On dialects with
 * multi-row INSERT the row is appended to the table's pending statement
 * until that statement reaches {@code blockSize} characters; otherwise the
 * pending statement is written out and a new one is started with this row.
 *
 * @param table unprefixed table name
 * @param row   array of {@code {columnName, value}} pairs
 * @throws IOException if a pending statement has to be flushed and the
 *                     stream cannot be written
 */
protected void bufferInsertRow(String table, Object[][] row) throws IOException
{
    final boolean multiRow = traits.supportsMultiRowInsert();
    final StringBuffer pending = insertBuffers.get(table);
    if (pending != null) {
        if (multiRow && pending.length() < blockSize) {
            pending.append(',');
            appendInsertValues(pending, row);
            return;
        }
        flushInsertBuffer(table);
    }

    // A buffer that will only ever hold one row does not need blockSize
    // characters up front; StringBuffer grows on demand if the row is larger.
    final int initialCapacity = multiRow ? blockSize : 1024;
    final StringBuffer fresh = new StringBuffer(initialCapacity);
    appendInsertStatement(fresh, table, row);
    insertBuffers.put(table, fresh);
}
/**
 * Writes out and discards the pending INSERT statement of one table.
 * Does nothing if no statement is pending for that table.
 *
 * @param table unprefixed table name
 * @throws IOException if the stream cannot be written
 */
protected void flushInsertBuffer(String table) throws IOException
{
    final StringBuffer pending = insertBuffers.get(table);
    if (pending == null) {
        // Nothing buffered: do not hand a null statement to the stream.
        return;
    }
    stream.writeStatement(pending);
    // Removed only after a successful write, so a failed write keeps the data.
    insertBuffers.remove(table);
}
/**
 * Writes out the pending INSERT statements of all tables, then empties
 * the buffer map.
 *
 * @throws IOException if the stream cannot be written
 */
protected void flushInsertBuffers() throws IOException
{
    final Iterator<StringBuffer> pending = insertBuffers.values().iterator();
    while (pending.hasNext()) {
        stream.writeStatement(pending.next());
    }
    insertBuffers.clear();
}
/**
 * Writes a single-row INSERT for {@code table} straight to the stream,
 * bypassing the insert buffers.
 *
 * @param table unprefixed table name
 * @param row   array of {@code {columnName, value}} pairs
 * @throws IOException if the stream cannot be written
 */
protected void insertRow(String table, Object[][] row) throws IOException
{
    final StringBuffer statement = new StringBuffer(65536);
    appendInsertStatement(statement, table, row);
    stream.writeStatement(statement);
}
/**
 * Appends {@code INSERT INTO <prefix><table> (col,...) VALUES (val,...)}
 * to {@code sql}. Column names are taken verbatim from element 0 of each
 * pair.
 *
 * @param sql   buffer to append to
 * @param table unprefixed table name
 * @param row   array of {@code {columnName, value}} pairs
 */
private void appendInsertStatement(StringBuffer sql, String table, Object[][] row)
{
    sql.append("INSERT INTO ").append(tablePrefix).append(table).append(" (");

    boolean first = true;
    for (Object[] column : row) {
        final String columnName = (String) column[0];
        if (first) {
            first = false;
        }
        else {
            sql.append(',');
        }
        sql.append(columnName);
    }

    sql.append(") VALUES ");
    appendInsertValues(sql, row);
}
/**
 * Appends one parenthesised, comma-separated value tuple to {@code sql}.
 * Each value (element 1 of a pair) is rendered through {@code sqlSafe}.
 *
 * @param sql buffer to append to
 * @param row array of {@code {columnName, value}} pairs
 */
private static void appendInsertValues(StringBuffer sql, Object[][] row)
{
    sql.append('(');

    boolean first = true;
    for (Object[] column : row) {
        final Object value = column[1];
        if (first) {
            first = false;
        }
        else {
            sql.append(',');
        }
        sql.append(sqlSafe(value));
    }

    sql.append(')');
}
/**
 * Writes {@code UPDATE <prefix><table> SET col=val,... WHERE key=value}
 * to the stream. Values are rendered through {@code sqlSafe}; column
 * names and {@code keyField} are emitted verbatim.
 *
 * @param table    unprefixed table name
 * @param row      array of {@code {columnName, value}} pairs to set
 * @param keyField column used in the WHERE clause
 * @param keyValue value the key column must equal
 * @throws IOException if the stream cannot be written
 */
protected void updateRow(String table, Object[][] row, String keyField, Object keyValue)
    throws IOException
{
    // The buffer never leaves this method, so no locking is needed; the
    // former synchronized block only held a monitor across the stream write.
    final StringBuffer sql = new StringBuffer(65536);
    sql.append("UPDATE ");
    sql.append(tablePrefix);
    sql.append(table);
    sql.append(" SET ");
    for (int i = 0; i < row.length; i++) {
        String field = (String) row[i][0];
        Object val = row[i][1];
        if (i > 0) {
            sql.append(',');
        }
        sql.append(field);
        sql.append('=');
        sql.append(sqlSafe(val));
    }
    sql.append(" WHERE ");
    sql.append(keyField);
    sql.append('=');
    sql.append(sqlSafe(keyValue));
    stream.writeStatement(sql);
}
/**
 * Renders a Java value as SQL text.
 * <ul>
 * <li>{@code null} becomes {@code NULL}</li>
 * <li>{@code String} is quoted and escaped via {@code sqlEscape}</li>
 * <li>{@code Integer}, {@code Long}, {@code Short}, {@code Byte},
 * {@code Double} and {@code SqlLiteral} are emitted as their
 * {@code toString()} text</li>
 * </ul>
 *
 * @param val value to render; may be {@code null}
 * @return SQL text for the value
 * @throws IllegalArgumentException if the value has any other type
 */
protected static String sqlSafe(Object val)
{
    if (val == null) {
        return "NULL";
    }
    if (val instanceof String) {
        return sqlEscape((String) val);
    }
    // Boxed integral types print as plain digits, so the wider and
    // narrower integer types are as safe to emit as Integer.
    if (val instanceof Integer || val instanceof Long || val instanceof Short
            || val instanceof Byte || val instanceof Double || val instanceof SqlLiteral) {
        return val.toString();
    }
    throw new IllegalArgumentException(
            "Unknown type in SQL: " + val.getClass().getName());
}
/**
 * Wraps a string in single quotes and backslash-escapes the characters
 * that would otherwise end or corrupt the literal: NUL becomes
 * {@code \0}, line feed {@code \n}, carriage return {@code \r},
 * Ctrl-Z (U+001A) {@code \Z}, and double quote, single quote and backslash
 * are each preceded by a backslash. An empty string yields {@code ''}
 * (not {@code NULL}).
 *
 * @param str text to quote; must not be {@code null}
 * @return the quoted, escaped SQL string literal
 */
protected static String sqlEscape(String str)
{
    final int len = str.length();
    // The builder is local to this call, so the unsynchronized
    // StringBuilder is sufficient.
    final StringBuilder sql = new StringBuilder(len * 2 + 2);
    sql.append('\'');
    for (int i = 0; i < len; i++) {
        final char c = str.charAt(i);
        switch (c) {
        case '\u0000':
            sql.append('\\').append('0');
            break;
        case '\n':
            sql.append('\\').append('n');
            break;
        case '\r':
            sql.append('\\').append('r');
            break;
        case '\u001a':
            sql.append('\\').append('Z');
            break;
        case '"':
        case '\'':
        case '\\':
            sql.append('\\');
            // fall through: the character itself follows its backslash
        default:
            sql.append(c);
            break;
        }
    }
    sql.append('\'');
    return sql.toString();
}
/**
 * Converts a page title to its stored form by turning every space into
 * an underscore.
 *
 * @param title title to convert; must not be {@code null}
 * @return the title with spaces replaced by underscores
 */
protected static String titleFormat(String title)
{
    final String underscored = title.replace(" ", "_");
    return underscored;
}
/**
 * Renders a calendar value with the dialect's timestamp formatter.
 * The formatter receives year, month (1-based), day of month, hour of
 * day, minute and second, in that order.
 *
 * @param time the moment to render
 * @return the formatted timestamp
 */
protected String timestampFormat(Calendar time)
{
    final Object[] fields = {
        time.get(Calendar.YEAR),
        // Calendar months start at 0
        time.get(Calendar.MONTH) + 1,
        time.get(Calendar.DAY_OF_MONTH),
        time.get(Calendar.HOUR_OF_DAY),
        time.get(Calendar.MINUTE),
        time.get(Calendar.SECOND),
    };
    final MessageFormat formatter = traits.getTimestampFormatter();
    return formatter.format(fields);
}
/**
 * Renders the complement of a calendar value with the dialect's timestamp
 * formatter: the year is subtracted from 9999 and the 1-based month, day,
 * hour, minute and second are each subtracted from 99.
 *
 * @param time the moment to invert
 * @return the formatted inverse timestamp
 */
protected String inverseTimestamp(Calendar time)
{
    final int year = time.get(Calendar.YEAR);
    // Calendar months start at 0
    final int month = time.get(Calendar.MONTH) + 1;
    final int day = time.get(Calendar.DAY_OF_MONTH);
    final int hour = time.get(Calendar.HOUR_OF_DAY);
    final int minute = time.get(Calendar.MINUTE);
    final int second = time.get(Calendar.SECOND);

    final Object[] inverted = {
        9999 - year, 99 - month, 99 - day, 99 - hour, 99 - minute, 99 - second,
    };
    return traits.getTimestampFormatter().format(inverted);
}
/** Time zone used for every calendar created by {@code now()}. */
private static final TimeZone utc = TimeZone.getTimeZone("UTC");

/**
 * Creates a calendar set to the current moment in the UTC time zone.
 *
 * @return a new Gregorian calendar in UTC
 */
protected static GregorianCalendar now()
{
    final GregorianCalendar current = new GregorianCalendar(utc);
    return current;
}
/** Number of pages after which the running transaction is committed. */
final int commitInterval = 1000;
/** Number of {@code checkpoint()} calls made so far. */
int pageCount = 0;

/**
 * Counts one page and, on every {@code commitInterval}-th call, flushes
 * the insert buffers, commits the current transaction and opens a new one.
 *
 * @throws IOException if the stream cannot be written
 */
protected void checkpoint() throws IOException
{
    pageCount += 1;
    final boolean intervalReached = (pageCount % commitInterval) == 0;
    if (!intervalReached) {
        return;
    }
    flushInsertBuffers();
    stream.writeStatement("COMMIT");
    stream.writeStatement("BEGIN");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy