All downloads are free. Search and download functionality uses the official Maven repository.

de.tudarmstadt.ukp.wikipedia.mwdumper.importer.SqlWriter15 Maven / Gradle / Ivy

/*
 * MediaWiki import/export processing tools
 * Copyright 2005 by Brion Vibber
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: SqlWriter15.java 54087 2009-07-31 10:39:07Z daniel $
 */

// Doesn't actually work yet...

package de.tudarmstadt.ukp.wikipedia.mwdumper.importer;

import java.io.IOException;

/**
 * {@link SqlWriter} variant that buffers one row per revision into the text
 * table and the "revision" table, and one row per page into the "page" table.
 *
 * <p>The page row is written when the page ends, because it refers to the last
 * revision that was seen for that page (see {@link #writeEndPage()}).
 */
public class SqlWriter15 extends SqlWriter {
	/** Bit set in {@code rev_deleted} when the revision has no text. */
	static final int DELETED_TEXT = 1;
	/** Bit set in {@code rev_deleted} when the revision has no comment. */
	static final int DELETED_COMMENT = 2;
	/** Bit set in {@code rev_deleted} when the contributor has no username. */
	static final int DELETED_USER = 4;
	/** Declared alongside the other flags; not used within this class. */
	static final int DELETED_RESTRICTED = 8;

	/** Page currently being written; set by writeStartPage, cleared by writeEndPage. */
	private Page currentPage;
	/** Most recent revision passed to writeRevision for the current page, or null. */
	private Revision lastRevision;

	/**
	 * Creates a writer that delegates construction to the superclass.
	 *
	 * @param tr     passed through to {@code SqlWriter}
	 * @param output passed through to {@code SqlWriter}
	 */
	public SqlWriter15(SqlWriter.Traits tr, SqlStream output) {
		super(tr, output);
	}

	/**
	 * Creates a writer that delegates construction to the superclass.
	 *
	 * @param tr     passed through to {@code SqlWriter}
	 * @param output passed through to {@code SqlWriter}
	 * @param prefix passed through to {@code SqlWriter}
	 */
	public SqlWriter15(SqlWriter.Traits tr, SqlStream output, String prefix) {
		super(tr, output, prefix);
	}

	/**
	 * Flushes any rows still held in the insert buffers, then lets the
	 * superclass finish the wiki.
	 *
	 * @throws IOException if flushing or the superclass call fails
	 */
	public void writeEndWiki() throws IOException {
		flushInsertBuffers();
		super.writeEndWiki();
	}

	/**
	 * Remembers the page so that subsequent revisions can reference its id,
	 * and forgets any revision left over from a previous page.
	 *
	 * @param page the page whose revisions follow
	 */
	public void writeStartPage(Page page) {
		currentPage = page;
		lastRevision = null;
	}

	/**
	 * Writes the "page" row for the current page, provided at least one
	 * revision was written for it, and then resets the per-page state.
	 *
	 * @throws IOException if buffering the page row fails
	 */
	public void writeEndPage() throws IOException {
		if (lastRevision != null) {
			updatePage(currentPage, lastRevision);
		}
		currentPage = null;
		lastRevision = null;
	}

	/**
	 * Buffers one row for the text table and one for the "revision" table.
	 *
	 * <p>A null text, comment or contributor username is stored as an empty
	 * value and recorded by setting the matching bit in {@code rev_deleted}.
	 * Must be called between writeStartPage and writeEndPage, since the
	 * revision row reads the current page's id.
	 *
	 * @param revision the revision to write
	 * @throws IOException if buffering a row fails
	 */
	public void writeRevision(Revision revision) throws IOException {
		final boolean textMissing = revision.Text == null;
		final boolean commentMissing = revision.Comment == null;
		final boolean userMissing = revision.Contributor.Username == null;

		bufferInsertRow(traits.getTextTable(), new Object[][] {
				{"old_id", Integer.valueOf(revision.Id)},
				{"old_text", textMissing ? "" : revision.Text},
				{"old_flags", "utf-8"}});

		int rev_deleted = 0;
		if (userMissing) {
			rev_deleted |= DELETED_USER;
		}
		if (commentMissing) {
			rev_deleted |= DELETED_COMMENT;
		}
		if (textMissing) {
			rev_deleted |= DELETED_TEXT;
		}

		bufferInsertRow("revision", new Object[][] {
				{"rev_id", Integer.valueOf(revision.Id)},
				{"rev_page", Integer.valueOf(currentPage.Id)},
				// The text row above is keyed by the revision id, so reuse it here.
				{"rev_text_id", Integer.valueOf(revision.Id)},
				{"rev_comment", commentMissing ? "" : revision.Comment},
				{"rev_user", userMissing ? ZERO : Integer.valueOf(revision.Contributor.Id)},
				{"rev_user_text", userMissing ? "" : revision.Contributor.Username},
				{"rev_timestamp", timestampFormat(revision.Timestamp)},
				{"rev_minor_edit", revision.Minor ? ONE : ZERO},
				{"rev_deleted", rev_deleted == 0 ? ZERO : Integer.valueOf(rev_deleted)}});

		lastRevision = revision;
	}

	/**
	 * Counts how many bytes the string would occupy when encoded as UTF-8,
	 * without actually encoding it.
	 *
	 * @param s the text to measure; may be null
	 * @return the UTF-8 byte length, or 0 when {@code s} is null (a revision
	 *         without text is stored as an empty string by writeRevision)
	 */
	private static int lengthUtf8(String s) {
		if (s == null) {
			return 0;
		}
		final int slen = s.length();
		int len = 0;
		for (int i = 0; i < slen; i++) {
			final char c = s.charAt(i);
			if (c < 0x80) {
				len++;
			} else if (c < 0x800) {
				len += 2;
			} else if (c < 0xD800 || c >= 0xE000) {
				len += 3;
			} else {
				// Surrogate pairs are assumed to be valid: one pair is a
				// single 4-byte sequence, so skip its second half.
				len += 4;
				i++;
			}
		}
		return len;
	}

	/**
	 * Buffers the "page" row describing {@code page}, using {@code revision}
	 * as its latest revision, then calls {@code checkpoint()}.
	 *
	 * @param page     the page to describe
	 * @param revision the revision recorded as {@code page_latest}; its text
	 *                 length (0 if the text is null) becomes {@code page_len}
	 * @throws IOException if buffering the row or the checkpoint fails
	 */
	private void updatePage(Page page, Revision revision) throws IOException {
		bufferInsertRow("page", new Object[][] {
				{"page_id", Integer.valueOf(page.Id)},
				{"page_namespace", page.Title.Namespace},
				{"page_title", titleFormat(page.Title.Text)},
				{"page_restrictions", page.Restrictions},
				{"page_counter", ZERO},
				{"page_is_redirect", revision.isRedirect() ? ONE : ZERO},
				{"page_is_new", ZERO},
				{"page_random", traits.getRandom()},
				{"page_touched", traits.getCurrentTime()},
				{"page_latest", Integer.valueOf(revision.Id)},
				{"page_len", Integer.valueOf(lengthUtf8(revision.Text))}});
		// NOTE(review): checkpoint() is defined outside this class; its exact
		// effect is not visible here.
		checkpoint();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy