All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.it.unimi.dsi.big.mg4j.index.remote.RemoteTermMap Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java. The big version is a fork of the original MG4J that can handle more than 2^31 terms and documents.

The newest version!
package it.unimi.dsi.big.mg4j.index.remote;

/*		 
 * MG4J: Managing Gigabytes for Java
 *
 * Copyright (C) 2006-2011 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.Util;
import it.unimi.dsi.big.util.StringMap;
import it.unimi.dsi.fastutil.objects.AbstractObject2LongFunction;
import it.unimi.dsi.fastutil.objects.AbstractObjectBigList;
import it.unimi.dsi.fastutil.objects.ObjectBigList;
import it.unimi.dsi.lang.MutableString;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.net.Socket;
import java.net.SocketAddress;

import org.apache.log4j.Logger;

/** A remote term map.
 * 
 * <p>Every query is forwarded over a {@link RemoteIndexServerConnection} to a
 * {@link ServerThread}, which answers it using a local {@link StringMap}. Each request
 * starts with a one-byte command ({@link #GET_NUMBER}, {@link #GET_TERM} or
 * {@link #HAS_TERMS}), followed by the command argument, if any:
 * <ul>
 * <li>{@link #GET_NUMBER}: a self-delimiting UTF-8 term; the reply is a <code>long</code>;
 * <li>{@link #GET_TERM}: a <code>long</code> index; the reply is a self-delimiting UTF-8 term;
 * <li>{@link #HAS_TERMS}: no argument; the reply is a <code>boolean</code>.
 * </ul>
 * 
 * @author Alessandro Arrabito
 * @author Sebastiano Vigna
 */
public class RemoteTermMap extends AbstractObject2LongFunction implements StringMap, Serializable {
	/** Command byte: return the number associated with a term. */
	protected final static byte GET_NUMBER = 0;
	/** Command byte: return the term at a given index of the remote list. */
	protected final static byte GET_TERM = 1; 
	/** Command byte: return whether the remote map provides a list of terms. */
	protected final static byte HAS_TERMS = 2; 

	static final long serialVersionUID = 1;
	/** The address of the index server.*/
	protected SocketAddress address;
	/** The size of the map. */
	protected final long size; 
	/** The remote connection to the server (initialised lazily). */
	protected transient RemoteIndexServerConnection remoteConnection;
	/** The cached return value of {@link StringMap#list()}. */
	protected ObjectBigList list;
	/** Whether the remote map implements {@link StringMap#list()}; <code>null</code> until the server has been asked. */
	private Boolean hasTerms;

	/** Creates a new remote term map.
	 * 
	 * <p>No connection is opened by this constructor.
	 * 
	 * @param address the address of the index server.
	 * @param size the size of the map, as returned by {@link #size64()}.
	 */
	public RemoteTermMap( final SocketAddress address, final long size ) {
		this.address = address;
		this.size = size;
	}

	/** Returns the size of this map, as specified at construction time.
	 * 
	 * @return the size of this map.
	 */
	public long size64() {
		return size;
	}
	
	/** Returns the size of this map, clamped to {@link Integer#MAX_VALUE}.
	 * 
	 * @return the minimum between {@link #size64()} and {@link Integer#MAX_VALUE}.
	 * @deprecated Use {@link #size64()}, which does not clamp.
	 */
	@Deprecated
	public int size() {
		// Clamp with min: max would never return less than Integer.MAX_VALUE,
		// and would wrap around for larger sizes when narrowed to int.
		return (int)Math.min( Integer.MAX_VALUE, size );
	}
	
	/** Opens the connection to the server if it has not been opened yet.
	 * 
	 * @throws IOException if the connection cannot be established.
	 */
	private void ensureConnection() throws IOException {
		if ( remoteConnection == null ) remoteConnection = new RemoteIndexServerConnection( address, IndexServer.GET_TERM_MAP );
	}

	/** Returns whether the remote map provides a list of terms.
	 * 
	 * <p>The server is asked on the first call only; the answer is then cached.
	 * 
	 * @return true if the remote {@link StringMap#list()} is not <code>null</code>.
	 * @throws RuntimeException wrapping any exception raised while talking to the server.
	 */
	public boolean hasTerms() {
		if ( hasTerms == null ) {
			try {
				ensureConnection();
				remoteConnection.outputStream.writeByte( RemoteTermMap.HAS_TERMS );
				remoteConnection.outputStream.flush();
				hasTerms = Boolean.valueOf( remoteConnection.inputStream.readBoolean() );
			}
			catch ( Exception e ) {
				throw new RuntimeException( e );
			}
		}
		return hasTerms.booleanValue();
	}

	/** The server-side thread answering the commands sent by a {@link RemoteTermMap}. */
	public static class ServerThread extends it.unimi.dsi.big.mg4j.index.remote.ServerThread {
		private static final boolean DEBUG = false;
		private final static Logger LOGGER = Util.getLogger( ServerThread.class );
		
		/** The remoted term map. */
		private final StringMap termMap;
		
		/** Creates a new server thread.
		 * 
		 * @param socket the socket, passed to the superclass constructor.
		 * @param termMap the term map that will be used to answer commands.
		 * @throws IOException if the superclass constructor throws it.
		 */
		public ServerThread( final Socket socket, final StringMap termMap ) throws IOException {
			super( socket );
			this.termMap = termMap;
		}
		
		/** Reads commands and writes replies until the input stream ends or an exception is thrown. */
		public void run() {
			try {
				// Reused across GET_NUMBER requests to hold the incoming term.
				final MutableString s = new MutableString();
				int command;
				for ( ;; ) {
					command = inputStream.readByte();
					if ( DEBUG ) LOGGER.debug( "Received remote command: " + command );

					switch ( command ) {
					case RemoteTermMap.GET_NUMBER:
						outputStream.writeLong( termMap.getLong( s.readSelfDelimUTF8( (InputStream)inputStream ) ) );
						outputStream.flush();
						break;

					case RemoteTermMap.GET_TERM:
						// The client sends the index with writeLong(), so it must be read back as a long:
						// reading an int would leave four bytes in the stream and desynchronise the protocol.
						new MutableString( termMap.list().get( inputStream.readLong() ) ).writeSelfDelimUTF8( (OutputStream)outputStream );
						outputStream.flush();
						break;

					case RemoteTermMap.HAS_TERMS:
						outputStream.writeBoolean( termMap.list() != null );
						outputStream.flush();
						break;

					default:
						LOGGER.error( "Unknown remote command: " + command );
					}
				}
			}
			catch ( EOFException e ) {
				LOGGER.warn( "The socket has been closed" );
			}
			catch ( Exception e ) {
				LOGGER.fatal( e, e );
			}
		}
	}

	/** Returns a big list whose elements are fetched from the server on demand.
	 * 
	 * <p>The list is created on the first call and then cached; each invocation of its
	 * <code>get()</code> method performs a {@link #GET_TERM} request.
	 * 
	 * @return a list view on the remote terms, or <code>null</code> if {@link #hasTerms()} is false
	 * (unless a list has already been stored in {@link #list}).
	 */
	public ObjectBigList list() {
		if ( hasTerms() && list == null ) list = new AbstractObjectBigList() {

			public MutableString get( long index ) {
				try {
					ensureConnection();
					remoteConnection.outputStream.writeByte( RemoteTermMap.GET_TERM );
					remoteConnection.outputStream.writeLong( index );
					remoteConnection.outputStream.flush();
					return new MutableString().readSelfDelimUTF8( (InputStream)remoteConnection.inputStream );
				}
				catch ( Exception e ) {
						throw new RuntimeException( e );
				}
			}

			public long size64() {
				return size;
			}
			
		};
		
		return list;
	}

	/** Asks the server for the number associated with a term.
	 * 
	 * @param o a term; it must be a {@link CharSequence}.
	 * @return the value returned by the remote map for the given term.
	 * @throws ClassCastException if <code>o</code> is not a {@link CharSequence}.
	 * @throws RuntimeException wrapping any exception raised while talking to the server.
	 */
	public long getLong( Object o ) {
		CharSequence term = (CharSequence)o;
		try {
			ensureConnection();
			remoteConnection.outputStream.writeByte( RemoteTermMap.GET_NUMBER );
			new MutableString( term ).writeSelfDelimUTF8( (OutputStream)remoteConnection.outputStream );
			remoteConnection.outputStream.flush();
			return remoteConnection.inputStream.readLong();
		}
		catch ( Exception e ) {
			throw new RuntimeException( e );
		}
	}

	/** Returns whether the remote map associates a number other than -1 with the given term.
	 * 
	 * <p>This method performs a {@link #getLong(Object)} request.
	 * 
	 * @param o a term; it must be a {@link CharSequence}.
	 * @return true if {@link #getLong(Object)} does not return -1.
	 */
	public boolean containsKey( Object o ) {
		return getLong( o ) != -1;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy