All Downloads are FREE. Search and download functionalities are using the official Maven repository.

test.it.unimi.di.mg4j.graph.DocumentSequenceImmutableGraphTest Maven / Gradle / Ivy

Go to download

MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java.

There is a newer version: 5.2.2
Show newest version
package it.unimi.di.mg4j.graph;

/*		 
 *  Copyright (C) 2007-2012 Paolo Boldi 
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import static org.junit.Assert.assertEquals;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.di.mg4j.document.Document;
import it.unimi.di.mg4j.document.DocumentFactory;
import it.unimi.di.mg4j.document.DocumentFactory.FieldType;
import it.unimi.di.mg4j.document.DocumentIterator;
import it.unimi.di.mg4j.document.DocumentSequence;
import it.unimi.di.mg4j.tool.VirtualDocumentResolver;
import it.unimi.di.mg4j.util.parser.callback.AnchorExtractor;
import it.unimi.dsi.webgraph.ImmutableGraph;
import it.unimi.dsi.webgraph.NodeIterator;
import it.unimi.dsi.webgraph.examples.ErdosRenyiGraph;

import java.io.IOException;
import java.io.InputStream;

import org.junit.Test;

public class DocumentSequenceImmutableGraphTest {

	/** A document sequence produced from an ImmutableGraph. The sequence has as many documents as there are 
	 *  nodes in the graph (document pointers coincide with node numbers). Title and URI of
	 *  document x are both http://x. Documents contain just one field,
	 *  of {@link FieldType#VIRTUAL} type, name UNUSED whose content are the URIs
	 *  of the successors, in one of two possible forms: either the complete uri http://y, or just a number
	 *  z, in which case the number is the difference between y and the number of the document
	 *  containing the reference. The first case happens for even-positioned successors.
	 *  
	 */
	private static class ImmutableGraphDocumentSequence implements DocumentSequence {
		private ImmutableGraph graph;
		
		public ImmutableGraphDocumentSequence( final ImmutableGraph graph ) {
			this.graph = graph;
		}

		public void close() throws IOException {}
		public DocumentFactory factory() { 
			return new DocumentFactory() {
				private static final long serialVersionUID = 1L;
				public DocumentFactory copy() { return null; }
				public int fieldIndex( String fieldName ) { return fieldName.equals( "UNNAMED")? 0 : -1; }
				public String fieldName( int field ) { return field == 0? "UNNAMED" : null; }
				public FieldType fieldType( int field ) { return FieldType.VIRTUAL; }
				public int numberOfFields() { return 1; }
				public Document getDocument( InputStream rawContent, Reference2ObjectMap, Object> metadata ) throws IOException { return null; }
			}; 
		}

		public DocumentIterator iterator() throws IOException {
			return new DocumentIterator() {
				NodeIterator it = graph.nodeIterator();
				public void close() throws IOException {}
				public Document nextDocument() throws IOException {
					if ( !it.hasNext() ) return null;
					final int document = it.nextInt();
					final int degree = it.outdegree();
					final int[] succ = it.successorArray();
					return new Document() {
						public void close() throws IOException {}
						public CharSequence title() { return uri(); }
						public CharSequence uri() { return "http://" + document; }
						public WordReader wordReader( int arg ) { return null; }
						public Object content( int arg ) throws IOException {
							ObjectList res = new ObjectArrayList();
							for ( int i = 0; i < degree; i++ ) {
								if ( i % 2 == 0 )
									res.add( new AnchorExtractor.Anchor( new MutableString( "http://" + succ[ i ] ), new MutableString( "http://" + succ[ i ] ) ) );
								else
									res.add( new AnchorExtractor.Anchor( new MutableString( "" + ( succ[ i ] - document ) ), new MutableString( "" + ( succ[ i ] - document ) ) ) );
							}
							return res;
						}
					};
				}
				
			};
		}

		@Override
		public void filename( CharSequence unused ) throws IOException {
			throw new UnsupportedOperationException();
		}
		
	}
	
	/**
	 * A resolver mapping document specifications directly to document numbers.
	 *
	 * <p>A specification starting with {@code h} is resolved to the integer found after its first
	 * seven characters (the length of the {@code http://} prefix used by the document URIs in
	 * this file); any other specification is parsed as an integer and added to the number of the
	 * current context document.
	 */
	public static class TrivialVirtualDocumentResolver implements VirtualDocumentResolver {
		private static final long serialVersionUID = 1L;
		/** The number of the document most recently passed to {@link #context(Document)}. */
		private int currentDoc;
		/** The value returned by {@link #numberOfDocuments()}. */
		private int numberOfDocuments; 

		/**
		 * Creates a resolver reporting the given number of documents.
		 *
		 * @param numberOfDocuments the number of documents to report.
		 */
		public TrivialVirtualDocumentResolver( int numberOfDocuments ) {
			this.numberOfDocuments = numberOfDocuments;
		}

		/**
		 * Records the number of the given document, parsed from its URI after the first seven characters.
		 *
		 * @param arg the document that subsequent relative specifications refer to.
		 */
		public void context( Document arg ) {
			final String contextUri = arg.uri().toString();
			currentDoc = Integer.parseInt( contextUri.substring( 7 ) );
		}

		/** Returns the number of documents given at construction time. */
		public int numberOfDocuments() {
			return numberOfDocuments;
		}

		/**
		 * Resolves a specification to a document number.
		 *
		 * @param arg either a string starting with {@code h} followed, from the eighth character on,
		 * by a document number, or an integer offset relative to the current context document.
		 * @return the resolved document number.
		 */
		public int resolve( CharSequence arg ) { 
			final String spec = arg.toString();
			// Absolute form: the number follows the seven-character prefix.
			if ( spec.startsWith( "h" ) ) return Integer.parseInt( spec.substring( 7 ) );
			// Relative form: an offset from the context document.
			return Integer.parseInt( spec ) + currentDoc;
		}
	}
	
	/**
	 * Checks that the graph rebuilt by {@link DocumentSequenceImmutableGraph} from the document
	 * sequence view of an {@link ErdosRenyiGraph} is equal to the graph itself.
	 */
	@Test
	public void test()  {
		final ImmutableGraph expected = new ErdosRenyiGraph( 10000, .01, 0, false );
		final ImmutableGraphDocumentSequence sequence = new ImmutableGraphDocumentSequence( expected );
		// The resolver reports as many documents as the graph has nodes.
		final TrivialVirtualDocumentResolver resolver = new TrivialVirtualDocumentResolver( expected.numNodes() );
		// NOTE(review): the second argument (0) is presumably the index of the virtual field — confirm.
		final DocumentSequenceImmutableGraph actual = new DocumentSequenceImmutableGraph( sequence, 0, resolver );
		assertEquals( expected, actual );
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy