test.it.unimi.di.mg4j.graph.DocumentSequenceImmutableGraphTest Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mg4j Show documentation
Show all versions of mg4j Show documentation
MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections written in Java.
package it.unimi.di.mg4j.graph;
/*
* Copyright (C) 2007-2012 Paolo Boldi
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import static org.junit.Assert.assertEquals;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.di.mg4j.document.Document;
import it.unimi.di.mg4j.document.DocumentFactory;
import it.unimi.di.mg4j.document.DocumentFactory.FieldType;
import it.unimi.di.mg4j.document.DocumentIterator;
import it.unimi.di.mg4j.document.DocumentSequence;
import it.unimi.di.mg4j.tool.VirtualDocumentResolver;
import it.unimi.di.mg4j.util.parser.callback.AnchorExtractor;
import it.unimi.dsi.webgraph.ImmutableGraph;
import it.unimi.dsi.webgraph.NodeIterator;
import it.unimi.dsi.webgraph.examples.ErdosRenyiGraph;
import java.io.IOException;
import java.io.InputStream;
import org.junit.Test;
public class DocumentSequenceImmutableGraphTest {
/** A document sequence produced from an ImmutableGraph. The sequence has as many documents as there are
* nodes in the graph (document pointers coincide with node numbers). Title and URI of
* document x are both http://x. Documents contain just one field,
* of {@link FieldType#VIRTUAL} type, name UNUSED whose content are the URIs
* of the successors, in one of two possible forms: either the complete uri http://y, or just a number
* z, in which case the number is the difference between y and the number of the document
* containing the reference. The first case happens for even-positioned successors.
*
*/
private static class ImmutableGraphDocumentSequence implements DocumentSequence {
private ImmutableGraph graph;
public ImmutableGraphDocumentSequence( final ImmutableGraph graph ) {
this.graph = graph;
}
public void close() throws IOException {}
public DocumentFactory factory() {
return new DocumentFactory() {
private static final long serialVersionUID = 1L;
public DocumentFactory copy() { return null; }
public int fieldIndex( String fieldName ) { return fieldName.equals( "UNNAMED")? 0 : -1; }
public String fieldName( int field ) { return field == 0? "UNNAMED" : null; }
public FieldType fieldType( int field ) { return FieldType.VIRTUAL; }
public int numberOfFields() { return 1; }
public Document getDocument( InputStream rawContent, Reference2ObjectMap, Object> metadata ) throws IOException { return null; }
};
}
public DocumentIterator iterator() throws IOException {
return new DocumentIterator() {
NodeIterator it = graph.nodeIterator();
public void close() throws IOException {}
public Document nextDocument() throws IOException {
if ( !it.hasNext() ) return null;
final int document = it.nextInt();
final int degree = it.outdegree();
final int[] succ = it.successorArray();
return new Document() {
public void close() throws IOException {}
public CharSequence title() { return uri(); }
public CharSequence uri() { return "http://" + document; }
public WordReader wordReader( int arg ) { return null; }
public Object content( int arg ) throws IOException {
ObjectList res = new ObjectArrayList();
for ( int i = 0; i < degree; i++ ) {
if ( i % 2 == 0 )
res.add( new AnchorExtractor.Anchor( new MutableString( "http://" + succ[ i ] ), new MutableString( "http://" + succ[ i ] ) ) );
else
res.add( new AnchorExtractor.Anchor( new MutableString( "" + ( succ[ i ] - document ) ), new MutableString( "" + ( succ[ i ] - document ) ) ) );
}
return res;
}
};
}
};
}
@Override
public void filename( CharSequence unused ) throws IOException {
throw new UnsupportedOperationException();
}
}
/** A minimal resolver for document specifications that are either a URI with a seven-character
 * prefix followed by a document number (such as <code>http://12</code>), or a bare integer that is
 * added to the number of the current context document.
 */
public static class TrivialVirtualDocumentResolver implements VirtualDocumentResolver {
	private static final long serialVersionUID = 1L;

	/** The number parsed from the URI of the document last passed to {@link #context(Document)}. */
	private int currentDoc;
	/** The number of documents reported by {@link #numberOfDocuments()}. */
	private int numberOfDocuments;

	/** Creates a resolver reporting the given number of documents.
	 *
	 * @param numberOfDocuments the value returned by {@link #numberOfDocuments()}.
	 */
	public TrivialVirtualDocumentResolver( int numberOfDocuments ) {
		this.numberOfDocuments = numberOfDocuments;
	}

	/** Records the number of the given document, parsed from its URI after skipping the first seven characters.
	 *
	 * @param arg the document that becomes the context for subsequent relative resolutions.
	 */
	public void context( Document arg ) {
		final String contextUri = arg.uri().toString();
		currentDoc = Integer.parseInt( contextUri.substring( 7 ) );
	}

	/** Returns the number of documents provided at construction time. */
	public int numberOfDocuments() {
		return numberOfDocuments;
	}

	/** Resolves a document specification to a document number.
	 *
	 * @param arg a specification starting with <code>h</code> (parsed after skipping seven characters),
	 * or an integer offset relative to the current context document.
	 * @return the resolved document number.
	 */
	public int resolve( CharSequence arg ) {
		final String spec = arg.toString();
		if ( !spec.startsWith( "h" ) ) {
			// Relative form: an offset from the document set by context().
			return Integer.parseInt( spec ) + currentDoc;
		}
		// Absolute form: the number follows the seven-character prefix.
		return Integer.parseInt( spec.substring( 7 ) );
	}
}
/** Wraps an {@link ErdosRenyiGraph} in a document sequence and checks that the
 * <code>DocumentSequenceImmutableGraph</code> built from that sequence (using field 0 and a
 * {@link TrivialVirtualDocumentResolver}) is equal to the original graph.
 */
@Test
public void test() {
	final ImmutableGraph expected = new ErdosRenyiGraph( 10000, .01, 0, false );
	final ImmutableGraphDocumentSequence sequence = new ImmutableGraphDocumentSequence( expected );
	final TrivialVirtualDocumentResolver resolver = new TrivialVirtualDocumentResolver( expected.numNodes() );
	assertEquals( expected, new DocumentSequenceImmutableGraph( sequence, 0, resolver ) );
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy