// es.uam.eps.ir.relison.diffusion.io.BinaryDataReader Maven / Gradle / Ivy
// The newest version!
/*
* Copyright (C) 2020 Information Retrieval Group at Universidad Autónoma
* de Madrid, http://ir.ii.uam.es
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
package es.uam.eps.ir.relison.diffusion.io;
import es.uam.eps.ir.relison.diffusion.data.Data;
import es.uam.eps.ir.relison.diffusion.data.Information;
import es.uam.eps.ir.relison.diffusion.simulation.SimulationEdgeTypes;
import es.uam.eps.ir.relison.graph.Graph;
import es.uam.eps.ir.relison.index.Index;
import es.uam.eps.ir.relison.index.Relation;
import es.uam.eps.ir.relison.index.fast.FastIndex;
import es.uam.eps.ir.relison.index.fast.FastWeightedPairwiseRelation;
import es.uam.eps.ir.relison.io.graph.BinaryGraphReader;
import es.uam.eps.ir.relison.io.graph.GraphReader;
import es.uam.eps.ir.relison.utils.datatypes.Triplet;
import org.ranksys.formats.rec.RecommendationFormat;
import org.ranksys.formats.rec.TRECRecommendationFormat;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.ranksys.formats.parsing.Parsers.lp;
/**
* Reads data from a binary file.
*
* @author Javier Sanz-Cruzado ([email protected])
* @author Pablo Castells ([email protected])
*/
public class BinaryDataReader
{
/**
* Read a simple version of the data.
* @param file the file containing the data.
* @return a Data object if ok, null if not.
*/
public static Data read(String file)
{
if(file == null) return null;
try(DataInputStream dos = new DataInputStream(new BufferedInputStream(new FileInputStream(file))))
{
// First, read the user index.
Index userIndex = BinaryDataReader.readUserIndex(dos);
// Read the graph.
GraphReader graphReader = new BinaryGraphReader();
Graph graph = graphReader.read(dos, true, true);
// Read the information pieces and their relation with users.
Triplet, Map>, Relation> info = BinaryDataReader.readInfoPieces(dos, userIndex);
Index infoPiecesIndex = info.v1();
Map> infoPieces = info.v2();
Relation userInfo = info.v3();
Triplet, Map>, Map>> userFeats = BinaryDataReader.readFeatures(dos, userIndex);
List userFeatureNames = userFeats.v1();
Map> feats = userFeats.v2();
Map> userFeatureRels = userFeats.v3();
Triplet, Map>, Map>> infoFeats = BinaryDataReader.readFeatures(dos, userIndex);
List infoFeatureNames = infoFeats.v1();
feats.putAll(infoFeats.v2());
Map> infoFeatureRels = userFeats.v3();
Relation realProp = BinaryDataReader.readRealProp(dos, userIndex, infoPiecesIndex);
return new Data<>(graph, userIndex, infoPiecesIndex, infoPieces, userInfo, feats, userFeatureNames, userFeatureRels, infoFeatureNames, infoFeatureRels, realProp);
}
catch (IOException ex)
{
return null;
}
}
/**
* Read a simple version of the data.
* @param file the file containing the data.
* @param recFile File containing the recommendation.
* @param topN the number of recommendations to take.
* @return a Data object if ok, null if not.
*/
public static Data read(String file, String recFile, int topN)
{
if(file == null) return null;
try(DataInputStream dos = new DataInputStream(new BufferedInputStream(new FileInputStream(file))))
{
// First, read the user index.
Index userIndex = BinaryDataReader.readUserIndex(dos);
// Read the graph.
GraphReader graphReader = new BinaryGraphReader();
Graph graph = graphReader.read(dos, true, true);
RecommendationFormat format = new TRECRecommendationFormat<>(lp,lp);
// Add the links from recommendation.
format.getReader(recFile).readAll().forEach(rec ->
rec.getItems().stream().limit(topN).forEach(r -> graph.addEdge(rec.getUser(), r.v1, 1.0, SimulationEdgeTypes.RECOMMEND))
);
// Read the information pieces and their relation with users.
Triplet, Map>, Relation> info = BinaryDataReader.readInfoPieces(dos, userIndex);
Index infoPiecesIndex = info.v1();
Map> infoPieces = info.v2();
Relation userInfo = info.v3();
Triplet, Map>, Map>> userFeats = BinaryDataReader.readFeatures(dos, userIndex);
List userFeatureNames = userFeats.v1();
Map> feats = userFeats.v2();
Map> userFeatureRels = userFeats.v3();
Triplet, Map>, Map>> infoFeats = BinaryDataReader.readFeatures(dos, userIndex);
List infoFeatureNames = infoFeats.v1();
feats.putAll(infoFeats.v2());
Map> infoFeatureRels = userFeats.v3();
Relation realProp = BinaryDataReader.readRealProp(dos, userIndex, infoPiecesIndex);
return new Data<>(graph, userIndex, infoPiecesIndex, infoPieces, userInfo, feats, userFeatureNames, userFeatureRels, infoFeatureNames, infoFeatureRels, realProp);
}
catch (IOException ex)
{
return null;
}
}
/**
* Reads a user index.
* @param dos A data stream for reading the data.
* @return the user index.
* @throws IOException if something fails while reading the data stream.
*/
private static Index readUserIndex(DataInputStream dos) throws IOException
{
Index userIndex = new FastIndex<>();
int numUsers = dos.readInt();
for(int i = 0; i < numUsers; ++i)
{
long userId = dos.readLong();
userIndex.addObject(userId);
}
return userIndex;
}
/**
* Reads about information pieces.
* @param dos A data stream for reading the data.
* @param userIndex The index for the users.
* @return a triplet containing the index of information pieces, the additional information for those pieces and the user-piece relation.
* @throws IOException if something fails while reading the data stream.
*/
private static Triplet, Map>, Relation> readInfoPieces(DataInputStream dos, Index userIndex) throws IOException
{
Relation userInfo = new FastWeightedPairwiseRelation<>();
Index infoPiecesIndex = new FastIndex<>();
Map> infoPieces = new HashMap<>();
int numUsers = userIndex.numObjects();
for(int i = 0; i < numUsers; ++i)
{
userInfo.addFirstItem(i);
}
int numPieces = dos.readInt();
for(int i = 0; i < numPieces; ++i)
{
long pieceId = dos.readLong();
infoPiecesIndex.addObject(pieceId);
long timestamp = dos.readLong();
Information infoPiece = new Information<>(pieceId, timestamp);
infoPieces.put(i, infoPiece);
userInfo.addSecondItem(i);
int numCreators = dos.readInt();
for(int j = 0; j < numCreators; ++j)
{
userInfo.addRelation(dos.readInt(), i, 1);
}
}
return new Triplet<>(infoPiecesIndex, infoPieces, userInfo);
}
/**
* Reads information about features.
* @param dos a data stream for reading the data.
* @param index an index (user/pieces).
* @return a triplet containing the list of parameter names, a map containing the possible values for each parameter, a map containing the relation between users and information pieces.
* @throws IOException if something fails while reading the data stream.
*/
private static Triplet, Map>, Map>> readFeatures(DataInputStream dos, Index index) throws IOException
{
List names = new ArrayList<>();
Map> indexes = new HashMap<>();
Map> relations = new HashMap<>();
int numFeats = dos.readInt();
for(int i = 0; i < numFeats; ++i)
{
Triplet, Relation> triplet = BinaryDataReader.readFeature(dos, index);
names.add(triplet.v1());
indexes.put(triplet.v1(), triplet.v2());
relations.put(triplet.v1(), triplet.v3());
}
return new Triplet<>(names, indexes, relations);
}
/**
* Reads information about a single feature.
* @param dos a data stream for reading the data.
* @param index an index (user/pieces).
* @return a triplet containing the name of the parameter, the possible values for the parameter, the relation between users and values.
* @throws IOException if something fails while reading the data stream.
*/
private static Triplet, Relation> readFeature(DataInputStream dos, Index index) throws IOException
{
String name = dos.readUTF();
Relation relation = new FastWeightedPairwiseRelation<>();
Index featureIndex = new FastIndex<>();
for(int i = 0; i < index.numObjects(); ++i)
{
relation.addFirstItem(i);
}
int numValues = dos.readInt();
for(int i = 0; i < numValues; ++i)
{
long value = dos.readLong();
featureIndex.addObject(value);
relation.addSecondItem(i);
int numAssign = dos.readInt();
for(int j = 0; j < numAssign; ++j)
{
int assignId = dos.readInt();
double val = dos.readDouble();
relation.addRelation(assignId, i, val);
}
}
return new Triplet<>(name, featureIndex, relation);
}
/**
* Reads information about the real propagated information pieces.
* @param dos the data stream for reading the data.
* @param userIndex the user index.
* @param infoPiecesIndex the information pieces index.
* @return the relation between user pieces and repropagated information pieces (containing its timestamps as weights)
* @throws IOException if something fails while reading the pieces.
*/
private static Relation readRealProp(DataInputStream dos, Index userIndex, Index infoPiecesIndex) throws IOException
{
Relation realProp = new FastWeightedPairwiseRelation<>();
int numUsers = userIndex.numObjects();
int numPieces = infoPiecesIndex.numObjects();
for(int i = 0; i < numUsers; ++i)
{
realProp.addFirstItem(i);
}
for(int i = 0; i < numPieces; ++i)
{
realProp.addSecondItem(i);
}
for(int i = 0; i < numUsers; ++i)
{
int uidx = dos.readInt();
int numRepr = dos.readInt();
for(int j = 0; j < numRepr; ++j)
{
int iidx = dos.readInt();
long ts = dos.readLong();
realProp.addRelation(uidx, iidx, ts);
}
}
return realProp;
}
}