// org.wikibrain.pageview.PageViewSqlDao Maven / Gradle / Ivy
package org.wikibrain.pageview;
import com.typesafe.config.Config;
import gnu.trove.map.TIntIntMap;
import gnu.trove.map.hash.TIntIntHashMap;
import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.jooq.*;
import org.wikibrain.conf.Configuration;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.DaoFilter;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.dao.MetaInfoDao;
import org.wikibrain.core.dao.sql.AbstractSqlDao;
import org.wikibrain.core.dao.sql.JooqUtils;
import org.wikibrain.core.dao.sql.SimpleSqlDaoIterable;
import org.wikibrain.core.dao.sql.WpDataSource;
import org.wikibrain.core.jooq.Tables;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.lang.LocalId;
import org.wikibrain.utils.ParallelForEach;
import org.wikibrain.utils.Procedure;
import java.io.File;
import java.sql.Timestamp;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
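/**
 * SQL-backed implementation of {@link PageViewDao}: stores page view counts
 * per language, page and timestamp, and downloads and loads missing
 * timestamps on demand.
 *
 * @author Shilad Sen
 */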
public class PageViewSqlDao extends AbstractSqlDao implements PageViewDao {
// Key under which the per-language record of already-loaded timestamps is kept in the dao cache.
public static final String LOADED_CACHE_KEY = "pageviewhours";
// Directory handed to the PageViewDownloader when page view files have to be fetched.
private final File downloadDir;
// Used while loading files to resolve a raw page view's title to a page id.
private final LocalPageDao pageDao;
// Used while loading files to count stored page views and loading errors.
private final MetaInfoDao metaDao;
// Columns passed to the superclass constructor. save() supplies its values in
// this same order: language id, page id, timestamp, number of views.
private static final TableField [] INSERT_FIELDS = new TableField[] {
Tables.PAGEVIEW.LANG_ID,
Tables.PAGEVIEW.PAGE_ID,
Tables.PAGEVIEW.TSTAMP,
Tables.PAGEVIEW.NUM_VIEWS,
};
/**
 * Builds a page view dao on top of the given SQL data source, using the
 * {@code /db/pageview} SQL resources and the pageview insert columns.
 *
 * @param dataSource  Data source for jdbc connections
 * @param metaDao     dao that receives record and error counts while page views are loaded
 * @param pageDao     dao used to resolve page titles to page ids while page views are loaded
 * @param downloadDir directory given to the page view downloader
 * @throws org.wikibrain.core.dao.DaoException if the superclass constructor fails
 */
public PageViewSqlDao(WpDataSource dataSource, MetaInfoDao metaDao, LocalPageDao pageDao, File downloadDir) throws DaoException {
    super(dataSource, INSERT_FIELDS, "/db/pageview");
    // Collaborators are only stored here; nothing is loaded at construction time.
    this.metaDao = metaDao;
    this.pageDao = pageDao;
    this.downloadDir = downloadDir;
}
/**
 * Delegates to the superclass {@code clear()} and then removes the cached
 * record of loaded timestamps stored under {@link #LOADED_CACHE_KEY}.
 *
 * @throws DaoException if clearing fails
 */
@Override
public void clear() throws DaoException {
super.clear();
// Drop the cached entry so it is rebuilt from the table on the next getLoadedHours() call.
cache.remove(LOADED_CACHE_KEY);
}
/**
 * Inserts one page view as a row of (language id, page id, hour, views).
 *
 * @param view the page view to store
 * @throws DaoException if the insert fails
 */
@Override
public void save(PageView view) throws DaoException {
    LocalId page = view.getPageId();
    // Argument order must match INSERT_FIELDS.
    insert(
            page.getLanguage().getId(),
            page.getId(),
            view.getHour(),
            view.getViews()
    );
}
/**
 * Totals the views of every page of a language whose rows fall inside a time range.
 *
 * @param language  language whose page views are summed
 * @param startDate lower bound of the range (the query uses an inclusive BETWEEN)
 * @param endDate   upper bound of the range (the query uses an inclusive BETWEEN)
 * @return map from page id to the summed views in the range; pages without rows are
 *         absent, and the map is created with -1 as its no-entry key and value
 * @throws DaoException propagated from the underlying dao helpers
 */
@Override
public TIntIntMap getAllViews(Language language, DateTime startDate, DateTime endDate) throws DaoException {
    DSLContext context = getJooq();
    Cursor<? extends Record> cursor = null;
    try {
        // Converted inside the try so the context is released even if a date is null.
        Timestamp startTime = new Timestamp(startDate.getMillis());
        Timestamp endTime = new Timestamp(endDate.getMillis());
        // Only the two columns that are actually read are fetched.
        cursor = context.select(Tables.PAGEVIEW.PAGE_ID, Tables.PAGEVIEW.NUM_VIEWS).
                from(Tables.PAGEVIEW).
                where(Tables.PAGEVIEW.LANG_ID.eq(language.getId())).
                and(Tables.PAGEVIEW.TSTAMP.between(startTime, endTime)).
                fetchLazy(getFetchSize());
        TIntIntMap views = new TIntIntHashMap(
                gnu.trove.impl.Constants.DEFAULT_CAPACITY,
                gnu.trove.impl.Constants.DEFAULT_LOAD_FACTOR,
                -1, -1);
        for (Record record : cursor) {
            int pageViews = record.getValue(Tables.PAGEVIEW.NUM_VIEWS);
            // Add to the running total, or start it at this row's count.
            views.adjustOrPutValue(
                    record.getValue(Tables.PAGEVIEW.PAGE_ID),
                    pageViews,
                    pageViews);
        }
        return views;
    } finally {
        // Close the lazy cursor explicitly: it is not released if iteration fails part-way.
        try {
            if (cursor != null) {
                cursor.close();
            }
        } finally {
            freeJooq(context);
        }
    }
}
/**
 * Counts the views of a page over a window that starts at {@code startDate} and
 * ends {@code numberOfHours} hours later.
 *
 * NOTE(review): the date-range overload this delegates to filters with an inclusive
 * BETWEEN, so a row stamped exactly at the end of the window is counted too.
 * Confirm that this is intended.
 *
 * @param pageId        page whose views are counted
 * @param startDate     start of the window
 * @param numberOfHours length of the window in hours
 * @return total number of views in the window
 * @throws DaoException propagated from the date-range overload
 */
@Override
public int getNumViews(LocalId pageId, DateTime startDate, int numberOfHours) throws DaoException {
    DateTime endDate = startDate.plusHours(numberOfHours);
    return getNumViews(pageId, startDate, endDate);
}
/**
 * Counts the views of the page identified by language and page id over a window
 * that starts at {@code startDate} and ends {@code numberOfHours} hours later.
 *
 * NOTE(review): the date-range overload this delegates to filters with an inclusive
 * BETWEEN, so a row stamped exactly at the end of the window is counted too.
 * Confirm that this is intended.
 *
 * @param lang          language of the page
 * @param pageId        id of the page within that language
 * @param startDate     start of the window
 * @param numberOfHours length of the window in hours
 * @return total number of views in the window
 * @throws DaoException propagated from the date-range overload
 */
@Override
public int getNumViews(Language lang, int pageId, DateTime startDate, int numberOfHours) throws DaoException {
    LocalId page = new LocalId(lang, pageId);
    DateTime endDate = startDate.plusHours(numberOfHours);
    return getNumViews(page, startDate, endDate);
}
/**
 * Counts the views of the page identified by language and page id between two dates.
 *
 * @param lang      language of the page
 * @param pageId    id of the page within that language
 * @param startDate start of the range
 * @param endDate   end of the range
 * @return total number of views in the range
 * @throws DaoException propagated from the {@link LocalId}-based overload
 */
@Override
public int getNumViews(Language lang, int pageId, DateTime startDate, DateTime endDate) throws DaoException {
    LocalId page = new LocalId(lang, pageId);
    return getNumViews(page, startDate, endDate);
}
/**
 * Sums the views stored for one page inside a time range.
 *
 * @param pageId    page (language and id) whose views are summed
 * @param startDate lower bound of the range (the query uses an inclusive BETWEEN)
 * @param endDate   upper bound of the range (the query uses an inclusive BETWEEN)
 * @return total number of views in the range; 0 when no rows match
 * @throws DaoException propagated from the underlying dao helpers
 */
@Override
public int getNumViews(LocalId pageId, DateTime startDate, DateTime endDate) throws DaoException {
    DSLContext context = getJooq();
    Cursor<? extends Record> cursor = null;
    try {
        // Converted inside the try so the context is released even if a date is null.
        Timestamp startTime = new Timestamp(startDate.getMillis());
        Timestamp endTime = new Timestamp(endDate.getMillis());
        // Only the view count is read, so only that column is fetched.
        cursor = context.select(Tables.PAGEVIEW.NUM_VIEWS).
                from(Tables.PAGEVIEW).
                where(Tables.PAGEVIEW.LANG_ID.eq(pageId.getLanguage().getId())).
                and(Tables.PAGEVIEW.TSTAMP.between(startTime, endTime)).
                and(Tables.PAGEVIEW.PAGE_ID.eq(pageId.getId())).
                fetchLazy(getFetchSize());
        int numViews = 0;
        for (Record record : cursor) {
            numViews += record.getValue(Tables.PAGEVIEW.NUM_VIEWS);
        }
        return numViews;
    } finally {
        // Close the lazy cursor explicitly: it is not released if iteration fails part-way.
        try {
            if (cursor != null) {
                cursor.close();
            }
        } finally {
            freeJooq(context);
        }
    }
}
/**
 * Counts the views of several pages of one language over the same time range.
 * Each id is looked up with its own query.
 *
 * @param lang      language of the pages
 * @param ids       page ids to look up
 * @param startTime start of the range
 * @param endTime   end of the range
 * @return map from each page id to its total views in the range
 * @throws ConfigurationException declared by the interface; not thrown by the code in this method
 * @throws DaoException           propagated from the per-page lookup
 */
@Override
public Map<Integer, Integer> getNumViews(Language lang, Iterable<Integer> ids, DateTime startTime, DateTime endTime) throws ConfigurationException, DaoException {
    Map<Integer, Integer> result = new HashMap<Integer, Integer>();
    for (Integer id : ids) {
        result.put(id, getNumViews(new LocalId(lang, id), startTime, endTime));
    }
    return result;
}
/**
 * Counts the views of several pages of one language, summed over a list of date ranges.
 *
 * @param lang  language of the pages
 * @param ids   page ids to look up; iterated once per date range
 * @param dates date ranges, each an array whose element 0 is the start and element 1 the end
 * @return map from each page id to its views summed across all ranges
 * @throws ConfigurationException declared by the interface; not thrown by the code in this method
 * @throws DaoException           propagated from the per-page lookup
 */
@Override
public Map<Integer, Integer> getNumViews(Language lang, Iterable<Integer> ids, ArrayList<DateTime[]> dates) throws ConfigurationException, DaoException {
    Map<Integer, Integer> result = new HashMap<Integer, Integer>();
    int count = 0;
    for (DateTime[] date : dates) {
        DateTime startTime = date[0];
        DateTime endTime = date[1];
        count++;
        for (Integer id : ids) {
            int views = getNumViews(new LocalId(lang, id), startTime, endTime);
            // Accumulate onto the total from earlier ranges, if there is one.
            Integer previous = result.get(id);
            result.put(id, previous == null ? views : previous + views);
        }
        LOG.info(count + " dates loaded");
    }
    return result;
}
/**
 * Returns all pageviews that meet the filter criteria specified by an input PageViewDaoFilter.
 * Every criterion that is null on the filter is left out of the query; the filter's
 * limit is always applied.
 *
 * @param daoFilter a set of filters to limit the search
 *                  must be a PageViewDaoFilter or DaoException will be thrown
 * @return a lazily evaluated iterable over the matching page views; the jOOQ context
 *         is handed to the returned iterable rather than released here
 * @throws DaoException if daoFilter is not a PageViewDaoFilter
 */
@Override
public Iterable<PageView> get(final DaoFilter daoFilter) throws DaoException {
    if (!(daoFilter instanceof PageViewDaoFilter)) {
        throw new DaoException("Need to input PageViewDaoFilter for PageViewSqlDao get method");
    }
    PageViewDaoFilter pDaoFilter = (PageViewDaoFilter) daoFilter;
    DSLContext context = getJooq();
    try {
        Collection<Condition> conditions = new ArrayList<Condition>();
        if (pDaoFilter.getLangIds() != null) {
            conditions.add(Tables.PAGEVIEW.LANG_ID.in(pDaoFilter.getLangIds()));
        }
        if (pDaoFilter.getPageIds() != null) {
            conditions.add(Tables.PAGEVIEW.PAGE_ID.in(pDaoFilter.getPageIds()));
        }
        if (pDaoFilter.getMinNumViews() != null) {
            conditions.add(Tables.PAGEVIEW.NUM_VIEWS.greaterOrEqual(pDaoFilter.getMinNumViews()));
        }
        if (pDaoFilter.getMaxNumViews() != null) {
            conditions.add(Tables.PAGEVIEW.NUM_VIEWS.lessOrEqual(pDaoFilter.getMaxNumViews()));
        }
        if (pDaoFilter.getStartDate() != null) {
            conditions.add(Tables.PAGEVIEW.TSTAMP.greaterOrEqual(new Timestamp(pDaoFilter.getStartDate().getMillis())));
        }
        if (pDaoFilter.getEndDate() != null) {
            conditions.add(Tables.PAGEVIEW.TSTAMP.lessOrEqual(new Timestamp(pDaoFilter.getEndDate().getMillis())));
        }
        Cursor<Record> result = context.select().
                from(Tables.PAGEVIEW).
                where(conditions).
                limit(daoFilter.getLimitOrInfinity()).
                fetchLazy(getFetchSize());
        return new SimpleSqlDaoIterable<PageView>(result, context) {
            @Override
            public PageView transform(Record r) {
                try {
                    return buildPageView(r);
                } catch (DaoException e) {
                    // A row that cannot be converted is logged and reported as null.
                    LOG.warn(e.getMessage(), e);
                    return null;
                }
            }
        };
    } catch (RuntimeException e) {
        // The iterable was never created, so the context has to be released here.
        freeJooq(context);
        throw e;
    }
}
/**
 * Not supported by this dao.
 *
 * Shilad: I'm not sure this makes sense for this dao.
 * If implemented, it should return the number of rows (i.e. pages and hours)
 * that match the specified query.
 *
 * @param daoFilter a set of filters to limit the search
 * @return never returns normally
 * @throws DaoException declared by the interface; not thrown by this implementation
 * @throws UnsupportedOperationException always
 */
@Override
public int getCount(DaoFilter daoFilter) throws DaoException {
    throw new UnsupportedOperationException("getCount is not supported by PageViewSqlDao");
}
/**
 * Convenience overload: makes sure page views between {@code start} and {@code end}
 * are loaded by wrapping the two dates in a single interval.
 *
 * @param start start of the interval
 * @param end   end of the interval
 * @param langs languages to load page views for
 * @throws DaoException propagated from the interval-list overload
 */
@Override
public void ensureLoaded(DateTime start, DateTime end, LanguageSet langs) throws DaoException {
    Interval span = new Interval(start, end);
    ensureLoaded(Arrays.asList(span), langs);
}
@Override
public synchronized void ensureLoaded(List intervals, final LanguageSet langs) throws DaoException {
// FIXME: At the moment we totally ignore the language setting
final Map> loaded = getLoadedHours();
SortedSet needed = new TreeSet();
for (Interval i : intervals) {
for (DateTime tstamp : PageViewUtils.timestampsInInterval(i.getStart(), i.getEnd())) {
for (Language l : langs) {
if (!loaded.containsKey(l) || !loaded.get(l).contains(tstamp)) {
needed.add(tstamp);
}
}
}
}
if (needed.isEmpty()) {
LOG.info("All requested page views are loaded.");
return;
}
LOG.info(String.format("Loading pageviews for %d timestamps between %s and %s",
needed.size(), needed.first().toString(), needed.last().toString()));
PageViewDownloader downloader = new PageViewDownloader(downloadDir);
final TreeMap toLoad;
try {
toLoad = downloader.download(needed);
} catch (WikiBrainException e) {
throw new DaoException(e);
}
beginLoad();
final AtomicInteger[] counters = new AtomicInteger[] { new AtomicInteger(), new AtomicInteger() };
ParallelForEach.loop(toLoad.keySet(), new Procedure() {
@Override
public void call(DateTime tstamp) throws Exception {
LOG.info("loading pageview file " + toLoad.get(tstamp));
loadOneFile(tstamp, toLoad.get(tstamp), langs, counters);
LOG.info("finished pageview file " + toLoad.get(tstamp));
}
});
endLoad();
LOG.info(String.format("Found %d pageviews for langs %s and resolved %d of them.",
counters[0].get(), langs, counters[1].get()));
// Make sure one second passes between the last view loaded and the save of the cached info
// Otherwise we may incorrectly think the cache is stale.
try {
Thread.sleep(1500);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
for (Language lang : langs) {
if (!loaded.containsKey(lang)) {
loaded.put(lang, new TreeSet());
}
for (DateTime tstamp : needed) {
loaded.get(lang).add(tstamp);
}
}
cache.put(LOADED_CACHE_KEY, loaded);
}
/**
 * Reads one page view file and stores every view whose title resolves to a page id.
 * Called from several threads at once by the parallel loop in ensureLoaded.
 *
 * @param tstamp   timestamp assigned to every page view read from the file
 * @param file     page view file to read
 * @param langs    languages passed to the reader
 * @param counters counters[0] is incremented for every raw view read, counters[1] for
 *                 every view resolved to a page id (id &gt;= 0)
 */
private void loadOneFile(DateTime tstamp, File file, LanguageSet langs, AtomicInteger[] counters) {
    PageViewReader reader = new PageViewReader(file, langs);
    for (RawPageView view : reader) {
        try {
            counters[0].getAndIncrement();
            int id = pageDao.getIdByTitle(view.getTitle());
            // Negative ids are treated as "title not found" and skipped.
            if (id >= 0) {
                counters[1].incrementAndGet();
                PageView pv = new PageView(
                        new LocalId(view.getLanguage(), id),
                        tstamp.toDate(),
                        view.getViews());
                save(pv);
                metaDao.incrementRecords(PageView.class, pv.getPageId().getLanguage());
            }
        } catch (DaoException e) {
            // Best effort: count the failure, log it, and carry on with the next view.
            metaDao.incrementErrorsQuietly(PageView.class);
            LOG.warn("error while loading page view from " + file, e);
        }
    }
}
public synchronized Map> getLoadedHours() throws DaoException {
DSLContext context = getJooq();
try {
if (!JooqUtils.tableExists(context, Tables.PAGEVIEW)) {
return new HashMap>();
}
Map> loaded = (Map>) cache.get(LOADED_CACHE_KEY, PageView.class);
if (loaded != null) {
return loaded;
}
LOG.info("creating loadedHours cache. This only happens once...");
loaded = new HashMap>();
Result> times = context
.selectDistinct(Tables.PAGEVIEW.TSTAMP,Tables.PAGEVIEW.LANG_ID)
.from(Tables.PAGEVIEW)
.fetch();
for (Record2 record: times){
Language lang = Language.getById(record.value2());
DateTime date = new DateTime(record.value1());
if (!loaded.containsKey(lang)) {
loaded.put(lang, new TreeSet());
}
loaded.get(lang).add(date);
}
cache.put(LOADED_CACHE_KEY, loaded);
return loaded;
} finally {
freeJooq(context);
}
}
/**
 * Converts one row of the pageview table into a {@link PageView}.
 *
 * @param record row to convert; may be null
 * @return the page view built from the row's language id, page id, timestamp and
 *         view count, or null when record is null
 * @throws DaoException declared for callers; not thrown by the code in this method
 */
protected PageView buildPageView(Record record) throws DaoException {
    if (record == null) {
        return null;
    }
    Language language = Language.getById(record.getValue(Tables.PAGEVIEW.LANG_ID));
    LocalId id = new LocalId(language, record.getValue(Tables.PAGEVIEW.PAGE_ID));
    return new PageView(
            id,
            record.getValue(Tables.PAGEVIEW.TSTAMP),
            record.getValue(Tables.PAGEVIEW.NUM_VIEWS)
    );
}
/**
 * Configuration provider that builds a {@link PageViewSqlDao} for the
 * {@code dao.pageView} configuration path.
 */
public static class Provider extends org.wikibrain.conf.Provider {
    public Provider(Configurator configurator, Configuration config) throws ConfigurationException {
        super(configurator, config);
    }

    /** @return the dao interface this provider supplies, {@code PageViewDao.class} */
    @Override
    public Class getType() {
        return PageViewDao.class;
    }

    /** @return the configuration path handled by this provider, {@code "dao.pageView"} */
    @Override
    public String getPath() {
        return "dao.pageView";
    }

    /**
     * Builds the dao when the configuration block has type {@code "sql"}.
     *
     * @param name          name of the requested component (not used here)
     * @param config        configuration block; "type", "dataSource" and "dir" are read from it
     * @param runtimeParams runtime parameters (not used here)
     * @return a cache-enabled PageViewSqlDao, or null when the type is not "sql"
     * @throws ConfigurationException if a dependency cannot be configured, or wrapping
     *                                any DaoException raised while building the dao
     */
    @Override
    public PageViewDao get(String name, Config config, Map runtimeParams) throws ConfigurationException {
        String type = config.getString("type");
        if (!type.equals("sql")) {
            return null;
        }
        try {
            WpDataSource dataSource = getConfigurator().get(
                    WpDataSource.class,
                    config.getString("dataSource"));
            MetaInfoDao metaDao = getConfigurator().get(MetaInfoDao.class);
            LocalPageDao pageDao = getConfigurator().get(LocalPageDao.class);
            File downloadDir = new File(config.getString("dir"));
            PageViewSqlDao dao = new PageViewSqlDao(dataSource, metaDao, pageDao, downloadDir);

            // The cache directory comes from the global configuration, not from this block.
            File cacheDir = new File(getConfig().get().getString("dao.sqlCachePath"));
            if (!cacheDir.isDirectory()) {
                cacheDir.mkdirs();
            }
            dao.useCache(cacheDir);
            return dao;
        } catch (DaoException e) {
            throw new ConfigurationException(e);
        }
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy