All downloads are free. Search and download functionality uses the official Maven repository.

gobblin.ingestion.google.webmaster.GoogleWebMasterSourceWeekly Maven / Gradle / Ivy

package gobblin.ingestion.google.webmaster;

import gobblin.configuration.WorkUnitState;
import gobblin.source.extractor.extract.LongWatermark;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.tuple.Pair;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;


/**
 * A {@link GoogleWebMasterSource} whose extractor covers a one-week (Friday to Thursday) window
 * derived from the work unit's low watermark.
 *
 * Watermarks are handled as {@code long} values whose decimal digits follow the
 * {@code yyyyMMddHHmmss} pattern.
 */
public class GoogleWebMasterSourceWeekly extends GoogleWebMasterSource {
  // Shared formatter/parser for yyyyMMddHHmmss watermarks. Declared final so the shared instance
  // can never be swapped out at runtime.
  private static final DateTimeFormatter _hmsFormatter = DateTimeFormat.forPattern("yyyyMMddHHmmss");

  /**
   * Creates an extractor for the weekly window computed from the work unit's low watermark.
   *
   * The low watermark is read as a {@link LongWatermark}, converted to a date range with
   * {@link #getTaskRange(long)}, and both ends of the range are re-encoded as
   * {@code yyyyMMddHHmmss} numbers before being handed to the extractor.
   *
   * NOTE(review): the collection parameters are raw types in this file; the superclass presumably
   * declares their element types - confirm and parameterize to match.
   *
   * @param state the work unit state supplying the low watermark
   * @param columnPositionMap passed through unchanged to the extractor
   * @param requestedDimensions passed through unchanged to the extractor
   * @param requestedMetrics passed through unchanged to the extractor
   * @return an extractor configured with the start and end of the weekly window
   * @throws IOException declared by the signature; presumably raised by the extractor's
   *         construction - verify against GoogleWebmasterExtractor
   */
  @Override
  GoogleWebmasterExtractor createExtractor(WorkUnitState state, Map columnPositionMap,
      List requestedDimensions,
      List requestedMetrics) throws IOException {

    long lowWatermark = state.getWorkunit().getLowWatermark(LongWatermark.class).getValue();
    // The pair must be parameterized: with a raw Pair, getLeft()/getRight() are typed Object and
    // DateTimeFormatter.print(...) has no overload accepting Object.
    Pair<DateTime, DateTime> taskRange = getTaskRange(lowWatermark);
    Long startDate = Long.valueOf(_hmsFormatter.print(taskRange.getLeft()));
    Long endDate = Long.valueOf(_hmsFormatter.print(taskRange.getRight()));
    return new GoogleWebmasterExtractor(state, startDate, endDate, columnPositionMap, requestedDimensions,
        requestedMetrics);
  }

  /**
   * Returns the one-week range, Friday to Thursday, that corresponds to the given watermark.
   *
   * The range is computed from an "anchor" Monday:
   * <ul>
   *   <li>if the watermark falls on a Sunday, the anchor is the following Monday (one day later);</li>
   *   <li>otherwise the anchor is the Monday of the watermark's own week.</li>
   * </ul>
   * The range is then centered on the Monday one week before the anchor: it starts three days
   * before that Monday (a Friday) and ends three days after it (a Thursday).
   *
   * Consequently, for a Sunday watermark the returned Thursday is 3 days earlier; for a Monday
   * through Saturday watermark it is the Thursday of the previous Monday-based week (4 to 9 days
   * earlier).
   *
   * Only whole days and weeks are added or subtracted, so the time-of-day fields parsed from the
   * watermark are carried into both ends of the range.
   *
   * See tests for more details.
   *
   * @param lowWatermark the watermark, whose decimal digits follow {@code yyyyMMddHHmmss}
   * @return a pair of (Friday start, Thursday end)
   */
  public static Pair<DateTime, DateTime> getTaskRange(long lowWatermark) {
    DateTime date = _hmsFormatter.parseDateTime(Long.toString(lowWatermark));
    //Monday = 1, Sunday = 7
    int dayOfWeek = date.getDayOfWeek();
    if (dayOfWeek == 7) {
      date = date.plusDays(1); //Go the next Monday.
    } else {
      date = date.minusDays(dayOfWeek - 1); //Go to this Monday.
    }

    // Step back one full week from the anchor Monday; the range straddles that Monday.
    DateTime lastMonday = date.minusWeeks(1);
    return Pair.of(lastMonday.minusDays(3), lastMonday.plusDays(3));
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy