All downloads are free. The search and download functionality uses the official Maven repository.

co.cask.cdap.logging.write.LogLocation Maven / Gradle / Ivy

There is a newer version: 5.1.2
Show newest version
/*
 * Copyright © 2017 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.logging.write;

import ch.qos.logback.classic.spi.ILoggingEvent;
import co.cask.cdap.api.dataset.lib.CloseableIterator;
import co.cask.cdap.common.io.Locations;
import co.cask.cdap.common.io.SeekableInputStream;
import co.cask.cdap.common.logging.LogSamplers;
import co.cask.cdap.common.logging.Loggers;
import co.cask.cdap.logging.filter.Filter;
import co.cask.cdap.logging.read.Callback;
import co.cask.cdap.logging.read.LogEvent;
import co.cask.cdap.logging.read.LogOffset;
import co.cask.cdap.logging.serialize.LogSchema;
import co.cask.cdap.logging.serialize.LoggingEvent;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.cdap.security.impersonation.Impersonator;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.twill.filesystem.Location;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.Callable;

/**
 * LogLocation representing a log file and methods to read the file's contents.
 */
public class LogLocation {
  // Class logger used for trace output and for errors raised while opening or closing the file.
  private static final Logger LOG = LoggerFactory.getLogger(LogLocation.class);
  // Sampling wrapper around LOG used for read failures.
  // NOTE(review): limitRate(60000) presumably emits at most one message per 60 seconds - confirm in LogSamplers.
  private static final Logger READ_FAILURE_LOG = Loggers.sampling(LOG, LogSamplers.limitRate(60000));

  // Backwards skip distance (10 MB) used by readLogPrev when a tenth of the file length is larger than this
  // value or is not positive.
  private static final long DEFAULT_SKIP_LEN = 10 * 1024 * 1024;
  // old version; files of this version are opened with impersonation (see createReader)
  public static final String VERSION_0 = "V0";
  // new version; files of this version are opened without impersonation
  public static final String VERSION_1 = "V1";
  // logging framework version that this file belongs to, compared against VERSION_0 in createReader
  private final String frameworkVersion;
  // start event time of this log file, in millis
  private final long eventTimeMs;
  // timestamp associated with the file, in millis
  private final long fileCreationTimeMs;
  // location of the log file that the readers are opened on
  private final Location location;
  // namespace passed to the impersonator when the file is opened with impersonation
  private final NamespaceId namespaceId;
  // used to open the file on behalf of the namespace for VERSION_0 files
  private final Impersonator impersonator;

  /**
   * Creates a handle for a single log file.
   *
   * @param frameworkVersion logging framework version of the file, {@link #VERSION_0} or {@link #VERSION_1}
   * @param eventTimeMs start event time of the file in millis
   * @param fileCreationTimeMs timestamp associated with the file in millis
   * @param location location of the log file
   * @param namespaceId namespace name; it is wrapped into a {@link NamespaceId}
   * @param impersonator impersonator used when the file has to be opened with impersonation
   */
  public LogLocation(String frameworkVersion, long eventTimeMs, long fileCreationTimeMs, Location location,
                     String namespaceId, Impersonator impersonator) {
    // where the file lives and on whose behalf it is opened
    this.location = location;
    this.namespaceId = new NamespaceId(namespaceId);
    this.impersonator = impersonator;

    // metadata describing the file
    this.frameworkVersion = frameworkVersion;
    this.eventTimeMs = eventTimeMs;
    this.fileCreationTimeMs = fileCreationTimeMs;
  }

  /**
   * Returns the logging framework version of this file.
   *
   * @return the version string given at construction time, currently {@link #VERSION_0} or {@link #VERSION_1}
   */
  public String getFrameworkVersion() {
    return this.frameworkVersion;
  }

  /**
   * Returns the start event time of this log file.
   *
   * @return the start event time in millis
   */
  public long getEventTimeMs() {
    return this.eventTimeMs;
  }

  /**
   * Returns where the log file is stored.
   *
   * @return the {@link Location} of the log file
   */
  public Location getLocation() {
    return this.location;
  }

  /**
   * Returns the timestamp associated with the file.
   *
   * @return the file creation time in millis
   */
  public long getFileCreationTimeMs() {
    return this.fileCreationTimeMs;
  }

  /**
   * Returns a closeable iterator over the {@link LogEvent}s of this file, read in file order.
   *
   * <p>The caller must close the returned iterator to release the underlying file reader. If the file cannot be
   * opened or read, the failure is logged and the iterator simply yields no (further) events.
   *
   * @param logFilter filter for filtering log events
   * @param fromTimeMs start timestamp in millis; events before it are skipped
   * @param toTimeMs end timestamp in millis
   * @param maxEvents max events to return
   * @return closeable iterator of log events
   */
  public CloseableIterator<LogEvent> readLog(Filter logFilter, long fromTimeMs, long toTimeMs, int maxEvents) {
    return new LogEventIterator(logFilter, fromTimeMs, toTimeMs, maxEvents);
  }

  /**
   * Reads log events from this file and hands each of them to the given callback, in file order.
   *
   * <p>The underlying iterator is always closed before this method returns, including when the callback throws.
   *
   * @param logFilter filter for filtering log events
   * @param fromTimeMs start timestamp in millis
   * @param toTimeMs end timestamp in millis
   * @param maxEvents max events to return
   * @param callback callback to call with log event
   */
  public void readLog(Filter logFilter, long fromTimeMs, long toTimeMs, int maxEvents,
                      Callback callback) {
    // The type argument is required so that next() yields a LogEvent rather than Object.
    try (CloseableIterator<LogEvent> logEventIter =
           readLog(logFilter, fromTimeMs, toTimeMs, maxEvents)) {
      while (logEventIter.hasNext()) {
        callback.handle(logEventIter.next());
      }
    }
  }

  /**
   * Reads the file backwards, one group of Avro blocks at a time, and returns the last {@code maxEvents} matching
   * {@link LogEvent}s found, in file order.
   *
   * <p>Read failures are logged (sampled) and not propagated; the events collected before the failure are
   * returned.
   *
   * @param logFilter filter for filtering log events
   * @param fromTimeMs timestamp in millis; reading of a segment stops at the first event that is newer than this
   * @param maxEvents max events to return
   * @return collection of the previous log events; empty if the file has no events
   * @throws IOException declared for callers; I/O failures while reading are currently logged instead
   */
  @SuppressWarnings("WeakerAccess")
  public Collection<LogEvent> readLogPrev(Filter logFilter, long fromTimeMs,
                                          final int maxEvents) throws IOException {
    // Segments are prepended as we walk backwards, so the deque ends up in file order.
    Deque<List<LogEvent>> logSegments = new LinkedList<>();
    int count = 0;
    try (DataFileReader<GenericRecord> dataFileReader = createReader()) {

      if (!dataFileReader.hasNext()) {
        return ImmutableList.of();
      }

      // Calculate skipLen based on fileLength
      long length = location.length();
      LOG.trace("File length {} {}", location, length);
      long skipLen = length / 10;
      if (skipLen > DEFAULT_SKIP_LEN || skipLen <= 0) {
        skipLen = DEFAULT_SKIP_LEN;
      }

      // For open file, endPosition sync marker is unknown so start from file length and read up to the actual EOF
      dataFileReader.sync(length);
      long finalSync = dataFileReader.previousSync();
      List<LogEvent> logSegment = readToEndSyncPosition(dataFileReader, logFilter, fromTimeMs, -1);

      if (!logSegment.isEmpty()) {
        logSegments.addFirst(logSegment);
        count += logSegment.size();
      }

      LOG.trace("Read log events {} from position {}", count, finalSync);

      long startPosition = finalSync;
      long endPosition = startPosition;
      long currentSync;

      while (startPosition > 0 && count < maxEvents) {
        // Skip to sync position less than current sync position
        startPosition = skipToPosition(dataFileReader, startPosition, endPosition, skipLen);
        currentSync = dataFileReader.previousSync();
        logSegment = readToEndSyncPosition(dataFileReader, logFilter, fromTimeMs, endPosition);

        if (!logSegment.isEmpty()) {
          logSegments.addFirst(logSegment);
          count += logSegment.size();
        }
        LOG.trace("Read log events {} from position {} to endPosition {}", count, currentSync, endPosition);

        // The segment just read becomes the upper bound for the next, earlier, segment.
        endPosition = currentSync;
      }
    } catch (IOException e) {
      READ_FAILURE_LOG.warn("Got exception while reading log file {}", location, e);
    }

    // More events than requested may have been collected; drop the oldest ones.
    int skip = count >= maxEvents ? count - maxEvents : 0;
    return Lists.newArrayList(Iterables.skip(Iterables.concat(logSegments), skip));
  }

  /**
   * Reads events starting at the reader's current position until the reader's sync position reaches
   * {@code endSyncPosition}, the file ends, or an event newer than {@code fromTimeMs} is seen.
   *
   * @param dataFileReader reader already positioned at the sync marker to start from
   * @param logFilter filter an event has to match to be included in the result
   * @param fromTimeMs timestamp in millis; reading stops at the first event with a greater timestamp
   * @param endSyncPosition sync position to stop at, or {@code -1} to read up to the end of the file
   * @return the matching events in file order
   * @throws IOException if reading from the file fails
   */
  private List<LogEvent> readToEndSyncPosition(DataFileReader<GenericRecord> dataFileReader, Filter logFilter,
                                               long fromTimeMs, long endSyncPosition) throws IOException {

    List<LogEvent> logSegment = new ArrayList<>();
    long currentSyncPosition = dataFileReader.previousSync();
    // Read up to the end if endSyncPosition is not known (in case of an open file)
    // or read until endSyncPosition has been reached
    while (dataFileReader.hasNext() && (endSyncPosition == -1 || (currentSyncPosition < endSyncPosition))) {
      ILoggingEvent loggingEvent = new LoggingEvent(dataFileReader.next());
      loggingEvent.prepareForDeferredProcessing();

      // Stop at the first event that is newer than fromTimeMs
      if (loggingEvent.getTimeStamp() > fromTimeMs) {
        break;
      }

      if (logFilter.match(loggingEvent)) {
        logSegment.add(new LogEvent(loggingEvent,
                                    new LogOffset(LogOffset.INVALID_KAFKA_OFFSET, loggingEvent.getTimeStamp())));
      }
      currentSyncPosition = dataFileReader.previousSync();
    }

    return logSegment;
  }

  /**
   * Starting from {@code startPosition}, moves backwards by {@code skipLen} in each iteration until the reader
   * lands on a sync position different from {@code endSyncPosition}, or the beginning of the file is reached.
   *
   * @param dataFileReader reader to reposition; it is left synced at the position found
   * @param startPosition file position to start moving backwards from
   * @param endSyncPosition sync position of the segment that has already been read
   * @param skipLen distance to move backwards per iteration
   * @return the file position that was last synced to, or {@code startPosition} unchanged if it is not positive
   * @throws IOException if syncing the reader fails
   */
  private long skipToPosition(DataFileReader<?> dataFileReader,
                              long startPosition, long endSyncPosition, long skipLen) throws IOException {
    long currentSync = endSyncPosition;
    while (startPosition > 0 && currentSync == endSyncPosition) {
      // Never move before the beginning of the file.
      startPosition = Math.max(0L, startPosition - skipLen);
      dataFileReader.sync(startPosition);
      currentSync = dataFileReader.previousSync();
      LOG.trace("Got position {} after skipping {} positions from currentSync {}",
                startPosition, skipLen, currentSync);
    }
    return startPosition;
  }

  /**
   * Forward iterator over the events of this log file. It always holds the next event to return in
   * {@link #next}, computed eagerly, so {@link #hasNext()} never touches the file.
   *
   * <p>Failures while opening or reading the file are logged and end the iteration instead of being thrown.
   */
  private final class LogEventIterator implements CloseableIterator<LogEvent> {

    private final Filter logFilter;
    private final long fromTimeMs;
    private final long toTimeMs;
    private final long maxEvents;

    // Stays null if the file could not be opened.
    private DataFileReader<GenericRecord> dataFileReader;

    private ILoggingEvent loggingEvent;
    // Record instance handed back to the reader on every read so that it can be reused.
    private GenericRecord datum;

    // Number of events seen so far that passed both the time check and the filter.
    private int count = 0;
    // Timestamp of the event read before the current one, -1 before the first read.
    private long prevTimestamp = -1;

    // The event the next call to next() returns; null once the iteration is over.
    private LogEvent next;

    /**
     * Opens the file, positions the reader at a sync point before the first event at or after
     * {@code fromTimeMs} and computes the first event to return.
     */
    LogEventIterator(Filter logFilter, long fromTimeMs, long toTimeMs, long maxEvents) {
      this.logFilter = logFilter;
      this.fromTimeMs = fromTimeMs;
      this.toTimeMs = toTimeMs;
      this.maxEvents = maxEvents;

      try {
        dataFileReader = createReader();
        if (dataFileReader.hasNext()) {
          datum = dataFileReader.next();
          loggingEvent = new LoggingEvent(datum);
          loggingEvent.prepareForDeferredProcessing();

          long prevPrevSyncPos = 0;
          long prevSyncPos = 0;
          // Seek to time fromTimeMs, looking at one event after each sync point
          while (loggingEvent.getTimeStamp() < fromTimeMs && dataFileReader.hasNext()) {
            // Seek to the next sync point
            long curPos = dataFileReader.tell();
            prevPrevSyncPos = prevSyncPos;
            prevSyncPos = dataFileReader.previousSync();
            LOG.trace("Syncing to pos {}", curPos);
            dataFileReader.sync(curPos);
            if (dataFileReader.hasNext()) {
              loggingEvent = new LoggingEvent(dataFileReader.next(datum));
              loggingEvent.prepareForDeferredProcessing();
            }
          }

          // We're now likely past the record with fromTimeMs, rewind to the previous sync point
          dataFileReader.sync(prevPrevSyncPos);
          LOG.trace("Final sync pos {}", prevPrevSyncPos);
        }

        // populate the first element
        computeNext();

      } catch (Exception e) {
        // we want to ignore invalid or missing log files
        LOG.error("Got exception while reading log file {}", location.getName(), e);
      }
    }

    /**
     * Computes the next LogEvent and sets the field 'next', unless it is already set. Leaves 'next' as null
     * when the file is exhausted, the limits are reached or reading fails.
     */
    private void computeNext() {
      try {
        // read events from file
        while (next == null && dataFileReader.hasNext()) {
          loggingEvent = new LoggingEvent(dataFileReader.next(datum));
          loggingEvent.prepareForDeferredProcessing();

          if (loggingEvent.getTimeStamp() >= fromTimeMs && logFilter.match(loggingEvent)) {
            ++count;
            // Stop once over the event limit or at/after toTimeMs, but only when the timestamp differs
            // from the one of the previously read event.
            if ((count > maxEvents || loggingEvent.getTimeStamp() >= toTimeMs)
              && loggingEvent.getTimeStamp() != prevTimestamp) {
              break;
            }
            next = new LogEvent(loggingEvent,
                                new LogOffset(LogOffset.INVALID_KAFKA_OFFSET, loggingEvent.getTimeStamp()));
          }
          prevTimestamp = loggingEvent.getTimeStamp();
        }
      } catch (Exception e) {
        // We want to ignore invalid or missing log files.
        // If the 'next' variable wasn't set by this method call, then the 'hasNext' method
        // will return false, and no more events will be read from this file.
        READ_FAILURE_LOG.error("Got exception while reading log file {}", location.getName(), e);
      }
    }

    /**
     * Closes the underlying file reader, if one was opened. Failures are logged and not propagated.
     */
    @Override
    public void close() {
      try {
        if (dataFileReader != null) {
          dataFileReader.close();
        }
      } catch (IOException e) {
        LOG.error("Got exception while closing log file {}", location.getName(), e);
      }
    }

    @Override
    public boolean hasNext() {
      return next != null;
    }

    @Override
    public LogEvent next() {
      if (this.next == null) {
        throw new NoSuchElementException();
      }
      LogEvent toReturn = this.next;
      // Clear the slot first, computeNext() only reads from the file while it is empty.
      this.next = null;
      computeNext();
      return toReturn;
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException("Remove not supported");
    }
  }

  /**
   * Opens an Avro reader over the log file. Files written by {@link #VERSION_0} are opened with impersonation,
   * all others without.
   *
   * @return a new reader that the caller has to close
   * @throws IOException if the file cannot be opened or is not a readable Avro file
   */
  private DataFileReader<GenericRecord> createReader() throws IOException {
    // Constant-first comparison so that a null framework version does not cause a NullPointerException.
    boolean shouldImpersonate = VERSION_0.equals(frameworkVersion);
    LocationSeekableInput input =
      new LocationSeekableInput(location, namespaceId, impersonator, shouldImpersonate);
    try {
      return new DataFileReader<>(input, new GenericDatumReader<GenericRecord>(LogSchema.LoggingEvent.SCHEMA));
    } catch (IOException | RuntimeException e) {
      // The reader never took ownership of the stream (e.g. the file is not valid Avro), so it has to be
      // closed here; otherwise the open stream would leak.
      try {
        input.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw e;
    }
  }

  /**
   * An implementation of Avro SeekableInput over Location. The length of the location is read once at
   * construction time and reported unchanged afterwards.
   */
  private static final class LocationSeekableInput implements SeekableInput {

    private final SeekableInputStream is;
    private final long len;

    /**
     * Opens the input stream of the given location, optionally on behalf of the given namespace.
     *
     * @param location location to read from
     * @param namespaceId namespace to impersonate when {@code shouldImpersonate} is set
     * @param impersonator impersonator used to open the stream when {@code shouldImpersonate} is set
     * @param shouldImpersonate whether the stream has to be opened through the impersonator
     * @throws IOException if the stream cannot be opened or the length of the location cannot be determined
     */
    LocationSeekableInput(final Location location,
                          NamespaceId namespaceId, Impersonator impersonator,
                          boolean shouldImpersonate) throws IOException {
      SeekableInputStream input;
      try {
        if (shouldImpersonate) {
          input = impersonator.doAs(namespaceId, new Callable<SeekableInputStream>() {
            @Override
            public SeekableInputStream call() throws Exception {
              return Locations.newInputSupplier(location).getInput();
            }
          });
        } else {
          // impersonation is not required for V1 version.
          input = Locations.newInputSupplier(location).getInput();
        }
      } catch (IOException e) {
        throw e;
      } catch (Exception e) {
        // should not happen
        throw Throwables.propagate(e);
      }

      try {
        this.len = location.length();
      } catch (IOException | RuntimeException e) {
        // The stream is already open at this point; close it so that a failed construction does not leak it.
        try {
          input.close();
        } catch (IOException closeFailure) {
          e.addSuppressed(closeFailure);
        }
        throw e;
      }
      this.is = input;
    }

    @Override
    public void seek(long p) throws IOException {
      is.seek(p);
    }

    @Override
    public long tell() throws IOException {
      return is.getPos();
    }

    @Override
    public long length() throws IOException {
      return len;
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      return is.read(b, off, len);
    }

    @Override
    public void close() throws IOException {
      is.close();
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy