All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.pinot.broker.routing.timeboundary.TimeBoundaryManager Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pinot.broker.routing.timeboundary;

import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import org.apache.helix.AccessOption;
import org.apache.helix.model.ExternalView;
import org.apache.helix.model.IdealState;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
import org.apache.pinot.broker.routing.segmentpreselector.SegmentPreSelector;
import org.apache.pinot.common.metadata.ZKMetadataProvider;
import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
import org.apache.pinot.common.metrics.BrokerGauge;
import org.apache.pinot.common.metrics.BrokerMetrics;
import org.apache.pinot.core.routing.TimeBoundaryInfo;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
import org.apache.pinot.spi.data.DateTimeFormatSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.utils.CommonConstants;
import org.apache.pinot.spi.utils.CommonConstants.Helix.StateModel.SegmentStateModel;
import org.apache.pinot.spi.utils.IngestionConfigUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * The {@code TimeBoundaryManager} class manages the time boundary information for a table.
 * 

TODO: Support SDF (simple date format) time column */ public class TimeBoundaryManager { private static final Logger LOGGER = LoggerFactory.getLogger(TimeBoundaryManager.class); private static final long INVALID_TIME_MS = -1; private final String _offlineTableName; private final ZkHelixPropertyStore _propertyStore; private final BrokerMetrics _brokerMetrics; private final String _segmentZKMetadataPathPrefix; private final String _timeColumn; private final DateTimeFormatSpec _timeFormatSpec; private final long _timeOffsetMs; private final Map _endTimeMsMap = new HashMap<>(); private long _explicitlySetTimeBoundaryMs = INVALID_TIME_MS; private volatile TimeBoundaryInfo _timeBoundaryInfo; public TimeBoundaryManager(TableConfig tableConfig, ZkHelixPropertyStore propertyStore, BrokerMetrics brokerMetrics) { Preconditions.checkState(tableConfig.getTableType() == TableType.OFFLINE, "Cannot construct TimeBoundaryManager for real-time table: %s", tableConfig.getTableName()); _offlineTableName = tableConfig.getTableName(); _propertyStore = propertyStore; _brokerMetrics = brokerMetrics; _segmentZKMetadataPathPrefix = ZKMetadataProvider.constructPropertyStorePathForResource(_offlineTableName) + "/"; Schema schema = ZKMetadataProvider.getTableSchema(_propertyStore, _offlineTableName); Preconditions.checkState(schema != null, "Failed to find schema for table: %s", _offlineTableName); _timeColumn = tableConfig.getValidationConfig().getTimeColumnName(); Preconditions.checkNotNull(_timeColumn, "Time column must be configured in table config for table: %s", _offlineTableName); DateTimeFieldSpec dateTimeSpec = schema.getSpecForTimeColumn(_timeColumn); Preconditions.checkNotNull(dateTimeSpec, "Field spec must be specified in schema for time column: %s of table: %s", _timeColumn, _offlineTableName); _timeFormatSpec = dateTimeSpec.getFormatSpec(); Preconditions.checkNotNull(_timeFormatSpec.getColumnUnit(), "Time unit must be configured in the field spec for time column: %s of table: 
%s", _timeColumn, _offlineTableName); // For HOURLY table with time unit other than DAYS, use (maxEndTime - 1 HOUR) as the time boundary; otherwise, use // (maxEndTime - 1 DAY) boolean isHourlyTable = CommonConstants.Table.PUSH_FREQUENCY_HOURLY.equalsIgnoreCase( IngestionConfigUtils.getBatchSegmentIngestionFrequency(tableConfig)) && _timeFormatSpec.getColumnUnit() != TimeUnit.DAYS; _timeOffsetMs = isHourlyTable ? TimeUnit.HOURS.toMillis(1) : TimeUnit.DAYS.toMillis(1); LOGGER.info("Constructed TimeBoundaryManager with timeColumn: {}, timeFormat: {}, isHourlyTable: {} for table: {}", _timeColumn, dateTimeSpec.getFormat(), isHourlyTable, _offlineTableName); } /** * Initializes the time boundary manager with the ideal state, external view and online segments (segments with * ONLINE/CONSUMING instances in the ideal state and pre-selected by the {@link SegmentPreSelector}). Should be called * only once before calling other methods. *

NOTE: {@code idealState} and {@code externalView} are unused, but intentionally passed in in case they are * needed in the future. */ @SuppressWarnings("unused") public void init(IdealState idealState, ExternalView externalView, Set onlineSegments) { updateExplicitlySetTimeBoundary(idealState); // Bulk load time info for all online segments int numSegments = onlineSegments.size(); List segments = new ArrayList<>(numSegments); List segmentZKMetadataPaths = new ArrayList<>(numSegments); for (String segment : onlineSegments) { segments.add(segment); segmentZKMetadataPaths.add(_segmentZKMetadataPathPrefix + segment); } List znRecords = _propertyStore.get(segmentZKMetadataPaths, null, AccessOption.PERSISTENT, false); long maxEndTimeMs = INVALID_TIME_MS; for (int i = 0; i < numSegments; i++) { String segment = segments.get(i); long endTimeMs = extractEndTimeMsFromSegmentZKMetadataZNRecord(segment, znRecords.get(i)); _endTimeMsMap.put(segment, endTimeMs); maxEndTimeMs = Math.max(maxEndTimeMs, endTimeMs); } updateTimeBoundaryInfo(maxEndTimeMs); } private void updateExplicitlySetTimeBoundary(IdealState idealState) { String timeBoundary = idealState.getRecord().getSimpleField(CommonConstants.IdealState.HYBRID_TABLE_TIME_BOUNDARY); long timeBoundaryMs = timeBoundary != null ? 
Long.parseLong(timeBoundary) : INVALID_TIME_MS; if (_explicitlySetTimeBoundaryMs != timeBoundaryMs) { LOGGER.info("Updating explicitly set time boundary to: {} for table: {}", timeBoundaryMs, _offlineTableName); _explicitlySetTimeBoundaryMs = timeBoundaryMs; } } private long extractEndTimeMsFromSegmentZKMetadataZNRecord(String segment, @Nullable ZNRecord znRecord) { if (znRecord == null) { LOGGER.warn("Failed to find segment ZK metadata for segment: {}, table: {}", segment, _offlineTableName); return INVALID_TIME_MS; } SegmentZKMetadata segmentZKMetadata = new SegmentZKMetadata(znRecord); if (segmentZKMetadata.getTotalDocs() == 0) { return INVALID_TIME_MS; } long endTimeMs = segmentZKMetadata.getEndTimeMs(); if (endTimeMs > 0) { return endTimeMs; } else { LOGGER.warn("Failed to find valid end time for segment: {}, table: {}", segment, _offlineTableName); return INVALID_TIME_MS; } } private void updateTimeBoundaryInfo(long maxEndTimeMs) { TimeBoundaryInfo currentTimeBoundaryInfo = _timeBoundaryInfo; long timeBoundaryMs; if (_explicitlySetTimeBoundaryMs > 0) { // Use explicitly set time boundary timeBoundaryMs = _explicitlySetTimeBoundaryMs; LOGGER.debug("Using explicitly set time boundary: {} for table: {}", _explicitlySetTimeBoundaryMs, _offlineTableName); } else { // No explicit time boundary set if (maxEndTimeMs > 0) { timeBoundaryMs = maxEndTimeMs - _timeOffsetMs; } else { LOGGER.warn("Failed to find segment with valid end time for table: {}, no time boundary generated", _offlineTableName); timeBoundaryMs = INVALID_TIME_MS; } } if (timeBoundaryMs > 0) { String timeBoundary = _timeFormatSpec.fromMillisToFormat(timeBoundaryMs); if (currentTimeBoundaryInfo == null || !currentTimeBoundaryInfo.getTimeValue().equals(timeBoundary)) { _timeBoundaryInfo = new TimeBoundaryInfo(_timeColumn, timeBoundary); LOGGER.info("Updated time boundary to: {} for table: {}", timeBoundary, _offlineTableName); } // Convert formatted time boundary to millis in case the time boundary is 
rounded long formattedTimeBoundaryMs = _timeFormatSpec.fromFormatToMillis(timeBoundary); _brokerMetrics.setValueOfTableGauge(_offlineTableName, BrokerGauge.TIME_BOUNDARY_DIFFERENCE, maxEndTimeMs - formattedTimeBoundaryMs); } else { _timeBoundaryInfo = null; _brokerMetrics.removeTableGauge(_offlineTableName, BrokerGauge.TIME_BOUNDARY_DIFFERENCE); } } /** * Processes the segment assignment (ideal state or external view) change based on the given online segments (segments * with ONLINE/CONSUMING instances in the ideal state and pre-selected by the {@link SegmentPreSelector}). *

NOTE: We don't update all the segment ZK metadata for every external view change, but only the new added/removed * ones. The refreshed segment ZK metadata change won't be picked up. *

NOTE: {@code idealState} is unused, but intentionally passed in in case it is needed in the future. */ @SuppressWarnings("unused") public synchronized void onAssignmentChange(IdealState idealState, ExternalView externalView, Set onlineSegments) { updateExplicitlySetTimeBoundary(idealState); for (String segment : onlineSegments) { // NOTE: Only update the segment end time when there are ONLINE instances in the external view to prevent moving // the time boundary before the new segment is picked up by the servers Map instanceStateMap = externalView.getStateMap(segment); if (instanceStateMap != null && instanceStateMap.containsValue(SegmentStateModel.ONLINE)) { _endTimeMsMap.computeIfAbsent(segment, k -> extractEndTimeMsFromSegmentZKMetadataZNRecord(segment, _propertyStore.get(_segmentZKMetadataPathPrefix + segment, null, AccessOption.PERSISTENT))); } } _endTimeMsMap.keySet().retainAll(onlineSegments); updateTimeBoundaryInfo(getMaxEndTimeMs()); } private long getMaxEndTimeMs() { long maxEndTimeMs = INVALID_TIME_MS; for (long endTimeMs : _endTimeMsMap.values()) { maxEndTimeMs = Math.max(maxEndTimeMs, endTimeMs); } return maxEndTimeMs; } /** * Refreshes the metadata for the given segment (called when segment is getting refreshed). */ public synchronized void refreshSegment(String segment) { _endTimeMsMap.put(segment, extractEndTimeMsFromSegmentZKMetadataZNRecord(segment, _propertyStore.get(_segmentZKMetadataPathPrefix + segment, null, AccessOption.PERSISTENT))); updateTimeBoundaryInfo(getMaxEndTimeMs()); } @Nullable public TimeBoundaryInfo getTimeBoundaryInfo() { return _timeBoundaryInfo; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy