// com.zavtech.morpheus.yahoo.YahooQuoteHistorySource Maven / Gradle / Ivy
/**
* Copyright (C) 2014-2017 Xavier Witdouck
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.zavtech.morpheus.yahoo;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import com.zavtech.morpheus.array.Array;
import com.zavtech.morpheus.frame.DataFrame;
import com.zavtech.morpheus.frame.DataFrameCursor;
import com.zavtech.morpheus.frame.DataFrameException;
import com.zavtech.morpheus.frame.DataFrameSource;
import com.zavtech.morpheus.index.Index;
import com.zavtech.morpheus.range.Range;
import com.zavtech.morpheus.util.Asserts;
import com.zavtech.morpheus.util.IO;
import com.zavtech.morpheus.util.TextStreamReader;
import com.zavtech.morpheus.util.http.HttpClient;
import com.zavtech.morpheus.util.http.HttpException;
import com.zavtech.morpheus.util.http.HttpHeader;
/**
* A DataFrameSource implementation that loads historical quote data from Yahoo Finance using their CSV API.
*
* Any use of the extracted data from this software should adhere to Yahoo Finance Terms and Conditions.
*
* @author Xavier Witdouck
*
* This is open source software released under the Apache 2.0 License
*/
public class YahooQuoteHistorySource extends DataFrameSource {
/** Yahoo endpoint for the crumb token; presumably requested by getCrumb(), whose value createURL() puts into the download URL. */
private static final String CRUMB_URL = "https://query1.finance.yahoo.com/v1/test/getcrumb";
/** Yahoo quote page URL; NOTE(review): presumably requested to obtain session cookies - confirm against getCookies(). */
private static final String COOKIE_URL = "https://finance.yahoo.com/quote/SPY?p=SPY";
/** Format template for the history download URL: symbol (%s), period1 and period2 as epoch seconds (%d), crumb (%s). */
private static final String QUOTE_URL = "https://query1.finance.yahoo.com/v7/finance/download/%s?period1=%d&period2=%d&interval=1d&events=history&crumb=%s";
/**
 * Accepts only non-null dates that fall on Monday through Friday. It is used to build the
 * weekday-only row index when holiday padding is requested.
 *
 * The explicit {@code LocalDate} type argument is required: with a raw {@code Predicate} the
 * lambda parameter is typed as {@code Object} and {@code getDayOfWeek()} does not resolve.
 */
private static final Predicate<LocalDate> weekdayPredicate = date -> {
    if (date == null) {
        return false;
    }
    switch (date.getDayOfWeek()) {
        case SATURDAY:
        case SUNDAY:
            return false;
        default:
            return true;
    }
};
/**
 * The column keys of every frame produced by this source (read() copies this array to build the column index).
 * The order matters: read() writes open, high, low, close, volume and split ratio by column ordinal 0-5,
 * while calculateChanges() fills PX_CHANGE and PX_CHANGE_PERCENT by key.
 * NOTE(review): declared as a raw Array; the element type argument appears to have been lost.
 */
private static Array fields = Array.of(
YahooField.PX_OPEN,
YahooField.PX_HIGH,
YahooField.PX_LOW,
YahooField.PX_CLOSE,
YahooField.PX_VOLUME,
YahooField.PX_SPLIT_RATIO,
YahooField.PX_CHANGE,
YahooField.PX_CHANGE_PERCENT
);
/** Crumb token; NOTE(review): presumably cached here by getCrumb(), which is not visible in this part of the file. */
private String crumb;
/** Cookies sent with each quote request; starts empty and is populated by getCookies() when found empty. */
private Map cookies;
/**
 * Creates a new quote history source whose cookie cache starts out empty.
 */
public YahooQuoteHistorySource() {
    super();
    cookies = new HashMap<>();
}
@Override
public DataFrame read(Consumer configurator) throws DataFrameException {
final Options options = initOptions(new Options(), configurator);
try {
final URL url = createURL(options.ticker, options.startDate,options.endDate);
return HttpClient.getDefault().>doGet(httpRequest -> {
httpRequest.setUrl(url);
httpRequest.setRetryCount(2);
httpRequest.setReadTimeout(5000);
httpRequest.setConnectTimeout(2000);
httpRequest.getCookies().putAll(getCookies());
httpRequest.setResponseHandler(response -> {
if (response.getStatus().getCode() != 200) {
final int code = response.getStatus().getCode();
throw new HttpException(httpRequest, "Yahoo Finance responded with status code " + code, null);
} else {
final InputStream stream = response.getStream();
final TextStreamReader reader = new TextStreamReader(stream);
if (reader.hasNext()) reader.nextLine(); //Swallow the header
final Index rowKeys = createDateIndex(options);
final Index colKeys = Index.of(fields.copy());
final DataFrame frame = DataFrame.ofDoubles(rowKeys, colKeys);
final DataFrameCursor cursor = frame.cursor();
while (reader.hasNext()) {
final String line = reader.nextLine();
final String[] elements = line.split(",");
final LocalDate date = parseDate(elements[0]);
final double open = Double.parseDouble(elements[1]);
final double high = Double.parseDouble(elements[2]);
final double low = Double.parseDouble(elements[3]);
final double close = Double.parseDouble(elements[4]);
final double closeAdj = Double.parseDouble(elements[5]);
final double volume = Double.parseDouble(elements[6]);
final double splitRatio = Math.abs(closeAdj - close) > 0.00001d ? closeAdj / close : 1d;
final double adjustment = options.adjustForSplitsAndDividends ? splitRatio : 1d;
if (options.paddedHolidays) {
cursor.moveToRow(date);
cursor.moveToColumn(0).setDouble(open * adjustment);
cursor.moveToColumn(1).setDouble(high * adjustment);
cursor.moveToColumn(2).setDouble(low * adjustment);
cursor.moveToColumn(3).setDouble(close * adjustment);
cursor.moveToColumn(4).setDouble(volume);
cursor.moveToColumn(5).setDouble(splitRatio);
} else {
frame.rows().add(date, v -> {
switch (v.colOrdinal()) {
case 0: return open * adjustment;
case 1: return high * adjustment;
case 2: return low * adjustment;
case 3: return close * adjustment;
case 4: return volume;
case 5: return splitRatio;
default: return v.getDouble();
}
});
}
}
if (options.paddedHolidays) {
frame.fill().down(2);
}
calculateChanges(frame);
return Optional.of(frame);
}
});
}).orElseGet(() -> {
throw new RuntimeException("Failed to load DataFrame from URL: " + url);
});
} catch (Exception ex) {
throw new DataFrameException("Market Data query failed for asset " + options.ticker, ex);
}
}
/**
 * Builds the index used to initialize the row axis of the result frame.
 * With holiday padding, the index holds every weekday in the requested date range.
 * Otherwise it is an empty LocalDate index created with the estimated range size plus 10.
 * @param options   the options for the request
 * @return          the date index for the result DataFrame
 */
private Index createDateIndex(Options options) {
    final Range dates = Range.of(options.startDate, options.endDate);
    if (!options.paddedHolidays) {
        final int capacity = 10 + (int) dates.estimateSize();
        return Index.of(LocalDate.class, capacity);
    }
    return Index.of(dates.filter(weekdayPredicate).toArray());
}
/**
 * Sorts the rows of the frame and then fills PX_CHANGE and PX_CHANGE_PERCENT for every row
 * after the first, based on that row's PX_CLOSE relative to the PX_CLOSE of the row before it.
 * The first row is left untouched as it has no predecessor.
 * @param frame the quote frame, modified in place
 * @return      the same frame instance that was passed in
 */
private DataFrame calculateChanges(DataFrame frame) {
    frame.rows().sort(true);
    final int rowCount = frame.rowCount();
    for (int row = 1; row < rowCount; ++row) {
        final double priorClose = frame.data().getDouble(row - 1, YahooField.PX_CLOSE);
        final double latestClose = frame.data().getDouble(row, YahooField.PX_CLOSE);
        final double absoluteChange = latestClose - priorClose;
        final double relativeChange = (latestClose / priorClose) - 1d;
        frame.data().setDouble(row, YahooField.PX_CHANGE, absoluteChange);
        frame.data().setDouble(row, YahooField.PX_CHANGE_PERCENT, relativeChange);
    }
    return frame;
}
/**
 * Builds the history download URL for a symbol and date range.
 * Both dates are converted to epoch seconds at the start of the day in UTC.
 * The symbol and the crumb are inserted into the URL template verbatim, without URL-encoding.
 * @param symbol    the asset symbol
 * @param start     the start date
 * @param end       the end date
 * @return          the query url
 * @throws Exception    if url construction fails
 */
private URL createURL(String symbol, LocalDate start, LocalDate end) throws Exception {
    // The crumb is resolved first, before the dates are touched
    final String token = getCrumb();
    final long period1 = start.atStartOfDay(ZoneOffset.UTC).toEpochSecond();
    final long period2 = end.atStartOfDay(ZoneOffset.UTC).toEpochSecond();
    final String address = String.format(QUOTE_URL, symbol, period1, period2, token);
    return new URL(address);
}
/**
 * Parses a date written as year, month and day separated by hyphens, for example 2017-06-30.
 * Surrounding whitespace is ignored.
 * @param dateString    the string to parse, may be null
 * @return              the parsed date value, or null if the input is null
 */
private LocalDate parseDate(String dateString) {
    if (dateString == null) {
        return null;
    }
    final String[] parts = dateString.trim().split("-");
    return LocalDate.of(
        Integer.parseInt(parts[0]),
        Integer.parseInt(parts[1]),
        Integer.parseInt(parts[2])
    );
}
/**
* Returns the cookies to send with the request
* @return the cookies to send with request
*/
private synchronized Map getCookies() {
if (cookies.isEmpty()) {
this.cookies = HttpClient.getDefault().