de.codesourcery.versiontracker.common.server.MavenCentralVersionProvider Maven / Gradle / Ivy
/**
* Copyright 2018 Tobias Gierke
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.codesourcery.versiontracker.common.server;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.DefaultConnectionKeepAliveStrategy;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import de.codesourcery.versiontracker.common.Artifact;
import de.codesourcery.versiontracker.common.IVersionProvider;
import de.codesourcery.versiontracker.common.JSONHelper;
import de.codesourcery.versiontracker.common.Version;
import de.codesourcery.versiontracker.common.VersionInfo;
/**
* Version provider that retrieves artifact metadata from Maven central.
*
* This class is thread-safe.
*
* @author [email protected]
*/
public class MavenCentralVersionProvider implements IVersionProvider
{
private static final Logger LOG = LogManager.getLogger(MavenCentralVersionProvider.class);
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMddHHmmss").withZone(ZoneId.of("UTC"));
public static final String DEFAULT_REPO1_BASE_URL = "https://repo1.maven.org/maven2/";
public static final String DEFAULT_SONATYPE_REST_API_BASE_URL = "https://search.maven.org/solrsearch/select";
/**
* HTTP GET parameters used by Sonatype REST API.
*
* There's basically no documentation for this except https://central.sonatype.org/search/rest-api-guide/
*
*
* @author [email protected]
*/
enum HttpParam {
QUERY("q",1),
/** return all versions of an artifact */
OPT_RETURN_ALL_VERSION("core","gav", 2 ),
START_OFFSET("start",3),
MAX_RESULT_COUNT("rows",4),
RESULT_TYPE("wt", "json", 5 )
;
public final String literal;
public final String value;
public final int order;
HttpParam(String literal, int order) {
this( literal, null, order );
}
HttpParam(String literal, String value, int order)
{
this.literal = literal;
this.value = value;
this.order = order;
}
}
private static final ObjectMapper JSON_MAPPER = JSONHelper.newObjectMapper();
private static final class MyExpressions // XPathExpression is NOT thread-safe so we use a ThreadLocal + this wrapper
{
private final XPathExpression latestSnapshot;
private final XPathExpression latestRelease;
private final XPathExpression lastUpdateDate;
public MyExpressions()
{
final XPathFactory factory = XPathFactory.newInstance();
final XPath xpath = factory.newXPath();
try {
latestSnapshot = xpath.compile("/metadata/versioning/latest[text()]");
latestRelease = xpath.compile("/metadata/versioning/release[text()]");
lastUpdateDate = xpath.compile("/metadata/versioning/lastUpdated");
} catch (XPathExpressionException e) {
throw new RuntimeException(e);
}
}
}
@FunctionalInterface
public interface MyStreamHandler
{
T process(InputStream stream) throws IOException;
}
private final PoolingHttpClientConnectionManager connManager = new PoolingHttpClientConnectionManager();
private final String repo1BaseUrl;
private final String sonatypeRestApiBaseUrl;
private final ThreadLocal expressions = ThreadLocal.withInitial( MyExpressions::new );
private final Map clients = new HashMap<>();
private int maxConcurrentThreads = 10;
// @GuardedBy( statistics )
private final Statistics statistics = new Statistics();
private final Object THREAD_POOL_LOCK=new Object();
private ThreadPoolExecutor threadPool;
private ConfigurationProvider configurationProvider;
public MavenCentralVersionProvider()
{
this( DEFAULT_REPO1_BASE_URL, DEFAULT_SONATYPE_REST_API_BASE_URL);
connManager.setDefaultMaxPerRoute(10);
connManager.setMaxTotal(20);
}
public MavenCentralVersionProvider(String repo1BaseUrl, String sonatypeRestApiBaseUrl)
{
this.repo1BaseUrl = repo1BaseUrl+(repo1BaseUrl.trim().endsWith("/") ? "" : "/" );
this.sonatypeRestApiBaseUrl = sonatypeRestApiBaseUrl+(sonatypeRestApiBaseUrl.trim().endsWith("/") ? "" : "/" );
}
@Override
public void setConfigurationProvider(ConfigurationProvider configurationProvider)
{
Validate.notNull( configurationProvider, "configurationProvider must not be null" );
this.configurationProvider = configurationProvider;
}
public static void main(String[] args) throws IOException
{
final Artifact test = new Artifact();
test.groupId = "org.apache.tomcat";
test.artifactId = "tomcat";
VersionInfo data = new VersionInfo();
data.artifact = test;
long start = System.currentTimeMillis();
final UpdateResult result = new MavenCentralVersionProvider().update( data, false );
long end = System.currentTimeMillis();
System.out.println("TIME: "+(end-start)+" ms");
System.out.println("RESULT: "+result);
System.out.println("GOT: "+data);
System.out.println( "VERSION COUNT: " + data.versions.size() );
data.versions.stream().sorted( (a, b) -> a.versionString.compareToIgnoreCase( b.versionString ) )
.forEach( x -> System.out.println( x.versionString + " => " + x.releaseDate ) );
}
private boolean isBlacklisted(Artifact a) {
return configurationProvider.getConfiguration().getBlacklist().isAllVersionsBlacklisted( a.groupId, a.artifactId );
}
@Override
public UpdateResult update(VersionInfo info, boolean force) throws IOException
{
final Artifact artifact = info.artifact;
if ( isBlacklisted( artifact ) ) {
if ( LOG.isDebugEnabled() ) {
LOG.debug( "update(): Not updating blacklisted artifact " + artifact );
}
info.lastSuccessDate = ZonedDateTime.now();
return UpdateResult.BLACKLISTED;
}
final URL url = new URL( repo1BaseUrl +metaDataPath( artifact ) );
if ( LOG.isDebugEnabled() ) {
LOG.debug("update(): Retrieving metadata for "+info.artifact+" from "+url);
}
try
{
synchronized ( statistics ) {
statistics.metaDataRequests.update();
}
return performGET(url, stream -> {
final Document document = parseXML( stream );
// parse latest snapshot & release versions from metadata
final MyExpressions expr = expressions.get();
// last repository change date
final String lastChangeString = readString(expr.lastUpdateDate, document );
LOG.debug("update(): last repository change = "+lastChangeString);
final ZonedDateTime lastChangeDate = ZonedDateTime.parse( lastChangeString, DATE_FORMATTER);
final ZonedDateTime previousUpdate = info.lastRepositoryUpdate;
if ( previousUpdate != null && previousUpdate.equals( lastChangeDate ) ) {
if ( ! force ) {
LOG.debug( "update(): No changes on server.");
info.lastSuccessDate = ZonedDateTime.now();
return UpdateResult.NO_CHANGES_ON_SERVER;
}
LOG.debug( "update(): Forced artifact update" );
} else {
LOG.debug( "update(): Artifact index XML changed on server" );
}
// get all versions
final List allVersions = queryAllVersions( info.artifact );
info.removeVersionsIf( v -> {
final boolean remove = allVersions.stream().noneMatch( x -> x.versionString.equals( v.versionString ) );
if ( remove ) {
LOG.warn("update(): Version "+v+" is gone from metadata.xml of "+info.artifact+" ?");
}
return remove;
} );
allVersions.forEach( info::addVersion );
final String latestSnapshotVersion = readString(expr.latestSnapshot, document );
if ( StringUtils.isNotBlank( latestSnapshotVersion ) ) {
info.getVersion( latestSnapshotVersion )
.or( () -> Optional.of( new Version(latestSnapshotVersion, null) ) )
.ifPresent( x -> info.latestSnapshotVersion = x );
}
final String latestReleaseVersion = readString(expr.latestRelease, document );
if ( StringUtils.isNotBlank( latestReleaseVersion ) ) {
info.getVersion( latestReleaseVersion )
.or( () -> Optional.of( new Version(latestReleaseVersion, null) ) )
.ifPresent( x -> info.latestReleaseVersion = x );
}
LOG.debug("update(): latest snapshot (metadata) = "+latestSnapshotVersion);
LOG.debug("update(): latest release (metadata) = "+latestReleaseVersion);
info.lastRepositoryUpdate = lastChangeDate;
info.lastSuccessDate = ZonedDateTime.now();
if ( StringUtils.isNotBlank( artifact.version ) && info.getVersion( artifact.version ).isEmpty() )
{
LOG.error( "update(): Found no metadata about version '" + artifact.version + "'of artifact " + info.artifact );
return UpdateResult.ARTIFACT_VERSION_NOT_FOUND;
}
return UpdateResult.UPDATED;
} );
}
catch(Exception e)
{
info.lastFailureDate = ZonedDateTime.now();
if ( e instanceof FileNotFoundException) {
LOG.warn("getLatestVersion(): Failed to find artifact on server: "+info);
return UpdateResult.ARTIFACT_UNKNOWN;
}
LOG.error("getLatestVersion(): Error while retrieving artifact metadata from server: "+info+": "+e.getMessage(), LOG.isDebugEnabled() ? e : null);
throw new IOException(e);
} finally {
LOG.debug("Finished retrieving metadata for "+info.artifact);
}
}
private final ThreadFactory threadFactory = new ThreadFactory() {
private final ThreadGroup threadGroup = new ThreadGroup(Thread.currentThread().getThreadGroup(),"releasedate-request-threads" );
private final AtomicInteger threadId = new AtomicInteger(0);
public Thread newThread(Runnable r)
{
final Thread t = new Thread(threadGroup,r);
t.setDaemon( true );
t.setName("releasedate-request-thread-"+threadId.incrementAndGet());
return t;
}
};
private void submit(Runnable r)
{
synchronized( THREAD_POOL_LOCK )
{
if ( threadPool == null )
{
LOG.info("setMaxConcurrentThreads(): Using "+maxConcurrentThreads+" threads to retrieve artifact metadata.");
final BlockingQueue workingQueue = new ArrayBlockingQueue<>(200);
threadPool = new ThreadPoolExecutor( maxConcurrentThreads, maxConcurrentThreads, 60 , TimeUnit.SECONDS,
workingQueue,threadFactory, new ThreadPoolExecutor.CallerRunsPolicy() );
}
threadPool.submit( r );
}
}
private Map queryReleaseDates(Artifact artifact, Set versionNumbers)
{
final Map result = new HashMap<>();
if ( versionNumbers.isEmpty() ) {
return result;
}
final CountDownLatch latch = new CountDownLatch( versionNumbers.size() );
for ( String versionNumber : versionNumbers )
{
// to stuff
final Runnable r = () ->
{
try {
final Optional v = queryReleaseDate(artifact,versionNumber);
if ( v.isPresent() ) {
synchronized(result) {
result.put(versionNumber,new Version(versionNumber,v.get()) );
}
}
} catch(Exception e) {
LOG.error("readVersion(): Failed to retrieve version '"+versionNumber+"' for "+artifact,e);
} finally {
latch.countDown();
}
};
submit(r);
}
while ( true )
{
try {
if ( latch.await(10,TimeUnit.SECONDS) ) {
return result;
}
} catch(InterruptedException e) { /* can't help it */ }
LOG.debug("readVersions(): Still waiting for "+latch.getCount()+" outstanding requests of artifact "+artifact);
}
}
private Optional queryReleaseDate(Artifact artifact, String versionString) throws IOException {
final URL url = newRESTUrlBuilder()
.groupId( artifact.groupId )
.artifactId( artifact.artifactId )
.version( versionString )
.classifier( artifact.classifier ).build();
return performGET( url, stream -> {
final PartialResult result = parseSonatypeResponse( stream );
return result.getFirstResult().filter( Version::hasReleaseDate ).map( x -> x.releaseDate );
} );
}
/** Sonatype API seems to refuse returning more than 20 results ... we'll have to page through them to get all ... */
private record PartialResult(List data, int totalResultSize) {
private PartialResult
{
Validate.notNull( data, "data must not be null" );
Validate.isTrue( totalResultSize >= 0 );
}
public Optional getFirstResult()
{
return data.isEmpty() ? Optional.empty() : Optional.of( data.get(0) );
}
}
SonatypeRestAPIUrlBuilder newRESTUrlBuilder() {
return new SonatypeRestAPIUrlBuilder( sonatypeRestApiBaseUrl );
}
// code left here because it was a PITA to write and it might come in handy during debugging when
// comparing the Maven indexer XML vs. the real deal
private List queryAllVersions(Artifact artifact) throws IOException
{
final SonatypeRestAPIUrlBuilder urlBuilder = newRESTUrlBuilder()
.groupId( artifact.groupId )
.artifactId( artifact.artifactId )
.classifier( artifact.classifier )
.returnAllResults();
URL restApiURL = urlBuilder.build();
// need to query in a loop here as the REST API seems to refuse returning more than 20 results
// at once
final PartialResult first = performGET( restApiURL, this::parseSonatypeResponse );
int remaining = first.totalResultSize() - first.data().size();
final List result = new ArrayList<>( first.data() );
if ( remaining > 0 ) {
LOG.debug("queryAllVersions(): Artifact "+artifact+" has "+first.totalResultSize()+" releases.");
int offset = first.data().size();
PartialResult tmp;
do
{
restApiURL = urlBuilder.startOffset( offset ).build();
tmp = performGET( restApiURL, this::parseSonatypeResponse );
result.addAll( tmp.data() );
final int resultCount = tmp.data().size();
offset += resultCount;
remaining -= resultCount;
} while (! tmp.data().isEmpty() && remaining > 0 );
}
if ( result.size() != first.totalResultSize() ) {
final String msg = "Tried to retrieve " + first.totalResultSize() + " versions for " + artifact + " " +
"but only got " + result.size();
LOG.error( "queryAllVersions(): " + msg );
throw new IOException( msg );
}
return result;
}
private PartialResult parseSonatypeResponse(InputStream stream) throws IOException
{
final TypeReference> typeRef = new TypeReference<>() {};
final byte[] data = stream.readAllBytes();
final String json = new String( data, StandardCharsets.UTF_8 );
final HashMap map = JSON_MAPPER.readValue( json, typeRef );
/*
{
"responseHeader":{
"status":0,
"QTime":2,
"params":{
"q":"g:de.code-sourcery.versiontracker AND a:versiontracker-enforcerrule AND v:1.0.22",
"core":"",
"indent":"off",
"fl":"id,g,a,v,p,ec,timestamp,tags",
"start":"",
"sort":"score desc,timestamp desc,g asc,a asc,v desc",
"rows":"20",
"wt":"json",
"version":"2.2"
}
},
"response":{
"numFound":1,
"start":0,
"docs":[
{
"id":"de.code-sourcery.versiontracker:versiontracker-enforcerrule:1.0.22",
"g":"de.code-sourcery.versiontracker",
"a":"versiontracker-enforcerrule",
"v":"1.0.22",
"p":"jar",
"timestamp":1714834541000,
"ec":[
"-sources.jar",
".pom",
"-javadoc.jar",
".jar"
]
}
]
}
}
*/
final List result = new ArrayList<>();
final Map response = (Map) map.get( "response" );
if ( response == null )
{
throw new IOException( "getReleaseDateNew(): JSON response contained no 'response' attribute?" );
}
if ( ! response.containsKey( "numFound" ) )
{
throw new IOException( "JSON response contained no 'numFound' attribute?" );
}
final int numFound = ((Number) (response.get( "numFound" ))).intValue();
if ( LOG.isTraceEnabled() )
{
LOG.trace( "getReleaseDateNew(): Response found " + numFound + " artifacts" );
}
final List