// io.undertow.servlet.handlers.CrawlerSessionManagerHandler Maven / Gradle / Ivy
/*
* JBoss, Home of Professional Open Source.
* Copyright 2014 Red Hat, Inc., and individual contributors
* as indicated by the @author tags.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.undertow.servlet.handlers;
import java.io.Serializable;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import javax.servlet.http.HttpSession;
import javax.servlet.http.HttpSessionBindingEvent;
import javax.servlet.http.HttpSessionBindingListener;
import io.undertow.UndertowLogger;
import io.undertow.server.ExchangeCompletionListener;
import io.undertow.server.HttpHandler;
import io.undertow.server.HttpServerExchange;
import io.undertow.servlet.api.CrawlerSessionManagerConfig;
import io.undertow.util.HeaderValues;
import io.undertow.util.Headers;
/**
 * Web crawlers can trigger the creation of many thousands of sessions as they
 * crawl a site, which may result in significant memory consumption. This
 * handler ensures that crawlers are associated with a single session - just
 * like normal users - regardless of whether or not they provide a session
 * token with their requests.
 * <p>
 * A request is treated as coming from a crawler when it has no existing
 * session, carries exactly one {@code User-Agent} header, and that header
 * matches the pattern supplied by
 * {@link CrawlerSessionManagerConfig#getCrawlerUserAgents()}. Crawler sessions
 * are tracked per remote address in two {@link ConcurrentHashMap}s (client IP
 * to session ID and the reverse).
 */
public class CrawlerSessionManagerHandler implements HttpHandler {

    /** Session attribute under which the cleanup listener is stored. */
    private static final String SESSION_ATTRIBUTE_NAME = "listener_" + CrawlerSessionManagerHandler.class.getName();

    /** Remote address of a crawler mapped to the ID of the session it created. */
    private final Map<String, String> clientIpSessionId = new ConcurrentHashMap<>();

    /** Reverse of {@link #clientIpSessionId}: session ID mapped to the crawler's remote address. */
    private final Map<String, String> sessionIdClientIp = new ConcurrentHashMap<>();

    private final CrawlerSessionManagerConfig config;

    /** Compiled form of the configured crawler user-agent expression. */
    private final Pattern uaPattern;

    private final HttpHandler next;

    /**
     * Creates the handler.
     *
     * @param config supplies the crawler user-agent regular expression and the
     *               inactive interval applied to crawler sessions
     * @param next   the handler invoked after crawler detection
     */
    public CrawlerSessionManagerHandler(CrawlerSessionManagerConfig config, HttpHandler next) {
        this.config = config;
        this.next = next;
        this.uaPattern = Pattern.compile(config.getCrawlerUserAgents());
    }

    /**
     * Detects crawler requests, re-attaches a previously recorded session ID to
     * them, registers a completion listener that records any session the
     * crawler creates during this exchange, and then delegates to the next
     * handler.
     *
     * @param exchange the current exchange
     * @throws Exception if the next handler throws
     */
    @Override
    public void handleRequest(HttpServerExchange exchange) throws Exception {
        boolean isBot = false;
        String sessionId = null;
        String clientIp = null;
        ServletRequestContext src = exchange.getAttachment(ServletRequestContext.ATTACHMENT_KEY);

        // If the incoming request has a valid session ID, no action is required
        if (src.getOriginalRequest().getSession(false) == null) {

            // Is this a crawler - check the UA headers
            HeaderValues userAgentHeaders = exchange.getRequestHeaders().get(Headers.USER_AGENT);
            if (userAgentHeaders != null) {
                Iterator<String> uaHeaders = userAgentHeaders.iterator();
                String uaHeader = null;
                if (uaHeaders.hasNext()) {
                    uaHeader = uaHeaders.next();
                }

                // If more than one UA header - assume not a bot
                if (uaHeader != null && !uaHeaders.hasNext()) {
                    if (uaPattern.matcher(uaHeader).matches()) {
                        isBot = true;
                        if (UndertowLogger.REQUEST_LOGGER.isDebugEnabled()) {
                            UndertowLogger.REQUEST_LOGGER.debug(exchange +
                                    ": Bot found. UserAgent=" + uaHeader);
                        }
                    }
                }

                // If this is a bot, is the session ID known?
                if (isBot) {
                    clientIp = src.getServletRequest().getRemoteAddr();
                    sessionId = clientIpSessionId.get(clientIp);
                    if (sessionId != null) {
                        src.setOverridenSessionId(sessionId);
                        if (UndertowLogger.REQUEST_LOGGER.isDebugEnabled()) {
                            UndertowLogger.REQUEST_LOGGER.debug(exchange + ": SessionID=" +
                                    sessionId);
                        }
                    }
                }
            }
        }

        if (isBot) {
            // Effectively-final copies for use inside the anonymous listener.
            final String finalSessionId = sessionId;
            final String finalClientId = clientIp;
            exchange.addExchangeCompleteListener(new ExchangeCompletionListener() {
                @Override
                public void exchangeEvent(HttpServerExchange exchange, NextListener nextListener) {
                    try {
                        ServletRequestContext src = exchange.getAttachment(ServletRequestContext.ATTACHMENT_KEY);
                        if (finalSessionId == null) {
                            // Has bot just created a session, if so make a note of it
                            HttpSession s = src.getOriginalRequest().getSession(false);
                            if (s != null) {
                                clientIpSessionId.put(finalClientId, s.getId());
                                sessionIdClientIp.put(s.getId(), finalClientId);
                                // #valueUnbound() will be called on session expiration
                                s.setAttribute(SESSION_ATTRIBUTE_NAME, new CrawlerBindingListener(clientIpSessionId, sessionIdClientIp));
                                s.setMaxInactiveInterval(config.getSessionInactiveInterval());

                                if (UndertowLogger.REQUEST_LOGGER.isDebugEnabled()) {
                                    UndertowLogger.REQUEST_LOGGER.debug(exchange +
                                            ": New bot session. SessionID=" + s.getId());
                                }
                            }
                        } else {
                            if (UndertowLogger.REQUEST_LOGGER.isDebugEnabled()) {
                                UndertowLogger.REQUEST_LOGGER.debug(exchange +
                                        ": Bot session accessed. SessionID=" + finalSessionId);
                            }
                        }
                    } finally {
                        // Always hand over to the next completion listener, even on failure.
                        nextListener.proceed();
                    }
                }
            });
        }

        next.handleRequest(exchange);
    }
}
/**
 * Session attribute that removes a crawler session from the two tracking maps
 * when the attribute is unbound from its session.
 * <p>
 * The map references are {@code transient}, so they are not written when the
 * session attribute is serialized; a deserialized instance has {@code null}
 * maps and {@link #valueUnbound(HttpSessionBindingEvent)} then does nothing.
 */
class CrawlerBindingListener implements HttpSessionBindingListener, Serializable {

    private static final long serialVersionUID = -8841692120840734349L;

    /** Client IP mapped to session ID; {@code null} after deserialization. */
    private transient Map<String, String> clientIpSessionId;

    /** Session ID mapped to client IP; {@code null} after deserialization. */
    private transient Map<String, String> sessionIdClientIp;

    /**
     * @param clientIpSessionId map from client IP to session ID to clean up on unbind
     * @param sessionIdClientIp map from session ID to client IP to clean up on unbind
     */
    CrawlerBindingListener(Map<String, String> clientIpSessionId, Map<String, String> sessionIdClientIp) {
        this.clientIpSessionId = clientIpSessionId;
        this.sessionIdClientIp = sessionIdClientIp;
    }

    @Override
    public void valueBound(HttpSessionBindingEvent event) {
        // NOOP
    }

    /**
     * Removes the unbinding session's entries from both maps, looking up the
     * client IP by session ID first and then removing the reverse mapping.
     *
     * @param event the unbind event carrying the session
     */
    @Override
    public void valueUnbound(HttpSessionBindingEvent event) {
        // Maps are transient and therefore absent on a deserialized instance.
        if (sessionIdClientIp != null) {
            String clientIp = sessionIdClientIp.remove(event.getSession().getId());
            if (clientIp != null) {
                clientIpSessionId.remove(clientIp);
            }
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy