All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.undertow.servlet.handlers.CrawlerSessionManagerHandler Maven / Gradle / Ivy

/*
 * JBoss, Home of Professional Open Source.
 * Copyright 2014 Red Hat, Inc., and individual contributors
 * as indicated by the @author tags.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package io.undertow.servlet.handlers;

import java.io.Serializable;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import javax.servlet.http.HttpSession;
import javax.servlet.http.HttpSessionBindingEvent;
import javax.servlet.http.HttpSessionBindingListener;

import io.undertow.UndertowLogger;
import io.undertow.server.ExchangeCompletionListener;
import io.undertow.server.HttpHandler;
import io.undertow.server.HttpServerExchange;
import io.undertow.servlet.api.CrawlerSessionManagerConfig;
import io.undertow.util.HeaderValues;
import io.undertow.util.Headers;

/**
 * Web crawlers can trigger the creation of many thousands of sessions as they
 * crawl a site, which may result in significant memory consumption. This handler
 * ensures that crawlers are associated with a single session - just like normal
 * users - regardless of whether or not they provide a session token with their
 * requests.
 * <p>
 * A request is treated as coming from a crawler when it has no existing session
 * and carries exactly one User-Agent header whose value fully matches the
 * pattern supplied by {@link CrawlerSessionManagerConfig#getCrawlerUserAgents()}.
 * Crawler sessions are tracked by the remote address of the client.
 */
public class CrawlerSessionManagerHandler implements HttpHandler {

    /** Session attribute under which the cleanup listener is stored. */
    private static final String SESSION_ATTRIBUTE_NAME = "listener_" + CrawlerSessionManagerHandler.class.getName();

    /** Crawler remote address -> id of the session recorded for that address. */
    private final Map<String, String> clientIpSessionId = new ConcurrentHashMap<>();
    /** Session id -> crawler remote address; the reverse of {@link #clientIpSessionId}. */
    private final Map<String, String> sessionIdClientIp = new ConcurrentHashMap<>();

    private final CrawlerSessionManagerConfig config;
    private final Pattern uaPattern;

    private final HttpHandler next;


    /**
     * @param config supplies the crawler User-Agent regular expression and the
     *               inactive interval applied to crawler sessions
     * @param next   the handler invoked after crawler detection has run
     */
    public CrawlerSessionManagerHandler(CrawlerSessionManagerConfig config, HttpHandler next) {
        this.config = config;
        this.next = next;
        this.uaPattern = Pattern.compile(config.getCrawlerUserAgents());
    }

    /**
     * Detects crawler requests, points them at the session already recorded for
     * their remote address (if any), registers a completion listener that
     * records a newly created crawler session, and then delegates to the next
     * handler.
     *
     * @param exchange the current exchange
     * @throws Exception propagated from the next handler
     */
    @Override
    public void handleRequest(HttpServerExchange exchange) throws Exception {

        boolean isBot = false;
        String sessionId = null;
        String clientIp = null;
        ServletRequestContext src = exchange.getAttachment(ServletRequestContext.ATTACHMENT_KEY);

        // If the incoming request has a valid session ID, no action is required
        if (src.getOriginalRequest().getSession(false) == null) {
            isBot = isCrawler(exchange);

            // If this is a bot, is the session ID known?
            if (isBot) {
                clientIp = src.getServletRequest().getRemoteAddr();
                sessionId = clientIpSessionId.get(clientIp);
                if (sessionId != null) {
                    src.setOverridenSessionId(sessionId);
                    if (UndertowLogger.REQUEST_LOGGER.isDebugEnabled()) {
                        UndertowLogger.REQUEST_LOGGER.debug(exchange + ": SessionID=" +
                                sessionId);
                    }
                }
            }
        }

        if (isBot) {
            // Explicitly final copies so the anonymous listener can capture them.
            final String finalSessionId = sessionId;
            final String finalClientId = clientIp;
            exchange.addExchangeCompleteListener(new ExchangeCompletionListener() {

                @Override
                public void exchangeEvent(HttpServerExchange exchange, NextListener nextListener) {
                    try {
                        ServletRequestContext src = exchange.getAttachment(ServletRequestContext.ATTACHMENT_KEY);
                        if (finalSessionId == null) {
                            // Has bot just created a session, if so make a note of it
                            HttpSession s = src.getOriginalRequest().getSession(false);
                            if (s != null) {
                                clientIpSessionId.put(finalClientId, s.getId());
                                sessionIdClientIp.put(s.getId(), finalClientId);
                                // #valueUnbound() will be called on session expiration
                                s.setAttribute(SESSION_ATTRIBUTE_NAME, new CrawlerBindingListener(clientIpSessionId, sessionIdClientIp));
                                s.setMaxInactiveInterval(config.getSessionInactiveInterval());

                                if (UndertowLogger.REQUEST_LOGGER.isDebugEnabled()) {
                                    UndertowLogger.REQUEST_LOGGER.debug(exchange +
                                            ": New bot session. SessionID=" + s.getId());
                                }
                            }
                        } else {
                            if (UndertowLogger.REQUEST_LOGGER.isDebugEnabled()) {
                                UndertowLogger.REQUEST_LOGGER.debug(exchange +
                                        ": Bot session accessed. SessionID=" + finalSessionId);
                            }
                        }
                    } finally {
                        // Always hand over to the remaining completion listeners.
                        nextListener.proceed();
                    }
                }
            });

        }
        next.handleRequest(exchange);
    }

    /**
     * Checks the User-Agent header(s) of the request against the crawler pattern.
     *
     * @param exchange the current exchange
     * @return {@code true} only if there is exactly one non-null User-Agent
     *         header value and it fully matches the configured pattern
     */
    private boolean isCrawler(HttpServerExchange exchange) {
        HeaderValues userAgentHeaders = exchange.getRequestHeaders().get(Headers.USER_AGENT);
        if (userAgentHeaders == null) {
            return false;
        }

        Iterator<String> uaHeaders = userAgentHeaders.iterator();
        if (!uaHeaders.hasNext()) {
            return false;
        }
        String uaHeader = uaHeaders.next();

        // If more than one UA header - assume not a bot
        if (uaHeader == null || uaHeaders.hasNext()) {
            return false;
        }

        if (!uaPattern.matcher(uaHeader).matches()) {
            return false;
        }

        if (UndertowLogger.REQUEST_LOGGER.isDebugEnabled()) {
            UndertowLogger.REQUEST_LOGGER.debug(exchange +
                    ": Bot found. UserAgent=" + uaHeader);
        }
        return true;
    }

}

/**
 * Session attribute that removes a crawler session from the tracking maps
 * when the attribute is unbound from its session.
 * <p>
 * Both map references are {@code transient}, so an instance restored through
 * Java serialization has them set to {@code null}; {@link #valueUnbound} then
 * does nothing.
 */
class CrawlerBindingListener implements HttpSessionBindingListener, Serializable {
    private static final long serialVersionUID = -8841692120840734349L;
    /** Crawler remote address -> session id. */
    private transient Map<String, String> clientIpSessionId;
    /** Session id -> crawler remote address. */
    private transient Map<String, String> sessionIdClientIp;

    /**
     * @param clientIpSessionId map from remote address to session id
     * @param sessionIdClientIp map from session id to remote address
     */
    CrawlerBindingListener(Map<String, String> clientIpSessionId, Map<String, String> sessionIdClientIp) {
        this.clientIpSessionId = clientIpSessionId;
        this.sessionIdClientIp = sessionIdClientIp;
    }

    @Override
    public void valueBound(HttpSessionBindingEvent event) {
        // NOOP
    }

    /**
     * Removes the entry for the event's session from both maps.
     *
     * @param event the unbinding event; its session id is the lookup key
     */
    @Override
    public void valueUnbound(HttpSessionBindingEvent event) {
        // The maps are null when this instance was restored by deserialization.
        if (sessionIdClientIp != null) {
            String clientIp = sessionIdClientIp.remove(event.getSession().getId());
            if (clientIp != null) {
                clientIpSessionId.remove(clientIp);
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy