All downloads are free. Search and download functionality uses the official Maven repository.

com.github.greengerong.PrerenderSeoService Maven / Gradle / Ivy

The newest version!
package com.github.greengerong;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import org.apache.commons.lang.StringUtils;
import org.apache.http.*;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIUtils;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.HeaderGroup;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.Closeable;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import static com.google.common.collect.FluentIterable.from;
import static org.apache.commons.lang.StringUtils.isNotBlank;
import static org.apache.http.HttpHeaders.CONTENT_LENGTH;
import static org.apache.http.HttpHeaders.HOST;

public class PrerenderSeoService {
    /** Name of the request parameter whose presence forces a prerendered response. */
    public static final String ESCAPED_FRAGMENT_KEY = "_escaped_fragment_";

    private static final Logger log = LoggerFactory.getLogger(PrerenderSeoService.class);

    /**
     * The "hop-by-hop" headers, which must not be copied between the client
     * and the prerender service; filled in by the static initializer.
     * See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html
     * <p>
     * An HttpClient HeaderGroup is used rather than a Set because it offers
     * a case-insensitive lookup by header name.
     */
    private static final HeaderGroup hopByHopHeaders;

    /** Client used to call the prerender service; closed in {@link #destroy()}. */
    private CloseableHttpClient httpClient;

    /** Configuration wrapper built from the map handed to the constructor. */
    private PrerenderConfig prerenderConfig;

    /** Optional event handler; re-read from the configuration for each eligible request. */
    private PreRenderEventHandler preRenderEventHandler;

    /**
     * Creates the service from its configuration map.
     * <p>
     * The configuration is wrapped in a {@link PrerenderConfig} first because
     * {@link #getHttpClient()} reads that field; the order of the two
     * statements must not be swapped.
     *
     * @param config the configuration entries handed to {@link PrerenderConfig}
     */
    // NOTE(review): raw Map - the type arguments look like they were lost; confirm
    // against the PrerenderConfig constructor before parameterizing.
    public PrerenderSeoService(Map config) {
        this.prerenderConfig = new PrerenderConfig(config);
        // Overridable call from a constructor: a subclass override runs before
        // the subclass's own fields have been initialized.
        this.httpClient = getHttpClient();
    }

    // Registers the hop-by-hop header names used to filter both copied request
    // and copied response headers.
    static {
        hopByHopHeaders = new HeaderGroup();
        // RFC 2616 section 13.5.1 spells the trailer header "Trailers", but the
        // header actually sent on the wire is "Trailer". Both spellings are
        // listed so the real header is filtered while the historical entry
        // keeps working.
        final String[] headerNames = {
                "Connection", "Keep-Alive", "Proxy-Authenticate", "Proxy-Authorization",
                "TE", "Trailer", "Trailers", "Transfer-Encoding", "Upgrade"};
        for (String headerName : headerNames) {
            // Only the name matters for the lookup, so the value is left null.
            hopByHopHeaders.addHeader(new BasicHeader(headerName, null));
        }
    }

    /**
     * Releases the resources held by this service: destroys the event handler,
     * if one has been set, and closes the HTTP client.
     * <p>
     * The client is closed in a {@code finally} block so that an exception
     * thrown by the handler's {@code destroy()} cannot leak the client.
     */
    public void destroy() {
        try {
            if (preRenderEventHandler != null) {
                preRenderEventHandler.destroy();
            }
        } finally {
            closeQuietly(httpClient);
        }
    }

    /**
     * Answers the request with a prerendered page when it qualifies for one.
     * <p>
     * Any exception raised while doing so is logged and reported as "not
     * handled" rather than propagated to the caller.
     *
     * @param servletRequest  the incoming client request
     * @param servletResponse the response to write the prerendered page to
     * @return {@code true} if a response was produced here, {@code false}
     *         if the request was not eligible or an error occurred
     */
    public boolean prerenderIfEligible(HttpServletRequest servletRequest, HttpServletResponse servletResponse) {
        boolean handled = false;
        try {
            handled = handlePrerender(servletRequest, servletResponse);
        } catch (Exception e) {
            log.error("Prerender service error", e);
        }
        return handled;
    }

    /**
     * Runs the prerender pipeline for an eligible request: the event handler's
     * {@code beforeRender} hook gets the first chance to answer, otherwise the
     * request is proxied to the prerender service.
     *
     * @return {@code true} if either step produced the response
     * @throws URISyntaxException propagated from the eligibility check or the proxy step
     * @throws IOException        propagated from writing the response or calling the service
     */
    private boolean handlePrerender(HttpServletRequest servletRequest, HttpServletResponse servletResponse)
            throws URISyntaxException, IOException {
        if (!shouldShowPrerenderedPage(servletRequest)) {
            return false;
        }
        // The handler is looked up again for every eligible request.
        this.preRenderEventHandler = prerenderConfig.getEventHandler();
        final boolean answeredByHandler = beforeRender(servletRequest, servletResponse);
        return answeredByHandler || proxyPrerenderedPageResponse(servletRequest, servletResponse);
    }

    /**
     * Decides whether the request should be answered with a prerendered page.
     * <p>
     * The checks run in this order, and the first one that applies wins:
     * <ol>
     *   <li>anything other than an HTTP GET is never intercepted;</li>
     *   <li>URLs ending in an ignored (static resource) extension are never intercepted;</li>
     *   <li>if a whitelist is configured, the URL must match it;</li>
     *   <li>if a blacklist is configured, neither URL nor referer may match it;</li>
     *   <li>a request carrying the {@code _escaped_fragment_} parameter is intercepted;</li>
     *   <li>otherwise the User-Agent must be non-blank and belong to a configured crawler.</li>
     * </ol>
     * Trace messages use parameterized logging so nothing is formatted when
     * trace output is disabled.
     *
     * @param request the incoming client request
     * @return {@code true} if the request should be served a prerendered page
     * @throws URISyntaxException declared for callers; kept for signature compatibility
     */
    private boolean shouldShowPrerenderedPage(HttpServletRequest request) throws URISyntaxException {
        final String userAgent = request.getHeader("User-Agent");
        final String url = getRequestURL(request);
        final String referer = request.getHeader("Referer");

        log.trace("checking request for {} from User-Agent {} and referer {}", url, userAgent, referer);

        if (!HttpGet.METHOD_NAME.equals(request.getMethod())) {
            log.trace("Request is not HTTP GET; intercept: no");
            return false;
        }

        if (isInResources(url)) {
            log.trace("request is for a (static) resource; intercept: no");
            return false;
        }

        // A null list means the corresponding filter is not configured.
        final List<String> whiteList = prerenderConfig.getWhitelist();
        if (whiteList != null && !isInWhiteList(url, whiteList)) {
            log.trace("Whitelist is enabled, but this request is not listed; intercept: no");
            return false;
        }

        final List<String> blacklist = prerenderConfig.getBlacklist();
        if (blacklist != null && isInBlackList(url, referer, blacklist)) {
            log.trace("Blacklist is enabled, and this request is listed; intercept: no");
            return false;
        }

        // The escaped-fragment parameter is honoured regardless of the User-Agent.
        if (hasEscapedFragment(request)) {
            log.trace("Request Has _escaped_fragment_; intercept: yes");
            return true;
        }

        if (StringUtils.isBlank(userAgent)) {
            log.trace("Request has blank userAgent; intercept: no");
            return false;
        }

        if (!isInSearchUserAgent(userAgent)) {
            log.trace("Request User-Agent is not a search bot; intercept: no");
            return false;
        }

        log.trace("Defaulting to request intercept(user-agent={}): yes", userAgent);
        return true;
    }

    /**
     * Creates the GET request for the given prerender API URL.
     * Protected, so a subclass may override it to supply a different request object.
     *
     * @param apiUrl the full URL of the prerender service call
     * @return a new {@link HttpGet} for {@code apiUrl}
     */
    protected HttpGet getHttpGet(String apiUrl) {
        return new HttpGet(apiUrl);
    }

    /**
     * Returns the HTTP client provided by the prerender configuration.
     * <p>
     * This method is invoked from the constructor, so an overriding subclass
     * must not rely on its own fields being initialized when it runs.
     *
     * @return the client obtained from {@code prerenderConfig}
     */
    protected CloseableHttpClient getHttpClient() {
        return prerenderConfig.getHttpClient();
    }

    /**
     * Forwards the client's request headers to the outgoing prerender request.
     * <p>
     * Content-Length and the hop-by-hop headers are left out. Every Host value
     * is replaced by the host (and port, when one is given) of the configured
     * prerender service URL, so that a prerender host serving several virtual
     * servers answers from the right one.
     *
     * @param servletRequest the incoming client request whose headers are read
     * @param proxyRequest   the outgoing request that receives the headers
     * @throws java.net.URISyntaxException if the configured prerender service URL is not a valid URI
     */
    private void copyRequestHeaders(HttpServletRequest servletRequest, HttpRequest proxyRequest)
            throws URISyntaxException {
        for (Enumeration names = servletRequest.getHeaderNames(); names.hasMoreElements(); ) {
            final String name = (String) names.nextElement();
            if (name.equalsIgnoreCase(CONTENT_LENGTH) || hopByHopHeaders.containsHeader(name)) {
                continue;
            }
            // A header may be sent more than once; forward each occurrence.
            for (Enumeration values = servletRequest.getHeaders(name); values.hasMoreElements(); ) {
                final String clientValue = (String) values.nextElement();
                final String forwardedValue = name.equalsIgnoreCase(HOST) ? prerenderHostHeader() : clientValue;
                proxyRequest.addHeader(name, forwardedValue);
            }
        }
    }

    /**
     * Builds the Host header value ("host" or "host:port") from the configured
     * prerender service URL.
     */
    private String prerenderHostHeader() throws URISyntaxException {
        final HttpHost serviceHost = URIUtils.extractHost(new URI(prerenderConfig.getPrerenderServiceUrl()));
        final String hostName = serviceHost.getHostName();
        final int port = serviceHost.getPort();
        return port == -1 ? hostName : hostName + ":" + port;
    }

    /**
     * Determines the URL the client asked for.
     * <p>
     * When a forwarded-URL header name is configured and the request carries
     * that header, the header's value is used; otherwise the servlet
     * container's own request URL is returned.
     *
     * @param request the incoming client request
     * @return the forwarded URL if present, else {@code request.getRequestURL()} as a string
     */
    private String getRequestURL(HttpServletRequest request) {
        final String forwardedHeaderName = prerenderConfig.getForwardedURLHeader();
        final String forwardedUrl = forwardedHeaderName == null ? null : request.getHeader(forwardedHeaderName);
        return forwardedUrl != null ? forwardedUrl : request.getRequestURL().toString();
    }

    /**
     * Builds the prerender API URL by appending the page URL to the configured
     * service URL, inserting a "/" between them when the service URL does not
     * already end with one.
     *
     * @param url the full URL of the page to prerender
     * @return the service URL followed by {@code url}
     */
    private String getApiUrl(String url) {
        final String serviceUrl = prerenderConfig.getPrerenderServiceUrl();
        final String separator = serviceUrl.endsWith("/") ? "" : "/";
        return serviceUrl + separator + url;
    }

    /**
     * Copy proxied response headers back to the servlet client.
     */
    private void copyResponseHeaders(HttpResponse proxyResponse, final HttpServletResponse servletResponse) {
        servletResponse.setCharacterEncoding(getContentCharSet(proxyResponse.getEntity()));
        from(Arrays.asList(proxyResponse.getAllHeaders())).filter(new Predicate
() { @Override public boolean apply(Header header) { return !hopByHopHeaders.containsHeader(header.getName()); } }).transform(new Function() { @Override public Boolean apply(Header header) { servletResponse.addHeader(header.getName(), header.getValue()); return true; } }).toList(); } /** * Get the charset used to encode the http entity. */ private String getContentCharSet(final HttpEntity entity) throws ParseException { if (entity == null) { return null; } String charset = null; if (entity.getContentType() != null) { HeaderElement values[] = entity.getContentType().getElements(); if (values.length > 0) { NameValuePair param = values[0].getParameterByName("charset"); if (param != null) { charset = param.getValue(); } } } return charset; } private String getResponseHtml(HttpResponse proxyResponse) throws IOException { HttpEntity entity = proxyResponse.getEntity(); return entity != null ? EntityUtils.toString(entity) : ""; } /** * Copy response body data (the entity) from the proxy to the servlet client. 
*/ private void responseEntity(String html, HttpServletResponse servletResponse) throws IOException { PrintWriter printWriter = servletResponse.getWriter(); try { printWriter.write(html); printWriter.flush(); } finally { closeQuietly(printWriter); } } protected void closeQuietly(Closeable closeable) { try { if (closeable != null) { closeable.close(); } } catch (IOException e) { log.error("Close proxy error", e); } } private boolean hasEscapedFragment(HttpServletRequest request) { return request.getParameterMap().containsKey(ESCAPED_FRAGMENT_KEY); } private boolean isInBlackList(final String url, final String referer, List blacklist) { return from(blacklist).anyMatch(new Predicate() { @Override public boolean apply(String regex) { final Pattern pattern = Pattern.compile(regex); return pattern.matcher(url).matches() || (!StringUtils.isBlank(referer) && pattern.matcher(referer).matches()); } }); } private boolean isInSearchUserAgent(final String userAgent) { return from(prerenderConfig.getCrawlerUserAgents()).anyMatch(new Predicate() { @Override public boolean apply(String item) { return userAgent.toLowerCase().contains(item.toLowerCase()); } }); } private boolean isInResources(final String url) { return from(prerenderConfig.getExtensionsToIgnore()).anyMatch(new Predicate() { @Override public boolean apply(String item) { return (url.indexOf('?') >= 0 ? 
url.substring(0, url.indexOf('?')) : url) .toLowerCase().endsWith(item); } }); } private boolean isInWhiteList(final String url, List whitelist) { return from(whitelist).anyMatch(new Predicate() { @Override public boolean apply(String regex) { return Pattern.compile(regex).matcher(url).matches(); } }); } private boolean beforeRender(HttpServletRequest request, HttpServletResponse response) throws IOException { if (preRenderEventHandler != null) { final String html = preRenderEventHandler.beforeRender(request); if (isNotBlank(html)) { final PrintWriter writer = response.getWriter(); writer.write(html); writer.flush(); closeQuietly(writer); return true; } } return false; } private boolean proxyPrerenderedPageResponse(HttpServletRequest request, HttpServletResponse response) throws IOException, URISyntaxException { final String apiUrl = getApiUrl(getFullUrl(request)); log.trace(String.format("Prerender proxy will send request to:%s", apiUrl)); final HttpGet getMethod = getHttpGet(apiUrl); copyRequestHeaders(request, getMethod); withPrerenderToken(getMethod); CloseableHttpResponse prerenderServerResponse = null; try { prerenderServerResponse = httpClient.execute(getMethod); response.setStatus(prerenderServerResponse.getStatusLine().getStatusCode()); copyResponseHeaders(prerenderServerResponse, response); String html = getResponseHtml(prerenderServerResponse); html = afterRender(request, response, prerenderServerResponse, html); responseEntity(html, response); return true; } finally { closeQuietly(prerenderServerResponse); } } private String afterRender(HttpServletRequest clientRequest, HttpServletResponse clientResponse, CloseableHttpResponse prerenderServerResponse, String responseHtml) { if (preRenderEventHandler != null) { return preRenderEventHandler.afterRender(clientRequest, clientResponse, prerenderServerResponse, responseHtml); } return responseHtml; } private void withPrerenderToken(HttpRequest proxyRequest) { final String token = 
prerenderConfig.getPrerenderToken(); //for new version prerender with token. if (isNotBlank(token)) { proxyRequest.addHeader("X-Prerender-Token", token); } } private String getFullUrl(HttpServletRequest request) { final String url = getRequestURL(request); final String queryString = request.getQueryString(); return isNotBlank(queryString) ? String.format("%s?%s", url, queryString) : url; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy