All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.esigate.extension.DefaultCharset Maven / Gradle / Ivy

There is a newer version: 5.3
Show newest version
package org.esigate.extension;

import java.util.Collection;
import java.util.Properties;

import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import org.esigate.Driver;
import org.esigate.Parameters;
import org.esigate.events.Event;
import org.esigate.events.EventDefinition;
import org.esigate.events.EventManager;
import org.esigate.events.IEventListener;
import org.esigate.events.impl.FetchEvent;
import org.esigate.util.Parameter;
import org.esigate.util.ParameterString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This extension adds a default charset to responses which lack the charset attribute in Content-Type header. Only
 * parsable MIME types are modified :
 * 
 * 
 * Content-Type: text/html
 * 
* * becomes * *
 * Content-Type:text/html; charset=utf-8
 * 
* *

* Default charset can be set in esigate.properties using * *

 * driverid.defaultCharset = utf - 8
 * 
* * @author Nicolas Richeton * */ public class DefaultCharset implements Extension, IEventListener { private static final Logger LOG = LoggerFactory.getLogger(DefaultCharset.class); /** default charset. */ public static final Parameter PARAM_DEFAULT_CHARSET = new ParameterString("defaultCharset", "ISO-8859-1"); private Collection parsableContentTypes; private String defaultCharset; @Override public void init(Driver driver, Properties properties) { driver.getEventManager().register(EventManager.EVENT_FETCH_POST, this); parsableContentTypes = Parameters.PARSABLE_CONTENT_TYPES.getValue(properties); defaultCharset = PARAM_DEFAULT_CHARSET.getValue(properties); LOG.info("Will use " + defaultCharset + " as default charset for " + parsableContentTypes.toString()); } @Override public boolean event(EventDefinition arg0, Event arg1) { FetchEvent fe = (FetchEvent) arg1; Header contentTypeHeader = fe.getHttpResponse().getFirstHeader("Content-Type"); // No content type, there is nothing we can do if (contentTypeHeader == null) { return true; } String contentType = contentTypeHeader.getValue(); // Charset is present -> OK if (StringUtils.containsIgnoreCase(contentType, "charset")) { return true; } // Is document parsable boolean parsable = false; for (String parseableContentType : parsableContentTypes) { if (StringUtils.containsIgnoreCase(contentType, parseableContentType)) { parsable = true; break; } } // Add default charset if (parsable) { fe.getHttpResponse().setHeader("Content-Type", contentType + "; charset=" + defaultCharset); } return true; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy