All downloads are free. Search and download functionality uses the official Maven repository.

groovyx.net.http.ParserRegistry Maven / Gradle / Ivy

Go to download

A builder-style HTTP client API, including authentication, and extensible handling of common content-types such as JSON and XML. It is built on top of Apache's HttpClient.

There is a newer version: 0.7.1
Show newest version
/*
 * Copyright 2008-2011 Thomas Nichols.  http://blog.thomnichols.org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * You are receiving this code free of charge, which represents many hours of
 * effort from other individuals and corporations.  As a responsible member
 * of the community, you are encouraged (but not required) to donate any
 * enhancements or improvements back to the community under a similar open
 * source license.  Thank you. -TMN
 */
package groovyx.net.http;

import groovy.json.JsonSlurper;
import groovy.lang.Closure;
import groovy.util.XmlSlurper;
import groovy.util.slurpersupport.GPathResult;
import groovyx.net.http.HTTPBuilder.RequestConfigDelegate;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.entity.HttpEntityWrapper;
import org.apache.http.message.BasicHeader;
import org.apache.xml.resolver.Catalog;
import org.apache.xml.resolver.CatalogManager;
import org.apache.xml.resolver.tools.CatalogResolver;
import org.codehaus.groovy.runtime.MethodClosure;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;


/**
 * 

Keeps track of response parsers for each content type. Each parser * should should be a closure that accepts an {@link HttpResponse} instance, * and returns whatever handler is appropriate for reading the response * data for that content-type. For example, a plain-text response should * probably be parsed with a Reader, while an XML response * might be parsed by an XmlSlurper, which would then be passed to the * response closure.

* *

Note that all methods in this class assume {@link HttpResponse#getEntity()} * return a non-null value. It is the job of the HTTPBuilder instance to ensure * a NullPointerException is not thrown by passing a response that contains no * entity.

* *

You can see the list of content-type parsers that are built-in to the * ParserRegistry class in {@link #buildDefaultParserMap()}.

* * @see ContentType * @author Tom Nichols */ public class ParserRegistry { /** * The default parser used for unregistered content-types. This is a copy * of {@link #parseStream(HttpResponse)}, which is like a no-op that just * returns the unaltered response stream. */ protected final Closure DEFAULT_PARSER = new MethodClosure( this, "parseStream" ); /** * The default charset to use when no charset is given in the Content-Type * header of a response. This can be modifid via {@link #setDefaultCharset(String)}. */ public static final String DEFAULT_CHARSET = "UTF-8"; private Closure defaultParser = DEFAULT_PARSER; private Map registeredParsers = buildDefaultParserMap(); private static String defaultCharset = DEFAULT_CHARSET; protected static final Log log = LogFactory.getLog( ParserRegistry.class ); /** * This CatalogResolver is static to avoid the overhead of re-parsing * the catalog definition file every time. Unfortunately, there's no * way to share a single Catalog instance between resolvers. The * {@link Catalog} class is technically not thread-safe, but as long as you * do not parse catalog files while using the resolver, it should be fine. */ protected static CatalogResolver catalogResolver; static { CatalogManager catalogManager = new CatalogManager(); catalogManager.setIgnoreMissingProperties( true ); catalogManager.setUseStaticCatalog( false ); catalogManager.setRelativeCatalogs( true ); try { catalogResolver = new CatalogResolver( catalogManager ); catalogResolver.getCatalog().parseCatalog( ParserRegistry.class.getResource( "/catalog/html.xml" ) ); } catch ( IOException ex ) { LogFactory.getLog( ParserRegistry.class ) .warn( "Could not resolve default XML catalog", ex ); } } /** * Set the charset to use for parsing character streams when no charset * is given in the Content-Type header. 
* @param charset the charset to use, or null to use * {@link #DEFAULT_CHARSET} */ public static void setDefaultCharset( String charset ) { defaultCharset = charset == null ? DEFAULT_CHARSET : charset; } /** * Helper method to get the charset from the response. This should be done * when manually parsing any text response to ensure it is decoded using the * correct charset. For instance:
     * Reader reader = new InputStreamReader( resp.getEntity().getContent(),
     *   ParserRegistry.getCharset( resp ) );
* @param resp */ public static String getCharset( HttpResponse resp ) { try { NameValuePair charset = resp.getEntity().getContentType() .getElements()[0].getParameterByName("charset"); if ( charset == null || charset.getValue().trim().equals("") ) { log.debug( "Could not find charset in response; using " + defaultCharset ); return defaultCharset; } return charset.getValue(); } catch ( RuntimeException ex ) { // NPE or OOB Exceptions log.warn( "Could not parse charset from content-type header in response" ); return Charset.defaultCharset().name(); } } /** * Helper method to get the content-type string from the response * (no charset). * @param resp */ public static String getContentType( HttpResponse resp ) { if ( resp.getEntity() == null ) throw new IllegalArgumentException( "Response does not contain data" ); if ( resp.getEntity().getContentType() == null ) throw new IllegalArgumentException( "Response does not have a content-type header" ); try { return resp.getEntity().getContentType().getElements()[0].getName(); } catch ( RuntimeException ex ) { // NPE or OOB Exceptions throw new IllegalArgumentException( "Could not parse content-type from response" ); } } /** * Default parser used for binary data. This simply returns the underlying * response InputStream. * @see ContentType#BINARY * @see HttpEntity#getContent() * @param resp * @return an InputStream the binary response stream * @throws IllegalStateException * @throws IOException */ public InputStream parseStream( HttpResponse resp ) throws IOException { return resp.getEntity().getContent(); } /** * Default parser used to handle plain text data. The response text * is decoded using the charset passed in the response content-type * header. 
* @see ContentType#TEXT * @param resp * @return * @throws UnsupportedEncodingException * @throws IllegalStateException * @throws IOException */ public Reader parseText( HttpResponse resp ) throws IOException { return new InputStreamReader( resp.getEntity().getContent(), ParserRegistry.getCharset( resp ) ); } /** * Default parser used to decode a URL-encoded response. * @see ContentType#URLENC * @param resp * @return * @throws IOException */ public Map parseForm( final HttpResponse resp ) throws IOException { HttpEntity entity = resp.getEntity(); /* URLEncodedUtils won't parse the content unless the content-type is application/x-www-form-urlencoded. Since we want to be able to force parsing regardless of what the content-type header says, we need to 'spoof' the content-type if it's not already acceptable. */ if ( ! ContentType.URLENC.toString().equals( ParserRegistry.getContentType( resp ) ) ) { entity = new HttpEntityWrapper( entity ) { @Override public org.apache.http.Header getContentType() { String value = ContentType.URLENC.toString(); String charset = ParserRegistry.getCharset( resp ); if ( charset != null ) value += "; charset=" + charset; return new BasicHeader( "Content-Type", value ); }; }; } List params = URLEncodedUtils.parse( entity ); Map paramMap = new HashMap(params.size()); for ( NameValuePair param : params ) paramMap.put( param.getName(), param.getValue() ); return paramMap; } /** * Parse an HTML document by passing it through the NekoHTML parser. 
* @see ContentType#HTML * @see org.cyberneko.html.parsers.SAXParser * @see XmlSlurper#parse(Reader) * @param resp HTTP response from which to parse content * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)} * @throws IOException * @throws SAXException */ public GPathResult parseHTML( HttpResponse resp ) throws IOException, SAXException { XMLReader p = new org.cyberneko.html.parsers.SAXParser(); p.setEntityResolver( catalogResolver ); return new XmlSlurper( p ).parse( parseText( resp ) ); } /** * Default parser used to decode an XML response. * @see ContentType#XML * @see XmlSlurper#parse(Reader) * @param resp HTTP response from which to parse content * @return the {@link GPathResult} from calling {@link XmlSlurper#parse(Reader)} * @throws IOException * @throws SAXException * @throws ParserConfigurationException */ public GPathResult parseXML( HttpResponse resp ) throws IOException, SAXException, ParserConfigurationException { XmlSlurper xml = new XmlSlurper(); xml.setEntityResolver( catalogResolver ); return xml.parse( parseText( resp ) ); } /** * Default parser used to decode a JSON response. * @see ContentType#JSON * @param resp * @return * @throws IOException */ public Object parseJSON( HttpResponse resp ) throws IOException { // there is a bug in the JsonSlurper.parse method... //String jsonTxt = DefaultGroovyMethods.getText( parseText( resp ) ); return new JsonSlurper().parse( parseText( resp ) ); } /** *

Returns a map of default parsers. Override this method to change * what parsers are registered by default. A 'parser' is really just a * closure that acceipts an {@link HttpResponse} instance and returns * some parsed data. You can of course call * super.buildDefaultParserMap() and then add or remove * from that result as well.

* *

Default registered parsers are: *

    *
  • {@link ContentType#BINARY} : {@link #parseStream(HttpResponse) parseStream()}
  • *
  • {@link ContentType#TEXT} : {@link #parseText(HttpResponse) parseText()}
  • *
  • {@link ContentType#URLENC} : {@link #parseForm(HttpResponse) parseForm()}
  • *
  • {@link ContentType#XML} : {@link #parseXML(HttpResponse) parseXML()}
  • *
  • {@link ContentType#JSON} : {@link #parseJSON(HttpResponse) parseJSON()}
  • *
*/ protected Map buildDefaultParserMap() { Map parsers = new HashMap(); parsers.put( ContentType.BINARY.toString(), new MethodClosure( this, "parseStream" ) ); parsers.put( ContentType.TEXT.toString(), new MethodClosure(this,"parseText") ); parsers.put( ContentType.URLENC.toString(), new MethodClosure(this,"parseForm") ); parsers.put( ContentType.HTML.toString(), new MethodClosure(this,"parseHTML") ); Closure pClosure = new MethodClosure(this,"parseXML"); for ( String ct : ContentType.XML.getContentTypeStrings() ) parsers.put( ct, pClosure ); pClosure = new MethodClosure(this,"parseJSON"); for ( String ct : ContentType.JSON.getContentTypeStrings() ) parsers.put( ct, pClosure ); return parsers; } /** * Add a new XML catalog definiton to the static XML resolver catalog. * See the * HTTPBuilder source catalog for an example. * * @param catalogLocation URL of a catalog definition file * @throws IOException if the given URL cannot be parsed or accessed for whatever reason. */ public static void addCatalog( URL catalogLocation ) throws IOException { catalogResolver.getCatalog().parseCatalog( catalogLocation ); } /** * Access the default catalog used by all HTTPBuilder instances. * @return the static {@link CatalogResolver} instance */ public static CatalogResolver getCatalogResolver() { return catalogResolver; } /** * Get the default parser used for unregistered content-types. * @return */ public Closure getDefaultParser() { return this.defaultParser; } /** * Set the default parser used for unregistered content-types. * @param defaultParser if */ public void setDefaultParser( Closure defaultParser ) { if ( defaultParser == null ) this.defaultParser = DEFAULT_PARSER; this.defaultParser = defaultParser; } /** * Retrieve a parser for the given response content-type string. This * is called by HTTPBuildre to retrieve the correct parser for a given * content-type. The parser is then used to decode the response data prior * to passing it to a response handler. 
* @param contentType * @return parser that can interpret the given response content type, * or the default parser if no parser is registered for the given * content-type. It should NOT return a null value. */ public Closure getAt( Object contentType ) { String ct = contentType.toString(); int idx = ct.indexOf( ';' ); if ( idx > 0 ) ct = ct.substring( 0, idx ); Closure parser = registeredParsers.get(ct); if ( parser != null ) return parser; log.warn( "Cannot find parser for content-type: " + ct + " -- using default parser."); return defaultParser; } /** * Register a new parser for the given content-type. The parser closure * should accept an {@link HttpResponse} argument and return a type suitable * to be passed as the 'parsed data' argument of a * {@link RequestConfigDelegate#getResponse() response handler} closure. * @param contentType content-type string * @param value code that will parse the HttpResponse and return parsed * data to the response handler. */ public void putAt( Object contentType, Closure value ) { if ( contentType instanceof ContentType ) { for ( String ct : ((ContentType)contentType).getContentTypeStrings() ) this.registeredParsers.put( ct, value ); } else this.registeredParsers.put( contentType.toString(), value ); } /** * Alias for {@link #getAt(Object)} to allow property-style access. * @param key content-type string * @return */ public Closure propertyMissing( Object key ) { return this.getAt( key ); } /** * Alias for {@link #putAt(Object, Closure)} to allow property-style access. * @param key content-type string * @param value parser closure */ public void propertyMissing( Object key, Closure value ) { this.putAt( key, value ); } /** * Iterate over the entire parser map * @return */ public Iterator> iterator() { return this.registeredParsers.entrySet().iterator(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy