All downloads are FREE. The search and download features use the official Maven repository.
Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
org.opengraph.OpenGraph Maven / Gradle / Ivy
package org.opengraph;
import java.net.HttpCookie;
import java.net.URI;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.util.HtmlUtils;
import org.springframework.web.util.UriComponentsBuilder;
/**
 * Fetches a web page and extracts link-preview metadata (title, description, image, type,
 * site name) from it, preferring Open Graph {@code <meta property="og:*">} tags and falling
 * back to ordinary HTML elements when those tags are missing.
 *
 * <p>The page is fetched eagerly by the constructor. While fetching, {@code Location} headers
 * are followed, cookies received via {@code Set-Cookie} are replayed on subsequent requests,
 * and pages that point elsewhere through {@code og:url} or {@code <frame src>} are followed
 * as well.
 *
 * <p>Sample URLs this class was written against:
 * <ul>
 * <li>{@code http://shopping.interpark.com/product/productInfo.do?prdNo=4690768866&dispNo=016001}</li>
 * <li>{@code http://goo.gl/fJLWja}</li>
 * <li>{@code http://www.g9.co.kr/Display/VIP/Index/693661469?jaehuid=200007334&NaPm=ct%3Dizi2561k%7Cci%3D9cb0c51ad0cfdc43896816e9b948e9e2390f7d28%7Ctr%3Dslsl%7Csn%3D280455%7Chk%3D875c6ad1b5a326aadb31b1b88228743ad464b374}</li>
 * <li>{@code http://blog.naver.com/mrj_bundang}</li>
 * <li>{@code http://wook.cloudapp.net}</li>
 * <li>{@code http://google.com}</li>
 * <li>{@code http://naver.me/GtlkiRH0}</li>
 * </ul>
 */
public class OpenGraph {

    /** Request budget used by the three-argument constructor. */
    private static final int DEFAULT_MAX_REQUESTS = 5;

    /**
     * Request headers that are never forwarded from the caller-supplied headers.
     * NOTE(review): Accept-Encoding is presumably dropped so the server answers uncompressed - confirm.
     */
    private static final List<String> STRIPPED_HEADERS = Arrays.asList(HttpHeaders.COOKIE, HttpHeaders.ACCEPT_ENCODING);

    /** Media types sent in the {@code Accept} header of every request. */
    private static final List<MediaType> ACCEPTED_TYPES = Collections.singletonList(MediaType.TEXT_HTML);

    /** Matches strings that start with a {@code scheme://} prefix; compiled once instead of per call. */
    private static final Pattern ABSOLUTE_URL = Pattern.compile("\\A[a-z0-9.+-]+://.*", Pattern.CASE_INSENSITIVE);

    /** Parsed body of {@link #response}; an empty document when the body is absent or contains no markup. */
    private final Document document;

    /** Headers sent with every request; the {@code Cookie} entries are rewritten by {@link #setCookie(List)}. */
    private final HttpHeaders headers;

    /** The response the metadata is extracted from. */
    private final ResponseEntity<String> response;

    private final RestTemplate restTemplate;

    /** URI of the page currently used as the base for resolving relative references. */
    private URI uri;

    /** Number of HTTP requests issued so far. */
    private int count;

    /**
     * Fetches {@code uri} with the default request budget of {@value #DEFAULT_MAX_REQUESTS}.
     *
     * @param restTemplate client used for every HTTP request
     * @param uri page to inspect
     * @param headers request headers to start from; not modified
     */
    public OpenGraph(RestTemplate restTemplate, URI uri, HttpHeaders headers) {
        this(restTemplate, uri, headers, DEFAULT_MAX_REQUESTS);
    }

    /**
     * Fetches {@code uri} and parses the resulting page.
     *
     * @param restTemplate client used for every HTTP request
     * @param uri page to inspect; an empty path is replaced by {@code "/"}
     * @param headers request headers to start from. They are copied, so the caller's instance is
     * left untouched; {@code Cookie} and {@code Accept-Encoding} are dropped from the copy and
     * {@code Accept} is set to {@code text/html}. {@code null} is treated as "no headers".
     * @param max request budget: once more than {@code max} requests have been issued, no further
     * redirect is followed and a freshly fetched page is returned without being inspected for
     * {@code og:url} or {@code <frame>} targets
     */
    public OpenGraph(RestTemplate restTemplate, URI uri, HttpHeaders headers, int max) {
        // Work on a private copy: this class strips headers and accumulates cookies, which must
        // not leak into an object owned by the caller.
        HttpHeaders requestHeaders = new HttpHeaders();
        if (headers != null) {
            requestHeaders.putAll(headers);
        }
        for (String name : STRIPPED_HEADERS) {
            requestHeaders.remove(name);
        }
        requestHeaders.setAccept(ACCEPTED_TYPES);
        this.headers = requestHeaders;
        this.restTemplate = restTemplate;
        this.count = 0;
        this.uri = withRootPath(uri);
        this.response = getResponse(this.uri, max, true);
        String body = this.response.getBody();
        this.document = containsMarkup(body) ? Jsoup.parse(body) : Jsoup.parse("");
    }

    /**
     * Issues one GET request, records {@code uri} as the current base URI, increments the request
     * counter and merges any received cookies into the request headers.
     */
    private ResponseEntity<String> getResponseEntity(URI uri) {
        this.uri = uri;
        this.count++;
        ResponseEntity<String> entity = this.restTemplate.exchange(uri, HttpMethod.GET,
                new HttpEntity<Void>(null, this.headers), String.class);
        setCookie(entity.getHeaders().get(HttpHeaders.SET_COOKIE));
        return entity;
    }

    /**
     * Merges freshly received {@code Set-Cookie} header values into the {@code Cookie} request
     * headers. A new cookie replaces a previously stored one of the same name; cookies without a
     * value are not sent.
     *
     * @param cookies raw {@code Set-Cookie} header values, may be {@code null} or empty
     */
    private void setCookie(List<String> cookies) {
        if (CollectionUtils.isEmpty(cookies)) {
            return;
        }
        Set<HttpCookie> merged = new LinkedHashSet<HttpCookie>();
        for (String header : cookies) {
            List<HttpCookie> parsed;
            try {
                parsed = HttpCookie.parse(header);
            }
            catch (IllegalArgumentException ignored) {
                // A single malformed Set-Cookie header must not abort the whole fetch; skip it.
                continue;
            }
            for (HttpCookie cookie : parsed) {
                // HttpCookie equality also considers domain and path; clearing both makes the
                // set de-duplicate by name only.
                cookie.setDomain(null);
                cookie.setPath(null);
                merged.add(cookie);
            }
        }
        // Previously stored cookies are added after the new ones, so Set.add keeps the new value
        // whenever the name is already present.
        List<String> stored = this.headers.get(HttpHeaders.COOKIE);
        if (!CollectionUtils.isEmpty(stored)) {
            for (String pair : stored) {
                int separator = pair.indexOf('=');
                if (separator != -1) {
                    merged.add(new HttpCookie(pair.substring(0, separator), pair.substring(separator + 1)));
                }
            }
        }
        this.headers.remove(HttpHeaders.COOKIE);
        for (HttpCookie cookie : merged) {
            if (StringUtils.hasText(cookie.getValue())) {
                this.headers.add(HttpHeaders.COOKIE, cookie.getName() + '=' + cookie.getValue());
            }
        }
    }

    /**
     * Fetches {@code uri}, follows {@code Location} headers and, when {@code recursive} is set,
     * follows the page's {@code og:url} and {@code <frame src>} targets.
     *
     * @param uri page to fetch
     * @param max request budget, see the constructor
     * @param recursive whether the fetched page is inspected for {@code og:url} / {@code <frame>}
     * targets
     * @return the first successfully (2xx) fetched target page, otherwise the response for
     * {@code uri} after redirects
     */
    private ResponseEntity<String> getResponse(URI uri, int max, boolean recursive) {
        ResponseEntity<String> response = getResponseEntity(uri);
        if (this.count > max) {
            return response;
        }
        // Follow redirects, but never beyond the request budget: a redirect cycle would
        // otherwise loop forever.
        URI location = response.getHeaders().getLocation();
        while (location != null && this.count <= max) {
            uri = getValidPath(location.toString());
            response = getResponseEntity(uri);
            location = response.getHeaders().getLocation();
        }
        String body = response.getBody();
        if (!recursive || !containsMarkup(body)) {
            return response;
        }
        Document page = Jsoup.parse(body);
        // If the page declares a different URL via <meta property="og:url">, fetch that one instead.
        for (Element meta : page.getElementsByAttributeValue("property", "og:url")) {
            String text = meta.attr("content");
            if (!StringUtils.hasText(text)) {
                continue;
            }
            URI target = getValidPath(text);
            if (uri.toString().equals(text) || uri.equals(target)) {
                // og:url points at the page that was just fetched.
                continue;
            }
            ResponseEntity<String> nested = fetchNested(target, max, true);
            if (nested != null) {
                return nested;
            }
        }
        // If the page is a frameset, fetch the first <frame> that loads successfully. Frames are
        // fetched non-recursively.
        for (Element frame : page.getElementsByTag("frame")) {
            String text = frame.attr("src");
            if (!StringUtils.hasText(text)) {
                continue;
            }
            ResponseEntity<String> nested = fetchNested(getValidPath(text), max, false);
            if (nested != null) {
                return nested;
            }
        }
        return response;
    }

    /**
     * Fetches a page referenced by the current one.
     *
     * @return the response when it is 2xx, otherwise {@code null}; in the latter case the base URI
     * is reset so that it keeps describing the page that is actually going to be used
     */
    private ResponseEntity<String> fetchNested(URI target, int max, boolean recursive) {
        URI base = this.uri;
        ResponseEntity<String> nested = getResponse(target, max, recursive);
        if (nested.getStatusCode().is2xxSuccessful()) {
            return nested;
        }
        this.uri = base;
        return null;
    }

    /**
     * Returns whether {@code body} contains any markup at all. {@code Jsoup.isValid} with
     * {@code Whitelist.none()} accepts text only, so "not valid" means "has at least one tag".
     */
    private static boolean containsMarkup(String body) {
        return body != null && !Jsoup.isValid(body, Whitelist.none());
    }

    /**
     * Returns the {@code content} attribute of the first element whose {@code property} attribute
     * equals {@code value}.
     *
     * @param value property to look up, e.g. {@code "og:type"}
     * @return the content, or {@code null} when there is none or it is blank
     */
    public String getContent(String value) {
        return metaContent("property", value);
    }

    /**
     * Picks an image for the page. Candidates are tried in this order: {@code og:image}; a
     * {@code <link>} in the head whose {@code href} matches {@code .ico} or {@code .png}; a
     * {@code <meta itemprop="image">} in the head; an {@code <img>} with a {@code width} attribute
     * that is the first child of a {@code <div>}; an {@code <img>} inside a {@code <p>}; an
     * {@code <img>} inside a {@code <dd>}; any {@code <img>}.
     *
     * @return the image location resolved against the page URI, or {@code null} when the page
     * offers no candidate
     */
    public URI getImage() {
        String text = metaContent("property", "og:image");
        if (text == null) {
            text = firstAttribute(this.document.head().select("link[href~=.*\\.(ico|png)]"), "href");
        }
        if (text == null) {
            text = firstAttribute(this.document.head().select("meta[itemprop=image]"), "content");
        }
        if (text == null) {
            text = firstSizedImageLeadingDiv();
        }
        if (text == null) {
            text = firstImageWithin("p");
        }
        if (text == null) {
            text = firstImageWithin("dd");
        }
        if (text == null) {
            text = firstAttribute(this.document.getElementsByTag("img"), "src");
        }
        return text == null ? null : getValidPath(text);
    }

    /** Returns the first non-blank value of {@code attribute} among {@code elements}, or {@code null}. */
    private static String firstAttribute(Iterable<Element> elements, String attribute) {
        for (Element element : elements) {
            String text = element.attr(attribute);
            if (StringUtils.hasText(text)) {
                return text;
            }
        }
        return null;
    }

    /**
     * Returns the {@code src} of the first {@code <img>} that is the first child of a
     * {@code <div>} and carries a {@code width} attribute, or {@code null}.
     */
    private String firstSizedImageLeadingDiv() {
        for (Element div : this.document.getElementsByTag("div")) {
            if (div.children().isEmpty()) {
                continue;
            }
            Element first = div.child(0);
            if (first.tagName().equals("img") && first.hasAttr("width")) {
                String text = first.attr("src");
                if (StringUtils.hasText(text)) {
                    return text;
                }
            }
        }
        return null;
    }

    /** Returns the {@code src} of the first {@code <img>} nested in an element named {@code tag}, or {@code null}. */
    private String firstImageWithin(String tag) {
        for (Element container : this.document.getElementsByTag(tag)) {
            String text = firstAttribute(container.getElementsByTag("img"), "src");
            if (text != null) {
                return text;
            }
        }
        return null;
    }

    /**
     * Turns a URL found in a header or in the page into an absolute URI. Strings starting with
     * {@code scheme://} are parsed as they are; everything else is resolved against the current
     * page URI. An empty path is replaced by {@code "/"}.
     */
    private URI getValidPath(String url) {
        URI resolved = isAbsoluteUrl(url)
                ? toUri(UriComponentsBuilder.fromUriString(url))
                : this.uri.resolve(url);
        return withRootPath(resolved);
    }

    /** Returns {@code uri} itself when it has a path, otherwise a copy whose path is {@code "/"}. */
    private static URI withRootPath(URI uri) {
        if (StringUtils.hasText(uri.getPath())) {
            return uri;
        }
        return toUri(UriComponentsBuilder.fromUri(uri).replacePath("/"));
    }

    /**
     * Builds a URI without corrupting input that is already percent-encoded. A plain
     * {@code build()} treats the components as unencoded and {@code toUri()} then quotes every
     * {@code '%'}, turning {@code %3D} into {@code %253D}. {@code build(true)} keeps encoded input
     * verbatim but rejects illegal characters, in which case the encoding variant is used.
     */
    private static URI toUri(UriComponentsBuilder builder) {
        try {
            return builder.build(true).toUri();
        }
        catch (IllegalArgumentException ex) {
            return builder.build().toUri();
        }
    }

    /** Returns whether {@code url} starts with a {@code scheme://} prefix; {@code null} is not absolute. */
    private boolean isAbsoluteUrl(String url) {
        return url != null && ABSOLUTE_URL.matcher(url).matches();
    }

    /**
     * Returns the page title: {@code og:title}, else {@code <meta name="title">}, else the
     * document's {@code <title>}.
     *
     * @return the title, or {@code null} when none of the sources has text
     */
    public String getTitle() {
        String text = metaContent("property", "og:title");
        if (text == null) {
            text = metaContent("name", "title");
        }
        if (text == null) {
            String title = this.document.title();
            text = StringUtils.hasText(title) ? title : null;
        }
        return text;
    }

    /**
     * Returns the page description: {@code og:description}, else
     * {@code <meta name="description">}, else the text of the first non-blank {@code <p>}, else
     * the text of the first non-blank {@code <div>}.
     *
     * @return the description, or {@code null} when none of the sources has text
     */
    public String getDescription() {
        String text = metaContent("property", "og:description");
        if (text == null) {
            text = metaContent("name", "description");
        }
        if (text == null) {
            text = firstText("p");
        }
        if (text == null) {
            text = firstText("div");
        }
        return text;
    }

    /**
     * Returns the {@code content} attribute of the first element having it among those whose
     * attribute {@code key} equals {@code value}, or {@code null} when it is missing or blank.
     */
    private String metaContent(String key, String value) {
        Elements matches = this.document.getElementsByAttributeValue(key, value);
        // Elements.attr yields "" when no matched element carries the attribute.
        String text = matches.attr("content");
        return StringUtils.hasText(text) ? text : null;
    }

    /** Returns the text of the first element named {@code tag} that has non-blank text, or {@code null}. */
    private String firstText(String tag) {
        for (Element element : this.document.getElementsByTag(tag)) {
            if (element.hasText() && StringUtils.hasText(element.text())) {
                return element.text();
            }
        }
        return null;
    }

    /** Returns the URI of the page the metadata is read from, i.e. after redirects and followed targets. */
    public URI getUri() {
        return this.uri;
    }

    /** Returns the status of the final response; every 4xx and 5xx status is reported as {@code 404 NOT_FOUND}. */
    public HttpStatus getStatus() {
        HttpStatus status = this.response.getStatusCode();
        return status.is4xxClientError() || status.is5xxServerError() ? HttpStatus.NOT_FOUND : status;
    }

    /**
     * Collects the preview data into an insertion-ordered map with the keys {@code title},
     * {@code url}, {@code description}, {@code type}, {@code site_name} and {@code image}.
     *
     * <p>{@code title} and {@code url} fall back to the page {@link URI} (as a {@code URI} object,
     * not a string) when the page does not provide them; textual values are HTML-unescaped.
     * {@code image} is a nested map holding {@code url} plus, when an image was found,
     * {@code width} and {@code height}; without an image its {@code url} points at
     * {@code /favicon.ico} of the page's host.
     */
    public Map<String, Object> getMap() {
        Map<String, Object> map = new LinkedHashMap<String, Object>();
        String title = getTitle();
        map.put("title", title == null ? this.uri : HtmlUtils.htmlUnescape(title));
        String url = getContent("og:url");
        map.put("url", url == null ? this.uri : HtmlUtils.htmlUnescape(url));
        String description = getDescription();
        map.put("description", description == null ? null : HtmlUtils.htmlUnescape(description));
        map.put("type", getContent("og:type"));
        map.put("site_name", getContent("og:site_name"));
        map.put("image", imageMap(getImage()));
        return map;
    }

    /** Builds the nested {@code image} entry of {@link #getMap()}. */
    private Map<String, Object> imageMap(URI image) {
        if (image == null) {
            URI favicon = UriComponentsBuilder.fromUri(this.uri).replacePath("favicon.ico").replaceQuery("").build().toUri();
            return Collections.<String, Object>singletonMap("url", favicon);
        }
        Map<String, Object> imageMap = new LinkedHashMap<String, Object>();
        imageMap.put("url", image);
        imageMap.put("width", getContent("og:image:width"));
        imageMap.put("height", getContent("og:image:height"));
        return imageMap;
    }
}