All Downloads are FREE. Search and download functionalities are using the official Maven repository.

us.codecraft.webmagic.model.sources.Source Maven / Gradle / Ivy

The newest version!
package us.codecraft.webmagic.model.sources;

import java.util.List;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.model.FieldExtractor;

public interface Source {
   public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor);
   public List getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor);

   public class RawHtml implements Source {
      public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         return page.getHtml().selectDocument(fieldExtractor.getSelector());
      }
   
      public List getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         return page.getHtml().selectDocumentForList(fieldExtractor.getSelector());
      }
   }
   
   public class SelectedHtml implements Source {
      public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         if (isRaw)
            return page.getHtml().selectDocument(fieldExtractor.getSelector());
         else
            return fieldExtractor.getSelector().select(html);
      }
   
      public List getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         if (isRaw)
            return page.getHtml().selectDocumentForList(fieldExtractor.getSelector());
         else
            return fieldExtractor.getSelector().selectList(html);
      }
   }
   
   public class Url implements Source {
      public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         return fieldExtractor.getSelector().select(page.getUrl().toString());
      }
   
      public List getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         return fieldExtractor.getSelector().selectList(page.getUrl().toString());
      }
   }
   
   public class RawText implements Source {
      public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         return fieldExtractor.getSelector().select(page.getRawText());
      }
   
      public List getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         return fieldExtractor.getSelector().selectList(page.getRawText());
      }
   }
   
   public class DefaultSource implements Source {
      public String getText(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         return fieldExtractor.getSelector().select(html);
      }
   
      public List getTextList(Page page, String html, boolean isRaw, FieldExtractor fieldExtractor) {
         return fieldExtractor.getSelector().selectList(html);
      }
   }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy