All downloads are free. Search and download functionality uses the official Maven repository.

config.sites.amazon.other.application-depth1Crawler.properties Maven / Gradle / Ivy

There is a newer version: 1.9.19
Show newest version
# Proxy settings.
# NOTE(review): PT5M looks like an ISO-8601 duration (5 minutes), which differs
# from the "2m" shorthand used by metrics.slf4j.report.interval — confirm the
# loader accepts both formats.
proxy.enable.default.providers=true
proxy.idle.timeout=PT5M
proxy.max.fetch.success=40000

# Browser settings.
# NOTE(review): headless mode is off while launching with xvfb is on —
# presumably the browser runs with a GUI inside a virtual display; confirm.
# NOTE(review): browser.chrome.path is an absolute Linux path, so this profile
# presumably only works on hosts where Chrome is installed there — verify.
browser.driver.headless=false
browser.launch.with.xvfb=true
browser.js.invading.enabled=true
browser.chrome.path=/usr/bin/google-chrome-stable
browser.eager.allocate.tabs=false
browser.max.active.tabs=20

# Interval between metrics reports.
# Time format: a number followed by a unit suffix, e.g. 30s, 2m, 1h.
metrics.slf4j.report.interval=2m

# Category seed list and the window of URLs taken from it.
# NOTE(review): start/limit presumably select URLs [0, 5000) from the
# resource file — confirm against the loader.
load.category.resource=sites/amazon/category/hot/leafCategories.txt
load.category.url.start=0
load.category.url.limit=5000

# Load options. The item expiry is set to 0s so that the database is not
# checked before fetching an item page.
# NOTE(review): presumably "-ii 0s" is the item-expiry flag and "-ol" is the
# out-link CSS selector — confirm against the load-options documentation.
load.options=-i 1d -ii 0s -ps -ol "#zg-ordered-list a[href~=/dp/]"
load.item.page.url.pattern=http.+/dp/.+
load.item.page.store.content=false
# NOTE(review): if this file is read as a standard Java properties file, the
# quotes below are part of the value (a two-character string), not an empty
# string — confirm which one is intended.
load.item.page.db.scan.url.prefix=""
load.item.page.limit=1000000
load.fetch.queue.lower.capacity=200

# Portal seed list for the "circular pnd1" load, with its own load options.
load.circular.pnd1.portals=sites/amazon/seeds/circularPND1Portals.txt
load.circular.pnd1.portal.load.options=-i 1d

# Disabled: the custom browser emulate event handler below has not been tested.
# browser.emulate.event.handler=ai.platon.scent.protocol.browser.emulator.amazon.AmazonBrowserEmulateEventHandler

# Extraction settings.
# NOTE(review): extract.min.content.size has no unit here; presumably bytes
# or characters — confirm.
extract.min.content.size=600000
extract.store.page.model=false
# Disabled: no SQL template resource is configured for extraction.
# extract.sql.template.resource=sites/amazon/sql/x-items-final.sql

# Destination database for synchronised results (MySQL over JDBC).
# SECURITY(review): the username and password below are stored in plain text
# in a file that is published with the artifact, and the URL points at a
# hard-coded public IP address. These credentials should be treated as leaked:
# rotate them and supply the real values from the environment or a secrets
# store instead of committing them here.
sync.dest.jdbc.driver=com.mysql.cj.jdbc.Driver
sync.dest.jdbc.url=jdbc:mysql://47.103.79.201:3306/mallbigdata_us
sync.dest.jdbc.username=mytest
sync.dest.jdbc.password=123456@
# NOTE(review): presumably the number of rows written per batch — confirm.
sync.batch.size=40




© 2015 - 2024 Weber Informatics LLC | Privacy Policy