All downloads are FREE. Search and download functionality uses the official Maven repository.

gobblin.gobblin-example.0.7.0.source-code.wikipedia.pull Maven / Gradle / Ivy

There is a newer version: 0.11.0
Show newest version
# Gobblin job configuration that pulls revisions of selected Wikipedia pages.
# Only full-line comments are used in this file; text after a value on the
# same line would become part of that value.

# --- Job identity ---
# Name, group and human-readable description of this job.
job.name=PullFromWikipedia
job.group=Wikipedia
job.description=A getting started example for Gobblin

# --- Source ---
# Fully qualified class name of the source used by this job.
source.class=gobblin.example.wikipedia.WikipediaSource
# Comma-separated list of titles (presumably the Wikipedia pages to pull;
# confirm against WikipediaSource).
source.page.titles=NASA,LinkedIn,Parris_Cues,Barbara_Corcoran
# NOTE(review): looks like the number of revisions fetched per page; verify
# against the source/extractor implementation.
source.revisions.cnt=5

# --- Wikipedia API and record schema ---
# Base URL of the English Wikipedia API. The query string fixes format=json,
# action=query, prop=revisions and requests the revision properties
# content, timestamp, user, userid and size (pipe-separated in rvprop).
wikipedia.api.rooturl=https://en.wikipedia.org/w/api.php?format=json&action=query&prop=revisions&rvprop=content|timestamp|user|userid|size
# Avro schema, as single-line JSON, for record "WikipediaArticle" in namespace
# "example.wikipedia.avro". It declares ten fields: pageid, title, user, anon,
# userid, timestamp, size, contentformat, contentmodel, content. Every field
# is a union with "null"; pageid, userid and size are typed "double", the rest
# "string".
wikipedia.avro.schema={"namespace": "example.wikipedia.avro","type": "record","name": "WikipediaArticle","fields": [{"name": "pageid", "type": ["double", "null"]},{"name": "title", "type": ["string", "null"]},{"name": "user", "type": ["string", "null"]},{"name": "anon", "type": ["string", "null"]},{"name": "userid",  "type": ["double", "null"]},{"name": "timestamp", "type": ["string", "null"]},{"name": "size",  "type": ["double", "null"]},{"name": "contentformat",  "type": ["string", "null"]},{"name": "contentmodel",  "type": ["string", "null"]},{"name": "content", "type": ["string", "null"]}]}

# --- Converter ---
# Converter class(es) for this job; only one is listed here.
converter.classes=gobblin.example.wikipedia.WikipediaConverter

# --- Extract ---
# Namespace assigned to the extract.
extract.namespace=gobblin.example.wikipedia

# --- Writer ---
# Destination type and output format for the writer, plus the class used to
# partition written records.
writer.destination.type=HDFS
writer.output.format=AVRO
writer.partitioner.class=gobblin.example.wikipedia.WikipediaPartitioner

# --- Publisher ---
# Fully qualified class name of the data publisher.
data.publisher.type=gobblin.publisher.BaseDataPublisher




© 2015 - 2025 Weber Informatics LLC | Privacy Policy