org.archive.crawler.migrate.H1toH3.map Maven / Gradle / Ivy
The newest version!
metadata.jobName|//meta/name|default
metadata.description|//meta/description|Default Profile
metadata.operator|//meta/operator|Admin
$|//meta/date|20091006215050
$|//controller/*[@name="settings-directory"]|settings
loggerModule.path|//controller/*[@name="logs-path"]|logs
crawlController.checkpointsDir|//controller/*[@name="checkpoints-path"]|checkpoints
bdb.dir|//controller/*[@name="state-path"]|state
crawlController.scratchDir|//controller/*[@name="scratch-path"]|scratch
crawlLimiter.maxBytesDownload|//controller/*[@name="max-bytes-download"]|0
crawlLimiter.maxDocumentsDownload|//controller/*[@name="max-document-download"]|0
crawlLimiter.maxTimeSeconds|//controller/*[@name="max-time-sec"]|0
crawlController.maxToeThreads|//controller/*[@name="max-toe-threads"]|50
crawlController.recorderOutBufferBytes|//controller/*[@name="recorder-out-buffer-bytes"]|4096
crawlController.recorderInBufferBytes|//controller/*[@name="recorder-in-buffer-bytes"]|65536
bdb.cachePercent|//controller/*[@name="bdb-cache-percent"]|0
$|//controller/*[@name="scope"]/*[@name="enabled"]|true
seeds.textSource.path|//controller/*[@name="scope"]/*[@name="seedsfile"]|seeds.txt
$|//controller/*[@name="scope"]/*[@name="reread-seeds-on-config"]|true
scope.rules[1].decision|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptIfSurtPrefixed"]/*[@name="decision"]|ACCEPT
scope.rules[1].seedsAsSurtPrefixes|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptIfSurtPrefixed"]/*[@name="seeds-as-surt-prefixes"]|true
scope.rules[1].alsoCheckVia|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptIfSurtPrefixed"]/*[@name="also-check-via"]|false
$na|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptIfSurtPrefixed"]/*[@name="rebuild-on-reconfig"]|true
scope.rules[2].maxHops|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="rejectIfTooManyHops"]/*[@name="max-hops"]|20
scope.rules[3].maxTransHops|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptIfTranscluded"]/*[@name="max-trans-hops"]|3
scope.rules[3].maxSpeculativeHops|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptIfTranscluded"]/*[@name="max-speculative-hops"]|1
scope.rules[6].maxRepetitions|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="rejectIfPathological"]/*[@name="max-repetitions"]|2
scope.rules[7].maxPathDepth|//controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="rejectIfTooManyPathSegs"]/*[@name="max-path-depth"]|20
*metadata.userAgentTemplate|//controller/*[@name="http-headers"]/*[@name="user-agent"]|Mozilla/5.0|(compatible;|heritrix/@VERSION@|+PROJECT_URL_HERE)
metadata.operatorFrom|//controller/*[@name="http-headers"]/*[@name="from"]|CONTACT_EMAIL_ADDRESS_HERE
^metadata.robotsHonoringPolicy.type|//controller/*[@name="robots-honoring-policy"]/*[@name="type"]|classic
metadata.robotsHonoringPolicy.masquerade|//controller/*[@name="robots-honoring-policy"]/*[@name="masquerade"]|false
disposition.delayFactor|//controller/*[@name="frontier"]/*[@name="delay-factor"]|4.0
disposition.maxDelayMs|//controller/*[@name="frontier"]/*[@name="max-delay-ms"]|20000
disposition.minDelayMs|//controller/*[@name="frontier"]/*[@name="min-delay-ms"]|2000
disposition.respectCrawlDelayUpToSeconds|//controller/*[@name="frontier"]/*[@name="respect-crawl-delay-up-to-secs"]|300
frontier.maxRetries|//controller/*[@name="frontier"]/*[@name="max-retries"]|30
frontier.retryDelaySeconds|//controller/*[@name="frontier"]/*[@name="retry-delay-seconds"]|900
preparer.preferenceEmbedHops|//controller/*[@name="frontier"]/*[@name="preference-embed-hops"]|1
$|//controller/*[@name="frontier"]/*[@name="total-bandwidth-usage-KB-sec"]|0
disposition.maxPerHostBandwidthUsageKbSec|//controller/*[@name="frontier"]/*[@name="max-per-host-bandwidth-usage-KB-sec"]|0
*preparer.queueAssignmentPolicy|//controller/*[@name="frontier"]/*[@name="queue-assignment-policy"]|org.archive.crawler.frontier.HostnameQueueAssignmentPolicy
crawlController.pauseAtStart|//controller/*[@name="frontier"]/*[@name="pause-at-start"]|false
crawlController.pauseAtFinish|//controller/*[@name="frontier"]/*[@name="pause-at-finish"]|false
seeds.sourceTagSeeds|//controller/*[@name="frontier"]/*[@name="source-tag-seeds"]|false
frontier.recoveryLogEnabled|//controller/*[@name="frontier"]/*[@name="recovery-log-enabled"]|true
frontier.holdQueues|//controller/*[@name="frontier"]/*[@name="hold-queues"]|true
frontier.balanceReplenishAmount|//controller/*[@name="frontier"]/*[@name="balance-replenish-amount"]|3000
frontier.errorPenaltyAmount|//controller/*[@name="frontier"]/*[@name="error-penalty-amount"]|100
frontier.queueTotalBudget|//controller/*[@name="frontier"]/*[@name="queue-total-budget"]|-1
*preparer.costAssignmentPolicy|//controller/*[@name="frontier"]/*[@name="cost-policy"]|org.archive.crawler.frontier.ZeroCostAssignmentPolicy
frontier.snoozeLongMs|//controller/*[@name="frontier"]/*[@name="snooze-deactivate-ms"]|300000
$|//controller/*[@name="frontier"]/*[@name="target-ready-backlog"]|50
*frontier.uriUniqFilter|//controller/*[@name="frontier"]/*[@name="uri-included-structure"]|org.archive.crawler.util.BdbUriUniqFilter
frontier.dumpPendingAtClose|//controller/*[@name="frontier"]/*[@name="dump-pending-at-close"]|false
preparer.uriCanonicalizationPolicy.rules[0].enabled|//controller/*[@name="uri-canonicalization-rules"]/*[@name="Lowercase"]/*[@name="enabled"]|true
preparer.uriCanonicalizationPolicy.rules[1].enabled|//controller/*[@name="uri-canonicalization-rules"]/*[@name="Userinfo"]/*[@name="enabled"]|true
preparer.uriCanonicalizationPolicy.rules[2].enabled|//controller/*[@name="uri-canonicalization-rules"]/*[@name="WWW[0-9]*"]/*[@name="enabled"]|true
preparer.uriCanonicalizationPolicy.rules[3].enabled|//controller/*[@name="uri-canonicalization-rules"]/*[@name="SessionIDs"]/*[@name="enabled"]|true
preparer.uriCanonicalizationPolicy.rules[4].enabled|//controller/*[@name="uri-canonicalization-rules"]/*[@name="SessionCFIDs"]/*[@name="enabled"]|true
preparer.uriCanonicalizationPolicy.rules[5].enabled|//controller/*[@name="uri-canonicalization-rules"]/*[@name="QueryStrPrefix"]/*[@name="enabled"]|true
preselector.enabled|//controller/*[@name="pre-fetch-processors"]/*[@name="Preselector"]/*[@name="enabled"]|true
preselector.logToFile|//controller/*[@name="pre-fetch-processors"]/*[@name="Preselector"]/*[@name="override-logger"]|false
preselector.recheckScope|//controller/*[@name="pre-fetch-processors"]/*[@name="Preselector"]/*[@name="recheck-scope"]|true
preselector.blockAll|//controller/*[@name="pre-fetch-processors"]/*[@name="Preselector"]/*[@name="block-all"]|false
preconditions.enabled|//controller/*[@name="pre-fetch-processors"]/*[@name="Preprocessor"]/*[@name="enabled"]|true
preconditions.ipValidityDurationSeconds|//controller/*[@name="pre-fetch-processors"]/*[@name="Preprocessor"]/*[@name="ip-validity-duration-seconds"]|21600
preconditions.robotsValidityDurationSeconds|//controller/*[@name="pre-fetch-processors"]/*[@name="Preprocessor"]/*[@name="robot-validity-duration-seconds"]|86400
preconditions.calculateRobotsOnly|//controller/*[@name="pre-fetch-processors"]/*[@name="Preprocessor"]/*[@name="calculate-robots-only"]|false
fetchDns.enabled|//controller/*[@name="fetch-processors"]/*[@name="DNS"]/*[@name="enabled"]|true
fetchDns.acceptNonDnsResolves|//controller/*[@name="fetch-processors"]/*[@name="DNS"]/*[@name="accept-non-dns-resolves"]|false
fetchDns.digestContent|//controller/*[@name="fetch-processors"]/*[@name="DNS"]/*[@name="digest-content"]|true
fetchDns.digestAlgorithm|//controller/*[@name="fetch-processors"]/*[@name="DNS"]/*[@name="digest-algorithm"]|sha1
fetchHttp.enabled|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="enabled"]|true
fetchHttp.timeoutSeconds|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="timeout-seconds"]|1200
fetchHttp.soTimeoutMs|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="sotimeout-ms"]|20000
fetchHttp.maxFetchKBSec|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="fetch-bandwidth"]|0
fetchHttp.maxLengthBytes|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="max-length-bytes"]|0
fetchHttp.ignoreCookies|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="ignore-cookies"]|false
$|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="use-bdb-for-cookies"]|true
^fetchHttp.sslTrustLevel|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="trust-level"]|open
fetchHttp.defaultEncoding|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="default-encoding"]|ISO-8859-1
fetchHttp.digestContent|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="digest-content"]|true
fetchHttp.digestAlgorithm|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="digest-algorithm"]|sha1
fetchHttp.sendIfModifiedSince|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="send-if-modified-since"]|true
fetchHttp.sendIfNoneMatch|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="send-if-none-match"]|true
fetchHttp.sendConnectionClose|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="send-connection-close"]|true
fetchHttp.sendReferer|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="send-referer"]|true
fetchHttp.sendRange|//controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="send-range"]|false
extractorHttp.enabled|//controller/*[@name="extract-processors"]/*[@name="ExtractorHTTP"]/*[@name="enabled"]|true
extractorHtml.enabled|//controller/*[@name="extract-processors"]/*[@name="ExtractorHTML"]/*[@name="enabled"]|true
extractorHtml.extractJavascript|//controller/*[@name="extract-processors"]/*[@name="ExtractorHTML"]/*[@name="extract-javascript"]|true
extractorHtml.treatFramesAsEmbedLinks|//controller/*[@name="extract-processors"]/*[@name="ExtractorHTML"]/*[@name="treat-frames-as-embed-links"]|true
extractorHtml.ignoreFormActionUrls|//controller/*[@name="extract-processors"]/*[@name="ExtractorHTML"]/*[@name="ignore-form-action-urls"]|false
extractorHtml.extractOnlyFormGets|//controller/*[@name="extract-processors"]/*[@name="ExtractorHTML"]/*[@name="extract-only-form-gets"]|true
extractorHtml.extractValueAttributes|//controller/*[@name="extract-processors"]/*[@name="ExtractorHTML"]/*[@name="extract-value-attributes"]|true
extractorHtml.ignoreUnexpectedHtml|//controller/*[@name="extract-processors"]/*[@name="ExtractorHTML"]/*[@name="ignore-unexpected-html"]|true
extractorCss.enabled|//controller/*[@name="extract-processors"]/*[@name="ExtractorCSS"]/*[@name="enabled"]|true
extractorJs.enabled|//controller/*[@name="extract-processors"]/*[@name="ExtractorJS"]/*[@name="enabled"]|true
extractorSwf.enabled|//controller/*[@name="extract-processors"]/*[@name="ExtractorSWF"]/*[@name="enabled"]|true
arcWriter.enabled|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="enabled"]|true
arcWriter.compress|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="compress"]|true
arcWriter.prefix|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="prefix"]|IAH
arcWriter.suffix|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="suffix"]|${HOSTNAME}
arcWriter.maxFileSizeBytes|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="max-size-bytes"]|100000000
*arcWriter.storePaths[0]|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="path"]/string|arcs
arcWriter.poolMaxActive|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="pool-max-active"]|5
arcWriter.poolMaxWaitMs|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="pool-max-wait"]|300000
arcWriter.maxTotalBytesToWrite|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="total-bytes-to-write"]|0
arcWriter.skipIdenticalDigests|//controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="skip-identical-digests"]|false
$|//controller/*[@name="post-processors"]/*[@name="Updater"]/*[@name="enabled"]|true
$|//controller/*[@name="post-processors"]/*[@name="LinksScoper"]/*[@name="enabled"]|true
$|//controller/*[@name="post-processors"]/*[@name="LinksScoper"]/*[@name="override-logger"]|false
candidates.seedsRedirectNewSeeds|//controller/*[@name="post-processors"]/*[@name="LinksScoper"]/*[@name="seed-redirects-new-seed"]|true
preparer.preferenceDepthHops|//controller/*[@name="post-processors"]/*[@name="LinksScoper"]/*[@name="preference-depth-hops"]|-1
$|//controller/*[@name="post-processors"]/*[@name="Scheduler"]/*[@name="enabled"]|true
statisticsTracker.intervalSeconds|//controller/*[@name="loggers"]/*[@name="crawl-statistics"]/*[@name="interval-seconds"]|20
bdb.checkpointCopyLogs|//controller/*[@name="checkpoint-copy-bdbje-logs"]|true
$|//controller/*[@name="recover-retain-failures"]|false
$|//controller/*[@name="recover-scope-includes"]|true
$|//controller/*[@name="recover-scope-enqueues"]|true