ai.platon.pulsar.dom.model.WebsiteFactory.kt Maven / Gradle / Ivy
package ai.platon.pulsar.dom.model
import org.apache.commons.lang3.StringUtils
import org.w3c.dom.Document
import org.w3c.dom.Node
import java.io.File
import java.util.*
import javax.xml.parsers.DocumentBuilderFactory
class WebsiteFactory {
val resource = "config/known-websites.xml"
val learningFile = "output/learning/website.txt"
// maintains all websites
var websites = ArrayList()
// an index to websites
var domain2websites: MutableMap = HashMap()
// maintains all learned websites
var learnedWebsites = ArrayList()
fun resource(): String {
return resource
}
operator fun get(domain: String): Website? {
return domain2websites[domain]
}
fun getName(domain: String): String {
val w = domain2websites[domain]
return w?.name ?: ""
}
private fun load(file: String) {
val db = DocumentBuilderFactory.newInstance().newDocumentBuilder()
val doc = db.parse(File(resource))
parse(doc)
rebuild()
}
private fun rebuild() {
for (website in websites) {
domain2websites[website.domain] = website
}
}
private fun parse(doc: Document) {
val rootNode = doc.firstChild
val websiteNodes = rootNode.childNodes
for (i in 0 until websiteNodes.length) {
val websiteNode = websiteNodes.item(i)
if (websiteNode.nodeType != Node.ELEMENT_NODE || websiteNode.nodeName != "website") {
continue
}
val domain = websiteNode.attributes.getNamedItem("domain").nodeValue
var name: String? = null
val childNodes = websiteNode.childNodes
for (j in 0 until childNodes.length) {
val childNode = childNodes.item(j)
if (childNode.nodeType != Node.ELEMENT_NODE || childNode.nodeName != "name") {
continue
}
name = childNode.textContent
}
if (StringUtils.isNotEmpty(domain) && StringUtils.isNotEmpty(name)) {
websites.add(Website(domain, name!!))
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy