// ai.platon.pulsar.persist.gora.GoraStorage Maven / Gradle / Ivy
package ai.platon.pulsar.persist.gora;
import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.persist.HadoopUtils;
import ai.platon.pulsar.persist.gora.generated.GWebPage;
import org.apache.gora.persistency.Persistent;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.gora.util.GoraException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import static ai.platon.pulsar.common.LogsKt.warnForClose;
import static ai.platon.pulsar.common.config.AppConstants.MONGO_STORE_CLASS;
import static ai.platon.pulsar.common.config.AppConstants.WEBPAGE_SCHEMA;
import static ai.platon.pulsar.common.config.CapabilityTypes.*;
public class GoraStorage {
public static final Logger logger = LoggerFactory.getLogger(GoraStorage.class);
// load properties from gora.properties
public static Properties properties = DataStoreFactory.createProps();
private static Map dataStores = new HashMap<>();
@SuppressWarnings("unchecked")
public synchronized static DataStore
createDataStore(ImmutableConfig conf, Class keyClass, Class persistentClass)
throws GoraException, ClassNotFoundException {
String className = conf.get(STORAGE_DATA_STORE_CLASS, MONGO_STORE_CLASS);
Class extends DataStore> dataStoreClass = (Class extends DataStore>)Class.forName(className);
return createDataStore(conf, keyClass, persistentClass, dataStoreClass);
}
@SuppressWarnings("unchecked")
public synchronized static DataStore
createDataStore(ImmutableConfig conf,
Class keyClass, Class persistentClass, Class extends DataStore> dataStoreClass
) throws GoraException {
String crawlId = conf.get(STORAGE_CRAWL_ID, "");
String schemaPrefix = "";
if (!crawlId.isEmpty()) {
schemaPrefix = crawlId + "_";
}
String schema;
if (GWebPage.class.equals(persistentClass)) {
schema = conf.get(STORAGE_SCHEMA_WEBPAGE, WEBPAGE_SCHEMA);
} else {
throw new UnsupportedOperationException("Unable to create storage for class " + persistentClass);
}
Object o = dataStores.get(schema);
if (o == null) {
org.apache.hadoop.conf.Configuration hadoopConf = HadoopUtils.INSTANCE.toHadoopConfiguration(conf);
String realSchema = schemaPrefix + schema;
hadoopConf.set(STORAGE_PREFERRED_SCHEMA_NAME, realSchema);
DataStore dataStore = DataStoreFactory.createDataStore(dataStoreClass,
keyClass, persistentClass, hadoopConf, properties, schema);
dataStores.put(realSchema, dataStore);
String className = dataStore.getClass().getName();
if (className.equals("FileBackendPageStore")) {
logger.info("Backend data store: {}, real schema: {}", className, dataStore.getSchemaName());
logger.info("FileBackendPageStore is only for development and testing, " +
"it is not suitable for production environment");
} else {
logger.info("Backend data store: {}, real schema: {}, storage id: <{}>, " +
"set config `storage.crawl.id` to define the real schema",
className, dataStore.getSchemaName(), schemaPrefix);
}
return dataStore;
}
return (DataStore) o;
}
public synchronized static void close() {
dataStores.forEach((schema, store) -> {
if (store instanceof DataStore) {
logger.info("Closing data store <{}>", schema);
try {
((DataStore, ?>) store).close();
} catch (Exception e) {
warnForClose(store, e);
}
}
});
dataStores.clear();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy