All downloads are free. Search and download functionality uses the official Maven repository.

io.anserini.search.topicreader.EpidemicQATopicReader Maven / Gradle / Ivy

/*
 * Anserini: A Lucene toolkit for reproducible information retrieval research
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.anserini.search.topicreader;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.IOUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

/**
 * A TopicReader class that reads from one of the Epidemic QA questions JSON files.
 */
public class EpidemicQATopicReader extends TopicReader {
  private static final Logger LOG = LogManager.getLogger(EpidemicQATopicReader.class);
  private static final String QUESTION_ID_KEY = "question_id";
  private static final String QUESTION_KEY = "question";
  private static final String QUERY_KEY = "query";
  private static final String BACKGROUND_KEY = "background";

  public EpidemicQATopicReader(Path topicFile) throws IOException {
    super(topicFile);
  }

  @Override
  public SortedMap> read(BufferedReader reader) throws IOException {
    SortedMap> map = new TreeMap<>();
    ObjectMapper mapper = new ObjectMapper();
    String topicsJson;
    JsonNode arrayNode;
    try {
      InputStream stream =
          IOUtils.toInputStream(IOUtils.toString(reader), StandardCharsets.UTF_8);
      topicsJson = new String(stream.readAllBytes());
      arrayNode = mapper.readerFor(JsonNode.class).readTree(topicsJson);
      if (!arrayNode.isArray()) {
        throw new IllegalArgumentException("Top-level JSON node is not an array.");
      }
    } catch (IllegalArgumentException e) {
      LOG.error(e);
      return null;
    }

    for (JsonNode topicNode : arrayNode) {
      Map fields = new HashMap<>();

      String questionId = topicNode.get(QUESTION_ID_KEY).asText();
      fields.put(QUESTION_ID_KEY, questionId);

      String question = topicNode.get(QUESTION_KEY).asText();
      fields.put(QUESTION_KEY, question);

      String query = topicNode.get(QUERY_KEY).asText();
      fields.put(QUERY_KEY, query);

      String background = topicNode.get(BACKGROUND_KEY).asText();
      fields.put(BACKGROUND_KEY, background);

      int id = Integer.parseInt(questionId.replaceAll("[^0-9]", ""));
      map.put(id, fields);
    }

    return map;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy