All downloads are free. Search and download functionality uses the official Maven repository.

com.github.hakenadu.javalangchains.chains.qa.split.SplitDocumentsChain Maven / Gradle / Ivy

package com.github.hakenadu.javalangchains.chains.qa.split;

import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;

import com.github.hakenadu.javalangchains.chains.Chain;
import com.github.hakenadu.javalangchains.util.PromptConstants;

/**
 * This {@link Chain} is used to split long documents into chunks. All document
 * keys are copied except for the {@link PromptConstants#CONTENT} which is
 * split.
 */
public class SplitDocumentsChain implements Chain>, Stream>> {

	/**
	 * This {@link TextSplitter} is used to create one or more documents from an
	 * input document based on the {@link PromptConstants#CONTENT} key.
	 */
	private final TextSplitter textSplitter;

	/**
	 * creates an instance of the {@link SplitDocumentsChain}
	 * 
	 * @param textSplitter {@link #textSplitter}
	 */
	public SplitDocumentsChain(final TextSplitter textSplitter) {
		this.textSplitter = textSplitter;
	}

	@Override
	public Stream> run(final Stream> input) {
		return input.flatMap(this::splitDocument);
	}

	private Stream> splitDocument(final Map document) {
		final String content = document.get(PromptConstants.CONTENT);

		return this.textSplitter.split(content).stream().map(contentPart -> {
			final Map documentPart = new HashMap<>(document);
			documentPart.put(PromptConstants.CONTENT, contentPart);
			return documentPart;
		});
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy