All downloads are free. Search and download functionality uses the official Maven repository.

is2.util.ExtractParagraphs Maven / Gradle / Ivy

The newest version!
package is2.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.StringTokenizer;

/**
 * Command-line tool that collects the text found between paragraph markers in
 * every regular file of a directory and writes it to one UTF-8 output file.
 *
 * <p>Each input file is read line by line as UTF-8. A line that starts with a
 * paragraph start tag switches extraction on (the tag line itself is not
 * written). A line that starts with a paragraph end tag switches extraction off
 * and writes a line break. Every line read while extraction is on is split at
 * {@code ". "} and its pieces are written back-to-back, so the {@code ". "}
 * separators are dropped and consecutive lines of one paragraph are joined
 * without any separator.
 *
 * <p>NOTE(review): the tag literals were lost in the copy of the source this
 * revision was made from. They are restored here as {@code <P>}/{@code <p>} and
 * {@code </P>}/{@code </p>} based on the surviving "paragraph start" /
 * "paragraph end" comments -- confirm against the upstream source.
 */
public class ExtractParagraphs {

	/** Buffer size, in characters, used for the readers and the writer. */
	private static final int BUFFER_SIZE = 32768;

	/** Encoding used for both input and output. */
	private static final String ENCODING = "UTF-8";

	// NOTE(review): reconstructed literals, see the class comment.
	private static final String START_TAG_UPPER = "<P>";
	private static final String START_TAG_LOWER = "<p>";
	private static final String END_TAG_UPPER = "</P>";
	private static final String END_TAG_LOWER = "</p>";

	/**
	 * Extracts the paragraph text of all regular files in a directory.
	 *
	 * @param args {@code args[0]} is the input directory (with or without a
	 *             trailing separator), {@code args[1]} is the output file
	 * @throws IOException if the input directory cannot be listed, or if reading
	 *                     an input file or writing the output file fails
	 */
	public static void main(String args[]) throws IOException {

		// Both arguments are used below, so both must be present.
		if (args.length < 2) {
			System.out.println("Please provide an input directory and an output file name.");
			System.exit(0);
		}

		File inputDir = new File(args[0]);

		// File.list() returns null when the path is not a directory or cannot be
		// read; check before the output file is created or truncated.
		String[] fileNames = inputDir.list();
		if (fileNames == null) {
			throw new IOException("Not a readable directory: " + inputDir);
		}

		int cnt = 0;
		BufferedWriter writer = new BufferedWriter(
				new OutputStreamWriter(new FileOutputStream(args[1]), ENCODING), BUFFER_SIZE);
		try {
			for (String fileName : fileNames) {
				// Resolve against the directory so a missing trailing separator
				// in args[0] does not produce a wrong path.
				File input = new File(inputDir, fileName);
				if (!input.isFile()) {
					continue; // sub-directories and special files cannot be read as text
				}
				cnt += extractFile(input, writer);
			}
			writer.flush();
		} finally {
			writer.close();
		}

		System.out.println("Extract "+cnt+" lines ");
	}

	/**
	 * Copies the paragraph lines of one file to the writer.
	 *
	 * @param input  file to read as UTF-8 text
	 * @param writer destination; left open for the caller to close
	 * @return number of lines that were inside a paragraph and written out
	 * @throws IOException if reading or writing fails
	 */
	private static int extractFile(File input, BufferedWriter writer) throws IOException {
		BufferedReader reader = new BufferedReader(
				new InputStreamReader(new FileInputStream(input), ENCODING), BUFFER_SIZE);
		try {
			boolean inParagraph = false; // every file starts outside a paragraph
			int written = 0;

			String line;
			while ((line = reader.readLine()) != null) {

				if (line.startsWith(START_TAG_UPPER) || line.startsWith(START_TAG_LOWER)) {
					inParagraph = true;
					continue; // the start-tag line itself is never written
				}

				if (line.startsWith(END_TAG_UPPER) || line.startsWith(END_TAG_LOWER)) {
					inParagraph = false;
					writer.newLine(); // one output line per paragraph
				}

				if (inParagraph) {
					// The pieces are written without a separator, so ". " is dropped.
					for (String piece : line.split("\\. ")) {
						writer.write(piece);
					}
					written++;
				}
			}
			return written;
		} finally {
			reader.close();
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy