/*
 * is2.io.CONLLWriter06 -- Maven / Gradle / Ivy
 * Go to download
 * Show more of this group. Show more artifacts with this name.
 * Show all versions of anna. Show documentation.
 * Tools for Natural Language Analysis, Generation and Machine Learning
 * The newest version!
 */
package is2.io;
import is2.data.SentenceData09;
import is2.util.DB;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.StringTokenizer;
public class CONLLWriter06 {
/** The one-character string {@code "_"}. Presumably the placeholder for empty CoNLL columns -- confirm against its uses. */
public static final String DASH = "_";
/** Output destination. Assigned by the {@code String} constructor; the no-argument constructor leaves it unset. */
protected BufferedWriter writer;
/** Creates an instance without opening any output; {@code writer} is not assigned here. */
public CONLLWriter06 () { }
/**
 * Command-line utility for checking tab/whitespace consistency of text files.
 *
 * <p>The mode is selected by the number of arguments:
 * <ul>
 *   <li><b>two</b> ({@code in out}): reads {@code in} as ISO-8859-1, trims every line and
 *       writes the result to {@code out} as UTF-8; reports whether any line was changed
 *       and whether any input line ended with a tab.</li>
 *   <li><b>three</b> ({@code ignored file1 file2}): the first argument is not used;
 *       {@code file1} (ISO-8859-1) and {@code file2} (UTF-8) are compared line by line
 *       on the number of tab-separated tokens.</li>
 *   <li><b>any other non-zero count</b> ({@code dir ...}): every regular file directly inside
 *       {@code dir} is read as UTF-8 and reported if it contains a line ending with a tab.</li>
 * </ul>
 * With no arguments a usage message is printed.
 *
 * @param args command-line arguments as described above
 * @throws IOException if a file cannot be opened, read or written
 */
public static void main(String args[]) throws IOException {
    if (args.length == 0) {
        // Previously this fell through to args[0] and died with ArrayIndexOutOfBoundsException.
        System.out.println("usage: CONLLWriter06 <in> <out> | <ignored> <file1> <file2> | <directory>");
        return;
    }
    if (args.length == 2) {
        trimLinesToUtf8(new File(args[0]), new File(args[1]));
    } else if (args.length == 3) {
        compareTabCounts(new File(args[1]), new File(args[2]));
    } else {
        reportTrailingTabs(new File(args[0]));
    }
}

/** Opens {@code file} for buffered reading (32 KiB buffer) with the given charset. */
private static BufferedReader openBufferedReader(File file, String charsetName) throws IOException {
    return new BufferedReader(new InputStreamReader(new FileInputStream(file), charsetName), 32768);
}

/** Copies {@code source} (ISO-8859-1) to {@code target} (UTF-8) with every line trimmed, then reports differences. */
private static void trimLinesToUtf8(File source, File target) throws IOException {
    boolean changed = false;
    boolean trailingTab = false;
    // "ISO-8859" is not a charset name Java recognises; the Latin-1 charset is "ISO-8859-1".
    BufferedReader reader = openBufferedReader(source, "ISO-8859-1");
    try {
        BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                // trim() removes leading and trailing characters <= U+0020, tabs included.
                String trimmed = line.trim();
                // The tab test must look at the raw line: after trim() it can never end with a tab.
                if (line.endsWith("\t")) {
                    trailingTab = true;
                }
                out.write(trimmed);
                out.newLine();
                if (!line.equals(trimmed)) {
                    changed = true;
                }
            }
            out.flush();
        } finally {
            out.close();
        }
    } finally {
        reader.close();
    }
    if (changed) {
        DB.println("found diff. found tab? "+trailingTab);
    }
}

/** Compares two files line by line on their number of tab-separated tokens and prints a summary. */
private static void compareTabCounts(File first, File second) throws IOException {
    int line = 0;
    int alltabs1 = 0;
    int alltabs2 = 0;
    BufferedReader reader1 = openBufferedReader(first, "ISO-8859-1");
    try {
        BufferedReader reader2 = openBufferedReader(second, "UTF-8");
        try {
            while (true) {
                String l1 = reader1.readLine();
                String l2 = reader2.readLine();
                if ((l1 == null) != (l2 == null)) {
                    DB.println("files do not end at the same line ");
                }
                // Stop as soon as either file is exhausted; tokenizing a null line would throw.
                if (l1 == null || l2 == null) {
                    break;
                }
                // Consecutive tabs act as one delimiter, so this counts non-empty fields.
                int tabs1 = new StringTokenizer(l1, "\t").countTokens();
                int tabs2 = new StringTokenizer(l2, "\t").countTokens();
                alltabs1 += tabs1;
                alltabs2 += tabs2;
                line++;
                if (tabs1 != tabs2) {
                    DB.println("number of tabs different in line "+line+" file1-tabs "+tabs1+" file2-tabs "+tabs2);
                    // Terminates the JVM at the first mismatch; the summary below is not printed.
                    System.exit(0);
                }
            }
        } finally {
            reader2.close();
        }
    } finally {
        reader1.close();
    }
    DB.println("checked lines "+line+" with tabs in file 1 "+alltabs1+" in file2 "+alltabs2);
}

/** Scans every regular file directly inside {@code directory} and reports those containing a line that ends with a tab. */
private static void reportTrailingTabs(File directory) throws IOException {
    String[] names = directory.list();
    if (names == null) {
        // File.list() returns null when the path is not a directory or cannot be read.
        System.out.println("not a readable directory: "+directory);
        return;
    }
    for (String name : names) {
        File candidate = new File(directory, name);
        if (!candidate.isFile()) {
            // Sub-directories cannot be opened as a stream; skip them instead of failing.
            continue;
        }
        BufferedReader reader = openBufferedReader(candidate, "UTF-8");
        try {
            System.out.println("check file "+name);
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.endsWith("\t")) {
                    DB.println("found tab in file "+name);
                    break;
                }
            }
        } finally {
            reader.close();
        }
    }
}
// public int version = CONLLReader09.TASK08;
public CONLLWriter06 (String file) {
try {
writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));
} catch (Exception e) {
e.printStackTrace();
}
}
/**
 * Creates a writer for the given output file by delegating to the single-argument constructor.
 *
 * @param outfile path of the file to write to
 * @param formatTask currently unused; the assignment that consumed it is commented out below
 */
public CONLLWriter06(String outfile, int formatTask) {
this(outfile);
// version = formatTask;
}
public void write(SentenceData09 inst) throws IOException {
for (int i=0; i