All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.bhu.nlp.utils.FileUtil Maven / Gradle / Ivy

There is a newer version: 1.0.3
Show newest version
package org.bhu.nlp.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;

import org.apache.commons.lang.StringUtils;

public class FileUtil {
	
	protected static final String SOURCE_PATH = "/org/cucbst4j/source/";
	/**
	 * 返回一个文件夹下所有文件,包括子文件夹中的文件
	 * @param inPath 待处理文件夹的路径
	 * @return 文本集合
	 */
	public static List getAllFiles(String inPath) {
		List fileList = new ArrayList();
		getFile(inPath, fileList);
		return fileList;
	}
	
	public static Queue getAllFiles2Queue(String inPath){
		Queue queue = new LinkedList(); 
		getFile(inPath, queue);
		return queue;
	}
	

	/**
	 * 获取文件夹下的文件并加入List
	 * @param path  待处理文件夹的路径
	 * @param collection 储存文件的列表
	 */
	private static void getFile(String path, Collection collection) {
		File file = new File(path);
		File[] array = file.listFiles();

		for (int i = 0; i < array.length; i++) {
			if (array[i].isFile()) {
				collection.add(array[i].getPath());
			} else if (array[i].isDirectory()) {
				getFile(array[i].getPath(), collection);
			}
		}
	}

	private static void getFile(String path, List list) {
		File file = new File(path);
		File[] array = file.listFiles();

		for (int i = 0; i < array.length; i++) {
			if (array[i].isFile()) {
				list.add(array[i].getPath());
			} else if (array[i].isDirectory()) {
				getFile(array[i].getPath(), list);
			}
		}
	}
	/**
	 * 删除指定文件夹下所有文件
	 * @param path 文件夹完整绝对路径
	 */
	public static void deleteFiles(String path) {
		File file = new File(path);
		if (!file.exists()) {
			return;
		}
		if (!file.isDirectory()) {
			return;
		}
		String[] tempList = file.list();
		File temp = null;
		for (int i = 0; i < tempList.length; i++) {
			if (path.endsWith(File.separator)) {
				temp = new File(path + tempList[i]);
			} else {
				temp = new File(path + File.separator + tempList[i]);
			}
			if (temp.isFile()) {
				if (!temp.renameTo(temp)) {
					System.out.println("someone use it now.");
				}
				temp.delete();
				System.out.println("delete file " + temp.getAbsolutePath());
			}
			if (temp.isDirectory()) {
				deleteFiles(path + "/" + tempList[i]);// 先删除文件夹里面的文件
				delFolder(path + "/" + tempList[i]);// 再删除空文件夹
			}
		}
	}


	/**
	 * 删除文件夹
	 * @param folderPath 文件夹完整绝对路径
	 */
	private static void delFolder(String folderPath) {
		try {
			deleteFiles(folderPath); // 删除完里面所有内容
			String filePath = folderPath;
			filePath = filePath.toString();
			java.io.File myFilePath = new java.io.File(filePath);
			myFilePath.delete(); // 删除空文件夹
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * 
	 * @param file 待计数的文本的File对象
	 * @return 返回文件内容的总行数。 
	 */
    public static int getTotalLines(File file){
        int lines = 0;
		try {
			java.io.FileReader in = new java.io.FileReader(file);
			LineNumberReader reader = new LineNumberReader(in);
			String s = reader.readLine();
			lines = 0;
			while (s != null) {
			    lines++;
			    s = reader.readLine();
			}
			reader.close();
			in.close();
		} catch (FileNotFoundException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
        return lines;
    }
    

    
    /***
     * 返回分词行的所有分词单位数组
     * @param line 待处理的分词后的字符串行
     * @return  word/cate
     */
    public String[] getWords(String line){
		String[] array = null;
		array = StringUtils.split(line);
		return array;
	}
    
    public static BufferedReader getSourceReader(String dicName) {
		BufferedReader reader = null;
		reader = new BufferedReader(new InputStreamReader(
				FileUtil.class.getResourceAsStream(SOURCE_PATH + dicName),
				Charset.defaultCharset()));
		return reader;
	}
	

	

	/***
	 * 
	 * @param reader 输入BufferedReader
	 * @return 返回姓氏概率HASH
	 */
	public static Hashtable getNameProHash(BufferedReader reader){
		Hashtable hash = new Hashtable();
		String[] array;
		try {
			for (String line; (line = reader.readLine()) != null;) {
				if (line.trim().length() == 0 || line.trim().startsWith("//"))
					continue;
				array = StringUtils.split(line);
				if(array.length != 2)continue;
				if(!hash.containsKey(array[0]))
					hash.put(array[0], Double.valueOf(array[1]));
			}
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		return hash;
	}
	
	public static Hashtable getNameProHash(String dicName){
		Hashtable hash = new Hashtable();
		BufferedReader reader = getSourceReader(dicName);
		String[] array;
		try {
			for (String line; (line = reader.readLine()) != null;) {
				if (line.trim().length() == 0 || line.trim().startsWith("//"))
					continue;
				array = StringUtils.split(line);
				if(array.length != 2)continue;
				if(!hash.containsKey(array[0]))
					hash.put(array[0], Double.valueOf(array[1]));
			}
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		return hash;
	}
	
	/***
	 * 
	 * @param inFile 输入文本路径
	 * @param outFile 输出文本路径
	 * @return 判断输入输出路径是否相同的结果
	 */
	public static boolean checkPath(String inFile, String outFile) {
		if (inFile.trim() == outFile.trim())
			return true;
		return false;
	}

	

	public static String getNumStr(String inPut) {
		String numStr = "";
		if (inPut.trim().length() == 0)
			return inPut;
		if (Character.isDigit(inPut.charAt(0))) {
			for (int i = 0; i < inPut.length(); i++) {
				if (Character.isDigit(inPut.charAt(i))) {
					numStr += inPut.charAt(i);
					if (i == inPut.length() - 1)
						return numStr;
				} else {
					if (numStr.length() == 0)
						continue;
					else {
						return numStr;
					}
				}
			}
		} else {
			return numStr;
		}
		return inPut;
	}

	public static String getLetterStr(String inPut) {
		String alpha = "abcdefghigklmnopqrstuvwxyzABCDEFGHIGKLMNOPQRSTUVWXYZ";
		char[] chAlpha = alpha.toCharArray();
		String letterStr = "";
		if (inPut.trim().length() == 0)
			return inPut;
		if (StringUtils.containsAny(String.valueOf(inPut.charAt(0)), chAlpha)) {
			for (int i = 0; i < inPut.length(); i++) {
				if (StringUtils.containsAny(String.valueOf(inPut.charAt(i)),
						chAlpha)) {
					letterStr += inPut.charAt(i);
					if (i == inPut.length() - 1)
						return letterStr;
				} else {
					if (letterStr.length() == 0)
						continue;
					else {
						return letterStr;
					}
				}
			}
		} else {
			return letterStr;
		}
		return inPut;
	}

	public String ToSBC(String inStr) {
		char c[] = inStr.toCharArray();
		for (int i = 0; i < c.length; i++) {
			if (c[i] == ' ') {
				c[i] = '\u3000';
			} else if (c[i] < '\177') {
				c[i] = (char) (c[i] + 65248);
			}
		}
		return new String(c);
	}



	

	/**
	 * 
	 * 
	 * @param filePath 输入文本路径
	 * @return 获取编码,默认返回GBK
	 * @throws IOException io错误
	 */
	public static String getFileChart(String filePath) throws IOException {
		String code = "GBK";

		InputStream inputStream = new FileInputStream(filePath);
		byte[] head = new byte[3];
		inputStream.read(head);
		inputStream.close();

		if (head[0] == -1 && head[1] == -2) {
			code = "UTF-16";
		} else if (head[0] == -2 && head[1] == -1) {
			code = "Unicode";
		} else if (head[0] == -17 && head[1] == -69 && head[2] == -65) {
			code = "UTF-8";
		}
		return code;
	}
	
	protected int getWordsNum(String line){
		return StringUtils.split(line).length;
	}

	public static Set getSourceDicSet(String dicName){
		Set set = new HashSet();
		BufferedReader reader = getSourceReader(dicName);
		
		try {
			String item;
			for(String line; (line = reader.readLine()) != null;){
				item = line.trim();
				if(item.length() == 0 || item.startsWith("//"))continue;
				set.add(item);
			}
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		return set;
	}
	
	public static Set getSourceSet(String line){
		Set set = new HashSet();
		String[] array = StringUtils.split(line, "|");
		for(String item : array){
			set.add(item.trim());
		}
		return set;
	}
	
	

    
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy