
// cn.tyoui.index.FileAnalyzer Maven / Gradle / Ivy
package cn.tyoui.index;
import cn.tyoui.pojo.FileMessage;
import com.chenlb.mmseg4j.analysis.MMSegAnalyzer;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Builds, updates and searches a Lucene index over the files beneath a directory.
 *
 * <p>Next to the Lucene index, a manifest file {@code dir.txt} is kept in the index
 * directory. {@link #update()} reads each manifest line as
 * {@code <absolute path>\t<size in bytes>} and uses it to detect files that are new or
 * whose size has changed since they were indexed.
 *
 * <p>This class keeps mutable state and performs no synchronization of its own.
 *
 * @author Tyoui
 */
public class FileAnalyzer {
    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "D:\\index";
    /** Name of the manifest file stored inside the index directory. */
    private static final String MANIFEST_NAME = "dir.txt";
    /** Charset used for both reading and writing the manifest. */
    private static final String MANIFEST_ENCODING = "UTF-8";

    private final Analyzer analyzer = new MMSegAnalyzer();
    private final String indexPath;
    private final File fileIndex;
    private IndexWriter indexWriter = null;
    private IndexReader indexReader = null;
    private Directory directory = null;
    /** Files indexed during the current run that still have to be written to the manifest. */
    private final List<FileMessage> pendingFiles = new ArrayList<>();
    /** Absolute path -> size recorded in the manifest; rebuilt on every {@link #update()}. */
    private final Map<String, Long> recordedSizes = new HashMap<>();
    private String createIndexDir = null;
    /** Raw manifest lines as loaded by {@link #update()}. */
    private List<String> manifestLines = null;

    /**
     * Sets the directory (or single file) whose content is to be indexed.
     *
     * @param createIndexDir path of the directory or file to analyze
     */
    public void setCreateIndexDir(String createIndexDir) {
        this.createIndexDir = createIndexDir;
    }

    /**
     * Creates an analyzer whose index lives at the default location {@code D:\index}.
     */
    public FileAnalyzer() {
        this(DEFAULT_INDEX_PATH);
    }

    /**
     * Creates an analyzer whose index lives at the given location.
     *
     * <p>If the directory cannot be opened the stack trace is printed and the instance is
     * left without a usable index directory.
     *
     * @param indexPath file-system path of the index directory
     */
    public FileAnalyzer(String indexPath) {
        this.indexPath = indexPath;
        this.fileIndex = new File(indexPath);
        try {
            Path path = FileSystems.getDefault().getPath(indexPath);
            directory = FSDirectory.open(path);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Commits and closes the current index writer, if any.
     *
     * <p>The writer is closed even when the commit fails so that it is not left open.
     */
    private void close() {
        if (indexWriter == null) {
            return;
        }
        try {
            try {
                indexWriter.commit();
            } finally {
                indexWriter.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            indexWriter = null;
        }
    }

    /**
     * Recursively indexes {@code dir}: every non-directory entry is passed to
     * {@link #createIndex(File)}.
     *
     * @param dir directory (or single file) to analyze
     * @throws Exception declared for callers; failures of individual files are handled
     *                   inside {@link #createIndex(File)}
     */
    private void analyzerDir(File dir) throws Exception {
        if (!dir.isDirectory()) {
            createIndex(dir);
            return;
        }
        File[] children = dir.listFiles();
        if (children == null) {
            // listFiles() returns null when the directory cannot be listed.
            return;
        }
        for (File child : children) {
            analyzerDir(child);
        }
    }

    /**
     * Fills {@code document} with the fields describing {@code file}: the stored,
     * untokenized {@code filePath} and the analyzed, unstored {@code fileName} and
     * {@code fileContent}.
     *
     * @param file     file being indexed
     * @param document document receiving the fields
     */
    private void fillDocument(File file, Document document) {
        String context = FileContent.init(file);
        if (context == null) {
            // NOTE(review): FileContent.init is not visible here; guard against a null
            // result so the file stays searchable by name.
            context = "";
        }
        document.add(new Field("filePath", file.getAbsolutePath(), StringField.TYPE_STORED));
        document.add(new Field("fileName", file.getName(), TextField.TYPE_NOT_STORED));
        document.add(new Field("fileContent", context, TextField.TYPE_NOT_STORED));
    }

    /**
     * Writes the document for {@code file} into the index, replacing any document that is
     * already stored under the same {@code filePath}.
     *
     * @param file file to index
     * @return {@code true} if the document was handed to the writer without error
     */
    private boolean indexFile(File file) {
        try {
            Document document = new Document();
            fillDocument(file, document);
            // updateDocument deletes by term before adding, so indexing a path twice
            // does not leave duplicate documents behind.
            indexWriter.updateDocument(new Term("filePath", file.getAbsolutePath()), document);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Indexes a file and remembers it for the manifest.
     *
     * <p>The file is only remembered when indexing succeeded, so the manifest never lists
     * a file that is missing from the index.
     *
     * @param file file to index
     */
    private void createIndex(File file) {
        if (indexFile(file)) {
            pendingFiles.add(new FileMessage(file.getAbsolutePath(), file.length()));
        }
    }

    /**
     * Re-indexes a file that is already present in the index.
     *
     * @param file file to re-index
     * @return {@code true} if the index entry was replaced
     */
    private boolean updateIndex(File file) {
        return indexFile(file);
    }

    /**
     * Walks {@code dir} recursively, indexing files missing from the manifest and
     * re-indexing files whose size differs from the recorded one.
     *
     * <p>Only the size is compared; a file modified without a size change is not detected.
     *
     * @param dir directory to scan
     * @throws Exception declared for callers; failures of individual files are handled
     *                   by the indexing helpers
     */
    private void updateDir(File dir) throws Exception {
        File[] children = dir.listFiles();
        if (children == null) {
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                updateDir(child);
                continue;
            }
            String path = child.getAbsolutePath();
            long size = child.length();
            Long recorded = recordedSizes.get(path);
            if (recorded == null) {
                createIndex(child);
            } else if (recorded.longValue() != size && updateIndex(child)) {
                // Record the new size only after the index entry was actually replaced.
                addDir(path, size);
            }
        }
    }

    /**
     * Replaces the manifest line of an updated file and rewrites the manifest.
     *
     * @param filePath absolute path of the updated file
     * @param size     current size of the file in bytes
     */
    private void addDir(String filePath, long size) {
        // Match the whole path column; a bare startsWith(filePath) would also drop
        // entries of other files whose path merely begins with this one.
        String linePrefix = filePath + "\t";
        List<String> kept = new ArrayList<>();
        for (String content : manifestLines) {
            if (!content.startsWith(linePrefix)) {
                kept.add(content);
            }
        }
        kept.add(filePath + "\t" + size);
        this.manifestLines = kept;
        writeManifest(manifestLines);
        System.out.println("更新成功!");
    }

    /**
     * Appends the files indexed during the current update to the manifest.
     *
     * <p>The pending list is emptied afterwards so that a later update does not append
     * the same entries again.
     */
    private void addDir() {
        if (pendingFiles.isEmpty()) {
            return;
        }
        for (FileMessage fileMessage : pendingFiles) {
            // NOTE(review): presumably FileMessage.toString() renders "path\tsize",
            // which is the format update() parses - confirm against FileMessage.
            manifestLines.add(fileMessage.toString());
        }
        pendingFiles.clear();
        writeManifest(manifestLines);
        System.out.println("追加成功!");
    }

    /**
     * Indexes everything beneath the configured directory and writes a fresh manifest.
     *
     * <p>Any existing manifest is deleted first. Documents of paths that are indexed
     * again are replaced; documents of files that no longer exist are not removed.
     * Errors are printed, not propagated.
     *
     * <p>NOTE(review): the writer is opened with the default IndexWriterConfig open mode,
     * so an existing index is presumably appended to rather than wiped - confirm that
     * this is intended.
     */
    public void createIndex() {
        try {
            File manifest = manifestFile();
            if (manifest.exists()) {
                manifest.delete();
            }
            // Drop leftovers of an earlier run that did not complete.
            pendingFiles.clear();
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            try {
                analyzerDir(new File(createIndexDir));
            } finally {
                // Always close the writer, otherwise it stays open after a failure.
                close();
            }
            writeManifest(pendingFiles);
            pendingFiles.clear();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Brings the index up to date with the configured directory.
     *
     * <p>Loads the manifest, re-indexes files whose size changed, indexes files that are
     * not listed yet and rewrites the manifest accordingly. Manifest lines that cannot be
     * parsed are ignored, which makes the corresponding file count as new. Errors are
     * printed, not propagated.
     */
    public void update() {
        try {
            manifestLines = FileUtils.readLines(manifestFile(), MANIFEST_ENCODING);
            recordedSizes.clear();
            pendingFiles.clear();
            for (String text : manifestLines) {
                String[] columns = text.split("\t");
                if (columns.length < 2) {
                    continue;
                }
                try {
                    recordedSizes.put(columns[0], Long.parseLong(columns[1]));
                } catch (NumberFormatException ignored) {
                    // Unparseable size: treat the file as unknown so it is indexed again.
                }
            }
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            try {
                updateDir(new File(createIndexDir));
            } finally {
                close();
            }
            addDir();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the index and returns the matching files.
     *
     * <p>The key is parsed with the classic query parser using {@code fileName} as the
     * default field. The index reader is opened for this call and closed before
     * returning.
     *
     * @param key    query string
     * @param number maximum number of hits to return
     * @return one {@link FileMessage} per hit, built from the stored file path and the
     *         hit score; empty if nothing matched, the arguments are unusable or the
     *         search failed
     */
    public List<FileMessage> searchIndex(String key, int number) {
        List<FileMessage> hits = new ArrayList<>();
        if (key == null || key.trim().isEmpty() || number <= 0) {
            return hits;
        }
        try {
            indexReader = DirectoryReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            QueryParser queryParser = new QueryParser("fileName", analyzer);
            Query query = queryParser.parse(key);
            TopDocs topDocs = indexSearcher.search(query, number);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                hits.add(new FileMessage(document.get("filePath"), scoreDoc.score));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeReader();
        }
        return hits;
    }

    /**
     * Closes the reader opened by the last search, if any, so that readers do not pile
     * up across searches.
     */
    private void closeReader() {
        if (indexReader == null) {
            return;
        }
        try {
            indexReader.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            indexReader = null;
        }
    }

    /**
     * Tells whether the index directory exists and contains at least one entry.
     *
     * @return {@code true} if the index directory holds any file
     */
    public boolean isIndexExists() {
        // listFiles() is null when the path is missing or not a directory.
        File[] entries = fileIndex.listFiles();
        return entries != null && entries.length > 0;
    }

    /**
     * Deletes the direct children of the index directory. Does nothing when the
     * directory cannot be listed.
     */
    public void deleteAllFilesOfDir() {
        File[] entries = fileIndex.listFiles();
        if (entries == null) {
            return;
        }
        for (File file : entries) {
            file.delete();
        }
    }

    /**
     * Closes the writer, the reader and the directory.
     *
     * <p>Each resource is closed even if closing a previous one failed.
     *
     * @throws Exception if closing any of the resources fails
     */
    public void closeAll() throws Exception {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } finally {
            try {
                if (indexReader != null) {
                    indexReader.close();
                }
            } finally {
                if (directory != null) {
                    directory.close();
                }
            }
        }
    }

    /**
     * Returns the manifest file located inside the index directory.
     *
     * @return the {@code dir.txt} file
     */
    private File manifestFile() {
        return new File(indexPath + File.separator + MANIFEST_NAME);
    }

    /**
     * Writes the given entries to the manifest, one per line.
     *
     * <p>The manifest is written as UTF-8 because {@link #update()} reads it as UTF-8;
     * relying on the platform charset would corrupt non-ASCII paths.
     *
     * @param lines entries to write; each is rendered with {@code toString()}
     */
    private void writeManifest(List<?> lines) {
        try {
            FileUtils.writeLines(manifestFile(), MANIFEST_ENCODING, lines);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy