All downloads are free. The search and download functionality uses the official Maven repository.

cn.tyoui.index.FileAnalyzer Maven / Gradle / Ivy

package cn.tyoui.index;

import cn.tyoui.pojo.FileMessage;
import com.chenlb.mmseg4j.analysis.MMSegAnalyzer;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Builds, incrementally updates and searches a Lucene index over the files
 * found under a directory.
 *
 * <p>Next to the Lucene index files, a plain-text manifest ({@code dir.txt})
 * is kept in the index directory. {@link #update()} parses every manifest
 * line as {@code <absolute path> TAB <file size>} and uses the recorded size
 * to decide whether a file has to be re-indexed.
 *
 * <p>The class performs no synchronization; instances keep mutable state
 * between calls.
 *
 * @author Tyoui
 */
public class FileAnalyzer {

    /** Name of the manifest file stored inside the index directory. */
    private static final String MANIFEST_NAME = "dir.txt";
    /** Charset used for both reading and writing the manifest. */
    private static final String MANIFEST_ENCODING = "UTF-8";

    private final Analyzer analyzer = new MMSegAnalyzer();
    private final String indexPath = "D:\\index";
    /** Writer that is open only while {@link #createIndex()} or {@link #update()} runs. */
    private IndexWriter indexWriter = null;
    /** Lucene directory over {@link #indexPath}; {@code null} if it could not be opened. */
    private final Directory directory;
    /** Files indexed for the first time during the current run, not yet in the manifest. */
    private final List<FileMessage> pendingFiles = new ArrayList<>();
    /** Path to size mapping parsed from the manifest by {@link #update()}. */
    private final Map<String, Long> knownSizes = new HashMap<>();
    private String createIndexDir = null;
    /** Raw manifest lines as loaded (and amended) by {@link #update()}. */
    private List<String> manifestLines = null;
    private final File fileIndex = new File(indexPath);

    /**
     * Sets the file or directory whose contents are to be indexed.
     *
     * @param createIndexDir path of the file or directory to analyze
     */
    public void setCreateIndexDir(String createIndexDir) {
        this.createIndexDir = createIndexDir;
    }

    /**
     * Opens the Lucene directory at the fixed index path. A failure is only
     * reported on stderr; the directory is then left {@code null}.
     */
    public FileAnalyzer() {
        Directory opened = null;
        try {
            Path path = FileSystems.getDefault().getPath(indexPath);
            opened = FSDirectory.open(path);
        } catch (IOException e) {
            e.printStackTrace();
        }
        directory = opened;
    }

    /** @return the manifest file inside the index directory */
    private File manifestFile() {
        return new File(indexPath + File.separator + MANIFEST_NAME);
    }

    /**
     * Commits and closes the current index writer, if any, and forgets it so
     * that it is never closed twice. The writer is closed even when the
     * commit fails, so that the index lock is not kept.
     */
    private void close() {
        IndexWriter writer = indexWriter;
        indexWriter = null;
        if (writer == null) {
            return;
        }
        try {
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Recursively indexes every file below {@code dir}. When {@code dir} is
     * not a directory it is indexed itself.
     *
     * @param dir file or directory to analyze
     */
    private void analyzerDir(File dir) {
        if (!dir.isDirectory()) {
            createIndex(dir);
            return;
        }
        File[] children = dir.listFiles();
        if (children == null) {
            // listFiles() yields null on an I/O error; nothing to index here.
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                analyzerDir(child);
            } else {
                createIndex(child);
            }
        }
    }

    /**
     * Fills {@code document} with the fields stored for {@code file}: the
     * absolute path (stored, for retrieval and as update key) plus the file
     * name and file content (indexed only).
     *
     * @param file     file being indexed
     * @param document document receiving the fields
     */
    private void document(File file, Document document) {
        String content = FileContent.init(file);
        if (content == null) {
            // NOTE(review): FileContent.init is not visible here; guard in case it
            // returns null, because a Lucene Field rejects a null value.
            content = "";
        }
        document.add(new Field("filePath", file.getAbsolutePath(), StringField.TYPE_STORED));
        document.add(new Field("fileName", file.getName(), TextField.TYPE_NOT_STORED));
        document.add(new Field("fileContent", content, TextField.TYPE_NOT_STORED));
    }

    /**
     * Adds a new document for {@code file} to the index and, only once that
     * succeeded, remembers the file for the manifest.
     *
     * @param file file to index
     */
    private void createIndex(File file) {
        try {
            Document document = new Document();
            document(file, document);
            indexWriter.addDocument(document);
            pendingFiles.add(new FileMessage(file.getAbsolutePath(), file.length()));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces the indexed document whose {@code filePath} equals the
     * absolute path of {@code file}.
     *
     * @param file file to re-index
     * @return {@code true} if the document was replaced, {@code false} if an
     *         error occurred (it is reported on stderr)
     */
    private boolean updateIndex(File file) {
        try {
            Document document = new Document();
            document(file, document);
            indexWriter.updateDocument(new Term("filePath", file.getAbsolutePath()), document);
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Walks {@code dir} recursively: files unknown to the manifest are added
     * to the index, files whose size differs from the recorded one are
     * re-indexed, all other files are left alone.
     *
     * @param dir directory to scan
     */
    private void updateDir(File dir) {
        File[] children = dir.listFiles();
        if (children == null) {
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                updateDir(child);
                continue;
            }
            String path = child.getAbsolutePath();
            Long recordedSize = knownSizes.get(path);
            if (recordedSize == null) {
                createIndex(child);
            } else if (recordedSize.longValue() != child.length() && updateIndex(child)) {
                // Only touch the manifest when the index really was updated.
                addDir(path, child.length());
            }
        }
    }

    /**
     * Replaces the manifest entry of a re-indexed file with its new size and
     * rewrites the manifest.
     *
     * @param filePath absolute path of the re-indexed file
     * @param size     current size of the file
     */
    private void addDir(String filePath, long size) {
        // Match "path<TAB>" rather than the bare path, so that entries of other
        // files that merely share the prefix (a.txt vs. a.txt.bak) are kept.
        String entryPrefix = filePath + "\t";
        List<String> refreshed = new ArrayList<>();
        for (String line : manifestLines) {
            if (!line.startsWith(entryPrefix)) {
                refreshed.add(line);
            }
        }
        refreshed.add(entryPrefix + size);
        manifestLines = refreshed;
        knownSizes.put(filePath, size);
        writeFile(false);
        System.out.println("更新成功!");
    }

    /**
     * Appends the files indexed for the first time during the current update
     * to the manifest and rewrites it. Does nothing when there are none.
     */
    private void addDir() {
        if (pendingFiles.isEmpty()) {
            return;
        }
        for (FileMessage fileMessage : pendingFiles) {
            manifestLines.add(fileMessage.toString());
        }
        // Clear so that a later update() does not append the same entries again.
        pendingFiles.clear();
        writeFile(false);
        System.out.println("追加成功!");
    }

    /**
     * Indexes everything under the directory set with
     * {@link #setCreateIndexDir(String)} and writes a fresh manifest.
     * Errors are reported on stderr.
     *
     * <p>NOTE(review): the writer is opened with the default configuration,
     * which appends to an existing index; call {@link #deleteAllFilesOfDir()}
     * first if a clean rebuild is intended.
     */
    public void createIndex() {
        try {
            File manifest = manifestFile();
            if (manifest.exists()) {
                manifest.delete();
            }
            pendingFiles.clear();
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            try {
                analyzerDir(new File(createIndexDir));
            } finally {
                // Always release the writer, even if indexing failed.
                close();
            }
            writeFile(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Brings the index up to date with the directory set with
     * {@link #setCreateIndexDir(String)}: loads the manifest, indexes new
     * files, re-indexes files whose size changed and rewrites the manifest.
     * Errors (including a missing or malformed manifest) are reported on
     * stderr.
     */
    public void update() {
        try {
            // Copy into an ArrayList because the lines are appended to later on.
            manifestLines = new ArrayList<>(FileUtils.readLines(manifestFile(), MANIFEST_ENCODING));
            knownSizes.clear();
            pendingFiles.clear();
            for (String line : manifestLines) {
                String[] parts = line.split("\t");
                knownSizes.put(parts[0], Long.parseLong(parts[1]));
            }
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            try {
                updateDir(new File(createIndexDir));
            } finally {
                close();
            }
            addDir();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the index for files matching a query. The query is parsed with
     * {@code fileName} as the default field.
     *
     * @param key    query string
     * @param number maximum number of hits to return
     * @return one {@link FileMessage} per hit, built from the stored file path
     *         and the hit score; empty if nothing matched or the search failed
     */
    public List<FileMessage> searchIndex(String key, int number) {
        List<FileMessage> hits = new ArrayList<>();
        // The reader is scoped to this call so that repeated searches do not
        // accumulate open readers.
        try (IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            QueryParser queryParser = new QueryParser("fileName", analyzer);
            Query query = queryParser.parse(key);
            TopDocs topDocs = indexSearcher.search(query, number);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                hits.add(new FileMessage(document.get("filePath"), scoreDoc.score));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return hits;
    }

    /**
     * Tells whether the index directory exists and contains at least one entry.
     *
     * @return {@code true} if the index directory is non-empty
     */
    public boolean isIndexExists() {
        // listFiles() is null when the path is missing or not a directory.
        File[] entries = fileIndex.listFiles();
        return entries != null && entries.length > 0;
    }

    /**
     * Deletes the direct children of the index directory. Does nothing when
     * the directory does not exist.
     */
    public void deleteAllFilesOfDir() {
        File[] entries = fileIndex.listFiles();
        if (entries == null) {
            return;
        }
        for (File file : entries) {
            file.delete();
        }
    }


    /**
     * Closes the index writer (if one is still open) and the Lucene directory.
     *
     * @throws Exception if closing a resource fails
     */
    public void closeAll() throws Exception {
        try {
            if (indexWriter != null) {
                indexWriter.close();
                indexWriter = null;
            }
        } finally {
            if (directory != null) {
                directory.close();
            }
        }
    }

    /**
     * Writes the manifest file as UTF-8, the same charset {@link #update()}
     * reads it with.
     *
     * @param listFlag {@code true} to write the newly indexed files (and then
     *                 clear that list), {@code false} to write the manifest
     *                 lines held in memory
     */
    private void writeFile(boolean listFlag) {
        try {
            if (listFlag) {
                FileUtils.writeLines(manifestFile(), MANIFEST_ENCODING, pendingFiles);
                pendingFiles.clear();
            } else {
                FileUtils.writeLines(manifestFile(), MANIFEST_ENCODING, manifestLines);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy