All downloads are free. The search and download functionality uses the official Maven repository.

com.dtstack.jlogstash.inputs.File Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.dtstack.jlogstash.inputs;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;

import org.codehaus.plexus.util.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;

import com.dtstack.jlogstash.annotation.Required;
import com.dtstack.jlogstash.decoder.IDecode;
import com.dtstack.jlogstash.decoder.JsonDecoder;
import com.dtstack.jlogstash.decoder.MultilineDecoder;
import com.dtstack.jlogstash.decoder.PlainDecoder;
import com.google.common.collect.Lists;

/**
 * Reason: jlogstash input plugin that reads events from files
 * Date: 2016-11-19
 * Company: www.dtstack.com
 * @author xuchao
 *
 */
public class File extends BaseInput{
	
	private static final long serialVersionUID = -1822028651072758886L;

	private static final Logger logger = LoggerFactory.getLogger(File.class);
	
	/** Character encoding handed to the file readers. */
	private static String encoding = "UTF-8";
	
	/**
	 * Optional per-pattern codec configuration. key: file name / pattern,
	 * value: either the string "json" or "plain", or a map describing a multiline decoder.
	 */
	private static Map<String, Object> pathcodecMap = null;

	/** Line aggregation rule per file. key: file name / pattern, value: decoder (e.g. multiline, json, plain). */
	private Map<String, IDecode> codecMap = new ConcurrentHashMap<String, IDecode>();
	
	/** Files, directories or wildcard patterns to read. */
	@Required(required=true)
	private  static  List<String> path;
	
	/** Wildcard patterns of files that must not be read; may be null. */
	private  static List<String> exclude;
	
	private  static int maxOpenFiles = 0;// 0 means no upper limit
		
	private  static String startPosition = "end";//one of ["beginning", "end"]
	
	/** key: directory path, value: list of file patterns watched in it (original note: checked every 10s). */
	private Map<String, List<String>> moniDic = new ConcurrentHashMap<String, List<String>>();
	
	/** Current read position of each file; -1 marks a file that has been read completely. */
	private ConcurrentHashMap<String, Long> fileCurrPos = new ConcurrentHashMap<String, Long>();
	
	private static String sinceDbPath = "./sincedb.yaml";
		
	private static int sinceDbWriteInterval = 15; // update frequency of sincedb.yaml (seconds)
	
	/** After this many lines have been read, the current read position of the file is updated. */
	private static int readLineNum4UpdateMap = 1000;
	
	private static byte delimiter = '\n';
	
	/** Concrete file paths resolved from {@code path}. */
	private List<String> realPaths = Lists.newArrayList();
	
	/** Number of reader threads; default (any value &lt;= 0): number of CPU threads. */
	public static int readFileThreadNum = -1;
		
	/** key: reader thread index, value: queue of file names that thread has to read. */
	private Map<Integer, BlockingQueue<String>> threadReadFileMap = new ConcurrentHashMap<>();
		
	/** key: file name, value: last-modified time recorded when the file was handed back for monitoring. */
	private ConcurrentHashMap<String, Long> monitorMap = new ConcurrentHashMap<String, Long>();
	
	private boolean runFlag = false;
	
	private ExecutorService executor;
	
	private ScheduledExecutorService scheduleExecutor;
	
	private ReentrantLock writeFileLock = new ReentrantLock();

	/**
	 * Creates the input and forwards the plugin configuration to the base class.
	 *
	 * @param config plugin configuration, passed unchanged to {@code BaseInput}
	 */
	public File(Map config) {
		super(config);
	}
	
	/**
	 * Registers the codecs configured in {@code pathcodecMap}.
	 * <p>
	 * Every key of {@code pathcodecMap} is added to {@code path} and mapped to a decoder in
	 * {@code codecMap}: "json" gives a {@link JsonDecoder}, "plain" a {@link PlainDecoder}, and a
	 * map value is handed to {@code createMultiLineDecoder}. Any other value (including
	 * {@code null}) is a configuration error: it is logged and the process exits with -1.
	 * Does nothing when {@code pathcodecMap} is null or empty.
	 */
	public void init(){
		
		if(pathcodecMap == null || pathcodecMap.isEmpty()){
			return;
		}
		
		if(path == null){
			path = Lists.newArrayList();
		}
		
		for(Entry<String, Object> entry : pathcodecMap.entrySet()){
			String filePatternName = entry.getKey();
			Object codecInfo = entry.getValue();
			
			// path is static and init() runs on every prepare(); never register a pattern twice,
			// otherwise its files would be resolved (and read) twice.
			if(!path.contains(filePatternName)){
				path.add(filePatternName);
			}
			
			if(codecInfo instanceof String){
				if("json".equals(codecInfo)){
					codecMap.put(filePatternName, new JsonDecoder());
				}else if("plain".equals(codecInfo)){
					codecMap.put(filePatternName, new PlainDecoder());
				}else{
					logger.error("invalid codec type:{}. please check config!", codecInfo);
					System.exit(-1);
				}
			}else if(codecInfo instanceof Map){
				IDecode multilineDecoder = createMultiLineDecoder((Map)codecInfo);
				codecMap.put(filePatternName, multilineDecoder);
			}else{
				// codecInfo may be null here (instanceof is false for null); guard the getClass() call
				// so the configuration error is reported instead of a NullPointerException.
				Object codecType = (codecInfo == null) ? null : codecInfo.getClass();
				logger.error("invalid codec type:{}, please check param of 'pathcodecMap'.", codecType);
				System.exit(-1);
			}
		}
	}

	/**
	 * Resolves the configured inputs and queues every file for reading.
	 * <p>
	 * Runs {@link #init()}, exits the process when no input path is configured or when more
	 * files than {@code maxOpenFiles} were resolved, defaults the reader thread count to the
	 * number of available processors, restores the saved positions, drops files already read
	 * to the end, and finally raises {@code runFlag}.
	 */
	@Override
	public void prepare() {
		
		init();
		
		boolean noInputConfigured = (path == null) || path.isEmpty();
		if(noInputConfigured){
			logger.error("don't set any input file. [path, pathcodecMap] must not be empty at the same time.");
			System.exit(-1);
		}
		
		if(realPaths.isEmpty()){
			realPaths.addAll(generateRealPath());
			
			boolean limited = maxOpenFiles > 0;
			if(limited && realPaths.size() > maxOpenFiles){
				logger.error("file numbers is exceed, maxOpenFiles is {}", maxOpenFiles);
				System.exit(-1);
			}
		}
		
		if(readFileThreadNum < 1){
			readFileThreadNum = Runtime.getRuntime().availableProcessors();
		}
		
		checkoutSinceDb();
		filterFinishFile();
		ReadLineUtil.setDelimiter(delimiter);
		
		Iterator<String> pending = realPaths.iterator();
		while(pending.hasNext()){
			addFile(pending.next());
		}
		
		runFlag = true;
	}
	
	/**
	 * Expands the configured {@code path} entries into concrete file paths.
	 * <p>
	 * Entries containing {@code *} or {@code ?} are resolved through
	 * {@link #getPatternFile(String)}. A directory contributes its direct child files
	 * (sub-directories are skipped, as in the pattern lookup and the new-file monitor) and is
	 * registered for monitoring. A plain file is added as is. A non-wildcard entry that does
	 * not exist is logged and terminates the process. Files matching an {@code exclude}
	 * pattern are removed from the result.
	 *
	 * @return the resolved file paths, never null
	 */
	private List<String> generateRealPath(){
		
		List<String> ps = Lists.newArrayList();
		for(String p : path){
			
			if(p.contains("*") || p.contains("?")){// wildcard match
				ps.addAll(getPatternFile(p));
				continue;
			}
			
			java.io.File file = new java.io.File(p);
			if(!file.exists()){
				logger.error("file:{} is not exists.", p);
				System.exit(-1);
			}
			
			if(!file.isDirectory()){
				ps.add(file.getPath());
				continue;
			}
			
			addMonitorDic(p, null);
			
			// listFiles() returns null when the directory cannot be read (I/O error, permissions).
			java.io.File[] children = file.listFiles();
			if(children == null){
				logger.warn("can't list files of dir:{}.", p);
				continue;
			}
			
			for(java.io.File child : children){
				if(child.isDirectory()){
					// a sub-directory is not a readable log file
					continue;
				}
				ps.add(child.getPath());
			}
		}
		
		Iterator<String> it = ps.iterator();
		while(it.hasNext()){
			if(isExcludeFile(it.next())){
				it.remove();
			}
		}
		
		return ps;
	}
	
	/**
	 * Lists the files that match a wildcard path such as {@code /var/log/*.log}.
	 * <p>
	 * The part before the last {@code /} is the directory, the rest is the file pattern. A
	 * pattern without any {@code /} is resolved against the current directory, and a pattern
	 * directly under the root keeps {@code /} as its directory. The directory and pattern are
	 * registered for monitoring so that files created later are picked up. Sub-directories
	 * are not searched.
	 *
	 * @param patternName path whose last segment may contain {@code *} and {@code ?}
	 * @return paths of the matching files; empty when the directory does not exist
	 */
	private List<String> getPatternFile(String patternName){
		
		List<String> fileList = Lists.newArrayList();
		
		int sepIdx = patternName.lastIndexOf("/");
		String dir;
		if(sepIdx < 0){
			dir = ".";	// no directory part: substring(0, -1) would throw
		}else if(sepIdx == 0){
			dir = "/";	// pattern directly under the root directory
		}else{
			dir = patternName.substring(0, sepIdx);
		}
		String filePattern = patternName.substring(sepIdx + 1);
		
		java.io.File dirFile = new java.io.File(dir);
		if(!dirFile.isDirectory()){
			logger.info("don't exists dir in pattern:{}", patternName);
			return fileList;
		}
		
		addMonitorDic(dir, filePattern);
		
		// listFiles() returns null when the directory cannot be read.
		java.io.File[] children = dirFile.listFiles();
		if(children == null){
			logger.warn("can't list files of dir:{}.", dir);
			return fileList;
		}
		
		for(java.io.File tmpFile : children){
			
			if(tmpFile.isDirectory()){
				//FIXME files in sub-directories are not pattern-matched for now; change here if needed
				continue;
			}
			
			if(filePatternMatcher(filePattern, tmpFile.getName())){
				fileList.add(tmpFile.getPath());
			}
		}
		
		return fileList;
	}
	
	/**
	 * Registers a directory for monitoring, optionally together with a file pattern.
	 *
	 * @param dir directory to watch
	 * @param patternName pattern to add to the directory's pattern list, or null to register
	 *        the directory without adding a pattern
	 */
	public void addMonitorDic(String dir, String patternName){
		List<String> patterns = moniDic.get(dir);
		boolean firstSeen = (patterns == null);
		if(firstSeen){
			patterns = Lists.newArrayList();
			moniDic.put(dir, patterns);
		}
		
		if(patternName == null){
			return;
		}
		
		patterns.add(patternName);
	}
	
	/**
	 * Matches a string against a simple wildcard pattern.
	 * <p>
	 * {@code *} matches any run of characters (including none) and {@code ?} matches exactly
	 * one character; every other character must match literally. All {@code /} and
	 * {@code \} characters are removed from both arguments before matching.
	 *
	 * @param pattern wildcard pattern
	 * @param str text to test
	 * @return true when the whole text matches the pattern
	 */
	private boolean filePatternMatcher(String pattern, String str) {
		
		final String glob = pattern.replace("\\", "").replace("/", "");
		final String text = str.replace("\\", "").replace("/", "");
		final int textLen = text.length();
		
		int pos = 0;
		for (int i = 0, globLen = glob.length(); i < globLen; i++) {
			final char c = glob.charAt(i);
			switch (c) {
			case '*': {
				// try to match the rest of the pattern at every remaining position of the text
				final String rest = glob.substring(i + 1);
				for (; pos < textLen; pos++) {
					if (filePatternMatcher(rest, text.substring(pos))) {
						return true;
					}
				}
				break;
			}
			case '?':
				// needs one character of text to consume
				if (pos >= textLen) {
					return false;
				}
				pos++;
				break;
			default:
				if (pos >= textLen || text.charAt(pos) != c) {
					return false;
				}
				pos++;
				break;
			}
		}
		return pos == textLen;
	}
		
	/**
	 * Tells whether a file is filtered out by the {@code exclude} configuration.
	 *
	 * @param fileName file name or path to test
	 * @return true when any exclude pattern matches; false when none matches or no exclude
	 *         list is configured
	 */
	public boolean isExcludeFile(String fileName){
		
		if(exclude != null){
			Iterator<String> patterns = exclude.iterator();
			while(patterns.hasNext()){
				if(filePatternMatcher(patterns.next(), fileName)){
					return true;
				}
			}
		}
		
		return false;
	}
	
	/**
	 * Loads the saved read positions from the sincedb YAML file into {@code fileCurrPos}.
	 * <p>
	 * Does nothing when the file does not exist or is empty. Entries with a null value are
	 * skipped. If the file cannot be opened the error is logged and the process exits with 1;
	 * a failure while closing the stream is only logged.
	 */
	private void checkoutSinceDb(){
		java.io.File sinceFile = new java.io.File(sinceDbPath);
		if(!sinceFile.exists()){
			return;
		}
		
		try (InputStream io = new FileInputStream(sinceFile)) {
			@SuppressWarnings("unchecked")	// the sincedb file is written by dumpSinceDb from a String -> Long map
			Map<String, Object> fileMap = new Yaml().loadAs(io, Map.class);
			if(fileMap == null){
				return;
			}

			for(Entry<String, Object> tmp : fileMap.entrySet()){
				if(tmp.getValue() == null){
					continue;
				}

				fileCurrPos.put(tmp.getKey(), Long.valueOf(tmp.getValue() + ""));
			}
		} catch (FileNotFoundException e) {
			// pass the exception itself: getCause() is normally null here and hid the real reason
			logger.error("open file:{} err!", sinceDbPath, e);
			System.exit(1);
		} catch (IOException e) {
			logger.error("", e);
		}
	}
	
	/**
	 * Drops from {@code realPaths} every file whose saved position is -1,
	 * i.e. files that have already been read to the end.
	 */
	private void filterFinishFile(){
		Iterator<Entry<String, Long>> positions = fileCurrPos.entrySet().iterator();
		while(positions.hasNext()){
			Entry<String, Long> position = positions.next();
			boolean finished = position.getValue().longValue() == -1L;
			if(finished){
				realPaths.remove(position.getKey());
			}
		}
	}
	
	/**
	 * Persists {@code fileCurrPos} to the sincedb file.
	 * <p>
	 * The positions are first written to {@code sinceDbPath + ".tmp"} and that file is then
	 * renamed over the real one, so a partially written file never replaces a good one.
	 * The whole write-and-rename sequence runs under {@code writeFileLock}; the lock is
	 * always released, including when the temporary file cannot be opened. Failures are
	 * logged and never propagated.
	 */
	private void dumpSinceDb(){
		
		String tmpSinceDbName = sinceDbPath + ".tmp";
		
		// Acquire outside the try block so that unlock() only runs when the lock is really held.
		writeFileLock.lock();
		try{
			// try-with-resources: no close() on a null writer when the constructor fails, and a
			// failed close (possibly unflushed data) prevents the rename below.
			try (FileWriter fw = new FileWriter(tmpSinceDbName)) {
				new Yaml().dump(fileCurrPos, fw);
			}catch(Exception e){
				logger.error("", e);
				logger.info("curr file pos:{}", fileCurrPos);
				return;
			}
			
			java.io.File srcFile = new java.io.File(tmpSinceDbName);
			java.io.File dstFile = new java.io.File(sinceDbPath);
			try {
				FileUtils.rename(srcFile, dstFile);
			} catch (IOException e) {
				logger.error("", e);
			}
		}finally{
			writeFileLock.unlock();
		}
	}
	
	/**
	 * Queues a file for reading on the reader thread selected by the hash of its name.
	 * <p>
	 * The queue of an index is created on first use. The method is called from
	 * {@code prepare()} as well as from the monitor threads, so the lookup-or-create step
	 * is synchronized to make sure all callers share one queue per index.
	 *
	 * @param fileName path of the file to read
	 */
	public void addFile(String fileName){
		// Take the remainder first: Math.abs(Integer.MIN_VALUE) is still negative and would
		// produce an index that no reader thread serves. For every other hash the result
		// equals Math.abs(hash) % readFileThreadNum.
		int index = Math.abs(fileName.hashCode() % readFileThreadNum);
		
		BlockingQueue<String> readQueue;
		synchronized(threadReadFileMap){
			readQueue = threadReadFileMap.get(index);
			if(readQueue == null){
				readQueue = new LinkedBlockingQueue<>();
				threadReadFileMap.put(index, readQueue);
			}
		}
		
		readQueue.offer(fileName);
	}

	/**
	 * Starts the background work of this input.
	 * <p>
	 * The visible part creates a fixed thread pool of {@code readFileThreadNum + 2} threads
	 * and a single-threaded scheduler, then submits the two monitor runnables.
	 * <p>
	 * NOTE(review): the text of this region is damaged. Everything between {@code i} and
	 * {@code entry} in the {@code for} header below is missing, so the rest of this method
	 * and the start of the following method(s) are not visible. Restore from the upstream
	 * source before compiling; do not guess the missing code.
	 */
	@Override
	public void emit() {
		executor = Executors.newFixedThreadPool(readFileThreadNum + 2);
		scheduleExecutor = Executors.newScheduledThreadPool(1);
		
		executor.submit(new MonitorChangeRunnable());
		executor.submit(new MonitorNewFileRunnable());
		// NOTE(review): from "entry" onwards this looks like the tail of the decoder lookup
		// called as getDecoder(fileName) by FileRunnable: it returns the decoder of the first
		// codecMap key that pattern-matches fileName - confirm against upstream.
		for(int i=0; i entry : codecMap.entrySet()){
			if(filePatternMatcher(entry.getKey(), fileName)){
				return entry.getValue();
			}
		}
		
		// no configured pattern matched: fall back to the decoder returned by this.getDecoder()
		logger.info("can't find decoder from config. return default decoder.");
		return this.getDecoder();
	}
		
		
	/**
	 * Reader worker: takes file names from the queue registered in
	 * {@code threadReadFileMap} under its index and turns every non-blank line into an event.
	 * <p>
	 * The queue of an index is only created once a file is hashed to it, so a worker whose
	 * queue does not exist yet keeps waiting instead of terminating; otherwise files added
	 * later for that index would never be read. The worker stops when {@code runFlag}
	 * becomes false or when its thread is interrupted.
	 */
	class FileRunnable implements Runnable{
								
		private final int index;
		
		public FileRunnable(int index) {
			this.index = index;
		}

		@Override
		public void run() {
			
			boolean waitingLogged = false;
			while(runFlag){
				
				String readFileName;
				try {
					BlockingQueue<String> needReadList = threadReadFileMap.get(index);
					if(needReadList == null){
						if(!waitingLogged){
							logger.info("threadReadFileMap has no needReadList for index:{} yet, waiting for files.", index);
							waitingLogged = true;
						}
						TimeUnit.SECONDS.sleep(1);
						continue;
					}
					
					readFileName = needReadList.poll(10, TimeUnit.SECONDS);
				} catch (InterruptedException e) {
					// restore the interrupt status and stop: the thread was asked to shut down
					logger.error("", e);
					Thread.currentThread().interrupt();
					return;
				}
				
				if(readFileName != null){
					consumeFile(readFileName);
				}
			}
		}
		
		/**
		 * Reads one file from its saved position to the current end, emitting an event per
		 * non-blank line and recording the read position. Errors are logged, not propagated.
		 */
		private void consumeFile(String readFileName){
			
			long lastModTime = 0L;
			IReader reader = null;
			try {
				java.io.File target = new java.io.File(readFileName);
				if(!target.exists()){
					logger.error("file:{} is not exists!", readFileName);
					return;
				}
				
				lastModTime = target.lastModified();
				reader = ReadFactory.createReader(target, encoding, fileCurrPos, startPosition);
				if(reader == null){
					return;
				}
				
				IDecode fileDecoder = getDecoder(readFileName);
				// the multiline decoder is called with the file name as second argument
				boolean isMultiLine = fileDecoder instanceof MultilineDecoder;
				
				String line;
				int readLineNum = 0;
				while( (line = reader.readLine()) != null){
					readLineNum++;
					
					if(!line.trim().isEmpty()){
						Map<String, Object> event = isMultiLine
								? fileDecoder.decode(line, readFileName)
								: fileDecoder.decode(line);
						
						if(event != null && event.size() > 0){
							event.put("path", readFileName);
							event.put("offset", reader.getCurrBufPos());
							process(event);
						}
					}
					
					// checkpoint the position every readLineNum4UpdateMap lines
					if(readLineNum % readLineNum4UpdateMap == 0){
						fileCurrPos.put(reader.getFileName(), reader.getCurrBufPos());
					}
				}
				
				fileCurrPos.put(readFileName, reader.getCurrBufPos());
			} catch (Exception e) {
				logger.error("", e);
			}finally{
				if(reader != null && reader.needMonitorChg()){
					monitorMap.put(readFileName, lastModTime);// make sure the file returns to the monitor list
				}
			}
		}
	}
	
	/**
	 * Monitors the files in {@code monitorMap} for changes; changed files are meant to be
	 * inserted into needReadList again.
	 * The original note said "checked every 2s"; the loop below sleeps 500 ms between passes.
	 * FIXME deleted files need to be cleaned up
	 * <p>
	 * NOTE(review): the text of this region is damaged. The type arguments of the iterator
	 * declaration are missing, and everything between {@code fileCurrPos.get(entry.getKey())}
	 * and {@code dirTmp} is missing as well. The remainder appears to be the body of the
	 * directory monitor (submitted as MonitorNewFileRunnable in emit()) - confirm against
	 * upstream; do not guess the missing code.
	 * @author xuchao
	 *
	 */
	class MonitorChangeRunnable implements Runnable{

		public void run() {
			
			while(runFlag){
				try {
					Thread.sleep(500);
				} catch (InterruptedException e) {
					logger.error("", e);
				}
				
				Iterator> iterator = monitorMap.entrySet().iterator();
				for( ;iterator.hasNext(); ){
					Entry entry = iterator.next();
					java.io.File monitorFile = new java.io.File(entry.getKey());
					if(!monitorFile.exists()){
						// the entry stays in monitorMap, so this is logged on every pass
						logger.info("file:{} not exists,may be delete!", entry.getKey());
						continue;
					}
										
//					if(monitorFile.lastModified() > entry.getValue()){
//						iterator.remove();
//						addFile(entry.getKey());
//					}
					// NOTE(review): text lost in the middle of the next line (see class comment).
					// From here on each monitored directory (key) is scanned with its pattern list (value).
					if(fileCurrPos.get(entry.getKey())> dirTmp : moniDic.entrySet()){
					
					String dicName = dirTmp.getKey();
					List patternList = dirTmp.getValue();
					
					java.io.File file = new java.io.File(dicName);					
					
					if(!file.exists() || !file.isDirectory()){
						continue;
					}
					
					for(java.io.File tmpFile : file.listFiles()){
						
						if(tmpFile.isDirectory()){//FIXME newly created sub-directories are not monitored
							continue;
						}
						
						if(isExcludeFile(tmpFile.getPath())){
							continue;
						}
						
						if(fileCurrPos.get(tmpFile.getPath()) != null && fileCurrPos.get(tmpFile.getPath()).longValue() == -1l){
							continue;// file already read completely
						}
						
						// directory registered without patterns: every new file in it is read
						if(patternList.size() == 0 && !realPaths.contains(tmpFile.getPath())){
							realPaths.add(tmpFile.getPath());
							addFile(tmpFile.getPath());
							continue;
						}
						
						// otherwise a new file is read when it matches one of the directory's patterns
						for(String patternName : patternList){
							if(filePatternMatcher(patternName, tmpFile.getName()) && !realPaths.contains(tmpFile.getPath())){
								realPaths.add(tmpFile.getPath());
								addFile(tmpFile.getPath());
								break;
							}
						}
					}
				}
			}
		}
		
	}
	
	/** Task that writes the current read positions to the sincedb file. */
	class DumpSinceDbRunnable implements Runnable{

		/** Delegates to {@code dumpSinceDb()} of the enclosing input. */
		@Override
		public void run() {
			File.this.dumpSinceDb();
		}
		
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy