/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.dtstack.jlogstash.inputs;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
import org.codehaus.plexus.util.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;
import com.dtstack.jlogstash.annotation.Required;
import com.dtstack.jlogstash.decoder.IDecode;
import com.dtstack.jlogstash.decoder.JsonDecoder;
import com.dtstack.jlogstash.decoder.MultilineDecoder;
import com.dtstack.jlogstash.decoder.PlainDecoder;
import com.google.common.collect.Lists;
/**
 * Reason: jlogstash file-type input plugin (reads log events from local files)
 * Date: 2016-11-19
 * Company: www.dtstack.com
 * @author xuchao
 *
 */
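/*
 * Illustrative usage sketch, not part of the original source: the constructor receives the
 * plugin's config map, and the static field names below double as the expected config keys
 * (path, exclude, startPosition, pathcodecMap, sinceDbPath, ...). The hypothetical map shown
 * here only illustrates the shape of that config; in a real deployment the jlogstash framework
 * builds it from the YAML config file and injects the values.
 *
 *   Map<String, Object> conf = new HashMap<>();
 *   conf.put("path", Lists.newArrayList("/var/log/app/*.log"));
 *   conf.put("exclude", Lists.newArrayList("*.gz"));
 *   conf.put("startPosition", "beginning");
 *   Map<String, Object> codecs = new HashMap<>();
 *   codecs.put("*.json.log", "json");
 *   codecs.put("*.log", "plain");
 *   conf.put("pathcodecMap", codecs);
 *   File fileInput = new File(conf);
 */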
public class File extends BaseInput{
private static final long serialVersionUID = -1822028651072758886L;
private static final Logger logger = LoggerFactory.getLogger(File.class);
private static String encoding = "UTF-8";
private static Map<String, Object> pathcodecMap = null;
/** per-file line aggregation (codec) rule. key: file name/pattern, val: e.g. multiline, json, plain */
private Map<String, IDecode> codecMap = new ConcurrentHashMap<String, IDecode>();
@Required(required=true)
private static List<String> path;
private static List<String> exclude;
private static int maxOpenFiles = 0;//0 means no limit
private static String startPosition = "end";//one of ["beginning", "end"]
/** key: directory path, val: list of file name patterns to match; checked every 10s */
private Map<String, List<String>> moniDic = new ConcurrentHashMap<String, List<String>>();
/** current read position of each file */
private ConcurrentHashMap<String, Long> fileCurrPos = new ConcurrentHashMap<String, Long>();
private static String sinceDbPath = "./sincedb.yaml";
private static int sinceDbWriteInterval = 15; //sincedb.yaml flush interval (seconds)
/** update the recorded read position after this many lines have been read */
private static int readLineNum4UpdateMap = 1000;
private static byte delimiter = '\n';
private List<String> realPaths = Lists.newArrayList();
/** default: number of CPU cores */
public static int readFileThreadNum = -1;
private Map<Integer, BlockingQueue<String>> threadReadFileMap = new ConcurrentHashMap<>();
private ConcurrentHashMap<String, Long> monitorMap = new ConcurrentHashMap<String, Long>();
private boolean runFlag = false;
private ExecutorService executor;
private ScheduledExecutorService scheduleExecutor;
private ReentrantLock writeFileLock = new ReentrantLock();
public File(Map config) {
super(config);
}
public void init(){
if(pathcodecMap == null || pathcodecMap.size() == 0){
return;
}
if(path == null){
path = Lists.newArrayList();
}
for(Entry<String, Object> entry : pathcodecMap.entrySet()){
String filePatternName = entry.getKey();
Object codecInfo = entry.getValue();
path.add(filePatternName);
if(codecInfo instanceof String){
if("json".equals(codecInfo)){
codecMap.put(filePatternName, new JsonDecoder());
}else if("plain".equals(codecInfo)){
codecMap.put(filePatternName, new PlainDecoder());
}else{
logger.error("invalid codec type:{}. please check config!", codecInfo);
System.exit(-1);
}
}else if( codecInfo instanceof Map){
IDecode multilineDecoder = createMultiLineDecoder((Map)codecInfo);
codecMap.put(filePatternName, multilineDecoder);
}else{
logger.error("invalid codec type:{}, please check param of 'pathcodecMap'.", codecInfo.getClass());
System.exit(-1);
}
}
}
@Override
public void prepare() {
init();
if(path == null || path.size() == 0){
logger.error("don't set any input file. [path, pathcodecMap] must not be empty at the same time.");
System.exit(-1);
}
if(realPaths.size() == 0){
List<String> ps = generateRealPath();
realPaths.addAll(ps);
if(maxOpenFiles > 0 && realPaths.size() > maxOpenFiles){
logger.error("file numbers is exceed, maxOpenFiles is {}", maxOpenFiles);
System.exit(-1);
}
}
if(readFileThreadNum <= 0){
readFileThreadNum = Runtime.getRuntime().availableProcessors();
}
checkoutSinceDb();
filterFinishFile();
ReadLineUtil.setDelimiter(delimiter);
for(String fileStr : realPaths){
addFile(fileStr);
}
runFlag = true;
}
private List<String> generateRealPath(){
List<String> ps = Lists.newArrayList();
for(String p : path){
if(p.contains("*") || p.contains("?")){//wildcard pattern
ps.addAll(getPatternFile(p));
continue;
}
java.io.File file = new java.io.File(p);
if(!file.exists()){
logger.error("file:{} is not exists.", p);
System.exit(-1);
}
if(file.isDirectory()){
for(java.io.File tmpFile : file.listFiles()){
ps.add(tmpFile.getPath());
}
addMonitorDic(p, null);
}else{
ps.add(file.getPath());
}
}
Iterator<String> it = ps.iterator();
for( ;it.hasNext();){
String name = it.next();
if(isExcludeFile(name)){
it.remove();
}
}
return ps;
}
private List<String> getPatternFile(String patternName){
List<String> fileList = Lists.newArrayList();
String dir = patternName.substring(0, patternName.lastIndexOf("/"));
String filePattern = patternName.substring(patternName.lastIndexOf("/") + 1);
java.io.File dirFile = new java.io.File(dir);
if(!dirFile.isDirectory()){
logger.info("don't exists dir in pattern:{}", patternName);
return fileList;
}
addMonitorDic(dir, filePattern);
for(java.io.File tmpFile : dirFile.listFiles()){
if(tmpFile.isDirectory()){
//FIXME for now subdirectories are not matched against the pattern; change here if needed
continue;
}
if(filePatternMatcher(filePattern, tmpFile.getName())){
fileList.add(tmpFile.getPath());
}
}
return fileList;
}
public void addMonitorDic(String dir, String patternName){
List<String> patternList = moniDic.get(dir);
if(patternList == null){
patternList = Lists.newArrayList();
moniDic.put(dir, patternList);
}
if(patternName != null){
patternList.add(patternName);
}
}
private boolean filePatternMatcher(String pattern, String str) {
pattern = pattern.replace("\\", "").replace("/", "");
str = str.replace("\\", "").replace("/", "");
int patternLength = pattern.length();
int strLength = str.length();
int strIndex = 0;
char ch;
for (int patternIndex = 0; patternIndex < patternLength; patternIndex++) {
ch = pattern.charAt(patternIndex);
if (ch == '*') {
//the wildcard '*' matches any number of characters
while (strIndex < strLength) {
if (filePatternMatcher(pattern.substring(patternIndex + 1),
str.substring(strIndex))) {
return true;
}
strIndex++;
}
} else if (ch == '?') {
//the wildcard '?' matches exactly one character
strIndex++;
if (strIndex > strLength) {
//no character left in str to match '?'
return false;
}
} else {
if ((strIndex >= strLength) || (ch != str.charAt(strIndex))) {
return false;
}
strIndex++;
}
}
return (strIndex == strLength);
}
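/*
 * Matching examples (illustration only, derived from the wildcard rules above):
 *   filePatternMatcher("*.log", "access.log")     -> true  ('*' consumes "access")
 *   filePatternMatcher("app-?.log", "app-1.log")  -> true  ('?' matches exactly one char)
 *   filePatternMatcher("*.log", "access.log.gz")  -> false (trailing ".gz" is left unmatched)
 */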
/**
 * Check whether the file matches one of the exclude patterns.
 * @param fileName
 * @return true if the file should be skipped
 */
public boolean isExcludeFile(String fileName){
if(exclude == null){
return false;
}
for(String patternName : exclude){
if(filePatternMatcher(patternName, fileName)){
return true;
}
}
return false;
}
private void checkoutSinceDb(){
Yaml yaml = new Yaml();
java.io.File sinceFile = new java.io.File(sinceDbPath);
if(!sinceFile.exists()){
return;
}
InputStream io = null;
try {
io = new FileInputStream(sinceFile);
Map<String, Object> fileMap = yaml.loadAs(io, Map.class);
if(fileMap == null){
return;
}
for(Entry<String, Object> tmp : fileMap.entrySet()){
if(tmp.getValue() == null){
continue;
}
fileCurrPos.put(tmp.getKey(), Long.valueOf(tmp.getValue() + ""));
}
} catch (FileNotFoundException e) {
logger.error("open file:{} err:{}!", sinceDbPath, e.getCause());
System.exit(1);
}finally{
if(io != null){
try {
io.close();
} catch (IOException e) {
logger.error("", e);
}
}
}
}
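/*
 * Sketch of what sincedb.yaml holds (paths and numbers are illustrative): a flat map from
 * file path to the last read position reported by the reader; -1 marks a file that has been
 * read to completion and is skipped on restart.
 *
 *   /var/log/app/app.log: 10240
 *   /var/log/app/old.log: -1
 */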
/**
 * Filter out files that have already been read to the end.
 */
private void filterFinishFile(){
for(Entry<String, Long> entry : fileCurrPos.entrySet()){
if(entry.getValue().longValue() == -1l){//-1 means the file has been fully read
realPaths.remove(entry.getKey());
}
}
}
/**
 * Write to a temp file and rename it, so a crash never leaves a half-written sincedb.
 */
private void dumpSinceDb(){
FileWriter fw = null;
boolean isSuccess = false;
String tmpSinceDbName = sinceDbPath + ".tmp";
try{
writeFileLock.lock();
Yaml tmpYaml = new Yaml();
fw = new FileWriter(tmpSinceDbName);
tmpYaml.dump(fileCurrPos, fw);
isSuccess = true;
}catch(Exception e){
logger.error("", e);
logger.info("curr file pos:{}", fileCurrPos);
}finally{
if(fw != null){
try {
fw.close();
} catch (IOException e) {
logger.error("", e);
}
}
writeFileLock.unlock();
}
if(!isSuccess){
return;
}
java.io.File srcFile = new java.io.File(tmpSinceDbName);
java.io.File dstFile = new java.io.File(sinceDbPath);
try {
FileUtils.rename(srcFile, dstFile);
} catch (IOException e) {
logger.error("", e);
}
}
public void addFile(String fileName){
int hashCode = Math.abs(fileName.hashCode());
int index = hashCode % readFileThreadNum;
BlockingQueue<String> readQueue = threadReadFileMap.get(index);
if(readQueue == null){
readQueue = new LinkedBlockingQueue<>();
threadReadFileMap.put(index, readQueue);
}
readQueue.offer(fileName);
}
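/*
 * Sharding note (illustration): a given file name always hashes to the same queue index, so a
 * single FileRunnable thread owns all reads of that file and its recorded offsets stay ordered.
 *
 *   int index = Math.abs("/var/log/app/app.log".hashCode()) % readFileThreadNum;
 *   // every addFile("/var/log/app/app.log") call lands in threadReadFileMap.get(index)
 */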
@Override
public void emit() {
executor = Executors.newFixedThreadPool(readFileThreadNum + 2);
scheduleExecutor = Executors.newScheduledThreadPool(1);
executor.submit(new MonitorChangeRunnable());
executor.submit(new MonitorNewFileRunnable());
for(int i=0; i<readFileThreadNum; i++){
executor.submit(new FileRunnable(i));
}
scheduleExecutor.scheduleWithFixedDelay(new DumpSinceDbRunnable(), sinceDbWriteInterval, sinceDbWriteInterval, TimeUnit.SECONDS);
}
public IDecode getDecoder(String fileName){
for(Entry<String, IDecode> entry : codecMap.entrySet()){
if(filePatternMatcher(entry.getKey(), fileName)){
return entry.getValue();
}
}
logger.info("can't find decoder from config. return default decoder.");
return this.getDecoder();
}
class FileRunnable implements Runnable{
private final int index;
public FileRunnable(int index) {
this.index = index;
}
public void run() {
while(runFlag){
BlockingQueue<String> needReadList = threadReadFileMap.get(index);
if(needReadList == null){
logger.warn("invalid FileRunnable thread, threadReadFileMap has no queue for index:{}.", index);
return;
}
String readFileName = null;
try {
readFileName = needReadList.poll(10, TimeUnit.SECONDS);
if(readFileName == null){
continue;
}
} catch (InterruptedException e) {
logger.error("", e);
continue;
}
long lastModTime = 0l;
IReader reader = null;
try {
java.io.File readFile = new java.io.File(readFileName);
if(!readFile.exists()){
logger.error("file:{} is not exists!", readFileName);
continue;
}
lastModTime = readFile.lastModified();
reader = ReadFactory.createReader(readFile, encoding, fileCurrPos, startPosition);
if(reader == null){
continue;
}
String line = null;
int readLineNum = 0;
IDecode fileDecoder = getDecoder(readFileName);
boolean isMultiLine = false;
if(fileDecoder instanceof MultilineDecoder){
isMultiLine = true;
}
while( (line = reader.readLine()) != null){
readLineNum++;
if(!"".equals(line.trim())){
Map<String, Object> event = null;
if(isMultiLine){
event = fileDecoder.decode(line, readFileName);
}else{
event = fileDecoder.decode(line);
}
if (event != null && event.size() > 0){
event.put("path", readFileName);
event.put("offset", reader.getCurrBufPos());
process(event);
}
}
if(readLineNum%readLineNum4UpdateMap == 0){
fileCurrPos.put(reader.getFileName(), reader.getCurrBufPos());
}
}
fileCurrPos.put(readFileName, reader.getCurrBufPos());
} catch (Exception e) {
logger.error("", e);
}finally{
if(reader != null && reader.needMonitorChg()){
monitorMap.put(readFileName, lastModTime);//make sure the file goes back onto the monitor list
}
}
}
}
}
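/*
 * Sketch of an emitted event (keys other than "path" and "offset" depend on the configured
 * codec; a plain codec is assumed here to store the raw line under something like "message"):
 *
 *   { "message": "2016-11-19 12:00:00 INFO started", "path": "/var/log/app/app.log", "offset": 10240 }
 */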
/**
 * Monitors file changes and re-queues changed files into needReadList.
 * Checked every 2s.
 * FIXME deleted files still need to be cleaned up
 * @author xuchao
 *
 */
class MonitorChangeRunnable implements Runnable{
public void run() {
while(runFlag){
try {
Thread.sleep(500);
} catch (InterruptedException e) {
logger.error("", e);
}
Iterator<Entry<String, Long>> iterator = monitorMap.entrySet().iterator();
for( ;iterator.hasNext(); ){
Entry<String, Long> entry = iterator.next();
java.io.File monitorFile = new java.io.File(entry.getKey());
if(!monitorFile.exists()){
logger.info("file:{} not exists,may be delete!", entry.getKey());
continue;
}
// if(monitorFile.lastModified() > entry.getValue()){
// iterator.remove();
// addFile(entry.getKey());
// }
if(fileCurrPos.get(entry.getKey())<monitorFile.length()){//new data has been appended since the last read
iterator.remove();
addFile(entry.getKey());
}
}
}
}
}
/**
 * Monitors the configured directories for newly created files and adds them to the read queue.
 * Checked every 10s.
 */
class MonitorNewFileRunnable implements Runnable{
public void run() {
while(runFlag){
try {
Thread.sleep(10 * 1000);
} catch (InterruptedException e) {
logger.error("", e);
}
for(Entry<String, List<String>> dirTmp : moniDic.entrySet()){
String dicName = dirTmp.getKey();
List<String> patternList = dirTmp.getValue();
java.io.File file = new java.io.File(dicName);
if(!file.exists() || !file.isDirectory()){
continue;
}
for(java.io.File tmpFile : file.listFiles()){
if(tmpFile.isDirectory()){//FIXME newly created subdirectories are not monitored
continue;
}
if(isExcludeFile(tmpFile.getPath())){
continue;
}
if(fileCurrPos.get(tmpFile.getPath()) != null && fileCurrPos.get(tmpFile.getPath()).longValue() == -1l){
continue;//file already fully read
}
if(patternList.size() == 0 && !realPaths.contains(tmpFile.getPath())){
realPaths.add(tmpFile.getPath());
addFile(tmpFile.getPath());
continue;
}
for(String patternName : patternList){
if(filePatternMatcher(patternName, tmpFile.getName()) && !realPaths.contains(tmpFile.getPath())){
realPaths.add(tmpFile.getPath());
addFile(tmpFile.getPath());
break;
}
}
}
}
}
}
}
class DumpSinceDbRunnable implements Runnable{
public void run() {
dumpSinceDb();
}
}
}