![JAR search and dependency download from the Maven repository](/logo.png)
com.igormaznitsa.charsniffer.CharSnifferMojo Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of char-sniffer Show documentation
Show all versions of char-sniffer Show documentation
Maven plugin to check char codes of text files
The newest version!
/*
* Copyright 2017 Igor Maznitsa.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.igormaznitsa.charsniffer;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugins.annotations.LifecyclePhase;
import org.apache.maven.plugins.annotations.Mojo;
import org.apache.maven.plugins.annotations.Parameter;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.util.HashSet;
import java.util.Set;
import javax.annotation.Nonnull;
import org.apache.commons.io.FileUtils;
/**
 * Maven mojo (goal {@code sniff}, bound by default to the {@code package} phase)
 * which checks the chars of the listed text files and fails the build if a
 * file violates one of the configured restrictions.
 *
 * <p>For every file the checks are executed in the following order and the
 * first failed check stops processing of the file: char code range, allowed and
 * prohibited chars, end-of-line style, UTF-8 byte sequence validation. Details
 * about a detected violation are written only into the debug log.
 */
@Mojo(name = "sniff", defaultPhase = LifecyclePhase.PACKAGE, threadSafe = true)
public class CharSnifferMojo extends AbstractMojo {

  /**
   * Width of the column, in chars, which a file name is padded to with dots in a status line.
   */
  private static final int STATUS_NAME_WIDTH = 64;

  /**
   * Text files whose chars will be sniffed.
   */
  @Parameter(property = "files", required = true)
  private File[] files;

  /**
   * Minimal allowed char code. A negative value turns the check off.
   */
  @Parameter(property = "minCharCode", required = false, defaultValue = "-1")
  private int minCharCode;

  /**
   * Maximal allowed char code. A negative value turns the check off.
   */
  @Parameter(property = "maxCharCode", required = false, defaultValue = "-1")
  private int maxCharCode;

  /**
   * Char set used to decode file content into text.
   */
  @Parameter(property = "charSet", required = false, defaultValue = "UTF-8")
  private String charSet;

  /**
   * String of chars which are the only ones allowed to be presented in a file.
   */
  @Parameter(property = "abc", required = false)
  private String abc;

  /**
   * String of chars which are prohibited to be presented in a file.
   */
  @Parameter(property = "noAbc", required = false)
  private String noAbc;

  /**
   * Fail if a sniffed file has zero length.
   */
  @Parameter(property = "failForEmptyFile", defaultValue = "false")
  private boolean failForEmptyFile;

  /**
   * Validate file bytes as UTF-8. Allows detection of wrong UTF-8 byte chains.
   */
  @Parameter(property = "validateUtf8", defaultValue = "false")
  private boolean validateUtf8;

  /**
   * Skip ISO control chars during the allowed/prohibited chars check.
   */
  @Parameter(property = "ignoreAbcForISOControl", defaultValue = "true")
  private boolean ignoreAbcForISOControl;

  /**
   * Required End-Of-Line codes (CR,LF,CRLF). Only the first line terminator
   * found in a file is compared with the value.
   */
  @Parameter(property = "eol", required = false, defaultValue = "UNDEFINED")
  private EndOfLine eol;

  /**
   * Allow missing files. If false then the first listed path which is not an
   * existing regular file aborts the build.
   */
  @Parameter(property = "missingFilesAllowed", defaultValue = "false")
  private boolean missingFilesAllowed;

  /**
   * Result of file processing shown in the status line of the file.
   */
  private enum FileStatus {
    OK, BAD, MISSED
  }

  /**
   * Log a status line in the format {@code <file name>....<STATUS>}.
   * BAD is logged as error, MISSED as warning and any other status as info.
   *
   * @param file the processed file, only its short name is printed
   * @param status the status to be shown for the file
   */
  private void printStatus(@Nonnull final File file, @Nonnull final FileStatus status) {
    final String fileName = file.getName();
    // a name longer than the column gives a negative count, then no dots are added
    final int dots = STATUS_NAME_WIDTH - fileName.length();
    final StringBuilder buffer = new StringBuilder(128);
    buffer.append(fileName);
    for (int i = 0; i < dots; i++) {
      buffer.append('.');
    }
    buffer.append(status.name());
    final String line = buffer.toString();

    switch (status) {
      case BAD:
        getLog().error(line);
        break;
      case MISSED:
        getLog().warn(line);
        break;
      default:
        getLog().info(line);
        break;
    }
  }

  /**
   * Remember a wrong char and add it, quoted, to a comma separated list.
   * A char which has been already registered is not added twice.
   *
   * @param c the detected wrong char
   * @param errorChars set of already registered wrong chars
   * @param errorBuffer buffer accumulating the list of wrong chars
   */
  private static void registerErrorChar(final char c, @Nonnull final Set<Character> errorChars, @Nonnull final StringBuilder errorBuffer) {
    if (errorChars.add(c)) {
      if (errorBuffer.length() > 0) {
        errorBuffer.append(',');
      }
      errorBuffer.append('\'').append(c).append('\'');
    }
  }

  /**
   * Check that the code of every char of a text is in the configured range.
   * A negative bound is ignored, if both bounds are negative then any text is valid.
   *
   * @param text the text to be checked
   * @param config the configuration providing {@code minCode} and {@code maxCode}
   * @param errorBuffer buffer receiving the comma separated list of distinct wrong chars
   * @return true if no char out of the range has been found, false otherwise
   */
  static boolean checkForCodes(@Nonnull final String text, @Nonnull final CheckConfig config, @Nonnull final StringBuilder errorBuffer) {
    final Set<Character> errorChars = new HashSet<Character>();
    if (config.minCode >= 0 || config.maxCode >= 0) {
      for (int i = 0; i < text.length(); i++) {
        final char c = text.charAt(i);
        final boolean belowMin = config.minCode >= 0 && c < config.minCode;
        final boolean aboveMax = config.maxCode >= 0 && c > config.maxCode;
        if (belowMin || aboveMax) {
          registerErrorChar(c, errorChars, errorBuffer);
        }
      }
    }
    return errorChars.isEmpty();
  }

  /**
   * Check chars of a text against the allowed and the prohibited char sets.
   * A char is wrong if the allowed set is defined and doesn't contain the char,
   * or if the prohibited set is defined and contains the char. ISO control chars
   * are skipped if {@code ignoreAbcForISOControl} is set in the configuration.
   *
   * @param text the text to be checked
   * @param config the configuration providing {@code abc}, {@code noAbc} and {@code ignoreAbcForISOControl}
   * @param errorBuffer buffer receiving the comma separated list of distinct wrong chars
   * @return true if no wrong char has been found, false otherwise
   */
  static boolean checkForAbc(@Nonnull final String text, @Nonnull final CheckConfig config, @Nonnull final StringBuilder errorBuffer) {
    final String allowed = config.abc;
    final String disallowed = config.noAbc;
    final Set<Character> errorChars = new HashSet<Character>();
    if (allowed != null || disallowed != null) {
      for (int i = 0; i < text.length(); i++) {
        final char c = text.charAt(i);
        if (config.ignoreAbcForISOControl && Character.isISOControl(c)) {
          continue;
        }
        final boolean notAllowed = allowed != null && allowed.indexOf(c) < 0;
        final boolean prohibited = disallowed != null && disallowed.indexOf(c) >= 0;
        if (notAllowed || prohibited) {
          registerErrorChar(c, errorChars, errorBuffer);
        }
      }
    }
    return errorChars.isEmpty();
  }

  /**
   * Check that a byte array can be decoded as UTF-8 without errors.
   *
   * @param input the bytes to be checked
   * @return true if the decoder has accepted the whole array, false if it has reported a coding error
   */
  static boolean isValidUTF8(@Nonnull final byte[] input) {
    final CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
    try {
      decoder.decode(ByteBuffer.wrap(input));
      return true;
    } catch (CharacterCodingException e) {
      return false;
    }
  }

  /**
   * Check that the first line terminator of a text is the configured one.
   * The check is passed if the configured value is {@code UNDEFINED} or if
   * the text doesn't contain any line terminator.
   *
   * @param text the text to be checked
   * @param config the configuration providing the required {@code eol}
   * @return true if the check is passed, false otherwise
   */
  static boolean checkForEOL(@Nonnull final String text, @Nonnull final CheckConfig config) {
    if (config.eol == EndOfLine.UNDEFINED) {
      return true;
    }
    final EndOfLine detected = findFirstEOL(text);
    return detected == EndOfLine.UNDEFINED || detected == config.eol;
  }

  /**
   * Find the first line terminator in a text.
   *
   * @param text the text to be scanned
   * @return {@code CRLF}, {@code LF} or {@code CR} for the first found terminator,
   * {@code UNDEFINED} if the text doesn't have any
   */
  @Nonnull
  static EndOfLine findFirstEOL(@Nonnull final String text) {
    char prev = ' ';
    for (int i = 0; i < text.length(); i++) {
      final char curChar = text.charAt(i);
      if (curChar == '\n') {
        return prev == '\r' ? EndOfLine.CRLF : EndOfLine.LF;
      }
      if (prev == '\r') {
        // CR followed by a char which is not LF
        return EndOfLine.CR;
      }
      prev = curChar;
    }
    // the loop ends without result only if there is no LF, so only a trailing CR is possible here
    return prev == '\r' ? EndOfLine.CR : EndOfLine.UNDEFINED;
  }

  /**
   * Read a file and execute all configured checks for it. Checks are executed
   * one by one and the first failed one stops the processing. The reason of
   * a fail is written into the debug log.
   *
   * @param file the file to be checked
   * @param config the configuration of checks
   * @return true if all checks are passed, false if a check is failed or the file can't be read
   */
  private boolean checkFile(@Nonnull final File file, @Nonnull final CheckConfig config) {
    try {
      if (getLog().isDebugEnabled()) {
        getLog().debug("Sniffing file : " + file);
      }

      final String textBody = FileUtils.readFileToString(file, config.charSet);
      final StringBuilder errorMessageBuffer = new StringBuilder();

      boolean result = checkForCodes(textBody, config, errorMessageBuffer);
      if (!result && getLog().isDebugEnabled()) {
        getLog().debug("Detected wrong chars : " + errorMessageBuffer.toString());
      }

      if (result) {
        errorMessageBuffer.setLength(0);
        result = checkForAbc(textBody, config, errorMessageBuffer);
        // the message is logged only if the ABC check itself has failed, so that
        // a fail of the code check doesn't produce a misleading empty ABC report
        if (!result && getLog().isDebugEnabled()) {
          getLog().debug("Detected wrong ABC chars : " + errorMessageBuffer.toString());
        }
      }

      if (result) {
        result = checkForEOL(textBody, config);
        if (!result && getLog().isDebugEnabled()) {
          getLog().debug("Detected wrong EOL");
        }
      }

      if (result && config.validateUtf8) {
        // raw bytes are needed for the validation because the decoded text is already a string
        result = isValidUTF8(FileUtils.readFileToByteArray(file));
        if (!result && getLog().isDebugEnabled()) {
          getLog().debug("File '" + file + "' contains wrong UTF-8 byte sequence");
        }
      }

      return result;
    } catch (IOException ex) {
      getLog().error("Can't read text file : " + file, ex);
      return false;
    }
  }

  /**
   * Sniff all listed files and print a status line for each of them.
   *
   * @throws MojoExecutionException if a file is not found and missing files are not allowed
   * (thrown immediately for the first such file), or, after processing of all files,
   * if at least one file is bad
   */
  @Override
  public void execute() throws MojoExecutionException {
    final CheckConfig config = CheckConfig.build().
        setAbc(this.abc).
        setNoAbc(this.noAbc).
        setCharSet(this.charSet).
        setEol(this.eol).
        setMinCode(this.minCharCode).
        setMaxCode(this.maxCharCode).
        setValidateUtf8(this.validateUtf8).
        setIgnoreAbcForISOControl(this.ignoreAbcForISOControl).
        build();

    int errors = 0;

    for (final File file : this.files) {
      if (file.isFile()) {
        if (file.length() == 0L && this.failForEmptyFile) {
          printStatus(file, FileStatus.BAD);
          if (getLog().isDebugEnabled()) {
            getLog().debug("File '" + file + "' has zero length");
          }
          errors++;
        } else if (checkFile(file, config)) {
          printStatus(file, FileStatus.OK);
        } else {
          printStatus(file, FileStatus.BAD);
          errors++;
        }
      } else {
        printStatus(file, FileStatus.MISSED);
        if (getLog().isDebugEnabled()) {
          getLog().debug("File '" + file + "' not found");
        }
        if (!this.missingFilesAllowed) {
          throw new MojoExecutionException("Can't find file : " + file);
        }
      }
    }

    if (errors > 0) {
      throw new MojoExecutionException("Detected bad files, check log");
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy