All downloads are free. The search and download features use the official Maven repository.

de.mediathekview.mlib.tool.FilenameUtils Maven / Gradle / Ivy

Go to download

A central library with tools and utils for the MediathekView Client and the MediathekView Server

The newest version!
package de.mediathekview.mlib.tool;

import java.io.File;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;

import com.jidesoft.utils.SystemInfo;

/**
 * Static helpers that turn arbitrary text (for example film titles or user input) into file and
 * path names that are acceptable on the operating system the application is running on.
 *
 * <p>None of the methods accept {@code null} for the name argument; passing {@code null} results
 * in a {@link NullPointerException}.
 *
 * User: crystalpalace1977
 * Date: 28.12.14
 * Time: 16:02
 */
public class FilenameUtils {

    /**
     * Regular expression matching the characters that are replaced in a single file name on
     * Windows.
     * Based on http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
     */
    public static final String REGEXP_ILLEGAL_CHARACTERS_WINDOWS = "[:\\\\/*?|<>\"]";

    /**
     * Same as {@link #REGEXP_ILLEGAL_CHARACTERS_WINDOWS} but for whole paths: the backslash is
     * not matched so that it survives as the path separator.
     */
    public static final String REGEXP_ILLEGAL_CHARACTERS_WINDOWS_PATH = "[:/*?|<>\"]";

    /**
     * Regular expression matching the characters that are replaced in a single file name on
     * UNIX-like operating systems.
     */
    public static final String REGEXP_ILLEGAL_CHARACTERS_OTHERS = "[:\\\\/*|<>]";

    /**
     * Same as {@link #REGEXP_ILLEGAL_CHARACTERS_OTHERS} but for whole paths: the forward slash is
     * not matched so that it survives as the path separator.
     */
    public static final String REGEXP_ILLEGAL_CHARACTERS_OTHERS_PATH = "[:\\\\*|<>]";

    /** Regular expression matching the NUL character. */
    private static final String REGEXP_NUL = "\\u0000";

    /**
     * Cleans a file or path name the same way it would be cleaned before use, without applying
     * the user replacement table. According to the original author's note this is only used to
     * flag problematic input in the UI (the text field turns red).
     *
     * @param name   The file or path name to check; must not be {@code null}.
     * @param isPath {@code true} if {@code name} is a path whose separators and (on Windows)
     *               drive colon have to be preserved.
     * @return The cleaned name.
     */
    public static String checkDateiname(final String name, final boolean isPath) {
        final boolean windows = SystemInfo.isWindows();
        // String.split / replaceFirst take a regex, so the backslash has to be escaped twice.
        final String separatorRegex = windows ? "\\\\" : "/";

        String ret = name;
        boolean isWindowsPath = false;

        if (windows) {
            ret = removeWindowsTrailingDots(ret);
            if (isPath && hasDriveColon(ret)) {
                // Accept "d:" as well as "d:\" as a path. The colon would otherwise be replaced
                // as an illegal character, so it is taken out here and re-inserted at the end.
                isWindowsPath = true;
                ret = ret.replaceFirst(":", "");
            }
        } else {
            ret = removeStartingDots(ret);
        }

        if (isPath && ret.contains(File.separator)) {
            // Clean every directory name on its own; each part is a plain name, not a path.
            final StringBuilder rebuilt = new StringBuilder();
            for (final String part : ret.split(separatorRegex)) {
                if (!part.isEmpty()) {
                    rebuilt.append(File.separator).append(convertToNativeEncoding(part, false));
                }
            }
            String cleanedPath = rebuilt.toString();
            if (!ret.startsWith(File.separator)) {
                // The loop always prepends a separator; drop it again for relative paths.
                cleanedPath = cleanedPath.replaceFirst(separatorRegex, "");
            }
            if (ret.endsWith(File.separator)) {
                cleanedPath = cleanedPath + File.separator;
            }
            ret = cleanedPath;
        } else {
            ret = convertToNativeEncoding(ret, false);
        }

        if (isWindowsPath) {
            ret = restoreDriveColon(ret);
        }

        return ret;
    }

    /**
     * Tells whether the second character of the given path is a colon, as in {@code "d:"}.
     *
     * @param path The path to inspect.
     * @return {@code true} if the path has at least two characters and the second one is ':'.
     */
    private static boolean hasDriveColon(final String path) {
        return path.length() > 1 && path.charAt(1) == ':';
    }

    /**
     * Re-inserts the drive colon after the first character of a path it was removed from.
     *
     * @param path The cleaned path without its drive colon.
     * @return The path with ':' after its first character, or the unchanged path if it is empty.
     */
    private static String restoreDriveColon(final String path) {
        if (path.isEmpty()) {
            return path;
        }
        return path.charAt(0) + ":" + path.substring(1);
    }

    /**
     * Remove stray trailing dots and spaces from a string; used when running on Windows, where
     * names ending in "." or " " caused problems (e.g. the title "betrifft: ...").
     *
     * @param fileName A filename string that might end with dots or spaces.
     * @return The string without any trailing dots or spaces.
     */
    private static String removeWindowsTrailingDots(String fileName) {
        int end = fileName.length();
        while (end > 0) {
            final char last = fileName.charAt(end - 1);
            if (last != '.' && last != ' ') {
                break;
            }
            end--;
        }
        return fileName.substring(0, end);
    }

    /**
     * Remove leading dots from a string; used when running on Linux/OS X, where names starting
     * with dots caused problems (e.g. the title "....Paula").
     *
     * @param fileName A filename string that might start with dots.
     * @return The string without any leading dots.
     */
    private static String removeStartingDots(String fileName) {
        int start = 0;
        while (start < fileName.length() && fileName.charAt(start) == '.') {
            start++;
        }
        return fileName.substring(start);
    }

    /**
     * Encodes the text with the given charset, replacing every character the charset cannot
     * represent with '_', and decodes the result back into a string. NUL characters are removed
     * from the result.
     *
     * @param text    The text to convert.
     * @param charset The charset the text has to be representable in.
     * @return The converted text, or the unchanged text if encoding failed.
     */
    private static String encodeWithReplacement(final String text, final Charset charset) {
        String result = text;
        try {
            final CharsetEncoder encoder = charset.newEncoder();
            // REPLACE instead of REPORT, otherwise encoding aborts on the first bad character.
            encoder.onMalformedInput(CodingErrorAction.REPLACE);
            encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
            encoder.replaceWith(new byte[]{'_'});

            final ByteBuffer encoded = encoder.encode(CharBuffer.wrap(text));
            if (encoded.hasArray()) {
                // Decode only the bytes that were actually written (the backing array may be
                // larger than the content) and use the same charset that produced them.
                result = new String(encoded.array(),
                        encoded.arrayOffset() + encoded.position(),
                        encoded.remaining(),
                        charset);
            }

            // NUL is never valid in a file name.
            result = result.replaceAll(REGEXP_NUL, "");
        } catch (CharacterCodingException e) {
            // Not expected with CodingErrorAction.REPLACE; keep the unconverted text.
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Removes the characters that are illegal on the current OS and replaces every character
     * that cannot be represented in the platform default charset with '_'.
     *
     * @param fileName The filename string.
     * @param isPath   {@code true} if path separators must be kept.
     * @return The cleaned string, restricted to characters of the default charset.
     */
    private static String convertToNativeEncoding(String fileName, boolean isPath) {
        final String cleaned = removeIllegalCharacters(fileName, isPath);
        return encodeWithReplacement(cleaned, Charset.defaultCharset());
    }

    /**
     * Transliterates the filename to US-ASCII: German umlauts and sharp s are spelled out, other
     * known non-ASCII characters are simplified, characters illegal on the current OS are
     * removed and everything that is still not US-ASCII is replaced with '_'.
     *
     * @param fileName The filename string.
     * @param isPath   {@code true} if path separators must be kept.
     * @return The cleaned string, restricted to US-ASCII characters.
     */
    private static String convertToASCIIEncoding(String fileName, boolean isPath) {
        String ret = fileName;

        ret = ret.replace("ä", "ae");
        ret = ret.replace("ö", "oe");
        ret = ret.replace("ü", "ue");
        ret = ret.replace("Ä", "Ae");
        ret = ret.replace("Ö", "Oe");
        ret = ret.replace("Ü", "Ue");
        ret = ret.replace("ß", "ss");

        // Simplify the remaining non-ASCII characters as far as they are known.
        ret = cleanUnicode(ret);

        ret = removeIllegalCharacters(ret, isPath);

        return encodeWithReplacement(ret, Charset.forName("US-ASCII"));
    }

    /**
     * Copies all Basic Latin characters unchanged and maps every other character to its
     * replacement as defined by {@link #transliterate(char)}.
     *
     * @param ret The text to simplify.
     * @return The simplified text.
     */
    private static String cleanUnicode(String ret) {
        final StringBuilder cleaned = new StringBuilder(ret.length());
        for (int i = 0; i < ret.length(); ++i) {
            final char c = ret.charAt(i);
            if (Character.UnicodeBlock.of(c) == Character.UnicodeBlock.BASIC_LATIN) {
                cleaned.append(c);
            } else {
                cleaned.append(transliterate(c));
            }
        }
        return cleaned.toString();
    }

    /**
     * Maps a single character outside the Basic Latin block to its replacement text.
     *
     * @param c The character to map.
     * @return The replacement; an empty string drops the character, unknown characters yield "_".
     */
    private static String transliterate(final char c) {
        switch (c) {
            case 'ß':
                return "ß";
            case 'Â':
            case 'À':
            case 'Å':
            case 'Á':
                return "A";
            case 'å':
            case 'á':
            case 'à':
            case 'â':
                return "a";
            case 'Č':
                return "C";
            case 'ć':
            case 'č':
            case 'ç':
                return "c";
            case 'Đ':
                return "D";
            case 'É':
            case 'È':
                return "E";
            case 'é':
            case 'è':
            case 'ê':
            case 'ě':
            case 'ë':
                return "e";
            case 'í':
                return "i";
            case 'ñ':
                return "n";
            case 'ó':
            case 'ô':
            case 'ø':
                return "o";
            case 'Š':
                return "S";
            case 'ś':
            case 'š':
            case 'ş':
                return "s";
            // L with stroke is a variant of the letter L, not of T.
            case 'ł':
                return "l";
            case 'Ł':
                return "L";
            case 'û':
            case 'ù':
                return "u";
            case 'ý':
                return "y";
            case 'Ž':
            case 'Ź':
                return "Z";
            case 'ž':
            case 'ź':
                return "z";
            case 'æ':
                return "ae";
            case '–':
                return "-";
            case '„':
            case '”':
            case '“':
            case '«':
            case '»':
                return "\"";
            case '°':
            case '™':
                return "";
            case '…':
                return "...";
            case '€':
                // Kept here; the subsequent US-ASCII encoding replaces it.
                return "€";
            case '´':
            case '’':
            case '‘':
            case '¿':
                return "'";
            case '\u0096':
                return "-";
            case '\u0085':
            case '\u0080':
            case '\u0084':
            case '\u0092':
            case '\u0093':
                return "";
            case '\u0091':
                return "-";
            default:
                return "_";
        }
    }

    /**
     * Remove illegal characters from String based on current OS. Illegal characters are replaced
     * with '_'; in addition leading dots (non-Windows) or trailing dots and spaces (Windows) are
     * stripped.
     *
     * @param input  The input string; must not be {@code null}.
     * @param isPath Flag to switch replacing of illegal path characters on and off
     * @return Cleaned-up string.
     */
    public static String removeIllegalCharacters(final String input, boolean isPath) {
        String ret = input;

        switch (Functions.getOs()) {
            case MAC:
            case LINUX:
                // On OS X the VFS takes care of writing correct filenames to FAT filesystems,
                // so only the default illegal characters are replaced.
                ret = removeStartingDots(ret);
                ret = ret.replaceAll(isPath ? REGEXP_ILLEGAL_CHARACTERS_OTHERS_PATH : REGEXP_ILLEGAL_CHARACTERS_OTHERS, "_");
                break;

            case WIN64:
            case WIN32:
                // Be more conservative on Windows (e.g. FAT32) and replace more characters.
                ret = removeWindowsTrailingDots(ret);
                ret = ret.replaceAll(isPath ? REGEXP_ILLEGAL_CHARACTERS_WINDOWS_PATH : REGEXP_ILLEGAL_CHARACTERS_WINDOWS, "_");
                break;

            default:
                // Any other OS: strip leading dots as on UNIX-like systems, but use the stricter
                // Windows character set to stay safe on file systems such as FAT32.
                ret = removeStartingDots(ret);
                ret = ret.replaceAll(isPath ? REGEXP_ILLEGAL_CHARACTERS_WINDOWS_PATH : REGEXP_ILLEGAL_CHARACTERS_WINDOWS, "_");
                break;
        }

        return ret;
    }

    /**
     * Remove illegal file name characters
     *
     * @param name        The file name; must not be {@code null}.
     * @param isPath      Flag to switch replacing of illegal path characters on and off
     * @param userReplace Flag to switch the user replacing table on and off
     * @param onlyAscii   Flag to switch if only ASCII characters should be allowed
     * @return The cleaned-up name.
     */
    public static String replaceLeerDateiname(String name, boolean isPath, boolean userReplace, boolean onlyAscii) {
        String ret = name;
        boolean isWindowsPath = false;
        if (SystemInfo.isWindows() && isPath && hasDriveColon(ret)) {
            // Accept "d:" as well as "d:\" as a path. The colon would otherwise be replaced as an
            // illegal character, so it is taken out here and re-inserted at the end.
            isWindowsPath = true;
            ret = ret.replaceFirst(":", "");
        }

        // First apply the replacement table with the user's preferences.
        if (userReplace) {
            ret = ReplaceList.replace(ret, isPath);
        }

        // Then, if requested, restrict the name to ASCII characters only.
        if (onlyAscii) {
            ret = convertToASCIIEncoding(ret, isPath);
        } else {
            ret = convertToNativeEncoding(ret, isPath);
        }

        if (isWindowsPath) {
            ret = restoreDriveColon(ret);
        }
        return ret;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy