/*
 * javax0.jamal.tools.FileTools
 *
 * Source listing taken from the jamal-tools artifact (Jamal macro library support tools),
 * as published for Maven / Gradle / Ivy.
 * Note: there is a newer version of this artifact: 2.8.1
 */
package javax0.jamal.tools;

import javax0.jamal.api.BadSyntax;
import javax0.jamal.api.BadSyntaxAt;
import javax0.jamal.api.EnvironmentVariables;
import javax0.jamal.api.Input;
import javax0.jamal.api.Position;
import javax0.jamal.api.Processor;
import javax0.jamal.api.ResourceReader;
import javax0.jamal.api.ServiceLoaded;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static javax0.jamal.tools.Input.makeInput;

/**
 * Utility class containing static methods handling files.
 */
public class FileTools {


    /**
     * Prefix of file names that denote a web resource. A file name starting with this prefix is rejected
     * when writing a file.
     */
    private static final String HTTPS_PREFIX = "https://";

    /**
     * Create a new input from a file.
     * <p>
     * Reads the file and returns an input object that has the content of the file.
     * <p>
     * The file can be
     * <ul>
     *     <li>a plain file,</li>
     *     <li>a Java resource (the file name starts with {@code res:}), or</li>
     *     <li>a {@code https} downloadable content (the file name starts with {@code https:}).</li>
     * </ul>
     * <p>
     * If the content comes from a https URL then the local cache is checked first.
     * <p>
     * There is no cache eviction. All files downloaded once are in the cache and the remote is never checked again.
     * If the URL contains the string literal {@code SNAPSHOT} (all capital letters) it is not cached.
     * <p>
     * There is no way to download a resource using the {@code http} protocol.
     * <p>
     * This variant always allows the cache to be used; it delegates to
     * {@link #getInput(String, boolean, Processor)} with {@code noCache} set to {@code false}.
     *
     * @param fileName  the name of the file. This is used to open and read the file as well as the reference file
     *                  name in the input. When the file name starts with the characters {@code res:} then the rest of
     *                  the string is treated as the name of a Java resource. That way Jamal can load a Java resource
     *                  from some JAR that is on the classpath. If the file name starts with {@code https:} then the
     *                  string is treated as a URL. In that case the URL is fetched, and if there is a cache directory
     *                  configured it will be loaded from the cache.
     * @param processor is used to invoke the callback hooks registered for file access
     * @return the input containing the content of the file.
     * @throws BadSyntax if the file cannot be read.
     */
    public static Input getInput(final String fileName, final Processor processor) throws BadSyntax {
        final boolean noCache = false; // reading from the cache is allowed
        return getInput(fileName, noCache, processor);
    }

    /**
     * Create a new input from a file, with control over the use of the cache.
     * <p>
     * The content is fetched with {@link #getFileContent(String, boolean, Processor)} and the position of the
     * returned input is created from the file name alone (no parent position).
     *
     * @param fileName  the name of the file to read; it is also the file name of the position of the input
     * @param noCache   passed on unchanged to {@link #getFileContent(String, boolean, Processor)}
     * @param processor is used to invoke the callback hooks registered for file access
     * @return the input containing the content of the file.
     * @throws BadSyntax if the file cannot be read.
     */
    public static Input getInput(final String fileName, final boolean noCache, Processor processor) throws BadSyntax {
        final String content = getFileContent(fileName, noCache, processor);
        final var position = new Position(fileName);
        return makeInput(content, position);
    }

    /**
     * Same as {@link #getInput(String, Processor)} but this method also specifies the parent position. It is usually
     * the file that includes or imports the other file that is being read.
     * <p>
     * It delegates to {@link #getInput(String, Position, boolean, Processor)} with {@code noCache} set to
     * {@code false}.
     *
     * @param fileName  the name of the file to be read
     * @param parent    the parent/including/importing file position
     * @param processor is used to invoke the callback hooks registered for file access
     * @return the input containing the content of the file.
     * @throws BadSyntax if the file cannot be read.
     */
    public static Input getInput(String fileName, Position parent, final Processor processor) throws BadSyntax {
        final boolean noCache = false; // reading from the cache is allowed
        return getInput(fileName, parent, noCache, processor);
    }

    /**
     * This method is the same as {@link #getInput(String, Position, boolean, Processor)} but it
     * tries to load the file from different directories. The directories are specified in the {@code prefixes} array.
     * <p>
     * Technically, the method does not care if the strings represent a directory or not.
     * It simply concatenates each prefix with the file name, in the order of the array, and tries to load the file.
     * The first successful attempt wins. It is therefore the caller's responsibility to make sure that the prefixes
     * are directories, and that either the prefixes end with a {@code /} or the file name starts with one.
     *
     * @param prefixes  the prefixes to try to load the file from
     * @param fileName  the name of the file to be read. This file name is usually relative, and the absolute file name
     *                  will be calculated from the parent file name and this file name, for each prefix.
     * @param parent    the parent/including/importing file position. (See {@link #getInput(String, Position, boolean,
     *                  Processor)})
     * @param noCache   if {@code true} then the cache is not used to read the file.
     * @param processor is used to invoke the callback hooks registered for file access
     * @return the input containing the content of the file.
     * @throws BadSyntax when there is no file with any of the prefixes. In this case, all the BadSyntax exceptions
     *                   collected during the different attempts are handed over to the final exception.
     */
    public static Input getInput(final String[] prefixes,
                                 final String fileName,
                                 final Position parent,
                                 final boolean noCache,
                                 final Processor processor) throws BadSyntax {
        // explicitly typed: a raw ArrayList would make every add() and the hand-over below unchecked
        final var exceptions = new ArrayList<BadSyntax>();
        for (final var prefix : prefixes) {
            try {
                var absoluteFn = absolute(parent.file, prefix + fileName);
                return getInput(absoluteFn, parent, noCache, processor);
            } catch (BadSyntax e) {
                // remember the failure and go on with the next prefix
                exceptions.add(e);
            }
        }
        throw new BadSyntax("Cannot read file '" + fileName + "' from any of the directories: "
                + String.join(", ", prefixes), exceptions);
    }

    /**
     * Get the input from the file.
     * <p>
     * The position of the returned input names the file, starts at line 1, column 1 and has {@code parent} as its
     * parent position.
     *
     * @param fileName  is the name of the file to get
     * @param parent    is the position of the input that needs the content of this file. It becomes the parent of the
     *                  position of the returned input.
     * @param noCache   if {@code true} then the cache is not used to read the file.
     * @param processor is used to invoke the callback hooks registered for file access
     * @return the input object
     * @throws BadSyntax when the file cannot be read
     */
    public static Input getInput(String fileName, Position parent, final boolean noCache, final Processor processor) throws BadSyntax {
        final String content = getFileContent(fileName, noCache, processor);
        final var startOfFile = new Position(fileName, 1, 1, parent);
        return makeInput(content, startOfFile);
    }

    /**
     * Get the content of the file, allowing the cache to be used.
     * <p>
     * Delegates to {@link #getFileContent(String, boolean, Processor)} with {@code noCache} set to {@code false}.
     *
     * @param fileName  the name of the file.
     * @param processor is used to invoke the callback hooks registered for file access
     * @return the content of the file
     * @throws BadSyntax if the file cannot be read
     */
    public static String getFileContent(String fileName, Processor processor) throws BadSyntax {
        final boolean noCache = false; // reading from the cache is allowed
        return getFileContent(fileName, noCache, processor);
    }

    /**
     * The resource readers found by the service loader. They are asked, in list order, whether they can handle
     * a file name; the first one that can is used. The element type is declared explicitly: with a raw
     * {@code List} the lambdas calling {@code canRead}, {@code read} or {@code fileStart} on the elements do not
     * compile.
     */
    private static final List<ResourceReader> readers = ServiceLoaded.getInstances(ResourceReader.class);


    /**
     * Get the content of the file as a string.
     * <p>
     * The steps are:
     * <ol>
     *     <li>The file reader hook of the processor, if there is one, is asked first. It may deliver the content
     *     itself, redirect to another file name, or leave the request alone.</li>
     *     <li>Otherwise the first resource reader that declares it can read the file name is used.</li>
     *     <li>If no resource reader delivers a content, the name is read as an ordinary file.</li>
     * </ol>
     * After a redirect or an actual read the content is reported back to the file reader hook under the
     * originally requested file name.
     *
     * @param fileName  the name of the file.
     * @param noCache   handed over to the resource reader that reads the file. It is not used when the name is read
     *                  as an ordinary file.
     * @param processor is used to invoke the callback hooks registered for file access
     * @return the content of the file
     * @throws BadSyntax if the file cannot be read
     */
    public static String getFileContent(final String fileName, final boolean noCache, final Processor processor) throws BadSyntax {
        final var hookResult = processor.getFileReader()
                .map(hook -> hook.read(fileName))
                .orElse(Processor.IOHookResult.IGNORE);
        switch (hookResult.type()) {
            case DONE:
                // the hook delivered the content itself
                return hookResult.get();
            case REDIRECT:
                // the hook named another file to be read instead of the requested one
                final var redirected = getFileContent(hookResult.get(), noCache, processor);
                processor.getFileReader().ifPresent(hook -> hook.set(fileName, redirected));
                return redirected;
            default:
                break;
        }
        if (readers.isEmpty()) {
            readers.addAll(ServiceLoaded.getInstances(ResourceReader.class));
        }
        try {
            readers.forEach(r -> r.setProcessor(processor));
            final var firstCapable = readers.stream()
                    .filter(r -> r.canRead(fileName))
                    .findFirst();
            final String content = firstCapable
                    .map(r -> {
                        try {
                            return r.read(fileName, noCache);
                        } catch (IOException e) {
                            // checked exceptions cannot leave the lambda, they are unwrapped by the outer catch
                            throw new UncheckedIOException(e);
                        }
                    })
                    .orElseGet(() -> {
                        try {
                            return FileInput.getInput(fileName);
                        } catch (IOException e) {
                            throw new UncheckedIOException(e);
                        }
                    });
            processor.getFileReader().ifPresent(hook -> hook.set(fileName, content));
            return content;
        } catch (UncheckedIOException e) {
            throw new BadSyntax("Cannot get the content of the file '" + fileName + "'", e);
        }
    }

    /**
     * Get the content of the file as bytes.
     * <p>
     * The steps are the same as for the textual content: the file reader hook of the processor is asked first
     * (it may deliver the content, redirect to another name, or leave the request alone), then the first resource
     * reader that can read the file name is used, and if none delivers a content the name is read as an ordinary
     * file. After a redirect or an actual read the bytes are reported back to the file reader hook under the
     * originally requested file name.
     *
     * @param fileName  the name of the file.
     * @param noCache   handed over to the resource reader that reads the file. It is not used when the name is read
     *                  as an ordinary file.
     * @param processor is used to invoke the callback hooks registered for file access
     * @return the binary content of the file
     * @throws BadSyntax if the file cannot be read
     */
    public static byte[] getFileBinaryContent(final String fileName, final boolean noCache, final Processor processor) throws BadSyntax {
        final var hookResult = processor.getFileReader()
                .map(hook -> hook.read(fileName))
                .orElse(Processor.IOHookResult.IGNORE);
        switch (hookResult.type()) {
            case DONE:
                // the hook delivered the content itself
                return hookResult.getBinary();
            case REDIRECT:
                // the hook named another file to be read instead of the requested one
                final var redirected = getFileBinaryContent(hookResult.get(), noCache, processor);
                processor.getFileReader().ifPresent(hook -> hook.set(fileName, redirected));
                return redirected;
            default:
                break;
        }
        if (readers.isEmpty()) {
            readers.addAll(ServiceLoaded.getInstances(ResourceReader.class));
        }
        try {
            readers.forEach(r -> r.setProcessor(processor));
            final var firstCapable = readers.stream()
                    .filter(r -> r.canRead(fileName))
                    .findFirst();
            final byte[] content = firstCapable
                    .map(r -> {
                        try {
                            return r.readBinary(fileName, noCache);
                        } catch (IOException e) {
                            // checked exceptions cannot leave the lambda, they are unwrapped by the outer catch
                            throw new UncheckedIOException(e);
                        }
                    })
                    .orElseGet(() -> {
                        try {
                            return FileInput.getBinaryInput(fileName);
                        } catch (IOException e) {
                            throw new UncheckedIOException(e);
                        }
                    });
            processor.getFileReader().ifPresent(hook -> hook.set(fileName, content));
            return content;
        } catch (UncheckedIOException e) {
            throw new BadSyntax("Cannot get the content of the file '" + fileName + "'", e);
        }
    }

    /**
     * Write the content into the file using UTF-8 encoding.
     * <p>
     * The file writer hook of the processor, if there is one, is asked first. It may perform the writing itself,
     * redirect the writing to another file name, or leave the request alone. Only in the last case does this method
     * write the file. Missing parent directories are created on a best-effort basis.
     *
     * @param fileName  the name of the file to write
     * @param content   the text to write into the file
     * @param processor is used to invoke the callback hooks registered for file access
     * @throws BadSyntax if the file name denotes something a resource reader handles, if it is a web resource, or
     *                   if the file cannot be written
     */
    public static void writeFileContent(String fileName, String content, final Processor processor) throws BadSyntax {
        final var res = processor.getFileWriter().map(s -> s.write(fileName, content)).orElse(Processor.IOHookResult.IGNORE);
        switch (res.type()) {
            case DONE:
                // the hook has written the content
                return;
            case REDIRECT:
                // the hook named another file to write into
                writeFileContent(res.get(), content, processor);
                return;
            default:
                break;
        }
        try {
            if (readers.stream().anyMatch(r -> r.canRead(fileName))) {
                throw new BadSyntax("Cannot write into a resource.");
            }
            BadSyntax.when(fileName.startsWith(HTTPS_PREFIX), "Cannot write into a web resource.");
            File file = new File(fileName);
            if (file.getParentFile() != null) {
                // a failure here is not checked: opening the stream below fails if the directory is missing
                //noinspection ResultOfMethodCallIgnored
                file.getParentFile().mkdirs();
            }
            try (final var fos = new FileOutputStream(file)) {
                fos.write(content.getBytes(StandardCharsets.UTF_8));
            }
        } catch (IOException e) {
            // this is a write failure; the message used to claim that the content could not be *read*
            throw new BadSyntax("Cannot write the content of the file '" + fileName + "'", e);
        }
    }

    /**
     * Replacements for absolute file names, keyed by the file name to be replaced.
     * The element types are declared explicitly: with a raw {@code Map} the looked-up value is an {@code Object}
     * and cannot be used as a file name.
     */
    private static final Map<String, String> devPaths = new HashMap<>();

    /*
     * Fill the replacement map from the development path environment setting, if it is present.
     * When the value names an existing file, the definitions are read from that file, one per line.
     * Otherwise the value itself is split into definitions. Every definition has the form 'original=replacement'.
     */
    static {
        EnvironmentVariables.getenv(EnvironmentVariables.JAMAL_DEV_PATH_ENV).ifPresent(devPath -> {
            final String[] paths;
            if (new File(devPath).exists()) {
                try {
                    // "\\R" matches every kind of line break, so a file with Windows line endings does not leave
                    // a trailing '\r' at the end of the replacement values
                    paths = Files.readString(Paths.get(devPath), StandardCharsets.UTF_8).split("\\R", -1);
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            } else {
                paths = InputHandler.getParts(makeInput(devPath));
            }
            for (String path : paths) {
                // skip empty and comment lines in case we read from file
                final var trimmed = path.trim();
                if (trimmed.isEmpty() || trimmed.startsWith("#")) {
                    continue;
                }
                // only the first '=' separates; the replacement itself may contain '=' characters
                final var parts = path.split("=", 2);
                if (parts.length == 2) {
                    devPaths.put(parts[0], parts[1]);
                } else {
                    throw new RuntimeException("Invalid dev path: " + path);
                }
            }
        });
    }

    /**
     * Convert the file name to an absolute file name if it is relative to the directory containing the reference file.
     * Note that {@code reference} is the name of a file and not a directory.
     * <p>
     * If the name of the file starts with one of the characters:
     * <ul>
     *     <li>{@code /}</li>
     *     <li>{@code \}</li>
     *     <li>{@code ~}</li>
     * </ul>
     * or starts with an alpha character and a {@code :} (DOS drive letter, like {@code C:}),
     * <p>
     * or starts with the resource prefix {@code res:},
     * <p>
     * or starts with the HTTPS prefix {@code https:},
     * <p>
     * then the file name is absolute.
     * <p>
     * Absolute file names are usually not modified. If the environment variable {@code JAMAL_DEV_PATH} is set, or the
     * system property {@code jamal.dev.path} is set, then the file name is replaced by the value specified as a
     * replacement in the variable, if any. A leading {@code ~/} is then replaced by the home directory of the user.
     * <p>
     * Otherwise the string in the parameter {@code reference} is used as if it was a file name (the file does not need
     * to exist), {@code fileName} is treated as a relative file name, and the path is calculated relative to the
     * directory part of the reference. Backslashes are converted to {@code /} in the result. When {@code reference}
     * is {@code null}, the file name is only normalized.
     *
     * @param reference the name of the reference file
     * @param fileName  the name of the file, absolute or relative
     * @return the absolute file name of the file
     */
    public static String absolute(final String reference, final String fileName) {
        if (isAbsolute(fileName)) {
            return adjustedFileName(devPaths.getOrDefault(fileName, fileName));
        }
        final String slashedReference = reference == null ? "." : reference.replace('\\', '/');

        // separate the protocol-like prefix (if any) from the part that is handled as a path
        final int pathStart = fileStart(slashedReference);
        final String prefix = pathStart >= 0 ? slashedReference.substring(0, pathStart) : "";
        final String referenceFile = pathStart >= 0 ? slashedReference.substring(pathStart) : slashedReference;

        // the directory of the reference file, including the closing '/', or empty if there is no directory part
        final int lastSlash = referenceFile.lastIndexOf('/');
        final String referenceDirectory = lastSlash >= 0 ? referenceFile.substring(0, lastSlash + 1) : "";

        final var resolved = Paths.get(referenceDirectory).resolve(Paths.get(fileName)).normalize();
        return prefix + resolved.toString().replace('\\', '/');
    }

    /**
     * Convert the file name to an absolute file name if it starts with the characters {@code ~/}. The {@code ~}
     * character denotes the home directory of the user. The home directory is determined by the system property
     * {@code user.home}. This notation is usually resolved by the shell; the file handling system calls don't honour
     * it.
     * <p>
     * Any other file name, including the empty string and a lone {@code ~}, is returned unchanged.
     *
     * @param fileName optionally starting with {@code ~/}
     * @return the original file name, or the file name with the leading {@code ~} replaced by the home directory of
     * the user
     */
    public static String adjustedFileName(final String fileName) {
        // startsWith() is safe for names shorter than two characters, where charAt(0)/charAt(1) would throw
        if (fileName.startsWith("~/")) {
            return System.getProperty("user.home") + fileName.substring(1);
        }
        return fileName;
    }

    /**
     * Calculates the relative path of a file with respect to another file.
     * <p>
     * This version of the algorithm also works when the target file is not under the directory of the base file.
     * The {@link URI#relativize(URI)} method returns the absolute path in that case and does not insert ".." parts
     * into the returned path.
     * <p>
     * If the base and the target files are in totally different directories, meaning that there is no common part at
     * the start of the path, then the result will contain as many ".." elements as needed to get to the root.
     * This is not a realistic case, but it still may happen with some containerized environments.
     * However, the generated file, where the relative path is used may not be containerized and the absolute path
     * would not work there.
     * <p>
     * Both paths are made absolute and normalized before they are compared, so {@code .} and {@code ..} elements in
     * the arguments do not distort the number of ".." elements in the result. If {@code baseFile} is an existing
     * regular file, the path is calculated relative to its directory; otherwise {@code baseFile} itself is treated
     * as the base directory.
     *
     * @param baseFile   The base file to which the relative path is calculated.
     * @param targetFile The target file whose relative path is to be found.
     * @return The relative path of the target file with respect to the base file, using {@code /} as separator.
     */
    public static String getRelativePath(File baseFile, File targetFile) {
        final var base = baseFile.toPath().toAbsolutePath().normalize().toString().split("[/\\\\]");
        // the last element of an existing regular file is the file name, not a directory
        final var baseLength = baseFile.isFile() ? base.length - 1 : base.length;
        final var target = targetFile.toPath().toAbsolutePath().normalize().toString().split("[/\\\\]");
        int shared = 0;
        while (shared < baseLength && shared < target.length && base[shared].equals(target[shared])) {
            shared++;
        }
        return getRelativePath(target, shared, baseLength);
    }

    /**
     * Build the relative path from the already split target path.
     *
     * @param target             the elements of the absolute target path
     * @param sharedPrefixLength the number of leading elements the base and the target have in common
     * @param baseLength         the number of elements of the base directory
     * @return one {@code ../} for every base element that is not shared, followed by the target elements that are
     * not shared, joined with {@code /}
     */
    private static String getRelativePath(String[] target, int sharedPrefixLength, int baseLength) {
        final var sb = new StringBuilder();
        if (sharedPrefixLength < baseLength) {
            sb.append("../".repeat(baseLength - sharedPrefixLength));
        }
        for (int j = sharedPrefixLength; j < target.length; j++) {
            if (j > sharedPrefixLength) {
                sb.append("/");
            }
            sb.append(target[j]);
        }
        return sb.toString();
    }

    /**
     * Check if the name of the file has to be interpreted as an absolute filename or not. This is not the same as any
     * JDK provided method, because it also treats names with a resource type prefix (like {@code res:} or
     * {@code https:}) as absolute, as well as names starting with {@code ~}. The latter is usually resolved by the
     * shell, but Jamal file handling resolves it so that Jamal files can also use the {@code ~/... } file format.
     * <p>
     * Names starting with {@code /} or {@code \}, and names starting with a letter followed by {@code :} (drive
     * letter) are absolute as well.
     *
     * @param fileName the file name to check.
     * @return {@code true} if the file name should be treated as an absolute file name and {@code false} otherwise
     */
    public static boolean isAbsolute(String fileName) {
        if (isRemote(fileName)) {
            return true;
        }
        if (fileName.isEmpty()) {
            return false;
        }
        final char first = fileName.charAt(0);
        if (first == '/' || first == '\\' || first == '~') {
            return true;
        }
        // drive letter notation, like C:
        return fileName.length() > 1 && fileName.charAt(1) == ':' && Character.isAlphabetic(first);
    }

    /**
     * Check if the name of the file has to be interpreted as a remote file or as a resource.
     *
     * @param fileName the file name to check.
     * @return {@code true} if the file name starts with a resource type prefix and {@code false} otherwise
     */
    public static boolean isRemote(String fileName) {
        final int colonIndex = prefixEnd(fileName);
        return colonIndex >= 0;
    }

    /**
     * Find the end of the resource type prefix of the file name, if there is any.
     * <p>
     * The resource type is at the start of the file name and is followed by a ':' character. It is a sequence
     * of alphabetic characters and has to be at least two characters long. A single character before the ':' is not
     * a resource type, it is a Windows drive letter.
     *
     * @param fileName the file name to check.
     * @return index of the ':' character following the resource type, e.g. 'res:', 'https:', or -1 if the file name
     * is a local, simple file
     */
    private static int prefixEnd(final String fileName) {
        final int colon = fileName.indexOf(':');
        // fewer than two characters before the ':' also covers "no colon at all" (-1)
        final boolean longEnough = colon >= 2;
        if (longEnough && fileName.chars().limit(colon).allMatch(Character::isAlphabetic)) {
            return colon;
        }
        return -1;
    }

    /**
     * Return the character index where the file name used to calculate the absolute file name from the reference and
     * the relative file name has to start. Everything before this index is a prefix that does not take part in the
     * path calculation.
     *
     * @param fileName the full file name including the prefix and all the parts before the file name
     * @return the index, which is the first character of the file name, or -1 when the name is not a {@code https://}
     * name and no resource reader can read it
     */
    private static int fileStart(final String fileName) {
        if (!fileName.startsWith("https://")) {
            // let the first resource reader that can read this name tell where the file name starts
            return readers.stream()
                    .filter(r -> r.canRead(fileName))
                    .findFirst()
                    .map(r -> r.fileStart(fileName))
                    .orElse(-1);
        }
        // index of the second '/' of "https://": that slash stays at the start of the path part
        return 7;
    }

    /**
     * Add a trailing {@code /} at the end of the directory name if it is not there yet.
     * The empty string is returned as it is, without any {@code /} appended.
     *
     * @param dir the directory name
     * @return the empty string if {@code dir} is empty, otherwise the name of the directory guaranteed to have
     * a trailing {@code /} at the end
     */
    public static String trailDirectory(final String dir) {
        if (dir.isEmpty()) {
            return dir;
        }
        if (dir.endsWith("/")) {
            return dir;
        }
        return dir + "/";
    }

}