All downloads are free. Search and download functionality uses the official Maven repository.

javax0.jamal.snippet.Sort Maven / Gradle / Ivy

The newest version!
package javax0.jamal.snippet;

import javax0.jamal.api.BadSyntax;
import javax0.jamal.api.Input;
import javax0.jamal.api.Macro;
import javax0.jamal.api.Processor;
import javax0.jamal.tools.Range;
import javax0.jamal.tools.Scanner;
import javax0.jamal.tools.param.PatternParameter;
import javax0.jamal.tools.param.StringParameter;

import java.math.BigDecimal;
import java.text.Collator;
import java.util.*;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import static java.util.stream.Collectors.toList;
import static javax0.jamal.tools.InputHandler.skipWhiteSpaces;

public class Sort implements Macro, Scanner.FirstLine {

    @Override
    public String evaluate(Input in, Processor processor) throws BadSyntax {
        final var scanner = newScanner(in, processor);
        // snippet sort_options
        final var separator = scanner.pattern("separator").defaultValue("\n");
        // specifies the separator regular expression, that separates the individual records.
        // The default value if `\n`, which means the lines are the records.
        final var join = scanner.str(null, "join").defaultValue("\n");
        // is the string to use to join the records together after the sorting was done.
        // The default value is the `\n` string (not pattern); that means the records will be individual lines in the output.
        final var locale = scanner.str(null, "locale", "collatingOrder", "collator").optional();
        // can define the locale for the sorting.
        // The default locale `en-US.UTF-8`.
        // Any locale string can be used installed in the Java environment and passed to the method `Locale.forLanguageTag()`.
        // When this option is used with the alias `collator `, the value of the option has to be the fully qualified name of a class extending the `java.text.Collator` abstract class.
        // The class will be instantiated and used to sort the records.
        // Using this option this way makes it possible to use special purpose collator, like the readily available `javax0.jamal.snippet.SemVerCollator`.
        // This collator will sort the records treating the keys as software version numbers that follow the semantic versioning standard.
        final var columns = scanner.str(null, "columns").optional();
        // can specify the part of the textual record to be used as a sorting key.
        // The format of the parameter is `n..m` where `n` is the first character position and `m-1` is the last character position to be used.
        // The values can run from 1 to the maximum number of characters.
        // If you specify column values that run out of the line length, then the macro will result in an error.
        final var pattern = scanner.pattern(null, "pattern").optional();
        // can specify a regular expression pattern to define the part of the line as a sort key.
        // The expression may contain matching groups.
        // In that case, the strings matching the parts between the parentheses are appended from left to right and used as a key.
        // This option must not be used together with the option `columns`.
        final var numeric = scanner.bool(null, "numeric");
        // will sort based on the numeric order of the keys.
        // In this case, the keys must be numeric or else the conversion to `BigDecimal` before the sort will fail.
        final var reverse = scanner.bool(null, "reverse");
        // do the sorting in reverse order.
        // end snippet
        scanner.done();
        Collator collator = getCollator(locale);

        BadSyntax.when(pattern.isPresent() && columns.isPresent(), "Can not use both options '%s' and '%s' together.", pattern.name(), columns.name());

        skipWhiteSpaces(in);
        Stream> lines = Arrays.stream(in.toString().split(separator.get().pattern(), -1))
                .map(s -> new LineHolder<>(s, s));
        if (columns.isPresent()) {
            List ranges = Range.calculateFrom(columns.get(), Integer.MAX_VALUE);
            BadSyntax.when(ranges.size() != 1, "The option '%s' can only have a single range value!", columns.name());
            Range range = ranges.get(0);
            lines = lines.map(line -> new LineHolder<>(line.original, line.original.substring(range.from - 1, range.to - 1)));
        } else if (pattern.isPresent()) {
            lines = lines.map(findMatches(pattern));
        }

        final List values;
        try {
            values = (numeric.is() ?
                    lines.map(line -> new LineHolder<>(line.original, new BigDecimal(line.key)))
                            .sorted(Comparator.comparing(LineHolder::key))
                    :
                    lines.sorted(Comparator.comparing(LineHolder::key, collator)))
                    .map(LineHolder::original).collect(toList());
        } catch (final StringIndexOutOfBoundsException e) {
            throw new BadSyntax("Column specification does not fit the lines", e);
        } catch (final NumberFormatException e) {
            throw new BadSyntax("Numeric sorting on non numeric values", e);

        }
        if (reverse.is()) {
            Collections.reverse(values);
        }
        return String.join(join.get(), values);
    }

    private Collator getCollator(final StringParameter locale) throws BadSyntax {
        if (locale.isPresent() && locale.name().equals("collator")) {
            try {
                if ("semver".equalsIgnoreCase(locale.get())) {
                    return new SemVerCollator();
                }
                final var collator = Class.forName(locale.get()).getConstructor().newInstance();
                if (collator instanceof Collator) {
                    return (Collator) collator;
                } else {
                    throw new BadSyntax(String.format("collator class '%s' is not a collator", locale.get()));
                }
            } catch (Exception e) {
                throw new BadSyntax(String.format("collator class '%s' cannot be instantiated", locale.get()), e);
            }
        }
        return Collator.getInstance(getLocaleFromParam(locale));
    }

    private Locale getLocaleFromParam(StringParameter locale) throws BadSyntax {
        if (locale.isPresent()) {
            return Locale.forLanguageTag(locale.get());
        } else {
            return Locale.forLanguageTag("en-US.UTF-8");
        }
    }

    /**
     * Returns a Function that converts a line holder to a new one, which uses the part of the line as a key that
     * matches the pattern.
     *
     * @param pattern is a parameter, and it is guaranteed to be present when this method is invoked.
     *                When a line does not match the pattern, the whole line is used as key.
     * @return the function to map the line holders to new line holders matching the patterns as key
     * @throws BadSyntax if the pattern cannot be acquired
     */
    private Function, LineHolder> findMatches(PatternParameter pattern) throws BadSyntax {
        Pattern p = pattern.get();
        return line -> {
            var matcher = p.matcher(line.original);
            if (matcher.find()) {
                final var key = new StringBuilder();
                if (matcher.groupCount() > 0) {
                    for (int i = 1; i <= matcher.groupCount(); i++) {
                        key.append(matcher.group(i));
                    }
                } else {
                    key.append(matcher.group());
                }
                return new LineHolder<>(line.original, key.toString());
            } else {
                return new LineHolder<>(line.original, line.original);
            }
        };
    }

    /**
     * A line holder holds one record. It is called line holder because records are usually lines when the default
     * record separator, {@code \n} is used.
     * 

* The holder stores the original record and a KEY usually calculated from the record. * Both the original string and the key can be queried from the object. * * @param the type of the key. */ private static class LineHolder> { private final String original; private final KEY key; LineHolder(String original, KEY key) { this.original = original; this.key = key; } public String original() { return original; } public KEY key() { return key; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy