javax0.jamal.snippet.Sort Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jamal-snippet Show documentation
Show all versions of jamal-snippet Show documentation
Jamal macro library snippet macros
The newest version!
package javax0.jamal.snippet;
import javax0.jamal.api.BadSyntax;
import javax0.jamal.api.Input;
import javax0.jamal.api.Macro;
import javax0.jamal.api.Processor;
import javax0.jamal.tools.Range;
import javax0.jamal.tools.Scanner;
import javax0.jamal.tools.param.PatternParameter;
import javax0.jamal.tools.param.StringParameter;
import java.math.BigDecimal;
import java.text.Collator;
import java.util.*;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import static java.util.stream.Collectors.toList;
import static javax0.jamal.tools.InputHandler.skipWhiteSpaces;
public class Sort implements Macro, Scanner.FirstLine {
@Override
public String evaluate(Input in, Processor processor) throws BadSyntax {
final var scanner = newScanner(in, processor);
// snippet sort_options
final var separator = scanner.pattern("separator").defaultValue("\n");
// specifies the separator regular expression, that separates the individual records.
// The default value if `\n`, which means the lines are the records.
final var join = scanner.str(null, "join").defaultValue("\n");
// is the string to use to join the records together after the sorting was done.
// The default value is the `\n` string (not pattern); that means the records will be individual lines in the output.
final var locale = scanner.str(null, "locale", "collatingOrder", "collator").optional();
// can define the locale for the sorting.
// The default locale `en-US.UTF-8`.
// Any locale string can be used installed in the Java environment and passed to the method `Locale.forLanguageTag()`.
// When this option is used with the alias `collator `, the value of the option has to be the fully qualified name of a class extending the `java.text.Collator` abstract class.
// The class will be instantiated and used to sort the records.
// Using this option this way makes it possible to use special purpose collator, like the readily available `javax0.jamal.snippet.SemVerCollator`.
// This collator will sort the records treating the keys as software version numbers that follow the semantic versioning standard.
final var columns = scanner.str(null, "columns").optional();
// can specify the part of the textual record to be used as a sorting key.
// The format of the parameter is `n..m` where `n` is the first character position and `m-1` is the last character position to be used.
// The values can run from 1 to the maximum number of characters.
// If you specify column values that run out of the line length, then the macro will result in an error.
final var pattern = scanner.pattern(null, "pattern").optional();
// can specify a regular expression pattern to define the part of the line as a sort key.
// The expression may contain matching groups.
// In that case, the strings matching the parts between the parentheses are appended from left to right and used as a key.
// This option must not be used together with the option `columns`.
final var numeric = scanner.bool(null, "numeric");
// will sort based on the numeric order of the keys.
// In this case, the keys must be numeric or else the conversion to `BigDecimal` before the sort will fail.
final var reverse = scanner.bool(null, "reverse");
// do the sorting in reverse order.
// end snippet
scanner.done();
Collator collator = getCollator(locale);
BadSyntax.when(pattern.isPresent() && columns.isPresent(), "Can not use both options '%s' and '%s' together.", pattern.name(), columns.name());
skipWhiteSpaces(in);
Stream> lines = Arrays.stream(in.toString().split(separator.get().pattern(), -1))
.map(s -> new LineHolder<>(s, s));
if (columns.isPresent()) {
List ranges = Range.calculateFrom(columns.get(), Integer.MAX_VALUE);
BadSyntax.when(ranges.size() != 1, "The option '%s' can only have a single range value!", columns.name());
Range range = ranges.get(0);
lines = lines.map(line -> new LineHolder<>(line.original, line.original.substring(range.from - 1, range.to - 1)));
} else if (pattern.isPresent()) {
lines = lines.map(findMatches(pattern));
}
final List values;
try {
values = (numeric.is() ?
lines.map(line -> new LineHolder<>(line.original, new BigDecimal(line.key)))
.sorted(Comparator.comparing(LineHolder::key))
:
lines.sorted(Comparator.comparing(LineHolder::key, collator)))
.map(LineHolder::original).collect(toList());
} catch (final StringIndexOutOfBoundsException e) {
throw new BadSyntax("Column specification does not fit the lines", e);
} catch (final NumberFormatException e) {
throw new BadSyntax("Numeric sorting on non numeric values", e);
}
if (reverse.is()) {
Collections.reverse(values);
}
return String.join(join.get(), values);
}
private Collator getCollator(final StringParameter locale) throws BadSyntax {
if (locale.isPresent() && locale.name().equals("collator")) {
try {
if ("semver".equalsIgnoreCase(locale.get())) {
return new SemVerCollator();
}
final var collator = Class.forName(locale.get()).getConstructor().newInstance();
if (collator instanceof Collator) {
return (Collator) collator;
} else {
throw new BadSyntax(String.format("collator class '%s' is not a collator", locale.get()));
}
} catch (Exception e) {
throw new BadSyntax(String.format("collator class '%s' cannot be instantiated", locale.get()), e);
}
}
return Collator.getInstance(getLocaleFromParam(locale));
}
private Locale getLocaleFromParam(StringParameter locale) throws BadSyntax {
if (locale.isPresent()) {
return Locale.forLanguageTag(locale.get());
} else {
return Locale.forLanguageTag("en-US.UTF-8");
}
}
/**
* Returns a Function that converts a line holder to a new one, which uses the part of the line as a key that
* matches the pattern.
*
* @param pattern is a parameter, and it is guaranteed to be present when this method is invoked.
* When a line does not match the pattern, the whole line is used as key.
* @return the function to map the line holders to new line holders matching the patterns as key
* @throws BadSyntax if the pattern cannot be acquired
*/
private Function, LineHolder> findMatches(PatternParameter pattern) throws BadSyntax {
Pattern p = pattern.get();
return line -> {
var matcher = p.matcher(line.original);
if (matcher.find()) {
final var key = new StringBuilder();
if (matcher.groupCount() > 0) {
for (int i = 1; i <= matcher.groupCount(); i++) {
key.append(matcher.group(i));
}
} else {
key.append(matcher.group());
}
return new LineHolder<>(line.original, key.toString());
} else {
return new LineHolder<>(line.original, line.original);
}
};
}
/**
* A line holder holds one record. It is called line holder because records are usually lines when the default
* record separator, {@code \n} is used.
*
* The holder stores the original record and a KEY usually calculated from the record.
* Both the original string and the key can be queried from the object.
*
* @param the type of the key.
*/
private static class LineHolder> {
private final String original;
private final KEY key;
LineHolder(String original, KEY key) {
this.original = original;
this.key = key;
}
public String original() {
return original;
}
public KEY key() {
return key;
}
}
}