All downloads are FREE. The search and download functionality uses the official Maven repository.

com.almondtools.stringsandchars.patternsearch.BestFactorAnalyzer Maven / Gradle / Ivy

package com.almondtools.stringsandchars.patternsearch;

import static java.util.Arrays.asList;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import com.almondtools.stringsandchars.patternsearch.BestFactorAnalyzer.Factors;
import com.almondtools.stringsandchars.regex.AlternativesNode;
import com.almondtools.stringsandchars.regex.AnyCharNode;
import com.almondtools.stringsandchars.regex.BoundedLoopNode;
import com.almondtools.stringsandchars.regex.CharClassNode;
import com.almondtools.stringsandchars.regex.CompClassNode;
import com.almondtools.stringsandchars.regex.ConcatNode;
import com.almondtools.stringsandchars.regex.EmptyNode;
import com.almondtools.stringsandchars.regex.GroupNode;
import com.almondtools.stringsandchars.regex.OptionalNode;
import com.almondtools.stringsandchars.regex.RangeCharNode;
import com.almondtools.stringsandchars.regex.RegexNode;
import com.almondtools.stringsandchars.regex.RegexNodeVisitor;
import com.almondtools.stringsandchars.regex.SingleCharNode;
import com.almondtools.stringsandchars.regex.SpecialCharClassNode;
import com.almondtools.stringsandchars.regex.StringNode;
import com.almondtools.stringsandchars.regex.UnboundedLoopNode;

/**
 * Visitor that walks a regular expression tree and derives sets of literal
 * strings describing it: the complete set of matched strings, a set of
 * prefixes, a set of suffixes and a set of inner factors (see {@link Factors}).
 *
 * <p>Usage: construct with the root node, call {@link #analyze()} once and then
 * query {@link #getBestFactors(Set, Set)}. "Best" is decided by
 * {@link #bestOf(Set...)}: the set with the highest average string length wins,
 * ties are broken in favour of the set with fewer elements.
 */
public class BestFactorAnalyzer implements RegexNodeVisitor<Factors> {

	private final RegexNode root;
	private Factors factors;

	/**
	 * @param root the root of the regular expression tree to analyze
	 */
	public BestFactorAnalyzer(RegexNode root) {
		this.root = root;
	}

	/**
	 * Picks the best set among the analyzed prefix, suffix and factor sets and
	 * the two given defaults.
	 *
	 * <p>{@link #analyze()} must have been called before, otherwise the access
	 * to the (still unset) analysis result fails with a
	 * {@link NullPointerException}.
	 *
	 * @param default1 first fallback set, may be {@code null}
	 * @param default2 second fallback set, may be {@code null}
	 * @return the best scoring set, or {@code null} if every candidate is
	 *         {@code null} or empty
	 */
	public Set<String> getBestFactors(Set<String> default1, Set<String> default2) {
		return bestOf(factors.prefix, factors.suffix, factors.factor, default1, default2);
	}

	/**
	 * Runs the analysis on the root node and stores the result in this object.
	 *
	 * @return this analyzer, to allow call chaining
	 */
	public BestFactorAnalyzer analyze() {
		factors = root.accept(this);
		return this;
	}

	/**
	 * Selects the set with the highest {@link #score(Set) score}. On equal
	 * scores the set with fewer elements is preferred; among fully equal
	 * candidates the earliest one is kept.
	 *
	 * @param sets the candidates; {@code null} entries are never selected
	 * @return the best candidate, or {@code null} if no candidate qualifies
	 */
	@SafeVarargs
	private static final Set<String> bestOf(Set<String>... sets) {
		double maxScore = 0d;
		Set<String> resultSet = null;
		for (Set<String> set : sets) {
			double score = score(set);
			if (score > maxScore) {
				resultSet = set;
				maxScore = score;
			} else if (score == maxScore && (resultSet == null || set.size() < resultSet.size())) {
				// resultSet is still null when the first usable candidate scores
				// exactly 0 (e.g. the set containing only the empty string);
				// without the null check this branch dereferenced null.
				// A null set scores -1 and therefore never reaches this branch.
				resultSet = set;
			}
		}
		return resultSet;
	}

	/**
	 * Computes the average length of the strings in the given set.
	 *
	 * @param set the set to rate, may be {@code null}
	 * @return {@code -1} for {@code null}, {@code NaN} for an empty set (0/0),
	 *         otherwise the average string length
	 */
	private static double score(Set<String> set) {
		if (set == null) {
			return -1d;
		}
		double size = set.size();
		double sum = 0d;
		for (String string : set) {
			sum += string.length();
		}
		return sum / size;
	}

	/** Alternatives are analyzed as the alternative of all sub node results. */
	@Override
	public Factors visitAlternatives(AlternativesNode node) {
		List<Factors> subFactors = accept(node.getSubNodes());

		return Factors.alternative(subFactors);
	}

	/** The any-char node is analyzed as the alternative of its char nodes. */
	@Override
	public Factors visitAnyChar(AnyCharNode node) {
		List<Factors> subFactors = accept(node.toCharNodes());
		return Factors.alternative(subFactors);
	}

	/** A char class is analyzed as the alternative of its char nodes. */
	@Override
	public Factors visitCharClass(CharClassNode node) {
		List<Factors> subFactors = accept(node.toCharNodes());
		return Factors.alternative(subFactors);
	}

	/** A complement class is analyzed as the alternative of its char nodes. */
	@Override
	public Factors visitCompClass(CompClassNode node) {
		List<Factors> subFactors = accept(node.toCharNodes());
		return Factors.alternative(subFactors);
	}

	/** A concatenation is analyzed as the concatenation of all sub node results. */
	@Override
	public Factors visitConcat(ConcatNode node) {
		List<Factors> subFactors = accept(node.getSubNodes());

		return Factors.concat(subFactors);
	}

	/** The empty node yields the factors of the empty string. */
	@Override
	public Factors visitEmpty(EmptyNode node) {
		return Factors.empty();
	}

	/** A group is transparent: the result of its sub node is returned. */
	@Override
	public Factors visitGroup(GroupNode node) {
		return node.getSubNode().accept(this);
	}

	/** A bounded loop is analyzed as a variable sequence of its sub node result. */
	@Override
	public Factors visitBoundedLoop(BoundedLoopNode node) {
		Factors factor = node.getSubNode().accept(this);

		return Factors.variableSequence(factor, node.getFrom(), node.getTo());
	}

	/** An unbounded loop cannot be enumerated and yields the invalid factors. */
	@Override
	public Factors visitUnboundedLoop(UnboundedLoopNode node) {
		return Factors.invalid();
	}

	/** An optional node is analyzed as the alternative of its sub node result and the empty string. */
	@Override
	public Factors visitOptional(OptionalNode node) {
		Factors subFactors = node.getSubNode().accept(this);

		return Factors.optional(subFactors);
	}

	/** A char range yields one single-character string per char in the inclusive range. */
	@Override
	public Factors visitRangeChar(RangeCharNode node) {
		Set<String> chars = new LinkedHashSet<>();

		char from = node.getFrom();
		char to = node.getTo();
		// int counter: a char counter wraps around after '\uffff' and would
		// never terminate for a range ending at Character.MAX_VALUE
		for (int c = from; c <= to; c++) {
			chars.add(String.valueOf((char) c));
		}
		return Factors.base(chars);
	}

	/** A single char yields the factors of its literal value. */
	@Override
	public Factors visitSingleChar(SingleCharNode node) {
		return Factors.string(node.getLiteralValue());
	}

	/** A special char class is analyzed as the alternative of its char nodes. */
	@Override
	public Factors visitSpecialCharClass(SpecialCharClassNode node) {
		List<Factors> subFactors = accept(node.toCharNodes());
		return Factors.alternative(subFactors);
	}

	/** A string node yields the factors of its literal value. */
	@Override
	public Factors visitString(StringNode node) {
		return Factors.string(node.getLiteralValue());
	}

	/**
	 * Applies this visitor to every given node.
	 *
	 * @param subNodes the nodes to visit
	 * @return the visit results, in the order of the given nodes
	 */
	private List<Factors> accept(List<? extends RegexNode> subNodes) {
		List<Factors> results = new ArrayList<>(subNodes.size());
		for (RegexNode subNode : subNodes) {
			results.add(subNode.accept(this));
		}
		return results;
	}

	/**
	 * Analysis result for one regex node. Each field is a set of strings or
	 * {@code null}; {@code null} is what {@link #invalid()} uses for
	 * expressions that cannot be enumerated (unbounded loops).
	 *
	 * <ul>
	 * <li>{@code all}: the strings of the expression as a whole</li>
	 * <li>{@code prefix}: strings the matches start with</li>
	 * <li>{@code suffix}: strings the matches end with</li>
	 * <li>{@code factor}: strings the matches contain</li>
	 * </ul>
	 */
	static class Factors {
		public Set<String> all;
		public Set<String> prefix;
		public Set<String> suffix;
		public Set<String> factor;

		public Factors(Set<String> all, Set<String> prefix, Set<String> suffix, Set<String> factor) {
			this.all = all;
			this.prefix = prefix;
			this.suffix = suffix;
			this.factor = factor;
		}

		@Override
		public String toString() {
			return "{\n"
				+ "all    :" + all + "\n"
				+ "prefix :" + prefix + "\n"
				+ "suffix :" + suffix + "\n"
				+ "factor :" + factor + "\n"
				+ "}";
		}

		/**
		 * Builds the cross product of two string sets by concatenation.
		 *
		 * @return every {@code prefix + suffix} combination in iteration order,
		 *         or {@code null} if either argument is {@code null}
		 */
		private static Set<String> concat(Set<String> prefixes, Set<String> suffixes) {
			if (prefixes == null || suffixes == null) {
				return null;
			}
			Set<String> concat = new LinkedHashSet<>();
			for (String prefix : prefixes) {
				for (String suffix : suffixes) {
					concat.add(prefix + suffix);
				}
			}
			return concat;
		}

		/**
		 * Combines the given factors as alternatives: each of the four sets is
		 * the union of the corresponding sets of all alternatives, or
		 * {@code null} as soon as one alternative has {@code null} there.
		 */
		public static Factors alternative(List<Factors> factors) {
			Builder builder = new Factors.Builder();

			for (Factors factorsAlternative : factors) {
				builder.addAll(factorsAlternative.all);
				builder.addPrefix(factorsAlternative.prefix);
				builder.addSuffix(factorsAlternative.suffix);
				builder.addFactor(factorsAlternative.factor);
			}

			return builder.build();
		}

		/**
		 * Combines the given factors as a sequence, folding from left to right.
		 * An empty list yields {@link #empty()}.
		 *
		 * <p>For the part built so far (left) and the next element (right):
		 * <ul>
		 * <li>all = all(left) x all(right)</li>
		 * <li>prefix = best of prefix(left) and all(left) x prefix(right)</li>
		 * <li>suffix = best of suffix(left) x all(right) and suffix(right)</li>
		 * <li>factor = best of factor(left), factor(right) and the string
		 * spanning the boundary, suffix(left) x prefix(right)</li>
		 * </ul>
		 */
		public static Factors concat(List<Factors> factors) {
			Iterator<Factors> factorsIterator = factors.iterator();
			if (!factorsIterator.hasNext()) {
				return Factors.empty();
			}
			Builder builder = new Factors.Builder(factorsIterator.next());
			while (factorsIterator.hasNext()) {
				Factors factorsConcat = factorsIterator.next();

				// all candidates are computed from the state before this step
				Set<String> newAll = concat(builder.all, factorsConcat.all);
				Set<String> newPrefix = concat(builder.all, factorsConcat.prefix);
				Set<String> newSuffix = concat(builder.suffix, factorsConcat.all);
				// the only strings guaranteed to be adjacent across the boundary
				// are the end of the left part and the start of the right part;
				// prefix(left) + suffix(right) may have arbitrary text in between
				// (e.g. "ab.*cd" does not contain "abcd")
				Set<String> newFactor = concat(builder.suffix, factorsConcat.prefix);

				// the strings of a sequence are the concatenations of the
				// strings of its parts, not their union; newPrefix of the next
				// step relies on this
				builder.all = newAll;
				builder.updatePrefix(builder.prefix, newPrefix);
				builder.updateSuffix(newSuffix, factorsConcat.suffix);
				builder.updateFactor(builder.factor, factorsConcat.factor, newFactor);
			}
			return builder.build();
		}

		/**
		 * Builds the factors of a bounded repetition: {@code from} mandatory
		 * occurrences of {@code factor} followed by {@code to - from} optional
		 * occurrences. With {@code from == 0} and {@code to <= 0} the result is
		 * {@link #empty()}.
		 *
		 * @param factor the factors of the repeated expression
		 * @param from the minimum number of repetitions
		 * @param to the maximum number of repetitions
		 */
		public static Factors variableSequence(Factors factor, int from, int to) {
			List<Factors> sequence = new ArrayList<>();
			// exactly 'from' mandatory occurrences (seeding the sequence with
			// one extra occurrence would demand from + 1 repetitions)
			for (int i = 0; i < from; i++) {
				sequence.add(factor);
			}
			if (from < to) {
				Factors optionalFactor = Factors.optional(factor);
				for (int i = from; i < to; i++) {
					sequence.add(optionalFactor);
				}
			}

			return Factors.concat(sequence);
		}

		/** Builds the alternative of the given factors and the empty string. */
		public static Factors optional(Factors factors) {
			return Factors.alternative(asList(factors, empty()));
		}

		/** Builds factors that use the same given set for all, prefix, suffix and factor. */
		public static Factors base(Set<String> factors) {
			return new Factors(factors, factors, factors, factors);
		}

		/** Builds the factors of a single literal string. */
		public static Factors string(String factor) {
			Set<String> factors = new HashSet<>();
			factors.add(factor);
			return base(factors);
		}

		/** Builds the factors of the empty string. */
		public static Factors empty() {
			Set<String> empty = new HashSet<>();
			empty.add("");
			return base(empty);
		}

		/** Builds factors with every set being {@code null}. */
		public static Factors invalid() {
			return base(null);
		}

		/**
		 * Mutable accumulator for {@link Factors}. The {@code add...} methods
		 * build unions in which {@code null} is absorbing: once a set is
		 * {@code null} it stays {@code null}, and adding {@code null} makes it
		 * {@code null}. The {@code update...} methods replace a set by the best
		 * of the given candidates.
		 */
		public static class Builder {

			private Set<String> all;
			private Set<String> prefix;
			private Set<String> suffix;
			private Set<String> factor;

			/** Creates a builder with four empty sets. */
			public Builder() {
				all = new LinkedHashSet<>();
				prefix = new LinkedHashSet<>();
				suffix = new LinkedHashSet<>();
				factor = new LinkedHashSet<>();
			}

			/** Creates a builder holding copies of the sets of the prototype ({@code null} stays {@code null}). */
			public Builder(Factors prototype) {
				all = prototype.all == null ? null : new LinkedHashSet<>(prototype.all);
				prefix = prototype.prefix == null ? null : new LinkedHashSet<>(prototype.prefix);
				suffix = prototype.suffix == null ? null : new LinkedHashSet<>(prototype.suffix);
				factor = prototype.factor == null ? null : new LinkedHashSet<>(prototype.factor);
			}

			public void addAll(Set<String> factors) {
				if (all == null) {
					return;
				}
				if (factors == null) {
					all = null;
				} else {
					all.addAll(factors);
				}
			}

			public void addPrefix(Set<String> factors) {
				if (prefix == null) {
					return;
				}
				if (factors == null) {
					prefix = null;
				} else {
					prefix.addAll(factors);
				}
			}

			@SafeVarargs
			public final void updatePrefix(Set<String>... factors) {
				prefix = bestOf(factors);
			}

			public void addSuffix(Set<String> factors) {
				if (suffix == null) {
					return;
				}
				if (factors == null) {
					suffix = null;
				} else {
					suffix.addAll(factors);
				}
			}

			@SafeVarargs
			public final void updateSuffix(Set<String>... factors) {
				suffix = bestOf(factors);
			}

			public void addFactor(Set<String> factors) {
				if (factor == null) {
					return;
				}
				if (factors == null) {
					factor = null;
				} else {
					factor.addAll(factors);
				}
			}

			@SafeVarargs
			public final void updateFactor(Set<String>... factors) {
				factor = bestOf(factors);
			}

			/** Creates the factors from the current state; the sets are handed over without copying. */
			public Factors build() {
				return new Factors(all, prefix, suffix, factor);
			}

		}

	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy