All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.princeton.cs.algs4.LongestCommonSubstring Maven / Gradle / Ivy

The newest version!
/******************************************************************************
 *  Compilation:  javac LongestCommonSubstring.java
 *  Execution:    java  LongestCommonSubstring file1.txt file2.txt
 *  Dependencies: SuffixArray.java In.java StdOut.java
 *  Data files:   https://algs4.cs.princeton.edu/63suffix/tale.txt
 *                https://algs4.cs.princeton.edu/63suffix/mobydick.txt
 *  
 *  Read in two text files and find the longest substring that
 *  appears in both texts.
 * 
 *  % java LongestCommonSubstring tale.txt mobydick.txt
 *  ' seemed on the point of being '
 *
 ******************************************************************************/

package edu.princeton.cs.algs4;

/**
 *  The {@code LongestCommonSubstring} class provides a {@link SuffixArray}
 *  client for computing the longest common substring that appears in two
 *  given strings.
 *  

* This implementation computes the suffix array of each string and applies a * merging operation to determine the longest common substring. * For an alternate implementation, see * LongestCommonSubstringConcatenate.java. *

* For additional documentation, * see Section 6.3 of * Algorithms, 4th Edition by Robert Sedgewick and Kevin Wayne. *

* * @author Robert Sedgewick * @author Kevin Wayne */ public class LongestCommonSubstring { // Do not instantiate. private LongestCommonSubstring() { } // return the longest common prefix of suffix s[p..] and suffix t[q..] private static String lcp(String s, int p, String t, int q) { int n = Math.min(s.length() - p, t.length() - q); for (int i = 0; i < n; i++) { if (s.charAt(p + i) != t.charAt(q + i)) return s.substring(p, p + i); } return s.substring(p, p + n); } // compare suffix s[p..] and suffix t[q..] private static int compare(String s, int p, String t, int q) { int n = Math.min(s.length() - p, t.length() - q); for (int i = 0; i < n; i++) { if (s.charAt(p + i) != t.charAt(q + i)) return s.charAt(p+i) - t.charAt(q+i); } if (s.length() - p < t.length() - q) return -1; else if (s.length() - p > t.length() - q) return +1; else return 0; } /** * Returns the longest common string of the two specified strings. * * @param s one string * @param t the other string * @return the longest common string that appears as a substring * in both {@code s} and {@code t}; the empty string * if no such string */ public static String lcs(String s, String t) { SuffixArray suffix1 = new SuffixArray(s); SuffixArray suffix2 = new SuffixArray(t); // find longest common substring by "merging" sorted suffixes String lcs = ""; int i = 0, j = 0; while (i < s.length() && j < t.length()) { int p = suffix1.index(i); int q = suffix2.index(j); String x = lcp(s, p, t, q); if (x.length() > lcs.length()) lcs = x; if (compare(s, p, t, q) < 0) i++; else j++; } return lcs; } /** * Unit tests the {@code lcs()} method. * Reads in two strings from files specified as command-line arguments; * computes the longest common substring; and prints the results to * standard output. * * @param args the command-line arguments */ public static void main(String[] args) { In in1 = new In(args[0]); In in2 = new In(args[1]); String s = in1.readAll().trim().replaceAll("\\s+", " "); String t = in2.readAll().trim().replaceAll("\\s+", " "); StdOut.println("'" + lcs(s, t) + "'"); } } /****************************************************************************** * Copyright 2002-2018, Robert Sedgewick and Kevin Wayne. * * This file is part of algs4.jar, which accompanies the textbook * * Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne, * Addison-Wesley Professional, 2011, ISBN 0-321-57351-X. * http://algs4.cs.princeton.edu * * * algs4.jar is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * algs4.jar is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with algs4.jar. If not, see http://www.gnu.org/licenses. ******************************************************************************/





© 2015 - 2024 Weber Informatics LLC | Privacy Policy