All downloads are free. Search and download functionality uses the official Maven repository.

com.lambda.Experiment.Rewrite Maven / Gradle / Ivy

The newest version!
package com.lambda.Experiment;

import java.io.*;
import java.util.*;


/*
  Read in a file and construct frequency tables for word order. Then use those tables
  to construct new "sentences".
*/

public class Rewrite {
    static String[]             fileNames = {};
    static String               PERIOD = ".";           // Token that ends a sentence
    static int                  MAX = 5;                // Prefixes of 0 .. MAX-1 words are recorded
    static int                  inputIndex, len, nSentencesToGenerate = 100;
    private static String       input;
    static Analysis             analysis = new Analysis();

    // How many preceding words are consulted when choosing the 2nd, 3rd, ... word
    // of a generated sentence (the 1st word is always chosen with an empty prefix).
    private static final int[]  CONTEXT_SIZES = {1, 1, 2, 3, 4, 3, 3, 1, 4, 4, 3, 2, 2};


    /**
     * Analyzes the files named on the command line (or the built-in manuscript when
     * none are given), prints nSentencesToGenerate generated sentences and finally
     * the elapsed time in milliseconds.
     *
     * @param args names of the text files to analyze; may be empty
     */
    public static void main(String[] args) {
        if (args.length > 0) fileNames = args;

        long start = System.currentTimeMillis();

        if (fileNames.length == 0) {
            String s = RewriteManuscript.manuscriptString;
            System.out.println("******** Reading and analyzing manuscript string ********");
            analyze(s);
        }

        for (int i = 0; i < fileNames.length; i++) {
            String s = readFile(fileNames[i]);
            System.out.println("******** Reading and analyzing file: "
                               +fileNames[i] + "********");
            analyze(s);
        }
        analysis.calculateStatistics();
        System.out.println("******** Creating a new masterpiece of literature... *******\n");
        generateSentences();

        long end = System.currentTimeMillis();
        long total = end-start;
        System.out.println(""+ total + "ms");
    }


    /**
     * Builds one sentence word by word from the frequency tables.
     * The sentence ends as soon as a PERIOD is chosen; if none has been chosen
     * after all CONTEXT_SIZES steps, a PERIOD is appended.
     *
     * @return the generated sentence, formatted by concat(ArrayList)
     */
    public static String generateSentence() {
        ArrayList al = new ArrayList();

        al.add(analysis.choose(al, 0));                 // Opening word: empty prefix.
        for (int k = 0; k < CONTEXT_SIZES.length; k++) {
            String s = analysis.choose(al, CONTEXT_SIZES[k]);
            al.add(s);
            // equals(), not ==: must not depend on every producer interning its strings.
            if (PERIOD.equals(s)) return concat(al);
        }

        al.add(PERIOD);
        return concat(al);
    }

    /** Prints nSentencesToGenerate generated sentences, one per line. */
    public static void generateSentences() {
        for (int i = 0; i < nSentencesToGenerate; i++) {
            System.out.println(generateSentence());
        }
    }

    /**
     * Reads a whole text file into one long string; every line is preceded by a
     * single space, so line breaks become token separators.
     *
     * @param fileName path of the file to read
     * @return the file contents, or "" if an IOException occurred (the exception is printed)
     */
    public static String readFile(String fileName) {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(fileName));
            StringBuffer sb = new StringBuffer();
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(" ");
                sb.append(line);
            }
            return sb.toString();
        }
        catch (IOException e) {
            System.out.println(e);
            return "";
        }
        finally {
            // Always release the file handle, on success and on failure alike.
            if (br != null) {
                try { br.close(); }
                catch (IOException ignored) { /* The result is already decided. */ }
            }
        }
    }


    /**
     * Tokenizes s and adds its word-order frequencies to the shared tables.
     *
     * @param s text whose tokens are separated by whitespace; "." ends a sentence
     * @return the shared Analysis instance that now includes s
     */
    public static Analysis analyze(String s) {          // Build tables
        input = s;
        len = input.length();
        inputIndex = 0;

        while (inputIndex < len) {analyzeNext();}
        return analysis;
    }

    /** Consumes one sentence from the input and records it in the tables. */
    private static void analyzeNext() {
        ArrayList previous = new ArrayList();
        String token = nextToken();
        // An empty sentence (stray "." or only trailing whitespace) must not be
        // recorded, or "." would become a possible opening word.
        if (PERIOD.equals(token)) return;
        analysis.add("", token, 0);                     // Insert 1st token directly.
        previous.add(token);

        while (true) {
            token = nextToken();
            if (PERIOD.equals(token)) break;

            recordContinuation(previous, token);
            previous.add(token);
            if (previous.size() == MAX)  previous.remove(0);   // Keep at most MAX-1 words.
        }

        // The terminating period is recorded for every prefix length, exactly like
        // any other word; otherwise the longest prefix could never end a sentence.
        recordContinuation(previous, PERIOD);
    }

    /** Records word as a continuation of each of the 1 .. n most recent words in previous. */
    private static void recordContinuation(ArrayList previous, String word) {
        int nWords = Math.min(previous.size(), MAX);
        for (int j = 1; j <= nWords; j++) {
            analysis.add(concat(previous, j), word, j);
        }
    }


    /** Formats the whole list; same as concat(al, al.size()). */
    public static String concat(ArrayList al) {
        return concat(al, al.size());
    }

    /**
     * Concatenates the last n tokens of al (all of them if n exceeds the size).
     * A single space follows every non-empty token except the final two, so a
     * trailing period attaches directly to the last word:
     *   ({"The", "cat", "."}, 3) -> "The cat."
     *   ({"a", "b", "c"}, 2)     -> "bc"
     *   ({"a", "b", "c"}, 7)     -> "a bc"
     * Analysis uses this same method to build its lookup keys, so the format must
     * stay as it is. A null token is treated as an empty token.
     *
     * @param al list of String tokens
     * @param n  number of trailing tokens to include
     * @return the concatenated text; "" when n is not positive or al is empty
     */
    public static String concat(ArrayList al, int n) {
        StringBuffer sb = new StringBuffer();
        int size = al.size();
        if (size < n) n = size;
        for (int i = size - n; i < size; i++) {
            String token = (String) al.get(i);
            if (token == null) token = "";
            sb.append(token);
            if ((i+2 < size) && !token.equals(""))  sb.append(" ");
        }
        return sb.toString();
    }


    /** Advances inputIndex past any whitespace (spaces, tabs, line breaks). */
    private static void skipWhiteSpace() {
        while (inputIndex < len && Character.isWhitespace(input.charAt(inputIndex))) {
            inputIndex++;
        }
    }


    /**
     * Returns the next whitespace-delimited token (interned), or PERIOD when the
     * token is "." or the input is exhausted.
     */
    private static String nextToken() {
        skipWhiteSpace();
        if (inputIndex >= len) return PERIOD;           // End of input ends the sentence.

        int end = inputIndex;
        while (end < len && !Character.isWhitespace(input.charAt(end))) end++;

        String token = input.substring(inputIndex, end);
        inputIndex = end;
        if (token.equals(PERIOD)) return PERIOD;
        return token.intern();
    }

}




/*
  Frequency tables for word order. maps[n] maps a prefix built from the previous
  n words to the PrefixEntry holding the words seen after that prefix; maps[0]
  holds the sentence-opening words under the empty prefix "".
*/
class Analysis {
    // maps[3]: {prefix of 3 words -> PrefixEntry, ...}
    // maps[2]: {prefix of 2 words -> PrefixEntry, ...}, etc.

    Hashtable[]                 maps = new Hashtable[Rewrite.MAX];
    int[]                       nEntries = new int[Rewrite.MAX];        // add() calls per table
    public static Random        random = new Random();
    String                      markerWord = "!MarkerWord!";    //  Show these words BOLD
    String                      markerWord2;                    // Optional replacement marker; not set anywhere yet.
    ArrayList                   prefixList = new ArrayList();   // Labels reported through inc()


    public Analysis() {
        for (int i = 0; i < Rewrite.MAX; i++) maps[i] = new Hashtable();
    }

    /**
     * Records that newWord followed the prefix previous in table n.
     *
     * @param previous prefix text used as the table key
     * @param newWord  word that followed the prefix
     * @param n        index of the table (number of words in the prefix)
     */
    void add(String previous, String newWord, int n) {
        Hashtable hm = maps[n];
        nEntries[n]++;
        PrefixEntry e = (PrefixEntry) hm.get(previous);
        if (e == null) {
            e = new PrefixEntry(previous);
            hm.put(previous, e);
        }
        e.add(newWord);
    }


    /**
     * Picks a prefix from table i at random, weighted by how often each was recorded.
     *
     * @param i index of the table
     * @return the chosen prefix, or "" when the table is empty
     */
    public String choose(int i) {                               // Choose a starting word
        int total = nEntries[i];
        if (total <= 0) return "";                              // Nothing recorded: avoid n % 0.
        int ran = random.nextInt(total);                        // Uniform in [0, total)
        int sum = 0;

        Iterator iter = maps[i].values().iterator();
        while (iter.hasNext()) {
            PrefixEntry e = (PrefixEntry) iter.next();
            sum += e.nEntries;
            if (ran < sum) return e.prefix;
        }
        return "";
    }


    /**
     * Chooses the next word from the last i words of al.
     *
     * @param al words generated so far
     * @param i  number of trailing words to use as the prefix (also the table index)
     * @return a word that followed that prefix, or "" if the prefix was never recorded
     */
    public String choose(ArrayList al, int i) {
        Hashtable hm = maps[i];
        String prefix = Rewrite.concat(al, i);
        PrefixEntry e = (PrefixEntry) hm.get(prefix);

        if (e == null) return "";
        return e.choose();
    }


    /** Runs calculateStatistics(this) on every PrefixEntry of every table. */
    public void calculateStatistics() {
        // Switch to the alternate marker only if one was configured. Assigning it
        // unconditionally replaced the marker with null, and marked words then
        // became null entries in the generated sentences.
        if (markerWord2 != null) markerWord = markerWord2;

        for (int i = 0; i < Rewrite.MAX; i++) {
            Iterator iter = maps[i].values().iterator();
            while (iter.hasNext()) {
                PrefixEntry e = (PrefixEntry) iter.next();
                e.calculateStatistics(this);
            }
        }
    }


    /** Appends the label s to prefixList. */
    public void inc(String s) { prefixList.add(s); }

    /** Replaces the word held by we with the current markerWord. */
    public void getMarkerWord(WordEntry we) { we.set(markerWord); }


    /** Prints this object and then every PrefixEntry, table by table. */
    public void dump() {
        System.out.println(this);
        for (int i = 0; i < Rewrite.MAX; i++) {
            Iterator iter = maps[i].values().iterator();
            while (iter.hasNext()) {
                PrefixEntry e = (PrefixEntry) iter.next();
                e.dump();
            }
            System.out.println("--------------------------------");
        }
    }
}






class PrefixEntry {                             // Continuations based on previous n words.

    String prefix;                              // Key text this entry was created for
    int nEntries = 0;                           // Number of times this prefix appeared
    Hashtable completions = new Hashtable();    // {word -> WordEntry, ...}

    public PrefixEntry(String s) {prefix = s;}

    /** Counts one more occurrence of word following this prefix. */
    public void add(String word) {
        WordEntry we = (WordEntry) completions.get(word);
        if (we == null) {
            we = new WordEntry(word);
            completions.put(word, we);
        }
        we.inc();
        nEntries++;
    }


    /**
     * Picks one of the recorded completions at random, weighted by its count.
     *
     * @return the chosen word, or "" when nothing has been recorded yet
     */
    public String choose() {
        if (nEntries <= 0) return "";                   // Avoid a zero bound.
        int ran = Analysis.random.nextInt(nEntries);    // Uniform in [0, nEntries)
        int sum = 0;

        Iterator iter = completions.values().iterator();
        while (iter.hasNext()) {
            WordEntry we = (WordEntry) iter.next();
            sum += we.getnEntries();
            // Strictly less: with "<=" the first word also claimed the slot that
            // belongs to the next one, so later words were under-represented
            // (two words seen once each always yielded the first).
            if (ran < sum) return we.word;
        }
        throw new NullPointerException("Cannot get here");
    }


    /** Runs calculateStatistics(analysis) on every completion. */
    public void calculateStatistics(Analysis analysis) {
        Iterator iter = completions.values().iterator();
        while (iter.hasNext()) {
            WordEntry we = (WordEntry) iter.next();
            we.calculateStatistics(analysis);
        }
    }


    /** Prints this entry followed by one line per completion. */
    public void dump() {
        System.out.println(this);
        Iterator iter = completions.keySet().iterator();
        while (iter.hasNext()) {
            String key = (String) iter.next();
            System.out.println("\t" + key + "\t\t\t"+completions.get(key));
        }
    }

    /** Describes the entry so that dump() prints something readable. */
    public String toString() {return "<PrefixEntry \"" + prefix + "\" " + nEntries + ">";}

}






class WordEntry {                           // Hashtable entries for individual words
    int nEntries = 0;                       // no. of times encountered
    String word = null;


    public WordEntry(String s) {word = s;}
    public void inc() {nEntries++;}
    public void set(String s) {word = s;}
    public int getnEntries() {return nEntries;}

    /**
     * Classifies the word by its prefix/suffix, reports each match through
     * analysis.inc, and marks special words: "pro..." words are wrapped in
     * asterisks, and words longer than 4 characters starting with "Vine" or
     * "marker" are replaced through analysis.getMarkerWord.
     *
     * @param analysis receiver of the classification labels and source of the marker word
     */
    public void calculateStatistics(Analysis analysis) {// Build frequency charts
        if (word == null) return;                       // Nothing to classify.

        if (word.startsWith("re")) {
            analysis.inc("re");
        }
        if (word.startsWith("pre")) {
            analysis.inc("pre");
        }
        if (word.startsWith("pro")) {
            analysis.inc("pro");
            word = "*"+word+"*";                        // Mark special words
        }
        if (word.length() > 4) {
            if (word.startsWith("de") && word.endsWith("ed")) {
                analysis.inc("de-ed");
            }
            // One combined test on the current word. Testing "marker" after the
            // "Vine" branch had already replaced the word examined the replacement
            // and failed with a NullPointerException when the marker was null.
            if (word.startsWith("Vine") || word.startsWith("marker")) {
                analysis.inc("markerWord");
                analysis.getMarkerWord(this);
            }
        }

    }

    /** Describes the entry so that PrefixEntry.dump() prints something readable. */
    public String toString() {return "<WordEntry \"" + word + "\" " + nEntries + ">";}

}


/*
  Built-in sample text, analyzed when no input files are given.
  Tokens are separated by whitespace and a lone "." ends a sentence.
*/
class RewriteManuscript {

    // Every fragment must end in whitespace; otherwise its last word fuses with the
    // first word of the next fragment (the first line used to produce "Pleasethis").
    static String manuscriptString = "" +
        "Palun Yuri whispered holding out a spoonfull of wurst to the cat . Please \n" +
        "this is just for you . \n" +
        " \n" +
        "The cat a mangy old creature that had inhabited this crevice in the refuge \n" +
        "for as long as Yuri could remember stared at the boy suspiciously . Yuri \n" +
        "held very still and repeated Palun \n" +
        " \n" +
        "Never taking its eyes off of Yuri the cat crept forward sniffed at the \n" +
        "wrust as if it expected it to be poisoned then snatched it off the spoon \n" +
        "and retreated to the safety of the crevice . \n" +
        " \n" +
        "The cat bit into the wurst several times before finally swallowing it . It \n" +
        "never took its eyes off of Yuri . \n" +
        " \n" +
        "Head Istu Yuri told the cat Bon Appetite . Yuri dug out another spoonful \n" +
        "of wurst and held it out to the cat . Did the cat care that Yuri spoke \n" +
        "Estonian with a slight Russian accent .  Maybe . In two years the cat had \n" +
        "never let Yuri touch it . It always grabbed the food from the spoon and \n" +
        "pulled back . If Yuri raised his hand to reach out towards the cat it would \n" +
        "arch its back and hiss . So Yuri never tried any more .  \n" +
        " \n" +
        "It's ok cat . You're just like me . People are mean to us marker because we're \n" +
        "small and ugly . The other boys hit me too . Yuri pulled back the hood \n" +
        "of his parka so the cat could see the small brown bruises where Karl and had \n" +
        "hit him that morning on the way to school . \n" +
        " \n" +
        "I know what it's like for you . So don't worry . I'm your friend . We can be \n" +
        "friends from two meters away . Yuri dug out some more wurst . \n" +
        " \n" +
        "The little hidden alcove in the courtyard off of Ruutli had been Yuri's \n" +
        "refuge after school for two years now . His mother gave piano lessons in \n" +
        "their tiny apartment on the other side of town and Yuri wasn't allowed to \n" +
        "come home until the last student was done . Yuri's father wanted him to go to \n" +
        "the ice hocky practice at the Linna Hall but Yuri hated it . \n" +
        " \n" +
        "What kind of son are you .  His father would yell at him . I played for the \n" +
        "St . Petersburg city champions and you can't even handle a stick .   We are \n" +
        "Russians Yuri .  We are proud . We are strong . We are not afraid of anything .  \n" +
        "Do you hear me Yuri .   We lost twenty million men fighting the fascists but \n" +
        "we beat them . \n" +
        " \n" +
        "His father would start pounding the table whenever he talked about the \n" +
        "war . All four of my grandparents died in the siege .  They gave their marker lives \n" +
        "for mother Russia .  They died so you could ge free .  If there was any vodka \n" +
        "his father would down a swig and start talking about the 900 day siege . He \n" +
        "could talk for hours about the bravery of the St . Petersburg citizens in \n" +
        "resisting the fasists never Germans aways fascists) . \n" +
        " \n" +
        "Then his father would talk about how hard his childhood had been about \n" +
        "never having enough to eat about the cold and of course about the marker hocky \n" +
        "team . \n";
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy