All downloads are free. Search and download functionality uses the official Maven repository.

org.opentripplanner.profile.StopNameNormalizer Maven / Gradle / Ivy

package org.opentripplanner.profile;

import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.primitives.Ints;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

/**
 * Transforms transit stop names into a somewhat more normalized form, so string
 * distance calculations will be more meaningful.
 *
 * <p>A name is upper-cased and split at the first {@code &} or {@code @} into at most two
 * intersection halves. Each half is rewritten as
 * {@code <remaining words> <street type> <quadrant>}, using the canonical (first)
 * abbreviation of each row in the lookup tables below; the halves are then sorted and
 * joined with {@code " & "}.
 */
public class StopNameNormalizer {

    /** Street type synonyms. The first entry of each row is the canonical abbreviation. */
    static final String[][] STREET_TYPES = {
        {"DR", "DRIVE"},
        {"RD", "ROAD"},
        // NOTE: "S" is also a quadrant; quadrants are stripped first, so only a word "S"
        // that is still present after quadrant extraction is read as a street type.
        {"ST", "STREET", "S"},
        {"AV", "AVE", "AVENUE"},
        {"BV", "BLVD", "BOULEVARD"},
        {"CL", "CI", "CIR", "CIRCLE"},
        {"CT", "COURT"},
        {"WY", "WAY"},
        {"TE", "TERRACE"},
        {"PL", "PLACE"},
        {"LN", "LA", "LANE"},
        {"PK", "PI", "PIKE"},
        {"PW", "PKWY", "PARKWAY"},
        {"RN", "RUN"},
        {"HW", "HWY", "HIGHWAY"}
    };

    /** Quadrant / compass direction synonyms. The first entry of each row is canonical. */
    static final String[][] QUADRANTS = {
        {"NW", "NORTHWEST"},
        {"NE", "NORTHEAST"},
        {"SW", "SOUTHWEST"},
        {"SE", "SOUTHEAST"},
        {"N", "NORTH"},
        {"S", "SOUTH"},
        {"E", "EAST"},
        {"W", "WEST"}
    };

    /** Qualifier words that are dropped from the name entirely. */
    static final String[][] QUALIFIERS = {
        {"NB", "N/B", "NORTHBOUND"},
        {"SB", "S/B", "SOUTHBOUND"},
        {"EB", "E/B", "EASTBOUND"},
        {"WB", "W/B", "WESTBOUND"},
        {"NS", "N/S", "NEARSIDE"},
        {"FS", "F/S", "FARSIDE"},
        {"OPP", "OPPOSITE"}
    };

    /**
     * Normalizes a stop name.
     *
     * <p>Only {@code &} and {@code @} are treated as intersection separators; the word
     * "AT" is not split on.
     *
     * @param name the raw stop name; must not be null
     * @return the normalized name, with the intersection halves in sorted order
     * @throws NullPointerException if {@code name} is null
     */
    public static String normalize (String name) {
        // Locale.ROOT keeps the result independent of the JVM default locale; otherwise a
        // Turkish locale maps 'i' to dotted capital I and the table lookups silently fail.
        String[] parts = name.toUpperCase(Locale.ROOT).split("[&@]", 2);
        List<String> normalizedParts = new ArrayList<>(parts.length);
        for (String part : parts) {
            normalizedParts.add(normalizePart(part));
        }
        // Make sure the two streets of an intersection always appear in the same order
        Collections.sort(normalizedParts);
        return String.join(" & ", normalizedParts);
    }

    /** Normalizes one (already upper-cased) half of an intersection. */
    private static String normalizePart (String part) {
        String[] words = tokenize(part);

        // 1. Strip out the first quadrant, 2. then the first road type.
        String quadrant = removeFirstMatch(words, QUADRANTS);
        String streetType = removeFirstMatch(words, STREET_TYPES);

        // 3. Remove all qualifiers; more than one may be present.
        for (int i = 0; i < words.length; i++) {
            if (canonicalForm(words[i], QUALIFIERS) != null) {
                words[i] = null;
            }
        }

        // 4. Remove all "BAY N": the word BAY together with whatever follows it.
        // TODO this also swallows a following word that is not a bay number or letter.
        for (int i = 0; i < words.length - 1; i++) {
            if ("BAY".equals(words[i])) {
                words[i] = null;
                words[i + 1] = null;
            }
        }

        // 5. Place elements in a predictable order: words, street type, quadrant.
        List<String> ordered = new ArrayList<>(words.length + 2);
        for (String word : words) {
            if (word != null) {
                ordered.add(word);
            }
        }
        if (streetType != null) {
            ordered.add(streetType);
        }
        if (quadrant != null) {
            ordered.add(quadrant);
        }
        return String.join(" ", ordered);
    }

    /**
     * Splits on spaces, commas and periods (slashes are kept because they appear within
     * some abbreviations) and discards empty tokens, so that runs of delimiters such as
     * "ST. NW" do not leak extra spaces into the normalized name.
     */
    private static String[] tokenize (String part) {
        String[] raw = part.split("[ ,.]");
        List<String> tokens = new ArrayList<>(raw.length);
        for (String candidate : raw) {
            String token = candidate.trim();
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }
        return tokens.toArray(new String[0]);
    }

    /**
     * Finds the first word that appears in the table, blanks it out (sets it to null) in
     * the array, and returns its canonical form; returns null if no word matches.
     */
    private static String removeFirstMatch (String[] words, String[][] table) {
        for (int i = 0; i < words.length; i++) {
            String canonical = canonicalForm(words[i], table);
            if (canonical != null) {
                words[i] = null;
                return canonical;
            }
        }
        return null;
    }

    /** Returns the first entry of the table row containing the word, or null if none does. */
    private static String canonicalForm (String word, String[][] table) {
        if (word == null) {
            return null;
        }
        for (String[] row : table) {
            for (String synonym : row) {
                if (synonym.equals(word)) {
                    return row[0];
                }
            }
        }
        return null;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy