// org.fuzzydb.postcode.PostZonImporter Maven / Gradle / Ivy
// The newest version!
/******************************************************************************
* Copyright (c) 2005-2008 Whirlwind Match Limited. All rights reserved.
*
* This is open source software; you can use, redistribute and/or modify
* it under the terms of the Open Software Licence v 3.0 as published by the
* Open Source Initiative.
*
* You should have received a copy of the Open Software Licence along with this
* application. If not, contact the Open Source Initiative (www.opensource.org)
*****************************************************************************/
package org.fuzzydb.postcode;
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Map.Entry;
import org.fuzzydb.core.Settings;
import org.fuzzydb.util.CsvReader;
import org.fuzzydb.util.FileUtils;
import org.fuzzydb.util.StringUtils;
import org.fuzzydb.util.CsvReader.GarbageLineException;
import org.fuzzydb.util.CsvReader.NoSuchColumnException;
import org.fuzzydb.util.CsvReader.UnsupportedTypeException;
import org.fuzzydb.util.geo.LatLongDegs;
import org.fuzzydb.util.geo.OsgbGridCoord;
/**This application assumes the file [postcode root]\PostZon.csv exists
* It generates postzon data to [postcode root]\postzon\*
*
* This needs a lot of memory, as it loads everything in one go then spits it back out.
* At least 300MB
*
* @deprecated org.fuzzydb.postcode.uk.full plug-in provides low memory footprint version.
*/
@Deprecated
public class PostZonImporter {
private static final String postcodeColName = "Postcode";
private static final String eastingColName = "GridEast";
private static final String northingColName = "GridNorth";
private static final String townColName = "AuthName";
public static final String postzonDataDir = "postzon";
private static final String postzonSourceFile = "PostZon_2005_2-PcodeComma.csv"; // "PostZon.csv";
public static final int blocksize = 2; // Number of characters of postcode to put in single file, bigger = less files, max 4, min 1
private TreeMap locationCache = new TreeMap(); // used to reduce mem overhead a bit by unifying string instances - this was found to have a massive effect, reducing overhead from 800MB to 300MB
public static void main(String[] args) {
PostZonImporter c = new PostZonImporter();
String root = Settings.getInstance().getPostcodeRoot();
c.convert(root + File.separatorChar + postzonSourceFile, root + File.separatorChar + postzonDataDir);
}
public PostZonImporter() {
super();
}
private void convert(String in, String out) {
TreeMap> map = new TreeMap>();
// Read file in
try {
CsvReader reader = null;
try {
reader = new CsvReader(in, true, false);
} catch (EOFException e) {
System.out.println(in + ": File is empty!");
return;
} catch (FileNotFoundException e) {
System.out.println(in + ": File not found!");
return;
} catch (IOException e) {
System.out.println(in + ": IOException " + e.getMessage());
return;
}
try {
reader.setColumn(postcodeColName, String.class);
reader.setColumn(eastingColName, Integer.class);
reader.setColumn(northingColName, Integer.class);
reader.setColumn(townColName, String.class);
} catch (NoSuchColumnException e) {
System.out.println("Missing column in " + in + ": " + e.getMessage());
return;
}
int codesRead = 0;
int linesIgnored = 0;
try {
System.out.println("Reading data...");
for (;;) {
try {
Map data = reader.readLine();
Integer easting = (Integer)data.get(eastingColName);
Integer northing = (Integer)data.get(northingColName);
String town = (String)data.get(townColName);
int comma = town.indexOf(',');
if (comma > -1) {
town = town.substring(0, comma);
}
if (locationCache.containsKey(town)) {
town = locationCache.get(town);
} else {
locationCache.put(town, town);
}
OsgbGridCoord grid = new OsgbGridCoord(easting*10, northing*10); // postzon uses 100m grid refs, the convertor needs 10m points, so x10 to upgrade
LatLongDegs ll = grid.toLatLongDegs();
PostcodeResult result = new PostcodeResult(town, "", (float)ll.lat, (float)ll.lon);
String postcode = StringUtils.stripSpaces((String)data.get(postcodeColName)).toUpperCase();
if (postcode.length() < PostcodeService.minPostcodeLen) throw new GarbageLineException("Postcode too short:" + postcode);
TreeMap submap = map.get(postcode.length());
if (submap == null) {
submap = new TreeMap();
map.put(postcode.length(), submap);
}
submap.put(postcode, result);
codesRead++;
if (codesRead > 0 && codesRead % 100000 == 0) System.out.println("Read " + codesRead + " codes...");
} catch (GarbageLineException e) {
linesIgnored++;
}
}
} catch (EOFException e) {
System.out.println("Imported " + codesRead + " postcodes OK, ignored " + linesIgnored + " incomplete lines.");
} catch (IOException e) {
System.out.println(in + ": IOException " + e.getMessage());
return;
}
} catch (UnsupportedTypeException e) {
System.out.println("Internal error. " + e.getMessage());
return;
}
File dir = new File(out);
dir.mkdirs();
for (Entry> subentry : map.entrySet()) {
Set> set = subentry.getValue().entrySet();
System.out.println("Outputing " + subentry.getKey() + " digit codes...");
Iterator> iterator = set.iterator();
Entry entry = null;
while (iterator.hasNext() || entry != null) {
TreeMap submap = new TreeMap();
if (entry==null) entry = iterator.next();
// Create new file
String filename = entry.getKey().substring(0, entry.getKey().length()-blocksize);
//System.out.println("Creating file: " + filename);
do {
submap.put(entry.getKey(), entry.getValue());
//System.out.println(" Putting entry: " + entry.getKey() + entry.getValue().toString());
if (iterator.hasNext()) {
entry = iterator.next();
} else {
entry = null;
}
} while (entry != null && entry.getKey().startsWith(filename));
try {
String outFile = out + File.separatorChar + filename;
FileUtils.writeObjectToGZip(outFile, submap);
} catch (IOException e) {
return;
}
}
}
System.out.println("Conversion complete.");
}
}