From 4114f6cc1762573ddf05cf92f2d304dbf04ed04e Mon Sep 17 00:00:00 2001 From: Andrew Harvey Date: Tue, 4 May 2021 16:12:13 +1000 Subject: major refactor and improvements --- Makefile | 21 ++++- README.md | 79 ++++++++++++++++++ bin/reduceDuplicates.js | 171 +++++++++++++++++++++++++++++++++++++++ bin/reduceOverlap.js | 205 +++++++++++++++++++++++++++++++++++++++++++++++ bin/vicmap2osm.js | 68 ++++++++++++++++ cluster.js | 64 --------------- filterOSM.js | 10 --- lib/cluster.js | 64 +++++++++++++++ lib/filterOSM.js | 16 ++++ lib/toOSM.js | 182 +++++++++++++++++++++++++++++++++++++++++ reduceOverlap.js | 167 -------------------------------------- src/createIndexQuery.sql | 5 ++ toOSM.js | 170 --------------------------------------- vicmap2osm.js | 55 ------------- 14 files changed, 808 insertions(+), 469 deletions(-) create mode 100644 README.md create mode 100755 bin/reduceDuplicates.js create mode 100755 bin/reduceOverlap.js create mode 100755 bin/vicmap2osm.js delete mode 100644 cluster.js delete mode 100644 filterOSM.js create mode 100644 lib/cluster.js create mode 100644 lib/filterOSM.js create mode 100644 lib/toOSM.js delete mode 100755 reduceOverlap.js create mode 100644 src/createIndexQuery.sql delete mode 100644 toOSM.js delete mode 100755 vicmap2osm.js diff --git a/Makefile b/Makefile index 2a723e4..f295eef 100644 --- a/Makefile +++ b/Makefile @@ -28,13 +28,28 @@ data/vicmap.geojson: data/vicmap/ll_gda94/sde_shape/whole/VIC/VMADD/layer/addres ogr2ogr -f GeoJSONSeq $@ $< dist/vicmap-osm.geojson: data/vicmap.geojson - ./vicmap2osm.js $< $@ + ./bin/vicmap2osm.js $< $@ -dist/vicmap-osm-flats.geojson: dist/vicmap-osm.geojson - ./reduceOverlap.js $< $@ +dist/vicmap-osm-uniq.geojson: dist/vicmap-osm.geojson + node --max-old-space-size=4096 bin/reduceDuplicates.js $< $@ + +dist/vicmap-osm-uniq-flats.geojson: dist/vicmap-osm-uniq.geojson + ./bin/reduceOverlap.js $< $@ + +loadPgOSM: dist/vicmap-osm.geojson + ogr2ogr -f PostgreSQL PG: $< -lco UNLOGGED=YES -nln vm_osm 
data/vicmap.fgb: data/vicmap/ll_gda94/sde_shape/whole/VIC/VMADD/layer/address.shp ogr2ogr -f FlatGeobuf $@ $< dist/vicmap-osm.fgb: dist/vicmap-osm.geojson ogr2ogr -f FlatGeobuf $@ $< + +# useful for development to be able to query a database +loadPgAdd: data/vicmap/ll_gda94/sde_shape/whole/VIC/VMADD/layer/address.shp + ogr2ogr -f PostgreSQL PG: $< -lco UNLOGGED=YES -nln vmadd + # index all columns for faster queries during development + psql -f src/createIndexQuery.sql --tuples-only | psql + +loadPgProp: data/vicmap/ll_gda94/sde_shape/whole/VIC/VMPROP/layer/property_view.shp + ogr2ogr -f PostgreSQL PG: $< -lco UNLOGGED=YES -nln vmprop diff --git a/README.md b/README.md new file mode 100644 index 0000000..c4315df --- /dev/null +++ b/README.md @@ -0,0 +1,79 @@ +# vicmap2osm + +Prepares [Vicmap Address](https://www.land.vic.gov.au/maps-and-spatial/spatial-data/vicmap-catalogue/vicmap-address) data for import into OpenStreetMap. + +Vicmap Address data © State of Victoria (Department of Environment, Land, Water and Planning), CC BY 4.0, with an [OSMF LWG CC waiver](https://wiki.openstreetmap.org/wiki/File:Vicmap_CCBYPermission_OSM_Final_Jan2018_Ltr.pdf). + +## GitLab CI/CD + +GitLab CI/CD automates the data processing in the following stages. + +The _prepare_ stage downloads Vicmap Address data and converts it into GeoJSON. Because this takes around 45 minutes, it's cached through CI/CD for future use. + +The _build_ stage does all the processing to produce the import candidate data and intermediate datasets and reports.
+ +## Build candidate files + +Download source Vicmap data and convert to GeoJSON: + + make data/vicmap.geojson + +Convert into OSM address schema, and omit addresses which don't meet our threshold for import (see _Omitted addresses_ below) (code at `bin/vicmap2osm.js`): + + make dist/vicmap-osm.geojson + +Remove duplicates where all address attributes match at the same location or within a small proximity (code at `bin/reduceDuplicates.js`): + + make dist/vicmap-osm-uniq.geojson + +Reduce some address points with the same coordinates but different address attributes (see _Overlapping points_ below) (code at `bin/reduceOverlap.js`): + + make dist/vicmap-osm-uniq-flats.geojson + +This is only done for strictly overlapping points; where the geometry varies even slightly, we don't attempt to combine them. + +### Omitted addresses + +Source addresses are omitted where they: + +1. have neither an `addr:housenumber` nor `addr:housename`. + +Since these addresses have no identifying attribute beyond street, and there are often multiple of these along a street all with the same street/suburb/postcode, they are of little utility and therefore omitted. + +These rules are defined in `lib/filterOSM.js`. + +### OSM schema + +- `addr:unit` is constructed either as a single value or range where the building unit is supplied +- `addr:housename` is included where there is a building name present in the source +- `addr:housenumber` is constructed from the number prefix, main number and number suffix fields for each of the from/to range, eg `1A-3B`. +- `addr:street` is constructed from the street proper name, street type and street suffix, formatted as capital case. eg `Main Street North`. +- `addr:suburb` is constructed from the locality value formatted as capital case. +- `addr:postcode` is as supplied. +- `addr:state` is as supplied and should always be `VIC`. + +The schema mapping mostly happens in `lib/toOSM.js`.
+ +### Overlapping points + +Source address data contains many overlapping address points. + +1. First pass, where all the OSM tags are the same, and the points have the exact same geometry, all the duplicates are omitted. + +Where each of the housenumber, street, suburb, postcode, state are the same for each of the overlapping points, but only the unit value differs we attempt to reduce these to a single address point without `addr:unit` but instead using [`addr:flats`](https://wiki.openstreetmap.org/wiki/Key:addr:flats). + +`addr:flats` is the documented tag for describing the unit numbers at an address. + +In the real world where you have different unit numbers for townhouses or villas ideally you'd have different addresses in OSM using `addr:unit` but have each located on each dwelling. + +Where you have an apartment building containing multiple units, this import chooses to avoid overlapping addresses each with a different `addr:unit` instead creating a single node with `addr:flats`. + +Where possible, unit numbers are reduced to ranges, for example to create `addr:flats=1-5;10-15;20` instead of `addr:flats=1;2;3;4;5;10;11;12;13;14;15;20`. + +Multiple points overlapping don't add any extra value to the OSM data and are harder for mappers to manage, especially for large apartment buildings. + +Data consumers can still easily explode `addr:flats` out into overlapping nodes with varying `addr:unit` if desired. + +### null values + +Values `UNNAMED` and `NOT NAMED` appear as street names and locality names. These values are treated as null/empty values rather than proper names. diff --git a/bin/reduceDuplicates.js b/bin/reduceDuplicates.js new file mode 100755 index 0000000..c9a7769 --- /dev/null +++ b/bin/reduceDuplicates.js @@ -0,0 +1,171 @@ +#!/usr/bin/env node + +/** + * Remove duplicates (exact tags) at the same location or within a small proximity.
+ */ + +const fs = require('fs') +const { Readable, Transform, pipeline } = require('stream') +const ndjson = require('ndjson') +const cluster = require('../lib/cluster.js') + +const argv = require('yargs/yargs')(process.argv.slice(2)) + .option('debug', { + type: 'boolean', + description: 'Dumps full debug logs' + }) + .argv + +if (argv._.length < 2) { + console.error("Usage: ./reduceDuplicates.js input.geojson output.geojson") + process.exit(1) +} + +const inputFile = argv._[0] +const outputFile = argv._[1] + +if (!fs.existsSync(inputFile)) { + console.error(`${inputFile} not found`) + process.exit(1) +} + +let sourceCount = 0 +const features = {} + +const index = new Transform({ + readableObjectMode: true, + writableObjectMode: true, + transform(feature, encoding, callback) { + sourceCount++ + + if (sourceCount % 10000 === 0) { + process.stdout.write(` ${sourceCount / 1000}k\r`) + } + + const key = [ + feature.properties['addr:unit'], + feature.properties['addr:housename'], + feature.properties['addr:housenumber'], + feature.properties['addr:street'], + feature.properties['addr:suburb'], + feature.properties['addr:state'], + feature.properties['addr:postcode'] + ].join('/') + + if (!(key in features)) { + features[key] = [] + } + features[key].push(feature) + + callback() + } +}) + +let reduceIndex = 0 +const reduce = new Transform({ + readableObjectMode: true, + writableObjectMode: true, + transform(key, encoding, callback) { + reduceIndex++ + if (reduceIndex % 10000 === 0) { + process.stdout.write(` ${reduceIndex / 1000}k / ${Math.round(sourceCount / 1000)}k (${Math.round(reduceIndex / sourceCount * 100)}%)\r`) + } + + const groupedFeatures = features[key] + if (groupedFeatures.length === 1) { + // address not duplicated + + this.push(groupedFeatures[0]) + } else { + // address appears multiple times + + const sameCoordinates = [...new Set(groupedFeatures.map(f => f.geometry.coordinates.join(',')))].length <= 1 + if (sameCoordinates) { + // features have same 
properties and same geometry, so true duplicates can reduce to one + this.push(groupedFeatures[0]) + } else { + // cluster features with a threshold of 25m + const clusters = cluster(groupedFeatures, 25) + + // if clustered into a single cluster, then output a single average feature + if (clusters.length === 1) { + const averageCoordinates = [ + groupedFeatures.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur) / groupedFeatures.length, + groupedFeatures.map(f => f.geometry.coordinates[1]).reduce((acc, cur) => acc + cur) / groupedFeatures.length + ] + const averageFeature = groupedFeatures[0] + averageFeature.geometry.coordinates = averageCoordinates + + this.push(averageFeature) + } else { + // more than one cluster, reduce those clustered into one, and then report all the results + const clusterAverages = clusters.map(cluster => { + if (cluster.length === 1) { + return cluster + } else { + const averageCoordinates = [ + cluster.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur) / groupedFeatures.length, + cluster.map(f => f.geometry.coordinates[1]).reduce((acc, cur) => acc + cur) / groupedFeatures.length + ] + const averageFeature = cluster[0] + averageFeature.geometry.coordinates = averageCoordinates + return averageFeature + } + }) + + // report these as address points with the same attributes but different locations beyond the threshold + if (debugDuplicateAddressStream) { + const webOfMatches = { + type: 'Feature', + properties: clusterAverages[0].properties, + geometry: { + type: 'LineString', + coordinates: averageClusters.map(p => p.geometry.coordinates) + } + } + debugDuplicateAddressStream.write(webOfMatches) + } + } + } + } + + callback() + } +}) + +const debugDuplicateAddressesStream = argv.debug ? 
+ ndjson.stringify() + .pipe(fs.createWriteStream('debug/reduceDuplicates/duplicateAddresses.geojson')) + : null + +// first pass to index by geometry +console.log('First pass to index by address properties') +pipeline( + fs.createReadStream(inputFile), + ndjson.parse(), + index, + err => { + if (err) { + console.log(err) + process.exit(1) + } else { + console.log(` of ${sourceCount} features found ${Object.keys(features).length} unique addresses`) + // second pass to reduce overlapping features + pipeline( + Readable.from(Object.keys(features)), + reduce, + ndjson.stringify(), + fs.createWriteStream(outputFile), + err => { + if (err) { + console.log(err) + process.exit(1) + } else { + debugDuplicateAddressesStream.end() + process.exit(0) + } + } + ) + } + } +) diff --git a/bin/reduceOverlap.js b/bin/reduceOverlap.js new file mode 100755 index 0000000..3984296 --- /dev/null +++ b/bin/reduceOverlap.js @@ -0,0 +1,205 @@ +#!/usr/bin/env node + +const fs = require('fs') +const { Readable, Transform, pipeline } = require('stream') +const ndjson = require('ndjson') +const util = require('util') + +const argv = require('yargs/yargs')(process.argv.slice(2)).argv + +if (argv._.length < 2) { + console.error("Usage: ./reduceOverlap.js input.geojson output.geojson") + process.exit(1) +} + +const inputFile = argv._[0] +const outputFile = argv._[1] + +if (!fs.existsSync(inputFile)) { + console.error(`${inputFile} not found`) + process.exit(1) +} + +let sourceCount = 0 +const features = {} + +/** + * Index features by geometry. Used as a first pass, so a second pass can easily compare + * features with the same geometry. 
+ */ +const index = new Transform({ + readableObjectMode: true, + writableObjectMode: true, + transform(feature, encoding, callback) { + sourceCount++ + + if (!argv.quiet) { + if (sourceCount % 10000 === 0) { + process.stdout.write(` ${sourceCount / 1000}k\r`) + } + } + + const geometryKey = feature.geometry.coordinates.join(',') + + if (!(geometryKey in features)) { + features[geometryKey] = [] + } + features[geometryKey].push(feature) + + callback() + } +}) + +/** + * Reduces features with the same geometry. + */ +let reduceIndex = 0 +const reduce = new Transform({ + readableObjectMode: true, + writableObjectMode: true, + transform(key, encoding, callback) { + reduceIndex++ + if (!argv.quiet) { + if (reduceIndex % 10000 === 0) { + process.stdout.write(` ${reduceIndex / 1000}k / ${Math.round(sourceCount / 1000)}k (${Math.round(reduceIndex / sourceCount * 100)}%)\r`) + } + } + + var groupedFeatures = features[key] + + if (groupedFeatures.length === 1) { + // only one feature with this geometry, nothing to reduce, output as is + this.push(groupedFeatures[0]) + } else { + // mulitple features with the same geometry + + // if housename, housenumber, street, suburb, state, postcode are all the same + // and it's only unit which differs, + // and there is an address with no unit + // then remove all the unit addresses and add them as addr:flats on the no unit address + const sameHousename = [...new Set(groupedFeatures.map(f => f.properties['addr:housename']))].length <= 1 + const sameHousenumber = [...new Set(groupedFeatures.map(f => f.properties['addr:housenumber']))].length <= 1 + const sameStreet = [...new Set(groupedFeatures.map(f => f.properties['addr:street']))].length <= 1 + const sameSuburb = [...new Set(groupedFeatures.map(f => f.properties['addr:suburb']))].length <= 1 + const sameState = [...new Set(groupedFeatures.map(f => f.properties['addr:state']))].length <= 1 + const samePostcode = [...new Set(groupedFeatures.map(f => 
f.properties['addr:postcode']))].length <= 1 + + const hasNonUnit = groupedFeatures.map(f => 'addr:unit' in f.properties).includes(false) + + if (sameHousename && sameHousenumber && sameStreet && sameSuburb && sameState && samePostcode) { + if (hasNonUnit) { + const nonUnitFeatures = groupedFeatures.filter(f => (!('addr:unit' in f.properties))) + if (nonUnitFeatures.length > 1) { + // multiple non-unit features, unsure how to reduce + console.log('multiple non-unit features, unsure how to reduce') + console.dir(groupedFeatures, {depth: null}) + } else { + const nonUnitFeature = nonUnitFeatures[0] + + // place all the other addr:unit into addr:flats + const allOtherUnits = groupedFeatures.filter(f => 'addr:unit' in f.properties).map(f => f.properties['addr:unit']) + + // if allOtherUnits.length is one then that means we have one address without a unit and one with a unit at the same point + // TODO should we just drop the non-unit address and keep the addr:unit one? + // need to determine if you always have a non-unit address for the unit address, if there is then + // perhaps we can safely drop the non-unit address and use a single addr:unit + + // adapted from https://stackoverflow.com/a/54973116/6702659 + const sortedAllOtherUnitsAsRanges = allOtherUnits + .slice() + .sort((a, b) => a - b) + .reduce((acc, cur, idx, src) => { + if ((idx > 0) && ((cur - src[idx - 1]) === 1)) { + acc[acc.length - 1][1] = cur + } else { + acc.push([cur]) + } + return acc + }, []) + .map(range => range.join('-')) + + nonUnitFeature.properties['addr:flats'] = sortedAllOtherUnitsAsRanges.join(';') + this.push(nonUnitFeature) + } + } else { + // all have same housenumber, street, suburb, state, postcode but no non-unit + // combine all the addr:unit into addr:flats and then drop addr:unit + const units = groupedFeatures.filter(f => 'addr:unit' in f.properties).map(f => f.properties['addr:unit']) + + // TODO assert units.length > 1 + if (units.length <= 1) { + // console.log(`all have 
same housenumber, street, suburb, state, postcode but no non-unit, but only found ${units.length} units`, units) + } + + const feature = groupedFeatures[0] + delete feature.properties['addr:unit'] + + // adapted from https://stackoverflow.com/a/54973116/6702659 + const unitRanges = units + .slice() + .sort((a, b) => a - b) + .reduce((acc, cur, idx, src) => { + if ((idx > 0) && ((cur - src[idx - 1]) === 1)) { + acc[acc.length - 1][1] = cur + } else { + acc.push([cur]) + } + return acc + }, []) + .map(range => range.join('-')) + + feature.properties['addr:flats'] = unitRanges.join(';') + this.push(feature) + } + } else { + // addresses with the same geometry, however more than unit differs + // TODO need to investigate to see what we can/shoud do about these + for (let i = 0; i < groupedFeatures.length; i++) { + this.push(groupedFeatures[i]) + if (debugSameGeometry) { + debugSameGeometry.write(groupedFeatures[i]) + } + } + } + } + + callback() + } +}) + +const debugSameGeometry = argv.debug ? 
+ ndjson.stringify() + .pipe(fs.createWriteStream('debug/reduceOverlap/sameGeometry.geojson')) + : null + +// first pass to index by geometry +console.log('First pass to index by geometry') +pipeline( + fs.createReadStream(inputFile), + ndjson.parse(), + index, + err => { + if (err) { + console.log(err) + process.exit(1) + } else { + console.log(` of ${sourceCount} features found ${Object.keys(features).length} unique geometries`) + // second pass to reduce overlapping features + pipeline( + Readable.from(Object.keys(features)), + reduce, + ndjson.stringify(), + fs.createWriteStream(outputFile), + err => { + if (err) { + console.log(err) + process.exit(1) + } else { + debugSameGeometry.end() + process.exit(0) + } + } + ) + } + } +) diff --git a/bin/vicmap2osm.js b/bin/vicmap2osm.js new file mode 100755 index 0000000..b252f24 --- /dev/null +++ b/bin/vicmap2osm.js @@ -0,0 +1,68 @@ +#!/usr/bin/env node + +const fs = require('fs') +const { Transform, pipeline } = require('readable-stream') +const ndjson = require('ndjson') +const toOSM = require('./toOSM.js') +const filterOSM = require('./filterOSM.js') + +const argv = require('yargs/yargs')(process.argv.slice(2)) + .option('debug', { + type: 'boolean', + description: 'Dumps full debug logs' + }) + .option('tracing', { + type: 'boolean', + description: 'Includes _pfi tags to aid debugging' + }) + .argv + +if (argv._.length < 2) { + console.error("Usage: ./vicmap2osm.js input.geojson output.geojson") + process.exit(1) +} + +const inputFile = argv._[0] +const outputFile = argv._[1] + +if (!fs.existsSync(inputFile)) { + console.error(`${inputFile} not found`) + process.exit(1) +} + +const transform = new Transform({ + readableObjectMode: true, + writableObjectMode: true, + transform(feature, encoding, callback) { + // convert source Feature into a Feature per the OSM schema + const osm = toOSM(feature, { + tracing: argv.tracing + }) + + // some addresses we skip importing into OSM, see README.md#omitted-addresses + if 
(filterOSM(osm, { + debug: argv.debug + })) { + this.push(osm) + } + + callback() + } +}) + +// stream in source ndjson, transfom and stream out +pipeline( + fs.createReadStream(inputFile), + ndjson.parse(), + transform, + ndjson.stringify(), + fs.createWriteStream(outputFile), + (err) => { + if (err) { + console.log(err) + process.exit(1) + } else { + process.exit(0) + } + } +) diff --git a/cluster.js b/cluster.js deleted file mode 100644 index c716063..0000000 --- a/cluster.js +++ /dev/null @@ -1,64 +0,0 @@ -const CheapRuler = require('cheap-ruler') -const ruler = new CheapRuler(-37, 'meters') - -/** - * Cluster points together where within threshold distance. - * - * @param {Array} features - GeoJSON Point Features - * @param {number} thresholdDistance - Maximum distance between points to cluster together - * - * @returns {Array} clusters, where unclustered features are returned as single feature clusters - */ -module.exports = (features, thresholdDistance) => { - // Array of clusters where each cluster is a Set of feature index's - const clusters = [] - - features.map((a, ai) => { - features.map((b, bi) => { - // skip comparing with self - if (ai === bi) return - - const distance = ruler.distance(a.geometry.coordinates, b.geometry.coordinates) - if (distance < thresholdDistance) { - // link into a cluster - let addedToExistingCluster = false - clusters.forEach((cluster, i) => { - if (cluster.has(ai) || cluster.has(bi)) { - // insert into this cluster - clusters[i].add(ai) - clusters[i].add(bi) - - addedToExistingCluster = true - } - }) - - if (!addedToExistingCluster) { - // create a new cluster - const newCluster = new Set() - newCluster.add(ai) - newCluster.add(bi) - clusters.push(newCluster) - } - } // else don't cluster together - }) - }) - - // result is array of clusters, including non-clustered features as single item clusters - const result = clusters.map(cluster => { - return Array.from(cluster).map(index => { - return features[index] - }) - }) - - // 
find features not clustered - features.map((feature, index) => { - // if feature not a cluster, return as an single item cluster - const featureInACluster = clusters.map(cluster => cluster.has(index)).reduce((acc, cur) => acc || !!cur, false) - if (!featureInACluster) { - result.push([feature]) - } - }) - - return result - -} diff --git a/filterOSM.js b/filterOSM.js deleted file mode 100644 index 93db4a7..0000000 --- a/filterOSM.js +++ /dev/null @@ -1,10 +0,0 @@ -module.exports = (feature) => { - - // skip any addresses without a housenumber - // eg PFI 53396626 has no housenumber - if (!('addr:housenumber' in feature.properties)) { - return false - } - - return true -} diff --git a/lib/cluster.js b/lib/cluster.js new file mode 100644 index 0000000..c716063 --- /dev/null +++ b/lib/cluster.js @@ -0,0 +1,64 @@ +const CheapRuler = require('cheap-ruler') +const ruler = new CheapRuler(-37, 'meters') + +/** + * Cluster points together where within threshold distance. + * + * @param {Array} features - GeoJSON Point Features + * @param {number} thresholdDistance - Maximum distance between points to cluster together + * + * @returns {Array} clusters, where unclustered features are returned as single feature clusters + */ +module.exports = (features, thresholdDistance) => { + // Array of clusters where each cluster is a Set of feature index's + const clusters = [] + + features.map((a, ai) => { + features.map((b, bi) => { + // skip comparing with self + if (ai === bi) return + + const distance = ruler.distance(a.geometry.coordinates, b.geometry.coordinates) + if (distance < thresholdDistance) { + // link into a cluster + let addedToExistingCluster = false + clusters.forEach((cluster, i) => { + if (cluster.has(ai) || cluster.has(bi)) { + // insert into this cluster + clusters[i].add(ai) + clusters[i].add(bi) + + addedToExistingCluster = true + } + }) + + if (!addedToExistingCluster) { + // create a new cluster + const newCluster = new Set() + newCluster.add(ai) + 
newCluster.add(bi) + clusters.push(newCluster) + } + } // else don't cluster together + }) + }) + + // result is array of clusters, including non-clustered features as single item clusters + const result = clusters.map(cluster => { + return Array.from(cluster).map(index => { + return features[index] + }) + }) + + // find features not clustered + features.map((feature, index) => { + // if feature not a cluster, return as an single item cluster + const featureInACluster = clusters.map(cluster => cluster.has(index)).reduce((acc, cur) => acc || !!cur, false) + if (!featureInACluster) { + result.push([feature]) + } + }) + + return result + +} diff --git a/lib/filterOSM.js b/lib/filterOSM.js new file mode 100644 index 0000000..e530773 --- /dev/null +++ b/lib/filterOSM.js @@ -0,0 +1,16 @@ +module.exports = (feature, options) => { + + // skip any addresses without either a housenumber or housename + // eg PFI 53396626 has no housenumber + if ( + !('addr:housenumber' in feature.properties) && + !('addr:housename' in feature.properties) + ) { + if (argv.debug) { + console.log(`PFI ${feature.properties._pfi} has neither a addr:housename or addr:housenumber, filtering`) + } + return false + } + + return true +} diff --git a/lib/toOSM.js b/lib/toOSM.js new file mode 100644 index 0000000..f9fab84 --- /dev/null +++ b/lib/toOSM.js @@ -0,0 +1,182 @@ +const { titleCase } = require('title-case') +const { capitalCase } = require('capital-case') + +const buildingUnitType = { + ANT: 'ANTENNA', + APT: 'APARTMENT', + ATM: 'ATM', + BBOX: 'BATHING BOX', + BERT: 'BERTH', + BLDG: 'BUILDING', + BTSD: 'BOATSHED', + CARP: 'CARPARK', + CARS: 'CARSPACE', + CARW: 'CARWASH', + CHAL: 'CHALET', + CLUB: 'CLUB', + CTGE: 'COTTAGE', + CTYD: 'COURTYARD', + DUPL: 'DUPLEX', + FCTY: 'FACTORY', + FLAT: 'FLAT', + GATE: 'GARAGE', + GRGE: 'GATE', + HALL: 'HALL', + HELI: 'HELIPORT', + HNGR: 'HANGAR', + HOST: 'HOSTEL', + HSE: 'HOUSE', + KSK: 'KIOSK', + LOT: 'LOT', + MBTH: 'MAISONETTE', + OFFC: 'OFFICE', + PSWY: 
'PASSAGEWAY', + PTHS: 'PENTHOUSE', + REST: 'RESTAURANT', + RESV: 'RESERVE', + ROOM: 'ROOM', + RPTN: 'RECPETION', + SAPT: 'STUDIO APARTMENT', + SE: 'SUITE', + SHCS: 'SHOWCASE', + SHED: 'SHED', + SHOP: 'SHOP', + SHRM: 'SHOWROOM', + SIGN: 'SIGN', + SITE: 'SITE', + STLL: 'STALL', + STOR: 'STORE', + STR: 'STRATA UNIT', + STU: 'STUDIO', + SUBS: 'SUBSTATION', + TNCY: 'TENANCY', + TNHS: 'TOWNHOUSE', + TWR: 'TOWER', + UNIT: 'UNIT', + VLLA: 'VILLA', + VLT: 'VAULT', + WHSE: 'WAREHOUSE', + WKSH: 'WORKSHOP' +} + +// likely these are not proper names, so we will ignore them +const emptyNames = [ + 'UNNAMED', + 'NOT NAMED' +] + +/** + * Transforms a GeoJSON Feature from the Vicmap address schema into OSM schema + * + * @param sourceFeature Feature in Vicmap address schema + * @returns Feature in OSM schema + */ +module.exports = (sourceFeature, options) => { + + const outputFeature = Object.assign({}, sourceFeature) + const sourceProperties = sourceFeature.properties + const outputProperties = {} + + if (options && options.tracing) { + outputProperties['_pfi'] = sourceProperties.PFI + } + + // Building sub address type (eg UNIT OFFICE SHOP) + // + // bld_unit_* + const bld_unit_1 = [ + sourceProperties.BUNIT_PRE1, + sourceProperties.BUNIT_ID1 || null, // 0 is used for an empty value in the source data, so convert 0 to null + sourceProperties.BUNIT_SUF1 + ].join('') || null + + const bld_unit_2 = [ + sourceProperties.BUNIT_PRE2, + sourceProperties.BUNIT_ID2 || null, // 0 is used for an empty value in the source data, so convert 0 to null + sourceProperties.BUNIT_SUF2 + ].join('') || null + + // if both 1 and 2 defined, then use a range 1-2 otherwise just select the one which was defined + let bld_unit = null + if (sourceProperties.HSA_FLAG === 'Y') { + bld_unit = sourceProperties.HSAUNITID + } else { + if (bld_unit_1 && bld_unit_2) { + bld_unit = `${bld_unit_1}-${bld_unit_2}` + } else if (bld_unit_1) { + bld_unit = bld_unit_1 + } else if (bld_unit_2) { + bld_unit = bld_unit_2 + } 
+ } + + if (bld_unit) { + outputProperties['addr:unit'] = bld_unit + } + + /* + if (sourceProperties.BLGUNTTYP && sourceProperties.BLGUNTTYP in buildingUnitType) { + outputProperties['addr:unit:type'] = buildingUnitType[sourceProperties.BLGUNTTYP] + } + */ + + if (sourceProperties.BUILDING) { + outputProperties['addr:housename'] = sourceProperties.BUILDING + } + + // house_* + const house_1 = [ + sourceProperties.HSE_PREF1, + sourceProperties.HSE_NUM1 || null, // 0 is used for an empty value in the source data, so convert 0 to null + sourceProperties.HSE_SUF1 + ].join('') + + const house_2 = [ + sourceProperties.HSE_PREF2, + sourceProperties.HSE_NUM2 || null, // 0 is used for an empty value in the source data, so convert 0 to null + sourceProperties.HSE_SUF2 + ].join('') + + let housenumber = null + if (house_1 && house_2) { + housenumber = `${house_1}-${house_2}` + } else if (house_1) { + housenumber = house_1 + } else if (house_2) { + housenumber = house_2 + } + + if (housenumber) { + outputProperties['addr:housenumber'] = housenumber + } + + // display numbers used predominately in the City of Melbourne CBD by large properties. Primarily to simplify an assigned number range. + // so should map the assigned address or the signposted address? 
+ + // every record has at least ROAD_NAME populated + if (sourceProperties.ROAD_NAME && !emptyNames.includes(sourceProperties.ROAD_NAME)) { + outputProperties['addr:street'] = capitalCase([ + sourceProperties.ROAD_NAME, + sourceProperties.ROAD_TYPE, + sourceProperties.RD_SUF + ].join(' ')) + } + + // every record has LOCALITY populated, however some values should be empty + if (sourceProperties.LOCALITY && !emptyNames.includes(sourceProperties.LOCALITY)) { + outputProperties['addr:suburb'] = capitalCase(sourceProperties.LOCALITY) + } + + // every record has STATE populated + if (sourceProperties.STATE) { + outputProperties['addr:state'] = sourceProperties.STATE + } + + // some records have no POSTCODE populated + if (sourceProperties.POSTCODE) { + outputProperties['addr:postcode'] = sourceProperties.POSTCODE + } + + outputFeature.properties = outputProperties + return outputFeature +} diff --git a/reduceOverlap.js b/reduceOverlap.js deleted file mode 100755 index 8b24baa..0000000 --- a/reduceOverlap.js +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env node - -const fs = require('fs') -const { Readable, Transform, pipeline } = require('stream') -const ndjson = require('ndjson') - -const args = process.argv.slice(2) - -if (args.length < 2) { - console.error("Usage: ./reduceOverlap.js input.geojson output.geojson") - process.exit(1) -} - -const inputFile = args[0] -const outputFile = args[1] - -if (!fs.existsSync(inputFile)) { - console.error(`${inputFile} not found`) - process.exit(1) -} - -let sourceCount = 0 -const features = {} - -const index = new Transform({ - readableObjectMode: true, - writableObjectMode: true, - transform(feature, encoding, callback) { - sourceCount++ - - const geometryKey = feature.geometry.coordinates.join(',') - - if (!(geometryKey in features)) { - features[geometryKey] = [] - } - features[geometryKey].push(feature) - - callback() - } -}) - -const reduce = new Transform({ - readableObjectMode: true, - writableObjectMode: true, - 
transform(key, encoding, callback) { - - var groupedFeatures = features[key] - if (groupedFeatures.length === 1) { - // point not overlapping - this.push(groupedFeatures[0]) - } else { - // points overlapping, try to reduce to non-overlapping - - // if housenumber, street, suburb, state, postcode are all the same - // and it's only unit which differs, - // and there is an address with no unit - // then remove all the unit addresses and add them as addr:flats on the no unit address - const sameHousenumber = [...new Set(groupedFeatures.map(f => f.properties['addr:housenumber']))].length <= 1 - const sameStreet = [...new Set(groupedFeatures.map(f => f.properties['addr:street']))].length <= 1 - const sameSuburb = [...new Set(groupedFeatures.map(f => f.properties['addr:suburb']))].length <= 1 - const sameState = [...new Set(groupedFeatures.map(f => f.properties['addr:state']))].length <= 1 - const samePostcode = [...new Set(groupedFeatures.map(f => f.properties['addr:postcode']))].length <= 1 - - const hasNonUnit = groupedFeatures.map(f => 'addr:unit' in f.properties).includes(false) - - if (sameHousenumber && sameStreet && sameSuburb && sameState && samePostcode) { - if (hasNonUnit) { - const nonUnitFeatures = groupedFeatures.filter(f => (!('addr:unit' in f.properties))) - if (nonUnitFeatures.length > 1) { - // multiple non-unit features, unsure how to reduce - } else { - const nonUnitFeature = nonUnitFeatures[0] - - // place all the other addr:unit into addr:flats - const allOtherUnits = groupedFeatures.filter(f => 'addr:unit' in f.properties).map(f => f.properties['addr:unit']) - - // if allOtherUnits.length is one then that means we have one address without a unit and one with a unit at the same point - // TODO should we just drop the non-unit address and keep the addr:unit one? 
- - // adapted from https://stackoverflow.com/a/54973116/6702659 - const sortedAllOtherUnitsAsRanges = allOtherUnits - .slice() - .sort((a, b) => a - b) - .reduce((acc, cur, idx, src) => { - if ((idx > 0) && ((cur - src[idx - 1]) === 1)) { - acc[acc.length - 1][1] = cur - } else { - acc.push([cur]) - } - return acc - }, []) - .map(range => range.join('-')) - - nonUnitFeature.properties['addr:flats'] = sortedAllOtherUnitsAsRanges.join(';') - this.push(nonUnitFeature) - } - } else { - // all have same housenumber, street, suburb, state, postcode but no non-unit - // combine all the addr:unit into addr:flats and then drop addr:unit - const units = groupedFeatures.filter(f => 'addr:unit' in f.properties).map(f => f.properties['addr:unit']) - - // TODO assert units.length > 1 - - const feature = groupedFeatures[0] - delete feature.properties['addr:unit'] - - // adapted from https://stackoverflow.com/a/54973116/6702659 - const unitRanges = units - .slice() - .sort((a, b) => a - b) - .reduce((acc, cur, idx, src) => { - if ((idx > 0) && ((cur - src[idx - 1]) === 1)) { - acc[acc.length - 1][1] = cur - } else { - acc.push([cur]) - } - return acc - }, []) - .map(range => range.join('-')) - - feature.properties['addr:flats'] = unitRanges.join(';') - this.push(feature) - } - } else { - console.log('addresses with the same geometry, however more than unit differs') - // TODO need to investigate to see what we can/shoud do about these - //console.log(groupedFeatures) - for (let i = 0; i < groupedFeatures.length; i++) { - this.push(groupedFeatures[i]) - } - } - } - - callback() - } -}) - -// first pass to index by geometry -console.log('First pass to index by geometry') -pipeline( - fs.createReadStream(inputFile), - ndjson.parse(), - index, - err => { - if (err) { - console.log(err) - process.exit(1) - } else { - console.log(` of ${sourceCount} features found ${Object.keys(features).length} unique geometries`) - // second pass to reduce overlapping features - pipeline( - 
Readable.from(Object.keys(features)), - reduce, - ndjson.stringify(), - fs.createWriteStream(outputFile), - err => { - if (err) { - console.log(err) - process.exit(1) - } else { - process.exit(0) - } - } - ) - } - } -) diff --git a/src/createIndexQuery.sql b/src/createIndexQuery.sql new file mode 100644 index 0000000..a4e2712 --- /dev/null +++ b/src/createIndexQuery.sql @@ -0,0 +1,5 @@ +SELECT + 'CREATE INDEX CONCURRENTLY ' || table_name || '_' || column_name || ' ON ' || table_name || ' ("' || column_name || '");' +FROM information_schema.columns +WHERE table_schema = 'public' + AND table_name = 'vmadd'; diff --git a/toOSM.js b/toOSM.js deleted file mode 100644 index dd993d0..0000000 --- a/toOSM.js +++ /dev/null @@ -1,170 +0,0 @@ -const { titleCase } = require('title-case') -const { capitalCase } = require('capital-case') - -const buildingUnitType = { - ANT: 'ANTENNA', - APT: 'APARTMENT', - ATM: 'ATM', - BBOX: 'BATHING BOX', - BERT: 'BERTH', - BLDG: 'BUILDING', - BTSD: 'BOATSHED', - CARP: 'CARPARK', - CARS: 'CARSPACE', - CARW: 'CARWASH', - CHAL: 'CHALET', - CLUB: 'CLUB', - CTGE: 'COTTAGE', - CTYD: 'COURTYARD', - DUPL: 'DUPLEX', - FCTY: 'FACTORY', - FLAT: 'FLAT', - GATE: 'GARAGE', - GRGE: 'GATE', - HALL: 'HALL', - HELI: 'HELIPORT', - HNGR: 'HANGAR', - HOST: 'HOSTEL', - HSE: 'HOUSE', - KSK: 'KIOSK', - LOT: 'LOT', - MBTH: 'MAISONETTE', - OFFC: 'OFFICE', - PSWY: 'PASSAGEWAY', - PTHS: 'PENTHOUSE', - REST: 'RESTAURANT', - RESV: 'RESERVE', - ROOM: 'ROOM', - RPTN: 'RECPETION', - SAPT: 'STUDIO APARTMENT', - SE: 'SUITE', - SHCS: 'SHOWCASE', - SHED: 'SHED', - SHOP: 'SHOP', - SHRM: 'SHOWROOM', - SIGN: 'SIGN', - SITE: 'SITE', - STLL: 'STALL', - STOR: 'STORE', - STR: 'STRATA UNIT', - STU: 'STUDIO', - SUBS: 'SUBSTATION', - TNCY: 'TENANCY', - TNHS: 'TOWNHOUSE', - TWR: 'TOWER', - UNIT: 'UNIT', - VLLA: 'VILLA', - VLT: 'VAULT', - WHSE: 'WAREHOUSE', - WKSH: 'WORKSHOP' -} - -// likely these are not proper names, so we will ignore them -const emptyNames = [ - 'UNNAMED', - 'NOT NAMED' 
-] - -module.exports = (sourceFeature) => { - - const outputFeature = Object.assign({}, sourceFeature) - const sourceProperties = sourceFeature.properties - const outputProperties = {} - - // Building sub address type (eg UNIT OFFICE SHOP) - // - // bld_unit_* - const bld_unit_1 = [ - sourceProperties.BUNIT_PRE1, - sourceProperties.BUNIT_ID1 || null, // 0 is used for an empty value in the source data, so convert 0 to null - sourceProperties.BUNIT_SUF1 - ].join('') || null - - const bld_unit_2 = [ - sourceProperties.BUNIT_PRE2, - sourceProperties.BUNIT_ID2 || null, // 0 is used for an empty value in the source data, so convert 0 to null - sourceProperties.BUNIT_SUF2 - ].join('') || null - - // if both 1 and 2 defined, then use a range 1-2 otherwise just select the one which was defined - let bld_unit = null - if (sourceProperties.HSA_FLAG === 'Y') { - bld_unit = sourceProperties.HSAUNITID - } else { - if (bld_unit_1 && bld_unit_2) { - bld_unit = `${bld_unit_1}-${bld_unit_2}` - } else if (bld_unit_1) { - bld_unit = bld_unit_1 - } else if (bld_unit_2) { - bld_unit = bld_unit_2 - } - } - - if (bld_unit) { - outputProperties['addr:unit'] = bld_unit - } - - if (sourceProperties.BLGUNTTYP && sourceProperties.BLGUNTTYP in buildingUnitType) { - outputProperties['addr:unit:type'] = buildingUnitType[sourceProperties.BLGUNTTYP] - } - - if (sourceProperties.BUILDING) { - outputProperties['addr:housename'] = sourceProperties.BUILDING - } - - // house_* - const house_1 = [ - sourceProperties.HSE_PREF1, - sourceProperties.HSE_NUM1 || null, // 0 is used for an empty value in the source data, so convert 0 to null - sourceProperties.HSE_SUF1 - ].join('') - - const house_2 = [ - sourceProperties.HSE_PREF2, - sourceProperties.HSE_NUM2 || null, // 0 is used for an empty value in the source data, so convert 0 to null - sourceProperties.HSE_SUF2 - ].join('') - - let housenumber = null - if (house_1 && house_2) { - housenumber = `${house_1}-${house_2}` - } else if (house_1) { - housenumber 
= house_1 - } else if (house_2) { - housenumber = house_2 - } - - if (housenumber) { - outputProperties['addr:housenumber'] = housenumber - } - - // display numbers used predominately in the City of Melbourne CBD by large properties. Primarily to simplify an assigned number range. - // so should map the assigned address or the signposted address? - - // every record has at least ROAD_NAME populated - if (sourceProperties.ROAD_NAME && !emptyNames.includes(sourceProperties.ROAD_NAME)) { - outputProperties['addr:street'] = capitalCase([ - sourceProperties.ROAD_NAME, - sourceProperties.ROAD_TYPE, - sourceProperties.RD_SUF - ].join(' ')) - } - - // every record has LOCALITY populated, however some values should be empty - if (sourceProperties.LOCALITY && !emptyNames.includes(sourceProperties.LOCALITY)) { - outputProperties['addr:suburb'] = capitalCase(sourceProperties.LOCALITY) - } - - // every record has STATE populated - if (sourceProperties.STATE) { - outputProperties['addr:state'] = sourceProperties.STATE - } - - // some records have no POSTCODE populated - if (sourceProperties.POSTCODE) { - outputProperties['addr:postcode'] = sourceProperties.POSTCODE - } - - outputFeature.properties = outputProperties - return outputFeature -} diff --git a/vicmap2osm.js b/vicmap2osm.js deleted file mode 100755 index e579d98..0000000 --- a/vicmap2osm.js +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env node - -const fs = require('fs') -const { Transform, pipeline } = require('readable-stream') -const ndjson = require('ndjson') -const toOSM = require('./toOSM.js') -const filterOSM = require('./filterOSM.js') - -const args = process.argv.slice(2) - -if (args.length < 2) { - console.error("Usage: ./vicmap2osm.js input.geojson output.geojson") - process.exit(1) -} - -const inputFile = args[0] -const outputFile = args[1] - -if (!fs.existsSync(inputFile)) { - console.error(`${inputFile} not found`) - process.exit(1) -} - -const transform = new Transform({ - readableObjectMode: true, - 
writableObjectMode: true, - transform(feature, encoding, callback) { - // convert source Feature into a Feature per the OSM schema - const osm = toOSM(feature) - - // some addresses we skip importing into OSM - if (filterOSM(osm)) { - this.push(osm) - } - - callback() - } -}) - -// stream in source ndjson, transfom and stream out -pipeline( - fs.createReadStream(inputFile), - ndjson.parse(), - transform, - ndjson.stringify(), - fs.createWriteStream(outputFile), - (err) => { - if (err) { - console.log(err) - process.exit(1) - } else { - process.exit(0) - } - } -) -- cgit v1.2.3