Diffstat (limited to 'bin')
-rwxr-xr-x  bin/reduceDuplicates.js   91
-rwxr-xr-x  bin/reduceOverlap.js     124
-rwxr-xr-x  bin/vicmap2osm.js          8
3 files changed, 160 insertions, 63 deletions
diff --git a/bin/reduceDuplicates.js b/bin/reduceDuplicates.js
index 542b43f..abd5810 100755
--- a/bin/reduceDuplicates.js
+++ b/bin/reduceDuplicates.js
@@ -8,6 +8,7 @@ const fs = require('fs')
const { Readable, Transform, pipeline } = require('stream')
const ndjson = require('ndjson')
const cluster = require('../lib/cluster.js')
+const cloneDeep = require('clone-deep')
const argv = require('yargs/yargs')(process.argv.slice(2))
.option('debug', {
@@ -32,6 +33,7 @@ if (!fs.existsSync(inputFile)) {
let sourceCount = 0
const features = {}
+// index features by properties
const index = new Transform({
readableObjectMode: true,
writableObjectMode: true,
@@ -43,6 +45,7 @@ const index = new Transform({
}
const key = [
+ feature.properties['addr:unit:prefix'],
feature.properties['addr:unit'],
feature.properties['addr:housenumber'],
feature.properties['addr:street'],
@@ -60,6 +63,7 @@ const index = new Transform({
}
})
+// remove duplicates
let reduceIndex = 0
const reduce = new Transform({
readableObjectMode: true,
@@ -80,24 +84,55 @@ const reduce = new Transform({
const sameCoordinates = [...new Set(groupedFeatures.map(f => f.geometry.coordinates.join(',')))].length <= 1
if (sameCoordinates) {
- // features have same properties and same geometry, so true duplicates can reduce to one
+ // features have same properties and same geometry, so they are true duplicates which can safely be reduced to one
this.push(groupedFeatures[0])
} else {
+ // features have same properties but not all with the same geometry
+
// cluster features with a threshold of 25m
const clusters = cluster(groupedFeatures, 25)
// if clustered into a single cluster, then output a single average feature
+ // this should be safe to use since all the clustered points fall within the 25m threshold
if (clusters.length === 1) {
const averageCoordinates = [
groupedFeatures.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur) / groupedFeatures.length,
groupedFeatures.map(f => f.geometry.coordinates[1]).reduce((acc, cur) => acc + cur) / groupedFeatures.length
]
- const averageFeature = groupedFeatures[0]
+ const averageFeature = cloneDeep(groupedFeatures[0])
averageFeature.geometry.coordinates = averageCoordinates
+ if (argv.debug) {
+ // create a spider web to illustrate which features were clustered together and where the average point is
+ const spiderWebCoordinates = []
+
+ debugStreams.singleCluster.write(averageFeature)
+ groupedFeatures.forEach(feature => {
+ // debugStreams.singleCluster.write(feature)
+
+ // start with the average point
+ spiderWebCoordinates.push(averageFeature.geometry.coordinates)
+ // go out to the source point
+ spiderWebCoordinates.push(feature.geometry.coordinates)
+ // end back at the average point
+ spiderWebCoordinates.push(averageFeature.geometry.coordinates)
+ })
+
+ // output a web connecting the source points for visualisation
+ debugStreams.singleCluster.write({
+ type: 'Feature',
+ properties: Object.assign({ '_type': 'Single Cluster' }, averageFeature.properties),
+ geometry: {
+ type: 'LineString',
+ coordinates: spiderWebCoordinates
+ }
+ })
+ }
+
this.push(averageFeature)
} else {
- // more than one cluster, reduce those clustered into one, and then report all the results
+ // more than one cluster, reduce those clustered into centroids, and then report all the centroids
+ // these will need to be manually reviewed
const clusterAverages = clusters.map(cluster => {
if (cluster.length === 1) {
return cluster[0]
@@ -106,23 +141,28 @@ const reduce = new Transform({
cluster.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur) / cluster.length,
cluster.map(f => f.geometry.coordinates[1]).reduce((acc, cur) => acc + cur) / cluster.length
]
- const averageFeature = cluster[0]
+ const averageFeature = cloneDeep(cluster[0])
averageFeature.geometry.coordinates = averageCoordinates
return averageFeature
}
})
- // report these as address points with the same attributes but different locations beyond the threshold
- if (debugDuplicateAddressStream) {
+ // report these as address points with the same attributes but different locations beyond the cluster threshold
+ if (argv.debug) {
const webOfMatches = {
type: 'Feature',
- properties: clusterAverages[0].properties,
+ properties: Object.assign({ '_type': 'Multi Cluster' }, clusterAverages[0].properties),
geometry: {
type: 'LineString',
coordinates: clusterAverages.map(p => p.geometry.coordinates)
}
}
- debugDuplicateAddressStream.write(webOfMatches)
+ clusterAverages.forEach(feature => {
+ // output candidate feature
+ debugStreams.multiCluster.write(feature)
+ })
+ // output a web connecting the candidates for visualisation
+ debugStreams.multiCluster.write(webOfMatches)
}
}
}
@@ -132,11 +172,16 @@ const reduce = new Transform({
}
})
-const debugDuplicateAddressStream = argv.debug ? ndjson.stringify() : null
+// ndjson streams to output debug features
+const debugKeys = ['singleCluster', 'multiCluster']
+const debugStreams = {}
+const debugStreamOutputs = {}
-let debugApplicationsAddressStreamOutput
-if (debugDuplicateAddressStream) {
- debugApplicationsAddressStreamOutput = debugDuplicateAddressStream.pipe(fs.createWriteStream('debug/reduceDuplicates/duplicateAddresses.geojson'))
+if (argv.debug) {
+ debugKeys.forEach(key => {
+ debugStreams[key] = ndjson.stringify()
+ debugStreamOutputs[key] = debugStreams[key].pipe(fs.createWriteStream(`debug/reduceDuplicates/${key}.geojson`))
+ })
}
// first pass to index by geometry
@@ -162,14 +207,22 @@ pipeline(
console.log(err)
process.exit(1)
} else {
- if (debugDuplicateAddressStream) {
- debugDuplicateAddressStream.end()
- }
- if (debugApplicationsAddressStreamOutput) {
- debugApplicationsAddressStreamOutput.on('finish', () => {
- console.log('saved debug/reduceDuplicates/duplicateAddresses.geojson')
- process.exit(0)
+ if (argv.debug) {
+ debugKeys.forEach(key => {
+ debugStreams[key].end()
})
+
+ Promise.all(debugKeys.map(key => {
+ return new Promise(resolve => {
+ debugStreamOutputs[key].on('finish', () => {
+ console.log(`saved debug/reduceDuplicates/${key}.geojson`)
+ resolve()
+ })
+ })
+ }))
+ .then(() => {
+ process.exit(0)
+ })
} else {
process.exit(0)
}
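The core change in reduceDuplicates.js is that the representative feature is now deep-cloned before its geometry is overwritten with the cluster average, so the source features in groupedFeatures keep their original coordinates for the debug spider-web output. A minimal sketch of that pattern, using the clone-deep package the diff adds; averageCluster is a hypothetical name, the script inlines this logic inside its reduce Transform:

const cloneDeep = require('clone-deep')

// hypothetical helper; the script inlines this inside its reduce Transform
function averageCluster (features) {
  // average longitude and latitude across the cluster
  const averageCoordinates = [
    features.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur, 0) / features.length,
    features.map(f => f.geometry.coordinates[1]).reduce((acc, cur) => acc + cur, 0) / features.length
  ]
  // clone so the caller's features keep their original geometry (e.g. for the debug spider web)
  const averageFeature = cloneDeep(features[0])
  averageFeature.geometry.coordinates = averageCoordinates
  return averageFeature
}

Without the clone, mutating groupedFeatures[0] (or cluster[0]) would also change the geometry of the feature later written to the debug streams.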
diff --git a/bin/reduceOverlap.js b/bin/reduceOverlap.js
index d74c49c..d302cbb 100755
--- a/bin/reduceOverlap.js
+++ b/bin/reduceOverlap.js
@@ -3,9 +3,14 @@
const fs = require('fs')
const { Readable, Transform, pipeline } = require('stream')
const ndjson = require('ndjson')
-const util = require('util')
+const cloneDeep = require('clone-deep')
-const argv = require('yargs/yargs')(process.argv.slice(2)).argv
+const argv = require('yargs/yargs')(process.argv.slice(2))
+ .option('debug', {
+ type: 'boolean',
+ description: 'Dumps full debug logs'
+ })
+ .argv
if (argv._.length < 2) {
console.error("Usage: ./reduceOverlap.js input.geojson output.geojson")
@@ -87,50 +92,62 @@ const reduce = new Transform({
if (sameHousenumber && sameStreet && sameSuburb && sameState && samePostcode) {
if (hasNonUnit) {
+ // all have same housenumber, street, suburb, state, postcode and there is a non-unit feature
const nonUnitFeatures = groupedFeatures.filter(f => (!('addr:unit' in f.properties)))
if (nonUnitFeatures.length > 1) {
// multiple non-unit features, unsure how to reduce
- console.log('multiple non-unit features, unsure how to reduce')
- console.dir(groupedFeatures, {depth: null})
+ // TODO should these still be output so they can be picked up by ranges?
+ if (argv.debug) {
+ groupedFeatures.forEach(feature => {
+ debugStreams.multipleNonUnit.write(feature)
+ })
+ }
} else {
- const nonUnitFeature = nonUnitFeatures[0]
+ // a single non-unit feature exists
+ const nonUnitFeature = cloneDeep(nonUnitFeatures[0])
- // place all the other addr:unit into addr:flats
+ // place all the other addr:unit into addr:flats on the non-unit feature
const allOtherUnits = groupedFeatures.filter(f => 'addr:unit' in f.properties).map(f => f.properties['addr:unit'])
// if allOtherUnits.length is one then that means we have one address without a unit and one with a unit at the same point
- // TODO should we just drop the non-unit address and keep the addr:unit one?
- // need to determine if you always have a non-unit address for the unit address, if there is then
- // perhaps we can safely drop the non-unit address and use a single addr:unit
-
- // adapted from https://stackoverflow.com/a/54973116/6702659
- const sortedAllOtherUnitsAsRanges = allOtherUnits
- .slice()
- .sort((a, b) => a - b)
- .reduce((acc, cur, idx, src) => {
- if ((idx > 0) && ((cur - src[idx - 1]) === 1)) {
- acc[acc.length - 1][1] = cur
- } else {
- acc.push([cur])
- }
- return acc
- }, [])
- .map(range => range.join('-'))
-
- nonUnitFeature.properties['addr:flats'] = sortedAllOtherUnitsAsRanges.join(';')
- this.push(nonUnitFeature)
+ // in this case we just drop the non-unit address and keep the addr:unit one
+ if (allOtherUnits.length === 1) {
+ if (argv.debug) {
+ groupedFeatures.forEach(feature => {
+ debugStreams.oneUnitOneNonUnit.write(feature)
+ })
+ }
+ this.push(groupedFeatures.find(f => 'addr:unit' in f.properties))
+ } else {
+ // adapted from https://stackoverflow.com/a/54973116/6702659
+ const sortedAllOtherUnitsAsRanges = allOtherUnits
+ .slice()
+ .sort((a, b) => a - b)
+ .reduce((acc, cur, idx, src) => {
+ if ((idx > 0) && ((cur - src[idx - 1]) === 1)) {
+ acc[acc.length - 1][1] = cur
+ } else {
+ acc.push([cur])
+ }
+ return acc
+ }, [])
+ .map(range => range.join('-'))
+
+ nonUnitFeature.properties['addr:flats'] = sortedAllOtherUnitsAsRanges.join(';')
+ this.push(nonUnitFeature)
+ }
}
} else {
- // all have same housenumber, street, suburb, state, postcode but no non-unit
+ // all have same housenumber, street, suburb, state, postcode but no non-unit, i.e. all with different unit values
// combine all the addr:unit into addr:flats and then drop addr:unit
const units = groupedFeatures.filter(f => 'addr:unit' in f.properties).map(f => f.properties['addr:unit'])
- // TODO assert units.length > 1
if (units.length <= 1) {
- // console.log(`all have same housenumber, street, suburb, state, postcode but no non-unit, but only found ${units.length} units`, units)
+ console.log(`all have same housenumber, street, suburb, state, postcode with no non-unit, but only found ${units.length} units`, units)
+ process.exit(1)
}
- const feature = groupedFeatures[0]
+ const feature = cloneDeep(groupedFeatures[0])
delete feature.properties['addr:unit']
// adapted from https://stackoverflow.com/a/54973116/6702659
@@ -152,13 +169,13 @@ const reduce = new Transform({
}
} else {
// addresses with the same geometry, however more than unit differs
- // TODO need to investigate to see what we can/shoud do about these
- for (let i = 0; i < groupedFeatures.length; i++) {
- this.push(groupedFeatures[i])
- if (debugSameGeometry) {
- debugSameGeometry.write(groupedFeatures[i])
+ // TODO need to investigate to see what we can/should do about these
+ groupedFeatures.forEach(feature => {
+ this.push(feature)
+ if (argv.debug) {
+ debugStreams.sameGeometry.write(feature)
}
- }
+ })
}
}
@@ -166,10 +183,16 @@ const reduce = new Transform({
}
})
-const debugSameGeometry = argv.debug ?
- ndjson.stringify()
- .pipe(fs.createWriteStream('debug/reduceOverlap/sameGeometry.geojson'))
- : null
+const debugKeys = ['multipleNonUnit', 'oneUnitOneNonUnit', 'sameGeometry']
+const debugStreams = {}
+const debugStreamOutputs = {}
+
+if (argv.debug) {
+ debugKeys.forEach(key => {
+ debugStreams[key] = ndjson.stringify()
+ debugStreamOutputs[key] = debugStreams[key].pipe(fs.createWriteStream(`debug/reduceOverlap/${key}.geojson`))
+ })
+}
// first pass to index by geometry
console.log('First pass to index by geometry')
@@ -194,8 +217,25 @@ pipeline(
console.log(err)
process.exit(1)
} else {
- debugSameGeometry.end()
- process.exit(0)
+ if (argv.debug) {
+ debugKeys.forEach(key => {
+ debugStreams[key].end()
+ })
+
+ Promise.all(debugKeys.map(key => {
+ return new Promise(resolve => {
+ debugStreamOutputs[key].on('finish', () => {
+ console.log(`saved debug/reduceOverlap/${key}.geojson`)
+ resolve()
+ })
+ })
+ }))
+ .then(() => {
+ process.exit(0)
+ })
+ } else {
+ process.exit(0)
+ }
}
}
)
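The addr:flats value built in reduceOverlap.js collapses consecutive unit numbers into ranges before joining them with semicolons. A minimal sketch of that reduce, assuming numeric unit strings; unitsToFlats is a hypothetical name for logic the script keeps inline:

// hypothetical helper wrapping the inline reduce used above
function unitsToFlats (units) {
  return units
    .slice()
    .sort((a, b) => a - b)
    .reduce((acc, cur, idx, src) => {
      if ((idx > 0) && ((cur - src[idx - 1]) === 1)) {
        // consecutive with the previous unit, extend the current range
        acc[acc.length - 1][1] = cur
      } else {
        // gap, start a new range
        acc.push([cur])
      }
      return acc
    }, [])
    .map(range => range.join('-'))
    .join(';')
}

// e.g. unitsToFlats(['1', '2', '3', '5']) === '1-3;5'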
diff --git a/bin/vicmap2osm.js b/bin/vicmap2osm.js
index b252f24..8559e8a 100755
--- a/bin/vicmap2osm.js
+++ b/bin/vicmap2osm.js
@@ -1,10 +1,14 @@
#!/usr/bin/env node
+/**
+ * Convert from Vicmap Address schema into OSM Address schema, and omit some addresses
+ */
+
const fs = require('fs')
const { Transform, pipeline } = require('readable-stream')
const ndjson = require('ndjson')
-const toOSM = require('./toOSM.js')
-const filterOSM = require('./filterOSM.js')
+const toOSM = require('../lib/toOSM.js')
+const filterOSM = require('../lib/filterOSM.js')
const argv = require('yargs/yargs')(process.argv.slice(2))
.option('debug', {
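For context, toOSM.js and filterOSM.js now live under lib/ so they can be shared outside bin/. A minimal sketch of how vicmap2osm.js appears to wire them into its ndjson pipeline; the toOSM and filterOSM signatures (converter in, boolean keep/omit out) and the hard-coded input/output paths are assumptions for illustration, the real script reads its paths from argv:

const fs = require('fs')
const { Transform, pipeline } = require('readable-stream')
const ndjson = require('ndjson')
const toOSM = require('../lib/toOSM.js')
const filterOSM = require('../lib/filterOSM.js')

// assumed shape: toOSM() maps a Vicmap feature to an OSM-tagged feature,
// filterOSM() decides whether to keep or omit it
const convert = new Transform({
  readableObjectMode: true,
  writableObjectMode: true,
  transform (feature, encoding, callback) {
    const osmFeature = toOSM(feature)
    if (filterOSM(osmFeature)) this.push(osmFeature)
    callback()
  }
})

pipeline(
  fs.createReadStream('input.geojson'),
  ndjson.parse(),
  convert,
  ndjson.stringify(),
  fs.createWriteStream('output.geojson'),
  err => {
    if (err) {
      console.log(err)
      process.exit(1)
    }
  }
)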