diff options
-rwxr-xr-x | bin/reduceDuplicates.js | 91 | ||||
-rwxr-xr-x | bin/reduceOverlap.js | 124 | ||||
-rwxr-xr-x | bin/vicmap2osm.js | 8 | ||||
-rw-r--r-- | lib/filterOSM.js | 3 | ||||
-rw-r--r-- | lib/toOSM.js | 25 | ||||
-rw-r--r-- | package.json | 1 | ||||
-rw-r--r-- | yarn.lock | 33 |
7 files changed, 211 insertions, 74 deletions
diff --git a/bin/reduceDuplicates.js b/bin/reduceDuplicates.js index 542b43f..abd5810 100755 --- a/bin/reduceDuplicates.js +++ b/bin/reduceDuplicates.js @@ -8,6 +8,7 @@ const fs = require('fs') const { Readable, Transform, pipeline } = require('stream') const ndjson = require('ndjson') const cluster = require('../lib/cluster.js') +const cloneDeep = require('clone-deep') const argv = require('yargs/yargs')(process.argv.slice(2)) .option('debug', { @@ -32,6 +33,7 @@ if (!fs.existsSync(inputFile)) { let sourceCount = 0 const features = {} +// index features by properties const index = new Transform({ readableObjectMode: true, writableObjectMode: true, @@ -43,6 +45,7 @@ const index = new Transform({ } const key = [ + feature.properties['addr:unit:prefix'], feature.properties['addr:unit'], feature.properties['addr:housenumber'], feature.properties['addr:street'], @@ -60,6 +63,7 @@ const index = new Transform({ } }) +// remove duplicates let reduceIndex = 0 const reduce = new Transform({ readableObjectMode: true, @@ -80,24 +84,55 @@ const reduce = new Transform({ const sameCoordinates = [...new Set(groupedFeatures.map(f => f.geometry.coordinates.join(',')))].length <= 1 if (sameCoordinates) { - // features have same properties and same geometry, so true duplicates can reduce to one + // features have same properties and same geometry, so they are true duplicates which can safely be reduced to one this.push(groupedFeatures[0]) } else { + // features have same properties but not all with the same geometry + // cluster features with a threshold of 25m const clusters = cluster(groupedFeatures, 25) // if clustered into a single cluster, then output a single average feature + // this should be safe to use as within 25m if (clusters.length === 1) { const averageCoordinates = [ groupedFeatures.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur) / groupedFeatures.length, groupedFeatures.map(f => f.geometry.coordinates[1]).reduce((acc, cur) => acc + cur) / groupedFeatures.length ] - const averageFeature = groupedFeatures[0] + const averageFeature = cloneDeep(groupedFeatures[0]) averageFeature.geometry.coordinates = averageCoordinates + if (argv.debug) { + // create a spider web to illustrate which features were clustered together and where the average point is + const spiderWebCoordinates = [] + + debugStreams.singleCluster.write(averageFeature) + groupedFeatures.forEach(feature => { + // debugStreams.singleCluster.write(feature) + + // start with the average point + spiderWebCoordinates.push(averageFeature.geometry.coordinates) + // go out to the source point + spiderWebCoordinates.push(feature.geometry.coordinates) + // end back at the average point + spiderWebCoordinates.push(averageFeature.geometry.coordinates) + }) + + // output a web connecting the source points for visualisation + debugStreams.singleCluster.write({ + type: 'Feature', + properties: Object.assign({ '_type': 'Single Cluster' }, averageFeature.properties), + geometry: { + type: 'LineString', + coordinates: spiderWebCoordinates + } + }) + } + this.push(averageFeature) } else { - // more than one cluster, reduce those clustered into one, and then report all the results + // more than one cluster, reduce those clustered into centroids, and then report all the centroids + // these will need to be manually reviewed const clusterAverages = clusters.map(cluster => { if (cluster.length === 1) { return cluster[0] @@ -106,23 +141,28 @@ const reduce = new Transform({ cluster.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur) / cluster.length, cluster.map(f => f.geometry.coordinates[1]).reduce((acc, cur) => acc + cur) / cluster.length ] - const averageFeature = cluster[0] + const averageFeature = cloneDeep(cluster[0]) averageFeature.geometry.coordinates = averageCoordinates return averageFeature } }) - // report these as address points with the same attributes but different locations beyond the threshold - if (debugDuplicateAddressStream) { + // report these as address points with the same attributes but different locations beyond the cluster threshold + if (argv.debug) { const webOfMatches = { type: 'Feature', - properties: clusterAverages[0].properties, + properties: Object.assign({ '_type': 'Multi Cluster' }, clusterAverages[0].properties), geometry: { type: 'LineString', coordinates: clusterAverages.map(p => p.geometry.coordinates) } } - debugDuplicateAddressStream.write(webOfMatches) + clusterAverages.forEach(feature => { + // output candidate feature + debugStreams.multiCluster.write(feature) + }) + // output a web connecting the canidates for visualisation + debugStreams.multiCluster.write(webOfMatches) } } } @@ -132,11 +172,16 @@ const reduce = new Transform({ } }) -const debugDuplicateAddressStream = argv.debug ? ndjson.stringify() : null +// ndjson streams to output debug features +const debugKeys = ['singleCluster', 'multiCluster'] +const debugStreams = {} +const debugStreamOutputs = {} -let debugApplicationsAddressStreamOutput -if (debugDuplicateAddressStream) { - debugApplicationsAddressStreamOutput = debugDuplicateAddressStream.pipe(fs.createWriteStream('debug/reduceDuplicates/duplicateAddresses.geojson')) +if (argv.debug) { + debugKeys.forEach(key => { + debugStreams[key] = ndjson.stringify() + debugStreamOutputs[key] = debugStreams[key].pipe(fs.createWriteStream(`debug/reduceDuplicates/${key}.geojson`)) + }) } // first pass to index by geometry @@ -162,14 +207,22 @@ pipeline( console.log(err) process.exit(1) } else { - if (debugDuplicateAddressStream) { - debugDuplicateAddressStream.end() - } - if (debugApplicationsAddressStreamOutput) { - debugApplicationsAddressStreamOutput.on('finish', () => { - console.log('saved debug/reduceDuplicates/duplicateAddresses.geojson') - process.exit(0) + if (argv.debug) { + debugKeys.forEach(key => { + debugStreams[key].end() }) + + Promise.all(debugKeys.map(key => { + return new Promise(resolve => { + debugStreamOutputs[key].on('finish', () => { + console.log(`saved debug/reduceDuplicates/${key}.geojson`) + resolve() + }) + }) + })) + .then(() => { + process.exit(0) + }) } else { process.exit(0) } diff --git a/bin/reduceOverlap.js b/bin/reduceOverlap.js index d74c49c..d302cbb 100755 --- a/bin/reduceOverlap.js +++ b/bin/reduceOverlap.js @@ -3,9 +3,14 @@ const fs = require('fs') const { Readable, Transform, pipeline } = require('stream') const ndjson = require('ndjson') -const util = require('util') +const cloneDeep = require('clone-deep') -const argv = require('yargs/yargs')(process.argv.slice(2)).argv +const argv = require('yargs/yargs')(process.argv.slice(2)) + .option('debug', { + type: 'boolean', + description: 'Dumps full debug logs' + }) + .argv if (argv._.length < 2) { console.error("Usage: ./reduceOverlap.js input.geojson output.geojson") @@ -87,50 +92,62 @@ const reduce = new Transform({ if (sameHousenumber && sameStreet && sameSuburb && sameState && samePostcode) { if (hasNonUnit) { + // all have same housenumber, street, suburb, state, postcode and there is a non-unit feature const nonUnitFeatures = groupedFeatures.filter(f => (!('addr:unit' in f.properties))) if (nonUnitFeatures.length > 1) { // multiple non-unit features, unsure how to reduce - console.log('multiple non-unit features, unsure how to reduce') - console.dir(groupedFeatures, {depth: null}) + // TODO should these still be output to be picked up by ranges + if (argv.debug) { + groupedFeatures.forEach(feature => { + debugStreams.multipleNonUnit.write(feature) + }) + } } else { - const nonUnitFeature = nonUnitFeatures[0] + // a single non-unit feature exists + const nonUnitFeature = cloneDeep(nonUnitFeatures[0]) - // place all the other addr:unit into addr:flats + // place all the other addr:unit into addr:flats on the non-unit feature const allOtherUnits = groupedFeatures.filter(f => 'addr:unit' in f.properties).map(f => f.properties['addr:unit']) // if allOtherUnits.length is one then that means we have one address without a unit and one with a unit at the same point - // TODO should we just drop the non-unit address and keep the addr:unit one? - // need to determine if you always have a non-unit address for the unit address, if there is then - // perhaps we can safely drop the non-unit address and use a single addr:unit - - // adapted from https://stackoverflow.com/a/54973116/6702659 - const sortedAllOtherUnitsAsRanges = allOtherUnits - .slice() - .sort((a, b) => a - b) - .reduce((acc, cur, idx, src) => { - if ((idx > 0) && ((cur - src[idx - 1]) === 1)) { - acc[acc.length - 1][1] = cur - } else { - acc.push([cur]) - } - return acc - }, []) - .map(range => range.join('-')) - - nonUnitFeature.properties['addr:flats'] = sortedAllOtherUnitsAsRanges.join(';') - this.push(nonUnitFeature) + // in this case we just drop the non-unit address and keep the addr:unit one + if (allOtherUnits.length === 1) { + if (argv.debug) { + groupedFeatures.forEach(feature => { + debugStreams.oneUnitOneNonUnit.write(feature) + }) + } + this.push(allOtherUnits[0]) + } else { + // adapted from https://stackoverflow.com/a/54973116/6702659 + const sortedAllOtherUnitsAsRanges = allOtherUnits + .slice() + .sort((a, b) => a - b) + .reduce((acc, cur, idx, src) => { + if ((idx > 0) && ((cur - src[idx - 1]) === 1)) { + acc[acc.length - 1][1] = cur + } else { + acc.push([cur]) + } + return acc + }, []) + .map(range => range.join('-')) + + nonUnitFeature.properties['addr:flats'] = sortedAllOtherUnitsAsRanges.join(';') + this.push(nonUnitFeature) + } } } else { - // all have same housenumber, street, suburb, state, postcode but no non-unit + // all have same housenumber, street, suburb, state, postcode but no non-unit, ie. all with different unit values // combine all the addr:unit into addr:flats and then drop addr:unit const units = groupedFeatures.filter(f => 'addr:unit' in f.properties).map(f => f.properties['addr:unit']) - // TODO assert units.length > 1 if (units.length <= 1) { - // console.log(`all have same housenumber, street, suburb, state, postcode but no non-unit, but only found ${units.length} units`, units) + console.log(`all have same housenumber, street, suburb, state, postcode with no non-unit, but only found ${units.length} units`, units) + process.exit(1) } - const feature = groupedFeatures[0] + const feature = cloneDeep(groupedFeatures[0]) delete feature.properties['addr:unit'] // adapted from https://stackoverflow.com/a/54973116/6702659 @@ -152,13 +169,13 @@ const reduce = new Transform({ } } else { // addresses with the same geometry, however more than unit differs - // TODO need to investigate to see what we can/shoud do about these - for (let i = 0; i < groupedFeatures.length; i++) { - this.push(groupedFeatures[i]) - if (debugSameGeometry) { - debugSameGeometry.write(groupedFeatures[i]) + // TODO need to investigate to see what we can/should do about these + groupedFeatures.forEach(feature => { + this.push(feature) + if (argv.debug) { + debugStreams.sameGeometry.write(feature) } - } + }) } } @@ -166,10 +183,16 @@ const reduce = new Transform({ } }) -const debugSameGeometry = argv.debug ? - ndjson.stringify() - .pipe(fs.createWriteStream('debug/reduceOverlap/sameGeometry.geojson')) - : null +const debugKeys = ['multipleNonUnit', 'oneUnitOneNonUnit', 'sameGeometry'] +const debugStreams = {} +const debugStreamOutputs = {} + +if (argv.debug) { + debugKeys.forEach(key => { + debugStreams[key] = ndjson.stringify() + debugStreamOutputs[key] = debugStreams[key].pipe(fs.createWriteStream(`debug/reduceOverlap/${key}.geojson`)) + }) +} // first pass to index by geometry console.log('First pass to index by geometry') @@ -194,8 +217,25 @@ pipeline( console.log(err) process.exit(1) } else { - debugSameGeometry.end() - process.exit(0) + if (argv.debug) { + debugKeys.forEach(key => { + debugStreams[key].end() + }) + + Promise.all(debugKeys.map(key => { + return new Promise(resolve => { + debugStreamOutputs[key].on('finish', () => { + console.log(`saved debug/reduceOverlap/${key}.geojson`) + resolve() + }) + }) + })) + .then(() => { + process.exit(0) + }) + } else { + process.exit(0) + } } } ) diff --git a/bin/vicmap2osm.js b/bin/vicmap2osm.js index b252f24..8559e8a 100755 --- a/bin/vicmap2osm.js +++ b/bin/vicmap2osm.js @@ -1,10 +1,14 @@ #!/usr/bin/env node +/** + * Convert from Vicmap Address schema into OSM Address schema, and omit some addresses + */ + const fs = require('fs') const { Transform, pipeline } = require('readable-stream') const ndjson = require('ndjson') -const toOSM = require('./toOSM.js') -const filterOSM = require('./filterOSM.js') +const toOSM = require('../lib/toOSM.js') +const filterOSM = require('../lib/filterOSM.js') const argv = require('yargs/yargs')(process.argv.slice(2)) .option('debug', { diff --git a/lib/filterOSM.js b/lib/filterOSM.js index f7f22be..0a22b8b 100644 --- a/lib/filterOSM.js +++ b/lib/filterOSM.js @@ -1,11 +1,10 @@ module.exports = (feature, options) => { // skip any addresses without a housenumber - // eg PFI 53396626 has no housenumber if ( !('addr:housenumber' in feature.properties) ) { - if (argv.debug) { + if (options && options.debug) { console.log(`PFI ${feature.properties._pfi} has no addr:housenumber, filtering`) } return false diff --git a/lib/toOSM.js b/lib/toOSM.js index 95c387f..afa8057 100644 --- a/lib/toOSM.js +++ b/lib/toOSM.js @@ -80,8 +80,7 @@ module.exports = (sourceFeature, options) => { outputProperties['_pfi'] = sourceProperties.PFI } - // Building sub address type (eg UNIT OFFICE SHOP) - // + // Building unit // bld_unit_* const bld_unit_1 = [ sourceProperties.BUNIT_PRE1, @@ -110,15 +109,22 @@ module.exports = (sourceFeature, options) => { } if (bld_unit) { - outputProperties['addr:unit'] = bld_unit - } + // building unit type (Unit, Shop, Suite...) + // only included if a unit value is set + if (sourceProperties.BLGUNTTYP) { + if (sourceProperties.BLGUNTTYP in buildingUnitType) { + outputProperties['addr:unit:prefix'] = capitalCase(buildingUnitType[sourceProperties.BLGUNTTYP]) + } else { + if (options && options.debug) { + console.log(`Building Unity Type ${sourceProperties.BLGUNTTYP} not recognised for ${sourceFeature}`) + } + } + } - /* - if (sourceProperties.BLGUNTTYP && sourceProperties.BLGUNTTYP in buildingUnitType) { - outputProperties['addr:unit:type'] = buildingUnitType[sourceProperties.BLGUNTTYP] + outputProperties['addr:unit'] = bld_unit } - */ + // house number // house_* const house_1 = [ sourceProperties.HSE_PREF1, @@ -145,7 +151,8 @@ module.exports = (sourceFeature, options) => { outputProperties['addr:housenumber'] = housenumber } - // display numbers used predominately in the City of Melbourne CBD by large properties. Primarily to simplify an assigned number range. + // display numbers used predominately in the City of Melbourne CBD by large properties. + // Primarily to simplify an assigned number range. // so should map the assigned address or the signposted address? // every record has at least ROAD_NAME populated diff --git a/package.json b/package.json index 09d67f4..13dc6ab 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "dependencies": { "capital-case": "^1.0.4", "cheap-ruler": "^3.0.1", + "clone-deep": "^4.0.1", "flatbush": "^3.3.0", "geoflatbush": "^1.0.0", "ndjson": "^2.0.0", @@ -70,6 +70,15 @@ cliui@^7.0.2: strip-ansi "^6.0.0" wrap-ansi "^7.0.0" +clone-deep@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/clone-deep/-/clone-deep-4.0.1.tgz#c19fd9bdbbf85942b4fd979c84dcf7d5f07c2387" + integrity sha512-neHB9xuzh/wk0dIHweyAXv2aPGZIVk3pLMe+/RNzINf17fe0OG96QroktYAUm7SM1PBnzTabaLboqqxDyMU+SQ== + dependencies: + is-plain-object "^2.0.4" + kind-of "^6.0.2" + shallow-clone "^3.0.0" + color-convert@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" @@ -335,6 +344,13 @@ is-number-object@^1.0.4: resolved "https://registry.yarnpkg.com/is-number-object/-/is-number-object-1.0.4.tgz#36ac95e741cf18b283fc1ddf5e83da798e3ec197" integrity sha512-zohwelOAur+5uXtk8O3GPQ1eAcu4ZX3UwxQhUlfFFMNpUd83gXgjbhJh6HmB6LUNV/ieOLQuDwJO3dWJosUeMw== +is-plain-object@^2.0.4: + version "2.0.4" + resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677" + integrity sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og== + dependencies: + isobject "^3.0.1" + is-regex@^1.1.1, is-regex@^1.1.2: version "1.1.2" resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.2.tgz#81c8ebde4db142f2cf1c53fc86d6a45788266251" @@ -386,11 +402,21 @@ isarray@^2.0.5: resolved "https://registry.yarnpkg.com/isarray/-/isarray-2.0.5.tgz#8af1e4c1221244cc62459faf38940d4e644a5723" integrity sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw== +isobject@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/isobject/-/isobject-3.0.1.tgz#4e431e92b11a9731636aa1f9c8d1ccbcfdab78df" + integrity sha1-TkMekrEalzFjaqH5yNHMvP2reN8= + json-stringify-safe@^5.0.1: version "5.0.1" resolved "https://registry.yarnpkg.com/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz#1296a2d58fd45f19a0f6ce01d65701e2c735b6eb" integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus= +kind-of@^6.0.2: + version "6.0.3" + resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd" + integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw== + lower-case@^2.0.2: version "2.0.2" resolved "https://registry.yarnpkg.com/lower-case/-/lower-case-2.0.2.tgz#6fa237c63dbdc4a82ca0fd882e4722dc5e634e28" @@ -516,6 +542,13 @@ safe-buffer@~5.2.0: resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== +shallow-clone@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/shallow-clone/-/shallow-clone-3.0.1.tgz#8f2981ad92531f55035b01fb230769a40e02efa3" + integrity sha512-/6KqX+GVUdqPuPPd2LxDDxzX6CAbjJehAAOKlNpqqUpAqPM6HeL8f+o3a+JsyGjn2lv0WY8UsTgUJjU9Ok55NA== + dependencies: + kind-of "^6.0.2" + side-channel@^1.0.3: version "1.0.4" resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf" |