diff options
author | Andrew Harvey <andrew@alantgeo.com.au> | 2021-05-27 14:41:06 +1000 |
---|---|---|
committer | Andrew Harvey <andrew@alantgeo.com.au> | 2021-05-27 14:41:06 +1000 |
commit | 6fc8dc4617916d57427a85478b4448e0684e3798 (patch) | |
tree | e922d537d7f4c436e34998024c1966ac81f9c04f /bin | |
parent | b4a605492dbad1eb95db93221deefbf02e3069de (diff) |
starting to sketch out candidates stage
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/candidates.js | 197 |
1 files changed, 197 insertions, 0 deletions
/**
 * Prepare import candidates by conflation category and suburb as OSM XML
 *
 * bin/candidates.js (mode 100755). When saved as an executable file the very
 * first line must be the hashbang `#!/usr/bin/env node`.
 *
 * Usage: ./candidates.js <suburbs file> <conflate dir> <output dir>
 *
 *   <suburbs file>  newline-delimited GeoJSON Features (parsed with ndjson),
 *                   each carrying an `id` and `properties.name`
 *   <conflate dir>  directory containing noOSMAddressWithinBlock.geojson and
 *                   noExactMatch.geojson (also newline-delimited)
 *   <output dir>    directory that receives <layer>/<suburbId>_<name>.osm
 */

const fs = require('fs')
const path = require('path')
const { Transform, pipeline } = require('stream')
const ndjson = require('ndjson')
const PolygonLookup = require('polygon-lookup')
const geojsontoosm = require('geojsontoosm')

const argv = require('yargs/yargs')(process.argv.slice(2))
  .option('verbose', {
    type: 'boolean',
    description: 'Verbose logging'
  })
  .argv

if (argv._.length < 3) {
  console.error('Usage: ./candidates.js data/victoria-admin-level10.osm.geojson dist/conflate dist/candidates')
  process.exit(1)
}

const suburbsFile = argv._[0]
const conflatePath = argv._[1]
const outputPath = argv._[2]

if (!fs.existsSync(suburbsFile)) {
  console.error(`${suburbsFile} not found`)
  process.exit(1)
}

if (!fs.existsSync(conflatePath)) {
  console.error(`${conflatePath} not found`)
  process.exit(1)
}

// placeholder suburb used for addresses that fall outside every indexed suburb
const outsideVicSuburb = {
  type: 'Feature',
  id: 0,
  properties: {
    name: 'VIC'
  },
  geometry: null
}

// output GeoJSON Features by layer by suburb ID
const outputFeatures = {
  'newAddressWithoutConflicts': {},
  'addrUnitFromHousenumber': {}
}

for (const layer of Object.keys(outputFeatures)) {
  // recursive so that a missing output root is created too, and an existing
  // directory is not an error
  fs.mkdirSync(path.join(outputPath, layer), { recursive: true })

  // the fallback suburb never passes through readSuburbs, so give it a bucket
  // up front; otherwise the first address outside all suburbs would throw
  outputFeatures[layer][outsideVicSuburb.id] = []
}

// suburb GeoJSON Features
const suburbs = []

// suburb ID to name
const suburbName = {
  [outsideVicSuburb.id]: outsideVicSuburb.properties.name
}

// suburb point in polygon index, built once all suburbs have been read
let lookupSuburbs

// index suburbs
let suburbCount = 0
const readSuburbs = new Transform({
  readableObjectMode: true,
  writableObjectMode: true,
  transform(suburb, encoding, callback) {
    suburbCount++

    if (process.stdout.isTTY && suburbCount % 1000 === 0) {
      process.stdout.write(` ${suburbCount.toLocaleString()}\r`)
    }

    if (!('id' in suburb)) {
      console.error('Suburb missing id', suburb)
      process.exit(1)
    }

    // every suburb gets an (initially empty) bucket in every layer
    for (const layer of Object.keys(outputFeatures)) {
      outputFeatures[layer][suburb.id] = []
    }

    suburbName[suburb.id] = suburb.properties.name

    suburbs.push(suburb)

    callback()
  }
})

/**
 * Find the suburb containing an address feature.
 *
 * @param {Object} feature GeoJSON Feature; the first two entries of
 *   `geometry.coordinates` are passed to the index as x and y
 * @returns {Object} the first matching suburb Feature, or `outsideVicSuburb`
 *   when the lookup yields no match
 */
function findSuburb(feature) {
  const [x, y] = feature.geometry.coordinates
  const results = lookupSuburbs.search(x, y, 1)

  let matches = []
  if (results) {
    // accept either a FeatureCollection or a plain array of Features
    matches = results.type === 'FeatureCollection' ? (results.features || []) : results
  }

  // an empty result list must fall back as well, not only a missing one
  return matches[0] || outsideVicSuburb
}

// produce import candidates
let sourceCount = 0

/**
 * Create a stream that files each incoming address feature under its suburb
 * in the newAddressWithoutConflicts layer.
 *
 * A fresh stream is created per input file because a stream that has been
 * through pipeline() has already ended and cannot be written to again.
 *
 * @returns {Transform} object-mode stream which consumes features and emits nothing
 */
function createCandidatesStream() {
  return new Transform({
    readableObjectMode: true,
    writableObjectMode: true,
    transform(feature, encoding, callback) {
      sourceCount++

      if (process.stdout.isTTY && sourceCount % 1000 === 0) {
        process.stdout.write(` ${sourceCount.toLocaleString()}\r`)
      }

      // find which suburb this address is in
      const suburb = findSuburb(feature)

      outputFeatures['newAddressWithoutConflicts'][suburb.id].push(feature)

      callback()
    }
  })
}

/**
 * Save our candidate address data as .osm files by layer by suburb
 *
 * Suburbs with no features in a layer produce no file.
 */
function outputCandidates() {
  const layers = Object.keys(outputFeatures)

  let i = 0
  for (const layer of layers) {
    i++
    const suburbIds = Object.keys(outputFeatures[layer])

    let j = 0
    for (const suburbId of suburbIds) {
      j++
      if (process.stdout.isTTY && j % 10 === 0) {
        process.stdout.write(` ${i.toLocaleString()}/${layers.length.toLocaleString()} - ${layer} - ${j.toLocaleString()}/${suburbIds.length.toLocaleString()}\r`)
      }

      const suburbFeatures = outputFeatures[layer][suburbId]
      if (suburbFeatures && suburbFeatures.length) {
        const xml = geojsontoosm(suburbFeatures)
        // a path separator in the id or name would otherwise point into a
        // directory that does not exist
        const fileName = `${suburbId}_${suburbName[suburbId]}.osm`.replace(/[\/\\]/g, '-')
        fs.writeFileSync(path.join(outputPath, layer, fileName), xml)
      } // else no data for this suburb
    }
  }
}

/**
 * Stream a newline-delimited JSON file into `sink`, then continue with `next`.
 * Any stream error is reported and ends the process with exit code 1.
 *
 * @param {string} file path of the file to read
 * @param {Transform} sink object-mode stream receiving each parsed record
 * @param {Function} next called with no arguments once the file is fully consumed
 */
function readFeatures(file, sink, next) {
  pipeline(
    fs.createReadStream(file),
    ndjson.parse(),
    sink,
    err => {
      if (err) {
        console.error(err)
        process.exit(1)
      }
      next()
    }
  )
}

// first pass to index by geometry
console.log('Step 1/X: Reading suburbs')
readFeatures(suburbsFile, readSuburbs, () => {
  console.log('Step 2/X: Creating index of Suburbs')
  lookupSuburbs = new PolygonLookup({
    type: 'FeatureCollection',
    features: suburbs
  })

  console.log('Step 3/X: noOSMAddressWithinBlock')
  readFeatures(path.join(conflatePath, 'noOSMAddressWithinBlock.geojson'), createCandidatesStream(), () => {
    console.log('Step 4/X: noExactMatch')
    readFeatures(path.join(conflatePath, 'noExactMatch.geojson'), createCandidatesStream(), () => {
      console.log('Output candidate .osm files')
      outputCandidates()
      process.exit(0)
    })
  })
})