aboutsummaryrefslogtreecommitdiff
path: root/bin
diff options
context:
space:
mode:
author Andrew Harvey <andrew@alantgeo.com.au> 2021-05-27 14:41:06 +1000
committer Andrew Harvey <andrew@alantgeo.com.au> 2021-05-27 14:41:06 +1000
commit 6fc8dc4617916d57427a85478b4448e0684e3798 (patch)
tree e922d537d7f4c436e34998024c1966ac81f9c04f /bin
parent b4a605492dbad1eb95db93221deefbf02e3069de (diff)
starting to sketch out candidates stage
Diffstat (limited to 'bin')
-rwxr-xr-x bin/candidates.js 197
1 files changed, 197 insertions, 0 deletions
diff --git a/bin/candidates.js b/bin/candidates.js
new file mode 100755
index 0000000..e350f6f
--- /dev/null
+++ b/bin/candidates.js
@@ -0,0 +1,197 @@
+#!/usr/bin/env node
+
+/**
+ * Prepare import candidates by conflation category and suburb as OSM XML
+ */
+
+const fs = require('fs')
+const path = require('path')
+const { Transform, pipeline } = require('stream')
+const ndjson = require('ndjson')
+const PolygonLookup = require('polygon-lookup')
+const geojsontoosm = require('geojsontoosm')
+
+const argv = require('yargs/yargs')(process.argv.slice(2))
+ .option('verbose', {
+ type: 'boolean',
+ description: 'Verbose logging'
+ })
+ .argv
+
+if (argv._.length < 3) {
+ console.error("Usage: ./conflate.js data/victoria-admin-level10.osm.geojson dist/conflate dist/candidates")
+ process.exit(1)
+}
+
+const suburbsFile = argv._[0]
+const conflatePath = argv._[1]
+const outputPath = argv._[2]
+
+if (!fs.existsSync(suburbsFile)) {
+ console.error(`${suburbsFile} not found`)
+ process.exit(1)
+}
+
+if (!fs.existsSync(conflatePath)) {
+ console.error(`${conflatePath} not found`)
+ process.exit(1)
+}
+
+// output GeoJSON Features by layer by suburb ID
+const outputFeatures = {
+ 'newAddressWithoutConflicts': {},
+ 'addrUnitFromHousenumber': {}
+}
+
+for (const layer of Object.keys(outputFeatures)) {
+ const layerPath = path.join(outputPath, layer)
+ if (!fs.existsSync(layerPath)) {
+ fs.mkdirSync(layerPath)
+ }
+}
+
+// suburb GeoJSON Features
+const suburbs = []
+
+// suburb ID to name
+const suburbName = {
+ 0: 'VIC'
+}
+
+// suburb point in polygon index
+let suburbLookup
+
+const outsideVicSuburb = {
+ type: 'Feature',
+ id: 0,
+ properties: {
+ name: 'VIC'
+ },
+ geometry: null
+}
+
+// index suburbs
+let suburbCount = 0
+const readSuburbs = new Transform({
+ readableObjectMode: true,
+ writableObjectMode: true,
+ transform(suburb, encoding, callback) {
+ suburbCount++
+
+ if (process.stdout.isTTY && suburbCount % 1000 === 0) {
+ process.stdout.write(` ${suburbCount.toLocaleString()}\r`)
+ }
+
+ if (!('id' in suburb)) {
+ console.log('Suburb missing id', suburb)
+ process.exit(1)
+ }
+
+ for (const layer of Object.keys(outputFeatures)) {
+ outputFeatures[layer][suburb.id] = []
+ }
+
+ suburbName[suburb.id] = suburb.properties.name
+
+ suburbs.push(suburb)
+
+ callback()
+ }
+})
+
+// produce import candidates
+let sourceCount = 0
+const candidates = new Transform({
+ readableObjectMode: true,
+ writableObjectMode: true,
+ transform(feature, encoding, callback) {
+ sourceCount++
+
+ if (process.stdout.isTTY && sourceCount % 1000 === 0) {
+ process.stdout.write(` ${sourceCount.toLocaleString()}\r`)
+ }
+
+ // find which suburb this address is in
+ const results = lookupSuburbs.search(...feature.geometry.coordinates.slice(0, 2), 1)
+ const suburb = results ? (results.type === 'FeatureCollection' ? (results.features ? results.features[0] : outsideVicSuburb) : results[0]) : outsideVicSuburb
+
+ outputFeatures['newAddressWithoutConflicts'][suburb.id].push(feature)
+
+ callback()
+ }
+})
+
+/**
+ * Save our candidate address data as .osm files by layer by suburb
+ */
+function outputCandidates() {
+ let i = 0
+ for (const layer of Object.keys(outputFeatures)) {
+ i++
+ let j = 0
+ for (const suburbId of Object.keys(outputFeatures[layer])) {
+ j++
+ if (process.stdout.isTTY && i % 10 === 0) {
+ process.stdout.write(` ${j.toLocaleString()}/${Object.keys(outputFeatures).length.toLocaleString()} - ${layer} - ${i.toLocaleString()}/${suburbs.length.toLocaleString()}\r`)
+ }
+
+ const suburbFeatures = outputFeatures[layer][suburbId]
+ if (suburbFeatures && suburbFeatures.length) {
+ const xml = geojsontoosm(suburbFeatures)
+ fs.writeFileSync(path.join(outputPath, layer, `${suburbId}_${suburbName[suburbId]}.osm`), xml)
+ } // else no data for this suburb
+ }
+ }
+}
+
+// first pass to index by geometry
+console.log('Step 1/X: Reading suburbs')
+pipeline(
+ fs.createReadStream(suburbsFile),
+ ndjson.parse(),
+ readSuburbs,
+ err => {
+ if (err) {
+ console.log(err)
+ process.exit(1)
+ } else {
+ console.log('Step 2/X: Creating index of Suburbs')
+ lookupSuburbs = new PolygonLookup({
+ type: 'FeatureCollection',
+ features: suburbs
+ })
+
+ console.log('Step 3/X: noOSMAddressWithinBlock')
+ pipeline(
+ fs.createReadStream(path.join(conflatePath, 'noOSMAddressWithinBlock.geojson')),
+ ndjson.parse(),
+ candidates,
+ err => {
+ if (err) {
+ console.log(err)
+ process.exit(1)
+ } else {
+
+ console.log('Step 4/X: noExactMatch')
+ pipeline(
+ fs.createReadStream(path.join(conflatePath, 'noExactMatch.geojson')),
+ ndjson.parse(),
+ candidates,
+ err => {
+ if (err) {
+ console.log(err)
+ process.exit(1)
+ } else {
+ console.log('Output candidate .osm files')
+ outputCandidates()
+ process.exit(0)
+ }
+ }
+ )
+ }
+ }
+ )
+
+ }
+ }
+)