aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Harvey <andrew@alantgeo.com.au>2021-05-17 23:05:26 +1000
committerAndrew Harvey <andrew@alantgeo.com.au>2021-05-17 23:05:26 +1000
commitb26c8f872c22e32925810ddad083b56d0086cb12 (patch)
tree9f3797760a4537da5568296ee6ac4226430e76b1
parentcf35c3e76b9befcfc9073205c663cbba172e5e28 (diff)
include broken conflation script
-rw-r--r--Makefile11
-rwxr-xr-xbin/conflate.js231
-rw-r--r--package.json1
-rw-r--r--yarn.lock31
4 files changed, 272 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 67e6fa2..23a19e7 100644
--- a/Makefile
+++ b/Makefile
@@ -81,7 +81,7 @@ data/victoria-addr.osm.pbf: data/victoria.osm.pbf
osmium tags-filter --output=$@ --overwrite $< addr:housenumber addr:interpolation
data/victoria-addr.osm.geojson: data/victoria-addr.osm.pbf
- osmium export --config=config/osmium-export-config.json --output-format=geojsonseq --output=$@ --overwrite $<
+ osmium export --config=config/osmium-export-config.json --output-format=geojsonseq --format-option=print_record_separator=false --output=$@ --overwrite $<
data/victoria-addr.osm.fgb: data/victoria-addr.osm.geojson
ogr2ogr -f FlatGeobuf -nlt PROMOTE_TO_MULTI -skipfailures -mapFieldType Integer64List=String $@ $<
@@ -110,7 +110,7 @@ data/victoria-roads.geojson: data/victoria-roads.osm.pbf
osmium export --geometry-types=linestring --output-format=geojsonseq --format-option=print_record_separator=false --output $@ $<
data/victoria-boundary.geojson:
- npx osm-geojson 2316741 | ogr2ogr -f GeoJSONSeq -explodecollections -nlt MULTILINESTRING $@ /vsistdin/
+ npx osm-geojson 2316741 | ogr2ogr -f GeoJSONSeq -explodecollections -nlt MULTILINESTRING $@ /vsistdin/
cat $@ >> data/victoria-roads.geojson
# then convert to fgb
@@ -125,5 +125,12 @@ data/blocks.fgb: data/victoria-roads.fgb
dist/blocksByOSMAddr.fgb: data/victoria-addr.osm.centroids.fgb data/blocks.fgb
qgis_process run native:countpointsinpolygon -- POINTS=$< POLYGONS='data/blocks.fgb|layername=blocks' FIELD=NUMPOINTS OUTPUT=$@
+dist/blocksByOSMAddr.geojson: dist/blocksByOSMAddr.fgb
+ ogr2ogr -f GeoJSONSeq $@ $<
+
summariseBlocksByOSMAddr:
ogrinfo -dialect sqlite -sql 'select count(*), NUMPOINTS = 0 from blocksByOSMAddr group by (NUMPOINTS = 0)' data/blocksByOSMAddr.fgb
+
+# conflate processed vicmap data with osm data
+conflate: dist/vicmap-osm-uniq-flats.geojson data/victoria-addr.osm.geojson dist/blocksByOSMAddr.geojson
+ ./bin/conflate.js $^ $@
diff --git a/bin/conflate.js b/bin/conflate.js
new file mode 100755
index 0000000..ff736cd
--- /dev/null
+++ b/bin/conflate.js
@@ -0,0 +1,231 @@
+#!/usr/bin/env node
+
+/**
+ * Prepare import candidates by conflating with existing addresses in OSM
+ */
+
+const fs = require('fs')
+const { Transform, pipeline } = require('stream')
+const ndjson = require('ndjson')
+const PolygonLookup = require('polygon-lookup')
+
+const argv = require('yargs/yargs')(process.argv.slice(2))
+ .option('debug', {
+ type: 'boolean',
+ description: 'Dumps full debug logs'
+ })
+ .argv
+
+if (argv._.length < 4) {
+ console.error("Usage: ./conflate.js vicmap.geojson osm.geojson blocksByOSMAddr.geojson output.geojson")
+ process.exit(1)
+}
+
+const vicmapFile = argv._[0]
+const osmFile = argv._[1]
+const blocksByOSMAddrFile = argv._[2]
+const outputFile = argv._[3]
+
+if (!fs.existsSync(vicmapFile)) {
+ console.error(`${vicmapFile} not found`)
+ process.exit(1)
+}
+if (!fs.existsSync(osmFile)) {
+ console.error(`${osmFile} not found`)
+ process.exit(1)
+}
+if (!fs.existsSync(blocksByOSMAddrFile)) {
+ console.error(`${blocksByOSMAddrFile} not found`)
+ process.exit(1)
+}
+
+const blocksByOSMAddr = fs.readFileSync(blocksByOSMAddrFile, 'utf-8').toString().split('\n')
+ .filter(line => line !== '')
+ .map((line, index) => {
+ try {
+ const feature = JSON.parse(line)
+ feature.id = index + 1
+ // console.log(feature)
+ return feature
+ } catch {
+ console.log(`Error parsing line ${index} of ${blocksByOSMAddrFile}: ${line}`)
+ }
+ })
+
+console.log('Creating index for Blocks by OSM Address lookup')
+const lookupBlocks = new PolygonLookup({
+ type: 'FeatureCollection',
+ features: blocksByOSMAddr
+})
+let lookupOSMAddressPoly
+const osmAddrPoly = []
+const osmAddrLines = [] // address interpolation lines
+// indexed by block
+const osmAddrPoints = {
+ 0: [] // this one is for any points not within a block
+}
+
+// find OSM Addresses and store them
+// polygons go into a simple array, which later we create a point in polygon index for
+// points and lines a simple object index by block id
+let osmAddrCount = 0
+const filterOSMAddrPoly = new Transform({
+ readableObjectMode: true,
+ writableObjectMode: true,
+ transform(feature, encoding, callback) {
+ osmAddrCount++
+
+ console.log(feature)
+ if (process.stdout.isTTY && osmAddrCount % 10000 === 0) {
+ process.stdout.write(` ${osmAddrCount / 1000}k\r`)
+ }
+
+ if (feature && feature.geometry && feature.geometry.type) {
+ if (feature.geometry.type === 'Polygon' || feature.geometry.type === 'MultiPolygon') {
+ osmAddrPoly.push(feature)
+ } else if (feature.geometry.type === 'Point') {
+ const results = lookupBlocks.search(...feature.geometry.coordinates.slice(0, 2), 1)
+ const block = results ? (results.type === 'FeatureCollection' ? (results.features ? results.features[0] : null) : results) : null
+ if (block) {
+ if (!(block.id in osmAddrPoints)) {
+ osmAddrPoints[block.id] = []
+ }
+ osmAddrPoints[block.id].push(feature)
+ } else {
+ // not found within a block
+ osmAddrPoints[0].push(feature)
+ }
+ } else if (feature.geometry.type === 'LineString') {
+ // TODO also index by block, but could be a few blocks
+ osmAddrLines.push(feature)
+ } else {
+ console.log(`Unsupported geometry type ${feature.geometry.type} for ${feature.properties['@type']}/${feature.properties['@id']}`)
+ }
+ }
+
+ // pass through for further processing
+ this.push(feature)
+
+ callback()
+ }
+})
+
+// conflate vicmap addresses with OSM addresses
+const conflate = new Transform({
+ readableObjectMode: true,
+ writableObjectMode: true,
+ transform(feature, encoding, callback) {
+ sourceCount++
+
+ if (process.stdout.isTTY && sourceCount % 10000 === 0) {
+ process.stdout.write(` ${sourceCount / 1000}k\r`)
+ }
+
+ const results = lookupBlocks.search(...feature.geometry.coordinates.slice(0, 2), 1)
+ const block = results ? (results.type === 'FeatureCollection' ? (results.features ? results.features[0] : null) : results) : null
+ if (block) {
+ if (block.properties.NUMPOINTS === 0) {
+ // no OSM addresses found within this block, so able to import without review
+ outputStreams.noOSMAddressWithinBlock.write(feature)
+ } else {
+ // other OSM addresses found within this block, so need to conflate
+ const results = lookupOSMAddressPoly.search(...feature.geometry.coordinates.slice(0, 2), 1)
+ const osmPoly = results ? (results.type === 'FeatureCollection' ? (results.features ? results.features[0] : null) : results) : null
+ if (osmPoly) {
+ // address found within an existing OSM address polygon
+ feature.properties._osmtype = osmPoly.properties['@type']
+ feature.properties._osmid = osmPoly.properties['@id']
+ outputStreams.withinExistingOSMAddressPoly.write(feature)
+ } else {
+ // address not found within an existing OSM address polygon
+
+ // see if any address with the same number and street in the same block
+ if (block.id in osmAddrPoints) {
+ const osmAddrWithinBlock = osmAddrPoints[block.id]
+ const matches = osmAddrWithinBlock.filter(osmAddr => {
+ return (feature.properties['addr:street'] === osmAddr.properties['addr:street'] &&
+ feature.properties['addr:housenumber'] === osmAddr.properties['addr:housenumber'] )
+ })
+ if (matches.length) {
+ // matching number and street, high confidence
+ outputStreams.exactMatch.write(feature)
+ } else {
+ // no exact match, probably can import
+ outputStreams.noExactMatch.write(feature)
+ }
+ } else {
+ // block id not found in osmAddrPoints, meaning there are no osmAddress points in this block,
+ // however in this case NUMPOINTS should have been 0
+ console.log(`Block ID not found when expected`)
+ }
+ }
+ }
+ } else {
+ // address not found within blocksByOSMAddr, probably within coastal zone, manually review
+ outputStreams.notFoundInBlocks.write(feature)
+ }
+
+ callback()
+ }
+})
+
+// ndjson streams to output debug features
+const outputKeys = ['notFoundInBlocks', 'noExactMatch', 'exactMatch', 'withinExistingOSMAddressPoly']
+const outputStreams = {}
+const outputStreamOutputs = {}
+
+outputKeys.forEach(key => {
+ outputStreams[key] = ndjson.stringify()
+ outputStreamOutputs[key] = outputStreams[key].pipe(fs.createWriteStream(`debug/conflate/${key}.geojson`))
+})
+
+// first pass to index by geometry
+console.log('First find OSM addresses represented as areas and store them in memory')
+pipeline(
+ fs.createReadStream(osmFile),
+ ndjson.parse(),
+ filterOSMAddrPoly,
+ err => {
+ if (err) {
+ console.log(err)
+ process.exit(1)
+ } else {
+ console.log(` of ${osmAddrCount} OSM address features found ${osmAddrPoly.length} addresses represented as polygons, ${osmAddrLines.length} addresses represented as lines`)
+ console.log('Creating index for OSM Address Polygon lookup')
+ lookupOSMAddressPoly = new PolygonLookup({
+ type: 'FeatureCollection',
+ features: osmAddrPoly
+ })
+ // second pass to conflate with existing OSM data
+ pipeline(
+ fs.createReadStream(vicmapFile),
+ ndjson.parse(),
+ conflate,
+ //ndjson.stringify(),
+ //fs.createWriteStream(outputFile),
+ err => {
+ if (err) {
+ console.log(err)
+ process.exit(1)
+ } else {
+ outputKeys.forEach(key => {
+ outputStreams[key].end()
+ })
+
+ Promise.all(outputKeys.map(key => {
+ return new Promise(resolve => {
+ outputStreamOutputs[key].on('finish', () => {
+ console.log(`saved debug/conflate/${key}.geojson`)
+ resolve()
+ })
+ })
+ }))
+ .then(() => {
+ process.exit(0)
+ })
+ }
+ }
+ )
+ }
+ }
+)
diff --git a/package.json b/package.json
index f13575e..5b13800 100644
--- a/package.json
+++ b/package.json
@@ -16,6 +16,7 @@
"geoflatbush": "^1.0.0",
"mktemp": "^1.0.0",
"ndjson": "^2.0.0",
+ "polygon-lookup": "^2.6.0",
"readable-stream": "^3.6.0",
"tape": "^5.2.2",
"yargs": "^17.0.0"
diff --git a/yarn.lock b/yarn.lock
index d29620d..463657a 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -417,6 +417,11 @@ kind-of@^6.0.2:
resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd"
integrity sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==
+lodash@^4.14.2:
+ version "4.17.21"
+ resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c"
+ integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==
+
lower-case@^2.0.2:
version "2.0.2"
resolved "https://registry.yarnpkg.com/lower-case/-/lower-case-2.0.2.tgz#6fa237c63dbdc4a82ca0fd882e4722dc5e634e28"
@@ -505,6 +510,32 @@ path-parse@^1.0.6:
resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c"
integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw==
+point-in-polygon@1.0.1:
+ version "1.0.1"
+ resolved "https://registry.yarnpkg.com/point-in-polygon/-/point-in-polygon-1.0.1.tgz#d59b64e8fee41c49458aac82b56718c5957b2af7"
+ integrity sha1-1Ztk6P7kHElFiqyCtWcYxZV7Kvc=
+
+polygon-lookup@^2.6.0:
+ version "2.6.0"
+ resolved "https://registry.yarnpkg.com/polygon-lookup/-/polygon-lookup-2.6.0.tgz#ff4145fde3a7076ac1fa488edad56d2aa5086adb"
+ integrity sha512-Y76ioJomg19RGMYbeUmfgcX7An+0vHK/89nEUmt+1pgVY6WgtW12/jnSlw8D4907kWEi3rhQC11aXUc1qZDN4g==
+ dependencies:
+ lodash "^4.14.2"
+ point-in-polygon "1.0.1"
+ rbush "^2.0.2"
+
+quickselect@^1.0.1:
+ version "1.1.1"
+ resolved "https://registry.yarnpkg.com/quickselect/-/quickselect-1.1.1.tgz#852e412ce418f237ad5b660d70cffac647ae94c2"
+ integrity sha512-qN0Gqdw4c4KGPsBOQafj6yj/PA6c/L63f6CaZ/DCF/xF4Esu3jVmKLUDYxghFx8Kb/O7y9tI7x2RjTSXwdK1iQ==
+
+rbush@^2.0.2:
+ version "2.0.2"
+ resolved "https://registry.yarnpkg.com/rbush/-/rbush-2.0.2.tgz#bb6005c2731b7ba1d5a9a035772927d16a614605"
+ integrity sha512-XBOuALcTm+O/H8G90b6pzu6nX6v2zCKiFG4BJho8a+bY6AER6t8uQUZdi5bomQc0AprCWhEGa7ncAbbRap0bRA==
+ dependencies:
+ quickselect "^1.0.1"
+
readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.6.0:
version "3.6.0"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"