diff options
author | Andrew Harvey <andrew@alantgeo.com.au> | 2021-05-17 23:05:26 +1000 |
---|---|---|
committer | Andrew Harvey <andrew@alantgeo.com.au> | 2021-05-17 23:05:26 +1000 |
commit | b26c8f872c22e32925810ddad083b56d0086cb12 (patch) | |
tree | 9f3797760a4537da5568296ee6ac4226430e76b1 | |
parent | cf35c3e76b9befcfc9073205c663cbba172e5e28 (diff) |
include broken conflation script
-rw-r--r-- | Makefile | 11 | ||||
-rwxr-xr-x | bin/conflate.js | 231 | ||||
-rw-r--r-- | package.json | 1 | ||||
-rw-r--r-- | yarn.lock | 31 |
4 files changed, 272 insertions, 2 deletions
@@ -81,7 +81,7 @@ data/victoria-addr.osm.pbf: data/victoria.osm.pbf osmium tags-filter --output=$@ --overwrite $< addr:housenumber addr:interpolation data/victoria-addr.osm.geojson: data/victoria-addr.osm.pbf - osmium export --config=config/osmium-export-config.json --output-format=geojsonseq --output=$@ --overwrite $< + osmium export --config=config/osmium-export-config.json --output-format=geojsonseq --format-option=print_record_separator=false --output=$@ --overwrite $< data/victoria-addr.osm.fgb: data/victoria-addr.osm.geojson ogr2ogr -f FlatGeobuf -nlt PROMOTE_TO_MULTI -skipfailures -mapFieldType Integer64List=String $@ $< @@ -110,7 +110,7 @@ data/victoria-roads.geojson: data/victoria-roads.osm.pbf osmium export --geometry-types=linestring --output-format=geojsonseq --format-option=print_record_separator=false --output $@ $< data/victoria-boundary.geojson: - npx osm-geojson 2316741 | ogr2ogr -f GeoJSONSeq -explodecollections -nlt MULTILINESTRING $@ /vsistdin/ + npx osm-geojson 2316741 | ogr2ogr -f GeoJSONSeq -explodecollections -nlt MULTILINESTRING $@ /vsistdin/ cat $@ >> data/victoria-roads.geojson # then convert to fgb @@ -125,5 +125,12 @@ data/blocks.fgb: data/victoria-roads.fgb dist/blocksByOSMAddr.fgb: data/victoria-addr.osm.centroids.fgb data/blocks.fgb qgis_process run native:countpointsinpolygon -- POINTS=$< POLYGONS='data/blocks.fgb|layername=blocks' FIELD=NUMPOINTS OUTPUT=$@ +dist/blocksByOSMAddr.geojson: dist/blocksByOSMAddr.fgb + ogr2ogr -f GeoJSONSeq $@ $< + summariseBlocksByOSMAddr: ogrinfo -dialect sqlite -sql 'select count(*), NUMPOINTS = 0 from blocksByOSMAddr group by (NUMPOINTS = 0)' data/blocksByOSMAddr.fgb + +# conflate processed vicmap data with osm data +conflate: dist/vicmap-osm-uniq-flats.geojson data/victoria-addr.osm.geojson dist/blocksByOSMAddr.geojson + ./bin/conflate.js $^ $@ diff --git a/bin/conflate.js b/bin/conflate.js new file mode 100755 index 0000000..ff736cd --- /dev/null +++ b/bin/conflate.js @@ -0,0 +1,231 @@ 
+#!/usr/bin/env node
+
+/**
+ * Prepare import candidates by conflating with existing addresses in OSM.
+ *
+ * Usage: ./conflate.js vicmap.geojson osm.geojson blocksByOSMAddr.geojson output.geojson
+ *
+ * All three inputs are read as newline-delimited GeoJSON features. Every
+ * Vicmap address is written to exactly one file under debug/conflate/,
+ * named after the category it was sorted into (see outputKeys).
+ */
+
+const fs = require('fs')
+const { Writable, pipeline } = require('stream')
+const ndjson = require('ndjson')
+const PolygonLookup = require('polygon-lookup')
+
+const argv = require('yargs/yargs')(process.argv.slice(2))
+  .option('debug', {
+    type: 'boolean',
+    description: 'Dumps full debug logs'
+  })
+  .argv
+
+if (argv._.length < 4) {
+  console.error("Usage: ./conflate.js vicmap.geojson osm.geojson blocksByOSMAddr.geojson output.geojson")
+  process.exit(1)
+}
+
+const vicmapFile = argv._[0]
+const osmFile = argv._[1]
+const blocksByOSMAddrFile = argv._[2]
+// NOTE: required on the command line but not written yet; results currently
+// only go to debug/conflate/*.geojson
+const outputFile = argv._[3]
+
+for (const file of [vicmapFile, osmFile, blocksByOSMAddrFile]) {
+  if (!fs.existsSync(file)) {
+    console.error(`${file} not found`)
+    process.exit(1)
+  }
+}
+
+// Load the blocks (one GeoJSON feature per line), giving each a 1-based id.
+// Lines which fail to parse are reported and skipped so that no undefined
+// entries end up in the feature collection handed to the spatial index.
+const blocksByOSMAddr = []
+fs.readFileSync(blocksByOSMAddrFile, 'utf-8').split('\n').forEach((line, index) => {
+  if (line.trim() === '') {
+    return
+  }
+  try {
+    const feature = JSON.parse(line)
+    feature.id = blocksByOSMAddr.length + 1
+    blocksByOSMAddr.push(feature)
+  } catch (err) {
+    console.error(`Error parsing line ${index + 1} of ${blocksByOSMAddrFile}: ${err.message}`)
+  }
+})
+
+/**
+ * Find the first polygon in a PolygonLookup index containing a position.
+ *
+ * search() is called with a limit, for which polygon-lookup is expected to
+ * return a FeatureCollection; a bare feature is tolerated as well.
+ *
+ * @param {PolygonLookup} lookup - spatial index to query
+ * @param {number[]} coordinates - GeoJSON position, only x and y are used
+ * @returns {Object|null} the first containing polygon feature, or null
+ */
+function firstContainingPolygon (lookup, coordinates) {
+  const results = lookup.search(coordinates[0], coordinates[1], 1)
+  if (!results) {
+    return null
+  }
+  if (results.type === 'FeatureCollection') {
+    return (results.features && results.features[0]) || null
+  }
+  return results
+}
+
+console.log('Creating index for Blocks by OSM Address lookup')
+const lookupBlocks = new PolygonLookup({
+  type: 'FeatureCollection',
+  features: blocksByOSMAddr
+})
+
+let lookupOSMAddressPoly // built once the first pass has collected osmAddrPoly
+const osmAddrPoly = []
+const osmAddrLines = [] // address interpolation lines
+// OSM address points indexed by the id of the block containing them
+const osmAddrPoints = {
+  0: [] // this one is for any points not within a block
+}
+
+// First pass: find OSM addresses and store them.
+// Polygons go into a simple array which later gets a point in polygon index,
+// points go into an object indexed by block id, lines into a simple array.
+// This is a Writable rather than a pass-through Transform because nothing
+// reads from it; an unread Transform fills its buffer and stalls the pipeline.
+let osmAddrCount = 0
+const filterOSMAddrPoly = new Writable({
+  objectMode: true,
+  write (feature, encoding, callback) {
+    osmAddrCount++
+
+    if (argv.debug) {
+      console.log(feature)
+    }
+    if (process.stdout.isTTY && osmAddrCount % 10000 === 0) {
+      process.stdout.write(` ${osmAddrCount / 1000}k\r`)
+    }
+
+    const type = feature && feature.geometry && feature.geometry.type
+    if (type === 'Polygon' || type === 'MultiPolygon') {
+      osmAddrPoly.push(feature)
+    } else if (type === 'Point') {
+      const block = firstContainingPolygon(lookupBlocks, feature.geometry.coordinates)
+      const blockId = block ? block.id : 0 // 0 collects points outside every block
+      if (!(blockId in osmAddrPoints)) {
+        osmAddrPoints[blockId] = []
+      }
+      osmAddrPoints[blockId].push(feature)
+    } else if (type === 'LineString') {
+      // TODO also index by block, but could be a few blocks
+      osmAddrLines.push(feature)
+    } else if (type) {
+      console.log(`Unsupported geometry type ${type} for ${feature.properties['@type']}/${feature.properties['@id']}`)
+    }
+
+    callback()
+  }
+})
+
+// ndjson streams, one per conflation outcome
+const outputKeys = [
+  'notFoundInBlocks',
+  'noOSMAddressWithinBlock',
+  'noExactMatch',
+  'exactMatch',
+  'withinExistingOSMAddressPoly'
+]
+const outputStreams = {}
+const outputStreamOutputs = {}
+
+// createWriteStream does not create missing parent directories
+fs.mkdirSync('debug/conflate', { recursive: true })
+outputKeys.forEach(key => {
+  outputStreams[key] = ndjson.stringify()
+  outputStreamOutputs[key] = outputStreams[key].pipe(fs.createWriteStream(`debug/conflate/${key}.geojson`))
+})
+
+/**
+ * Decide which output a Vicmap address belongs in.
+ *
+ * May add _osmtype and _osmid properties to the feature when it falls
+ * within an existing OSM address polygon.
+ *
+ * @param {Object} feature - Vicmap address, a GeoJSON Point feature
+ * @returns {string} one of outputKeys
+ */
+function classify (feature) {
+  const { coordinates } = feature.geometry
+  const block = firstContainingPolygon(lookupBlocks, coordinates)
+  if (!block) {
+    // address not found within blocksByOSMAddr, probably within coastal zone, manually review
+    return 'notFoundInBlocks'
+  }
+  if (block.properties.NUMPOINTS === 0) {
+    // no OSM addresses found within this block, so able to import without review
+    return 'noOSMAddressWithinBlock'
+  }
+
+  // other OSM addresses found within this block, so need to conflate
+  const osmPoly = firstContainingPolygon(lookupOSMAddressPoly, coordinates)
+  if (osmPoly) {
+    // address found within an existing OSM address polygon
+    feature.properties._osmtype = osmPoly.properties['@type']
+    feature.properties._osmid = osmPoly.properties['@id']
+    return 'withinExistingOSMAddressPoly'
+  }
+
+  // see if any OSM address point in the same block has the same number and street.
+  // A block can have NUMPOINTS > 0 yet no entry here (presumably its OSM addresses
+  // are all polygons or lines); treat that as no match rather than dropping the address.
+  const osmAddrWithinBlock = osmAddrPoints[block.id] || []
+  const hasExactMatch = osmAddrWithinBlock.some(osmAddr =>
+    feature.properties['addr:street'] === osmAddr.properties['addr:street'] &&
+    feature.properties['addr:housenumber'] === osmAddr.properties['addr:housenumber']
+  )
+  // matching number and street is high confidence, otherwise can probably import
+  return hasExactMatch ? 'exactMatch' : 'noExactMatch'
+}
+
+// Second pass: conflate vicmap addresses with OSM addresses
+let sourceCount = 0
+const conflate = new Writable({
+  objectMode: true,
+  write (feature, encoding, callback) {
+    sourceCount++
+
+    if (process.stdout.isTTY && sourceCount % 10000 === 0) {
+      process.stdout.write(` ${sourceCount / 1000}k\r`)
+    }
+
+    outputStreams[classify(feature)].write(feature)
+
+    callback()
+  }
+})
+
+// first pass to index by geometry
+console.log('First find OSM addresses represented as areas and store them in memory')
+pipeline(
+  fs.createReadStream(osmFile),
+  ndjson.parse(),
+  filterOSMAddrPoly,
+  err => {
+    if (err) {
+      console.error(err)
+      process.exit(1)
+    }
+
+    console.log(` of ${osmAddrCount} OSM address features found ${osmAddrPoly.length} addresses represented as polygons, ${osmAddrLines.length} addresses represented as lines`)
+    console.log('Creating index for OSM Address Polygon lookup')
+    lookupOSMAddressPoly = new PolygonLookup({
+      type: 'FeatureCollection',
+      features: osmAddrPoly
+    })
+
+    // second pass to conflate with existing OSM data
+    pipeline(
+      fs.createReadStream(vicmapFile),
+      ndjson.parse(),
+      conflate,
+      err => {
+        if (err) {
+          console.error(err)
+          process.exit(1)
+        }
+
+        // register for 'finish' before ending so that no event can be missed
+        const saved = outputKeys.map(key => new Promise((resolve, reject) => {
+          outputStreamOutputs[key]
+            .on('error', reject)
+            .on('finish', () => {
+              console.log(`saved debug/conflate/${key}.geojson`)
+              resolve()
+            })
+        }))
+        outputKeys.forEach(key => outputStreams[key].end())
+
+        Promise.all(saved)
+          .then(() => process.exit(0))
+          .catch(writeErr => {
+            console.error(writeErr)
+            process.exit(1)
+          })
+      }
+    )
+  }
+)
diff --git a/package.json b/package.json index f13575e..5b13800 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "geoflatbush": "^1.0.0", "mktemp": "^1.0.0", "ndjson": "^2.0.0", + "polygon-lookup": "^2.6.0", "readable-stream": "^3.6.0", "tape": "^5.2.2", "yargs": "^17.0.0" @@ -417,6 +417,11 @@ kind-of@^6.0.2: resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-6.0.3.tgz#07c05034a6c349fa06e24fa35aa76db4580ce4dd" integrity
sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw== +lodash@^4.14.2: + version "4.17.21" + resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" + integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== + lower-case@^2.0.2: version "2.0.2" resolved "https://registry.yarnpkg.com/lower-case/-/lower-case-2.0.2.tgz#6fa237c63dbdc4a82ca0fd882e4722dc5e634e28" @@ -505,6 +510,32 @@ path-parse@^1.0.6: resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.6.tgz#d62dbb5679405d72c4737ec58600e9ddcf06d24c" integrity sha512-GSmOT2EbHrINBf9SR7CDELwlJ8AENk3Qn7OikK4nFYAu3Ote2+JYNVvkpAEQm3/TLNEJFD/xZJjzyxg3KBWOzw== +point-in-polygon@1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/point-in-polygon/-/point-in-polygon-1.0.1.tgz#d59b64e8fee41c49458aac82b56718c5957b2af7" + integrity sha1-1Ztk6P7kHElFiqyCtWcYxZV7Kvc= + +polygon-lookup@^2.6.0: + version "2.6.0" + resolved "https://registry.yarnpkg.com/polygon-lookup/-/polygon-lookup-2.6.0.tgz#ff4145fde3a7076ac1fa488edad56d2aa5086adb" + integrity sha512-Y76ioJomg19RGMYbeUmfgcX7An+0vHK/89nEUmt+1pgVY6WgtW12/jnSlw8D4907kWEi3rhQC11aXUc1qZDN4g== + dependencies: + lodash "^4.14.2" + point-in-polygon "1.0.1" + rbush "^2.0.2" + +quickselect@^1.0.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/quickselect/-/quickselect-1.1.1.tgz#852e412ce418f237ad5b660d70cffac647ae94c2" + integrity sha512-qN0Gqdw4c4KGPsBOQafj6yj/PA6c/L63f6CaZ/DCF/xF4Esu3jVmKLUDYxghFx8Kb/O7y9tI7x2RjTSXwdK1iQ== + +rbush@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/rbush/-/rbush-2.0.2.tgz#bb6005c2731b7ba1d5a9a035772927d16a614605" + integrity sha512-XBOuALcTm+O/H8G90b6pzu6nX6v2zCKiFG4BJho8a+bY6AER6t8uQUZdi5bomQc0AprCWhEGa7ncAbbRap0bRA== + dependencies: + quickselect "^1.0.1" + readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.6.0: version "3.6.0" resolved 
"https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198" |