diff options
-rw-r--r-- | .gitlab-ci.yml | 3 | ||||
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | README.md | 18 | ||||
-rwxr-xr-x | bin/complex.js | 241 | ||||
-rwxr-xr-x | bin/vicmap2osm.js | 40 | ||||
-rw-r--r-- | config/osmium-export-config-names.json | 17 | ||||
-rw-r--r-- | package.json | 2 | ||||
-rw-r--r-- | yarn.lock | 51 |
8 files changed, 380 insertions, 2 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f9bf3a8..92460ed 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,6 +8,7 @@ cache: &global_cache - data/vicmap.geojson - dist/vicmap-osm-uniq-flats.geojson - data/victoria-addr.osm.geojson + - data/victoria-named-features.osm.geojson - dist/blocksByOSMAddr.geojson - data/victoria-admin-level10.osm.geojson @@ -152,6 +153,7 @@ build osm: - yarn install - cp src/polygon-lookup-patch.js node_modules/polygon-lookup/index.js - mkdir -p dist data + - make data/victoria-named-features.osm.geojson - make dist/blocksByOSMAddr.fgb - make dist/blocksByOSMAddr.geojson - make data/victoria-admin-level10.osm.geojson @@ -174,6 +176,7 @@ conflate: script: - yarn install - cp src/polygon-lookup-patch.js node_modules/polygon-lookup/index.js + - make dist/vicmap-complex-site - make dist/conflate - wc -l dist/conflate/*.geojson when: manual @@ -101,6 +101,12 @@ data/victoria-addr.osm.fgb: data/victoria-addr.osm.geojson data/victoria-addr.osm.centroids.fgb: data/victoria-addr.osm.fgb qgis_process run native:centroids -- INPUT='$<|layername=victoria-addr.osm|option:VERIFY_BUFFERS=NO' OUTPUT=$@ +data/victoria-named-features.osm.pbf: data/victoria.osm.pbf + osmium tags-filter --output=$@ --overwrite $< name + +data/victoria-named-features.osm.geojson: data/victoria-named-features.osm.pbf + osmium export --config=config/osmium-export-config-names.json --output-format=geojsonseq --format-option=print_record_separator=false --output=$@ --overwrite $< + data/asgs.zip: wget -O $@ 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&1270055001_ASGS_2016_vol_1_geopackage.zip&1270.0.55.001&Data%20Cubes&C406A18CE1A6A50ACA257FED00145B1D&0&July%202016&12.07.2016&Latest' @@ -209,3 +215,7 @@ printDifferentSuburbs: dist/vicmapSuburbDiffersWithOSM.geojson dist/candidates: data/victoria-admin-level10.osm.geojson dist/conflate mkdir -p $@ ./bin/candidates.js $^ $@ + +dist/vicmap-complex-site: dist/vicmap-complex.geojson 
data/victoria-named-features.osm.geojson + mkdirp -p $@ + ./bin/complex.js $^ $@ @@ -164,7 +164,23 @@ For these reasons this building / property name is not included, however it coul ### Complex Name Source data sometimes includes a complex name, for example _CHADSTONE SHOPPING CENTRE_ or _MELBOURNE UNIVERSITY_. These attributes are not used as these names should appear on the actual feature like `shop=mall` or `amenity=university`. -They might be of interest for mappers as an additional data source, externally to this import. +While there are ~55,000 Vicmap points with a complex name, there are only ~2000 unique names. + +`bin/vicmap2osm.js` outputs `dist/vicmap-complex.geojson` which contains all the complex name features. + +The script at `bin/complex.js` processes this to: + +- Group nearby complex features with the same name into a single centroid point +- Test whether this complex name matches a nearby OSM object +- Where no match is found in OSM, output a MapRoulette data file for mappers to review and potentially add these complex names to OSM. + +This outputs a bunch of files into `dist/vicmap-complex-site` including three MapRoulette challenges: + +- `mr_singleNearbySimilarFeature` - the Vicmap complex matched a single nearby OSM feature (but the name wasn't an exact match, where it was an exact match the Vicmap complex is not flagged for inclusion in MapRoulette) +- `mr_multipleNearbySimilarFeatures` - the Vicmap complex matched multiple nearby OSM features +- `mr_noNearbySimilarFeature` - the Vicmap complex didn't match any nearby OSM features + +This is built into the _conflate_ stage. ### Display Address Source data has a display address which can differ from the official address. For example if a building is `1-3` but is signed as simply `1`. Currently we ignore the display address, and while this can be seen as more correct based on the "official" address, does it go against the OSM principle of mapping what's on the ground? 
diff --git a/bin/complex.js b/bin/complex.js
new file mode 100755
index 0000000..6c83b6b
--- /dev/null
+++ b/bin/complex.js
@@ -0,0 +1,261 @@
+#!/usr/bin/env node
+
+/**
+ * Take Vicmap address points which have a complex value, and group these into sites.
+ *
+ * Points are grouped purely by their complex name. Each group is written out as a
+ * MultiPoint, a centroid and a convex hull, and is then compared by name against the
+ * OSM features nearest its centroid to produce MapRoulette tasks.
+ *
+ * Usage: ./complex.js vicmap-complex.geojson victoria-named-features.osm.geojson vicmap-complex-site
+ */
+
+const fs = require('fs')
+const { Transform, pipeline } = require('readable-stream')
+const ndjson = require('ndjson')
+const convex = require('@turf/convex').default
+const featureCollection = require('@turf/helpers').featureCollection
+const point = require('@turf/helpers').point
+const multiPoint = require('@turf/helpers').multiPoint
+const geometryCollection = require('@turf/helpers').geometryCollection
+const { capitalCase } = require('capital-case')
+const Flatbush = require('flatbush')
+const bbox = require('@turf/bbox').default
+const { around } = require('geoflatbush')
+const { lcs } = require('string-comparison')
+
+const argv = require('yargs/yargs')(process.argv.slice(2))
+  .argv
+
+if (argv._.length < 3) {
+  console.error("Usage: ./complex.js vicmap-complex.geojson victoria-named-features.osm.geojson vicmap-complex-site")
+  process.exit(1)
+}
+
+const inputFile = argv._[0]
+const osmFile = argv._[1]
+const outputPath = argv._[2]
+
+// both inputs are read below, so fail early with a clear message if either is missing
+for (const file of [inputFile, osmFile]) {
+  if (!fs.existsSync(file)) {
+    console.error(`${file} not found`)
+    process.exit(1)
+  }
+}
+
+console.log('Reading OSM data')
+// one GeoJSON feature per line, a line which can't be used is reported and skipped
+const osmFeatures = fs.readFileSync(osmFile, 'utf-8').split('\n')
+  .filter(line => line !== '')
+  .map((line, index, array) => {
+    if (process.stdout.isTTY && index % 1000 === 0) {
+      process.stdout.write(` ${index.toLocaleString()}/${array.length.toLocaleString()} (${Math.round(index / array.length * 100)}%)\r`)
+    }
+
+    try {
+      const feature = JSON.parse(line)
+      feature.properties.id = index
+      return feature
+    } catch {
+      console.log(`Error parsing line ${index} of ${osmFile}: ${line}`)
+      return null
+    }
+  })
+  // drop skipped lines so every entry is a feature which can be indexed and name matched
+  .filter(feature => feature !== null)
+
+console.log('Creating index for nearby OSM search')
+// features are added in array order and looked up again as osmFeatures[i] from the search results
+const osmIndex = new Flatbush(osmFeatures.length)
+for (const osmFeature of osmFeatures) {
+  osmIndex.add(...bbox(osmFeature))
+}
+osmIndex.finish()
+
+// ndjson streams to output features
+const outputKeys = [
+  // contains multipoint, centroid and hull in one feature
+  'geometryCollection',
+
+  // because some software (cough cough QGIS) can't handle GeometryCollections, output each geometry type as it's own file
+  'multiPoint',
+  'centroid',
+  'hull',
+
+  // MapRoulette challenges
+  'mr_singleNearbySimilarFeature',
+  'mr_multipleNearbySimilarFeatures',
+  'mr_noNearbySimilarFeature'
+]
+const outputStreams = {}
+const outputStreamOutputs = {}
+
+outputKeys.forEach(key => {
+  outputStreams[key] = ndjson.stringify()
+  outputStreamOutputs[key] = outputStreams[key].pipe(fs.createWriteStream(`${outputPath}/${key}.geojson`))
+})
+
+// Vicmap complex points keyed by complex name
+const complexes = {}
+
+let sourceCount = 0
+const group = new Transform({
+  readableObjectMode: true,
+  writableObjectMode: true,
+  transform(feature, encoding, callback) {
+    sourceCount++
+
+    if (!argv.quiet) {
+      if (process.stdout.isTTY && sourceCount % 10000 === 0) {
+        process.stdout.write(` ${sourceCount.toLocaleString()}\r`)
+      }
+    }
+
+    const name = feature.properties.name
+    if (!(name in complexes)) {
+      complexes[name] = []
+    }
+
+    complexes[name].push(feature)
+
+    callback()
+  }
+})
+
+// copies of the source points styled as small orange markers for display within a MapRoulette task
+const styleSourcePoints = features => features.map(feature => {
+  return Object.assign({}, feature, {
+    properties: Object.assign({}, feature.properties, {
+      'marker-color': 'orange',
+      'marker-size': 'small'
+    })
+  })
+})
+
+console.log('Stage 1/2 reading Vicmap complex points into groups')
+pipeline(
+  fs.createReadStream(inputFile),
+  ndjson.parse(),
+  group,
+  (err) => {
+    if (err) {
+      console.log(err)
+      process.exit(1)
+    } else {
+
+      console.log('Stage 2/2 saving features per complex')
+      // output complexes as a geometry collection feature with a hull and multipoint
+      const complexCount = Object.keys(complexes).length
+      let complexIndex = 0
+      for (const [name, complex] of Object.entries(complexes)) {
+        complexIndex++
+        if (process.stdout.isTTY && complexIndex % 50 === 0) {
+          process.stdout.write(` ${complexIndex.toLocaleString()}/${complexCount.toLocaleString()} (${Math.round(complexIndex / complexCount * 100)}%)\r`)
+        }
+
+        const properties = {
+          name: capitalCase(name)
+        }
+
+        const points = multiPoint(complex.map(feature => feature.geometry.coordinates), properties)
+        const hull = convex(featureCollection(complex), { properties })
+        // centroid as the mean of the point coordinates
+        const centroid = point(
+          points.geometry.coordinates
+            .reduce((acc, cur) => {
+              return [
+                acc[0] + cur[0],
+                acc[1] + cur[1]
+              ]
+            }, [0, 0])
+            .map(v => v / points.geometry.coordinates.length)
+          , properties)
+
+        outputStreams.multiPoint.write(points)
+        outputStreams.centroid.write(centroid)
+        outputStreams.hull.write(hull ? hull : point(complex[0].geometry.coordinates, properties))
+
+        // GeometryCollection feature of MultiPoints plus either the convex hull polygon or single point if the hull is just a single point
+        const feature = geometryCollection([points.geometry, hull ? hull.geometry : complex[0].geometry], properties)
+
+        outputStreams.geometryCollection.write(feature)
+
+        // find nearby matching OSM feature
+        const maxDistanceInKm = 1
+        const nearby = around(osmIndex, ...centroid.geometry.coordinates, Infinity, maxDistanceInKm)
+        const nearbyMatches = nearby.filter(i => {
+          const similarity = lcs.similarity(osmFeatures[i].properties.name.toLowerCase(), name.toLowerCase())
+          return similarity > 0.7
+        })
+        const nearbyMatchedFeatures = nearbyMatches.map(i => osmFeatures[i])
+
+        if (nearbyMatches.length) {
+          console.log(name)
+          console.log(' > ', nearbyMatches.map(i => osmFeatures[i].properties.name))
+        }
+        if (nearbyMatches.length === 1) {
+          // a single nearby OSM features found with similar name
+          // case-insensitive comparison of the two names
+          if (nearbyMatchedFeatures[0].properties.name.toLowerCase() === name.toLowerCase()) {
+            // name exactly matched
+            console.log(`Exact match: ${properties.name} = ${nearbyMatchedFeatures[0].properties.name}`)
+          } else {
+            // name was similar but not an exact match
+            // create a MapRoulette task to investigate further
+            const task = {
+              type: 'FeatureCollection',
+              features: [
+                ...styleSourcePoints(complex),
+                point(centroid.geometry.coordinates, Object.assign({}, centroid.properties, {
+                  'marker-color': 'orange',
+                  'marker-size': 'large',
+                  'OSM Name': nearbyMatchedFeatures[0].properties.name
+                })),
+                ...nearbyMatchedFeatures
+              ]
+            }
+            outputStreams.mr_singleNearbySimilarFeature.write(task)
+          }
+        } else if (nearbyMatches.length > 1) {
+          // multiple nearby OSM features found with similar name, create a MapRoulette task to investigate further
+          const task = {
+            type: 'FeatureCollection',
+            features: [
+              ...styleSourcePoints(complex),
+              point(centroid.geometry.coordinates, Object.assign({}, centroid.properties, {
+                'marker-color': 'orange',
+                'marker-size': 'large'
+              })),
+              ...nearbyMatchedFeatures
+            ]
+          }
+          outputStreams.mr_multipleNearbySimilarFeatures.write(task)
+        } else {
+          // no nearby OSM feature found with similar name, so create a MapRoulette task
+          const task = {
+            type: 'FeatureCollection',
+            features: [ centroid ]
+          }
+          outputStreams.mr_noNearbySimilarFeature.write(task)
+        }
+      }
+
+      outputKeys.forEach(key => {
+        outputStreams[key].end()
+      })
+
+      Promise.all(outputKeys.map(key => {
+        return new Promise(resolve => {
+          outputStreamOutputs[key].on('finish', () => {
+            console.log(`saved ${outputPath}/${key}.geojson`)
+            resolve()
+          })
+        })
+      }))
+        .then(() => {
+          process.exit(0)
+        })
+    }
+  }
+)
diff --git a/bin/vicmap2osm.js b/bin/vicmap2osm.js
index 597043e..a45bc4e 100755
--- a/bin/vicmap2osm.js
+++ b/bin/vicmap2osm.js
@@ -40,6 +40,14 @@ if (!fs.existsSync(inputFile)) {
   process.exit(1)
 }
 
+// output Vicmap complex name data
+const complexStream = ndjson.stringify()
+const complexStreamOutput = complexStream.pipe(fs.createWriteStream(`dist/vicmap-complex.geojson`))
+
+// output Vicmap building name data
+const buildingStream = ndjson.stringify()
+const buildingStreamOutput = 
buildingStream.pipe(fs.createWriteStream(`dist/vicmap-building.geojson`))
+
 let sourceCount = 0
 const transform = new Transform({
   readableObjectMode: true,
@@ -53,6 +61,18 @@ const transform = new Transform({
       }
     }
 
+    // also record the complex name against the source geometry, for dist/vicmap-complex.geojson
+    if (feature.properties.COMPLEX) {
+      const complexFeature = {
+        type: 'Feature',
+        properties: {
+          name: feature.properties.COMPLEX
+        },
+        geometry: feature.geometry
+      }
+      complexStream.write(complexFeature)
+    }
+
     // convert source Feature into a Feature per the OSM schema
     const osm = toOSM(feature, {
       tracing: argv.tracing,
@@ -60,6 +80,18 @@ const transform = new Transform({
       includeDerivableProperties: argv.preserveDerivableProperties
     })
 
+    // also record the building name against the OSM schema feature, for dist/vicmap-building.geojson
+    if (feature.properties.BUILDING) {
+      const buildingFeature = {
+        type: 'Feature',
+        properties: Object.assign({}, osm.properties, {
+          name: feature.properties.BUILDING
+        }),
+        geometry: osm.geometry
+      }
+      buildingStream.write(buildingFeature)
+    }
+
     // some addresses we skip importing into OSM, see README.md#omitted-addresses
     if (filterOSM(osm) && filterSource(feature)) {
       this.push(osm)
@@ -81,7 +113,23 @@ pipeline(
       console.log(err)
       process.exit(1)
     } else {
-      process.exit(0)
+      complexStream.end()
+      buildingStream.end()
+
+      // wait on each output separately, the order the two files finish in isn't guaranteed
+      const saved = (output, path) => new Promise(resolve => {
+        output.on('finish', () => {
+          console.log(`saved ${path}`)
+          resolve()
+        })
+      })
+
+      Promise.all([
+        saved(complexStreamOutput, 'dist/vicmap-complex.geojson'),
+        saved(buildingStreamOutput, 'dist/vicmap-building.geojson')
+      ]).then(() => {
+        process.exit(0)
+      })
     }
   }
 )
diff --git a/config/osmium-export-config-names.json b/config/osmium-export-config-names.json
new file mode 100644
index 0000000..4e8d82e
--- /dev/null
+++ b/config/osmium-export-config-names.json
@@ -0,0 +1,17 @@
+{
+    "attributes": {
+        "type": true,
+        "id": true,
+        "version": false,
+        "changeset": false,
+        "timestamp": false,
+        "uid": false,
+        "user": false,
+        "way_nodes": true
+    },
+    "format_options": {
+    },
+    "linear_tags": true,
+    "area_tags": true,
+    "include_tags": ["name"]
+}
diff --git a/package.json b/package.json
index bd34415..85c91ea 100644
--- a/package.json
+++ 
b/package.json @@ -12,6 +12,7 @@ "@turf/bbox": "^6.3.0", "@turf/boolean-intersects": "^6.3.0", "@turf/centroid": "^6.3.0", + "@turf/convex": "^6.3.0", "@turf/helpers": "^6.3.0", "capital-case": "^1.0.4", "cheap-ruler": "^3.0.1", @@ -25,6 +26,7 @@ "osm-geojson": "^0.8.4", "polygon-lookup": "^2.6.0", "readable-stream": "^3.6.0", + "string-comparison": "^1.0.9", "tape": "^5.2.2", "yargs": "^17.0.1" }, @@ -46,6 +46,15 @@ "@turf/helpers" "^6.3.0" "@turf/meta" "^6.3.0" +"@turf/convex@^6.3.0": + version "6.3.0" + resolved "https://registry.yarnpkg.com/@turf/convex/-/convex-6.3.0.tgz#d3eb866cf6863c075c85039edc89db7c595eee44" + integrity sha512-YpiLKRu1suwbI/knCOd7Fg7LojV6Beonu8gQjCoaPdkBEz0/W3XqNpfWQhcqp+XR10a2g4RK5mi6bUUejToFBw== + dependencies: + "@turf/helpers" "^6.3.0" + "@turf/meta" "^6.3.0" + concaveman "*" + "@turf/helpers@6.x", "@turf/helpers@^6.3.0": version "6.3.0" resolved "https://registry.yarnpkg.com/@turf/helpers/-/helpers-6.3.0.tgz#87f90f806c3f8ad6385ef8d2041d3662bf3c9fb1" @@ -266,6 +275,16 @@ concat-stream@^1.5.0: readable-stream "^2.2.2" typedarray "^0.0.6" +concaveman@*: + version "1.2.0" + resolved "https://registry.yarnpkg.com/concaveman/-/concaveman-1.2.0.tgz#4340f27c08a11bdc1d5fac13476862a2ab09b703" + integrity sha512-OcqechF2/kubbffomKqjGEkb0ndlYhEbmyg/fxIGqdfYp5AZjD2Kl5hc97Hh3ngEuHU2314Z4KDbxL7qXGWrQQ== + dependencies: + point-in-polygon "^1.0.1" + rbush "^3.0.0" + robust-predicates "^2.0.4" + tinyqueue "^2.0.3" + core-util-is@1.0.2, core-util-is@~1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7" @@ -922,6 +941,11 @@ point-in-polygon@1.0.1: resolved "https://registry.yarnpkg.com/point-in-polygon/-/point-in-polygon-1.0.1.tgz#d59b64e8fee41c49458aac82b56718c5957b2af7" integrity sha1-1Ztk6P7kHElFiqyCtWcYxZV7Kvc= +point-in-polygon@^1.0.1: + version "1.1.0" + resolved 
"https://registry.yarnpkg.com/point-in-polygon/-/point-in-polygon-1.1.0.tgz#b0af2616c01bdee341cbf2894df643387ca03357" + integrity sha512-3ojrFwjnnw8Q9242TzgXuTD+eKiutbzyslcq1ydfu82Db2y+Ogbmyrkpv0Hgj31qwT3lbS9+QAAO/pIQM35XRw== + polygon-lookup@^2.6.0: version "2.6.0" resolved "https://registry.yarnpkg.com/polygon-lookup/-/polygon-lookup-2.6.0.tgz#ff4145fde3a7076ac1fa488edad56d2aa5086adb" @@ -956,6 +980,11 @@ quickselect@^1.0.1: resolved "https://registry.yarnpkg.com/quickselect/-/quickselect-1.1.1.tgz#852e412ce418f237ad5b660d70cffac647ae94c2" integrity sha512-qN0Gqdw4c4KGPsBOQafj6yj/PA6c/L63f6CaZ/DCF/xF4Esu3jVmKLUDYxghFx8Kb/O7y9tI7x2RjTSXwdK1iQ== +quickselect@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/quickselect/-/quickselect-2.0.0.tgz#f19680a486a5eefb581303e023e98faaf25dd018" + integrity sha512-RKJ22hX8mHe3Y6wH/N3wCM6BWtjaxIyyUIkpHOvfFnxdI4yD4tBXEBKSbriGujF6jnSVkJrffuo6vxACiSSxIw== + random-int@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/random-int/-/random-int-1.0.0.tgz#e6a2ed3448ac9c6646a0657443b1c1521592ed08" @@ -968,6 +997,13 @@ rbush@^2.0.0, rbush@^2.0.2: dependencies: quickselect "^1.0.1" +rbush@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/rbush/-/rbush-3.0.1.tgz#5fafa8a79b3b9afdfe5008403a720cc1de882ecf" + integrity sha512-XRaVO0YecOpEuIvbhbpTrZgoiI6xBlz6hnlr6EHhd+0x9ase6EmeN+hdwwUaJvLcsFFQ8iWVF1GAK1yB0BWi0w== + dependencies: + quickselect "^2.0.0" + readable-stream@3, readable-stream@^3.0.0, readable-stream@^3.6.0: version "3.6.0" resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198" @@ -1044,6 +1080,11 @@ resumer@^0.0.0: dependencies: through "~2.3.4" +robust-predicates@^2.0.4: + version "2.0.4" + resolved "https://registry.yarnpkg.com/robust-predicates/-/robust-predicates-2.0.4.tgz#0a2367a93abd99676d075981707f29cfb402248b" + integrity 
sha512-l4NwboJM74Ilm4VKfbAtFeGq7aEjWL+5kVFcmgFA2MrdnQWx9iE/tUGvxY5HyMI7o/WpSIUFLbC5fbeaHgSCYg== + safe-buffer@^5.0.1, safe-buffer@^5.1.2, safe-buffer@~5.2.0: version "5.2.1" resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6" @@ -1097,6 +1138,11 @@ sshpk@^1.7.0: safer-buffer "^2.0.2" tweetnacl "~0.14.0" +string-comparison@^1.0.9: + version "1.0.9" + resolved "https://registry.yarnpkg.com/string-comparison/-/string-comparison-1.0.9.tgz#1435c7d5553a76fc6ae12995fe8f3cf724bb8a54" + integrity sha512-phzXndkWdEmrFPR2o92MJeuw/fC0vSelpzCEE7mvc4z8Br1J65ZWKqOUNgUzGkq+9aAmOypePJwPGELgn9iUhA== + string-width@^4.1.0, string-width@^4.2.0: version "4.2.2" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.2.tgz#dafd4f9559a7585cfba529c6a0a4f73488ebd4c5" @@ -1187,6 +1233,11 @@ through2@^4.0.0: resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5" integrity sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU= +tinyqueue@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/tinyqueue/-/tinyqueue-2.0.3.tgz#64d8492ebf39e7801d7bd34062e29b45b2035f08" + integrity sha512-ppJZNDuKGgxzkHihX8v9v9G5f+18gzaTfrukGrq6ueg0lmH4nqVnA2IPG0AEH3jKEk2GRJCUhDoqpoiw3PHLBA== + tough-cookie@~2.5.0: version "2.5.0" resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.5.0.tgz#cd9fb2a0aa1d5a12b473bd9fb96fa3dcff65ade2" |