aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitlab-ci.yml4
-rw-r--r--Makefile6
-rw-r--r--README.md14
-rwxr-xr-xbin/building.js184
-rwxr-xr-xbin/complex.js3
5 files changed, 203 insertions, 8 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 03cc5da..dafd9f3 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -178,9 +178,11 @@ conflate:
script:
- yarn install
- cp src/polygon-lookup-patch.js node_modules/polygon-lookup/index.js
- - make dist/vicmap-complex-site
- make dist/conflate
+ - make dist/vicmap-complex-conflation
+ - make dist/vicmap-building-conflation
- wc -l dist/conflate/*.geojson
+ - wc -l dist/vicmap-*-conflation/*.geojson
when: manual
artifacts:
name: "conflate"
diff --git a/Makefile b/Makefile
index 8ad352a..1978b3a 100644
--- a/Makefile
+++ b/Makefile
@@ -216,6 +216,10 @@ dist/candidates: data/victoria-admin-level10.osm.geojson dist/conflate
mkdir -p $@
./bin/candidates.js $^ $@
-dist/vicmap-complex-site: dist/vicmap-complex.geojson data/victoria-named-features.osm.geojson
+dist/vicmap-complex-conflation: dist/vicmap-complex.geojson data/victoria-named-features.osm.geojson
mkdirp -p $@
./bin/complex.js $^ $@
+
+dist/vicmap-building-conflation: dist/vicmap-building.geojson data/victoria-named-features.osm.geojson
+ mkdirp -p $@
+ ./bin/building.js $^ $@
diff --git a/README.md b/README.md
index 4438fef..19f3e86 100644
--- a/README.md
+++ b/README.md
@@ -154,12 +154,16 @@ Because OSM tag values are limited to 255 characters, if the constructed `addr:f
Values `UNNAMED` and `NOT NAMED` appear as street name and locality names. These values are treated as null/empty values rather than proper names.
-### name
-Source data contains a field for building / property name. This appears to be a mixed bag sometimes it might fit better as `addr:housename` other times simply `name`. Further it's not too clear the distinction between these tags and how house names, property names, building names or the name of the venue at the site should be tagged.
+### Building Name
+Source data contains a field for building / property name. This appears to be a mixed bag: sometimes it might fit better as `addr:housename`, other times simply as `name`. Further, the distinction between these tags is not too clear, nor is how house names, property names, building names or the name of the venue at the site should be tagged, or even which type of name a given value is.
-It's common for the source data to use what we'd consider a description like "Shop", "Public Toilets" or "Reserve".
+It's common for the source data to use what we'd consider a description like "Shop", "Public Toilets" or "Reserve" instead of a proper name.
-For these reasons this building / property name is not included, however it could be a useful point of reference for mappers considering manually adding this data at a later stage.
+There are about 40,000 of these names.
+
+While there is value in including property names, building names and farm names as part of the address, we can't do this reliably, so the building / property name is not included in this import. However, it could be a useful point of reference for mappers considering manually adding this data at a later stage.
+
+`bin/vicmap2osm.js` outputs `dist/vicmap-building.geojson` which contains all the building name features.
### Complex Name
Source data sometimes includes a complex name, for example _CHADSTONE SHOPPING CENTRE_ or _MELBOURNE UNIVERSITY_. These attributes are not used as these names should appear on the actual feature like `shop=mall` or `amenity=university`.
@@ -174,7 +178,7 @@ The script at `bin/complex.js` processes this to:
- Tests to see whether this complex name matches a nearby OSM object
- Where it doesn't find a match in OSM, then it outputs a MapRoulette data file for mappers to review and potentially add these complex names to OSM.
-This outputs a bunch of files into `dist/vicmap-complex-site` including, three MapRoulette challanges:
+This outputs a bunch of files into `dist/vicmap-complex-conflation`, including three MapRoulette challenges:
- `mr_singleNearbySimilarFeature` - the Vicmap complex matched a single nearby OSM feature (but the name wasn't an exact match, where it was an exact match the Vicmap complex is not flagged for inclusion in MapRoulette)
- `mr_multipleNearbySimilarFeatures` - the Vicmap complex matched multiple nearby OSM features
diff --git a/bin/building.js b/bin/building.js
new file mode 100755
index 0000000..966c470
--- /dev/null
+++ b/bin/building.js
@@ -0,0 +1,184 @@
+#!/usr/bin/env node
+
+/**
+ * Take Vicmap address points which have a building name,
+ * then conflate with existing OSM names
+ */
+
+const fs = require('fs')
+const { Transform, pipeline } = require('readable-stream')
+const ndjson = require('ndjson')
+const point = require('@turf/helpers').point
+const { capitalCase } = require('capital-case')
+const Flatbush = require('flatbush')
+const bbox = require('@turf/bbox').default
+const { around } = require('geoflatbush')
+const { lcs } = require('string-comparison')
+
+const argv = require('yargs/yargs')(process.argv.slice(2))
+ .argv
+
+if (argv._.length < 3) {
+ console.error("Usage: ./building.js vicmap-building.geojson victoria-named-features.osm.geojson vicmap-building-conflation")
+ process.exit(1)
+}
+
+const inputFile = argv._[0]
+const osmFile = argv._[1]
+const outputPath = argv._[2]
+
+if (!fs.existsSync(inputFile)) {
+ console.error(`${inputFile} not found`)
+ process.exit(1)
+}
+
+console.log('Reading OSM data')
+const osmFeatures = fs.readFileSync(osmFile, 'utf-8').toString().split('\n')
+ .filter(line => line !== '')
+ .map((line, index, array) => {
+ if (process.stdout.isTTY && index % 1000 === 0) {
+ process.stdout.write(` ${index.toLocaleString()}/${array.length.toLocaleString()} (${Math.round(index / array.length * 100)}%)\r`)
+ }
+
+ try {
+ const feature = JSON.parse(line)
+ feature.properties.id = index
+ return feature
+ } catch {
+ console.log(`Error parsing line ${index} of ${osmFile}: ${line}`)
+ }
+ })
+
+console.log('Creating index for nearby OSM search')
+const osmIndex = new Flatbush(osmFeatures.length)
+for (const osmFeature of osmFeatures) {
+ osmIndex.add(...bbox(osmFeature))
+}
+osmIndex.finish()
+
+// ndjson streams to output features
+const outputKeys = [
+ // MapRoulette challenges
+ 'mr_singleNearbySimilarFeature',
+ 'mr_multipleNearbySimilarFeatures',
+ 'mr_noNearbySimilarFeature'
+]
+const outputStreams = {}
+const outputStreamOutputs = {}
+
+outputKeys.forEach(key => {
+ outputStreams[key] = ndjson.stringify()
+ outputStreamOutputs[key] = outputStreams[key].pipe(fs.createWriteStream(`${outputPath}/${key}.geojson`))
+})
+
+let sourceCount = 0
+const conflate = new Transform({
+ readableObjectMode: true,
+ writableObjectMode: true,
+ transform(feature, encoding, callback) {
+ sourceCount++
+
+ if (!argv.quiet) {
+ if (process.stdout.isTTY && sourceCount % 100 === 0) {
+ process.stdout.write(` ${sourceCount.toLocaleString()}\r`)
+ }
+ }
+
+ const name = feature.properties.name
+ const properties = {
+ name: capitalCase(name)
+ }
+
+ // find nearby matching OSM feature
+ const maxDistanceInKm = 1
+ const nearby = around(osmIndex, ...feature.geometry.coordinates, Infinity, maxDistanceInKm)
+ const nearbyMatches = nearby.filter(i => {
+ const similarity = lcs.similarity(osmFeatures[i].properties.name.toLowerCase(), name.toLowerCase())
+ return similarity > 0.8
+ })
+ const nearbyMatchedFeatures = nearbyMatches.map(i => osmFeatures[i])
+
+ /* TODO log to file
+ if (nearbyMatches.length) {
+ console.log(name)
+ console.log(' > ', nearbyMatches.map(i => osmFeatures[i].properties.name))
+ }
+ */
+ if (nearbyMatches.length === 1) {
+ // a single nearby OSM features found with similar name
+ if (nearbyMatchedFeatures[0].properties.name.toLowerCase === name.toLowerCase()) {
+ // name exactly matched
+ console.log(`Exact match: ${properties.name} = ${nearbyMatchedFeatures[0].properties.name}`)
+ } else {
+ // name was similar but not an exact match
+ // create a MapRoulette task to investigate further
+ const task = {
+ type: 'FeatureCollection',
+ features: [
+ point(feature.geometry.coordinates, Object.assign({}, feature.properties, {
+ 'marker-color': 'orange',
+ 'marker-size': 'large',
+ 'OSM Name': nearbyMatchedFeatures[0].properties.name
+ }, properties)),
+ ...nearbyMatchedFeatures
+ ]
+ }
+ outputStreams.mr_singleNearbySimilarFeature.write(task)
+ }
+ } else if (nearbyMatches.length > 1) {
+ // multiple nearby OSM features found with similar name, create a MapRoulette task to investigate further
+ const task = {
+ type: 'FeatureCollection',
+ features: [
+ point(feature.geometry.coordinates, Object.assign({}, feature.properties, {
+ 'marker-color': 'orange',
+ 'marker-size': 'large'
+ }, properties)),
+ ...nearbyMatchedFeatures
+ ]
+ }
+ outputStreams.mr_multipleNearbySimilarFeatures.write(task)
+ } else {
+ // no nearby OSM feature found with similar name, so create a MapRoulette task
+ const task = {
+ type: 'FeatureCollection',
+ features: [
+ point(feature.geometry.coordinates, Object.assign({}, feature.properties, properties))
+ ]
+ }
+ outputStreams.mr_noNearbySimilarFeature.write(task)
+ }
+
+ callback()
+ }
+})
+
+console.log('Stage 1/1 reading Vicmap building points')
+pipeline(
+ fs.createReadStream(inputFile),
+ ndjson.parse(),
+ conflate,
+ (err) => {
+ if (err) {
+ console.log(err)
+ process.exit(1)
+ } else {
+
+ outputKeys.forEach(key => {
+ outputStreams[key].end()
+ })
+
+ Promise.all(outputKeys.map(key => {
+ return new Promise(resolve => {
+ outputStreamOutputs[key].on('finish', () => {
+ console.log(`saved ${outputPath}/${key}.geojson`)
+ resolve()
+ })
+ })
+ }))
+ .then(() => {
+ process.exit(0)
+ })
+ }
+ }
+)
diff --git a/bin/complex.js b/bin/complex.js
index 6c83b6b..c8bda5d 100755
--- a/bin/complex.js
+++ b/bin/complex.js
@@ -1,7 +1,8 @@
#!/usr/bin/env node
/**
- * Take Vicmap address points which have a complex value, and group these into sites
+ * Take Vicmap address points which have a complex value, and group these into sites,
+ * then conflate with existing OSM names
*/
const fs = require('fs')