aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrew Harvey <andrew@alantgeo.com.au> 2021-08-18 17:29:52 +1000
committer Andrew Harvey <andrew@alantgeo.com.au> 2021-08-18 17:29:52 +1000
commit b284dbdb22025bc87af9d35bdb654286140d9cab (patch)
tree 5a2a24ac14870d84125451970a98ef50771a2e62
parent c1489e1cb395e686c6491244463e9550e5b8faec (diff)
conflate reduceDuplicates multiCluster for MapRoulette
-rw-r--r--README.md12
-rwxr-xr-xbin/reduceDuplicates.js87
2 files changed, 56 insertions, 43 deletions
diff --git a/README.md b/README.md
index 0b37c1f..193b3ea 100644
--- a/README.md
+++ b/README.md
@@ -36,17 +36,17 @@ The following steps are built into the _build vicmap_ stage.
Next, convert into [OSM address schema](https://wiki.openstreetmap.org/wiki/Key:addr), and omit addresses which don't meet our threshold for import (see [_Omitted addresses_](#omitted-addresses)) (code at [`bin/vicmap2osm.js`](bin/vicmap2osm.js)):
- make dist/vicmap-osm-with-suburb.geojson
+ make dist/vicmap-osm-with-suburb.geojson (3,529,928 features)
-Next, remove duplicates where all address attributes match at the same location or within a small proximity (code at [`bin/reduceDuplicates.js`](bin/reduceDuplicates.js), see [_Removing duplicates_](#removing-duplicates)):
+Next, remove duplicates where all address attributes match at the same location or within a small proximity (code at [`bin/reduceDuplicates.js`](bin/reduceDuplicates.js), see [_Removing duplicates_](#removing-duplicates)): (3,393,050 features)
make dist/vicmap-osm-uniq.geojson
Two debug outputs are produced from this step.
-1. _singleCluster_ - visualises where all addresses with the same address properties are combined into a single "cluster" based on a 25 meter maximum threshold distance. In this case it's safe to reduce all the points into a single centroid point.
+1. _singleCluster_ - visualises where all addresses with the same address properties are combined into a single "cluster" based on a 40 meter maximum threshold distance. In this case it's safe to reduce all the points into a single centroid point.
-2. _multiCluster_ - visualises where all addresses with the same address properties exceed the 25 meter cluster threshold and are unable to be reduced to a single point. These are not included in the import and need to be reviewed for manual import. A MapRoulette challenge is outputted at `debug/reduceDuplicates/mr_duplicateAddressFarApart.geojson`, which includes those missing from OSM with a rough conflation pass.
+2. _multiCluster_ - visualises where all addresses with the same address properties exceed the 40 meter cluster threshold and are unable to be reduced to a single point. These are not included in the import and need to be reviewed for manual import. A MapRoulette challenge is written to `debug/reduceDuplicates/mr_duplicateAddressFarApart_NotFoundInOSM.geojson`, which includes only those addresses not found in OSM by a rough conflation pass.
![multiCluster example](img/reduceDuplicates_multiCluster.png)
@@ -391,6 +391,10 @@ Adding `addr:street` and other tags where none exists but the `addr:housenumber`
Flag whether within the same property parcel or not.
+### Stage X - Duplicate addresses from Vicmap needing manual review
+
+`mr_duplicateAddressFarApart_NotFoundInOSM.geojson` was used to create the MapRoulette challenge at https://maproulette.org/browse/challenges/21226.
+
### Changeset tags
- `source=Vicmap Address`
diff --git a/bin/reduceDuplicates.js b/bin/reduceDuplicates.js
index 3c5a7ee..70b340c 100755
--- a/bin/reduceDuplicates.js
+++ b/bin/reduceDuplicates.js
@@ -132,11 +132,11 @@ const reduce = new Transform({
} else {
// features have same properties but not all with the same geometry
- // cluster features with a threshold of 25m
- const clusters = cluster(groupedFeatures, 25)
+ // cluster features with a threshold of 40m
+ const clusters = cluster(groupedFeatures, 40)
// if clustered into a single cluster, then output a single average feature
- // this should be safe to use as within 25m
+ // this should be safe to use as within 40m
if (clusters.length === 1) {
const averageCoordinates = [
groupedFeatures.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur) / groupedFeatures.length,
@@ -219,6 +219,11 @@ const reduce = new Transform({
// output as a MapRoulette task
const firstGroupedFeature = groupedFeatures[0]
+
+ delete firstGroupedFeature.properties['addr:suburb']
+ delete firstGroupedFeature.properties['addr:postcode']
+ delete firstGroupedFeature.properties['addr:state']
+
const firstGroupedFeatureKey = [
firstGroupedFeature.properties['addr:housenumber'],
firstGroupedFeature.properties['addr:street']
@@ -237,27 +242,29 @@ const reduce = new Transform({
foundInOSM = true
}
}
- if (!foundInOSM) {
- // output
- const task = {
- type: 'FeatureCollection',
- features: [
- ...groupedFeatures
- ],
- cooperativeWork: {
- meta: {
- version: 2,
- type: 2
- },
- file: {
- type: 'xml',
- format: 'osc',
- encoding: 'base64',
- content: Buffer.from(featureToOsc(groupedFeatures[0])).toString('base64') // the base64-encoded osc file
- }
+ // output
+ const task = {
+ type: 'FeatureCollection',
+ features: [
+ ...groupedFeatures
+ ],
+ cooperativeWork: {
+ meta: {
+ version: 2,
+ type: 2
+ },
+ file: {
+ type: 'xml',
+ format: 'osc',
+ encoding: 'base64',
+ content: Buffer.from(featuresToOsc(groupedFeatures)).toString('base64') // the base64-encoded osc file
}
}
- debugStreams.mr_duplicateAddressFarApart.write(task)
+ }
+ if (foundInOSM) {
+ debugStreams.mr_duplicateAddressFarApart_FoundInOSM.write(task)
+ } else {
+ debugStreams.mr_duplicateAddressFarApart_NotFoundInOSM.write(task)
}
}
}
@@ -268,7 +275,7 @@ const reduce = new Transform({
}
})
-function featureToOsc(feature) {
+function featuresToOsc(features) {
return xml.json2xml({
_declaration: {
_attributes: {
@@ -282,22 +289,24 @@ function featureToOsc(feature) {
generator: 'alantgeo/vicmap2osm'
},
create: {
- node: {
- _attributes: {
- id: -1,
- version: 1,
- lat: feature.geometry.coordinates[1],
- lon: feature.geometry.coordinates[0]
- },
- tag: Object.keys(_.omit(feature.properties, ['_pfi'])).map(key => {
- return {
- _attributes: {
- k: key,
- v: feature.properties[key]
+ node: features.map((feature, index) => {
+ return {
+ _attributes: {
+ id: 0 - (index + 1),
+ version: 1,
+ lat: feature.geometry.coordinates[1],
+ lon: feature.geometry.coordinates[0]
+ },
+ tag: Object.keys(_.omit(feature.properties, ['_pfi', 'addr:suburb', 'addr:postcode', 'addr:state'])).map(key => {
+ return {
+ _attributes: {
+ k: key,
+ v: feature.properties[key]
+ }
}
- }
- })
- }
+ })
+ }
+ })
}
}
}, Object.assign({
@@ -314,7 +323,7 @@ function featureToOsc(feature) {
}
// ndjson streams to output debug features
-const debugKeys = ['singleCluster', 'multiCluster', 'droppedSameCoordinates', 'mr_duplicateAddressFarApart']
+const debugKeys = ['singleCluster', 'multiCluster', 'droppedSameCoordinates', 'mr_duplicateAddressFarApart_FoundInOSM', 'mr_duplicateAddressFarApart_NotFoundInOSM']
const debugStreams = {}
const debugStreamOutputs = {}