diff options
author | Andrew Harvey <andrew@alantgeo.com.au> | 2021-08-18 17:29:52 +1000 |
---|---|---|
committer | Andrew Harvey <andrew@alantgeo.com.au> | 2021-08-18 17:29:52 +1000 |
commit | b284dbdb22025bc87af9d35bdb654286140d9cab (patch) | |
tree | 5a2a24ac14870d84125451970a98ef50771a2e62 | |
parent | c1489e1cb395e686c6491244463e9550e5b8faec (diff) |
conflate reduceDuplicates multiCluster for MapRoulette
-rw-r--r-- | README.md | 12 | ||||
-rwxr-xr-x | bin/reduceDuplicates.js | 87 |
2 files changed, 56 insertions, 43 deletions
@@ -36,17 +36,17 @@ The following steps are built into the _build vicmap_ stage. Next, convert into [OSM address schema](https://wiki.openstreetmap.org/wiki/Key:addr), and omit addresses which don't meet our threshold for import (see [_Omitted addresses_](#omitted-addresses)) (code at [`bin/vicmap2osm.js`](bin/vicmap2osm.js)): - make dist/vicmap-osm-with-suburb.geojson + make dist/vicmap-osm-with-suburb.geojson (3,529,928 features) -Next, remove duplicates where all address attributes match at the same location or within a small proximity (code at [`bin/reduceDuplicates.js`](bin/reduceDuplicates.js), see [_Removing duplicates_](#removing-duplicates)): +Next, remove duplicates where all address attributes match at the same location or within a small proximity (code at [`bin/reduceDuplicates.js`](bin/reduceDuplicates.js), see [_Removing duplicates_](#removing-duplicates)): (3,393,050 features) make dist/vicmap-osm-uniq.geojson Two debug outputs are produced from this step. -1. _singleCluster_ - visualises where all addresses with the same address properties are combined into a single "cluster" based on a 25 meter maximum threshold distance. In this case it's safe to reduce all the points into a single centroid point. +1. _singleCluster_ - visualises where all addresses with the same address properties are combined into a single "cluster" based on a 40 meter maximum threshold distance. In this case it's safe to reduce all the points into a single centroid point. -2. _multiCluster_ - visualises where all addresses with the same address properties exceed the 25 meter cluster threshold and are unable to be reduced to a single point. These are not included in the import and need to be reviewed for manual import. A MapRoulette challenge is outputted at `debug/reduceDuplicates/mr_duplicateAddressFarApart.geojson`, which includes those missing from OSM with a rough conflation pass. +2. 
_multiCluster_ - visualises where all addresses with the same address properties exceed the 40 meter cluster threshold and are unable to be reduced to a single point. These are not included in the import and need to be reviewed for manual import. A MapRoulette challenge is outputted at `debug/reduceDuplicates/mr_duplicateAddressFarApart_NotFoundInOSM.geojson`, which includes those missing from OSM with a rough conflation pass.  @@ -391,6 +391,10 @@ Adding `addr:street` and other tags where none exists but the `addr:housenumber` Flag whether within the same property parcel or not. +### Stage X - Duplicate addresses from Vicmap needing manual review + +`mr_duplicateAddressFarApart_NotFoundInOSM.geojson` was used to create the MapRoulette challenge at https://maproulette.org/browse/challenges/21226. + ### Changeset tags - `source=Vicmap Address` diff --git a/bin/reduceDuplicates.js b/bin/reduceDuplicates.js index 3c5a7ee..70b340c 100755 --- a/bin/reduceDuplicates.js +++ b/bin/reduceDuplicates.js @@ -132,11 +132,11 @@ const reduce = new Transform({ } else { // features have same properties but not all with the same geometry - // cluster features with a threshold of 25m - const clusters = cluster(groupedFeatures, 25) + // cluster features with a threshold of 40m + const clusters = cluster(groupedFeatures, 40) // if clustered into a single cluster, then output a single average feature - // this should be safe to use as within 25m + // this should be safe to use as within 40m if (clusters.length === 1) { const averageCoordinates = [ groupedFeatures.map(f => f.geometry.coordinates[0]).reduce((acc, cur) => acc + cur) / groupedFeatures.length, @@ -219,6 +219,11 @@ const reduce = new Transform({ // output as a MapRoulette task const firstGroupedFeature = groupedFeatures[0] + + delete firstGroupedFeature.properties['addr:suburb'] + delete firstGroupedFeature.properties['addr:postcode'] + delete firstGroupedFeature.properties['addr:state'] + const firstGroupedFeatureKey = [ 
firstGroupedFeature.properties['addr:housenumber'], firstGroupedFeature.properties['addr:street'] @@ -237,27 +242,29 @@ const reduce = new Transform({ foundInOSM = true } } - if (!foundInOSM) { - // output - const task = { - type: 'FeatureCollection', - features: [ - ...groupedFeatures - ], - cooperativeWork: { - meta: { - version: 2, - type: 2 - }, - file: { - type: 'xml', - format: 'osc', - encoding: 'base64', - content: Buffer.from(featureToOsc(groupedFeatures[0])).toString('base64') // the base64-encoded osc file - } + // output + const task = { + type: 'FeatureCollection', + features: [ + ...groupedFeatures + ], + cooperativeWork: { + meta: { + version: 2, + type: 2 + }, + file: { + type: 'xml', + format: 'osc', + encoding: 'base64', + content: Buffer.from(featuresToOsc(groupedFeatures)).toString('base64') // the base64-encoded osc file } } - debugStreams.mr_duplicateAddressFarApart.write(task) + } + if (foundInOSM) { + debugStreams.mr_duplicateAddressFarApart_FoundInOSM.write(task) + } else { + debugStreams.mr_duplicateAddressFarApart_NotFoundInOSM.write(task) } } } @@ -268,7 +275,7 @@ const reduce = new Transform({ } }) -function featureToOsc(feature) { +function featuresToOsc(features) { return xml.json2xml({ _declaration: { _attributes: { @@ -282,22 +289,24 @@ function featureToOsc(feature) { generator: 'alantgeo/vicmap2osm' }, create: { - node: { - _attributes: { - id: -1, - version: 1, - lat: feature.geometry.coordinates[1], - lon: feature.geometry.coordinates[0] - }, - tag: Object.keys(_.omit(feature.properties, ['_pfi'])).map(key => { - return { - _attributes: { - k: key, - v: feature.properties[key] + node: features.map((feature, index) => { + return { + _attributes: { + id: 0 - (index + 1), + version: 1, + lat: feature.geometry.coordinates[1], + lon: feature.geometry.coordinates[0] + }, + tag: Object.keys(_.omit(feature.properties, ['_pfi', 'addr:suburb', 'addr:postcode', 'addr:state'])).map(key => { + return { + _attributes: { + k: key, + v: 
feature.properties[key] + } } - } - }) - } + }) + } + }) } } }, Object.assign({ @@ -314,7 +323,7 @@ function featureToOsc(feature) { } // ndjson streams to output debug features -const debugKeys = ['singleCluster', 'multiCluster', 'droppedSameCoordinates', 'mr_duplicateAddressFarApart'] +const debugKeys = ['singleCluster', 'multiCluster', 'droppedSameCoordinates', 'mr_duplicateAddressFarApart_FoundInOSM', 'mr_duplicateAddressFarApart_NotFoundInOSM'] const debugStreams = {} const debugStreamOutputs = {} |