aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md2
-rwxr-xr-xbin/reduceDuplicates.js72
2 files changed, 71 insertions, 3 deletions
diff --git a/README.md b/README.md
index bcf4858..d298a48 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ Two debug outputs are produced from this step.
1. singleCluster - visualises where all addresses with the same address properties are combined into a single "cluster" based on a 25 meter maximum threshold distance. In this case it's safe to reduce all the points into a single centroid point.
-2. multiCluster - visualises where all addresses with the same address properties exceed the 25 meter cluster threshold and are unable to be reduced to a single point. These are not included in the import and need to be manually reviewed for manual import.
+2. multiCluster - visualises where all addresses with the same address properties exceed the 25 meter cluster threshold and are unable to be reduced to a single point. These are not included in the import and need to be reviewed for manual import. A MapRoulette challenge is output to debug/reduceDuplicates/mr_duplicateAddressFarApart.geojson; however, because this happens before the conflation stage, many of these addresses may already exist in OSM. It's a TODO to conflate them so that only addresses missing from OSM are sent to MapRoulette for checking.
![multiCluster example](img/reduceDuplicates_multiCluster.png)
diff --git a/bin/reduceDuplicates.js b/bin/reduceDuplicates.js
index e4373c1..f0691f9 100755
--- a/bin/reduceDuplicates.js
+++ b/bin/reduceDuplicates.js
@@ -9,6 +9,8 @@ const { Readable, Transform, pipeline } = require('stream')
const ndjson = require('ndjson')
const cluster = require('../lib/cluster.js')
const cloneDeep = require('clone-deep')
+const xml = require('xml-js')
+const _ = require('lodash')
const argv = require('yargs/yargs')(process.argv.slice(2))
.option('debug', {
@@ -160,8 +162,29 @@ const reduce = new Transform({
// output candidate feature
debugStreams.multiCluster.write(feature)
})
- // output a web connecting the canidates for visualisation
+ // output a web connecting the candidates for visualisation
debugStreams.multiCluster.write(webOfMatches)
+
+ // output as a MapRoulette task
+ const task = {
+ type: 'FeatureCollection',
+ features: [
+ ...groupedFeatures
+ ],
+ cooperativeWork: {
+ meta: {
+ version: 2,
+ type: 2
+ },
+ file: {
+ type: 'xml',
+ format: 'osc',
+ encoding: 'base64',
+ content: Buffer.from(featureToOsc(groupedFeatures[0])).toString('base64') // the base64-encoded osc file
+ }
+ }
+ }
+ debugStreams.mr_duplicateAddressFarApart.write(task)
}
}
}
@@ -171,8 +194,53 @@ const reduce = new Transform({
}
})
+/**
+ * Serialise one GeoJSON feature as an OsmChange (.osc) XML document that
+ * creates a single new node, with the feature's properties written as tags.
+ *
+ * The `_pfi` property is excluded from the tags.
+ *
+ * @param {Object} feature - GeoJSON Feature; `geometry.coordinates` is read as
+ *   [lon, lat] (assumes a Point geometry - confirm against callers) and
+ *   `properties` supplies the tag keys and values
+ * @returns {string} the OsmChange XML document
+ */
+function featureToOsc(feature) {
+  const tags = _.omit(feature.properties, ['_pfi'])
+
+  return xml.json2xml({
+    _declaration: {
+      _attributes: {
+        version: '1.0',
+        encoding: 'UTF-8'
+      }
+    },
+    osmChange: {
+      _attributes: {
+        version: '0.6',
+        generator: 'alantgeo/vicmap2osm'
+      },
+      create: {
+        node: {
+          _attributes: {
+            // negative placeholder id, as the node does not exist yet
+            id: -1,
+            version: 1,
+            lat: feature.geometry.coordinates[1],
+            lon: feature.geometry.coordinates[0]
+          },
+          tag: Object.entries(tags).map(([k, v]) => {
+            return {
+              _attributes: { k, v }
+            }
+          })
+        }
+      }
+    }
+  }, Object.assign({
+    compact: true,
+    attributeValueFn: value => {
+      // these values were tested with test/xmlEntities.js
+      // NOTE(review): xml-js appears to replace " with &quot; before calling
+      // this hook, so that is undone first; otherwise the ampersand pass
+      // below would turn it into &amp;quot; - confirm against the installed
+      // xml-js version
+      return value.replace(/&quot;/g, '"') // convert quote back before converting amp
+        .replace(/&/g, '&amp;') // must run before the other entities are introduced
+        .replace(/</g, '&lt;')
+        .replace(/>/g, '&gt;')
+        .replace(/"/g, '&quot;')
+    }
+  }, argv.dryRun ? { spaces: 2 } : {})) // pretty-print only for dry runs
+}
+
// ndjson streams to output debug features
-const debugKeys = ['singleCluster', 'multiCluster']
+const debugKeys = ['singleCluster', 'multiCluster', 'mr_duplicateAddressFarApart']
const debugStreams = {}
const debugStreamOutputs = {}