diff options
author | Andrew Harvey <andrew@alantgeo.com.au> | 2021-06-21 15:44:31 +1000 |
---|---|---|
committer | Andrew Harvey <andrew@alantgeo.com.au> | 2021-06-21 15:44:31 +1000 |
commit | 46fe68c368d18c17454821a74ea115509a469a20 (patch) | |
tree | 9f3e308cb4b0cdb4920eb8b40c34f637a87032a3 | |
parent | c9a9be779a56bbb4af2375b9702d762ebf43fefe (diff) |
mr_duplicateAddressFarApart
-rw-r--r-- | README.md | 2 | ||||
-rwxr-xr-x | bin/reduceDuplicates.js | 72 |
2 files changed, 71 insertions, 3 deletions
@@ -44,7 +44,7 @@ Two debug outputs are produced from this step. 1. singleCluster - visualises where all addresses with the same address properties are combined into a single "cluster" based on a 25 meter maximum threshold distance. In this case it's safe to reduce all the points into a single centroid point. -2. multiCluster - visualises where all addresses with the same address properties exceed the 25 meter cluster threshold and are unable to be reduced to a single point. These are not included in the import and need to be manually reviewed for manual import. +2. multiCluster - visualises where all addresses with the same address properties exceed the 25 meter cluster threshold and are unable to be reduced to a single point. These are not included in the import and need to be reviewed for manual import. A MapRoulette challenge is outputted at debug/reduceDuplicates/mr_duplicateAddressFarApart.geojson, however because this is before the conflation stage, many of these may already exist in OSM. It's a TODO for these to be conflated so that only missing from OSM addresses are asked to be checked in MapRoulette.  
diff --git a/bin/reduceDuplicates.js b/bin/reduceDuplicates.js index e4373c1..f0691f9 100755 --- a/bin/reduceDuplicates.js +++ b/bin/reduceDuplicates.js @@ -9,6 +9,8 @@ const { Readable, Transform, pipeline } = require('stream') const ndjson = require('ndjson') const cluster = require('../lib/cluster.js') const cloneDeep = require('clone-deep') +const xml = require('xml-js') +const _ = require('lodash') const argv = require('yargs/yargs')(process.argv.slice(2)) .option('debug', { @@ -160,8 +162,29 @@ const reduce = new Transform({ // output candidate feature debugStreams.multiCluster.write(feature) }) - // output a web connecting the canidates for visualisation + // output a web connecting the candidates for visualisation debugStreams.multiCluster.write(webOfMatches) + + // output as a MapRoulette task + const task = { + type: 'FeatureCollection', + features: [ + ...groupedFeatures + ], + cooperativeWork: { + meta: { + version: 2, + type: 2 + }, + file: { + type: 'xml', + format: 'osc', + encoding: 'base64', + content: Buffer.from(featureToOsc(groupedFeatures[0])).toString('base64') // the base64-encoded osc file + } + } + } + debugStreams.mr_duplicateAddressFarApart.write(task) } } } @@ -171,8 +194,53 @@ const reduce = new Transform({ } }) +function featureToOsc(feature) { + return xml.json2xml({ + _declaration: { + _attributes: { + version: "1.0", + encoding: "UTF-8" + } + }, + osmChange: { + _attributes: { + version: '0.6', + generator: 'alantgeo/vicmap2osm' + }, + create: { + node: { + _attributes: { + id: -1, + version: 1, + lat: feature.geometry.coordinates[1], + lon: feature.geometry.coordinates[0] + }, + tag: Object.keys(_.omit(feature.properties, ['_pfi'])).map(key => { + return { + _attributes: { + k: key, + v: feature.properties[key] + } + } + }) + } + } + } + }, Object.assign({ + compact: true, + attributeValueFn: value => { + // these values were tested with test/xmlEntities.js + return value.replace(/&quot;/g, '"') // convert quote back before converting amp + .replace(/&/g, '&amp;') + .replace(/</g, '&lt;') + .replace(/>/g, '&gt;') + .replace(/"/g, '&quot;') + } + }, argv.dryRun ? { spaces: 2 } : {})) +} + // ndjson streams to output debug features -const debugKeys = ['singleCluster', 'multiCluster'] +const debugKeys = ['singleCluster', 'multiCluster', 'mr_duplicateAddressFarApart'] const debugStreams = {} const debugStreamOutputs = {} |