aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.vscode/launch.json12
-rwxr-xr-xbin/reduceRangeDuplicates.js113
-rw-r--r--test/fixtures/reduceRangeDuplicates/expectedOutput1.geojson2
-rw-r--r--test/fixtures/reduceRangeDuplicates/input1.geojson3
-rw-r--r--test/reduceRangeDuplicates.js85
5 files changed, 156 insertions, 59 deletions
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 707d6d2..291dcce 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -61,8 +61,18 @@
"skipFiles": [
"<node_internals>/**"
],
+ "program": "${workspaceFolder}/node_modules/.bin/tape",
+ "args": ["test/reduceRangeDuplicates.js"]
+ },
+ {
+ "type": "pwa-node",
+ "request": "launch",
+ "name": "bin/reduceRangeDuplicates fixture1",
+ "skipFiles": [
+ "<node_internals>/**"
+ ],
"program": "${workspaceFolder}/bin/reduceRangeDuplicates.js",
- "args": ["/tmp/input_POZXt.geojson", "/tmp/output/0OIov.geojson"]
+ "args": ["test/fixtures/reduceRangeDuplicates/input1.geojson", "test/fixtures/reduceRangeDuplicates/actualOutput1.geojson"]
},
{
"type": "pwa-node",
diff --git a/bin/reduceRangeDuplicates.js b/bin/reduceRangeDuplicates.js
index 6fac0b5..4ef57f9 100755
--- a/bin/reduceRangeDuplicates.js
+++ b/bin/reduceRangeDuplicates.js
@@ -41,6 +41,8 @@ if (!fs.existsSync(inputFile)) {
process.exit(1)
}
+const intermediateFile = `${outputFile}-intermediate.json`
+
function hash(feature) {
return [
feature.properties['addr:housenumber'],
@@ -95,11 +97,13 @@ const index = new Transform({
const regexp = /^(?<pre>\D*)(?<num>\d*)(?<suf>\D*)$/
/*
-* second pass, filter A removes ranges where each endpoint of the range exists separately
+* First filter pass (overall pass 2/3): filter A removes ranges where each endpoint of the range exists separately
* eg.
* - 304-306 Cardigan Street Carlton - range can be removed since each individual address exists
* - 304 Cardigan Street Calton
* - 306 Cardigan Street Calton
+*
+* Conditional on the individual addresses not sharing the same geometry; if they do, they are dropped in favour of the range
*
* - 249-263 Faraday Street
* - 251 Faraday Street - removed since not all addresses from the range exist, but this one is covered by the range
@@ -200,13 +204,25 @@ const reduceRange = new Transform({
}
}
- // can be removed, feature not pushed
- if (argv.verbose) {
- console.log(`Filter A: ${feature.properties['addr:housenumber']} can be removed`)
- }
+ // if matched start and end point have the same coordinates, then to avoid overlapping points, favour range so retain it
+ if (matchedStart.geometry.coordinates.join(',') === (matchedEnd.geometry.coordinates.join(','))) {
+ if (argv.verbose) {
+ console.log(`Filter A: ${feature.properties['addr:housenumber']} ${feature.properties['addr:street']} ${feature.properties['addr:suburb']} retained because while endpoints exist they share the same geometry`)
+ }
+ this.push(feature)
+ } else {
+ // can be removed, feature not pushed
+ if (argv.verbose) {
+ console.log(`Filter A: ${feature.properties['addr:housenumber']} ${feature.properties['addr:street']} ${feature.properties['addr:suburb']} can be removed`)
+ }
+
+ // keep track of removed features for filter B, so we don't double remove both range and midpoints
+ rangesRemovedInFilterA[hash(feature)] = true
- // keep track of removed features for filter B, so we don't double remove both range and midpoints
- rangesRemovedInFilterA[hash(feature)] = true
+ if (argv.debug) {
+ debugStreams['filterA_dropRange'].write(feature)
+ }
+ }
} else {
// not both start and end found,
// if one of start or end found and that start/end has addr:flats...
@@ -216,6 +232,9 @@ const reduceRange = new Transform({
(matchedStart && matchedStart.properties['addr:flats']) || (matchedEnd && matchedEnd.properties['addr:flats'])
)) {
// drop the range, eg "112-116 Anderson Street, South Yarra"
+ if (argv.debug) {
+ debugStreams['filterA_dropRangeRangeNoFlatsNonRangeHasFlats'].write(feature)
+ }
} else {
// then still include the range
this.push(feature)
@@ -285,8 +304,17 @@ const reduceNonRange = new Transform({
// since the non-range feature has a unit that the range doesn't have, don't drop it
dropFeature = false
if (argv.debug) {
- debugStreams['addrInRangeDifferentUnits'].write(feature)
- debugStreams['addrInRangeDifferentUnits'].write(range)
+ debugStreams.addrInRangeDifferentUnits.write(feature)
+ debugStreams.addrInRangeDifferentUnits.write(range)
+
+ debugStreams.addrInRangeDifferentUnits.write({
+ type: 'Feature',
+ properties: feature.properties,
+ geometry: {
+ type: 'LineString',
+ coordinates: [feature.geometry.coordinates, range.geometry.coordinates]
+ }
+ })
}
}
} else {
@@ -297,12 +325,16 @@ const reduceNonRange = new Transform({
}
}
}
+
if (!dropFeature) {
this.push(feature)
} else {
if (argv.verbose) {
console.log(`Filter B: Dropping ${feature.properties['addr:housenumber']}`)
}
+ if (argv.debug) {
+ debugStreams['filterB'].write(feature)
+ }
}
} else {
this.push(feature)
@@ -313,7 +345,7 @@ const reduceNonRange = new Transform({
})
// ndjson streams to output debug features
-const debugKeys = ['addrInRangeDifferentUnits']
+const debugKeys = ['addrInRangeDifferentUnits', 'filterA_dropRangeRangeNoFlatsNonRangeHasFlats', 'filterA_dropRange', 'filterB']
const debugStreams = {}
const debugStreamOutputs = {}
@@ -335,39 +367,54 @@ pipeline(
console.log(err)
process.exit(1)
} else {
- // second pass to remove range duplicates
- console.log('Pass 2/2: remove range duplicates')
+ // second pass to remove range duplicates part A
+ console.log('Pass 2/3: remove range duplicates part A ranges')
pipeline(
fs.createReadStream(inputFile),
ndjson.parse(),
reduceRange,
- reduceNonRange,
ndjson.stringify(),
- fs.createWriteStream(outputFile),
+ fs.createWriteStream(intermediateFile),
err => {
if (err) {
console.log(err)
process.exit(1)
} else {
- if (argv.debug) {
- debugKeys.forEach(key => {
- debugStreams[key].end()
- })
-
- Promise.all(debugKeys.map(key => {
- return new Promise(resolve => {
- debugStreamOutputs[key].on('finish', () => {
- console.log(`saved debug/reduceRangeDuplicates/${key}.geojson`)
- resolve()
- })
- })
- }))
- .then(() => {
- process.exit(0)
- })
- } else {
- process.exit(0)
- }
+ console.log('Pass 3/3: remove range duplicates part B endpoints')
+ pipeline(
+ fs.createReadStream(intermediateFile),
+ ndjson.parse(),
+ reduceNonRange,
+ ndjson.stringify(),
+ fs.createWriteStream(outputFile),
+ err => {
+ fs.unlinkSync(intermediateFile)
+ if (err) {
+ console.log(err)
+ process.exit(1)
+ } else {
+ if (argv.debug) {
+ debugKeys.forEach(key => {
+ debugStreams[key].end()
+ })
+
+ Promise.all(debugKeys.map(key => {
+ return new Promise(resolve => {
+ debugStreamOutputs[key].on('finish', () => {
+ console.log(`saved debug/reduceRangeDuplicates/${key}.geojson`)
+ resolve()
+ })
+ })
+ }))
+ .then(() => {
+ process.exit(0)
+ })
+ } else {
+ process.exit(0)
+ }
+ }
+ }
+ )
}
}
)
diff --git a/test/fixtures/reduceRangeDuplicates/expectedOutput1.geojson b/test/fixtures/reduceRangeDuplicates/expectedOutput1.geojson
new file mode 100644
index 0000000..1e8f2c7
--- /dev/null
+++ b/test/fixtures/reduceRangeDuplicates/expectedOutput1.geojson
@@ -0,0 +1,2 @@
+{"type":"Feature","properties":{"_pfi":"50586965,50585085,425955908","addr:housenumber":"39","addr:street":"Linton Street","addr:suburb":"Balaclava","addr:state":"VIC","addr:postcode":"3183","addr:unit":null,"addr:flats":null,"addr:flats2":null,"addr:flats3":null,"addr:flats4":null,"addr:flats5":null,"addr:flats6":null,"addr:flats7":null},"geometry":{"type":"Point","coordinates":[144.9924932,-37.8669844]}}
+{"type":"Feature","properties":{"_pfi":"50585086,425955907","addr:housenumber":"41","addr:street":"Linton Street","addr:suburb":"Balaclava","addr:state":"VIC","addr:postcode":"3183","addr:unit":null,"addr:flats":null,"addr:flats2":null,"addr:flats3":null,"addr:flats4":null,"addr:flats5":null,"addr:flats6":null,"addr:flats7":null},"geometry":{"type":"Point","coordinates":[144.992463,-37.8670328]}}
diff --git a/test/fixtures/reduceRangeDuplicates/input1.geojson b/test/fixtures/reduceRangeDuplicates/input1.geojson
new file mode 100644
index 0000000..d91155d
--- /dev/null
+++ b/test/fixtures/reduceRangeDuplicates/input1.geojson
@@ -0,0 +1,3 @@
+{ "type": "Feature", "properties": { "_pfi": "50586965,50585085,425955908", "addr:housenumber": "39", "addr:street": "Linton Street", "addr:suburb": "Balaclava", "addr:state": "VIC", "addr:postcode": "3183", "addr:unit": null, "addr:flats": null, "addr:flats2": null, "addr:flats3": null, "addr:flats4": null, "addr:flats5": null, "addr:flats6": null, "addr:flats7": null }, "geometry": { "type": "Point", "coordinates": [ 144.9924932, -37.8669844 ] } }
+{ "type": "Feature", "properties": { "_pfi": "206441264", "addr:housenumber": "39-41", "addr:street": "Linton Street", "addr:suburb": "Balaclava", "addr:state": "VIC", "addr:postcode": "3183", "addr:unit": null, "addr:flats": null, "addr:flats2": null, "addr:flats3": null, "addr:flats4": null, "addr:flats5": null, "addr:flats6": null, "addr:flats7": null }, "geometry": { "type": "Point", "coordinates": [ 144.9925274, -37.8670149 ] } }
+{ "type": "Feature", "properties": { "_pfi": "50585086,425955907", "addr:housenumber": "41", "addr:street": "Linton Street", "addr:suburb": "Balaclava", "addr:state": "VIC", "addr:postcode": "3183", "addr:unit": null, "addr:flats": null, "addr:flats2": null, "addr:flats3": null, "addr:flats4": null, "addr:flats5": null, "addr:flats6": null, "addr:flats7": null }, "geometry": { "type": "Point", "coordinates": [ 144.992463, -37.8670328 ] } }
diff --git a/test/reduceRangeDuplicates.js b/test/reduceRangeDuplicates.js
index 091390c..21c80c4 100644
--- a/test/reduceRangeDuplicates.js
+++ b/test/reduceRangeDuplicates.js
@@ -3,7 +3,7 @@ const fs = require('fs')
const child_process = require('child_process')
const mktemp = require('mktemp')
-function createFeature(unit, housenumber, street, suburb, flats) {
+function createFeature(coordinates, unit, housenumber, street, suburb, flats) {
return {
type: 'Feature',
properties: {
@@ -15,18 +15,21 @@ function createFeature(unit, housenumber, street, suburb, flats) {
'addr:state': 'VIC',
'addr:postcode': '0000'
},
- geometry: null
+ geometry: coordinates ? {
+ type: 'Point',
+ coordinates: coordinates
+ } : null
}
}
-test('reduceRangeDuplicates', t => {
+test('reduceRangeDuplicates distinct geometries', t => {
const inputFile = mktemp.createFileSync('/tmp/input_XXXXX.geojson')
const outputFile = mktemp.createFileSync('/tmp/output_XXXXX.geojson')
const expectedFile = mktemp.createFileSync('/tmp/expected_XXXXX.geojson')
- const AB = createFeature(null, '304-306', 'Cardigan Street', 'Carlton')
- const A = createFeature(null, '304', 'Cardigan Street', 'Carlton')
- const B = createFeature(null, '306', 'Cardigan Street', 'Carlton')
+ const AB = createFeature([0, 0], null, '304-306', 'Cardigan Street', 'Carlton')
+ const A = createFeature([-1, 0], null, '304', 'Cardigan Street', 'Carlton')
+ const B = createFeature([1, 0], null, '306', 'Cardigan Street', 'Carlton')
// all three features to appear in input
fs.appendFileSync(inputFile, JSON.stringify(AB) + '\n')
@@ -37,11 +40,11 @@ test('reduceRangeDuplicates', t => {
fs.appendFileSync(expectedFile, JSON.stringify(A) + '\n')
fs.appendFileSync(expectedFile, JSON.stringify(B) + '\n')
- child_process.execSync(`./bin/reduceRangeDuplicates.js ${inputFile} ${outputFile}`)
+ child_process.execSync(`./bin/reduceRangeDuplicates.js --verbose ${inputFile} ${outputFile}`)
t.same(
- fs.readFileSync(outputFile),
- fs.readFileSync(expectedFile),
+ fs.readFileSync(outputFile, 'utf-8').trim().split('\n').map(JSON.parse),
+ fs.readFileSync(expectedFile, 'utf-8').trim().split('\n').map(JSON.parse),
'range with endpoints appearing separately, drops range'
)
@@ -52,13 +55,45 @@ test('reduceRangeDuplicates', t => {
t.end()
})
+test('reduceRangeDuplicates overlapping geometries', t => {
+ const inputFile = mktemp.createFileSync('/tmp/input_XXXXX.geojson')
+ const outputFile = mktemp.createFileSync('/tmp/output_XXXXX.geojson')
+ const expectedFile = mktemp.createFileSync('/tmp/expected_XXXXX.geojson')
+
+ const AB = createFeature([0, 0], null, '304-306', 'Cardigan Street', 'Carlton')
+ const A = createFeature([0, 0], null, '304', 'Cardigan Street', 'Carlton')
+ const B = createFeature([0, 0], null, '306', 'Cardigan Street', 'Carlton')
+
+ // all three features to appear in input
+ fs.appendFileSync(inputFile, JSON.stringify(AB) + '\n')
+ fs.appendFileSync(inputFile, JSON.stringify(A) + '\n')
+ fs.appendFileSync(inputFile, JSON.stringify(B) + '\n')
+
+ // output expected to drop the endpoints and retain the range since endpoints are overlapping
+ fs.appendFileSync(expectedFile, JSON.stringify(AB) + '\n')
+
+ child_process.execSync(`./bin/reduceRangeDuplicates.js --verbose ${inputFile} ${outputFile}`)
+
+ t.same(
+ fs.readFileSync(outputFile, 'utf-8').trim().split('\n').map(JSON.parse),
+ fs.readFileSync(expectedFile, 'utf-8').trim().split('\n').map(JSON.parse),
+ 'range with endpoints appearing separately but overlapping, drops the endpoints'
+ )
+
+ fs.unlinkSync(inputFile)
+ fs.unlinkSync(outputFile)
+ fs.unlinkSync(expectedFile)
+
+ t.end()
+})
+
test('reduceRangeDuplicates', t => {
const inputFile = mktemp.createFileSync('/tmp/input_XXXXX.geojson')
const outputFile = mktemp.createFileSync('/tmp/output_XXXXX.geojson')
const expectedFile = mktemp.createFileSync('/tmp/expected_XXXXX.geojson')
- const AC = createFeature(null, '249-263', 'Faraday Street', 'Carlton')
- const B = createFeature(null, '251', 'Faraday Street', 'Carlton')
+ const AC = createFeature(null, null, '249-263', 'Faraday Street', 'Carlton')
+ const B = createFeature(null, null, '251', 'Faraday Street', 'Carlton')
// both features to appear in input
fs.appendFileSync(inputFile, JSON.stringify(AC) + '\n')
@@ -67,11 +102,11 @@ test('reduceRangeDuplicates', t => {
// output expected to just be range, dropping the midpoint
fs.appendFileSync(expectedFile, JSON.stringify(AC) + '\n')
- child_process.execSync(`./bin/reduceRangeDuplicates.js ${inputFile} ${outputFile}`)
+ child_process.execSync(`./bin/reduceRangeDuplicates.js --verbose ${inputFile} ${outputFile}`)
t.same(
- fs.readFileSync(outputFile),
- fs.readFileSync(expectedFile),
+ fs.readFileSync(outputFile, 'utf-8').trim().split('\n').map(JSON.parse),
+ fs.readFileSync(expectedFile, 'utf-8').trim().split('\n').map(JSON.parse),
'range with lone midpoint, drops midpoint'
)
@@ -87,8 +122,8 @@ test('reduceRangeDuplicates', t => {
const outputFile = mktemp.createFileSync('/tmp/output_XXXXX.geojson')
const expectedFile = mktemp.createFileSync('/tmp/expected_XXXXX.geojson')
- const AC = createFeature(null, '249-263', 'Faraday Street', 'Carlton')
- const B = createFeature('1', '251', 'Faraday Street', 'Carlton')
+ const AC = createFeature(null, null, '249-263', 'Faraday Street', 'Carlton')
+ const B = createFeature(null, '1', '251', 'Faraday Street', 'Carlton')
// both features to appear in input
fs.appendFileSync(inputFile, JSON.stringify(AC) + '\n')
@@ -98,11 +133,11 @@ test('reduceRangeDuplicates', t => {
fs.appendFileSync(expectedFile, JSON.stringify(AC) + '\n')
fs.appendFileSync(expectedFile, JSON.stringify(B) + '\n')
- child_process.execSync(`./bin/reduceRangeDuplicates.js ${inputFile} ${outputFile}`)
+ child_process.execSync(`./bin/reduceRangeDuplicates.js --verbose ${inputFile} ${outputFile}`)
t.same(
- fs.readFileSync(outputFile),
- fs.readFileSync(expectedFile),
+ fs.readFileSync(outputFile, 'utf-8').trim().split('\n').map(JSON.parse),
+ fs.readFileSync(expectedFile, 'utf-8').trim().split('\n').map(JSON.parse),
'midpoint with unit not dropped'
)
@@ -118,22 +153,22 @@ test('reduceRangeDuplicates', t => {
const outputFile = mktemp.createFileSync('/tmp/output_XXXXX.geojson')
const expectedFile = mktemp.createFileSync('/tmp/expected_XXXXX.geojson')
- const AC = createFeature(null, '249-263', 'Faraday Street', 'Carlton')
- const B = createFeature(null, '251', 'Faraday Street', 'Carlton', '1;2;3')
+ const AC = createFeature(null, null, '249-263', 'Faraday Street', 'Carlton')
+ const B = createFeature(null, null, '251', 'Faraday Street', 'Carlton', '1;2;3')
// both features to appear in input
fs.appendFileSync(inputFile, JSON.stringify(AC) + '\n')
fs.appendFileSync(inputFile, JSON.stringify(B) + '\n')
- // output expected to both features because dropping the midpoint would loose the unit
+  // output expected to include both features because dropping the midpoint would lose the flats
fs.appendFileSync(expectedFile, JSON.stringify(AC) + '\n')
fs.appendFileSync(expectedFile, JSON.stringify(B) + '\n')
- child_process.execSync(`./bin/reduceRangeDuplicates.js ${inputFile} ${outputFile}`)
+ child_process.execSync(`./bin/reduceRangeDuplicates.js --verbose ${inputFile} ${outputFile}`)
t.same(
- fs.readFileSync(outputFile),
- fs.readFileSync(expectedFile),
+ fs.readFileSync(outputFile, 'utf-8').trim().split('\n').map(JSON.parse),
+ fs.readFileSync(expectedFile, 'utf-8').trim().split('\n').map(JSON.parse),
'midpoint with flats not dropped'
)