aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuchen Pei <id@ypei.org>2023-10-07 12:46:01 +1100
committerYuchen Pei <id@ypei.org>2023-10-07 12:46:01 +1100
commit928dc289a99f8bea5314ac29e857ab648f71e8cf (patch)
tree7b34ac0e4d1a13c1f326be564a04b922313d5d3f
parent6973b0fc996ef0fea6cab1dbc1a89d7972e68270 (diff)
parent0e92206b7980b2de40cfb1c3186106b0afbca668 (diff)
Merge remote-tracking branch 'origin/master'
-rw-r--r--Makefile10
-rw-r--r--README.md85
-rwxr-xr-xbin/mr2osc.mjs2
-rw-r--r--lib/withinRange.js4
4 files changed, 71 insertions, 30 deletions
diff --git a/Makefile b/Makefile
index 2d46284..92f716e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,9 @@
-# download VicMap source data
-# the URL here usually gets manually updated weekly though no guarantees
-# it's a mirror of the upstream VICMAP data with split shp files reduced to a single shp file
+# download Vicmap Address source data
data/VICMAP_ADDRESS.zip:
mkdir -p data
wget --no-verbose --output-document=$@ https://s3-ap-southeast-2.amazonaws.com/cl-isd-prd-datashare-s3-delivery/Order_BGJ5MV.zip
-# cadastre used for debugging
+# download Vicmap Property cadastre (only used for debugging)
data/VICMAP_PROPERTY.zip:
mkdir -p data
wget --no-verbose --output-document=$@ https://s3-ap-southeast-2.amazonaws.com/cl-isd-prd-datashare-s3-delivery/Order_OTL5B2.zip
@@ -22,10 +20,10 @@ data/vicmap/ll_gda2020/filegdb/whole_of_dataset/victoria/VMPROP.gdb: data/VICMAP
touch --no-create $@
data/vicmap-property.fgb: data/vicmap/ll_gda2020/filegdb/whole_of_dataset/victoria/VMPROP.gdb
- ogr2ogr -f FlatGeobuf -t_srs 'EPSG:4236' -nlt PROMOTE_TO_MULTI $@ $< PARCEL_VIEW
+ ogr2ogr -f FlatGeobuf -t_srs 'EPSG:4326' -nlt PROMOTE_TO_MULTI $@ $< PARCEL_VIEW
data/vicmap.geojson:
- ogr2ogr -f GeoJSONSeq -t_srs 'EPSG:4236' -mapFieldType DateTime=String $@ data/vicmap/ll_gda2020/filegdb/whole_of_dataset/victoria/VICMAP_ADDRESS.gdb
+ ogr2ogr -f GeoJSONSeq -t_srs 'EPSG:4326' -mapFieldType DateTime=String $@ data/vicmap/ll_gda2020/filegdb/whole_of_dataset/victoria/VICMAP_ADDRESS.gdb
wc -l $@
# used for quick debugging
diff --git a/README.md b/README.md
index 402da1b..768fa85 100644
--- a/README.md
+++ b/README.md
@@ -26,21 +26,29 @@ We use GitLab CI/CD to automate data processing.
1. Download source Vicmap data (_prepare_ stage):
- make data/vicmap/ll_gda2020/filegdb/whole_of_dataset/victoria/VICMAP_ADDRESS.gdb
+```sh
+make data/vicmap/ll_gda2020/filegdb/whole_of_dataset/victoria/VICMAP_ADDRESS.gdb
+```
2. Convert to GeoJSON (_prepare_ stage):
- make data/vicmap.geojson
+```sh
+make data/vicmap.geojson
+```
The following steps are built into the _build vicmap_ stage.
3. Convert into the [OSM address schema](https://wiki.openstreetmap.org/wiki/Key:addr), and omit addresses which don't meet our threshold for import (see [_Omitted addresses_](#omitted-addresses)) (code at [`bin/vicmap2osm.js`](bin/vicmap2osm.js)):
- make dist/vicmap-osm-with-suburb.geojson
+```sh
+make dist/vicmap-osm-with-suburb.geojson
+```
4. Remove duplicates where all address attributes match at the same location or within a small proximity (code at [`bin/reduceDuplicates.js`](bin/reduceDuplicates.js), see [_Removing duplicates_](#removing-duplicates)):
- make dist/vicmap-osm-uniq.geojson
+```sh
+make dist/vicmap-osm-uniq.geojson
+```
Two debug outputs are produced from this step.
@@ -52,7 +60,9 @@ b) _multiCluster_ - visualises where all addresses with the same address propert
5. Reduce some address points with the exact same coordinates but different address attributes (see [_Removing duplicates_](#removing-duplicates) below) (code at [`bin/reduceOverlap.js`](bin/reduceOverlap.js)):
- make dist/vicmap-osm-uniq-flats.geojson
+```sh
+make dist/vicmap-osm-uniq-flats.geojson
+```
Two debug outputs are produced from this step.
@@ -61,11 +71,15 @@ b) _sameGeometry_ - where other features shared the same geometry, but this one
6. Drop some duplicate addresses created by ranges being expressed both as a single range and as individual points (see [_Duplicates through mixed range and points_](#duplicates-through-mixed-range-and-points) below) (code at [`bin/reduceRangeDuplicates.js`](bin/reduceRangeDuplicates.js)).
- make dist/vicmap-osm-uniq-flats-withinrange.geojson
+```sh
+make dist/vicmap-osm-uniq-flats-withinrange.geojson
+```
These results are in GeoJSON format; for easier viewing in QGIS, convert them to FGB with:
- make convertGeoJSONResultsToFGB
+```sh
+make convertGeoJSONResultsToFGB
+```
### Omitted addresses
@@ -253,20 +267,28 @@ Where there contains some addresses already in OSM for the block, then it will e
1. Generate the latest view of address data in OSM:
- make data/victoria-addr.osm.fgb
- make data/victoria-addr.osm.centroids.fgb
+```sh
+make data/victoria-addr.osm.fgb
+make data/victoria-addr.osm.centroids.fgb
+```
2. Generate city blocks:
- make data/blocks.fgb
+```sh
+make data/blocks.fgb
+```
3. Sort blocks into containing some OSM addresses or containing no OSM addresses:
- make dist/blocksByOSMAddr.geojson
+```sh
+make dist/blocksByOSMAddr.geojson
+```
4. Conflate Vicmap addresses with OSM (code at [`bin/conflate.js`](bin/conflate.js)):
- make dist/conflate
+```sh
+make dist/conflate
+```
This produces outputs in `dist/conflate`:
@@ -284,13 +306,17 @@ This is outputted as a MapRoulette challenge (`dist/conflate/mr_explodeUnitFromN
These results are in GeoJSON format; for easier viewing in QGIS, convert them to FGB with:
- make convertConflationResultsToFGB
+```sh
+make convertConflationResultsToFGB
+```
5. Further processing to conflate Vicmap complex and building names with OSM can be done via:
- make data/victoria-named-features.osm.geojson
- make dist/vicmap-complex-conflation
- make dist/vicmap-building-conflation
+```sh
+make data/victoria-named-features.osm.geojson
+make dist/vicmap-complex-conflation
+make dist/vicmap-building-conflation
+```
These outputs are described in the [Building Name](#building-name) and [Complex Name](#complex-name) sections.
@@ -310,11 +336,15 @@ A better way to review matches where some attributes differ, potentially as a qu
## Prepare Final Import Candidates
1. Prepare split `addr:unit` / `addr:housenumber` changeset to QA before uploading
- make dist/unitFromNumber.osc
+```sh
+make dist/unitFromNumber.osc
+```
2. After conflation, import candidate .osm files are produced with
- make dist/candidates
+```sh
+make dist/candidates
+```
This will split the conflation results into the following import candidate categories, then again split into suburb/locality (`admin_level=9`).
@@ -338,11 +368,18 @@ For background see [Inclusion of `addr:suburb`, `addr:postcode` and `addr:state`
Using JOSM RemoteControl commands, [`postal_code`](https://wiki.openstreetmap.org/wiki/Key:postal_code) will be added to the existing Victorian `admin_level=9` boundaries using the postcode derived from Vicmap Addresses. The exception is the suburb of Melbourne, because there are two postal codes in use there and the `postal_code` boundaries are already mapped.
- make printDifferentSuburbs
+```sh
+make printDifferentSuburbs
+```
The tag changes are created by [`bin/compareSuburb.js`](bin/compareSuburb.js), which writes the JOSM RemoteControl URLs to the file at `dist/postalCodeURLs.txt`, [https://gitlab.com/alantgeo/vicmap2osm/-/snippets/2133851](https://gitlab.com/alantgeo/vicmap2osm/-/snippets/2133851).
-- [ ] Changeset uploaded at XXX (~2473 features)
+Import procedure:
+
+- Open the first URL from `dist/postalCodeURLs.txt` and, in JOSM, choose "Accept all tags from localhost for this session" and then "Add all tags".
+- Then run `wget --input-file=dist/postalCodeURLs.txt --output-document=/dev/null` to request the URLs listed in the file.
+
+- [x] Changeset uploaded at https://www.openstreetmap.org/changeset/142031616 (2424 features)
### Stage 2 - Set unit from housenumber
During the conflation stage, where a Vicmap address was deemed to match an OSM address but OSM represented it as `addr:housenumber=X/Y` whereas Vicmap represented it as `addr:unit=X`, `addr:housenumber=Y`, an automated tag change is performed to move the unit into `addr:unit`.
@@ -355,7 +392,9 @@ You can visualise the tag changes with `bin/mrCoopDiff.js` and `www/mrPreview.ht
The actual changeset will be created with:
- ./bin/mr2osc.mjs --changeset-comment "Vicmap Address Import - Stage 2 - Separate addr:unit and addr:housenumber where matched with Vicmap and previously were combined as unit/number. See https://gitlab.com/alantgeo/vicmap2osm" dist/conflate/mr_explodeUnitFromNumber.geojson dist/uploads/Stage2_SetUnitFromHousenumber.osc
+```sh
+./bin/mr2osc.mjs --changeset-comment "Vicmap Address Import - Stage 2 - Separate addr:unit and addr:housenumber where matched with Vicmap and previously were combined as unit/number. See https://gitlab.com/alantgeo/vicmap2osm" dist/conflate/mr_explodeUnitFromNumber.geojson dist/uploads/Stage2_SetUnitFromHousenumber.osc
+```
- [ ] Changeset uploaded at XXX (~9832 features)
@@ -381,7 +420,9 @@ The changeset comment used is
Where a Vicmap address matched an OSM address, set `addr:flats` as derived from Vicmap.
- ./bin/mr2osc.mjs --changeset-comment "Vicmap Address Import - Stage 4 - Add addr:flats to existing addresses. See https://gitlab.com/alantgeo/vicmap2osm" dist/conflate/mr_exactMatchSetFlats.geojson dist/uploads/Stage4_MatchedAddressAddingAddrFlats.osc
+```sh
+./bin/mr2osc.mjs --changeset-comment "Vicmap Address Import - Stage 4 - Add addr:flats to existing addresses. See https://gitlab.com/alantgeo/vicmap2osm" dist/conflate/mr_exactMatchSetFlats.geojson dist/uploads/Stage4_MatchedAddressAddingAddrFlats.osc
+```
Because `addr:flats` may be tagged on an entrance node denoting which units are accessible from which entrance, in these cases in OSM we should not add `addr:flats` to the way. https://overpass-turbo.eu/s/196m shows there are only 8 existing cases of this, so these are manually removed from the import before executing.
diff --git a/bin/mr2osc.mjs b/bin/mr2osc.mjs
index 4bca6a2..fe5eb15 100755
--- a/bin/mr2osc.mjs
+++ b/bin/mr2osc.mjs
@@ -12,6 +12,7 @@ import _ from 'lodash'
import fetch from 'node-fetch'
import xml from 'xml-js'
import yargs from 'yargs'
+import path from 'path'
const argv = yargs(process.argv.slice(2))
.option('dry-run', {
@@ -283,7 +284,6 @@ async function uploadChanges() {
const totalChangesets = Math.ceil(totalElements / MAXIMUM_ELEMENTS_PER_UPLOAD_REQUEST)
if (totalChangesets > 1) {
console.log(`${totalElements} exceeds API maximum elements of ${MAXIMUM_ELEMENTS_PER_UPLOAD_REQUEST} splitting into ${totalChangesets} changesets`)
- process.exit(1)
}
for (let changesetIndex = 0; changesetIndex < totalChangesets; changesetIndex++) {
diff --git a/lib/withinRange.js b/lib/withinRange.js
index 81bb6d4..90665ff 100644
--- a/lib/withinRange.js
+++ b/lib/withinRange.js
@@ -37,7 +37,9 @@ module.exports = (feature, rangeFeature, options) => {
) : true
)
) {
- const rangeNumber = rangeFeature.properties["addr:housenumber"].split("/").length > 1 ? rangeFeature.properties["addr:housenumber"].split("/")[1] : rangeFeature.properties["addr:housenumber"];
+ const housenumber = rangeFeature.properties['addr:housenumber'].replace(/^(unit|level|shop|factory) \D*\d*[ ]?\&?[ ]?\D*\d*,?[ ]*/i, '')
+
+ const rangeNumber = housenumber.split("/").length > 1 ? housenumber.split("/")[1] : housenumber;
const rangeParts = rangeNumber.split('-')
if (rangeParts.length === 2) {
const fromMatch = rangeParts[0].match(regexp)