diff options
author | Andrew Harvey <andrew@alantgeo.com.au> | 2021-05-16 19:02:34 +1000 |
---|---|---|
committer | Andrew Harvey <andrew@alantgeo.com.au> | 2021-05-16 19:02:34 +1000 |
commit | c1ded85b8e3ed407a1d52d9b0aa62f988e17bd1e (patch) | |
tree | 28ad9938183718b8cee0e14d477ae6a1d2c1f593 | |
parent | 35e8f36bcddebc602b0749f3e5657d47c2307c10 (diff) |
add conflation stage which sorts blocks into contains OSM addresses or not
-rw-r--r-- | .gitlab-ci.yml | 15 | ||||
-rw-r--r-- | Makefile | 38 | ||||
-rw-r--r-- | README.md | 18 |
3 files changed, 71 insertions, 0 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8514577..e5f63c9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,6 +11,7 @@ stages: - docker - prepare - build + - conflate docker prepare: image: docker:latest @@ -92,3 +93,17 @@ build: paths: - dist - debug + +conflate: + stage: conflate + image: "registry.gitlab.com/alantgeo/vicmap2osm:conflate" + cache: + <<: *global_cache + policy: pull + script: + - yarn install + - make dist/addressesPerBlock.fgb + artifacts: + name: "conflate" + paths: + - dist @@ -82,3 +82,41 @@ data/victoria-addr.osm.geojson: data/victoria-addr.osm.pbf data/victoria-addr.osm.fgb: data/victoria-addr.osm.geojson ogr2ogr -f FlatGeobuf -nlt PROMOTE_TO_MULTI -skipfailures -mapFieldType Integer64List=String $@ $< + +data/victoria-addr.osm.centroids.fgb: data/victoria-addr.osm.fgb + qgis_process run native:centroids -- INPUT='$<|layername=victoria-addr.osm|option:VERIFY_BUFFERS=NO' OUTPUT=$@ + +data/asgs.zip: + wget -O $@ 'https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&1270055001_ASGS_2016_vol_1_geopackage.zip&1270.0.55.001&Data%20Cubes&C406A18CE1A6A50ACA257FED00145B1D&0&July%202016&12.07.2016&Latest' + +loadMB: data/asgs.zip + ogr2ogr -f PostgreSQL -where 'STATE_CODE_2016 = 2' PG: /vsizip/data/asgs.zip/ASGS\ 2016\ Volume\ 1.gpkg -nln mb -select 'MB_CODE_2016' MB_2016_AUST + +data/mb.geojson: data/asgs.zip + ogr2ogr -f GeoJSON -where 'STATE_CODE_2016 = 2' $@ /vsizip/data/asgs.zip/ASGS\ 2016\ Volume\ 1.gpkg -lco WRITE_BBOX=YES -lco COORDINATE_PRECISION=7 -lco RFC7946=YES -lco WRITE_NAME=NO -lco ID_FIELD=MB_CODE_2016 -nln mb -select 'MB_CODE_2016' MB_2016_AUST + +data/mb.fgb: data/mb.geojson + ogr2ogr -f FlatGeobuf $@ $< + +# extract roads from OSM +data/victoria-roads.osm.pbf: data/victoria.osm.pbf + osmium tags-filter --remove-tags --output=$@ $< w/highway=motorway,trunk,primary,secondary,tertiary,unclassified,residential,living_street,road + +# extract road lines into geojson +data/victoria-roads.geojson: data/victoria-roads.osm.pbf + osmium export 
--geometry-types=linestring --output-format=geojsonseq --output $@ $< + +# then convert to fgb +data/victoria-roads.fgb: data/victoria-roads.geojson + ogr2ogr -f FlatGeobuf -nlt LINESTRING $@ $< + +# construct block polygons based on OSM roads +data/blocks.fgb: data/victoria-roads.fgb + qgis_process run native:polygonize -- INPUT=$< KEEP_FIELDS=FALSE OUTPUT=$@ + +# count OSM addresses by block, those with no OSM addresses we can import all the candidate addresses without conflation issues +dist/addressesPerBlock.fgb: data/victoria-addr.osm.centroids.fgb data/blocks.fgb + qgis_process run native:countpointsinpolygon -- POINTS=$< POLYGONS='data/blocks.fgb|layername=blocks' FIELD=NUMPOINTS OUTPUT=$@ + +summariseAddressesPerBlock: dist/addressesPerBlock.fgb + ogrinfo -dialect sqlite -sql 'select count(*), NUMPOINTS = 0 from addressesPerBlock group by (NUMPOINTS = 0)' $< @@ -133,3 +133,21 @@ One potential solution is to encode this in the `addr:` key like `addr:unit`, `a Another solution is use a new tag like `addr:unit:prefix=Unit`, although there is no existing usage of this tagging scheme ([taginfo](https://taginfo.openstreetmap.org/search?q=addr%3Aunit#keys)). In the current codebase this information is omitted. + +## Import Procedure +To conduct the import, given some addresses are already mapped in OSM we break the state down into city blocks. Where a block contains no addresses in OSM then we consider it low risk to automatically import all addresses in the block. The only risk is the address in either OSM or the source data is in the wrong block, but this is less likely and would be hard to detect otherwise. + +Where a block already contains some addresses in OSM, it will either need further conflation or need to be manually reviewed prior to importing. 
+ +Generate the latest view of address data in OSM: + + make data/victoria-addr.osm.fgb + make data/victoria-addr.osm.centroids.fgb + +Generate city blocks: + + make data/blocks.fgb + +Sort blocks into containing some OSM addresses or containing no OSM addresses: + + make dist/addressesPerBlock.fgb |