diff --git a/.travis.yml b/.travis.yml index e1c2a957..92ac5bda 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ matrix: include: - os: osx language: generic - env: + env: - PY=3.6 - nedocker=NO before_install: @@ -40,10 +40,10 @@ matrix: # - os: linux -# env: +# env: # - PY=3.6 # - nedocker=NO -# sudo: required +# sudo: required # before_install: # - sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable --yes # - sudo apt-get --yes --force-yes update -qq @@ -71,15 +71,15 @@ matrix: # python: 3.6 # os: linux # env: -# - nedocker=NO -# sudo: required +# - nedocker=NO +# sudo: required # before_install: # - sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable --yes # - sudo apt-get --yes --force-yes update -qq # - sudo apt-get install --yes gdal-bin jq # - pip3 install -U SPARQLWrapper # - pip3 install -U fiona -# - python -c "import fiona" +# - python -c "import fiona" # - pip3 install -U csvtomd # - pip3 install -U requests # script: @@ -91,15 +91,15 @@ matrix: # python: 3.5 # os: linux # env: -# - nedocker=NO -# sudo: required +# - nedocker=NO +# sudo: required # before_install: # - sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable --yes # - sudo apt-get --yes --force-yes update -qq # - sudo apt-get install --yes gdal-bin jq # - pip3 install -U SPARQLWrapper # - pip3 install -U fiona -# - python -c "import fiona" +# - python -c "import fiona" # - pip3 install -U csvtomd # - pip3 install -U requests # script: @@ -109,11 +109,11 @@ matrix: - services: docker os: linux env: - - nedocker=YES - sudo: required + - nedocker=YES + sudo: required before_install: - docker version - docker build -t ne_py3wikidata . 
- docker images - script: + script: - docker run -it -v $(pwd):/ne ne_py3wikidata bash run_all.sh diff --git a/run_all.sh b/run_all.sh index 34ce3f4e..b5babe9c 100755 --- a/run_all.sh +++ b/run_all.sh @@ -1,7 +1,6 @@ #!/bin/bash set -Eeuo pipefail - STARTDATE=$(date +"%Y-%m-%dT%H:%M%z") # clean and recreate x_tempshape directory @@ -12,12 +11,13 @@ log_file=x_tempshape/run_all.log exec &> >(tee -a "$log_file") # Don't forget update the VERSION file! -cat VERSION +echo "-----------------------------------" +echo "Version $(cat VERSION)" +echo "Start: $STARTDATE " # Show some debug info python3 ./tools/wikidata/platform_debug_info.py - # Summary Log file logmd=x_tempshape/update.md rm -f $logmd @@ -27,59 +27,59 @@ rm -f $logmd # LetterCase = uppercase --> variable names [WIKIDATAID, NAME_AR, NAME_BN, NAME_DE, NAME_EN, NAME_ES, ... ] # LetterCase = lowercase --> variable names [wikidataid, name_ar, name_bn, name_de, name_en, name_es, ... ] # -------------------------------------------------------------------------------------------------------------------- -# |mode |LetterCase| shape_path | shape filename -# == 10m ================= |==== |==========| ============| ================================================ -./tools/wikidata/update.sh all uppercase 10m_cultural ne_10m_admin_0_countries_lakes -./tools/wikidata/update.sh all uppercase 10m_cultural ne_10m_admin_0_countries -./tools/wikidata/update.sh all uppercase 10m_cultural ne_10m_admin_0_disputed_areas -./tools/wikidata/update.sh all uppercase 10m_cultural ne_10m_admin_0_map_subunits -./tools/wikidata/update.sh all uppercase 10m_cultural ne_10m_admin_0_map_units -./tools/wikidata/update.sh all uppercase 10m_cultural ne_10m_admin_0_sovereignty -./tools/wikidata/update.sh all lowercase 10m_cultural ne_10m_admin_1_states_provinces_lakes -./tools/wikidata/update.sh all lowercase 10m_cultural ne_10m_admin_1_states_provinces -./tools/wikidata/update.sh all lowercase 10m_cultural ne_10m_airports 
-./tools/wikidata/update.sh all lowercase 10m_cultural ne_10m_populated_places -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_geographic_lines -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_geography_marine_polys -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_geography_regions_elevation_points -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_geography_regions_points -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_geography_regions_polys -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_lakes_europe -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_lakes_historic -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_lakes_north_america -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_lakes -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_playas -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_rivers_europe -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_rivers_lake_centerlines_scale_rank -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_rivers_lake_centerlines -./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_rivers_north_america -./tools/wikidata/update.sh all lowercase 10m_cultural ne_10m_admin_1_label_points_details -# == 50m ================= |==== |==========| ============| ================================================ -./tools/wikidata/update.sh all uppercase 50m_cultural ne_50m_admin_0_sovereignty -./tools/wikidata/update.sh all uppercase 50m_cultural ne_50m_admin_0_countries -./tools/wikidata/update.sh all uppercase 50m_cultural ne_50m_admin_0_countries_lakes -./tools/wikidata/update.sh all uppercase 50m_cultural ne_50m_admin_0_map_units -./tools/wikidata/update.sh all uppercase 50m_cultural ne_50m_admin_0_map_subunits -./tools/wikidata/update.sh all uppercase 50m_cultural ne_50m_admin_0_tiny_countries -#./tools/wikidata/update.sh all uppercase 50m_cultural 
ne_50m_admin_0_breakaway_disputed_areas # KeyError: 'WIKIDATAID' -#./tools/wikidata/update.sh all uppercase 50m_cultural ne_50m_admin_0_breakaway_disputed_areas_scale_rank # KeyError: 'WIKIDATAID' -./tools/wikidata/update.sh all lowercase 50m_cultural ne_50m_admin_1_states_provinces -./tools/wikidata/update.sh all lowercase 50m_cultural ne_50m_admin_1_states_provinces_lakes -./tools/wikidata/update.sh all lowercase 50m_physical ne_50m_lakes -./tools/wikidata/update.sh all lowercase 50m_physical ne_50m_lakes_historic -./tools/wikidata/update.sh all lowercase 50m_physical ne_50m_playas -./tools/wikidata/update.sh all lowercase 50m_physical ne_50m_rivers_lake_centerlines -./tools/wikidata/update.sh all lowercase 50m_physical ne_50m_rivers_lake_centerlines_scale_rank -# ==110m ================= |==== |==========| ============| ================================================ -./tools/wikidata/update.sh all uppercase 110m_cultural ne_110m_admin_0_sovereignty -./tools/wikidata/update.sh all uppercase 110m_cultural ne_110m_admin_0_countries -./tools/wikidata/update.sh all uppercase 110m_cultural ne_110m_admin_0_countries_lakes -./tools/wikidata/update.sh all uppercase 110m_cultural ne_110m_admin_0_map_units -./tools/wikidata/update.sh all lowercase 110m_cultural ne_110m_admin_1_states_provinces -./tools/wikidata/update.sh all lowercase 110m_cultural ne_110m_admin_1_states_provinces_lakes -./tools/wikidata/update.sh all lowercase 110m_physical ne_110m_lakes -./tools/wikidata/update.sh all lowercase 110m_physical ne_110m_rivers_lake_centerlines -# ======================== |==== |==========| ============| ================================================ +# | mode |LetterCase| shape_path | shape filename +# == 10m ================= |=========== |==========| ============| ================================================ +./tools/wikidata/update.sh fetch_write uppercase 10m_cultural ne_10m_admin_0_countries_lakes +./tools/wikidata/update.sh fetch_write uppercase 10m_cultural 
ne_10m_admin_0_countries +./tools/wikidata/update.sh fetch_write uppercase 10m_cultural ne_10m_admin_0_disputed_areas +./tools/wikidata/update.sh fetch_write uppercase 10m_cultural ne_10m_admin_0_map_subunits +./tools/wikidata/update.sh fetch_write uppercase 10m_cultural ne_10m_admin_0_map_units +./tools/wikidata/update.sh fetch_write uppercase 10m_cultural ne_10m_admin_0_sovereignty +./tools/wikidata/update.sh fetch_write lowercase 10m_cultural ne_10m_admin_1_states_provinces_lakes +./tools/wikidata/update.sh fetch_write lowercase 10m_cultural ne_10m_admin_1_states_provinces +./tools/wikidata/update.sh fetch_write lowercase 10m_cultural ne_10m_airports +./tools/wikidata/update.sh fetch_write lowercase 10m_cultural ne_10m_populated_places +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_geographic_lines +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_geography_marine_polys +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_geography_regions_elevation_points +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_geography_regions_points +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_geography_regions_polys +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_lakes_europe +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_lakes_historic +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_lakes_north_america +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_lakes +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_playas +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_rivers_europe +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_rivers_lake_centerlines_scale_rank +./tools/wikidata/update.sh fetch_write lowercase 10m_physical ne_10m_rivers_lake_centerlines +./tools/wikidata/update.sh fetch_write lowercase 10m_physical 
ne_10m_rivers_north_america +./tools/wikidata/update.sh fetch_write lowercase 10m_cultural ne_10m_admin_1_label_points_details +# == 50m ================= |=========== |==========| ============| ================================================ +./tools/wikidata/update.sh fetch_write uppercase 50m_cultural ne_50m_admin_0_sovereignty +./tools/wikidata/update.sh fetch_write uppercase 50m_cultural ne_50m_admin_0_countries +./tools/wikidata/update.sh fetch_write uppercase 50m_cultural ne_50m_admin_0_countries_lakes +./tools/wikidata/update.sh fetch_write uppercase 50m_cultural ne_50m_admin_0_map_units +./tools/wikidata/update.sh fetch_write uppercase 50m_cultural ne_50m_admin_0_map_subunits +./tools/wikidata/update.sh fetch_write uppercase 50m_cultural ne_50m_admin_0_tiny_countries +#./tools/wikidata/update.sh fetch_write uppercase 50m_cultural ne_50m_admin_0_breakaway_disputed_areas # KeyError: 'WIKIDATAID' +#./tools/wikidata/update.sh fetch_write uppercase 50m_cultural ne_50m_admin_0_breakaway_disputed_areas_scale_rank # KeyError: 'WIKIDATAID' +./tools/wikidata/update.sh fetch_write lowercase 50m_cultural ne_50m_admin_1_states_provinces +./tools/wikidata/update.sh fetch_write lowercase 50m_cultural ne_50m_admin_1_states_provinces_lakes +./tools/wikidata/update.sh fetch_write lowercase 50m_physical ne_50m_lakes +./tools/wikidata/update.sh fetch_write lowercase 50m_physical ne_50m_lakes_historic +./tools/wikidata/update.sh fetch_write lowercase 50m_physical ne_50m_playas +./tools/wikidata/update.sh fetch_write lowercase 50m_physical ne_50m_rivers_lake_centerlines +./tools/wikidata/update.sh fetch_write lowercase 50m_physical ne_50m_rivers_lake_centerlines_scale_rank +# ==110m ================= |=========== |==========| ============| ================================================ +./tools/wikidata/update.sh fetch_write uppercase 110m_cultural ne_110m_admin_0_sovereignty +./tools/wikidata/update.sh fetch_write uppercase 110m_cultural ne_110m_admin_0_countries 
+./tools/wikidata/update.sh fetch_write uppercase 110m_cultural ne_110m_admin_0_countries_lakes +./tools/wikidata/update.sh fetch_write uppercase 110m_cultural ne_110m_admin_0_map_units +./tools/wikidata/update.sh fetch_write lowercase 110m_cultural ne_110m_admin_1_states_provinces +./tools/wikidata/update.sh fetch_write lowercase 110m_cultural ne_110m_admin_1_states_provinces_lakes +./tools/wikidata/update.sh fetch_write lowercase 110m_physical ne_110m_lakes +./tools/wikidata/update.sh fetch_write lowercase 110m_physical ne_110m_rivers_lake_centerlines +# ======================== |=========== |==========| ============| ================================================ # show summary cat x_tempshape/update.md @@ -87,8 +87,30 @@ cat x_tempshape/update.md # list new files ls -Gga x_tempshape/*/* +# Update shape files ( if everything is OK! ) +cp -r x_tempshape/10m_cultural/* 10m_cultural/ +cp -r x_tempshape/10m_physical/* 10m_physical/ +cp -r x_tempshape/50m_cultural/* 50m_cultural/ +cp -r x_tempshape/50m_physical/* 50m_physical/ +cp -r x_tempshape/110m_cultural/* 110m_cultural/ +cp -r x_tempshape/110m_physical/* 110m_physical/ + +# test copy mode ( write again .. 
) +./tools/wikidata/update.sh copy uppercase 10m_cultural ne_10m_admin_0_countries + + + # Run the final update process make clean all +echo " " +echo " ---------------------" +STOPDATE=$(date +"%Y-%m-%dT%H:%M%z") +echo "Stop: $STOPDATE " + +echo " see log file: " +ls -Gga $log_file +echo " " echo " ---- end of run_all.sh ------ " -ls -Gga $log_file \ No newline at end of file + + diff --git a/tools/wikidata/README.md b/tools/wikidata/README.md index 9d50f0bd..b3b010f8 100644 --- a/tools/wikidata/README.md +++ b/tools/wikidata/README.md @@ -9,7 +9,7 @@ #run from the project root ( expected 30-40 minutes ) # be careful this is running 'make all' - ./run_all.sh + ./run_all.sh # Check the log file cat x_tempshape/run_all.log @@ -34,7 +34,7 @@ x_tempshape/update.md ./tools/wikidata/update.sh all lowercase 10m_physical ne_10m_lakes_north_america ``` -mode = +mode = * fetch = fetch Wikidata Labels(names) via SPARQL - and create a csv file * write = create a new temp Shape file with the new wikidata names * fetch_write = fetch and write @@ -57,33 +57,171 @@ step by step ``` +# ./tools/wikidata/update.sh fetch ... + +`./tools/wikidata/update.sh fetch lowercase 10m_physical ne_10m_lakes_north_america` +* list input shape file variables +* query all wikidata labels +* write output : `x_tempshape/10m_physical/ne_10m_lakes_north_america.new_names.csv` + + +Example log: +```log +$ ./tools/wikidata/update.sh fetch lowercase 10m_physical ne_10m_lakes_north_america + +########## /tools/wikidata/update.sh parameters: + 1: mode : fetch + 2: nei_letter_case: lowercase + 3: neo_path : x_tempshape + 4: ne_shapepath : 10m_physical + 5: ne_shapefile : ne_10m_lakes_north_america + + + Fetch wikidata labels + ================================= +INFO: Open of `./10m_physical/ne_10m_lakes_north_america.shp' + using driver `ESRI Shapefile' successful. 
+ +Layer name: ne_10m_lakes_north_america +Geometry: Polygon +Feature Count: 1200 +Extent: (-164.284110, 8.988349) - (-18.569997, 82.292487) +Layer SRS WKT: +GEOGCS["GCS_WGS_1984", + DATUM["WGS_1984", + SPHEROID["WGS_84",6378137,298.257223563]], + PRIMEM["Greenwich",0], + UNIT["Degree",0.017453292519943295], + AUTHORITY["EPSG","4326"]] +uident: Real (25.9) +featurecla: String (50.0) +name: String (100.0) +name_alt: String (100.0) +note: String (100.0) +scalerank: Integer64 (10.0) +min_zoom: Real (6.1) +min_label: Real (4.1) +label: String (254.0) +wikidataid: String (254.0) +name_ar: String (254.0) +name_bn: String (254.0) +name_de: String (254.0) +name_en: String (254.0) +name_es: String (254.0) +name_fr: String (254.0) +name_el: String (254.0) +name_hi: String (254.0) +name_hu: String (254.0) +name_id: String (254.0) +name_it: String (254.0) +name_ja: String (254.0) +name_ko: String (254.0) +name_nl: String (254.0) +name_pl: String (254.0) +name_pt: String (254.0) +name_ru: String (254.0) +name_sv: String (254.0) +name_tr: String (254.0) +name_vi: String (254.0) +name_zh: String (254.0) +wdid_score: Integer (1.0) +ne_id: Integer64 (10.0) +- Start fetching Natural-Earth wikidata labels via SPARQL query - +fetch: wd:Q6474657 ... wd:Q5594723 +fetch: wd:Q5034223 ... wd:Q4208879 +Redirected: Q22702352 Q1799606 +fetch: wd:Q3114698 ... 
wd:Q595625 + - JOB end - + created : x_tempshape/10m_physical/ne_10m_lakes_north_america.new_names.csv + ``` -### /temp_shape/10m_physical/ne_10m_lakes_north_america.changes_log.csv # Column changes - csv format +#### x_tempshape/10m_physical/ne_10m_lakes_north_america.new_names.csv -``` -"wd_id","status","variable","value_old","value_new" -"Q1323525","NEWvalue","name_ko","","워싱턴 호" -"Q7356585","MODvalue","name_fr","William","William 'Bill' Dannelly Reservoir" -"Q15118728","NEWvalue","name_en","","Little Salmon Lake" -"Q7236081","NEWvalue","name_de","","Powell Lake" -"Q7236081","NEWvalue","name_es","","Powell Lake" -"Q7236081","NEWvalue","name_it","","Powell Lake" -"Q7236081","NEWvalue","name_nl","","Powell Lake" -"Q22702352","REDIRECT","wikidataid","Q22702352","Q1799606" -"Q22702352","MODvalue","name_de","lac Pusticamica","Lac Pusticamica" -"Q1800890","MODvalue","name_en","Lake Chemong","Chemong Lake" -"Q1800890","NEWvalue","name_sv","","Chemong Lake" +```csv +"wd_id","wd_id_new","population","name_ar","name_bn","name_de","name_en","name_es","name_fr","name_el","name_hi","name_hu","name_id","name_it","name_ja","name_ko","name_nl","name_pl","name_pt","name_ru","name_sv","name_tr","name_vi","name_zh" +"Q4397897","","","","","","Ross Barnett Reservoir","","","","","","","","","","","","","Росс Барнетт","","","","" +"Q1426999","","","","","Theodore Roosevelt Lake","Theodore Roosevelt Lake","","","","","","","","","","","","","Рузвельт","","","","" +"Q175554","","","","","Walker Lake","Walker Lake","","Walker Lake","","","Walker-tó","","","ウォーカー湖","","Walker Lake","","","Уокер","","","","" +"Q6908686","","","","","","Mooselookmeguntic Lake","","Mooselookmeguntic Lake","","","","","","","","","","","Муслукмегантик","","","","" +"Q1110527","","","","","Priest Lake","Priest Lake","","Priest Lake","","","","","","","","","","","Прист","","","","" +"Q1627906","","","","","","Caddo Lake","","lac Caddo","","","","","lago Caddo","","","Caddo Lake","","Lago 
Caddo","Каддо","","","","" +"Q4261031","","","","","","Lake Livingston","","lac Livingston","","","","","","","","","","","Ливингстон","","","","" +"Q4231229","","","","","","Lake Conroe","","Lake Conroe","","","","","","","","","","","Конро","","","","" +"Q2365354","","","","","Summer Lake","Summer Lake","","Summer Lake","","","","","","","","","","","Саммер","","","","" +... ``` -### ./temp_shape/10m_physical/ne_10m_lakes_north_america.changes_log.csv.md # Column changes - markdown +# ./tools/wikidata/update.sh write ... + +` ./tools/wikidata/update.sh write lowercase 10m_physical ne_10m_lakes_north_america` +* create new temp shapefile +* create some audits logs, statistics + +```log +$ ./tools/wikidata/update.sh write lowercase 10m_physical ne_10m_lakes_north_america + +########## /tools/wikidata/update.sh parameters: + 1: mode : write + 2: nei_letter_case: lowercase + 3: neo_path : x_tempshape + 4: ne_shapepath : 10m_physical + 5: ne_shapefile : ne_10m_lakes_north_america + + + Write shapefile with wikidata labels + ================================= + shapefile info : x_tempshape/10m_physical/ne_10m_lakes_north_america + +name_en/NAME_EN changes x_tempshape/10m_physical/ne_10m_lakes_north_america) +--------------------- +Q1800890 | MODvalue | name_en | Lake Chemong | Chemong Lake + +shapefilename | var | value +-----------------------------------------------|--------------------------|------- +./10m_physical/ne_10m_lakes_north_america.shp | New_name | 12 +./10m_physical/ne_10m_lakes_north_america.shp | Deleted_name | 0 +./10m_physical/ne_10m_lakes_north_america.shp | Modified_name | 3 +./10m_physical/ne_10m_lakes_north_america.shp | Empty_name | 7894 +./10m_physical/ne_10m_lakes_north_america.shp | Same_name | 1604 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_redirected | 1 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_notfound | 0 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_null | 747 
+./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_notnull | 453 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_badformated | 0 + + (write) created : + ------------------- +-rw-r--r-- 1 942 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.changes_log.csv +-rw-r--r-- 1 1393 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.changes_log.csv.md +-rw-r--r-- 1 5 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.cpg +-rw-r--r-- 1 7499890 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.dbf +-rw-r--r-- 1 57604 May 20 19:23 x_tempshape/10m_physical/ne_10m_lakes_north_america.new_names.csv +-rw-r--r-- 1 143 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.prj +-rw-r--r-- 1 573424 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.shp +-rw-r--r-- 1 9700 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.shx +-rw-r--r-- 1 749 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.summary_log.csv +-rw-r--r-- 1 967 May 20 19:27 x_tempshape/10m_physical/ne_10m_lakes_north_america.summary_log.csv.md + + +``` + +#### write - audit log: x_tempshape/10m_physical/ne_10m_lakes_north_america.changes_log.csv.md + +``` +$ cat x_tempshape/10m_physical/ne_10m_lakes_north_america.changes_log.csv.md wd_id | status | variable | value_old | value_new -----------|------------|--------------|-------------------|----------------------------------- Q1323525 | NEWvalue | name_ko | | 워싱턴 호 +Q1323525 | NEWvalue | name_pl | | Washington +Q1495651 | NEWvalue | name_sv | | Lake George +Q1627906 | NEWvalue | name_pt | | Lago Caddo Q7356585 | MODvalue | name_fr | William | William 'Bill' Dannelly Reservoir +Q13700 | NEWvalue | name_tr | | Texcoco Gölü Q15118728 | NEWvalue | name_en | | Little Salmon Lake +Q16931868 | NEWvalue | name_sv | | Athapapuskow Lake Q7236081 | NEWvalue | name_de | | Powell Lake Q7236081 | NEWvalue | name_es | | Powell Lake Q7236081 | NEWvalue | 
name_it | | Powell Lake @@ -92,65 +230,76 @@ Q22702352 | REDIRECT | wikidataid | Q22702352 | Q1799606 Q22702352 | MODvalue | name_de | lac Pusticamica | Lac Pusticamica Q1800890 | MODvalue | name_en | Lake Chemong | Chemong Lake Q1800890 | NEWvalue | name_sv | | Chemong Lake - - -### ./temp_shape/10m_physical/ne_10m_lakes_north_america.new_names.csv # input csv - -```bash -$ cat ./temp_shape/10m_physical/ne_10m_lakes_north_america.new_names.csv | head -"wd_id","wd_id_new","population","name_ar","name_bn","name_de","name_en","name_es","name_fr","name_el","name_hi","name_hu","name_id","name_it","name_ja","name_ko","name_nl","name_pl","name_pt","name_ru","name_sv","name_tr","name_vi","name_zh" -"Q1426999","","","","","Theodore Roosevelt Lake","Theodore Roosevelt Lake","","","","","","","","","","","","","Рузвельт","","","","" -"Q4397897","","","","","","Ross Barnett Reservoir","","","","","","","","","","","","","Росс Барнетт","","","","" -"Q175554","","","","","Walker Lake","Walker Lake","","Walker Lake","","","Walker-tó","","","ウォーカー湖","","Walker Lake","","","Уокер","","","","" -"Q6908686","","","","","","Mooselookmeguntic Lake","","Mooselookmeguntic Lake","","","","","","","","","","","Муслукмегантик","","","","" -"Q1110527","","","","","Priest Lake","Priest Lake","","Priest Lake","","","","","","","","","","","Прист","","","","" -"Q1627906","","","","","","Caddo Lake","","lac Caddo","","","","","lago Caddo","","","Caddo Lake","","","Каддо","","","","" -"Q4261031","","","","","","Lake Livingston","","lac Livingston","","","","","","","","","","","Ливингстон","","","","" -"Q4231229","","","","","","Lake Conroe","","Lake Conroe","","","","","","","","","","","Конро","","","","" -"Q2365354","","","","","Summer Lake","Summer Lake","","Summer Lake","","","","","","","","","","","Саммер","","","","" -... 
``` -### ./temp_shape/10m_physical/ne_10m_lakes_north_america.summary_log.csv # Summary of the changes - csv +### write - summary audit log: x_tempshape/10m_physical/ne_10m_lakes_north_america.summary_log.csv.md + +``` +$ cat x_tempshape/10m_physical/ne_10m_lakes_north_america.summary_log.csv.md +shapefilename | var | value +-----------------------------------------------|--------------------------|------- +./10m_physical/ne_10m_lakes_north_america.shp | New_name | 12 +./10m_physical/ne_10m_lakes_north_america.shp | Deleted_name | 0 +./10m_physical/ne_10m_lakes_north_america.shp | Modified_name | 3 +./10m_physical/ne_10m_lakes_north_america.shp | Empty_name | 7894 +./10m_physical/ne_10m_lakes_north_america.shp | Same_name | 1604 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_redirected | 1 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_notfound | 0 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_null | 747 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_notnull | 453 +./10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_badformated | 0 -```csv -"shapefilename","var","value" -"10m_physical/ne_10m_lakes_north_america.shp","New_name","7" -"10m_physical/ne_10m_lakes_north_america.shp","Deleted_name","0" -"10m_physical/ne_10m_lakes_north_america.shp","Modified_name","3" -"10m_physical/ne_10m_lakes_north_america.shp","Empty_name ","7899" -"10m_physical/ne_10m_lakes_north_america.shp","Same_name","1604" -"10m_physical/ne_10m_lakes_north_america.shp","Wikidataid_redirected","1" -"10m_physical/ne_10m_lakes_north_america.shp","Wikidataid_notfound","0" -"10m_physical/ne_10m_lakes_north_america.shp","Wikidataid_null","747" -"10m_physical/ne_10m_lakes_north_america.shp","Wikidataid_notnull","453" -"10m_physical/ne_10m_lakes_north_america.shp","Wikidataid_badformated","0" ``` -### ./temp_shape/10m_physical/ne_10m_lakes_north_america.shp.summary_log.csv.md # Summary of the changes - markdown +# ./tools/wikidata/update.sh 
copy ... + +Be careful with copy mode! + +`./tools/wikidata/update.sh copy lowercase 10m_physical ne_10m_lakes_north_america` +* copy the new files (shape + audit) to the original place + + + +``` +$ ./tools/wikidata/update.sh copy lowercase 10m_physical ne_10m_lakes_north_america + +########## /tools/wikidata/update.sh parameters: + 1: mode : copy + 2: nei_letter_case: lowercase + 3: neo_path : x_tempshape + 4: ne_shapepath : 10m_physical + 5: ne_shapefile : ne_10m_lakes_north_america + + + Copy shape + audit files + =============================== +'x_tempshape/10m_physical/ne_10m_lakes_north_america.changes_log.csv' -> '10m_physical/ne_10m_lakes_north_america.changes_log.csv' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.changes_log.csv.md' -> '10m_physical/ne_10m_lakes_north_america.changes_log.csv.md' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.cpg' -> '10m_physical/ne_10m_lakes_north_america.cpg' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.dbf' -> '10m_physical/ne_10m_lakes_north_america.dbf' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.new_names.csv' -> '10m_physical/ne_10m_lakes_north_america.new_names.csv' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.prj' -> '10m_physical/ne_10m_lakes_north_america.prj' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.shp' -> '10m_physical/ne_10m_lakes_north_america.shp' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.shx' -> '10m_physical/ne_10m_lakes_north_america.shx' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.summary_log.csv' -> '10m_physical/ne_10m_lakes_north_america.summary_log.csv' +'x_tempshape/10m_physical/ne_10m_lakes_north_america.summary_log.csv.md' -> '10m_physical/ne_10m_lakes_north_america.summary_log.csv.md' + +``` + -shapefilename | var | value ----------------------------------------------|--------------------------|------- -10m_physical/ne_10m_lakes_north_america.shp | New_name | 7 -10m_physical/ne_10m_lakes_north_america.shp | 
Deleted_name | 0 -10m_physical/ne_10m_lakes_north_america.shp | Modified_name | 3 -10m_physical/ne_10m_lakes_north_america.shp | Empty_name | 7899 -10m_physical/ne_10m_lakes_north_america.shp | Same_name | 1604 -10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_redirected | 1 -10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_notfound | 0 -10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_null | 747 -10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_notnull | 453 -10m_physical/ne_10m_lakes_north_america.shp | Wikidataid_badformated | 0 # My best practice ... -* Run step by step ( line by line) : `./run_all.sh` in `fetch_write` mode +* checkout the original shape files +* Run step by step ( line by line , table by table ) from the `./run_all.sh` in `fetch_write` mode * check the audit csv files ( Open by Libreoffice , filter ) * find & fix the 'fake' wikidata changes :( * iterate or modify input csv and write shape files -* check shape files and move the shape files to the correct folders +* check shape files + * if OK - move the shape files to the correct folders: `./tools/wikidata/update.sh copy ...` + # known problems @@ -164,8 +313,12 @@ WARNING:Fiona:CPLE_AppDefined in b'Value -3.34011000000000013 of field longitude ... ``` + + ### uppercase / lowercase variable names + + lettercase = uppercase variable names [WIKIDATAID, NAME_AR, NAME_BN, NAME_DE, NAME_EN, NAME_ES, ... ] * 10m_cultural/ne_10m_admin_0_countries_lakes.shp * 10m_cultural/ne_10m_admin_0_countries.shp @@ -199,7 +352,7 @@ lettercase = lowercase variable names [wikidataid, name_ar, name_bn, name_de, na * 50m_cultural/... * 50m_physical/... * 110m_cultural/... -* 110m_physical/... +* 110m_physical/... 
see the _latest_ information in the `./run_all.sh` @@ -230,3 +383,67 @@ variable name | language | language wikipedia link NAME_VI | Vietnamese | https://en.wikipedia.org/wiki/Vietnamese_language NAME_ZH | Chinese | https://en.wikipedia.org/wiki/Chinese_language +# Name cleaning + +minimal regexp implementation, hard coded in the `write_wikidata.py` + +TODO : needs a better implementation. + + +### remove `river` +if the shape file name contains the trigger word ('river') - run the regexp. + +```python +riverclean_regex = re.compile(r'\b('+'River'+r')\b', flags=re.IGNORECASE) +.... + if args.input_shape.lower().find('river') > 0: + wddic[qid][d] = riverclean_regex.sub('', wddic[qid][d]) +.... +``` + +the changes are written to the log. +``` +Q1330818 name_en name cleaning : Pite River --> Pite +Q16663 name_en name cleaning : Alagón River --> Alagón +Q14764 name_en name cleaning : Esla river --> Esla +Q14755 name_en name cleaning : Tormes River --> Tormes +Q71122 name_en name cleaning : Chir River --> Chir +Q192157 name_en name cleaning : Belaya River --> Belaya +Q202796 name_en name cleaning : Desna River --> Desna + +``` + + +### remove `Municipality of|Municipality|First Nation` words + + +if the shape file name contains the trigger word ('place') - run the regexp. + +```python +placeclean_regex = re.compile(r'\b('+'Municipality of|Municipality|First Nation'+r')\b', + flags=re.IGNORECASE) + + ... + # Places ... + if args.input_shape.lower().find('place') > 0: + wddic[qid][d] = placeclean_regex.sub('', wddic[qid][d]) + + ... 
remove 市(city) +``` + +example changes: +``` +Q3078079 name_en name cleaning : Fort Severn First Nation --> Fort Severn +Q3078079 name_nl name cleaning : Fort Severn First Nation --> Fort Severn +``` + +### remove 市(city) + +example changes: + +``` +Q68695 name_zh name cleaning : 泉州市 --> 泉州 +Q74881 name_zh name cleaning : 大连市 --> 大连 +Q74957 name_zh name cleaning : 鞍山市 --> 鞍山 +Q92381 name_zh name cleaning : 白城市 --> 白城 +``` diff --git a/tools/wikidata/update.sh b/tools/wikidata/update.sh index a748347e..b4afe46e 100755 --- a/tools/wikidata/update.sh +++ b/tools/wikidata/update.sh @@ -41,8 +41,8 @@ function fetch_names { python3 tools/wikidata/fetch_wikidata.py -input_shape_name ${nei_path}/${ne_shapepath}/${ne_shapefile}.shp \ -input_lettercase ${nei_letter_case} \ -output_csv_name ${neo_path}/${ne_shapepath}/${ne_shapefile}.new_names.csv - echo " created : ${neo_path}/${ne_shapepath}/${ne_shapefile}.new_names.csv " - echo " " + echo " created : ${neo_path}/${ne_shapepath}/${ne_shapefile}.new_names.csv " + echo " " } @@ -80,7 +80,7 @@ function write_names { csvtomd ${neo_path}/${ne_shapepath}/${ne_shapefile}.summary_log.csv > ${neo_path}/${ne_shapepath}/${ne_shapefile}.summary_log.csv.md echo " " - echo "name_en/NAME_EN changes ${neo_path}/${ne_shapepath}/${ne_shapefile}) " + echo "show only name_en/NAME_EN changes : ${neo_path}/${ne_shapepath}/${ne_shapefile} " echo "---------------------" cat ${neo_path}/${ne_shapepath}/${ne_shapefile}.changes_log.csv.md | grep MODvalue | grep name_en || true cat ${neo_path}/${ne_shapepath}/${ne_shapefile}.changes_log.csv.md | grep MODvalue | grep NAME_EN || true @@ -89,8 +89,8 @@ function write_names { cat ${neo_path}/${ne_shapepath}/${ne_shapefile}.summary_log.csv.md echo " " - echo " (write) created :" - echo " -------------------" + echo " (write) created shape and audit files:" + echo " -------------------" ls -Gga ${neo_path}/${ne_shapepath}/${ne_shapefile}* echo "" } @@ -106,30 +106,30 @@ function copy_names { if [[ "$mode" 
== "fetch" ]] then #echo "fetch" - fetch_names + fetch_names elif [[ "$mode" == "write" ]] then #echo "write" - write_names -elif [[ "$mode" == "fetch_write" ]] -then + write_names +elif [[ "$mode" == "fetch_write" ]] +then #echo "fetch_write " - fetch_names - write_names + fetch_names + write_names elif [[ "$mode" == "copy" ]] then #echo "copy files" - copy_names -elif [[ "$mode" == "all" ]] -then + copy_names +elif [[ "$mode" == "all" ]] +then #echo "fetch + write + copy" - fetch_names + fetch_names write_names - copy_names -else + copy_names +else echo "Unknown mode! the first parameter should be:[fetch/write/fetch_write/copy/all]" - exit 1 -fi + exit 1 +fi exit