Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion import-automation/executor/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ chardet
chromedriver_py
croniter
dataclasses
datacommons
datacommons_client
db-dtypes
duckdb
Expand Down
15 changes: 6 additions & 9 deletions scripts/earthengine/process_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import pickle
import sys
import time
import datacommons as dc

from absl import app
from absl import flags
Expand Down Expand Up @@ -74,7 +73,7 @@
from counters import Counters
from latlng_recon_geojson import LatLng2Places
from config_map import ConfigMap
from dc_api_wrapper import dc_api_batched_wrapper
from dc_api_wrapper import dc_api_get_property

# List of place types in increasing order of preference for name.
# This is used to pick the name of the place from the list of affectedPlaces
Expand Down Expand Up @@ -699,13 +698,11 @@ def prefetch_placeid_property(self, prop: str, place_ids: list = None):

if lookup_places:
start_time = time.perf_counter()
cache_dict.update(
dc_api_batched_wrapper(function=dc.get_property_values,
dcids=lookup_places,
args={
'prop': prop,
},
config=self._config))
place_props = dc_api_get_property(lookup_places, prop)
for placeid, prop_value in place_props.items():
value = prop_value.get(prop)
if value:
cache_dict[placeid] = value
end_time = time.perf_counter()
self._counters.add_counter(f'dc_api_lookup_{prop}_count',
len(lookup_places))
Expand Down
20 changes: 5 additions & 15 deletions scripts/earthengine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
from typing import Union

from absl import logging
import datacommons as dc
from dateutil.relativedelta import relativedelta
from geopy import distance
import s2sphere
Expand All @@ -41,7 +40,8 @@
os.path.join(os.path.dirname(os.path.dirname(_SCRIPTS_DIR)), 'util'))

from config_map import ConfigMap, read_py_dict_from_file, write_py_dict_to_file
from dc_api_wrapper import dc_api_wrapper
from dc_api_wrapper import dc_api_get_node_property
from string_utils import str_to_list

# Constants
_MAX_LATITUDE = 90.0
Expand Down Expand Up @@ -368,23 +368,13 @@ def place_id_to_lat_lng(placeid: str,
# Get the lat/lng from the DC API
latlng = []
for prop in ['latitude', 'longitude']:
# dc.utils._API_ROOT = 'http://autopush.api.datacommons.org'
# resp = dc.get_property_values([placeid], prop)
resp = dc_api_wrapper(
function=dc.get_property_values,
args={
'dcids': [placeid],
'prop': prop,
},
use_cache=True,
api_root=_DC_API_ROOT,
)
resp = dc_api_get_node_property([placeid], prop)
if not resp or placeid not in resp:
return (0, 0)
values = resp[placeid]
values = str_to_list(resp[placeid].get(prop))
if not len(values):
return (0, 0)
latlng.append(float(values[0]))
latlng.append(float(values[0].strip('"')))
lat = latlng[0]
lng = latlng[1]
return (lat, lng)
Expand Down
21 changes: 9 additions & 12 deletions scripts/eurostat/health_determinants/common/dcid_existence.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@
"""_summary_
Script to check the property/dcid/nodes existance in datacommons.org.
"""
import os
import sys

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(_SCRIPT_DIR, '../../../../util'))

from dc_api_wrapper import dc_api_is_defined_dcid
import datacommons


Expand All @@ -30,17 +37,7 @@ def check_dcid_existence(nodes: list) -> dict:
dict: Status dictionary.
"""
# pylint: disable=protected-access
nodes_response = datacommons.get_property_values(
nodes,
"typeOf",
out=True,
value_type=None,
limit=datacommons.utils._MAX_LIMIT)
# pylint: disable=protected-access
node_status = dc_api_is_defined_dcid(nodes)
# pylint: enable=protected-access
node_status = {}
for node, value in nodes_response.items():
if value == []:
node_status[node] = False
else:
node_status[node] = True
return node_status
1 change: 0 additions & 1 deletion scripts/fires/wfigs_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import pickle
import re
import requests
import datacommons as dc
import sys

_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
Expand Down
9 changes: 7 additions & 2 deletions scripts/glims/rgi/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
import json
import glob
import os
import sys

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(_SCRIPT_DIR, '../../../util'))
from dc_api_wrapper import dc_api_get_values
from shapely import geometry
from absl import app
from absl import flags
Expand All @@ -30,14 +35,14 @@

def _load_geojsons():
countries = dc.get_places_in(['Earth'], 'Country')['Earth']
resp = dc.get_property_values(countries, 'geoJsonCoordinatesDP2')
resp = dc_api_get_values(countries, 'geoJsonCoordinatesDP2')
geojsons = {}
for p, gj in resp.items():
if not gj:
continue
geojsons[p] = geometry.shape(json.loads(gj[0]))
print('Got', len(geojsons), 'geojsons!')
cip = dc.get_property_values(countries, 'containedInPlace')
cip = dc_api_get_values(countries, 'containedInPlace')
return geojsons, cip


Expand Down
28 changes: 21 additions & 7 deletions scripts/noaa/gpcc_spi/create_place_to_grid_area_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,22 @@
"""

from shapely import geometry
import datacommons as dc
import concurrent.futures
from typing import List, Optional
import json
import csv
import sys
import os

from absl import flags
from absl import app

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPT_DIR)
sys.path.append(os.path.join(_SCRIPT_DIR.split('/data/', 1)[0], 'data', 'util'))

import dc_api_wrapper as dc_api

FLAGS = flags.FLAGS

flags.DEFINE_string('gpcc_spi_places_with_csv',
Expand Down Expand Up @@ -59,20 +66,27 @@ def construct_one_degree_grid_polygons() -> List[geometry.box]:

def get_place_by_type(parent_places, places_types: List[str]) -> List[str]:
"""Return the place ids of all places contained in a set of parent places."""
dc_api_client = dc_api.get_datacommons_client()
all_types_of_places = []
for place_type in places_types:
parent_place_to_places = dc.get_places_in(parent_places, place_type)
for places in parent_place_to_places.values():
for place in places:
all_types_of_places.extend(place)
parent_place_to_places = dc_api.dc_api_batched_wrapper(
dc_api_client.node.fetch_place_descendants,
parent_places, {'descendants_type': place_type},
dcid_arg_kw='place_dcids')
for child_places in parent_place_to_places.values():
for place in child_places:
place_dcid = place.get('dcid')
if place_dcid:
all_types_of_places.append(place_dcid)
return all_types_of_places


def places_to_geo_jsons(places: List[str]):
"""Get geojsons for a list of places."""
resp = dc.get_property_values(places, 'geoJsonCoordinates')
resp = dc_api.dc_api_get_node_property(places, 'geoJsonCoordinates')
geojsons = {}
for p, gj in resp.items():
for p, gj_value in resp.items():
gj = gj_value.get('geoJsonCoordinates')
if not gj:
continue
geojsons[p] = geometry.shape(json.loads(gj[0]))
Expand Down
6 changes: 4 additions & 2 deletions scripts/rff/preprocess_raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

RFF_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(RFF_DIR)
sys.path.append(os.path.join(RFF_DIR, '../../util'))
from dc_api_wrapper import dc_api_get_values
from rff import util

bandname_to_gdcStatVars = {
Expand Down Expand Up @@ -37,11 +39,11 @@ def get_dcid(sp_scale, lat, lon):

def get_county_geoid(lat, lon):
counties = dc.get_places_in(['country/USA'], 'County')['country/USA']
counties_simp = dc.get_property_values(counties, 'geoJsonCoordinatesDP1')
counties_simp = dc_api_get_values(counties, 'geoJsonCoordinatesDP1')
point = geometry.Point(lon, lat)
for p, gj in counties_simp.items():
if len(gj) == 0:
gj = dc.get_property_values([p], 'geoJsonCoordinates')[p]
gj = dc_api_get_values([p], 'geoJsonCoordinates')[p]
if len(gj) == 0: # property not defined for one county in alaska
continue
if geometry.shape(json.loads(gj[0])).contains(point):
Expand Down
23 changes: 16 additions & 7 deletions scripts/un/boundaries/country_boundaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,23 @@

from typing import Dict

import datacommons as dc
import geopandas as gpd
from geojson_rewind import rewind
import json
import os
import sys
import requests

from absl import app
from absl import flags

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(_SCRIPT_DIR)
sys.path.append(os.path.join(_SCRIPT_DIR.split('/data/', 1)[0], 'data', 'util'))

import dc_api_wrapper as dc_api
from string_utils import str_to_list

FLAGS = flags.FLAGS
flags.DEFINE_string('input_file', 'data/UNGIS_BNDA.geojson',
'Input geojson file')
Expand Down Expand Up @@ -194,10 +201,10 @@ def existing_codes(self, all_countries):
Only countries with DCID of the form county/{code} are included.
"""
# Call DC API to get list of countries
dc_all_countries = dc.get_property_values(['Country'],
'typeOf',
out=False,
limit=500)['Country']
dc_all_countries = str_to_list(
dc_api.dc_api_get_property(['Country'], 'typeOf',
out=False).get('Country',
{}).get('typeOf', ''))
dc_all_countries = set(dc_all_countries)

def is_dc_country(iso):
Expand Down Expand Up @@ -257,8 +264,10 @@ def build_cache(self, existing_codes):
all_children.update(children)

child2name = {}
for child, values in dc.get_property_values(list(all_children),
'name').items():
children_names = dc_api.dc_api_get_node_property(
list(all_children), 'name')
for child, prop_values in children_names.items():
values = str_to_list(prop_values.get('name', ''))
if values:
child2name[child] = values[0]

Expand Down
11 changes: 9 additions & 2 deletions scripts/us_census/enhanced_tmcf/process_etmcf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import csv
import datacommons as dc
import os
import sys

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(_SCRIPT_DIR, '../../../util'))

from dc_api_wrapper import dc_api_get_node_property
import datacommons as dc

from absl import app
from absl import flags
Expand Down Expand Up @@ -70,9 +77,9 @@ def _get_places_not_found(census_geoids: List[str]) -> List[str]:
for i in range(0, len(geo_ids), NUM_DCIDS_TO_QUERY):
selected_geo_ids = geo_ids[i:i + NUM_DCIDS_TO_QUERY]
selected_dcids = [geoId_to_dcids[g] for g in selected_geo_ids]
res = dc.get_property_values(selected_dcids, 'name')
res = dc_api_get_node_property(selected_dcids, 'name')
for index in range(len(selected_dcids)):
if not res[selected_dcids[index]]:
if selected_dcids[index] not in res:
geoIds_not_found.append(selected_geo_ids[index])
return geoIds_not_found

Expand Down
25 changes: 20 additions & 5 deletions scripts/us_census/geojsons_low_res/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
import datacommons as dc
import geojson
import os
import sys

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(_SCRIPT_DIR, '../../../util'))
from dc_api_wrapper import dc_api_node_get_property


class GeojsonDownloader:
Expand Down Expand Up @@ -88,20 +93,30 @@ def download_data(self, place='country/USA', level=1):
Raises:
ValueError: If a Data Commons API call fails.
"""
geolevel = dc.get_property_values([place], "typeOf")
place_types = dc_api.dc_api_node_get_property([place], "typeOf")
geo_level = str_to_list(place_types.get(place, {}).get("typeOf", ""))
# There is an extra level of nesting in geojson files, so we have
# to get the 0th element explicitly.
assert len(geolevel[place]) == 1
assert len(geo_level) >= 1
geolevel = geolevel[place][0]

for i in range(level):
if geolevel not in self.LEVEL_MAP:
raise ValueError("Desired level does not exist.")
geolevel = self.LEVEL_MAP[geolevel]

geos_contained_in_place = dc.get_places_in([place], geolevel)[place]
self.geojsons = dc.get_property_values(geos_contained_in_place,
"geoJsonCoordinates")
dc_api_client = dc_api.get_datacommons_client()
descendant_places = dc_api.dc_api_batched_wrapper(
dc_api_client.node.fetch_place_descendants, [place], {
'descendants_type': geolevel
},
dcid_arg_kw='place_dcids').get(place, {})
geos_contained_in_place = [
place_name.get('dcid') for place_name in descendant_places
]

self.geojsons = dc_api_get_values(geos_contained_in_place,
"geoJsonCoordinates")
for area, coords in self.iter_subareas():
self.geojsons[area][0] = geojson.loads(coords)

Expand Down
Loading