Commit b90d72bf authored by abuddenberg

Improve support for temporal extents. Pull identifier field up into GcisBase class to facilitate realization. Add support for Dataset endpoint to survey_client
parent 5ff0a5da
@@ -13,6 +13,7 @@ class Gcisbase(object):
         #Setup class variables
         self.gcis_fields = fields
         self.translations = trans
+        self.identifier = None

         #Save off a copy of the original JSON for debugging
         self.original = deepcopy(data)
@@ -246,6 +247,7 @@ class Dataset(GcisObject):
         self._release_dt = None
         self._access_dt = None
         self._publication_year = None
+        self._temporal_extent = None

         super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans)
@@ -306,7 +308,20 @@ class Dataset(GcisObject):
             self._publication_year = match.group()
         else:
             self._publication_year = None

+    @property
+    def temporal_extent(self):
+        return self._temporal_extent
+
+    #Can't use property.setter due to multiple args
+    def set_temporal_extent(self, start_dt, end_dt):
+        try:
+            self._temporal_extent = '{0} {1}'.format(parse(start_dt).isoformat(), parse(end_dt).isoformat()) if start_dt and end_dt else None
+        except TypeError:
+            self._temporal_extent = None
+        except ValueError:
+            self._temporal_extent = None
+
 class Activity(GcisObject):
     def __init__(self, data, trans=()):
...
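As a quick illustration of the new temporal-extent handling: `set_temporal_extent` runs both endpoints through `dateutil`'s `parse()` and stores them as a single space-separated ISO 8601 pair, falling back to None when either endpoint is missing or unparseable. A minimal standalone sketch of that behavior (the helper name below is illustrative, not part of the library):

```python
from dateutil.parser import parse

def format_temporal_extent(start_dt, end_dt):
    # Mirrors Dataset.set_temporal_extent: both endpoints are required;
    # missing or unparseable values collapse to None.
    try:
        if start_dt and end_dt:
            return '{0} {1}'.format(parse(start_dt).isoformat(), parse(end_dt).isoformat())
        return None
    except (TypeError, ValueError):
        return None

print(format_temporal_extent('1851-01-01', 'December 31, 2015'))
# 1851-01-01T00:00:00 2015-12-31T00:00:00
```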
@@ -297,3 +297,10 @@ class SurveyClient:
         else:
             raise Exception(resp.status_code)

+    def get_dataset(self, dataset_name):
+        url = '{b}/get-dataset'.format(b=self.base_url)
+
+        resp = requests.get(url, params={'uuid': dataset_name})
+
+        return resp
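The new `get_dataset` method simply issues a GET against `<base_url>/get-dataset`, passing the dataset name as the `uuid` query parameter, and returns the raw `requests` response. A hypothetical call might look like the following (the import path, token, and dataset name are placeholders):

```python
from gcis_clients import SurveyClient  # import path assumed

surveys = SurveyClient('https://state-resources.cicsnc.org', 'REPLACE_WITH_SURVEY_TOKEN')
resp = surveys.get_dataset('nclimdiv')  # placeholder dataset name

if resp.status_code == 200:
    print(resp.json())  # assuming the endpoint returns JSON
else:
    print('Request failed: {0}'.format(resp.status_code))
```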
@@ -2,6 +2,19 @@ __author__ = 'abuddenberg'

 from gcis_clients.domain import Person

+DATASET_TRANSLATIONS = {
+    'dataset_name': 'name',
+    'dataset_id': 'native_id',
+    'dataset_citation': 'cite_metadata',
+    'dataset_description': 'description',
+    'dataset_year_published': 'publication_year',
+    # 'dataset_period_record': 'temporal_extent',
+    'dataset_version': 'version',
+    'dataset_access_date': 'access_dt',
+    'dataset_url': 'url',
+    'dataset_release_date': 'release_dt'
+}
+
 DATASET_IDS = {
     'U.S. Climate Divisional Dataset Version 2': 'nca3-cddv2-r1',
     'World Climate Research Program\'s (WCRP\'s) Coupled Model Intercomparison Project phase 5 (CMIP5) multi-model ensemble': 'nca3-cmip5-r1',
@@ -11,7 +24,10 @@ DATASET_IDS = {
     'Billion-Dollar Weather and Climate Disasters': 'noaa-ncdc-billion-dollar-weather-climate-disasters',
     'ESRI USA10 dataset (ArcGIS version 10.0)': 'esri-arcgis-v10-0',
     'nClimDiv': 'noaa-ncdc-cag-us-temperature-nclimdiv',
-    'Global Historical Climatology Network (GHCN) Daily': 'noaa-ncdc-ghcn-daily'
+    'Global Historical Climatology Network (GHCN) Daily': 'noaa-ncdc-ghcn-daily',
+    'Continential United States Hurricane Impacts/ Landfalls 1851-2015': 'noaa-aoml-hurdat'
+    # 'Time Bias Corrected Divisional Temperature Precipitation Drought Index': 'noaa-ncei-time-bias-corrected-divisional-temperature-precipitation-drought-index',
+    # 'North American Drought Atlas': 'noaa-ncei-north-american-drought-atlas'
 }

 COPYRIGHT_TRANSLATIONS = {
@@ -51,7 +67,8 @@ PARENT_SEARCH_HINTS = {
         'Third National Climate Assessment': 'nca3',
         'A conceptual framework for action on the social determinants of health': 'conceptual-framework-for-action-on-the-social-determinants-of-health',
         'Regional Surface Climate Conditions in CMIP3 and CMIP5 for the United States: Differences, Similarities, and Implications for the U.S. National Climate Assessment.': 'noaa-techreport-nesdis-144',
-        '2012 Census of Agriculture': 'census-agriculture-2012'
+        '2012 Census of Agriculture': 'census-agriculture-2012',
+        'Regional Climate Trends and Scenarios for the U.S. National Climate Assessment. Part 3. Climate of the Midwest U.S.,': 'noaa-techreport-nesdis-142-3'
     },
     'dataset': {
         'Global Historical Climatology Network - Daily': 'noaa-ncdc-ghcn-daily'
...
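For orientation, the translation tables map incoming survey field names onto GCIS domain attribute names. A minimal sketch of applying one as a simple key rename (the sample record and the rename step are illustrative only, not the library's actual plumbing):

```python
DATASET_TRANSLATIONS = {
    'dataset_name': 'name',
    'dataset_year_published': 'publication_year',
}

survey_record = {'dataset_name': 'nClimDiv', 'dataset_year_published': '2014'}  # sample input
translated = dict((DATASET_TRANSLATIONS.get(k, k), v) for k, v in survey_record.items())
print(translated)  # {'name': 'nClimDiv', 'publication_year': '2014'} (key order may vary)
```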
@@ -3,6 +3,8 @@ __author__ = 'abuddenberg'

 from os.path import exists
 import sys

+from domain import GcisObject
+
 def warning(*objs):
     print("WARNING: ", *objs, file=sys.stderr)
@@ -66,7 +68,12 @@ def realize_parents(gcis_client, parents):
         parent_matches = gcis_client.lookup_publication(parent.publication_type_identifier, parent.label)

         if len(parent_matches) == 1:
-            parent.url = '/{type}/{id}'.format(type=parent.publication_type_identifier, id=parent_matches[0][0])
+            matched_id, matched_name = parent_matches[0]
+            parent.url = '/{type}/{id}'.format(type=parent.publication_type_identifier, id=matched_id)
+
+            # Need the ability to dynamically identify and retrieve an instance of the parent publication.
+            # Here's a generic, for the time being.
+            parent.publication = GcisObject({'identifier': matched_id})
+
         elif len(parent_matches) == 0:
             warning(' '.join(('No ID found for', parent.publication_type_identifier, parent.label)))
         else:
...
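The `parent.publication` stub added above exists mainly so later steps can read an identifier off a matched parent without a full GCIS lookup. A hedged illustration of that consumption pattern, using a simplified stand-in for `GcisObject` (the real class lives in `gcis_clients.domain` and maps many more fields):

```python
class GcisObject(object):
    # Simplified stand-in for gcis_clients.domain.GcisObject; only the
    # identifier matters for this illustration.
    def __init__(self, data):
        self.identifier = data.get('identifier')

parent_matches = [('noaa-ncdc-ghcn-daily', 'Global Historical Climatology Network - Daily')]
matched_id, matched_name = parent_matches[0]
publication = GcisObject({'identifier': matched_id})
print(publication.identifier)  # noaa-ncdc-ghcn-daily
```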
@@ -13,30 +13,57 @@ from states import sync_metadata_tree

 import pickle
 import sys
 import re
+import traceback

 # gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
-# gcis = GcisClient('https://data.globalchange.gov', *gcis_stage_auth)
+gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
 surveys = SurveyClient('https://state-resources.cicsnc.org', survey_token)

 def main():
     print(gcis.test_login())

+    total = 0
     for report_id in sync_metadata_tree:
         for chapter_id in sync_metadata_tree[report_id]:
             for survey_url, figure_id, figure_num in sync_metadata_tree[report_id][chapter_id]:
-                figure, datasets = surveys.get_survey(survey_url, do_download=True)
-
-                print(survey_url)
-                print(figure, datasets)
-
-                realize_parents(gcis, figure.parents)
-                realize_contributors(gcis, figure.contributors)
-
-                print('Contributors: ', figure.contributors)
-                print('Parents: ', figure.parents)
-                # gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
+                total += 1
+                # figure, datasets = surveys.get_survey(survey_url, do_download=True)
+
+                # #Fix misspelling
+                # figure.identifier = figure_id
+                # figure.title = figure.title.replace('precipitaton', 'precipitation')
+                # figure.ordinal = figure_num
+                #
+                # print(survey_url)
+                # print(figure, datasets)
+                #
+                # realize_parents(gcis, figure.parents)
+                # realize_contributors(gcis, figure.contributors)
+                #
+                # print('Contributors: ', figure.contributors)
+                # print('Parents: ', figure.parents)
+                # # gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
+                #
+                # for ds in [p for p in figure.parents if p.publication_type_identifier == 'dataset']:
+                #     # Assign synthetic activity identifier to for datasets associated with figure
+                #     if ds.activity and ds.activity.identifier is None:
+                #         ds.activity.identifier = generate_activity_id(figure, ds.publication)
+                #     print 'Dataset: ', ds.activity
+                #
+                # print 'Creating figure... ', gcis.create_figure(report_id, chapter_id, figure, skip_images=True, skip_upload=False)
+                # # print 'Updating figure... ', gcis.update_figure(report_id, chapter_id, figure, skip_images=True)
+
+    print(total)
+
+def generate_activity_id(image, dataset):
+    try:
+        return '-'.join([image.identifier.split('-')[0], dataset.identifier, 'process'])
+    except Exception, e:
+        sys.stderr.write('WARNING: Activity identifier generation failed\n')
+        traceback.print_exc()

 def gen_survey_list():
@@ -72,15 +99,15 @@ def gen_survey_list():

 def create_nlss_report():
     nlss = Report({
-        'identifier': 'noaa-led-state-summaries-2016',
+        'identifier': 'noaa-led-state-summaries-2017',
         'report_type_identifier': 'report',
-        'title': 'NOAA-led State Summaries 2016',
+        'title': 'NOAA-led State Summaries 2017',
         'url': 'https://statesummaries.cicsnc.org/',
-        'publication_year': '2016',
+        'publication_year': '2017',
         'contact_email': ''
     })

-    chapters = [(id, i + 1, ' '.join([w.capitalize() for w in id.split('-')])) for i, id in enumerate(sync_metadata_tree['noaa-led-state-summaries-2016'])]
+    chapters = [(id, i + 1, ' '.join([w.capitalize() for w in id.split('-')])) for i, id in enumerate(sync_metadata_tree['noaa-led-state-summaries-2017'])]

     print(gcis.create_report(nlss))
...
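Regarding the `generate_activity_id` helper added in the script above: the synthetic identifier is simply the figure identifier's leading token joined to the dataset identifier with a 'process' suffix. A quick sketch with placeholder objects (the real arguments are GCIS figure and dataset domain objects, and the identifiers here are invented for illustration):

```python
class Stub(object):
    def __init__(self, identifier):
        self.identifier = identifier

figure = Stub('temperature-change-al')  # placeholder figure identifier
dataset = Stub('noaa-ncdc-cag-us-temperature-nclimdiv')

activity_id = '-'.join([figure.identifier.split('-')[0], dataset.identifier, 'process'])
print(activity_id)  # temperature-noaa-ncdc-cag-us-temperature-nclimdiv-process
```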