Commit b90d72bf authored by abuddenberg
Browse files

Improve support for temporal extents. Pull identifier field up into GcisBase...

Improve support for temporal extents. Pull identifier field up into GcisBase class to facilitate realization. Add support for Dataset endpoint to survey_client
parent 5ff0a5da
......@@ -13,6 +13,7 @@ class Gcisbase(object):
#Setup class variables
self.gcis_fields = fields
self.translations = trans
self.identifier = None
#Save off a copy of the original JSON for debugging
self.original = deepcopy(data)
......@@ -246,6 +247,7 @@ class Dataset(GcisObject):
self._release_dt = None
self._access_dt = None
self._publication_year = None
self._temporal_extent = None
super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans)
......@@ -306,7 +308,20 @@ class Dataset(GcisObject):
self._publication_year = match.group()
else:
self._publication_year = None
@property
def temporal_extent(self):
    """Return the currently stored temporal extent.

    This is a read-only view of ``self._temporal_extent``; the value is
    handed back exactly as stored, with no conversion.
    """
    extent = self._temporal_extent
    return extent
#Can't use property.setter due to multiple args
def set_temporal_extent(self, start_dt, end_dt):
    """Store the temporal extent built from a start and an end date.

    When both arguments are truthy, each one is run through ``parse`` and the
    two ``isoformat()`` results are joined with a single space. If either
    argument is falsy, or a ``TypeError`` / ``ValueError`` is raised along the
    way, the stored extent is reset to ``None``.

    :param start_dt: beginning of the extent, in a form ``parse`` accepts
    :param end_dt: end of the extent, in a form ``parse`` accepts
    """
    try:
        if start_dt and end_dt:
            start_iso = parse(start_dt).isoformat()
            end_iso = parse(end_dt).isoformat()
            extent = '{0} {1}'.format(start_iso, end_iso)
        else:
            # At least one bound is missing, so there is no extent to record.
            extent = None
    except (TypeError, ValueError):
        # Unparseable input is treated the same as missing input.
        extent = None
    self._temporal_extent = extent
class Activity(GcisObject):
def __init__(self, data, trans=()):
......
......@@ -297,3 +297,10 @@ class SurveyClient:
else:
raise Exception(resp.status_code)
def get_dataset(self, dataset_name):
    """Send a GET request to the ``get-dataset`` endpoint under ``self.base_url``.

    :param dataset_name: value sent as the ``uuid`` query parameter
    :return: the response object from ``requests.get``, handed back as-is
        (no status check is performed here)
    """
    endpoint = '{b}/get-dataset'.format(b=self.base_url)
    query = {'uuid': dataset_name}
    return requests.get(endpoint, params=query)
......@@ -2,6 +2,19 @@ __author__ = 'abuddenberg'
from gcis_clients.domain import Person
# Field-name translation table for datasets: each key is renamed to the value
# on its right. NOTE(review): presumably survey field -> GCIS Dataset
# attribute; confirm against the code that consumes this mapping.
DATASET_TRANSLATIONS = {
    'dataset_name': 'name',
    'dataset_id': 'native_id',
    'dataset_citation': 'cite_metadata',
    'dataset_description': 'description',
    'dataset_year_published': 'publication_year',
    # 'dataset_period_record': 'temporal_extent',
    'dataset_version': 'version',
    'dataset_access_date': 'access_dt',
    'dataset_url': 'url',
    'dataset_release_date': 'release_dt',
}
DATASET_IDS = {
'U.S. Climate Divisional Dataset Version 2': 'nca3-cddv2-r1',
'World Climate Research Program\'s (WCRP\'s) Coupled Model Intercomparison Project phase 5 (CMIP5) multi-model ensemble': 'nca3-cmip5-r1',
......@@ -11,7 +24,10 @@ DATASET_IDS = {
'Billion-Dollar Weather and Climate Disasters': 'noaa-ncdc-billion-dollar-weather-climate-disasters',
'ESRI USA10 dataset (ArcGIS version 10.0)': 'esri-arcgis-v10-0',
'nClimDiv': 'noaa-ncdc-cag-us-temperature-nclimdiv',
'Global Historical Climatology Network (GHCN) Daily': 'noaa-ncdc-ghcn-daily'
'Global Historical Climatology Network (GHCN) Daily': 'noaa-ncdc-ghcn-daily',
'Continential United States Hurricane Impacts/ Landfalls 1851-2015': 'noaa-aoml-hurdat'
# 'Time Bias Corrected Divisional Temperature Precipitation Drought Index': 'noaa-ncei-time-bias-corrected-divisional-temperature-precipitation-drought-index',
# 'North American Drought Atlas': 'noaa-ncei-north-american-drought-atlas'
}
COPYRIGHT_TRANSLATIONS = {
......@@ -51,7 +67,8 @@ PARENT_SEARCH_HINTS = {
'Third National Climate Assessment': 'nca3',
'A conceptual framework for action on the social determinants of health': 'conceptual-framework-for-action-on-the-social-determinants-of-health',
'Regional Surface Climate Conditions in CMIP3 and CMIP5 for the United States: Differences, Similarities, and Implications for the U.S. National Climate Assessment.': 'noaa-techreport-nesdis-144',
'2012 Census of Agriculture': 'census-agriculture-2012'
'2012 Census of Agriculture': 'census-agriculture-2012',
'Regional Climate Trends and Scenarios for the U.S. National Climate Assessment. Part 3. Climate of the Midwest U.S.,': 'noaa-techreport-nesdis-142-3'
},
'dataset': {
'Global Historical Climatology Network - Daily': 'noaa-ncdc-ghcn-daily'
......
......@@ -3,6 +3,8 @@ __author__ = 'abuddenberg'
from os.path import exists
import sys
from domain import GcisObject
def warning(*objs):
    """Print the given objects to standard error, prefixed with ``WARNING: ``.

    The prefix and every object are written on one line, separated by
    ``print``'s default single-space separator.
    """
    pieces = ("WARNING: ",) + objs
    print(*pieces, file=sys.stderr)
......@@ -66,7 +68,12 @@ def realize_parents(gcis_client, parents):
parent_matches = gcis_client.lookup_publication(parent.publication_type_identifier, parent.label)
if len(parent_matches) == 1:
parent.url = '/{type}/{id}'.format(type=parent.publication_type_identifier, id=parent_matches[0][0])
matched_id, matched_name = parent_matches[0]
parent.url = '/{type}/{id}'.format(type=parent.publication_type_identifier, id=matched_id)
# Need the ability to dynamically identify and retrieve an instance of the parent publication.
# Here's a generic, for the time being.
parent.publication = GcisObject({'identifier': matched_id})
elif len(parent_matches) == 0:
warning(' '.join(('No ID found for', parent.publication_type_identifier, parent.label)))
else:
......
This diff is collapsed.
......@@ -13,30 +13,57 @@ from states import sync_metadata_tree
import pickle
import sys
import re
import traceback
# gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
# gcis = GcisClient('https://data.globalchange.gov', *gcis_stage_auth)
gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
surveys = SurveyClient('https://state-resources.cicsnc.org', survey_token)
def main():
print(gcis.test_login())
total = 0
for report_id in sync_metadata_tree:
for chapter_id in sync_metadata_tree[report_id]:
for survey_url, figure_id, figure_num in sync_metadata_tree[report_id][chapter_id]:
figure, datasets = surveys.get_survey(survey_url, do_download=True)
print(survey_url)
print(figure, datasets)
realize_parents(gcis, figure.parents)
realize_contributors(gcis, figure.contributors)
print('Contributors: ', figure.contributors)
print('Parents: ', figure.parents)
# gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
total += 1
# figure, datasets = surveys.get_survey(survey_url, do_download=True)
# #Fix misspelling
# figure.identifier = figure_id
# figure.title = figure.title.replace('precipitaton', 'precipitation')
# figure.ordinal = figure_num
#
# print(survey_url)
# print(figure, datasets)
#
# realize_parents(gcis, figure.parents)
# realize_contributors(gcis, figure.contributors)
#
# print('Contributors: ', figure.contributors)
# print('Parents: ', figure.parents)
# # gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
#
# for ds in [p for p in figure.parents if p.publication_type_identifier == 'dataset']:
# # Assign a synthetic activity identifier to datasets associated with the figure
# if ds.activity and ds.activity.identifier is None:
# ds.activity.identifier = generate_activity_id(figure, ds.publication)
# print 'Dataset: ', ds.activity
#
# print 'Creating figure... ', gcis.create_figure(report_id, chapter_id, figure, skip_images=True, skip_upload=False)
# # print 'Updating figure... ', gcis.update_figure(report_id, chapter_id, figure, skip_images=True)
print(total)
def generate_activity_id(image, dataset):
    """Build a synthetic activity identifier for an image/dataset pair.

    The result has the form ``<prefix>-<dataset.identifier>-process``, where
    ``<prefix>`` is the part of ``image.identifier`` before its first ``-``.

    :param image: object exposing a string ``identifier`` attribute
    :param dataset: object exposing a string ``identifier`` attribute
    :return: the generated identifier, or ``None`` when it cannot be built
        (for example when either identifier is missing); in that case a
        warning and the traceback are written to standard error.
    """
    try:
        prefix = image.identifier.split('-')[0]
        return '-'.join([prefix, dataset.identifier, 'process'])
    # Deliberately broad: this is a best-effort helper that must not abort the
    # caller. The clause has no "Exception, e" binding so that it parses on
    # both Python 2.6+ and Python 3.
    except Exception:
        sys.stderr.write('WARNING: Activity identifier generation failed\n')
        traceback.print_exc()
        return None
def gen_survey_list():
......@@ -72,15 +99,15 @@ def gen_survey_list():
def create_nlss_report():
nlss = Report({
'identifier': 'noaa-led-state-summaries-2016',
'identifier': 'noaa-led-state-summaries-2017',
'report_type_identifier': 'report',
'title': 'NOAA-led State Summaries 2016',
'title': 'NOAA-led State Summaries 2017',
'url': 'https://statesummaries.cicsnc.org/',
'publication_year': '2016',
'publication_year': '2017',
'contact_email': ''
})
chapters = [(id, i + 1, ' '.join([w.capitalize() for w in id.split('-')])) for i, id in enumerate(sync_metadata_tree['noaa-led-state-summaries-2016'])]
chapters = [(id, i + 1, ' '.join([w.capitalize() for w in id.split('-')])) for i, id in enumerate(sync_metadata_tree['noaa-led-state-summaries-2017'])]
print(gcis.create_report(nlss))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment