Commit 6f5fd1ff authored by abuddenberg's avatar abuddenberg
Browse files

Added support for new fields and smarter merges to datasets. Added support...

Added support for new fields and smarter merges to datasets. Added support for globally omitted fields to domain objects.
parent 927b4a05
......@@ -8,20 +8,22 @@ from dateutil.parser import parse
class Gcisbase(object):
original = None
def __init__(self, data, fields=[], trans={}):
#Setup class variables
self.gcis_fields = fields
self.translations = trans
#Save off a copy of the original JSON for debugging
self.original = deepcopy(data)
#Create attributes from the master list
self. __dict__.update(dict.fromkeys(fields, None))
self. __dict__.update(dict.fromkeys(self.gcis_fields, None))
#Perform translations
for term in trans:
for term in self.translations:
val = data.pop(term, None)
if val is not None:
data[trans[term]] = val
data[self.translations[term]] = val
for k in data:
if hasattr(self, k):
......@@ -42,27 +44,27 @@ class Gcisbase(object):
self.__dict__[k] = getattr(other, k)
return self
def as_json(self, indent=0, omit_fields=()):
    """Serialize this object's GCIS fields to a JSON string.

    indent -- passed straight through to json.dumps.
    omit_fields -- iterable of additional field names to exclude from the
        output; 'uri' and 'href' are always excluded.  Defaults to an empty
        tuple (not a list) to avoid the shared-mutable-default pitfall.
    """
    #Start from the declared field list and subtract the always-omitted
    #link fields plus any caller-requested omissions.
    out_fields = set(self.gcis_fields) - ({'uri', 'href'} | set(omit_fields))
    return json.dumps({f: self.__dict__[f] for f in out_fields}, indent=indent)
class Figure(Gcisbase):
_gcis_fields = [
'usage_limits', 'kindred_figures', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max', 'time_start',
'uuid', 'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images'
]
_translations = {
'what_is_the_figure_id': 'identifier',
'what_is_the_name_of_the_figure_as_listed_in_the_report': 'title',
'when_was_this_figure_created': 'create_dt',
'what_is_the_chapter_and_figure_number': 'figure_num'
}
def __init__(self, data):
super(Figure, self).__init__(data, fields=self._gcis_fields, trans=self._translations)
self.gcis_fields = [
'usage_limits', 'kindred_figures', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max', 'time_start',
'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images'
]
self.translations = {
'what_is_the_figure_id': 'identifier',
'what_is_the_name_of_the_figure_as_listed_in_the_report': 'title',
'when_was_this_figure_created': 'create_dt',
'what_is_the_chapter_and_figure_number': 'figure_num'
}
super(Figure, self).__init__(data, fields=self.gcis_fields, trans=self.translations)
#Special case for chapter
chap_tree = data.pop('chapter', None)
......@@ -100,9 +102,10 @@ class Figure(Gcisbase):
self.ordinal = value
def as_json(self, indent=0):
    """Serialize to JSON, excluding images and chapter.

    Those two fields are relational and handled by separate GCIS calls,
    so they must not appear in the figure's own JSON payload.
    """
    #Forward indent so callers' pretty-printing requests aren't silently dropped
    return super(Figure, self).as_json(indent=indent, omit_fields=['images', 'chapter'])
def __str__(self):
string = '{f_id}: Figure {f_num}: {f_name}\n\tImages: {imgs}'.format(
......@@ -115,31 +118,32 @@ class Figure(Gcisbase):
class Chapter(Gcisbase):
    """GCIS chapter domain object.

    Thin wrapper around Gcisbase: declares the chapter field list and
    defers all attribute/translation handling to the base class.
    """

    def __init__(self, data):
        #Fields this object round-trips to/from GCIS JSON
        self.gcis_fields = ['report_identifier', 'identifier', 'number', 'url', 'title']

        super(Chapter, self).__init__(data, fields=self.gcis_fields)
class Image(Gcisbase):
_gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max', 'lon_min',
'position', 'submission_dt', 'time_end', 'time_start', 'title', 'usage_limits']
_translations = {
'list_any_keywords_for_the_image': 'attributes',
'when_was_this_image_created': 'create_dt',
'what_is_the_image_id': 'identifier',
'maximum_latitude': 'lat_max',
'minimum_latitude': 'lat_min',
'maximum_longitude': 'lon_max',
'minimum_longitude': 'lon_min',
'start_time': 'time_start',
'end_time': 'time_end',
'what_is_the_name_of_the_image_listed_in_the_report': 'title'
}
class Image(Gcisbase):
def __init__(self, data, local_path=None, remote_path=None):
super(Image, self).__init__(data, fields=self._gcis_fields, trans=self._translations)
self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
'usage_limits']
self.translations = {
'list_any_keywords_for_the_image': 'attributes',
'when_was_this_image_created': 'create_dt',
'what_is_the_image_id': 'identifier',
'maximum_latitude': 'lat_max',
'minimum_latitude': 'lat_min',
'maximum_longitude': 'lon_max',
'minimum_longitude': 'lon_min',
'start_time': 'time_start',
'end_time': 'time_end',
'what_is_the_name_of_the_image_listed_in_the_report': 'title'
}
super(Image, self).__init__(data, fields=self.gcis_fields, trans=self.translations)
#Hack
self.identifier = self.identifier.replace('/image/', '')
......@@ -155,41 +159,46 @@ class Image(Gcisbase):
class Dataset(Gcisbase):
_gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
'scale', 'publication_dt', 'temporal_extent', 'version', 'parents', 'scope', 'type',
'processing_level', 'files', 'data_qualifier', 'access_dt', 'description', 'spatial_ref_sys',
'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri', 'identifier']
_translations = {
'data_set_access_date': 'access_dt',
'data_set_publication_year': 'publication_dt',
# HACK elsewhere 'start_time and end_time': '',
'data_set_id': 'native_id',
# HACK elsewhere'': 'doi',
# HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
'data_set_version': 'version',
'data_set_name': 'name',
'data_set_citation': 'cite_metadata',
'data_set_description': 'description',
# Not sure'': 'type',
}
#This desperately needs to get added to the webform
_identifiers = {
'Global Historical Climatology Network - Daily': 'GHCN-D',
'Global Historical Climatology Network - Monthly': 'GHCN-M',
'NCDC Merged Land and Ocean Surface Temperature': 'MLOST',
'Climate Division Database Version 2': 'CDDv2',
'Eighth degree-CONUS Daily Downscaled Climate Projections by Katharine Hayhoe': 'CMIP3-Downscaled', #Problem
'Eighth degree-CONUS Daily Downscaled Climate Projections': 'CMIP3-Downscaled', #Problem
'Earth Policy Institute Atmospheric Carbon Dioxide Concentration, 1000-2012': 'EPI-CO2',
'Daily 1/8-degree gridded meteorological data [1 Jan 1949 - 31 Dec 2010]': 'Maurer',
'NCEP/NCAR Reanalysis': 'NCEP-NCAR',
'NCDC Global Surface Temperature Anomalies': 'NCDC-GST-Anomalies',
}
def __init__(self, data):
super(Dataset, self).__init__(data, fields=self._gcis_fields, trans=self._translations)
self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
'processing_level', 'files', 'data_qualifier', 'access_dt', 'description', 'spatial_ref_sys',
'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri', 'identifier', 'release_dt',
'attributes']
self.translations = {
'data_set_access_date': 'access_dt',
'data_set_publication_year': 'publication_year',
'data_set_original_release_date': 'release_dt',
# HACK elsewhere 'start_time and end_time': '',
'data_set_id': 'native_id',
# HACK elsewhere'': 'doi',
# HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
'data_set_version': 'version',
'data_set_name': 'name',
'data_set_citation': 'cite_metadata',
'data_set_description': 'description',
# Not sure'': 'type',
'data_set_location': 'url',
'data_set_variables': 'attributes'
}
#This desperately needs to get added to the webform
self._identifiers = {
'Global Historical Climatology Network - Daily': 'GHCN-D',
'Global Historical Climatology Network - Monthly': 'GHCN-M',
'NCDC Merged Land and Ocean Surface Temperature': 'MLOST',
'Climate Division Database Version 2': 'CDDv2',
'Eighth degree-CONUS Daily Downscaled Climate Projections by Katharine Hayhoe': 'CMIP3-Downscaled', #Problem
'Eighth degree-CONUS Daily Downscaled Climate Projections': 'CMIP3-Downscaled', #Problem
'Earth Policy Institute Atmospheric Carbon Dioxide Concentration, 1000-2012': 'EPI-CO2',
'Daily 1/8-degree gridded meteorological data [1 Jan 1949 - 31 Dec 2010]': 'Maurer',
'NCEP/NCAR Reanalysis': 'NCEP-NCAR',
'NCDC Global Surface Temperature Anomalies': 'NCDC-GST-Anomalies',
'GRACE Static Field Geopotential Coefficients JPL Release 5.0 GSM': 'GRACE'
}
super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=self.translations)
self.identifier = self._identifiers[self.name] if self.name in self._identifiers else self.name
......@@ -200,14 +209,20 @@ class Dataset(Gcisbase):
self.access_dt = None
try:
self.publication_dt = parse(self.publication_dt).isoformat() if self.publication_dt else None
self.release_dt = parse(self.release_dt).isoformat() if self.release_dt else None
except TypeError:
self.publication_dt = None
self.release_dt = None
def __str__(self):
    #Short human-readable summary: 'Dataset: <identifier> <name>'
    return 'Dataset: {id} {name}'.format(id=self.identifier, name=self.name)
def as_json(self, indent=0):
    """Serialize to JSON, excluding relational fields.

    files, parents, contributors, and references are managed through
    separate GCIS endpoints; the base class already strips uri/href.
    """
    #Forward indent so callers' pretty-printing requests aren't silently dropped
    return super(Dataset, self).as_json(indent=indent,
                                        omit_fields=['files', 'parents', 'contributors', 'references'])
def merge(self, other):
    """Merge another object's fields into this one; returns self.

    For each of our attributes: if our value is empty (None or '') we take
    the other object's value; otherwise we take theirs only when both
    values are sized and theirs is longer ("smarter merge").
    """
    for field in self.__dict__:
        if not hasattr(other, field):
            continue
        ours = self.__dict__[field]
        theirs = getattr(other, field)
        if ours in (None, ''):
            self.__dict__[field] = theirs
        else:
            #Original compared len(theirs) against the raw value of ours,
            #not its length; fixed to compare length to length.
            try:
                if len(theirs) > len(ours):
                    self.__dict__[field] = theirs
            except TypeError:
                #Unsized value on either side (int, None, ...): keep ours
                pass
    return self
\ No newline at end of file
......@@ -45,10 +45,6 @@ class WebformClient:
f = Figure(webform_json[webform_nid]['figure'][0])
if 'images' in webform_json[webform_nid]:
# f.images = [
# Image(image, local_path=self.get_local_image_path(image), remote_path=self.get_remote_image_path(image))
# for image in webform_json[webform_nid]['images']
# ]
for img_idx, image in enumerate(webform_json[webform_nid]['images']):
image_obj = Image(image, local_path=self.get_local_image_path(image),
remote_path=self.get_remote_image_path(image))
......@@ -59,8 +55,11 @@ class WebformClient:
dataset = Dataset(dataset_json)
#Commence the hacks
dataset.temporal_extent = ' '.join(
[dataset_json[field] for field in ['start_time', 'end_time']])
dataset.temporal_extent = ' -> '.join(
[dataset_json[field] for field in ['start_time', 'end_time']
if dataset_json[field] not in [None, '']]
)
dataset.spatial_extent = ' '.join(['{k}: {v};'.format(k=key, v=dataset_json[key]) for key in
['maximum_latitude', 'minimum_latitude', 'maximum_longitude',
'minimum_longitude']])
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment