Commit 00c019e6 authored by abuddenberg
Browse files

First pass at refactoring ugly hacks out of the domain model

parent 14cc4342
...@@ -9,7 +9,7 @@ from dateutil.parser import parse ...@@ -9,7 +9,7 @@ from dateutil.parser import parse
class Gcisbase(object): class Gcisbase(object):
def __init__(self, data, fields=[], trans={}): def __init__(self, data, fields=(), trans=()):
#Setup class variables #Setup class variables
self.gcis_fields = fields self.gcis_fields = fields
self.translations = trans self.translations = trans
...@@ -77,21 +77,14 @@ class GcisObject(Gcisbase): ...@@ -77,21 +77,14 @@ class GcisObject(Gcisbase):
class Figure(GcisObject): class Figure(GcisObject):
def __init__(self, data): def __init__(self, data, trans=()):
self.gcis_fields = [ self.gcis_fields = [
'usage_limits', 'kindred_figures', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max', 'time_start', 'usage_limits', 'kindred_figures', 'time_start', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max',
'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max', 'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images' 'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images'
] ]
self.translations = { super(Figure, self).__init__(data, fields=self.gcis_fields, trans=trans)
'what_is_the_figure_id': 'identifier',
'what_is_the_name_of_the_figure_as_listed_in_the_report': 'title',
'when_was_this_figure_created': 'create_dt',
'what_is_the_chapter_and_figure_number': 'figure_num'
}
super(Figure, self).__init__(data, fields=self.gcis_fields, trans=self.translations)
#Special case for chapter #Special case for chapter
chap_tree = data.pop('chapter', None) chap_tree = data.pop('chapter', None)
...@@ -102,7 +95,7 @@ class Figure(GcisObject): ...@@ -102,7 +95,7 @@ class Figure(GcisObject):
self.images = [Image(image) for image in image_list] if image_list else [] self.images = [Image(image) for image in image_list] if image_list else []
#Hack #Hack
self.identifier = self.identifier.replace('/figure/', '') if self.identifier != '' else '***ID MISSING***' self.identifier = self.identifier.replace('/figure/', '') if self.identifier not in ('', None) else '***ID MISSING***'
@property @property
def figure_num(self): def figure_num(self):
...@@ -165,28 +158,15 @@ class Chapter(GcisObject): ...@@ -165,28 +158,15 @@ class Chapter(GcisObject):
class Image(GcisObject): class Image(GcisObject):
def __init__(self, data, local_path=None, remote_path=None): def __init__(self, data, local_path=None, remote_path=None, trans=()):
self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max', self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href', 'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
'usage_limits'] 'usage_limits']
self.translations = {
'list_any_keywords_for_the_image': 'attributes',
'when_was_this_image_created': 'create_dt',
'what_is_the_image_id': 'identifier',
'maximum_latitude': 'lat_max',
'minimum_latitude': 'lat_min',
'maximum_longitude': 'lon_max',
'minimum_longitude': 'lon_min',
'start_time': 'time_start',
'end_time': 'time_end',
'what_is_the_name_of_the_image_listed_in_the_report': 'title'
}
#Private attributes for handling date parsing #Private attributes for handling date parsing
self._create_dt = None self._create_dt = None
super(Image, self).__init__(data, fields=self.gcis_fields, trans=self.translations) super(Image, self).__init__(data, fields=self.gcis_fields, trans=trans)
#Hack #Hack
self.identifier = self.identifier.replace('/image/', '') self.identifier = self.identifier.replace('/image/', '')
...@@ -213,29 +193,13 @@ class Image(GcisObject): ...@@ -213,29 +193,13 @@ class Image(GcisObject):
class Dataset(GcisObject): class Dataset(GcisObject):
def __init__(self, data): def __init__(self, data, trans=()):
self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata', self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type', 'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
'processing_level', 'files', 'data_qualifier', 'access_dt', 'description', 'spatial_ref_sys', 'processing_level', 'files', 'data_qualifier', 'access_dt', 'description', 'spatial_ref_sys',
'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri', 'identifier', 'release_dt', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri', 'identifier', 'release_dt',
'attributes'] 'attributes']
self.translations = {
'data_set_access_date': 'access_dt',
'data_set_publication_year': 'publication_year',
'data_set_original_release_date': 'release_dt',
# HACK elsewhere 'start_time and end_time': '',
'data_set_id': 'native_id',
# HACK elsewhere'': 'doi',
# HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
'data_set_version': 'version',
'data_set_name': 'name',
'data_set_citation': 'cite_metadata',
'data_set_description': 'description',
# Not sure'': 'type',
'data_set_location': 'url',
'data_set_variables': 'attributes'
}
#This desperately needs to get added to the webform #This desperately needs to get added to the webform
self._identifiers = { self._identifiers = {
...@@ -290,7 +254,7 @@ class Dataset(GcisObject): ...@@ -290,7 +254,7 @@ class Dataset(GcisObject):
self.note = None self.note = None
self.activity = None self.activity = None
super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=self.translations) super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans)
self.identifier = self._identifiers[self.name] if self.name in self._identifiers else self.name self.identifier = self._identifiers[self.name] if self.name in self._identifiers else self.name
...@@ -352,24 +316,12 @@ class Dataset(GcisObject): ...@@ -352,24 +316,12 @@ class Dataset(GcisObject):
class Activity(GcisObject): class Activity(GcisObject):
def __init__(self, data): def __init__(self, data, trans=()):
self.gcis_fields = ['start_time', 'uri', 'methodology', 'data_usage', 'href', 'metholodogies', 'end_time', self.gcis_fields = ['start_time', 'uri', 'methodology', 'data_usage', 'href', 'metholodogies', 'end_time',
'output_artifacts', 'duration', 'identifier', 'publication_maps', 'computing_environment', 'output_artifacts', 'duration', 'identifier', 'publication_maps', 'computing_environment',
'software', 'visualization_software', 'notes'] 'software', 'visualization_software', 'notes']
self.translations = { super(Activity, self).__init__(data, fields=self.gcis_fields, trans=trans)
'how_much_time_was_invested_in_creating_the_image': 'duration',
'35_what_are_all_of_the_files_names_and_extensions_associated_with_this_image': 'output_artifacts',
'what_operating_systems_and_platforms_were_used': 'computing_environment',
'what_analytical_statistical_methods_were_employed_to_the_data': 'methodology',
'describe_how_the_data_was_used_in_the_image_figure_creation': 'data_usage',
'list_the_name_and_version_of_the_software': 'software',
'what_software_applications_were_used_to_manipulate_the_data': 'notes',
'33_what_software_applications_were_used_to_visualize_the_data': 'visualization_software'
}
super(Activity, self).__init__(data, fields=self.gcis_fields, trans=self.translations)
def as_json(self, indent=0): def as_json(self, indent=0):
return super(Activity, self).as_json(omit_fields=['metholodogies', 'publication_maps']) return super(Activity, self).as_json(omit_fields=['metholodogies', 'publication_maps'])
...@@ -382,13 +334,11 @@ class Activity(GcisObject): ...@@ -382,13 +334,11 @@ class Activity(GcisObject):
class Person(Gcisbase): class Person(Gcisbase):
def __init__(self, data): def __init__(self, data, trans=()):
self.gcis_fields = ['first_name', 'last_name', 'middle_name', 'contributors', 'url', 'uri', 'href', 'orcid', self.gcis_fields = ['first_name', 'last_name', 'middle_name', 'contributors', 'url', 'uri', 'href', 'orcid',
'id'] 'id']
self.translations = {} super(Person, self).__init__(data, fields=self.gcis_fields, trans=trans)
super(Person, self).__init__(data, fields=self.gcis_fields, trans=self.translations)
def as_json(self, indent=0): def as_json(self, indent=0):
return super(Person, self).as_json(omit_fields=['contributors']) return super(Person, self).as_json(omit_fields=['contributors'])
...@@ -401,11 +351,9 @@ class Person(Gcisbase): ...@@ -401,11 +351,9 @@ class Person(Gcisbase):
class Organization(Gcisbase): class Organization(Gcisbase):
def __init__(self, data): def __init__(self, data, trans=()):
self.gcis_fields = ['organization_type_identifier', 'url', 'uri', 'href', 'country_code', 'identifier', 'name'] self.gcis_fields = ['organization_type_identifier', 'url', 'uri', 'href', 'country_code', 'identifier', 'name']
self.translations = {}
self._identifiers = { self._identifiers = {
'NOAA NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc', 'NOAA NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc',
'NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc', 'NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc',
...@@ -436,7 +384,7 @@ class Organization(Gcisbase): ...@@ -436,7 +384,7 @@ class Organization(Gcisbase):
'Texas Tech University': 'texas-tech-university' 'Texas Tech University': 'texas-tech-university'
} }
super(Organization, self).__init__(data, fields=self.gcis_fields, trans=self.translations) super(Organization, self).__init__(data, fields=self.gcis_fields, trans=trans)
if not self.identifier: if not self.identifier:
self.identifier = self._identifiers[self.name] if self.name in self._identifiers else None self.identifier = self._identifiers[self.name] if self.name in self._identifiers else None
...@@ -527,19 +475,9 @@ class Role(object): ...@@ -527,19 +475,9 @@ class Role(object):
class Parent(Gcisbase): class Parent(Gcisbase):
def __init__(self, data): def __init__(self, data, trans=()):
self.gcis_fields = ['relationship', 'url', 'publication_type_identifier', 'label', 'activity_uri', 'note'] self.gcis_fields = ['relationship', 'url', 'publication_type_identifier', 'label', 'activity_uri', 'note']
self.translations = {
'what_type_of_publication_was_the_figure_published_in': 'publication_type_identifier',
'name_title': 'label',
'article_title': 'label',
'book_title': 'label',
'web_page_title': 'label',
'conference_title': 'label',
'title': 'label',
}
self.publication_type_map = { self.publication_type_map = {
'report': 'report', 'report': 'report',
'journal_article': 'article', 'journal_article': 'article',
...@@ -638,7 +576,7 @@ class Parent(Gcisbase): ...@@ -638,7 +576,7 @@ class Parent(Gcisbase):
self._publication_type_identifier = None self._publication_type_identifier = None
super(Parent, self).__init__(data, fields=self.gcis_fields, trans=self.translations) super(Parent, self).__init__(data, fields=self.gcis_fields, trans=trans)
#HACK: Set default relationship type #HACK: Set default relationship type
self.relationship = self.relationship if self.relationship else 'prov:wasDerivedFrom' self.relationship = self.relationship if self.relationship else 'prov:wasDerivedFrom'
......
...@@ -14,6 +14,7 @@ from gcis_clients import GcisClient ...@@ -14,6 +14,7 @@ from gcis_clients import GcisClient
import __builtin__ import __builtin__
from os import getenv from os import getenv
import gcis_clients import gcis_clients
import gcis_clients.webform_transforms as trans
def test_gcis_client_version(): def test_gcis_client_version():
...@@ -124,7 +125,7 @@ def test_domain_as_json(): ...@@ -124,7 +125,7 @@ def test_domain_as_json():
def test_chapter_parsing(): def test_chapter_parsing():
webform_fig = Figure(json.loads(webform_json_precip)) webform_fig = Figure(json.loads(webform_json_precip), trans=trans.FIG_TRANSLATIONS)
gcis_fig = Figure(json.loads(test_figure_json)) gcis_fig = Figure(json.loads(test_figure_json))
assert isinstance(webform_fig.ordinal, int) assert isinstance(webform_fig.ordinal, int)
......
...@@ -9,6 +9,7 @@ from dateutil.parser import parse ...@@ -9,6 +9,7 @@ from dateutil.parser import parse
from copy import deepcopy from copy import deepcopy
from domain import Figure, Image, Dataset, Activity, Contributor, Person, Organization, Parent from domain import Figure, Image, Dataset, Activity, Contributor, Person, Organization, Parent
import webform_transforms as trans
def sanitized(pattern): def sanitized(pattern):
...@@ -82,7 +83,8 @@ class WebformClient: ...@@ -82,7 +83,8 @@ class WebformClient:
#TODO: refactor the service so this isn't necessary #TODO: refactor the service so this isn't necessary
webform_nid = webform_json.keys()[0] webform_nid = webform_json.keys()[0]
figure_json = webform_json[webform_nid]['figure'][0] figure_json = webform_json[webform_nid]['figure'][0]
f = Figure(figure_json)
f = Figure(figure_json, trans=trans.FIG_TRANSLATIONS)
#Add contributor info #Add contributor info
if 'list_the_creator_of_the_figure' in figure_json: if 'list_the_creator_of_the_figure' in figure_json:
...@@ -91,12 +93,12 @@ class WebformClient: ...@@ -91,12 +93,12 @@ class WebformClient:
#Add provenance information (wasDerivedFrom parent) #Add provenance information (wasDerivedFrom parent)
if 'what_type_of_source_provided_this_figure' in figure_json and figure_json[ if 'what_type_of_source_provided_this_figure' in figure_json and figure_json[
'what_type_of_source_provided_this_figure'] == 'published_source': 'what_type_of_source_provided_this_figure'] == 'published_source':
f.add_parent(Parent(deepcopy(f.original))) f.add_parent(Parent(deepcopy(f.original), trans=trans.PARENT_TRANSLATIONS))
if 'images' in webform_json[webform_nid]: if 'images' in webform_json[webform_nid]:
for img_idx, image in enumerate(webform_json[webform_nid]['images']): for img_idx, image in enumerate(webform_json[webform_nid]['images']):
image_obj = Image(image, local_path=self.get_local_image_path(image), image_obj = Image(image, local_path=self.get_local_image_path(image),
remote_path=self.get_remote_image_path(image)) remote_path=self.get_remote_image_path(image), trans=trans.IMG_TRANSLATIONS)
#Add contributor info #Add contributor info
if 'list_the_creator_of_the_image' in image: if 'list_the_creator_of_the_image' in image:
...@@ -105,7 +107,7 @@ class WebformClient: ...@@ -105,7 +107,7 @@ class WebformClient:
#TODO: this just keeps getting worse #TODO: this just keeps getting worse
if 'datasources' in webform_json[webform_nid]['images'][img_idx]: if 'datasources' in webform_json[webform_nid]['images'][img_idx]:
for dataset_json in webform_json[webform_nid]['images'][img_idx]['datasources']: for dataset_json in webform_json[webform_nid]['images'][img_idx]['datasources']:
dataset = Dataset(dataset_json) dataset = Dataset(dataset_json, trans=trans.DATASET_TRANSLATIONS)
#Commence the hacks #Commence the hacks
try: try:
...@@ -131,7 +133,7 @@ class WebformClient: ...@@ -131,7 +133,7 @@ class WebformClient:
#Add synthetic identifier #Add synthetic identifier
activity_json['identifier'] = '-'.join((image_obj.identifier.split('-')[0], dataset.identifier, 'process')) activity_json['identifier'] = '-'.join((image_obj.identifier.split('-')[0], dataset.identifier, 'process'))
dataset.activity = Activity(activity_json) dataset.activity = Activity(activity_json, trans=trans.ACT_TRANSLATIONS)
#TODO: Extract DOIs from citation #TODO: Extract DOIs from citation
image_obj.datasets.append(dataset) image_obj.datasets.append(dataset)
......
# Rename tables used when loading webform survey JSON into the domain model.
# Each table is passed to the corresponding domain class via its ``trans``
# argument: keys are the field names as they arrive from the webform, values
# are the attribute names they are stored under.
__author__ = 'abuddenberg'


# Figure: survey questions -> figure attributes.
FIG_TRANSLATIONS = {
    'what_is_the_figure_id': 'identifier',
    'what_is_the_name_of_the_figure_as_listed_in_the_report': 'title',
    'when_was_this_figure_created': 'create_dt',
    'what_is_the_chapter_and_figure_number': 'figure_num',
}


# Image: identification, keywords, then spatial and temporal bounds.
IMG_TRANSLATIONS = {
    'list_any_keywords_for_the_image': 'attributes',
    'when_was_this_image_created': 'create_dt',
    'what_is_the_image_id': 'identifier',

    # Bounding box
    'maximum_latitude': 'lat_max',
    'minimum_latitude': 'lat_min',
    'maximum_longitude': 'lon_max',
    'minimum_longitude': 'lon_min',

    # Time span
    'start_time': 'time_start',
    'end_time': 'time_end',

    'what_is_the_name_of_the_image_listed_in_the_report': 'title',
}


# Dataset: only the directly renamable fields live here.
# Not covered by this table (per the original notes):
#   * start_time / end_time          -- handled by a hack elsewhere
#   * doi                            -- handled by a hack elsewhere
#   * maximum_latitude etc.          -- handled by a hack elsewhere
#   * type                           -- source key unknown ("Not sure")
DATASET_TRANSLATIONS = {
    'data_set_access_date': 'access_dt',
    'data_set_publication_year': 'publication_year',
    'data_set_original_release_date': 'release_dt',
    'data_set_id': 'native_id',
    'data_set_version': 'version',
    'data_set_name': 'name',
    'data_set_citation': 'cite_metadata',
    'data_set_description': 'description',
    'data_set_location': 'url',
    'data_set_variables': 'attributes',
}


# Activity: some question keys carry a numeric prefix ('33_', '35_'); they
# are reproduced exactly because they are matched as literal keys.
ACT_TRANSLATIONS = {
    'how_much_time_was_invested_in_creating_the_image': 'duration',
    '35_what_are_all_of_the_files_names_and_extensions_associated_with_this_image': 'output_artifacts',
    'what_operating_systems_and_platforms_were_used': 'computing_environment',
    'what_analytical_statistical_methods_were_employed_to_the_data': 'methodology',
    'describe_how_the_data_was_used_in_the_image_figure_creation': 'data_usage',
    'list_the_name_and_version_of_the_software': 'software',
    'what_software_applications_were_used_to_manipulate_the_data': 'notes',
    '33_what_software_applications_were_used_to_visualize_the_data': 'visualization_software',
}


# Parent: the publication type gets its own target; every flavour of
# "title" key collapses onto the single 'label' attribute.
PARENT_TRANSLATIONS = {
    'what_type_of_publication_was_the_figure_published_in': 'publication_type_identifier',
}
PARENT_TRANSLATIONS.update(
    (title_key, 'label')
    for title_key in (
        'name_title',
        'article_title',
        'book_title',
        'web_page_title',
        'conference_title',
        'title',
    )
)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment