Commit c7927abf authored by abuddenberg
Browse files

Refactor domain model to be more congruent with GCIS.

Parent objects now have an activity attribute.
parent 962c225d
...@@ -420,7 +420,7 @@ def sync(replace=False): ...@@ -420,7 +420,7 @@ def sync(replace=False):
# #Remove existing parents # #Remove existing parents
for p in gcis_fig.parents: for p in gcis_fig.parents:
gcis.delete_figure_pub_assoc(report_id, gcis_id, p) gcis.delete_figure_parent_assoc(report_id, gcis_id, p)
for image in gcis_fig.images: for image in gcis_fig.images:
#TODO: There are better ways to do this. Build File support. #TODO: There are better ways to do this. Build File support.
......
...@@ -71,13 +71,19 @@ class GcisObject(Gcisbase): ...@@ -71,13 +71,19 @@ class GcisObject(Gcisbase):
super(GcisObject, self).__init__(data, **kwargs) super(GcisObject, self).__init__(data, **kwargs)
def add_contributor(self, contributor): def add_contributor(self, contributor):
self.contributors.append(contributor) if isinstance(contributor, Contributor):
self.contributors.append(contributor)
else:
raise TypeError('Expected Contributor, got {t}'.format(t=type(contributor)))
def add_person(self, person): def add_person(self, person):
self.contributors.append(Contributor(person, Organization())) self.contributors.append(Contributor(person, Organization()))
def add_parent(self, parent): def add_parent(self, parent):
self.parents.append(parent) if isinstance(parent, Parent):
self.parents.append(parent)
else:
raise TypeError('Expected Parent, got {t}'.format(t=type(parent)))
class Figure(GcisObject): class Figure(GcisObject):
...@@ -203,15 +209,9 @@ class Image(GcisObject): ...@@ -203,15 +209,9 @@ class Image(GcisObject):
super(Image, self).__init__(data, fields=self.gcis_fields, trans=trans) super(Image, self).__init__(data, fields=self.gcis_fields, trans=trans)
#Hack
self.identifier = self.identifier.replace('/image/', '') if self.identifier else None
self.local_path = local_path self.local_path = local_path
self.remote_path = remote_path self.remote_path = remote_path
#This does not accurately reflect GCIS' data model
self.datasets = []
@property @property
def create_dt(self): def create_dt(self):
return self._create_dt return self._create_dt
...@@ -230,10 +230,10 @@ class Image(GcisObject): ...@@ -230,10 +230,10 @@ class Image(GcisObject):
class Dataset(GcisObject): class Dataset(GcisObject):
def __init__(self, data, trans=(), known_ids=None): def __init__(self, data, trans=(), known_ids=None):
self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata', self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type', 'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
'processing_level', 'files', 'data_qualifier', 'access_dt', 'description', 'spatial_ref_sys', 'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri', 'identifier', 'release_dt', 'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
'attributes'] 'identifier', 'release_dt', 'attributes']
self._identifiers = known_ids self._identifiers = known_ids
...@@ -242,17 +242,9 @@ class Dataset(GcisObject): ...@@ -242,17 +242,9 @@ class Dataset(GcisObject):
self._access_dt = None self._access_dt = None
self._publication_year = None self._publication_year = None
#These do not accurately reflect GCIS' data model
self.note = None
self.activity = None
super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans) super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans)
self.identifier = self._identifiers[self.name] if self._identifiers and self.name in self._identifiers else self.name self.identifier = self._identifiers[self.name] if self._identifiers and self.name in self._identifiers else None
#Hack to fix a particular kind of bad URL
if self.url and self.url.startswith('ttp://'):
self.url = self.url.replace('ttp://', 'http://')
def __repr__(self): def __repr__(self):
return '<Dataset: id:{id} name:{name}>'.format(id=self.identifier, name=self.name) return '<Dataset: id:{id} name:{name}>'.format(id=self.identifier, name=self.name)
...@@ -343,16 +335,11 @@ class Person(Gcisbase): ...@@ -343,16 +335,11 @@ class Person(Gcisbase):
class Organization(Gcisbase): class Organization(Gcisbase):
def __init__(self, data, trans=(), known_ids=None): def __init__(self, data, trans=()):
self.gcis_fields = ['organization_type_identifier', 'url', 'uri', 'href', 'country_code', 'identifier', 'name'] self.gcis_fields = ['organization_type_identifier', 'url', 'uri', 'href', 'country_code', 'identifier', 'name']
self._identifiers = known_ids
super(Organization, self).__init__(data, fields=self.gcis_fields, trans=trans) super(Organization, self).__init__(data, fields=self.gcis_fields, trans=trans)
if not self.identifier:
self.identifier = self._identifiers[self.name] if self.name in self._identifiers else None
def __repr__(self): def __repr__(self):
return '<Organization: id:{id} name:{name}>'.format(id=self.identifier, name=self.name) return '<Organization: id:{id} name:{name}>'.format(id=self.identifier, name=self.name)
...@@ -361,12 +348,9 @@ class Organization(Gcisbase): ...@@ -361,12 +348,9 @@ class Organization(Gcisbase):
class Contributor(Gcisbase): class Contributor(Gcisbase):
def __init__(self, data, hints=None): def __init__(self, data):
self.gcis_fields = ['role_type_identifier', 'organization_uri', 'uri', 'href', 'person_uri', 'person_id', 'id'] self.gcis_fields = ['role_type_identifier', 'organization_uri', 'uri', 'href', 'person_uri', 'person_id', 'id']
self.people_role_map = hints
self._role = None
super(Contributor, self).__init__(data, fields=self.gcis_fields) super(Contributor, self).__init__(data, fields=self.gcis_fields)
person_tree = data.pop('person', None) person_tree = data.pop('person', None)
...@@ -374,19 +358,10 @@ class Contributor(Gcisbase): ...@@ -374,19 +358,10 @@ class Contributor(Gcisbase):
self.person = Person(person_tree) if person_tree else None self.person = Person(person_tree) if person_tree else None
self.organization = Organization(org_tree) if org_tree else None self.organization = Organization(org_tree) if org_tree else None
self.role = Role(self.role_type_identifier) if self.role_type_identifier else None
@property
def role(self):
    """Return the contributor's role, resolving it lazily on first access.

    When no role has been cached and a person is attached, the person's
    "first_name last_name" string is looked up in ``self.people_role_map``
    and, if present, the mapped value is wrapped in ``Role`` and cached.

    Returns:
        The cached role object, or None when nothing could be resolved.
    """
    if self._role is None and self.person is not None:
        # HACK: roles are keyed by the person's display name.
        # The map may legitimately be None (no hints supplied), so fall back
        # to an empty mapping instead of failing on the membership test.
        role_map = self.people_role_map or {}
        first_name = self.person.first_name
        last_name = self.person.last_name
        # A missing name part cannot form a lookup key; leave the role unset.
        if first_name is not None and last_name is not None:
            horrible_key = ' '.join((first_name, last_name))
            if horrible_key in role_map:
                self._role = Role(role_map[horrible_key])
    return self._role
def __repr__(self): def __repr__(self):
return '<Contributor: {p} {o} Role:{r}>'.format(p=self.person, o=self.organization, r=self.role) return '<Contributor: Person:{p} Org:{o} Role:{r}>'.format(p=self.person, o=self.organization, r=self.role)
def __str__(self): def __str__(self):
return self.__repr__() return self.__repr__()
...@@ -404,33 +379,21 @@ class Role(object): ...@@ -404,33 +379,21 @@ class Role(object):
class Parent(Gcisbase): class Parent(Gcisbase):
def __init__(self, data, trans=(), pubtype_map=None, search_hints=None): def __init__(self, data, target_pub=None, trans=(), pubtype_map=None):
self.gcis_fields = ['relationship', 'url', 'publication_type_identifier', 'label', 'activity_uri', 'note'] self.gcis_fields = ['relationship', 'url', 'publication_type_identifier', 'label', 'activity_uri', 'note']
self.publication_type_map = pubtype_map self.publication_type_map = pubtype_map
self.search_hints = search_hints
self._publication_type_identifier = None self._publication_type_identifier = None
self.activity = None
super(Parent, self).__init__(data, fields=self.gcis_fields, trans=trans) super(Parent, self).__init__(data, fields=self.gcis_fields, trans=trans)
self.publication = target_pub
#HACK: Set default relationship type #HACK: Set default relationship type
self.relationship = self.relationship if self.relationship else 'prov:wasDerivedFrom' self.relationship = self.relationship if self.relationship else 'prov:wasDerivedFrom'
#HACK to smooth out ambiguous search results
if self.search_hints and self.publication_type_identifier in self.search_hints and self.label in \
self.search_hints[self.publication_type_identifier]:
hint = self.search_hints[self.publication_type_identifier][self.label]
if isinstance(hint, tuple):
type, id = hint
self.publication_type_identifier = type
else:
id = hint
type = self.publication_type_identifier
self.url = '/{type}/{id}'.format(type=self.publication_type_identifier, id=id)
@property @property
def publication_type_identifier(self): def publication_type_identifier(self):
return self._publication_type_identifier return self._publication_type_identifier
...@@ -441,16 +404,25 @@ class Parent(Gcisbase): ...@@ -441,16 +404,25 @@ class Parent(Gcisbase):
if self.publication_type_map and value in self.publication_type_map else value if self.publication_type_map and value in self.publication_type_map else value
@staticmethod @staticmethod
def from_obj(gcis_obj): def from_obj(gcis_obj, activity=None):
gcis_obj_type = type(gcis_obj).__name__.lower() gcis_obj_type = type(gcis_obj).__name__.lower()
label = gcis_obj.title if hasattr(gcis_obj, 'title') else '***MISSING***'
return Parent({ if hasattr(gcis_obj, 'title'):
label = gcis_obj.title
elif hasattr(gcis_obj, 'name'):
label = gcis_obj.name
else:
label = '***MISSING***'
p = Parent({
'relationship': 'prov:wasDerivedFrom', 'relationship': 'prov:wasDerivedFrom',
'publication_type_identifier': gcis_obj_type, 'publication_type_identifier': gcis_obj_type,
'url': '/{type}/{id}'.format(type=gcis_obj_type, id=gcis_obj.identifier), 'url': '/{type}/{id}'.format(type=gcis_obj_type, id=gcis_obj.identifier) if gcis_obj_type and gcis_obj.identifier else None,
'label': label 'label': label
}) }, target_pub=gcis_obj)
p.activity = activity
return p
def __repr__(self): def __repr__(self):
return '<Parent: rel:{rel} pub_type:{type} url:{url} label:{lbl}>'.format( return '<Parent: rel:{rel} pub_type:{type} url:{url} label:{lbl}>'.format(
......
...@@ -128,8 +128,14 @@ class GcisClient(object): ...@@ -128,8 +128,14 @@ class GcisClient(object):
self.create_image(image), self.create_image(image),
self.associate_image_with_figure(image.identifier, report_id, figure.identifier) self.associate_image_with_figure(image.identifier, report_id, figure.identifier)
for c in figure.contributors:
self.associate_contributor_with_figure(c, report_id, chapter_id, figure.identifier)
for p in figure.parents: for p in figure.parents:
self.associate_figure_with_parent(report_id, figure.identifier, p) if p.activity:
self.create_or_update_activity(p.activity)
activity_id = p.activity.identifier if p.activity else None
self.associate_figure_with_parent(report_id, figure.identifier, p, activity_id=activity_id)
return resp return resp
...@@ -156,7 +162,10 @@ class GcisClient(object): ...@@ -156,7 +162,10 @@ class GcisClient(object):
self.associate_contributor_with_figure(c, report_id, chapter_id, figure.identifier) self.associate_contributor_with_figure(c, report_id, chapter_id, figure.identifier)
for p in figure.parents: for p in figure.parents:
self.associate_figure_with_parent(report_id, figure.identifier, p) if p.activity:
self.create_or_update_activity(p.activity)
activity_id = p.activity.identifier if p.activity else None
self.associate_figure_with_parent(report_id, figure.identifier, p, activity_id=activity_id)
return resp return resp
...@@ -187,27 +196,34 @@ class GcisClient(object): ...@@ -187,27 +196,34 @@ class GcisClient(object):
self.upload_image_file(image.identifier, image.local_path) self.upload_image_file(image.identifier, image.local_path)
if figure_id and report_id: if figure_id and report_id:
self.associate_image_with_figure(image.identifier, report_id, figure_id) self.associate_image_with_figure(image.identifier, report_id, figure_id)
for dataset in image.datasets: # for dataset in image.datasets:
if not self.dataset_exists(dataset.identifier): # if not self.dataset_exists(dataset.identifier):
self.create_dataset(dataset) # self.create_dataset(dataset)
# if not self.activity_exists(dataset.activity.identifier): # # if not self.activity_exists(dataset.activity.identifier):
# self.create_activity(dataset.activity)) # # self.create_activity(dataset.activity))
self.create_or_update_activity(dataset.activity) # self.create_or_update_activity(dataset.activity)
self.associate_dataset_with_image(dataset.identifier, image.identifier, # self.associate_image_with_parent(dataset.identifier, image.identifier,
activity_id=dataset.activity.identifier) # activity_id=dataset.activity.identifier)
for p in image.parents:
if p.activity:
self.create_or_update_activity(p.activity)
activity_id = p.activity.identifier if p.activity else None
self.associate_image_with_parent(image.identifier, p, activity_id=activity_id)
return resp return resp
@check_image @check_image
def update_image(self, image, old_id=None): def update_image(self, image, old_id=None):
url = '{b}/image/{img}'.format(b=self.base_url, img=old_id or image.identifier) url = '{b}/image/{img}'.format(b=self.base_url, img=old_id or image.identifier)
for dataset in image.datasets:
# self.update_activity(dataset.activity)
self.create_or_update_activity(dataset.activity)
self.associate_dataset_with_image(dataset.identifier, image.identifier,
activity_id=dataset.activity.identifier)
for c in image.contributors: for c in image.contributors:
self.associate_contributor_with_image(c, image.identifier) self.associate_contributor_with_image(c, image.identifier)
for p in image.parents:
if p.activity:
self.create_or_update_activity(p.activity)
activity_id = p.activity.identifier if p.activity else None
self.associate_image_with_parent(image.identifier, p, activity_id=activity_id)
return self.s.post(url, data=image.as_json(), verify=False) return self.s.post(url, data=image.as_json(), verify=False)
@check_image @check_image
...@@ -401,45 +417,6 @@ class GcisClient(object): ...@@ -401,45 +417,6 @@ class GcisClient(object):
url = '{b}/dataset/'.format(b=self.base_url) url = '{b}/dataset/'.format(b=self.base_url)
return self.s.get(url, params={'all': 1}, verify=False) return self.s.get(url, params={'all': 1}, verify=False)
def associate_dataset_with_image(self, dataset_id, image_id, activity_id=None):
    """Record that an image was derived from a dataset.

    Posts a ``prov:wasDerivedFrom`` relationship between ``/dataset/<dataset_id>``
    and the image to the image's ``prov`` endpoint. Any existing association
    is deleted first; a failed delete is only printed, not raised.

    Args:
        dataset_id: Identifier of the parent dataset (concatenated into the URI).
        image_id: Identifier of the image.
        activity_id: Optional activity identifier; sent as ``activity`` when truthy.

    Returns:
        The response object of the association POST (status 200).

    Raises:
        Exception: If the association POST does not return status 200.
    """
    url = '{b}/image/prov/{img}'.format(b=self.base_url, img=image_id)

    data = {
        'parent_uri': '/dataset/' + dataset_id,
        'parent_rel': 'prov:wasDerivedFrom'
    }
    if activity_id:
        data['activity'] = activity_id

    # Best effort: remove a previous association before creating the new one.
    try:
        self.delete_dataset_image_assoc(dataset_id, image_id)
    except AssociationException as e:
        # Single-argument call form prints identically on Python 2 and 3.
        print(e.value)

    # NOTE(review): verify=False disables TLS certificate verification.
    resp = self.s.post(url, data=json.dumps(data), verify=False)

    if resp.status_code != 200:
        raise Exception('Dataset association failed:\n{url}\n{resp}'.format(url=url, resp=resp.text))
    return resp
def delete_dataset_image_assoc(self, dataset_id, image_id):
    """Remove the ``prov:wasDerivedFrom`` link between a dataset and an image.

    Posts a ``delete`` payload naming ``/dataset/<dataset_id>`` to the image's
    ``prov`` endpoint.

    Args:
        dataset_id: Identifier of the parent dataset (concatenated into the URI).
        image_id: Identifier of the image.

    Returns:
        The response object when the server answers with status 200.

    Raises:
        AssociationException: If the server answers with any other status.
    """
    url = '{b}/image/prov/{img}'.format(b=self.base_url, img=image_id)

    data = {
        'delete': {
            'parent_uri': '/dataset/' + dataset_id,
            'parent_rel': 'prov:wasDerivedFrom'
        }
    }

    # NOTE(review): verify=False disables TLS certificate verification.
    resp = self.s.post(url, data=json.dumps(data), verify=False)

    if resp.status_code != 200:
        raise AssociationException(
            'Dataset dissociation failed:\n{url}\n{resp}\n{d}'.format(url=url, resp=resp.text, d=data))
    return resp
def create_or_update_dataset(self, dataset): def create_or_update_dataset(self, dataset):
if self.dataset_exists(dataset.identifier): if self.dataset_exists(dataset.identifier):
print 'Updating dataset: ' + dataset.identifier print 'Updating dataset: ' + dataset.identifier
...@@ -625,23 +602,25 @@ class GcisClient(object): ...@@ -625,23 +602,25 @@ class GcisClient(object):
return self.s.post(url, data=json.dumps(data), verify=False) return self.s.post(url, data=json.dumps(data), verify=False)
@http_resp @http_resp
def associate_figure_with_parent(self, report_id, figure_id, parent): def associate_figure_with_parent(self, report_id, figure_id, parent, activity_id=None):
url = '{b}/report/{rpt}/figure/prov/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id) url = '{b}/report/{rpt}/figure/prov/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id)
data = { data = {
'parent_uri': parent.url, 'parent_uri': parent.url,
'parent_rel': parent.relationship 'parent_rel': parent.relationship
} }
if activity_id:
data['activity'] = activity_id
try: try:
self.delete_figure_pub_assoc(report_id, figure_id, parent) self.delete_figure_parent_assoc(report_id, figure_id, parent)
except AssociationException as e: except AssociationException as e:
print e.value print e.value
resp = self.s.post(url, data=json.dumps(data), verify=False) resp = self.s.post(url, data=json.dumps(data), verify=False)
return resp return resp
def delete_figure_pub_assoc(self, report_id, figure_id, parent): def delete_figure_parent_assoc(self, report_id, figure_id, parent):
url = '{b}/report/{rpt}/figure/prov/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id) url = '{b}/report/{rpt}/figure/prov/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id)
data = { data = {
...@@ -658,6 +637,42 @@ class GcisClient(object): ...@@ -658,6 +637,42 @@ class GcisClient(object):
raise AssociationException( raise AssociationException(
'Parent dissociation failed:\n{url}\n{resp}\n{d}'.format(url=url, resp=resp.text, d=data)) 'Parent dissociation failed:\n{url}\n{resp}\n{d}'.format(url=url, resp=resp.text, d=data))
@http_resp
def associate_image_with_parent(self, image_id, parent, activity_id=None):
    """Associate an image with a parent publication via the image's prov endpoint.

    Any existing association with the same parent is deleted first; a failed
    delete is only printed, not raised. The association POST's response is
    returned without a status check here.
    NOTE(review): http_resp is defined outside this view; presumably it
    post-processes the returned response -- confirm.

    Args:
        image_id: Identifier of the image.
        parent: Object exposing ``url`` and ``relationship`` attributes, sent as
            ``parent_uri`` and ``parent_rel``.
        activity_id: Optional activity identifier; sent as ``activity`` when truthy.

    Returns:
        The response object of the association POST.
    """
    url = '{b}/image/prov/{img}'.format(b=self.base_url, img=image_id)

    data = {
        'parent_uri': parent.url,
        'parent_rel': parent.relationship
    }
    if activity_id:
        data['activity'] = activity_id

    # Best effort: remove a previous association before creating the new one.
    try:
        self.delete_dataset_image_assoc(image_id, parent)
    except AssociationException as e:
        # Single-argument call form prints identically on Python 2 and 3.
        print(e.value)

    # NOTE(review): verify=False disables TLS certificate verification.
    return self.s.post(url, data=json.dumps(data), verify=False)
def delete_dataset_image_assoc(self, image_id, parent):
    """Remove the association between an image and a parent publication.

    Posts a ``delete`` payload carrying the parent's ``url`` and
    ``relationship`` to the image's ``prov`` endpoint.

    Args:
        image_id: Identifier of the image.
        parent: Object exposing ``url`` and ``relationship`` attributes.

    Returns:
        The response object when the server answers with status 200.

    Raises:
        AssociationException: If the server answers with any other status.
    """
    url = '{b}/image/prov/{img}'.format(b=self.base_url, img=image_id)

    data = {
        'delete': {
            'parent_uri': parent.url,
            'parent_rel': parent.relationship
        }
    }

    # NOTE(review): verify=False disables TLS certificate verification.
    resp = self.s.post(url, data=json.dumps(data), verify=False)

    if resp.status_code != 200:
        raise AssociationException(
            'Parent dissociation failed:\n{url}\n{resp}\n{d}'.format(url=url, resp=resp.text, d=data))
    return resp
def lookup_publication(self, pub_type, name): def lookup_publication(self, pub_type, name):
url = '{b}/autocomplete'.format(b=self.base_url) url = '{b}/autocomplete'.format(b=self.base_url)
resp = self.s.get(url, params={'q': name, 'items': 15, 'type': pub_type}, verify=False) resp = self.s.get(url, params={'q': name, 'items': 15, 'type': pub_type}, verify=False)
......
from __future__ import print_function
__author__ = 'abuddenberg' __author__ = 'abuddenberg'
import getpass import getpass
import requests import requests
import re import re
from os.path import join, basename from os.path import join, basename
import sys
from gcis_clients.domain import Figure, Image, Dataset, Parent, Contributor, Person, Organization, Activity from gcis_clients.domain import Figure, Image, Dataset, Parent, Contributor, Person, Organization, Activity, Role
import survey_transforms as trans import survey_transforms as trans
def warning(*objs):
    """Print a warning message to standard error.

    All positional arguments are printed after a ``WARNING: `` prefix,
    separated by print's default single space.
    """
    # Needs print as a function (the module imports print_function from __future__).
    print("WARNING: ", *objs, file=sys.stderr)
def get_credentials(): def get_credentials():
#First check our magic environment variable (SURVEY_TOKEN) #First check our magic environment variable (SURVEY_TOKEN)
...@@ -38,7 +42,7 @@ def populate_figure(fig_json): ...@@ -38,7 +42,7 @@ def populate_figure(fig_json):
f.time_start, f.time_end = [d.strip() for d in fig_json['period_record']] f.time_start, f.time_end = [d.strip() for d in fig_json['period_record']]
f.lat_min, f.lat_max, f.lon_min, f.lon_max = fig_json['spatial_extent'] f.lat_min, f.lat_max, f.lon_min, f.lon_max = fig_json['spatial_extent']
except Exception, e: except Exception, e:
print 'Figure exception: ', e warning('Figure exception: ', e)
return f return f
...@@ -47,27 +51,30 @@ def populate_image(img_json): ...@@ -47,27 +51,30 @@ def populate_image(img_json):
img = Image({}) img = Image({})
try: try:
img.title = img_json['graphics_title'] img.title = img_json['graphics_title']
img.identifier = img_json['image_id'] if 'image_id' in img_json and img_json['image_id'] else re.sub('\W', '_', img.title).lower() img.identifier = img_json['image_id'] if 'image_id' in img_json and img_json['image_id'] else re.sub('\W', '_', img.title.strip().lower())
img.create_dt = img_json['graphics_create_date'].strip() img.create_dt = img_json['graphics_create_date'].strip()
if any(img_json['period_record']): if any(img_json['period_record']):
img.time_start, img.time_end = [d.strip() for d in img_json['period_record']] img.time_start, img.time_end = [d.strip() for d in img_json['period_record']]
img.lat_min, img.lat_max, img.lon_min, img.lon_max = img_json['spatial_extent'] img.lat_min, img.lat_max, img.lon_min, img.lon_max = img_json['spatial_extent']
except Exception, e: except Exception, e:
print 'Image exception: ', e warning('Image exception: ', e)
return img return img
def populate_dataset(ds_json): def populate_dataset(ds_json):
try: try:
if not ds_json['dataset_name']:
raise ValueError('Dataset name is missing')
ds = Dataset({ ds = Dataset({
'name': ds_json['dataset_name'], 'name': ds_json['dataset_name'],
'url': ds_json['dataset_url'] 'url': ds_json['dataset_url']
}, known_ids=trans.DATASET_IDS) }, known_ids=trans.DATASET_IDS)
except Exception, e: except Exception, e:
print 'Dataset exception: ', e warning('Dataset exception: ', e)
ds = Dataset({})