Commit c7927abf authored by abuddenberg's avatar abuddenberg
Browse files

Refactor domain model to be more congruent with GCIS.

Parent objects now have an activity attribute.
parent 962c225d
......@@ -420,7 +420,7 @@ def sync(replace=False):
# #Remove existing parents
for p in gcis_fig.parents:
gcis.delete_figure_pub_assoc(report_id, gcis_id, p)
gcis.delete_figure_parent_assoc(report_id, gcis_id, p)
for image in gcis_fig.images:
#TODO: There are better ways to do this. Build File support.
......
......@@ -71,13 +71,19 @@ class GcisObject(Gcisbase):
super(GcisObject, self).__init__(data, **kwargs)
def add_contributor(self, contributor):
self.contributors.append(contributor)
if isinstance(contributor, Contributor):
self.contributors.append(contributor)
else:
raise TypeError('Expected Contributor, got {t}'.format(t=type(contributor)))
def add_person(self, person):
self.contributors.append(Contributor(person, Organization()))
def add_parent(self, parent):
self.parents.append(parent)
if isinstance(parent, Parent):
self.parents.append(parent)
else:
raise TypeError('Expected Parent, got {t}'.format(t=type(parent)))
class Figure(GcisObject):
......@@ -203,15 +209,9 @@ class Image(GcisObject):
super(Image, self).__init__(data, fields=self.gcis_fields, trans=trans)
#Hack
self.identifier = self.identifier.replace('/image/', '') if self.identifier else None
self.local_path = local_path
self.remote_path = remote_path
#This does not accurately reflect GCIS' data model
self.datasets = []
@property
def create_dt(self):
return self._create_dt
......@@ -230,10 +230,10 @@ class Image(GcisObject):
class Dataset(GcisObject):
def __init__(self, data, trans=(), known_ids=None):
self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
'processing_level', 'files', 'data_qualifier', 'access_dt', 'description', 'spatial_ref_sys',
'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri', 'identifier', 'release_dt',
'attributes']
'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
'identifier', 'release_dt', 'attributes']
self._identifiers = known_ids
......@@ -242,17 +242,9 @@ class Dataset(GcisObject):
self._access_dt = None
self._publication_year = None
#These do not accurately reflect GCIS' data model
self.note = None
self.activity = None
super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans)
self.identifier = self._identifiers[self.name] if self._identifiers and self.name in self._identifiers else self.name
#Hack to fix a particular kind of bad URL
if self.url and self.url.startswith('ttp://'):
self.url = self.url.replace('ttp://', 'http://')
self.identifier = self._identifiers[self.name] if self._identifiers and self.name in self._identifiers else None
def __repr__(self):
return '<Dataset: id:{id} name:{name}>'.format(id=self.identifier, name=self.name)
......@@ -343,16 +335,11 @@ class Person(Gcisbase):
class Organization(Gcisbase):
def __init__(self, data, trans=(), known_ids=None):
def __init__(self, data, trans=()):
self.gcis_fields = ['organization_type_identifier', 'url', 'uri', 'href', 'country_code', 'identifier', 'name']
self._identifiers = known_ids
super(Organization, self).__init__(data, fields=self.gcis_fields, trans=trans)
if not self.identifier:
self.identifier = self._identifiers[self.name] if self.name in self._identifiers else None
def __repr__(self):
return '<Organization: id:{id} name:{name}>'.format(id=self.identifier, name=self.name)
......@@ -361,12 +348,9 @@ class Organization(Gcisbase):
class Contributor(Gcisbase):
def __init__(self, data, hints=None):
def __init__(self, data):
self.gcis_fields = ['role_type_identifier', 'organization_uri', 'uri', 'href', 'person_uri', 'person_id', 'id']
self.people_role_map = hints
self._role = None
super(Contributor, self).__init__(data, fields=self.gcis_fields)
person_tree = data.pop('person', None)
......@@ -374,19 +358,10 @@ class Contributor(Gcisbase):
self.person = Person(person_tree) if person_tree else None
self.organization = Organization(org_tree) if org_tree else None
@property
def role(self):
#Hack hack hack
if self._role is None and self.person is not None:
horrible_key = ' '.join((self.person.first_name, self.person.last_name))
self._role = Role(self.people_role_map[horrible_key]) if horrible_key in self.people_role_map else None
return self._role
self.role = Role(self.role_type_identifier) if self.role_type_identifier else None
def __repr__(self):
return '<Contributor: {p} {o} Role:{r}>'.format(p=self.person, o=self.organization, r=self.role)
return '<Contributor: Person:{p} Org:{o} Role:{r}>'.format(p=self.person, o=self.organization, r=self.role)
def __str__(self):
return self.__repr__()
......@@ -404,33 +379,21 @@ class Role(object):
class Parent(Gcisbase):
def __init__(self, data, trans=(), pubtype_map=None, search_hints=None):
def __init__(self, data, target_pub=None, trans=(), pubtype_map=None):
self.gcis_fields = ['relationship', 'url', 'publication_type_identifier', 'label', 'activity_uri', 'note']
self.publication_type_map = pubtype_map
self.search_hints = search_hints
self._publication_type_identifier = None
self.activity = None
super(Parent, self).__init__(data, fields=self.gcis_fields, trans=trans)
self.publication = target_pub
#HACK: Set default relationship type
self.relationship = self.relationship if self.relationship else 'prov:wasDerivedFrom'
#HACK to smooth out ambiguous search results
if self.search_hints and self.publication_type_identifier in self.search_hints and self.label in \
self.search_hints[self.publication_type_identifier]:
hint = self.search_hints[self.publication_type_identifier][self.label]
if isinstance(hint, tuple):
type, id = hint
self.publication_type_identifier = type
else:
id = hint
type = self.publication_type_identifier
self.url = '/{type}/{id}'.format(type=self.publication_type_identifier, id=id)
@property
def publication_type_identifier(self):
return self._publication_type_identifier
......@@ -441,16 +404,25 @@ class Parent(Gcisbase):
if self.publication_type_map and value in self.publication_type_map else value
@staticmethod
def from_obj(gcis_obj):
def from_obj(gcis_obj, activity=None):
gcis_obj_type = type(gcis_obj).__name__.lower()
label = gcis_obj.title if hasattr(gcis_obj, 'title') else '***MISSING***'
return Parent({
if hasattr(gcis_obj, 'title'):
label = gcis_obj.title
elif hasattr(gcis_obj, 'name'):
label = gcis_obj.name
else:
label = '***MISSING***'
p = Parent({
'relationship': 'prov:wasDerivedFrom',
'publication_type_identifier': gcis_obj_type,
'url': '/{type}/{id}'.format(type=gcis_obj_type, id=gcis_obj.identifier),
'url': '/{type}/{id}'.format(type=gcis_obj_type, id=gcis_obj.identifier) if gcis_obj_type and gcis_obj.identifier else None,
'label': label
})
}, target_pub=gcis_obj)
p.activity = activity
return p
def __repr__(self):
return '<Parent: rel:{rel} pub_type:{type} url:{url} label:{lbl}>'.format(
......
......@@ -128,8 +128,14 @@ class GcisClient(object):
self.create_image(image),
self.associate_image_with_figure(image.identifier, report_id, figure.identifier)
for c in figure.contributors:
self.associate_contributor_with_figure(c, report_id, chapter_id, figure.identifier)
for p in figure.parents:
self.associate_figure_with_parent(report_id, figure.identifier, p)
if p.activity:
self.create_or_update_activity(p.activity)
activity_id = p.activity.identifier if p.activity else None
self.associate_figure_with_parent(report_id, figure.identifier, p, activity_id=activity_id)
return resp
......@@ -156,7 +162,10 @@ class GcisClient(object):
self.associate_contributor_with_figure(c, report_id, chapter_id, figure.identifier)
for p in figure.parents:
self.associate_figure_with_parent(report_id, figure.identifier, p)
if p.activity:
self.create_or_update_activity(p.activity)
activity_id = p.activity.identifier if p.activity else None
self.associate_figure_with_parent(report_id, figure.identifier, p, activity_id=activity_id)
return resp
......@@ -187,27 +196,34 @@ class GcisClient(object):
self.upload_image_file(image.identifier, image.local_path)
if figure_id and report_id:
self.associate_image_with_figure(image.identifier, report_id, figure_id)
for dataset in image.datasets:
if not self.dataset_exists(dataset.identifier):
self.create_dataset(dataset)
# if not self.activity_exists(dataset.activity.identifier):
# self.create_activity(dataset.activity))
self.create_or_update_activity(dataset.activity)
self.associate_dataset_with_image(dataset.identifier, image.identifier,
activity_id=dataset.activity.identifier)
# for dataset in image.datasets:
# if not self.dataset_exists(dataset.identifier):
# self.create_dataset(dataset)
# # if not self.activity_exists(dataset.activity.identifier):
# # self.create_activity(dataset.activity))
# self.create_or_update_activity(dataset.activity)
# self.associate_image_with_parent(dataset.identifier, image.identifier,
# activity_id=dataset.activity.identifier)
for p in image.parents:
if p.activity:
self.create_or_update_activity(p.activity)
activity_id = p.activity.identifier if p.activity else None
self.associate_image_with_parent(image.identifier, p, activity_id=activity_id)
return resp
@check_image
def update_image(self, image, old_id=None):
url = '{b}/image/{img}'.format(b=self.base_url, img=old_id or image.identifier)
for dataset in image.datasets:
# self.update_activity(dataset.activity)
self.create_or_update_activity(dataset.activity)
self.associate_dataset_with_image(dataset.identifier, image.identifier,
activity_id=dataset.activity.identifier)
for c in image.contributors:
self.associate_contributor_with_image(c, image.identifier)
for p in image.parents:
if p.activity:
self.create_or_update_activity(p.activity)
activity_id = p.activity.identifier if p.activity else None
self.associate_image_with_parent(image.identifier, p, activity_id=activity_id)
return self.s.post(url, data=image.as_json(), verify=False)
@check_image
......@@ -401,45 +417,6 @@ class GcisClient(object):
url = '{b}/dataset/'.format(b=self.base_url)
return self.s.get(url, params={'all': 1}, verify=False)
def associate_dataset_with_image(self, dataset_id, image_id, activity_id=None):
url = '{b}/image/prov/{img}'.format(b=self.base_url, img=image_id)
data = {
'parent_uri': '/dataset/' + dataset_id,
'parent_rel': 'prov:wasDerivedFrom'
}
if activity_id:
data['activity'] = activity_id
try:
self.delete_dataset_image_assoc(dataset_id, image_id)
except AssociationException as e:
print e.value
resp = self.s.post(url, data=json.dumps(data), verify=False)
if resp.status_code == 200:
return resp
else:
raise Exception('Dataset association failed:\n{url}\n{resp}'.format(url=url, resp=resp.text))
def delete_dataset_image_assoc(self, dataset_id, image_id):
url = '{b}/image/prov/{img}'.format(b=self.base_url, img=image_id)
data = {
'delete': {
'parent_uri': '/dataset/' + dataset_id,
'parent_rel': 'prov:wasDerivedFrom'
}
}
resp = self.s.post(url, data=json.dumps(data), verify=False)
if resp.status_code == 200:
return resp
else:
raise AssociationException(
'Dataset dissociation failed:\n{url}\n{resp}\n{d}'.format(url=url, resp=resp.text, d=data))
def create_or_update_dataset(self, dataset):
if self.dataset_exists(dataset.identifier):
print 'Updating dataset: ' + dataset.identifier
......@@ -625,23 +602,25 @@ class GcisClient(object):
return self.s.post(url, data=json.dumps(data), verify=False)
@http_resp
def associate_figure_with_parent(self, report_id, figure_id, parent):
def associate_figure_with_parent(self, report_id, figure_id, parent, activity_id=None):
url = '{b}/report/{rpt}/figure/prov/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id)
data = {
'parent_uri': parent.url,
'parent_rel': parent.relationship
}
if activity_id:
data['activity'] = activity_id
try:
self.delete_figure_pub_assoc(report_id, figure_id, parent)
self.delete_figure_parent_assoc(report_id, figure_id, parent)
except AssociationException as e:
print e.value
resp = self.s.post(url, data=json.dumps(data), verify=False)
return resp
def delete_figure_pub_assoc(self, report_id, figure_id, parent):
def delete_figure_parent_assoc(self, report_id, figure_id, parent):
url = '{b}/report/{rpt}/figure/prov/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id)
data = {
......@@ -658,6 +637,42 @@ class GcisClient(object):
raise AssociationException(
'Parent dissociation failed:\n{url}\n{resp}\n{d}'.format(url=url, resp=resp.text, d=data))
@http_resp
def associate_image_with_parent(self, image_id, parent, activity_id=None):
    """Post a provenance association between an image and a parent.

    :param image_id: identifier interpolated into the image prov URL.
    :param parent: object whose ``url`` and ``relationship`` attributes are
        sent as ``parent_uri`` and ``parent_rel``.
    :param activity_id: optional; when truthy it is sent as ``activity``.
    :return: the response of the POST (before whatever ``http_resp`` does
        with it -- NOTE(review): decorator is defined outside this view).
    """
    url = '{b}/image/prov/{img}'.format(b=self.base_url, img=image_id)

    data = {'parent_uri': parent.url, 'parent_rel': parent.relationship}
    if activity_id:
        data['activity'] = activity_id

    # Remove the same association first; a failed delete is only reported,
    # it does not stop the association from being posted.
    try:
        self.delete_dataset_image_assoc(image_id, parent)
    except AssociationException as err:
        print(err.value)

    return self.s.post(url, data=json.dumps(data), verify=False)
def delete_dataset_image_assoc(self, image_id, parent):
    """Post a 'delete' request for an image/parent provenance association.

    :param image_id: identifier interpolated into the image prov URL.
    :param parent: object whose ``url`` and ``relationship`` attributes
        identify the association to remove.
    :return: the response, when its status code is 200.
    :raises AssociationException: for any other status code; the message
        carries the URL, the response text and the posted payload.
    """
    url = '{b}/image/prov/{img}'.format(b=self.base_url, img=image_id)
    target = {
        'parent_uri': parent.url,
        'parent_rel': parent.relationship
    }
    data = {'delete': target}

    resp = self.s.post(url, data=json.dumps(data), verify=False)
    if resp.status_code != 200:
        raise AssociationException(
            'Parent dissociation failed:\n{url}\n{resp}\n{d}'.format(url=url, resp=resp.text, d=data))
    return resp
def lookup_publication(self, pub_type, name):
url = '{b}/autocomplete'.format(b=self.base_url)
resp = self.s.get(url, params={'q': name, 'items': 15, 'type': pub_type}, verify=False)
......
from __future__ import print_function
__author__ = 'abuddenberg'
import getpass
import requests
import re
from os.path import join, basename
import sys
from gcis_clients.domain import Figure, Image, Dataset, Parent, Contributor, Person, Organization, Activity
from gcis_clients.domain import Figure, Image, Dataset, Parent, Contributor, Person, Organization, Activity, Role
import survey_transforms as trans
def warning(*objs):
    """Print the given objects to stderr, prefixed with 'WARNING: '."""
    stream = sys.stderr
    print("WARNING: ", *objs, file=stream)
def get_credentials():
#First check our magic environment variable (SURVEY_TOKEN)
......@@ -38,7 +42,7 @@ def populate_figure(fig_json):
f.time_start, f.time_end = [d.strip() for d in fig_json['period_record']]
f.lat_min, f.lat_max, f.lon_min, f.lon_max = fig_json['spatial_extent']
except Exception, e:
print 'Figure exception: ', e
warning('Figure exception: ', e)
return f
......@@ -47,27 +51,30 @@ def populate_image(img_json):
img = Image({})
try:
img.title = img_json['graphics_title']
img.identifier = img_json['image_id'] if 'image_id' in img_json and img_json['image_id'] else re.sub('\W', '_', img.title).lower()
img.identifier = img_json['image_id'] if 'image_id' in img_json and img_json['image_id'] else re.sub('\W', '_', img.title.strip().lower())
img.create_dt = img_json['graphics_create_date'].strip()
if any(img_json['period_record']):
img.time_start, img.time_end = [d.strip() for d in img_json['period_record']]
img.lat_min, img.lat_max, img.lon_min, img.lon_max = img_json['spatial_extent']
except Exception, e:
print 'Image exception: ', e
warning('Image exception: ', e)
return img
def populate_dataset(ds_json):
try:
if not ds_json['dataset_name']:
raise ValueError('Dataset name is missing')
ds = Dataset({
'name': ds_json['dataset_name'],
'url': ds_json['dataset_url']
}, known_ids=trans.DATASET_IDS)
except Exception, e:
print 'Dataset exception: ', e
ds = Dataset({})
warning('Dataset exception: ', e)
ds = None
image_select = ds_json['imageSelect'] if 'imageSelect' in ds_json else []
associated_images = [idx for idx, value in enumerate(image_select) if value == 'on']
......@@ -91,37 +98,78 @@ def populate_activity(mthd_json):
act.visualization_software = ', '.join([vs for vs in mthd_json['dataset_visualization_software'] if vs])
except Exception, e:
print 'Activity exception: ', e
warning('Activity exception: ', e)
return act, mthd_json['image_name'], mthd_json['dataset']
def populate_parent(pub_json):
p = Parent({})
try:
p = Parent(pub_json, trans=trans.PARENT_TRANSLATIONS, pubtype_map=trans.PARENT_PUBTYPE_MAP)
p.url = ''
apply_parent_search_hints(p)
except Exception, e:
print 'Parent exception: ', e
warning('Parent exception: ', e)
p = Parent({})
return p
def apply_parent_search_hints(p, hints=None):
    """Rewrite a parent's url from the search-hint table, if it has an entry.

    HACK to smooth out ambiguous search results: when the hint table holds an
    entry for the parent's publication type and label, the parent's url is
    set to '/<publication type>/<id>'. Parents without an entry are left
    untouched.

    :param p: object exposing ``publication_type_identifier``, ``label`` and
        ``url`` attributes; modified in place.
    :param hints: optional mapping of publication type -> {label: hint}.
        Defaults to ``trans.PARENT_SEARCH_HINTS``. A hint is either an id, or
        a ``(publication type, id)`` tuple that also overrides the parent's
        publication type.
    """
    if hints is None:
        hints = trans.PARENT_SEARCH_HINTS
    if not hints:
        return

    pub_type = p.publication_type_identifier
    if pub_type not in hints or p.label not in hints[pub_type]:
        return

    hint = hints[pub_type][p.label]
    if isinstance(hint, tuple):
        hinted_type, pub_id = hint
        p.publication_type_identifier = hinted_type
    else:
        pub_id = hint

    # Re-read the attribute rather than using the hinted type directly: the
    # object may transform the value on assignment.
    p.url = '/{type}/{id}'.format(type=p.publication_type_identifier, id=pub_id)
def populate_contributors(field):
contributor = Contributor({})
s = field.split(',')
name, rest = s[0], s[1:]
name_split = name.split()
first_name, last_name = name_split[0], name_split[-1]
org_name = rest[0] if len(rest) > 0 else None
org_name = rest[0].strip() if len(rest) > 0 else None
contributor = Contributor({}, hints=trans.CONTRIB_ROLES)
contributor.person = Person({'first_name': first_name, 'last_name': last_name})
contributor.organization = Organization({'name': org_name}, known_ids=trans.ORG_IDS)
#Horrifying
person_key = '{fn} {ln}'.format(fn=first_name, ln=last_name)
person = trans.PERSON_TRANSLATIONS[person_key] if person_key in trans.PERSON_TRANSLATIONS else Person({'first_name': first_name, 'last_name': last_name})
contributor.person = person
return contributor
try:
hint_org, hint_role = trans.CONTRIB_ROLES[person_key]
contributor.role = Role(hint_role)
if org_name:
try:
contributor.organization = Organization({
'identifier': trans.ORG_IDS[org_name],
'name': org_name
})
except KeyError:
warning('Missing Organization ID for ', org_name)
else:
print('Using hint for Organization: ' + hint_org)
contributor.organization = Organization({'identifier': hint_org})
return contributor
except KeyError:
warning('Missing role for ' + person_key)
except Exception, e:
warning('Contributor exception: ', e)
return contributor
class SurveyClient:
def __init__(self, url, token, local_download_dir=None):
......@@ -141,7 +189,7 @@ class SurveyClient:
def get_list(self):
url = '{b}/metadata/list?token={t}'.format(b=self.base_url, t=self.token)
print url
print(url)
return requests.get(url).json()
def get_survey(self, fig_url, do_download=False):
......@@ -151,6 +199,7 @@ class SurveyClient:
tier2_json = survey_json[0]['t2'] if len(survey_json) > 0 and survey_json[0]['t2'] is not None else []
f = None
datasets = []
if 'figure' in tier1_json:
figure_json = tier1_json['figure']
......@@ -175,50 +224,59 @@ class SurveyClient:
if 'origination' in img_json and img_json['origination'] not in ('Original',) and 'publication' in img_json:
image_obj.parents.append(populate_parent(img_json['publication']))
elif 'origination' in img_json and img_json['origination'] == 'Original':
image_obj.add_contributor(populate_contributors(img_json['original_agency']))
cont = populate_contributors(img_json['original_agency'])
image_obj.add_contributor(cont)
f.images.append(image_obj)
# # Recent decision: No default images
# elif 'figure' in tier1_json:
# default_image = populate_image(tier1_json['figure'])
# f.images.append(default_image)
if 'datasets' in tier1_json:
datasets = [populate_dataset(ds) for ds in tier1_json['datasets']]
datasets = [populate_dataset(ds) for ds in tier1_json['datasets'] if ds]
# Create activities
activities = [populate_activity(m) for m in tier2_json['methods']] if tier2_json and 'methods' in tier2_json else []
if activities:
print('Found activities: ', activities)
for ds, img_idxs in datasets:
# Associate datasets with images if we have images
if 'images' in tier1_json:
for idx in img_idxs:
# Associate activities with datasets
for act, img_name, ds_name in activities:
if img_name == f.images[idx].title and ds_name == ds.name:
ds.activity = act
try:
f.images[idx].datasets.append(ds)
p = Parent.from_obj(ds)