Commit 9e1794ef authored by abuddenberg

Adding support for datasets (not done yet)

parent 86ba4f13
@@ -39,6 +39,10 @@ class Gcisbase(object):
self.__dict__[k] = getattr(other, k)
return self
def as_json(self, indent=0):
out_fields = self._gcis_fields
return json.dumps({f: self.__dict__[f] for f in out_fields}, indent=indent)
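
A minimal sketch of how the _gcis_fields whitelist drives serialization; the class and field names below are invented for illustration, not part of GCIS's schema:

import json

class Example(object):
    # Hypothetical subclass: only fields listed here are serialized.
    _gcis_fields = ['identifier', 'title']

    def __init__(self):
        self.identifier = 'example-1'
        self.title = 'An Example'
        self.scratch = 'ignored'  # not in _gcis_fields, never emitted

    def as_json(self, indent=0):
        out_fields = self._gcis_fields
        return json.dumps({f: self.__dict__[f] for f in out_fields}, indent=indent)

print Example().as_json(indent=2)  # emits identifier and title only
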
class Figure(Gcisbase):
_gcis_fields = [
@@ -140,9 +144,35 @@ class Image(Gcisbase):
self.local_path = local_path
self.remote_path = remote_path
#This does not accurately reflect GCIS' data model
self.datasets = []
def __str__(self):
return 'Image: {id} {name}'.format(id=self.identifier, name=self.title)
class Dataset(Gcisbase):
_gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
'scale', 'publication_dt', 'temporal_extent', 'version', 'parents', 'scope', 'type',
'processing_level', 'files', 'data_qualifier', 'access_dt', 'description', 'spatial_ref_sys',
'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri', 'identifier']
_translations = {
'data_set_access_date': 'access_dt',
'data_set_publication_year': 'publication_dt',
# HACK elsewhere 'start_time and end_time': '',
'data_set_id': 'native_id',
# HACK elsewhere '': 'doi',
# HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
'data_set_version': 'version',
'data_set_name': 'name',
'data_set_citation': 'cite_metadata',
'data_set_description': 'description',
# Not sure '': 'type',
}
def __init__(self, data):
super(Dataset, self).__init__(data, fields=self._gcis_fields, trans=self._translations)
def __str__(self):
return 'Dataset: {id} {name}'.format(id=self.identifier, name=self.name)
\ No newline at end of file
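
For illustration, a sketch of the key renaming that _translations implies; Gcisbase.__init__ is not shown in this diff, so the translate helper below is a hypothetical stand-in for whatever it does with trans:

def translate(data, trans):
    # Rename known webform keys to GCIS field names; pass others through.
    return dict((trans.get(k, k), v) for k, v in data.items())

webform_data = {'data_set_name': 'GHCN-M', 'data_set_version': '3'}  # invented values
print translate(webform_data, Dataset._translations)
# {'name': 'GHCN-M', 'version': '3'}
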
@@ -5,7 +5,7 @@ import urllib
import json
import requests
from os.path import exists
from domain import Figure, Image, Dataset
def check_image(fn):
@@ -64,15 +64,6 @@ class GcisClient(object):
url = '{b}/report/{rpt}/figure/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id)
return requests.delete(url, headers=self.headers)
@check_image
def create_image(self, image, report_id=None, figure_id=None):
url = '{b}/image/'.format(b=self.base_url, img=image.identifier)
@@ -146,9 +137,8 @@ class GcisClient(object):
def get_keyword_listing(self):
url = '{b}/gcmd_keyword?{p}'.format(b=self.base_url, p=urllib.urlencode({'all': '1'}))
resp = requests.get(url, headers=self.headers)
return resp.json()
def get_keyword(self, key_id):
@@ -157,7 +147,26 @@ class GcisClient(object):
def associate_keyword_with_figure(self, keyword_id, report_id, figure_id):
url = '{b}/report/{rpt}/figure/keywords/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id)
print url
return requests.post(url, data=json.dumps({'identifier': keyword_id}), headers=self.headers)
def get_dataset(self, dataset_id):
url = '{b}/dataset/{ds}'.format(b=self.base_url, ds=dataset_id)
resp = requests.get(url, headers=self.headers)
try:
return Dataset(resp.json())
except ValueError:
raise Exception(resp.text)
def create_dataset(self, dataset):
url = '{b}/dataset/'.format(b=self.base_url)
return requests.post(url, data=dataset.as_json(), headers=self.headers)
def update_dataset(self, dataset):
url = '{b}/dataset/{ds}'.format(b=self.base_url, ds=dataset.identifier)
return requests.post(url, data=dataset.as_json(), headers=self.headers)
def delete_dataset(self, dataset):
url = '{b}/dataset/{ds}'.format(b=self.base_url, ds=dataset.identifier)
return requests.delete(url, headers=self.headers)
# def associate_dataset_with_figure
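
A hedged usage sketch for the new dataset endpoints; the URL, credentials, and identifier below are placeholders, and this assumes a reachable GCIS instance:

gcis = GcisClient('http://gcis.example.gov', 'user@example.gov', 'api-key')  # placeholders
dataset = gcis.get_dataset('ghcn-m')             # GET /dataset/ghcn-m -> Dataset
dataset.description = 'Monthly surface observations'
print gcis.update_dataset(dataset).status_code   # POST /dataset/ghcn-m
print gcis.delete_dataset(dataset).status_code   # DELETE /dataset/ghcn-m
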
@@ -16,10 +16,10 @@ gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', '4cd31dc7173eb47b26f61
sync_metadata_tree = {
#Reports
'nca3draft': {
#Chapter 2
'our-changing-climate': [
#(webform_url, gcis_id)
('/metadata/figures/2506', 'observed-change-in-very-heavy-precipitation'),
# ('/metadata/figures/2997', 'observed-change-in-very-heavy-precipitation-2'),
# ('/metadata/figures/2677', 'observed-us-precipitation-change'),
# ('/metadata/figures/3175', 'observed-us-temperature-change'),
@@ -29,8 +29,26 @@ sync_metadata_tree = {
# ('/metadata/figures/3294', 'projected-changes-in-frostfree-season-length'),
# ('/metadata/figures/3305', 'variation-of-storm-frequency-and-intensity-during-the-cold-season-november--march') #incomplete
],
#Chapter 6
'agriculture': [
# ('/metadata/figures/2872', 'drainage')
],
#Chapter 9
'': [
('/metadata/figures/2896', 'heavy-downpours-disease') #Needs images redone
],
#Chapter 14
'rural': [
# ('/metadata/figures/3306', 'length-growing-season') #Needs images redone
],
#Chapter 19
'great-plains': [
# ('/metadata/figures/2697', 'mean-annual-temp-and-precip') #Needs images redone
],
#Chapter 25
'coastal-zone': [
# ('/metadata/figures/2543', 'coastal-ecosystem-services')
]
}
}
@@ -38,12 +56,21 @@ sync_metadata_tree = {
#These are artifacts from our collection efforts; largely duplicates
webform_skip_list = []
dataset_identifiers = [
('Global Historical Climatology Network - Monthly', 'GHCN-M'),
]
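
Presumably these (long name, short identifier) pairs will map citation text to GCIS dataset ids; one possible lookup, not part of this commit:

name_to_id = dict(dataset_identifiers)
print name_to_id['Global Historical Climatology Network - Monthly']  # GHCN-M
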
def main():
# print_webform_list()
# sync(uploads=False)
f = webform.get_webform('/metadata/figures/2506')
for image in f.images:
print image
for dataset in image.datasets[0:1]:
print dataset
gcis.update_dataset(dataset)
def print_webform_list():
@@ -58,7 +85,7 @@ def print_webform_list():
webform_skip_list.append(webform_url)
def sync(uploads=True):
for report_id in sync_metadata_tree:
for chapter_id in sync_metadata_tree[report_id]:
for figure_ids in sync_metadata_tree[report_id][chapter_id]:
@@ -67,9 +94,12 @@ def sync():
if webform_url in webform_skip_list:
print 'Skipping: ' + webform_url
continue
if uploads:
print 'Attempting to upload: ' + gcis_id
sync_images(webform_url, gcis_id)
print 'Attempting to sync: ' + gcis_id
sync_metadata(report_id, chapter_id, webform_url, gcis_id)
print 'Success!'
def sync_metadata(report_id, chapter_id, webform_url, gcis_id):
......
@@ -5,7 +5,7 @@ import requests
import re
from os.path import join
from domain import Figure, Image, Dataset
def sanitized(pattern):
@@ -38,18 +38,36 @@ class WebformClient:
@sanitized('^/metadata/figures/\d+$')
def get_webform(self, fig_url):
full_url = '{b}{url}?token={t}'.format(b=self.base_url, url=fig_url, t=self.token)
webform_json = requests.get(full_url).json()
#TODO: refactor the service so this isn't necessary
webform_nid = webform_json.keys()[0]
f = Figure(webform_json[webform_nid]['figure'][0])
if 'images' in webform_json[webform_nid]:
# f.images = [
# Image(image, local_path=self.get_local_image_path(image), remote_path=self.get_remote_image_path(image))
# for image in webform_json[webform_nid]['images']
# ]
for img_idx, image in enumerate(webform_json[webform_nid]['images']):
image_obj = Image(image, local_path=self.get_local_image_path(image),
remote_path=self.get_remote_image_path(image))
#TODO: this just keeps getting worse
if 'datasources' in webform_json[webform_nid]['images'][img_idx]:
for dataset_json in webform_json[webform_nid]['images'][img_idx]['datasources']:
dataset = Dataset(dataset_json)
#Commence the hacks
dataset.temporal_extent = ' '.join(
[dataset_json[field] for field in ['start_time', 'end_time']])
dataset.spatial_extent = ' '.join(['{k}: {v};'.format(k=key, v=dataset_json[key]) for key in
['maximum_latitude', 'minimum_latitude', 'maximum_longitude',
'minimum_longitude']])
#TODO: Extract DOIs from citation
image_obj.datasets.append(dataset)
f.images.append(image_obj)
return f
def get_remote_image_path(self, image_json):
......
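
For context, a sketch of the webform 'datasources' fragment that the hacks in get_webform assume; the field names come from the code above, but the values are invented:

dataset_json = {  # hypothetical values
    'data_set_name': 'GHCN-M',
    'start_time': '1880-01-01', 'end_time': '2012-12-31',
    'maximum_latitude': '90', 'minimum_latitude': '-90',
    'maximum_longitude': '180', 'minimum_longitude': '-180',
}
temporal_extent = ' '.join([dataset_json[f] for f in ['start_time', 'end_time']])
spatial_extent = ' '.join(['{k}: {v};'.format(k=key, v=dataset_json[key]) for key in
                           ['maximum_latitude', 'minimum_latitude',
                            'maximum_longitude', 'minimum_longitude']])
print temporal_extent  # 1880-01-01 2012-12-31
print spatial_extent   # maximum_latitude: 90; minimum_latitude: -90; ...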