from types import NoneType
__author__ = 'abuddenberg'
from copy import deepcopy
import json
from dateutil.parser import parse
class Gcisbase(object):
......@@ -88,7 +91,7 @@ class Figure(Gcisbase):
fig = int(fig)
except ValueError:
raise Exception('Invalid chapter/figure numbers: ' + value)
print 'Invalid chapter/figure numbers: ' + value
if self.chapter:
self.chapter.number = chp
......@@ -171,8 +174,40 @@ class Dataset(Gcisbase):
# Not sure'': 'type',
#This desperately needs to get added to the webform
_identifiers = {
'Global Historical Climatology Network - Daily': 'GHCN-D',
'Global Historical Climatology Network - Monthly': 'GHCN-M',
'NCDC Merged Land and Ocean Surface Temperature': 'MLOST',
'Climate Division Database Version 2': 'CDDv2',
'Eighth degree-CONUS Daily Downscaled Climate Projections by Katharine Hayhoe': 'CMIP3-Downscaled', #Problem
'Eighth degree-CONUS Daily Downscaled Climate Projections': 'CMIP3-Downscaled', #Problem
'Earth Policy Institute Atmospheric Carbon Dioxide Concentration, 1000-2012': 'EPI-CO2',
'Daily 1/8-degree gridded meteorological data [1 Jan 1949 - 31 Dec 2010]': 'Maurer',
'NCEP/NCAR Reanalysis': 'NCEP-NCAR',
'NCDC Global Surface Temperature Anomalies': 'NCDC-GST-Anomalies',
def __init__(self, data):
super(Dataset, self).__init__(data, fields=self._gcis_fields, trans=self._translations)
self.identifier = self._identifiers[] if in self._identifiers else
self.access_dt = parse(self.access_dt).isoformat() if self.access_dt else None
except TypeError:
# print "Problem with date: " + self.access_dt
self.access_dt = None
self.publication_dt = parse(self.publication_dt).isoformat() if self.publication_dt else None
except TypeError:
self.publication_dt = None
def __str__(self):
return 'Dataset: {id} {name}'.format(id=self.identifier,
return 'Dataset: {id} {name}'.format(id=self.identifier,
def as_json(self, indent=0):
    """Serialize the dataset's fields to a JSON object string.

    The emitted keys are the names in ``self._gcis_fields`` minus the
    excluded names below. Values are read directly from the instance
    ``__dict__``, so a listed field that was never set on the instance
    raises ``KeyError``.

    :param indent: passed straight through to ``json.dumps``.
    :return: JSON text with keys in sorted order.
    """
    # Exclude a couple of fields
    excluded = {'files', 'parents', 'contributors', 'references', 'href', 'uri'}
    out_fields = set(self._gcis_fields) - excluded

    # Set iteration order is arbitrary; sorting the keys keeps the output
    # stable from run to run without changing the JSON's meaning.
    return json.dumps(
        {f: self.__dict__[f] for f in out_fields},
        indent=indent,
        sort_keys=True
    )
......@@ -157,8 +157,9 @@ class GcisClient(object):
except ValueError:
raise Exception(resp.text())
def create_data(self, dataset):
def create_dataset(self, dataset):
url = '{b}/dataset/'.format(b=self.base_url)
print dataset.as_json(indent=4)
return, data=dataset.as_json(), headers=self.headers)
def update_dataset(self, dataset):
......@@ -56,21 +56,28 @@ sync_metadata_tree = {
#These are artifacts from our collection efforts; largely duplicates
webform_skip_list = []
dataset_identifiers = [
('Global Historical Climatology Network - Monthly', 'GHCN-M'),
def main():
    """Fetch one hard-coded webform figure and print its images and datasets.

    For every image on the figure, the image itself is printed, followed by
    at most the first of its datasets.
    """
    # print_webform_list()
    # sync(uploads=False)
    f = webform.get_webform('/metadata/figures/2506')

    for image in f.images:
        # Single-argument parenthesised print behaves the same on Python 2
        # and also parses on Python 3.
        print(image)
        # The [0:1] slice limits output to the first dataset (none if empty).
        for dataset in image.datasets[0:1]:
            print(dataset)
    # f = webform.get_webform('/metadata/figures/2506')
def aggregate_datasets():
    """Collect the datasets referenced by every listed webform.

    Walks each entry from ``webform.get_list()``, loads the webform at the
    entry's ``'url'``, and records every dataset attached to any of its
    images, keyed by ``dataset.identifier``. When two datasets share an
    identifier, the one encountered last replaces the earlier one.

    :return: dict mapping dataset identifier to dataset object.
    """
    dataset_set = {}

    for item in webform.get_list():
        webform_url = item['url']
        f = webform.get_webform(webform_url)

        # aggregate datasets
        for image in f.images:
            for dataset in image.datasets:
                dataset_set[dataset.identifier] = dataset

    # NOTE(review): the visible code built this mapping and then dropped it;
    # returning it makes the result usable. Confirm no caller relied on None.
    return dataset_set
def print_webform_list():
