Commit 76e97430 authored by abuddenberg's avatar abuddenberg
Browse files

Added function to categorize common problems with webform data

parent 61d86f25
......@@ -4,18 +4,21 @@ from webform_client import WebformClient
from gcis_client import GcisClient
from os.path import exists
import json
import pickle
# webform_dev = ('http://dev.nemac.org/asides10/metadata/figures/all?token=A2PNYxRuG9')
webform = WebformClient('http://resources.assessment.globalchange.gov', 'mgTD63FAjG')
gcis_url = 'http://data.gcis-dev-front.joss.ucar.edu'
# gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', '4cd31dc7173eb47b26f616fb07db607f25ab861552e81195')
gcis = GcisClient('http://data-stage.globalchange.gov', 'andrew.buddenberg@noaa.gov', 'ef427a895acf26d4f0b1f053ba7d922791b76f7852e7efee')
gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', '4cd31dc7173eb47b26f616fb07db607f25ab861552e81195')
# gcis = GcisClient('http://data-stage.globalchange.gov', 'andrew.buddenberg@noaa.gov', 'ef427a895acf26d4f0b1f053ba7d922791b76f7852e7efee')
global_report_name = 'nca3draft'
sync_metadata_tree = {
#Reports
'nca3draft': {
global_report_name: {
#Chapter 2
'our-changing-climate': [
#(webform_url, gcis_id)
......@@ -29,6 +32,10 @@ sync_metadata_tree = {
# ('/metadata/figures/3294', 'projected-changes-in-frostfree-season-length'),
# ('/metadata/figures/3305', 'variation-of-storm-frequency-and-intensity-during-the-cold-season-november--march') #incomplete
],
#Chapter 4
'energy-supply-and-use': [
# ('/metadata/figures/3292', 'cooling-degree-days')
],
#Chapter 6
'agriculture': [
# ('/metadata/figures/2872', 'drainage')
......@@ -53,55 +60,62 @@ sync_metadata_tree = {
],
#Climate Science Appendix
'appendix-climate-science': [
('/metadata/figures/3147', 'ice-loss-from-greenland-and-antarctica')
# ('/metadata/figures/3147', 'ice-loss-from-greenland-and-antarctica')
]
}
}
#These are artifacts from our collection efforts; largely duplicates
webform_skip_list = []
# webform_skip_list = []
def main():
# print_webform_list()
aggregate_datasets()
# sync(uploads=True)
pickle.dump(sort_webform_list(), open('../hitlist.pk1', 'wb'))
all_forms, ready, problems = pickle.load(open('../hitlist.pk1', 'r'))
print ready
# for ds in aggregate_datasets():
# gcis.update_dataset(ds)
# sync(uploads=False)
# f = webform.get_webform('/metadata/figures/3147').merge(gcis.get_figure('nca3draft', 'ice-loss-from-greenland-and-antarctica', chapter_id='appendix-climate-science'))
def aggregate_datasets():
dataset_set = {}
def sort_webform_list():
all_forms = []
ready = []
problems = {}
for item in webform.get_list():
webform_url = item['url']
f = webform.get_webform(webform_url)
all_forms.append(f)
#aggregate datasets
for image in f.images:
for dataset in image.datasets:
if dataset.identifier not in dataset_set:
dataset_set[dataset.identifier] = dataset
else:
dataset_set[dataset.identifier].merge(dataset)
for ds in dataset_set:
print gcis.create_dataset(ds)
#Check the ready for publication flag
if 'ready_for_publication' in f.original and f.original['ready_for_publication'] == 'yes':
#Check if the figure exists in GCIS
if not gcis.figure_exists(global_report_name, f.identifier):
problems.setdefault(webform_url, {}).setdefault('figure_id', []).append(f.identifier)
#Check if each image exists in GCIS
for image in f.images:
if not gcis.image_exists(image.identifier):
problems.setdefault(webform_url, {}).setdefault('image_id', []).append(image.identifier)
#Check if each image's dataset exists in GCIS
for dataset in image.datasets:
if not gcis.dataset_exists(dataset.identifier):
problems.setdefault(webform_url, {}).setdefault('dataset_id', []).append(dataset.identifier)
#Check if the filename fields are filled out and correct for what's been uploaded
if image.remote_path in (None, '') or not webform.remote_image_exists(image.remote_path):
problems.setdefault(webform_url, {}).setdefault('missing_image_files', []).append(image.identifier)
def print_webform_list():
for item in webform.get_list():
webform_url = item['url']
f = webform.get_webform(webform_url)
if webform_url not in problems:
ready.append((webform_url, f.identifier))
if 'ready_for_publication' in f.original and f.original['ready_for_publication'] == 'yes':
print webform_url, '***Ready For Publication***'
print f
else:
webform_skip_list.append(webform_url)
return all_forms, ready, problems
def sync(uploads=True):
......@@ -110,12 +124,9 @@ def sync(uploads=True):
for figure_ids in sync_metadata_tree[report_id][chapter_id]:
webform_url, gcis_id = figure_ids
if webform_url in webform_skip_list:
print 'Skipping: ' + webform_url
continue
if uploads:
print 'Attempting to upload: ' + gcis_id
sync_images(webform_url, gcis_id)
upload_images_to_gcis(webform_url, gcis_id, report_id)
print 'Attempting to sync: ' + gcis_id
sync_metadata(report_id, chapter_id, webform_url, gcis_id)
print 'Success!'
......@@ -129,16 +140,15 @@ def sync_metadata(report_id, chapter_id, webform_url, gcis_id):
#...then send it.
gcis.update_figure(report_id, chapter_id, figure_obj)
def sync_images(webform_url, gcis_id):
#This function is for adding images to existing figures
def upload_images_to_gcis(webform_url, gcis_id, report_id):
figure = webform.get_webform(webform_url)
#Now identifiers don't need to be matched
figure.identifier = gcis_id
webform.download_all_images(figure)
upload_images_to_gcis(figure)
def upload_images_to_gcis(figure, report_id='nca3draft'):
#Make sure we have all the images required for a COMPLETE update
for image in figure.images:
if not exists(image.local_path):
......@@ -147,8 +157,27 @@ def upload_images_to_gcis(figure, report_id='nca3draft'):
for resp in gcis.create_image(image, report_id=report_id, figure_id=figure.identifier):
print resp.status_code, resp.text
for dataset in image.datasets:
gcis.associate_dataset_with_image(dataset.identifier, report_id, figure.identifier)
# for dataset in image.datasets:
# gcis.associate_dataset_with_image(dataset.identifier, report_id, image.identifier)
def aggregate_datasets():
dataset_map = {}
for item in webform.get_list():
webform_url = item['url']
f = webform.get_webform(webform_url)
#aggregate datasets
for image in f.images:
for dataset in image.datasets:
if dataset.identifier not in dataset_map:
dataset_map[dataset.identifier] = dataset
else:
dataset_map[dataset.identifier].merge(dataset)
return dataset_map.values()
if __name__ == '__main__':
......
......@@ -94,7 +94,7 @@ class WebformClient:
def remote_image_exists(self, path):
url = '{b}{path}?token={t}'.format(b=self.base_url, path=path, t=self.token)
resp = requests.head(url)
print resp.status_code, resp.text
# print resp.status_code, resp.text
return True if resp.status_code == 200 else False
def download_image(self, image):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment