Commit 56bebe06 authored by abuddenberg's avatar abuddenberg
Browse files

Refactoring sync; building separate "Solve Problems" module

parent 73d5a8cf
# Module author. The assignment was duplicated on a single line, which is a
# syntax error; one statement is enough.
__author__ = 'abuddenberg'
import pickle
from gcis_client import GcisClient
from webform_client import WebformClient
# Module-level WebformClient shared by the functions below (listing webforms,
# fetching a single webform, checking that a remote image exists).
# NOTE(review): the second argument looks like an access token committed in
# plain text -- confirm, and consider loading it from outside the source tree.
webform = WebformClient('http://resources.assessment.globalchange.gov', 'mgTD63FAjG')
# Base URL handed to the GCIS client below.
gcis_url = 'http://data.gcis-dev-front.joss.ucar.edu'
# Module-level GcisClient used for the figure/image/dataset existence checks.
# NOTE(review): the last argument looks like an API key committed in plain
# text -- confirm, and rotate it if this file is public.
gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', 'd9fcfd947c1785ab1cd329a9920e05e5c5d3d7f35315f164')
def main():
    """Print the stored problem list, then run ``solve_problems`` over it.

    Both steps read the pickled hit list; uncomment the
    ``create_problem_list`` call to regenerate that file first.
    """
    path_to_hitlist = '../hitlist.pk1'

    # create_problem_list('nca3', path_to_hitlist)
    for step in (print_problem_list, solve_problems):
        step(path_to_hitlist)
def solve_problems(path):
    """Walk the stored problem list and report figures that cannot be resolved.

    :param path: location of the pickled hit list read by
        ``load_problem_list``.

    Each entry of the problem list maps a webform URL to a dict of
    ``{problem_type: [details, ...]}``. Only ``'figure_id_not_found'`` is
    handled here; webforms with that problem are reported and skipped.
    """
    problems = load_problem_list(path)

    # Use a distinct loop name: ``webform`` is also the module-level client.
    for webform_url, issues in problems.items():
        # A webform may be listed for other problem types only, so the key is
        # optional; indexing it directly raised KeyError.
        unresolved = issues.get('figure_id_not_found')
        if unresolved:
            # ``unresolved`` is a list of tuples; concatenating it to a str
            # raised TypeError, so format it instead.
            print('Unable to resolve figure identifier: {0}'.format(unresolved))
            # Nothing further can be done for this webform.
            continue
def create_problem_list(report_id, path):
    """Triage the webforms for *report_id* and pickle the result to *path*.

    :param report_id: report identifier passed through to
        ``sort_webform_list``.
    :param path: destination file; it is overwritten.

    The stored object is the ``(all_forms, ready, problems)`` triple returned
    by ``sort_webform_list``.
    """
    # Collect first so a failure while triaging does not truncate an existing
    # hit list (same evaluation order as before).
    results = sort_webform_list(report_id)

    # The context manager guarantees the file is flushed and closed; the
    # previous bare ``open`` left that to garbage collection.
    with open(path, 'wb') as hitlist:
        pickle.dump(results, hitlist)
def load_problem_list(path):
    """Return the ``problems`` mapping stored in the pickled hit list.

    :param path: location of a pickle file holding an
        ``(all_forms, ready, problems)`` triple.
    :return: the third element of the stored triple.
    """
    # Pickle data is binary and the file is written with 'wb', so read it with
    # 'rb' (text mode corrupts it on some platforms) and close the handle
    # deterministically.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(path, 'rb') as hitlist:
        _, _, problems = pickle.load(hitlist)
    return problems
def load_ready_list(path):
    """Return the ``ready`` list stored in the pickled hit list.

    :param path: location of a pickle file holding an
        ``(all_forms, ready, problems)`` triple.
    :return: the second element of the stored triple.
    """
    # Pickle data is binary and the file is written with 'wb', so read it with
    # 'rb' (text mode corrupts it on some platforms) and close the handle
    # deterministically.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(path, 'rb') as hitlist:
        _, ready, _ = pickle.load(hitlist)
    return ready
def print_problem_list(path):
problems = load_problem_list(path)
for webform in problems:
print webform
for problem_type in problems[webform]:
print '\t', problem_type, problems[webform][problem_type]
def _record_problem(problems, webform_url, problem_type, detail):
    """Append *detail* to ``problems[webform_url][problem_type]``, creating both levels on demand."""
    problems.setdefault(webform_url, {}).setdefault(problem_type, []).append(detail)


def sort_webform_list(report_id):
    """Fetch every webform and triage the ones flagged ready for publication.

    Only webforms whose ``ready_for_publication`` field equals ``'yes'`` are
    checked against GCIS; all others are collected but otherwise ignored.

    :param report_id: report identifier used for the GCIS figure lookups.
    :return: ``(all_forms, ready, problems)`` where ``all_forms`` holds every
        fetched webform, ``ready`` is a list of ``(webform_url, identifier)``
        pairs for flagged webforms with no recorded problem, and ``problems``
        maps a webform URL to ``{problem_type: [details, ...]}``.
    """
    all_forms = []
    ready = []
    problems = {}

    for item in webform.get_list():
        webform_url = item['url']
        f = webform.get_webform(webform_url)
        all_forms.append(f)

        # Check the ready for publication flag
        if not ('ready_for_publication' in f.original and f.original['ready_for_publication'] == 'yes'):
            continue

        # Check if the figure exists in GCIS
        if not gcis.figure_exists(report_id, f.identifier):
            _record_problem(problems, webform_url, 'figure_id_not_found',
                            (f.identifier, f.figure_num, f.title))

        images = list(f.images)
        for image in images:
            # Check if each image exists in GCIS
            if not gcis.image_exists(image.identifier):
                _record_problem(problems, webform_url, 'image_id_not_found', image.identifier)

            # Check if each image's dataset exists in GCIS
            for dataset in image.datasets:
                if not gcis.dataset_exists(dataset.identifier):
                    _record_problem(problems, webform_url, 'dataset_id_not_found', dataset.identifier)

            # Check if the filename fields are filled out and correct for what's been uploaded
            if image.remote_path in (None, '') or not webform.remote_image_exists(image.remote_path):
                _record_problem(problems, webform_url, 'missing_image_files', image.identifier)

        # Check for broken image associations.
        # The lookup takes only figure-level arguments, so it is made once per
        # figure instead of once per image; when it fails, every image of the
        # figure is recorded.
        # NOTE(review): assumes has_all_associated_images returns the same
        # answer for repeated calls with the same arguments -- confirm.
        if images and not gcis.has_all_associated_images(report_id, f.identifier, f.images):
            for image in images:
                _record_problem(problems, webform_url, 'broken_image_assoc', image.identifier)

        if webform_url not in problems:
            ready.append((webform_url, f.identifier))

    return all_forms, ready, problems
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
\ No newline at end of file
...@@ -11,52 +11,54 @@ import pickle ...@@ -11,52 +11,54 @@ import pickle
webform = WebformClient('http://resources.assessment.globalchange.gov', 'mgTD63FAjG') webform = WebformClient('http://resources.assessment.globalchange.gov', 'mgTD63FAjG')
gcis_url = 'http://data.gcis-dev-front.joss.ucar.edu' gcis_url = 'http://data.gcis-dev-front.joss.ucar.edu'
gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', '4cd31dc7173eb47b26f616fb07db607f25ab861552e81195') gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', 'd9fcfd947c1785ab1cd329a9920e05e5c5d3d7f35315f164')
# gcis = GcisClient('http://data-stage.globalchange.gov', 'andrew.buddenberg@noaa.gov', 'ef427a895acf26d4f0b1f053ba7d922791b76f7852e7efee') # gcis = GcisClient('http://data-stage.globalchange.gov', 'andrew.buddenberg@noaa.gov', 'a6efcc7cf39c55e9329a8b027e0817e3354bada65310d192')
global_report_name = 'nca3draft'
sync_metadata_tree = { sync_metadata_tree = {
#Reports #Reports
global_report_name: { 'nca3': {
#Chapter 2 #Chapter 2
'our-changing-climate': [ 'our-changing-climate': [
#(webform_url, gcis_id) #(webform_url, gcis_id)
# ('/metadata/figures/2506', 'observed-change-in-very-heavy-precipitation'), ('/metadata/figures/2506', 'observed-change-in-very-heavy-precipitation'),
# ('/metadata/figures/2997', 'observed-change-in-very-heavy-precipitation-2'), ('/metadata/figures/2997', 'observed-change-in-very-heavy-precipitation-2'),
# ('/metadata/figures/2677', 'observed-us-precipitation-change'), ('/metadata/figures/2677', 'observed-us-precipitation-change'),
# ('/metadata/figures/3175', 'observed-us-temperature-change'), ('/metadata/figures/3175', 'observed-us-temperature-change'),
# ('/metadata/figures/3074', 'ten-indicators-of-a-warming-world'), ('/metadata/figures/3074', 'ten-indicators-of-a-warming-world'),
# ('/metadata/figures/3170', 'global-temperature-and-carbon-dioxide'), ('/metadata/figures/3170', 'global-temperature-and-carbon-dioxide'),
# ('/metadata/figures/3293', 'observed-increase-in-frostfree-season-length'), ('/metadata/figures/3293', 'observed-increase-in-frostfree-season-length'),
# ('/metadata/figures/3294', 'projected-changes-in-frostfree-season-length'), ('/metadata/figures/3294', 'projected-changes-in-frostfree-season-length'),
# ('/metadata/figures/3305', 'variation-of-storm-frequency-and-intensity-during-the-cold-season-november--march') #incomplete # ('/metadata/figures/3305', 'variation-of-storm-frequency-and-intensity-during-the-cold-season-november--march') #incomplete
], ],
#Chapter 4 #Chapter 4
'energy-supply-and-use': [ 'energy-supply-and-use': [
# ('/metadata/figures/3292', 'cooling-degree-days') ('/metadata/figures/3292', 'cooling-degree-days')
], ],
#Chapter 6 #Chapter 6
'agriculture': [ 'agriculture': [
# ('/metadata/figures/2872', 'drainage') ('/metadata/figures/2872', 'drainage'),
# ('/metadata/figures/2691', 'variables-affecting-ag') #Needs images redone ('/metadata/figures/2691', 'variables-affecting-ag') #Needs images redone
], ],
#Chapter 9 #Chapter 9
'': [ 'human-health': [
# ('/metadata/figures/2896', 'heavy-downpours-disease') #Needs images redone ('/metadata/figures/2896', 'heavy-downpours-disease') #Needs images redone
], ],
#Chapter 14 #Chapter 14
'rural': [ 'rural': [
# ('/metadata/figures/3306', 'length-growing-season') #Needs images redone ('/metadata/figures/3306', 'length-growing-season') #Needs images redone
],
#Chapter 18
'': [
('/metadata/figures/2992', 'projected-midcentury-temperature-changes-in-the-midwest')
], ],
#Chapter 19 #Chapter 19
'great-plains': [ 'great-plains': [
# ('/metadata/figures/2697', 'mean-annual-temp-and-precip') #Needs images redone ('/metadata/figures/2697', 'mean-annual-temp-and-precip') #Needs images redone
], ],
#Chapter 25 #Chapter 25
'coastal-zone': [ 'coastal-zone': [
# ('/metadata/figures/2543', 'coastal-ecosystem-services') ('/metadata/figures/2543', 'coastal-ecosystem-services')
], ],
#Climate Science Appendix #Climate Science Appendix
'appendix-climate-science': [ 'appendix-climate-science': [
...@@ -66,56 +68,9 @@ sync_metadata_tree = { ...@@ -66,56 +68,9 @@ sync_metadata_tree = {
} }
} }
#These are artifacts from our collection efforts; largely duplicates
# webform_skip_list = []
def main(): def main():
pickle.dump(sort_webform_list(), open('../hitlist.pk1', 'wb')) sync(uploads=False)
all_forms, ready, problems = pickle.load(open('../hitlist.pk1', 'r'))
print ready
# for ds in aggregate_datasets():
# gcis.update_dataset(ds)
# sync(uploads=False)
# f = webform.get_webform('/metadata/figures/3147').merge(gcis.get_figure('nca3draft', 'ice-loss-from-greenland-and-antarctica', chapter_id='appendix-climate-science'))
def sort_webform_list():
    """Fetch every webform and triage the ones flagged ready for publication.

    Figures are looked up in GCIS under ``global_report_name``.

    :return: ``(all_forms, ready, problems)`` where ``all_forms`` holds every
        fetched webform, ``ready`` is a list of ``(webform_url, identifier)``
        pairs for flagged webforms with no recorded problem, and ``problems``
        maps a webform URL to ``{problem_type: [identifier, ...]}``.
    """
    all_forms, ready, problems = [], [], {}

    def flag(url, problem_type, identifier):
        # File the identifier under problems[url][problem_type].
        problems.setdefault(url, {}).setdefault(problem_type, []).append(identifier)

    for entry in webform.get_list():
        webform_url = entry['url']
        form = webform.get_webform(webform_url)
        all_forms.append(form)

        # Only forms explicitly marked ready for publication are checked.
        marked_ready = 'ready_for_publication' in form.original and form.original['ready_for_publication'] == 'yes'
        if not marked_ready:
            continue

        # The figure itself must exist in GCIS.
        if not gcis.figure_exists(global_report_name, form.identifier):
            flag(webform_url, 'figure_id', form.identifier)

        for img in form.images:
            # The image must exist in GCIS.
            if not gcis.image_exists(img.identifier):
                flag(webform_url, 'image_id', img.identifier)

            # Every dataset attached to the image must exist in GCIS.
            for ds in img.datasets:
                if not gcis.dataset_exists(ds.identifier):
                    flag(webform_url, 'dataset_id', ds.identifier)

            # The filename field must be filled in and match an uploaded file.
            if img.remote_path in (None, '') or not webform.remote_image_exists(img.remote_path):
                flag(webform_url, 'missing_image_files', img.identifier)

        if webform_url not in problems:
            ready.append((webform_url, form.identifier))

    return all_forms, ready, problems
def sync(uploads=True): def sync(uploads=True):
...@@ -128,11 +83,11 @@ def sync(uploads=True): ...@@ -128,11 +83,11 @@ def sync(uploads=True):
print 'Attempting to upload: ' + gcis_id print 'Attempting to upload: ' + gcis_id
upload_images_to_gcis(webform_url, gcis_id, report_id) upload_images_to_gcis(webform_url, gcis_id, report_id)
print 'Attempting to sync: ' + gcis_id print 'Attempting to sync: ' + gcis_id
sync_metadata(report_id, chapter_id, webform_url, gcis_id) sync_figure_metadata(report_id, chapter_id, webform_url, gcis_id)
print 'Success!' print 'Success!'
def sync_metadata(report_id, chapter_id, webform_url, gcis_id): def sync_figure_metadata(report_id, chapter_id, webform_url, gcis_id):
#Merge data from both systems into one object... #Merge data from both systems into one object...
figure_obj = webform.get_webform(webform_url).merge( figure_obj = webform.get_webform(webform_url).merge(
gcis.get_figure(report_id, gcis_id, chapter_id=chapter_id) gcis.get_figure(report_id, gcis_id, chapter_id=chapter_id)
...@@ -143,25 +98,30 @@ def sync_metadata(report_id, chapter_id, webform_url, gcis_id): ...@@ -143,25 +98,30 @@ def sync_metadata(report_id, chapter_id, webform_url, gcis_id):
#This function is for adding images to existing figures #This function is for adding images to existing figures
def upload_images_to_gcis(webform_url, gcis_id, report_id): def upload_images_to_gcis(webform_url, gcis_id, report_id):
figure = webform.get_webform(webform_url) figure = webform.get_webform(webform_url, download_images=True)
#Now identifiers don't need to be matched #Now identifiers don't need to be matched
figure.identifier = gcis_id figure.identifier = gcis_id
webform.download_all_images(figure)
#Make sure we have all the images required for a COMPLETE update #Make sure we have all the images required for a COMPLETE update
for image in figure.images: for image in figure.images:
if not exists(image.local_path): if not exists(image.local_path):
raise Exception('Local file missing ' + image.local_path) raise Exception('Local file missing ' + image.local_path)
for image in figure.images: for image in figure.images:
for resp in gcis.create_image(image, report_id=report_id, figure_id=figure.identifier): if not gcis.image_exists(image.identifier):
print resp.status_code, resp.text gcis.create_image(image, report_id=report_id, figure_id=figure.identifier)
# for dataset in image.datasets: def sync_dataset_metadata(datasets):
# gcis.associate_dataset_with_image(dataset.identifier, report_id, image.identifier) for ds in datasets:
if gcis.dataset_exists(ds.identifier):
print 'Updating: {ds}'.format(ds=ds)
gcis.update_dataset(ds)
else:
print 'Creating: {ds}'.format(ds=ds)
gcis.create_dataset(ds)
def aggregate_datasets(): def aggregate_webform_datasets():
dataset_map = {} dataset_map = {}
for item in webform.get_list(): for item in webform.get_list():
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment