Commit 4d00ffdb authored by abuddenberg
Browse files

Refactoring sync; building separate "Solve Problems" module

parent 410227ff
__author__ = 'abuddenberg'
import pickle
from gcis_client import GcisClient
from webform_client import WebformClient
# Module-level service clients: `webform` and `gcis` are used by
# sort_webform_list() further down in this module.
# NOTE(review): the second WebformClient argument and the third GcisClient
# argument look like credentials committed in plain text -- confirm, and if so
# move them to the environment or an untracked config file and rotate them.
webform = WebformClient('http://resources.assessment.globalchange.gov', 'mgTD63FAjG')
gcis_url = 'http://data.gcis-dev-front.joss.ucar.edu'
gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', 'd9fcfd947c1785ab1cd329a9920e05e5c5d3d7f35315f164')
def main():
    """Print the problems stored in the pickled hit list, then run solve_problems on it."""
    hitlist_path = '../hitlist.pk1'

    # Rebuilding the hit list is currently disabled; re-enable to regenerate it.
    # create_problem_list('nca3', hitlist_path)

    # Both steps read the same pickle file, in this order.
    for step in (print_problem_list, solve_problems):
        step(hitlist_path)
def solve_problems(path):
    """Report every problem webform whose figure identifier is unresolved.

    Loads the problem dict with load_problem_list(path) and prints one line
    for each entry that carries a non-empty 'figure_id_not_found' record.

    :param path: location of the pickled hit list.
    """
    problems = load_problem_list(path)

    # The loop variable is deliberately not called `webform`, which is the
    # name of the module-level WebformClient instance.
    for webform_url, issues in problems.items():
        # sort_webform_list only records the checks that failed, so an entry
        # may lack this key entirely; .get() avoids a KeyError in that case.
        unresolved = issues.get('figure_id_not_found')
        if unresolved:
            # The record is a list, so it must be converted before it can be
            # concatenated to the message (str + list raises TypeError).
            print('Unable to resolve figure identifier: ' + str(unresolved))
            continue
def create_problem_list(report_id, path):
    """Pickle the result of sort_webform_list(report_id) to *path*.

    :param report_id: report identifier passed through to sort_webform_list.
    :param path: file to (over)write with the pickled result.
    """
    # Compute first, open second: same evaluation order as before, so the
    # file is not truncated if the scan raises.
    results = sort_webform_list(report_id)

    # The context manager guarantees the handle is flushed and closed.
    with open(path, 'wb') as out:
        pickle.dump(results, out)
def load_problem_list(path):
    """Return the `problems` element of the pickled hit list at *path*.

    The file must hold a 3-item sequence (all_forms, ready, problems), as
    written by create_problem_list; the third item is returned.

    :param path: location of the pickled hit list.
    :raises ValueError: if the unpickled object does not unpack to 3 items.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file, so
    # only load hit lists that this tool wrote itself.
    # Binary mode matches the 'wb' used when writing, and the context manager
    # closes the handle that was previously leaked.
    with open(path, 'rb') as source:
        _, _, problems = pickle.load(source)
    return problems
def load_ready_list(path):
    """Return the `ready` element of the pickled hit list at *path*.

    The file must hold a 3-item sequence (all_forms, ready, problems), as
    written by create_problem_list; the second item is returned.

    :param path: location of the pickled hit list.
    :raises ValueError: if the unpickled object does not unpack to 3 items.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file, so
    # only load hit lists that this tool wrote itself.
    # Binary mode matches the 'wb' used when writing, and the context manager
    # closes the handle that was previously leaked.
    with open(path, 'rb') as source:
        _, ready, _ = pickle.load(source)
    return ready
def print_problem_list(path):
    """Print every problem webform followed by its recorded problems.

    For each key of the dict returned by load_problem_list(path), prints the
    key on its own line, then one tab-prefixed line per problem type with the
    value stored under it.

    :param path: location of the pickled hit list.
    """
    problems = load_problem_list(path)

    # The loop variable is deliberately not called `webform`, which is the
    # name of the module-level WebformClient instance.
    for webform_url, issues in problems.items():
        print(webform_url)
        for problem_type, details in issues.items():
            # One pre-formatted string gives the same text as the Python 2
            # statement `print '\t', a, b` (no space is emitted after the
            # tab), without depending on print-statement spacing rules.
            print('\t%s %s' % (problem_type, details))
# Fetch every webform listed by the webform client and check the ones flagged
# ready for publication against GCIS, recording each failed check.
#
# report_id is passed to gcis.figure_exists() and
# gcis.has_all_associated_images().
#
# Returns the 3-tuple (all_forms, ready, problems):
#   all_forms - every object returned by webform.get_webform()
#   ready     - (webform_url, f.identifier) pairs for forms with no entry in
#               `problems` (presumably only forms flagged ready -- see note)
#   problems  - dict: webform_url -> {problem_type: [details, ...]}
#
# NOTE(review): leading indentation is missing from this copy of the file, so
# the nesting of the checks below cannot be confirmed from the text alone.
def sort_webform_list(report_id):
all_forms = []
ready = []
problems = {}
for item in webform.get_list():
webform_url = item['url']
f = webform.get_webform(webform_url)
all_forms.append(f)
# Only forms whose 'ready_for_publication' field equals 'yes' are checked
if 'ready_for_publication' in f.original and f.original['ready_for_publication'] == 'yes':
# Check if the figure exists in GCIS; records (identifier, figure_num, title)
if not gcis.figure_exists(report_id, f.identifier):
problems.setdefault(webform_url, {}).setdefault('figure_id_not_found', []).append((f.identifier, f.figure_num, f.title))
# Check if each image exists in GCIS
for image in f.images:
if not gcis.image_exists(image.identifier):
problems.setdefault(webform_url, {}).setdefault('image_id_not_found', []).append(image.identifier)
# Check if each image's dataset exists in GCIS
for dataset in image.datasets:
if not gcis.dataset_exists(dataset.identifier):
problems.setdefault(webform_url, {}).setdefault('dataset_id_not_found', []).append(dataset.identifier)
# Check if the filename fields are filled out and correct for what's been uploaded
if image.remote_path in (None, '') or not webform.remote_image_exists(image.remote_path):
problems.setdefault(webform_url, {}).setdefault('missing_image_files', []).append(image.identifier)
# Check for broken image associations
# NOTE(review): the call takes per-figure arguments but the record appended is
# `image.identifier`; if this sits outside the image loop, `image` is the last
# image seen (or unbound when f.images is empty) -- confirm intended nesting.
if not gcis.has_all_associated_images(report_id, f.identifier, f.images):
problems.setdefault(webform_url, {}).setdefault('broken_image_assoc', []).append(image.identifier)
# A form with no recorded problem is listed as ready
if webform_url not in problems:
ready.append((webform_url, f.identifier))
return all_forms, ready, problems
# Script entry point: call main() only when this file is executed directly.
if __name__ == '__main__':
main()
\ No newline at end of file
......@@ -11,52 +11,54 @@ import pickle
webform = WebformClient('http://resources.assessment.globalchange.gov', 'mgTD63FAjG')
gcis_url = 'http://data.gcis-dev-front.joss.ucar.edu'
gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', '4cd31dc7173eb47b26f616fb07db607f25ab861552e81195')
# gcis = GcisClient('http://data-stage.globalchange.gov', 'andrew.buddenberg@noaa.gov', 'ef427a895acf26d4f0b1f053ba7d922791b76f7852e7efee')
global_report_name = 'nca3draft'
gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', 'd9fcfd947c1785ab1cd329a9920e05e5c5d3d7f35315f164')
# gcis = GcisClient('http://data-stage.globalchange.gov', 'andrew.buddenberg@noaa.gov', 'a6efcc7cf39c55e9329a8b027e0817e3354bada65310d192')
sync_metadata_tree = {
#Reports
global_report_name: {
'nca3': {
#Chapter 2
'our-changing-climate': [
#(webform_url, gcis_id)
# ('/metadata/figures/2506', 'observed-change-in-very-heavy-precipitation'),
# ('/metadata/figures/2997', 'observed-change-in-very-heavy-precipitation-2'),
# ('/metadata/figures/2677', 'observed-us-precipitation-change'),
# ('/metadata/figures/3175', 'observed-us-temperature-change'),
# ('/metadata/figures/3074', 'ten-indicators-of-a-warming-world'),
# ('/metadata/figures/3170', 'global-temperature-and-carbon-dioxide'),
# ('/metadata/figures/3293', 'observed-increase-in-frostfree-season-length'),
# ('/metadata/figures/3294', 'projected-changes-in-frostfree-season-length'),
('/metadata/figures/2506', 'observed-change-in-very-heavy-precipitation'),
('/metadata/figures/2997', 'observed-change-in-very-heavy-precipitation-2'),
('/metadata/figures/2677', 'observed-us-precipitation-change'),
('/metadata/figures/3175', 'observed-us-temperature-change'),
('/metadata/figures/3074', 'ten-indicators-of-a-warming-world'),
('/metadata/figures/3170', 'global-temperature-and-carbon-dioxide'),
('/metadata/figures/3293', 'observed-increase-in-frostfree-season-length'),
('/metadata/figures/3294', 'projected-changes-in-frostfree-season-length'),
# ('/metadata/figures/3305', 'variation-of-storm-frequency-and-intensity-during-the-cold-season-november--march') #incomplete
],
#Chapter 4
'energy-supply-and-use': [
# ('/metadata/figures/3292', 'cooling-degree-days')
('/metadata/figures/3292', 'cooling-degree-days')
],
#Chapter 6
'agriculture': [
# ('/metadata/figures/2872', 'drainage')
# ('/metadata/figures/2691', 'variables-affecting-ag') #Needs images redone
('/metadata/figures/2872', 'drainage'),
('/metadata/figures/2691', 'variables-affecting-ag') #Needs images redone
],
#Chapter 9
'': [
# ('/metadata/figures/2896', 'heavy-downpours-disease') #Needs images redone
'human-health': [
('/metadata/figures/2896', 'heavy-downpours-disease') #Needs images redone
],
#Chapter 14
'rural': [
# ('/metadata/figures/3306', 'length-growing-season') #Needs images redone
('/metadata/figures/3306', 'length-growing-season') #Needs images redone
],
#Chapter 18
'': [
('/metadata/figures/2992', 'projected-midcentury-temperature-changes-in-the-midwest')
],
#Chapter 19
'great-plains': [
# ('/metadata/figures/2697', 'mean-annual-temp-and-precip') #Needs images redone
('/metadata/figures/2697', 'mean-annual-temp-and-precip') #Needs images redone
],
#Chapter 25
'coastal-zone': [
# ('/metadata/figures/2543', 'coastal-ecosystem-services')
('/metadata/figures/2543', 'coastal-ecosystem-services')
],
#Climate Science Appendix
'appendix-climate-science': [
......@@ -66,56 +68,9 @@ sync_metadata_tree = {
}
}
#These are artifacts from our collection efforts; largely duplicates
# webform_skip_list = []
def main():
pickle.dump(sort_webform_list(), open('../hitlist.pk1', 'wb'))
all_forms, ready, problems = pickle.load(open('../hitlist.pk1', 'r'))
print ready
# for ds in aggregate_datasets():
# gcis.update_dataset(ds)
# sync(uploads=False)
# f = webform.get_webform('/metadata/figures/3147').merge(gcis.get_figure('nca3draft', 'ice-loss-from-greenland-and-antarctica', chapter_id='appendix-climate-science'))
def sort_webform_list():
all_forms = []
ready = []
problems = {}
for item in webform.get_list():
webform_url = item['url']
f = webform.get_webform(webform_url)
all_forms.append(f)
#Check the ready for publication flag
if 'ready_for_publication' in f.original and f.original['ready_for_publication'] == 'yes':
#Check if the figure exists in GCIS
if not gcis.figure_exists(global_report_name, f.identifier):
problems.setdefault(webform_url, {}).setdefault('figure_id', []).append(f.identifier)
#Check if each image exists in GCIS
for image in f.images:
if not gcis.image_exists(image.identifier):
problems.setdefault(webform_url, {}).setdefault('image_id', []).append(image.identifier)
#Check if each image's dataset exists in GCIS
for dataset in image.datasets:
if not gcis.dataset_exists(dataset.identifier):
problems.setdefault(webform_url, {}).setdefault('dataset_id', []).append(dataset.identifier)
#Check if the filename fields are filled out and correct for what's been uploaded
if image.remote_path in (None, '') or not webform.remote_image_exists(image.remote_path):
problems.setdefault(webform_url, {}).setdefault('missing_image_files', []).append(image.identifier)
if webform_url not in problems:
ready.append((webform_url, f.identifier))
return all_forms, ready, problems
sync(uploads=False)
def sync(uploads=True):
......@@ -128,11 +83,11 @@ def sync(uploads=True):
print 'Attempting to upload: ' + gcis_id
upload_images_to_gcis(webform_url, gcis_id, report_id)
print 'Attempting to sync: ' + gcis_id
sync_metadata(report_id, chapter_id, webform_url, gcis_id)
sync_figure_metadata(report_id, chapter_id, webform_url, gcis_id)
print 'Success!'
def sync_metadata(report_id, chapter_id, webform_url, gcis_id):
def sync_figure_metadata(report_id, chapter_id, webform_url, gcis_id):
#Merge data from both systems into one object...
figure_obj = webform.get_webform(webform_url).merge(
gcis.get_figure(report_id, gcis_id, chapter_id=chapter_id)
......@@ -143,25 +98,30 @@ def sync_metadata(report_id, chapter_id, webform_url, gcis_id):
#This function is for adding images to existing figures
def upload_images_to_gcis(webform_url, gcis_id, report_id):
figure = webform.get_webform(webform_url)
figure = webform.get_webform(webform_url, download_images=True)
#Now identifiers don't need to be matched
figure.identifier = gcis_id
webform.download_all_images(figure)
#Make sure we have all the images required for a COMPLETE update
for image in figure.images:
if not exists(image.local_path):
raise Exception('Local file missing ' + image.local_path)
for image in figure.images:
for resp in gcis.create_image(image, report_id=report_id, figure_id=figure.identifier):
print resp.status_code, resp.text
if not gcis.image_exists(image.identifier):
gcis.create_image(image, report_id=report_id, figure_id=figure.identifier)
# for dataset in image.datasets:
# gcis.associate_dataset_with_image(dataset.identifier, report_id, image.identifier)
def sync_dataset_metadata(datasets):
    """Create or update each dataset in GCIS, announcing the action taken.

    A dataset whose identifier GCIS already reports as existing is updated;
    any other dataset is created.
    """
    for dataset in datasets:
        already_in_gcis = gcis.dataset_exists(dataset.identifier)

        # Unknown identifier: create the dataset and move on to the next one.
        if not already_in_gcis:
            print('Creating: {ds}'.format(ds=dataset))
            gcis.create_dataset(dataset)
            continue

        print('Updating: {ds}'.format(ds=dataset))
        gcis.update_dataset(dataset)
def aggregate_datasets():
def aggregate_webform_datasets():
dataset_map = {}
for item in webform.get_list():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment