#!/usr/bin/env python
"""Find and repair mismatches between figure webforms and GCIS.

The webforms are read through ``webform_client`` and compared against the
GCIS instance behind ``gcis``; results are pickled to a "hitlist" file so
that later runs can print or fix them.
"""
__author__ = 'abuddenberg'

import pickle

from gcis_clients import GcisClient
from gcis_clients import WebformClient
from gcis_clients.sync_utils import move_images_to_gcis

#NOTE(review): the tokens below are hard-coded in source control. They should
#be rotated and supplied from outside the repository (environment/config).
webform_client = WebformClient('http://resources.assessment.globalchange.gov', 'mgTD63FAjG')

gcis_url = 'http://data.gcis-dev-front.joss.ucar.edu'

#Swap the two lines below to target the dev instance at gcis_url instead
#gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', 'ad90c05b37d4128ae514bc6caa7a41911d2f1de353443a54')
gcis = GcisClient('http://data-stage.globalchange.gov', 'andrew.buddenberg@noaa.gov', 'b4f1458c3cf28248c982428c46e170019327bd4c533c23dd')

def main():
    """Run one maintenance step against the pickled hitlist file.

    The steps are switched by (un)commenting the calls below; only
    ``solve_problems`` is currently active.
    """
    hitlist_file = '../hitlist.pk1'

#    create_problem_list('nca3', hitlist_file)
#    print_problem_list(hitlist_file)
    solve_problems(hitlist_file, 'nca3')
#    print_ready_list(hitlist_file)
23

24
def solve_problems(path, report_id):
    """Try to fix the image problems recorded in the pickled problem list.

    For every problem webform whose figure was found in GCIS, images that
    are missing from GCIS (and whose files are available) are moved there,
    and broken image/figure associations are re-created.

    :param path: path of the pickle written by ``create_problem_list``
    :param report_id: GCIS report identifier passed through to the clients
    """
    problems = load_problem_list(path)

    for webform, issues in problems.items():
        #Keys are (webform_url, figure_num, identifier) tuples
        webform_id, _, gcis_id = webform

        #Without a valid GCIS figure_id, nothing else can happen
        if 'figure_id_not_found' in issues:
            print('Unable to resolve figure identifier: {fig}'.format(
                fig=issues['figure_id_not_found']))
            continue

        img_id_missing = set(issues.get('image_id_not_found', ()))
        img_file_missing = set(issues.get('missing_image_files', ()))
        #Only images whose files are available can be created in GCIS
        creates = img_id_missing - img_file_missing

        img_assoc_broken = set(issues.get('broken_image_assoc', ()))
        #Images being created are left out of the explicit association pass;
        #presumably move_images_to_gcis associates what it creates - confirm.
        #NOTE(review): an image that is missing from GCIS *and* has no file
        #is not in creates, so it is still associated below - confirm that
        #this is intended.
        assocs = img_assoc_broken - creates

        if creates:
            move_images_to_gcis(webform_client, gcis, webform_id, gcis_id, report_id,
                                subset_images=creates)

        for image_id in assocs:
            print('Associating image: {i} with figure: {f}'.format(i=image_id, f=gcis_id))
            gcis.associate_image_with_figure(image_id, report_id, gcis_id)
48
49
50


def create_problem_list(report_id, path):
    """Survey the webforms for ``report_id`` and pickle the result to ``path``.

    The pickled object is the ``(all_forms, ready, problems)`` tuple returned
    by ``sort_webform_list``. An existing file at ``path`` is overwritten.
    """
    with open(path, 'wb') as problem_file:
        pickle.dump(sort_webform_list(report_id), problem_file)
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72


def load_problem_list(path):
    """Return the ``problems`` part of the pickled hitlist at ``path``.

    The file must hold an ``(all_forms, ready, problems)`` tuple as written
    by ``create_problem_list``.

    NOTE: unpickling can execute arbitrary code, so only load files that this
    script produced itself.
    """
    #The file is written in binary mode, so it must be read in binary mode;
    #the with-block also makes sure the handle is closed again.
    with open(path, 'rb') as problem_file:
        all_forms, ready, problems = pickle.load(problem_file)
    return problems


def load_ready_list(path):
    """Return the ``ready`` part of the pickled hitlist at ``path``.

    The file must hold an ``(all_forms, ready, problems)`` tuple as written
    by ``create_problem_list``.

    NOTE: unpickling can execute arbitrary code, so only load files that this
    script produced itself.
    """
    #The file is written in binary mode, so it must be read in binary mode;
    #the with-block also makes sure the handle is closed again.
    with open(path, 'rb') as problem_file:
        all_forms, ready, problems = pickle.load(problem_file)
    return ready


def print_problem_list(path):
    """Print each problem webform key, then its problems one per line.

    Every problem line is a tab, the problem type, a space and the recorded
    details for that type.
    """
    for webform, issues in load_problem_list(path).items():
        print(webform)
        for problem_type, details in issues.items():
            #Same output as "print '\t', type, details": the print statement
            #does not insert a soft space after a tab
            print('\t{0} {1}'.format(problem_type, details))


73
74
75
76
77
def print_ready_list(path):
    """Print the ready webform keys ordered by numeric figure number."""
    def figure_number(entry):
        #The second element of each key is compared as a float
        return float(entry[1])

    ordered = sorted(load_ready_list(path), key=figure_number)
    for entry in ordered:
        print(entry)


78
79
80
81
def sort_webform_list(report_id):
    """Survey every webform and sort it into "ready" or "problem".

    Each webform is identified by the key
    ``(webform_url, figure_num, identifier)``. Only webforms flagged
    ``ready_for_publication == 'yes'`` are checked against GCIS.

    :param report_id: GCIS report identifier used for the figure checks
    :returns: ``(all_forms, ready, problems)`` where ``all_forms`` lists every
        key, ``ready`` lists the checked keys without any problem, and
        ``problems`` maps a key to ``{problem_type: [details, ...]}``.
        Problem types are ``figure_id_not_found``, ``image_id_not_found``,
        ``dataset_id_not_found``, ``missing_image_files``,
        ``broken_image_assoc`` and ``duplicate_image_id``.
    """
    all_forms = []
    ready = []
    problems = {}

    #Maps each image identifier to the key of every webform that uses it
    global_image_ids = {}

    def problem_entries(key, problem_type):
        #Return the detail list for one problem type, creating it on first use
        return problems.setdefault(key, {}).setdefault(problem_type, [])

    for item in webform_client.get_list():
        webform_url = item['url']
        f = webform_client.get_webform(webform_url)

        key = (webform_url, f.figure_num, f.identifier)
        all_forms.append(key)

        #Check the ready for publication flag
        if not ('ready_for_publication' in f.original
                and f.original['ready_for_publication'] == 'yes'):
            continue

        #Check if the figure exists in GCIS
        if not gcis.figure_exists(report_id, f.identifier):
            problem_entries(key, 'figure_id_not_found').append(
                (f.identifier, f.figure_num, f.title))

        #Check if each image exists in GCIS
        for image in f.images:
            #Squirrel the image identifier away for later
            global_image_ids.setdefault(image.identifier, []).append(key)

            if not gcis.image_exists(image.identifier):
                problem_entries(key, 'image_id_not_found').append(image.identifier)

            #Check if each image's dataset exists in GCIS
            for dataset in image.datasets:
                if not gcis.dataset_exists(dataset.identifier):
                    problem_entries(key, 'dataset_id_not_found').append(dataset.identifier)

            #Check if the filename fields are filled out and correct for what's been uploaded
            if image.remote_path in (None, '') or not webform_client.remote_image_exists(image.remote_path):
                problem_entries(key, 'missing_image_files').append(image.identifier)

        #Check for broken image associations
        has_all_images, image_deltas = gcis.has_all_associated_images(
            report_id, f.identifier, [i.identifier for i in f.images])
        if not has_all_images and len(image_deltas) > 0:
            problem_entries(key, 'broken_image_assoc').extend(image_deltas)

        if key not in problems:
            ready.append(key)

    #Last but not least, check if image UUIDs are actually unique
    for image_id, keys in global_image_ids.items():
        if len(keys) > 1:
            for key in keys:
                problem_entries(key, 'duplicate_image_id').append((image_id, key))

    #The duplicate check runs after the ready list was built, so drop any
    #webform that has only now been flagged as a problem
    ready = [key for key in ready if key not in problems]

    return all_forms, ready, problems


#Only run when executed as a script, not when imported
if __name__ == '__main__':
    main()