problems 5.9 KB
Newer Older
1
#!/usr/bin/env python
2
__author__ = 'abuddenberg'
3
4

import pickle
5
6
from gcis_clients import GcisClient
from gcis_clients import WebformClient
abuddenberg's avatar
abuddenberg committed
7
from gcis_clients.sync_utils import move_images_to_gcis
8

9
import os

# Clients for the two services this script reconciles.
#
# Every connection value can be overridden through the environment variable
# named in its lookup. When a variable is unset the literal that used to be
# hard-coded here is used, so existing invocations behave exactly as before.
#
# NOTE(review): the fallback literals look like real credentials committed to
# source control (the second WebformClient argument is presumably an access
# token, the last GcisClient argument presumably an API key -- confirm).
# Rotate them and drop the defaults once the environment is provisioned.
webform_client = WebformClient(
    os.environ.get('WEBFORM_URL', 'http://resources.assessment.globalchange.gov'),
    os.environ.get('WEBFORM_TOKEN', 'mgTD63FAjG'))

# Development endpoint; only referenced by the commented-out client below.
gcis_url = 'http://data.gcis-dev-front.joss.ucar.edu'

#gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', 'ad90c05b37d4128ae514bc6caa7a41911d2f1de353443a54')
gcis = GcisClient(
    os.environ.get('GCIS_URL', 'http://data-stage.globalchange.gov'),
    os.environ.get('GCIS_USER', 'andrew.buddenberg@noaa.gov'),
    os.environ.get('GCIS_KEY', 'b4f1458c3cf28248c982428c46e170019327bd4c533c23dd'))
abuddenberg's avatar
abuddenberg committed
14

15
16

def main():
    """Run the currently selected step of the webform/GCIS reconciliation.

    Steps are switched on and off by (un)commenting the calls below; at the
    moment only ``solve_problems`` is active.
    """
    report = 'nca3'
    pickle_path = 'hitlist.pk1'

    #Inactive steps, kept here so they can be toggled by hand:
#    create_problem_list(report, pickle_path)
#    print_problem_list(pickle_path)
#    print_ready_list(pickle_path)
    solve_problems(pickle_path, report)
23

24

25
def solve_problems(path, report_id):
    """Attempt to repair the image problems recorded in a pickled hit list.

    For every webform in the problem mapping loaded from ``path``:

    * webforms with a ``figure_id_not_found`` entry are reported and skipped;
    * images listed under ``image_id_not_found`` but not under
      ``missing_image_files`` are handed to ``move_images_to_gcis``;
    * images listed under ``broken_image_assoc`` that were not just handed
      over are associated with the figure through
      ``gcis.associate_image_with_figure``.

    :param path: location of the hit-list pickle
    :param report_id: report identifier forwarded to the GCIS calls
    """
    for form_key, issues in load_problem_list(path).items():
        #The middle element (figure number) is not needed here
        webform_id, _, gcis_id = form_key

        #Without a valid GCIS figure_id, nothing else can happen
        if 'figure_id_not_found' in issues:
            print('Unable to resolve figure identifier: {fig}'.format(fig=issues['figure_id_not_found']))
            continue

        unknown_image_ids = set(issues.get('image_id_not_found', ()))
        images_without_file = set(issues.get('missing_image_files', ()))
        #Only images whose file is actually available can be created
        to_create = unknown_image_ids - images_without_file

        broken_links = set(issues.get('broken_image_assoc', ()))
        #Images created above are excluded from the explicit association pass
        to_associate = broken_links - to_create

        if to_create:
            move_images_to_gcis(webform_client, gcis, webform_id, gcis_id, report_id, subset_images=to_create)

        for image_id in to_associate:
            print('Associating image: {i} with figure: {f}'.format(i=image_id, f=gcis_id))
            gcis.associate_image_with_figure(image_id, report_id, gcis_id)
49
50
51


def create_problem_list(report_id, path):
    """Survey the webforms for ``report_id`` and pickle the result to ``path``.

    The pickled object is the ``(all_forms, ready, problems)`` tuple returned
    by ``sort_webform_list``.

    The survey is run to completion *before* the output file is opened.
    Opening with ``'wb'`` truncates the file, so opening it first would wipe
    an existing hit list whenever the survey raised part-way through.

    :param report_id: report identifier passed to ``sort_webform_list``
    :param path: destination of the pickle file (overwritten on success)
    """
    survey = sort_webform_list(report_id)

    with open(path, 'wb') as problem_file:
        pickle.dump(survey, problem_file)
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73


def load_problem_list(path):
    """Return the ``problems`` mapping stored in the hit-list pickle at ``path``.

    The pickle holds an ``(all_forms, ready, problems)`` tuple; only the last
    element is returned. The file is opened in binary mode, matching the
    ``'wb'`` mode it is written with, and is closed as soon as it is read.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so only point
    this at hit lists produced by this script.

    :param path: location of the hit-list pickle
    :return: the unpickled ``problems`` object
    """
    with open(path, 'rb') as problem_file:
        _all_forms, _ready, problems = pickle.load(problem_file)
    return problems


def load_ready_list(path):
    """Return the ``ready`` list stored in the hit-list pickle at ``path``.

    The pickle holds an ``(all_forms, ready, problems)`` tuple; only the
    middle element is returned. The file is opened in binary mode, matching
    the ``'wb'`` mode it is written with, and is closed as soon as it is read.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so only point
    this at hit lists produced by this script.

    :param path: location of the hit-list pickle
    :return: the unpickled ``ready`` object
    """
    with open(path, 'rb') as problem_file:
        _all_forms, ready, _problems = pickle.load(problem_file)
    return ready


def print_problem_list(path):
    """Print every problematic webform followed by its recorded problems.

    Each webform key goes on its own line; beneath it comes one tab-prefixed
    line per problem type holding the type name and its details.

    :param path: location of the hit-list pickle
    """
    for form_key, issues in load_problem_list(path).items():
        print(form_key)
        for problem_type, details in issues.items():
            #Tab, then type and details separated by a single space
            print('\t{0} {1}'.format(problem_type, details))


74
75
76
77
78
def print_ready_list(path):
    """Print the ready webform keys, ordered by numeric figure number.

    :param path: location of the hit-list pickle
    """
    def figure_number(form_key):
        #Second element of the key is the figure number
        return float(form_key[1])

    ready_forms = list(load_ready_list(path))
    ready_forms.sort(key=figure_number)
    for form_key in ready_forms:
        print(form_key)


79
80
81
82
def sort_webform_list(report_id):
    """Survey every webform and split them into ready and problematic ones.

    Each webform returned by ``webform_client.get_list()`` is identified by
    the key ``(webform_url, figure_num, identifier)``. Webforms whose
    ``ready_for_publication`` flag is ``'yes'`` are checked against GCIS and
    the webform server; every finding is filed in ``problems`` under one of
    these problem types:

    * ``figure_id_not_found``  -- figure unknown to GCIS
    * ``org_id_not_found``     -- contributor without an identifier
    * ``image_id_not_found``   -- image unknown to GCIS
    * ``dataset_id_not_found`` -- image dataset unknown to GCIS
    * ``missing_image_files``  -- blank or non-existent remote image path
    * ``broken_image_assoc``   -- image deltas reported by
      ``gcis.has_all_associated_images``
    * ``duplicate_image_id``   -- image identifier used by more than one
      surveyed webform entry

    NOTE(review): the duplicate check runs after ``ready`` has been filled,
    so a webform can appear in both ``ready`` and ``problems``. The original
    code carries a commented-out ``ready.pop(key, None)`` at that point --
    confirm whether such forms should be dropped from ``ready``.

    :param report_id: report identifier forwarded to the GCIS lookups
    :return: ``(all_forms, ready, problems)`` -- list of all keys, list of
        keys with no problem at the time they were checked, and a dict
        mapping key -> {problem type -> list of details}
    """
    all_forms = []
    ready = []
    problems = {}
    #Image identifier -> keys of every webform that references it
    forms_by_image = {}

    def report(form_key, problem_type, detail):
        #File a single finding under the given webform and problem type
        problems.setdefault(form_key, {}).setdefault(problem_type, []).append(detail)

    for entry in webform_client.get_list():
        url = entry['url']
        figure = webform_client.get_webform(url)

        key = (url, figure.figure_num, figure.identifier)
        all_forms.append(key)

        #Only forms flagged ready for publication are examined any further
        if not ('ready_for_publication' in figure.original
                and figure.original['ready_for_publication'] == 'yes'):
            continue

        #Check if the figure exists in GCIS
        if not gcis.figure_exists(report_id, figure.identifier):
            report(key, 'figure_id_not_found',
                   (figure.identifier, figure.figure_num, figure.title))

        #Check if the figure's organizations have been properly identified
        for contributor in figure.contributors:
            if contributor.identifier is None:
                report(key, 'org_id_not_found', contributor)

        for image in figure.images:
            #Remember who uses this image for the uniqueness check below
            forms_by_image.setdefault(image.identifier, []).append(key)

            #Check if each image exists in GCIS
            if not gcis.image_exists(image.identifier):
                report(key, 'image_id_not_found', image.identifier)

            #Check if each image's dataset exists in GCIS
            for dataset in image.datasets:
                if not gcis.dataset_exists(dataset.identifier):
                    report(key, 'dataset_id_not_found', dataset.identifier)

            #Check that the filename is filled out and matches an uploaded file
            path_is_blank = image.remote_path in (None, '')
            if path_is_blank or not webform_client.remote_image_exists(image.remote_path):
                report(key, 'missing_image_files', image.identifier)

            #Check if the image's organizations have been properly identified
            for contributor in image.contributors:
                if contributor.identifier is None:
                    report(key, 'org_id_not_found', contributor)

        #Check for broken image associations
        has_all_images, image_deltas = gcis.has_all_associated_images(
            report_id, figure.identifier, [img.identifier for img in figure.images])
        if not has_all_images and len(image_deltas) > 0:
            problems.setdefault(key, {}).setdefault('broken_image_assoc', []).extend(image_deltas)

        if key not in problems:
            ready.append(key)

    #Last but not least, check if image UUIDs are actually unique
    for image_id, users in forms_by_image.items():
        if len(users) > 1:
            for form_key in users:
                report(form_key, 'duplicate_image_id', (image_id, form_key))

    return all_forms, ready, problems


if __name__ == '__main__':
    #Run the workflow only when executed as a script, not on import
    main()