# sync_surveys.py
__author__ = 'abuddenberg'

3
from gcis_clients import GcisClient, SurveyClient, survey_token, gcis_dev_auth, gcis_stage_auth
4
from gcis_clients.domain import Report, Chapter
abuddenberg's avatar
abuddenberg committed
5
from sync_utils import realize_parents, realize_contributors
6
7

from collections import OrderedDict
abuddenberg's avatar
abuddenberg committed
8

9
import pickle
10
import sys
11
12

# GCIS client used for every create/update call in this script. The development
# instance is active; swap in the commented-out line below to target the
# data-review instance with the stage credentials instead.
gcis = GcisClient('http://data.gcis-dev-front.joss.ucar.edu', *gcis_dev_auth)
# gcis = GcisClient('https://data-review.globalchange.gov', *gcis_stage_auth)

# Survey site client from which figure/image metadata is fetched.
surveys = SurveyClient('https://healthresources.globalchange.gov', survey_token)

# Map of report identifier -> ordered {chapter identifier: [(survey URL, figure identifier), ...]}.
# Chapter order matters: gen_survey_list() indexes the chapter keys by position.
# The trailing "#N.M #chapter" comments are the authors' figure-number notes.
sync_metadata_tree = {
    'usgcrp-climate-and-health-assessment-draft': OrderedDict([
        ('executive-summary', []),
        ('climate-change-and-human-health', [
            ('/metadata/figures/3698', 'major_u_s__climate_trends'), #1.1 #climate-change-and-human-health
            ('/metadata/figures/3632', 'percent_changes_in_the_annual_number_of_extreme_precipitation_events_'), #1.2 #climate-change-and-human-health
            ('/metadata/figures/3635', 'projected_changes_in_temperature_and_precipitation_by_mid_century'), #1.3 #climate-change-and-human-health
            ('/metadata/figures/3633', 'projected_changes_in_the_hottest_coldest_and_wettest_driest_day_of_the_year'), #1.4 #climate-change-and-human-health
            ('/metadata/figures/3757', 'climate_change_and_health'), #1.5 #climate-change-and-human-health
        ]),
        ('temperature-related-death-and-illness', [
            ('/metadata/figures/3811', 'climate_change_and_health__extreme_heat'), #2.1 #temperature-related-death-and-illness
            ('/metadata/figures/3585', 'heat_related_deaths_during_the_1995_chicago_heat_wave'), #2.2 #temperature-related-death-and-illness
            ('/metadata/figures/3643', 'projected_net_changes_in_extreme_temperature_related_deaths'), #2.3 #temperature-related-death-and-illness
            ('/metadata/figures/3653', 'projected_changes_in_deaths_in_u_s__cities_by_season'), #2.4 #temperature-related-death-and-illness
        ]),
        ('air-quality-impacts', [
            ('/metadata/figures/3812', 'climate_change_and_health__outdoor_air_quality'), #3.1 #air-quality-impacts
            ('/metadata/figures/3647', 'projected_change_in_average_daily_maximum_temperature__seasonal_average_maximum_daily_8_hr_ozone__and_ozone_related_premature_deaths_in_2013'), #3.2 #air-quality-impacts
            ('/metadata/figures/3649', 'projected_change_in_ozone_related_premature_deaths'), #3.3 #air-quality-impacts
            ('/metadata/figures/3650', 'ragweed_pollen_season_lengthens'), #3.4 #air-quality-impacts
        ]),
        ('vectorborne-diseases', [
            ('/metadata/figures/3807', 'climate_change_and_health__lyme_disease'), #4.1 #vectorborne-diseases
            ('/metadata/figures/3659', 'changes_in_lyme_disease_case_report_distribution'), #4.2 #vectorborne-diseases
            ('/metadata/figures/3658', 'life_cycle_of_blacklegged_ticks__ixodes_scapularis'), #4.3 #vectorborne-diseases
            ('/metadata/figures/3747', 'lyme_disease_onset_week_modeling_scenarios'), #4.4 #vectorborne-diseases
            ('/metadata/figures/3674', 'incidence_of_west_nile_neuroinvasive_disease_in_the_united_states'), #4.5 #vectorborne-diseases
            ('/metadata/figures/3675', 'climate_impacts_on_west_nile_virus_transmission'), #4.6 #vectorborne-diseases
        ]),
        ('water-related-illnesses', [
            ('/metadata/figures/3824', 'climate_change_and_health___vibrio'), #5.1 #water-related-illnesses
            ('/metadata/figures/3700', 'links_between_climate_change__water_quantity_and_quality__and_human_exposure_to_water_related_illness'), #5.2 #water-related-illnesses  #TOO BIG
            ('/metadata/figures/3671', 'locations_of_livestock_and_projections_of_heavy_precipitation'), #5.3 #water-related-illnesses #TOO BIG
            ('/metadata/figures/3673', 'potential_routes_of_manure_borne_microbial_contaminants_to_ground_and_surface_water_supplies_'), #5.3 #water-related-illnesses UNUSED?
            ('/metadata/figures/3709', 'projections_of_vibrio_occurrence_and_abundance_in_chesapeake_bay'), #5.4 #water-related-illnesses
            ('/metadata/figures/3704', 'changes_in_suitable_coastal_vibrio_habitat_in_alaska'), #5.5 #water-related-illnesses
            ('/metadata/figures/3734', 'projected_changes_in_caribbean_gambierdiscus_species'), #5.6 #water-related-illnesses
            ('/metadata/figures/3712', 'projections_of_growth_of_alexandrium_fundyense_in_puget_sound'), #5.7 #water-related-illnesses
        ]),
        ('food-safety--nutrition--and-distribution', [
            ('/metadata/figures/3579', 'farm_to_table'), #6.1 #food-safety--nutrition--and-distribution
            ## ('/metadata/figures/3600', 'mycotoxin_in_corn'), #6.1 #food-safety--nutrition--and-distribution BOX 1?
            ('/metadata/figures/3809', 'climate_change_and_health__salmonella'), #6.2 #food-safety--nutrition--and-distribution
            ('/metadata/figures/3748', 'seasonality_of_human_illnesses_associated_with_foodborne_pathogens'), #6.3 #food-safety--nutrition--and-distribution
            ('/metadata/figures/3688', 'effects_of_carbon_dioxide_on_protein_and_minerals'), #6.4 #food-safety--nutrition--and-distribution
            ('/metadata/figures/3597', 'mississippi_river_level_at_st__louis__missouri'), #6.5 #food-safety--nutrition--and-distribution
        ]),
        ('extreme-weather', [
            ('/metadata/figures/3810', 'estimated_deaths_and_billion_dollar_losses_from_extreme_weather_events_in_the_u_s__2004_2013'), #7.1 #extreme-weather #Has Activities
            ## ('/metadata/figures/3772', 'trends_in_flood_magnitude'), #7.2 #extreme-weather NOT USED
            ('/metadata/figures/3808', 'climate_change_and_health__flooding'), #7.2 #extreme-weather
            ('/metadata/figures/3760', 'hurricane_induced_flood_effects_in_eastern_and_central_united_states'), #7.3 #extreme-weather
        ]),
        ('mental-health-and-well-being', [
            ('/metadata/figures/3789', 'climate_change_and_mental_health'), #8.1 #mental-health-and-well-being
            ('/metadata/figures/3722', 'the_impact_of_climate_change_on_physical__mental__and_community_health'), #8.2 #mental-health-and-well-being
        ]),
        ('populations-of-concern', [
            ('/metadata/figures/3696', 'determinants_of_vulnerability'), #9.1 #populations-of-concern
            ('/metadata/figures/3694', 'social_determinants_of_health'), #9.2 #populations-of-concern
            ('/metadata/figures/3758', 'children_at_different_lifestages_experience_unique_vulnerabilities_to_climate_change'), #9.3 #populations-of-concern
            ('/metadata/figures/3714', 'mapping_social_vulnerability'), #9.4 #populations-of-concern
            ('/metadata/figures/3717', 'mapping_communities_vulnerable_to_heat_in_georgia'), #9.5 #populations-of-concern
        ]),
        # NOTE(review): the trailing tags on the entries below name chapter 1
        # (climate-change-and-human-health) although they are filed under the
        # appendix -- this looks like a copy/paste leftover; confirm the
        # intended figure numbers before relying on them.
        ('appendix-1--technical-support-document', [
            ('/metadata/figures/3623', 'emissions_levels_determine_temperature_rises'), #1.1 #climate-change-and-human-health
            ('/metadata/figures/3759', 'the_shared_socioeconomic_pathways'), #1.2 #climate-change-and-human-health
            ('/metadata/figures/3726', 'example_spatial_resolution_of_climate_models'), #1.3 #climate-change-and-human-health
            ('/metadata/figures/3638', 'sensitivity_analysis_of_differences_in_modeling_approaches'), #1.4 #climate-change-and-human-health
        ])
    ])
}

91

abuddenberg's avatar
abuddenberg committed
92
def main():
    """Push every survey listed in ``sync_metadata_tree`` into GCIS.

    For each (survey URL, figure identifier) pair the survey is fetched with
    downloads enabled, its parents and contributors are handed to
    ``realize_parents`` / ``realize_contributors``, and the figure is updated
    in GCIS. Each image of the figure then has its identifier replaced by the
    one stored in ``image_id_cache.pk1`` before being updated as well.

    Output uses single-argument ``print(...)`` calls so the text is the same
    under the Python 2 print statement and the Python 3 print function.

    Raises:
        IOError: if ``image_id_cache.pk1`` cannot be opened.
        KeyError: if an image identifier is missing from the cache.
    """
    print(gcis.test_login())

    # Uncomment to rebuild the image identifier cache before syncing.
    # regenerate_image_id_map()

    # The cache is written in binary mode, so read it back the same way and
    # close the handle once loaded.
    # NOTE: unpickling is acceptable only because this is a local file that
    # this script produced itself; never load a cache from an untrusted source.
    with open('image_id_cache.pk1', 'rb') as cache_file:
        image_id_map = pickle.load(cache_file)

    # Uncomment for a first-time load of the report and its chapters.
    # create_health_report()

    for report_id in sync_metadata_tree:
        for chapter_id in sync_metadata_tree[report_id]:
            for survey_url, figure_id in sync_metadata_tree[report_id][chapter_id]:
                print('%s %s' % (survey_url, gen_edit_link(survey_url)))

                figure, datasets = surveys.get_survey(survey_url, do_download=True)
                realize_parents(gcis, figure.parents)
                realize_contributors(gcis, figure.contributors)

                print('Contributors:  %s' % (figure.contributors,))
                print('Parents:  %s' % (figure.parents,))

                for ds in figure.parents:
                    if ds.publication_type_identifier != 'dataset':
                        continue
                    # Assign a synthetic activity identifier to datasets associated with the figure
                    if ds.activity and ds.activity.identifier is None:
                        ds.activity.identifier = generate_activity_id(figure, ds.publication)
                    print('Dataset:  %s' % (ds.activity,))

                # Update the figure in GCIS (use the create call instead for a first-time load)
                # print('Creating figure...  %s' % (gcis.create_figure(report_id, chapter_id, figure, skip_images=True),))
                print('Updating figure...  %s' % (gcis.update_figure(report_id, chapter_id, figure, skip_images=True),))

                for image in figure.images:
                    # Swap the survey-side identifier for the cached one.
                    image.identifier = image_id_map[image.identifier]
                    print('\t%s' % (image,))

                    realize_parents(gcis, image.parents)
                    realize_contributors(gcis, image.contributors)

                    print('\t\tContributors:  %s' % (image.contributors,))
                    print('\t\tParents:  %s' % (image.parents,))
                    for ds in image.parents:
                        if ds.publication_type_identifier != 'dataset':
                            continue
                        # Assign a synthetic activity identifier to datasets associated with the image
                        if ds.activity and ds.activity.identifier is None:
                            ds.activity.identifier = generate_activity_id(image, ds.publication)
                        print('\t\tDataset:  %s %s' % (ds, ds.activity))

                    # Update the image in GCIS (use the create call instead for a first-time load)
                    # print('Creating image...  %s' % (gcis.create_image(image, report_id=report_id, figure_id=figure_id),))
                    print('Updating image...  %s' % (gcis.update_image(image),))


def gen_edit_link(survey_id):
    """Build the survey-site node URL for a survey path.

    Only the text after the final ``/`` of ``survey_id`` is used (the whole
    string when it contains no slash); it is appended to the fixed
    ``/node/`` prefix of the survey site.
    """
    _head, _sep, node_id = survey_id.rpartition('/')
    return 'https://healthresources.globalchange.gov/node/' + node_id
143
144
145


def generate_activity_id(image, dataset):
    """Build a synthetic activity identifier for a figure/image and a dataset.

    The identifier is the text before the first ``-`` of ``image.identifier``,
    then ``dataset.identifier``, then ``-process``, all joined with ``-``.

    Args:
        image: object exposing a string ``identifier`` (main() passes both
            figures and images).
        dataset: object exposing a string ``identifier``.

    Returns:
        The identifier string, or ``None`` when it could not be built (for
        example a missing object or a ``None`` identifier); a warning is
        written to stderr in that case.
    """
    try:
        prefix = image.identifier.split('-')[0]
        # NOTE(review): joining with '-' while the last part already starts
        # with '-' yields a double dash ("<prefix>-<dataset>--process"). The
        # format is kept as-is because identifiers already stored elsewhere
        # may rely on it -- confirm before changing.
        return '-'.join([prefix, dataset.identifier, '-process'])
    except Exception:
        # Deliberately best-effort: callers carry on with a None identifier.
        sys.stderr.write('WARNING: Activity identifier generation failed\n')
        return None
150

abuddenberg's avatar
abuddenberg committed
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def regenerate_image_id_map(existing=None):
    """Rebuild ``image_id_cache.pk1``, the survey-image-id -> UUID mapping.

    Every survey in ``sync_metadata_tree`` is fetched (without downloads) and
    each image identifier not already mapped is given a fresh ``uuid4``
    string. The mapping is then pickled to ``image_id_cache.pk1``.

    Args:
        existing: optional mapping to extend in place; entries already in it
            are kept. A new dict is started when omitted.

    Returns:
        The resulting mapping (also written to disk).
    """
    from uuid import uuid4
    image_id_map = {} if existing is None else existing

    for report in sync_metadata_tree.values():
        for chapter_surveys in report.values():
            for survey_url, _figure_id in chapter_surveys:
                # main() unpacks get_survey() as (figure, datasets); accept
                # that shape as well as a bare figure so both work here.
                result = surveys.get_survey(survey_url, do_download=False)
                figure = result[0] if isinstance(result, tuple) else result
                for img in figure.images:
                    if img.identifier in image_id_map:
                        print('skipping:  %s' % (img.identifier,))
                        continue
                    print('added:  %s' % (img.identifier,))
                    image_id_map[img.identifier] = str(uuid4())

    with open('image_id_cache.pk1', 'wb') as fout:
        pickle.dump(image_id_map, fout)
    print('image_id_map regenerated')
    return image_id_map


def gen_survey_list():
    """List the surveys known to the survey site with chapter/figure details.

    Returns:
        A list of ``(chapter_id, figure_num, identifier, title, url)`` tuples,
        one per survey that could be retrieved. ``chapter_id`` is ``None``
        when the survey has no truthy chapter value.
    """
    realized_list = []
    # The survey's chapter value is used as an index into the ordered chapter
    # keys of the health assessment report.
    chapters = list(sync_metadata_tree['usgcrp-climate-and-health-assessment-draft'])

    survey_list = surveys.get_list()
    total = len(survey_list)
    for position, survey in enumerate(survey_list, 1):
        url = survey['url']
        print('Processing: {b}{url} ({i}/{total})'.format(b=surveys.base_url, url=url, i=position, total=total))

        # main() unpacks get_survey() as (figure, datasets); accept that shape
        # as well as a bare figure so both work here.
        result = surveys.get_survey(url)
        s = result[0] if isinstance(result, tuple) else result
        if s:
            chp_id = chapters[s.chapter] if s.chapter else None
            print(s.identifier)
            print('%s %s %s' % (chp_id, s.figure_num, s.title))

            realized_list.append((chp_id, s.figure_num, s.identifier, s.title, url))
        print('')
    return realized_list
190
191
192


def create_health_report():
    """Create the climate and health assessment draft report and its chapters in GCIS.

    The report is created first, then one chapter per entry of the local
    chapter table; the result of every create call is printed.
    """
    hr = Report({
        'identifier': 'usgcrp-climate-and-health-assessment-draft',
        'report_type_identifier': 'assessment',
        'title': 'Impacts of Climate Change on Human Health in the United States: A Scientific Assessment',
        'url': 'http://www.globalchange.gov/health-assessment',
        'publication_year': '2015',
        'contact_email': 'healthreport@usgcrp.gov'
    })

    # (identifier, number, title); number is None for unnumbered sections.
    chapters = [
        ('executive-summary', None, 'Executive Summary'),
        ('climate-change-and-human-health', 1, 'Climate Change and Human Health'),
        ('temperature-related-death-and-illness', 2, 'Temperature-Related Death and Illness'),
        ('air-quality-impacts', 3, 'Air Quality Impacts'),
        ('vectorborne-diseases', 4, 'Vectorborne Diseases'),
        ('water-related-illnesses', 5, 'Climate Impacts on Water-Related Illnesses'),
        ('food-safety--nutrition--and-distribution', 6, 'Food Safety, Nutrition, and Distribution'),
        ('extreme-weather', 7, 'Impacts of Extreme Events on Human Health'),
        ('mental-health-and-well-being', 8, 'Mental Health and Well-Being'),
        ('populations-of-concern', 9, 'Climate-Health Risk Factors and Populations of Concern'),
        ('appendix-1--technical-support-document', None, 'Appendix 1: Technical Support Document')
    ]

    print(gcis.create_report(hr))

    # chapter_id rather than ``id`` so the builtin is not shadowed.
    for chapter_id, num, title in chapters:
        ch = Chapter({
            'identifier': chapter_id,
            'number': num,
            'title': title,
            'report_identifier': hr.identifier
        })

        print(gcis.create_chapter(hr.identifier, ch))

229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261

def create_cmip5_report():
    """Create the NOAA NESDIS 144 technical report and its chapters in GCIS.

    The report is created first, then one chapter per entry of the local
    chapter table; the result of every create call is printed.
    """
    cmip = Report({
        'identifier': 'noaa-techreport-nesdis-144',
        'report_type_identifier': 'report',
        'title': 'Regional Surface Climate Conditions in CMIP3 and CMIP5 for the United States: Differences, Similarities, and Implications for the U.S. National Climate Assessment',
        'publication_year': '2015'
    })

    print(gcis.create_report(cmip))

    # (identifier, number, title); number is None for unnumbered sections.
    chapters = [
        ('introduction', 1, 'Introduction'),
        ('data', 2, 'Data'),
        ('methods', 3, 'Methods'),
        ('temperature', 4, 'Temperature'),
        ('precipitation', 5, 'Precipitation'),
        ('summary', 6, 'Summary'),
        ('appendix', None, 'Appendix'),
        ('references', None, 'References'),
        ('acknowledgements', None, 'Acknowledgements'),
    ]

    # chapter_id rather than ``id`` so the builtin is not shadowed.
    for chapter_id, num, title in chapters:
        ch = Chapter({
            'identifier': chapter_id,
            'number': num,
            'title': title,
            'report_identifier': cmip.identifier
        })

        print(gcis.create_chapter(cmip.identifier, ch))

abuddenberg's avatar
abuddenberg committed
262
# Run the sync only when executed as a script, so importing this module for
# its helpers does not start a sync.
if __name__ == '__main__':
    main()