Commit 2a4c78e5 authored by abuddenberg
Browse files

Another merge

parent 4a44bb43
......@@ -246,7 +246,7 @@ sync_metadata_tree = {
('oceans-marine-resources', [
('/metadata/figures/3106', 'observed-ocean-warming'), # 24.1
('/metadata/figures/2560', 'ocean-acidification-reduces-size-of-clams'), #24.3
('/metadata/figures/2853', 'warming-seas-are-a-doubleblow-to-corals') # 24.4
('/metadata/figures/2853', 'warming-seas-are-a-doubleblow-to-corals'), # 24.4
('/metadata/figures/2854', 'fisheries-shifting-north') # 24.5
]),
#Chapter 25
......
......@@ -14,7 +14,7 @@ def check_image(fn):
# if len(args) < 1 or not isinstance(args[0], Image):
# raise Exception('Invalid Image')
if args[1].identifier in (None, ''):
raise Exception('Invalid identifier', args[0].identifier)
raise Exception('Invalid identifier', args[1].identifier)
return fn(*args, **kwargs)
return wrapped
......@@ -117,6 +117,11 @@ class GcisClient(object):
)
resp = self.s.post(url, data=figure.as_json(), verify=False)
if resp.status_code != 200:
return resp
if figure.local_path is not None:
self.upload_figure_file(report_id, chapter_id, figure.identifier, figure.local_path)
if skip_images is False:
for image in figure.images:
......@@ -160,10 +165,24 @@ class GcisClient(object):
url = '{b}/report/{rpt}/figure/{fig}'.format(b=self.base_url, rpt=report_id, fig=figure_id)
return self.s.delete(url, verify=False)
@http_resp
def upload_figure_file(self, report_id, chapter_id, figure_id, local_path):
    """Upload a local figure file to the figure's ``files`` endpoint via PUT.

    The target URL is built from ``self.base_url``, the report, chapter and
    figure identifiers, and the base name of ``local_path``.

    :param report_id: report identifier placed in the URL
    :param chapter_id: chapter identifier placed in the URL
    :param figure_id: figure identifier placed in the URL
    :param local_path: path of the file on disk whose bytes are sent
    :raises Exception: ``'File not found: <path>'`` when ``local_path`` does
        not exist
    :return: the result of ``self.s.put`` (passed through the ``http_resp``
        decorator, which is defined elsewhere)
    """
    # Example target:
    # report/usgcrp-climate-and-health-assessment-draft/chapter/climate-change-and-human-health/figure/files/emissions_levels_determine_temperature_rises/
    url = '{b}/report/{rpt}/chapter/{chp}/figure/files/{id}/{fn}'.format(
        b=self.base_url,
        rpt=report_id,
        chp=chapter_id,
        id=figure_id,
        fn=basename(local_path)
    )

    # For future multi-part encoding support
    # return self.s.put(url, headers=headers, files={'file': (filename, open(filepath, 'rb'))})
    if not os.path.exists(local_path):
        raise Exception('File not found: ' + local_path)

    # Open the file in a context manager so the handle is released once the
    # request has completed; the previous inline open() was never closed.
    with open(local_path, 'rb') as figure_file:
        return self.s.put(url, data=figure_file, verify=False)
@check_image
def create_image(self, image, report_id=None, figure_id=None):
url = '{b}/image/'.format(b=self.base_url)
resp = self.s.post(url, data=image.as_json(), verify=False)
print image.as_json()
if resp.status_code != 200:
return resp
if image.local_path is not None:
self.upload_image_file(image.identifier, image.local_path)
......@@ -552,9 +571,13 @@ class GcisClient(object):
def associate_contributor_with_figure(self, contrib, report_id, chapter_id, figure_id):
url = '{b}/report/{rpt}/chapter/{chp}/figure/contributors/{fig}'.format(b=self.base_url, rpt=report_id, chp=chapter_id, fig=figure_id)
data = {
'role': contrib.role.type_id,
}
try:
data = {
'role': contrib.role.type_id,
}
except AttributeError as e:
print 'Contributor {c} missing role'.format(c=contrib)
raise e
if contrib.person is not None and contrib.person.id is not None:
data['person_id'] = contrib.person.id
......
......@@ -5,7 +5,8 @@ import requests
import re
from os.path import join, basename
from gcis_clients.domain import Figure, Image, Dataset, Parent
from gcis_clients.domain import Figure, Image, Dataset, Parent, Contributor, Person, Organization
import survey_transforms as trans
def get_credentials():
......@@ -32,10 +33,10 @@ def populate_figure(fig_json):
try:
f.figure_num, f.title = parse_title(fig_json['graphics_title'])
f.identifier = fig_json['figure_id'] if fig_json['figure_id'] else re.sub('\W', '_', f.title).lower()
f.create_dt = fig_json['graphics_create_date']
f.time_start, f.time_end = fig_json['period_record']
f.create_dt = fig_json['graphics_create_date'].strip()
if any(fig_json['period_record']):
f.time_start, f.time_end = [d.strip() for d in fig_json['period_record']]
f.lat_min, f.lat_max, f.lon_min, f.lon_max = fig_json['spatial_extent']
f.remote_path = fig_json['filepath']
except Exception, e:
print 'Figure exception: ', e
......@@ -46,9 +47,10 @@ def populate_image(img_json):
img = Image({})
try:
img.title = img_json['graphics_title']
img.identifier = img_json['image_id'] if img_json['image_id'] else re.sub('\W', '_', img.title).lower()
img.create_dt = img_json['graphics_create_date']
img.time_start, img.time_end = img_json['period_record']
img.identifier = img_json['image_id'] if 'image_id' in img_json and img_json['image_id'] else re.sub('\W', '_', img.title).lower()
img.create_dt = img_json['graphics_create_date'].strip()
if any(img_json['period_record']):
img.time_start, img.time_end = [d.strip() for d in img_json['period_record']]
img.lat_min, img.lat_max, img.lon_min, img.lon_max = img_json['spatial_extent']
except Exception, e:
print 'Image exception: ', e
......@@ -73,18 +75,32 @@ def populate_dataset(ds_json):
def populate_parent(pub_json):
p = Parent({})
try:
p.publication_type_identifier = pub_json['publicationType'].lower
p.label = pub_json[''] #title or whatever TODO: add a map for each publication to its title or name
p = Parent(pub_json, trans=trans.PARENT_TRANSLATIONS, pubtype_map=trans.PARENT_PUBTYPE_MAP)
p.url = ''
except Exception, e:
print 'Exception: ', e
print 'Parent exception: ', e
return p
def populate_contributors(field):
    """Build a Contributor from a ``"<person name>, <organization>"`` string.

    The text before the first comma is split on whitespace; its first token
    becomes the person's first name and its last token the last name (any
    middle tokens are dropped). The text between the first and second comma,
    if a comma is present, is used verbatim as the organization name;
    otherwise the organization name is None.

    :param field: raw contributor string from the survey
    :return: a Contributor whose ``person`` and ``organization`` are set
    :raises IndexError: when the name portion contains no tokens
    """
    name_part, comma, remainder = field.partition(',')

    tokens = name_part.split()
    given, family = tokens[0], tokens[-1]

    # Only the segment directly after the first comma is considered.
    organization_name = remainder.split(',')[0] if comma else None

    result = Contributor({}, hints=trans.CONTRIB_ROLES)
    result.person = Person({'first_name': given, 'last_name': family})
    result.organization = Organization(
        {'name': organization_name},
        known_ids=trans.ORG_IDS
    )
    return result
class SurveyClient:
def __init__(self, url, token, local_download_dir='.'):
def __init__(self, url, token, local_download_dir=None):
self.base_url = url
#If token was not provided, obtain it
......@@ -93,7 +109,11 @@ class SurveyClient:
self.token = token
self.local_download_dir = local_download_dir
if local_download_dir:
self.local_download_dir = local_download_dir
else:
from gcis_clients import default_image_dir
self.local_download_dir = default_image_dir()
def get_list(self):
url = '{b}/metadata/list?token={t}'.format(b=self.base_url, t=self.token)
......@@ -108,10 +128,15 @@ class SurveyClient:
f = None
if 'figure' in tier1_json:
figure_json = tier1_json['figure']
#It's not worth trying to translations on this data; it's too different
f = populate_figure(tier1_json['figure'])
f = populate_figure(figure_json)
f.remote_path = survey_json[0]['filepath']
f.local_path = join(self.local_download_dir, basename(f.remote_path)) if f.remote_path else None
if 'origination' in figure_json and figure_json['origination'] not in ('Original',) and 'publication' in figure_json:
f.parents.append(populate_parent(figure_json['publication']))
if 'images' in tier1_json:
images = [populate_image(img) for img in tier1_json['images']]
f.images.extend(images)
......@@ -130,8 +155,8 @@ class SurveyClient:
except Exception, e:
print 'Association exception: ', e
if 'origination' in tier1_json and tier1_json['origination'] not in ('Original',):
f.parents.append(populate_parent(tier1_json['publication']))
if 'poc' in tier1_json:
f.add_contributor(populate_contributors(tier1_json['poc']))
if do_download:
self.download_figure(f)
......@@ -140,7 +165,6 @@ class SurveyClient:
def download_figure(self, figure):
url = '{b}/{path}?token={t}'.format(b=self.base_url, path=figure.remote_path, t=self.token)
print url
resp = requests.get(url, stream=True)
if resp.status_code == 200:
......
__author__ = 'abuddenberg'
# Survey publication JSON key -> Parent attribute name.
# Handed to Parent(...) as its `trans` argument. Every per-publication-type
# title key maps onto the single 'label' attribute.
PARENT_TRANSLATIONS = {
    'publicationType': 'publication_type_identifier',

    # Title fields, one per publication type
    'report_name': 'label',
    'journal_article_title': 'label',
    'book_title': 'label',
    'book_section_title': 'label',
    'conference_proceeding_title': 'label',
    'electronic_article_title': 'label',
    'newspaper_article_title': 'label',
    'web_page_title': 'label',
}
# Survey publication type name -> publication type identifier.
# Handed to Parent(...) as its `pubtype_map` argument.
# NOTE(review): 'Book Section' maps to 'report', not 'book' -- confirm this is
# intentional.
PARENT_PUBTYPE_MAP = {
    'Book': 'book',
    'Book Section': 'report',
    'Conference Proceedings': 'generic',
    'Electronic Article': 'article',
    'Journal Article': 'article',
    'Newspaper Article': 'article',
    'Report': 'report',
    'Web Page': 'webpage',
}

# Currently empty placeholder.
PARENT_SEARCH_HINTS = {}
# Organization name as written in the surveys -> organization identifier.
# Handed to Organization(...) as its `known_ids` argument. Several spellings
# of the same organization deliberately share one identifier.
ORG_IDS = {
    # Spelling variants of NCDC / CICS-NC
    'NOAA NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc',
    'NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc',
    'NOAA NCDC/CICS NC': 'cooperative-institute-climate-satellites-nc',
    'NESDIS/NCDC': 'national-climatic-data-center',
    'NCDC': 'national-climatic-data-center',

    # Everything else, one spelling each
    'U.S. Forest Service': 'us-forest-service',
    'NOAA Pacific Marine Environmental Laboratory': 'pacific-marine-environmental-laboratory',
    'Jet Propulsion Laboratory': 'jet-propulsion-laboratory',
    'HGS Consulting': 'hgs-consulting-llc',
    'University of Virginia': 'university-virginia',
    'Miami-Dade Dept. of Regulatory and Economic Resources': 'miami-dade-dept-regulatory-economic-resources',
    'Nansen Environmental and Remote Sensing Center': 'nansen-environmental-and-remote-sensing-center',
    'University of Illinois at Urbana-Champaign': 'university-illinois',
    'USGCRP': 'us-global-change-research-program',
    'National Park Service': 'national-park-service',
    'Institute of the Environment': 'university-arizona',
    'USGS': 'us-geological-survey',
    'University of Puerto Rico': 'university-puerto-rico',
    'University of Alaska': 'university-alaska',
    'U.S. Department of Agriculture': 'us-department-agriculture',
    'Kiksapa Consulting': 'kiksapa-consulting-llc',
    'Centers for Disease Control and Prevention': 'centers-disease-control-and-prevention',
    'Pacific Northwest Laboratories': 'pacific-northwest-national-laboratory',
    'Susanne Moser Research & Consulting': 'susanne-moser-research-consulting',
    'NEMAC': 'national-environmental-modeling-analysis-center',
    'LBNL': 'lawrence-berkeley-national-laboratory',
    'Texas Tech University': 'texas-tech-university',
}
# Person name -> contributor role identifier.
# Handed to Contributor(...) as its `hints` argument.
# NOTE(review): the ' Chapin' key has a leading space and sits next to
# 'F. Chapin'; it looks like a deliberate alias for a name with an empty
# first-name part -- confirm before normalising it.
CONTRIB_ROLES = {
    'Kenneth Kunkel': 'scientist',
    'Xungang Yin': 'scientist',
    'Nina Bednarsek': 'scientist',
    'Henry Schwartz': 'scientist',
    'Jessicca Griffin': 'graphic_artist',
    'James Youtz': 'scientist',
    'Chris Fenimore': 'scientist',
    'Deb Misch': 'graphic_artist',
    'James Galloway': 'scientist',
    'Laura Stevens': 'scientist',
    'Nichole Hefty': 'point_of_contact',
    'Mike Squires': 'scientist',
    'Peter Thorne': 'scientist',
    'Donald Wuebbles': 'scientist',
    'Felix Landerer': 'scientist',
    'David Wuertz': 'scientist',
    'Russell Vose': 'scientist',
    'Gregg Garfin': 'scientist',
    'Jeremy Littell': 'scientist',
    'Emily Cloyd': 'contributing_author',
    'F. Chapin': 'scientist',
    ' Chapin': 'scientist',
    'Andrew Buddenberg': 'analyst',
    'Jerry Hatfield': 'author',
    'George Luber': 'lead_author',
    'Kathy Hibbard': 'lead_author',
    'Susanne Moser': 'convening_lead_author',
    'Bull Bennett': 'convening_lead_author',
    'Ernesto Weil': 'scientist',
    'William Elder': 'scientist',
    'Greg Dobson': 'analyst',
    'Michael Wehner': 'scientist',
    'Katharine Hayhoe': 'scientist',
}
......@@ -48,12 +48,11 @@ def realize_contributors(gcis_client, contributors):
print '\t\t', name_matches
if org.identifier in (None, '') and org.name not in (None, ''):
print 'No ID found for ' + org.name
#Check if we missed any organizations in our hardcoding...
if not all(map(lambda c: c.organization.identifier is not None, contributors)):
print contributors
print 'Missing organizations: ', contributors
def realize_parents(gcis_client, parents):
......
......@@ -2,8 +2,10 @@ __author__ = 'abuddenberg'
from gcis_clients import GcisClient, SurveyClient, survey_token, gcis_dev_auth
from gcis_clients.domain import Report, Chapter
from sync_utils import realize_parents, realize_contributors
from collections import OrderedDict
import pickle
......@@ -12,16 +14,12 @@ surveys = SurveyClient('https://healthresources.globalchange.gov', survey_token)
sync_metadata_tree = {
'usgcrp-climate-and-health-assessment-draft': OrderedDict([
# ('executive-summary', []),
('executive-summary', []),
('climate-change-and-human-health', [
('/metadata/figures/3623', 'emissions_levels_determine_temperature_rises'), #1.1 #climate-change-and-human-health
('/metadata/figures/3698', 'major_u_s__climate_trends'), #1.1 #climate-change-and-human-health
('/metadata/figures/3632', 'percent_changes_in_the_annual_number_of_extreme_precipitation_events_'), #1.2 #climate-change-and-human-health
('/metadata/figures/3759', 'the_shared_socioeconomic_pathways'), #1.2 #climate-change-and-human-health
('/metadata/figures/3635', 'projected_changes_in_temperature_and_precipitation_by_mid_century'), #1.3 #climate-change-and-human-health
('/metadata/figures/3726', 'example_spatial_resolution_of_climate_models'), #1.3 #climate-change-and-human-health
('/metadata/figures/3633', 'projected_changes_in_the_hottest_coldest_and_wettest_driest_day_of_the_year'), #1.4 #climate-change-and-human-health
('/metadata/figures/3638', 'sensitivity_analysis_of_differences_in_modeling_approaches'), #1.4 #climate-change-and-human-health
('/metadata/figures/3757', 'climate_change_and_health'), #1.5 #climate-change-and-human-health
]),
('temperature-related-death-and-illness', [
......@@ -37,19 +35,18 @@ sync_metadata_tree = {
('/metadata/figures/3650', 'ragweed_pollen_season_lengthens'), #3.4 #air-quality-impacts
]),
('vectorborne-diseases', [
('/metadata/figures/3753', '__summary_of_reported_case_counts_of_notifiable_vectorborne_and_zoonotic_diseases_in_the_united_states_'), #4.1 #vectorborne-diseases
('/metadata/figures/3807', 'climate_change_and_health__lyme_disease'), #4.1 #vectorborne-diseases
('/metadata/figures/3659', 'changes_in_lyme_disease_case_report_distribution'), #4.2 #vectorborne-diseases
('/metadata/figures/3658', 'life_cycle_of_blacklegged_ticks__ixodes_scapularis'), #4.3 #vectorborne-diseases
('/metadata/figures/3747', 'lyme_disease_onset_week_modeling_scenarios'), #4.4 #vectorborne-diseases
('/metadata/figures/3674', 'incidence_of_west_nile_neuroinvasive_disease_in_the_united_states'), #4.5 #vectorborne-diseases
('/metadata/figures/3675', 'climate_impacts_on_west_nile_virus_transmission'), #4.6 #vectorborne-diseases
('/metadata/figures/3675', 'climate_impacts_on_west_nile_virus_transmission'), #4.6 #vectorborne-diseases
]),
('water-related-illnesses', [
('/metadata/figures/3824', 'climate_change_and_health___vibrio'), #5.1 #water-related-illnesses
('/metadata/figures/3700', 'links_between_climate_change__water_quantity_and_quality__and_human_exposure_to_water_related_illness'), #5.2 #water-related-illnesses
('/metadata/figures/3671', 'locations_of_livestock_and_projections_of_heavy_precipitation'), #5.3 #water-related-illnesses
('/metadata/figures/3673', 'potential_routes_of_manure_borne_microbial_contaminants_to_ground_and_surface_water_supplies_'), #5.3 #water-related-illnesses
# ('/metadata/figures/3673', 'potential_routes_of_manure_borne_microbial_contaminants_to_ground_and_surface_water_supplies_'), #5.3 #water-related-illnesses UNUSED?
('/metadata/figures/3709', 'projections_of_vibrio_occurrence_and_abundance_in_chesapeake_bay'), #5.4 #water-related-illnesses
('/metadata/figures/3704', 'changes_in_suitable_coastal_vibrio_habitat_in_alaska'), #5.5 #water-related-illnesses
('/metadata/figures/3734', 'projected_changes_in_caribbean_gambierdiscus_species'), #5.6 #water-related-illnesses
......@@ -57,59 +54,98 @@ sync_metadata_tree = {
]),
('food-safety--nutrition--and-distribution', [
('/metadata/figures/3579', 'farm_to_table'), #6.1 #food-safety--nutrition--and-distribution
('/metadata/figures/3600', 'mycotoxin_in_corn'), #6.1 #food-safety--nutrition--and-distribution
# ('/metadata/figures/3600', 'mycotoxin_in_corn'), #6.1 #food-safety--nutrition--and-distribution BOX 1?
('/metadata/figures/3809', 'climate_change_and_health__salmonella'), #6.2 #food-safety--nutrition--and-distribution
('/metadata/figures/3748', 'seasonality_of_human_illnesses_associated_with_foodborne_pathogens'), #6.3 #food-safety--nutrition--and-distribution
('/metadata/figures/3688', 'effects_of_carbon_dioxide_on_protein_and_minerals'), #6.4 #food-safety--nutrition--and-distribution
('/metadata/figures/3597', 'mississippi_river_level_at_st__louis__missouri'), #6.5 #food-safety--nutrition--and-distribution
('/metadata/figures/3597', 'mississippi_river_level_at_st__louis__missouri'), #6.5 #food-safety--nutrition--and-distribution
]),
('extreme-weather', [
('/metadata/figures/3810', 'estimated_deaths_and_billion_dollar_losses_from_extreme_weather_events_in_the_u_s__2004_2013'), #7.1 #extreme-weather
('/metadata/figures/3772', 'trends_in_flood_magnitude'), #7.2 #extreme-weather
# ('/metadata/figures/3772', 'trends_in_flood_magnitude'), #7.2 #extreme-weather NOT USED
('/metadata/figures/3808', 'climate_change_and_health__flooding'), #7.2 #extreme-weather
('/metadata/figures/3760', 'hurricane_induced_flood_effects_in_eastern_and_central_united_states'), #7.3 #extreme-weather
('/metadata/figures/3760', 'hurricane_induced_flood_effects_in_eastern_and_central_united_states'), #7.3 #extreme-weather
]),
('mental-health-and-well-being', [
('/metadata/figures/3789', 'climate_change_and_mental_health'), #8.1 #mental-health-and-well-being
('/metadata/figures/3722', 'the_impact_of_climate_change_on_physical__mental__and_community_health'), #8.2 #mental-health-and-well-being
('/metadata/figures/3722', 'the_impact_of_climate_change_on_physical__mental__and_community_health'), #8.2 #mental-health-and-well-being
]),
('populations-of-concern', [
('/metadata/figures/3696', 'determinants_of_vulnerability'), #9.1 #populations-of-concern
('/metadata/figures/3694', 'social_determinants_of_health'), #9.2 #populations-of-concern
('/metadata/figures/3758', 'children_at_different_lifestages_experience_unique_vulnerabilities_to_climate_change'), #9.3 #populations-of-concern
('/metadata/figures/3714', 'mapping_social_vulnerability'), #9.4 #populations-of-concern
('/metadata/figures/3717', 'climate_and_health_vulnerability_case_study__heat_related_illness_in_georgia'), #9.5 #populations-of-concern
('/metadata/figures/3717', 'mapping_communities_vulnerable_to_heat_in_georgia'), #9.5 #populations-of-concern
]),
('appendix-1--technical-support-document', [])
('appendix-1--technical-support-document', [
('/metadata/figures/3623', 'emissions_levels_determine_temperature_rises'), #1.1 #climate-change-and-human-health
('/metadata/figures/3759', 'the_shared_socioeconomic_pathways'), #1.2 #climate-change-and-human-health
('/metadata/figures/3726', 'example_spatial_resolution_of_climate_models'), #1.3 #climate-change-and-human-health
('/metadata/figures/3638', 'sensitivity_analysis_of_differences_in_modeling_approaches'), #1.4 #climate-change-and-human-health
])
])
}
chapters = [c for c in sync_metadata_tree['usgcrp-climate-and-health-assessment-draft']]
def main():
regenerate_image_id_map()
image_id_map = pickle.load(open('image_id_cache.pk1', 'r'))
for report_id in sync_metadata_tree:
for chapter_id in sync_metadata_tree[report_id]:
for survey_url, figure_id in sync_metadata_tree[report_id][chapter_id]:
print survey_url
def chp_id(ch):
if ch:
return chapters[ch]
s = surveys.get_survey(survey_url, do_download=False)
# realized_list = []
# survey_list = surveys.get_list()
# for i, survey in enumerate(survey_list):
# url = survey['url']
# print 'Processing: {b}{url} ({i}/{total})'.format(b=surveys.base_url, url=url, i=i + 1, total=len(survey_list))
#
# s = surveys.get_survey(url)
# if s:
# print s.identifier
# print chp_id(s.chapter), s.figure_num, s.title
# realized_list.append((chp_id(s.chapter), s.figure_num, s.identifier, s.title, url))
# print ''
#
# with open('chapterlist.pkl', 'w') as f:
# pickle.dump(realized_list, f)
# realize_parents(gcis, s.parents)
# realize_contributors(gcis, s.contributors)
# list = pickle.load(open('chapterlist.pkl'))
# for x in sorted(list, key=lambda x: x[1]):
# print (x[4], x[2]), ',', '#{num} #{c}'.format(num=x[1], c=x[0])
for i in s.images:
i.identifier = image_id_map[i.identifier]
i.datasets = []
print gcis.create_image(i, report_id=report_id, figure_id=figure_id)
# Build (or extend) a mapping from survey image identifiers to newly generated
# UUID strings and pickle it to 'image_id_cache.pk1'.
#
# existing: optional dict of identifiers that already have a UUID. Any falsy
#           value (None, or an empty dict) is replaced by a new empty dict.
# Returns nothing: there is no return statement, so the mapping is only
# available through the pickle file.
#
# NOTE(review): leading indentation is missing in this view, so the nesting of
# the loops and of the final write/print is inferred from statement order --
# confirm against the real file before restructuring.
def regenerate_image_id_map(existing=None):
from uuid import uuid4
image_id_map = existing if existing else {}
# Walk every (survey_url, figure_id) pair of every chapter of every report in
# sync_metadata_tree; figure_id is unpacked but not used here.
for report_id in sync_metadata_tree:
for chapter_id in sync_metadata_tree[report_id]:
for survey_url, figure_id in sync_metadata_tree[report_id][chapter_id]:
# Fetch the survey without downloading the figure file.
s = surveys.get_survey(survey_url, do_download=False)
for img in s.images:
# Identifiers that already have a UUID keep it; only new ones get a fresh one.
if img.identifier in image_id_map:
print 'skipping: ', img.identifier
continue
else:
print 'added: ', img.identifier
image_id_map[img.identifier] = str(uuid4())
# Persist the mapping in binary mode.
with open('image_id_cache.pk1', 'wb') as fout:
pickle.dump(image_id_map, fout)
print 'image_id_map regenerated'
# Fetch the list of surveys, load each one, and collect a summary tuple
# (chapter id, figure_num, identifier, title, survey url) per loaded survey.
#
# Returns the list of collected tuples.
#
# NOTE(review): leading indentation is missing in this view, so which
# statements sit inside `if s:` is inferred from statement order -- confirm
# against the real file before restructuring.
def gen_survey_list():
realized_list = []
# Chapter ids of the health assessment draft, in the order the tree yields them.
chapters = [c for c in sync_metadata_tree['usgcrp-climate-and-health-assessment-draft']]
survey_list = surveys.get_list()
for i, survey in enumerate(survey_list):
url = survey['url']
# Progress line; the counter is shown 1-based.
print 'Processing: {b}{url} ({i}/{total})'.format(b=surveys.base_url, url=url, i=i + 1, total=len(survey_list))
s = surveys.get_survey(url)
# s.chapter is used as an index into `chapters`; a missing survey or a falsy
# chapter (None, or 0) yields None instead.
# NOTE(review): presumably s.chapter is an integer -- confirm whether it is
# 0- or 1-based relative to `chapters`.
chp_id = chapters[s.chapter] if s and s.chapter else None
if s:
print s.identifier
print chp_id, s.figure_num, s.title
realized_list.append((chp_id, s.figure_num, s.identifier, s.title, url))
print ''
return realized_list
def create_health_report():
......@@ -145,4 +181,6 @@ def create_health_report():
ch.title = title
ch.report_identifier = hr.identifier
print gcis.create_chapter(hr.identifier, ch)
\ No newline at end of file
print gcis.create_chapter(hr.identifier, ch)
main()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment