Commit dc35d6f8 authored by abuddenberg's avatar abuddenberg
Browse files

It's too bad she won't live, but then again who does?

parent f37eca43
name = "Webform Data Server"
description = "Provides RESTful endpoint for serving out Webform submissions"
core = "6.x"
version = "6.x-1.0"
dependencies[] = "tokenauth"
\ No newline at end of file
<?php
/**
* Created by IntelliJ IDEA.
* User: abuddenberg
* Date: 8/22/13
* Time: 11:58 AM
* To change this template use File | Settings | File Templates.
*/
/**
 * Implementation of hook_help().
 *
 * Supplies the text shown for this module's help path.
 *
 * @param $path
 *   The path help is being requested for.
 * @param $arg
 *   Additional path arguments; not used by this implementation.
 *
 * @return
 *   An HTML paragraph describing the module when $path is
 *   "admin/help#formsrv", otherwise an empty string.
 */
function formsrv_help($path, $arg) {
  // Loose comparison on purpose: it mirrors how a switch matches its cases.
  if ($path == "admin/help#formsrv") {
    return '<p>'. t("Provides RESTful endpoint for serving out Webform submissions") .'</p>';
  }
  return '';
}
/**
 * Implementation of hook_menu().
 *
 * Registers the two JSON endpoints exposed by this module:
 *  - metadata/figures: full figure/image metadata tree.
 *  - metadata/list: brief listing of figures.
 *
 * Both are plain menu callbacks guarded by the 'access content' permission.
 * (A dedicated 'metadata/figures/all' route was sketched earlier but is not
 * registered.)
 *
 * @return
 *   Array of menu items keyed by path.
 */
function formsrv_menu() {
  // Path => page callback; every route shares the same type and permission.
  $callbacks = array(
    'metadata/figures' => 'formsrv_figure_metadata_json',
    'metadata/list' => 'formsrv_figure_list_json',
  );

  $items = array();
  foreach ($callbacks as $route => $callback) {
    $items[$route] = array(
      'page callback' => $callback,
      'type' => MENU_CALLBACK,
      'access arguments' => array('access content'),
    );
  }
  return $items;
}
/**
 * Renders a list of figure node IDs with basic descriptions as JSON.
 *
 * Page callback for "metadata/list". Each entry in the emitted JSON array
 * holds the chapter image node ID, the figure ID, figure number and figure
 * name pulled from the "Figure Metadata" webform submissions, plus a URL to
 * the per-figure endpoint under "metadata/figures/".
 *
 * The response is echoed directly with an application/json content type;
 * nothing is returned to the menu system.
 */
function formsrv_figure_list_json(){
  // The heredoc body and its closing marker stay at column 0.
  $q = <<<END
#Query to give brief listing and description of figures in Drupal
select
trim(figures.chapter_image_nid) as chapter_image_nid,
max(
case c.form_key
when 'what_is_the_figure_id' then trim(d.data) else NULL
end
) as figure_id,
max(
case c.form_key
when 'what_is_the_chapter_and_figure_number' then trim(d.data) else NULL
end
) as figure_num,
max(
case c.form_key
when 'what_is_the_name_of_the_figure_as_listed_in_the_report' then trim(d.data) else NULL
end
) as figure_name
from {webform_submitted_data} d
join {webform_component} c on d.nid = c.nid and d.cid = c.cid
join (
select
d.nid,
d.sid,
d.data as chapter_image_nid
from {webform_submitted_data} d
join {webform_component} c on d.nid = c.nid and d.cid = c.cid
where d.nid = (select nid from {node} where title = 'Figure Metadata')
and c.form_key ='nid'
) figures on d.nid = figures.nid and d.sid = figures.sid
where c.form_key in ('what_is_the_figure_id', 'what_is_the_name_of_the_figure_as_listed_in_the_report', 'what_is_the_chapter_and_figure_number')
group by figures.chapter_image_nid
order by figures.chapter_image_nid
END;

  $list = array();
  $result = db_query($q);
  // Use Drupal's db_fetch_array() instead of calling a mysqli method on the
  // result: db_query() only hands back an object on the mysqli driver, and
  // returns FALSE when the query fails.
  while ($result && ($row = db_fetch_array($result))) {
    $list[] = array(
      'chapter_image_nid' => $row['chapter_image_nid'],
      'figure_id' => $row['figure_id'],
      'figure_num' => $row['figure_num'],
      'figure_name' => $row['figure_name'],
      'url' => url('metadata/figures/'.$row['chapter_image_nid'])
    );
  }

  // Emitted by hand (rather than via drupal_json()) so the content type is
  // application/json.
  drupal_set_header('Content-Type: application/json');
  echo json_encode($list);
}
/**
 * Renders figures and images, with their related datasources, as JSON.
 *
 * Builds a tree keyed by chapter image node ID, then by section ('figure' or
 * 'images'), and replaces each webform node/submission ID pair with the
 * submitted form data. 'figure' entries additionally get a 'path' to the
 * image file; 'images' entries get a 'datasources' list.
 *
 * The response is echoed directly with an application/json content type.
 *
 * @param $image_nid_query
 *   Chapter image node ID to restrict the output to, or 'all' (the default)
 *   for every figure. An ID with no matching submissions yields an empty
 *   result.
 */
function formsrv_figure_metadata_json($image_nid_query = 'all'){
  $map = formsrv_get_chapter_image_map();

  // Filter out figures we don't care about.
  if ($image_nid_query != 'all') {
    $map = array_intersect_key($map, array($image_nid_query => 1));
  }

  foreach ($map as $chapter_image_nid => $figure) {
    foreach ($figure as $section => $list_ids) {
      foreach ($list_ids as $index => $ids) {
        $form_node_id = $ids['nid'];
        $submission_id = $ids['sid'];

        $entry = formsrv_get_form_data($form_node_id, $submission_id);

        // Grab the path to the figure in Drupal from its node ID.
        if ($section == 'figure') {
          $chapter_image = node_load($chapter_image_nid);
          // The submitted node ID may not resolve to a node, or the node may
          // carry no image file; report a NULL path in that case instead of
          // dereferencing a missing node/field and emitting a bare
          // 'system/files/' URL.
          if (isset($chapter_image->field_chapter_image_image[0]['filename'])) {
            $entry['path'] = url('system/files/'.$chapter_image->field_chapter_image_image[0]['filename']);
          }
          else {
            $entry['path'] = NULL;
          }
        }

        // Add datasource section to images; might need to expand this to
        // figures as well.
        if ($section == 'images') {
          $entry['datasources'] = formsrv_get_datasources_for_image($submission_id);
        }

        $map[$chapter_image_nid][$section][$index] = $entry;
      }
    }
  }

  drupal_set_header('Content-Type: application/json');
  echo json_encode($map);
}
/**
 * Creates a basic tree-like structure of figures and images with their
 * identifiers.
 *
 * The identifiers are replaced with actual data from the database by
 * formsrv_figure_metadata_json().
 *
 * @return
 *   Nested array of the form
 *   $map[chapter image nid]['figure'|'images'][] = array('nid' => ..., 'sid' => ...)
 *   where 'nid' is the webform node ID and 'sid' the submission ID. Entries
 *   come from submissions of the nodes titled 'Figure Metadata' and
 *   'Image Metadata', ordered by chapter image nid and then submission ID.
 */
function formsrv_get_chapter_image_map(){
  // The heredoc body and its closing marker stay at column 0.
  $q = <<<END
select
n.nid as form_nid,
n.title as section,
d.sid as form_submission_id,
trim(d.data) as chapter_image_nid
from (
select
nid,
case title
when 'Image Metadata' then 'images'
when 'Figure Metadata' then 'figure'
end as title
from {node}
where title in ('Image Metadata', 'Figure Metadata')
) n
join {webform_submitted_data} d on n.nid = d.nid
join {webform_component} c on n.nid = c.nid and d.cid = c.cid
where c.form_key = 'nid'
order by chapter_image_nid, d.sid
END;

  $map = array();
  $result = db_query($q);
  // db_fetch_array() is the driver-independent way to read rows; db_query()
  // may return a non-object result (or FALSE on failure), so a mysqli method
  // must not be called on it directly.
  while ($result && ($row = db_fetch_array($result))) {
    $map[$row['chapter_image_nid']][$row['section']][] = array(
      'nid' => $row['form_nid'],
      'sid' => $row['form_submission_id'],
    );
  }
  return $map;
}
/**
 * Returns a populated list of datasources for the given image.
 *
 * Looks up submissions of the 'Data Source Metadata' webform whose
 * 'image_metadata_sid' component equals the given image submission ID, and
 * loads the form data of each one.
 *
 * @param $image_sid
 *   Submission ID of the image metadata form.
 *
 * @return
 *   Array of field-value maps, one per matching datasource submission, as
 *   produced by formsrv_get_form_data(). Empty when nothing matches.
 */
function formsrv_get_datasources_for_image($image_sid){
  // Table names are wrapped in {} like every other query in this module so
  // that Drupal applies the configured table prefix.
  // The heredoc body and its closing marker stay at column 0.
  $q = <<<END
#Query to get datasource information for an image
select
d.nid,
d.sid
from {webform_submitted_data} d
join {webform_component} c on d.nid = c.nid and d.cid = c.cid
where d.nid = (select nid from {node} where title = 'Data Source Metadata')
and c.form_key = 'image_metadata_sid'
and d.data = %d
END;

  $datasources = array();
  $result = db_query($q, $image_sid);
  // Read rows through Drupal's database API rather than a mysqli method, and
  // tolerate a failed query (db_query() returns FALSE).
  while ($result && ($row = db_fetch_array($result))) {
    $datasources[] = formsrv_get_form_data($row['nid'], $row['sid']);
  }
  return $datasources;
}
/**
 * Returns a map of field-value pairs of webform submission data.
 *
 * @param $form_node_id
 *   The node ID of the webform.
 * @param $submission_id
 *   The submission ID within the given webform to get.
 *
 * @return
 *   Array keyed by the (trimmed) component form_key with the (trimmed)
 *   submitted value, in component pid/weight order. If several rows share a
 *   form_key, the last one read wins. Empty when nothing matches.
 */
function formsrv_get_form_data($form_node_id, $submission_id) {
  // The heredoc body and its closing marker stay at column 0.
  $q = <<<END
select
# d.sid,
trim(c.form_key) as k,
# c.name,
trim(d.data) as v
from {webform_submitted_data} d
join {webform_component} c on d.nid = c.nid and d.cid = c.cid
where d.nid = %d
and d.sid = %d
order by c.pid, c.weight
END;

  $form_data = array();
  $result = db_query($q, $form_node_id, $submission_id);
  // db_fetch_array() works with whatever result type the active database
  // driver returns; calling ->fetch_assoc() directly only works on mysqli and
  // fatals when db_query() returns FALSE.
  while ($result && ($row = db_fetch_array($result))) {
    $form_data[$row['k']] = $row['v'];
  }
  return $form_data;
}
This diff is collapsed.
__author__ = 'abuddenberg'
from gcis_clients import Nca3Client, GcisClient, gcis_stage_auth, gcis_dev_auth
import pickle
import json
import requests
import bs4
from bs4 import BeautifulSoup
# Base URL of the NCA3 site that captions are fetched from and figures are updated on.
nca3_url = 'https://nca3.cicsnc.org'
# NOTE(review): SECURITY - a username/password and HTTP basic-auth credentials
# are hard-coded below and therefore live in version control. They should be
# rotated and loaded from the environment or an untracked config file instead.
nca3 = Nca3Client(nca3_url, 'andrew.buddenberg', 'Nz9O^00I', http_basic_user='nca3', http_basic_pass='avl-TSU')
# Base URL of the GCIS instance this script reads from and writes to (staging).
gcis_url = 'https://data-stage.globalchange.gov'
# GCIS client; credentials come from gcis_clients (gcis_stage_auth).
gcis = GcisClient(gcis_url, *gcis_stage_auth)
# Alternative target (dev instance), kept for switching by hand:
# gcis = GcisClient('http://data.gcis-dev-front.joss.ucar.edu', *gcis_dev_auth)
def main():
print gcis.test_login()
for idx, list_item in enumerate([i for i in sorted(nca3.get_all_captions().json(), key=lambda f: f['Ordinal']) if i['Ordinal'] and i['Metadata URI'] and i['Caption']]):
ordinal = list_item['Ordinal']
gcis_fig_id = list_item['Metadata URI'].split('/')[-1]
stripped_caption = strip_tags(list_item['Caption']['value'])
fig = gcis.get_figure('nca3', gcis_fig_id)
fig.caption = stripped_caption
# print idx, list_item
# print stripped_caption
#Just to be safe...
fig.contributors = []
print gcis.update_figure('nca3', fig.chapter_identifier, fig, skip_images=True)
def strip_tags(caption):
    """Return `caption` with all markup removed except tbib, sup and sub tags.

    Every other tag is unwrapped (its contents are kept, the tag itself is
    dropped). The result is the serialized document with surrounding
    whitespace stripped.

    NOTE(review): no parser is passed to BeautifulSoup, so the one it picks
    depends on what is installed.
    """
    preserved = ['tbib', 'sup', 'sub']

    def is_disposable(tag):
        # True for any tag that is not in the keep-list.
        return tag.name not in preserved

    soup = BeautifulSoup(caption)
    for tag in soup.find_all(name=is_disposable):
        tag.unwrap()

    return str(soup).strip()
def get_gcis_chapters():
    """Fetch all NCA3 chapters from GCIS, keyed by chapter identifier.

    The map is also written to 'chapter_map.pk1' in the working directory and
    the returned value is what is read back from that file.

    Returns:
        dict mapping chapter identifier -> chapter record (as decoded from the
        GCIS JSON response).
    """
    # NOTE(review): verify=False disables TLS certificate verification.
    gcis_all_chapters = requests.get(
        '{b}/report/nca3/chapter.json'.format(b=gcis_url),
        params={'all': 1},
        verify=False
    ).json()

    chapter_map = {c['identifier']: c for c in gcis_all_chapters}

    with open('chapter_map.pk1', 'wb') as fout:
        pickle.dump(chapter_map, fout)

    # Read the cache back in binary mode (it was written with 'wb') and close
    # the handle deterministically instead of leaking it.
    with open('chapter_map.pk1', 'rb') as fin:
        return pickle.load(fin)
def get_all_gcis_figures():
    """Return all NCA3 figures from GCIS keyed by '<chapter number>.<ordinal>'.

    Each figure object also gets that key stored on it as `figure_num`. The
    map is written to 'fig_map.pk1' in the working directory and the returned
    value is what is read back from that file.
    """
    # Fetch the chapter map once up front; get_gcis_chapters() performs an
    # HTTP request and rewrites its cache file on every call, so it must not
    # run once per figure.
    chapters = get_gcis_chapters()

    gcis_all_figs = {}
    for f in gcis.get_figure_listing('nca3'):
        chapter_num = chapters[f.chapter_identifier]['number']
        f.figure_num = '{0}.{1}'.format(chapter_num, f.ordinal)
        gcis_all_figs[f.figure_num] = f

    with open('fig_map.pk1', 'wb') as fout:
        pickle.dump(gcis_all_figs, fout)

    # Read the cache back in binary mode (it was written with 'wb') and close
    # the handle deterministically instead of leaking it.
    with open('fig_map.pk1', 'rb') as fin:
        gcis_all_figs = pickle.load(fin)

    return gcis_all_figs
def populate_uris_in_drupal():
gcis_all_figs = get_all_gcis_figures()
for list_item in sorted(nca3.get_all_captions().json(), key=lambda f: f['Ordinal']):
nid = list_item['nid']
ordinal = list_item['Ordinal']
graphic_type = list_item['Graphic Type']
if ordinal and ordinal in gcis_all_figs and graphic_type == 'Figure':
print 'Found: ', graphic_type, ordinal, gcis_all_figs[ordinal].uri
# nca3_fig = nca3.get_figure(nid)
# print nca3_fig
uri_frag = {
'und': [
{
'value': gcis_all_figs[ordinal].uri[1:],
'format': None,
'safe_value': gcis_all_figs[ordinal].uri[1:]
}
]
}
# nca3_fig['field_metadata_uri'] = uri_frag
resp = nca3.update_figure(nid, {'field_metadata_uri': uri_frag})
print resp.status_code, resp.text
print ''
else:
print '***NO URI FOUND***', graphic_type, ordinal
main()
\ No newline at end of file
......@@ -16,42 +16,55 @@ import re
import traceback
gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
# gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
gcis = GcisClient('https://data-review.globalchange.gov', *gcis_stage_auth)
surveys = SurveyClient('https://state-resources.cicsnc.org', survey_token)
def main():
print(gcis.test_login())
for report_id in sync_metadata_tree:
for chapter_id in sync_metadata_tree[report_id]:
for survey_url, figure_id, figure_num in sync_metadata_tree[report_id][chapter_id]:
figure, datasets = surveys.get_survey(survey_url, do_download=True)
#Fix misspelling
figure.identifier = figure_id
figure.title = figure.title.replace('precipitaton', 'precipitation')
figure.ordinal = figure_num
print(survey_url)
print(figure, datasets)
realize_parents(gcis, figure.parents)
realize_contributors(gcis, figure.contributors)
print('Contributors: ', figure.contributors)
print('Parents: ', figure.parents)
# gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
for ds in [p for p in figure.parents if p.publication_type_identifier == 'dataset']:
# Assign a synthetic activity identifier to datasets associated with the figure
if ds.activity and ds.activity.identifier is None:
ds.activity.identifier = generate_activity_id(figure, ds.publication)
print('Dataset: ', ds.activity)
cssr = Report({
'identifier': 'climate-science-special-report',
'report_type_identifier': 'report',
'title': 'Climate Science Special Report',
# 'url': 'https://statesummaries.cicsnc.org/',
'publication_year': '2017',
'contact_email': ''
})
print('Creating figure... ', gcis.create_figure(report_id, chapter_id, figure, skip_images=True, skip_upload=False))
# print('Updating figure... ', gcis.update_figure(report_id, chapter_id, figure, skip_images=True))
print(gcis.create_report(cssr));
# for report_id in sync_metadata_tree:
# for chapter_id in sync_metadata_tree[report_id]:
# for survey_url, figure_id, figure_num in sync_metadata_tree[report_id][chapter_id]:
# figure, datasets = surveys.get_survey(survey_url, do_download=False)
#
# resp = gcis.post_figure_original(report_id, figure_id, figure.original, chapter_id=chapter_id)
# print(resp.status_code, resp.text)
#
# #Fix misspelling
# figure.identifier = figure_id
# figure.title = figure.title.replace('precipitaton', 'precipitation')
# figure.ordinal = figure_num
#
# print(survey_url)
# print(figure, datasets)
#
# realize_parents(gcis, figure.parents)
# realize_contributors(gcis, figure.contributors)
#
# print('Contributors: ', figure.contributors)
# print('Parents: ', figure.parents)
# # gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
#
# for ds in [p for p in figure.parents if p.publication_type_identifier == 'dataset']:
# # Assign synthetic activity identifier to for datasets associated with figure
# if ds.activity and ds.activity.identifier is None:
# ds.activity.identifier = generate_activity_id(figure, ds.publication)
# print('Dataset: ', ds.activity)
#
# print('Creating figure... ', gcis.create_figure(report_id, chapter_id, figure, skip_images=True, skip_upload=False))
# # print('Updating figure... ', gcis.update_figure(report_id, chapter_id, figure, skip_images=True))
def generate_activity_id(image, dataset):
......
......@@ -2,15 +2,15 @@ __author__ = 'abuddenberg'
from gcis_clients import GcisClient, SurveyClient, survey_token, gcis_dev_auth, gcis_stage_auth
from gcis_clients.domain import Report, Chapter
from sync_utils import realize_parents, realize_contributors
# from sync_utils import realize_parents, realize_contributors
from collections import OrderedDict
import pickle
import sys
gcis = GcisClient('http://data.gcis-dev-front.joss.ucar.edu', *gcis_dev_auth)
# gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
# gcis = GcisClient('http://data.gcis-dev-front.joss.ucar.edu', *gcis_dev_auth)
gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
surveys = SurveyClient('https://healthresources.cicsnc.org', survey_token)
......@@ -30,7 +30,6 @@ sync_metadata_tree = {
('/metadata/figures/3837', 'es-farm-to-table'),
('/metadata/figures/3839', 'es-the-impact-of-climate-change-on-physical-mental-and-community-health'),
('/metadata/figures/3840', 'es-determinants-of-vulnerability')
]),
('climate-change-and-human-health', [
('/metadata/figures/3698', 'major-us-climate-trends'), #1.1 #climate-change-and-human-health
......@@ -115,57 +114,61 @@ def main():
for report_id in sync_metadata_tree:
for chapter_id in sync_metadata_tree[report_id]:
for survey_url, figure_id in sync_metadata_tree[report_id][chapter_id]:
gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
print survey_url, gen_edit_link(survey_url)
figure, datasets = surveys.get_survey(survey_url, do_download=False)
#Override identifier
figure.identifier = figure_id
#Pull existing captions
if gcis.figure_exists(report_id, figure_id, chapter_id=chapter_id):
gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
figure.caption = gcis_fig.caption
figure.files = gcis_fig.files
realize_parents(gcis, figure.parents)
realize_contributors(gcis, figure.contributors)
print 'Contributors: ', figure.contributors
print 'Parents: ', figure.parents
for ds in [p for p in figure.parents if p.publication_type_identifier == 'dataset']:
# Assign a synthetic activity identifier to datasets associated with the figure
if ds.activity and ds.activity.identifier is None:
ds.activity.identifier = generate_activity_id(figure, ds.publication)
print 'Dataset: ', ds.activity
#Create the figure in GCIS
# print 'Creating figure... ', gcis.create_figure(report_id, chapter_id, figure, skip_images=True, skip_upload=False)
print 'Updating figure... ', gcis.update_figure(report_id, chapter_id, figure, skip_images=True)
# print 'Deleting old file', gcis.delete_file(figure.files[0])
# print 'Uploading...', gcis.upload_figure_file(report_id, chapter_id, figure_id, figure.local_path)
for i in figure.images:
i.identifier = image_id_map[(figure_id, i.identifier)]
print '\t', i
realize_parents(gcis, i.parents)
realize_contributors(gcis, i.contributors)
print '\t\tContributors: ', i.contributors
print '\t\tParents: ', i.parents
for ds in [p for p in i.parents if p.publication_type_identifier == 'dataset']:
# Assign a synthetic activity identifier to datasets associated with images
if ds.activity and ds.activity.identifier is None:
ds.activity.identifier = generate_activity_id(i, ds.publication)
print '\t\tDataset: ', ds, ds.activity
#Create image in GCIS
# print 'Creating image... ', gcis.create_image(i, report_id=report_id, figure_id=figure_id)
print 'Updating image... ', gcis.update_image(i)
resp = gcis.post_figure_original(report_id, figure_id, figure.original, chapter_id=chapter_id)
print(resp.status_code, resp.text)
# gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
#
# print survey_url, gen_edit_link(survey_url)
#
# figure, datasets = surveys.get_survey(survey_url, do_download=False)
#
# #Override identifier
# figure.identifier = figure_id
#
# #Pull existing captions
# if gcis.figure_exists(report_id, figure_id, chapter_id=chapter_id):
# gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
# figure.caption = gcis_fig.caption
# figure.files = gcis_fig.files
#
# realize_parents(gcis, figure.parents)
# realize_contributors(gcis, figure.contributors)
#
# print 'Contributors: ', figure.contributors
# print 'Parents: ', figure.parents
#
# for ds in [p for p in figure.parents if p.publication_type_identifier == 'dataset']:
# # Assign synthetic activity identifier to for datasets associated with figure
# if ds.activity and ds.activity.identifier is None:
# ds.activity.identifier = generate_activity_id(figure, ds.publication)
# print 'Dataset: ', ds.activity
#
# #Create the figure in GCIS
# # print 'Creating figure... ', gcis.create_figure(report_id, chapter_id, figure, skip_images=True, skip_upload=False)
# print 'Updating figure... ', gcis.update_figure(report_id, chapter_id, figure, skip_images=True)
# # print 'Deleting old file', gcis.delete_file(figure.files[0])
# # print 'Uploading...', gcis.upload_figure_file(report_id, chapter_id, figure_id, figure.local_path)
#
# for i in figure.images:
# i.identifier = image_id_map[(figure_id, i.identifier)]
# print '\t', i
#
# realize_parents(gcis, i.parents)
# realize_contributors(gcis, i.contributors)
#
# print '\t\tContributors: ', i.contributors
# print '\t\tParents: ', i.parents
# for ds in [p for p in i.parents if p.publication_type_identifier == 'dataset']:
# # Assign synthetic activity identifier to for datasets associated with images
# if ds.activity and ds.activity.identifier is None:
# ds.activity.identifier = generate_activity_id(i, ds.publication)
# print '\t\tDataset: ', ds, ds.activity
#
# #Create image in GCIS
# # print 'Creating image... ', gcis.create_image(i, report_id=report_id, figure_id=figure_id)
# print 'Updating image... ', gcis.update_image(i)
......
......@@ -4,6 +4,7 @@ from copy import deepcopy
import json
import re
import inspect
import traceback
from dateutil.parser import parse
......@@ -313,7 +314,14 @@ class Dataset(GcisObject):
def temporal_extent(self):
return self._temporal_extent
#Can't use property.setter due to multiple args
@temporal_extent.setter
def temporal_extent(self, value):
    """Set the temporal extent from a single whitespace-separated string.

    `value` is split on whitespace and the pieces are passed to
    set_temporal_extent() as its start and end arguments. If an
    AttributeError is raised (for example because `value` has no `split`
    method, as with None), the extent is left unchanged and a message is
    printed.
    """
    try:
        self.set_temporal_extent(*value.split())
    except AttributeError:
        # Report what could not be applied; the previous message was cut off
        # after "Unable to".
        print('Unable to set temporal extent from value: {0!r}'.format(value))
#Can't use property.setter directly due to multiple args
def set_temporal_extent(self, start_dt, end_dt):
try:
self._temporal_extent = '{0} {1}'.format(parse(start_dt).isoformat(), parse(end_dt).isoformat()) if start_dt and end_dt else None
......
......@@ -285,6 +285,29 @@ class GcisClient(object):
)
return self.s.head(url, verify=False)
def get_figure_original(self, report_id, figure_id, chapter_id=None):
chapter_filter = '/chapter/' + chapter_id if chapter_id else ''
url = '{b}/report/{rpt}{chap}/figure/{fig}/original.json'.format(
b=self.base_url, rpt=report_id, chap=chapter_filter, fig=figure_id
)
resp = self.s.get(url, params={'all': '1'}, verify=False)
try:
return resp.json()
<