Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Andrew Buddenberg
gcis-py-client
Commits
76e97430
Commit
76e97430
authored
Feb 27, 2014
by
abuddenberg
Browse files
Added function to categorize common problems with webform data
parent
61d86f25
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/sync_figures.py
View file @
76e97430
...
...
@@ -4,18 +4,21 @@ from webform_client import WebformClient
from
gcis_client
import
GcisClient
from
os.path
import
exists
import
json
import
pickle
# webform_dev = ('http://dev.nemac.org/asides10/metadata/figures/all?token=A2PNYxRuG9')
webform
=
WebformClient
(
'http://resources.assessment.globalchange.gov'
,
'mgTD63FAjG'
)
gcis_url
=
'http://data.gcis-dev-front.joss.ucar.edu'
# gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', '4cd31dc7173eb47b26f616fb07db607f25ab861552e81195')
gcis
=
GcisClient
(
'http://data-stage.globalchange.gov'
,
'andrew.buddenberg@noaa.gov'
,
'ef427a895acf26d4f0b1f053ba7d922791b76f7852e7efee'
)
gcis
=
GcisClient
(
gcis_url
,
'andrew.buddenberg@noaa.gov'
,
'4cd31dc7173eb47b26f616fb07db607f25ab861552e81195'
)
# gcis = GcisClient('http://data-stage.globalchange.gov', 'andrew.buddenberg@noaa.gov', 'ef427a895acf26d4f0b1f053ba7d922791b76f7852e7efee')
global_report_name
=
'nca3draft'
sync_metadata_tree
=
{
#Reports
'nca3draft'
:
{
global_report_name
:
{
#Chapter 2
'our-changing-climate'
:
[
#(webform_url, gcis_id)
...
...
@@ -29,6 +32,10 @@ sync_metadata_tree = {
# ('/metadata/figures/3294', 'projected-changes-in-frostfree-season-length'),
# ('/metadata/figures/3305', 'variation-of-storm-frequency-and-intensity-during-the-cold-season-november--march') #incomplete
],
#Chapter 4
'energy-supply-and-use'
:
[
# ('/metadata/figures/3292', 'cooling-degree-days')
],
#Chapter 6
'agriculture'
:
[
# ('/metadata/figures/2872', 'drainage')
...
...
@@ -53,55 +60,62 @@ sync_metadata_tree = {
],
#Climate Science Appendix
'appendix-climate-science'
:
[
(
'/metadata/figures/3147'
,
'ice-loss-from-greenland-and-antarctica'
)
#
('/metadata/figures/3147', 'ice-loss-from-greenland-and-antarctica')
]
}
}
#These are artifacts from our collection efforts; largely duplicates
webform_skip_list
=
[]
#
webform_skip_list = []
def
main
():
# print_webform_list()
aggregate_datasets
()
# sync(uploads=True)
pickle
.
dump
(
sort_webform_list
(),
open
(
'../hitlist.pk1'
,
'wb'
))
all_forms
,
ready
,
problems
=
pickle
.
load
(
open
(
'../hitlist.pk1'
,
'r'
))
print
ready
# for ds in aggregate_datasets():
# gcis.update_dataset(ds)
# sync(uploads=False)
# f = webform.get_webform('/metadata/figures/3147').merge(gcis.get_figure('nca3draft', 'ice-loss-from-greenland-and-antarctica', chapter_id='appendix-climate-science'))
def
aggregate_datasets
():
dataset_set
=
{}
def
sort_webform_list
():
all_forms
=
[]
ready
=
[]
problems
=
{}
for
item
in
webform
.
get_list
():
webform_url
=
item
[
'url'
]
f
=
webform
.
get_webform
(
webform_url
)
all_forms
.
append
(
f
)
#aggregate datasets
for
image
in
f
.
images
:
for
dataset
in
image
.
datasets
:
if
dataset
.
identifier
not
in
dataset_set
:
dataset_set
[
dataset
.
identifier
]
=
dataset
else
:
dataset_set
[
dataset
.
identifier
].
merge
(
dataset
)
for
ds
in
dataset_set
:
print
gcis
.
create_dataset
(
ds
)
#Check the ready for publication flag
if
'ready_for_publication'
in
f
.
original
and
f
.
original
[
'ready_for_publication'
]
==
'yes'
:
#Check if the figure exists in GCIS
if
not
gcis
.
figure_exists
(
global_report_name
,
f
.
identifier
):
problems
.
setdefault
(
webform_url
,
{}).
setdefault
(
'figure_id'
,
[]).
append
(
f
.
identifier
)
#Check if each image exists in GCIS
for
image
in
f
.
images
:
if
not
gcis
.
image_exists
(
image
.
identifier
):
problems
.
setdefault
(
webform_url
,
{}).
setdefault
(
'image_id'
,
[]).
append
(
image
.
identifier
)
#Check if each image's dataset exists in GCIS
for
dataset
in
image
.
datasets
:
if
not
gcis
.
dataset_exists
(
dataset
.
identifier
):
problems
.
setdefault
(
webform_url
,
{}).
setdefault
(
'dataset_id'
,
[]).
append
(
dataset
.
identifier
)
#Check if the filename fields are filled out and correct for what's been uploaded
if
image
.
remote_path
in
(
None
,
''
)
or
not
webform
.
remote_image_exists
(
image
.
remote_path
):
problems
.
setdefault
(
webform_url
,
{}).
setdefault
(
'missing_image_files'
,
[]).
append
(
image
.
identifier
)
def
print_webform_list
():
for
item
in
webform
.
get_list
():
webform_url
=
item
[
'url'
]
f
=
webform
.
get_webform
(
webform_url
)
if
webform_url
not
in
problems
:
ready
.
append
((
webform_url
,
f
.
identifier
))
if
'ready_for_publication'
in
f
.
original
and
f
.
original
[
'ready_for_publication'
]
==
'yes'
:
print
webform_url
,
'***Ready For Publication***'
print
f
else
:
webform_skip_list
.
append
(
webform_url
)
return
all_forms
,
ready
,
problems
def
sync
(
uploads
=
True
):
...
...
@@ -110,12 +124,9 @@ def sync(uploads=True):
for
figure_ids
in
sync_metadata_tree
[
report_id
][
chapter_id
]:
webform_url
,
gcis_id
=
figure_ids
if
webform_url
in
webform_skip_list
:
print
'Skipping: '
+
webform_url
continue
if
uploads
:
print
'Attempting to upload: '
+
gcis_id
sync
_images
(
webform_url
,
gcis_id
)
upload
_images
_to_gcis
(
webform_url
,
gcis_id
,
report_id
)
print
'Attempting to sync: '
+
gcis_id
sync_metadata
(
report_id
,
chapter_id
,
webform_url
,
gcis_id
)
print
'Success!'
...
...
@@ -129,16 +140,15 @@ def sync_metadata(report_id, chapter_id, webform_url, gcis_id):
#...then send it.
gcis
.
update_figure
(
report_id
,
chapter_id
,
figure_obj
)
def
sync_images
(
webform_url
,
gcis_id
):
#This function is for adding images to existing figures
def
upload_images_to_gcis
(
webform_url
,
gcis_id
,
report_id
):
figure
=
webform
.
get_webform
(
webform_url
)
#Now identifiers don't need to be matched
figure
.
identifier
=
gcis_id
webform
.
download_all_images
(
figure
)
upload_images_to_gcis
(
figure
)
def
upload_images_to_gcis
(
figure
,
report_id
=
'nca3draft'
):
#Make sure we have all the images required for a COMPLETE update
for
image
in
figure
.
images
:
if
not
exists
(
image
.
local_path
):
...
...
@@ -147,8 +157,27 @@ def upload_images_to_gcis(figure, report_id='nca3draft'):
for
resp
in
gcis
.
create_image
(
image
,
report_id
=
report_id
,
figure_id
=
figure
.
identifier
):
print
resp
.
status_code
,
resp
.
text
for
dataset
in
image
.
datasets
:
gcis
.
associate_dataset_with_image
(
dataset
.
identifier
,
report_id
,
figure
.
identifier
)
# for dataset in image.datasets:
# gcis.associate_dataset_with_image(dataset.identifier, report_id, image.identifier)
def
aggregate_datasets
():
dataset_map
=
{}
for
item
in
webform
.
get_list
():
webform_url
=
item
[
'url'
]
f
=
webform
.
get_webform
(
webform_url
)
#aggregate datasets
for
image
in
f
.
images
:
for
dataset
in
image
.
datasets
:
if
dataset
.
identifier
not
in
dataset_map
:
dataset_map
[
dataset
.
identifier
]
=
dataset
else
:
dataset_map
[
dataset
.
identifier
].
merge
(
dataset
)
return
dataset_map
.
values
()
if
__name__
==
'__main__'
:
...
...
src/webform_client.py
View file @
76e97430
...
...
@@ -94,7 +94,7 @@ class WebformClient:
def
remote_image_exists
(
self
,
path
):
url
=
'{b}{path}?token={t}'
.
format
(
b
=
self
.
base_url
,
path
=
path
,
t
=
self
.
token
)
resp
=
requests
.
head
(
url
)
print
resp
.
status_code
,
resp
.
text
#
print resp.status_code, resp.text
return
True
if
resp
.
status_code
==
200
else
False
def
download_image
(
self
,
image
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment