Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Andrew Buddenberg
gcis-py-client
Commits
9e1794ef
Commit
9e1794ef
authored
Feb 12, 2014
by
abuddenberg
Browse files
Adding support for datasets (not done yet)
parent
86ba4f13
Changes
4
Hide whitespace changes
Inline
Side-by-side
src/domain.py
View file @
9e1794ef
...
...
@@ -39,6 +39,10 @@ class Gcisbase(object):
self
.
__dict__
[
k
]
=
getattr
(
other
,
k
)
return
self
def as_json(self, indent=0):
    """Serialize this object's GCIS fields to a JSON string.

    Only the attributes whose names appear in ``self._gcis_fields``
    are included; ``indent`` is forwarded to ``json.dumps``.
    """
    # Pull each whitelisted field off the instance and dump the result.
    payload = {name: self.__dict__[name] for name in self._gcis_fields}
    return json.dumps(payload, indent=indent)
class
Figure
(
Gcisbase
):
_gcis_fields
=
[
...
...
@@ -140,9 +144,35 @@ class Image(Gcisbase):
self
.
local_path
=
local_path
self
.
remote_path
=
remote_path
def as_json(self, indent=0):
    """Return a JSON string containing this image's GCIS fields.

    NOTE(review): this duplicates the base-class ``as_json``; it could
    likely be dropped in favor of the inherited version — kept as-is to
    preserve behavior.
    """
    data = {}
    for key in self._gcis_fields:
        data[key] = self.__dict__[key]
    return json.dumps(data, indent=indent)
#This does not accurately reflect GCIS' data model
self
.
datasets
=
[]
def __str__(self):
    """Human-readable summary: the image's identifier and title."""
    template = 'Image: {id} {name}'
    return template.format(id=self.identifier, name=self.title)
class Dataset(Gcisbase):
    """A GCIS dataset publication.

    Wraps the raw webform/GCIS JSON for a dataset.  Field handling and
    key translation are delegated to the ``Gcisbase`` constructor.
    """

    # Every attribute GCIS knows about for a dataset; these are the
    # keys serialized by as_json().
    _gcis_fields = [
        'contributors', 'vertical_extent', 'native_id', 'href',
        'references', 'cite_metadata', 'scale', 'publication_dt',
        'temporal_extent', 'version', 'parents', 'scope', 'type',
        'processing_level', 'files', 'data_qualifier', 'access_dt',
        'description', 'spatial_ref_sys', 'spatial_res',
        'spatial_extent', 'doi', 'name', 'url', 'uri', 'identifier'
    ]

    # Maps webform field names to their GCIS equivalents.
    _translations = {
        'data_set_access_date': 'access_dt',
        'data_set_publication_year': 'publication_dt',
        # HACK elsewhere 'start_time and end_time': '',
        'data_set_id': 'native_id',
        # HACK elsewhere'': 'doi',
        # HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
        'data_set_version': 'version',
        'data_set_name': 'name',
        'data_set_citation': 'cite_metadata',
        'data_set_description': 'description',
        # Not sure'': 'type',
    }

    def __init__(self, data):
        """Build a Dataset from a dict, translating webform keys."""
        super(Dataset, self).__init__(data, fields=self._gcis_fields,
                                      trans=self._translations)

    def __str__(self):
        """Human-readable summary: the dataset's identifier and name."""
        return 'Dataset: {id} {name}'.format(id=self.identifier,
                                             name=self.name)
\ No newline at end of file
src/gcis_client.py
View file @
9e1794ef
...
...
@@ -5,7 +5,7 @@ import urllib
import
json
import
requests
from
os.path
import
exists
from
domain
import
Figure
,
Image
from
domain
import
Figure
,
Image
,
Dataset
def
check_image
(
fn
):
...
...
@@ -64,15 +64,6 @@ class GcisClient(object):
url
=
'{b}/report/{rpt}/figure/{fig}'
.
format
(
b
=
self
.
base_url
,
rpt
=
report_id
,
fig
=
figure_id
)
return
requests
.
delete
(
url
,
headers
=
self
.
headers
)
# def associate_figure_with_chapter(self, report_id, chapter_id, figure_id):
# url = '{b}/report/{rpt}/chapter/{chp}/figure/rel/{f}'.format(
# b=self.base_url, rpt=report_id, chp=chapter_id, f=figure_id
# )
# print url
#
# return requests.post(url, json.dumps({'add_image_identifier': image_id}), headers=self.headers)
@
check_image
def
create_image
(
self
,
image
,
report_id
=
None
,
figure_id
=
None
):
url
=
'{b}/image/'
.
format
(
b
=
self
.
base_url
,
img
=
image
.
identifier
)
...
...
@@ -146,9 +137,8 @@ class GcisClient(object):
def get_keyword_listing(self):
    """Fetch the complete GCMD keyword listing from GCIS.

    Requests every keyword (``all=1``) and returns the decoded JSON
    response body.
    """
    query = urllib.urlencode({'all': '1'})  # Python 2 urllib API
    url = '{b}/gcmd_keyword?{p}'.format(b=self.base_url, p=query)
    response = requests.get(url, headers=self.headers)
    return response.json()
def
get_keyword
(
self
,
key_id
):
...
...
@@ -157,7 +147,26 @@ class GcisClient(object):
def associate_keyword_with_figure(self, keyword_id, report_id, figure_id):
    """Attach a GCMD keyword to a figure via the GCIS API.

    POSTs the keyword identifier to the figure's keywords endpoint and
    returns the ``requests`` response object.
    """
    url = '{b}/report/{rpt}/figure/keywords/{fig}'.format(
        b=self.base_url, rpt=report_id, fig=figure_id
    )
    # Fix: removed a stray debug `print url` (Python 2 print statement)
    # left in the production path.
    return requests.post(url, data=json.dumps({'identifier': keyword_id}),
                         headers=self.headers)
def get_dataset(self, dataset_id):
    """Fetch a dataset by identifier and wrap it in a Dataset object.

    Raises:
        Exception: when the response body is not valid JSON; the raw
            response text is included in the message.
    """
    url = '{b}/dataset/{ds}'.format(b=self.base_url, ds=dataset_id)
    resp = requests.get(url, headers=self.headers)
    try:
        return Dataset(resp.json())
    except ValueError:
        # Bug fix: requests' Response.text is a property, not a method.
        # The original `resp.text()` raised TypeError and hid the real
        # error body.
        raise Exception(resp.text)
def create_data(self, dataset):
    """POST a new dataset to GCIS and return the response.

    NOTE(review): the name is inconsistent with update_dataset /
    delete_dataset; renaming would break existing callers, so it stays.
    """
    endpoint = '{b}/dataset/'.format(b=self.base_url)
    body = dataset.as_json()
    return requests.post(endpoint, data=body, headers=self.headers)
def update_dataset(self, dataset):
    """POST updated metadata for an existing dataset to GCIS."""
    endpoint = '{b}/dataset/{ds}'.format(b=self.base_url,
                                         ds=dataset.identifier)
    payload = dataset.as_json()
    return requests.post(endpoint, data=payload, headers=self.headers)
def delete_dataset(self, dataset):
    """DELETE a dataset from GCIS, addressed by its identifier."""
    target = '{b}/dataset/{ds}'.format(b=self.base_url,
                                       ds=dataset.identifier)
    return requests.delete(target, headers=self.headers)
return
requests
.
post
(
url
,
data
=
json
.
dumps
({
'identifier'
:
keyword_id
}),
headers
=
self
.
headers
)
\ No newline at end of file
# def associate_dataset_with_figure
src/sync_figures.py
View file @
9e1794ef
...
...
@@ -16,10 +16,10 @@ gcis = GcisClient(gcis_url, 'andrew.buddenberg@noaa.gov', '4cd31dc7173eb47b26f61
sync_metadata_tree
=
{
#Reports
'nca3draft'
:
{
#Chapter
s
#Chapter
2
'our-changing-climate'
:
[
#(webform_url, gcis_id)
#
('/metadata/figures/2506', 'observed-change-in-very-heavy-precipitation'),
(
'/metadata/figures/2506'
,
'observed-change-in-very-heavy-precipitation'
),
# ('/metadata/figures/2997', 'observed-change-in-very-heavy-precipitation-2'),
# ('/metadata/figures/2677', 'observed-us-precipitation-change'),
# ('/metadata/figures/3175', 'observed-us-temperature-change'),
...
...
@@ -29,8 +29,26 @@ sync_metadata_tree = {
# ('/metadata/figures/3294', 'projected-changes-in-frostfree-season-length'),
# ('/metadata/figures/3305', 'variation-of-storm-frequency-and-intensity-during-the-cold-season-november--march') #incomplete
],
#Chapter 6
'agriculture'
:
[
# ('/metadata/figures/2872', 'drainage')
],
#Chapter 9
''
:
[
(
'/metadata/figures/2896'
,
'heavy-downpours-disease'
)
#Needs images redone
],
#Chapter 14
'rural'
:
[
(
'/metadata/figures/3306'
,
'length-growing-season'
)
# ('/metadata/figures/3306', 'length-growing-season') #Needs images redone
],
#Chapter 19
'great-plains'
:
[
# ('/metadata/figures/2697', 'mean-annual-temp-and-precip') #Needs images redone
],
#Chapter 25
'coastal-zone'
:
[
# ('/metadata/figures/2543', 'coastal-ecosystem-services')
]
}
}
...
...
@@ -38,12 +56,21 @@ sync_metadata_tree = {
#These are artifacts from our collection efforts; largely duplicates
webform_skip_list = []

# (human-readable dataset name, short GCIS identifier) pairs
dataset_identifiers = [
    ('Global Historical Climatology Network - Monthly', 'GHCN-M'),
]
def
main
():
print_webform_list
()
# print_webform_list()
# sync(uploads=False)
f
=
webform
.
get_webform
(
'/metadata/figures/2506'
)
# print webform_skip_list
# sync()
for
image
in
f
.
images
:
print
image
for
dataset
in
image
.
datasets
[
0
:
1
]:
print
dataset
gcis
.
update_dataset
(
dataset
)
def
print_webform_list
():
...
...
@@ -58,7 +85,7 @@ def print_webform_list():
webform_skip_list
.
append
(
webform_url
)
def
sync
():
def
sync
(
uploads
=
True
):
for
report_id
in
sync_metadata_tree
:
for
chapter_id
in
sync_metadata_tree
[
report_id
]:
for
figure_ids
in
sync_metadata_tree
[
report_id
][
chapter_id
]:
...
...
@@ -67,9 +94,12 @@ def sync():
if
webform_url
in
webform_skip_list
:
print
'Skipping: '
+
webform_url
continue
sync_images
(
webform_url
,
gcis_id
)
if
uploads
:
print
'Attempting to upload: '
+
gcis_id
sync_images
(
webform_url
,
gcis_id
)
print
'Attempting to sync: '
+
gcis_id
sync_metadata
(
report_id
,
chapter_id
,
webform_url
,
gcis_id
)
print
'Success!'
def
sync_metadata
(
report_id
,
chapter_id
,
webform_url
,
gcis_id
):
...
...
src/webform_client.py
View file @
9e1794ef
...
...
@@ -5,7 +5,7 @@ import requests
import
re
from
os.path
import
join
from
domain
import
Figure
,
Image
from
domain
import
Figure
,
Image
,
Dataset
def
sanitized
(
pattern
):
...
...
@@ -38,18 +38,36 @@ class WebformClient:
@sanitized('^/metadata/figures/\d+$')
def get_webform(self, fig_url):
    """Fetch a figure webform and build a Figure with Images/Datasets.

    The visible span interleaved the superseded ``figure_json`` code
    path with its replacement; only the current ``webform_json`` path
    is kept here.

    Returns a Figure whose ``images`` list is populated from the
    webform JSON; each Image's ``datasets`` list is filled from its
    ``datasources`` entries.
    """
    full_url = '{b}{url}?token={t}'.format(b=self.base_url, url=fig_url,
                                           t=self.token)
    webform_json = requests.get(full_url).json()

    #TODO: refactor the service so this isn't necessary
    # The payload is keyed by a single webform node id (Python 2:
    # dict.keys() returns a list).
    webform_nid = webform_json.keys()[0]
    f = Figure(webform_json[webform_nid]['figure'][0])

    if 'images' in webform_json[webform_nid]:
        for img_idx, image in enumerate(webform_json[webform_nid]['images']):
            image_obj = Image(
                image,
                local_path=self.get_local_image_path(image),
                remote_path=self.get_remote_image_path(image)
            )

            #TODO: this just keeps getting worse
            if 'datasources' in webform_json[webform_nid]['images'][img_idx]:
                for dataset_json in webform_json[webform_nid]['images'][img_idx]['datasources']:
                    dataset = Dataset(dataset_json)

                    #Commence the hacks
                    # GCIS wants single extent strings; the webform
                    # splits them across several fields.
                    dataset.temporal_extent = ' '.join(
                        [dataset_json[field] for field in ['start_time', 'end_time']]
                    )
                    dataset.spatial_extent = ' '.join([
                        '{k}: {v};'.format(k=key, v=dataset_json[key])
                        for key in ['maximum_latitude', 'minimum_latitude',
                                    'maximum_longitude', 'minimum_longitude']
                    ])

                    #TODO: Extract DOIs from citation
                    image_obj.datasets.append(dataset)

            f.images.append(image_obj)

    return f
def
get_remote_image_path
(
self
,
image_json
):
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment