Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Andrew Buddenberg
gcis-py-client
Commits
623bfdaa
Commit
623bfdaa
authored
Dec 09, 2016
by
abuddenberg
Browse files
Add support for 50 States metadata. Clean up a lot of hacks.
parent
699cb1be
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
gcis_clients/domain.py
View file @
623bfdaa
...
...
@@ -123,21 +123,22 @@ class Figure(GcisObject):
#TODO: Ordinal handling is unnecessarily complex
@
figure_num
.
setter
def
figure_num
(
self
,
value
):
try
:
chp
,
fig
=
value
.
split
(
'.'
)
chp
=
int
(
chp
)
fig
=
int
(
fig
)
except
ValueError
:
print
'Invalid chapter/figure numbers: '
+
value
chp
=
None
fig
=
None
self
.
ordinal
=
fig
#If we have an actual Chapter instance, populate it
if
isinstance
(
self
.
chapter
,
Chapter
):
self
.
chapter
.
number
=
chp
else
:
self
.
chapter
=
chp
if
value
:
try
:
chp
,
fig
=
value
.
split
(
'.'
)
chp
=
int
(
chp
)
fig
=
int
(
fig
)
except
ValueError
:
print
'Invalid chapter/figure numbers: '
+
value
chp
=
None
fig
=
None
self
.
ordinal
=
fig
#If we have an actual Chapter instance, populate it
if
isinstance
(
self
.
chapter
,
Chapter
):
self
.
chapter
.
number
=
chp
else
:
self
.
chapter
=
chp
def
as_json
(
self
,
indent
=
0
,
omit_fields
=
(
'images'
,
'chapter'
,
'kindred_figures'
,
'keywords'
)):
return
super
(
Figure
,
self
).
as_json
(
omit_fields
=
omit_fields
)
...
...
@@ -277,6 +278,8 @@ class Dataset(GcisObject):
self
.
_release_dt
=
parse
(
value
).
isoformat
()
if
value
else
None
except
TypeError
:
self
.
_release_dt
=
None
except
ValueError
:
self
.
_release_dt
=
None
@
property
def
access_dt
(
self
):
...
...
@@ -289,6 +292,8 @@ class Dataset(GcisObject):
except
TypeError
:
# print "Problem with date: " + self.access_dt
self
.
_access_dt
=
None
except
ValueError
:
self
.
_access_dt
=
None
@
property
def
publication_year
(
self
):
...
...
gcis_clients/survey_client.py
View file @
623bfdaa
...
...
@@ -26,9 +26,9 @@ def get_credentials():
def
parse_title
(
graphic_title
):
match
=
re
.
search
(
'
\w+\.\d+
'
,
graphic_title
)
match
=
re
.
search
(
'
^(\d+[a-z]?)\.
'
,
graphic_title
)
if
match
:
return
match
.
group
(
0
),
graphic_title
[
match
.
end
(
0
):].
strip
()
return
match
.
group
(
1
),
graphic_title
[
match
.
end
(
0
):].
strip
()
else
:
return
None
,
graphic_title
...
...
@@ -36,19 +36,8 @@ def parse_title(graphic_title):
def
populate_figure
(
fig_json
):
f
=
Figure
({})
try
:
if
fig_json
[
'graphics_title'
].
startswith
(
'ES'
):
title_fields
=
fig_json
[
'graphics_title'
].
split
(
'. '
)
title
=
' '
.
join
(
title_fields
[
1
:])
f
.
ordinal
=
re
.
search
(
'\d+'
,
title_fields
[
0
]).
group
(
0
)
else
:
figure_num
,
title
=
parse_title
(
fig_json
[
'graphics_title'
])
if
figure_num
and
figure_num
.
startswith
(
'TSD'
):
f
.
ordinal
=
figure_num
.
split
(
'.'
)[
1
]
else
:
f
.
figure_num
=
figure_num
if
figure_num
else
None
figure_num
,
title
=
parse_title
(
fig_json
[
'graphics_title'
])
f
.
ordinal
=
figure_num
if
figure_num
else
None
f
.
title
=
title
f
.
identifier
=
fig_json
[
'figure_id'
]
if
fig_json
[
'figure_id'
]
else
re
.
sub
(
'\W'
,
'_'
,
f
.
title
).
lower
()
f
.
create_dt
=
fig_json
[
'graphics_create_date'
].
strip
()
...
...
@@ -219,7 +208,7 @@ class SurveyClient:
figure_json
=
tier1_json
[
'figure'
]
#It's not worth trying to translations on this data; it's too different
f
=
populate_figure
(
figure_json
)
f
.
remote_path
=
survey_json
[
0
][
'filepath'
]
f
.
remote_path
=
survey_json
[
0
][
'filepath'
]
.
replace
(
'sites/default/'
,
'system/'
)
f
.
local_path
=
join
(
self
.
local_download_dir
,
basename
(
f
.
remote_path
))
if
f
.
remote_path
else
None
if
'copyright'
in
survey_json
[
0
]:
...
...
gcis_clients/survey_transforms.py
View file @
623bfdaa
...
...
@@ -9,7 +9,9 @@ DATASET_IDS = {
'ArboNet'
:
'cdc-arbonet'
,
'U.S. Natural Hazard Statistics'
:
'noaa-nws-us-natural-hazard-statistics'
,
'Billion-Dollar Weather and Climate Disasters'
:
'noaa-ncdc-billion-dollar-weather-climate-disasters'
,
'ESRI USA10 dataset (ArcGIS version 10.0)'
:
'esri-arcgis-v10-0'
'ESRI USA10 dataset (ArcGIS version 10.0)'
:
'esri-arcgis-v10-0'
,
'nClimDiv'
:
'noaa-ncdc-cag-us-temperature-nclimdiv'
,
'Global Historical Climatology Network (GHCN) Daily'
:
'noaa-ncdc-ghcn-daily'
}
COPYRIGHT_TRANSLATIONS
=
{
...
...
gcis_clients/webform_client.py
View file @
623bfdaa
...
...
@@ -31,9 +31,9 @@ def parse_creators(field):
first_name
,
last_name
=
name_split
[
0
],
name_split
[
-
1
]
org_name
=
rest
[
0
]
if
len
(
rest
)
>
0
else
None
contributor
=
Contributor
({}
,
hints
=
trans
.
CONTRIB_ROLES
)
contributor
=
Contributor
({})
contributor
.
person
=
Person
({
'first_name'
:
first_name
,
'last_name'
:
last_name
})
contributor
.
organization
=
Organization
({
'name'
:
org_name
}
,
known_ids
=
trans
.
ORG_IDS
)
contributor
.
organization
=
Organization
({
'name'
:
org_name
})
return
contributor
...
...
@@ -92,7 +92,7 @@ class WebformClient:
#Add provenance information (wasDerivedFrom parent)
if
'what_type_of_source_provided_this_figure'
in
figure_json
and
figure_json
[
'what_type_of_source_provided_this_figure'
]
==
'published_source'
:
f
.
add_parent
(
Parent
(
deepcopy
(
f
.
original
),
trans
=
trans
.
PARENT_TRANSLATIONS
,
pubtype_map
=
trans
.
PARENT_PUBTYPE_MAP
,
search_hints
=
trans
.
PARENT_SEARCH_HINTS
))
f
.
add_parent
(
Parent
(
deepcopy
(
f
.
original
),
trans
=
trans
.
PARENT_TRANSLATIONS
,
pubtype_map
=
trans
.
PARENT_PUBTYPE_MAP
))
if
'images'
in
webform_json
[
webform_nid
]:
for
img_idx
,
image
in
enumerate
(
webform_json
[
webform_nid
][
'images'
]):
...
...
@@ -134,8 +134,8 @@ class WebformClient:
activity_json
[
'identifier'
]
=
'-'
.
join
((
image_obj
.
identifier
.
split
(
'-'
)[
0
],
dataset
.
identifier
,
'process'
))
dataset
.
activity
=
Activity
(
activity_json
,
trans
=
trans
.
ACT_TRANSLATIONS
)
#TODO: Extract DOIs from citation
image_obj
.
datasets
.
append
(
dataset
)
#
TODO: Extract DOIs from citation
#
image_obj.datasets.append(dataset)
f
.
images
.
append
(
image_obj
)
#If download_images arg is set, attempt to download all images for this figure
...
...
states.py
0 → 100644
View file @
623bfdaa
This diff is collapsed.
Click to expand it.
sync_states.py
0 → 100644
View file @
623bfdaa
__author__
=
'abuddenberg'
import
requests
from
requests.packages.urllib3.exceptions
import
InsecureRequestWarning
requests
.
packages
.
urllib3
.
disable_warnings
(
InsecureRequestWarning
)
from
gcis_clients
import
GcisClient
,
SurveyClient
,
survey_token
,
gcis_dev_auth
,
gcis_stage_auth
from
gcis_clients.domain
import
Report
,
Chapter
from
sync_utils
import
realize_parents
,
realize_contributors
from
states
import
sync_metadata_tree
import
pickle
import
sys
import
re
# NOTE(review): both GcisClient constructions below are commented out, but
# main() and create_nlss_report() in this file reference a module-level
# `gcis`. No other assignment of `gcis` is visible here, so one of these
# lines presumably has to be re-enabled (with the matching credentials)
# before the script can run -- TODO confirm which server is intended.
# gcis = GcisClient('https://data-stage.globalchange.gov', *gcis_stage_auth)
# gcis = GcisClient('https://data.globalchange.gov', *gcis_stage_auth)

# Survey client shared by main() and gen_survey_list().
surveys = SurveyClient('https://state-resources.cicsnc.org', survey_token)
def main():
    """Fetch every survey listed in ``sync_metadata_tree`` and print it.

    Prints the result of ``gcis.test_login()`` first. Then, for each
    ``(survey_url, figure_id, figure_num)`` entry found under every
    report/chapter key of ``sync_metadata_tree``, the survey is fetched
    with ``do_download=True``, the figure's parents and contributors are
    passed to ``realize_parents`` / ``realize_contributors`` together with
    the ``gcis`` client, and the outcome is printed.

    Relies on the module-level names ``gcis`` and ``surveys``.
    """
    print(gcis.test_login())

    for report_id in sync_metadata_tree:
        report_chapters = sync_metadata_tree[report_id]

        for chapter_id in report_chapters:
            for entry in report_chapters[chapter_id]:
                # figure_id / figure_num are only needed by the disabled
                # lookup at the bottom of the loop body.
                survey_url, figure_id, figure_num = entry

                fetched = surveys.get_survey(survey_url, do_download=True)
                figure, datasets = fetched
                print(survey_url)
                print(figure, datasets)

                realize_parents(gcis, figure.parents)
                realize_contributors(gcis, figure.contributors)

                print('Contributors: ', figure.contributors)
                print('Parents: ', figure.parents)
                # gcis_fig = gcis.get_figure(report_id, figure_id, chapter_id=chapter_id)
def gen_survey_list():
    """Fetch every survey from the survey client and group them by chapter.

    The chapter key is the ``[a-z-]+`` run captured after ``group/`` at the
    start of the survey's ``node_title``; surveys whose title does not
    match are filed under the empty string. Progress is printed while the
    surveys are fetched.

    Returns a dict mapping each chapter key to a list of
    ``(url, ordinal, identifier, title)`` tuples, one per survey for which
    ``surveys.get_survey`` returned a truthy figure.
    """
    # Usage notes kept from the original author:
    # with open('survey_list.pk', 'wb') as out:
    #     pickle.dump(gen_survey_list(), out)
    # surveys = pickle.load(open('survey_list.pk'))
    #
    # for st in sync_metadata_tree['noaa-led-state-summaries-2016']:
    #     print(st)
    #     for f in sorted(surveys[st], key=lambda x: x[1]):
    #         print("('{0}', '{1}', '{2}'),".format(f[0], f[2], f[1]))
    #     print('')
    by_chapter = {}

    listing = surveys.get_list()
    for position, entry in enumerate(listing, 1):
        url = entry['url']

        title_match = re.match('group/([a-z-]+)', entry['node_title'])
        if title_match:
            chapter = title_match.group(1)
        else:
            chapter = ''

        progress = 'Processing: {b}{url} ({i}/{total})'.format(
            b=surveys.base_url,
            url=url,
            i=position,
            total=len(listing),
        )
        print(progress)

        # The datasets half of the result is not used here.
        figure, _datasets = surveys.get_survey(url)

        if figure:
            print(figure.identifier)
            print(chapter, figure.ordinal, figure.title)
            record = (url, figure.ordinal, figure.identifier, figure.title)
            by_chapter.setdefault(chapter, []).append(record)
        print('')

    return by_chapter
def create_nlss_report():
    """Create the 'noaa-led-state-summaries-2016' report and its chapters.

    Builds the Report, derives one chapter per key found under
    ``sync_metadata_tree['noaa-led-state-summaries-2016']`` (numbered from
    1 in iteration order, titled by capitalizing each dash-separated word
    of the key), then sends the report followed by each chapter through
    the module-level ``gcis`` client, printing every response.
    """
    report_fields = {
        'identifier': 'noaa-led-state-summaries-2016',
        'report_type_identifier': 'report',
        'title': 'NOAA-led State Summaries 2016',
        'url': 'https://statesummaries.cicsnc.org/',
        'publication_year': '2016',
        'contact_email': '',
    }
    nlss = Report(report_fields)

    # Collect (identifier, number, title) for every chapter up front, before
    # anything is sent to the server.
    chapters = []
    chapter_keys = sync_metadata_tree['noaa-led-state-summaries-2016']
    for index, chapter_id in enumerate(chapter_keys):
        words = [word.capitalize() for word in chapter_id.split('-')]
        chapters.append((chapter_id, index + 1, ' '.join(words)))

    print(gcis.create_report(nlss))

    for chapter_id, number, chapter_title in chapters:
        chapter = Chapter({
            'identifier': chapter_id,
            'number': number,
            'title': chapter_title,
            'report_identifier': nlss.identifier,
        })
        print(gcis.create_chapter(nlss.identifier, chapter))
# Run the sync only when this file is executed as a script, so that importing
# the module (e.g. to reuse gen_survey_list) does not trigger network calls.
if __name__ == '__main__':
    main()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment