# domain.py
__author__ = 'abuddenberg'

3
4
from copy import deepcopy
import json
5
import re
6
import inspect
abuddenberg's avatar
abuddenberg committed
7

8
9
from dateutil.parser import parse

10

abuddenberg's avatar
abuddenberg committed
11
class Gcisbase(object):
    """Base class that maps a JSON-style dict onto instance attributes.

    Every name in ``fields`` becomes an attribute (initially ``None``).  Keys of
    ``data`` are first renamed according to ``trans`` and then copied onto the
    instance when an attribute of that name exists.  Keys that match no
    attribute are ignored.
    """

    def __init__(self, data, fields=None, trans=None):
        """Populate the instance from ``data``.

        :param data: dict of raw values.  NOTE: it is modified in place
            (translated keys are renamed and string values are normalised);
            subclasses in this file pop further keys from it afterwards.
        :param fields: list of attribute names to create; defaults to an empty list.
        :param trans: dict mapping incoming key names to attribute names;
            defaults to an empty dict.
        """
        #Setup class variables.  None defaults avoid sharing one mutable
        #list/dict between every instance created without arguments.
        self.gcis_fields = [] if fields is None else fields
        self.translations = {} if trans is None else trans

        #Save off a copy of the original JSON for debugging
        self.original = deepcopy(data)

        #Create attributes from the master list
        self.__dict__.update(dict.fromkeys(self.gcis_fields, None))

        #Perform translations
        for term, target in self.translations.items():
            val = data.pop(term, None)
            if val is not None:
                data[target] = val

        #Only existing keys are reassigned below, so the dict does not change
        #size while it is being iterated.
        for k in data:
            if hasattr(self, k):
                data[k] = self._clean(data[k])
                setattr(self, k, data[k])

    @staticmethod
    def _clean(value):
        """Strip whitespace from string-like values; return anything else unchanged."""
        try:
            value = value.strip()
        except AttributeError:
            #Not a string (number, list, dict, None...): leave as is
            return value

        #Under Python 2, unicode text is encoded to a UTF-8 byte string, as the
        #original code did.  Values that are already the native str type are
        #left alone: re-encoding a Python 2 byte string raises
        #UnicodeDecodeError on non-ASCII data, and encoding a Python 3 str
        #would produce bytes that json.dumps() cannot serialise.
        if isinstance(value, type(u'')) and not isinstance(value, str):
            value = value.encode('utf-8')
        return value

    def merge(self, other):
        """Fill this object's empty attributes from ``other`` and return ``self``.

        An attribute counts as empty when it is ``None`` or ``''``.  Routines and
        double-underscore names are skipped.
        """
        #This sucks
        candidates = [
            (attr, v) for attr, v in inspect.getmembers(self, lambda a: not inspect.isroutine(a))
            if not attr.startswith('__')
        ]

        for attr, value in candidates:
            if value in (None, '') and hasattr(other, attr):
                setattr(self, attr, getattr(other, attr))

        return self

    def as_json(self, indent=0, omit_fields=None):
        """Serialise the declared fields to a JSON string.

        :param indent: passed straight through to ``json.dumps``.
        :param omit_fields: iterable of extra field names to leave out;
            ``uri`` and ``href`` are always left out.
        """
        excluded = set(['uri', 'href']) | set(omit_fields or [])
        out_fields = set(self.gcis_fields) - excluded
        return json.dumps({f: getattr(self, f) for f in out_fields}, indent=indent)
56

abuddenberg's avatar
abuddenberg committed
57

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
class GcisObject(Gcisbase):
    """A Gcisbase that additionally carries a list of Contributor objects."""

    def __init__(self, data, **kwargs):
        """Build the object from ``data``; ``kwargs`` are forwarded to Gcisbase.

        A ``contributors`` entry in ``data`` (a list of dicts) is removed from
        ``data`` and converted into Contributor instances.
        """
        #Special case for contributors
        contrib_list = data.pop('contributors', None)

        super(GcisObject, self).__init__(data, **kwargs)

        #Assigned after the base initialiser on purpose: the base resets every
        #declared field to None, so a subclass that lists 'contributors' among
        #its fields would otherwise end up with None instead of a list.
        self.contributors = [Contributor(contrib) for contrib in contrib_list] if contrib_list else []

    def add_contributor(self, contributor):
        """Append an existing Contributor to this object."""
        self.contributors.append(contributor)

    def add_person(self, person):
        """Wrap ``person`` in a new Contributor and append it.

        Contributor and Organization each take a single data dict, so both are
        built from empty dicts and then linked together.
        NOTE(review): the empty Organization mirrors the placeholder the
        original call appeared to intend - confirm that is what callers want.
        """
        contributor = Contributor({})
        contributor.person = person
        contributor.organization = Organization({})
        self.contributors.append(contributor)


class Figure(GcisObject):
    """A report figure, with its chapter and its list of images."""

    def __init__(self, data):
        """Build a Figure from a data dict (``data`` is modified in place)."""
        self.gcis_fields = [
            'usage_limits', 'kindred_figures', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max', 'time_start',
            'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
            'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images'
        ]

        self.translations = {
            'what_is_the_figure_id': 'identifier',
            'what_is_the_name_of_the_figure_as_listed_in_the_report': 'title',
            'when_was_this_figure_created': 'create_dt',
            'what_is_the_chapter_and_figure_number': 'figure_num'
        }

        super(Figure, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #Special case for chapter
        chap_tree = data.pop('chapter', None)
        self.chapter = Chapter(chap_tree) if chap_tree else self.chapter

        #Special case for images
        image_list = data.pop('images', None)
        self.images = [Image(image) for image in image_list] if image_list else []

        #Hack: a missing identifier (None or '') gets a visible placeholder
        #instead of raising AttributeError on None.
        self.identifier = self.identifier.replace('/figure/', '') if self.identifier else '***ID MISSING***'

    @property
    def figure_num(self):
        """Return ``'<chapter>.<ordinal>'`` as a string.

        The chapter's number is used when ``chapter`` is a Chapter with a
        number and an ordinal is set; otherwise ``chapter`` itself is formatted.
        """
        if isinstance(self.chapter, Chapter) and self.chapter.number and self.ordinal:
            return '{0}.{1}'.format(self.chapter.number, self.ordinal)
        else:
            return '{0}.{1}'.format(self.chapter, self.ordinal)

    #TODO: Ordinal handling is unnecessarily complex
    @figure_num.setter
    def figure_num(self, value):
        """Parse ``'<chapter>.<figure>'`` into the chapter number and ordinal.

        Anything that is not two dot-separated integers is reported on stdout
        and both numbers are set to None.
        """
        try:
            chp, fig = value.split('.')
            chp = int(chp)
            fig = int(fig)
        except (ValueError, AttributeError, TypeError):
            #AttributeError/TypeError cover non-string input; format() keeps the
            #message itself from failing on such values.
            print('Invalid chapter/figure numbers: {0}'.format(value))
            chp = None
            fig = None
        self.ordinal = fig

        #If we have an actual Chapter instance, populate it
        if isinstance(self.chapter, Chapter):
            self.chapter.number = chp
        else:
            self.chapter = chp

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out images, chapter, kindred_figures and keywords."""
        return super(Figure, self).as_json(
            indent=indent, omit_fields=['images', 'chapter', 'kindred_figures', 'keywords']
        )

    def __str__(self):
        string = '{f_id}: Figure {f_num}: {f_name}\n\tImages: {imgs}'.format(
            f_id=self.identifier, f_num=self.figure_num, f_name=self.title, imgs=[i.identifier for i in self.images]
        )
        return string

    def __repr__(self):
        return self.__str__()

    def merge(self, other):
        """Merge ``other`` into this figure, with special handling for chapters."""
        # Special handling for Chapters
        if isinstance(other.chapter, Chapter) and isinstance(self.chapter, Chapter):
            self.chapter.merge(other.chapter)

        #This might want to move to Chapter's merge()
        elif isinstance(other.chapter, Chapter) and not isinstance(self.chapter, Chapter):
            chapter_num = self.chapter
            self.chapter = other.chapter
            #Only carry our number over when we actually have one, so a known
            #chapter number is not replaced by an empty value.
            if chapter_num not in (None, ''):
                self.chapter.number = chapter_num

        return super(Figure, self).merge(other)
abuddenberg's avatar
abuddenberg committed
152
153


154
class Chapter(GcisObject):
    """A report chapter; declares no key translations of its own."""

    def __init__(self, data):
        """Build a Chapter from a data dict using the chapter field list."""
        chapter_fields = ['report_identifier', 'identifier', 'number', 'url', 'title']
        self.gcis_fields = chapter_fields

        super(Chapter, self).__init__(data, fields=chapter_fields)
abuddenberg's avatar
abuddenberg committed
159

160

161
class Image(GcisObject):
    """An image belonging to a figure, with optional local and remote file paths."""

    def __init__(self, data, local_path=None, remote_path=None):
        """Build an Image from a data dict (``data`` is modified in place).

        :param local_path: stored as ``self.local_path``.
        :param remote_path: stored as ``self.remote_path``.
        """
        self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
                            'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
                            'usage_limits']

        self.translations = {
            'list_any_keywords_for_the_image': 'attributes',
            'when_was_this_image_created': 'create_dt',
            'what_is_the_image_id': 'identifier',
            'maximum_latitude': 'lat_max',
            'minimum_latitude': 'lat_min',
            'maximum_longitude': 'lon_max',
            'minimum_longitude': 'lon_min',
            'start_time': 'time_start',
            'end_time': 'time_end',
            'what_is_the_name_of_the_image_listed_in_the_report': 'title'
        }

        #Private attributes for handling date parsing
        self._create_dt = None

        super(Image, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #Hack: skipped when there is no identifier, to avoid AttributeError on None
        if self.identifier:
            self.identifier = self.identifier.replace('/image/', '')

        self.local_path = local_path
        self.remote_path = remote_path

        #This does not accurately reflect GCIS' data model
        self.datasets = []

    @property
    def create_dt(self):
        """Creation date as an ISO-8601 string, or None."""
        return self._create_dt

    @create_dt.setter
    def create_dt(self, value):
        """Parse ``value`` with dateutil; empty or unparseable input becomes None."""
        try:
            self._create_dt = parse(value).isoformat() if value else None
        except (TypeError, ValueError, OverflowError):
            #dateutil raises ValueError for strings it cannot interpret and
            #OverflowError for out-of-range dates
            self._create_dt = None

    def __str__(self):
        return 'Image: {id}: {name}'.format(id=self.identifier, name=self.title)
207
208


209
class Dataset(GcisObject):
    """A dataset, with date fields normalised through properties."""

    def __init__(self, data):
        """Build a Dataset from a data dict (``data`` is modified in place)."""
        self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
                            'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
                            'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
                            'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
                            'identifier', 'release_dt', 'attributes']

        self.translations = {
            'data_set_access_date': 'access_dt',
            'data_set_publication_year': 'publication_year',
            'data_set_original_release_date': 'release_dt',
            # HACK elsewhere 'start_time and end_time': '',
            'data_set_id': 'native_id',
            # HACK elsewhere'': 'doi',
            # HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
            'data_set_version': 'version',
            'data_set_name': 'name',
            'data_set_citation': 'cite_metadata',
            'data_set_description': 'description',
            # Not sure'': 'type',
            'data_set_location': 'url',
            'data_set_variables': 'attributes'
        }

        #This desperately needs to get added to the webform
        self._identifiers = {
            'Global Historical Climatology Network - Daily': 'ghcn-daily',
            'Global Historical Climatology Network - Monthly': 'ghcn-monthly',
            'NCDC Merged Land and Ocean Surface Temperature': 'mlost',
            'U.S. Climate Divisional Dataset Version 2': 'cddv2',
            'Climate Division Database Version 2': 'cddv2',
            'Eighth degree-CONUS Daily Downscaled Climate Projections by Katharine Hayhoe': 'cmip3-downscaled', #Problem
            'Eighth degree-CONUS Daily Downscaled Climate Projections': 'cmip3-downscaled', #Problem
            'Earth Policy Institute Atmospheric Carbon Dioxide Concentration, 1000-2012': 'epi-co2',
            'Daily 1/8-degree gridded meteorological data [1 Jan 1949 - 31 Dec 2010]': 'maurer',
            'NCEP/NCAR Reanalysis': 'ncep-ncar',
            'NCDC Global Surface Temperature Anomalies': 'ncdc-gst-anomalies',
            'GRACE Static Field Geopotential Coefficients JPL Release 5.0 GSM': 'grace',
            'UW/NCDC Satellite Derived Hurricane Intensity Dataset': 'hurricane-intensity',
            'Bias-Corrected and Spatially Downscaled Surface Water Projections Hydrologic Data': 'water-projections',
            'International Best Track Archive for Climate Stewardship (IBTrACS)': 'ibtracs',
            'the World Climate Research Programme\'s (WCRP\'s) Coupled Model Intercomparison Project phase 3 (CMIP3) multi-model dataset': 'cmip3',
            'World Climate Research Programme\'s (WCRP\'s) Coupled Model Intercomparison Project phase 3 (CMIP3) multi-model dataset': 'cmip3',
            'World Climate Research Program\'s (WCRP\'s) Coupled Model Intercomparison Project phase 3 (CMIP3) multi-model dataset': 'cmip3',
            'North American Regional Climate Change Assessment Program dataset': 'narccap',
            'Gridded Population of the World Version 3 (GPWv3): Population Count Grid': 'gpwv3',
            'ETCCDI Extremes Indices Archive': 'etccdi',
            'Historical Climatology Network Monthly (USHCN) Version 2.5': 'ushcn',
            'Annual Maximum Ice Coverage (AMIC)': 'amic',
            'Global Historical Climatology Network-Daily (GHCN-D) Monthly Summaries: North American subset': 'ghcnd-monthly-summaries',
            'Global Sea Level From TOPEX & Jason Altimetry': 'topex-jason-altimetry',
            'World Climate Research Program\'s (WCRP\'s) Coupled Model Intercomparison Project phase 5 (CMIP5) multi-model ensemble': 'cmip5',

            #Surely we can do better
            'Proxy Data': 'proxy-data',
            'Tide Gauge Data': 'tide-gauge-data',
            'Projected Sea Level Rise': 'projected-sea-level-rise',
        }

        #Private attributes for handling date parsing
        self._release_dt = None
        self._access_dt = None
        self._publication_year = None

        #These do not accurately reflect GCIS' data model
        self.note = None
        self.activity = None

        super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #Prefer the hand-maintained identifier for a known name; otherwise keep
        #an identifier supplied in the data, and only then fall back to the name.
        self.identifier = self._identifiers.get(self.name) or self.identifier or self.name

    def __repr__(self):
        return 'Dataset: {id}: {name}'.format(id=self.identifier, name=self.name)

    def __str__(self):
        return self.__repr__()

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out files, parents, contributors and references."""
        return super(Dataset, self).as_json(
            indent=indent, omit_fields=['files', 'parents', 'contributors', 'references']
        )

    def merge(self, other):
        """Merge ``other`` into this dataset and return ``self``.

        For every instance attribute that ``other`` also has, take the other
        value when ours is empty (``None`` or ``''``) or when theirs is longer.
        """
        #Iterate over a snapshot of the keys; only existing keys are reassigned.
        for k in list(self.__dict__):
            if not hasattr(other, k):
                continue

            ours = self.__dict__[k]
            theirs = getattr(other, k)
            #If our copy of the field is empty or the other copy is longer, take that one.
            if ours in (None, '') or self._is_longer(theirs, ours):
                self.__dict__[k] = theirs

        #Returned after the whole loop so that every attribute is considered
        return self

    @staticmethod
    def _is_longer(candidate, current):
        """Return True when both values are sized and ``candidate`` is the longer one."""
        try:
            return len(candidate) > len(current)
        except TypeError:
            #One of the two has no length (None, number, ...): not comparable
            return False

    @staticmethod
    def _to_isoformat(value):
        """Parse a date with dateutil; return its ISO-8601 string or None."""
        if not value:
            return None
        try:
            return parse(value).isoformat()
        except (TypeError, ValueError, OverflowError):
            #dateutil raises ValueError for strings it cannot interpret and
            #OverflowError for out-of-range dates
            return None

    @property
    def release_dt(self):
        """Release date as an ISO-8601 string, or None."""
        return self._release_dt

    @release_dt.setter
    def release_dt(self, value):
        self._release_dt = self._to_isoformat(value)

    @property
    def access_dt(self):
        """Access date as an ISO-8601 string, or None."""
        return self._access_dt

    @access_dt.setter
    def access_dt(self, value):
        self._access_dt = self._to_isoformat(value)

    @property
    def publication_year(self):
        """First four-digit run found in the assigned value, as a string, or None."""
        return self._publication_year

    @publication_year.setter
    def publication_year(self, value):
        match = None
        if value:
            try:
                match = re.search(r'\d{4}', value)
            except TypeError:
                #Non-string input such as an int year
                match = re.search(r'\d{4}', str(value))

        self._publication_year = match.group() if match else None
            
            
338
class Activity(GcisObject):
    """An activity record built from webform-style keys (see ``translations``)."""

    def __init__(self, data):
        """Build an Activity from a data dict (``data`` is modified in place)."""
        self.gcis_fields = ['start_time', 'uri', 'methodology', 'data_usage', 'href', 'metholodogies', 'end_time',
                            'output_artifacts', 'duration', 'identifier', 'publication_maps', 'computing_environment',
                            'software', 'visualization_software', 'notes']

        self.translations = {
            'how_much_time_was_invested_in_creating_the_image': 'duration',
            '35_what_are_all_of_the_files_names_and_extensions_associated_with_this_image': 'output_artifacts',
            'what_operating_systems_and_platforms_were_used': 'computing_environment',
            'what_analytical_statistical_methods_were_employed_to_the_data': 'methodology',
            'describe_how_the_data_was_used_in_the_image_figure_creation': 'data_usage',
            'list_the_name_and_version_of_the_software': 'software',
            'what_software_applications_were_used_to_manipulate_the_data': 'notes',
            '33_what_software_applications_were_used_to_visualize_the_data': 'visualization_software'
        }

        super(Activity, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out metholodogies and publication_maps."""
        #indent is forwarded; it used to be accepted but silently dropped
        return super(Activity, self).as_json(indent=indent, omit_fields=['metholodogies', 'publication_maps'])

    def __repr__(self):
        return 'Activity: {id}'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()

abuddenberg's avatar
abuddenberg committed
367

abuddenberg's avatar
abuddenberg committed
368
369
370
371
372
373
374
375
376
377
378
379
380
class Person(Gcisbase):
    """A person record; declares no key translations."""

    def __init__(self, data):
        """Build a Person from a data dict (``data`` is modified in place)."""
        self.gcis_fields = ['first_name', 'last_name', 'middle_name', 'contributors', 'url', 'uri', 'href', 'orcid',
                            'id']

        self.translations = {}

        super(Person, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out contributors."""
        #indent is forwarded; it used to be accepted but silently dropped
        return super(Person, self).as_json(indent=indent, omit_fields=['contributors'])

    def __repr__(self):
        return 'Person: {id}: {fn} {ln}'.format(id=self.id, fn=self.first_name, ln=self.last_name)

    def __str__(self):
        return self.__repr__()


class Organization(Gcisbase):
    """An organization record, with a lookup table from known names to identifiers."""

    def __init__(self, data):
        """Build an Organization from a data dict (``data`` is modified in place)."""
        self.gcis_fields = ['organization_type_identifier', 'url', 'uri', 'href', 'country_code', 'identifier', 'name']

        self.translations = {}

        self._identifiers = {
            'NOAA NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc',
            'NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc',
            'NOAA NCDC/CICS NC': 'cooperative-institute-climate-satellites-nc',
            'NESDIS/NCDC': 'national-climatic-data-center',
            'NCDC': 'national-climatic-data-center',
            'U.S. Forest Service': 'us-forest-service',
            'NOAA Pacific Marine Environmental Laboratory': 'pacific-marine-environmental-laboratory',
            'Jet Propulsion Laboratory': 'jet-propulsion-laboratory',
            'HGS Consulting': 'hgs-consulting-llc',
            'University of Virginia': 'university-virginia',
            'Miami-Dade Dept. of Regulatory and Economic Resources': 'miami-dade-dept-regulatory-economic-resources',
            'Nansen Environmental and Remote Sensing Center': 'nansen-environmental-and-remote-sensing-center',
            'University of Illinois at Urbana-Champaign': 'university-illinois',
            'USGCRP': 'us-global-change-research-program',
            'National Park Service': 'national-park-service',
            'Institute of the Environment': 'university-arizona',
            'USGS': 'us-geological-survey',
            'University of Puerto Rico': 'university-puerto-rico',
            'University of Alaska': 'university-alaska'
        }

        super(Organization, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #A known name wins; otherwise keep whatever identifier the data carried
        #(None when it carried none) instead of always discarding it.
        self.identifier = self._identifiers.get(self.name, self.identifier)

    def __repr__(self):
        return 'Organization: {id}: {name}'.format(id=self.identifier, name=self.name)

    def __str__(self):
        return self.__repr__()


428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
class Contributor(Gcisbase):
    """Links a person and an organization, with a role looked up by person name.

    ``person`` and ``organization`` start out as None and are assigned by the
    caller after construction.
    """

    def __init__(self, data):
        """Build a Contributor from a data dict (``data`` is modified in place)."""
        self.gcis_fields = ['role_type_identifier', 'organization_uri', 'uri', 'href', 'person_uri']

        #Hack
        self.people_role_map = {
            'Kenneth Kunkel': 'scientist',
            'Xungang Yin': 'scientist',
            'Nina Bednarsek': 'scientist',
            'Henry Schwartz': 'scientist',
            'Jessicca Griffin': 'graphic_artist',
            'James Youtz': 'scientist',
            'Chris Fenimore': 'scientist',
            'Deb Misch': 'graphic_artist',
            'James Galloway': 'scientist',
            'Laura Stevens': 'scientist',
            'Nichole Hefty': 'point_of_contact',
            'Mike Squires': 'scientist',
            'Peter Thorne': 'scientist',
            'Donald Wuebbles': 'scientist',
            'Felix Landerer': 'scientist',
            'David Wuertz': 'scientist',
            'Russell Vose': 'scientist',
            'Gregg Garfin': 'scientist',
            'Jeremy Littell': 'scientist',
            'Emily Cloyd': 'contributing_author',
            'F. Chapin': 'scientist',
            ' Chapin': 'scientist'
        }

        super(Contributor, self).__init__(data, fields=self.gcis_fields)

        self.person = None
        self.organization = None
        self._role = None

    @property
    def role(self):
        """Role for this contributor, looked up from the person's name.

        Returns None when no person is attached or the name is not in
        ``people_role_map``.  A successful lookup is cached in ``_role``.
        """
        #Hack hack hack
        if self._role is None and self.person is not None:
            #A missing name part is treated as '' so the join cannot fail
            horrible_key = ' '.join((self.person.first_name or '', self.person.last_name or ''))
            role_type = self.people_role_map.get(horrible_key)
            self._role = Role(role_type) if role_type is not None else None

        return self._role

    def __repr__(self):
        return '({p}/{o}/{r})'.format(p=self.person, o=self.organization, r=self.role)

    def __str__(self):
        return self.__repr__()
479
480
481
482
483
484
485
486
487
488
489
490


class Role(object):
    """Thin wrapper around a role type identifier."""

    def __init__(self, type_id):
        """Remember ``type_id`` as the role's type identifier."""
        self.type_id = type_id

    def __repr__(self):
        """The representation is the type identifier itself."""
        identifier = self.type_id
        return identifier

    def __str__(self):
        """Same text as ``__repr__``."""
        text = self.__repr__()
        return text