# domain.py
__author__ = 'abuddenberg'

from copy import deepcopy
import json
import re
import inspect

from dateutil.parser import parse


class Gcisbase(object):
    """Base class for objects populated from a dictionary of field values.

    Each name in ``fields`` becomes an instance attribute (initially None).
    Keys of ``data`` are first renamed according to ``trans`` and then copied
    onto whichever attributes already exist on the instance.
    """

    def __init__(self, data, fields=None, trans=None):
        """Build the object from ``data``.

        :param data: dict of raw values. It is modified in place: translated
            keys are renamed and string values are replaced by their cleaned
            form. An untouched deep copy is kept in ``self.original``.
        :param fields: list of attribute names to create on the instance.
        :param trans: dict mapping alternative key names that may appear in
            ``data`` to the attribute names they should be stored under.
        """
        #Setup class variables
        #None stands in for "empty" so instances never share a mutable default
        self.gcis_fields = [] if fields is None else fields
        self.translations = {} if trans is None else trans

        #Save off a copy of the original JSON for debugging
        self.original = deepcopy(data)

        #Create attributes from the master list
        self.__dict__.update(dict.fromkeys(self.gcis_fields, None))

        #Perform translations
        for term in self.translations:
            val = data.pop(term, None)
            if val is not None:
                data[self.translations[term]] = val

        for k in data:
            #Keys that do not correspond to an attribute are ignored
            if not hasattr(self, k):
                continue

            value = data[k]
            try:
                #Strip whitespace from strings for consistency
                value = value.strip()

                #We now have unicode characters infesting our data.  I'm sure this is wrong.
                value = value.encode('utf-8')
            except (AttributeError, UnicodeError):
                #Not a string, or a string that cannot be re-encoded: keep the
                #result of the last step that succeeded
                pass

            data[k] = value
            setattr(self, k, value)

    def merge(self, other):
        """Fill this object's empty attributes from ``other``.

        Every non-routine, non-dunder attribute whose value is None or ''
        is replaced by the same-named attribute of ``other`` when ``other``
        has one.

        :param other: object to take missing values from.
        :return: self, to allow chaining.
        """
        #This sucks
        members = inspect.getmembers(self, lambda a: not inspect.isroutine(a))

        for attr, value in members:
            if attr.startswith('__'):
                continue
            if value in (None, '') and hasattr(other, attr):
                setattr(self, attr, getattr(other, attr))

        return self

    def as_json(self, indent=0, omit_fields=None):
        """Serialise the object's fields to a JSON string.

        'uri' and 'href' are always left out, along with anything named in
        ``omit_fields``.

        :param indent: passed straight to ``json.dumps``.
        :param omit_fields: optional iterable of extra field names to skip.
        :return: JSON text.
        """
        excluded = set(['uri', 'href'])
        excluded.update(omit_fields or [])
        out_fields = set(self.gcis_fields) - excluded
        return json.dumps({f: getattr(self, f) for f in out_fields}, indent=indent)
56

abuddenberg's avatar
abuddenberg committed
57

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
class GcisObject(Gcisbase):
    """Gcisbase subclass that additionally carries a list of Contributors."""

    def __init__(self, data, **kwargs):
        """Build the object and wrap any 'contributors' entries.

        :param data: dict of raw values; its 'contributors' entry, when
            present, is removed and each item is wrapped in a Contributor.
        :param kwargs: forwarded unchanged to ``Gcisbase.__init__``.
        """
        #Special case for contributors
        contrib_list = data.pop('contributors', None)

        super(GcisObject, self).__init__(data, **kwargs)

        #Assigned after the base initialiser: it resets every name in
        #gcis_fields to None, which would wipe this list for subclasses that
        #list 'contributors' as a field
        self.contributors = [Contributor(contrib) for contrib in contrib_list] if contrib_list else []

    def add_contributor(self, contributor):
        """Append an existing Contributor to this object's contributors."""
        self.contributors.append(contributor)

    def add_person(self, person):
        """Append a new Contributor for ``person`` with a blank Organization.

        :param person: object stored as the contributor's ``person``.
        """
        #Contributor and Organization both require a data dict, so they are
        #built empty and linked afterwards
        contributor = Contributor({})
        contributor.person = person
        contributor.organization = Organization({})
        self.contributors.append(contributor)


class Figure(GcisObject):
    """A figure, with optional links to its Chapter and its Images."""

    def __init__(self, data):
        """Build a Figure from ``data``.

        :param data: dict of figure values. 'chapter' and 'images' entries,
            when present, are removed from it and wrapped in Chapter and
            Image objects respectively.
        """
        self.gcis_fields = [
            'usage_limits', 'kindred_figures', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max', 'time_start',
            'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
            'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images'
        ]

        self.translations = {
            'what_is_the_figure_id': 'identifier',
            'what_is_the_name_of_the_figure_as_listed_in_the_report': 'title',
            'when_was_this_figure_created': 'create_dt',
            'what_is_the_chapter_and_figure_number': 'figure_num'
        }

        super(Figure, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #Special case for chapter
        chap_tree = data.pop('chapter', None)
        self.chapter = Chapter(chap_tree) if chap_tree else self.chapter

        #Special case for images
        image_list = data.pop('images', None)
        self.images = [Image(image) for image in image_list] if image_list else []

        #Hack
        #A missing identifier (None) is treated like an empty one instead of
        #raising AttributeError on .replace
        self.identifier = self.identifier.replace('/figure/', '') if self.identifier else '***ID MISSING***'

    @property
    def figure_num(self):
        """Return '<chapter>.<ordinal>' as a string.

        The chapter part is ``self.chapter.number`` when ``self.chapter`` is
        a Chapter with a truthy number and the ordinal is truthy; otherwise
        ``self.chapter`` itself is formatted.
        """
        if isinstance(self.chapter, Chapter) and self.chapter.number and self.ordinal:
            return '{0}.{1}'.format(self.chapter.number, self.ordinal)
        else:
            return '{0}.{1}'.format(self.chapter, self.ordinal)

    #TODO: Ordinal handling is unnecessarily complex
    @figure_num.setter
    def figure_num(self, value):
        """Split a 'chapter.figure' string into chapter number and ordinal.

        When the value is not two integers separated by a single dot, a
        message is printed and both numbers are set to None.
        """
        try:
            chp, fig = value.split('.')
            chp = int(chp)
            fig = int(fig)
        except ValueError:
            #Single-argument call form prints the same text on Python 2 and 3
            print('Invalid chapter/figure numbers: ' + value)
            chp = None
            fig = None
        self.ordinal = fig

        #If we have an actual Chapter instance, populate it
        if isinstance(self.chapter, Chapter):
            self.chapter.number = chp
        else:
            self.chapter = chp

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out images, chapter, kindred_figures and keywords.

        :param indent: passed through to the base implementation.
        """
        return super(Figure, self).as_json(
            indent=indent, omit_fields=['images', 'chapter', 'kindred_figures', 'keywords']
        )

    def __str__(self):
        string = '{f_id}: Figure {f_num}: {f_name}\n\tImages: {imgs}'.format(
            f_id=self.identifier, f_num=self.figure_num, f_name=self.title, imgs=[i.identifier for i in self.images]
        )
        return string

    def __repr__(self):
        # return super(Figure, self).__repr__()
        return self.__str__()

    def merge(self, other):
        """Merge ``other`` into this figure, treating the chapter specially.

        Two Chapter instances are merged with each other. When only
        ``other`` has a Chapter instance, it is adopted and this figure's
        previous chapter value, if any, becomes its number.

        :return: self.
        """
        # Special handling for Chapters
        if isinstance(other.chapter, Chapter) and isinstance(self.chapter, Chapter):
            self.chapter.merge(other.chapter)

        #This might want to move to Chapter's merge()
        elif isinstance(other.chapter, Chapter) and not isinstance(self.chapter, Chapter):
            chapter_num = self.chapter
            self.chapter = other.chapter
            #Only override the adopted chapter's number when we actually have
            #one; otherwise a known number would be replaced by None
            if chapter_num is not None:
                self.chapter.number = chapter_num

        return super(Figure, self).merge(other)


class Chapter(GcisObject):
    """A chapter described by report_identifier, identifier, number, url and title."""

    def __init__(self, data):
        """Build a Chapter from the ``data`` dict; no key translations are applied."""
        chapter_fields = ['report_identifier', 'identifier', 'number', 'url', 'title']
        self.gcis_fields = chapter_fields

        super(Chapter, self).__init__(data, fields=chapter_fields)


class Image(GcisObject):
    """An image, with optional local/remote file paths and attached datasets."""

    def __init__(self, data, local_path=None, remote_path=None):
        """Build an Image from ``data``.

        :param data: dict of image values, using either field names or the
            alternative key names listed in ``self.translations``.
        :param local_path: stored unchanged as ``self.local_path``.
        :param remote_path: stored unchanged as ``self.remote_path``.
        """
        self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
                            'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
                            'usage_limits']

        self.translations = {
            'list_any_keywords_for_the_image': 'attributes',
            'when_was_this_image_created': 'create_dt',
            'what_is_the_image_id': 'identifier',
            'maximum_latitude': 'lat_max',
            'minimum_latitude': 'lat_min',
            'maximum_longitude': 'lon_max',
            'minimum_longitude': 'lon_min',
            'start_time': 'time_start',
            'end_time': 'time_end',
            'what_is_the_name_of_the_image_listed_in_the_report': 'title'
        }

        #Private attributes for handling date parsing
        self._create_dt = None

        super(Image, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #Hack
        #Skipped when no identifier was supplied, so None does not raise
        #AttributeError on .replace
        if self.identifier:
            self.identifier = self.identifier.replace('/image/', '')

        self.local_path = local_path
        self.remote_path = remote_path

        #This does not accurately reflect GCIS' data model
        self.datasets = []

    @property
    def create_dt(self):
        """Creation date as an ISO-format string, or None."""
        return self._create_dt

    @create_dt.setter
    def create_dt(self, value):
        """Parse ``value`` as a date; empty or unparseable input stores None."""
        try:
            self._create_dt = parse(value).isoformat() if value else None
        except (TypeError, ValueError):
            #dateutil signals an unrecognised date string with ValueError
            self._create_dt = None

    def __str__(self):
        return 'Image: {id}: {name}'.format(id=self.identifier, name=self.title)


class Dataset(GcisObject):
    """A dataset, with date normalisation and a name-to-identifier lookup."""

    def __init__(self, data):
        """Build a Dataset from ``data``.

        The identifier is always derived from the dataset name: the value
        from ``self._identifiers`` when the name is listed there, otherwise
        the name itself.

        :param data: dict of dataset values, using either field names or the
            alternative key names listed in ``self.translations``.
        """
        self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
                            'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
                            'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
                            'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
                            'identifier', 'release_dt', 'attributes']

        self.translations = {
            'data_set_access_date': 'access_dt',
            'data_set_publication_year': 'publication_year',
            'data_set_original_release_date': 'release_dt',
            # HACK elsewhere 'start_time and end_time': '',
            'data_set_id': 'native_id',
            # HACK elsewhere'': 'doi',
            # HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
            'data_set_version': 'version',
            'data_set_name': 'name',
            'data_set_citation': 'cite_metadata',
            'data_set_description': 'description',
            # Not sure'': 'type',
            'data_set_location': 'url',
            'data_set_variables': 'attributes'
        }

        #This desperately needs to get added to the webform
        self._identifiers = {
            'Global Historical Climatology Network - Daily': 'ghcn-daily',
            'Global Historical Climatology Network - Monthly': 'ghcn-monthly',
            'NCDC Merged Land and Ocean Surface Temperature': 'mlost',
            'U.S. Climate Divisional Dataset Version 2': 'cddv2',
            'Climate Division Database Version 2': 'cddv2',
            'Eighth degree-CONUS Daily Downscaled Climate Projections by Katharine Hayhoe': 'cmip3-downscaled', #Problem
            'Eighth degree-CONUS Daily Downscaled Climate Projections': 'cmip3-downscaled', #Problem
            'Earth Policy Institute Atmospheric Carbon Dioxide Concentration, 1000-2012': 'epi-co2',
            'Daily 1/8-degree gridded meteorological data [1 Jan 1949 - 31 Dec 2010]': 'maurer',
            'NCEP/NCAR Reanalysis': 'ncep-ncar',
            'NCDC Global Surface Temperature Anomalies': 'ncdc-gst-anomalies',
            'GRACE Static Field Geopotential Coefficients JPL Release 5.0 GSM': 'grace',
            'UW/NCDC Satellite Derived Hurricane Intensity Dataset': 'hurricane-intensity',
            'Bias-Corrected and Spatially Downscaled Surface Water Projections Hydrologic Data': 'water-projections',
            'International Best Track Archive for Climate Stewardship (IBTrACS)': 'ibtracs',
            'the World Climate Research Programme\'s (WCRP\'s) Coupled Model Intercomparison Project phase 3 (CMIP3) multi-model dataset': 'cmip3',
            'World Climate Research Programme\'s (WCRP\'s) Coupled Model Intercomparison Project phase 3 (CMIP3) multi-model dataset': 'cmip3',
            'World Climate Research Program\'s (WCRP\'s) Coupled Model Intercomparison Project phase 3 (CMIP3) multi-model dataset': 'cmip3',
            'North American Regional Climate Change Assessment Program dataset': 'narccap',
            'Gridded Population of the World Version 3 (GPWv3): Population Count Grid': 'gpwv3',
            'ETCCDI Extremes Indices Archive': 'etccdi',
            'Historical Climatology Network Monthly (USHCN) Version 2.5': 'ushcn',
            'Annual Maximum Ice Coverage (AMIC)': 'amic',
            'Global Historical Climatology Network-Daily (GHCN-D) Monthly Summaries: North American subset': 'ghcnd-monthly-summaries',
            'Global Sea Level From TOPEX & Jason Altimetry': 'topex-jason-altimetry'
        }

        #Private attributes for handling date parsing
        self._release_dt = None
        self._access_dt = None
        self._publication_year = None

        #These do not accurately reflect GCIS' data model
        self.note = None
        self.activity = None

        super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        self.identifier = self._identifiers.get(self.name, self.name)

    def __str__(self):
        return 'Dataset: {id}: {name}'.format(id=self.identifier, name=self.name)

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out files, parents, contributors and references.

        :param indent: passed through to the base implementation.
        """
        return super(Dataset, self).as_json(
            indent=indent, omit_fields=['files', 'parents', 'contributors', 'references']
        )

    @staticmethod
    def _is_longer(candidate, current):
        """Return True when both values have a length and ``candidate``'s is greater."""
        try:
            return len(candidate) > len(current)
        except TypeError:
            #At least one side has no length (None, numbers, ...)
            return False

    def merge(self, other):
        """Merge ``other`` into this dataset, attribute by attribute.

        For every entry in this instance's ``__dict__`` that ``other`` also
        has, ``other``'s value is taken when ours is None or '' or when
        ``other``'s value is longer than ours.

        :return: self.
        """
        for k in list(self.__dict__):
            if not hasattr(other, k):
                continue

            ours = self.__dict__[k]
            theirs = getattr(other, k)

            #If our copy of the field is empty or the other copy is longer, take that one.
            if ours in (None, '') or self._is_longer(theirs, ours):
                self.__dict__[k] = theirs

        #Returned only after every attribute has been considered
        return self

    @property
    def release_dt(self):
        """Release date as an ISO-format string, or None."""
        return self._release_dt

    @release_dt.setter
    def release_dt(self, value):
        """Parse ``value`` as a date; empty or unparseable input stores None."""
        try:
            self._release_dt = parse(value).isoformat() if value else None
        except (TypeError, ValueError):
            #dateutil signals an unrecognised date string with ValueError
            self._release_dt = None

    @property
    def access_dt(self):
        """Access date as an ISO-format string, or None."""
        return self._access_dt

    @access_dt.setter
    def access_dt(self, value):
        """Parse ``value`` as a date; empty or unparseable input stores None."""
        try:
            self._access_dt = parse(value).isoformat() if value else None
        except (TypeError, ValueError):
            # print "Problem with date: " + self.access_dt
            self._access_dt = None

    @property
    def publication_year(self):
        """First run of four digits found in the assigned value, or None."""
        return self._publication_year

    @publication_year.setter
    def publication_year(self, value):
        """Store the first four-digit run found in ``value``, else None."""
        match = None
        if value:
            try:
                match = re.search(r'\d{4}', value)
            except TypeError:
                #Non-string input such as the integer 2013
                match = re.search(r'\d{4}', str(value))

        self._publication_year = match.group() if match else None
            
            
class Activity(GcisObject):
    """An activity record: methodology, data usage, duration, output artifacts, etc."""

    def __init__(self, data):
        """Build an Activity from ``data``.

        :param data: dict of activity values, using either field names or
            the alternative key names listed in ``self.translations``.
        """
        self.gcis_fields = ['start_time', 'uri', 'methodology', 'data_usage', 'href', 'metholodogies', 'end_time',
                            'output_artifacts', 'duration', 'identifier', 'publication_maps', 'computing_environment']

        self.translations = {
            'how_much_time_was_invested_in_creating_the_image': 'duration',
            '35_what_are_all_of_the_files_names_and_extensions_associated_with_this_image': 'output_artifacts',
            'what_operating_systems_and_platforms_were_used': 'computing_environment',
            'what_analytical_statistical_methods_were_employed_to_the_data': 'methodology',
            'describe_how_the_data_was_used_in_the_image_figure_creation': 'data_usage'
        }

        super(Activity, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out metholodogies and publication_maps.

        :param indent: passed through to the base implementation.
        """
        return super(Activity, self).as_json(indent=indent, omit_fields=['metholodogies', 'publication_maps'])


class Person(Gcisbase):
    """A person identified by id, with first, middle and last name fields."""

    def __init__(self, data):
        """Build a Person from the ``data`` dict; no key translations are applied."""
        self.gcis_fields = ['first_name', 'last_name', 'middle_name', 'contributors', 'url', 'uri', 'href', 'orcid',
                            'id']

        self.translations = {}

        super(Person, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out contributors.

        :param indent: passed through to the base implementation.
        """
        return super(Person, self).as_json(indent=indent, omit_fields=['contributors'])

    def __repr__(self):
        return 'Person: {id}: {fn} {ln}'.format(id=self.id, fn=self.first_name, ln=self.last_name)

    def __str__(self):
        return self.__repr__()


class Organization(Gcisbase):
    """An organization whose identifier is looked up from its name."""

    def __init__(self, data):
        """Build an Organization from ``data``.

        The identifier is always replaced by the ``self._identifiers`` entry
        for the organization's name, or None when the name is not listed.
        """
        org_fields = ['organization_type_identifier', 'url', 'uri', 'href', 'country_code', 'identifier', 'name']
        self.gcis_fields = org_fields

        self.translations = {}

        # Known organization names mapped to their identifiers
        self._identifiers = {
            'NOAA NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc',
            'NCDC/CICS-NC': 'cooperative-institute-climate-satellites-nc',
            'NOAA NCDC/CICS NC': 'cooperative-institute-climate-satellites-nc',
            'NESDIS/NCDC': 'noaa-national-climatic-data-center',
            'NCDC': 'noaa-national-climatic-data-center',
            'U.S. Forest Service': 'us-forest-service',
            'NOAA Pacific Marine Environmental Laboratory': 'noaa-pacific-marine-environmental-laboratory',
            'Jet Propulsion Laboratory': 'nasa-jet-propulsion-laboratory',
            'HGS Consulting': 'hgs-consulting-llc',
            'University of Virginia': 'university-virginia',
            'Miami-Dade Dept. of Regulatory and Economic Resources': 'miami-dade-dept-regulatory-economic-resources',
            'Nansen Environmental and Remote Sensing Center': 'nansen-environmental-and-remote-sensing-center',
            'University of Illinois at Urbana-Champaign': 'university-illinois',
            'USGCRP': 'us-global-change-research-program',
            'National Park Service': 'national-park-service',
            'Institute of the Environment': 'university-arizona',
            'USGS': 'us-geological-survey',
            'University of Puerto Rico': 'university-puerto-rico',
            'University of Alaska': 'university-alaska'
        }

        super(Organization, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        # dict.get yields None for names missing from the table
        self.identifier = self._identifiers.get(self.name)

    def __repr__(self):
        template = 'Organization: {id}: {name}'
        return template.format(name=self.name, id=self.identifier)

    def __str__(self):
        text = self.__repr__()
        return text


class Contributor(Gcisbase):
    """Ties together a person, an organization and the person's role."""

    def __init__(self, data):
        """Build a Contributor from ``data``.

        ``person`` and ``organization`` start as None and are expected to be
        assigned by the caller afterwards.
        """
        self.gcis_fields = ['role_type_identifier', 'organization_uri', 'uri', 'href', 'person_uri']

        #Hack
        self.people_role_map = {
            'Kenneth Kunkel': 'scientist',
            'Xungang Yin': 'scientist',
            'Nina Bednarsek': 'scientist',
            'Henry Schwartz': 'scientist',
            'Jessicca Griffin': 'graphic_artist',
            'James Youtz': 'scientist',
            'Chris Fenimore': 'scientist',
            'Deb Misch': 'graphic_artist',
            'James Galloway': 'scientist',
            'Laura Stevens': 'scientist',
            'Nichole Hefty': 'point_of_contact',
            'Mike Squires': 'scientist',
            'Peter Thorne': 'scientist',
            'Donald Wuebbles': 'scientist',
            'Felix Landerer': 'scientist',
            'David Wuertz': 'scientist',
            'Russell Vose': 'scientist'
        }

        super(Contributor, self).__init__(data, fields=self.gcis_fields)

        self.person = None
        self.organization = None
        self._role = None

    @property
    def role(self):
        """Role looked up from the person's 'first last' name, or None.

        The lookup is cached once it succeeds. None is returned while no
        person is attached, when either name part is missing, or when the
        name is not in ``self.people_role_map``.
        """
        #Hack hack hack
        if self._role is None and self.person is not None:
            first = getattr(self.person, 'first_name', None)
            last = getattr(self.person, 'last_name', None)

            #Both parts are needed to form a key; without this guard a
            #missing name would raise inside join
            if first and last:
                horrible_key = ' '.join((first, last))
                if horrible_key in self.people_role_map:
                    self._role = Role(self.people_role_map[horrible_key])

        return self._role

    def __repr__(self):
        return '({p}/{o}/{r})'.format(p=self.person, o=self.organization, r=self.role)

    def __str__(self):
        return self.__repr__()


class Role(object):
    """Wrapper around a role type identifier."""

    def __init__(self, type_id):
        # Stored verbatim; it doubles as the object's textual form
        self.type_id = type_id

    def __repr__(self):
        label = self.type_id
        return label

    def __str__(self):
        text = self.__repr__()
        return text