domain.py 18.3 KB
Newer Older
abuddenberg's avatar
abuddenberg committed
1
2
__author__ = 'abuddenberg'

3
4
from copy import deepcopy
import json
5
import re
6
import inspect
abuddenberg's avatar
abuddenberg committed
7

8
9
from dateutil.parser import parse

10

abuddenberg's avatar
abuddenberg committed
11
class Gcisbase(object):
    """Base class for objects populated from a GCIS JSON dictionary.

    Every name in ``fields`` becomes an instance attribute (initially None).
    Values from ``data`` are then copied onto attributes the instance already
    has, after optional key translation and string clean-up.
    """

    def __init__(self, data, fields=(), trans=()):
        """Populate the instance from ``data``.

        ``data`` is modified in place: translated keys are renamed and cleaned
        values are written back, because subclasses pop nested entries from the
        same dictionary after this constructor returns.

        :param data: dictionary of raw values
        :param fields: iterable of attribute names to create
        :param trans: mapping of incoming key -> attribute name (an empty
            tuple means "no translations")
        """
        #Setup class variables
        self.gcis_fields = fields
        self.translations = trans
        self.identifier = None

        #Save off a copy of the original JSON for debugging
        self.original = deepcopy(data)

        #Create attributes from the master list
        self.__dict__.update(dict.fromkeys(self.gcis_fields, None))

        #Perform translations
        for term in self.translations:
            val = data.pop(term, None)
            if val is not None:
                data[self.translations[term]] = val

        for k in data:
            if hasattr(self, k):
                data[k] = self._clean(data[k])
                #Keep legitimate falsy values (0, 0.0, False); skip only None and empty strings/containers
                if not self._is_empty(data[k]):
                    setattr(self, k, data[k])

    @staticmethod
    def _clean(value):
        """Strip whitespace from strings; leave every other value untouched."""
        try:
            value = value.strip()
        except AttributeError:
            return value

        #Python 2 hands us `unicode`; convert it to a UTF-8 native str.
        #Native strings are left alone: re-encoding a non-ASCII byte string raises
        #UnicodeDecodeError on Python 2, and encoding would yield bytes on Python 3.
        if not isinstance(value, str) and hasattr(value, 'encode'):
            value = value.encode('utf-8')
        return value

    @staticmethod
    def _is_empty(value):
        """Return True for None and for zero-length strings/containers."""
        if value is None:
            return True
        try:
            return len(value) == 0
        except TypeError:
            #Numbers and other unsized values are never considered empty
            return False

    def merge(self, other):
        """Fill attributes that are None or '' on self with the values from ``other``.

        Only non-routine members whose names do not start with ``__`` are
        considered. Returns self.
        """
        members = inspect.getmembers(self, lambda member: not inspect.isroutine(member))

        for name, current in members:
            if name.startswith('__'):
                continue
            if current in (None, '') and hasattr(other, name):
                setattr(self, name, getattr(other, name))

        return self

    def as_json(self, indent=0, omit_fields=()):
        """Serialize the GCIS fields to a JSON string.

        'uri' and 'href' are always left out, together with anything named in
        ``omit_fields``.
        """
        out_fields = set(self.gcis_fields) - (set(['uri', 'href']) | set(omit_fields))
        return json.dumps({f: getattr(self, f) for f in out_fields}, indent=indent)
58

abuddenberg's avatar
abuddenberg committed
59

60
61
class GcisObject(Gcisbase):
    """A Gcisbase that also carries lists of contributors and parents."""

    def __init__(self, data, **kwargs):
        """Build the object from a dictionary.

        :param data: dictionary of raw values; 'contributors' and 'parents'
            entries are removed from it and converted to Contributor / Parent
            instances
        :param kwargs: passed through to Gcisbase.__init__ (fields, trans)
        :raises TypeError: if ``data`` is not a dictionary
        """
        #isinstance rather than an exact type check so dict subclasses (e.g. OrderedDict) are accepted
        if not isinstance(data, dict):
            raise TypeError('Expected dict, got {t}'.format(t=type(data)))

        #Special case for contributors
        contrib_list = data.pop('contributors', None)
        parents_list = data.pop('parents', None)

        super(GcisObject, self).__init__(data, **kwargs)

        #Assign after the base constructor: it resets every name in gcis_fields to None,
        #which would otherwise wipe these lists for classes that list 'contributors'/'parents' as fields.
        self.contributors = [Contributor(contrib) for contrib in contrib_list] if contrib_list else []
        self.parents = [Parent(parent) for parent in parents_list] if parents_list else []

    def add_contributor(self, contributor):
        """Append a Contributor; raise TypeError for anything else."""
        if isinstance(contributor, Contributor):
            self.contributors.append(contributor)
        else:
            raise TypeError('Expected Contributor, got {t}'.format(t=type(contributor)))

    def add_person(self, person):
        """Wrap a Person in a new Contributor (with a blank Organization) and append it.

        :raises TypeError: if ``person`` is not a Person
        """
        if not isinstance(person, Person):
            raise TypeError('Expected Person, got {t}'.format(t=type(person)))

        #Contributor only accepts a data dictionary, so build an empty one and attach the parts
        contributor = Contributor({})
        contributor.person = person
        contributor.organization = Organization({})
        self.contributors.append(contributor)

    def add_parent(self, parent):
        """Append a Parent; raise TypeError for anything else."""
        if isinstance(parent, Parent):
            self.parents.append(parent)
        else:
            raise TypeError('Expected Parent, got {t}'.format(t=type(parent)))
88

89
90

class Figure(GcisObject):
    """A figure together with its chapter, images and files.

    ``chapter`` is a Chapter instance when the source data supplied a chapter
    dictionary; otherwise it keeps whatever plain value was supplied, which
    ``figure_num`` formats as the chapter part.
    """

    def __init__(self, data, local_path=None, remote_path=None, trans=()):
        """Build a Figure from a dictionary.

        :param data: dictionary of raw values; 'chapter', 'images' and 'files'
            entries are removed from it and converted to objects
        :param local_path: stored as-is on the instance
        :param remote_path: stored as-is on the instance
        :param trans: key translations, passed to the base constructor
        """
        self.gcis_fields = [
            'usage_limits', 'kindred_figures', 'time_start', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max',
            'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
            'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images', 'url'
        ]

        super(Figure, self).__init__(data, fields=self.gcis_fields, trans=trans)

        self.local_path = local_path
        self.remote_path = remote_path

        #Special case for chapter
        #Only a dictionary can become a Chapter; a bare value stays as set by the base constructor
        chap_tree = data.pop('chapter', None)
        if chap_tree and isinstance(chap_tree, dict):
            self.chapter = Chapter(chap_tree)

        #Special case for images
        image_list = data.pop('images', None)
        self.images = [Image(image) for image in image_list] if image_list else []

        file_list = data.pop('files', None)
        self.files = [File(f) for f in file_list] if file_list else []

        #Hack
        self.identifier = self.identifier.replace('/figure/', '') if self.identifier not in ('', None) else '***ID MISSING***'

    @property
    def figure_num(self):
        """Return '<chapter>.<ordinal>', using the Chapter's number when one is available."""
        if isinstance(self.chapter, Chapter) and self.chapter.number and self.ordinal:
            return '{0}.{1}'.format(self.chapter.number, self.ordinal)
        else:
            return '{0}.{1}'.format(self.chapter, self.ordinal)

    #TODO: Ordinal handling is unnecessarily complex
    @figure_num.setter
    def figure_num(self, value):
        """Split a 'chapter.figure' string into the chapter number and ordinal.

        Falsy values are ignored. Input that cannot be split into two integers
        is reported on stdout and both parts are set to None.
        """
        if value:
            try:
                chp, fig = value.split('.')
                chp = int(chp)
                fig = int(fig)
            except (ValueError, AttributeError):
                #AttributeError covers non-string input (no .split)
                print('Invalid chapter/figure numbers: {0}'.format(value))
                chp = None
                fig = None
            self.ordinal = fig

            #If we have an actual Chapter instance, populate it
            if isinstance(self.chapter, Chapter):
                self.chapter.number = chp
            else:
                self.chapter = chp

    def as_json(self, indent=0, omit_fields=('images', 'chapter', 'kindred_figures', 'keywords')):
        """Serialize to JSON, leaving out the nested/list fields by default."""
        return super(Figure, self).as_json(indent=indent, omit_fields=omit_fields)

    def __str__(self):
        string = '<Figure: id:{f_id} fignum:{f_num} name:{f_name}>\n\t[Images: {imgs}]'.format(
            f_id=self.identifier, f_num=self.figure_num, f_name=self.title, imgs=[i.identifier for i in self.images]
        )
        return string

    def __repr__(self):
        # return super(Figure, self).__repr__()
        return self.__str__()

    def merge(self, other):
        """Merge ``other`` into self, with special handling for the chapter.

        Returns self.
        """
        # Special handling for Chapters
        if isinstance(other.chapter, Chapter) and isinstance(self.chapter, Chapter):
            self.chapter.merge(other.chapter)

        #This might want to move to Chapter's merge()
        elif isinstance(other.chapter, Chapter) and not isinstance(self.chapter, Chapter):
            chapter_num = self.chapter
            self.chapter = other.chapter
            #Only carry our number over when we actually have one; otherwise keep the Chapter's own
            if chapter_num not in (None, ''):
                self.chapter.number = chapter_num

        return super(Figure, self).merge(other)
abuddenberg's avatar
abuddenberg committed
169
170


171
172
173
174
175
176
177
178
179
180
181
182
183
class Report(GcisObject):
    """A report record."""

    def __init__(self, data, trans=()):
        """Build a Report from a dictionary; ``trans`` is passed to the base constructor."""
        self.gcis_fields = ['doi', 'contact_note', 'title', 'publication_year', 'summary', 'url', 'contact_email', 'identifier', 'report_type_identifier']

        super(Report, self).__init__(data, fields=self.gcis_fields, trans=trans)

        # if self.report_type_identifier not in ['report', 'assessment', 'technical_input', 'indicator']:
        #     raise ValueError("report_type_identifier must be one of 'report', 'assessment', 'technical_input', 'indicator'")

    def as_json(self, indent=0, omit_fields=()):
        """Serialize to JSON, honouring the requested ``indent``."""
        return super(Report, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Report: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()


190
class Chapter(GcisObject):
    """A chapter record."""

    def __init__(self, data):
        """Build a Chapter from a dictionary (no key translations are applied)."""
        self.gcis_fields = ['report_identifier', 'identifier', 'number', 'url', 'title']

        super(Chapter, self).__init__(data, fields=self.gcis_fields)

    def as_json(self, indent=0, omit_fields=()):
        """Serialize to JSON, honouring the requested ``indent``."""
        return super(Chapter, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Chapter: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()

205

206
class Image(GcisObject):
    """An image record; ``create_dt`` is normalized to an ISO-8601 string."""

    def __init__(self, data, local_path=None, remote_path=None, trans=()):
        """Build an Image from a dictionary.

        :param data: dictionary of raw values
        :param local_path: stored as-is on the instance
        :param remote_path: stored as-is on the instance
        :param trans: key translations, passed to the base constructor
        """
        self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
                            'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
                            'usage_limits']

        #Private attributes for handling date parsing
        #Must exist before the base constructor runs, since it assigns through the create_dt property
        self._create_dt = None

        super(Image, self).__init__(data, fields=self.gcis_fields, trans=trans)

        self.local_path = local_path
        self.remote_path = remote_path

    @property
    def create_dt(self):
        return self._create_dt

    @create_dt.setter
    def create_dt(self, value):
        """Parse ``value`` and store its ISO format; store None when it is empty or unparseable."""
        try:
            self._create_dt = parse(value).isoformat() if value else None
        except (TypeError, ValueError):
            #ValueError is handled as well, matching the Dataset date setters
            self._create_dt = None

    def __str__(self):
        return '<Image: id:{id} name:{name}>'.format(id=self.identifier, name=self.title)
233
234


235
class Dataset(GcisObject):
    """A dataset record with normalized date fields.

    ``release_dt``, ``access_dt``, ``publication_year`` and ``temporal_extent``
    are properties backed by private attributes; their setters normalize the
    incoming value and fall back to None when it cannot be interpreted.
    """

    def __init__(self, data, trans=(), known_ids=None):
        """Build a Dataset from a dictionary.

        :param data: dictionary of raw values
        :param trans: key translations, passed to the base constructor
        :param known_ids: optional mapping of dataset name -> identifier; when
            the dataset's name is present, the mapped identifier replaces the
            one from ``data``
        """
        self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
                            'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
                            'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
                            'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
                            'identifier', 'release_dt', 'attributes']

        #TODO: This hack has got to go.
        self._identifiers = known_ids

        #Private attributes for handling date parsing
        self._release_dt = None
        self._access_dt = None
        self._publication_year = None
        self._temporal_extent = None

        super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans)

        self.identifier = self._identifiers[self.name] if self._identifiers and self.name in self._identifiers else self.identifier

    def __repr__(self):
        return '<Dataset: id:{id} name:{name}>'.format(id=self.identifier, name=self.name)

    def __str__(self):
        return self.__repr__()

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references')):
        """Serialize to JSON, leaving out the nested/list fields by default."""
        return super(Dataset, self).as_json(indent=indent, omit_fields=omit_fields)

    def merge(self, other):
        """Merge ``other`` into self, one instance attribute at a time.

        For each attribute in our ``__dict__`` that ``other`` also has: if our
        copy is None or '' or the other copy is longer, take the other copy.
        Values without a length are never considered "longer". Returns self.
        """
        for k in self.__dict__:
            if not hasattr(other, k):
                continue

            ours = self.__dict__[k]
            theirs = getattr(other, k)

            if ours in (None, ''):
                self.__dict__[k] = theirs
                continue

            try:
                other_is_longer = len(theirs) > len(ours)
            except TypeError:
                #One side has no length (None, numbers, ...): keep ours
                other_is_longer = False

            if other_is_longer:
                self.__dict__[k] = theirs

        #Return only after every attribute has been visited
        return self

    @property
    def release_dt(self):
        return self._release_dt

    @release_dt.setter
    def release_dt(self, value):
        """Parse ``value`` and store its ISO format; store None when empty or unparseable."""
        try:
            self._release_dt = parse(value).isoformat() if value else None
        except (TypeError, ValueError):
            self._release_dt = None

    @property
    def access_dt(self):
        return self._access_dt

    @access_dt.setter
    def access_dt(self, value):
        """Parse ``value`` and store its ISO format; store None when empty or unparseable."""
        try:
            self._access_dt = parse(value).isoformat() if value else None
        except (TypeError, ValueError):
            # print "Problem with date: " + self.access_dt
            self._access_dt = None

    @property
    def publication_year(self):
        return self._publication_year

    @publication_year.setter
    def publication_year(self, value):
        """Store the first run of four digits found in ``value`` (as a string), or None."""
        match = re.search(r'\d{4}', str(value)) if value else None
        if match:
            self._publication_year = match.group()
        else:
            self._publication_year = None

    @property
    def temporal_extent(self):
        return self._temporal_extent

    @temporal_extent.setter
    def temporal_extent(self, value):
        """Accept a single '<start> <end>' string.

        Without a setter the base constructor's setattr() raises AttributeError
        whenever the source data contains 'temporal_extent'.
        NOTE(review): assumes the two timestamps are separated by whitespace and
        contain none themselves -- confirm against the data source. Anything
        else is stored as None.
        """
        try:
            start_dt, end_dt = value.split()
        except (AttributeError, ValueError):
            self._temporal_extent = None
        else:
            self.set_temporal_extent(start_dt, end_dt)

    #Can't use property.setter due to multiple args
    def set_temporal_extent(self, start_dt, end_dt):
        """Store '<start ISO> <end ISO>'; store None when either bound is empty or unparseable."""
        try:
            self._temporal_extent = '{0} {1}'.format(parse(start_dt).isoformat(), parse(end_dt).isoformat()) if start_dt and end_dt else None
        except (TypeError, ValueError):
            self._temporal_extent = None

abuddenberg's avatar
abuddenberg committed
325
            
326
class Activity(GcisObject):
    """An activity record."""

    def __init__(self, data, trans=()):
        """Build an Activity from a dictionary; ``trans`` is passed to the base constructor."""
        #NOTE(review): 'metholodogies' looks like a misspelling of 'methodologies' -- confirm against the API before renaming
        self.gcis_fields = ['start_time', 'uri', 'methodology', 'data_usage', 'href', 'metholodogies', 'end_time',
                            'output_artifacts', 'duration', 'identifier', 'publication_maps', 'computing_environment',
                            'software', 'visualization_software', 'notes']

        super(Activity, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('metholodogies', 'publication_maps')):
        """Serialize to JSON, honouring the requested ``indent``."""
        return super(Activity, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Activity: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()

abuddenberg's avatar
abuddenberg committed
343

abuddenberg's avatar
abuddenberg committed
344
class Person(Gcisbase):
    """A person record."""

    def __init__(self, data, trans=()):
        """Build a Person from a dictionary; ``trans`` is passed to the base constructor."""
        self.gcis_fields = ['first_name', 'last_name', 'middle_name', 'contributors', 'url', 'uri', 'href', 'orcid',
                            'id']

        super(Person, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('contributors',)):
        """Serialize to JSON, honouring the requested ``indent``."""
        return super(Person, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Person: id:{id} first:{fn} last:{ln}>'.format(id=self.id, fn=self.first_name, ln=self.last_name)

    def __str__(self):
        return self.__repr__()


class Organization(Gcisbase):
    """An organization record."""

    def __init__(self, data, trans=()):
        """Populate the organization from ``data``, applying ``trans`` key translations."""
        self.gcis_fields = [
            'organization_type_identifier',
            'url',
            'uri',
            'href',
            'country_code',
            'identifier',
            'name',
        ]

        super(Organization, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def __repr__(self):
        template = '<Organization: id:{id} name:{name}>'
        return template.format(id=self.identifier, name=self.name)

    def __str__(self):
        #Same text as the repr
        return self.__repr__()


374
class Contributor(Gcisbase):
    """Links a person and an organization under a role."""

    def __init__(self, data):
        """Populate from ``data``; nested 'person' and 'organization' entries become objects."""
        self.gcis_fields = [
            'role_type_identifier', 'organization_uri', 'uri', 'href', 'person_uri', 'person_id', 'id',
        ]

        super(Contributor, self).__init__(data, fields=self.gcis_fields)

        #Pull both nested trees out of the dictionary before building anything from them
        nested_person = data.pop('person', None)
        nested_org = data.pop('organization', None)

        self.person = None
        if nested_person:
            self.person = Person(nested_person)

        self.organization = None
        if nested_org:
            self.organization = Organization(nested_org)

        self.role = None
        if self.role_type_identifier:
            self.role = Role(self.role_type_identifier)

    def __repr__(self):
        template = '<Contributor: Person:{p} Org:{o} Role:{r}>'
        return template.format(p=self.person, o=self.organization, r=self.role)

    def __str__(self):
        #Same text as the repr
        return self.__repr__()
392
393
394
395
396
397
398
399
400
401
402
403


class Role(object):
    """Thin wrapper around a role type identifier."""

    def __init__(self, type_id):
        #Stored unchanged; it doubles as the object's textual form
        self.type_id = type_id

    def __repr__(self):
        identifier = self.type_id
        return identifier

    def __str__(self):
        #Same text as the repr
        text = self.__repr__()
        return text

404
405

class Parent(Gcisbase):
    """A relationship from an object to a parent publication."""

    def __init__(self, data, target_pub=None, trans=(), pubtype_map=None):
        """Populate from ``data``.

        :param data: dictionary of raw values
        :param target_pub: stored on the instance as ``publication``
        :param trans: key translations, passed to the base constructor
        :param pubtype_map: optional mapping applied to publication type identifiers
        """
        self.gcis_fields = [
            'relationship', 'url', 'publication_type_identifier', 'label', 'activity_uri', 'note',
        ]

        #State read by the publication_type_identifier property; it has to
        #exist before the base constructor assigns through that property.
        self.publication_type_map = pubtype_map
        self._publication_type_identifier = None

        self.activity = None

        super(Parent, self).__init__(data, fields=self.gcis_fields, trans=trans)
        self.publication = target_pub

        #HACK: Set default relationship type
        if not self.relationship:
            self.relationship = 'prov:wasDerivedFrom'

    @property
    def publication_type_identifier(self):
        return self._publication_type_identifier

    @publication_type_identifier.setter
    def publication_type_identifier(self, value):
        """Store ``value``, translated through the publication type map when it has an entry."""
        mapping = self.publication_type_map
        if mapping and value in mapping:
            value = mapping[value]
        self._publication_type_identifier = value

    @staticmethod
    def from_obj(gcis_obj, activity=None):
        """Build a 'prov:wasDerivedFrom' Parent that points at ``gcis_obj``.

        The publication type is the lower-cased class name of ``gcis_obj``; the
        label is its ``title`` if it has that attribute, else its ``name``,
        else '***MISSING***'.
        """
        type_name = type(gcis_obj).__name__.lower()

        label = '***MISSING***'
        for candidate in ('title', 'name'):
            if hasattr(gcis_obj, candidate):
                label = getattr(gcis_obj, candidate)
                break

        url = None
        if type_name and gcis_obj.identifier:
            url = '/{type}/{id}'.format(type=type_name, id=gcis_obj.identifier)

        fields = {
            'relationship': 'prov:wasDerivedFrom',
            'publication_type_identifier': type_name,
            'url': url,
            'label': label
        }
        parent = Parent(fields, target_pub=gcis_obj)
        parent.activity = activity

        return parent

    def __repr__(self):
        template = '<Parent: rel:{rel} pub_type:{type} url:{url} label:{lbl}>'
        return template.format(
            rel=self.relationship,
            type=self.publication_type_identifier,
            url=self.url,
            lbl=self.label,
        )

    def __str__(self):
        #Same text as the repr
        return self.__repr__()
458

459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493

class Article(Gcisbase):
    """A journal article record."""

    def __init__(self, data, trans=()):
        """Build an Article from a dictionary; ``trans`` is passed to the base constructor."""
        self.gcis_fields = ['files', 'doi', 'contributors', 'title', 'url', 'notes', 'uri',
                            'journal_identifier', 'journal_pages', 'cited_by', 'href', 'parents', 'year',
                            'journal_vol', 'references', 'identifier']

        super(Article, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references', 'cited_by')):
        """Serialize to JSON, honouring the requested ``indent``."""
        return super(Article, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Article: id:{id} title:{t}>'.format(id=self.identifier, t=self.title)

    def __str__(self):
        return self.__repr__()


class Webpage(Gcisbase):
    """A web page record."""

    def __init__(self, data, trans=()):
        """Build a Webpage from a dictionary; ``trans`` is passed to the base constructor."""
        self.gcis_fields = ['files', 'contributors', 'title', 'url', 'uri', 'cited_by', 'href', 'references',
                            'parents', 'access_date', 'identifier']

        super(Webpage, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references', 'cited_by')):
        """Serialize to JSON, honouring the requested ``indent``."""
        return super(Webpage, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Webpage id:{id} title:{t}>'.format(id=self.identifier, t=self.title)

    def __str__(self):
        return self.__repr__()

494
495
496
497
498
499
500
501
502
503
504
505
506
507
508

class File(Gcisbase):
    """A file record."""

    def __init__(self, data, trans=()):
        """Populate the file from ``data``, applying ``trans`` key translations."""
        self.gcis_fields = [
            'landing_page', 'sha1', 'url', 'thumbnail_href', 'uri', 'href',
            'location', 'file', 'identifier', 'thumbnail', 'mime_type', 'size',
        ]

        super(File, self).__init__(data, fields=self.gcis_fields, trans=trans)

    # def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references', 'cited_by')):
    #     return super(File, self).as_json(omit_fields=omit_fields)

    def __repr__(self):
        template = '<File id:{id} path:{p}>'
        return template.format(id=self.identifier, p=self.file)

    def __str__(self):
        #Same text as the repr
        return self.__repr__()