domain.py 17.6 KB
Newer Older
abuddenberg's avatar
abuddenberg committed
1
2
__author__ = 'abuddenberg'

3
4
from copy import deepcopy
import json
5
import re
6
import inspect
abuddenberg's avatar
abuddenberg committed
7

8
9
from dateutil.parser import parse

10

abuddenberg's avatar
abuddenberg committed
11
class Gcisbase(object):
    """Base class that maps a GCIS JSON dictionary onto instance attributes.

    Every name in ``fields`` becomes an attribute (initially ``None``). Keys of
    ``data`` that match an existing attribute overwrite it when their value is
    truthy; other keys are ignored.
    """

    def __init__(self, data, fields=(), trans=()):
        """Populate the instance from ``data``.

        :param data: dict of raw values. It is modified in place: translated
            keys are renamed and string values are replaced by their stripped
            form.
        :param fields: iterable of attribute names to create on the instance.
        :param trans: mapping of ``{key_in_data: attribute_name}``; the empty
            tuple default means "no translations".
        """
        #Setup class variables
        self.gcis_fields = fields
        self.translations = trans

        #Save off a copy of the original JSON for debugging
        self.original = deepcopy(data)

        #Create attributes from the master list
        self.__dict__.update(dict.fromkeys(self.gcis_fields, None))

        #Perform translations
        for term in self.translations:
            val = data.pop(term, None)
            if val is not None:
                data[self.translations[term]] = val

        #On Python 2 this is ``unicode``; on Python 3 it is ``str`` itself
        text_type = type(u'')

        for key in data:
            if not hasattr(self, key):
                continue

            value = data[key]
            try:
                #Strip whitespace from strings for consistency
                value = value.strip()
            except AttributeError:
                #Not string-like (number, list, dict, ...): keep it untouched
                pass
            else:
                #Only a Python 2 unicode object is encoded. Encoding a native
                #str would yield bytes on Python 3 and can raise
                #UnicodeDecodeError for non-ASCII byte strings on Python 2.
                if text_type is not str and isinstance(value, text_type):
                    value = value.encode('utf-8')
                data[key] = value

            #Falsy values (None, '', 0, empty containers) leave the default None
            if value:
                setattr(self, key, value)

    def merge(self, other):
        """Fill this object's empty attributes from ``other``.

        An attribute counts as empty when it equals ``None`` or ``''``; it is
        replaced only if ``other`` has an attribute of the same name.

        :param other: object to copy values from.
        :return: ``self``.
        """
        #This sucks
        candidates = [
            (name, value)
            for name, value in inspect.getmembers(self, lambda member: not inspect.isroutine(member))
            if not name.startswith('__')
        ]

        for name, value in candidates:
            if value in (None, '') and hasattr(other, name):
                setattr(self, name, getattr(other, name))

        return self

    def as_json(self, indent=0, omit_fields=()):
        """Serialize the GCIS fields to a JSON string.

        :param indent: passed to ``json.dumps``.
        :param omit_fields: field names to leave out, in addition to 'uri'
            and 'href' which are always skipped.
        :return: JSON text.
        """
        skipped = {'uri', 'href'} | set(omit_fields)
        out_fields = set(self.gcis_fields) - skipped
        return json.dumps({f: getattr(self, f) for f in out_fields}, indent=indent)
57

abuddenberg's avatar
abuddenberg committed
58

59
60
class GcisObject(Gcisbase):
    """Gcisbase variant that also carries ``contributors`` and ``parents`` lists."""

    def __init__(self, data, **kwargs):
        """Build the object and its contributor/parent lists.

        :param data: dict of raw values; the 'contributors' and 'parents' keys
            are popped from it.
        :param kwargs: forwarded to ``Gcisbase.__init__`` (``fields``, ``trans``).
        :raises TypeError: if ``data`` is not a dict.
        """
        #isinstance rather than an exact type match so dict subclasses are accepted
        if not isinstance(data, dict):
            raise TypeError('Expected dict, got {t}'.format(t=type(data)))

        #Special case for contributors and parents: pop them before the base
        #initializer so they are not part of the saved original
        contrib_list = data.pop('contributors', None)
        parents_list = data.pop('parents', None)

        super(GcisObject, self).__init__(data, **kwargs)

        #Assigned after the base initializer: it resets every name listed in
        #``fields`` to None, which would wipe these lists for classes whose
        #field list contains 'contributors' or 'parents'.
        self.contributors = [Contributor(contrib) for contrib in contrib_list] if contrib_list else []
        self.parents = [Parent(parent) for parent in parents_list] if parents_list else []

    def add_contributor(self, contributor):
        """Append ``contributor``; raise TypeError unless it is a Contributor."""
        if isinstance(contributor, Contributor):
            self.contributors.append(contributor)
        else:
            raise TypeError('Expected Contributor, got {t}'.format(t=type(contributor)))

    def add_person(self, person):
        """Append a new Contributor wrapping ``person`` with an empty Organization.

        :raises TypeError: if ``person`` is not a Person.
        """
        if not isinstance(person, Person):
            raise TypeError('Expected Person, got {t}'.format(t=type(person)))

        #Contributor and Organization both take a data dict, so start from
        #empty ones and attach the objects afterwards.
        contributor = Contributor({})
        contributor.person = person
        contributor.organization = Organization({})
        self.contributors.append(contributor)

    def add_parent(self, parent):
        """Append ``parent``; raise TypeError unless it is a Parent."""
        if isinstance(parent, Parent):
            self.parents.append(parent)
        else:
            raise TypeError('Expected Parent, got {t}'.format(t=type(parent)))
87

88
89

class Figure(GcisObject):
    """GCIS figure with its chapter, images and files."""

    def __init__(self, data, local_path=None, remote_path=None, trans=()):
        """
        :param data: dict of raw values; 'chapter', 'images' and 'files' are
            popped from it after the base initializer has run.
        :param local_path: stored as ``self.local_path``.
        :param remote_path: stored as ``self.remote_path``.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = [
            'usage_limits', 'kindred_figures', 'time_start', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max',
            'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
            'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images', 'url'
        ]

        super(Figure, self).__init__(data, fields=self.gcis_fields, trans=trans)

        self.local_path = local_path
        self.remote_path = remote_path

        #Special case for chapter
        chap_tree = data.pop('chapter', None)
        self.chapter = Chapter(chap_tree) if chap_tree else self.chapter

        #Special case for images
        image_list = data.pop('images', None)
        self.images = [Image(image) for image in image_list] if image_list else []

        file_list = data.pop('files', None)
        self.files = [File(f) for f in file_list] if file_list else []

        #Hack
        self.identifier = self.identifier.replace('/figure/', '') if self.identifier not in ('', None) else '***ID MISSING***'

    @property
    def figure_num(self):
        """Return '<chapter>.<ordinal>'.

        The chapter part is ``self.chapter.number`` when a Chapter instance
        with a number is attached and the ordinal is set; otherwise
        ``self.chapter`` itself is formatted.
        """
        if isinstance(self.chapter, Chapter) and self.chapter.number and self.ordinal:
            return '{0}.{1}'.format(self.chapter.number, self.ordinal)
        else:
            return '{0}.{1}'.format(self.chapter, self.ordinal)

    #TODO: Ordinal handling is unnecessarily complex
    @figure_num.setter
    def figure_num(self, value):
        """Parse 'chapter.figure' and store both numbers.

        An unparsable value is reported on stdout and both numbers become None.
        """
        try:
            chp, fig = value.split('.')
            chp = int(chp)
            fig = int(fig)
        except ValueError:
            #Single-argument call form: valid on Python 3 and prints the same
            #text under the Python 2 print statement.
            print('Invalid chapter/figure numbers: ' + value)
            chp = None
            fig = None
        self.ordinal = fig

        #If we have an actual Chapter instance, populate it
        if isinstance(self.chapter, Chapter):
            self.chapter.number = chp
        else:
            self.chapter = chp

    def as_json(self, indent=0, omit_fields=('images', 'chapter', 'kindred_figures', 'keywords')):
        """Serialize to JSON, omitting nested/list fields by default.

        :param indent: forwarded to the base ``as_json``.
        :param omit_fields: field names to leave out.
        """
        return super(Figure, self).as_json(indent=indent, omit_fields=omit_fields)

    def __str__(self):
        string = '<Figure: id:{f_id} fignum:{f_num} name:{f_name}>\n\t[Images: {imgs}]'.format(
            f_id=self.identifier, f_num=self.figure_num, f_name=self.title, imgs=[i.identifier for i in self.images]
        )
        return string

    def __repr__(self):
        # return super(Figure, self).__repr__()
        return self.__str__()

    def merge(self, other):
        """Merge ``other`` into this figure, handling the chapter specially.

        :return: ``self`` (result of the base ``merge``).
        """
        # Special handling for Chapters
        if isinstance(other.chapter, Chapter) and isinstance(self.chapter, Chapter):
            self.chapter.merge(other.chapter)

        #This might want to move to Chapter's merge()
        elif isinstance(other.chapter, Chapter) and not isinstance(self.chapter, Chapter):
            #Our chapter is a bare value: adopt other's Chapter but keep our number
            chapter_num = self.chapter
            self.chapter = other.chapter
            self.chapter.number = chapter_num

        return super(Figure, self).merge(other)
abuddenberg's avatar
abuddenberg committed
167
168


169
170
171
172
173
174
175
176
177
178
179
180
181
class Report(GcisObject):
    """GCIS report record."""

    def __init__(self, data, trans=()):
        """
        :param data: dict of raw report values.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = ['doi', 'contact_note', 'title', 'publication_year', 'summary', 'url', 'contact_email', 'identifier', 'report_type_identifier']

        super(Report, self).__init__(data, fields=self.gcis_fields, trans=trans)

        # if self.report_type_identifier not in ['report', 'assessment', 'technical_input', 'indicator']:
        #     raise ValueError("report_type_identifier must be one of 'report', 'assessment', 'technical_input', 'indicator'")

    def as_json(self, indent=0, omit_fields=()):
        """Serialize to JSON; ``indent`` and ``omit_fields`` are forwarded to the base ``as_json``."""
        return super(Report, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Report: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()


188
class Chapter(GcisObject):
    """GCIS chapter record."""

    def __init__(self, data):
        """
        :param data: dict of raw chapter values.
        """
        self.gcis_fields = ['report_identifier', 'identifier', 'number', 'url', 'title']

        super(Chapter, self).__init__(data, fields=self.gcis_fields)

    def as_json(self, indent=0, omit_fields=()):
        """Serialize to JSON; ``indent`` and ``omit_fields`` are forwarded to the base ``as_json``."""
        return super(Chapter, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Chapter: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()

203

204
class Image(GcisObject):
    """GCIS image record; ``create_dt`` is stored as an ISO-format string."""

    def __init__(self, data, local_path=None, remote_path=None, trans=()):
        """
        :param data: dict of raw image values.
        :param local_path: stored as ``self.local_path``.
        :param remote_path: stored as ``self.remote_path``.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = [
            'attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
            'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
            'usage_limits',
        ]

        #Backing store for the create_dt property; set before the base
        #initializer runs because the property getter reads it
        self._create_dt = None

        super(Image, self).__init__(data, fields=self.gcis_fields, trans=trans)

        self.local_path = local_path
        self.remote_path = remote_path

    @property
    def create_dt(self):
        """Creation date as an ISO-format string, or None."""
        return self._create_dt

    @create_dt.setter
    def create_dt(self, value):
        #Falsy input clears the date without attempting to parse it
        if not value:
            self._create_dt = None
            return

        try:
            self._create_dt = parse(value).isoformat()
        except TypeError:
            self._create_dt = None

    def __str__(self):
        return '<Image: id:{id} name:{name}>'.format(id=self.identifier, name=self.title)
231
232


233
class Dataset(GcisObject):
    """GCIS dataset record with normalized date and year fields."""

    def __init__(self, data, trans=(), known_ids=None):
        """
        :param data: dict of raw dataset values.
        :param trans: key translations forwarded to the base initializer.
        :param known_ids: optional mapping of dataset name to identifier; when
            the dataset's name is found in it, that identifier is used.
        """
        self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
                            'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
                            'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
                            'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
                            'identifier', 'release_dt', 'attributes']

        #TODO: This hack has got to go.
        self._identifiers = known_ids

        #Private attributes for handling date parsing
        self._release_dt = None
        self._access_dt = None
        self._publication_year = None

        super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans)

        #Prefer the identifier registered for this dataset's name, if any
        if self._identifiers and self.name in self._identifiers:
            self.identifier = self._identifiers[self.name]

    def __repr__(self):
        return '<Dataset: id:{id} name:{name}>'.format(id=self.identifier, name=self.name)

    def __str__(self):
        return self.__repr__()

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references')):
        """Serialize to JSON, omitting nested/list fields by default.

        :param indent: forwarded to the base ``as_json``.
        :param omit_fields: field names to leave out.
        """
        return super(Dataset, self).as_json(indent=indent, omit_fields=omit_fields)

    def merge(self, other):
        """Merge ``other`` into this dataset, one instance attribute at a time.

        For every key of this instance's ``__dict__`` that ``other`` also has,
        other's value is taken when ours is ``None``/``''`` or when other's
        value is longer than ours.

        :param other: object to copy values from.
        :return: ``self``.
        """
        for k in self.__dict__:
            if not hasattr(other, k):
                continue

            ours = self.__dict__[k]
            theirs = getattr(other, k)

            #If our copy of the field is empty or the other copy is longer, take that one.
            if ours in (None, '') or self._is_longer(theirs, ours):
                self.__dict__[k] = theirs

        #Returned after the whole loop so every attribute is considered
        return self

    @staticmethod
    def _is_longer(candidate, current):
        """Return True when both values have a length and candidate's is greater."""
        try:
            return len(candidate) > len(current)
        except TypeError:
            #At least one side has no length (None, numbers, ...)
            return False

    @property
    def release_dt(self):
        """Release date as an ISO-format string, or None."""
        return self._release_dt

    @release_dt.setter
    def release_dt(self, value):
        try:
            self._release_dt = parse(value).isoformat() if value else None
        except TypeError:
            self._release_dt = None

    @property
    def access_dt(self):
        """Access date as an ISO-format string, or None."""
        return self._access_dt

    @access_dt.setter
    def access_dt(self, value):
        try:
            self._access_dt = parse(value).isoformat() if value else None
        except TypeError:
            # print "Problem with date: " + self.access_dt
            self._access_dt = None

    @property
    def publication_year(self):
        """First run of four digits found in the assigned value, or None."""
        return self._publication_year

    @publication_year.setter
    def publication_year(self, value):
        #Raw string so the backslash reaches the regex engine unchanged
        match = re.search(r'\d{4}', str(value)) if value else None
        if match:
            self._publication_year = match.group()
        else:
            self._publication_year = None
            
            
306
class Activity(GcisObject):
    """GCIS activity record."""

    def __init__(self, data, trans=()):
        """
        :param data: dict of raw activity values.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = ['start_time', 'uri', 'methodology', 'data_usage', 'href', 'metholodogies', 'end_time',
                            'output_artifacts', 'duration', 'identifier', 'publication_maps', 'computing_environment',
                            'software', 'visualization_software', 'notes']

        super(Activity, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('metholodogies', 'publication_maps')):
        """Serialize to JSON; ``indent`` and ``omit_fields`` are forwarded to the base ``as_json``."""
        return super(Activity, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Activity: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()

abuddenberg's avatar
abuddenberg committed
323

abuddenberg's avatar
abuddenberg committed
324
class Person(Gcisbase):
    """GCIS person record."""

    def __init__(self, data, trans=()):
        """
        :param data: dict of raw person values.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = ['first_name', 'last_name', 'middle_name', 'contributors', 'url', 'uri', 'href', 'orcid',
                            'id']

        super(Person, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('contributors',)):
        """Serialize to JSON; ``indent`` and ``omit_fields`` are forwarded to the base ``as_json``."""
        return super(Person, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Person: id:{id} first:{fn} last:{ln}>'.format(id=self.id, fn=self.first_name, ln=self.last_name)

    def __str__(self):
        return self.__repr__()


class Organization(Gcisbase):
    """GCIS organization record."""

    def __init__(self, data, trans=()):
        """
        :param data: dict of raw organization values.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = [
            'organization_type_identifier', 'url', 'uri', 'href', 'country_code', 'identifier', 'name',
        ]

        super(Organization, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def __repr__(self):
        template = '<Organization: id:{id} name:{name}>'
        return template.format(id=self.identifier, name=self.name)

    def __str__(self):
        #Same text as the repr
        return self.__repr__()


354
class Contributor(Gcisbase):
    """Links a person, an organization and a role."""

    def __init__(self, data):
        """
        :param data: dict of raw contributor values; the 'person' and
            'organization' keys are popped from it after the base initializer
            has run.
        """
        self.gcis_fields = [
            'role_type_identifier', 'organization_uri', 'uri', 'href', 'person_uri', 'person_id', 'id',
        ]

        super(Contributor, self).__init__(data, fields=self.gcis_fields)

        person_tree = data.pop('person', None)
        org_tree = data.pop('organization', None)

        #Nested trees become domain objects; missing or empty ones become None
        self.person = None
        if person_tree:
            self.person = Person(person_tree)

        self.organization = None
        if org_tree:
            self.organization = Organization(org_tree)

        self.role = None
        if self.role_type_identifier:
            self.role = Role(self.role_type_identifier)

    def __repr__(self):
        template = '<Contributor: Person:{p} Org:{o} Role:{r}>'
        return template.format(p=self.person, o=self.organization, r=self.role)

    def __str__(self):
        #Same text as the repr
        return self.__repr__()
372
373
374
375
376
377
378
379
380
381
382
383


class Role(object):
    """Thin wrapper around a role type identifier."""

    def __init__(self, type_id):
        """
        :param type_id: the role type identifier; stored unchanged.
        """
        self.type_id = type_id

    def __repr__(self):
        #__repr__ must return a string; str() keeps string ids unchanged and
        #avoids a TypeError for any other kind of identifier.
        return str(self.type_id)

    def __str__(self):
        return self.__repr__()

384
385

class Parent(Gcisbase):
    """Describes a relationship from an object to a parent publication."""

    def __init__(self, data, target_pub=None, trans=(), pubtype_map=None):
        """
        :param data: dict of raw parent values.
        :param target_pub: stored as ``self.publication``.
        :param trans: key translations forwarded to the base initializer.
        :param pubtype_map: optional mapping applied to values assigned to
            ``publication_type_identifier``.
        """
        self.gcis_fields = [
            'relationship', 'url', 'publication_type_identifier', 'label', 'activity_uri', 'note',
        ]

        #Both must be in place before the base initializer runs: the
        #publication_type_identifier property reads them
        self.publication_type_map = pubtype_map
        self._publication_type_identifier = None

        self.activity = None

        super(Parent, self).__init__(data, fields=self.gcis_fields, trans=trans)
        self.publication = target_pub

        #HACK: Set default relationship type
        if not self.relationship:
            self.relationship = 'prov:wasDerivedFrom'

    @property
    def publication_type_identifier(self):
        """Publication type, after translation through ``publication_type_map``."""
        return self._publication_type_identifier

    @publication_type_identifier.setter
    def publication_type_identifier(self, value):
        mapping = self.publication_type_map
        #Translate only when a map exists and knows this value
        if mapping and value in mapping:
            value = mapping[value]
        self._publication_type_identifier = value

    @staticmethod
    def from_obj(gcis_obj, activity=None):
        """Build a Parent that points at ``gcis_obj``.

        The publication type is the lowercased class name of ``gcis_obj``; the
        label is its ``title`` if it has that attribute, else its ``name``,
        else '***MISSING***'.

        :param gcis_obj: object to reference; its ``identifier`` is read.
        :param activity: stored as the new Parent's ``activity``.
        :return: the new Parent.
        """
        gcis_obj_type = type(gcis_obj).__name__.lower()

        label = '***MISSING***'
        for candidate in ('title', 'name'):
            if hasattr(gcis_obj, candidate):
                label = getattr(gcis_obj, candidate)
                break

        url = None
        if gcis_obj_type and gcis_obj.identifier:
            url = '/{type}/{id}'.format(type=gcis_obj_type, id=gcis_obj.identifier)

        parent = Parent(
            {
                'relationship': 'prov:wasDerivedFrom',
                'publication_type_identifier': gcis_obj_type,
                'url': url,
                'label': label,
            },
            target_pub=gcis_obj,
        )
        parent.activity = activity

        return parent

    def __repr__(self):
        template = '<Parent: rel:{rel} pub_type:{type} url:{url} label:{lbl}>'
        return template.format(
            rel=self.relationship, type=self.publication_type_identifier, url=self.url, lbl=self.label
        )

    def __str__(self):
        #Same text as the repr
        return self.__repr__()
438

439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473

class Article(Gcisbase):
    """GCIS journal article record."""

    def __init__(self, data, trans=()):
        """
        :param data: dict of raw article values.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = ['files', 'doi', 'contributors', 'title', 'url', 'notes', 'uri',
                            'journal_identifier', 'journal_pages', 'cited_by', 'href', 'parents', 'year',
                            'journal_vol', 'references', 'identifier']

        super(Article, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references', 'cited_by')):
        """Serialize to JSON; ``indent`` and ``omit_fields`` are forwarded to the base ``as_json``."""
        return super(Article, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Article: id:{id} title:{t}>'.format(id=self.identifier, t=self.title)

    def __str__(self):
        return self.__repr__()


class Webpage(Gcisbase):
    """GCIS webpage record."""

    def __init__(self, data, trans=()):
        """
        :param data: dict of raw webpage values.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = ['files', 'contributors', 'title', 'url', 'uri', 'cited_by', 'href', 'references',
                            'parents', 'access_date', 'identifier']

        super(Webpage, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references', 'cited_by')):
        """Serialize to JSON; ``indent`` and ``omit_fields`` are forwarded to the base ``as_json``."""
        return super(Webpage, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Webpage id:{id} title:{t}>'.format(id=self.identifier, t=self.title)

    def __str__(self):
        return self.__repr__()

474
475
476
477
478
479
480
481
482
483
484
485
486
487
488

class File(Gcisbase):
    """GCIS file record."""

    def __init__(self, data, trans=()):
        """
        :param data: dict of raw file values.
        :param trans: key translations forwarded to the base initializer.
        """
        self.gcis_fields = [
            'landing_page', 'sha1', 'url', 'thumbnail_href', 'uri', 'href', 'location', 'file',
            'identifier', 'thumbnail', 'mime_type', 'size',
        ]

        super(File, self).__init__(data, fields=self.gcis_fields, trans=trans)

    #No as_json override here: the inherited one is used as-is.

    def __repr__(self):
        template = '<File id:{id} path:{p}>'
        return template.format(id=self.identifier, p=self.file)

    def __str__(self):
        #Same text as the repr
        return self.__repr__()