domain.py 16.9 KB
Newer Older
abuddenberg's avatar
abuddenberg committed
1
2
__author__ = 'abuddenberg'

3
4
from copy import deepcopy
import json
5
import re
6
import inspect
abuddenberg's avatar
abuddenberg committed
7

8
9
from dateutil.parser import parse

10

abuddenberg's avatar
abuddenberg committed
11
def _normalize_text(value):
    """Return *value* with surrounding whitespace stripped when it is string-like.

    Values without a ``strip`` method (numbers, lists, dicts, None, ...) are
    returned unchanged.  On Python 2 only, a ``unicode`` result is encoded to
    a UTF-8 byte string, which is what the rest of this module historically
    stored.  Byte strings are never re-encoded: on Python 2 that would trigger
    an implicit ASCII decode and raise UnicodeDecodeError for non-ASCII data,
    and on Python 3 encoding ``str`` would yield ``bytes`` that ``json.dumps``
    cannot serialize.
    """
    try:
        value = value.strip()
    except AttributeError:
        return value

    # ``str is bytes`` is only true on Python 2; anything string-like that is
    # not a byte string there is ``unicode``.
    if str is bytes and not isinstance(value, str):
        try:
            value = value.encode('utf-8')
        except AttributeError:
            # String-like object without ``encode`` (e.g. bytearray): keep as is.
            pass
    return value


class Gcisbase(object):
    """Base class that turns a decoded JSON dict into attributes.

    Only keys named in ``fields`` (or already present as attributes or
    properties on the instance) are copied onto the object; everything else
    in ``data`` is ignored.
    """

    def __init__(self, data, fields=(), trans=()):
        """Populate the instance from ``data``.

        :param data: dict of raw values.  NOTE: it is modified in place
            (translated keys are renamed, string values are stripped);
            subclasses in this module read from the same dict afterwards.
        :param fields: iterable of attribute names to create, each
            initialised to None.
        :param trans: mapping of ``{key_in_data: attribute_name}`` applied
            before the values are copied.  The default empty tuple means
            "no translations".
        """
        #Setup class variables
        self.gcis_fields = fields
        self.translations = trans

        #Save off a copy of the original JSON for debugging
        self.original = deepcopy(data)

        #Create attributes from the master list
        self.__dict__.update(dict.fromkeys(self.gcis_fields, None))

        #Perform translations
        for term in self.translations:
            val = data.pop(term, None)
            if val is not None:
                data[self.translations[term]] = val

        for k in data:
            if hasattr(self, k):
                #Strip whitespace from strings for consistency
                data[k] = _normalize_text(data[k])

                # Falsy values (None, '', 0, empty containers) are skipped, so
                # the attribute keeps its None default.
                if data[k]:
                    setattr(self, k, data[k])

    def merge(self, other):
        """Fill attributes that are None or '' on self from ``other``.

        Every non-routine member whose name does not start with ``__`` is
        considered, including properties.  Returns ``self`` so calls can be
        chained.
        """
        attrs_we_care_about = [
            (attr, v)
            for attr, v in inspect.getmembers(self, lambda a: not inspect.isroutine(a))
            if not attr.startswith('__')
        ]

        for attr, value in attrs_we_care_about:
            if value in (None, '') and hasattr(other, attr):
                setattr(self, attr, getattr(other, attr))

        return self

    def as_json(self, indent=0, omit_fields=()):
        """Serialize the ``gcis_fields`` attributes to a JSON string.

        ``uri`` and ``href`` are always left out, together with any name in
        ``omit_fields``.  ``indent`` is passed straight to ``json.dumps``.
        """
        out_fields = set(self.gcis_fields) - (set(['uri', 'href']) | set(omit_fields))
        return json.dumps({f: getattr(self, f) for f in out_fields}, indent=indent)
57

abuddenberg's avatar
abuddenberg committed
58

59
60
class GcisObject(Gcisbase):
    """Gcisbase subclass that also carries ``contributors`` and ``parents`` lists."""

    def __init__(self, data, **kwargs):
        """Build the object from ``data``.

        :param data: dict of raw values; ``contributors`` and ``parents`` are
            removed from it and wrapped in Contributor / Parent instances.
        :param kwargs: forwarded unchanged to ``Gcisbase.__init__``.
        :raises TypeError: if ``data`` is not a dict.
        """
        if not isinstance(data, dict):
            raise TypeError('Expected dict, got {t}'.format(t=type(data)))

        #Special case for contributors and parents: pull them out before the
        #base class copies plain values onto the instance.
        contrib_list = data.pop('contributors', None)
        parents_list = data.pop('parents', None)

        super(GcisObject, self).__init__(data, **kwargs)

        # Assigned after the base initialiser on purpose: it resets every name
        # in gcis_fields to None, which would otherwise wipe these lists for
        # subclasses that list 'contributors' / 'parents' as fields.
        self.contributors = [Contributor(contrib) for contrib in contrib_list] if contrib_list else []
        self.parents = [Parent(parent) for parent in parents_list] if parents_list else []

    def add_contributor(self, contributor):
        """Append ``contributor``; raise TypeError unless it is a Contributor."""
        if isinstance(contributor, Contributor):
            self.contributors.append(contributor)
        else:
            raise TypeError('Expected Contributor, got {t}'.format(t=type(contributor)))

    def add_person(self, person):
        """Append a new Contributor wrapping ``person`` and an empty Organization.

        Contributor takes a single data dict and Organization requires one, so
        the contributor is built empty and then populated.
        """
        contributor = Contributor({})
        contributor.person = person
        contributor.organization = Organization({})
        self.contributors.append(contributor)

    def add_parent(self, parent):
        """Append ``parent``; raise TypeError unless it is a Parent."""
        if isinstance(parent, Parent):
            self.parents.append(parent)
        else:
            raise TypeError('Expected Parent, got {t}'.format(t=type(parent)))
87

88
89

class Figure(GcisObject):
    """A figure, with an optional Chapter and a list of Image objects."""

    def __init__(self, data, local_path=None, remote_path=None, trans=()):
        """Build the figure from ``data``.

        :param data: dict of raw values; ``chapter`` and ``images`` are
            removed from it and wrapped in Chapter / Image instances.
        :param local_path: stored as ``self.local_path``.
        :param remote_path: stored as ``self.remote_path``.
        :param trans: key translations, forwarded to the base class.
        """
        self.gcis_fields = [
            'usage_limits', 'kindred_figures', 'time_start', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max',
            'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
            'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images', 'url'
        ]

        super(Figure, self).__init__(data, fields=self.gcis_fields, trans=trans)

        self.local_path = local_path
        self.remote_path = remote_path

        #Special case for chapter
        chap_tree = data.pop('chapter', None)
        self.chapter = Chapter(chap_tree) if chap_tree else self.chapter

        #Special case for images
        image_list = data.pop('images', None)
        self.images = [Image(image) for image in image_list] if image_list else []

        #Hack: drop the '/figure/' prefix, or flag a missing identifier.
        self.identifier = self.identifier.replace('/figure/', '') if self.identifier not in ('', None) else '***ID MISSING***'

    @property
    def figure_num(self):
        """Return '<chapter>.<ordinal>'.

        Uses ``chapter.number`` when ``self.chapter`` is a Chapter with a
        truthy number and ``ordinal`` is truthy; otherwise ``self.chapter`` is
        formatted as-is.
        """
        if isinstance(self.chapter, Chapter) and self.chapter.number and self.ordinal:
            return '{0}.{1}'.format(self.chapter.number, self.ordinal)
        else:
            return '{0}.{1}'.format(self.chapter, self.ordinal)

    #TODO: Ordinal handling is unnecessarily complex
    @figure_num.setter
    def figure_num(self, value):
        """Parse 'chapter.figure' and store both integers.

        On a malformed value a message is printed and both numbers are set to
        None.
        """
        try:
            chp, fig = value.split('.')
            chp = int(chp)
            fig = int(fig)
        except ValueError:
            # Parenthesised single argument: same output under the Python 2
            # print statement and the Python 3 print function.
            print('Invalid chapter/figure numbers: ' + value)
            chp = None
            fig = None
        self.ordinal = fig

        #If we have an actual Chapter instance, populate it
        if isinstance(self.chapter, Chapter):
            self.chapter.number = chp
        else:
            self.chapter = chp

    def as_json(self, indent=0, omit_fields=('images', 'chapter', 'kindred_figures', 'keywords')):
        """Serialize to JSON, omitting nested/collection fields by default."""
        return super(Figure, self).as_json(indent=indent, omit_fields=omit_fields)

    def __str__(self):
        string = '<Figure: id:{f_id} fignum:{f_num} name:{f_name}>\n\t[Images: {imgs}]'.format(
            f_id=self.identifier, f_num=self.figure_num, f_name=self.title, imgs=[i.identifier for i in self.images]
        )
        return string

    def __repr__(self):
        return self.__str__()

    def merge(self, other):
        """Merge ``other`` into self, with special handling for the chapter.

        If both sides hold a Chapter the chapters are merged; if only
        ``other`` does, its Chapter is adopted and given our current chapter
        value as its number.  Remaining attributes go through the base merge.
        """
        # Special handling for Chapters
        if isinstance(other.chapter, Chapter) and isinstance(self.chapter, Chapter):
            self.chapter.merge(other.chapter)

        #This might want to move to Chapter's merge()
        elif isinstance(other.chapter, Chapter) and not isinstance(self.chapter, Chapter):
            chapter_num = self.chapter
            self.chapter = other.chapter
            self.chapter.number = chapter_num

        return super(Figure, self).merge(other)
abuddenberg's avatar
abuddenberg committed
164
165


166
167
168
169
170
171
172
173
174
175
176
177
178
class Report(GcisObject):
    """A report record."""

    def __init__(self, data, trans=()):
        """Build the report from ``data``; ``trans`` is forwarded to the base class."""
        self.gcis_fields = ['doi', 'contact_note', 'title', 'publication_year', 'summary', 'url', 'contact_email', 'identifier', 'report_type_identifier']

        super(Report, self).__init__(data, fields=self.gcis_fields, trans=trans)

        # NOTE: report_type_identifier is not validated here.

    def as_json(self, indent=0, omit_fields=()):
        """Serialize to JSON; ``indent`` and ``omit_fields`` go to the base class."""
        return super(Report, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Report: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()


185
class Chapter(GcisObject):
    """A chapter record."""

    def __init__(self, data):
        """Build the chapter from ``data`` (no key translations are supported)."""
        self.gcis_fields = ['report_identifier', 'identifier', 'number', 'url', 'title']

        super(Chapter, self).__init__(data, fields=self.gcis_fields)

    def as_json(self, indent=0, omit_fields=()):
        """Serialize to JSON; ``indent`` and ``omit_fields`` go to the base class."""
        return super(Chapter, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Chapter: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()

200

201
class Image(GcisObject):
    """An image record; ``create_dt`` is normalised to an ISO 8601 string."""

    def __init__(self, data, local_path=None, remote_path=None, trans=()):
        """Build the image from ``data``.

        :param data: dict of raw values.
        :param local_path: stored as ``self.local_path``.
        :param remote_path: stored as ``self.remote_path``.
        :param trans: key translations, forwarded to the base class.
        """
        self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
                            'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
                            'usage_limits']

        #Private attributes for handling date parsing; must exist before the
        #base initialiser touches the create_dt property.
        self._create_dt = None

        super(Image, self).__init__(data, fields=self.gcis_fields, trans=trans)

        self.local_path = local_path
        self.remote_path = remote_path

    @property
    def create_dt(self):
        """ISO-formatted creation date, or None."""
        return self._create_dt

    @create_dt.setter
    def create_dt(self, value):
        """Parse ``value`` and store its ISO form; store None if empty or unparseable."""
        try:
            self._create_dt = parse(value).isoformat() if value else None
        except (TypeError, ValueError, OverflowError):
            # Best effort: a malformed date string must not abort
            # construction of the whole object.
            self._create_dt = None

    def __str__(self):
        return '<Image: id:{id} name:{name}>'.format(id=self.identifier, name=self.title)
228
229


230
def _iso_or_none(value):
    """Return ``value`` parsed as a date in ISO format, or None if empty or unparseable."""
    if not value:
        return None
    try:
        return parse(value).isoformat()
    except (TypeError, ValueError, OverflowError):
        return None


def _is_longer(candidate, current):
    """Return True when both values have a length and ``candidate`` is longer."""
    try:
        return len(candidate) > len(current)
    except TypeError:
        # At least one side has no length (None, numbers, ...).
        return False


class Dataset(GcisObject):
    """A dataset record with normalised dates and publication year."""

    def __init__(self, data, trans=(), known_ids=None):
        """Build the dataset from ``data``.

        :param data: dict of raw values.
        :param trans: key translations, forwarded to the base class.
        :param known_ids: optional mapping of dataset name to identifier; when
            the dataset's name is found in it, that identifier replaces the
            one from ``data``.
        """
        self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
                            'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
                            'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
                            'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
                            'identifier', 'release_dt', 'attributes']

        #TODO: This hack has got to go.
        self._identifiers = known_ids

        #Private attributes for handling date parsing; must exist before the
        #base initialiser touches the corresponding properties.
        self._release_dt = None
        self._access_dt = None
        self._publication_year = None

        super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=trans)

        self.identifier = self._identifiers[self.name] if self._identifiers and self.name in self._identifiers else self.identifier

    def __repr__(self):
        return '<Dataset: id:{id} name:{name}>'.format(id=self.identifier, name=self.name)

    def __str__(self):
        return self.__repr__()

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references')):
        """Serialize to JSON, omitting nested/collection fields by default."""
        return super(Dataset, self).as_json(indent=indent, omit_fields=omit_fields)

    def merge(self, other):
        """Merge ``other`` into self and return self.

        For every entry in our instance dict that ``other`` also has: take
        the other value when ours is None or '', or when both have a length
        and the other one is longer.
        """
        # list(): iterate over a snapshot of the keys while values are replaced.
        for k in list(self.__dict__):
            if not hasattr(other, k):
                continue
            ours = self.__dict__[k]
            theirs = getattr(other, k)
            if ours in (None, '') or _is_longer(theirs, ours):
                self.__dict__[k] = theirs

        return self

    @property
    def release_dt(self):
        """ISO-formatted release date, or None."""
        return self._release_dt

    @release_dt.setter
    def release_dt(self, value):
        self._release_dt = _iso_or_none(value)

    @property
    def access_dt(self):
        """ISO-formatted access date, or None."""
        return self._access_dt

    @access_dt.setter
    def access_dt(self, value):
        self._access_dt = _iso_or_none(value)

    @property
    def publication_year(self):
        """First four-digit run found in the assigned value, as a string, or None."""
        return self._publication_year

    @publication_year.setter
    def publication_year(self, value):
        match = re.search(r'\d{4}', str(value)) if value else None
        if match:
            self._publication_year = match.group()
        else:
            self._publication_year = None
            
            
303
class Activity(GcisObject):
    """An activity record."""

    def __init__(self, data, trans=()):
        """Build the activity from ``data``; ``trans`` is forwarded to the base class."""
        # NOTE(review): 'metholodogies' looks like a misspelling, but it is a
        # field name used at runtime, so it is kept verbatim -- confirm
        # against the data source before changing it.
        self.gcis_fields = ['start_time', 'uri', 'methodology', 'data_usage', 'href', 'metholodogies', 'end_time',
                            'output_artifacts', 'duration', 'identifier', 'publication_maps', 'computing_environment',
                            'software', 'visualization_software', 'notes']

        super(Activity, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('metholodogies', 'publication_maps')):
        """Serialize to JSON, omitting the listed fields by default."""
        return super(Activity, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Activity: id:{id}>'.format(id=self.identifier)

    def __str__(self):
        return self.__repr__()

abuddenberg's avatar
abuddenberg committed
320

abuddenberg's avatar
abuddenberg committed
321
class Person(Gcisbase):
    """A person record."""

    def __init__(self, data, trans=()):
        """Build the person from ``data``; ``trans`` is forwarded to the base class."""
        self.gcis_fields = ['first_name', 'last_name', 'middle_name', 'contributors', 'url', 'uri', 'href', 'orcid',
                            'id']

        super(Person, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('contributors',)):
        """Serialize to JSON, omitting ``contributors`` by default."""
        return super(Person, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Person: id:{id} first:{fn} last:{ln}>'.format(id=self.id, fn=self.first_name, ln=self.last_name)

    def __str__(self):
        return self.__repr__()


class Organization(Gcisbase):
    """An organization record."""

    def __init__(self, data, trans=()):
        """Build the organization from ``data``; ``trans`` is forwarded to the base class."""
        self.gcis_fields = [
            'organization_type_identifier',
            'url',
            'uri',
            'href',
            'country_code',
            'identifier',
            'name',
        ]
        super(Organization, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        template = '<Organization: id:{id} name:{name}>'
        return template.format(id=self.identifier, name=self.name)


351
class Contributor(Gcisbase):
    """Links an optional Person, Organization and Role."""

    def __init__(self, data):
        """Build the contributor from ``data``.

        The ``person`` and ``organization`` entries are removed from ``data``
        and wrapped in Person / Organization instances when non-empty.
        """
        self.gcis_fields = [
            'role_type_identifier', 'organization_uri', 'uri', 'href', 'person_uri', 'person_id', 'id',
        ]
        super(Contributor, self).__init__(data, fields=self.gcis_fields)

        # Both nested trees are popped before anything is constructed.
        raw_person = data.pop('person', None)
        raw_org = data.pop('organization', None)

        if raw_person:
            self.person = Person(raw_person)
        else:
            self.person = None

        if raw_org:
            self.organization = Organization(raw_org)
        else:
            self.organization = None

        if self.role_type_identifier:
            self.role = Role(self.role_type_identifier)
        else:
            self.role = None

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        template = '<Contributor: Person:{p} Org:{o} Role:{r}>'
        return template.format(p=self.person, o=self.organization, r=self.role)
369
370
371
372
373
374
375
376
377
378
379
380


class Role(object):
    """Wrapper around a role type identifier, which is also its text form."""

    def __init__(self, type_id):
        # Stored verbatim; __repr__ hands it back unchanged.
        self.type_id = type_id

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        return self.type_id

381
382

class Parent(Gcisbase):
    """A relationship from one object to a parent publication."""

    def __init__(self, data, target_pub=None, trans=(), pubtype_map=None):
        """Build the parent link from ``data``.

        :param data: dict of raw values.
        :param target_pub: stored as ``self.publication``.
        :param trans: key translations, forwarded to the base class.
        :param pubtype_map: optional mapping used to rewrite
            ``publication_type_identifier`` values when they are assigned.
        """
        self.gcis_fields = [
            'relationship', 'url', 'publication_type_identifier', 'label', 'activity_uri', 'note',
        ]

        # These must exist before the base initialiser runs, because it may
        # assign through the publication_type_identifier property.
        self.publication_type_map = pubtype_map
        self._publication_type_identifier = None
        self.activity = None

        super(Parent, self).__init__(data, fields=self.gcis_fields, trans=trans)
        self.publication = target_pub

        #HACK: Set default relationship type
        if not self.relationship:
            self.relationship = 'prov:wasDerivedFrom'

    @property
    def publication_type_identifier(self):
        """The publication type, after any mapping applied on assignment."""
        return self._publication_type_identifier

    @publication_type_identifier.setter
    def publication_type_identifier(self, value):
        type_map = self.publication_type_map
        if type_map and value in type_map:
            self._publication_type_identifier = type_map[value]
        else:
            self._publication_type_identifier = value

    @staticmethod
    def from_obj(gcis_obj, activity=None):
        """Create a Parent pointing at ``gcis_obj``.

        The publication type is the lower-cased class name of ``gcis_obj``;
        the label is its ``title``, else its ``name``, else '***MISSING***'.
        ``activity`` is attached to the result.
        """
        type_name = type(gcis_obj).__name__.lower()

        if hasattr(gcis_obj, 'title'):
            label = gcis_obj.title
        elif hasattr(gcis_obj, 'name'):
            label = gcis_obj.name
        else:
            label = '***MISSING***'

        if type_name and gcis_obj.identifier:
            url = '/{type}/{id}'.format(type=type_name, id=gcis_obj.identifier)
        else:
            url = None

        parent = Parent(
            {
                'relationship': 'prov:wasDerivedFrom',
                'publication_type_identifier': type_name,
                'url': url,
                'label': label,
            },
            target_pub=gcis_obj,
        )
        parent.activity = activity
        return parent

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        template = '<Parent: rel:{rel} pub_type:{type} url:{url} label:{lbl}>'
        return template.format(
            rel=self.relationship, type=self.publication_type_identifier, url=self.url, lbl=self.label
        )
435

436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470

class Article(Gcisbase):
    """An article record."""

    def __init__(self, data, trans=()):
        """Build the article from ``data``; ``trans`` is forwarded to the base class."""
        self.gcis_fields = ['files', 'doi', 'contributors', 'title', 'url', 'notes', 'uri',
                            'journal_identifier', 'journal_pages', 'cited_by', 'href', 'parents', 'year',
                            'journal_vol', 'references', 'identifier']

        super(Article, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references', 'cited_by')):
        """Serialize to JSON, omitting nested/collection fields by default."""
        return super(Article, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Article: id:{id} title:{t}>'.format(id=self.identifier, t=self.title)

    def __str__(self):
        return self.__repr__()


class Webpage(Gcisbase):
    """A web page record."""

    def __init__(self, data, trans=()):
        """Build the web page from ``data``; ``trans`` is forwarded to the base class."""
        self.gcis_fields = ['files', 'contributors', 'title', 'url', 'uri', 'cited_by', 'href', 'references',
                            'parents', 'access_date', 'identifier']

        super(Webpage, self).__init__(data, fields=self.gcis_fields, trans=trans)

    def as_json(self, indent=0, omit_fields=('files', 'parents', 'contributors', 'references', 'cited_by')):
        """Serialize to JSON, omitting nested/collection fields by default."""
        return super(Webpage, self).as_json(indent=indent, omit_fields=omit_fields)

    def __repr__(self):
        return '<Webpage id:{id} title:{t}>'.format(id=self.identifier, t=self.title)

    def __str__(self):
        return self.__repr__()