# domain.py
from types import NoneType

abuddenberg's avatar
abuddenberg committed
3
4
__author__ = 'abuddenberg'

5
6
from copy import deepcopy
import json
abuddenberg's avatar
abuddenberg committed
7
from dateutil.parser import parse
8
import re
9
import inspect
abuddenberg's avatar
abuddenberg committed
10
11

class Gcisbase(object):
    """Base class for objects built from a JSON-style dictionary.

    The constructor creates one attribute per entry in ``fields`` (initially
    ``None``), renames incoming keys according to ``trans`` and then copies
    every value whose key matches an existing attribute onto the instance.
    """

    def __init__(self, data, fields=None, trans=None):
        """Populate the instance from ``data``.

        :param data: dict of incoming values. It is modified in place:
            translated keys are renamed and string values are cleaned.
        :param fields: list of attribute names to create on the instance.
        :param trans: dict mapping incoming key names to attribute names.
        """
        # Setup class variables. Fresh containers are created per instance so
        # that no state is shared through a mutable default argument.
        self.gcis_fields = [] if fields is None else fields
        self.translations = {} if trans is None else trans

        # Save off a copy of the original JSON for debugging
        self.original = deepcopy(data)

        # Create attributes from the master list
        self.__dict__.update(dict.fromkeys(self.gcis_fields, None))

        # Perform translations: rename each known incoming key to its
        # attribute name. Keys whose value is None are simply dropped.
        for term, field in self.translations.items():
            val = data.pop(term, None)
            if val is not None:
                data[field] = val

        # Only keys that match an existing attribute (or property) are kept.
        for k in data:
            if hasattr(self, k):
                data[k] = self._clean(data[k])
                setattr(self, k, data[k])

    @staticmethod
    def _clean(value):
        """Return ``value`` stripped of surrounding whitespace when possible.

        Values without a ``strip`` method are returned unchanged. On Python 2
        ``unicode`` text is additionally encoded to a UTF-8 byte ``str``; on
        Python 3 text is left as ``str`` so it stays JSON-serializable.
        """
        try:
            # Strip whitespace from strings for consistency
            value = value.strip()
            # ``str is bytes`` is only true on Python 2, where the native
            # string type is a byte string and ``unicode`` must be encoded.
            if str is bytes and not isinstance(value, str):
                value = value.encode('utf-8')
        except AttributeError:
            pass
        return value

    def merge(self, other):
        """Fill this object's empty attributes from ``other``.

        Every non-routine member whose name does not start with ``__`` and
        whose value is ``None`` or ``''`` is replaced by the value ``other``
        holds for the same name, if ``other`` has that attribute.

        :param other: object to take the missing values from.
        :return: ``self``.
        """
        # This sucks
        attrs_we_care_about = [
            (attr, v)
            for attr, v in inspect.getmembers(self, lambda a: not inspect.isroutine(a))
            if not attr.startswith('__')
        ]

        for attr, value in attrs_we_care_about:
            if value in (None, '') and hasattr(other, attr):
                # Take the value from ``other``; writing back ``value`` would
                # leave the attribute empty.
                setattr(self, attr, getattr(other, attr))

        return self

    def as_json(self, indent=0, omit_fields=None):
        """Serialize the declared fields to a JSON string.

        ``uri`` and ``href`` are always left out, together with any names
        listed in ``omit_fields``.

        :param indent: indentation passed through to ``json.dumps``.
        :param omit_fields: optional iterable of additional field names to skip.
        :return: JSON text.
        """
        omitted = set(['uri', 'href']) | set(omit_fields or ())
        out_fields = set(self.gcis_fields) - omitted
        return json.dumps({f: getattr(self, f) for f in out_fields}, indent=indent)
55

abuddenberg's avatar
abuddenberg committed
56
57

class Figure(Gcisbase):
    """A figure, optionally linked to a Chapter and a list of Images."""

    def __init__(self, data):
        """Build a Figure from ``data``.

        :param data: dict of figure values. A ``chapter`` entry is turned into
            a Chapter and an ``images`` entry into a list of Image objects;
            both keys are removed from ``data``.
        """
        self.gcis_fields = [
            'usage_limits', 'kindred_figures', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max', 'time_start',
            'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
            'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images'
        ]

        self.translations = {
            'what_is_the_figure_id': 'identifier',
            'what_is_the_name_of_the_figure_as_listed_in_the_report': 'title',
            'when_was_this_figure_created': 'create_dt',
            'what_is_the_chapter_and_figure_number': 'figure_num'
        }

        super(Figure, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        # Special case for chapter
        chap_tree = data.pop('chapter', None)
        self.chapter = Chapter(chap_tree) if chap_tree else self.chapter

        # Special case for images
        image_list = data.pop('images', None)
        self.images = [Image(image) for image in image_list] if image_list else []

        # Hack: strip the '/figure/' path fragment. A missing identifier
        # (None as well as '') gets the placeholder instead of crashing.
        if self.identifier:
            self.identifier = self.identifier.replace('/figure/', '')
        else:
            self.identifier = '***ID MISSING***'

    @property
    def figure_num(self):
        """Return the '<chapter>.<ordinal>' label for this figure.

        The chapter's ``number`` is used when ``chapter`` is a Chapter with a
        truthy number and ``ordinal`` is truthy; otherwise ``chapter`` itself
        is formatted as-is.
        """
        if isinstance(self.chapter, Chapter) and self.chapter.number and self.ordinal:
            return '{0}.{1}'.format(self.chapter.number, self.ordinal)
        else:
            return '{0}.{1}'.format(self.chapter, self.ordinal)

    #TODO: Ordinal handling is unnecessarily complex
    @figure_num.setter
    def figure_num(self, value):
        """Split a '<chapter>.<figure>' string into chapter number and ordinal.

        Input that is not two dot-separated integers is reported on stdout and
        both parts are set to None.
        """
        try:
            chp, fig = value.split('.')
            chp = int(chp)
            fig = int(fig)
        except (ValueError, AttributeError):
            # AttributeError covers values that have no split() at all.
            print('Invalid chapter/figure numbers: {0}'.format(value))
            chp = None
            fig = None
        self.ordinal = fig

        # If we have an actual Chapter instance, populate it
        if isinstance(self.chapter, Chapter):
            self.chapter.number = chp
        else:
            self.chapter = chp

    def as_json(self, indent=0):
        """Serialize the figure, leaving out nested and list-valued fields.

        :param indent: indentation passed through to the base serializer.
        """
        return super(Figure, self).as_json(
            indent=indent,
            omit_fields=['images', 'chapter', 'kindred_figures', 'keywords']
        )

    def __str__(self):
        string = '{f_id}: Figure {f_num}: {f_name}\n\tImages: {imgs}'.format(
            f_id=self.identifier, f_num=self.figure_num, f_name=self.title, imgs=[i.identifier for i in self.images]
        )
        return string

    def __repr__(self):
        return self.__str__()

    def merge(self, other):
        """Merge ``other`` into this figure, with special handling for chapters.

        :param other: Figure to take missing values from.
        :return: ``self``.
        """
        # Special handling for Chapters
        if isinstance(other.chapter, Chapter) and isinstance(self.chapter, Chapter):
            self.chapter.merge(other.chapter)

        # This might want to move to Chapter's merge()
        elif isinstance(other.chapter, Chapter) and not isinstance(self.chapter, Chapter):
            chapter_num = self.chapter
            # NOTE: the Chapter object is shared with ``other`` from here on.
            self.chapter = other.chapter
            # Carry our own chapter number over only when we actually have
            # one; otherwise keep the number the adopted chapter came with.
            if chapter_num not in (None, ''):
                self.chapter.number = chapter_num

        return super(Figure, self).merge(other)
abuddenberg's avatar
abuddenberg committed
136
137


138
139
class Chapter(Gcisbase):
    """A report chapter; carries no key translations of its own."""

    def __init__(self, data):
        """Build a Chapter from ``data``.

        :param data: dict of chapter values; only the keys listed in
            ``gcis_fields`` are copied onto the instance.
        """
        self.gcis_fields = ['report_identifier', 'identifier', 'number', 'url', 'title']

        super(Chapter, self).__init__(data, fields=self.gcis_fields)
abuddenberg's avatar
abuddenberg committed
143

144

145
class Image(Gcisbase):
    """An image, with optional file locations and a list of datasets."""

    def __init__(self, data, local_path=None, remote_path=None):
        """Build an Image from ``data``.

        :param data: dict of image values; the keys named in ``translations``
            are renamed to their attribute names.
        :param local_path: stored unchanged on ``self.local_path``.
        :param remote_path: stored unchanged on ``self.remote_path``.
        """
        self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
                            'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
                            'usage_limits']

        self.translations = {
            'list_any_keywords_for_the_image': 'attributes',
            'when_was_this_image_created': 'create_dt',
            'what_is_the_image_id': 'identifier',
            'maximum_latitude': 'lat_max',
            'minimum_latitude': 'lat_min',
            'maximum_longitude': 'lon_max',
            'minimum_longitude': 'lon_min',
            'start_time': 'time_start',
            'end_time': 'time_end',
            'what_is_the_name_of_the_image_listed_in_the_report': 'title'
        }

        super(Image, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        # Hack: strip the '/image/' path fragment. An absent identifier stays
        # as it is (None or '') instead of raising AttributeError.
        if self.identifier:
            self.identifier = self.identifier.replace('/image/', '')

        self.local_path = local_path
        self.remote_path = remote_path

        # This does not accurately reflect GCIS' data model
        self.datasets = []

    def __str__(self):
        return 'Image: {id} {name}'.format(id=self.identifier, name=self.title)
177
178
179
180


class Dataset(Gcisbase):
    """A dataset whose date and year fields are normalized on assignment."""

    # Bookkeeping attributes that merge() must never copy from another object.
    _NOT_MERGED = ('original', 'gcis_fields', 'translations', '_identifiers')

    def __init__(self, data):
        """Build a Dataset from ``data``.

        :param data: dict of dataset values; the keys named in
            ``translations`` are renamed to their attribute names.
        """
        self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
                            'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
                            'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
                            'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
                            'identifier', 'release_dt', 'attributes']

        self.translations = {
            'data_set_access_date': 'access_dt',
            'data_set_publication_year': 'publication_year',
            'data_set_original_release_date': 'release_dt',
            # HACK elsewhere 'start_time and end_time': '',
            'data_set_id': 'native_id',
            # HACK elsewhere'': 'doi',
            # HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
            'data_set_version': 'version',
            'data_set_name': 'name',
            'data_set_citation': 'cite_metadata',
            'data_set_description': 'description',
            # Not sure'': 'type',
            'data_set_location': 'url',
            'data_set_variables': 'attributes'
        }

        # This desperately needs to get added to the webform
        self._identifiers = {
            'Global Historical Climatology Network - Daily': 'ghcn-daily',
            'Global Historical Climatology Network - Monthly': 'ghcn-monthly',
            'NCDC Merged Land and Ocean Surface Temperature': 'MLOST',
            'Climate Division Database Version 2': 'cddv2',
            'Eighth degree-CONUS Daily Downscaled Climate Projections by Katharine Hayhoe': 'CMIP3-Downscaled', #Problem
            'Eighth degree-CONUS Daily Downscaled Climate Projections': 'CMIP3-Downscaled', #Problem
            'Earth Policy Institute Atmospheric Carbon Dioxide Concentration, 1000-2012': 'EPI-CO2',
            'Daily 1/8-degree gridded meteorological data [1 Jan 1949 - 31 Dec 2010]': 'Maurer',
            'NCEP/NCAR Reanalysis': 'NCEP-NCAR',
            'NCDC Global Surface Temperature Anomalies': 'NCDC-GST-Anomalies',
            'GRACE Static Field Geopotential Coefficients JPL Release 5.0 GSM': 'GRACE'
        }

        # Backing storage for the properties below. It must exist before the
        # base constructor runs, because that is what assigns the properties.
        self._release_dt = None
        self._access_dt = None
        self._publication_year = None

        super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        # Known dataset names map to a short identifier; any other name is
        # used as the identifier unchanged.
        self.identifier = self._identifiers.get(self.name, self.name)

    def __str__(self):
        return 'Dataset: {id} {name}'.format(id=self.identifier, name=self.name)

    def as_json(self, indent=0):
        """Serialize the dataset, leaving out the list-valued relation fields.

        :param indent: indentation passed through to the base serializer.
        """
        return super(Dataset, self).as_json(
            indent=indent,
            omit_fields=['files', 'parents', 'contributors', 'references']
        )

    @staticmethod
    def _length(value):
        """Return ``len(value)``, or 0 for values that have no length."""
        try:
            return len(value)
        except TypeError:
            return 0

    def merge(self, other):
        """Merge ``other`` into this dataset, attribute by attribute.

        If our copy of a field is empty or the other copy is longer, the
        other copy is taken. All instance attributes are visited, so the
        ``_release_dt``-style backing fields of the properties are merged
        directly.

        :param other: Dataset to take values from.
        :return: ``self``.
        """
        # Iterate over a snapshot of the names so assignment inside the loop
        # can never disturb the iteration.
        for k in list(self.__dict__):
            if k in self._NOT_MERGED or not hasattr(other, k):
                continue

            ours = self.__dict__[k]
            theirs = getattr(other, k)
            if ours in (None, '') or self._length(theirs) > self._length(ours):
                self.__dict__[k] = theirs

        # Returned after the whole loop, not from inside it.
        return self

    @staticmethod
    def _iso_or_none(value):
        """Parse ``value`` as a date and return its ISO 8601 string.

        Falsy or unparseable input yields None instead of raising.
        """
        if not value:
            return None
        try:
            return parse(value).isoformat()
        except (TypeError, ValueError, OverflowError):
            # ValueError is what the parser raises for text it cannot read.
            return None

    @property
    def release_dt(self):
        return self._release_dt

    @release_dt.setter
    def release_dt(self, value):
        self._release_dt = self._iso_or_none(value)

    @property
    def access_dt(self):
        return self._access_dt

    @access_dt.setter
    def access_dt(self, value):
        self._access_dt = self._iso_or_none(value)

    @property
    def publication_year(self):
        return self._publication_year

    @publication_year.setter
    def publication_year(self, value):
        """Keep only the first run of four digits found in ``value``."""
        match = None
        if value:
            try:
                match = re.search(r'\d{4}', value)
            except TypeError:
                # Non-string input such as an integer year.
                match = re.search(r'\d{4}', str(value))

        self._publication_year = match.group() if match else None