domain.py 11.3 KB
Newer Older
abuddenberg's avatar
abuddenberg committed
1
2
from types import NoneType

abuddenberg's avatar
abuddenberg committed
3
4
__author__ = 'abuddenberg'

5
6
from copy import deepcopy
import json
abuddenberg's avatar
abuddenberg committed
7
from dateutil.parser import parse
8
import re
9
import inspect
abuddenberg's avatar
abuddenberg committed
10

11

abuddenberg's avatar
abuddenberg committed
12
class Gcisbase(object):
    """Base class for GCIS domain objects populated from a dict of raw values.

    Subclasses supply the list of attribute names they care about
    (``fields``) and an optional mapping from alternative input keys to
    those attribute names (``trans``).
    """

    def __init__(self, data, fields=None, trans=None):
        """Create attributes named in ``fields`` and fill them from ``data``.

        :param data: dict of raw values. It is modified in place: keys found
            in ``trans`` are renamed to their target names, and values that
            are copied onto the instance are whitespace-stripped first.
        :param fields: list of attribute names to create (each initialised
            to None). Defaults to a new empty list.
        :param trans: dict mapping input key -> attribute name. Defaults to
            a new empty dict.
        """
        #Setup class variables. A fresh container is created per instance when
        #none is given, so instances never share one mutable default object.
        self.gcis_fields = [] if fields is None else fields
        self.translations = {} if trans is None else trans

        #Save off a copy of the original JSON for debugging
        self.original = deepcopy(data)

        #Create attributes from the master list
        self.__dict__.update(dict.fromkeys(self.gcis_fields, None))

        #Perform translations (a missing or None value leaves data untouched)
        for term, target in self.translations.items():
            val = data.pop(term, None)
            if val is not None:
                data[target] = val

        #Only keys that already exist as attributes (fields or properties)
        #are copied onto the instance; everything else is ignored.
        for k in data:
            if hasattr(self, k):
                data[k] = self._clean(data[k])
                setattr(self, k, data[k])

    @staticmethod
    def _clean(value):
        """Return ``value`` stripped of surrounding whitespace if it is string-like.

        Values without a ``strip`` method are returned unchanged. On
        Python 2 only, unicode text is additionally encoded to a UTF-8 byte
        string. Values that are already ``str`` are never re-encoded, which
        avoids the implicit ASCII decode (and UnicodeDecodeError) that
        ``str.encode`` performs on Python 2, and avoids turning text into
        ``bytes`` on Python 3.
        """
        try:
            value = value.strip()
        except AttributeError:
            return value

        #``str is bytes`` is only true on Python 2; there, anything string-like
        #that is not ``str`` is treated as unicode text needing encoding.
        if str is bytes and not isinstance(value, str):
            try:
                value = value.encode('utf-8')
            except AttributeError:
                pass
        return value

    def merge(self, other):
        """Fill this object's empty attributes from ``other`` and return self.

        Every non-routine member whose name does not start with ``__`` and
        whose current value is None or '' is overwritten with the attribute
        of the same name on ``other``, when ``other`` has one.
        """
        #inspect.getmembers also evaluates properties, so computed attributes
        #take part in the merge as well.
        members = inspect.getmembers(self, lambda a: not inspect.isroutine(a))

        for attr, value in members:
            if attr.startswith('__'):
                continue
            if value in (None, '') and hasattr(other, attr):
                setattr(self, attr, getattr(other, attr))

        return self

    def as_json(self, indent=0, omit_fields=None):
        """Serialise the object's fields to a JSON string.

        'uri' and 'href' are always left out, together with any names in
        ``omit_fields``. Keys are emitted in sorted order so the output is
        deterministic.

        :param indent: passed straight through to ``json.dumps``.
        :param omit_fields: optional iterable of extra field names to skip.
        """
        omitted = set(['uri', 'href'])
        if omit_fields:
            omitted.update(omit_fields)

        out_fields = set(self.gcis_fields) - omitted
        return json.dumps({f: getattr(self, f) for f in out_fields}, indent=indent, sort_keys=True)
57

abuddenberg's avatar
abuddenberg committed
58
59

class Figure(Gcisbase):
    """A figure, optionally linked to a Chapter and a list of Images."""

    def __init__(self, data):
        """Build a Figure from a dict of raw values.

        :param data: dict of raw values. It is modified in place: the base
            class renames translated keys, and the 'chapter' and 'images'
            entries are popped from it here.
        """
        self.gcis_fields = [
            'usage_limits', 'kindred_figures', 'time_end', 'keywords', 'lat_min', 'create_dt', 'lat_max', 'time_start',
            'title', 'ordinal', 'lon_min', 'report_identifier', 'chapter', 'submission_dt', 'uri', 'lon_max',
            'caption', 'source_citation', 'attributes', 'identifier', 'chapter_identifier', 'images'
        ]

        self.translations = {
            'what_is_the_figure_id': 'identifier',
            'what_is_the_name_of_the_figure_as_listed_in_the_report': 'title',
            'when_was_this_figure_created': 'create_dt',
            'what_is_the_chapter_and_figure_number': 'figure_num'
        }

        super(Figure, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #Special case for chapter: wrap a non-empty chapter entry in a Chapter,
        #otherwise keep whatever the base class (or figure_num) already set.
        chap_tree = data.pop('chapter', None)
        if chap_tree:
            self.chapter = Chapter(chap_tree)

        #Special case for images
        image_list = data.pop('images', None)
        self.images = [Image(image) for image in image_list] if image_list else []

        #Hack: strip the path prefix. A missing identifier (None as well as '')
        #gets the placeholder instead of raising AttributeError on None.
        if self.identifier:
            self.identifier = self.identifier.replace('/figure/', '')
        else:
            self.identifier = '***ID MISSING***'

    @property
    def figure_num(self):
        """Return '<chapter>.<ordinal>' as a string.

        The chapter's ``number`` is used when ``chapter`` is a Chapter with
        a truthy number and ``ordinal`` is truthy; otherwise ``chapter``
        itself is formatted.
        """
        if isinstance(self.chapter, Chapter) and self.chapter.number and self.ordinal:
            return '{0}.{1}'.format(self.chapter.number, self.ordinal)
        else:
            return '{0}.{1}'.format(self.chapter, self.ordinal)

    #TODO: Ordinal handling is unnecessarily complex
    @figure_num.setter
    def figure_num(self, value):
        """Split a 'chapter.figure' string into chapter number and ordinal.

        Anything that cannot be split into exactly two integers (including
        non-string values) is reported on stdout and stored as None/None.
        """
        try:
            chp, fig = value.split('.')
            chp = int(chp)
            fig = int(fig)
        except (ValueError, AttributeError):
            #AttributeError covers values without .split (None, numbers).
            #%-formatting keeps the message safe for non-string values and
            #the parenthesised form works on both Python 2 and 3.
            print('Invalid chapter/figure numbers: %s' % (value,))
            chp = None
            fig = None
        self.ordinal = fig

        #If we have an actual Chapter instance, populate it
        if isinstance(self.chapter, Chapter):
            self.chapter.number = chp
        else:
            self.chapter = chp

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out images, chapter, kindred_figures and keywords.

        :param indent: forwarded to the base class serialiser.
        """
        return super(Figure, self).as_json(
            indent=indent, omit_fields=['images', 'chapter', 'kindred_figures', 'keywords']
        )

    def __str__(self):
        string = '{f_id}: Figure {f_num}: {f_name}\n\tImages: {imgs}'.format(
            f_id=self.identifier, f_num=self.figure_num, f_name=self.title, imgs=[i.identifier for i in self.images]
        )
        return string

    def __repr__(self):
        return self.__str__()

    def merge(self, other):
        """Merge ``other`` into this figure and return self.

        Chapters get special handling before the generic attribute merge.
        """
        # Special handling for Chapters
        if isinstance(other.chapter, Chapter):
            if isinstance(self.chapter, Chapter):
                self.chapter.merge(other.chapter)
            else:
                #This might want to move to Chapter's merge()
                #Adopt the other figure's Chapter object (shared, not copied).
                #Our bare chapter number only overrides the Chapter's number
                #when we actually have one, so a known number is never
                #replaced with None.
                chapter_num = self.chapter
                self.chapter = other.chapter
                if chapter_num not in (None, ''):
                    self.chapter.number = chapter_num

        return super(Figure, self).merge(other)
abuddenberg's avatar
abuddenberg committed
138
139


140
141
class Chapter(Gcisbase):
    """A report chapter; a plain field container with no key translations."""

    def __init__(self, data):
        """Build a Chapter from a dict of raw values (see the base class initialiser)."""
        chapter_fields = ['report_identifier', 'identifier', 'number', 'url', 'title']
        self.gcis_fields = chapter_fields

        super(Chapter, self).__init__(data, fields=chapter_fields)
abuddenberg's avatar
abuddenberg committed
145

146

147
class Image(Gcisbase):
    """An image, with optional local/remote file paths and attached datasets."""

    def __init__(self, data, local_path=None, remote_path=None):
        """Build an Image from a dict of raw values.

        :param data: dict of raw values; modified in place by the base class
            (translated keys are renamed).
        :param local_path: stored unchanged as ``self.local_path``.
        :param remote_path: stored unchanged as ``self.remote_path``.
        """
        self.gcis_fields = ['attributes', 'create_dt', 'description', 'identifier', 'lat_max', 'lat_min', 'lon_max',
                            'uri', 'lon_min', 'position', 'submission_dt', 'time_end', 'time_start', 'title', 'href',
                            'usage_limits']

        self.translations = {
            'list_any_keywords_for_the_image': 'attributes',
            'when_was_this_image_created': 'create_dt',
            'what_is_the_image_id': 'identifier',
            'maximum_latitude': 'lat_max',
            'minimum_latitude': 'lat_min',
            'maximum_longitude': 'lon_max',
            'minimum_longitude': 'lon_min',
            'start_time': 'time_start',
            'end_time': 'time_end',
            'what_is_the_name_of_the_image_listed_in_the_report': 'title'
        }

        super(Image, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #Hack: strip the path prefix. Skipped when no identifier was supplied,
        #so a missing identifier stays None instead of raising AttributeError.
        if self.identifier:
            self.identifier = self.identifier.replace('/image/', '')

        self.local_path = local_path
        self.remote_path = remote_path

        #This does not accurately reflect GCIS' data model
        self.datasets = []

    def __str__(self):
        return 'Image: {id} {name}'.format(id=self.identifier, name=self.title)
179
180
181
182


class Dataset(Gcisbase):
    """A dataset, with date fields normalised through properties."""

    def __init__(self, data):
        """Build a Dataset from a dict of raw values.

        :param data: dict of raw values; modified in place by the base class
            (translated keys are renamed).
        """
        self.gcis_fields = ['contributors', 'vertical_extent', 'native_id', 'href', 'references', 'cite_metadata',
                            'scale', 'publication_year', 'temporal_extent', 'version', 'parents', 'scope', 'type',
                            'processing_level', 'files', 'data_qualifier', 'access_dt', 'description',
                            'spatial_ref_sys', 'spatial_res', 'spatial_extent', 'doi', 'name', 'url', 'uri',
                            'identifier', 'release_dt', 'attributes']

        self.translations = {
            'data_set_access_date': 'access_dt',
            'data_set_publication_year': 'publication_year',
            'data_set_original_release_date': 'release_dt',
            # HACK elsewhere 'start_time and end_time': '',
            'data_set_id': 'native_id',
            # HACK elsewhere'': 'doi',
            # HACK elsewhere 'maximum_latitude etc. etc. etc.': '',
            'data_set_version': 'version',
            'data_set_name': 'name',
            'data_set_citation': 'cite_metadata',
            'data_set_description': 'description',
            # Not sure'': 'type',
            'data_set_location': 'url',
            'data_set_variables': 'attributes'
        }

        #This desperately needs to get added to the webform
        self._identifiers = {
            'Global Historical Climatology Network - Daily': 'ghcn-daily',
            'Global Historical Climatology Network - Monthly': 'ghcn-monthly',
            'NCDC Merged Land and Ocean Surface Temperature': 'MLOST',
            'Climate Division Database Version 2': 'cddv2',
            'Eighth degree-CONUS Daily Downscaled Climate Projections by Katharine Hayhoe': 'CMIP3-Downscaled', #Problem
            'Eighth degree-CONUS Daily Downscaled Climate Projections': 'CMIP3-Downscaled', #Problem
            'Earth Policy Institute Atmospheric Carbon Dioxide Concentration, 1000-2012': 'EPI-CO2',
            'Daily 1/8-degree gridded meteorological data [1 Jan 1949 - 31 Dec 2010]': 'Maurer',
            'NCEP/NCAR Reanalysis': 'NCEP-NCAR',
            'NCDC Global Surface Temperature Anomalies': 'NCDC-GST-Anomalies',
            'GRACE Static Field Geopotential Coefficients JPL Release 5.0 GSM': 'GRACE',
            'UW/NCDC Satellite Derived Hurricane Intensity Dataset': 'Hurricane-Intensity',
            'Bias-Corrected and Spatially Downscaled Surface Water Projections Hydrologic Data': 'Water-Projections',
            'International Best Track Archive for Climate Stewardship (IBTrACS)': 'IBTrACS',
            'the World Climate Research Programme\'s (WCRP\'s) Coupled Model Intercomparison Project phase 3 (CMIP3) multi-model dataset': 'CMIP3',
            'North American Regional Climate Change Assessment Program dataset': 'NARCCAP',
            'Gridded Population of the World Version 3 (GPWv3): Population Count Grid': 'GPWv3'
        }

        #Private attributes for handling date parsing. These must exist before
        #the base initialiser runs, because it reads and assigns the properties below.
        self._release_dt = None
        self._access_dt = None
        self._publication_year = None

        #These do not accurately reflect GCIS' data model
        self.note = None
        self.activity = None

        super(Dataset, self).__init__(data, fields=self.gcis_fields, trans=self.translations)

        #Known dataset names map to a short identifier; any other name is used as-is.
        self.identifier = self._identifiers.get(self.name, self.name)

    def __str__(self):
        return 'Dataset: {id} {name}'.format(id=self.identifier, name=self.name)

    def as_json(self, indent=0):
        """Serialise to JSON, leaving out files, parents, contributors and references.

        :param indent: forwarded to the base class serialiser.
        """
        return super(Dataset, self).as_json(
            indent=indent, omit_fields=['files', 'parents', 'contributors', 'references']
        )

    def merge(self, other):
        """Merge ``other`` into this dataset and return self.

        For every entry in this instance's ``__dict__`` that ``other`` also
        has: if our copy of the field is empty (None or '') or the other
        copy is longer, take that one. Values are written directly into
        ``__dict__``, bypassing the property setters.
        """
        for k in self.__dict__:
            if not hasattr(other, k):
                continue

            ours = self.__dict__[k]
            theirs = getattr(other, k)
            if ours in (None, '') or self._is_longer(theirs, ours):
                self.__dict__[k] = theirs

        #Return only after every key has been considered.
        return self

    @staticmethod
    def _is_longer(candidate, current):
        """Return True when both values have a length and candidate's is greater."""
        try:
            return len(candidate) > len(current)
        except TypeError:
            #At least one side has no len() (None, numbers, ...): not comparable.
            return False

    @staticmethod
    def _to_isoformat(value):
        """Parse ``value`` as a date and return its ISO 8601 string, or None.

        Falsy values and values the date parser rejects both yield None.
        """
        if not value:
            return None
        try:
            return parse(value).isoformat()
        except (TypeError, ValueError, OverflowError):
            return None

    @property
    def release_dt(self):
        return self._release_dt

    @release_dt.setter
    def release_dt(self, value):
        self._release_dt = self._to_isoformat(value)

    @property
    def access_dt(self):
        return self._access_dt

    @access_dt.setter
    def access_dt(self, value):
        self._access_dt = self._to_isoformat(value)

    @property
    def publication_year(self):
        return self._publication_year

    @publication_year.setter
    def publication_year(self, value):
        """Store the first run of four digits found in ``value``, or None."""
        match = None
        if value:
            try:
                match = re.search(r'\d{4}', value)
            except TypeError:
                #Non-string input such as an integer year.
                match = re.search(r'\d{4}', str(value))

        self._publication_year = match.group() if match else None