cmip6.py
from django.template import loader
import requests
import json
import xml.etree.ElementTree as ET

def get_html_string(request, prefix, suffix):
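    """
    Render the CMIP6 landing page for the handle <prefix>/<suffix>.

    Fetches the handle record, picks a template based on the record's
    AGGREGATION_LEVEL, and returns the rendered HTML as a string.
    """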

    handle_values, json_record = _get_handle_values(prefix+'/'+suffix)
    agg_level = _extract_agg_level(handle_values)
    context = _get_handle_info_depending_on_aggregation_level(agg_level, handle_values, json_record)
    template = _get_template_depending_on_aggregation_level(agg_level)
    return template.render(context, request)
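
# A minimal usage sketch (an assumption, not part of the original module): one
# way get_html_string might be wired into a Django view. The view name below is
# hypothetical and is not referenced anywhere else.
def example_cmip6_landing_page(request, prefix, suffix):
    from django.http import HttpResponse  # local import to keep the sketch self-contained
    return HttpResponse(get_html_string(request, prefix, suffix))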

def _get_template_depending_on_aggregation_level(aggregation_level):
    if aggregation_level.lower() == 'file':
        return loader.get_template('landingpage/cmip6_file.html')
    elif aggregation_level.lower() == 'dataset':
        return loader.get_template('landingpage/cmip6_dataset.html')
    else:
        return loader.get_template('landingpage/cmip6_fallback.html')


def _get_handle_values(handle):
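    """
    Fetch the handle record from the Handle System REST API and return a
    (flat_values_dict, raw_entries) tuple.
    """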
    resp = requests.get('https://hdl.handle.net/api/handles/'+handle)
    # TODO: put the handle API base URL in config.
    # TODO ONE DAY - MAKE THIS AJAX???
    # See http://stackoverflow.com/questions/7699796/how-do-you-get-django-to-make-a-restful-call
    # which argues: "My opinion is to get the client to make the requests where
    # possible; making your webserver block for an external call isn't very
    # scalable or nice in general. However, be aware that AJAX requests must be
    # made to the same domain (you can't do cross domain AJAX requests without
    # using iframes, jsonp, or other trickery)."

    # TODO USE PYHANDLE HERE TO CATCH ERRORS
    entries = json.loads(resp.content)['values']

    values_dict = dict(
        handle=handle
    )
    for entry in entries:
        key = str(entry['type'])
        val = str(entry['data']['value'])
        values_dict[key] = val
    return values_dict, entries
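
# A hedged sketch (not called anywhere above) of how the TODOs in
# _get_handle_values might be addressed: the base URL moved into Django
# settings and network/JSON errors caught instead of propagating. The setting
# name HANDLE_API_BASE_URL is an assumption for illustration only.
def _get_handle_values_with_error_handling(handle):
    from django.conf import settings
    base_url = getattr(settings, 'HANDLE_API_BASE_URL',
                       'https://hdl.handle.net/api/handles/')
    values_dict = dict(handle=handle)
    try:
        resp = requests.get(base_url + handle, timeout=10)
        resp.raise_for_status()
        entries = resp.json()['values']
    except (requests.RequestException, ValueError, KeyError):
        # On any failure, fall back to an empty record rather than raising.
        return values_dict, []
    for entry in entries:
        values_dict[str(entry['type'])] = str(entry['data']['value'])
    return values_dict, entries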

def _get_handle_info_depending_on_aggregation_level(agg_level, values, json_record):
    if agg_level.lower() == 'file':
        return _get_handle_info_for_file(values, json_record)
    elif agg_level.lower() == 'dataset':
        return _get_handle_info_for_dataset(values, json_record)
    else:
        return _get_dict_generic(values, json_record)

def _extract_agg_level(values_dict):
    return values_dict['AGGREGATION_LEVEL']

def _get_handle_info_for_file(values, json_record):
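    """
    Build the template context for a FILE-level handle record. Only the fields
    present in the record are copied into the context; the raw record is always
    exposed under 'content', 'values' and 'handle'.
    """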

    context = {}

    if 'FILENAME' in values:
        context['file_name'] = values['FILENAME']

    if 'FILESIZE' in values:
        context['file_size'] = values['FILESIZE']

    if 'CHECKSUM' in values:
        context['checksum'] = values['CHECKSUM']

    if 'CHECKSUM_METHOD' in values:
        context['checksum_method'] = values['CHECKSUM_METHOD']

    if 'URL_ORIGINAL_DATA' in values:
        field_string = values['URL_ORIGINAL_DATA']
        list_of_originals = _extract_url_info_from_field(field_string)
        context['urls_original'] = list_of_originals

    if 'URL_REPLICAS' in values:
        context['urls_replicas'] = values['URL_REPLICAS']

    if 'IS_PART_OF' in values or 'PARENT' in values:  # TODO remove 'PARENT'
        # 'IS_PART_OF' takes precedence over the legacy 'PARENT' field.
        parent_field = values.get('IS_PART_OF', values.get('PARENT'))
        parent_list = parent_field.split(';')
        context['parents_list'] = parent_list
        infodict, any_replaced = _get_parent_info(parent_list)
        context['parents_info'] = infodict
        context['any_parent_replaced'] = any_replaced

    context['content'] = json_record
    context['values'] = values
    context['handle'] = values['handle']

    # TODO add replicas

    return context

def _extract_url_info_from_field(field_string):
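    """
    Parse a URL_ORIGINAL_DATA field into a list of {'host': ..., 'url': ...}
    dicts, de-duplicating on the URL. The field is expected to be an XML
    snippet whose direct children are <location host="..." href="..."/>
    elements (shape inferred from this parser, not from a separate spec).
    """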
    field_xml = ET.fromstring(field_string)
    list_of_originals = []
    temp_url_list = []
    locations = field_xml.findall('location')
    for location in locations:
        url = location.get('href')
        if url not in temp_url_list:
            temp_url_list.append(url)
            list_of_originals.append(dict(
                host=location.get('host'),
                url=url
            ))
    return list_of_originals


def _get_handle_info_for_dataset(values, json_record):
    # Placeholder: dataset-level context is not implemented yet.
    return {}

def _get_dict_generic(values, json_record):
    # Placeholder: generic fallback context is not implemented yet.
    return {}

def _get_parent_info(list_of_parent_handles):
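    """
    Look up each parent handle and return (list_of_parent_info, any_replaced),
    where each entry holds the handle, aggregation level, DRS id, version
    number and a 'replaced' flag derived from REPLACED_BY.
    """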

    list_of_parent_info = []
    any_replaced = False
    for handle in list_of_parent_handles:
        
        # Gather info:
        handle_values, temp = _get_handle_values(handle)
        aggregation_level = handle_values['AGGREGATION_LEVEL'].lower()
        drs_id = handle_values['DRS_ID']
        vers_num = handle_values['VERSION_NUMBER']
        replaced = False
        if 'REPLACED_BY' in handle_values:
            newer = handle_values['REPLACED_BY']
            # A REPLACED_BY entry that points back to the handle itself is not
            # treated as a real replacement.
            if newer != handle:
                replaced = True
                any_replaced = True

        # Assemble info:
        info = dict(
            handle = handle,
            level = aggregation_level,
            drs = drs_id,
            version = vers_num,
            replaced = replaced
        )
        list_of_parent_info.append(info)
    return list_of_parent_info, any_replaced