Commit cedfa6f2 authored by Merret Buurman's avatar Merret Buurman
Browse files

Handle retrieval and parsing into handle module.

parent 72bd01db
import json
import logging
LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())

# Optional settings: a missing config file is tolerated and leaves CONFIG
# empty. Both paths are relative to the current working directory.
try:
    # 'with' closes the file handle deterministically; the bare
    # open(...).read() used before left that to the garbage collector.
    with open('./landingpage/config.json') as config_file:
        CONFIG = json.loads(config_file.read())
except IOError:
    LOGGER.info('No config found.')
    CONFIG = {}

# Mandatory defaults: importing this module fails with ValueError if the
# defaults file cannot be read.
try:
    with open('./landingpage/defaults.json') as defaults_file:
        DEFAULTS = json.loads(defaults_file.read())
except IOError:
    msg = 'No default values found.'
    LOGGER.error(msg)
    raise ValueError(msg)
{
"globalresolver" : "http://hdl.handle.net",
"path_restapi": "api/handles",
"special_types":["HS_ADMIN", "HS_VLIST", "HS_PUBKEY", "HS_SECKEY"]
}
\ No newline at end of file
import xml.etree.ElementTree as ET
from landingpage.config_and_defaults import DEFAULTS
import retrieval
###
### Parsing any cmip6 handle records
###
def get_aggregation_level(record):
    '''
    Return the value of the first 'AGGREGATION_LEVEL' entry of the
    handle record, or None if the record has no such entry.
    '''
    aggregation_level = get_value_from_record(record, 'AGGREGATION_LEVEL')
    return aggregation_level
###
### Parsing file handle records
###
# The following methods get the correct fields from the handle record.
# They operate on the KEY of the handle record fields, so they
# depend strongly on the Handle Record dialect:
def get_filename_from_record(json_record):
    '''
    Return the value of the first 'FILENAME' entry of the handle
    record, or None if the record has no such entry.
    '''
    filename = get_value_from_record(json_record, 'FILENAME')
    return filename
def get_filesize_from_record(json_record):
    '''
    Return the value of the first 'FILESIZE' entry of the handle
    record, or None if the record has no such entry.
    '''
    filesize = get_value_from_record(json_record, 'FILESIZE')
    return filesize
def get_checksum_from_record(json_record):
    '''
    Return the value of the first 'CHECKSUM' entry of the handle
    record, or None if the record has no such entry.
    '''
    checksum = get_value_from_record(json_record, 'CHECKSUM')
    return checksum
def get_checksum_method_from_record(json_record):
    '''
    Return the value of the first 'CHECKSUM_METHOD' entry of the handle
    record, or None if the record has no such entry.
    '''
    checksum_method = get_value_from_record(json_record, 'CHECKSUM_METHOD')
    return checksum_method
def get_urls_original_from_record(json_record):
    '''
    Return the host/url dicts parsed from the record's
    'URL_ORIGINAL_DATA' field, or None if the record has no such entry.
    '''
    return _extract_url_info_from_field(
        get_value_from_record(json_record, 'URL_ORIGINAL_DATA')
    )
def get_urls_replicas_from_record(json_record):
    '''
    Return the host/url dicts parsed from the record's 'URL_REPLICAS'
    field, or None if the record has no such entry.
    '''
    return _extract_url_info_from_field(
        get_value_from_record(json_record, 'URL_REPLICAS')
    )
def _get_aggregation_field_from_record(json_record):
    '''
    Return the raw aggregation field of a handle record.

    The 'IS_PART_OF' entry is preferred; if it is absent, the value of
    the 'PARENT' entry is returned instead (None if that is absent too).
    '''
    aggregation = get_value_from_record(json_record, 'IS_PART_OF')
    if aggregation is not None:
        return aggregation
    # TODO Remove the 'PARENT' fallback once migration is done!
    return get_value_from_record(json_record, 'PARENT')
# The following methods operate on the VALUES of the
# handle record fields, so they also depend
# strongly on the Handle Record dialect:
def get_list_of_aggregation_handles_from_record(json_record):
    '''
    Return the handles listed in the record's aggregation field.

    The field value is a ';'-separated string. An empty list is
    returned when the record carries no aggregation field at all
    (previously this raised AttributeError on None). Empty tokens, e.g.
    from a trailing ';', are dropped because they cannot name a handle.
    '''
    field_string = _get_aggregation_field_from_record(json_record)
    if field_string is None:
        return []
    return [handle for handle in field_string.split(';') if handle]
def _extract_url_info_from_field(field_string):
if field_string is None:
return None
else:
field_xml = ET.fromstring(field_string)
list_of_originals = []
temp_url_list = []
locations = field_xml.findall('location')
for location in locations:
url = location.get('href')
if url is not None and url not in temp_url_list:
temp_url_list.append(url)
host = location.get('host')
if host is None:
host = ''
list_of_originals.append(dict(
host=host,
url=url
))
return list_of_originals
def get_list_of_aggregation_records_from_record(list_of_aggregation_handles):
    '''
    Collect info about every aggregation handle in the given list.

    Returns a tuple (list_of_info_dicts, any_replaced), where
    any_replaced is True if at least one of the handles was reported as
    replaced.
    '''
    # Resolve every handle exactly once, in input order.
    results = [
        _extract_info_on_one_aggregation_record(handle)
        for handle in list_of_aggregation_handles
    ]
    infos = [info for info, _ in results]
    any_replaced = any(replaced for _, replaced in results)
    return infos, any_replaced
def _extract_info_on_one_aggregation_record(handle):
    '''
    Retrieve the record of one aggregation handle and summarise it.

    Returns a tuple (info, replaced). info is a dict with the keys
    'handle', 'level', 'drs', 'version' and 'replaced'; its 'replaced'
    key holds the raw 'REPLACED_BY' value (possibly None). The second
    tuple element is True only if that value is set and differs from
    the handle itself.
    '''
    record = retrieval.get_handle_record_json(handle)

    level = get_aggregation_level(record)
    drs = get_drs_from_record(record)
    version = get_version_number_from_record(record)
    successor = get_replaced_by_from_record(record)

    is_replaced = successor is not None and successor != handle

    info = {
        'handle': handle,
        'level': level,
        'drs': drs,
        'version': version,
        'replaced': successor,
    }
    return info, is_replaced
###
### Parsing dataset handle records
###
def get_drs_from_record(json_record):
    '''
    Return the value of the first 'DRS_ID' entry of the handle record,
    or None if the record has no such entry.
    '''
    drs_id = get_value_from_record(json_record, 'DRS_ID')
    return drs_id
def get_version_number_from_record(json_record):
    '''
    Return the value of the first 'VERSION_NUMBER' entry of the handle
    record, or None if the record has no such entry.
    '''
    version_number = get_value_from_record(json_record, 'VERSION_NUMBER')
    return version_number
def get_replaced_by_from_record(json_record):
    '''
    Return the value of the first 'REPLACED_BY' entry of the handle
    record, or None if the record has no such entry.
    '''
    replaced_by = get_value_from_record(json_record, 'REPLACED_BY')
    return replaced_by
###
### Utils
###
# TODO Use pyhandle!
def get_value_from_record(record, key):
    '''
    Returns the first occurrence as value.

    record is a dict whose 'values' item is a list of entries; each
    entry has a 'type' and a 'data' item. For the first entry whose
    type equals key, entry['data']['value'] is returned. If 'data' has
    no 'value' item, or is not a mapping at all (e.g. a plain string),
    the 'data' item itself is returned.

    Returns None if no entry has the requested type.
    '''
    for entry in record['values']:
        if entry['type'] == key:
            try:
                return entry['data']['value']
            except (KeyError, TypeError):
                # KeyError: dict without 'value'. TypeError: 'data' is a
                # string/list/number and cannot be indexed by 'value'.
                return entry['data']
    return None
def get_values_from_record(record, key):
    '''
    Returns all occurrences in a list.

    For every entry in record['values'] whose 'type' equals key, the
    list receives entry['data']['value'], or the 'data' item itself if
    it has no 'value' item or is not a mapping (e.g. a plain string).
    Order follows the record. The list is empty if nothing matches.
    '''
    values = []
    for entry in record['values']:
        if entry['type'] != key:
            continue
        try:
            value = entry['data']['value']
        except (KeyError, TypeError):
            # KeyError: dict without 'value'. TypeError: 'data' is a
            # string/list/number and cannot be indexed by 'value'.
            value = entry['data']
        values.append(value)
    return values
def get_all_entries_as_dict(record, include_special_types=True):
    '''
    Return the record's entries as a dict mapping type to str(value).

    If a type occurs more than once, the value of its first occurrence
    is used. With include_special_types=False, entries whose type is
    listed in DEFAULTS['special_types'] are left out.
    '''
    excluded_types = DEFAULTS['special_types']
    entries = {}
    for entry in record['values']:
        entry_type = entry['type']
        if not include_special_types and entry_type in excluded_types:
            continue
        entries[entry_type] = str(get_value_from_record(record, entry_type))
    return entries
\ No newline at end of file
import json
import requests
from landingpage.config_and_defaults import CONFIG, DEFAULTS
import logging
LOGGER = logging.getLogger(__name__)
LOGGER.addHandler(logging.NullHandler())
'''
This module retrieves handle records from
a handle server.
'''
def get_handle_record_json(handle):
    '''
    Fetch the record of the given handle via HTTP GET from the resolver
    URL and return the response body parsed as JSON.

    The HTTP status code is not checked; a body that is not valid JSON
    makes json.loads raise.
    '''
    response = requests.get(_get_resolve_url_for_handle(handle))

    # TODO ONE DAY - MAKE THIS AJAX???
    # See http://stackoverflow.com/questions/7699796/how-do-you-get-django-to-make-a-restful-call
    # The argument there: let the client make the request where possible,
    # because blocking the webserver on an external call does not scale
    # well. AJAX requests are bound to the same domain, though, unless
    # iframes, jsonp or similar are used.

    # TODO USE PYHANDLE HERE TO CATCH ERRORS
    return json.loads(response.content)
def _get_handle_server_from_config():
    '''
    Return CONFIG['handleserver'], or None if that key is not
    configured.
    '''
    try:
        configured_server = CONFIG['handleserver']
    except KeyError:
        configured_server = None
    return configured_server
def _get_global_handle_resolver():
    '''
    Return DEFAULTS['globalresolver'] with leading and trailing slashes
    stripped.

    Logs an error and raises ValueError if the defaults do not contain
    that key.
    '''
    try:
        resolver = DEFAULTS['globalresolver']
    except KeyError:
        msg = 'No default URL for the global handle resolver found.'
        LOGGER.error(msg)
        raise ValueError(msg)
    return resolver.strip('/')
def _get_resolver_base_url():
    '''
    Return the base URL used for resolving handles, without trailing
    slashes.

    The configured handle server takes precedence; the global resolver
    from the defaults is used only if no handle server is configured.
    '''
    configured_server = _get_handle_server_from_config()
    if configured_server is not None:
        base_url = configured_server
    else:
        base_url = _get_global_handle_resolver()
    return base_url.rstrip('/')
def _get_resolver_url():
    '''
    Return the resolver base URL joined with DEFAULTS['path_restapi']
    by a single '/'.
    '''
    base_url = _get_resolver_base_url()
    api_path = DEFAULTS['path_restapi']
    return '/'.join((base_url.strip('/'), api_path))
def _get_resolve_url_for_handle(handle):
    '''
    Return the full URL for the given handle: the resolver URL, a '/'
    and the handle.
    '''
    return '/'.join((_get_resolver_url(), handle))
if False:
    # Disabled code: the functions below are never defined at runtime.
    # NOTE(review): 'Request' is not imported in the visible part of this
    # module, so it has to be brought into scope before enabling them.

    def check_handle_server_availability():
        '''
        Return True if a request to the resolver base URL answers with
        status code 200, otherwise log a debug message and return False.
        '''
        host = _get_resolver_base_url()
        resp = Request.blank(host).get_response()
        if resp.status_code == 200:
            return True
        else:
            # The keyword must be 'host' to match the '{host}' placeholder;
            # it used to be 'url', which makes str.format raise KeyError.
            LOGGER.debug('Server not available, maybe? The call to the handle server at "{host}" returned status code "{code}"!'.format(
                host=host,
                code=resp.status_code
            ))
            return False

    def check_handle_existence(handle):
        '''
        Return True if a request to the handle's resolve URL answers with
        status code 200. Any other status code (404 is logged separately)
        yields a debug message and False.
        '''
        handle_server_url = _get_resolve_url_for_handle(handle)
        resp = Request.blank(handle_server_url).get_response()
        if resp.status_code == 200:
            return True
        elif resp.status_code == 404:
            LOGGER.debug('The check for handle existence (handle {handle}), call to "{url}" returned code "404". The handle does not exist.'.format(
                handle=handle,
                url=handle_server_url
            ))
            return False
        else:
            LOGGER.debug('The check for handle existence (handle {handle}), call to "{url}" returned code "{code}", which is strange.'.format(
                handle=handle,
                url=handle_server_url,
                code=resp.status_code
            ))
            return False
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment