Commit 6bf99474 authored by Merret Buurman's avatar Merret Buurman
Browse files

Put the controlled vocabularies and handle entry keys into a separate file.

parent 58a8eed9
# Entry keys found in CMIP6 handle records.
# Maps the short names used in the code to the literal key strings
# stored in the records.
KEYS = {
    'agg_level': 'AGGREGATION_LEVEL',
    'superpart': 'IS_PART_OF',
    'errata_ids': 'ERRATA_IDS',
    'file_name': 'FILE_NAME',
    'file_size': 'FILE_SIZE',
    'checksum': 'CHECKSUM',
    'checksum_method': 'CHECKSUM_METHOD',
    'url_orig': 'URL_ORIGINAL_DATA',
    'url_replica': 'URL_REPLICAS',
    'subparts': 'HAS_PARTS',
    'replica_nodes': 'REPLICA_NODES',
    'data_node': 'HOSTING_NODE',
    'drs_id': 'DRS_ID',
    'vers_num': 'VERSION_NUMBER',
    'replaced_by': 'REPLACED_BY',
}
# List separator: the character on which multi-valued record entries
# are split into their individual items.
LIST_SEPARATOR = ';'
# Controlled vocabularies:
# Values that the aggregation level entry of a record is compared against.
LEVELS = {
    'file': 'FILE',
    'dataset': 'DATASET',
}
# Element and attribute names used when parsing the XML snippets
# stored in record entries.
XML_ATTRIBUTES = {
    'location': 'location',
    'host': 'host',
    'href': 'href',
}
\ No newline at end of file
import xml.etree.ElementTree as ET
from landingpage.landingpageapp.config_and_defaults import DEFAULTS
from landingpage.landingpageapp.handle.cmip6_vocabulary import KEYS as KEYS
from landingpage.landingpageapp.handle.cmip6_vocabulary import LEVELS as LEVELS
from landingpage.landingpageapp.handle.cmip6_vocabulary import LIST_SEPARATOR as SEPARATOR
from landingpage.landingpageapp.handle.cmip6_vocabulary import XML_ATTRIBUTES as XML_ATTRIBUTES
import retrieval
###
......@@ -92,71 +96,71 @@ def _get_title_from_record(record):
return None
def _get_aggregation_level(record):
    ''' Return the value stored under KEYS['agg_level'] in the record,
    as delivered by get_value_from_record.'''
    return get_value_from_record(record, KEYS['agg_level'])
def _is_file(record):
    ''' Tell whether the record's aggregation level equals LEVELS['file'].

    Returns an explicit bool (False instead of an implicit None when the
    level does not match).'''
    # NOTE(review): this calls get_aggregation_level (no leading underscore);
    # only _get_aggregation_level is visible nearby - confirm that the
    # public name exists in this module.
    return get_aggregation_level(record) == LEVELS['file']
def _is_dataset(record):
    ''' Tell whether the record's aggregation level equals LEVELS['dataset'].

    Returns an explicit bool (False instead of an implicit None when the
    level does not match).'''
    # NOTE(review): this calls get_aggregation_level (no leading underscore);
    # only _get_aggregation_level is visible nearby - confirm that the
    # public name exists in this module.
    return get_aggregation_level(record) == LEVELS['dataset']
def _get_aggregation_field_from_record(record):
    ''' Return the raw KEYS['superpart'] entry of the record.

    If that lookup yields None, the legacy key 'PARENT' is tried instead;
    the result of that second lookup is returned unchanged.'''
    field_string = get_value_from_record(record, KEYS['superpart'])
    if field_string is None:
        field_string = get_value_from_record(record, 'PARENT') # TODO Remove once migration is done!
    return field_string
def _get_list_of_aggregation_handles_from_record(record):
    ''' Return the list of handles found in the record's aggregation field.

    The field string is split on SEPARATOR and every member is passed
    through _remove_hdl_from_handle.'''
    # NOTE(review): calls get_aggregation_field_from_record (no leading
    # underscore) - confirm that name exists in this module.
    field_string = get_aggregation_field_from_record(record)
    # NOTE(review): no None check here, unlike _get_errata_list_from_record;
    # a record without the field would raise on .split - confirm intended.
    return [
        _remove_hdl_from_handle(member)
        for member in field_string.split(SEPARATOR)
    ]
def _get_errata_field_from_record(record):
    ''' Return the raw value stored under KEYS['errata_ids'] in the record.'''
    return get_value_from_record(record, KEYS['errata_ids'])
def _get_errata_list_from_record(record):
    ''' Return the record's errata field split on SEPARATOR, or None
    if the field lookup returned None.'''
    # NOTE(review): calls get_errata_field_from_record (no leading
    # underscore) - confirm that name exists in this module.
    field_string = get_errata_field_from_record(record)
    if field_string is None:
        return None
    return field_string.split(SEPARATOR)
def _get_filename_from_record(record):
    ''' Return the value stored under KEYS['file_name'] in the record.'''
    return get_value_from_record(record, KEYS['file_name'])
def _get_filesize_from_record(record):
    ''' Return the value stored under KEYS['file_size'] in the record.'''
    return get_value_from_record(record, KEYS['file_size'])
def _get_checksum_from_record(record):
    ''' Return the value stored under KEYS['checksum'] in the record.'''
    return get_value_from_record(record, KEYS['checksum'])
def _get_checksum_method_from_record(record):
    ''' Return the value stored under KEYS['checksum_method'] in the record.'''
    return get_value_from_record(record, KEYS['checksum_method'])
def _get_urls_original_from_record(record):
    ''' Read the KEYS['url_orig'] entry of the record and return the
    result of parsing it with _parse_urls_original_from_record.'''
    field_string = get_value_from_record(record, KEYS['url_orig'])
    return _parse_urls_original_from_record(field_string)
def _get_urls_replicas_from_record(record):
    ''' Read the KEYS['url_replica'] entry of the record and return the
    result of parsing it with _parse_urls_replicas_from_record.'''
    field_string = get_value_from_record(record, KEYS['url_replica'])
    return _parse_urls_replicas_from_record(field_string)
# Helpers:
def _parse_urls_original_from_record(field_string):
    ''' Extract the href and host attributes from the field string and
    post-process the result.

    If the extraction yields something other than None, the href key is
    renamed via _rename_href_to_url and duplicates are dropped via
    _remove_duplicate_urls. A None result is returned unchanged.'''
    parsed = _extract_url_info_from_field(
        field_string,
        XML_ATTRIBUTES['href'],
        XML_ATTRIBUTES['host'],
    )
    # NOTE(review): both post-processing steps are assumed to sit under
    # the None guard - confirm against the original indentation.
    if parsed is not None:
        parsed = _rename_href_to_url(parsed)
        parsed = _remove_duplicate_urls(parsed)
    return parsed
def _parse_urls_replicas_from_record(field_string):
parsed = _extract_url_info_from_field(field_string, 'href', 'host')
parsed = _extract_url_info_from_field(field_string, XML_ATTRIBUTES['href'], XML_ATTRIBUTES['host'])
if parsed is not None:
parsed = _rename_href_to_url(parsed)
parsed = _remove_duplicate_urls(parsed)
......@@ -166,8 +170,8 @@ def _rename_href_to_url(list_of_dicts):
''' Helper for the list of dict that is the result of parsing
a locations xml snippet.'''
for item in list_of_dicts:
item['url'] = item['href']
del item['href']
item['url'] = item[XML_ATTRIBUTES['href']]
del item[XML_ATTRIBUTES['href']]
return list_of_dicts
def _remove_duplicate_urls(list_of_dicts):
......@@ -191,7 +195,7 @@ def _extract_url_info_from_field(field_string, *attributes):
else:
field_xml = ET.fromstring(field_string)
list_of_items = []
locations = field_xml.findall('location')
locations = field_xml.findall(XML_ATTRIBUTES['location'])
for location in locations:
temp = {}
for attr in attributes:
......@@ -250,35 +254,35 @@ def _extract_info_on_one_aggregation_record(parenthandle):
###
def _get_drs_name_from_record(record):
    ''' Return the value stored under KEYS['drs_id'] in the record.'''
    return get_value_from_record(record, KEYS['drs_id'])
def _get_version_number_from_record(record):
    ''' Return the value stored under KEYS['vers_num'] in the record.'''
    return get_value_from_record(record, KEYS['vers_num'])
def _get_replaced_by_from_record(record):
    ''' Return the KEYS['replaced_by'] entry of the record after passing
    it through _remove_hdl_from_handle.'''
    # NOTE(review): the looked-up value is handed on without a None check -
    # confirm _remove_hdl_from_handle copes with a missing entry.
    value = get_value_from_record(record, KEYS['replaced_by'])
    return _remove_hdl_from_handle(value)
def _get_hosts_original_from_record(record):
    ''' Read the KEYS['data_node'] entry of the record and return what
    _extract_url_info_from_field extracts from it for the
    XML_ATTRIBUTES['host'] attribute.'''
    field_string = get_value_from_record(record, KEYS['data_node'])
    return _extract_url_info_from_field(field_string, XML_ATTRIBUTES['host'])
def _get_hosts_replicas_from_record(record):
    ''' Read the KEYS['replica_nodes'] entry of the record and return what
    _extract_url_info_from_field extracts from it for the
    XML_ATTRIBUTES['host'] attribute.'''
    # NOTE(review): the hard-coded key this replaces was 'REPLICA_NODE'
    # (singular), while KEYS['replica_nodes'] is 'REPLICA_NODES' - confirm
    # which spelling the handle records actually carry.
    field_string = get_value_from_record(record, KEYS['replica_nodes'])
    return _extract_url_info_from_field(field_string, XML_ATTRIBUTES['host'])
def _get_parts_field_from_record(record):
    ''' Return the raw KEYS['subparts'] entry of the record.

    If that lookup yields None, the legacy key 'CHILDREN' is tried instead;
    the result of that second lookup is returned unchanged.'''
    field_string = get_value_from_record(record, KEYS['subparts'])
    if field_string is None:
        field_string = get_value_from_record(record, 'CHILDREN') # TODO Remove once migration is done!
    return field_string
def _get_list_of_parts_handles_from_record(record):
field_string = _get_parts_field_from_record(record)
list_of_strings = field_string.split(';')
list_of_strings = field_string.split(SEPARATOR)
handles = []
for string in list_of_strings:
handles.append(_remove_hdl_from_handle(string))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment