Commit 39c52d15 authored by Merret Buurman's avatar Merret Buurman
Browse files

Restructuring cmip6parser module.

parent fce4d8cf
......@@ -3,21 +3,92 @@ from landingpage.config_and_defaults import DEFAULTS
import retrieval
###
### Parsing any cmip6 handle records
### API
###
# Parsing any cmip6 handle records
def get_all_entries_as_dict(record, include_special_types=True):
    '''Public API: return the entries of a handle record as a dict.

    Both arguments are forwarded unchanged to ``_get_all_entries_as_dict``,
    which holds the actual implementation.
    '''
    return _get_all_entries_as_dict(
        record,
        include_special_types=include_special_types,
    )
def get_aggregation_level(record):
    '''Public API: return the aggregation level stored in a handle record.

    Delegates to ``_get_aggregation_level``, which reads the
    'AGGREGATION_LEVEL' entry of the record. The function has exactly one
    return path so the delegation is always reached.
    '''
    return _get_aggregation_level(record)
def is_file(record):
    '''Public API: tell whether the handle record describes a file.

    Thin wrapper around ``_is_file``.
    '''
    return _is_file(record)
def is_dataset(record):
    '''Public API: tell whether the handle record describes a dataset.

    Thin wrapper around ``_is_dataset``.
    '''
    return _is_dataset(record)
def get_title_from_record(record):
    '''Public API: return the title of a handle record.

    Thin wrapper around ``_get_title_from_record``.
    '''
    return _get_title_from_record(record)
def get_aggregation_field_from_record(record):
    '''Public API: return the raw aggregation field of a handle record.

    Thin wrapper around ``_get_aggregation_field_from_record``.
    '''
    return _get_aggregation_field_from_record(record)
# File handle records
def get_filename_from_record(record):
    '''Public API: return the file name stored in a file handle record.

    Thin wrapper around ``_get_filename_from_record``.
    '''
    return _get_filename_from_record(record)
def get_filesize_from_record(record):
    '''Public API: return the file size stored in a file handle record.

    Thin wrapper around ``_get_filesize_from_record``.
    '''
    return _get_filesize_from_record(record)
def get_checksum_from_record(record):
    '''Public API: return the checksum stored in a file handle record.

    Thin wrapper around ``_get_checksum_from_record``.
    '''
    return _get_checksum_from_record(record)
def get_checksum_method_from_record(record):
    '''Public API: return the checksum method stored in a file handle record.

    Thin wrapper around ``_get_checksum_method_from_record``.
    '''
    return _get_checksum_method_from_record(record)
def get_urls_original_from_record(record):
    '''Public API: return the parsed original-data URLs of a file record.

    Thin wrapper around ``_get_urls_original_from_record``.
    '''
    return _get_urls_original_from_record(record)
def get_urls_replicas_from_record(record):
    '''Public API: return the parsed replica URLs of a file record.

    Thin wrapper around ``_get_urls_replicas_from_record``.
    '''
    return _get_urls_replicas_from_record(record)
def get_list_of_aggregation_handles_from_record(record):
    '''Public API: return the handles of the aggregations a record is part of.

    Thin wrapper around ``_get_list_of_aggregation_handles_from_record``.
    '''
    return _get_list_of_aggregation_handles_from_record(record)
def get_list_of_aggregation_records_from_record(record):
    '''Public API: return info on the aggregations a record is part of.

    Thin wrapper around ``_get_list_of_aggregation_records_from_record``.
    '''
    return _get_list_of_aggregation_records_from_record(record)
# Dataset handle records
def get_drs_name_from_record(record):
    '''Public API: return the DRS name stored in a dataset handle record.

    Thin wrapper around ``_get_drs_name_from_record``.
    '''
    return _get_drs_name_from_record(record)
def get_version_number_from_record(record):
    '''Public API: return the version number stored in a handle record.

    Thin wrapper around ``_get_version_number_from_record``.
    '''
    return _get_version_number_from_record(record)
def get_replaced_by_from_record(record):
    '''Public API: return the 'replaced by' entry of a handle record.

    Thin wrapper around ``_get_replaced_by_from_record``.
    '''
    return _get_replaced_by_from_record(record)
def get_hosts_original_from_record(record):
    '''Public API: return the parsed hosting-node info of a dataset record.

    Thin wrapper around ``_get_hosts_original_from_record``.
    '''
    return _get_hosts_original_from_record(record)
def get_hosts_replicas_from_record(record):
    '''Public API: return the parsed replica-node info of a dataset record.

    Thin wrapper around ``_get_hosts_replicas_from_record``.
    '''
    return _get_hosts_replicas_from_record(record)
def get_list_of_parts_handles_from_record(record):
    '''Public API: return the handles of the parts of a handle record.

    Thin wrapper around ``_get_list_of_parts_handles_from_record``.
    '''
    return _get_list_of_parts_handles_from_record(record)
def get_list_of_parts_records_from_record(record):
    '''Public API: return info on the parts of a handle record.

    Thin wrapper around ``_get_list_of_parts_records_from_record``.
    '''
    return _get_list_of_parts_records_from_record(record)
# Actual implementation and helpers
def _get_aggregation_level(record):
    '''Look up the 'AGGREGATION_LEVEL' entry of a handle record.

    The lookup itself is done by ``get_value_from_record``.
    '''
    key = 'AGGREGATION_LEVEL'
    return get_value_from_record(record, key)
def _is_file(record):
    '''Tell whether the handle record describes a file.

    Returns True when the record's aggregation level equals 'FILE' and
    False otherwise. The result is always a real boolean, so callers that
    compare against False or serialise the value do not see an implicit
    None.
    '''
    return get_aggregation_level(record) == 'FILE'
def is_dataset(record):
def _is_dataset(record):
    '''Tell whether the handle record describes a dataset.

    Returns True when the record's aggregation level equals 'DATASET' and
    False otherwise. The result is always a real boolean, so callers that
    compare against False or serialise the value do not see an implicit
    None.
    '''
    return get_aggregation_level(record) == 'DATASET'
def get_title_from_record(record):
def _get_title_from_record(record):
if is_file(record):
return get_filename_from_record(record)
elif is_dataset(record):
......@@ -25,42 +96,31 @@ def get_title_from_record(record):
else:
return None
###
### Parsing file handle records
###
# The following methods get the correct fields from the handle record
# They operate on the KEY of the handle record fields, so they
# depend strongly on the Handle Record dialect:
def _get_aggregation_field_from_record(record):
    '''Return the raw field naming the aggregation(s) a record belongs to.

    The 'IS_PART_OF' entry is preferred; the 'PARENT' entry is only
    consulted when the first lookup yields None.
    '''
    primary = get_value_from_record(record, 'IS_PART_OF')
    if primary is not None:
        return primary
    # TODO Remove once migration is done!
    return get_value_from_record(record, 'PARENT')
def get_filename_from_record(json_record):
return get_value_from_record(json_record, 'FILENAME')
def _get_filename_from_record(record):
    '''Look up the 'FILENAME' entry of a handle record.'''
    key = 'FILENAME'
    return get_value_from_record(record, key)
def get_filesize_from_record(json_record):
return get_value_from_record(json_record, 'FILESIZE')
def _get_filesize_from_record(record):
    '''Look up the 'FILESIZE' entry of a handle record.'''
    key = 'FILESIZE'
    return get_value_from_record(record, key)
def get_checksum_from_record(json_record):
return get_value_from_record(json_record, 'CHECKSUM')
def _get_checksum_from_record(record):
    '''Look up the 'CHECKSUM' entry of a handle record.'''
    key = 'CHECKSUM'
    return get_value_from_record(record, key)
def get_checksum_method_from_record(json_record):
return get_value_from_record(json_record, 'CHECKSUM_METHOD')
def _get_checksum_method_from_record(record):
    '''Look up the 'CHECKSUM_METHOD' entry of a handle record.'''
    key = 'CHECKSUM_METHOD'
    return get_value_from_record(record, key)
def get_urls_original_from_record(json_record):
field_string = get_value_from_record(json_record, 'URL_ORIGINAL_DATA')
parsed = _extract_url_info_from_field(field_string, 'href', 'host')
if parsed is not None:
parsed = _rename_href_to_url(parsed)
parsed = _remove_duplicate_urls(parsed)
return parsed
def get_urls_replicas_from_record(json_record):
field_string = get_value_from_record(json_record, 'URL_REPLICAS')
parsed = _extract_url_info_from_field(field_string, 'href', 'host')
if parsed is not None:
parsed = _rename_href_to_url(parsed)
parsed = _remove_duplicate_urls(parsed)
return parsed
def _get_urls_original_from_record(record):
    '''Read the 'URL_ORIGINAL_DATA' entry of a record and parse it.

    The raw field value is handed to ``_parse_urls_original_from_record``.
    '''
    return _parse_urls_original_from_record(
        get_value_from_record(record, 'URL_ORIGINAL_DATA')
    )
def _get_urls_replicas_from_record(record):
    '''Read the 'URL_REPLICAS' entry of a record and parse it.

    The raw field value is handed to ``_parse_urls_replicas_from_record``.
    '''
    return _parse_urls_replicas_from_record(
        get_value_from_record(record, 'URL_REPLICAS')
    )
def _rename_href_to_url(list_of_dicts):
''' Helper for the list of dict that is the result of parsing
......@@ -85,20 +145,41 @@ def _remove_duplicate_urls(list_of_dicts):
new_list_of_dicts.append(item)
return new_list_of_dicts
def _get_aggregation_field_from_record(json_record):
field_string = get_value_from_record(json_record, 'IS_PART_OF')
if field_string is None:
field_string = get_value_from_record(json_record, 'PARENT') # TODO Remove once migration is done!
return field_string
def _parse_urls_original_from_record(field_string):
    '''Parse the value of the original-data URL field.

    Extracts the 'href' and 'host' attributes from the field, then renames
    the 'href' entries and drops duplicate URLs via the module helpers.
    Returns None when the extraction step yields None.
    '''
    entries = _extract_url_info_from_field(field_string, 'href', 'host')
    if entries is None:
        return None
    renamed = _rename_href_to_url(entries)
    return _remove_duplicate_urls(renamed)
# The following methods operate on the VALUES of the
# handle record fields, so they also depend
# strongly on the Handle Record dialect:
def _parse_urls_replicas_from_record(field_string):
    '''Parse the value of the replica URL field.

    Extracts the 'href' and 'host' attributes from the field, then renames
    the 'href' entries and drops duplicate URLs via the module helpers.
    Returns None when the extraction step yields None.
    '''
    entries = _extract_url_info_from_field(field_string, 'href', 'host')
    if entries is None:
        return None
    renamed = _rename_href_to_url(entries)
    return _remove_duplicate_urls(renamed)
def get_list_of_aggregation_handles_from_record(json_record):
field_string = _get_aggregation_field_from_record(json_record)
def _get_list_of_aggregation_handles_from_record(record):
    '''Return the handles of the aggregations this record is part of.

    The aggregation field is a single string in which the handles are
    separated by ';'.

    Returns:
        A list of handle strings, or None when the record has no
        aggregation field at all (the field lookup itself can return None,
        and calling ``split`` on it would raise AttributeError).
    '''
    field_string = get_aggregation_field_from_record(record)
    if field_string is None:
        return None
    return field_string.split(';')
def _get_list_of_aggregation_records_from_record(record):
    '''Collect info on every aggregation the given record is part of.

    Args:
        record: The handle record whose aggregations are looked up.

    Returns:
        A tuple ``(list_of_infos, any_replaced)``: one info dict per
        aggregation handle, plus a flag that is True as soon as one of the
        aggregations was reported as replaced. ``(None, False)`` is
        returned when no list of aggregation handles is available.
    '''
    list_of_aggregation_handles = get_list_of_aggregation_handles_from_record(record)
    if list_of_aggregation_handles is None:
        return None, False
    return _get_aggregation_records_for_handles(list_of_aggregation_handles)


def _get_aggregation_records_for_handles(list_of_aggregation_handles):
    '''Retrieve and summarise the record behind each aggregation handle.

    This helper deliberately has its own name: defining it under the same
    name as the record-based entry point would rebind that name, and the
    entry point would then be called with a record instead of a handle
    list.
    '''
    list_to_be_returned = []
    any_replaced = False
    for handle in list_of_aggregation_handles:
        info, replaced = _extract_info_on_one_aggregation_record(handle)
        list_to_be_returned.append(info)
        if replaced:
            any_replaced = True
    return list_to_be_returned, any_replaced
def _extract_url_info_from_field(field_string, *attributes):
if field_string is None:
return None
......@@ -115,29 +196,16 @@ def _extract_url_info_from_field(field_string, *attributes):
list_of_items.append(temp)
return list_of_items
def get_list_of_aggregation_records_from_record(list_of_aggregation_handles):
list_to_be_returned = []
any_replaced = False
for handle in list_of_aggregation_handles:
info, replaced = _extract_info_on_one_aggregation_record(handle)
list_to_be_returned.append(info)
if replaced:
any_replaced = True
return list_to_be_returned, any_replaced
def _extract_info_on_one_aggregation_record(handle):
# Retrieve handle:
json_record = retrieval.get_handle_record_json(handle)
record = retrieval.get_handle_record_json(handle)
# Get info:
aggregation_level = get_aggregation_level(json_record)
drs_id = get_drs_name_from_record(json_record)
vers_num = get_version_number_from_record(json_record)
newer_version = get_replaced_by_from_record(json_record)
aggregation_level = get_aggregation_level(record)
drs_id = get_drs_name_from_record(record)
vers_num = get_version_number_from_record(record)
newer_version = get_replaced_by_from_record(record)
replaced = False
if newer_version is not None and not newer_version == handle:
replaced = True
......@@ -145,7 +213,7 @@ def _extract_info_on_one_aggregation_record(handle):
# Assemble info:
info_to_return = dict(
handle = handle,
level = aggregation_level,
level = aggregation_level.lower(),
title = drs_id,
version = vers_num,
replaced = newer_version
......@@ -156,30 +224,31 @@ def _extract_info_on_one_aggregation_record(handle):
### Parsing dataset handle records
###
def get_drs_name_from_record(json_record):
return get_value_from_record(json_record, 'DRS_ID')
def _get_drs_name_from_record(record):
    '''Look up the 'DRS_ID' entry of a handle record.'''
    key = 'DRS_ID'
    return get_value_from_record(record, key)
def get_version_number_from_record(json_record):
return get_value_from_record(json_record, 'VERSION_NUMBER')
def _get_version_number_from_record(record):
    '''Look up the 'VERSION_NUMBER' entry of a handle record.'''
    key = 'VERSION_NUMBER'
    return get_value_from_record(record, key)
def get_replaced_by_from_record(json_record):
return get_value_from_record(json_record, 'REPLACED_BY')
def _get_replaced_by_from_record(record):
    '''Look up the 'REPLACED_BY' entry of a handle record.'''
    key = 'REPLACED_BY'
    return get_value_from_record(record, key)
def get_hosts_original_from_record(json_record):
field_string = get_value_from_record(json_record, 'HOSTING_NODE')
def _get_hosts_original_from_record(record):
    '''Read the 'HOSTING_NODE' entry of a record and extract its hosts.

    The raw field value is passed to ``_extract_url_info_from_field``,
    asking for the 'host' attribute only.
    '''
    raw_field = get_value_from_record(record, 'HOSTING_NODE')
    return _extract_url_info_from_field(raw_field, 'host')
def get_hosts_replicas_from_record(json_record):
field_string = get_value_from_record(json_record, 'REPLICA_NODE')
def _get_hosts_replicas_from_record(record):
    '''Read the 'REPLICA_NODE' entry of a record and extract its hosts.

    The raw field value is passed to ``_extract_url_info_from_field``,
    asking for the 'host' attribute only.
    '''
    raw_field = get_value_from_record(record, 'REPLICA_NODE')
    return _extract_url_info_from_field(raw_field, 'host')
def get_list_of_parts_handles_from_record(json_record):
field_string = _get_parts_field_from_record(json_record)
def _get_list_of_parts_handles_from_record(record):
    '''Return the handles of the parts that make up this record.

    The parts field is a single string in which the handles are separated
    by ';'.

    Returns:
        A list of handle strings, or None when the record has no parts
        field. Returning None (instead of failing on ``None.split``) lets
        callers test the result for None before using it.
    '''
    field_string = _get_parts_field_from_record(record)
    if field_string is None:
        return None
    return field_string.split(';')
def get_list_of_parts_records_from_record(list_of_parts_handles):
def _get_list_of_parts_records_from_record(record):
list_of_parts_handles = _get_list_of_parts_handles_from_record(record)
list_to_be_returned = []
any_replaced = False
for handle in list_of_parts_handles:
......@@ -192,13 +261,13 @@ def get_list_of_parts_records_from_record(list_of_parts_handles):
def _extract_info_on_one_part_record(handle):
# Retrieve handle:
json_record = retrieval.get_handle_record_json(handle)
record = retrieval.get_handle_record_json(handle)
# Get info:
aggregation_level = get_aggregation_level(json_record)
title = get_title_from_record(json_record)
vers_num = get_version_number_from_record(json_record)
newer_version = get_replaced_by_from_record(json_record)
aggregation_level = get_aggregation_level(record)
title = get_title_from_record(record)
vers_num = get_version_number_from_record(record)
newer_version = get_replaced_by_from_record(record)
replaced = False
if newer_version is not None and not newer_version == handle:
replaced = True
......@@ -216,19 +285,13 @@ def _extract_info_on_one_part_record(handle):
return info_to_return, replaced
def _get_parts_field_from_record(json_record):
field_string = get_value_from_record(json_record, 'HAS_PARTS')
def _get_parts_field_from_record(record):
    '''Return the raw field listing the parts of a handle record.

    The 'HAS_PARTS' entry is preferred; the 'CHILDREN' entry is only
    consulted when the first lookup yields None. Every lookup uses the
    ``record`` parameter, the only record name in scope here.
    '''
    field_string = get_value_from_record(record, 'HAS_PARTS')
    if field_string is None:
        # TODO Remove once migration is done!
        field_string = get_value_from_record(record, 'CHILDREN')
    return field_string
###
### Utils
###
# TODO Use pyhandle!
def get_value_from_record(record, key):
''' Returns the first occurrence as value.'''
values = []
......@@ -255,7 +318,7 @@ def get_values_from_record(record, key):
values.append(value)
return values
def get_all_entries_as_dict(record, include_special_types=True):
def _get_all_entries_as_dict(record, include_special_types=True):
special_types = DEFAULTS['special_types']
record_dict = {}
for entry in record['values']:
......
......@@ -56,7 +56,7 @@ def _get_context(json_record):
val1 = parser.get_list_of_parts_handles_from_record(json_record)
if val1 is not None:
context['list_of_parts_handles'] = val1
val2,val3 = parser.get_list_of_parts_records_from_record(val1)
val2,val3 = parser.get_list_of_parts_records_from_record(json_record)
if val2 is not None:
context['list_of_parts_records'] = val2
context['any_part_replaced'] = val3
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment