Commit 58a8eed9 authored by Merret Buurman
Browse files

Changed order of methods in parser, added comments.

parent 70329e80
......@@ -83,6 +83,14 @@ def get_errata_field_from_record(record):
# Actual implementation and helpers
def _get_title_from_record(record):
    ''' Pick the title of a record: the result of
    get_filename_from_record() if is_file() holds, the result of
    get_drs_from_record() if is_dataset() holds, otherwise None.'''
    if is_file(record):
        return get_filename_from_record(record)
    if is_dataset(record):
        return get_drs_from_record(record)
    return None
def _get_aggregation_level(record):
    ''' Look up the 'AGGREGATION_LEVEL' entry of the record.'''
    aggregation_level = get_value_from_record(record, 'AGGREGATION_LEVEL')
    return aggregation_level
......@@ -94,23 +102,30 @@ def _is_dataset(record):
if get_aggregation_level(record) == 'DATASET':
return True
def _get_title_from_record(record):
    ''' Pick the title of a record: the result of
    get_filename_from_record() if is_file() holds, the result of
    get_drs_from_record() if is_dataset() holds, otherwise None.'''
    if is_file(record):
        return get_filename_from_record(record)
    if is_dataset(record):
        return get_drs_from_record(record)
    return None
def _get_aggregation_field_from_record(record):
    ''' Return the 'IS_PART_OF' entry of the record; if that lookup
    yields None, return the 'PARENT' entry instead.'''
    is_part_of = get_value_from_record(record, 'IS_PART_OF')
    if is_part_of is not None:
        return is_part_of
    # TODO Remove once migration is done!
    return get_value_from_record(record, 'PARENT')
def _get_list_of_aggregation_handles_from_record(record):
    ''' Split the record's aggregation field at ';' and pass every
    member through _remove_hdl_from_handle(). Returns the resulting
    list.'''
    aggregation_field = get_aggregation_field_from_record(record)
    return [
        _remove_hdl_from_handle(entry)
        for entry in aggregation_field.split(';')
    ]
def _get_errata_field_from_record(record):
    ''' Look up the 'ERRATA_IDS' entry of the record.'''
    errata_field = get_value_from_record(record, 'ERRATA_IDS')
    return errata_field
def _get_errata_list_from_record(record):
    ''' Return the record's errata field split at ';', or None if
    the errata field is None.'''
    errata_field = get_errata_field_from_record(record)
    if errata_field is None:
        return None
    return errata_field.split(';')
def _get_filename_from_record(record):
    ''' Look up the 'FILE_NAME' entry of the record.'''
    filename = get_value_from_record(record, 'FILE_NAME')
    return filename
......@@ -131,6 +146,22 @@ def _get_urls_replicas_from_record(record):
field_string = get_value_from_record(record, 'URL_REPLICAS')
return _parse_urls_replicas_from_record(field_string)
# Helpers:
def _parse_urls_original_from_record(field_string):
    ''' Extract the 'href' and 'host' info from the field string, then
    apply _rename_href_to_url() and _remove_duplicate_urls() to it.
    Returns None if the extraction yields None.'''
    url_infos = _extract_url_info_from_field(field_string, 'href', 'host')
    if url_infos is None:
        return None
    url_infos = _rename_href_to_url(url_infos)
    return _remove_duplicate_urls(url_infos)
def _parse_urls_replicas_from_record(field_string):
    ''' Extract the 'href' and 'host' info from the field string, then
    apply _rename_href_to_url() and _remove_duplicate_urls() to it.
    Returns None if the extraction yields None.'''
    url_infos = _extract_url_info_from_field(field_string, 'href', 'host')
    if url_infos is None:
        return None
    url_infos = _rename_href_to_url(url_infos)
    return _remove_duplicate_urls(url_infos)
def _rename_href_to_url(list_of_dicts):
''' Helper for the list of dict that is the result of parsing
a locations xml snippet.'''
......@@ -154,34 +185,22 @@ def _remove_duplicate_urls(list_of_dicts):
new_list_of_dicts.append(item)
return new_list_of_dicts
def _parse_urls_original_from_record(field_string):
    ''' Extract the 'href' and 'host' info from the field string, then
    apply _rename_href_to_url() and _remove_duplicate_urls() to it.
    Returns None if the extraction yields None.'''
    url_infos = _extract_url_info_from_field(field_string, 'href', 'host')
    if url_infos is None:
        return None
    url_infos = _rename_href_to_url(url_infos)
    return _remove_duplicate_urls(url_infos)
def _get_errata_list_from_record(record):
field_string = get_errata_field_from_record(record)
if field_string is not None:
return field_string.split(';')
else:
# Parse an XML snippet and collect the requested attributes of its
# 'location' elements.
#
# field_string: the XML text to parse, or None.
# attributes:   names of the attributes to read from each 'location'.
#
# Returns None if field_string is None; otherwise a list of dicts that
# map attribute names to the values found. Attributes for which
# location.get() yields None are left out of the dict.
#
# NOTE(review): indentation is lost in this view, so it cannot be told
# from here whether the append runs once per location or once per
# attribute -- presumably once per location; confirm against the file.
def _extract_url_info_from_field(field_string, *attributes):
if field_string is None:
return None
else:
# ET is not defined in this view; presumably xml.etree.ElementTree.
field_xml = ET.fromstring(field_string)
list_of_items = []
locations = field_xml.findall('location')
for location in locations:
temp = {}
for attr in attributes:
val = location.get(attr)
# Only attributes that are actually present are recorded.
if val is not None:
temp[attr] = val
list_of_items.append(temp)
return list_of_items
def _parse_urls_replicas_from_record(field_string):
    ''' Extract the 'href' and 'host' info from the field string, then
    apply _rename_href_to_url() and _remove_duplicate_urls() to it.
    Returns None if the extraction yields None.'''
    url_infos = _extract_url_info_from_field(field_string, 'href', 'host')
    if url_infos is None:
        return None
    url_infos = _rename_href_to_url(url_infos)
    return _remove_duplicate_urls(url_infos)
def _get_list_of_aggregation_handles_from_record(record):
    ''' Split the record's aggregation field at ';' and pass every
    member through _remove_hdl_from_handle(). Returns the resulting
    list.'''
    aggregation_field = get_aggregation_field_from_record(record)
    return [
        _remove_hdl_from_handle(entry)
        for entry in aggregation_field.split(';')
    ]
# Retrieve info on parent dataset from handle system:
......@@ -202,26 +221,10 @@ def _get_list_of_aggregation_records_from_record(list_of_aggregation_handles):
return list_to_be_returned, any_replaced
# Parse an XML snippet and collect the requested attributes of its
# 'location' elements.
#
# field_string: the XML text to parse, or None.
# attributes:   names of the attributes to read from each 'location'.
#
# Returns None if field_string is None; otherwise a list of dicts that
# map attribute names to the values found. Attributes for which
# location.get() yields None are left out of the dict.
#
# NOTE(review): indentation is lost in this view, so it cannot be told
# from here whether the append runs once per location or once per
# attribute -- presumably once per location; confirm against the file.
def _extract_url_info_from_field(field_string, *attributes):
if field_string is None:
return None
else:
# ET is not defined in this view; presumably xml.etree.ElementTree.
field_xml = ET.fromstring(field_string)
list_of_items = []
locations = field_xml.findall('location')
for location in locations:
temp = {}
for attr in attributes:
val = location.get(attr)
# Only attributes that are actually present are recorded.
if val is not None:
temp[attr] = val
list_of_items.append(temp)
return list_of_items
def _extract_info_on_one_aggregation_record(handle):
def _extract_info_on_one_aggregation_record(parenthandle):
# Retrieve handle:
record = retrieval.get_handle_record_json(handle)
record = retrieval.get_handle_record_json(parenthandle)
# Get info:
aggregation_level = get_aggregation_level(record)
......@@ -229,12 +232,12 @@ def _extract_info_on_one_aggregation_record(handle):
vers_num = get_version_number_from_record(record)
newer_version = get_replaced_by_from_record(record)
replaced = False
if newer_version is not None and not newer_version == handle:
if newer_version is not None and not newer_version == parenthandle:
replaced = True
# Assemble info:
info_to_return = dict(
handle = handle,
handle = parenthandle,
level = aggregation_level.lower(),
title = drs_id,
version = vers_num,
......@@ -267,6 +270,12 @@ def _get_hosts_replicas_from_record(record):
parsed = _extract_url_info_from_field(field_string, 'host')
return parsed
def _get_parts_field_from_record(record):
    ''' Return the 'HAS_PARTS' entry of the record; if that lookup
    yields None, return the 'CHILDREN' entry instead.'''
    has_parts = get_value_from_record(record, 'HAS_PARTS')
    if has_parts is not None:
        return has_parts
    # TODO Remove once migration is done!
    return get_value_from_record(record, 'CHILDREN')
def _get_list_of_parts_handles_from_record(record):
field_string = _get_parts_field_from_record(record)
list_of_strings = field_string.split(';')
......@@ -275,6 +284,8 @@ def _get_list_of_parts_handles_from_record(record):
handles.append(_remove_hdl_from_handle(string))
return handles
# Retrieve info on child files from handle system:
def _get_list_of_parts_records_from_record(record):
list_of_parts_handles = _get_list_of_parts_handles_from_record(record)
list_to_be_returned = []
......@@ -312,13 +323,7 @@ def _extract_info_on_one_part_record(handle):
return info_to_return, replaced
def _get_parts_field_from_record(record):
    ''' Return the 'HAS_PARTS' entry of the record; if that lookup
    yields None, return the 'CHILDREN' entry instead.'''
    has_parts = get_value_from_record(record, 'HAS_PARTS')
    if has_parts is not None:
        return has_parts
    # TODO Remove once migration is done!
    return get_value_from_record(record, 'CHILDREN')
# Parsing helpers
def get_value_from_record(record, key):
''' Returns the first occurrence as value.'''
......@@ -357,7 +362,7 @@ def _get_all_entries_as_dict(record, include_special_types=True):
record_dict[key] = str(get_value_from_record(record, key))
return record_dict
# Helper
# Handle helper
def _remove_hdl_from_handle(handle):
if handle is not None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment