Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Merret Buurman
pidlandingpage
Commits
58a8eed9
Commit
58a8eed9
authored
Aug 05, 2016
by
Merret Buurman
Browse files
Changed order of methods in parser, added comments.
parent
70329e80
Changes
1
Hide whitespace changes
Inline
Side-by-side
landingpage/landingpageapp/handle/cmip6parser.py
View file @
58a8eed9
...
...
@@ -83,6 +83,14 @@ def get_errata_field_from_record(record):
# Actual implementation and helpers
def
_get_title_from_record
(
record
):
if
is_file
(
record
):
return
get_filename_from_record
(
record
)
elif
is_dataset
(
record
):
return
get_drs_from_record
(
record
)
else
:
return
None
def
_get_aggregation_level
(
record
):
return
get_value_from_record
(
record
,
'AGGREGATION_LEVEL'
)
...
...
@@ -94,23 +102,30 @@ def _is_dataset(record):
if
get_aggregation_level
(
record
)
==
'DATASET'
:
return
True
def
_get_title_from_record
(
record
):
if
is_file
(
record
):
return
get_filename_from_record
(
record
)
elif
is_dataset
(
record
):
return
get_drs_from_record
(
record
)
else
:
return
None
def
_get_aggregation_field_from_record
(
record
):
field_string
=
get_value_from_record
(
record
,
'IS_PART_OF'
)
if
field_string
is
None
:
field_string
=
get_value_from_record
(
record
,
'PARENT'
)
# TODO Remove once migration is done!
return
field_string
def
_get_list_of_aggregation_handles_from_record
(
record
):
field_string
=
get_aggregation_field_from_record
(
record
)
list_of_strings
=
field_string
.
split
(
';'
)
handles
=
[]
for
member
in
list_of_strings
:
handles
.
append
(
_remove_hdl_from_handle
(
member
))
return
handles
def
_get_errata_field_from_record
(
record
):
return
get_value_from_record
(
record
,
'ERRATA_IDS'
)
def
_get_errata_list_from_record
(
record
):
field_string
=
get_errata_field_from_record
(
record
)
if
field_string
is
not
None
:
return
field_string
.
split
(
';'
)
else
:
return
None
def
_get_filename_from_record
(
record
):
return
get_value_from_record
(
record
,
'FILE_NAME'
)
...
...
@@ -131,6 +146,22 @@ def _get_urls_replicas_from_record(record):
field_string
=
get_value_from_record
(
record
,
'URL_REPLICAS'
)
return
_parse_urls_replicas_from_record
(
field_string
)
# Helpers:
def
_parse_urls_original_from_record
(
field_string
):
parsed
=
_extract_url_info_from_field
(
field_string
,
'href'
,
'host'
)
if
parsed
is
not
None
:
parsed
=
_rename_href_to_url
(
parsed
)
parsed
=
_remove_duplicate_urls
(
parsed
)
return
parsed
def
_parse_urls_replicas_from_record
(
field_string
):
parsed
=
_extract_url_info_from_field
(
field_string
,
'href'
,
'host'
)
if
parsed
is
not
None
:
parsed
=
_rename_href_to_url
(
parsed
)
parsed
=
_remove_duplicate_urls
(
parsed
)
return
parsed
def
_rename_href_to_url
(
list_of_dicts
):
''' Helper for the list of dict that is the result of parsing
a locations xml snippet.'''
...
...
@@ -154,34 +185,22 @@ def _remove_duplicate_urls(list_of_dicts):
new_list_of_dicts
.
append
(
item
)
return
new_list_of_dicts
def
_parse_urls_original_from_record
(
field_string
):
parsed
=
_extract_url_info_from_field
(
field_string
,
'href'
,
'host'
)
if
parsed
is
not
None
:
parsed
=
_rename_href_to_url
(
parsed
)
parsed
=
_remove_duplicate_urls
(
parsed
)
return
parsed
def
_get_errata_list_from_record
(
record
):
field_string
=
get_errata_field_from_record
(
record
)
if
field_string
is
not
None
:
return
field_string
.
split
(
';'
)
else
:
def
_extract_url_info_from_field
(
field_string
,
*
attributes
):
if
field_string
is
None
:
return
None
else
:
field_xml
=
ET
.
fromstring
(
field_string
)
list_of_items
=
[]
locations
=
field_xml
.
findall
(
'location'
)
for
location
in
locations
:
temp
=
{}
for
attr
in
attributes
:
val
=
location
.
get
(
attr
)
if
val
is
not
None
:
temp
[
attr
]
=
val
list_of_items
.
append
(
temp
)
return
list_of_items
def
_parse_urls_replicas_from_record
(
field_string
):
parsed
=
_extract_url_info_from_field
(
field_string
,
'href'
,
'host'
)
if
parsed
is
not
None
:
parsed
=
_rename_href_to_url
(
parsed
)
parsed
=
_remove_duplicate_urls
(
parsed
)
return
parsed
def
_get_list_of_aggregation_handles_from_record
(
record
):
field_string
=
get_aggregation_field_from_record
(
record
)
list_of_strings
=
field_string
.
split
(
';'
)
handles
=
[]
for
member
in
list_of_strings
:
handles
.
append
(
_remove_hdl_from_handle
(
member
))
return
handles
# Retrieve info on parent dataset from handle system:
...
...
@@ -202,26 +221,10 @@ def _get_list_of_aggregation_records_from_record(list_of_aggregation_handles):
return
list_to_be_returned
,
any_replaced
def
_extract_url_info_from_field
(
field_string
,
*
attributes
):
if
field_string
is
None
:
return
None
else
:
field_xml
=
ET
.
fromstring
(
field_string
)
list_of_items
=
[]
locations
=
field_xml
.
findall
(
'location'
)
for
location
in
locations
:
temp
=
{}
for
attr
in
attributes
:
val
=
location
.
get
(
attr
)
if
val
is
not
None
:
temp
[
attr
]
=
val
list_of_items
.
append
(
temp
)
return
list_of_items
def
_extract_info_on_one_aggregation_record
(
handle
):
def
_extract_info_on_one_aggregation_record
(
parenthandle
):
# Retrieve handle:
record
=
retrieval
.
get_handle_record_json
(
handle
)
record
=
retrieval
.
get_handle_record_json
(
parent
handle
)
# Get info:
aggregation_level
=
get_aggregation_level
(
record
)
...
...
@@ -229,12 +232,12 @@ def _extract_info_on_one_aggregation_record(handle):
vers_num
=
get_version_number_from_record
(
record
)
newer_version
=
get_replaced_by_from_record
(
record
)
replaced
=
False
if
newer_version
is
not
None
and
not
newer_version
==
handle
:
if
newer_version
is
not
None
and
not
newer_version
==
parent
handle
:
replaced
=
True
# Assemble info:
info_to_return
=
dict
(
handle
=
handle
,
handle
=
parent
handle
,
level
=
aggregation_level
.
lower
(),
title
=
drs_id
,
version
=
vers_num
,
...
...
@@ -267,6 +270,12 @@ def _get_hosts_replicas_from_record(record):
parsed
=
_extract_url_info_from_field
(
field_string
,
'host'
)
return
parsed
def
_get_parts_field_from_record
(
record
):
field_string
=
get_value_from_record
(
record
,
'HAS_PARTS'
)
if
field_string
is
None
:
field_string
=
get_value_from_record
(
record
,
'CHILDREN'
)
# TODO Remove once migration is done!
return
field_string
def
_get_list_of_parts_handles_from_record
(
record
):
field_string
=
_get_parts_field_from_record
(
record
)
list_of_strings
=
field_string
.
split
(
';'
)
...
...
@@ -275,6 +284,8 @@ def _get_list_of_parts_handles_from_record(record):
handles
.
append
(
_remove_hdl_from_handle
(
string
))
return
handles
# Retrieve info on child files from handle system:
def
_get_list_of_parts_records_from_record
(
record
):
list_of_parts_handles
=
_get_list_of_parts_handles_from_record
(
record
)
list_to_be_returned
=
[]
...
...
@@ -312,13 +323,7 @@ def _extract_info_on_one_part_record(handle):
return
info_to_return
,
replaced
def
_get_parts_field_from_record
(
record
):
field_string
=
get_value_from_record
(
record
,
'HAS_PARTS'
)
if
field_string
is
None
:
field_string
=
get_value_from_record
(
record
,
'CHILDREN'
)
# TODO Remove once migration is done!
return
field_string
# Parsing helpers
def
get_value_from_record
(
record
,
key
):
''' Returns the first occurrence as value.'''
...
...
@@ -357,7 +362,7 @@ def _get_all_entries_as_dict(record, include_special_types=True):
record_dict
[
key
]
=
str
(
get_value_from_record
(
record
,
key
))
return
record_dict
# Helper
# H
andle h
elper
def
_remove_hdl_from_handle
(
handle
):
if
handle
is
not
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment