diff --git a/drs/src/cordex.rs b/drs/src/cordex.rs index 23e1326dd03bbbca08de9e038928a905de32476c..46be62f6febc0dd0f0e4ed687823e5716b009438 100644 --- a/drs/src/cordex.rs +++ b/drs/src/cordex.rs @@ -16,7 +16,9 @@ //! <rcm_model_name>_<rcm_version_id>_<frequency>[_<start_time>-<end_time>].nc //! ``` //! -//! Exceptions to the Cordex standard: +//! # Exceptions to the Cordex standard: +//! * According to the spec, `activity` and `product` must be `cordex` and `output` respectively. This allows +//! arbitrary values for these elements. //! * This allows frequency values of `1hr` in addition to the normal values use std::str::FromStr; @@ -27,13 +29,6 @@ use thiserror::Error; use crate::{cmip5::EnsembleMember, parser::parse_cordex}; -/// As per the documentation, <activity> must be `"CORDEX"` -/// -/// In netcdf file attributes, called `project_id`. -pub const ACTIVITY_VALUE: &str = "cordex"; -/// As per the documentation, <product> must be `"output"` -pub const PRODUCT_VALUE: &str = "output"; - /// Error parsing a cordex style path #[derive(Debug, Error)] #[error("error parsing CORDEX path: {reason}")] @@ -82,6 +77,10 @@ impl<'a> Cordex<'a> { /// Where relevant, name in attributes is referring to the key within the netcdf file's metadata for that field. #[derive(Debug, PartialEq, Eq)] pub struct PathMetadata<'a> { + /// Name of the activity for this dataset. In the spec this must be `cordex` but this is not enforced here. + pub activity: &'a str, + /// Name of the product of this dataset. In the spec this must be `output` but this is not enforced here. + pub product: &'a str, /// The name assigned to each of the CORDEX regions and includes a flag for resolution. /// /// Possible values come from the name column(?) of Tables 1 and 2 in the source document. diff --git a/drs/src/parser/cordex.rs b/drs/src/parser/cordex.rs index 61e09d905b424ec1eb6229991bbf6e7431740866..991eb6907ebd6bee8d7754023f5188d319b71454 100644 --- a/drs/src/parser/cordex.rs +++ b/drs/src/parser/cordex.rs @@ -3,13 +3,13 @@ use std::str::FromStr; use chrono::NaiveDateTime; use nom::{ bytes::complete::tag, - combinator::{eof, map_res, opt, verify}, + combinator::{eof, map_res, opt}, sequence::{preceded, terminated}, IResult, }; use crate::{ - cordex::{Frequency, PathMetadata, ACTIVITY_VALUE, PRODUCT_VALUE}, + cordex::{Frequency, PathMetadata}, parser::{ cmip5::parse_ensemble, common::{name_segment, name_sep, parse_time, parse_version, path_sep}, @@ -24,8 +24,8 @@ pub(crate) fn parse_cordex(i: &str) -> IResult<&str, PathMetadata> { <gcm_model_name>/<cmip5_experiement_name>/<cmip5_ensemble_member>/ <rcm_model_name>/<rcm_version_id>/<frequency>/<variable_name> */ - let (i, _activity) = terminated(parse_activity, path_sep)(i)?; - let (i, _product) = terminated(parse_product, path_sep)(i)?; + let (i, activity) = path_segment(i)?; + let (i, product) = path_segment(i)?; let (i, domain) = path_segment(i)?; let (i, institution) = path_segment(i)?; let (i, gcm_model_name) = path_segment(i)?; @@ -59,6 +59,8 @@ pub(crate) fn parse_cordex(i: &str) -> IResult<&str, PathMetadata> { Ok(( i, PathMetadata { + activity, + product, domain, institution, gcm_model_name, @@ -75,14 +77,6 @@ pub(crate) fn parse_cordex(i: &str) -> IResult<&str, PathMetadata> { )) } -fn parse_activity(i: &str) -> IResult<&str, &str> { - verify(word, |activity: &str| activity == ACTIVITY_VALUE)(i) -} - -fn parse_product(i: &str) -> IResult<&str, &str> { - verify(word, |product: &str| product == PRODUCT_VALUE)(i) -} - fn parse_frequency(i: &str) -> IResult<&str, Frequency> { map_res(word, Frequency::from_str)(i) } @@ -129,6 +123,8 @@ mod tests { "rsdt_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_19910101-19951231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -153,6 +149,8 @@ mod tests { "prw_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_19960101-20001231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -177,6 +175,8 @@ mod tests { "ta500_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_20060101-20101231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -201,6 +201,8 @@ mod tests { "tauu_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_19910101-19951231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -225,6 +227,8 @@ mod tests { "rlut_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_20010101-20051231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -249,6 +253,8 @@ mod tests { "ua500_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_19960101-20001231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -273,6 +279,8 @@ mod tests { "tauv_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_20110101-20111130.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -297,6 +305,8 @@ mod tests { "hus850_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_20110101-20111130.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -321,6 +331,8 @@ mod tests { "evspsbl_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_20010101-20051231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -345,6 +357,8 @@ mod tests { "va200_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_19900101-19901231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -369,6 +383,8 @@ mod tests { "sfcWindmax_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_20110101-20111130.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", @@ -393,6 +409,8 @@ mod tests { "zmla_AFR-44_ECMWF-ERAINT_evaluation_r1i1p1_MOHC-HadRM3P_v1_day_20010101-20051231.nc" ), PathMetadata { + activity: "cordex", + product: "output", domain: "AFR-44", institution: "MOHC", gcm_model_name: "ECMWF-ERAINT", diff --git a/freva/src/drs/metadata.rs b/freva/src/drs/metadata.rs index 009f4b7d0338c1a5464cd8c372d40e524977d659..e5a6fb82c36c773471015fb0a38812a1e28b49d8 100644 --- a/freva/src/drs/metadata.rs +++ b/freva/src/drs/metadata.rs @@ -12,8 +12,7 @@ use std::{ }; use camino::{Utf8Path, Utf8PathBuf}; -use drs::{cmip5::Cmip5, cordex::Cordex}; -use drs::{cmip6::Cmip6, cordex}; +use drs::{cmip5::Cmip5, cmip6::Cmip6, cordex::Cordex}; use netcdf::AttrValue; use thiserror::Error; use tracing::{debug, error}; @@ -312,8 +311,8 @@ impl<'a> From<Cordex<'a>> for Metadata { let mip_table = "atmos".to_owned(); // all cordex data is atmos Self { path: cordex.path.to_owned(), - activity: cordex::ACTIVITY_VALUE.to_owned(), - product: cordex::PRODUCT_VALUE.to_owned(), + activity: m.activity.to_owned(), + product: m.product.to_owned(), institute: m.institution.to_owned(), model: m.gcm_model_name.to_owned(), experiment: m.cmip5_experiment_name.to_owned(),