Skip to content
Snippets Groups Projects
Commit aeed9ea6 authored by Brian Lewis's avatar Brian Lewis
Browse files

Merge branch 'fix-time-config-name' into 'main'

Fix time config name

See merge request !8
parents f68a8f6f 9ad72ee8
No related branches found
Tags 0.5.0
1 merge request!8Fix time config name
Pipeline #22218 passed
......@@ -356,7 +356,7 @@ dependencies = [
[[package]]
name = "freva"
version = "0.4.1"
version = "0.5.0"
dependencies = [
"camino",
"chrono",
......@@ -381,7 +381,7 @@ dependencies = [
[[package]]
name = "freva-ingest"
version = "0.4.1"
version = "0.5.0"
dependencies = [
"anyhow",
"camino",
......
......@@ -42,8 +42,7 @@ project = "observations"
## Custom Dataset Definitions
Custom dataset definitions require the additional values `parts_dir` and `parts_file_name`, `parts_time` and `defaults`
are both optional.
Custom dataset definitions require the additional values `parts_dir` and `parts_file_name`; `defaults` is optional.
- `parts_dir` defines which sections of the path (`/` separated) correspond to which metadata values,
note the lack of a `filename` element at the end which is assumed to be there in a valid DRS path.
......@@ -54,10 +53,11 @@ are both optional.
`project=cmip5` in `defaults`.
The required keys are `project`, `product`, `institute`, `model`, `experiment`, `time_frequency`, `realm`, `cmor_table`,
`ensemble`, and `variable`. `version` and `time_range` are used but optional, if it's missing the ingested files will
simply not have a version value. All other values will be ignored which can be useful to document what an unused element
is. Values can appear multiple times and in such cases the value that is ultimately used is the last instance found in
the order of `defaults`, `parts_dir`, `parts_file_name` (and within each, the last instance will take priority).
`ensemble`, and `variable`. `version` is used but optional; if it's missing, the ingested files will simply not have a
version value. `time` is also optional, see Time Range below for more details. All other values will be ignored which
can be useful to document what an unused element is. Values can appear multiple times and in such cases the value that
is ultimately used is the last instance found in the order of `defaults`, `parts_dir`, `parts_file_name` (and within
each, the last instance will take priority).
For example, given a (not quite valid) config like:
......@@ -80,7 +80,7 @@ Given a path `proj/prod/variable1/mod/freq/realm/variable2/variable3_freq_exp_va
### Time Range
`time_range` is the only key that will require a certain structure in its data. A valid time range must look like a
`time` is the only key that will require a certain structure in its data. A valid time range must look like a
CMIP5 time range, i.e. `start-end` where the section before the `-` is the start of the time range covered by the data
and the section after is the end. Each individual time must be of the form `YYYY[MM[DD[HH[mm[ss]]]]]` where `YYYY` means
the 4 digit year, `MM` is the 2 digit month, `DD` is the 2 digit day, `HH` is 2 digit hour (24 hour, not 12), `mm` is 2 digit
......@@ -89,6 +89,8 @@ optional but requires all those before e.g. that you can't have a time that spec
year, month, day, and hour. So a valid time might look like `20100405101112` which would mean `2010-04-05` at 10:11 and
12 seconds.
If `time` is not present, all files will be given the maximum range allowed by this schema: year `0` to year `9999`.
# Usage
## Ingestion
......
[package]
name = "freva-ingest"
version = "0.4.1"
version = "0.5.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
......
[package]
name = "freva"
version = "0.4.1"
version = "0.5.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
......
......@@ -38,20 +38,35 @@ pub enum Error {
InvalidTimeRange(#[from] ParseTimeRangeError),
}
// Define every component key once so the string values are not duplicated throughout the module. Each constant is
// named after the struct field it populates; its value is the key name read from the config, which differs from the
// field name in several cases (e.g. `KEY_ACTIVITY` holds "project").
const KEY_ACTIVITY: &str = "project";
const KEY_PRODUCT: &str = "product";
const KEY_INSTITUTE: &str = "institute";
const KEY_MODEL: &str = "model";
const KEY_EXPERIMENT: &str = "experiment";
const KEY_FREQUENCY: &str = "time_frequency";
const KEY_MODELING_REALM: &str = "realm";
const KEY_MIP_TABLE: &str = "cmor_table";
const KEY_ENSEMBLE: &str = "ensemble";
const KEY_VARIABLE: &str = "variable";
// The two keys below are optional: they are not part of the required set and may be absent from a config.
const KEY_VERSION: &str = "version";
const KEY_TIME_RANGE: &str = "time";
lazy_static! {
// these are different from the struct field names for backwards compatibility. The struct field names match the
// cmip5 spec they're derived from but these are the names that were previously used in Freva
static ref REQUIRED: HashSet<&'static str> = HashSet::from([
"project",
"product",
"institute",
"model",
"experiment",
"time_frequency",
"realm",
"cmor_table",
"ensemble",
"variable",
KEY_ACTIVITY,
KEY_PRODUCT,
KEY_INSTITUTE,
KEY_MODEL,
KEY_EXPERIMENT,
KEY_FREQUENCY,
KEY_MODELING_REALM,
KEY_MIP_TABLE,
KEY_ENSEMBLE,
KEY_VARIABLE,
]);
}
......@@ -73,7 +88,7 @@ lazy_static! {
/// The following names are optional and will only be looked for if present in the config:
/// ```text
/// version
/// time_range
/// time
/// ```
///
/// Parts can be located in either the path (`/` delineated) or in the filename (`_` delineated). They can also be
......@@ -169,24 +184,24 @@ impl<'a> Custom<'a> {
return Err(Error::MissingValues(Vec::from_iter(missing)));
}
let time_range = match components.get("time_range") {
let time_range = match components.get(KEY_TIME_RANGE) {
Some(r) => Some(parse_time_range(r)?),
None => None,
};
Ok(Self {
path,
activity: components["project"],
product: components["product"],
institute: components["institute"],
model: components["model"],
experiment: components["experiment"],
frequency: components["time_frequency"],
modeling_realm: components["realm"],
variable: components["variable"],
ensemble: components["ensemble"],
mip_table: components["cmor_table"],
version: components.get("version").copied(),
activity: components[KEY_ACTIVITY],
product: components[KEY_PRODUCT],
institute: components[KEY_INSTITUTE],
model: components[KEY_MODEL],
experiment: components[KEY_EXPERIMENT],
frequency: components[KEY_FREQUENCY],
modeling_realm: components[KEY_MODELING_REALM],
variable: components[KEY_VARIABLE],
ensemble: components[KEY_ENSEMBLE],
mip_table: components[KEY_MIP_TABLE],
version: components.get(KEY_VERSION).copied(),
time_range,
})
}
......@@ -230,15 +245,20 @@ mod tests {
let path = Utf8Path::new("act/prod/inst/mod/exp/freq/v1/var_modrel_mip_ens.nc");
let config = ComponentConfig {
parts_dir: owned_vec(vec![
"project",
"product",
"institute",
"model",
"experiment",
"time_frequency",
"version",
KEY_ACTIVITY,
KEY_PRODUCT,
KEY_INSTITUTE,
KEY_MODEL,
KEY_EXPERIMENT,
KEY_FREQUENCY,
KEY_VERSION,
]),
parts_file_name: owned_vec(vec![
KEY_VARIABLE,
KEY_MODELING_REALM,
KEY_MIP_TABLE,
KEY_ENSEMBLE,
]),
parts_file_name: owned_vec(vec!["variable", "realm", "cmor_table", "ensemble"]),
defaults: HashMap::new(),
};
......@@ -268,14 +288,19 @@ mod tests {
let path = Utf8Path::new("act/prod/inst/mod/exp/freq/var_modrel_mip_ens.nc");
let config = ComponentConfig {
parts_dir: owned_vec(vec![
"project",
"product",
"institute",
"model",
"experiment",
"time_frequency",
KEY_ACTIVITY,
KEY_PRODUCT,
KEY_INSTITUTE,
KEY_MODEL,
KEY_EXPERIMENT,
KEY_FREQUENCY,
]),
parts_file_name: owned_vec(vec![
KEY_VARIABLE,
KEY_MODELING_REALM,
KEY_MIP_TABLE,
KEY_ENSEMBLE,
]),
parts_file_name: owned_vec(vec!["variable", "realm", "cmor_table", "ensemble"]),
defaults: HashMap::new(),
};
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment