diff --git a/freva/src/drs/ingest.rs b/freva/src/drs/ingest.rs index 8b2513c2d4ddd91fa58a90ba066b68ca47b8d178..a3ab10e902c5fdd539891a7da2975487908eddc5 100644 --- a/freva/src/drs/ingest.rs +++ b/freva/src/drs/ingest.rs @@ -145,12 +145,18 @@ async fn ingest_structure( }; if entry.path().is_file() { - // TODO: this conversion should probably take place within drs - let path = Utf8PathBuf::from_path_buf(entry.path().to_path_buf()).unwrap(); - // TODO: this should too - if !structure.verify_path(&path) { - continue; - } + let path = match Utf8PathBuf::from_path_buf(entry.path().to_path_buf()) { + Ok(p) => p, + Err(e) => { + warn!( + "{} not a valid drs file, has non UTF8 characters in path:\n{:?}", + entry.path().display(), + e + ); + skipped += 1; + continue; + } + }; let f = match structure.file_from_path(&path) { Ok(f) => f, diff --git a/freva/src/drs/metadata.rs b/freva/src/drs/metadata.rs index fc9e8dede3c71f3a8673365d38a5bc726453180a..b2ab17ebc4edf5581cb3f8ebb8d945221101b79b 100644 --- a/freva/src/drs/metadata.rs +++ b/freva/src/drs/metadata.rs @@ -87,7 +87,7 @@ impl Structure { /// This is not comprehensive in that there are still paths where this would return `true` but which are not valid /// for this structure. For example, if a path has the same `root_dir` and the same _number_ of components in its /// directory structure, it will pass this even if it doesn't have the right `filename` structure for this. - /// *TODO*: fix this if possible. + /// TODO: fix this if possible. pub fn verify_path<P: AsRef<Path>>(&self, path: P) -> bool { let path = path.as_ref(); @@ -113,6 +113,13 @@ impl Structure { /// Extracts [`Metadata`] for a file path of this structure pub fn file_from_path(&self, path: &Utf8PathBuf) -> Result<Metadata, Error> { + if !self.verify_path(&path) { + return Err(Error::InvalidPath { + path: path.into(), + reason: "file path does not match expected structure".to_owned(), + }); + } + let parts = path .strip_prefix(self.root()) .unwrap() // this is already done with `structure_from_path` so should be safe @@ -268,8 +275,6 @@ impl TryFrom<&String> for Config { /// Paths must be UTF-8. #[derive(Debug, PartialEq)] pub struct Metadata<'a> { - // File doesn't feel quite right as a name since it isn't an open file but more of a path? - // in original, this is the string key rather than the actual structure but I think this can work here structure: &'a Structure, /// map of { part name => value } e.g. "institute" => "cpc"