diff --git a/.docker/evaluation_system.conf b/.docker/evaluation_system.conf index a140d5b1f39daff0fbcebe474a4409b0565ce254..d12c842cf434459ee11c92b079e53038c6f5507f 100644 --- a/.docker/evaluation_system.conf +++ b/.docker/evaluation_system.conf @@ -59,9 +59,9 @@ db.port = 3306 #external_group=frevaext #: Define access to the solr instance -solr.host = "http://localhost" +solr.host = localhost solr.port = 8983 -solr.core = "files" +solr.core = files #shellinabox #shellmachine=None diff --git a/Cargo.lock b/Cargo.lock index 979b36dcfa4b6a95389eb243cb4c97df241ca626..78fea3394a67d56c3982988a6d71bddea92a0772 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "anyhow" version = "1.0.52" @@ -337,13 +346,14 @@ dependencies = [ "camino", "env_logger", "futures", - "log", "reqwest", "serde", "serde_json", "thiserror", "tokio", "toml", + "tracing", + "url", "walkdir", "wiremock", ] @@ -362,6 +372,8 @@ dependencies = [ "log", "stderrlog", "tokio", + "tracing", + "tracing-subscriber", ] [[package]] @@ -1228,6 +1240,15 @@ dependencies = [ "serde", ] +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + [[package]] name = "signal-hook-registry" version = "1.4.0" @@ -1456,9 +1477,21 @@ checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" dependencies = [ "cfg-if", "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.21" @@ -1468,6 +1501,31 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "tracing-log" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77be66445c4eeebb934a7340f227bfe7b338173d3f8c00a60a5a58005c9faecf" +dependencies = [ + "ansi_term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + [[package]] name = "try-lock" version = "0.2.3" diff --git a/freva-ingest/Cargo.toml b/freva-ingest/Cargo.toml index a1985cbf38f54acceb6ed6ed5b50ddc4dce5cae2..098f74b8fe8f35540f12f6309ffbe00c5bccd269 100644 --- a/freva-ingest/Cargo.toml +++ b/freva-ingest/Cargo.toml @@ -14,6 +14,8 @@ log = "0.4" stderrlog = "0.5" configparser = "3.0" tokio = { version = "1.15", features = ["full"] } +tracing = "0.1" +tracing-subscriber = "0.3" directories = "4.0.1" chrono = "0.4" humantime = "2.1" diff --git a/freva-ingest/src/main.rs b/freva-ingest/src/main.rs index 5597b1231a085472ba86fa75d24f1eeef43acc28..d0f9d46185dfb4d1d837c1a4419948fe69680c40 100644 --- a/freva-ingest/src/main.rs +++ b/freva-ingest/src/main.rs @@ -7,6 +7,7 @@ use clap::StructOpt; use cli::Cli; use config::{config, drs_config}; use directories::ProjectDirs; +use tracing::Level; const DOMAIN: &str = "de"; const ORGANIZATION: &str = "DKRZ"; @@ -19,10 +20,9 @@ async fn main() -> Result<()> { // stderrlog sets 0 as error, bump by 1 so warning is the default let verbosity = args.verbosity + 1; - stderrlog::new() - .module(module_path!()) - .verbosity(verbosity) - .init()?; + 
tracing_subscriber::fmt() + .with_max_level(log_level(verbosity)) + .init(); let config_dir = match args.config_dir { Some(c) => c, @@ -38,3 +38,16 @@ async fn main() -> Result<()> { solr::ingest(&ingest_opts, &conf, &drs_conf).await } + +/// Maps the already-bumped CLI verbosity onto a tracing `Level`. +/// 0 is error and 1 (what `main` passes when no verbosity flag is given) is warning; +/// each further step is more verbose: 2 = info, 3 = debug, anything higher = trace. +fn log_level(verbosity: usize) -> Level { + match verbosity { + 0 => Level::ERROR, + 1 => Level::WARN, + 2 => Level::INFO, + 3 => Level::DEBUG, + _ => Level::TRACE, + } +} diff --git a/freva-ingest/src/solr.rs b/freva-ingest/src/solr.rs index 8e712079047933dceb8183fce691e8d087a545f8..b3898318ef768453cea7c7ac041031b54115b21a 100644 --- a/freva-ingest/src/solr.rs +++ b/freva-ingest/src/solr.rs @@ -6,7 +6,7 @@ use crate::cli::IngestOpts; use crate::config::Config; pub async fn ingest(opts: &IngestOpts, conf: &Config, drs_conf: &DrsConfig) -> Result<(), Error> { - let solr = Solr::new(conf.solr.host.clone(), conf.solr.port); + let solr = Solr::new(conf.solr.host.clone(), conf.solr.port, None)?; let reports = freva::drs::ingest(&solr, drs_conf, &opts.data_dir, opts.batch_size) .await diff --git a/freva/Cargo.toml b/freva/Cargo.toml index 5bbb11c07a0b17b16d9c1e1a7c8400a6d84b971d..9793f4f4b088a194bf6652d3ed8910b9e5eebed3 100644 --- a/freva/Cargo.toml +++ b/freva/Cargo.toml @@ -8,7 +8,6 @@ edition = "2021" [dependencies] thiserror = "1.0" -log = "0.4" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" reqwest = { version = "0.11", features = ["blocking", "json"] } @@ -16,7 +15,9 @@ walkdir = "2" camino = { version = "1.0.5", features = ["serde1"] } toml = "0.5" tokio = { version = "1.15", features = ["full"] } +tracing = "0.1" futures = "0.3" +url = "2.2" [dev-dependencies] env_logger = "0.9" diff --git a/freva/src/drs/ingest.rs b/freva/src/drs/ingest.rs index 44515d53a217478910d6229523511bcb608fd072..a9fcc384e5daf84079d4c17d119c6ce79554a073 100644 --- a/freva/src/drs/ingest.rs +++ b/freva/src/drs/ingest.rs @@ -6,10 +6,10 @@ use std::time::Duration; use std::{collections::HashMap, path::Path}; use camino::Utf8PathBuf; -use log::{error, 
info, trace, warn}; use thiserror::Error; use tokio::sync::mpsc::{channel, Receiver}; use tokio::time::Instant; +use tracing::{error, info, trace, warn}; use walkdir::{DirEntry, WalkDir}; use crate::drs::Structure; diff --git a/freva/src/drs/metadata.rs b/freva/src/drs/metadata.rs index 4710c13ca8404747393ef8a55a854e65bcf52fbc..1f52ad94f4841aa9b0a11e2a03753d80ef90689c 100644 --- a/freva/src/drs/metadata.rs +++ b/freva/src/drs/metadata.rs @@ -9,9 +9,9 @@ use std::{ }; use camino::Utf8PathBuf; -use log::{debug, error, warn}; use serde::{ser::SerializeMap, Deserialize, Serialize}; use thiserror::Error; +use tracing::{debug, error, warn}; /// Errors that can be returned by DRS functions #[derive(Debug, Error)] diff --git a/freva/src/solr.rs b/freva/src/solr.rs index 634dfa89760f82cc4da8617cfa5d86382adf204b..2610e188704477e41b65c707edfb2bfe4c9f0adc 100644 --- a/freva/src/solr.rs +++ b/freva/src/solr.rs @@ -3,14 +3,15 @@ use std::collections::HashMap; use std::io; -use log::debug; -use reqwest::Client; +use reqwest::{Client, Url}; use serde::Deserialize; use thiserror::Error; +use tracing::debug; use crate::drs::Metadata; -const SOLR_DEFAULT_PORT: u16 = 8983; +const DEFAULT_PORT: u16 = 8983; +const DEFAULT_PROTOCOL: &str = "http"; /// Errors #[derive(Debug, Error)] @@ -23,6 +24,12 @@ pub enum SolrError { #[error("error in request to Solr: {0}")] RequestFailed(#[from] reqwest::Error), + /// Some element of the url was not valid. This can happen for 2 reasons: + /// 1. Invalid Solr instance information (e.g. wrong protocol) + /// 2. 
Solr collection name creates an invalid url + #[error("invalid url: {0}")] + ParseUrlError(#[from] url::ParseError), + /// Solr API has returned a non-successful status code #[error("Solr returned non-ok status: {status}\n{body}")] ResponseNotOk { @@ -41,29 +48,31 @@ pub enum SolrError { /// * <https://solr.apache.org/guide/7_1/uploading-data-with-index-handlers.html> #[derive(Debug, Clone)] pub struct Solr { - host: String, - port: u16, + base_url: Url, client: Client, } impl Solr { /// Constructs a new `Solr` pointing to the indicated solr instance. /// If `port` is `None`, Solr's default port is used. - pub fn new(host: String, port: Option<u16>) -> Self { - let port = match port { - Some(p) => p, - None => SOLR_DEFAULT_PORT, - }; - - Self { - host, - port, + pub fn new( + host: String, + port: Option<u16>, + protocol: Option<String>, + ) -> Result<Self, SolrError> { + let port = port.unwrap_or(DEFAULT_PORT); + let protocol = protocol.unwrap_or_else(|| DEFAULT_PROTOCOL.to_owned()); + + let url = Url::parse(&format!("{}://{}:{}", protocol, host, port))?; + Ok(Self { + base_url: url, client: Client::new(), - } + }) } - fn url(&self, collection: &str, method: &str) -> String { - format!("{}:{}/solr/{}/{}", self.host, self.port, collection, method) + fn url(&self, collection: &str, method: &str) -> Result<Url, url::ParseError> { + self.base_url + .join(&format!("solr/{}/{}", collection, method)) } /// Queries Solr `collection` for documents. 
@@ -74,7 +83,7 @@ impl Solr { start: usize, facets: &HashMap<&str, &str>, ) -> Result<SearchResponse, SolrError> { - let url = self.url(collection, "select"); + let url = self.url(collection, "select")?; // initialized here for lifetime purposes let start_str = format!("{}", start); @@ -107,7 +116,7 @@ impl Solr { collection: &str, documents: &[&Metadata<'a>], ) -> Result<(), SolrError> { - let url = self.url(collection, "update/json/docs"); + let url = self.url(collection, "update/json/docs")?; debug!("{}", url); let params = HashMap::from([ ("wt", "json"), // sending json body @@ -116,7 +125,7 @@ impl Solr { ]); let req = self .client - .post(&url) + .post(url) .header(reqwest::header::CONTENT_TYPE, "application/json") .query(&params) .json(&documents); diff --git a/freva/tests/common/mod.rs b/freva/tests/common/mod.rs index 8388650fbe0ccb8c5a0adc774f2c1e994f99a472..3a9b8e2f042eb67fd57983c681fb1230fb93f042 100644 --- a/freva/tests/common/mod.rs +++ b/freva/tests/common/mod.rs @@ -12,10 +12,11 @@ pub async fn solr_server() -> (MockServer, Solr) { let server = MockServer::start().await; let url = reqwest::Url::parse(&server.uri()).unwrap(); - let domain = format!("http://{}", url.host_str().unwrap().to_owned()); + let domain = url.host_str().unwrap().to_owned(); let port = url.port(); - (server, Solr::new(domain, port)) + let solr = Solr::new(domain, port, None).expect("invalid solr address"); + (server, solr) } pub fn test_config() -> Config {