Add more testing around case-insensitivity and patch an error in merge_records creating duplicates in the database

This commit is contained in:
Karcsesz 2024-04-01 16:30:37 +02:00
parent 92c3239c00
commit 12cb4fc254
3 changed files with 68 additions and 8 deletions

View file

@ -46,7 +46,6 @@ impl LookupHandler {
let mut lookup = HashMap::new(); let mut lookup = HashMap::new();
for (index, resource) in resources.0.iter().enumerate() { for (index, resource) in resources.0.iter().enumerate() {
for lookup_to_add in resource.keys() { for lookup_to_add in resource.keys() {
let lookup_to_add = lookup_to_add.to_lowercase();
debug!("Adding {lookup_to_add} for {}", resource.subject); debug!("Adding {lookup_to_add} for {}", resource.subject);
let duplicate = lookup.insert(lookup_to_add.clone(), index); let duplicate = lookup.insert(lookup_to_add.clone(), index);
@ -162,6 +161,23 @@ mod tests {
} }
} }
#[test]
fn successful_case_insensitive_query() {
let data = "[{\"subject\":\"testing\"},{\"subject\":\"more_testing\"}]".as_bytes();
let data = LookupHandler::load_from_reader(data).unwrap();
for subject in ["TESTING", "mOre_testiNg"] {
assert_eq!(
data.lookup(subject),
Some(&Resource {
subject: subject.to_lowercase(),
aliases: None,
properties: None,
links: None,
})
);
}
}
#[test] #[test]
fn successful_alias_query() { fn successful_alias_query() {
let data = "[{\"subject\":\"testing\",\"aliases\":[\"alias1\",\"alias2\"]},{\"subject\":\"red herring\",\"aliases\":[\"alias\",\"1\", \"2\"]}]".as_bytes(); let data = "[{\"subject\":\"testing\",\"aliases\":[\"alias1\",\"alias2\"]},{\"subject\":\"red herring\",\"aliases\":[\"alias\",\"1\", \"2\"]}]".as_bytes();
@ -178,4 +194,21 @@ mod tests {
); );
} }
} }
#[test]
fn successful_case_insensitive_alias_query() {
let data = "[{\"subject\":\"testing\",\"aliases\":[\"alias1\",\"alias2\"]},{\"subject\":\"red herring\",\"aliases\":[\"alias\",\"1\", \"2\"]}]".as_bytes();
let data = LookupHandler::load_from_reader(data).unwrap();
for subject in ["aliAS1", "aLiAs2"] {
assert_eq!(
data.lookup(subject),
Some(&Resource {
subject: "testing".to_string(),
aliases: Some(vec!["alias1".to_string(), "alias2".to_string()]),
properties: None,
links: None,
})
);
}
}
} }

View file

@ -39,16 +39,16 @@ impl Resource {
} }
} }
/// Returns the aliases of the given record. If the `aliases` field is /// Returns the aliases of the given record, converted to lowercase. If the `aliases` field is
/// entirely missing, returns an empty array. /// entirely missing, returns an empty array.
pub fn keys(&self) -> impl Iterator<Item = &String> { pub fn keys(&self) -> impl Iterator<Item=String> + '_ {
let aliases = if let Some(aliases) = &self.aliases { let aliases = if let Some(aliases) = &self.aliases {
aliases.as_slice() aliases.as_slice()
} else { } else {
&[] &[]
}; };
aliases.iter().chain(std::iter::once(&self.subject)) aliases.iter().chain(std::iter::once(&self.subject)).map(|key| key.to_lowercase())
} }
/// Replaces the current `subject` field of the WebFinger resource /// Replaces the current `subject` field of the WebFinger resource

View file

@ -83,10 +83,10 @@ impl ResourceList {
) -> &ResourceList { ) -> &ResourceList {
debug!("Building hashset of already taken queries..."); debug!("Building hashset of already taken queries...");
let unique_check: HashSet<String> = let unique_check: HashSet<String> =
HashSet::from_iter(self.0.iter().flat_map(Resource::keys).cloned()); HashSet::from_iter(self.0.iter().flat_map(Resource::keys));
for record in new_records { for record in new_records {
let record_keys = HashSet::from_iter(record.keys().cloned()); let record_keys = HashSet::from_iter(record.keys());
let collisions = unique_check let collisions = unique_check
.intersection(&record_keys) .intersection(&record_keys)
.collect::<HashSet<_>>(); .collect::<HashSet<_>>();
@ -103,7 +103,7 @@ impl ResourceList {
CollisionHandling::OverwriteSingleSkipMultiple => { CollisionHandling::OverwriteSingleSkipMultiple => {
let mut collided_resources = let mut collided_resources =
self.0.iter().enumerate().filter(|record| { self.0.iter().enumerate().filter(|record| {
record.1.keys().any(|elem| collisions.contains(elem)) record.1.keys().any(|elem| collisions.contains(&elem))
}); });
if let Some((collided_index, collided_resource)) = collided_resources.next() if let Some((collided_index, collided_resource)) = collided_resources.next()
{ {
@ -118,7 +118,7 @@ impl ResourceList {
CollisionHandling::OverwriteMultiple => { CollisionHandling::OverwriteMultiple => {
warn!("Overwriting already existing record(s) with new data..."); warn!("Overwriting already existing record(s) with new data...");
self.0.retain(|record| { self.0.retain(|record| {
if record.keys().any(|elem| collisions.contains(elem)) { if record.keys().any(|elem| collisions.contains(&elem)) {
warn!("Removing {record:?}"); warn!("Removing {record:?}");
false false
} else { } else {
@ -138,4 +138,31 @@ impl ResourceList {
self self
} }
#[cfg(test)]
/// Returns the amount of records stored
fn len(&self) -> usize {
self.0.len()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn check_merge_case_insensitivity() {
let mut base = ResourceList(vec![Resource::new("ThisIsASubject".to_string())]);
base.merge_records(std::iter::once(Resource::new("thisisasubject".to_string())), CollisionHandling::Skip);
assert_eq!(base.len(), 1);
}
#[test]
fn check_merge_case_insensitivity_alt() {
let mut base = Resource::new("ThisIsASubject".to_string());
base.add_new_primary_subject("ThisIsANewPrimarySubject".to_string());
let mut base = ResourceList(vec![base]);
base.merge_records(std::iter::once(Resource::new("thisisasubject".to_string())), CollisionHandling::Skip);
assert_eq!(base.len(), 1);
}
} }