HashSet 和其他集合类型
除了 Vec 和 HashMap,Rust 标准库还提供了其他有用的集合类型,包括 HashSet、BTreeMap、BTreeSet 等。
HashSet
HashSet 是一个集合,用于存储唯一的值。它基于 HashMap 实现,所以具有类似的性能特征。
创建和基本操作
use std::collections::HashSet;
fn main() {
let mut books = HashSet::new();
// 添加元素
books.insert("The Rust Programming Language");
books.insert("Programming Rust");
books.insert("Rust in Action");
books.insert("The Rust Programming Language"); // 重复元素不会被添加
println!("书籍数量: {}", books.len()); // 输出: 3
// 检查是否包含某个元素
if books.contains("Rust in Action") {
println!("找到了 'Rust in Action'");
}
// 遍历集合
for book in &books {
println!("书籍: {}", book);
}
}
从向量创建 HashSet
use std::collections::HashSet;
fn main() {
let numbers = vec![1, 2, 3, 2, 1, 4, 5, 4];
let unique_numbers: HashSet<_> = numbers.into_iter().collect();
println!("唯一数字: {:?}", unique_numbers);
}
集合操作
use std::collections::HashSet;
fn main() {
let set1: HashSet<i32> = [1, 2, 3, 4, 5].iter().cloned().collect();
let set2: HashSet<i32> = [4, 5, 6, 7, 8].iter().cloned().collect();
// 并集
let union: HashSet<_> = set1.union(&set2).collect();
println!("并集: {:?}", union);
// 交集
let intersection: HashSet<_> = set1.intersection(&set2).collect();
println!("交集: {:?}", intersection);
// 差集
let difference: HashSet<_> = set1.difference(&set2).collect();
println!("差集 (set1 - set2): {:?}", difference);
// 对称差集
let symmetric_difference: HashSet<_> = set1.symmetric_difference(&set2).collect();
println!("对称差集: {:?}", symmetric_difference);
}
子集和超集检查
use std::collections::HashSet;
fn main() {
let set1: HashSet<i32> = [1, 2, 3].iter().cloned().collect();
let set2: HashSet<i32> = [1, 2, 3, 4, 5].iter().cloned().collect();
println!("set1 是 set2 的子集: {}", set1.is_subset(&set2));
println!("set2 是 set1 的超集: {}", set2.is_superset(&set1));
println!("set1 和 set2 不相交: {}", set1.is_disjoint(&set2));
}
BTreeMap 和 BTreeSet
BTreeMap 和 BTreeSet 是基于 B 树实现的有序集合,它们保持键的排序顺序。
BTreeMap
use std::collections::BTreeMap;
fn main() {
let mut scores = BTreeMap::new();
scores.insert("Alice", 95);
scores.insert("Bob", 87);
scores.insert("Charlie", 92);
scores.insert("David", 89);
// BTreeMap 会按键的顺序遍历
for (name, score) in &scores {
println!("{}: {}", name, score);
}
// 获取范围内的元素
println!("\n分数在 'Bob' 到 'David' 之间的学生:");
for (name, score) in scores.range("Bob".."David") {
println!("{}: {}", name, score);
}
}
BTreeSet
use std::collections::BTreeSet;
fn main() {
let mut numbers = BTreeSet::new();
numbers.insert(5);
numbers.insert(2);
numbers.insert(8);
numbers.insert(1);
numbers.insert(9);
// BTreeSet 会按排序顺序遍历
println!("排序后的数字:");
for number in &numbers {
println!("{}", number);
}
// 获取范围内的元素
println!("\n3 到 7 之间的数字:");
for number in numbers.range(3..=7) {
println!("{}", number);
}
}
VecDeque (双端队列)
VecDeque 是一个双端队列,支持在两端高效地添加和删除元素。
use std::collections::VecDeque;
fn main() {
let mut deque = VecDeque::new();
// 在后端添加元素
deque.push_back(1);
deque.push_back(2);
deque.push_back(3);
// 在前端添加元素
deque.push_front(0);
println!("双端队列: {:?}", deque); // [0, 1, 2, 3]
// 从两端删除元素
if let Some(front) = deque.pop_front() {
println!("从前端删除: {}", front);
}
if let Some(back) = deque.pop_back() {
println!("从后端删除: {}", back);
}
println!("剩余元素: {:?}", deque); // [1, 2]
}
BinaryHeap (二叉堆)
BinaryHeap 是一个优先队列,总是返回最大的元素。
use std::collections::BinaryHeap;
fn main() {
let mut heap = BinaryHeap::new();
// 添加元素
heap.push(5);
heap.push(2);
heap.push(8);
heap.push(1);
heap.push(9);
println!("堆的大小: {}", heap.len());
// 查看最大元素(不删除)
if let Some(&max) = heap.peek() {
println!("最大元素: {}", max);
}
// 依次取出最大元素
while let Some(max) = heap.pop() {
println!("取出: {}", max);
}
}
自定义优先级
use std::collections::BinaryHeap;
use std::cmp::Reverse;
#[derive(Debug, Eq, PartialEq)]
struct Task {
priority: u32,
description: String,
}
impl Ord for Task {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.priority.cmp(&other.priority)
}
}
impl PartialOrd for Task {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
fn main() {
let mut tasks = BinaryHeap::new();
tasks.push(Task {
priority: 3,
description: "中等优先级任务".to_string(),
});
tasks.push(Task {
priority: 1,
description: "低优先级任务".to_string(),
});
tasks.push(Task {
priority: 5,
description: "高优先级任务".to_string(),
});
// 按优先级顺序处理任务
while let Some(task) = tasks.pop() {
println!("处理任务 (优先级 {}): {}", task.priority, task.description);
}
// 如果想要最小堆,可以使用 Reverse
let mut min_heap = BinaryHeap::new();
min_heap.push(Reverse(5));
min_heap.push(Reverse(2));
min_heap.push(Reverse(8));
while let Some(Reverse(value)) = min_heap.pop() {
println!("最小值: {}", value);
}
}
实际应用示例
文本分析工具
use std::collections::{HashMap, HashSet};
struct TextAnalyzer {
word_count: HashMap<String, usize>,
unique_words: HashSet<String>,
total_words: usize,
}
impl TextAnalyzer {
fn new() -> Self {
TextAnalyzer {
word_count: HashMap::new(),
unique_words: HashSet::new(),
total_words: 0,
}
}
fn analyze(&mut self, text: &str) {
for word in text.split_whitespace() {
let word = word.to_lowercase()
.trim_matches(|c: char| !c.is_alphabetic())
.to_string();
if !word.is_empty() {
*self.word_count.entry(word.clone()).or_insert(0) += 1;
self.unique_words.insert(word);
self.total_words += 1;
}
}
}
fn most_common_words(&self, n: usize) -> Vec<(&String, &usize)> {
let mut words: Vec<_> = self.word_count.iter().collect();
words.sort_by(|a, b| b.1.cmp(a.1));
words.into_iter().take(n).collect()
}
fn statistics(&self) {
println!("文本统计:");
println!(" 总词数: {}", self.total_words);
println!(" 唯一词数: {}", self.unique_words.len());
println!(" 平均词频: {:.2}",
self.total_words as f64 / self.unique_words.len() as f64);
}
}
fn main() {
let mut analyzer = TextAnalyzer::new();
let text = "The quick brown fox jumps over the lazy dog. \
The dog was really lazy, and the fox was very quick.";
analyzer.analyze(text);
analyzer.statistics();
println!("\n最常见的 5 个词:");
for (word, count) in analyzer.most_common_words(5) {
println!(" {}: {}", word, count);
}
}
图的邻接表表示
use std::collections::{HashMap, HashSet};
struct Graph {
adjacency_list: HashMap<String, HashSet<String>>,
}
impl Graph {
fn new() -> Self {
Graph {
adjacency_list: HashMap::new(),
}
}
fn add_vertex(&mut self, vertex: String) {
self.adjacency_list.entry(vertex).or_insert_with(HashSet::new);
}
fn add_edge(&mut self, from: String, to: String) {
self.add_vertex(from.clone());
self.add_vertex(to.clone());
self.adjacency_list.get_mut(&from).unwrap().insert(to.clone());
self.adjacency_list.get_mut(&to).unwrap().insert(from);
}
fn neighbors(&self, vertex: &str) -> Option<&HashSet<String>> {
self.adjacency_list.get(vertex)
}
fn has_edge(&self, from: &str, to: &str) -> bool {
self.adjacency_list
.get(from)
.map_or(false, |neighbors| neighbors.contains(to))
}
fn vertex_count(&self) -> usize {
self.adjacency_list.len()
}
fn edge_count(&self) -> usize {
self.adjacency_list
.values()
.map(|neighbors| neighbors.len())
.sum::<usize>() / 2 // 无向图,每条边被计算两次
}
fn display(&self) {
println!("图的邻接表:");
for (vertex, neighbors) in &self.adjacency_list {
println!(" {}: {:?}", vertex, neighbors);
}
}
}
fn main() {
let mut graph = Graph::new();
// 添加边(自动添加顶点)
graph.add_edge("A".to_string(), "B".to_string());
graph.add_edge("A".to_string(), "C".to_string());
graph.add_edge("B".to_string(), "D".to_string());
graph.add_edge("C".to_string(), "D".to_string());
graph.display();
println!("顶点数: {}", graph.vertex_count());
println!("边数: {}", graph.edge_count());
if let Some(neighbors) = graph.neighbors("A") {
println!("A 的邻居: {:?}", neighbors);
}
println!("A 和 D 之间有边: {}", graph.has_edge("A", "D"));
println!("A 和 B 之间有边: {}", graph.has_edge("A", "B"));
}
缓存系统与 LRU 策略
use std::collections::{HashMap, VecDeque};
struct LRUCache<K, V> {
capacity: usize,
cache: HashMap<K, V>,
order: VecDeque<K>,
}
impl<K, V> LRUCache<K, V>
where
K: Clone + Eq + std::hash::Hash,
{
fn new(capacity: usize) -> Self {
LRUCache {
capacity,
cache: HashMap::with_capacity(capacity),
order: VecDeque::with_capacity(capacity),
}
}
fn get(&mut self, key: &K) -> Option<&V> {
if self.cache.contains_key(key) {
// 移动到最前面(最近使用)
self.move_to_front(key);
self.cache.get(key)
} else {
None
}
}
fn put(&mut self, key: K, value: V) {
if self.cache.contains_key(&key) {
// 更新现有键
self.cache.insert(key.clone(), value);
self.move_to_front(&key);
} else {
// 添加新键
if self.cache.len() >= self.capacity {
// 移除最久未使用的项
if let Some(lru_key) = self.order.pop_back() {
self.cache.remove(&lru_key);
}
}
self.cache.insert(key.clone(), value);
self.order.push_front(key);
}
}
fn move_to_front(&mut self, key: &K) {
// 从当前位置移除
if let Some(pos) = self.order.iter().position(|k| k == key) {
self.order.remove(pos);
}
// 添加到前面
self.order.push_front(key.clone());
}
fn len(&self) -> usize {
self.cache.len()
}
fn display_order(&self) {
println!("缓存顺序 (最新到最旧): {:?}", self.order);
}
}
fn main() {
let mut cache = LRUCache::new(3);
cache.put("a", 1);
cache.put("b", 2);
cache.put("c", 3);
cache.display_order(); // [c, b, a]
// 访问 'a',它会移到最前面
cache.get(&"a");
cache.display_order(); // [a, c, b]
// 添加新项,'b' 会被移除(最久未使用)
cache.put("d", 4);
cache.display_order(); // [d, a, c]
println!("缓存大小: {}", cache.len());
// 尝试获取已被移除的项
match cache.get(&"b") {
Some(value) => println!("找到 b: {}", value),
None => println!("b 已被移除"),
}
}
选择合适的集合类型
集合类型 | 用途 | 时间复杂度 | 特点 |
---|---|---|---|
Vec<T> | 动态数组 | O(1) 访问, O(n) 插入/删除 | 连续内存,索引访问 |
VecDeque<T> | 双端队列 | O(1) 两端操作 | 环形缓冲区 |
HashMap<K,V> | 键值映射 | O(1) 平均 | 无序,快速查找 |
BTreeMap<K,V> | 有序映射 | O(log n) | 有序,范围查询 |
HashSet<T> | 唯一值集合 | O(1) 平均 | 无序,快速查找 |
BTreeSet<T> | 有序集合 | O(log n) | 有序,范围查询 |
BinaryHeap<T> | 优先队列 | O(log n) 插入/删除 | 最大堆 |
最佳实践
- 选择合适的集合类型:根据使用模式选择
- 预分配容量:当知道大概大小时
- 使用引用避免克隆:特别是在键中
- 考虑内存布局:Vec 有更好的缓存局部性
- 使用迭代器:更高效且表达力强
use std::collections::{HashMap, HashSet};
// 好的实践示例
fn analyze_data(data: &[String]) -> (HashMap<&str, usize>, HashSet<&str>) {
let mut counts = HashMap::with_capacity(data.len());
let mut unique = HashSet::with_capacity(data.len());
for item in data {
let item_ref = item.as_str();
*counts.entry(item_ref).or_insert(0) += 1;
unique.insert(item_ref);
}
(counts, unique)
}
fn main() {
let data = vec![
"apple".to_string(),
"banana".to_string(),
"apple".to_string(),
"cherry".to_string(),
];
let (counts, unique) = analyze_data(&data);
println!("计数: {:?}", counts);
println!("唯一项: {:?}", unique);
}