This commit is contained in:
shenjack 2024-07-20 23:16:54 +08:00
parent d38258b1e2
commit 72d29d3772
Signed by: shenjack
GPG Key ID: 7B1134A979775551
6 changed files with 78 additions and 12 deletions

14
Cargo.lock generated
View File

@ -674,6 +674,7 @@ checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
@ -724,6 +725,17 @@ version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
[[package]]
name = "futures-macro"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
]
[[package]]
name = "futures-sink"
version = "0.3.30"
@ -745,6 +757,7 @@ dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
@ -2525,6 +2538,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"blake3",
"futures",
"migration",
"reqwest",
"sea-orm",

View File

@ -3,6 +3,7 @@
## V2
Rewritten in Rust !
(其实最早 v1 就是用 rust 写的, 不过我 2023 年 6 月 可才刚学 rust, 代码写的很烂, 所以还是回到了 python 小脚本)
## V1

View File

@ -20,3 +20,4 @@ migration = { path = "../migration" }
serde = { version = "1.0.204", features = ["serde_derive"] }
toml = "0.8.15"
blake3 = "1.5.3"
futures = "0.3.30"

View File

@ -1,6 +1,6 @@
use blake3::Hasher;
use sea_orm::{
ActiveModelTrait, ConnectOptions, ConnectionTrait, Database, DatabaseConnection, EntityTrait,
ActiveModelTrait, ConnectOptions, Database, DatabaseConnection, EntityTrait,
IntoActiveModel, ModelTrait, QueryOrder, QuerySelect, TransactionTrait,
};
use tracing::{event, Level};
@ -59,8 +59,9 @@ pub enum CoverStrategy {
/// 如果失败, 会返回 Err
/// 如果成功, 会返回 Ok(true)
/// 如果数据已经存在, 会根据策略返回
pub async fn save_ship_to_db<D>(
pub async fn save_data_to_db<D>(
save_id: SaveId,
save_type: SaveType,
data: D,
cover_strategy: Option<CoverStrategy>,
db: &DatabaseConnection,
@ -130,7 +131,7 @@ where
// 过长, 需要把数据放到 long_data 里
let new_data = model::main_data::Model {
save_id: save_id as i32,
save_type: SaveType::Ship,
save_type,
blake_hash: hash,
len: data_len as i64,
short_data: None,
@ -148,7 +149,7 @@ where
// 直接放到 main_data 里即可
let new_data = model::main_data::Model {
save_id: save_id as i32,
save_type: SaveType::Ship,
save_type,
blake_hash: hash,
len: data_len as i64,
short_data: Some(data),

View File

@ -1,5 +1,6 @@
use std::path::Path;
use std::{ops::Range, path::Path};
use tracing::{event, Level};
use futures::{future::select_all, task};
mod config;
mod db;
@ -9,6 +10,26 @@ mod net;
pub type SaveId = u32;
pub const TEXT_DATA_MAX_LEN: usize = 1024;
use net::DownloadFile;
async fn big_worker(db: sea_orm::DatabaseConnection, work_range: Range<SaveId>) {
let client = net::Downloader::default();
for work_id in work_range {
match client.try_download_as_any(work_id).await {
Some(file) => {
match file {
DownloadFile::Save(data) => {
},
DownloadFile::Ship(data) => {
},
}
}
};
}
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
tracing_subscriber::fmt().with_max_level(Level::INFO).init();
@ -27,5 +48,33 @@ async fn main() -> anyhow::Result<()> {
event!(Level::INFO, "Starting download from save_id: {}", start_id);
// 1321469 end
let end_id: SaveId = 1321469;
let mut current_id = start_id;
let batch_size = 100;
// 10 works
let mut works = Vec::with_capacity(10);
let max_works = 10;
for _ in 0..10 {
let end = current_id + batch_size;
works.push(tokio::spawn(big_worker(db_connect.clone(), current_id..end)));
current_id = end;
}
while current_id < end_id || !works.is_empty() {
while current_id < end_id && works.len() < max_works {
let end = current_id + batch_size;
works.push(tokio::spawn(big_worker(db_connect.clone(), current_id..end)));
current_id = end;
}
if !works.is_empty() {
let (result, index, remain) = select_all(works).await;
event!(Level::INFO, "worker {} finish with result: {:?}", index, result);
works = remain;
}
}
Ok(())
}

View File

@ -9,24 +9,24 @@ pub struct Downloader {
}
/// 使用 any 下载下来的文件
pub enum SaveFile {
pub enum DownloadFile {
/// 是艘船
Ship(String),
/// 是存档
Save(String),
}
impl SaveFile {
impl DownloadFile {
pub fn as_ship(&self) -> Option<&str> {
match self {
SaveFile::Ship(s) => Some(s),
DownloadFile::Ship(s) => Some(s),
_ => None,
}
}
pub fn as_save(&self) -> Option<&str> {
match self {
SaveFile::Save(s) => Some(s),
DownloadFile::Save(s) => Some(s),
_ => None,
}
}
@ -65,7 +65,7 @@ impl Downloader {
/// 尝试用 ship 或者 save 的 API 下载文件
/// 如果两个都没下载到,返回 None
/// 如果下载到了,返回 Some(文件内容)
pub async fn try_download_as_any(&self, id: SaveId) -> Option<SaveFile> {
pub async fn try_download_as_any(&self, id: SaveId) -> Option<DownloadFile> {
// 先尝试用 ship 的 API 下载
let ship_url = Self::as_ship_url(id);
let ship_try = self
@ -79,7 +79,7 @@ impl Downloader {
if let Ok(body) = ship_try.text().await {
// 再判空
if !(body.is_empty() || body == "0") {
return Some(SaveFile::Ship(body));
return Some(DownloadFile::Ship(body));
}
}
}
@ -97,7 +97,7 @@ impl Downloader {
if let Ok(body) = save_try.text().await {
// 再判空
if !(body.is_empty() || body == "0") {
return Some(SaveFile::Save(body));
return Some(DownloadFile::Save(body));
}
}
}