diff --git a/Cargo.lock b/Cargo.lock
index 5ac2065..df16c6b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1241,7 +1241,7 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
 [[package]]
 name = "migration"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "sea-orm-migration",
  "tokio",
diff --git a/migration/Cargo.toml b/migration/Cargo.toml
index f7b139f..962bfe5 100644
--- a/migration/Cargo.toml
+++ b/migration/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "migration"
-version = "0.1.0"
+version = "0.1.1"
 edition = "2021"
 publish = false
 
diff --git a/migration/README.md b/migration/README.md
index 3b438d8..0524589 100644
--- a/migration/README.md
+++ b/migration/README.md
@@ -1,41 +1,10 @@
-# Running Migrator CLI
+# Database schema version notes
 
-- Generate a new migration file
-    ```sh
-    cargo run -- generate MIGRATION_NAME
-    ```
-- Apply all pending migrations
-    ```sh
-    cargo run
-    ```
-    ```sh
-    cargo run -- up
-    ```
-- Apply first 10 pending migrations
-    ```sh
-    cargo run -- up -n 10
-    ```
-- Rollback last applied migrations
-    ```sh
-    cargo run -- down
-    ```
-- Rollback last 10 applied migrations
-    ```sh
-    cargo run -- down -n 10
-    ```
-- Drop all tables from the database, then reapply all migrations
-    ```sh
-    cargo run -- fresh
-    ```
-- Rollback all applied migrations, then reapply all migrations
-    ```sh
-    cargo run -- refresh
-    ```
-- Rollback all applied migrations
-    ```sh
-    cargo run -- reset
-    ```
-- Check the status of all migrations
-    ```sh
-    cargo run -- status
-    ```
+## 0.1.1
+
+Added the `main_data.XmlTested` column
+
+- Extended `maindata_savetype_saveid_idx`
+  with two more columns:
+  - `len`
+  - `xml_tested`
diff --git a/migration/src/lib.rs b/migration/src/lib.rs
index 8b2fd7c..8486afd 100644
--- a/migration/src/lib.rs
+++ b/migration/src/lib.rs
@@ -2,9 +2,9 @@ pub use sea_orm_migration::prelude::*;
 
 pub mod m20240719_00001_create_main_data_table;
 pub mod m20240719_00002_create_long_data_table;
-pub mod m20240721_221623_create_indexs;
+pub mod m20240721_00003_create_indexs;
 
-pub use m20240721_221623_create_indexs::FULL_DATA_VIEW;
+pub use m20240721_00003_create_indexs::FULL_DATA_VIEW;
 
 pub const TEXT_DATA_MAX_LEN: usize = 1024;
 
@@ -17,7 +17,7 @@ impl MigratorTrait for Migrator {
         vec![
             Box::new(m20240719_00001_create_main_data_table::Migration),
             Box::new(m20240719_00002_create_long_data_table::Migration),
-            Box::new(m20240721_221623_create_indexs::Migration),
+            Box::new(m20240721_00003_create_indexs::Migration),
         ]
     }
 }
diff --git a/migration/src/m20240719_00001_create_main_data_table.rs b/migration/src/m20240719_00001_create_main_data_table.rs
index 1e9d92a..4114581 100644
--- a/migration/src/m20240719_00001_create_main_data_table.rs
+++ b/migration/src/m20240719_00001_create_main_data_table.rs
@@ -59,6 +59,7 @@ impl MigrationTrait for Migration {
                     .col(ColumnDef::new(MainData::BlakeHash).char_len(64).not_null())
                     .col(ColumnDef::new(MainData::Len).big_integer().not_null())
                     .col(ColumnDef::new(MainData::ShortData).string_len(TEXT_DATA_MAX_LEN as u32))
+                    .col(ColumnDef::new(MainData::XmlTested).boolean().null())
                     .to_owned(),
             )
             .await?;
@@ -94,4 +95,6 @@ pub enum MainData {
     /// If the length is < 1024
     /// the data is stored directly here
     ShortData,
+    /// Whether the data is valid XML
+    XmlTested,
 }
diff --git a/migration/src/m20240721_221623_create_indexs.rs b/migration/src/m20240721_00003_create_indexs.rs
similarity index 97%
rename from migration/src/m20240721_221623_create_indexs.rs
rename to migration/src/m20240721_00003_create_indexs.rs
index 2ad838d..91ad5ff 100644
--- a/migration/src/m20240721_221623_create_indexs.rs
+++ b/migration/src/m20240721_00003_create_indexs.rs
@@ -16,6 +16,7 @@ SELECT
     md.save_id,
     md.save_type,
     md.blake_hash,
+    md.xml_tested,
     md.len,
     CASE
         WHEN md.len > 1024 THEN
@@ -51,6 +52,8 @@ impl MigrationTrait for Migration {
             .table(MainData::Table)
             .col(MainData::SaveType)
             .col(MainData::SaveId)
+            .col(MainData::Len)
+            .col(MainData::XmlTested)
             .name(MAIN_SAVETYPE_SAVEID_IDX);
 
         manager.create_index(savetype_saveid_idx).await?;
diff --git a/sr_download/sql/xml_parse.py b/sr_download/sql/xml_parse.py
index 7778f63..f6a7782 100644
--- a/sr_download/sql/xml_parse.py
+++ b/sr_download/sql/xml_parse.py
@@ -17,32 +17,65 @@ def get_db():
     )
     return connect
 
+def fetch_data(db_cur, offset, limit):
+    # xml_fetch = f"""
+    # WITH data AS (
+    #     SELECT save_id as id, data
+    #     FROM public.full_data
+    #     WHERE "save_type" != 'none'
+    #     AND xml_is_well_formed_document(full_data."data")
+    #     LIMIT {limit} OFFSET {offset}
+    # )
+    # SELECT data.id, string_agg(parts.part_type, '|') AS part_types
+    # FROM data,
+    #     XMLTABLE (
+    #         '//Ship/Parts/Part'
+    #         PASSING BY VALUE xmlparse(document data."data")
+    #         COLUMNS part_type text PATH '@partType',
+    #             part_id text PATH '@id'
+    #     ) AS parts
+    # GROUP BY data.id;
+    # """
+    xml_fetch = f"""
+    WITH data AS (
+        SELECT save_id as id, data
+        FROM public.full_data
+        WHERE "save_type" != 'none'
+        AND xml_is_well_formed_document(full_data."data")
+        LIMIT {limit} OFFSET {offset}
+    ),
+    parts_data AS (
+        SELECT data.id, parts.part_type
+        FROM data,
+            XMLTABLE (
+                '//Ship/Parts/Part'
+                PASSING BY VALUE xmlparse(document data."data")
+                COLUMNS part_type text PATH '@partType',
+                    part_id text PATH '@id'
+            ) AS parts
+    )
+    SELECT id, string_agg(part_type || ':' || part_count, '|') AS part_types
+    FROM (
+        SELECT id, part_type, COUNT(part_type) AS part_count
+        FROM parts_data
+        GROUP BY id, part_type
+    ) AS counted_parts
+    GROUP BY id;
+    """
+    db_cur.execute(xml_fetch)
+    return db_cur.fetchall()
 
 def main():
     db = get_db()
     db_cur = db.cursor()
-
-    xml_fetch = """
-WITH limited_full_data AS (
-    SELECT save_id, data
-    FROM public.full_data
-    WHERE "save_type" != 'none'
-    AND xml_is_well_formed_document(full_data."data")
-    LIMIT 20
-)
-SELECT limited_full_data.save_id, array_agg(x.part_type) AS part_types, array_agg(x.part_id) AS part_ids
-FROM limited_full_data,
-    XMLTABLE (
-        '//Ship/Parts/Part'
-        PASSING BY VALUE xmlparse(document limited_full_data."data")
-        COLUMNS part_type text PATH '@partType',
-            part_id text PATH '@id'
-    ) AS x
-GROUP BY limited_full_data.save_id;
-    """
-
-    db_cur.execute(xml_fetch)
-    logger.info(db_cur.fetchall())
-    ...
+    offset = 0
+    limit = 100
+    while True:
+        datas = fetch_data(db_cur, offset, limit)
+        if not datas:
+            break
+        for data in datas:
+            logger.info(data)
+        offset += limit
 
 main()
diff --git a/sr_download/src/model/main_data.rs b/sr_download/src/model/main_data.rs
index 80f78f3..cc3156f 100644
--- a/sr_download/src/model/main_data.rs
+++ b/sr_download/src/model/main_data.rs
@@ -12,6 +12,7 @@ pub struct Model {
     pub blake_hash: String,
     pub len: i64,
     pub short_data: Option<String>,
+    pub xml_tested: Option<bool>,
 }
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
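
A note on the new column: `XmlTested` is created nullable and nothing in this patch writes to it, so existing rows stay `NULL` until something fills them in. A minimal backfill sketch, assuming PostgreSQL, the `main_data` table name that sea-orm derives from `MainData::Table`, and the `full_data` view defined in `m20240721_00003_create_indexs`; the statement is illustrative, not part of the patch:

```sql
-- Illustrative backfill (not in this patch): mark every unchecked row by
-- validating the full payload exposed through the full_data view.
UPDATE main_data AS md
SET xml_tested = xml_is_well_formed_document(fd.data)
FROM full_data AS fd
WHERE fd.save_id = md.save_id
  AND md.xml_tested IS NULL;
```

`xml_is_well_formed_document` is the same built-in the script below already relies on, so the backfill and the Python path agree on what counts as valid XML.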
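On the index change: appending `len` and `xml_tested` to `maindata_savetype_saveid_idx` means queries that filter on `save_type`/`save_id` and read only `len` and `xml_tested` can be answered by an index-only scan. A hypothetical probe to verify this; the `'ship'` value and the id range are made-up examples, not values from this patch:

```sql
-- Hypothetical check that the widened index is used; with every selected
-- column present in the index, PostgreSQL can skip the heap entirely.
EXPLAIN (ANALYZE, BUFFERS)
SELECT save_id, len, xml_tested
FROM main_data
WHERE save_type = 'ship'           -- assumed example value
  AND save_id BETWEEN 100000 AND 200000;
```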
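One caveat on `fetch_data`: the paged query has no `ORDER BY`, and PostgreSQL does not guarantee a stable row order across repeated `LIMIT`/`OFFSET` scans, so pages can skip or repeat rows. A keyset-pagination sketch that orders on `save_id` instead; the `%(last_id)s` placeholder assumes a psycopg-style cursor and is not code from this patch:

```sql
-- Sketch of a deterministic alternative to LIMIT/OFFSET: resume after the
-- last save_id seen, ordered so each row is visited exactly once.
SELECT save_id AS id, data
FROM public.full_data
WHERE "save_type" != 'none'
  AND save_id > %(last_id)s
  AND xml_is_well_formed_document(full_data."data")
ORDER BY save_id
LIMIT 100;
```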