Database for 0.1.1

shenjack 2024-08-30 02:15:22 +08:00
parent fe28a0b8ba
commit 3a5b43ae1e
Signed by: shenjack
GPG Key ID: 7B1134A979775551
8 changed files with 77 additions and 68 deletions

Cargo.lock (generated)
View File

@@ -1241,7 +1241,7 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "migration"
-version = "0.1.0"
+version = "0.1.1"
dependencies = [
"sea-orm-migration",
"tokio",

View File

@@ -1,6 +1,6 @@
[package]
name = "migration"
-version = "0.1.0"
+version = "0.1.1"
edition = "2021"
publish = false

View File

@@ -1,41 +1,10 @@
-# Running Migrator CLI
+# Database schema version notes
-
-- Generate a new migration file
-    ```sh
-    cargo run -- generate MIGRATION_NAME
-    ```
-- Apply all pending migrations
-    ```sh
-    cargo run
-    ```
-    ```sh
-    cargo run -- up
-    ```
-- Apply first 10 pending migrations
-    ```sh
-    cargo run -- up -n 10
-    ```
-- Rollback last applied migrations
-    ```sh
-    cargo run -- down
-    ```
-- Rollback last 10 applied migrations
-    ```sh
-    cargo run -- down -n 10
-    ```
-- Drop all tables from the database, then reapply all migrations
-    ```sh
-    cargo run -- fresh
-    ```
-- Rollback all applied migrations, then reapply all migrations
-    ```sh
-    cargo run -- refresh
-    ```
-- Rollback all applied migrations
-    ```sh
-    cargo run -- reset
-    ```
-- Check the status of all migrations
-    ```sh
-    cargo run -- status
-    ```
+
+## 0.1.1
+
+Added the `main_data.XmlTested` field.
+
+Added the following columns to the `maindata_savetype_saveid_idx` index:
+- `len`
+- `xml_tested`
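
In Postgres terms, the net effect of the 0.1.1 change described above is roughly the DDL below. This is a sketch for orientation only: the commit edits the existing sea-orm migrations in place (see the diffs that follow) rather than shipping a separate ALTER migration, and the snake_case names are the ones sea-orm derives from `MainData`.

```sql
-- Rough DDL equivalent of the 0.1.1 schema change (sketch only; the
-- authoritative definitions are the sea-orm migrations in this commit).
ALTER TABLE main_data ADD COLUMN xml_tested boolean;

-- Recreate the index with the two extra columns appended.
DROP INDEX IF EXISTS maindata_savetype_saveid_idx;
CREATE INDEX maindata_savetype_saveid_idx
    ON main_data (save_type, save_id, len, xml_tested);
```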

View File

@@ -2,9 +2,9 @@ pub use sea_orm_migration::prelude::*;
pub mod m20240719_00001_create_main_data_table;
pub mod m20240719_00002_create_long_data_table;
-pub mod m20240721_221623_create_indexs;
+pub mod m20240721_00003_create_indexs;
-pub use m20240721_221623_create_indexs::FULL_DATA_VIEW;
+pub use m20240721_00003_create_indexs::FULL_DATA_VIEW;
pub const TEXT_DATA_MAX_LEN: usize = 1024;
@@ -17,7 +17,7 @@ impl MigratorTrait for Migrator {
        vec![
            Box::new(m20240719_00001_create_main_data_table::Migration),
            Box::new(m20240719_00002_create_long_data_table::Migration),
-            Box::new(m20240721_221623_create_indexs::Migration),
+            Box::new(m20240721_00003_create_indexs::Migration),
        ]
    }
}

View File

@@ -59,6 +59,7 @@ impl MigrationTrait for Migration {
                    .col(ColumnDef::new(MainData::BlakeHash).char_len(64).not_null())
                    .col(ColumnDef::new(MainData::Len).big_integer().not_null())
                    .col(ColumnDef::new(MainData::ShortData).string_len(TEXT_DATA_MAX_LEN as u32))
+                    .col(ColumnDef::new(MainData::XmlTested).boolean().null())
                    .to_owned(),
            )
            .await?;
@@ -94,4 +95,6 @@ pub enum MainData {
    /// If the length is < 1024,
    /// the data is stored here directly
    ShortData,
+    /// Whether the data is well-formed XML
+    XmlTested,
}
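
The new column is a nullable boolean, which gives it three states: `NULL` plausibly meaning "not yet checked", with `true`/`false` recording the outcome of a well-formedness check. A hypothetical backfill pass under that assumption (Postgres, snake_case names) could look like:

```sql
-- Hypothetical backfill: store the well-formedness result for rows that
-- have not been tested yet. Assumes (save_type, save_id) identifies a row
-- and that the full_data view exposes the assembled XML as "data".
UPDATE main_data AS md
SET xml_tested = xml_is_well_formed_document(fd.data)
FROM public.full_data AS fd
WHERE fd.save_type = md.save_type
  AND fd.save_id = md.save_id
  AND md.xml_tested IS NULL;
```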

View File

@@ -16,6 +16,7 @@ SELECT
    md.save_id,
    md.save_type,
    md.blake_hash,
+    md.xml_tested,
    md.len,
    CASE
        WHEN md.len > 1024 THEN
@@ -51,6 +52,8 @@ impl MigrationTrait for Migration {
            .table(MainData::Table)
            .col(MainData::SaveType)
            .col(MainData::SaveId)
+            .col(MainData::Len)
+            .col(MainData::XmlTested)
            .name(MAIN_SAVETYPE_SAVEID_IDX);
        manager.create_index(savetype_saveid_idx).await?;
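
With `len` and `xml_tested` appended, any query that touches only the four indexed columns can be served from `maindata_savetype_saveid_idx` without visiting the table. A hypothetical example on Postgres:

```sql
-- Hypothetical usage: per-type counts of saves whose XML has not been
-- checked yet. All referenced columns live in the widened index, so this
-- can run as an index-only scan (given a reasonably fresh visibility map).
SELECT save_type, count(*) AS untested
FROM main_data
WHERE xml_tested IS NULL
GROUP BY save_type;
```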

View File

@@ -17,32 +17,65 @@ def get_db():
    )
    return connect


+def fetch_data(db_cur, offset, limit):
+    # xml_fetch = f"""
+    # WITH data AS (
+    #     SELECT save_id as id, data
+    #     FROM public.full_data
+    #     WHERE "save_type" != 'none'
+    #     AND xml_is_well_formed_document(full_data."data")
+    #     LIMIT {limit} OFFSET {offset}
+    # )
+    # SELECT data.id, string_agg(parts.part_type, '|') AS part_types
+    # FROM data,
+    #     XMLTABLE (
+    #         '//Ship/Parts/Part'
+    #         PASSING BY VALUE xmlparse(document data."data")
+    #         COLUMNS part_type text PATH '@partType',
+    #                 part_id text PATH '@id'
+    #     ) AS parts
+    # GROUP BY data.id;
+    # """
+    xml_fetch = f"""
+    WITH data AS (
+        SELECT save_id as id, data
+        FROM public.full_data
+        WHERE "save_type" != 'none'
+        AND xml_is_well_formed_document(full_data."data")
+        LIMIT {limit} OFFSET {offset}
+    ),
+    parts_data AS (
+        SELECT data.id, parts.part_type
+        FROM data,
+            XMLTABLE (
+                '//Ship/Parts/Part'
+                PASSING BY VALUE xmlparse(document data."data")
+                COLUMNS part_type text PATH '@partType',
+                        part_id text PATH '@id'
+            ) AS parts
+    )
+    SELECT id, string_agg(part_type || ':' || part_count, '|') AS part_types
+    FROM (
+        SELECT id, part_type, COUNT(part_type) AS part_count
+        FROM parts_data
+        GROUP BY id, part_type
+    ) AS counted_parts
+    GROUP BY id;
+    """
+    db_cur.execute(xml_fetch)
+    return db_cur.fetchall()


def main():
    db = get_db()
    db_cur = db.cursor()
-    xml_fetch = """
-    WITH limited_full_data AS (
-        SELECT save_id, data
-        FROM public.full_data
-        WHERE "save_type" != 'none'
-        AND xml_is_well_formed_document(full_data."data")
-        LIMIT 20
-    )
-    SELECT limited_full_data.save_id, array_agg(x.part_type) AS part_types, array_agg(x.part_id) AS part_ids
-    FROM limited_full_data,
-        XMLTABLE (
-            '//Ship/Parts/Part'
-            PASSING BY VALUE xmlparse(document limited_full_data."data")
-            COLUMNS part_type text PATH '@partType',
-                    part_id text PATH '@id'
-        ) AS x
-    GROUP BY limited_full_data.save_id;
-    """
-    db_cur.execute(xml_fetch)
-    logger.info(db_cur.fetchall())
-    ...
+    offset = 0
+    limit = 100
+    while True:
+        datas = fetch_data(db_cur, offset, limit)
+        if not datas:
+            break
+        for data in datas:
+            logger.info(data)
+        offset += limit


main()
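
A side note on the loop above: `LIMIT {limit} OFFSET {offset}` without an `ORDER BY` does not guarantee a stable row order in Postgres, so pages can skip or repeat rows. A keyset-style variant of the query (hypothetical, assuming `save_id` is monotonically orderable) avoids that:

```sql
-- Hypothetical keyset pagination for fetch_data: impose an order and
-- resume after the last save_id seen, instead of relying on OFFSET.
SELECT save_id AS id, data
FROM public.full_data
WHERE "save_type" != 'none'
  AND xml_is_well_formed_document("data")
  AND save_id > %(last_id)s  -- psycopg-style bind parameter
ORDER BY save_id
LIMIT 100;
```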

View File

@@ -12,6 +12,7 @@ pub struct Model {
    pub blake_hash: String,
    pub len: i64,
    pub short_data: Option<String>,
+    pub xml_tested: Option<bool>,
}

#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]