2024-08-29 02:18:50 +08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
import psycopg2
|
|
|
|
import tomli
|
|
|
|
|
|
|
|
from lib_not_dr import loggers
|
|
|
|
|
|
|
|
# Load the project configuration once at import time.
# NOTE: the path is relative to the current working directory, not to this file.
with open("../../config.toml", "rb") as config_file:
    CONFIG = tomli.load(config_file)

# Logger used for everything this script reports.
logger = loggers.config.get_logger("xml_parse")
|
|
|
|
|
|
|
|
|
|
|
|
def get_db():
    """Open and return a psycopg2 connection.

    The connection string is read from ``CONFIG["db"]["url"]``.
    """
    db_url = CONFIG["db"]["url"]
    return psycopg2.connect(db_url)
|
|
|
|
|
2024-08-30 02:15:22 +08:00
|
|
|
def fetch_data(db_cur, offset, limit):
    """Fetch one page of per-save part-type counts from ``public.full_data``.

    The query selects a window of rows whose ``save_type`` is not ``'none'``
    and whose ``data`` column is a well-formed XML document, expands every
    ``//Ship/Parts/Part`` element with ``XMLTABLE`` and counts the parts of
    each ``partType`` per save.

    Args:
        db_cur: A DB-API cursor (psycopg2) used to run the query.
        offset: Number of qualifying rows to skip before the window starts.
        limit: Maximum number of qualifying rows in the window.

    Returns:
        The result of ``db_cur.fetchall()``: one ``(id, part_types)`` row per
        save, where ``part_types`` is a ``'|'``-joined list of
        ``part_type:count`` entries. Saves in the window that produce no
        ``Part`` rows do not appear, so a page may hold fewer than ``limit``
        rows.
    """
    # LIMIT/OFFSET are sent as bound parameters rather than being formatted
    # into the SQL text; the statement contains no other '%' characters.
    # ORDER BY makes successive pages stable and non-overlapping: without it
    # PostgreSQL may return rows in a different order for every query.
    # NOTE(review): assumes save_id orders rows uniquely -- confirm against
    # the table definition.
    xml_fetch = """
    WITH data AS (
        SELECT save_id as id, data
        FROM public.full_data
        WHERE "save_type" != 'none'
        AND xml_is_well_formed_document(full_data."data")
        ORDER BY save_id
        LIMIT %s OFFSET %s
    ),
    parts_data AS (
        SELECT data.id, parts.part_type
        FROM data,
        XMLTABLE (
            '//Ship/Parts/Part'
            PASSING BY VALUE xmlparse(document data."data")
            COLUMNS part_type text PATH '@partType',
                    part_id text PATH '@id'
        ) AS parts
    )
    SELECT id, string_agg(part_type || ':' || part_count, '|') AS part_types
    FROM (
        SELECT id, part_type, COUNT(part_type) AS part_count
        FROM parts_data
        GROUP BY id, part_type
    ) AS counted_parts
    GROUP BY id;
    """
    db_cur.execute(xml_fetch, (limit, offset))
    return db_cur.fetchall()
|
2024-08-29 02:18:50 +08:00
|
|
|
|
|
|
|
def main():
    """Page through the part-type counts and log every returned row.

    Opens a connection with ``get_db``, repeatedly calls ``fetch_data`` with
    a fixed page size of 100 and an increasing offset, and logs each row via
    ``logger.info``. The loop stops at the first empty page. The cursor and
    the connection are closed on exit, including when a query raises.
    """
    limit = 100
    offset = 0
    db = get_db()
    try:
        # A psycopg2 cursor used as a context manager is closed on exit.
        with db.cursor() as db_cur:
            while True:
                datas = fetch_data(db_cur, offset, limit)
                # NOTE(review): an empty page ends the scan. If a whole
                # window of saves yields no Part rows the page is empty even
                # though later rows may exist -- verify this cannot happen.
                if not datas:
                    break
                for data in datas:
                    logger.info(data)
                offset += limit
    finally:
        # ``with connection`` in psycopg2 only ends the transaction, so the
        # connection is closed explicitly.
        db.close()
|
2024-08-29 02:18:50 +08:00
|
|
|
|
|
|
|
# Run the scan only when executed as a script, so that importing this module
# does not start the database scan.
if __name__ == "__main__":
    main()
|