# ARS-docs/scripts/parse-label.py

from pathlib import Path
from pprint import pprint
from typing import Callable, Iterator, Optional, Union

import mistune

from qtoml.encoder import dumps
from qtoml.decoder import loads
from lib_not_dr import loggers
from lib_not_dr.types.options import Options


# Markdown parser built with the "ast" renderer; the rest of this file treats
# its result as a list of node dicts (each with at least a "type" key).
ast_markdown = mistune.create_markdown(renderer="ast")
# Type alias for such a list of AST node dicts.
ast_type = list[dict[str, Union[str, dict]]]


def get_all_iter(
    ast: ast_type, type: str, func: Optional[Callable[[dict], bool]] = None
) -> Iterator[tuple[int, dict]]:
    """
    Lazily yield ``(index, node)`` for every matching node of ``ast``.

    Only the nodes directly contained in ``ast`` are examined; their children
    are not descended into. A node matches when its ``"type"`` equals ``type``
    and, if ``func`` is given, ``func(node)`` is truthy.

    After the last match the generator always yields one sentinel pair
    ``(-1, {})``. This lets a caller write ``index, node = next(...)`` and
    test ``index == -1`` for "not found" instead of catching StopIteration.
    Callers that iterate over every result must skip that final sentinel.

    :param ast: list of AST node dicts to scan
    :param type: value the node's ``"type"`` key must have
    :param func: optional extra predicate applied to each node of that type
    :return: generator of ``(index, node)`` pairs, ending with ``(-1, {})``
    """
    for i, node in enumerate(ast):
        if node["type"] != type:
            continue
        if func is not None and not func(node):
            continue
        yield i, node
    # "Not found" / "no more matches" sentinel, see docstring.
    yield -1, {}


def get_text(ast: dict) -> Optional[str]:
    """
    Return the first text string found under a single AST node.

    A ``text`` node returns its ``raw`` value. ``link``, ``block_text``,
    ``list_item`` and ``list`` nodes are resolved by descending into their
    first child only.

    :param ast: one AST node dict (it must have a ``"type"`` key)
    :return: the text found, or ``None`` when the node type is not supported
        (this is reported on stdout) or when a container node has no children
    """
    node_type = ast['type']
    if node_type == 'text':
        return ast['raw']
    if node_type in ('link', 'block_text', 'list_item', 'list'):
        children = ast.get('children')
        if not children:
            # A container without children has no text to offer; report that
            # the same way as an unsupported node instead of raising
            # KeyError / IndexError.
            return None
        return get_text(children[0])
    print('unkown type', ast['type'], ast)
    return None

class TagParser(Options):
    """
    Collect tag information from markdown files and write it out as TOML.

    ``load_tags`` reads the tag / alias definitions from ``./tags/readme.md``
    and writes them to ``./tags/tag.toml``. ``load_module`` scans every
    ``readme.md`` below ``module_root`` and writes the resulting
    tag -> files mapping to ``<module_root>/../build/tags.toml``.
    """

    # Directory that is searched recursively for module ``readme.md`` files.
    module_root: Path
    # tag -> string paths of the readme files that declare this tag.
    tags: dict[str, list[str]] = {}
    # tag -> [tag, *aliases], filled by ``load_tags``.
    tag_map: dict[str, list[str]] = {}
    logger: loggers.logger.Logger = None  # noqa

    def load_module(self, **kwargs):
        """
        Parse every ``readme.md`` under ``module_root`` and dump ``self.tags``
        as TOML to ``<module_root>/../build/tags.toml``.
        """
        for readme in self.module_root.rglob("readme.md"):
            self.logger.debug(readme.absolute(), tag="load file")
            self.get_module_data(readme)

        tag_toml = dumps(self.tags)
        tag_path = self.module_root / ".." / "build" / "tags.toml"
        tag_path.parent.mkdir(parents=True, exist_ok=True)
        # Opening with "w" creates the file, so no separate touch() is needed.
        with open(tag_path, "w", encoding="utf-8") as file:
            file.write(tag_toml)

    def get_module_data(self, module_path: Path):
        """
        Read the leading config block of one markdown file and record its tags.

        The file is only considered when its first AST node is a thematic
        break; the ``raw`` value of the node right after it is parsed as TOML
        and its ``tags`` entry is merged into ``self.tags``.

        :param module_path: path of the markdown file to inspect
        """
        with open(module_path, "r", encoding="utf-8") as f:
            file = f.read()

        ast = ast_markdown(file)
        # Both the thematic break and the node following it are needed;
        # a file with fewer nodes cannot carry a config block.
        if len(ast) < 2:
            return

        if ast[0] != {"type": "thematic_break"}:
            # Skip files that do not start with the comment/config block.
            return

        self.logger.info(f"开始解析 {ast[1]}")
        config_code = ast[1].get("raw", "")
        config_dict = loads(config_code)
        self.logger.trace(config_dict)

        if not (tag_list := config_dict.get("tags")):
            self.logger.warn("未找到 tags", tag=str(module_path))
            return

        for tag in tag_list:
            self.tags.setdefault(tag, []).append(str(module_path))

    def init(self, **kwargs) -> bool:
        """
        Set up the logger and load the tag definitions.

        NOTE(review): presumably invoked by the ``Options`` base class during
        construction, which also consumes the return value - confirm.
        """
        # The class-level dicts above are shared by every instance; give this
        # instance its own containers unless a value was already set on it.
        if "tags" not in vars(self):
            self.tags = {}
        if "tag_map" not in vars(self):
            self.tag_map = {}

        self.logger = loggers.get_logger()
        self.logger.global_level = 0
        self.load_tags()
        return False

    def load_tags(self):
        """
        Parse ``./tags/readme.md`` into ``self.tag_map`` and write it to
        ``./tags/tag.toml``.

        The tag list is the first list that follows the first level-2 heading.
        Each list item is one tag; a nested item whose text is "别名" holds a
        sub-list with the aliases of that tag.

        :return: ``False`` when the file, the heading or the list is missing;
            ``None`` after a successful run
        """
        tag_list_path = Path("./tags/readme.md")
        if not tag_list_path.exists():
            self.logger.error("未找到 tags/readme.md")
            return False

        with open(tag_list_path, "r", encoding="utf-8") as f:
            file = f.read()

        tag_ast: ast_type = ast_markdown(file)
        # Locate the first level-2 heading.
        heading_index, _ = next(
            get_all_iter(
                tag_ast, "heading", lambda node: node["attrs"]["level"] == 2
            )
        )
        if heading_index == -1:
            self.logger.error("未找到二级标题")
            return False
        # The tag list is the first list after that heading.
        after_heading: ast_type = tag_ast[heading_index + 1:]
        list_index, tag_list_node = next(get_all_iter(after_heading, "list"))
        if list_index == -1:
            self.logger.error("未找到 tag 列表")
            return False
        # Keep only real list items; each entry is that item's child nodes.
        tag_items = [
            item["children"]
            for item in tag_list_node["children"]
            if item.get("type") == "list_item"
        ]
        for tag in tag_items:
            if not tag:
                # An empty list item carries no tag name.
                continue
            this_tag = get_text(tag[0])
            if this_tag is None:
                # Never use None as a tag name / TOML key.
                self.logger.warn("无法解析 tag 名称, 已跳过")
                continue
            if len(tag) == 1:
                # Only the tag itself, no aliases.
                self.tag_map[this_tag] = [this_tag]
                self.logger.debug(f'添加 tag {this_tag}')
                continue
            # More than one child: look for the "别名" (alias) entry.
            tag_names = tag[1].get('children') or []
            alias_index, alias_item = next(
                get_all_iter(
                    tag_names, 'list_item', lambda item: get_text(item) == '别名'
                )
            )
            if alias_index == -1:
                # NOTE(review): a tag that has sub-entries but no "别名" entry
                # is not added to tag_map at all - confirm this is intended.
                continue
            alias_children = alias_item['children']
            # The aliases live in the list nested right after the "别名" text;
            # tolerate an alias entry that has no such nested list.
            if len(alias_children) > 1:
                alias_nodes = alias_children[1].get('children') or []
            else:
                alias_nodes = []
            sub_names = [this_tag]
            for sub_name in alias_nodes:
                alias = get_text(sub_name)
                if alias is not None:
                    sub_names.append(alias)
            self.logger.debug(f"添加 tag {this_tag} 和别名 {sub_names}")
            self.tag_map[this_tag] = sub_names
        # Report and persist the collected tags.
        self.logger.info(f'可用 tag: {self.tag_map}')
        with open('./tags/tag.toml', 'w', encoding='utf-8') as file:
            file.write(dumps(self.tag_map))


if __name__ == "__main__":
    # Script entry point: scan the "modules" directory and log the result.
    # NOTE(review): the logger is used right after construction, so
    # Options.__init__ presumably calls TagParser.init() - confirm.
    tag_parser = TagParser(module_root=Path("modules"))
    tag_parser.load_module()
    collected_tags = tag_parser.tags
    tag_parser.logger.info(collected_tags)
添加一波模块 (#7) 后面加一下每一个的信息开一堆issue @cree 我谢谢你啊 Co-authored-by: creepebucket <3327018890@qq.com> Reviewed-on: http://shenjack.top:5100/ARS/ARS-docs/pulls/7 2023-12-14 10:09:40 +08:00			`from pathlib import Path`
解析中 2023-12-15 09:03:08 +08:00			`from typing import Union, Callable`
添加一些 tag 解析 2023-12-15 06:51:32 +08:00			`from pprint import pprint`
添加一波模块 (#7) 后面加一下每一个的信息开一堆issue @cree 我谢谢你啊 Co-authored-by: creepebucket <3327018890@qq.com> Reviewed-on: http://shenjack.top:5100/ARS/ARS-docs/pulls/7 2023-12-14 10:09:40 +08:00
			`import mistune`

Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`from qtoml.encoder import dumps`
			`from qtoml.decoder import loads`
			`from lib_not_dr import loggers`
			`from lib_not_dr.types.options import Options`
添加一波模块 (#7) 后面加一下每一个的信息开一堆issue @cree 我谢谢你啊 Co-authored-by: creepebucket <3327018890@qq.com> Reviewed-on: http://shenjack.top:5100/ARS/ARS-docs/pulls/7 2023-12-14 10:09:40 +08:00

解析中 2023-12-15 09:03:08 +08:00			`ast_markdown = mistune.create_markdown(renderer="ast")`
3.9 你怎么这typing都不支持（恼 2023-12-15 07:51:23 +08:00			`ast_type = list[dict[str, Union[str, dict]]]`
添加一波模块 (#7) 后面加一下每一个的信息开一堆issue @cree 我谢谢你啊 Co-authored-by: creepebucket <3327018890@qq.com> Reviewed-on: http://shenjack.top:5100/ARS/ARS-docs/pulls/7 2023-12-14 10:09:40 +08:00

解析中 2023-12-15 09:03:08 +08:00			`def get_all_iter(`
			`ast: ast_type, type: str, func: Callable[[ast_type], bool] = None`
			`) -> tuple[int, dict]:`
			`for i, node in enumerate(ast):`
			`if node["type"] == type:`
			`if func is not None:`
			`if not func(node):`
			`continue`
			`yield i, node`
继续解析 2023-12-15 09:58:39 +08:00			`yield -1, {}`
解析中 2023-12-15 09:03:08 +08:00

			`def get_text(ast: ast_type) -> str:`
			`"""`
			`返回第一个找到的字符串`
			`"""`
			`if ast['type'] == 'text':`
			`return ast['raw']`
继续解析 2023-12-15 09:58:39 +08:00			`elif ast['type'] in ('link', 'block_text', 'list_item', 'list'):`
			`return get_text(ast['children'][0])`
			`print('unkown type', ast['type'], ast)`
解析中 2023-12-15 09:03:08 +08:00

Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`class TagParser(Options):`
			`module_root: Path`
			`tags: dict[str, list[str]] = {}`
解析中 2023-12-15 09:03:08 +08:00			`tag_map: dict[str, list[str]] = {}`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`logger: loggers.logger.Logger = None # noqa`
解析中 2023-12-15 09:03:08 +08:00
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`def load_module(self, **kwargs):`
解析中 2023-12-15 09:03:08 +08:00			`for readme in self.module_root.rglob("readme.md"):`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`self.logger.debug(readme.absolute(), tag="load file")`
			`self.get_module_data(readme)`
解析中 2023-12-15 09:03:08 +08:00
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`tag_toml = dumps(self.tags)`
			`tag_path = self.module_root / ".." / "build" / "tags.toml"`
Refactor tag_path creation in parse-label.py 2023-12-14 18:23:27 +08:00			`tag_path.parent.mkdir(parents=True, exist_ok=True)`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`tag_path.touch(exist_ok=True)`
解析中 2023-12-15 09:03:08 +08:00			`with open(tag_path, "w", encoding="utf-8") as file:`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`file.write(tag_toml)`
解析中 2023-12-15 09:03:08 +08:00
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`def get_module_data(self, module_path: Path):`
解析中 2023-12-15 09:03:08 +08:00			`with open(module_path, "r", encoding="utf-8") as f:`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`file = f.read()`

			`ast = ast_markdown(file)`
			`if len(ast) == 0:`
			`return`

解析中 2023-12-15 09:03:08 +08:00			`if ast[0] != {"type": "thematic_break"}:`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`# 排除开头不是注释块的`
			`return`
解析中 2023-12-15 09:03:08 +08:00
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`self.logger.info(f"开始解析 {ast[1]}")`
			`config_code = ast[1].get("raw", "")`
			`config_dict = loads(config_code)`
			`self.logger.trace(config_dict)`
解析中 2023-12-15 09:03:08 +08:00
			`if not (tag_list := config_dict.get("tags")):`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`self.logger.warn("未找到 tags", tag=str(module_path))`
			`return`
解析中 2023-12-15 09:03:08 +08:00
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`for tag in tag_list:`
			`if tag not in self.tags:`
			`self.tags[tag] = [module_path.__str__()]`
			`else:`
			`self.tags[tag].append(module_path.__str__())`
解析中 2023-12-15 09:03:08 +08:00
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`def init(self, **kwargs) -> bool:`
			`self.logger = loggers.get_logger()`
			`self.logger.global_level = 0`
添加一些 tag 解析 2023-12-15 06:51:32 +08:00			`self.load_tags()`
			`return False`

			`def load_tags(self):`
			`tag_list_path = Path("./tags/readme.md")`
			`if not tag_list_path.exists():`
			`self.logger.error("未找到 tags/readme.md")`
			`return False`

解析中 2023-12-15 09:03:08 +08:00			`with open(tag_list_path, "r", encoding="utf-8") as f:`
添加一些 tag 解析 2023-12-15 06:51:32 +08:00			`file = f.read()`

			`tag_ast: ast_type = ast_markdown(file)`
			`start_tag = -1`
解析中 2023-12-15 09:03:08 +08:00			`# 找到二级标题`
			`start_tag = get_all_iter(`
			`tag_ast, "heading", lambda node: node["attrs"]["level"] == 2`
			`)`
			`start_tag, node = next(start_tag)`
添加一些 tag 解析 2023-12-15 06:51:32 +08:00			`if start_tag == -1:`
			`self.logger.error("未找到二级标题")`
			`return False`
			`# 获取 tag`
解析中 2023-12-15 09:03:08 +08:00			`tag_ast: ast_type = tag_ast[start_tag + 1 :]`
			`start_tag = get_all_iter(tag_ast, "list")`
			`start_tag, node = next(start_tag)`
添加一些 tag 解析 2023-12-15 06:51:32 +08:00			`if start_tag == -1:`
			`self.logger.error("未找到 tag 列表")`
			`return False`
解析中 2023-12-15 09:03:08 +08:00			`tag_ast: ast_type = [`
			`item["children"]`
			`for item in tag_ast[start_tag]["children"]`
			`if item.get("type") == "list_item"`
			`]`
添加一些 tag 解析 2023-12-15 06:51:32 +08:00			`# 顺手过滤一下`
解析中 2023-12-15 09:03:08 +08:00			`for tag in tag_ast:`
			`# self.logger.debug([(item['type'], item['children']) for item in tag])`
继续解析 2023-12-15 09:58:39 +08:00			`this_tag = get_text(tag[0])`
解析中 2023-12-15 09:03:08 +08:00			`if len(tag) == 1:`
			`# 只有一个 tag, 无别名`
			`self.tag_map[this_tag] = [this_tag]`
继续解析 2023-12-15 09:58:39 +08:00			`self.logger.debug(f'添加 tag {this_tag}')`
解析中 2023-12-15 09:03:08 +08:00			`# len > 1`
			`elif len(tag) >= 1:`
			`# 寻找 "别名"`
继续解析 2023-12-15 09:58:39 +08:00			`tag_names = tag[1]['children']`
			`tag_list_start = get_all_iter(tag_names, 'list_item', lambda ast: get_text(ast) == '别名')`
			`tag_list_start, node = next(tag_list_start)`
			`if tag_list_start == -1:`
			`continue`
Update \| 更新解析逻辑添加别名 2023-12-23 21:27:44 +08:00			`sub_names = [this_tag]`
继续解析 2023-12-15 09:58:39 +08:00			`for sub_name in node['children'][1]['children']:`
			`sub_names.append(get_text(sub_name))`
			`self.logger.debug(f"添加 tag {this_tag} 和别名 {sub_names}")`
			`self.tag_map[this_tag] = sub_names`
添加一些 tag 解析 2023-12-15 06:51:32 +08:00			`# 获取 tag 列表`
继续解析 2023-12-15 09:58:39 +08:00			`self.logger.info(f'可用 tag: {self.tag_map}')`
			`with open('./tags/tag.toml', 'w', encoding='utf-8') as file:`
			`file.write(dumps(self.tag_map))`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00

解析中 2023-12-15 09:03:08 +08:00			`if __name__ == "__main__":`
			`parser = TagParser(module_root=Path("modules"))`
Commented out unused code in label.yml 我的问题（ Update label.yml to install dependencies using pip Update git clone URL for python-packs repository Fix git clone URL in label.yml workflow Add ls commands to debug dependency installation Update directory paths in label.yml Update label generation script Add ls commands to label.yml workflow Add ls command to display file details Remove unnecessary code in label.yml Update label.yml and add requirement.txt 不管了！ build? mkdir added 2023-12-14 10:13:51 +08:00			`parser.load_module()`
			`parser.logger.info(parser.tags)`