from pathlib import Path
from typing import Union
from pprint import pprint

import mistune

from qtoml.encoder import dumps
from qtoml.decoder import loads

from lib_not_dr import loggers
from lib_not_dr.types.options import Options

# Markdown parser configured to return an AST (list of node dicts)
# instead of rendered output.
ast_markdown = mistune.create_markdown(renderer='ast')

# Shape of the AST produced by ``ast_markdown``.
ast_type = list[dict[str, Union[str, dict]]]


class TagParser(Options):
    """Collect the tags declared in module readme files.

    Every ``readme.md`` found under ``module_root`` is parsed; the tags it
    declares are gathered into ``tags`` (tag name -> list of readme paths)
    and written out as TOML.
    """

    module_root: Path
    # NOTE(review): this dict is a class-level default; unless the ``Options``
    # base class copies defaults per instance it is shared between instances
    # -- TODO confirm.
    tags: dict[str, list[str]] = {}
    logger: loggers.logger.Logger = None  # noqa

    def load_module(self, **kwargs):
        """Parse every readme under ``module_root`` and write the tag index.

        The result is written to ``<module_root>/../build/tags.toml``; the
        parent directory is created when missing.  ``kwargs`` is accepted
        but not used.
        """
        for readme in self.module_root.rglob('readme.md'):
            self.logger.debug(readme.absolute(), tag="load file")
            self.get_module_data(readme)

        tag_path = self.module_root / ".." / "build" / "tags.toml"
        tag_path.parent.mkdir(parents=True, exist_ok=True)
        # Writing creates the file when needed, so no separate touch().
        tag_path.write_text(dumps(self.tags), encoding='utf-8')

    def get_module_data(self, module_path: Path):
        """Read the tags declared by one readme and record them in ``tags``.

        The readme is only considered when its first markdown node is a
        thematic break; the ``raw`` text of the node that follows is parsed
        as TOML and its ``tags`` entry is read.  Readmes without that header
        are skipped silently, readmes without tags are skipped with a
        warning.

        :param module_path: path of the readme file to parse
        """
        with open(module_path, 'r', encoding='utf-8') as f:
            content = f.read()
        ast = ast_markdown(content)

        # Skip files that do not start with a header block.  At least two
        # nodes are required: a file holding only the thematic break would
        # otherwise fail on ``ast[1]`` below.
        if len(ast) < 2 or ast[0] != {'type': 'thematic_break'}:
            return

        self.logger.info(f"开始解析 {ast[1]}")
        config_code = ast[1].get("raw", "")
        config_dict = loads(config_code)
        self.logger.trace(config_dict)

        if not (tag_list := config_dict.get('tags')):
            self.logger.warn("未找到 tags", tag=str(module_path))
            return

        path_text = str(module_path)
        for tag in tag_list:
            self.tags.setdefault(tag, []).append(path_text)

    def init(self, **kwargs) -> bool:
        """Set up the logger and load the tag list.

        Always returns ``False``.  NOTE(review): the meaning of the return
        value is defined by the ``Options`` base class, not visible here.
        """
        self.logger = loggers.get_logger()
        self.logger.global_level = 0
        self.load_tags()
        return False

    def load_tags(self):
        """Read the tag list from ``./tags/readme.md`` and log it.

        The tag list is the first markdown list that follows the first
        level-2 heading.  The children of its list items are pretty-printed
        and logged; nothing is stored yet.

        :return: ``False`` when the file, the heading or the list cannot be
            found; ``None`` otherwise.
        """
        tag_list_path = Path("./tags/readme.md")
        if not tag_list_path.exists():
            self.logger.error("未找到 tags/readme.md")
            return False

        with open(tag_list_path, 'r', encoding='utf-8') as f:
            content = f.read()
        tag_ast: ast_type = ast_markdown(content)

        # Locate the first level-2 heading.
        heading_index = next(
            (
                i for i, node in enumerate(tag_ast)
                if node['type'] == 'heading' and node['attrs']['level'] == 2
            ),
            -1,
        )
        if heading_index == -1:
            self.logger.error("未找到二级标题")
            return False

        # The tags are the first list after that heading.
        list_node = next(
            (
                node for node in tag_ast[heading_index + 1:]
                if node['type'] == 'list'
            ),
            None,
        )
        if list_node is None:
            self.logger.error("未找到 tag 列表")
            return False

        # Keep only real list items while extracting their children.
        tag_items = [
            item['children']
            for item in list_node['children']
            if item.get('type') == 'list_item'
        ]
        pprint(tag_items)
        self.logger.info(tag_items)
        # TODO: build the tag list from ``tag_items``.


if __name__ == '__main__':
    parser = TagParser(module_root=Path("modules"))
    parser.load_module()
    parser.logger.info(parser.tags)