# Endpoint of the DeepSeek chat-completions API used to generate tags.
API_URL = "https://api.deepseek.com/chat/completions"
# Directory (relative to the working directory) that holds the Markdown posts.
POSTS_DIR = "_posts"


def get_existing_tags():
    """Collect every tag already used in the front matter of existing posts.

    Each ``.md`` file in ``POSTS_DIR`` is split on ``---`` and the middle part
    is parsed as YAML front matter; entries of its ``tags`` list are gathered.
    Files that cannot be read or parsed are skipped (best effort).

    Returns:
        list[str]: De-duplicated tags, sorted. Empty when ``POSTS_DIR`` does
        not exist or no post declares tags.
    """
    if not os.path.exists(POSTS_DIR):
        return []

    all_tags = set()
    for filename in os.listdir(POSTS_DIR):
        if not filename.endswith(".md"):
            continue

        filepath = os.path.join(POSTS_DIR, filename)
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                content = f.read()

            parts = content.split("---", 2)
            if len(parts) < 3:
                continue
            front_matter = yaml.safe_load(parts[1])
        except Exception as e:
            # Best effort: one unreadable or malformed post must not stop the
            # scan, but say which file was skipped instead of hiding it.
            print(f"Skipping {filename}: {e}")
            continue

        if not isinstance(front_matter, dict):
            continue

        tags = front_matter.get("tags")
        if not isinstance(tags, list):
            continue

        for tag in tags:
            # YAML may produce ints/floats (e.g. `2024`); store every tag as
            # text so sorted() never compares mixed types and callers can
            # safely str.join() the result. Nested or null entries are ignored.
            if isinstance(tag, bool) or not isinstance(tag, (str, int, float)):
                continue
            text = str(tag).strip()
            if text:
                all_tags.add(text)

    return sorted(all_tags)


# Persona the model is asked to adopt, keyed by post category.
_ROLE_BY_CATEGORY = {
    "study": "资深技术专家",
    "anime": "资深二次元漫评人",
    "music": "专业乐评人",
    "paint": "美术鉴赏家",
    "game": "骨灰级游戏玩家",
    "snap": "专业摄影师",
    "asmr": "ASMR深度体验者",
    "emo": "情感作家",
    "paper": "资深学术研究员",
}

# Category-specific extraction guidance inserted into the prompt.
_GUIDANCE_BY_CATEGORY = {
    "study": "这是一篇学习笔记。请侧重提取核心技术栈、框架、编程语言等专业词汇。",
    "anime": "这是一篇动漫相关博文。请务必提取作品名称作为首个标签,并提取核心角色、制作公司、类型题材(如机战、日常)等。",
    "music": "这是一篇音乐鉴赏。请提取歌手/社团/作者名、曲风、专辑名等。",
    "paint": "这是一篇绘画分享。请提取其中的人物名字、画师名、艺术风格、绘制工具等。",
    "game": "这是一篇游戏记录。请务必提取游戏名称作为首个标签,并提取游戏类型、核心机制或开发商等。",
    "snap": "这是一篇摄影作品。请提取相机型号、镜头焦段、拍摄地点、摄影风格等。",
    "asmr": "这是一篇助眠相关内容。请提取音声作者名、触发音类型(如耳语、心跳、底噪)、设备等。",
    "emo": "这是一篇心情随笔。请提取抽象的情感意象或核心感悟名词。",
    "paper": "这是一篇学术论文阅读笔记或研究总结。请务必提取论文的研究领域、核心算法、关键模型(如Transformer、CNN)、数据集名称等学术专业词汇。",
}


def get_tags_from_ai(title, content, category, existing_tags):
    """Ask the chat model to generate tags for one post.

    Args:
        title: Post title, interpolated into the prompt.
        content: Post body; only the first 3000 characters are sent.
        category: Lower-cased category name used to pick the persona and the
            extraction guidance; unknown values fall back to generic defaults.
        existing_tags: Tags already in use, offered to the model for reuse.

    Returns:
        list[str]: The generated tags, or an empty list when ``API_KEY`` is
        not set, the request fails, or the reply has no usable tag list.
    """
    if not API_KEY:
        print("未检测到 DEEPSEEK_API_KEY,跳过 AI 打标签。")
        return []

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
    }

    role = _ROLE_BY_CATEGORY.get(category, "专业博客编辑")
    guidance = _GUIDANCE_BY_CATEGORY.get(category, "")

    # Front-matter tags can be non-strings (YAML ints etc.); str() each one so
    # building the prompt can never raise TypeError.
    tag_library = ", ".join(str(tag) for tag in (existing_tags or []))
    excerpt = (content or "")[:3000]

    prompt = "\n".join([
        f"角色:{role}。",
        "任务:请仔细阅读以下文章内容,为其生成 3-5 个最核心、最准确的标签。",
        f"分类指导:{guidance}",
        "严格要求:",
        f"1. 尽可能复用现有标签库中的标签:{tag_library}",
        "2. 标签必须是具体的专有名词(如人名、技术名、作品名、算法名),绝对禁止使用宽泛的形容词(如“好看的”、“好听的”)或长句!",
        "3. 必须以 JSON 对象格式返回,键名固定为 \"tags\",值为字符串数组。",
        "",
        f"文章标题:{title}",
        f"内容摘要:{excerpt}",
    ])

    payload = {
        "model": "deepseek-chat",
        "messages": [
            {
                "role": "system",
                "content": 'You are a precise and professional tag extractor. You must reply ONLY with a valid JSON object containing a "tags" string array. Do not include markdown formatting like ```json.',
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        "response_format": {"type": "json_object"},
        "temperature": 0.1,
    }

    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
        response.raise_for_status()

        data = response.json()
        raw = data["choices"][0]["message"]["content"]
        parsed = json.loads(raw)
    except Exception as e:
        # Network/service boundary: any failure (HTTP error, timeout, odd
        # response shape, invalid JSON) just means "no tags for this post".
        print(f"Error calling AI: {e}")
        return []

    # Check the type BEFORE calling .get(): a bare JSON array has no .get(),
    # so the list fallback must be decided on the parsed value itself.
    if isinstance(parsed, dict):
        candidates = parsed.get("tags")
    else:
        candidates = parsed

    if not isinstance(candidates, list):
        return []

    # Keep only non-empty strings; anything else is not a usable tag.
    return [tag.strip() for tag in candidates if isinstance(tag, str) and tag.strip()]


def process_posts():
    """Add AI-generated tags to every post whose front matter has none.

    Walks the ``.md`` files in ``POSTS_DIR`` (except ``BLOG_TEMPLATE.md``),
    parses the ``---``-delimited YAML front matter, and for posts without
    tags asks ``get_tags_from_ai`` for some. When tags come back the file is
    rewritten with the updated front matter and the original body.

    NOTE(review): the front matter is re-serialized with ``yaml.dump``, so
    comments and scalar formatting in it (for example how a date is written)
    may change on rewrite — confirm this is acceptable for the site generator.
    """
    if not os.path.exists(POSTS_DIR):
        print(f"文章目录 {POSTS_DIR} 不存在。")
        return

    existing_tags = get_existing_tags()

    for filename in os.listdir(POSTS_DIR):
        if not filename.endswith(".md") or filename == "BLOG_TEMPLATE.md":
            continue

        filepath = os.path.join(POSTS_DIR, filename)

        try:
            with open(filepath, "r", encoding="utf-8") as f:
                full_content = f.read()
        except (OSError, UnicodeError) as e:
            # One unreadable file must not abort the whole run.
            print(f"Skipping {filename}: {e}")
            continue

        parts = full_content.split("---", 2)
        if len(parts) < 3:
            continue

        try:
            front_matter = yaml.safe_load(parts[1])
        except Exception:
            continue
        post_body = parts[2]

        if front_matter is None:
            front_matter = {}
        elif not isinstance(front_matter, dict):
            # Front matter that is not a mapping cannot carry tags.
            continue

        if front_matter.get("tags"):
            continue

        title = front_matter.get("title", "")

        # `categories` may be a list or a single whitespace-separated string;
        # in both cases use the first category name, not the first character.
        categories = front_matter.get("categories")
        if isinstance(categories, str):
            categories = categories.split()
        first = categories[0] if isinstance(categories, (list, tuple)) and categories else ""
        category = first.lower() if isinstance(first, str) else ""

        print(f"Intelligently tagging: {title}...")
        new_tags = get_tags_from_ai(title, post_body.strip(), category, existing_tags)

        if not new_tags:
            continue

        front_matter["tags"] = new_tags

        yaml_content = yaml.dump(front_matter, allow_unicode=True, sort_keys=False).strip()
        # The body slice still starts with the newline that followed the
        # closing `---`; drop it so the rewrite does not insert a blank line.
        body = post_body[1:] if post_body.startswith("\n") else post_body
        new_content = f"---\n{yaml_content}\n---\n{body}"

        with open(filepath, "w", encoding="utf-8") as f:
            f.write(new_content)
        print(f"Updated tags for: {filename}")

        # Let later posts in this run reuse the tags that were just created.
        for tag in new_tags:
            if tag not in existing_tags:
                existing_tags.append(tag)


# Run the tagging pass only when this file is executed as a script.
if __name__ == "__main__":
    process_posts()