Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 90 additions & 2 deletions backend/app/api/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def add_progress_callback(msg, progress_ratio):
)

def wait_progress_callback(msg, progress_ratio):
progress = 55 + int(progress_ratio * 35) # 55% - 90%
progress = 55 + int(progress_ratio * 25) # 55% - 80%
task_manager.update_task(
task_id,
message=msg,
Expand All @@ -456,6 +456,34 @@ def wait_progress_callback(msg, progress_ratio):

builder._wait_for_episodes(episode_uuids, wait_progress_callback)

# 实体去重
task_manager.update_task(
task_id,
message="执行实体去重...",
progress=80
)
dedup_result = None
try:
from ..services.entity_deduplicator import EntityDeduplicator
deduplicator = EntityDeduplicator()
dedup_report = deduplicator.deduplicate(
graph_id=graph_id,
progress_callback=lambda msg, prog: task_manager.update_task(
task_id,
message=f"去重: {msg}",
progress=80 + int(prog * 10), # 80% - 90%
),
)
dedup_result = dedup_report.to_dict()
build_logger.info(
f"[{task_id}] 实体去重完成: "
f"发现 {dedup_report.groups_found} 组重复, "
f"删除 {dedup_report.nodes_removed} 个节点, "
f"迁移 {dedup_report.edges_migrated} 条边"
)
except Exception as dedup_err:
build_logger.warning(f"[{task_id}] 实体去重失败(不影响图谱构建): {dedup_err}")

# 获取图谱数据
task_manager.update_task(
task_id,
Expand Down Expand Up @@ -483,7 +511,8 @@ def wait_progress_callback(msg, progress_ratio):
"graph_id": graph_id,
"node_count": node_count,
"edge_count": edge_count,
"chunk_count": total_chunks
"chunk_count": total_chunks,
"dedup_report": dedup_result
}
)

Expand Down Expand Up @@ -615,3 +644,62 @@ def delete_graph(graph_id: str):
"error": str(e),
"traceback": traceback.format_exc()
}), 500


# ============== 接口:实体去重 ==============

@graph_bp.route('/deduplicate', methods=['POST'])
def deduplicate_graph():
    """Run entity deduplication on an already-built graph.

    Request body (JSON):
        graph_id (str, required): identifier of the target graph.
        dry_run (bool, optional, default False): when True, only detect
            duplicate groups without merging anything.

    Returns:
        200 with ``{"success": true, "data": <DeduplicationReport dict>}``
        on success; 400 when ``graph_id`` is missing; 500 when required
        configuration is absent or an unexpected error occurs (the
        traceback is included in the response for debugging).
    """
    try:
        # Both external services must be configured before dedup can run.
        if not Config.ZEP_API_KEY:
            return jsonify({
                "success": False,
                "error": "ZEP_API_KEY未配置"
            }), 500
        if not Config.LLM_API_KEY:
            return jsonify({
                "success": False,
                "error": "LLM_API_KEY未配置(实体去重需要 LLM 支持)"
            }), 500

        payload = request.get_json() or {}
        graph_id = payload.get('graph_id')
        if not graph_id:
            return jsonify({
                "success": False,
                "error": "请提供 graph_id"
            }), 400

        # Imported lazily so the module loads even if the service is absent.
        from ..services.entity_deduplicator import EntityDeduplicator

        result = EntityDeduplicator().deduplicate(
            graph_id=graph_id,
            dry_run=payload.get('dry_run', False),
        )
        return jsonify({
            "success": True,
            "data": result.to_dict()
        })

    except Exception as e:
        return jsonify({
            "success": False,
            "error": str(e),
            "traceback": traceback.format_exc()
        }), 500
3 changes: 3 additions & 0 deletions backend/app/services/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .text_processor import TextProcessor
from .zep_entity_reader import ZepEntityReader, EntityNode, FilteredEntities
from .oasis_profile_generator import OasisProfileGenerator, OasisAgentProfile
from .entity_deduplicator import EntityDeduplicator, DeduplicationReport
from .simulation_manager import SimulationManager, SimulationState, SimulationStatus
from .simulation_config_generator import (
SimulationConfigGenerator,
Expand Down Expand Up @@ -46,6 +47,8 @@
'FilteredEntities',
'OasisProfileGenerator',
'OasisAgentProfile',
'EntityDeduplicator',
'DeduplicationReport',
'SimulationManager',
'SimulationState',
'SimulationStatus',
Expand Down
Loading