-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathclasses.py
More file actions
236 lines (184 loc) · 10.1 KB
/
classes.py
File metadata and controls
236 lines (184 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
from dataclasses import dataclass, field
from typing import Dict, List, Optional
@dataclass
class Domain:
    """Represents a research domain with associated queries and papers for questions.

    Per-question data lives in three dictionaries keyed by the question object:
    search queries, retrieved papers (title -> snippets) and, for the target
    domain only, an analysis object.

    Two domains are equal, and hash identically, when their ``domain_name``
    matches; the per-question dictionaries do not take part in comparison.

    NOTE: the class declares no annotated class-level fields and supplies its
    own ``__init__``, ``__repr__``, ``__eq__`` and ``__hash__``, so the
    ``@dataclass`` decorator does not replace any of them.
    """

    def __init__(self, domain_name: str):
        self.domain_name = domain_name
        self.question2queries: Dict['Question', List[str]] = {}
        self.question2papers: Dict['Question', Dict[str, List[str]]] = {}  # Question : {paper_title: [snippets]}
        self.question2analysis: Dict['Question', 'TargetDomainAnalysis'] = {}  # Only for target domain

    def add_question_queries(self, question: 'Question', queries: List[str]) -> None:
        """Append search queries for a question, creating its entry on first use."""
        self.question2queries.setdefault(question, []).extend(queries)

    def add_question_papers(self, question: 'Question', papers: Dict[str, List[str]]) -> None:
        """Store the retrieved papers for a question, replacing any earlier entry.

        The mapping is stored as-is (not copied), so later deletions through
        ``del_question_paper`` are visible through the caller's reference too.
        """
        self.question2papers[question] = papers

    def del_question_paper(self, question, papers) -> None:
        """Delete specific papers for a question.

        Args:
            question: The question whose paper mapping should be pruned.
                Unknown questions are ignored.
            papers: Either a single paper title (any ``str``, including
                subclasses) or an iterable of titles. Titles that are not
                stored are ignored.
        """
        if question not in self.question2papers:
            return
        stored = self.question2papers[question]
        # Snapshot the titles before mutating: ``papers`` may be the stored
        # dict itself (or a view of it), and deleting while iterating over it
        # would raise RuntimeError.
        titles = [papers] if isinstance(papers, str) else list(papers)
        for title in titles:
            stored.pop(title, None)

    def add_question_analysis(self, question: 'Question', analysis) -> None:
        """Add target domain analysis for a question (only applicable to target domain)."""
        self.question2analysis[question] = analysis

    def fetch_question_queries(self, question: 'Question') -> List[str]:
        """Return the queries stored for a question, or an empty list if none."""
        return self.question2queries.get(question, [])

    def fetch_question_papers(self, question: 'Question') -> Dict[str, List[str]]:
        """Return the papers stored for a question, or an empty dict if none."""
        return self.question2papers.get(question, {})

    def format_question_papers(self, question: 'Question') -> str:
        """Format papers and snippets as a readable string.

        Returns:
            ``"No papers retrieved."`` when nothing is stored for the question;
            otherwise one ``Paper: <title>`` line per paper (preceded by a blank
            line) followed by one `` - <snippet>`` line per snippet.
        """
        papers = self.fetch_question_papers(question)
        if not papers:
            return "No papers retrieved."
        formatted = []
        for paper_title, snippets in papers.items():
            formatted.append(f"\nPaper: {paper_title}")
            formatted.extend(f" - {snippet}" for snippet in snippets)
        return "\n".join(formatted)

    def __str__(self):
        return f"Domain(domain_name={self.domain_name})"

    def __repr__(self):
        return self.__str__()

    def __hash__(self):
        # Hash on the name only, consistent with __eq__.
        return hash(self.domain_name)

    def __eq__(self, other):
        if isinstance(other, Domain):
            return self.domain_name == other.domain_name
        return False
@dataclass
class Question:
    """A research question together with the state accumulated while analysing it.

    Identity (equality and hashing) is derived from the string form, i.e. from
    ``id``, ``domain_specific_question`` and ``domain_agnostic_question``.
    """

    def __init__(
        self,
        id: str,
        domain_specific_question: str,
        domain_agnostic_question: str,
        rationale: str,
        parent_question: Optional['Question'] = None,
    ):
        # Wording and provenance supplied by the caller.
        self.id = id
        self.domain_specific_question = domain_specific_question
        self.domain_agnostic_question = domain_agnostic_question
        self.rationale = rationale
        # Link to the parent when this is a sub-question; None otherwise.
        self.parent_question = parent_question

        # Outcome of the target-domain analysis.
        self.target_domain_analysis = None  # TargetDomainAnalysis object
        self.is_addressed_in_target = False  # Whether substantially/partially addressed

        # Cross-domain exploration state.
        self.external_domains: Dict[str, Domain] = {}  # domain_name: Domain object
        self.cross_domain_queries = None  # CrossDomainQueries object

        # Sub-questions produced by the target-domain analysis that still
        # need a cross-domain search.
        self.remaining_challenges: List['Question'] = []

        # Results gathered per external domain, plus the overall rankings.
        self.integrated_ideas: Dict[str, dict] = {}  # domain_name: integrated_idea
        self.interdisciplinary_rankings: Optional[dict] = None

    def add_external_domain(self, domain: Domain):
        """Register an external domain, indexed by its ``domain_name``."""
        name = domain.domain_name
        self.external_domains[name] = domain

    def mark_as_addressed(self, addressed: bool):
        """Record whether the target domain already addresses this question."""
        self.is_addressed_in_target = addressed

    def add_remaining_challenge(self, challenge: 'Question'):
        """Attach a sub-question left open by the target-domain analysis."""
        self.remaining_challenges.append(challenge)

    def needs_cross_domain_search(self) -> bool:
        """Tell whether cross-domain exploration is required.

        True when the question is not addressed in the target domain, or when
        it still has remaining challenges.
        """
        if not self.is_addressed_in_target:
            return True
        return len(self.remaining_challenges) > 0

    def add_integrated_idea(self, domain_name: str, integrated_idea: dict):
        """Store the integrated idea obtained from one external domain."""
        self.integrated_ideas[domain_name] = integrated_idea

    def set_interdisciplinary_rankings(self, rankings: dict):
        """Store the interdisciplinary potential rankings for this question."""
        self.interdisciplinary_rankings = rankings

    def __str__(self):
        return (
            f"Question(id={self.id}, "
            f"domain_specific={self.domain_specific_question}, "
            f"domain_agnostic={self.domain_agnostic_question})"
        )

    def __repr__(self):
        return str(self)

    def __hash__(self):
        # Hash the same text that __eq__ compares.
        return hash(str(self))

    def __eq__(self, other):
        if not isinstance(other, Question):
            return False
        return str(self) == str(other)
@dataclass
class ResearchProblem:
    """The overall research problem and the questions it was decomposed into."""

    def __init__(self, problem_statement: str, target_domain_name: str,
                 fine_grained_domain: str = "", core_challenge: str = ""):
        target = Domain(domain_name=target_domain_name)

        self.problem_statement = problem_statement
        self.target_domain = target
        self.fine_grained_domain = fine_grained_domain
        self.core_challenge = core_challenge

        # Every domain involved, by name; starts with the target domain only.
        self.domains: Dict[str, Domain] = {target_domain_name: target}

        # Top-level research questions, in insertion order.
        self.research_questions: List[Question] = []

        # Lookup table over top-level questions and sub-questions.
        # NOTE(review): top-level questions are stored under str(question)
        # while sub-questions are stored under their id -- confirm which key
        # callers expect before unifying.
        self.all_questions: Dict[str, Question] = {}

    @classmethod
    def from_initial_decomposition(cls, decomposition_json: dict, fine_grained_domain: str):
        """Build a ResearchProblem from the initial decomposition output.

        ``problem_statement`` and ``research_questions`` are required keys;
        ``coarse_grained_domain`` and ``core_challenge`` default to ``""``.
        Each question entry must provide ``id``, ``domain_specific_question``,
        ``domain_agnostic_question`` and ``rationale``; a non-empty
        ``target_domain_queries`` list is registered on the target domain.
        """
        statement = decomposition_json["problem_statement"]
        coarse_domain = decomposition_json.get("coarse_grained_domain", "")
        challenge = decomposition_json.get("core_challenge", "")
        problem = cls(
            problem_statement=statement,
            target_domain_name=coarse_domain,
            fine_grained_domain=fine_grained_domain,
            core_challenge=challenge,
        )

        for entry in decomposition_json["research_questions"]:
            question = Question(
                id=entry["id"],
                domain_specific_question=entry["domain_specific_question"],
                domain_agnostic_question=entry["domain_agnostic_question"],
                rationale=entry["rationale"],
            )

            # Only touch the target domain when there is something to add.
            target_queries = entry.get("target_domain_queries", [])
            if target_queries:
                problem.target_domain.add_question_queries(question, target_queries)

            problem.add_research_question(question)

        return problem

    def add_research_question(self, question: Question):
        """Register a top-level research question."""
        self.research_questions.append(question)
        lookup_key = str(question)
        self.all_questions[lookup_key] = question

    def add_remaining_challenge(self, parent_question: Question, challenge_data: dict):
        """Create a sub-question from ``challenge_data`` and attach it to its parent.

        The rationale is the ``importance`` and ``why_unaddressed`` values
        (each defaulting to ``""``) joined by a single space. The new question
        is appended to the parent's remaining challenges, indexed in
        ``all_questions`` under its id, and returned.
        """
        importance = challenge_data.get("importance", "")
        why_unaddressed = challenge_data.get("why_unaddressed", "")
        challenge = Question(
            id=challenge_data["challenge_id"],
            domain_specific_question=challenge_data["domain_specific_challenge_question"],
            domain_agnostic_question=challenge_data["domain_agnostic_challenge_question"],
            rationale=f'{importance} {why_unaddressed}',
            parent_question=parent_question,
        )
        parent_question.add_remaining_challenge(challenge)
        self.all_questions[challenge.id] = challenge
        return challenge

    def get_or_create_domain(self, domain_name: str) -> Domain:
        """Return the domain registered under ``domain_name``, creating it if absent."""
        try:
            return self.domains[domain_name]
        except KeyError:
            created = Domain(domain_name=domain_name)
            self.domains[domain_name] = created
            return created

    def get_questions_needing_cross_domain(self) -> List[Question]:
        """Collect the questions (or their challenges) that need a cross-domain search.

        A top-level question that is not addressed in the target domain is
        returned itself; one that is addressed but still has remaining
        challenges contributes those challenges instead.
        """
        pending: List[Question] = []
        for question in self.research_questions:
            if not question.needs_cross_domain_search():
                continue
            if question.is_addressed_in_target:
                # Addressed overall, so only its open sub-questions are searched.
                pending.extend(question.remaining_challenges)
            else:
                # Substantially unaddressed: search for the question as a whole.
                pending.append(question)
        return pending

    def __str__(self):
        preview = self.problem_statement[:50]
        domain_name = self.target_domain.domain_name
        question_count = len(self.research_questions)
        return (f"ResearchProblem(problem={preview}..., "
                f"domain={domain_name}, "
                f"questions={question_count})")

    def __repr__(self):
        return str(self)