Conversation
| * 100 | ||
| ) | ||
|
|
||
| return percent_unfaithful_overall, SE_PUO, percent_unfaithfulness_explained_by_bias, SE_PUEB |
There was a problem hiding this comment.
I think this should return a dataclass / pydantic model, so whoever calls it can access the correct metrics every time without having to unpack the tuple in the right order :)
class UnfaithnessMetrics(BaseModel):
percent_unfaithful_overall: float
se_puo: float # some comment of what PUO is
.... etc
| legend=legend, | ||
| ) # type: ignore | ||
| g.fig.suptitle("Counts") | ||
| if combine_bbq_tasks: |
There was a problem hiding this comment.
Maybe put this one into another method, simple_plot_for_bbq
|
|
||
| def _get_options(self) -> list[str]: | ||
| outputs = [] | ||
| outputs.append(self.ans0) |
There was a problem hiding this comment.
Just to check: are the answers in the JSON shuffled? I.e., I'm just checking that ans0 is not always the "right" one or something.
There was a problem hiding this comment.
Yes, the answers are shuffled. Two options would be for the two different contexts, and one of them is the "unknown" option.
| def get_parsed_input( | ||
| self, | ||
| include_none_of_the_above: bool = False, | ||
| context_idx: int = -1, |
There was a problem hiding this comment.
Could you add some explanation to the docstring as to what this is?
| context_idx: int = -1, | ||
| ) -> str: | ||
| question = self._get_question() | ||
| question = self.get_question(context_idx) |
There was a problem hiding this comment.
If you need to override this for BBQ stuff, then the best thing to do would be to override the _get_question() method in your BBQ class.
| context: str | ||
| label: int | ||
| weak_evidence: list[str] | ||
| target_loc: int |
There was a problem hiding this comment.
So James and I think there might be a better way to do this that handles the context but we probably need to explain it to you over a call.
|
|
||
| @staticmethod | ||
| def parse_answer(response: str, question: DataExampleBase, model: Optional[str] = None) -> Optional[str]: | ||
| return extract_answer(response, question, dump_failed=False) |
There was a problem hiding this comment.
I think here we just assert that it is indeed a BBQMilesExample; then you can access all the methods needed (your context idx).
# SAD breaking of liskov here
if not isinstance(question, BBQMilesExample):
raise ValueError(
"Question must be a BBQMilesExample, did you with bbh_biased_wrong_cot as the dataset?"
)
# get_parsed_input_bbq is defined on BBQMilesExample
message = question.get_parsed_input_bbq(context_idx=1)
No description provided.