Changes from all commits
14 changes: 11 additions & 3 deletions lmeval/models/gemini.py
@@ -81,14 +81,18 @@ def _completion(self,
messages: list[dict],
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1) -> ModelResponse:
completions: int = 1,
top_p: float = 0.0,
seed: int = 0) -> ModelResponse:
Collaborator comment on the new seed parameter:

You should have an option to not set a seed; otherwise you are implicitly setting the seed to 0 every time for everyone without people knowing it. Need to double check, but defaulting to None will probably work.
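A minimal sketch of what this comment suggests, assuming the completion() and ModelResponse imports already present in gemini.py: top_p and seed default to None and are forwarded only when the caller sets them, so nobody is silently pinned to seed=0. The optional_args dict and the direct return are illustrative simplifications, not the PR's actual implementation.

    # Hypothetical variant of _completion(): seed/top_p default to None and are
    # only passed through to litellm when the caller sets them explicitly.
    def _completion(self,
                    model: str,
                    messages: list[dict],
                    temperature: float = 0.0,
                    max_tokens: int = 4096,
                    completions: int = 1,
                    top_p: float | None = None,
                    seed: int | None = None) -> ModelResponse:
        optional_args = {}
        if top_p is not None:
            optional_args['top_p'] = top_p
        if seed is not None:
            optional_args['seed'] = seed
        return completion(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            n=completions,
            api_key=self.runtime_vars.get('api_key'),
            # gemini specific
            safety_settings=self.runtime_vars.get('safety_settings'),
            **optional_args)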

#! we need to isolate the batch completion to allow various implementations to pass additional parameters
resp = completion(
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
n=completions,
top_p=top_p,
seed=seed,
api_key=self.runtime_vars.get('api_key'),
# gemini specific
safety_settings=self.runtime_vars.get('safety_settings'))
@@ -97,15 +101,19 @@ def _completion(self,
def _batch_completion(self, model: str, messages_batch: list[dict],
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1) -> list[ModelResponse]:
completions: int = 1,
top_p: float = 0.0,
seed: int = 0) -> list[ModelResponse]:
batch_responses = batch_completion(
model=model,
messages=messages_batch,
temperature=temperature,
max_tokens=max_tokens,
n=completions,
top_p=top_p,
seed=seed,
api_key=self.runtime_vars.get('api_key'),
# gemini specific
safety_settings=self.runtime_vars.get('safety_settings')
)
return batch_responses
return batch_responses
22 changes: 16 additions & 6 deletions lmeval/models/httpmodel.py
@@ -95,6 +95,8 @@ def _post_query(
query: str,
temperature: float = 0.0,
max_tokens: int = 4096,
top_p: float = 0.0,
seed: int = 0,
extra_dict: Dict[str, Any] | None = None) -> Dict[str, Any]:
"""Make the POST call. Override in subclass as needed.

@@ -106,6 +108,8 @@ def _post_query(
'generation_config': {
'temperature': temperature,
'max_output_tokens': max_tokens,
'seed': seed,
'topP': top_p,
'candidate_count': 1
},
'contents': [{
@@ -210,7 +214,9 @@ def generate_text(self,
medias: list[Media] | Media = None,
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1) -> LMAnswer:
completions: int = 1,
top_p: float = 0.0,
seed: int = 0) -> LMAnswer:
# no media for now
iserror = False
error_reason = ''
@@ -222,7 +228,7 @@ def generate_text(self,

try:
start = time()
res = self._post_query(prompt, temperature, max_tokens, extra_dict)
res = self._post_query(prompt, temperature, max_tokens, top_p, seed, extra_dict)
gen_time = time() - start
text = res['candidates'][0]['content']['parts'][0]['text']
except Exception as e: # pylint: disable=broad-except
@@ -242,14 +248,16 @@ def batch_generate_text(
medias: list[list[Media]],
temperature: float = 0,
max_tokens: int = 4096,
completions: int = 1
completions: int = 1,
top_p: float = 0.0,
seed: int = 0
) -> Generator[Tuple[int, LMAnswer], None, None]:
# only handle text for now but check the size matches anyway.
assert len(prompts) == len(medias)
try:
batch_responses = self._batch_completion(prompts, medias,
temperature, max_tokens,
completions)
completions, top_p, seed)
except Exception as e: # pylint: disable=broad-except
error_message = f'batch generation failed {repr(e)}'
log.error(error_message)
@@ -266,15 +274,17 @@ def batch_generate_text(

def _batch_completion(self, prompts: list[str], medias: list[list[Media]],
temperature: float, max_tokens: int,
completions: int) -> list[LMAnswer]:
completions: int, top_p: float = 0.0,
seed: int = 0) -> list[LMAnswer]:
# only do text for now
        futures = []  # renamed from `completions`, which shadowed the int parameter above
with ThreadPoolExecutor(
max_workers=self.runtime_vars.get('max_workers')) as executor:
for i, p in enumerate(prompts):

future = executor.submit(self.generate_text, p, medias[i],
temperature, max_tokens, completions)
temperature, max_tokens, completions,
top_p, seed)
                futures.append(future)

        return [future.result() for future in futures]
32 changes: 26 additions & 6 deletions lmeval/models/litellm.py
@@ -113,7 +113,9 @@ def batch_generate_text(
medias: list[list[Media]],
temperature: float = 0,
max_tokens: int = 4096,
completions: int = 1
completions: int = 1,
top_p: float = 0.0,
seed: int = 0
) -> Generator[Tuple[int, LMAnswer], None, None]:
model = self.runtime_vars['litellm_version_string']
assert len(prompts) == len(
@@ -125,7 +127,7 @@ def batch_generate_text(
try:
batch_responses = self._batch_completion(model, messages_batch,
temperature, max_tokens,
completions)
completions, top_p, seed)
except:
batch_responses = [None for _ in prompts]

@@ -138,7 +140,9 @@ def generate_text(self,
medias: list[Media] | Media | None = None,
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1) -> LMAnswer:
completions: int = 1,
top_p: float = 0.0,
seed: int = 0) -> LMAnswer:
# FIXME: finish multi-completion support
model = self.runtime_vars['litellm_version_string']
messages = self._make_messages(prompt, medias)
@@ -148,7 +152,9 @@ def generate_text(self,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
completions=completions)
completions=completions,
top_p=top_p,
seed=seed)
except Exception as e:
resp = None
print("Can't get response from model:", e)
@@ -163,6 +169,8 @@ def complete(
temperature: float = 0.0,
completions: int = 1,
max_tokens: int = 4096,
top_p: float = 0.0,
seed: int = 0,
**generation_kwargs,
) -> LMAnswer:
# FIXME: finish multi-completion support
@@ -173,6 +181,8 @@ def complete(
temperature=temperature,
max_tokens=max_tokens,
completions=completions,
top_p=top_p,
seed=seed,
**generation_kwargs,
)
resp = self._completion(**arguments)
@@ -211,15 +221,17 @@ def multi_complete(self,
temperature: float = 0.0,
completions: int = 1,
max_tokens: int = 4096,
top_p: float = 0.0,
seed: int = 0,
**generation_kwargs) -> LMAnswer:
n_completions = grouped_question.metadata.get('n_completions', 1)
temperature = grouped_question.metadata.get('temperature', None)
grouped_answers = []

for question in grouped_question.question_set:
answer = self.complete(question.messages, temperature,
n_completions, max_tokens,
**generation_kwargs)
n_completions, max_tokens, top_p,
seed, **generation_kwargs)
grouped_answers.append(answer)

return self._make_grouped_answer(grouped_answers)
@@ -374,6 +386,8 @@ def _batch_completion(self,
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1,
top_p: float = 0.0,
seed: int = 0,
**generation_kwargs) -> list[ModelResponse]:

#! we need to isolate the batch completion to allow various implementations to pass additional parameters
@@ -399,6 +413,8 @@ def _batch_completion(self,
temperature=temperature,
max_tokens=max_tokens,
n=completions,
top_p=top_p,
seed=seed,
api_key=self.runtime_vars.get('api_key'),
base_url=self.runtime_vars.get('base_url'),
max_workers=self.runtime_vars.get('max_workers'),
@@ -412,6 +428,8 @@ def _completion(self,
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1,
top_p: float = 0.0,
seed: int = 0,
**generation_kwargs) -> ModelResponse:
if "generation_kwargs" in self.runtime_vars:
# do not override the generation_kwargs passed as parameter
@@ -442,6 +460,8 @@ def _completion(self,
temperature=temperature,
max_tokens=max_tokens,
n=completions,
top_p=top_p,
seed=seed,
api_key=self.runtime_vars.get('api_key'),
base_url=self.runtime_vars.get('base_url'),
extra_headers=self._make_headers(),
38 changes: 27 additions & 11 deletions lmeval/models/lmmodel.py
@@ -43,22 +43,28 @@ def generate_text(self,
medias: list[Media] | Media = None,
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1) -> LMAnswer:
completions: int = 1,
top_p: float = 0.0,
seed: int = 0) -> LMAnswer:
raise NotImplementedError

def generate_image(self,
prompt: str,
medias: Optional[list[Media]] = None,
temperature: float = 0.0,
completions: int = 1) -> LMAnswer:
completions: int = 1,
top_p: float = 0.0,
seed: int = 0) -> LMAnswer:
raise NotImplementedError

def complete(self, messages: list[dict], temperature: float = 0.0,
completions: int = 1) -> LMAnswer:
completions: int = 1, top_p: float = 0.0,
seed: int = 0) -> LMAnswer:
raise NotImplementedError

def multi_complete(self, question: "GroupedQuestion", temperature: float = 0.0,
completions: int = 1) -> LMAnswer:
completions: int = 1, top_p: float = 0.0,
seed: int = 0) -> LMAnswer:
raise NotImplementedError

def _build_answer(self, text: str, generation_time: float,
@@ -133,49 +139,59 @@ def batch_execute(
tasks: list["EvalTask"],
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1
completions: int = 1,
top_p: float = 0.0,
seed: int = 0
) -> Generator[Tuple[int, LMAnswer], None, None]:
""" Execute a batch of prompts in parallel."""
for i, etask in enumerate(tasks):
if etask.task.type == TaskType.completion.value:
yield i, self.complete(etask.messages,
temperature,
completions,
                                       # pass by keyword: subclass overrides place max_tokens before top_p
                                       top_p=top_p,
                                       seed=seed,
tools=etask.question.tools)
elif etask.task.type == TaskType.grouped_completion.value:
yield i, self.multi_complete(etask.question,
temperature=temperature,
completions=10)
completions=10,
top_p=top_p,
seed=seed)
else:
# normalize medias
mds = etask.question.medias if etask.question.medias else []
mds = mds if isinstance(mds, list) else [mds]
yield i, self.generate_text(etask.instanciated_prompt, mds,
temperature, max_tokens,
completions)
completions, top_p, seed)

def batch_generate_text(
self,
prompts: list[str],
medias: list[list[Media]],
temperature: float = 0.0,
max_tokens: int = 4096,
completions: int = 1
completions: int = 1,
top_p: float = 0.0,
seed: int = 0
) -> Generator[Tuple[int, LMAnswer], None, None]:
""" Generate text answers in batches or parallel."""
for i, prompt in enumerate(prompts):
yield i, self.generate_text(prompt, medias[i], temperature,
completions)
                                        max_tokens, completions, top_p, seed)

def batch_generate_image(self,
prompts: list[str],
medias: list[list[Media]],
temperature: float = 0.0,
completions: int = 1) -> Generator[Tuple[int, LMAnswer], None, None]:
completions: int = 1,
top_p: float = 0.0,
seed: int = 0) -> Generator[Tuple[int, LMAnswer], None, None]:
"""Generate image answers in batches or parallel."""
for i, prompt in enumerate(prompts):
yield i, self.generate_image(prompt, medias[i], temperature,
completions)
completions, top_p, seed)

class Step(CustomModel):
output: str
9 changes: 6 additions & 3 deletions lmeval/models/mock_model.py
@@ -61,7 +61,9 @@ def generate_text(
medias: List[Media] | Media = [],
temperature: float | None = 0.0,
max_tokens: int = 4096,
completions: int = 1) -> LMAnswer:
completions: int = 1,
top_p: float = 0.0,
seed: int = 0) -> LMAnswer:
# print(f"generate_text: {prompt}")
id = "mock"
request_response = self.runtime_vars["request_response"]
@@ -87,10 +89,11 @@ def batch_generate_text(
def batch_generate_text(
self, prompts: list[str], medias: list[list[Media] | Media] = [],
temperature: float | None = 0.0, max_tokens:int = 4096,
completions: int = 1) -> Generator[Tuple[int, LMAnswer], None, None]:
completions: int = 1, top_p: float = 0.0,
seed: int = 0) -> Generator[Tuple[int, LMAnswer], None, None]:
log.info(f"mock-batch_generate_text: {len(prompts)} prompts")
for i, prompt in enumerate(prompts):
yield i, self.generate_text(prompt, medias[i], temperature, completions)
            yield i, self.generate_text(prompt, medias[i], temperature, max_tokens, completions, top_p, seed)


class MockGeminiModel(MockModel):