From f73cb7da9c1b5c6a2e16e5acef15bdae3b05b053 Mon Sep 17 00:00:00 2001 From: Bernhard Grill Date: Fri, 6 Jun 2025 17:43:15 +0200 Subject: [PATCH 1/2] adding support for top_p and seed including default values --- lmeval/models/litellm.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lmeval/models/litellm.py b/lmeval/models/litellm.py index 7dcb221..ab1364a 100644 --- a/lmeval/models/litellm.py +++ b/lmeval/models/litellm.py @@ -113,7 +113,9 @@ def batch_generate_text( medias: list[list[Media]], temperature: float = 0, max_tokens: int = 4096, - completions: int = 1 + completions: int = 1, + top_p: float = 0.0, + seed: int = 0 ) -> Generator[Tuple[int, LMAnswer], None, None]: model = self.runtime_vars['litellm_version_string'] assert len(prompts) == len( @@ -125,7 +127,7 @@ def batch_generate_text( try: batch_responses = self._batch_completion(model, messages_batch, temperature, max_tokens, - completions) + completions, top_p, seed) except: batch_responses = [None for _ in prompts] @@ -138,7 +140,9 @@ def generate_text(self, medias: list[Media] | Media | None = None, temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1) -> LMAnswer: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> LMAnswer: # FIXME: finish multi-completion support model = self.runtime_vars['litellm_version_string'] messages = self._make_messages(prompt, medias) @@ -148,7 +152,9 @@ def generate_text(self, messages=messages, temperature=temperature, max_tokens=max_tokens, - completions=completions) + completions=completions, + top_p=top_p, + seed=seed) except Exception as e: resp = None print("Can't get response from model:", e) From 083f871a5d95fabb5a9581d272c85613ecf6bd00 Mon Sep 17 00:00:00 2001 From: Bernhard Grill Date: Mon, 16 Jun 2025 16:19:22 +0200 Subject: [PATCH 2/2] adding default values to seed and top_p --- lmeval/models/gemini.py | 14 +++++++++++--- lmeval/models/httpmodel.py | 22 +++++++++++++++------ lmeval/models/litellm.py | 18 ++++++++++++++++-- lmeval/models/lmmodel.py | 38 ++++++++++++++++++++++++++----------- lmeval/models/mock_model.py | 9 ++++++--- lmeval/models/vertex.py | 15 ++++++++++++--- 6 files changed, 88 insertions(+), 28 deletions(-) diff --git a/lmeval/models/gemini.py b/lmeval/models/gemini.py index 2873a8c..0b17fea 100644 --- a/lmeval/models/gemini.py +++ b/lmeval/models/gemini.py @@ -81,7 +81,9 @@ def _completion(self, messages: list[dict], temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1) -> ModelResponse: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> ModelResponse: #! 
we need to isolate the batch completion to allow various implementation to pass additonals parameters resp = completion( model=model, @@ -89,6 +91,8 @@ def _completion(self, temperature=temperature, max_tokens=max_tokens, n=completions, + top_p=top_p, + seed=seed, api_key=self.runtime_vars.get('api_key'), # gemini specific safety_settings=self.runtime_vars.get('safety_settings')) @@ -97,15 +101,19 @@ def _completion(self, def _batch_completion(self, model: str, messages_batch: list[dict], temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1) -> list[ModelResponse]: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> list[ModelResponse]: batch_responses = batch_completion( model=model, messages=messages_batch, temperature=temperature, max_tokens=max_tokens, n=completions, + top_p=top_p, + seed=seed, api_key=self.runtime_vars.get('api_key'), # gemini specific safety_settings=self.runtime_vars.get('safety_settings') ) - return batch_responses \ No newline at end of file + return batch_responses diff --git a/lmeval/models/httpmodel.py b/lmeval/models/httpmodel.py index adeeeaf..b39f05e 100644 --- a/lmeval/models/httpmodel.py +++ b/lmeval/models/httpmodel.py @@ -95,6 +95,8 @@ def _post_query( query: str, temperature: float = 0.0, max_tokens: int = 4096, + top_p: float = 0.0, + seed: int = 0, extra_dict: Dict[str, Any] | None = None) -> Dict[str, Any]: """Make the POST call. Override in subclass as needed. @@ -106,6 +108,8 @@ def _post_query( 'generation_config': { 'temperature': temperature, 'max_output_tokens': max_tokens, + 'seed': seed, + 'topP': top_p, 'candidate_count': 1 }, 'contents': [{ @@ -210,7 +214,9 @@ def generate_text(self, medias: list[Media] | Media = None, temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1) -> LMAnswer: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> LMAnswer: # no media for now iserror = False error_reason = '' @@ -222,7 +228,7 @@ def generate_text(self, try: start = time() - res = self._post_query(prompt, temperature, max_tokens, extra_dict) + res = self._post_query(prompt, temperature, max_tokens, top_p, seed, extra_dict) gen_time = time() - start text = res['candidates'][0]['content']['parts'][0]['text'] except Exception as e: # pylint: disable=broad-except @@ -242,14 +248,16 @@ def batch_generate_text( medias: list[list[Media]], temperature: float = 0, max_tokens: int = 4096, - completions: int = 1 + completions: int = 1, + top_p: float = 0.0, + seed: int = 0 ) -> Generator[Tuple[int, LMAnswer], None, None]: # only handle text for now but check the size matches anyway. 
assert len(prompts) == len(medias) try: batch_responses = self._batch_completion(prompts, medias, temperature, max_tokens, - completions) + completions, top_p, seed) except Exception as e: # pylint: disable=broad-except error_message = f'batch generation failed {repr(e)}' log.error(error_message) @@ -266,7 +274,8 @@ def batch_generate_text( def _batch_completion(self, prompts: list[str], medias: list[list[Media]], temperature: float, max_tokens: int, - completions: int) -> list[LMAnswer]: + completions: int, top_p: float = 0.0, + seed: int = 0) -> list[LMAnswer]: # only do text for now completions = [] with ThreadPoolExecutor( @@ -274,7 +283,8 @@ def _batch_completion(self, prompts: list[str], medias: list[list[Media]], for i, p in enumerate(prompts): future = executor.submit(self.generate_text, p, medias[i], - temperature, max_tokens, completions) + temperature, max_tokens, completions, + top_p, seed) completions.append(future) return [future.result() for future in completions] diff --git a/lmeval/models/litellm.py b/lmeval/models/litellm.py index ab1364a..d0a5c25 100644 --- a/lmeval/models/litellm.py +++ b/lmeval/models/litellm.py @@ -169,6 +169,8 @@ def complete( temperature: float = 0.0, completions: int = 1, max_tokens: int = 4096, + top_p: float = 0.0, + seed: int = 0, **generation_kwargs, ) -> LMAnswer: # FIXME: finish multi-completion support @@ -179,6 +181,8 @@ def complete( temperature=temperature, max_tokens=max_tokens, completions=completions, + top_p=top_p, + seed=seed, **generation_kwargs, ) resp = self._completion(**arguments) @@ -217,6 +221,8 @@ def multi_complete(self, temperature: float = 0.0, completions: int = 1, max_tokens: int = 4096, + top_p: float = 0.0, + seed: int = 0, **generation_kwargs) -> LMAnswer: n_completions = grouped_question.metadata.get('n_completions', 1) temperature = grouped_question.metadata.get('temperature', None) @@ -224,8 +230,8 @@ def multi_complete(self, for question in grouped_question.question_set: answer = self.complete(question.messages, temperature, - n_completions, max_tokens, - **generation_kwargs) + n_completions, max_tokens, top_p, + seed, **generation_kwargs) grouped_answers.append(answer) return self._make_grouped_answer(grouped_answers) @@ -380,6 +386,8 @@ def _batch_completion(self, temperature: float = 0.0, max_tokens: int = 4096, completions: int = 1, + top_p: float = 0.0, + seed: int = 0, **generation_kwargs) -> list[ModelResponse]: #! 
we need to isolate the batch completion to allow various implementation to pass additonals parameters @@ -405,6 +413,8 @@ def _batch_completion(self, temperature=temperature, max_tokens=max_tokens, n=completions, + top_p=top_p, + seed=seed, api_key=self.runtime_vars.get('api_key'), base_url=self.runtime_vars.get('base_url'), max_workers=self.runtime_vars.get('max_workers'), @@ -418,6 +428,8 @@ def _completion(self, temperature: float = 0.0, max_tokens: int = 4096, completions: int = 1, + top_p: float = 0.0, + seed: int = 0, **generation_kwargs) -> ModelResponse: if "generation_kwargs" in self.runtime_vars: # do not override the generation_kwargs passed as parameter @@ -448,6 +460,8 @@ def _completion(self, temperature=temperature, max_tokens=max_tokens, n=completions, + top_p=top_p, + seed=seed, api_key=self.runtime_vars.get('api_key'), base_url=self.runtime_vars.get('base_url'), extra_headers=self._make_headers(), diff --git a/lmeval/models/lmmodel.py b/lmeval/models/lmmodel.py index 70f31ea..5463c02 100644 --- a/lmeval/models/lmmodel.py +++ b/lmeval/models/lmmodel.py @@ -43,22 +43,28 @@ def generate_text(self, medias: list[Media] | Media = None, temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1) -> LMAnswer: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> LMAnswer: raise NotImplementedError def generate_image(self, prompt: str, medias: Optional[list[Media]] = None, temperature: float = 0.0, - completions: int = 1) -> LMAnswer: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> LMAnswer: raise NotImplementedError def complete(self, messages: list[dict], temperature: float = 0.0, - completions: int = 1) -> LMAnswer: + completions: int = 1, top_p: float = 0.0, + seed: int = 0) -> LMAnswer: raise NotImplementedError def multi_complete(self, question: "GroupedQuestion", temperature: float = 0.0, - completions: int = 1) -> LMAnswer: + completions: int = 1, top_p: float = 0.0, + seed: int = 0) -> LMAnswer: raise NotImplementedError def _build_answer(self, text: str, generation_time: float, @@ -133,7 +139,9 @@ def batch_execute( tasks: list["EvalTask"], temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1 + completions: int = 1, + top_p: float = 0.0, + seed: int = 0 ) -> Generator[Tuple[int, LMAnswer], None, None]: """ Execute a batch of prompts in parallel.""" for i, etask in enumerate(tasks): @@ -141,18 +149,22 @@ def batch_execute( yield i, self.complete(etask.messages, temperature, completions, + top_p, + seed, tools=etask.question.tools) elif etask.task.type == TaskType.grouped_completion.value: yield i, self.multi_complete(etask.question, temperature=temperature, - completions=10) + completions=10, + top_p=top_p, + seed=seed) else: # normalize medias mds = etask.question.medias if etask.question.medias else [] mds = mds if isinstance(mds, list) else [mds] yield i, self.generate_text(etask.instanciated_prompt, mds, temperature, max_tokens, - completions) + completions, top_p, seed) def batch_generate_text( self, @@ -160,22 +172,26 @@ def batch_generate_text( medias: list[list[Media]], temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1 + completions: int = 1, + top_p: float = 0.0, + seed: int = 0 ) -> Generator[Tuple[int, LMAnswer], None, None]: """ Generate text answers in batches or parallel.""" for i, prompt in enumerate(prompts): yield i, self.generate_text(prompt, medias[i], temperature, - completions) + completions, top_p, seed) def batch_generate_image(self, prompts: list[str], medias: 
list[list[Media]], temperature: float = 0.0, - completions: int = 1) -> Generator[Tuple[int, LMAnswer], None, None]: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> Generator[Tuple[int, LMAnswer], None, None]: """Generate image answers in batches or parallel.""" for i, prompt in enumerate(prompts): yield i, self.generate_image(prompt, medias[i], temperature, - completions) + completions, top_p, seed) class Step(CustomModel): output: str diff --git a/lmeval/models/mock_model.py b/lmeval/models/mock_model.py index 4169686..fd44c4c 100644 --- a/lmeval/models/mock_model.py +++ b/lmeval/models/mock_model.py @@ -61,7 +61,9 @@ def generate_text( medias: List[Media] | Media = [], temperature: float | None = 0.0, max_tokens: int = 4096, - completions: int = 1) -> LMAnswer: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> LMAnswer: # print(f"generate_text: {prompt}") id = "mock" request_response = self.runtime_vars["request_response"] @@ -87,10 +89,11 @@ def generate_text( def batch_generate_text( self, prompts: list[str], medias: list[list[Media] | Media] = [], temperature: float | None = 0.0, max_tokens:int = 4096, - completions: int = 1) -> Generator[Tuple[int, LMAnswer], None, None]: + completions: int = 1, top_p: float = 0.0, + seed: int = 0) -> Generator[Tuple[int, LMAnswer], None, None]: log.info(f"mock-batch_generate_text: {len(prompts)} prompts") for i, prompt in enumerate(prompts): - yield i, self.generate_text(prompt, medias[i], temperature, completions) + yield i, self.generate_text(prompt, medias[i], temperature, completions, top_p, seed) class MockGeminiModel(MockModel): diff --git a/lmeval/models/vertex.py b/lmeval/models/vertex.py index bf88df2..a4fb020 100644 --- a/lmeval/models/vertex.py +++ b/lmeval/models/vertex.py @@ -63,7 +63,10 @@ def _completion(self, messages: list[dict], temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1) -> ModelResponse: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0, + ) -> ModelResponse: #! we need to isolate the batch completion to allow various implementation to pass additonals parameters resp = completion( model=model, @@ -71,6 +74,8 @@ def _completion(self, temperature=temperature, max_tokens=max_tokens, n=completions, + top_p=top_p, + seed=seed, api_key=self.runtime_vars.get('api_key'), # vertex specific vertex_location=self.runtime_vars.get('vertex_location'), # Vertex location @@ -81,15 +86,19 @@ def _completion(self, def _batch_completion(self, model: str, messages_batch: list[dict], temperature: float = 0.0, max_tokens: int = 4096, - completions: int = 1) -> list[ModelResponse]: + completions: int = 1, + top_p: float = 0.0, + seed: int = 0) -> list[ModelResponse]: batch_responses = batch_completion( model=model, messages=messages_batch, temperature=temperature, max_tokens=max_tokens, n=completions, + top_p=top_p, + seed=seed, # vertex specific vertex_location=self.runtime_vars.get('vertex_location'), # Vertex location vertex_project=self.runtime_vars.get('vertex_project'), # Vertex project ) - return batch_responses \ No newline at end of file + return batch_responses
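
Usage note (illustrative, not part of the diffs above): a minimal sketch of how the new top_p and seed arguments are expected to flow through the public generate_text() surface. Only the parameter names and defaults come from the patches; the helper name, the argument values, and the pre-built model object are assumptions made for the example.

    # Sketch only: `model` is assumed to be an already-constructed lmeval model
    # (e.g. one of the LiteLLM-, HTTP- or Vertex-backed implementations touched
    # above); how it is built is outside the scope of this series.
    def reproducible_answer(model, prompt: str):
        # Keyword arguments are used on purpose: max_tokens appears in different
        # positions across these signatures (before completions in generate_text,
        # after it in complete, and not at all in the base-class stubs in
        # lmmodel.py), so positional calls can silently shift the new arguments.
        return model.generate_text(
            prompt,
            medias=[],        # text-only request
            temperature=0.7,
            max_tokens=1024,
            completions=1,
            top_p=0.95,       # nucleus-sampling cutoff (the patch defaults this to 0.0)
            seed=1234,        # fixed seed for repeatable sampling
        )

Passing the sampling controls by keyword also keeps call sites readable and makes it obvious at a glance which decoding knobs a given evaluation run pins down.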