Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/myvllm/engine/block_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def allocate(self, seq: Sequence) -> None:
# cache miss
block = self._allocate_block(self.free_block_ids[0])
block.update(h=h, token_ids=token_ids)
block.ref_count = 1
if h != -1:
self.hash_to_block_id[h] = block.block_id
seq.block_table.append(block.block_id)
Expand Down
5 changes: 3 additions & 2 deletions src/myvllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,11 @@ def exit(self):
# return scheduled sequences and whether it is for prefilling
# call model_runner.run() to run the model
# call postprocessor to process the outputs and update sequences and update block manager
def step(self) -> tuple[list[int], bool]:
def step(self) -> tuple[list[tuple[int, list[int]]], int, bool]:
scheduled_sequences, is_prefill = self.scheduler.schedule()
num_processed_tokens = 0
if not scheduled_sequences:
return [], is_prefill
return [], num_processed_tokens, is_prefill
# run the model
outputs = self.model_runner.call("run", scheduled_sequences, is_prefill)
# Move outputs to CPU and convert them to a list
Expand Down