def get_action(self, instruction: str, rgbs: np.ndarray) -> np.ndarray:
    """Request an action chunk from the policy server.

    Each element of ``rgbs`` is PNG-encoded and uploaded as a multipart
    ``image`` file, together with the form fields ``text`` (the instruction)
    and ``temperature`` (fixed at 1.0), in a POST to ``self.server_url``.
    The ``response`` field of the JSON reply is converted to a NumPy array.

    Args:
        instruction: Text sent to the server as the ``text`` form field.
        rgbs: Iterable of images; each one is handed to ``cv2.imencode``.
            NOTE(review): OpenCV encoders interpret 3-channel input as BGR;
            confirm the channel order the server expects.

    Returns:
        The action array returned by the server. When
        ``self.action_horizon`` is positive and the chunk is longer than
        that, only the first ``self.action_horizon`` entries are kept.

    Raises:
        ValueError: If an image cannot be PNG-encoded, or the server reply
            carries no ``response`` field.
        requests.HTTPError: If the server answers with an error status.
    """
    encoded_images = []
    for index, rgb in enumerate(rgbs):
        # imencode reports failure through its first return value; the
        # buffer is only meaningful when that flag is truthy.
        ok, buffer = cv2.imencode('.png', rgb)
        if not ok:
            raise ValueError(f"failed to PNG-encode image at index {index}")
        encoded_images.append(buffer.tobytes())

    # NOTE(review): no timeout is set, so this call blocks until the server
    # answers; consider a timeout once an acceptable latency bound is known.
    ret = requests.post(
        self.server_url,
        data={"text": instruction, "temperature": 1.0},
        files=[("image", _img) for _img in encoded_images],
    )
    # Surface HTTP errors here instead of trying to parse an error body as
    # if it were a valid action.
    ret.raise_for_status()

    raw_action = ret.json().get('response')
    if raw_action is None:
        # np.array(None) would produce a 0-d object array that either fails
        # confusingly at len() or leaks to the caller as a bogus action.
        raise ValueError("policy server reply has no 'response' field")

    action_chunk = np.array(raw_action)
    if self.action_horizon > 0 and len(action_chunk) > self.action_horizon:
        action_chunk = action_chunk[:self.action_horizon]
    return action_chunk
Why does it pass only the instruction and images to the policy? What about the robot state, such as joint positions?
Why does it pass only the instruction and images to the policy? What about the robot state, such as joint positions?