-
Notifications
You must be signed in to change notification settings - Fork 15
Abnormal model output #50
Description
Hello, I encountered an inference problem when testing with my own point cloud dataset. The model outputs prompt-template text and content unrelated to the question, while the relevant answer content degenerates into repeated "!" characters; the model's raw output is:
!!!### human given the 3D scene answer the question: \"What is the object that is farthest from the shorted distance between the humanoid creature and the creature?\" Choices: A. humanoid creature B. creature C. character model D. Buddhist statue You must answer using exactly this format and nothing else: <answer>A</answer> or <answer>B</answer> or <answer>C</answer> or <answer>D</answer> Do not output any explanation. Do not output any text after </answer>. ### assistant:!!!!!!!!!!!!### human given the 3D scene answer the question: \"What is the object that is farthest from the shorted distance between the humanoid creature and the creature?\" Choices: A. humanoid creature B. creature C. character model D. Buddhist statue You must answer using exactly this format and nothing else: <answer>A</answer> or <answer>B</answer> or <answer>C</answer> or <answer>D</answer> Do not output any explanation. Do not output any text after </answer>. ### assistant:!!!!!!!!!!!!### human given the 3D scene answer the question: \"What is the object that is farthest from the shorted distance between the humanoid creature and the creature?\" Choices: A. humanoid creature B. creature C. character model D. Buddhist statue You must answer using exactly this format and nothing else: <answer>A</answer> or <answer>B</answer> or <answer>C</answer> or <answer>D</answer> Do not output any explanation. Do not output any text after </answer>. ### assistant:!!!!!!!!!!!!### human given the 3D scene answer the question: \"What is the object that is farthest from the shorted distance between the humanoid creature and the creature?\" Choices: A. humanoid creature B. creature C. character model D. Buddhist statue You must answer using exactly this format and nothing else: <answer>A</answer> or <answer>B</answer> or <answer>C</answer> or <answer>D</answer> Do not output any explanation. Do not output any text after </answer>. 
### assistant:!!<|endoftext|>Human: Given the question: Read the following article and select the best answer. Article: The first day of school my teacher introduced himself and challenged us to get to know someone we didn't already know.I stood
I have not modified the model code; I only referred to the file dataset/scannet.py in the repository and asked Codex to write a tool for loading and encoding the dataset. The code is as follows:
class Dataset:
    """JSONL-driven QA dataset for custom point clouds (what_distance_farthest).

    Each line of the tasks file is a JSON object that must provide a question
    and a point-cloud file name ("point" or "point_cloud"); "options"/"choices"
    and "answer" are optional.  Point clouds are .npy arrays of shape (N, C)
    with xyz in the first three columns; C >= 6 is assumed to add RGB and
    C >= 9 to add normals — TODO confirm against the files actually used.

    Note: the original paste of this code had lost all indentation; this
    version restores valid Python formatting without changing the logic.
    """

    def __init__(
        self,
        args,
        dataset_config: DatasetConfig,
        split_set="val",
        num_points=40000,
        use_color=False,
        use_normal=False,
        use_multiview=False,
        use_height=False,
        augment=False,
        use_additional_encoders=False,
    ):
        self.args = args
        self.dataset_config = dataset_config
        self.split = split_set
        self.num_points = num_points
        self.use_color = use_color
        self.use_normal = use_normal
        self.use_multiview = use_multiview
        self.use_height = use_height
        self.augment = augment
        self.use_additional_encoders = use_additional_encoders
        self.task_name = "qa"
        self.eval_func = evaluate
        self.max_des_len = args.max_des_len

        # Both custom paths are mandatory for this dataset variant.
        self.tasks_file = getattr(args, "custom_tasks_file", None)
        self.point_cloud_dir = getattr(args, "custom_point_cloud_dir", None)
        if not self.tasks_file or not self.point_cloud_dir:
            raise ValueError(
                "what_distance_farthest dataset requires --custom_tasks_file and --custom_point_cloud_dir"
            )
        if not os.path.isfile(self.tasks_file):
            raise FileNotFoundError(f"tasks file not found: {self.tasks_file}")
        if not os.path.isdir(self.point_cloud_dir):
            raise FileNotFoundError(f"point cloud dir not found: {self.point_cloud_dir}")

        # JSONL: one task object per non-empty line.
        with open(self.tasks_file, "r", encoding="utf-8") as f:
            self.tasks = [json.loads(line) for line in f if line.strip()]

        # evaluate_qa expects dataset.annotations with scene_id and cot.
        self.annotations = []
        for t in self.tasks:
            scene_key = t.get("point") or t.get("point_cloud") or str(t.get("question_id", "unknown"))
            answer_text = t.get("answer", "")
            self.annotations.append(
                {
                    "scene_id": scene_key,
                    "question": t.get("question", ""),
                    "cot": f"<answer>{answer_text}</answer>",
                }
            )

        # LLM tokenizer: no BOS, eos reused as pad, right padding.
        self.tokenizer = AutoTokenizer.from_pretrained(args.vocab, add_bos_token=False)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = "right"
        self.qtokenizer = AutoTokenizer.from_pretrained(args.qformer_vocab)
        # NOTE(review): this overwrites the q-former (BERT) pad token with the
        # LLM's eos token string — verify against dataset/scannet.py; a
        # mismatched pad token here can corrupt the q-former inputs.
        self.qtokenizer.pad_token = self.tokenizer.eos_token
        self.qtokenizer.padding_side = "right"
        self.tokenizer_config = dict(
            max_length=self.max_des_len,
            padding="max_length",
            truncation="longest_first",
            return_tensors="np",
        )
        print(
            f"[what_distance_farthest] loaded {len(self.tasks)} tasks from {self.tasks_file}, "
            f"point clouds from {self.point_cloud_dir}"
        )

    def __len__(self):
        """Return the number of tasks (one sample per JSONL line)."""
        return len(self.tasks)

    def _prepare_point_features(self, pc: np.ndarray):
        """Sample ``num_points`` points and assemble the feature channels.

        Returns ``(point_clouds, pcl_color, dims_min, dims_max)``, all float32.
        ``point_clouds`` is xyz plus any of color / normals / height that the
        constructor flags enabled, concatenated along the channel axis.
        """
        sampled_pc, _ = random_sampling(pc, self.num_points, return_choices=True)
        sampled_pc = sampled_pc.astype(np.float32)
        xyz = sampled_pc[:, :3]
        extras = []
        pcl_color = np.zeros_like(xyz, dtype=np.float32)
        if self.use_color:
            if sampled_pc.shape[1] >= 6:
                pcl_color = sampled_pc[:, 3:6].astype(np.float32)
            # NOTE(review): raw colors are appended as-is. dataset/scannet.py
            # normalizes color (e.g. (rgb - MEAN_COLOR_RGB) / 256.0) before
            # concatenation; feeding unnormalized colors to a checkpoint
            # trained on normalized ones is a plausible cause of degenerate
            # ("!!!...") outputs — confirm and match the training transform.
            extras.append(pcl_color)
        if self.use_normal:
            if sampled_pc.shape[1] >= 9:
                normals = sampled_pc[:, 6:9].astype(np.float32)
            else:
                # No normals in the file: pad with zeros so the channel count
                # still matches what the model expects.
                normals = np.zeros_like(xyz, dtype=np.float32)
            extras.append(normals)
        if self.use_height:
            # VoteNet-style floor estimate: the 0.99th percentile of z is
            # effectively the minimum z, robust to a few stray low points.
            floor_height = np.percentile(xyz[:, 2], 0.99)
            height = (xyz[:, 2] - floor_height).astype(np.float32)
            extras.append(height[:, None])
        point_clouds = np.concatenate([xyz] + extras, axis=1) if extras else xyz
        dims_min = xyz.min(axis=0).astype(np.float32)
        dims_max = xyz.max(axis=0).astype(np.float32)
        return point_clouds.astype(np.float32), pcl_color.astype(np.float32), dims_min, dims_max

    def __getitem__(self, idx):
        """Build one model-ready sample: point features plus tokenized prompt.

        Raises ValueError when the task has no point-cloud field and
        FileNotFoundError when the referenced .npy file is missing.
        """
        task = self.tasks[idx]
        question = task.get("question", "").strip()
        options = task.get("options") or task.get("choices")
        point_name = task.get("point") or task.get("point_cloud")
        if point_name is None:
            raise ValueError(f"Missing point/point_cloud field in task index {idx}")
        pc_path = os.path.join(self.point_cloud_dir, point_name)
        if not os.path.isfile(pc_path):
            raise FileNotFoundError(f"Point cloud not found: {pc_path}")
        pc = np.load(pc_path)
        point_clouds, pcl_color, dims_min, dims_max = self._prepare_point_features(pc)

        # Fill the repository's QA prompt template with the question text.
        prompt = deepcopy(TASK_PROPMT["qa"][0])
        if options:
            option_text = "\n".join(options)
            q_text = (
                f"{question}\n"
                f"Choices:\n{option_text}\n"
                "You must answer using exactly this format and nothing else:\n"
                "<answer>A</answer> or <answer>B</answer> or <answer>C</answer> or <answer>D</answer>\n"
                "Do not output any explanation. Do not output any text after </answer>."
            )
        else:
            q_text = (
                f"{question}\n"
                "You must answer using exactly this format and nothing else:\n"
                "<answer>your short answer</answer>\n"
                "Do not output any explanation. Do not output any text after </answer>."
            )
        instruction_text = prompt["instruction"].format(locations="", question=q_text)
        prompt_inputs = self.tokenizer.batch_encode_plus([instruction_text], **self.tokenizer_config)
        qformer_inputs = self.qtokenizer.batch_encode_plus([instruction_text], **self.tokenizer_config)
        return {
            "point_clouds": point_clouds,
            "pcl_color": pcl_color,
            "point_cloud_dims_min": dims_min,
            "point_cloud_dims_max": dims_max,
            "instruction": prompt_inputs["input_ids"][0].astype(np.int64),
            "instruction_mask": prompt_inputs["attention_mask"][0].astype(np.float32),
            "qformer_input_ids": qformer_inputs["input_ids"][0].astype(np.int64),
            "qformer_attention_mask": qformer_inputs["attention_mask"][0].astype(np.float32),
            "scan_idx": np.array(idx).astype(np.int64),
        }
`
The dataset format is: {"question_id": 6, "point": "000006.npy", "category": "what_distance_farthest", "question": "What is the object that is farthest from the building?", "options": ["A. bottle", "B. 3D model", "C. robot", "D. Stormtrooper"], "answer": "bottle", "answer_id": "A"}
The command is:
python3 -m main \ --test_only \ --test_ckpt /model/3dr1/checkpoint_rl.pth \ --dataset what_distance_farthest \ --vocab /model/Qwen2.5-7B \ --qformer_vocab /model/bert-base-uncased \ --checkpoint_dir /eval_results/3dr1 \ --custom_tasks_file /what_distance_farthest/tasks.jsonl \ --custom_point_cloud_dir /what_distance_farthest/pcd \ --use_color --use_normal \ --detector point_encoder \ --captioner 3dr1 \ --depth_encoder_dim 256 \
The existing issues don't seem to provide a solution. Does the author or anyone else have a solution? Thank you very much!