VLA数据清洗+消融实验
## 清单

1. VLA 数据集清洗脚本
2. VLA 时序对齐脚本
3. VLA 三分支增强脚本（图像 / 文本 / 动作）
4. VLA 样本均衡划分脚本
5. 四组消融实验完整 yaml 配置
6. 消融实验的操作细节

## 一、脚本合集

### 1、VLA 数据集清洗脚本（自动过滤坏样本）

`data_clean.py`

```python
import os
import json
import cv2
import numpy as np


def is_blur(img_path, threshold=80):
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    lap_var = cv2.Laplacian(gray, cv2.CV_64F).var()
    return lap_var < threshold


def clean_vla_data(json_path):
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    clean_data = []
    for item in data:
        img_path = item["image"]
        instruction = item["instruction"]
        action_seq = item["action"]
        # 过滤1：图像模糊/不存在
        if not os.path.exists(img_path) or is_blur(img_path):
            continue
        # 过滤2：空指令/过短指令
        if len(instruction.strip()) < 3:
            continue
        # 过滤3：动作序列异常、全0
        if np.max(np.abs(action_seq)) < 0.01:
            continue
        clean_data.append(item)
    with open("cleaned_vla_data.json", "w", encoding="utf-8") as f:
        json.dump(clean_data, f, ensure_ascii=False, indent=2)


if __name__ == "__main__":
    clean_vla_data("origin_vla_data.json")
```

### 2、时序对齐脚本

`time_align.py`

```python
import json
import numpy as np


def align_sequence(item, target_len=15):
    action = np.array(item["action"])
    if len(action) > target_len:
        action = action[:target_len]
    else:
        pad = np.zeros((target_len - action.shape[0], action.shape[1]))
        action = np.concatenate([action, pad], axis=0)
    item["action"] = action.tolist()
    return item


def align_all_data(json_path):
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    new_data = [align_sequence(d) for d in data]
    with open("aligned_vla_data.json", "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    align_all_data("cleaned_vla_data.json")
```

### 3、三分支数据增强脚本（图像 / 文本 / 动作）

`vla_augment.py`

```python
import cv2
import random
import json
import numpy as np

# 文本同义指令扩充
prompt_aug = [
    "请控制机器人完成{}",
    "执行机器人动作{}",
    "根据图像完成操作{}",
    "观察画面并执行{}",
]


def aug_image(img_path):
    img = cv2.imread(img_path)
    # 亮度扰动
    alpha = random.uniform(0.85, 1.15)
    img = cv2.convertScaleAbs(img, alpha=alpha)
    return img


def aug_text(text):
    return random.choice(prompt_aug).format(text)


def aug_action(action):
    # 小幅高斯噪声
    arr = np.array(action)
    noise = np.random.normal(0, 0.002, arr.shape)
    return (arr + noise).tolist()


def augment_data(json_path):
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    new_data = []
    for d in data:
        d["instruction"] = aug_text(d["instruction"])
        d["action"] = aug_action(d["action"])
        new_data.append(d)
    with open("aug_vla_data.json", "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    augment_data("aligned_vla_data.json")
```

### 4、样本均衡划分（训练集 / 验证集）

`split_balance.py`

```python
import json
import random
from collections import Counter

random.seed(42)


def split_data(json_path):
    with open(json_path, "r") as f:
        data = json.load(f)
    # 简单类别均衡采样
    label_dict = {}
    for d in data:
        label = d["instruction"]
        if label not in label_dict:
            label_dict[label] = []
        label_dict[label].append(d)
    train, val = [], []
    for k, v in label_dict.items():
        random.shuffle(v)
        train.extend(v[:int(len(v) * 0.85)])
        val.extend(v[int(len(v) * 0.85):])
    with open("train.json", "w") as f:
        json.dump(train, f, indent=2)
    with open("val.json", "w") as f:
        json.dump(val, f, indent=2)


if __name__ == "__main__":
    split_data("aug_vla_data.json")
```

## 二、四组消融实验完整配置

- 全部微调：QLoRA 微调
- epoch 3~5（不求收敛，只求可对比指标）
- 固定随机种子，可复现

### 实验1：视觉编码器冻结 vs 解冻（核心）

```yaml
# exp1_freeze_vision.yaml
train:
  epochs: 4
  lr: 5e-5
  lora_rank: 16
  freeze_vision: True
  loss_weight_action: 1.0
  loss_weight_text: 1.0
```

### 实验2：解冻视觉浅层

```yaml
# exp2_unfreeze_vision.yaml
train:
  epochs: 4
  lr: 5e-5
  lora_rank: 16
  freeze_vision: False
  unfreeze_layers: 2
  loss_weight_action: 1.0
  loss_weight_text: 1.0
```

### 实验3：动作损失权重消融（VLA 专属）

```yaml
# exp3_loss_weight.yaml
train:
  epochs: 4
  lr: 5e-5
  lora_rank: 16
  freeze_vision: True
  loss_weight_action: 2.0
  loss_weight_text: 0.8
```

### 实验4：学习率消融

```yaml
# exp4_lr.yaml
train:
  epochs: 4
  lr: 1e-4
  lora_rank: 16
  freeze_vision: True
  loss_weight_action: 1.0
  loss_weight_text: 1.0
```

## 三、实操步骤

### 步骤 1：直接运行 4 套数据脚本

`data_clean.py`、`time_align.py`、`vla_augment.py`、`split_balance.py`

### 步骤 2：依次跑 4 组消融实验

- 每组只跑 3-4 epoch，1~3 小时必结束，不会超时。
- 每跑完一组：保存 val_loss、mse、准确率；截图 tensorboard 曲线；保存最佳 lora 权重。