diff --git a/.gitattributes b/.gitattributes index 3e5a278e11f209c687cfb65aff5b36991e30fca0..e38ac81eba2e7e118d2defdf96c2f7898d57c3ff 100644 --- a/.gitattributes +++ b/.gitattributes @@ -922,3 +922,43 @@ G0Plus_Finetune_LeRobot_Datasets_Demo/BENCH_Pick_And_Place_20_Items57_Evenly_Dis G0Plus_PP_CKPT/decode.fp16.engine filter=lfs diff=lfs merge=lfs -text G0Plus_PP_CKPT/gemma_rmsnorm.so filter=lfs diff=lfs merge=lfs -text G0Plus_PP_CKPT/prefill.fp16.engine filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/char-rnn.wts filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.meta filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/airliner.ppm filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/airliner.ppm filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/binoculars.jpeg filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/tabby_tiger_cat.jpg filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp310-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp311-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp312-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp313-none-linux_x86_64.whl filter=lfs 
diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp38-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp39-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp310-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp311-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp312-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp313-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp38-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp39-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp310-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp311-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp312-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp313-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp38-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs 
-text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp39-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/bin/trtexec filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource_win.so.10.13.0 filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource.so.10.13.0 filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch_static.a filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch.so.10.13.0 filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean_static.a filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean.so.10.13.0 filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin_static.a filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin.so.10.13.0 filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_static.a filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin_static.a filter=lfs diff=lfs merge=lfs -text +g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin.so.10.13.0 filter=lfs diff=lfs merge=lfs -text 
# Image for the G0Plus hierarchical system: ROS 2 Humble + GalaxeaVLA (uv venv)
# + EFMNode + the Hierarchical_System colcon workspace + TensorRT 10.13.
#
# Build requirements (see README.md next to this file):
#   * BuildKit enabled (needed for `RUN --mount=type=secret`).
#   * `--secret id=git_token,src=<file>` holding a GitHub token that can read
#     the OpenGalaxea repositories.
#   * docker-assets/data/TensorRT-10.13.0.35 present in the build context.
FROM althack/ros2:humble-full AS base

# Switch to root for system operations
USER root

# Keep apt from prompting during the build
ENV DEBIAN_FRONTEND=noninteractive

# Install necessary build tools (ROS_DISTRO is expanded by the shell at build
# time, so it must be provided by the base image environment)
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    net-tools \
    iputils-ping \
    ros-${ROS_DISTRO}-rosbag2-storage-mcap \
    ros-${ROS_DISTRO}-rosbridge-server \
    git \
    ca-certificates \
    tmux \
    vim \
    && \
    rm -rf /var/lib/apt/lists/*

# TensorRT related setup
COPY docker-assets/data/TensorRT-10.13.0.35 /usr/TensorRT-10.13.0.35

# Ensure ros user owns home directory
RUN chown -R ros:ros /home/ros

# Switch to ros user
USER ros
WORKDIR /home/ros/g0plus_ros2


# ============================================
# Put in code folders
# ============================================
# The token is only mounted for the duration of each RUN (uid/gid 1000 is
# assumed to be the `ros` user -- TODO confirm against the base image).
# `git clone` stores the clone URL, credentials included, as `origin` in
# .git/config; resetting it to the token-free URL in the SAME RUN keeps the
# token out of the committed image layer.
RUN --mount=type=secret,id=git_token,uid=1000,gid=1000 \
    GIT_TOKEN=$(cat /run/secrets/git_token) && \
    git clone https://${GIT_TOKEN}@github.com/OpenGalaxea/GalaxeaVLA.git -b features/opensource && \
    git -C GalaxeaVLA remote set-url origin https://github.com/OpenGalaxea/GalaxeaVLA.git
RUN --mount=type=secret,id=git_token,uid=1000,gid=1000 \
    GIT_TOKEN=$(cat /run/secrets/git_token) && \
    git clone https://${GIT_TOKEN}@github.com/OpenGalaxea/EFMNode.git -b dev/pp_trt && \
    git -C EFMNode remote set-url origin https://github.com/OpenGalaxea/EFMNode.git
COPY --chown=ros:ros docker-assets/code/Hierarchical_System /home/ros/g0plus_ros2/Hierarchical_System


# ============================================
# UV installation
# ============================================
WORKDIR /home/ros
ARG http_proxy
ARG https_proxy

# `uv --version` makes the step fail if the piped installer did not succeed
RUN bash -c "\
    curl -LsSf https://astral.sh/uv/install.sh | bash && \
    ~/.local/bin/uv --version \
"
ENV PATH="/home/ros/.local/bin:${PATH}"

# ============================================
# Complete G0plus setup
# ============================================
WORKDIR /home/ros/g0plus_ros2/GalaxeaVLA

ENV UV_DEFAULT_INDEX=https://mirrors.aliyun.com/pypi/simple/
ENV UV_PYTHON_INSTALL_MIRROR=https://gh-proxy.com/https://github.com/astral-sh/python-build-standalone/releases/download
ENV UV_HTTP_TIMEOUT=600


RUN uv sync --index-strategy unsafe-best-match

RUN VIRTUAL_ENV=.venv uv pip install -e .

# Quoted so the shell cannot treat `.[dev]` as a glob pattern
RUN VIRTUAL_ENV=.venv uv pip install -e ".[dev]"


# ============================================
# Complete EFMNode, VLM and rosbridge setup
# ============================================
WORKDIR /home/ros/g0plus_ros2/GalaxeaVLA

RUN VIRTUAL_ENV=.venv uv pip install nvtx google-genai dashscope

RUN VIRTUAL_ENV=.venv uv pip install lark==1.3.1 empy==3.3.4 colcon-common-extensions==0.3.0

RUN VIRTUAL_ENV=.venv uv pip install setuptools==59.6.0

RUN VIRTUAL_ENV=.venv uv pip install tensorflow==2.15.0

RUN VIRTUAL_ENV=.venv uv pip install netifaces pymongo tornado cbor2

# ============================================
# Install TensorRT wheel
# ============================================
RUN VIRTUAL_ENV=.venv uv pip install /usr/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp310-none-linux_x86_64.whl

# ============================================
# Build the ROS2 workspace using the uv virtual environment
# ============================================
WORKDIR /home/ros/g0plus_ros2/Hierarchical_System

# `\${VIRTUAL_ENV}` is escaped on purpose: unescaped, the outer /bin/sh expands
# it inside the double quotes before `activate` has run, so CMake would get an
# empty Python3_ROOT_DIR. Escaped, the inner bash expands it after activation.
RUN bash -c "\
    source /opt/ros/humble/setup.bash && \
    source /home/ros/g0plus_ros2/GalaxeaVLA/.venv/bin/activate && \
    colcon build --symlink-install \
    --cmake-args -DPython3_ROOT_DIR=\${VIRTUAL_ENV} \
"


# ============================================
# Replace super xml and update ~/.bashrc
# ============================================
COPY --chown=ros:ros docker-assets/super_client_configuration_file.xml.tpl /home/ros/super_client_configuration_file.xml.tpl

# Interactive shells get the venv and the built workspace on their path
RUN echo "source /home/ros/g0plus_ros2/GalaxeaVLA/.venv/bin/activate" >> /home/ros/.bashrc && \
    echo "source /home/ros/g0plus_ros2/Hierarchical_System/install/setup.bash" >> /home/ros/.bashrc

# ============================================
# Final image settings
# ============================================
WORKDIR /home/ros
0000000000000000000000000000000000000000..346186e65581159d2437fb3554bc085898530524 --- /dev/null +++ b/g0plus_dockerfile/README.md @@ -0,0 +1,18 @@ +# Dockerfile for Hierarchical System + + +## 1- What we have + +* Dockerfile: builds a Docker image (about 16 GB) containing everything needed to run the G0Plus hierarchical system + +## 2- Usage + +``` +cd . # run from this directory (g0plus_dockerfile/), which is the build context +DOCKER_BUILDKIT=1 docker build \ + --add-host=host.docker.internal:host-gateway \ + --build-arg http_proxy=http://host.docker.internal:7897 \ + --build-arg https_proxy=http://host.docker.internal:7897 \ + --secret id=git_token,src=./github_token \ + -t g0plus:ros2_v1-trt . +``` \ No newline at end of file diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/.gitignore b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e59097ce96242650eeb943c18aee33a9da9d5a3e --- /dev/null +++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/.gitignore @@ -0,0 +1,7 @@ +log/ +install/ +build/ +**/wasted/ +**/__pycache__/ +*.jpg +.vscode/ \ No newline at end of file diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d8ddca6860d2a2417ff86afaf35eb580e8348c77 --- /dev/null +++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md @@ -0,0 +1,110 @@ +# Hierarchical System ROS2 + +## 0- Preface + +### What we have + +- The paths and names of the main logic (Python) folders and files are as follows: + +``` +src/ + └── g0_vlm_node/ + └── g0_vlm_node + ├── utils/ # Helper functions for calling the Gemini and Qwen APIs + └── vlm_main.py # Core logic of the VLM service +``` +- Note: In the above package: + - vlm_main.py + +### Development Log + +- VLM + 1. Format the String so that the JSON string sent by EHI is converted into a structured string. + 2. 
Support the cache switch for receiving repeated instructions from EHI. + 3. Support parameterized startup, using `--use-qwen` and `--no-use-qwen` to control model usage, with Gemini as the default. + + + +## 1- Install + +1. Install Python dependencies + +Refer to https://github.com/whitbrunn/G0 + +2. Compile the workspace + +Clone the `src/` folder to the local workspace under `TO/YOUR/WORKSPACE/`, then run: + +``` +cd TO/YOUR/WORKSPACE/ +colcon build --symlink-install --cmake-args -DPython3_ROOT_DIR=$CONDA_PREFIX +``` + +Note: + +Use `ros2 pkg list | grep PACK_NAME` to check if the following ROS packages exist: +- `g0_vlm_node` + +## 2- Usage + +1. Set your VLM API keys (both variables are required: `utils_online.py` raises `RuntimeError` on import if either is unset or empty) + +``` +export VLM_API_KEY= +export VLM_API_KEY_QWEN= +``` + +2. Start the VLM Node + +2.1 First configure the proxy for your environment (required for Gemini; if using the Qwen version, skip to 2.3) + + +``` +export https_proxy=http://127.0.0.1: +export http_proxy=http://127.0.0.1: +export all_proxy=http://127.0.0.1: +``` +2.2 Verify that the external network is reachable + +``` +curl -I www.google.com +``` + +Expected output (partial): + +``` +HTTP/1.1 200 OK +Transfer-Encoding: chunked +Cache-Control: private +Connection: keep-alive +``` + +2.3 After confirming the above step is OK, start the VLM node + +``` +ros2 run g0_vlm_node vlm_main +``` + +*If using the Qwen model for inference: +``` +unset http_proxy +unset https_proxy +unset all_proxy +ros2 run g0_vlm_node vlm_main -- --use-qwen +``` + + +## 3- What to expect + +- Output when the VLM receives a Send request, e.g., + +``` +2025-11-05 07:40:33.230 | INFO | g0_vlm_node.vlm_main:vlm_processor1:153 - One hp successfully processed: 将咖啡罐用右手放到托盘上 -> [Low]: Pick up the coffee can with the right hand and place it on the tray.! 
+``` + +- VLM receives a confirm request, e.g., + +``` +2025-11-05 07:40:47.641 | INFO | g0_vlm_node.vlm_main:vlm_processor2:169 - One hp_ successfully sent to VLA: [Low]: Pick up the coffee can with the right hand and place it on the tray.! +``` + diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md.zh b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md.zh new file mode 100644 index 0000000000000000000000000000000000000000..d7c4e86af24b460d6179c28e2bea86f63a36d37d --- /dev/null +++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md.zh @@ -0,0 +1,111 @@ +# Hierarchical System ROS2 + +## 0- 前言 + +### What we have + +- 主要逻辑(python)文件夹及文件的路径及命名如下 + +``` +src/ + └── g0_vlm_node/ + └── g0_vlm_node + ├── utils/ # 储存与Gemini api处理相关的func + └── vlm_main.py # VLM提供服务的核心逻辑 +``` +- 注:以上包内: + - vlm_main.py + +### 开发说明 + + +- VLM + 1. 将String格式化,使得EHI发送的json字符串,改为结构化字符串 + 2. 支持接收EHI的缓存开关 + 3. 支持参数化启动,用`--use-qwen`和`--no-use-qwen`控制模型使用,默认是Gemini + + +## 1- Install + +1. 安装Python依赖库 + +参考https://github.com/whitbrunn/G0 + + +2. 编译工作空间 + +将`src/`文件夹clone到本地工作空间下`TO/YOUR/WORKSPACE/`,运行 + +``` +cd TO/YOUR/WORKSPACE/ +colcon build --symlink-install --cmake-args -DPython3_ROOT_DIR=$CONDA_PREFIX +``` + +Note: + +用`ros2 pkg list | grep PACK_NAME` 检查是否有以下ROS包: +- `g0_vlm_node` + + +## 2- Usage + +1. 设置api key + +``` +export VLM_API_KEY= +export VLM_API_KEY_QWEN= +``` + +2. 
启动VLM Node + +2.1 先按所在环境配置代理(Gemini之必需,若使用qwen版,请跳到2.3) + +``` +export https_proxy=http://127.0.0.1: +export http_proxy=http://127.0.0.1: +export all_proxy=http://127.0.0.1: +``` +2.2 验证外网是否可通 + +``` +curl -I www.google.com +``` + +预期显示(部分), + +``` +HTTP/1.1 200 OK +Transfer-Encoding: chunked +Cache-Control: private +Connection: keep-alive +``` + +2.3 确定上一步OK后,启动VLM节点 + +``` +ros2 run g0_vlm_node vlm_main +``` + +*若使用qwen模型推理 +``` +unset http_proxy +unset https_proxy +unset all_proxy +ros2 run g0_vlm_node vlm_main -- --use-qwen +``` + + +## 3- What you expect + +- VLM收到Send请求输出,e.g., + +``` +2025-11-05 07:40:33.230 | INFO | g0_vlm_node.vlm_main:vlm_processor1:153 - One hp successfully processed: 将咖啡罐用右手放到托盘上 -> [Low]: Pick up the coffee can with the right hand and place it on the tray.! +``` + +- VLM收到confirm请求,e.g., + +``` +2025-11-05 07:40:47.641 | INFO | g0_vlm_node.vlm_main:vlm_processor2:169 - One hp_ successfully sent to VLA: [Low]: Pick up the coffee can with the right hand and place it on the tray.! 
def require_env(name: str) -> str:
    """Return the value of the environment variable *name*.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value, guaranteed to be a non-empty string.

    Raises:
        RuntimeError: If the variable is unset or set to an empty string.
    """
    env_value = os.getenv(name)
    # An empty string is treated the same as a missing variable.
    if env_value:
        return env_value
    raise RuntimeError(f"Required environment variable `{name}` is not set")
"gemini-robotics-er-1.5-preview" +MODEL_ID_FOR_TRANS = "gemini-2.5-flash" +API_KEY = require_env("VLM_API_KEY") +client = genai.Client(api_key=API_KEY) + + +MODEL_ID_QWEN = 'qwen3-vl-plus' +MODEL_ID_FOR_TRANS_QWEN = 'qwen-flash' +API_KEY_QWEN = require_env("VLM_API_KEY_QWEN") + + +PROMPT_TEMPLATE = """ +The robot is asked to {instruction}. + +**CRITICAL SPATIAL CONSTRAINT**: If the instruction mentions "outside the [container]" (where container can be tray, plate, box, bowl, basket, etc.), you MUST ONLY detect objects that are clearly OUTSIDE that container's boundaries. Objects inside or on the container should be completely IGNORED. + +Carefully analyze if the requested object is present in the CORRECT location (outside the container if specified). +If the object exists in the correct location and you are confident (confidence > 0.6), return its bounding box as a JSON array. +If the object is only found INSIDE the container when the instruction asks for objects OUTSIDE the container, you MUST return: {{"no_object": true, "reason": "Object found only inside the container, not outside as requested"}} +If you are not confident or the object is not present in the correct location, return: {{"no_object": true, "reason": ""}} + +Format for object found: [{{"box_2d": [x_min, y_min, x_max, y_max], "label": "