diff --git a/.gitattributes b/.gitattributes
index 3e5a278e11f209c687cfb65aff5b36991e30fca0..e38ac81eba2e7e118d2defdf96c2f7898d57c3ff 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -922,3 +922,43 @@ G0Plus_Finetune_LeRobot_Datasets_Demo/BENCH_Pick_And_Place_20_Items57_Evenly_Dis
 G0Plus_PP_CKPT/decode.fp16.engine filter=lfs diff=lfs merge=lfs -text
 G0Plus_PP_CKPT/gemma_rmsnorm.so filter=lfs diff=lfs merge=lfs -text
 G0Plus_PP_CKPT/prefill.fp16.engine filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/char-rnn.wts filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.meta filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/airliner.ppm filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/airliner.ppm filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/binoculars.jpeg filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/tabby_tiger_cat.jpg filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp310-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp311-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp312-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp313-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp38-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp39-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp310-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp311-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp312-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp313-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp38-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp39-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp310-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp311-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp312-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp313-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp38-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp39-none-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/bin/trtexec filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource_win.so.10.13.0 filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource.so.10.13.0 filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch_static.a filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch.so.10.13.0 filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean_static.a filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean.so.10.13.0 filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin_static.a filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin.so.10.13.0 filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_static.a filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin_static.a filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin.so.10.13.0 filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer.so.10.13.0 filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvonnxparser_static.a filter=lfs diff=lfs merge=lfs -text
+g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvonnxparser.so.10.13.0 filter=lfs diff=lfs merge=lfs -text
diff --git a/g0plus_dockerfile/.gitignore b/g0plus_dockerfile/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..deb78e01194c8962015fa5527a2ec0a38a55f01d
--- /dev/null
+++ b/g0plus_dockerfile/.gitignore
@@ -0,0 +1,3 @@
+**/GalaxeaFM/*
+**/EFMNode/*
+docker-assets/data/*
\ No newline at end of file
diff --git a/g0plus_dockerfile/Dockerfile b/g0plus_dockerfile/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..a5d70886ab4073d07a6acf0f0605c637d5b7631a
--- /dev/null
+++ b/g0plus_dockerfile/Dockerfile
@@ -0,0 +1,122 @@
+# Base image: ROS 2 Humble "full" variant published as althack/ros2.
+FROM althack/ros2:humble-full AS base
+
+# Switch to root for system operations
+USER root
+
+# Keep apt/debconf from prompting for input during the build.
+# NOTE(review): this is set with ENV, so it also persists into containers started from the image.
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install build tools, network/debug utilities, editors and the extra ROS packages
+# (rosbag2 MCAP storage plugin, rosbridge server). The apt lists are removed in the
+# same layer so they do not add to the image size.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        net-tools \
+        iputils-ping \
+        ros-${ROS_DISTRO}-rosbag2-storage-mcap \
+        ros-${ROS_DISTRO}-rosbridge-server \
+        git \
+        ca-certificates \
+        tmux \
+        vim \
+        && \
+    rm -rf /var/lib/apt/lists/*
+
+# TensorRT related setup: copy the TensorRT 10.13.0.35 tree from the build context into /usr.
+# The Python wheel under /usr/TensorRT-10.13.0.35/python is installed further below.
+COPY docker-assets/data/TensorRT-10.13.0.35 /usr/TensorRT-10.13.0.35
+
+# Ensure ros user owns home directory
+RUN chown -R ros:ros /home/ros
+
+# Switch to ros user; everything below runs unprivileged inside the workspace directory.
+USER ros
+WORKDIR /home/ros/g0plus_ros2
+
+
+# ============================================
+# Put in code folders
+# ============================================
+# The GitHub token is supplied as a BuildKit secret, so it never appears in the
+# image history. `git clone` would still store the token-bearing URL in
+# .git/config, so the remote is reset to the plain URL inside the same RUN step;
+# the token is therefore never written to an image layer.
+RUN --mount=type=secret,id=git_token,uid=1000,gid=1000 \
+    GIT_TOKEN="$(cat /run/secrets/git_token)" && \
+    git clone -b features/opensource "https://${GIT_TOKEN}@github.com/OpenGalaxea/GalaxeaVLA.git" && \
+    git -C GalaxeaVLA remote set-url origin https://github.com/OpenGalaxea/GalaxeaVLA.git
+RUN --mount=type=secret,id=git_token,uid=1000,gid=1000 \
+    GIT_TOKEN="$(cat /run/secrets/git_token)" && \
+    git clone -b dev/pp_trt "https://${GIT_TOKEN}@github.com/OpenGalaxea/EFMNode.git" && \
+    git -C EFMNode remote set-url origin https://github.com/OpenGalaxea/EFMNode.git
+# The ROS 2 workspace sources come from the build context rather than from git.
+COPY --chown=ros:ros docker-assets/code/Hierarchical_System /home/ros/g0plus_ros2/Hierarchical_System
+
+
+# ============================================
+# UV installation
+# ============================================
+WORKDIR /home/ros
+# Declare the proxy build args so values passed with --build-arg are visible to the RUN steps below.
+ARG http_proxy
+ARG https_proxy
+
+# Run the uv install script, then invoke the installed binary once so the build
+# stops here if the installation did not produce a working `uv`.
+RUN bash -c "\
+    curl -LsSf https://astral.sh/uv/install.sh | bash && \
+    ~/.local/bin/uv --version \
+"
+# Make the freshly installed `uv` (in the ros user's ~/.local/bin) available to later steps.
+ENV PATH="/home/ros/.local/bin:${PATH}"
+
+# ============================================
+# Complete G0plus setup
+# ============================================
+WORKDIR /home/ros/g0plus_ros2/GalaxeaVLA
+
+# uv configuration: use mirror URLs for the package index and for Python build
+# downloads, and raise the HTTP timeout to 600.
+ENV UV_DEFAULT_INDEX=https://mirrors.aliyun.com/pypi/simple/
+ENV UV_PYTHON_INSTALL_MIRROR=https://gh-proxy.com/https://github.com/astral-sh/python-build-standalone/releases/download
+ENV UV_HTTP_TIMEOUT=600
+
+
+# Resolve and install the project's dependencies into the project environment.
+RUN uv sync --index-strategy unsafe-best-match
+
+# Install the GalaxeaVLA project itself in editable mode into .venv ...
+RUN VIRTUAL_ENV=.venv uv pip install -e .
+
+# ... and once more with its "dev" extra.
+RUN VIRTUAL_ENV=.venv uv pip install -e .[dev]
+
+
+# ============================================
+# Complete EFMNode, VLM and rosbridge setup
+# ============================================
+WORKDIR /home/ros/g0plus_ros2/GalaxeaVLA
+
+# All packages below go into the GalaxeaVLA virtual environment (.venv).
+# google-genai and dashscope are the client libraries imported by g0_vlm_node.
+RUN VIRTUAL_ENV=.venv uv pip install nvtx google-genai dashscope
+
+# Pinned build tooling (presumably needed so colcon can build from inside the venv; confirm).
+RUN VIRTUAL_ENV=.venv uv pip install lark==1.3.1 empy==3.3.4 colcon-common-extensions==0.3.0 
+
+# NOTE(review): the reason for pinning setuptools to 59.6.0 is not stated here; confirm before changing.
+RUN VIRTUAL_ENV=.venv uv pip install setuptools==59.6.0
+
+# tensorflow is imported by g0_vlm_node/utils/utils_online.py.
+RUN VIRTUAL_ENV=.venv uv pip install tensorflow==2.15.0
+
+# NOTE(review): presumably runtime dependencies of rosbridge; confirm.
+RUN VIRTUAL_ENV=.venv uv pip install netifaces pymongo tornado cbor2
+
+# ============================================
+# Install TensorRT wheel
+# ============================================
+# Uses the cp310 wheel from the TensorRT tree copied to /usr earlier
+# (assumes the venv interpreter is CPython 3.10; confirm).
+RUN VIRTUAL_ENV=.venv uv pip install /usr/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp310-none-linux_x86_64.whl
+
+# ============================================
+# Build the ROS2 workspace using the uv virtual environment
+# ============================================
+WORKDIR /home/ros/g0plus_ros2/Hierarchical_System
+
+# The script is single-quoted on purpose: with double quotes the outer /bin/sh
+# expands ${VIRTUAL_ENV} before bash runs, i.e. before the venv is activated,
+# and Python3_ROOT_DIR would be passed to CMake as an empty string.
+RUN bash -c '\
+    source /opt/ros/humble/setup.bash && \
+    source /home/ros/g0plus_ros2/GalaxeaVLA/.venv/bin/activate && \
+    colcon build --symlink-install \
+      --cmake-args -DPython3_ROOT_DIR="${VIRTUAL_ENV}" \
+'
+
+
+# ============================================
+# Replace super xml and update ~/.bashrc
+# ============================================
+# Ship the super-client configuration template in the ros user's home directory.
+# NOTE(review): the template is only copied here; rendering it into a concrete
+# XML file presumably happens at container start. Confirm against the launch scripts.
+COPY --chown=ros:ros docker-assets/super_client_configuration_file.xml.tpl /home/ros/super_client_configuration_file.xml.tpl
+
+# Interactive shells activate the Python venv and source the built workspace overlay.
+RUN echo "source /home/ros/g0plus_ros2/GalaxeaVLA/.venv/bin/activate" >> /home/ros/.bashrc && \
+    echo "source /home/ros/g0plus_ros2/Hierarchical_System/install/setup.bash" >> /home/ros/.bashrc
+
+# ============================================
+# Final image settings
+# ============================================
+# Containers start in the ros user's home directory.
+WORKDIR /home/ros
diff --git a/g0plus_dockerfile/README.md b/g0plus_dockerfile/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..346186e65581159d2437fb3554bc085898530524
--- /dev/null
+++ b/g0plus_dockerfile/README.md
@@ -0,0 +1,18 @@
+# Dockerfile for Hierarchical System
+
+
+## 1- What we have
+
+* Dockerfile: builds a Docker image (around 16 GB) that contains everything needed to run the G0Plus hierarchical system
+
+## 2- Usage
+
+```
+cd .
+DOCKER_BUILDKIT=1 docker build \
+  --add-host=host.docker.internal:host-gateway \
+  --build-arg http_proxy=http://host.docker.internal:7897 \
+  --build-arg https_proxy=http://host.docker.internal:7897 \
+  --secret id=git_token,src=./github_token \
+   -t g0plus:ros2_v1-trt .
+```
\ No newline at end of file
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/.gitignore b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..e59097ce96242650eeb943c18aee33a9da9d5a3e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/.gitignore
@@ -0,0 +1,7 @@
+log/
+install/
+build/
+**/wasted/
+**/__pycache__/
+*.jpg
+.vscode/
\ No newline at end of file
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d8ddca6860d2a2417ff86afaf35eb580e8348c77
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md
@@ -0,0 +1,110 @@
+# Hierarchical System ROS2
+
+## 0- Preface
+
+### What we have
+
+- The paths and names of the main logic (Python) folders and files are as follows:
+
+```
+src/
+   └── g0_vlm_node/
+        └── g0_vlm_node
+            ├── utils/                  # Stores functions related to Gemini API processing
+            └── vlm_main.py             # Core logic for VLM service provision
+```
+- Note: In the above package:
+    - vlm_main.py
+
+### Development Log
+
+- VLM 
+    1. Format the String so that the JSON string sent by EHI is converted into a structured string.
+    2. Support the cache switch for receiving repeated instruction from EHI.
+    3. Support parameterized startup, using `--use-qwen` and `--no-use-qwen` to control model usage, with Gemini as the default.
+
+
+
+## 1- Install
+
+1. Install Python dependencies
+
+Refer to https://github.com/whitbrunn/G0
+
+2. Compile the workspace
+
+Clone the `src/` folder to the local workspace under `TO/YOUR/WORKSPACE/`, then run:
+
+```
+cd TO/YOUR/WORKSPACE/
+colcon build --symlink-install --cmake-args -DPython3_ROOT_DIR=$CONDA_PREFIX
+```
+
+Note:
+
+Use `ros2 pkg list | grep PACK_NAME` to check if the following ROS packages exist: 
+- `g0_vlm_node`
+
+## 2- Usage
+
+1. Set your VLM API key
+
+```
+export VLM_API_KEY=<YOUR_GEMINI_API_KEY> 
+export VLM_API_KEY_QWEN=<YOUR_QWEN_API_KEY> 
+```
+
+2. Start the VLM Node
+
+2.1 First configure the proxy for your environment (required for Gemini; if you are using the Qwen version, skip to 2.3)
+
+
+```
+export https_proxy=http://127.0.0.1:<PORT>
+export http_proxy=http://127.0.0.1:<PORT>
+export all_proxy=http://127.0.0.1:<PORT>
+```
+2.2 Verify that the external network is accessible
+
+```
+curl -I www.google.com
+```
+
+Expected output (partial):
+
+```
+HTTP/1.1 200 OK
+Transfer-Encoding: chunked
+Cache-Control: private
+Connection: keep-alive
+```
+
+2.3 After confirming that the previous step works, start the VLM node
+
+```
+ros2 run g0_vlm_node vlm_main
+```
+
+*If using the qwen model inference:
+```
+unset http_proxy
+unset https_proxy
+unset all_proxy
+ros2 run g0_vlm_node vlm_main -- --use-qwen
+```
+
+
+## 3- What you expect
+
+- Output when the VLM receives a Send request, e.g.,
+
+```
+2025-11-05 07:40:33.230 | INFO     | g0_vlm_node.vlm_main:vlm_processor1:153 - One hp successfully processed: 将咖啡罐用右手放到托盘上 -> [Low]: Pick up the coffee can with the right hand and place it on the tray.!
+```
+
+- VLM receives a confirm request, e.g.,
+
+```
+2025-11-05 07:40:47.641 | INFO     | g0_vlm_node.vlm_main:vlm_processor2:169 - One hp_ successfully sent to VLA: [Low]: Pick up the coffee can with the right hand and place it on the tray.!
+```
+
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md.zh b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md.zh
new file mode 100644
index 0000000000000000000000000000000000000000..d7c4e86af24b460d6179c28e2bea86f63a36d37d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/README.md.zh
@@ -0,0 +1,111 @@
+# Hierarchical System ROS2
+
+## 0- å‰è¨€
+
+### What we have
+
+- ä¸»è¦é€»è¾‘ï¼ˆpythonï¼‰æ–‡ä»¶å¤¹åŠæ–‡ä»¶çš„è·¯å¾„åŠå‘½åå¦‚ä¸‹
+
+```
+src/
+   â””â”€â”€ g0_vlm_node/
+        â””â”€â”€ g0_vlm_node
+            â”œâ”€â”€ utils/                  # å‚¨å­˜ä¸ŽGemini apiå¤„ç†ç›¸å…³çš„func
+            â””â”€â”€ vlm_main.py             # VLMæä¾›æœåŠ¡çš„æ ¸å¿ƒé€»è¾‘
+```
+- æ³¨ï¼šä»¥ä¸ŠåŒ…å†…ï¼š
+    - vlm_main.py
+
+### å¼€å‘è¯´æ˜Ž
+
+
+- VLM
+    1. å°†Stringæ ¼å¼åŒ–ï¼Œä½¿å¾—EHIå‘é€çš„jsonå­—ç¬¦ä¸²ï¼Œæ”¹ä¸ºç»“æž„åŒ–å­—ç¬¦ä¸²
+    2. æ”¯æŒæŽ¥æ”¶EHIçš„ç¼“å­˜å¼€å…³
+    3. æ”¯æŒå‚æ•°åŒ–å¯åŠ¨ï¼Œç”¨`--use-qwen`å’Œ`--no-use-qwen`æŽ§åˆ¶æ¨¡åž‹ä½¿ç”¨ï¼Œé»˜è®¤æ˜¯Gemini
+
+
+## 1- Install
+
+1. å®‰è£…Pythonä¾èµ–åº“
+
+å‚è€ƒhttps://github.com/whitbrunn/G0
+
+
+2. ç¼–è¯‘å·¥ä½œç©ºé—´
+
+å°†`src/`æ–‡ä»¶å¤¹cloneåˆ°æœ¬åœ°å·¥ä½œç©ºé—´ä¸‹`TO/YOUR/WORKSPACE/`ï¼Œè¿è¡Œ
+
+```
+cd TO/YOUR/WORKSPACE/
+colcon build --symlink-install --cmake-args -DPython3_ROOT_DIR=$CONDA_PREFIX
+```
+
+Note:
+
+ç”¨`ros2 pkg list | grep PACK_NAME` æ£€æŸ¥æ˜¯å¦æœ‰ä»¥ä¸‹ROSåŒ…ï¼š 
+- `g0_vlm_node`
+
+
+## 2- Usage
+
+1. è®¾ç½®api key
+
+```
+export API_KEY=<YOUR_GEMINI_API_KEY> 
+export API_KEY_QWEN=<YOUR_QWEN_API_KEY> 
+```
+
+2. å¯åŠ¨VLM Node
+
+1.1 å…ˆæŒ‰æ‰€åœ¨çŽ¯å¢ƒé…ç½®ä»£ç†ï¼ˆGeminiä¹‹å¿…éœ€ï¼Œè‹¥ä½¿ç”¨qwenç‰ˆï¼Œè¯·è·³åˆ°1.3ï¼‰
+
+```
+export https_proxy=http://127.0.0.1:<PORT>
+export http_proxy=http://127.0.0.1:<PORT>
+export all_proxy=http://127.0.0.1:<PORT>
+```
+1.2 éªŒè¯å¤–ç½‘æ˜¯å¦å¯é€š
+
+```
+curl -I www.google.com
+```
+
+é¢„æœŸæ˜¾ç¤ºï¼ˆéƒ¨åˆ†ï¼‰ï¼Œ
+
+```
+HTTP/1.1 200 OK
+Transfer-Encoding: chunked
+Cache-Control: private
+Connection: keep-alive
+```
+
+1.3 ç¡®å®šä¸Šä¸€æ­¥OKåŽï¼Œå¯åŠ¨VLMèŠ‚ç‚¹
+
+```
+ros2 run g0_vlm_node vlm_main
+```
+
+*è‹¥ä½¿ç”¨qwenæ¨¡åž‹æŽ¨ç†
+```
+unset http_proxy
+unset https_proxy
+unset all_proxy
+ros2 run g0_vlm_node vlm_main -- --use-qwen
+```
+
+
+## 3- What you expect
+
+- VLMæ”¶åˆ°Sendè¯·æ±‚è¾“å‡ºï¼Œe.g.,
+
+```
+2025-11-05 07:40:33.230 | INFO     | g0_vlm_node.vlm_main:vlm_processor1:153 - One hp successfully processed: å°†å’–å•¡ç½ç”¨å³æ‰‹æ”¾åˆ°æ‰˜ç›˜ä¸Š -> [Low]: Pick up the coffee can with the right hand and place it on the tray.!
+```
+
+- VLMæ”¶åˆ°confirmè¯·æ±‚ï¼Œe.g.,
+
+```
+2025-11-05 07:40:47.641 | INFO     | g0_vlm_node.vlm_main:vlm_processor2:169 - One hp_ successfully sent to VLA: [Low]: Pick up the coffee can with the right hand and place it on the tray.!
+```
+
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/__init__.py b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/utils/__init__.py b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf34a9ac4d255c069a194f7af13009533146764e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/utils/__init__.py
@@ -0,0 +1,2 @@
+from .utils_online import call_gemini_for_bbox, call_gemini_for_translation, call_qwen_for_bbox, call_qwen_for_translation
+from .utils_online import get_simple_vb_imgcv
\ No newline at end of file
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/utils/utils_online.py b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/utils/utils_online.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a51f428ca53b830096f2e35e94219491133d961
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/utils/utils_online.py
@@ -0,0 +1,418 @@
+import os
+from google import genai
+from google.genai import types
+import re
+import cv2 as cv
+import time
+import tensorflow as tf
+import numpy as np
+from typing import List, Dict, Any, Optional
+import dashscope
+from dashscope import MultiModalConversation, Generation
+import base64
+import json
+
+def require_env(name: str) -> str:
+    value = os.getenv(name)
+    if not value:
+        raise RuntimeError(f"Required environment variable `{name}` is not set")
+    return value
+
+
+# Gemini model used for bounding-box detection (see call_gemini_for_bbox).
+MODEL_ID = "gemini-robotics-er-1.5-preview"
+# Gemini model used for instruction translation (see call_gemini_for_translation).
+MODEL_ID_FOR_TRANS = "gemini-2.5-flash"
+# NOTE(review): both API keys are resolved at import time, so importing this
+# module raises RuntimeError unless VLM_API_KEY *and* VLM_API_KEY_QWEN are set,
+# even when only one of the two backends is going to be used.
+API_KEY = require_env("VLM_API_KEY")
+client = genai.Client(api_key=API_KEY)
+
+
+# Qwen (DashScope) counterparts: vision model for bounding boxes, text model for translation.
+MODEL_ID_QWEN = 'qwen3-vl-plus'
+MODEL_ID_FOR_TRANS_QWEN = 'qwen-flash'
+API_KEY_QWEN = require_env("VLM_API_KEY_QWEN")
+
+
+# Bounding-box prompt for the Qwen backend (call_qwen_for_bbox). It is filled in
+# with str.format, which is why literal JSON braces are doubled ({{ }}).
+# Requested box order: [x_min, y_min, x_max, y_max], normalized to 0-1000.
+PROMPT_TEMPLATE = """
+The robot is asked to {instruction}. 
+
+**CRITICAL SPATIAL CONSTRAINT**: If the instruction mentions "outside the [container]" (where container can be tray, plate, box, bowl, basket, etc.), you MUST ONLY detect objects that are clearly OUTSIDE that container's boundaries. Objects inside or on the container should be completely IGNORED.
+
+Carefully analyze if the requested object is present in the CORRECT location (outside the container if specified). 
+If the object exists in the correct location and you are confident (confidence > 0.6), return its bounding box as a JSON array.
+If the object is only found INSIDE the container when the instruction asks for objects OUTSIDE the container, you MUST return: {{"no_object": true, "reason": "Object found only inside the container, not outside as requested"}}
+If you are not confident or the object is not present in the correct location, return: {{"no_object": true, "reason": "<brief explanation>"}}
+
+Format for object found: [{{"box_2d": [x_min, y_min, x_max, y_max], "label": "<label>", "confidence": <0.0-1.0>}}]
+Format for no object: {{"no_object": true, "reason": "<why object not found>"}}
+
+Coordinates normalized to 0-1000. The values in box_2d must only be integers.
+Only return the object that matches the instruction AND is in the correct spatial location.
+
+"""
+
+
+# Bounding-box prompt for the Gemini backend (call_gemini_for_bbox). It is filled
+# in with str.replace("{instruction}", ...), so the JSON braces are single here.
+# Requested box order: [ymin, xmin, ymax, xmax], i.e. different from PROMPT_TEMPLATE.
+prompt_template = """
+      The robot is asked to {instruction}. Return bounding box of the first required interaction 
+      object as a JSON array with labels. Only return bbox with the max likelihood. Never return masks or code fencing. 
+      The format should be as follows: [{"box_2d": [ymin, xmin, ymax, xmax],
+      "label": <label for the object>}] normalized to 0-1000. The values in
+      box_2d must only be integers
+      """
+
+# Chinese -> English translation prompt, version 1 (call_gemini_for_translation, ver=1):
+# output is prefixed with "[Low]: " and names the hand side.
+pt_for_translation1 = """
+      You are a professional robot instruction translation expert. 
+      The robot is asked to translate a robot action instruction "{instruction}" from Chinese to English. Pay special attention to translate the object and hand side accurately and concisely, and do not add any explanations. 
+      The format should be just ONE sentence with "[Low]: " in the FRONT and "." at the END as follows: "[Low]: Pick up the <object> with the <side> hand and place it on the tray."
+      """
+
+# Chinese -> English translation prompt, version 2 (call_gemini_for_translation, ver=2, the default):
+# fixed "outside the tray" sentence pattern without the "[Low]: " prefix.
+pt_for_translation2 = """
+      You are a professional robot instruction translation expert.
+      The robot is asked to translate a robot action instruction "{instruction}" from Chinese to English. Pay special attention to translate the object accurately and concisely, and do not add any explanations.
+      The format should be just ONE sentence with "." at the END as follows: "Pick up the <object> outside the tray and place them on the tray."
+      """
+
+
+# ä¼˜åŒ–åŽçš„ç¿»è¯‘æç¤ºè¯æ¨¡æ¿ï¼Œç‰¹åˆ«å¼ºè°ƒ"outside the tray"æ¡ä»¶
+pt_for_translation2_qwen = """
+ä½ æ˜¯ä¸€ä¸ªä¸“ä¸šçš„æœºå™¨äººæŒ‡ä»¤ç¿»è¯‘ä¸“å®¶ï¼Œä¸“é—¨å¤„ç†pick-and-placeåœºæ™¯çš„æŒ‡ä»¤ç¿»è¯‘ã€‚
+
+åŽŸå§‹ä¸­æ–‡æŒ‡ä»¤: "{instruction}"
+
+é‡è¦ç¿»è¯‘è¦æ±‚:
+1. **å¿…é¡»ä¿ç•™"outside the tray"(æ‰˜ç›˜å¤–)è¿™ä¸ªå…³é”®ç©ºé—´å…³ç³»**ï¼Œè¿™æ˜¯æœ€é‡è¦çš„æ¡ä»¶
+2. å‡†ç¡®è¯†åˆ«è¦æ“ä½œçš„ç‰©ä½“
+3. ä¸¥æ ¼éµå¾ªå›ºå®šå¥å¼ï¼š"Pick up the <object> outside the tray and place them on the tray."
+4. åªè¾“å‡ºç¿»è¯‘åŽçš„è‹±æ–‡å¥å­ï¼Œä¸è¦æ·»åŠ ä»»ä½•è§£é‡Š
+5. ç¡®ä¿å¥å­ä»¥å¥å·ç»“å°¾
+
+ç¿»è¯‘ç¤ºä¾‹:
+- "æ‹¿èµ·æ‰˜ç›˜å¤–çš„çº¢è‰²æ–¹å—" â†’ "Pick up the red cube outside the tray and place them on the tray."
+- "æŠŠæ‰˜ç›˜å¤–é¢çš„è“è‰²é›¶ä»¶æ”¾è¿›åŽ»" â†’ "Pick up the blue part outside the tray and place them on the tray."
+- "æ¡èµ·æ‰˜ç›˜å¤–çš„ç»¿è‰²ç§¯æœ¨" â†’ "Pick up the green block outside the tray and place them on the tray."
+
+ç‰¹åˆ«æ³¨æ„ï¼š**ç»å¯¹ä¸èƒ½çœç•¥"outside the tray"è¿™ä¸ªå…³é”®æ¡ä»¶**ï¼Œå³ä½¿åŽŸå§‹æŒ‡ä»¤ä¸­æ²¡æœ‰æ˜Žç¡®æåˆ°"å¤–"ï¼Œä¹Ÿè¦æ ¹æ®ä¸Šä¸‹æ–‡ç†è§£ä¸ºæ‰˜ç›˜å¤–çš„ç‰©ä½“ã€‚
+
+çŽ°åœ¨è¯·ç¿»è¯‘ä¸Šé¢çš„åŽŸå§‹æŒ‡ä»¤:
+"""
+
+def retry(func, max_retries=3):
+    def wrapper(*args, **kwargs):
+        for attempt in range(max_retries):
+            try:
+                return func(*args, **kwargs)
+            except Exception as e:
+                print(f"Attempt {attempt + 1} failed: {str(e)}")
+                time.sleep(2)
+        raise Exception(f"All {max_retries} attempts failed")
+    return wrapper
+
+
+def simple_visual_bbox(image_array, bbox, use_qwen=False, suffix=""):
+    x1, y1, x2, y2 = bbox
+    vis_image = image_array.copy()
+    cv.rectangle(vis_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
+    if use_qwen:
+        filename = f"qwen_debug_bbox{suffix}.jpg"
+        cv.imwrite(filename, vis_image)
+    else:
+        cv.imwrite("gemini_debug_bbox.jpg", vis_image)
+
+
+def get_simple_vb_imgcv(image_array, bbox, input_format="rgb"):
+    if input_format == "rgb":
+        image_array = cv.cvtColor(image_array, cv.COLOR_RGB2BGR)
+    else:
+        pass
+    x1, y1, x2, y2 = bbox
+    vis_image = image_array.copy()
+    cv.rectangle(vis_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
+    return vis_image
+
+
+@retry
+def call_gemini_for_translation(instruction, ver=2):
+    if ver == 1:
+        prompt = pt_for_translation1.replace("{instruction}", instruction)
+    elif ver == 2:
+        prompt = pt_for_translation2.replace("{instruction}", instruction)
+    start_time = time.time()
+    print("start calling gemini, waiting...")
+    text_response = client.models.generate_content(
+      model=MODEL_ID_FOR_TRANS,
+      contents=[
+        prompt
+      ],
+      config = types.GenerateContentConfig(
+          temperature=0.5,
+          thinking_config=types.ThinkingConfig(thinking_budget=0)
+      )
+    )
+    print(f"gemini inference time: {time.time() - start_time} seconds")
+    translation = text_response.text.strip()
+    # print(f"translation: {translation}")
+    return translation
+
+@retry
+def call_qwen_for_translation(instruction):
+    """
+    ä½¿ç”¨åƒé—®å¤§æ¨¡åž‹å°†ä¸­æ–‡æœºå™¨äººæŒ‡ä»¤ç¿»è¯‘æˆè‹±æ–‡ï¼Œç‰¹åˆ«å¼ºè°ƒ"outside the tray"æ¡ä»¶
+    """
+    dashscope.api_key = API_KEY_QWEN
+    
+    prompt = pt_for_translation2_qwen.replace("{instruction}", instruction)
+    
+    start_time = time.time()
+    print("å¼€å§‹è°ƒç”¨åƒé—®æ¨¡åž‹è¿›è¡Œç¿»è¯‘ï¼Œè¯·ç¨å€™...")
+    
+    response = Generation.call(
+        model=MODEL_ID_FOR_TRANS_QWEN,  
+        prompt=prompt,
+        temperature=0.3,     
+        top_p=0.7,          
+        max_tokens=50        
+    )
+    
+    print(f"åƒé—®æŽ¨ç†æ—¶é—´: {time.time() - start_time:.2f} ç§’")
+    
+    if response.status_code != 200:
+        raise ValueError(
+            f"åƒé—®APIè°ƒç”¨å¤±è´¥ï¼ŒçŠ¶æ€ç : {response.status_code}, "
+            f"é”™è¯¯ä¿¡æ¯: {getattr(response, 'message', 'æœªçŸ¥é”™è¯¯')}"
+        )
+    
+    translation = response.output.text.strip()
+    
+    translation = re.sub(r'^["\']|["\']$', '', translation)
+    translation = translation.split('\n')[0]
+    translation = translation.rstrip('.') + '.'
+    
+    if "outside the" not in translation.lower():
+        print("è­¦å‘Š: ç¿»è¯‘ç»“æžœå¯èƒ½ç¼ºå°‘'outside the'æ¡ä»¶!")
+    
+    print(f"ç¿»è¯‘ç»“æžœ: {translation}")
+    return translation
+
+
+
+@retry
+def call_gemini_for_bbox(image_array, instruction):
+    image_array = cv.cvtColor(image_array, cv.COLOR_RGB2BGR)
+    h, w, _ = image_array.shape
+    _, image_bytes = cv.imencode('.jpg', image_array)
+    image_bytes = image_bytes.tobytes()
+    prompt = prompt_template.replace("{instruction}", instruction)
+    start_time = time.time()
+    print("start calling gemini, waiting...")
+    image_response = client.models.generate_content(
+      model=MODEL_ID,
+      contents=[
+        types.Part.from_bytes(
+          data=image_bytes,
+          mime_type='image/jpeg',
+        ),
+        prompt
+      ],
+      config = types.GenerateContentConfig(
+          temperature=0.5,
+          thinking_config=types.ThinkingConfig(thinking_budget=0)
+      )
+    )
+    print(f"gemini inference time: {time.time() - start_time} seconds")
+    bbox = image_response.text
+    bbox = re.findall(r'\{"box_2d": \[(\d+), (\d+), (\d+), (\d+)\], "label": "([^"]+)"\}', bbox)[0]
+    ymin, xmin, ymax, xmax, label = bbox
+    scaled_bboxes = [
+        int(int(xmin) / 1000 * w),
+        int(int(ymin) / 1000 * h),
+        int(int(xmax) / 1000 * w),
+        int(int(ymax) / 1000 * h),
+    ]
+    print(f"xmin: {scaled_bboxes[0]}, y_min: {scaled_bboxes[1]}, \
+            x_max: {scaled_bboxes[2]}, y_max: {scaled_bboxes[3]}")
+    simple_visual_bbox(image_array, scaled_bboxes)
+    return scaled_bboxes
+
+
+@retry
+def call_qwen_for_bbox(
+    image_rgb: np.ndarray,
+    instruction: str,
+    save_visualization: bool = True,
+    suffix: str = ""
+) -> List[float]:
+    dashscope.api_key = API_KEY_QWEN
+    
+    height, width = image_rgb.shape[:2]
+    image_bgr = cv.cvtColor(image_rgb, cv.COLOR_RGB2BGR)
+    _, encoded_image = cv.imencode('.jpg', image_bgr)
+    image_data = base64.b64encode(encoded_image).decode('utf-8')
+    prompt_text = PROMPT_TEMPLATE.format(instruction=instruction)
+    
+    messages = [
+        {
+            'role': 'user',
+            'content': [
+                {
+                    'image': f'data:image/jpeg;base64,{image_data}'
+                },
+                {
+                    'text': prompt_text
+                }
+            ]
+        }
+    ]
+    
+    start_time = time.time()
+    print("start calling qwen, waiting...")
+
+    response = MultiModalConversation.call(
+        model=MODEL_ID_QWEN,
+        messages=messages,
+        top_p=0.8,
+        enable_thinking=True,
+        thinking_budget=512, # 320, # 
+        temperature=0.1
+    )
+    print(f"qwen inference time: {time.time() - start_time} seconds")
+    
+    if response.status_code != 200:
+        raise ValueError(
+            f"APIè°ƒç”¨å¤±è´¥ï¼ŒçŠ¶æ€ç : {response.status_code}, "
+            f"é”™è¯¯ä¿¡æ¯: {response.message}"
+        )
+    assistant_reply = response.output.choices[0].message.content[0].get('text', '')
+
+    match_with_confidence = re.findall(
+        r'\{\s*"b?box_2d"\s*:\s*\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]\s*,\s*"label"\s*:\s*"([^"]+)"\s*,\s*"confidence"\s*:\s*([\d.]+)\s*\}',
+        assistant_reply,
+        re.DOTALL
+    )
+    match_without_confidence = re.findall(
+        r'\{\s*"b?box_2d"\s*:\s*\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]\s*,\s*"label"\s*:\s*"([^"]+)"\s*\}',
+        assistant_reply,
+        re.DOTALL
+    )
+    if match_with_confidence:
+        bbox = match_with_confidence[0]
+    elif match_without_confidence:
+        bbox = match_without_confidence[0] + ('1.0',)  # Add default confidence of 1.0
+    else:
+        raise ValueError(f"Could not parse bbox from Qwen response: {assistant_reply}")
+    # print(f"bbox regulated from Qwen: {bbox}")
+    xmin, ymin, xmax, ymax, *_ = bbox
+    scaled_bboxes = [
+        int(int(xmin) / 1000 * width),
+        int(int(ymin) / 1000 * height),
+        int(int(xmax) / 1000 * width),
+        int(int(ymax) / 1000 * height),
+    ]
+    print(f"xmin: {scaled_bboxes[0]}, y_min: {scaled_bboxes[1]}, \
+            x_max: {scaled_bboxes[2]}, y_max: {scaled_bboxes[3]}")
+
+    if save_visualization:
+        simple_visual_bbox(image_bgr, scaled_bboxes, use_qwen=True, suffix=suffix)
+    
+    return scaled_bboxes
+
+
+def get_paligemma_box_instruction(image, bbox, target_image_size=224, scale=1024):
+    bbox = np.array(bbox)
+    h, w  = image.shape[:2]
+    h_scale, w_scale = target_image_size / h, target_image_size / w
+    bbox = bbox * np.array([w_scale, h_scale, w_scale, h_scale])
+    image = cv.resize(image, (target_image_size, target_image_size))
+    simple_visual_bbox(cv.cvtColor(image, cv.COLOR_RGB2BGR), bbox) # simple resize for visualization here
+    bbox = np.clip(np.round(bbox / target_image_size * scale), 0, scale - 1).astype(np.int32)
+    rel_x1, rel_y1, rel_x2, rel_y2 = bbox
+    y_min = min(rel_y1, rel_y2)
+    x_min = min(rel_x1, rel_x2)
+    y_max = max(rel_y1, rel_y2)
+    x_max = max(rel_x1, rel_x2)
+    bbox = f"<loc{y_min}><loc{x_min}><loc{y_max}><loc{x_max}>"
+    return bbox
+
+
+def get_bbox_image(rgb_head_image:np.ndarray, 
+                   bbox, target_height=224, target_width=224):
+    rgb_head_image = tf.convert_to_tensor(rgb_head_image)
+    rgb_head_image = tf.cast(rgb_head_image, tf.float32)
+    H, W, _ = rgb_head_image.shape
+
+    x1, y1, x2, y2 = bbox
+    bw, bh = x2 - x1, y2 - y1
+    side = tf.maximum(bw, bh)
+    cx, cy = x1 + bw / 2, y1 + bh / 2
+
+    # get square bbox
+    new_x1 = tf.cast(tf.floor(cx - side / 2), tf.int32)
+    new_y1 = tf.cast(tf.floor(cy - side / 2), tf.int32)
+    new_x2 = tf.cast(tf.math.ceil(cx + side / 2), tf.int32)
+    new_y2 = tf.cast(tf.math.ceil(cy + side / 2), tf.int32)
+
+    # padding origin image if out of bound
+    pad_left = tf.maximum(0, -new_x1)
+    pad_top = tf.maximum(0, -new_y1)
+    pad_right = tf.maximum(0, new_x2 - W)
+    pad_bottom = tf.maximum(0, new_y2 - H)
+
+    img_padded = tf.pad(
+        rgb_head_image,
+        paddings=[[pad_top, pad_bottom], [pad_left, pad_right], [0, 0]],
+        mode='CONSTANT',
+        constant_values=0
+    )
+
+    # update bbox 
+    crop_x1 = new_x1 + pad_left
+    crop_y1 = new_y1 + pad_top
+    crop_x2 = new_x2 + pad_left
+    crop_y2 = new_y2 + pad_top
+
+    crop = img_padded[crop_y1:crop_y2, crop_x1:crop_x2, :]
+    crop_resized = tf.image.resize(crop, (target_height, target_width), method='bilinear')
+    crop_resized = tf.cast(crop_resized, tf.uint8).numpy()
+
+    cv.imwrite("debug_condition_image.png",
+               cv.cvtColor(crop_resized),
+               cv.COLOR_RGB2BGR)
+    
+    return crop_resized
+
+
+if __name__ == "__main__":
+    img_time = "20251105-161101"
+    image = cv.imread(img_time+".jpg")
+    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
+    
+    use_lower_half = True # False # 
+    if use_lower_half:
+        height = image.shape[0] # 
+        height_to_use = height // 2
+        suffix2 = "_bottom_half" # 
+    else:
+        height_to_use = 0 # full image
+        suffix2 = "_full" # "_bottom_half" # 
+    image_bottom_half = image[height_to_use:, :, :]
+    
+    instruction = "Pick up the bottles outside the container and place them on the container." 
+    suffix1 = img_time+"-seed1-temp0d1-tb512-bottles-gemini-prompt-container"
+    
+    suffix = suffix1 + suffix2
+    use_gemini = False # True # 
+    if use_gemini:
+        bbox = call_gemini_for_bbox(image_bottom_half, instruction) #, suffix=suffix)
+    else:
+        suffix += "_qwen"
+        bbox = call_qwen_for_bbox(image_bottom_half, instruction, suffix=suffix)
+    print("Final bbox:", bbox)
+    
+    # test Chinese translation
+    chinese_instruction = "æ‹¿èµ·ç´«è‰²ç‰©å“æ”¾åˆ°æ‰˜ç›˜ä¸Š" 
+    english_translation_gemini = None
+    english_translation = call_qwen_for_translation(chinese_instruction)
+    if english_translation_gemini:
+        print(f"\næœ€ç»ˆç¿»è¯‘ç»“æžœ gemini: {english_translation_gemini}")
+    else:
+        print("ç¿»è¯‘å¤±è´¥")
+    if english_translation:
+        print(f"\næœ€ç»ˆç¿»è¯‘ç»“æžœ: {english_translation}")
+    else:
+        print("ç¿»è¯‘å¤±è´¥")
\ No newline at end of file
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/vlm_main.py b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/vlm_main.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4a68286fd964908b6249960da88f5ac021f581f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/g0_vlm_node/vlm_main.py
@@ -0,0 +1,371 @@
+import rclpy
+import numpy as np
+from collections import deque
+
+from vla_msg.srv import VLMInstruction
+from vla_msg.msg import VLAPromptEcho
+from std_msgs.msg import String
+from functools import partial
+
+from cv_bridge import CvBridge
+from sensor_msgs.msg import CompressedImage
+import base64
+
+import cv2
+
+from g0_vlm_node.utils import call_gemini_for_translation, call_gemini_for_bbox, get_simple_vb_imgcv
+from g0_vlm_node.utils import call_qwen_for_translation, call_qwen_for_bbox
+
+import argparse
+import time
+import json
+from loguru import logger
+import tyro
+
+
+class VLMNode:
+    def __init__(self, 
+                 reliabty_mode:str,
+                 use_qwen: bool):
+
+        self.node = rclpy.create_node('g0_vlm_node')
+        self.ver = 2
+        self.head_camera_topic_n = "/hdas/camera_head/left_raw/image_raw_color/compressed"
+        self.server_name1 = 'hs/vlm_instruction_proc_service'
+        self.server_name2 = 'hs/vlm_instruction_cfm_service'
+
+        self.pub_for_ehi_topic_n = 'hs/vlm_out4ehi'
+        self.pub_to_vla_topic_n = 'hs/vlm_out2vla'
+
+        self.use_qwen = use_qwen
+
+        self.qos_profile_pub = self.create_qos_profile(reliabty_mode)
+
+        self.loop_num_for_ehi = 1
+        self.loop_num_to_vla = 1
+
+        self.pub_for_ehi = self.node.create_publisher(
+            VLAPromptEcho,
+            self.pub_for_ehi_topic_n,
+            self.qos_profile_pub
+        )
+
+        self.pub_to_vla = self.node.create_publisher(
+            String,
+            self.pub_to_vla_topic_n,
+            self.qos_profile_pub
+        )
+
+        self.vlm_proc_que_len = 5
+        self.vlm_proc_que = deque(
+            maxlen=self.vlm_proc_que_len
+            )
+        self.use_vlm_cache = False
+
+        self.br = CvBridge()
+        self.himg_que_len = 5
+        self.himg_que = deque(
+            maxlen=self.himg_que_len
+            )
+        self.himg_sub = self.node.create_subscription(
+                            CompressedImage, 
+                            self.head_camera_topic_n, 
+                            partial(
+                                self._vlm_camera_callback, 
+                                que=self.himg_que,
+                            ),
+                            self.create_qos_profile("reliable")
+                        )
+
+        self.hp_ = None
+        self.bbox_dict = {"bbox": [], "head_img_base64": ""}
+
+
+
+    def create_qos_profile(self, r_mode):
+        if r_mode in ["reliable", "r"]:
+            qos_profile_pub = rclpy.qos.QoSProfile(
+                reliability=rclpy.qos.ReliabilityPolicy.RELIABLE,  
+                history=rclpy.qos.HistoryPolicy.KEEP_LAST,
+                depth=1,
+                durability=rclpy.qos.DurabilityPolicy.VOLATILE
+            )
+        elif r_mode in ["best_effort", "be"]:
+            qos_profile_pub = rclpy.qos.QoSProfile(
+                reliability=rclpy.qos.ReliabilityPolicy.BEST_EFFORT,  
+                history=rclpy.qos.HistoryPolicy.KEEP_LAST,
+                depth=1,
+                durability=rclpy.qos.DurabilityPolicy.VOLATILE
+            )
+        else:
+            qos_profile_pub = None
+            logger.error("Invalid reliability mode specified. Use 'reliable' or 'best_effort'.")
+            raise ValueError("Invalid reliability mode specified.")
+        return qos_profile_pub
+
+
+    def _get_msg_time(self, msg):
+        return msg.header.stamp.sec + msg.header.stamp.nanosec * 1e-9
+
+    def _vlm_camera_callback(self, msg: CompressedImage, que: list):
+        img_cv_bgr = self.br.compressed_imgmsg_to_cv2(msg)
+        # logger.info(f"Here is camera callback, img_cv_bgr shape is {img_cv_bgr.shape}")
+        if len(img_cv_bgr.shape) == 3 and img_cv_bgr.shape[2] == 3:
+            img_cv = cv2.cvtColor(img_cv_bgr, cv2.COLOR_BGR2RGB)
+        elif len(img_cv_bgr.shape) == 3 and img_cv_bgr.shape[2] == 4:
+            img_cv = cv2.cvtColor(img_cv_bgr, cv2.COLOR_BGRA2RGBA)
+        else:
+            raise ValueError(f"Unexpected image format: {img_cv_bgr.shape}")
+        # if self.config.hardware == R1_LITE and "head" in topic:
+        #     img = img_cv[:, :img_cv.shape[1] // 2]
+        img = img_cv
+
+        # logger.info(f"Here is camera callback, img shape is {img.shape}")
+
+        que.append(
+            dict(
+                data=img,
+                message_time=self._get_msg_time(msg),
+            )
+        )
+
+
+    def vlm_srv(self):
+        logger.info("Starting VLM Service...")
+
+        self.srv1 = self.node.create_service(
+            VLMInstruction, 
+            self.server_name1, 
+            self.vlm_processor1
+            )
+        self.node.get_logger().info("VLM Server1 started!")
+
+        self.srv2 = self.node.create_service(
+            VLMInstruction,
+            self.server_name2,
+            self.vlm_processor2
+            )
+        self.node.get_logger().info("VLM Server2 started!")
+
+        rclpy.spin(self.node)
+
+    def decode_img_from_base64(self, img_base64: str, output_format="rgb") -> np.ndarray:
+        img_data = base64.b64decode(img_base64)
+        # å°†äºŒè¿›åˆ¶æ•°æ®è½¬æ¢ä¸º numpy æ•°ç»„
+        img_array = np.frombuffer(img_data, dtype=np.uint8)
+        # ä½¿ç”¨ cv2.imdecode å°†å…¶æ¢å¤ä¸ºå›¾åƒ
+        img_array = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
+        if output_format == "rgb":
+            return cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
+        else:
+            return img_array
+
+
+    def imencode_img_to_base64(self, img, input_format="rgb") -> str:
+        if input_format == "rgb":
+            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+        else:
+            pass
+        _, buffer = cv2.imencode('.jpg', img)
+        # å°†äºŒè¿›åˆ¶æ•°æ®è½¬æ¢ä¸º base64 å­—ç¬¦ä¸²
+        return base64.b64encode(buffer).decode('utf-8')
+
+    def publish_to_vla(self, hp_: str, bbox: list[int], head_img_base64: str):
+        ver = self.ver
+        msg = String()
+        json_to_vla = {}
+        
+        json_to_vla["lower_prompt_list"] = [hp_]
+        json_to_vla["bbox"] = []
+        json_to_vla["head_img_base64"] = ""
+        if ver == 1:
+            logger.warning("Version 1 you are using, which does not support bbox publishing.")
+        elif ver == 2:
+            if bbox != [] and head_img_base64 != "":
+                json_to_vla["bbox"] = bbox
+                json_to_vla["head_img_base64"] = head_img_base64
+        
+        msg.data = json.dumps(json_to_vla, ensure_ascii=False)
+        
+        loop_num = self.loop_num_to_vla
+        for _ in range(loop_num):
+            self.pub_to_vla.publish(msg)
+
+    def publish_for_ehi(self, text, img, output_format="bgr"):
+        ver = self.ver
+        msg = VLAPromptEcho()
+        msg.role = "vlm"
+        msg.content = text
+        if ver == 1:
+            pass
+        elif ver == 2:
+            if output_format == "rgb":
+                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            else:
+                pass
+            img_pub = CompressedImage()
+            img_pub.data = cv2.imencode('.jpg', img)[1].tobytes()
+            msg.image_compressed = img_pub
+        
+        loop_num = self.loop_num_for_ehi
+        for _ in range(loop_num):
+            self.pub_for_ehi.publish(msg)
+
+
+    def hp_processor(self, higher_prompt: str) -> str:
+        hp_ = "NaN"
+        try:
+            if self.use_qwen:
+                hp_ = call_qwen_for_translation(higher_prompt)
+            else:
+                hp_ = call_gemini_for_translation(higher_prompt)
+        except Exception as e:
+            logger.info(f"[VLM Server1] Require Gemini for Translation fail! Detail:{str(e)}")
+            time.sleep(2)
+        return hp_
+
+    def bbox_processor(self, latest_head_rgb, hp_: str) -> list[int]:
+        bbox = []
+        try:
+            if self.use_qwen:
+                bbox = call_qwen_for_bbox(latest_head_rgb, hp_)
+            else:
+                bbox = call_gemini_for_bbox(latest_head_rgb, hp_)
+        except Exception as e:
+            model_n = "Qwen" if self.use_qwen else "Gemini"
+            logger.info(f"[VLM Server1] Require {model_n} for BBox fail! Detail:{str(e)}")
+            time.sleep(2)
+        if not isinstance(bbox, list) or len(bbox) != 4 or not all(isinstance(coord, int) for coord in bbox):
+            logger.warning(f"[VLM Server1] Invalid bbox format received: {bbox}. Expected a list of 4 integers.")
+        return bbox
+
+
+    def refine_hp(self, higher_prompt: str) -> str:
+        return higher_prompt.strip().lower()
+
+
+    def vlm_processor1(self, request, response):
+        ins_dict = json.loads(request.instruction)
+        higher_prompt = self.refine_hp(ins_dict["content"])
+        self.use_vlm_cache = True if ins_dict.get("use_vlm_cache", "false").lower() == "true" else False
+        hp_ = higher_prompt
+
+        response.success = False
+
+        if len(self.himg_que) == 0:
+            logger.info(f"[VLM Server1] No head image received!") 
+            return response
+        latest_head_rgb = self.himg_que[-1]["data"]
+
+
+        if higher_prompt != "":
+            if higher_prompt == "reset":
+                self.publish_to_vla("reset", [], "")
+                self.hp_ = hp_
+                
+                self.bbox_dict["bbox"] = []
+                self.bbox_dict["head_img_base64"] = ""
+                logger.info(f"[VLM Server1] Successfully processed instruction: {hp_}!") 
+                response.success = True
+            elif higher_prompt == "stop":
+                self.hp_ = hp_
+                
+                self.bbox_dict["bbox"] = []
+                self.bbox_dict["head_img_base64"] = ""
+                logger.info(f"[VLM Server1] Successfully processed instruction: {hp_}!") 
+                response.success = True
+            else:
+                self.publish_to_vla("reset", [], "")
+                if self.use_vlm_cache:
+                    for proc_dict in self.vlm_proc_que:
+                        if higher_prompt in proc_dict:
+                            hp_ = proc_dict[higher_prompt]["hp_"]
+                            bbox = proc_dict[higher_prompt]["bbox"]
+                            head_img_base64 = proc_dict[higher_prompt]["head_img_base64"]
+                            logger.info(f"[VLM Server1] Found cached hp for the instruction: {higher_prompt} -> {hp_}!") 
+                            bbox_in_img_bgr = get_simple_vb_imgcv(self.decode_img_from_base64(head_img_base64, output_format="rgb"), 
+                                                                bbox, input_format="rgb")
+                            self.publish_for_ehi(hp_, bbox_in_img_bgr)
+                            
+
+                            self.hp_ = hp_
+                            self.bbox_dict["bbox"] = bbox
+                            self.bbox_dict["head_img_base64"] = head_img_base64
+                            logger.info(f"[VLM Server1] Successfully processed instruction: {higher_prompt} -> {hp_} and bbox: {bbox}!") 
+                            response.success = True
+                            self.use_vlm_cache = False
+                            return response
+                            
+                
+                logger.info(f"[VLM Server1] Processing instruction: {higher_prompt}!")
+                hp_ = self.hp_processor(higher_prompt) 
+                
+                if hp_ != "NaN":
+                    bbox = self.bbox_processor(latest_head_rgb, hp_)
+                    if bbox != []:
+                        bbox_in_img_bgr = get_simple_vb_imgcv(latest_head_rgb, bbox, input_format="rgb")
+                        self.publish_for_ehi(hp_, bbox_in_img_bgr)
+                        
+                        self.hp_ = hp_
+                        self.bbox_dict["bbox"] = bbox
+                        head_img_base64 = self.imencode_img_to_base64(latest_head_rgb, input_format="rgb")
+                        self.bbox_dict["head_img_base64"] = head_img_base64
+
+                        if self.use_vlm_cache:
+                            self.vlm_proc_que.append({higher_prompt:{
+                                "hp_": hp_, 
+                                "bbox": bbox, 
+                                "head_img_base64": head_img_base64
+                                }
+                            })
+
+                        logger.info(f"[VLM Server1] Successfully processed instruction: {higher_prompt} -> {hp_} and bbox: {bbox}!") 
+                        response.success = True
+                    else:
+                        logger.info(f"[VLM Server1] BBox process fail! Try again!")
+                else:
+                    logger.info(f"[VLM Server1] Instruction process fail! Try again!")   
+        
+        self.use_vlm_cache = False
+        return response
+
+    def vlm_processor2(self, request, response):
+        hp_ = self.hp_
+        bbox_dict = self.bbox_dict
+        bbox = bbox_dict["bbox"]
+        head_img_base64 = bbox_dict["head_img_base64"]
+        
+        if hp_ is None or bbox == []:
+            response.success = False
+            response.reserved = "No prompts & bbox sent to VLA"
+        else:
+            self.publish_to_vla(hp_, bbox, head_img_base64)
+            response.success = True
+            response.reserved = f"Prompts sent to VLA is {hp_}, Bbox is {bbox}. \nPart of head image is {head_img_base64[:50]}..."
+            
+            logger.info(f"[VLM Server2] Successfully sent to VLA hp: {hp_}, bbox: {bbox} and head_img_base64!")
+        return response
+
+
+
+def main(argv=None):
+    rclpy.init(args=argv)
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--reliabty-mode", dest="reliabty_mode", type=str, default="reliable")
+    parser.add_argument('--use-qwen', dest='use_qwen', action='store_true',
+                        help='Enable Qwen usage')
+    parser.add_argument('--no-use-qwen', dest='use_qwen', action='store_false',
+                        help='Disable Qwen usage')
+    args, unknown = parser.parse_known_args(argv)
+
+    vlm_node = VLMNode(
+        reliabty_mode=args.reliabty_mode,
+        use_qwen=args.use_qwen
+        )
+
+    try:
+        logger.info('Beginning VLM Node, shut down with CTRL-C')
+        vlm_node.vlm_srv()
+    finally:
+        rclpy.shutdown()
+
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/package.xml b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/package.xml
new file mode 100644
index 0000000000000000000000000000000000000000..e5e62b9da16fa6ca9670dcaf390e3968908a1376
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/package.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>g0_vlm_node</name>
+  <version>0.0.0</version>
+  <description>TODO: Package description</description>
+  <maintainer email="jingyang.mai@galaxea.ai">user</maintainer>
+  <license>TODO: License declaration</license>
+
+  <depend>rclpy</depend>
+  <depend>std_msgs</depend>
+
+  <depend>sensor_msgs</depend>
+  <!-- vlm_main.py imports VLMInstruction / VLAPromptEcho from vla_msg and
+       CvBridge from cv_bridge; declare both so they are built and resolved
+       before this package. -->
+  <depend>vla_msg</depend>
+  <depend>cv_bridge</depend>
+  <!-- NOTE(review): g0_vlm_interface is not imported by vlm_main.py; confirm
+       it is still required. -->
+  <depend>g0_vlm_interface</depend>
+  <!-- NOTE(review): the rosidl entries below are normally used by packages
+       that generate interfaces; this package's build type is ament_python.
+       Confirm they are needed. -->
+  <buildtool_depend>rosidl_default_generators</buildtool_depend>
+  <exec_depend>rosidl_default_runtime</exec_depend>
+  <member_of_group>rosidl_interface_packages</member_of_group>
+
+
+  <test_depend>ament_copyright</test_depend>
+  <test_depend>ament_flake8</test_depend>
+  <test_depend>ament_pep257</test_depend>
+  <test_depend>python3-pytest</test_depend>
+
+  <export>
+    <build_type>ament_python</build_type>
+  </export>
+</package>
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/resource/g0_vlm_node b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/resource/g0_vlm_node
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/setup.cfg b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..11dcc0c349ce58ba3358855d9a7277b1c30fd1f2
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/setup.cfg
@@ -0,0 +1,4 @@
+# Place the package's console scripts under lib/g0_vlm_node for both
+# development and regular installs.
+[develop]
+script_dir=$base/lib/g0_vlm_node
+[install]
+install_scripts=$base/lib/g0_vlm_node
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/setup.py b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca76d6a35c7fe662507e75dd5c2e24eafd2d92ad
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/setup.py
@@ -0,0 +1,30 @@
+from setuptools import find_packages, setup
+
+package_name = 'g0_vlm_node'
+
+# Package metadata and install layout for the g0_vlm_node Python package.
+setup(
+    name=package_name,
+    version='0.0.0',
+    packages=find_packages(exclude=['test']),
+    data_files=[
+        # Marker file that registers the package in the ament resource index.
+        ('share/ament_index/resource_index/packages',
+            ['resource/' + package_name]),
+        # Install the manifest next to the package's shared data.
+        ('share/' + package_name, ['package.xml']),
+    ],
+    install_requires=['setuptools'],
+    zip_safe=True,
+    maintainer='user',
+    maintainer_email='jingyang.mai@galaxea.ai',
+    description='TODO: Package description',
+    license='TODO: License declaration',
+    extras_require={
+        'test': [
+            'pytest',
+        ],
+    },
+    entry_points={
+        'console_scripts': [
+            # Executable `vlm_main` runs main() from g0_vlm_node/vlm_main.py.
+            'vlm_main = g0_vlm_node.vlm_main:main'
+        ],
+    },
+)
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_copyright.py b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_copyright.py
new file mode 100644
index 0000000000000000000000000000000000000000..97a39196e84db97954341162a6d2e7f771d938c0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_copyright.py
@@ -0,0 +1,25 @@
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_copyright.main import main
+import pytest
+
+
+# Remove the `skip` decorator once the source file(s) have a copyright header
+@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.')
+@pytest.mark.copyright
+@pytest.mark.linter
+def test_copyright():
+    rc = main(argv=['.', 'test'])
+    assert rc == 0, 'Found errors'
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_flake8.py b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_flake8.py
new file mode 100644
index 0000000000000000000000000000000000000000..27ee1078ff077cc3a0fec75b7d023101a68164d1
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_flake8.py
@@ -0,0 +1,25 @@
+# Copyright 2017 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_flake8.main import main_with_errors
+import pytest
+
+
+@pytest.mark.flake8
+@pytest.mark.linter
+def test_flake8():
+    rc, errors = main_with_errors(argv=[])
+    assert rc == 0, \
+        'Found %d code style errors / warnings:\n' % len(errors) + \
+        '\n'.join(errors)
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_pep257.py b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_pep257.py
new file mode 100644
index 0000000000000000000000000000000000000000..b234a3840f4c5bd38f043638c8622b8f240e1185
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/g0_vlm_node/test/test_pep257.py
@@ -0,0 +1,23 @@
+# Copyright 2015 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ament_pep257.main import main
+import pytest
+
+
+@pytest.mark.linter
+@pytest.mark.pep257
+def test_pep257():
+    rc = main(argv=['.', 'test'])
+    assert rc == 0, 'Found code style errors / warnings'
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/CMakeLists.txt b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fcff1cbd9159f558692dba9cfbf05bea288e248f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Build file for the vla_msg interface package: generates code for every
+# .msg file under msg/ and every .srv file under srv/.
+cmake_minimum_required(VERSION 3.16)
+project(vla_msg)
+
+find_package(ament_cmake REQUIRED)
+find_package(std_msgs REQUIRED)
+
+find_package(sensor_msgs REQUIRED)
+find_package(rosidl_default_generators REQUIRED)
+
+# Collect interface files as paths relative to this directory.
+file(GLOB msg_files LIST_DIRECTORIES false RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "msg/*.msg")
+file(GLOB srv_files LIST_DIRECTORIES false RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "srv/*.srv")
+
+# The interfaces reference std_msgs and sensor_msgs types.
+rosidl_generate_interfaces(${PROJECT_NAME}
+        ${msg_files}
+        ${srv_files}
+        DEPENDENCIES std_msgs sensor_msgs
+)
+
+ament_export_dependencies(rosidl_default_runtime std_msgs sensor_msgs)
+
+ament_package()
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/msg/VLAPromptEcho.msg b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/msg/VLAPromptEcho.msg
new file mode 100644
index 0000000000000000000000000000000000000000..bdd4034994ea211f7c5ec9eac3fcf262d04b1c28
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/msg/VLAPromptEcho.msg
@@ -0,0 +1,5 @@
+# Prompt echo message; g0_vlm_node publishes it on hs/vlm_out4ehi.
+std_msgs/Header header
+# Sender of the message; g0_vlm_node sets this to "vlm".
+string role
+# Prompt text.
+string content
+# Attached image; g0_vlm_node fills it with JPEG-encoded bytes.
+sensor_msgs/CompressedImage image_compressed
+string reserved
\ No newline at end of file
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/package.xml b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/package.xml
new file mode 100644
index 0000000000000000000000000000000000000000..37f21bfbefd51b9a4fc1fd23ccee9e17afba67ad
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/package.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Manifest of the vla_msg interface package (built with ament_cmake). -->
+<package format="3">
+    <name>vla_msg</name>
+    <version>0.1.0</version>
+    <description>The vla_msg package</description>
+    <member_of_group>rosidl_interface_packages</member_of_group>
+    <maintainer email="support@galaxea.ai">Galaxea AI</maintainer>  
+    <license>TODO</license>
+    <buildtool_depend>ament_cmake</buildtool_depend>
+    <build_depend>rosidl_default_generators</build_depend>
+    <exec_depend>rosidl_default_runtime</exec_depend>
+    <depend>std_msgs</depend>
+    <depend>sensor_msgs</depend>
+    <!-- NOTE(review): CMakeLists.txt only finds std_msgs and sensor_msgs;
+         confirm vision_msgs is still needed. -->
+    <depend>vision_msgs</depend>
+    <export>
+        <build_type>ament_cmake</build_type>
+    </export>
+</package>
diff --git a/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/srv/VLMInstruction.srv b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/srv/VLMInstruction.srv
new file mode 100644
index 0000000000000000000000000000000000000000..ae424de566302ed11135e80727d7f0812f3578de
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/code/Hierarchical_System/src/vla/srv/VLMInstruction.srv
@@ -0,0 +1,4 @@
+# Request. g0_vlm_node's processing service parses this as a JSON object with
+# a "content" string and an optional "use_vlm_cache" flag.
+string instruction
+---
+# Response: whether the request was handled successfully.
+bool success
+# Free-form status text.
+string reserved
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/char-rnn.wts b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/char-rnn.wts
new file mode 100644
index 0000000000000000000000000000000000000000..e8fd145f7430090e2e9cbd75831a4f9c9ff6020e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/char-rnn.wts
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e3eea72440ca567a606df4a6023296b741c060092b66e0438bf65280ad0e97b
+size 51181712
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/checkpoint b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/checkpoint
new file mode 100644
index 0000000000000000000000000000000000000000..12d169b993299005d4e2fd550624f1e44453e46e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/checkpoint
@@ -0,0 +1,6 @@
+model_checkpoint_path: "model-20080"
+all_model_checkpoint_paths: "model-10040"
+all_model_checkpoint_paths: "model-12048"
+all_model_checkpoint_paths: "model-16064"
+all_model_checkpoint_paths: "model-18072"
+all_model_checkpoint_paths: "model-20080"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.data-00000-of-00001 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.data-00000-of-00001
new file mode 100644
index 0000000000000000000000000000000000000000..5491f03dc6adf2027d400e9b4ab6b7ffcb06a0be
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.data-00000-of-00001
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d237c223cb17cb956459de977e353714d49b585ef79cf1548d4c78e129062c03
+size 51180336
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.index b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.index
new file mode 100644
index 0000000000000000000000000000000000000000..dc0e09b4cfdae54740b0671b015beb5d6a452bad
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.index differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.meta b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.meta
new file mode 100644
index 0000000000000000000000000000000000000000..c87cd7a1a8fce6834f79a321f4ee6a9754c82c29
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/char-rnn/model/model-20080.meta
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93e392adc47837ba41946df3d848dc4ba87b378620a722674821077c4f724bba
+size 684877
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8e7a7abb32610056c9e677c8f70cc2eb35e7ce71
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/README.md
@@ -0,0 +1,11 @@
+# Sample Int8 API
+
+## resnet50
+File: [airliner.ppm]
+The input sample images used to do int8 calibration.
+
+File: [reference_labels.txt]
+The input reference labels used to do int8 calibration.
+
+File: [resnet50_per_tensor_dynamic_range.txt]
+The absolute max value for each tensor.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/airliner.ppm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/airliner.ppm
new file mode 100644
index 0000000000000000000000000000000000000000..ea23812f43bf14b6e20ca945d2c3127760b1d421
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/airliner.ppm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eac2811e99893115847d2e6ab24bae5a1e4ff64820a000a672333c07dc29e083
+size 150543
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/reference_labels.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/reference_labels.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f40829ed0fc318c673860fae4be6c48529da116e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/reference_labels.txt
@@ -0,0 +1,1000 @@
+tench
+goldfish
+great white shark
+tiger shark
+hammerhead
+electric ray
+stingray
+cock
+hen
+ostrich
+brambling
+goldfinch
+house finch
+junco
+indigo bunting
+robin
+bulbul
+jay
+magpie
+chickadee
+water ouzel
+kite
+bald eagle
+vulture
+great grey owl
+European fire salamander
+common newt
+eft
+spotted salamander
+axolotl
+bullfrog
+tree frog
+tailed frog
+loggerhead
+leatherback turtle
+mud turtle
+terrapin
+box turtle
+banded gecko
+common iguana
+American chameleon
+whiptail
+agama
+frilled lizard
+alligator lizard
+Gila monster
+green lizard
+African chameleon
+Komodo dragon
+African crocodile
+American alligator
+triceratops
+thunder snake
+ringneck snake
+hognose snake
+green snake
+king snake
+garter snake
+water snake
+vine snake
+night snake
+boa constrictor
+rock python
+Indian cobra
+green mamba
+sea snake
+horned viper
+diamondback
+sidewinder
+trilobite
+harvestman
+scorpion
+black and gold garden spider
+barn spider
+garden spider
+black widow
+tarantula
+wolf spider
+tick
+centipede
+black grouse
+ptarmigan
+ruffed grouse
+prairie chicken
+peacock
+quail
+partridge
+African grey
+macaw
+sulphur-crested cockatoo
+lorikeet
+coucal
+bee eater
+hornbill
+hummingbird
+jacamar
+toucan
+drake
+red-breasted merganser
+goose
+black swan
+tusker
+echidna
+platypus
+wallaby
+koala
+wombat
+jellyfish
+sea anemone
+brain coral
+flatworm
+nematode
+conch
+snail
+slug
+sea slug
+chiton
+chambered nautilus
+Dungeness crab
+rock crab
+fiddler crab
+king crab
+American lobster
+spiny lobster
+crayfish
+hermit crab
+isopod
+white stork
+black stork
+spoonbill
+flamingo
+little blue heron
+American egret
+bittern
+crane
+limpkin
+European gallinule
+American coot
+bustard
+ruddy turnstone
+red-backed sandpiper
+redshank
+dowitcher
+oystercatcher
+pelican
+king penguin
+albatross
+grey whale
+killer whale
+dugong
+sea lion
+Chihuahua
+Japanese spaniel
+Maltese dog
+Pekinese
+Shih-Tzu
+Blenheim spaniel
+papillon
+toy terrier
+Rhodesian ridgeback
+Afghan hound
+basset
+beagle
+bloodhound
+bluetick
+black-and-tan coonhound
+Walker hound
+English foxhound
+redbone
+borzoi
+Irish wolfhound
+Italian greyhound
+whippet
+Ibizan hound
+Norwegian elkhound
+otterhound
+Saluki
+Scottish deerhound
+Weimaraner
+Staffordshire bullterrier
+American Staffordshire terrier
+Bedlington terrier
+Border terrier
+Kerry blue terrier
+Irish terrier
+Norfolk terrier
+Norwich terrier
+Yorkshire terrier
+wire-haired fox terrier
+Lakeland terrier
+Sealyham terrier
+Airedale
+cairn
+Australian terrier
+Dandie Dinmont
+Boston bull
+miniature schnauzer
+giant schnauzer
+standard schnauzer
+Scotch terrier
+Tibetan terrier
+silky terrier
+soft-coated wheaten terrier
+West Highland white terrier
+Lhasa
+flat-coated retriever
+curly-coated retriever
+golden retriever
+Labrador retriever
+Chesapeake Bay retriever
+German short-haired pointer
+vizsla
+English setter
+Irish setter
+Gordon setter
+Brittany spaniel
+clumber
+English springer
+Welsh springer spaniel
+cocker spaniel
+Sussex spaniel
+Irish water spaniel
+kuvasz
+schipperke
+groenendael
+malinois
+briard
+kelpie
+komondor
+Old English sheepdog
+Shetland sheepdog
+collie
+Border collie
+Bouvier des Flandres
+Rottweiler
+German shepherd
+Doberman
+miniature pinscher
+Greater Swiss Mountain dog
+Bernese mountain dog
+Appenzeller
+EntleBucher
+boxer
+bull mastiff
+Tibetan mastiff
+French bulldog
+Great Dane
+Saint Bernard
+Eskimo dog
+malamute
+Siberian husky
+dalmatian
+affenpinscher
+basenji
+pug
+Leonberg
+Newfoundland
+Great Pyrenees
+Samoyed
+Pomeranian
+chow
+keeshond
+Brabancon griffon
+Pembroke
+Cardigan
+toy poodle
+miniature poodle
+standard poodle
+Mexican hairless
+timber wolf
+white wolf
+red wolf
+coyote
+dingo
+dhole
+African hunting dog
+hyena
+red fox
+kit fox
+Arctic fox
+grey fox
+tabby
+tiger cat
+Persian cat
+Siamese cat
+Egyptian cat
+cougar
+lynx
+leopard
+snow leopard
+jaguar
+lion
+tiger
+cheetah
+brown bear
+American black bear
+ice bear
+sloth bear
+mongoose
+meerkat
+tiger beetle
+ladybug
+ground beetle
+long-horned beetle
+leaf beetle
+dung beetle
+rhinoceros beetle
+weevil
+fly
+bee
+ant
+grasshopper
+cricket
+walking stick
+cockroach
+mantis
+cicada
+leafhopper
+lacewing
+dragonfly
+damselfly
+admiral
+ringlet
+monarch
+cabbage butterfly
+sulphur butterfly
+lycaenid
+starfish
+sea urchin
+sea cucumber
+wood rabbit
+hare
+Angora
+hamster
+porcupine
+fox squirrel
+marmot
+beaver
+guinea pig
+sorrel
+zebra
+hog
+wild boar
+warthog
+hippopotamus
+ox
+water buffalo
+bison
+ram
+bighorn
+ibex
+hartebeest
+impala
+gazelle
+Arabian camel
+llama
+weasel
+mink
+polecat
+black-footed ferret
+otter
+skunk
+badger
+armadillo
+three-toed sloth
+orangutan
+gorilla
+chimpanzee
+gibbon
+siamang
+guenon
+patas
+baboon
+macaque
+langur
+colobus
+proboscis monkey
+marmoset
+capuchin
+howler monkey
+titi
+spider monkey
+squirrel monkey
+Madagascar cat
+indri
+Indian elephant
+African elephant
+lesser panda
+giant panda
+barracouta
+eel
+coho
+rock beauty
+anemone fish
+sturgeon
+gar
+lionfish
+puffer
+abacus
+abaya
+academic gown
+accordion
+acoustic guitar
+aircraft carrier
+airliner
+airship
+altar
+ambulance
+amphibian
+analog clock
+apiary
+apron
+ashcan
+assault rifle
+backpack
+bakery
+balance beam
+balloon
+ballpoint
+Band Aid
+banjo
+bannister
+barbell
+barber chair
+barbershop
+barn
+barometer
+barrel
+barrow
+baseball
+basketball
+bassinet
+bassoon
+bathing cap
+bath towel
+bathtub
+beach wagon
+beacon
+beaker
+bearskin
+beer bottle
+beer glass
+bell cote
+bib
+bicycle-built-for-two
+bikini
+binder
+binoculars
+birdhouse
+boathouse
+bobsled
+bolo tie
+bonnet
+bookcase
+bookshop
+bottlecap
+bow
+bow tie
+brass
+brassiere
+breakwater
+breastplate
+broom
+bucket
+buckle
+bulletproof vest
+bullet train
+butcher shop
+cab
+caldron
+candle
+cannon
+canoe
+can opener
+cardigan
+car mirror
+carousel
+carpenter's kit
+carton
+car wheel
+cash machine
+cassette
+cassette player
+castle
+catamaran
+CD player
+cello
+cellular telephone
+chain
+chainlink fence
+chain mail
+chain saw
+chest
+chiffonier
+chime
+china cabinet
+Christmas stocking
+church
+cinema
+cleaver
+cliff dwelling
+cloak
+clog
+cocktail shaker
+coffee mug
+coffeepot
+coil
+combination lock
+computer keyboard
+confectionery
+container ship
+convertible
+corkscrew
+cornet
+cowboy boot
+cowboy hat
+cradle
+crane
+crash helmet
+crate
+crib
+Crock Pot
+croquet ball
+crutch
+cuirass
+dam
+desk
+desktop computer
+dial telephone
+diaper
+digital clock
+digital watch
+dining table
+dishrag
+dishwasher
+disk brake
+dock
+dogsled
+dome
+doormat
+drilling platform
+drum
+drumstick
+dumbbell
+Dutch oven
+electric fan
+electric guitar
+electric locomotive
+entertainment center
+envelope
+espresso maker
+face powder
+feather boa
+file
+fireboat
+fire engine
+fire screen
+flagpole
+flute
+folding chair
+football helmet
+forklift
+fountain
+fountain pen
+four-poster
+freight car
+French horn
+frying pan
+fur coat
+garbage truck
+gasmask
+gas pump
+goblet
+go-kart
+golf ball
+golfcart
+gondola
+gong
+gown
+grand piano
+greenhouse
+grille
+grocery store
+guillotine
+hair slide
+hair spray
+half track
+hammer
+hamper
+hand blower
+hand-held computer
+handkerchief
+hard disc
+harmonica
+harp
+harvester
+hatchet
+holster
+home theater
+honeycomb
+hook
+hoopskirt
+horizontal bar
+horse cart
+hourglass
+iPod
+iron
+jack-o'-lantern
+jean
+jeep
+jersey
+jigsaw puzzle
+jinrikisha
+joystick
+kimono
+knee pad
+knot
+lab coat
+ladle
+lampshade
+laptop
+lawn mower
+lens cap
+letter opener
+library
+lifeboat
+lighter
+limousine
+liner
+lipstick
+Loafer
+lotion
+loudspeaker
+loupe
+lumbermill
+magnetic compass
+mailbag
+mailbox
+maillot
+maillot
+manhole cover
+maraca
+marimba
+mask
+matchstick
+maypole
+maze
+measuring cup
+medicine chest
+megalith
+microphone
+microwave
+military uniform
+milk can
+minibus
+miniskirt
+minivan
+missile
+mitten
+mixing bowl
+mobile home
+Model T
+modem
+monastery
+monitor
+moped
+mortar
+mortarboard
+mosque
+mosquito net
+motor scooter
+mountain bike
+mountain tent
+mouse
+mousetrap
+moving van
+muzzle
+nail
+neck brace
+necklace
+nipple
+notebook
+obelisk
+oboe
+ocarina
+odometer
+oil filter
+organ
+oscilloscope
+overskirt
+oxcart
+oxygen mask
+packet
+paddle
+paddlewheel
+padlock
+paintbrush
+pajama
+palace
+panpipe
+paper towel
+parachute
+parallel bars
+park bench
+parking meter
+passenger car
+patio
+pay-phone
+pedestal
+pencil box
+pencil sharpener
+perfume
+Petri dish
+photocopier
+pick
+pickelhaube
+picket fence
+pickup
+pier
+piggy bank
+pill bottle
+pillow
+ping-pong ball
+pinwheel
+pirate
+pitcher
+plane
+planetarium
+plastic bag
+plate rack
+plow
+plunger
+Polaroid camera
+pole
+police van
+poncho
+pool table
+pop bottle
+pot
+potter's wheel
+power drill
+prayer rug
+printer
+prison
+projectile
+projector
+puck
+punching bag
+purse
+quill
+quilt
+racer
+racket
+radiator
+radio
+radio telescope
+rain barrel
+recreational vehicle
+reel
+reflex camera
+refrigerator
+remote control
+restaurant
+revolver
+rifle
+rocking chair
+rotisserie
+rubber eraser
+rugby ball
+rule
+running shoe
+safe
+safety pin
+saltshaker
+sandal
+sarong
+sax
+scabbard
+scale
+school bus
+schooner
+scoreboard
+screen
+screw
+screwdriver
+seat belt
+sewing machine
+shield
+shoe shop
+shoji
+shopping basket
+shopping cart
+shovel
+shower cap
+shower curtain
+ski
+ski mask
+sleeping bag
+slide rule
+sliding door
+slot
+snorkel
+snowmobile
+snowplow
+soap dispenser
+soccer ball
+sock
+solar dish
+sombrero
+soup bowl
+space bar
+space heater
+space shuttle
+spatula
+speedboat
+spider web
+spindle
+sports car
+spotlight
+stage
+steam locomotive
+steel arch bridge
+steel drum
+stethoscope
+stole
+stone wall
+stopwatch
+stove
+strainer
+streetcar
+stretcher
+studio couch
+stupa
+submarine
+suit
+sundial
+sunglass
+sunglasses
+sunscreen
+suspension bridge
+swab
+sweatshirt
+swimming trunks
+swing
+switch
+syringe
+table lamp
+tank
+tape player
+teapot
+teddy
+television
+tennis ball
+thatch
+theater curtain
+thimble
+thresher
+throne
+tile roof
+toaster
+tobacco shop
+toilet seat
+torch
+totem pole
+tow truck
+toyshop
+tractor
+trailer truck
+tray
+trench coat
+tricycle
+trimaran
+tripod
+triumphal arch
+trolleybus
+trombone
+tub
+turnstile
+typewriter keyboard
+umbrella
+unicycle
+upright
+vacuum
+vase
+vault
+velvet
+vending machine
+vestment
+viaduct
+violin
+volleyball
+waffle iron
+wall clock
+wallet
+wardrobe
+warplane
+washbasin
+washer
+water bottle
+water jug
+water tower
+whiskey jug
+whistle
+wig
+window screen
+window shade
+Windsor tie
+wine bottle
+wing
+wok
+wooden spoon
+wool
+worm fence
+wreck
+yawl
+yurt
+web site
+comic book
+crossword puzzle
+street sign
+traffic light
+book jacket
+menu
+plate
+guacamole
+consomme
+hot pot
+trifle
+ice cream
+ice lolly
+French loaf
+bagel
+pretzel
+cheeseburger
+hotdog
+mashed potato
+head cabbage
+broccoli
+cauliflower
+zucchini
+spaghetti squash
+acorn squash
+butternut squash
+cucumber
+artichoke
+bell pepper
+cardoon
+mushroom
+Granny Smith
+strawberry
+orange
+lemon
+fig
+pineapple
+banana
+jackfruit
+custard apple
+pomegranate
+hay
+carbonara
+chocolate sauce
+dough
+meat loaf
+pizza
+potpie
+burrito
+red wine
+espresso
+cup
+eggnog
+alp
+bubble
+cliff
+coral reef
+geyser
+lakeside
+promontory
+sandbar
+seashore
+valley
+volcano
+ballplayer
+groom
+scuba diver
+rapeseed
+daisy
+yellow lady's slipper
+corn
+acorn
+hip
+buckeye
+coral fungus
+agaric
+gyromitra
+stinkhorn
+earthstar
+hen-of-the-woods
+bolete
+ear
+toilet tissue
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/resnet50_per_tensor_dynamic_range.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/resnet50_per_tensor_dynamic_range.txt
new file mode 100644
index 0000000000000000000000000000000000000000..225d3021dd294b14ab8b4a27b600114893151acb
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/int8_api/resnet50_per_tensor_dynamic_range.txt
@@ -0,0 +1,183 @@
+gpu_0/data_0: 1.00024
+gpu_0/conv1_1: 5.43116
+gpu_0/res_conv1_bn_1: 8.69736
+gpu_0/res_conv1_bn_2: 8.69736
+gpu_0/pool1_1: 8.69736
+gpu_0/res2_0_branch2a_1: 12.819
+gpu_0/res2_0_branch2a_bn_1: 5.47741
+gpu_0/res2_0_branch2a_bn_2: 5.58704
+gpu_0/res2_0_branch2b_1: 5.27718
+gpu_0/res2_0_branch2b_bn_1: 5.08003
+gpu_0/res2_0_branch2b_bn_2: 5.08003
+gpu_0/res2_0_branch2c_1: 2.33625
+gpu_0/res2_0_branch2c_bn_1: 3.17859
+gpu_0/res2_0_branch1_1: 6.10492
+gpu_0/res2_0_branch1_bn_1: 5.63119
+gpu_0/res2_0_branch2c_bn_2: 6.64099
+gpu_0/res2_0_branch2c_bn_3: 4.85535
+gpu_0/res2_1_branch2a_1: 3.55208
+gpu_0/res2_1_branch2a_bn_1: 5.12617
+gpu_0/res2_1_branch2a_bn_2: 3.54669
+gpu_0/res2_1_branch2b_1: 5.56289
+gpu_0/res2_1_branch2b_bn_1: 7.11808
+gpu_0/res2_1_branch2b_bn_2: 6.92282
+gpu_0/res2_1_branch2c_1: 2.19201
+gpu_0/res2_1_branch2c_bn_1: 3.78733
+gpu_0/res2_1_branch2c_bn_2: 4.60415
+gpu_0/res2_1_branch2c_bn_3: 4.60415
+gpu_0/res2_2_branch2a_1: 3.96808
+gpu_0/res2_2_branch2a_bn_1: 4.94773
+gpu_0/res2_2_branch2a_bn_2: 5.50565
+gpu_0/res2_2_branch2b_1: 4.26613
+gpu_0/res2_2_branch2b_bn_1: 6.0784
+gpu_0/res2_2_branch2b_bn_2: 4.92818
+gpu_0/res2_2_branch2c_1: 1.76282
+gpu_0/res2_2_branch2c_bn_1: 3.52767
+gpu_0/res2_2_branch2c_bn_2: 7.08883
+gpu_0/res2_2_branch2c_bn_3: 6.83196
+gpu_0/res3_0_branch2a_1: 6.04728
+gpu_0/res3_0_branch2a_bn_1: 6.35389
+gpu_0/res3_0_branch2a_bn_2: 5.32155
+gpu_0/res3_0_branch2b_1: 4.82218
+gpu_0/res3_0_branch2b_bn_1: 4.97589
+gpu_0/res3_0_branch2b_bn_2: 5.15205
+gpu_0/res3_0_branch2c_1: 2.51726
+gpu_0/res3_0_branch2c_bn_1: 5.92965
+gpu_0/res3_0_branch1_1: 5.16373
+gpu_0/res3_0_branch1_bn_1: 8.38447
+gpu_0/res3_0_branch2c_bn_2: 9.55529
+gpu_0/res3_0_branch2c_bn_3: 9.55529
+gpu_0/res3_1_branch2a_1: 8.36638
+gpu_0/res3_1_branch2a_bn_1: 5.10129
+gpu_0/res3_1_branch2a_bn_2: 6.53472
+gpu_0/res3_1_branch2b_1: 8.96734
+gpu_0/res3_1_branch2b_bn_1: 10.0194
+gpu_0/res3_1_branch2b_bn_2: 7.34823
+gpu_0/res3_1_branch2c_1: 3.2582
+gpu_0/res3_1_branch2c_bn_1: 6.99684
+gpu_0/res3_1_branch2c_bn_2: 10.1138
+gpu_0/res3_1_branch2c_bn_3: 6.95004
+gpu_0/res3_2_branch2a_1: 5.10651
+gpu_0/res3_2_branch2a_bn_1: 6.64402
+gpu_0/res3_2_branch2a_bn_2: 5.18487
+gpu_0/res3_2_branch2b_1: 5.96782
+gpu_0/res3_2_branch2b_bn_1: 7.1799
+gpu_0/res3_2_branch2b_bn_2: 5.37818
+gpu_0/res3_2_branch2c_1: 1.32356
+gpu_0/res3_2_branch2c_bn_1: 3.33188
+gpu_0/res3_2_branch2c_bn_2: 5.36147
+gpu_0/res3_2_branch2c_bn_3: 5.36147
+gpu_0/res3_3_branch2a_1: 4.85147
+gpu_0/res3_3_branch2a_bn_1: 5.59218
+gpu_0/res3_3_branch2a_bn_2: 4.86311
+gpu_0/res3_3_branch2b_1: 3.96831
+gpu_0/res3_3_branch2b_bn_1: 6.06881
+gpu_0/res3_3_branch2b_bn_2: 4.00068
+gpu_0/res3_3_branch2c_1: 0.921573
+gpu_0/res3_3_branch2c_bn_1: 2.8969
+gpu_0/res3_3_branch2c_bn_2: 5.85236
+gpu_0/res3_3_branch2c_bn_3: 5.59852
+gpu_0/res4_0_branch2a_1: 5.03899
+gpu_0/res4_0_branch2a_bn_1: 7.45267
+gpu_0/res4_0_branch2a_bn_2: 6.18469
+gpu_0/res4_0_branch2b_1: 4.83455
+gpu_0/res4_0_branch2b_bn_1: 6.04993
+gpu_0/res4_0_branch2b_bn_2: 6.04993
+gpu_0/res4_0_branch2c_1: 2.82144
+gpu_0/res4_0_branch2c_bn_1: 5.38268
+gpu_0/res4_0_branch1_1: 4.31843
+gpu_0/res4_0_branch1_bn_1: 5.26767
+gpu_0/res4_0_branch2c_bn_2: 7.62006
+gpu_0/res4_0_branch2c_bn_3: 7.49355
+gpu_0/res4_1_branch2a_1: 7.48333
+gpu_0/res4_1_branch2a_bn_1: 5.23361
+gpu_0/res4_1_branch2a_bn_2: 6.22436
+gpu_0/res4_1_branch2b_1: 7.80429
+gpu_0/res4_1_branch2b_bn_1: 5.02395
+gpu_0/res4_1_branch2b_bn_2: 4.22194
+gpu_0/res4_1_branch2c_1: 1.61523
+gpu_0/res4_1_branch2c_bn_1: 5.06857
+gpu_0/res4_1_branch2c_bn_2: 6.47686
+gpu_0/res4_1_branch2c_bn_3: 6.47686
+gpu_0/res4_2_branch2a_1: 3.87822
+gpu_0/res4_2_branch2a_bn_1: 6.10799
+gpu_0/res4_2_branch2a_bn_2: 4.31025
+gpu_0/res4_2_branch2b_1: 4.03413
+gpu_0/res4_2_branch2b_bn_1: 6.68894
+gpu_0/res4_2_branch2b_bn_2: 5.0679
+gpu_0/res4_2_branch2c_1: 1.26098
+gpu_0/res4_2_branch2c_bn_1: 5.29023
+gpu_0/res4_2_branch2c_bn_2: 6.20245
+gpu_0/res4_2_branch2c_bn_3: 6.10486
+gpu_0/res4_3_branch2a_1: 3.20987
+gpu_0/res4_3_branch2a_bn_1: 4.39172
+gpu_0/res4_3_branch2a_bn_2: 4.14733
+gpu_0/res4_3_branch2b_1: 3.92574
+gpu_0/res4_3_branch2b_bn_1: 4.55813
+gpu_0/res4_3_branch2b_bn_2: 3.8462
+gpu_0/res4_3_branch2c_1: 1.00342
+gpu_0/res4_3_branch2c_bn_1: 4.34035
+gpu_0/res4_3_branch2c_bn_2: 5.30305
+gpu_0/res4_3_branch2c_bn_3: 5.30305
+gpu_0/res4_4_branch2a_1: 3.05409
+gpu_0/res4_4_branch2a_bn_1: 4.87153
+gpu_0/res4_4_branch2a_bn_2: 3.2817
+gpu_0/res4_4_branch2b_1: 2.60867
+gpu_0/res4_4_branch2b_bn_1: 4.43434
+gpu_0/res4_4_branch2b_bn_2: 3.89483
+gpu_0/res4_4_branch2c_1: 1.83117
+gpu_0/res4_4_branch2c_bn_1: 3.99871
+gpu_0/res4_4_branch2c_bn_2: 5.77232
+gpu_0/res4_4_branch2c_bn_3: 5.39331
+gpu_0/res4_5_branch2a_1: 4.68277
+gpu_0/res4_5_branch2a_bn_1: 6.16417
+gpu_0/res4_5_branch2a_bn_2: 6.16333
+gpu_0/res4_5_branch2b_1: 3.1276
+gpu_0/res4_5_branch2b_bn_1: 7.00038
+gpu_0/res4_5_branch2b_bn_2: 6.9702
+gpu_0/res4_5_branch2c_1: 1.37766
+gpu_0/res4_5_branch2c_bn_1: 3.93406
+gpu_0/res4_5_branch2c_bn_2: 5.4295
+gpu_0/res4_5_branch2c_bn_3: 5.4295
+gpu_0/res5_0_branch2a_1: 2.65465
+gpu_0/res5_0_branch2a_bn_1: 6.09584
+gpu_0/res5_0_branch2a_bn_2: 3.38788
+gpu_0/res5_0_branch2b_1: 2.74351
+gpu_0/res5_0_branch2b_bn_1: 5.598
+gpu_0/res5_0_branch2b_bn_2: 3.47276
+gpu_0/res5_0_branch2c_1: 2.64331
+gpu_0/res5_0_branch2c_bn_1: 12.3477
+gpu_0/res5_0_branch1_1: 1.78121
+gpu_0/res5_0_branch1_bn_1: 13.8335
+gpu_0/res5_0_branch2c_bn_2: 18.1711
+gpu_0/res5_0_branch2c_bn_3: 21.687
+gpu_0/res5_1_branch2a_1: 8.10959
+gpu_0/res5_1_branch2a_bn_1: 4.35337
+gpu_0/res5_1_branch2a_bn_2: 2.78138
+gpu_0/res5_1_branch2b_1: 3.10084
+gpu_0/res5_1_branch2b_bn_1: 5.05929
+gpu_0/res5_1_branch2b_bn_2: 2.5665
+gpu_0/res5_1_branch2c_1: 0.996128
+gpu_0/res5_1_branch2c_bn_1: 8.6475
+gpu_0/res5_1_branch2c_bn_2: 16.7257
+gpu_0/res5_1_branch2c_bn_3: 18.942
+gpu_0/res5_2_branch2a_1: 10.8203
+gpu_0/res5_2_branch2a_bn_1: 3.37798
+gpu_0/res5_2_branch2a_bn_2: 2.80768
+gpu_0/res5_2_branch2b_1: 2.15978
+gpu_0/res5_2_branch2b_bn_1: 4.58982
+gpu_0/res5_2_branch2b_bn_2: 3.21134
+gpu_0/res5_2_branch2c_1: 0.586011
+gpu_0/res5_2_branch2c_bn_1: 10.6795
+gpu_0/res5_2_branch2c_bn_2: 20.6414
+gpu_0/res5_2_branch2c_bn_3: 22.2285
+gpu_0/pool5_1: 22.2285
+OC2_DUMMY_0: 6.08994
+(Unnamed Layer* 174) [Constant]_output: 0.443716
+(Unnamed Layer* 175) [Fully Connected]_output: 6.40009
+(Unnamed Layer* 176) [Constant]_output: 0.0365279
+(Unnamed Layer* 177) [Shuffle]_output: 0.0365279
+gpu_0/pred_1: 6.46343
+(Unnamed Layer* 179) [Shuffle]_output: 6.46343
+(Unnamed Layer* 180) [Softmax]_output: 0.0303731
+gpu_0/softmax_1: 0.0303731
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/0.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/0.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..87cc261e4d8ab93af3634645aa86a17dc658899f
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/0.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/1.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/1.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..aa20b45bda3da9e8766b7652a772fe0373117988
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/1.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/2.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/2.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..c88a685f84737ed5eaba699d5a5a8261279507bb
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/2.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/3.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/3.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..e78a083492f4b4b50525d0273b003f79f1224784
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/3.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/4.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/4.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..43a8053c93b1fc6ae7f8f60855d0b25486c4abc5
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/4.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/5.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/5.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..71f1f8b694b9054aeac604b73f8a5bcc61988e1a
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/5.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/6.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/6.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..3a24fd3212697dd886709c3f10ef40bfa71406ae
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/6.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/7.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/7.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..7fa203232239579e58bb53bd8861aea5c423ef89
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/7.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/8.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/8.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..ef4c89f339bc7b2ff5b0ec910dfffa5a1ba422f5
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/8.pgm differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/9.pgm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/9.pgm
new file mode 100644
index 0000000000000000000000000000000000000000..4c4e679cdf5dc4aba0bfb721dd55b19ef8573a15
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/9.pgm
@@ -0,0 +1,4 @@
+P5
+28 28
+255
+ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÞ2Xüÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ¯Wàÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿv,Q.ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿý‰úÿíÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ¦iûÿÿíäÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿö)"üÿÿÿéöÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÊ­ÿÿÿÿVöÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ7	ÿÿÿÿÑöÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ5	ÿÿÿÿRöÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ5y‘iŽ?§ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ´+ŽõGÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿèÅ¥¥ÅõÿGÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿG]ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿƒDÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ êÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿòéÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿú»ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþEÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿUÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ°;Öÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ
\ No newline at end of file
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..83d95228c04d7c9dd5936f706ab36d00015849ff
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/README.md
@@ -0,0 +1,20 @@
+# Setting Up MNIST Samples
+
+## Models
+
+mnist.onnx: Opset 8, Retrieved from [ONNX Model Zoo](https://github.com/onnx/models/tree/master/vision/classification/mnist)
+
+## Run ONNX model with trtexec
+
+* FP32 precision with fixed batch size 1
+  * `./trtexec --explicitBatch --onnx=mnist.onnx --workspace=1024`
+* Other precisions
+  * Add `--fp16` for FP16 and `--int8` for INT8.
+
+## Run safety ONNX model with sampleSafeMNIST
+
+* Build safe engine
+  * `./sample_mnist_safe_build`
+* Inference
+  * `./sample_mnist_safe_infer`
+* See the sample README for more details.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/mnist.onnx b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/mnist.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..79fe504ea361c19c9f9e7eadacf2ca19959fe581
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/mnist/mnist.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f06e72de813a8635c9bc0397ac447a601bdbfa7df4bebc278723b958831c9bf
+size 26454
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..195b09aa217d95736d59a86c6d36d5ea234d9b13
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/README.md
@@ -0,0 +1,13 @@
+# Models
+
+## UFF
+
+resnet50-infer-5.uff
+- trained by NVIDIA, based on ResNet50 V1 model from [TF-Slim](https://github.com/tensorflow/models/tree/master/research/slim)
+- converted to UFF using `convert-to-uff`
+  - `convert-to-uff <models>/resnet_all-nlayer_50__precision0_randominit.pb -o tf2trt_resnet50.uff -t -O spatial_avg`
+
+## Caffe
+
+ResNet50_N2.prototxt and ResNet50_fp32.caffemodel
+- downloaded from https://github.com/KaimingHe/deep-residual-networks#models
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/ResNet50.onnx b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/ResNet50.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..ffa1a0fc21278214e6fa893324fa09648eaa7866
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/ResNet50.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78eecdb9354e71364b9df6f3b5824ecc48710938d5b4ea23724b9a2e9edbc4a6
+size 102489423
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/airliner.ppm b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/airliner.ppm
new file mode 100644
index 0000000000000000000000000000000000000000..ea23812f43bf14b6e20ca945d2c3127760b1d421
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/airliner.ppm
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eac2811e99893115847d2e6ab24bae5a1e4ff64820a000a672333c07dc29e083
+size 150543
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/binoculars.jpeg b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/binoculars.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..62f74209b9f3f5091070eee241ae26c525313477
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/binoculars.jpeg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a44365fb4f2a5802eb379f7d788ddfc7da09ccbee5740283d1a9cd1f9928e8a
+size 159788
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/class_labels.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/class_labels.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f40829ed0fc318c673860fae4be6c48529da116e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/class_labels.txt
@@ -0,0 +1,1000 @@
+tench
+goldfish
+great white shark
+tiger shark
+hammerhead
+electric ray
+stingray
+cock
+hen
+ostrich
+brambling
+goldfinch
+house finch
+junco
+indigo bunting
+robin
+bulbul
+jay
+magpie
+chickadee
+water ouzel
+kite
+bald eagle
+vulture
+great grey owl
+European fire salamander
+common newt
+eft
+spotted salamander
+axolotl
+bullfrog
+tree frog
+tailed frog
+loggerhead
+leatherback turtle
+mud turtle
+terrapin
+box turtle
+banded gecko
+common iguana
+American chameleon
+whiptail
+agama
+frilled lizard
+alligator lizard
+Gila monster
+green lizard
+African chameleon
+Komodo dragon
+African crocodile
+American alligator
+triceratops
+thunder snake
+ringneck snake
+hognose snake
+green snake
+king snake
+garter snake
+water snake
+vine snake
+night snake
+boa constrictor
+rock python
+Indian cobra
+green mamba
+sea snake
+horned viper
+diamondback
+sidewinder
+trilobite
+harvestman
+scorpion
+black and gold garden spider
+barn spider
+garden spider
+black widow
+tarantula
+wolf spider
+tick
+centipede
+black grouse
+ptarmigan
+ruffed grouse
+prairie chicken
+peacock
+quail
+partridge
+African grey
+macaw
+sulphur-crested cockatoo
+lorikeet
+coucal
+bee eater
+hornbill
+hummingbird
+jacamar
+toucan
+drake
+red-breasted merganser
+goose
+black swan
+tusker
+echidna
+platypus
+wallaby
+koala
+wombat
+jellyfish
+sea anemone
+brain coral
+flatworm
+nematode
+conch
+snail
+slug
+sea slug
+chiton
+chambered nautilus
+Dungeness crab
+rock crab
+fiddler crab
+king crab
+American lobster
+spiny lobster
+crayfish
+hermit crab
+isopod
+white stork
+black stork
+spoonbill
+flamingo
+little blue heron
+American egret
+bittern
+crane
+limpkin
+European gallinule
+American coot
+bustard
+ruddy turnstone
+red-backed sandpiper
+redshank
+dowitcher
+oystercatcher
+pelican
+king penguin
+albatross
+grey whale
+killer whale
+dugong
+sea lion
+Chihuahua
+Japanese spaniel
+Maltese dog
+Pekinese
+Shih-Tzu
+Blenheim spaniel
+papillon
+toy terrier
+Rhodesian ridgeback
+Afghan hound
+basset
+beagle
+bloodhound
+bluetick
+black-and-tan coonhound
+Walker hound
+English foxhound
+redbone
+borzoi
+Irish wolfhound
+Italian greyhound
+whippet
+Ibizan hound
+Norwegian elkhound
+otterhound
+Saluki
+Scottish deerhound
+Weimaraner
+Staffordshire bullterrier
+American Staffordshire terrier
+Bedlington terrier
+Border terrier
+Kerry blue terrier
+Irish terrier
+Norfolk terrier
+Norwich terrier
+Yorkshire terrier
+wire-haired fox terrier
+Lakeland terrier
+Sealyham terrier
+Airedale
+cairn
+Australian terrier
+Dandie Dinmont
+Boston bull
+miniature schnauzer
+giant schnauzer
+standard schnauzer
+Scotch terrier
+Tibetan terrier
+silky terrier
+soft-coated wheaten terrier
+West Highland white terrier
+Lhasa
+flat-coated retriever
+curly-coated retriever
+golden retriever
+Labrador retriever
+Chesapeake Bay retriever
+German short-haired pointer
+vizsla
+English setter
+Irish setter
+Gordon setter
+Brittany spaniel
+clumber
+English springer
+Welsh springer spaniel
+cocker spaniel
+Sussex spaniel
+Irish water spaniel
+kuvasz
+schipperke
+groenendael
+malinois
+briard
+kelpie
+komondor
+Old English sheepdog
+Shetland sheepdog
+collie
+Border collie
+Bouvier des Flandres
+Rottweiler
+German shepherd
+Doberman
+miniature pinscher
+Greater Swiss Mountain dog
+Bernese mountain dog
+Appenzeller
+EntleBucher
+boxer
+bull mastiff
+Tibetan mastiff
+French bulldog
+Great Dane
+Saint Bernard
+Eskimo dog
+malamute
+Siberian husky
+dalmatian
+affenpinscher
+basenji
+pug
+Leonberg
+Newfoundland
+Great Pyrenees
+Samoyed
+Pomeranian
+chow
+keeshond
+Brabancon griffon
+Pembroke
+Cardigan
+toy poodle
+miniature poodle
+standard poodle
+Mexican hairless
+timber wolf
+white wolf
+red wolf
+coyote
+dingo
+dhole
+African hunting dog
+hyena
+red fox
+kit fox
+Arctic fox
+grey fox
+tabby
+tiger cat
+Persian cat
+Siamese cat
+Egyptian cat
+cougar
+lynx
+leopard
+snow leopard
+jaguar
+lion
+tiger
+cheetah
+brown bear
+American black bear
+ice bear
+sloth bear
+mongoose
+meerkat
+tiger beetle
+ladybug
+ground beetle
+long-horned beetle
+leaf beetle
+dung beetle
+rhinoceros beetle
+weevil
+fly
+bee
+ant
+grasshopper
+cricket
+walking stick
+cockroach
+mantis
+cicada
+leafhopper
+lacewing
+dragonfly
+damselfly
+admiral
+ringlet
+monarch
+cabbage butterfly
+sulphur butterfly
+lycaenid
+starfish
+sea urchin
+sea cucumber
+wood rabbit
+hare
+Angora
+hamster
+porcupine
+fox squirrel
+marmot
+beaver
+guinea pig
+sorrel
+zebra
+hog
+wild boar
+warthog
+hippopotamus
+ox
+water buffalo
+bison
+ram
+bighorn
+ibex
+hartebeest
+impala
+gazelle
+Arabian camel
+llama
+weasel
+mink
+polecat
+black-footed ferret
+otter
+skunk
+badger
+armadillo
+three-toed sloth
+orangutan
+gorilla
+chimpanzee
+gibbon
+siamang
+guenon
+patas
+baboon
+macaque
+langur
+colobus
+proboscis monkey
+marmoset
+capuchin
+howler monkey
+titi
+spider monkey
+squirrel monkey
+Madagascar cat
+indri
+Indian elephant
+African elephant
+lesser panda
+giant panda
+barracouta
+eel
+coho
+rock beauty
+anemone fish
+sturgeon
+gar
+lionfish
+puffer
+abacus
+abaya
+academic gown
+accordion
+acoustic guitar
+aircraft carrier
+airliner
+airship
+altar
+ambulance
+amphibian
+analog clock
+apiary
+apron
+ashcan
+assault rifle
+backpack
+bakery
+balance beam
+balloon
+ballpoint
+Band Aid
+banjo
+bannister
+barbell
+barber chair
+barbershop
+barn
+barometer
+barrel
+barrow
+baseball
+basketball
+bassinet
+bassoon
+bathing cap
+bath towel
+bathtub
+beach wagon
+beacon
+beaker
+bearskin
+beer bottle
+beer glass
+bell cote
+bib
+bicycle-built-for-two
+bikini
+binder
+binoculars
+birdhouse
+boathouse
+bobsled
+bolo tie
+bonnet
+bookcase
+bookshop
+bottlecap
+bow
+bow tie
+brass
+brassiere
+breakwater
+breastplate
+broom
+bucket
+buckle
+bulletproof vest
+bullet train
+butcher shop
+cab
+caldron
+candle
+cannon
+canoe
+can opener
+cardigan
+car mirror
+carousel
+carpenter's kit
+carton
+car wheel
+cash machine
+cassette
+cassette player
+castle
+catamaran
+CD player
+cello
+cellular telephone
+chain
+chainlink fence
+chain mail
+chain saw
+chest
+chiffonier
+chime
+china cabinet
+Christmas stocking
+church
+cinema
+cleaver
+cliff dwelling
+cloak
+clog
+cocktail shaker
+coffee mug
+coffeepot
+coil
+combination lock
+computer keyboard
+confectionery
+container ship
+convertible
+corkscrew
+cornet
+cowboy boot
+cowboy hat
+cradle
+crane
+crash helmet
+crate
+crib
+Crock Pot
+croquet ball
+crutch
+cuirass
+dam
+desk
+desktop computer
+dial telephone
+diaper
+digital clock
+digital watch
+dining table
+dishrag
+dishwasher
+disk brake
+dock
+dogsled
+dome
+doormat
+drilling platform
+drum
+drumstick
+dumbbell
+Dutch oven
+electric fan
+electric guitar
+electric locomotive
+entertainment center
+envelope
+espresso maker
+face powder
+feather boa
+file
+fireboat
+fire engine
+fire screen
+flagpole
+flute
+folding chair
+football helmet
+forklift
+fountain
+fountain pen
+four-poster
+freight car
+French horn
+frying pan
+fur coat
+garbage truck
+gasmask
+gas pump
+goblet
+go-kart
+golf ball
+golfcart
+gondola
+gong
+gown
+grand piano
+greenhouse
+grille
+grocery store
+guillotine
+hair slide
+hair spray
+half track
+hammer
+hamper
+hand blower
+hand-held computer
+handkerchief
+hard disc
+harmonica
+harp
+harvester
+hatchet
+holster
+home theater
+honeycomb
+hook
+hoopskirt
+horizontal bar
+horse cart
+hourglass
+iPod
+iron
+jack-o'-lantern
+jean
+jeep
+jersey
+jigsaw puzzle
+jinrikisha
+joystick
+kimono
+knee pad
+knot
+lab coat
+ladle
+lampshade
+laptop
+lawn mower
+lens cap
+letter opener
+library
+lifeboat
+lighter
+limousine
+liner
+lipstick
+Loafer
+lotion
+loudspeaker
+loupe
+lumbermill
+magnetic compass
+mailbag
+mailbox
+maillot
+maillot
+manhole cover
+maraca
+marimba
+mask
+matchstick
+maypole
+maze
+measuring cup
+medicine chest
+megalith
+microphone
+microwave
+military uniform
+milk can
+minibus
+miniskirt
+minivan
+missile
+mitten
+mixing bowl
+mobile home
+Model T
+modem
+monastery
+monitor
+moped
+mortar
+mortarboard
+mosque
+mosquito net
+motor scooter
+mountain bike
+mountain tent
+mouse
+mousetrap
+moving van
+muzzle
+nail
+neck brace
+necklace
+nipple
+notebook
+obelisk
+oboe
+ocarina
+odometer
+oil filter
+organ
+oscilloscope
+overskirt
+oxcart
+oxygen mask
+packet
+paddle
+paddlewheel
+padlock
+paintbrush
+pajama
+palace
+panpipe
+paper towel
+parachute
+parallel bars
+park bench
+parking meter
+passenger car
+patio
+pay-phone
+pedestal
+pencil box
+pencil sharpener
+perfume
+Petri dish
+photocopier
+pick
+pickelhaube
+picket fence
+pickup
+pier
+piggy bank
+pill bottle
+pillow
+ping-pong ball
+pinwheel
+pirate
+pitcher
+plane
+planetarium
+plastic bag
+plate rack
+plow
+plunger
+Polaroid camera
+pole
+police van
+poncho
+pool table
+pop bottle
+pot
+potter's wheel
+power drill
+prayer rug
+printer
+prison
+projectile
+projector
+puck
+punching bag
+purse
+quill
+quilt
+racer
+racket
+radiator
+radio
+radio telescope
+rain barrel
+recreational vehicle
+reel
+reflex camera
+refrigerator
+remote control
+restaurant
+revolver
+rifle
+rocking chair
+rotisserie
+rubber eraser
+rugby ball
+rule
+running shoe
+safe
+safety pin
+saltshaker
+sandal
+sarong
+sax
+scabbard
+scale
+school bus
+schooner
+scoreboard
+screen
+screw
+screwdriver
+seat belt
+sewing machine
+shield
+shoe shop
+shoji
+shopping basket
+shopping cart
+shovel
+shower cap
+shower curtain
+ski
+ski mask
+sleeping bag
+slide rule
+sliding door
+slot
+snorkel
+snowmobile
+snowplow
+soap dispenser
+soccer ball
+sock
+solar dish
+sombrero
+soup bowl
+space bar
+space heater
+space shuttle
+spatula
+speedboat
+spider web
+spindle
+sports car
+spotlight
+stage
+steam locomotive
+steel arch bridge
+steel drum
+stethoscope
+stole
+stone wall
+stopwatch
+stove
+strainer
+streetcar
+stretcher
+studio couch
+stupa
+submarine
+suit
+sundial
+sunglass
+sunglasses
+sunscreen
+suspension bridge
+swab
+sweatshirt
+swimming trunks
+swing
+switch
+syringe
+table lamp
+tank
+tape player
+teapot
+teddy
+television
+tennis ball
+thatch
+theater curtain
+thimble
+thresher
+throne
+tile roof
+toaster
+tobacco shop
+toilet seat
+torch
+totem pole
+tow truck
+toyshop
+tractor
+trailer truck
+tray
+trench coat
+tricycle
+trimaran
+tripod
+triumphal arch
+trolleybus
+trombone
+tub
+turnstile
+typewriter keyboard
+umbrella
+unicycle
+upright
+vacuum
+vase
+vault
+velvet
+vending machine
+vestment
+viaduct
+violin
+volleyball
+waffle iron
+wall clock
+wallet
+wardrobe
+warplane
+washbasin
+washer
+water bottle
+water jug
+water tower
+whiskey jug
+whistle
+wig
+window screen
+window shade
+Windsor tie
+wine bottle
+wing
+wok
+wooden spoon
+wool
+worm fence
+wreck
+yawl
+yurt
+web site
+comic book
+crossword puzzle
+street sign
+traffic light
+book jacket
+menu
+plate
+guacamole
+consomme
+hot pot
+trifle
+ice cream
+ice lolly
+French loaf
+bagel
+pretzel
+cheeseburger
+hotdog
+mashed potato
+head cabbage
+broccoli
+cauliflower
+zucchini
+spaghetti squash
+acorn squash
+butternut squash
+cucumber
+artichoke
+bell pepper
+cardoon
+mushroom
+Granny Smith
+strawberry
+orange
+lemon
+fig
+pineapple
+banana
+jackfruit
+custard apple
+pomegranate
+hay
+carbonara
+chocolate sauce
+dough
+meat loaf
+pizza
+potpie
+burrito
+red wine
+espresso
+cup
+eggnog
+alp
+bubble
+cliff
+coral reef
+geyser
+lakeside
+promontory
+sandbar
+seashore
+valley
+volcano
+ballplayer
+groom
+scuba diver
+rapeseed
+daisy
+yellow lady's slipper
+corn
+acorn
+hip
+buckeye
+coral fungus
+agaric
+gyromitra
+stinkhorn
+earthstar
+hen-of-the-woods
+bolete
+ear
+toilet tissue
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/reflex_camera.jpeg b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/reflex_camera.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..fe985f5dcab4050be11306458d8ea84b0dd73227
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/reflex_camera.jpeg differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/tabby_tiger_cat.jpg b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/tabby_tiger_cat.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..28ddb4ec657d8511d2803f919174d46186408f38
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/data/resnet50/tabby_tiger_cat.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5f516d7cde858db080760927ea73b1d5bab21a38ca0d4a1aea5b0e6f884969c
+size 110969
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/doc/Acknowledgements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/doc/Acknowledgements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3967109a4a434b2693995cc491efdf1520e87f30
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/doc/Acknowledgements.txt
@@ -0,0 +1,4738 @@
+SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+property and proprietary rights in and to this material, related
+documentation and any modifications thereto. Any use, reproduction,
+disclosure or distribution of this material and related documentation
+without an express license agreement from NVIDIA CORPORATION or
+its affiliates is strictly prohibited.
+
+This project will download and install additional third-party open source software projects.
+Review the license terms of these open source projects before use.
+
+TensorRT uses elements from the following software, whose licenses are reproduced below.
+
+Abseil C++ Common Libraries
+---------------------------
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        https://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       https://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+Google Protobuf
+---------------
+Copyright 2008 Google Inc.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Code generated by the Protocol Buffer compiler is owned by the owner
+of the input file used when generating it.  This code is not
+standalone and requires a support library to be linked with it.  This
+support library is itself covered by the above license.
+
+Google Flatbuffers
+------------------
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2014 Google Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+BVLC caffe
+----------
+COPYRIGHT
+
+All contributions by the University of California:
+Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
+All rights reserved.
+
+All other contributions:
+Copyright (c) 2014, 2015, the respective contributors
+All rights reserved.
+
+Caffe uses a shared copyright model: each contributor holds copyright over
+their contributions to Caffe. The project versioning records all such
+contribution and copyright details. If a contributor wants to further mark
+their specific copyright on a particular contribution, they should indicate
+their copyright solely in the commit message of the change when it is
+committed.
+
+LICENSE
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+CONTRIBUTION AGREEMENT
+
+By contributing to the BVLC/caffe repository through pull-request, comment,
+or otherwise, the contributor releases their content to the
+license and copyright terms herein.
+
+half.h
+------
+Copyright (c) 2012-2017 Christian Rau <rauy@users.sourceforge.net>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+jQuery.js
+---------
+jQuery.js is generated automatically under doxygen.
+In all cases TensorRT uses the functions under the MIT license.
+
+CRC
+---
+TensorRT includes CRC routines from FreeBSD.
+
+# $FreeBSD: head/COPYRIGHT 260125 2013-12-31 12:18:10Z gjb $
+# @(#)COPYRIGHT 8.2 (Berkeley) 3/21/94
+
+The compilation of software known as FreeBSD is distributed under the
+following terms:
+
+Copyright (c) 1992-2014 The FreeBSD Project. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+The 4.4BSD and 4.4BSD-Lite software is distributed under the following
+terms:
+
+All of the documentation and software included in the 4.4BSD and 4.4BSD-Lite
+Releases is copyrighted by The Regents of the University of California.
+
+Copyright 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994
+The Regents of the University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. All advertising materials mentioning features or use of this software
+   must display the following acknowledgement:
+This product includes software developed by the University of
+California, Berkeley and its contributors.
+4. Neither the name of the University nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+The Institute of Electrical and Electronics Engineers and the American
+National Standards Committee X3, on Information Processing Systems have
+given us permission to reprint portions of their documentation.
+
+In the following statement, the phrase ``this text'' refers to portions
+of the system documentation.
+
+Portions of this text are reprinted and reproduced in electronic form in
+the second BSD Networking Software Release, from IEEE Std 1003.1-1988, IEEE
+Standard Portable Operating System Interface for Computer Environments
+(POSIX), copyright C 1988 by the Institute of Electrical and Electronics
+Engineers, Inc.  In the event of any discrepancy between these versions
+and the original IEEE Standard, the original IEEE Standard is the referee
+document.
+
+In the following statement, the phrase ``This material'' refers to portions
+of the system documentation.
+
+This material is reproduced with permission from American National
+Standards Committee X3, on Information Processing Systems.  Computer and
+Business Equipment Manufacturers Association (CBEMA), 311 First St., NW,
+Suite 500, Washington, DC 20001-2178.  The developmental work of
+Programming Language C was completed by the X3J11 Technical Committee.
+
+The views and conclusions contained in the software and documentation are
+those of the authors and should not be interpreted as representing official
+policies, either expressed or implied, of the Regents of the University
+of California.
+
+NOTE: The copyright of UC Berkeley's Berkeley Software Distribution ("BSD")
+source has been updated.  The copyright addendum may be found at
+ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change and is
+included below.
+
+July 22, 1999
+
+To All Licensees, Distributors of Any Version of BSD:
+
+As you know, certain of the Berkeley Software Distribution ("BSD") source
+code files require that further distributions of products containing all or
+portions of the software, acknowledge within their advertising materials
+that such products contain software developed by UC Berkeley and its
+contributors.
+
+Specifically, the provision reads:
+
+"     * 3. All advertising materials mentioning features or use of this software
+      *    must display the following acknowledgement:
+      *    This product includes software developed by the University of
+      *    California, Berkeley and its contributors."
+
+Effective immediately, licensees and distributors are no longer required to
+include the acknowledgement within advertising materials.  Accordingly, the
+foregoing paragraph of those BSD Unix files containing it is hereby deleted
+in its entirety.
+
+William Hoskins
+Director, Office of Technology Licensing
+University of California, Berkeley
+
+getopt.c
+--------
+$OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $
+$NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $
+
+Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+Sponsored in part by the Defense Advanced Research Projects
+Agency (DARPA) and Air Force Research Laboratory, Air Force
+Materiel Command, USAF, under agreement number F39502-99-1-0512.
+
+Copyright (c) 2000 The NetBSD Foundation, Inc.
+All rights reserved.
+
+This code is derived from software contributed to The NetBSD Foundation
+by Dieter Baron and Thomas Klausner.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+ONNX Model Zoo
+--------------
+MIT License
+
+Copyright (c) ONNX Project Contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+RESNET-50 Caffe models
+----------------------
+The MIT License (MIT)
+
+Copyright (c) 2016 Shaoqing Ren
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+OpenSSL
+-------
+Apache License Version 2.0
+
+Copyright (c) OpenSSL Project Contributors
+
+                              Apache License
+                        Version 2.0, January 2004
+                     https://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+         Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+         stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+         that You distribute, all copyright, patent, trademark, and
+         attribution notices from the Source form of the Work,
+         excluding those notices that do not pertain to any part of
+         the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+         distribution, then any Derivative Works that You distribute must
+         include a readable copy of the attribution notices contained
+         within such NOTICE file, excluding those notices that do not
+         pertain to any part of the Derivative Works, in at least one
+         of the following places: within a NOTICE text file distributed
+         as part of the Derivative Works; within the Source form or
+         documentation, if provided along with the Derivative Works; or,
+         within a display generated by the Derivative Works, if and
+         wherever such third-party notices normally appear. The contents
+         of the NOTICE file are for informational purposes only and
+         do not modify the License. You may add Your own attribution
+         notices within Derivative Works that You distribute, alongside
+         or as an addendum to the NOTICE text from the Work, provided
+         that such additional attribution notices cannot be construed
+         as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+Boost Beast
+-----------
+Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+thread-pool
+-----------
+MIT License
+
+Copyright (c) 2023 Barak Shoshany
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+docs
+----
+
+https://pypi.org/pypi/docs
+Unknown License
+
+Notices Text Not Available
+
+efficientnet
+------------
+Apache License Version 2.0
+
+https://github.com/qubvel/efficientnet
+
+Copyright 2019 The TensorFlow Authors, Pavel Yakubovskiy.  All rights reserved.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2017, The TensorFlow Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+giza
+----
+Apache License Version 2.0
+
+https://github.com/mongodb/docs-tools
+
+
+ Copyright 2014 MongoDB, Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+onnx
+----
+Apache License Version 2.0
+
+https://github.com/onnx/onnx
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+packaging
+---------
+Apache License Version 2.0
+
+https://github.com/pypa/packaging
+
+This software is made available under the terms of *either* of the licenses
+found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made
+under the terms of *both* these licenses.
+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+Copyright (c) Donald Stufft and individual contributors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+pybind11
+--------
+BSD
+
+https://github.com/pybind/pybind11
+
+Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You are under no obligation whatsoever to provide any bug fixes, patches, or
+upgrades to the features, functionality or performance of the source code
+("Enhancements") to anyone; however, if you choose to make your Enhancements
+available either publicly, or directly to the author of this software, without
+imposing a separate written license agreement for such Enhancements, then you
+hereby grant the following license: a non-exclusive, royalty-free perpetual
+license to install, use, modify, prepare derivative works, incorporate into
+other computer software, distribute, and sublicense such enhancements or
+derivative works thereof, in binary and source code form.
+
+python
+------
+PSL
+
+https://docs.python.org/3
+
+Python® is a registered trademark of the Python Software Foundation
+
+1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and
+   the Individual or Organization ("Licensee") accessing and otherwise using Python
+   3.12.4 software in source or binary form and its associated documentation.
+
+2. Subject to the terms and conditions of this License Agreement, PSF hereby
+   grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
+   analyze, test, perform and/or display publicly, prepare derivative works,
+   distribute, and otherwise use Python 3.12.4 alone or in any derivative
+   version, provided, however, that PSF's License Agreement and PSF's notice of
+   copyright, i.e., "Copyright © 2001-2023 Python Software Foundation; All Rights
+   Reserved" are retained in Python 3.12.4 alone or in any derivative version
+   prepared by Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on or
+   incorporates Python 3.12.4 or any part thereof, and wants to make the
+   derivative work available to others as provided herein, then Licensee hereby
+   agrees to include in any such work a brief summary of the changes made to Python
+   3.12.4.
+
+4. PSF is making Python 3.12.4 available to Licensee on an "AS IS" basis.
+   PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.  BY WAY OF
+   EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR
+   WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE
+   USE OF PYTHON 3.12.4 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
+
+5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 3.12.4
+   FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF
+   MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 3.12.4, OR ANY DERIVATIVE
+   THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
+
+6. This License Agreement will automatically terminate upon a material breach of
+   its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any relationship
+   of agency, partnership, or joint venture between PSF and Licensee.  This License
+   Agreement does not grant permission to use PSF trademarks or trade name in a
+   trademark sense to endorse or promote products or services of Licensee, or any
+   third party.
+
+8. By copying, installing or otherwise using Python 3.12.4, Licensee agrees
+   to be bound by the terms and conditions of this License Agreement.
+BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0
+BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1
+
+1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at
+   160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization
+   ("Licensee") accessing and otherwise using this software in source or binary
+   form and its associated documentation ("the Software").
+
+2. Subject to the terms and conditions of this BeOpen Python License Agreement,
+   BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license
+   to reproduce, analyze, test, perform and/or display publicly, prepare derivative
+   works, distribute, and otherwise use the Software alone or in any derivative
+   version, provided, however, that the BeOpen Python License is retained in the
+   Software, alone or in any derivative version prepared by Licensee.
+
+3. BeOpen is making the Software available to Licensee on an "AS IS" basis.
+   BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.  BY WAY OF
+   EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR
+   WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE
+   USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
+
+4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR
+   ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING,
+   MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF
+   ADVISED OF THE POSSIBILITY THEREOF.
+
+5. This License Agreement will automatically terminate upon a material breach of
+   its terms and conditions.
+
+6. This License Agreement shall be governed by and interpreted in all respects
+   by the law of the State of California, excluding conflict of law provisions.
+   Nothing in this License Agreement shall be deemed to create any relationship of
+   agency, partnership, or joint venture between BeOpen and Licensee.  This License
+   Agreement does not grant permission to use BeOpen trademarks or trade names in a
+   trademark sense to endorse or promote products or services of Licensee, or any
+   third party.  As an exception, the "BeOpen Python" logos available at
+   http://www.pythonlabs.com/logos.html may be used according to the permissions
+   granted on that web page.
+
+7. By copying, installing or otherwise using the software, Licensee agrees to be
+   bound by the terms and conditions of this License Agreement.
+
+scope
+-----
+MIT License
+
+https://pypi.org/pypi/scope
+
+Copyright (c)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+tidy
+----
+GNU GENERAL PUBLIC LICENSE
+
+https://pypi.org/pypi/Tidy
+
+GNU GENERAL PUBLIC LICENSE
+Version 3, 29 June 2007
+
+Copyright © 2007 Free Software Foundation, Inc.
+
+Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+Preamble
+
+The GNU General Public License is a free, copyleft license for software and other kinds of works.
+
+The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too.
+
+When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.
+
+To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others.
+
+For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights.
+
+Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it.
+
+For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions.
+
+Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users.
+
+Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free.
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+TERMS AND CONDITIONS
+
+0. Definitions.
+
+“This License” refers to version 3 of the GNU General Public License.
+
+“Copyright” also means copyright-like laws that apply to other kinds of works, such as semiconductor masks.
+
+“The Program” refers to any copyrightable work licensed under this License. Each licensee is addressed as “you”. “Licensees” and “recipients” may be individuals or organizations.
+
+To “modify” a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a “modified version” of the earlier work or a work “based on” the earlier work.
+
+A “covered work” means either the unmodified Program or a work based on the Program.
+
+To “propagate” a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well.
+
+To “convey” a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays “Appropriate Legal Notices” to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion.
+
+1. Source Code.
+The “source code” for a work means the preferred form of the work for making modifications to it. “Object code” means any non-source form of a work.
+
+A “Standard Interface” means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language.
+
+The “System Libraries” of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A “Major Component”, in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it.
+
+The “Corresponding Source” for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source.
+
+The Corresponding Source for a work in source code form is that same work.
+
+2. Basic Permissions.
+All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you.
+
+Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary.
+
+3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures.
+
+When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures.
+
+4. Conveying Verbatim Copies.
+You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program.
+
+You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee.
+
+5. Conveying Modified Source Versions.
+You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions:
+
+     a) The work must carry prominent notices stating that you modified it, and giving a relevant date.
+
+     b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to “keep intact all notices”.
+
+     c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it.
+
+     d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so.
+
+A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an “aggregate” if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate.
+
+6. Conveying Non-Source Forms.
+You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways:
+
+     a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange.
+
+     b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge.
+
+     c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b.
+
+     d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements.
+
+     e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work.
+
+A “User Product” is either (1) a “consumer product”, which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, “normally used” refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product.
+
+“Installation Information” for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made.
+
+If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM).
+
+The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network.
+
+Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying.
+
+7. Additional Terms.
+“Additional permissions” are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms:
+
+     a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or
+
+     b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or
+
+     c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or
+
+     d) Limiting the use for publicity purposes of names of licensors or authors of the material; or
+
+     e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or
+
+     f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors.
+
+All other non-permissive additional terms are considered “further restrictions” within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way.
+
+8. Termination.
+You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11).
+
+However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.
+
+Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10.
+
+9. Acceptance Not Required for Having Copies.
+You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so.
+
+10. Automatic Licensing of Downstream Recipients.
+Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License.
+
+An “entity transaction” is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it.
+
+11. Patents.
+A “contributor” is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's “contributor version”.
+
+A contributor's “essential patent claims” are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, “control” includes the right to grant patent sublicenses in a manner consistent with the requirements of this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version.
+
+In the following three paragraphs, a “patent license” is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To “grant” such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. “Knowingly relying” means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it.
+
+A patent license is “discriminatory” if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law.
+
+12. No Surrender of Others' Freedom.
+If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program.
+
+13. Use with the GNU Affero General Public License.
+Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such.
+
+14. Revised Versions of this License.
+The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License “or any later version” applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program.
+
+Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version.
+
+15. Disclaimer of Warranty.
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. Limitation of Liability.
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+17. Interpretation of Sections 15 and 16.
+If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the “copyright” line and a pointer to where the full notice is found.
+
+     <one line to give the program's name and a brief idea of what it does.>
+     Copyright (C) <year>  <name of author>
+
+     This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode:
+
+     <program>  Copyright (C) <year>  <name of author>
+     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+     This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an “about box”.
+
+You should also get your employer (if you work as a programmer) or school, if any, to sign a “copyright disclaimer” for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>.
+
+The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>.
+
+compiledb
+---------
+GNU GENERAL PUBLIC LICENSE
+
+GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    {one line to give the program's name and a brief idea of what it does.}
+    Copyright (C) {year}  {name of author}
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    {project}  Copyright (C) {year}  {fullname}
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+
+bashlex
+-------
+GNU General Public License v3.0
+
+GNU GENERAL PUBLIC LICENSE
+Version 3, 29 June 2007
+
+Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+
+     Preamble
+
+The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+The precise terms and conditions for copying, distribution and
+modification follow.
+
+TERMS AND CONDITIONS
+
+0. Definitions.
+
+"This License" refers to version 3 of the GNU General Public License.
+
+"Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+"The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+1. Source Code.
+
+The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+The Corresponding Source for a work in source code form is that
+same work.
+
+2. Basic Permissions.
+
+All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+4. Conveying Verbatim Copies.
+
+You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+5. Conveying Modified Source Versions.
+
+You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+a) The work must carry prominent notices stating that you modified
+it, and giving a relevant date.
+
+b) The work must carry prominent notices stating that it is
+released under this License and any conditions added under section
+7.  This requirement modifies the requirement in section 4 to
+"keep intact all notices".
+
+c) You must license the entire work, as a whole, under this
+License to anyone who comes into possession of a copy.  This
+License will therefore apply, along with any applicable section 7
+additional terms, to the whole of the work, and all its parts,
+regardless of how they are packaged.  This License gives no
+permission to license the work in any other way, but it does not
+invalidate such permission if you have separately received it.
+
+d) If the work has interactive user interfaces, each must display
+Appropriate Legal Notices; however, if the Program has interactive
+interfaces that do not display Appropriate Legal Notices, your
+work need not make them do so.
+
+A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+6. Conveying Non-Source Forms.
+
+You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+a) Convey the object code in, or embodied in, a physical product
+(including a physical distribution medium), accompanied by the
+Corresponding Source fixed on a durable physical medium
+customarily used for software interchange.
+
+b) Convey the object code in, or embodied in, a physical product
+(including a physical distribution medium), accompanied by a
+written offer, valid for at least three years and valid for as
+long as you offer spare parts or customer support for that product
+model, to give anyone who possesses the object code either (1) a
+copy of the Corresponding Source for all the software in the
+product that is covered by this License, on a durable physical
+medium customarily used for software interchange, for a price no
+more than your reasonable cost of physically performing this
+conveying of source, or (2) access to copy the
+Corresponding Source from a network server at no charge.
+
+c) Convey individual copies of the object code with a copy of the
+written offer to provide the Corresponding Source.  This
+alternative is allowed only occasionally and noncommercially, and
+only if you received the object code with such an offer, in accord
+with subsection 6b.
+
+d) Convey the object code by offering access from a designated
+place (gratis or for a charge), and offer equivalent access to the
+Corresponding Source in the same way through the same place at no
+further charge.  You need not require recipients to copy the
+Corresponding Source along with the object code.  If the place to
+copy the object code is a network server, the Corresponding Source
+may be on a different server (operated by you or a third party)
+that supports equivalent copying facilities, provided you maintain
+clear directions next to the object code saying where to find the
+Corresponding Source.  Regardless of what server hosts the
+Corresponding Source, you remain obligated to ensure that it is
+available for as long as needed to satisfy these requirements.
+
+e) Convey the object code using peer-to-peer transmission, provided
+you inform other peers where the object code and Corresponding
+Source of the work are being offered to the general public at no
+charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+"Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+7. Additional Terms.
+
+"Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+a) Disclaiming warranty or limiting liability differently from the
+terms of sections 15 and 16 of this License; or
+
+b) Requiring preservation of specified reasonable legal notices or
+author attributions in that material or in the Appropriate Legal
+Notices displayed by works containing it; or
+
+c) Prohibiting misrepresentation of the origin of that material, or
+requiring that modified versions of such material be marked in
+reasonable ways as different from the original version; or
+
+d) Limiting the use for publicity purposes of names of licensors or
+authors of the material; or
+
+e) Declining to grant rights under trademark law for use of some
+trade names, trademarks, or service marks; or
+
+f) Requiring indemnification of licensors and authors of that
+material by anyone who conveys the material (or modified versions of
+it) with contractual assumptions of liability to the recipient, for
+any liability that these contractual assumptions directly impose on
+those licensors and authors.
+
+All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+8. Termination.
+
+You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+9. Acceptance Not Required for Having Copies.
+
+You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+10. Automatic Licensing of Downstream Recipients.
+
+Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+11. Patents.
+
+A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+12. No Surrender of Others' Freedom.
+
+If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+13. Use with the GNU Affero General Public License.
+
+Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+14. Revised Versions of this License.
+
+The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+15. Disclaimer of Warranty.
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. Limitation of Liability.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+17. Interpretation of Sections 15 and 16.
+
+If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+<one line to give the program's name and a brief idea of what it does.>
+Copyright (C) <year>  <name of author>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+<program>  Copyright (C) <year>  <name of author>
+This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+
+pyyaml-include
+--------------
+GNU General Public License v3.0
+
+https://pypi.org/project/pyyaml-include
+
+An extending constructor of PyYAML: include other YAML files into the current YAML document.
+
+GNU GENERAL PUBLIC LICENSE
+Version 3, 29 June 2007
+
+Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+
+     Preamble
+
+The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+The precise terms and conditions for copying, distribution and
+modification follow.
+
+TERMS AND CONDITIONS
+
+0. Definitions.
+
+"This License" refers to version 3 of the GNU General Public License.
+
+"Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+"The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+1. Source Code.
+
+The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+The Corresponding Source for a work in source code form is that
+same work.
+
+2. Basic Permissions.
+
+All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+4. Conveying Verbatim Copies.
+
+You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+5. Conveying Modified Source Versions.
+
+You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+a) The work must carry prominent notices stating that you modified
+it, and giving a relevant date.
+
+b) The work must carry prominent notices stating that it is
+released under this License and any conditions added under section
+7.  This requirement modifies the requirement in section 4 to
+"keep intact all notices".
+
+c) You must license the entire work, as a whole, under this
+License to anyone who comes into possession of a copy.  This
+License will therefore apply, along with any applicable section 7
+additional terms, to the whole of the work, and all its parts,
+regardless of how they are packaged.  This License gives no
+permission to license the work in any other way, but it does not
+invalidate such permission if you have separately received it.
+
+d) If the work has interactive user interfaces, each must display
+Appropriate Legal Notices; however, if the Program has interactive
+interfaces that do not display Appropriate Legal Notices, your
+work need not make them do so.
+
+A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+6. Conveying Non-Source Forms.
+
+You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+a) Convey the object code in, or embodied in, a physical product
+(including a physical distribution medium), accompanied by the
+Corresponding Source fixed on a durable physical medium
+customarily used for software interchange.
+
+b) Convey the object code in, or embodied in, a physical product
+(including a physical distribution medium), accompanied by a
+written offer, valid for at least three years and valid for as
+long as you offer spare parts or customer support for that product
+model, to give anyone who possesses the object code either (1) a
+copy of the Corresponding Source for all the software in the
+product that is covered by this License, on a durable physical
+medium customarily used for software interchange, for a price no
+more than your reasonable cost of physically performing this
+conveying of source, or (2) access to copy the
+Corresponding Source from a network server at no charge.
+
+c) Convey individual copies of the object code with a copy of the
+written offer to provide the Corresponding Source.  This
+alternative is allowed only occasionally and noncommercially, and
+only if you received the object code with such an offer, in accord
+with subsection 6b.
+
+d) Convey the object code by offering access from a designated
+place (gratis or for a charge), and offer equivalent access to the
+Corresponding Source in the same way through the same place at no
+further charge.  You need not require recipients to copy the
+Corresponding Source along with the object code.  If the place to
+copy the object code is a network server, the Corresponding Source
+may be on a different server (operated by you or a third party)
+that supports equivalent copying facilities, provided you maintain
+clear directions next to the object code saying where to find the
+Corresponding Source.  Regardless of what server hosts the
+Corresponding Source, you remain obligated to ensure that it is
+available for as long as needed to satisfy these requirements.
+
+e) Convey the object code using peer-to-peer transmission, provided
+you inform other peers where the object code and Corresponding
+Source of the work are being offered to the general public at no
+charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+"Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+7. Additional Terms.
+
+"Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+a) Disclaiming warranty or limiting liability differently from the
+terms of sections 15 and 16 of this License; or
+
+b) Requiring preservation of specified reasonable legal notices or
+author attributions in that material or in the Appropriate Legal
+Notices displayed by works containing it; or
+
+c) Prohibiting misrepresentation of the origin of that material, or
+requiring that modified versions of such material be marked in
+reasonable ways as different from the original version; or
+
+d) Limiting the use for publicity purposes of names of licensors or
+authors of the material; or
+
+e) Declining to grant rights under trademark law for use of some
+trade names, trademarks, or service marks; or
+
+f) Requiring indemnification of licensors and authors of that
+material by anyone who conveys the material (or modified versions of
+it) with contractual assumptions of liability to the recipient, for
+any liability that these contractual assumptions directly impose on
+those licensors and authors.
+
+All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+8. Termination.
+
+You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+9. Acceptance Not Required for Having Copies.
+
+You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+10. Automatic Licensing of Downstream Recipients.
+
+Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+11. Patents.
+
+A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+12. No Surrender of Others' Freedom.
+
+If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+13. Use with the GNU Affero General Public License.
+
+Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+14. Revised Versions of this License.
+
+The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+15. Disclaimer of Warranty.
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. Limitation of Liability.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+17. Interpretation of Sections 15 and 16.
+
+If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+<one line to give the program's name and a brief idea of what it does.>
+Copyright (C) <year>  <name of author>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+<program>  Copyright (C) <year>  <name of author>
+This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+
+yaml-cpp
+--------
+
+Copyright (c) 2008-2015 Jesse Beder.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+pytorch_scatter
+---------------
+Copyright (c) 2020 Matthias Fey <matthias.fey@tu-dortmund.de>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+onnxruntime
+-----------
+MIT License
+
+Copyright (c) Microsoft Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+Deformable-DETR
+---------------
+Copyright (c) 2020 SenseTime. All Rights Reserved.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2020 SenseTime
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+DETR
+
+Copyright 2020 - present, Facebook, Inc
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+PyTorch
+-------
+From PyTorch:
+
+Copyright (c) 2016-     Facebook, Inc            (Adam Paszke)
+Copyright (c) 2014-     Facebook, Inc            (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies    (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU                      (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
+Copyright (c) 2006      Idiap Research Institute (Samy Bengio)
+Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+
+From Caffe2:
+
+Copyright (c) 2016-present, Facebook Inc. All rights reserved.
+
+All contributions by Facebook:
+Copyright (c) 2016 Facebook Inc.
+
+All contributions by Google:
+Copyright (c) 2015 Google Inc.
+All rights reserved.
+
+All contributions by Yangqing Jia:
+Copyright (c) 2015 Yangqing Jia
+All rights reserved.
+
+All contributions by Kakao Brain:
+Copyright 2019-2020 Kakao Brain
+
+All contributions by Cruise LLC:
+Copyright (c) 2022 Cruise LLC.
+All rights reserved.
+
+All contributions by Tri Dao:
+Copyright (c) 2024 Tri Dao.
+All rights reserved.
+
+All contributions by Arm:
+Copyright (c) 2021, 2023-2024 Arm Limited and/or its affiliates
+
+All contributions from Caffe:
+Copyright(c) 2013, 2014, 2015, the respective contributors
+All rights reserved.
+
+All other contributions:
+Copyright(c) 2015, 2016 the respective contributors
+All rights reserved.
+
+Caffe2 uses a copyright model similar to Caffe: each contributor holds
+copyright over their contributions to Caffe2. The project versioning records
+all such contribution and copyright details. If a contributor wants to further
+mark their specific copyright on a particular contribution, they should
+indicate their copyright solely in the commit message of the change when it is
+committed.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
+   and IDIAP Research Institute nor the names of its contributors may be
+   used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+mmcv
+----
+Copyright (c) OpenMMLab. All rights reserved
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2018-2020 Open-MMLab. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+GPUSorting
+----------
+MIT license
+
+Copyright (c) 2024 Thomas Smith
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+json-schema-validator
+---------------------
+Modern C++ JSON schema validator is licensed under the MIT License
+<http://opensource.org/licenses/MIT>:
+
+Copyright (c) 2016 Patrick Boettcher
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+
+nlohmann/json
+-------------
+MIT License
+
+Copyright (c) 2013-2025 Niels Lohmann
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/doc/Readme.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/doc/Readme.txt
new file mode 100644
index 0000000000000000000000000000000000000000..576d196a29e4b17cbae8fb78583d63e785c9f733
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/doc/Readme.txt
@@ -0,0 +1,31 @@
+=== NVIDIA TensorRT ===
+
+NVIDIA® TensorRT™ is a C++ library that facilitates high-performance inference
+on NVIDIA GPUs. TensorRT takes a trained network, which consists of a network
+definition and a set of trained parameters, and produces a highly optimized
+runtime engine that performs inference for that network. TensorRT provides APIs
+using C++ and Python that help to express deep learning models using the Network
+Definition API or load a pre-defined model using the parsers that allow TensorRT
+to optimize and run them on an NVIDIA GPU. TensorRT applies graph optimizations,
+layer fusion, among other optimizations, while also finding the fastest
+implementation of that model leveraging a diverse collection of highly optimized
+kernels. TensorRT also supplies a runtime that you can use to execute this
+network on NVIDIA’s GPUs.
+
+For more information about TensorRT, visit https://developer.nvidia.com/tensorrt.
+
+In previous TensorRT releases, PDF documentation was included inside the TensorRT
+package. The PDF documentation has been removed from the package in favor of
+online documentation, which is updated regularly. Online documentation can be
+found at https://docs.nvidia.com/deeplearning/tensorrt/latest/index.html.
+
+For details on TensorRT's license agreement, visit https://docs.nvidia.com/deeplearning/tensorrt/latest/reference/sla.html.
+
+=== References ===
+
+Quick Start Guide: https://docs.nvidia.com/deeplearning/tensorrt/latest/getting-started/quick-start-guide.html
+Release Notes: https://docs.nvidia.com/deeplearning/tensorrt/latest/getting-started/release-notes.html
+Support Matrix: https://docs.nvidia.com/deeplearning/tensorrt/latest/getting-started/support-matrix.html
+Installation Guide: https://docs.nvidia.com/deeplearning/tensorrt/latest/installing-tensorrt/overview.html
+C++ API: https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/c-api-docs.html
+Python API: https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/python-api-docs.html
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInfer.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInfer.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a4250cee33172ffe56288c67266b2e05e2057c6
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInfer.h
@@ -0,0 +1,10875 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_H
+#define NV_INFER_H
+
+#include "NvInferLegacyDims.h"
+#include "NvInferRuntime.h"
+
+//!
+//! \mainpage
+//!
+//! This is the API documentation for the NVIDIA TensorRT library. It provides information on individual
+//! functions, classes and methods. Use the index on the left to navigate the documentation.
+//!
+//! Please see the accompanying user guide and samples for higher-level information and general advice on
+//! using TensorRT.
+//!
+//! TensorRT Versioning follows Semantic Versioning Guidelines specified here: https://semver.org/
+//!
+
+//!
+//! \file NvInfer.h
+//!
+//! This is the top-level API file for TensorRT.
+//!
+
+//!
+//! \namespace nvinfer1
+//!
+//! \brief The TensorRT API version 1 namespace.
+//!
+namespace nvinfer1
+{
+
+//!
+//! \enum LayerType
+//!
+//! \brief The type values of layer classes.
+//!
+//! \see ILayer::getType()
+//!
+enum class LayerType : int32_t
+{
+    kCONVOLUTION = 0,         //!< Convolution layer.
+    kCAST = 1,                //!< Cast layer.
+    kACTIVATION = 2,          //!< Activation layer.
+    kPOOLING = 3,             //!< Pooling layer.
+    kLRN = 4,                 //!< LRN layer.
+    kSCALE = 5,               //!< Scale layer.
+    kSOFTMAX = 6,             //!< SoftMax layer.
+    kDECONVOLUTION = 7,       //!< Deconvolution layer.
+    kCONCATENATION = 8,       //!< Concatenation layer.
+    kELEMENTWISE = 9,         //!< Elementwise layer.
+    kPLUGIN = 10,             //!< Plugin layer.
+    kUNARY = 11,              //!< UnaryOp operation layer.
+    kPADDING = 12,            //!< Padding layer.
+    kSHUFFLE = 13,            //!< Shuffle layer.
+    kREDUCE = 14,             //!< Reduce layer.
+    kTOPK = 15,               //!< TopK layer.
+    kGATHER = 16,             //!< Gather layer.
+    kMATRIX_MULTIPLY = 17,    //!< Matrix multiply layer.
+    kRAGGED_SOFTMAX = 18,     //!< Ragged softmax layer.
+    kCONSTANT = 19,           //!< Constant layer.
+    kIDENTITY = 20,           //!< Identity layer.
+    kPLUGIN_V2 = 21,          //!< PluginV2 layer.
+    kSLICE = 22,              //!< Slice layer.
+    kSHAPE = 23,              //!< Shape layer.
+    kPARAMETRIC_RELU = 24,    //!< Parametric ReLU layer.
+    kRESIZE = 25,             //!< Resize layer.
+    kTRIP_LIMIT = 26,         //!< Loop trip limit layer.
+    kRECURRENCE = 27,         //!< Loop recurrence layer.
+    kITERATOR = 28,           //!< Loop iterator layer.
+    kLOOP_OUTPUT = 29,        //!< Loop output layer.
+    kSELECT = 30,             //!< Select layer.
+    kFILL = 31,               //!< Fill layer.
+    kQUANTIZE = 32,           //!< Quantize layer.
+    kDEQUANTIZE = 33,         //!< Dequantize layer.
+    kCONDITION = 34,          //!< Condition layer.
+    kCONDITIONAL_INPUT = 35,  //!< Conditional input layer.
+    kCONDITIONAL_OUTPUT = 36, //!< Conditional output layer.
+    kSCATTER = 37,            //!< Scatter layer.
+    kEINSUM = 38,             //!< Einsum layer.
+    kASSERTION = 39,          //!< Assertion layer.
+    kONE_HOT = 40,            //!< OneHot layer.
+    kNON_ZERO = 41,           //!< NonZero layer.
+    kGRID_SAMPLE = 42,        //!< Grid sample layer.
+    kNMS = 43,                //!< NMS layer.
+    kREVERSE_SEQUENCE = 44,   //!< Reverse sequence layer.
+    kNORMALIZATION = 45,      //!< Normalization layer.
+    kPLUGIN_V3 = 46,          //!< PluginV3 layer.
+    kSQUEEZE = 47,            //!< Squeeze layer.
+    kUNSQUEEZE = 48,          //!< Unsqueeze layer.
+    kCUMULATIVE = 49,         //!< Cumulative layer.
+    kDYNAMIC_QUANTIZE = 50,    //!< Dynamic quantize layer.
+};
+
+//!
+//! \brief Maximum number of elements in the LayerType enum: 51, since its enumerators run from 0 to 50.
+//!
+//! \see LayerType
+//!
+template <>
+constexpr inline int32_t EnumMax<LayerType>() noexcept
+{
+    return 51; // One past the last enumerator, LayerType::kDYNAMIC_QUANTIZE = 50.
+}
+
+//!
+//! \brief A bitmask capable of representing one or more TensorFormat values combined by binary OR
+//! operations, e.g., 1U << TensorFormat::kCHW4 | 1U << TensorFormat::kCHW32.
+//!
+//! \see ITensor::getAllowedFormats(), ITensor::setAllowedFormats()
+//!
+using TensorFormats = uint32_t;
+
+//!
+//! \enum ActivationType
+//!
+//! \brief Enumerates the types of activation to perform in an activation layer.
+//!
+enum class ActivationType : int32_t
+{
+    kRELU = 0,              //!< Rectified linear activation.
+    kSIGMOID = 1,           //!< Sigmoid activation.
+    kTANH = 2,              //!< TanH activation.
+    kLEAKY_RELU = 3,        //!< LeakyRelu activation: x>=0 ? x : alpha * x.
+    kELU = 4,               //!< Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
+    kSELU = 5,              //!< Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha).
+    kSOFTSIGN = 6,          //!< Softsign activation: x / (1+|x|).
+    kSOFTPLUS = 7,          //!< Parametric softplus activation: alpha*log(exp(beta*x)+1).
+    kCLIP = 8,              //!< Clip activation: max(alpha, min(beta, x)).
+    kHARD_SIGMOID = 9,      //!< Hard sigmoid activation: max(0, min(1, alpha*x+beta)).
+    kSCALED_TANH = 10,      //!< Scaled tanh activation: alpha*tanh(beta*x).
+    kTHRESHOLDED_RELU = 11, //!< Thresholded ReLU activation: x>alpha ? x : 0.
+    kGELU_ERF = 12,         //!< GELU erf activation: 0.5 * x * (1 + erf(sqrt(0.5) * x)).
+    kGELU_TANH = 13         //!< GELU tanh activation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (0.044715F * pow(x, 3) + x))).
+};
+
+namespace impl
+{
+//!
+//! \brief Maximum number of elements in the ActivationType enum: 14, since its enumerators run from 0 to 13.
+//!
+//! \see ActivationType
+//!
+template <>
+struct EnumMaxImpl<ActivationType>
+{
+    static constexpr int32_t kVALUE = 14; //!< One past the last enumerator, ActivationType::kGELU_TANH = 13.
+};
+} // namespace impl
+
+//!
+//! \class ITensor
+//!
+//! \brief A tensor in a network definition.
+//!
+//! To remove a tensor from a network definition, use INetworkDefinition::removeTensor().
+//!
+//! When using the DLA, the cumulative size of all Tensors that are not marked as Network Input or Output tensors,
+//! must be less than 1GB in size to fit into a single subgraph. If the build option kGPU_FALLBACK is specified, then
+//! multiple subgraphs can be created, with each subgraph limited to less than 1GB of internal tensors data.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and
+//! ABI.
+//!
+class ITensor : public INoCopy
+{
+public:
+    //!
+    //! \brief Set the tensor name.
+    //!
+    //! For a network input, the name is assigned by the application. For tensors which are layer outputs,
+    //! a default name is assigned consisting of the layer name followed by the index of the output in brackets.
+    //! Each input and output tensor must have a unique name.
+    //!
+    //! This method copies the name string.
+    //!
+    //! \param name The name.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getName()
+    //!
+    void setName(char const* name) noexcept
+    {
+        mImpl->setName(name);
+    }
+
+    //!
+    //! \brief Get the tensor name.
+    //!
+    //! \return The name as a null-terminated C-style string.
+    //!
+    //! \see setName()
+    //!
+    char const* getName() const noexcept
+    {
+        return mImpl->getName();
+    }
+
+    //!
+    //! \brief Set the dimensions of a tensor.
+    //!
+    //! For a network input, the dimensions are assigned by the application. For a network output, the dimensions are
+    //! computed based on the layer parameters and the inputs to the layer. If a tensor size or a parameter is modified
+    //! in the network, the dimensions of all dependent tensors will be recomputed.
+    //!
+    //! This call is only legal for network input tensors, since the dimensions of layer output tensors are inferred
+    //! based on layer inputs and parameters.
+    //!
+    //! \param dimensions The dimensions of the tensor.
+    //!
+    //! \see getDimensions()
+    //!
+    void setDimensions(Dims const& dimensions) noexcept
+    {
+        mImpl->setDimensions(dimensions);
+    }
+
+    //!
+    //! \brief Get the dimensions of a tensor.
+    //!
+    //! \return The dimensions of the tensor.
+    //!
+    //! \warning getDimensions() returns a -1 for dimensions that are derived from a wildcard dimension.
+    //!
+    //! \see setDimensions()
+    //!
+    Dims getDimensions() const noexcept
+    {
+        return mImpl->getDimensions();
+    }
+
+    //!
+    //! \brief Set the data type of a tensor.
+    //!
+    //! \param type The data type of the tensor when the type is not inferred.
+    //!
+    //! For strongly typed networks, this method should be used only for network inputs,
+    //! since the types of all other tensors are inferred. Setting the type of a network
+    //! output is tolerated if the type equals the inferred type, otherwise an error occurs
+    //! and the type is not updated.
+    //!
+    //! For weakly typed networks, this method can be used for network outputs too, but
+    //! the type merely has to be implicitly convertible from the inferred type to the
+    //! specified type. In this case it does not matter whether the type is set first
+    //! or the tensor is marked as an output first (via `INetworkDefinition::markOutput`
+    //! or `INetworkDefinition::markOutputForShapes`).
+    //!
+    //! However, marking it first has two advantages:
+    //!
+    //!     * It avoids warnings that the tensor is not yet a network I/O tensor.
+    //!     * It causes method `getType()` to return the type that was set instead of the inferred type.
+    //!
+    //! \see getType()
+    //!
+    //! \note This function does more than just set the type, so `t.setType(t.getType())` is not necessarily a no-op,
+    //! particularly for input and output tensors!
+    //!
+    //! \note Repeated consecutive applications of `t.setType(t.getType())`
+    //! would be idempotent, provided the state of the `ITensor` isn't changed between calls.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    //!
+    TRT_DEPRECATED void setType(DataType type) noexcept
+    {
+        mImpl->setType(type);
+    }
+
+    //!
+    //! \brief Get the data type of a tensor.
+    //!
+    //! \return The data type of the tensor.
+    //!
+    //! The type is the type set by `setType` if the tensor is a network input or output.
+    //! Otherwise the type is the inferred type.
+    //!
+    //! \see setType()
+    //!
+    DataType getType() const noexcept
+    {
+        return mImpl->getType();
+    }
+
+    //!
+    //! \brief Set the dynamic range for the tensor.
+    //!
+    //! Currently, only symmetric ranges are supported.
+    //! Therefore, the larger of the absolute values of the provided bounds is used.
+    //!
+    //! \return Whether the dynamic range was set successfully.
+    //!
+    //! Requires that min and max be finite, and min <= max.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED bool setDynamicRange(float min, float max) noexcept
+    {
+        return mImpl->setDynamicRange(min, max);
+    }
+
+    //!
+    //! \brief Whether the tensor is a network input.
+    //!
+    bool isNetworkInput() const noexcept
+    {
+        return mImpl->isNetworkInput();
+    }
+
+    //!
+    //! \brief Whether the tensor is a network output.
+    //!
+    bool isNetworkOutput() const noexcept
+    {
+        return mImpl->isNetworkOutput();
+    }
+
+    //!
+    //! \brief Set whether to enable broadcast of tensor across the implicit batch dimension.
+    //!
+    //! \warning This method has no effect other than issuing a warning.
+    //!
+    //! \param broadcastAcrossBatch Whether to broadcast the tensor across the implicit
+    //!         batch dimension that was a feature of TensorRT 9.x and prior.
+    //!
+    //! \see getBroadcastAcrossBatch()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
+    //!
+    TRT_DEPRECATED void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept
+    {
+        mImpl->setBroadcastAcrossBatch(broadcastAcrossBatch);
+    }
+
+    //!
+    //! \brief Check if tensor is broadcast across the implicit batch dimension.
+    //!
+    //! \return Always false since TensorRT 10.0 does not support an implicit batch dimension.
+    //!
+    //! \see setBroadcastAcrossBatch()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
+    //!
+    TRT_DEPRECATED bool getBroadcastAcrossBatch() const noexcept
+    {
+        return mImpl->getBroadcastAcrossBatch();
+    }
+
+    //!
+    //! \brief Get the storage location of a tensor.
+    //!
+    //! \return The location of tensor data.
+    //!
+    //! \see setLocation()
+    //!
+    TensorLocation getLocation() const noexcept
+    {
+        return mImpl->getLocation();
+    }
+
+    //!
+    //! \brief Set the storage location of a tensor.
+    //!
+    //! \param location The location of the tensor data.
+    //!
+    //! Only network input tensors for storing sequence lengths for RNNv2 are supported.
+    //! Using host storage for layers that do not support it will generate
+    //! errors at build time.
+    //!
+    //! \see getLocation()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. RNNv2 is not supported and the location must
+    //! always be TensorLocation::kDEVICE since TensorRT 10.0.
+    //!
+    TRT_DEPRECATED void setLocation(TensorLocation location) noexcept
+    {
+        mImpl->setLocation(location);
+    }
+
+    //!
+    //! \brief Query whether dynamic range is set.
+    //!
+    //! \return True if dynamic range is set, false otherwise.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED bool dynamicRangeIsSet() const noexcept
+    {
+        return mImpl->dynamicRangeIsSet();
+    }
+
+    //!
+    //! \brief Undo the effect of setDynamicRange().
+    //!
+    void resetDynamicRange() noexcept
+    {
+        mImpl->resetDynamicRange();
+    }
+
+    //!
+    //! \brief Get minimum of dynamic range.
+    //!
+    //! \return Minimum of dynamic range, or quiet NaN if range was not set.
+    //!
+    float getDynamicRangeMin() const noexcept
+    {
+        return mImpl->getDynamicRangeMin();
+    }
+
+    //!
+    //! \brief Get maximum of dynamic range.
+    //!
+    //! \return Maximum of dynamic range, or quiet NaN if range was not set.
+    //!
+    float getDynamicRangeMax() const noexcept
+    {
+        return mImpl->getDynamicRangeMax();
+    }
+
+    //!
+    //! \brief Set allowed formats for an input or output tensor. By default all formats are allowed.
+    //!        Shape tensors (for which isShapeTensor() returns true) may only have row-major linear format.
+    //!
+    //! When running network on DLA and the build option kGPU_FALLBACK is not specified, if DLA format(kCHW4 with Int8,
+    //! kCHW4 with FP16, kCHW16 with FP16, kCHW32 with Int8) is set, the input format is treated as native DLA format
+    //! with line stride requirement. Input/output binding with these format should have correct layout during
+    //! inference.
+    //!
+    //! Tensor formats are determined at build time by TensorRT for tensors not marked as input or output.
+    //!
+    //! \param formats A bitmask of TensorFormat values that are supported for this tensor.
+    //!
+    //! \see ITensor::getAllowedFormats()
+    //!
+    //! \see TensorFormats
+    //!
+    void setAllowedFormats(TensorFormats formats) noexcept
+    {
+        mImpl->setAllowedFormats(formats);
+    }
+
+    //!
+    //! \brief Get a bitmask of TensorFormat values that the tensor supports.
+    //!        For a shape tensor, only row-major linear format is allowed.
+    //!
+    //! \return The value specified by setAllowedFormats or all possible formats.
+    //!
+    //! \see ITensor::setAllowedFormats()
+    //!
+    TensorFormats getAllowedFormats() const noexcept
+    {
+        return mImpl->getAllowedFormats();
+    }
+
+    //!
+    //! \brief Whether the tensor is a shape tensor.
+    //!
+    //! A shape tensor is a tensor that is related to shape calculations.
+    //! It must have type Int32, Int64, Bool, or Float, and its shape must be determinable at build time.
+    //! Furthermore, it must be needed as a shape tensor, either marked as a network shape
+    //! output via markOutputForShapes(), or as a layer input that is required to be a shape
+    //! tensor, such as the second input to IShuffleLayer. Some layers are "polymorphic" in
+    //! this respect. For example, the inputs to IElementWiseLayer must be shape tensors
+    //! if the output is a shape tensor.
+    //!
+    //! The TensorRT Developer Guide gives the formal rules for what tensors are shape tensors.
+    //!
+    //! The result of isShapeTensor() is reliable only when network construction is complete.
+    //! For example, if a partially built network sums two tensors T1 and T2 to create
+    //! tensor T3, and none are yet needed as shape tensors, isShapeTensor() returns false
+    //! for all three tensors.  Setting the second input of IShuffleLayer to be T3 would
+    //! cause all three tensors to be shape tensors, because IShuffleLayer requires that its
+    //! second optional input be a shape tensor, and IElementWiseLayer is "polymorphic".
+    //!
+    //! It is possible for a tensor to be both a shape tensor and an execution tensor.
+    //!
+    //! \return True if tensor is a shape tensor, false otherwise.
+    //!
+    //! \see INetworkDefinition::markOutputForShapes()
+    //!
+    bool isShapeTensor() const noexcept
+    {
+        return mImpl->isShapeTensor();
+    }
+
+    //!
+    //! \brief Whether the tensor is an execution tensor.
+    //!
+    //! Tensors are usually execution tensors.  The exceptions are tensors used
+    //! solely for shape calculations or whose contents are not needed to compute the outputs.
+    //!
+    //! The result of isExecutionTensor() is reliable only when network construction is complete.
+    //! For example, if a partially built network has no path from a tensor to a network output,
+    //! isExecutionTensor() returns false. Completing the path would cause it to become true.
+    //!
+    //!
+    //! A tensor with isShapeTensor() == false and isExecutionTensor() == false
+    //! can still show up as an input to the engine if its dimensions are required.
+    //! In that case, only its dimensions need to be set at runtime and a nullptr
+    //! can be passed instead of a pointer to its contents.
+    //!
+    bool isExecutionTensor() const noexcept
+    {
+        return mImpl->isExecutionTensor();
+    }
+
+    //!
+    //! \brief Name a dimension of an input tensor.
+    //!
+    //! Associate a runtime dimension of an input tensor with a symbolic name.
+    //! Dimensions with the same non-empty name must be equal at runtime.
+    //! Knowing this equality for runtime dimensions may help the TensorRT optimizer.
+    //! Both runtime and build-time dimensions can be named.
+    //!
+    //! For example, setDimensionName(0, "n") associates the symbolic name "n" with the leading dimension.
+    //!
+    //! This method copies the name string.
+    //! If the function is called again, with the same index, it will overwrite the previous name.
+    //! If nullptr is passed as name, it will clear the name of the dimension.
+    //!
+    //! \param index The index of the dimension.
+    //! \param name The name of the dimension, as a pointer to a null-terminated character sequence.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getDimensionName()
+    //!
+    void setDimensionName(int32_t index, char const* name) noexcept
+    {
+        mImpl->setDimensionName(index, name);
+    }
+
+    //!
+    //! \brief Get the name of an input dimension.
+    //!
+    //! \param index The index of the dimension.
+    //!
+    //! \return The name of the input dimension, or nullptr if the dimension has no name.
+    //!         The name is a pointer to a null-terminated character sequence.
+    //!
+    //! \see setDimensionName()
+    //!
+    char const* getDimensionName(int32_t index) const noexcept
+    {
+        return mImpl->getDimensionName(index);
+    }
+
+protected:
+    apiv::VTensor* mImpl; //!< Implementation object to which every public method of this class forwards.
+    virtual ~ITensor() noexcept = default;
+};
+
+//!
+//! \class ILayer
+//!
+//! \brief Base class for all layer classes in a network definition.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ILayer : public INoCopy
+{
+public:
+    //!
+    //! \brief Return the type of a layer.
+    //!
+    //! \see LayerType
+    //!
+    LayerType getType() const noexcept
+    {
+        return mLayer->getType();
+    }
+
+    //!
+    //! \brief Set the name of a layer.
+    //!
+    //! This method copies the name string.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getName()
+    //!
+    void setName(char const* name) noexcept
+    {
+        mLayer->setName(name);
+    }
+
+    //!
+    //! \brief Return the name of a layer.
+    //!
+    //! \see setName()
+    //!
+    char const* getName() const noexcept
+    {
+        return mLayer->getName();
+    }
+
+    //!
+    //! \brief Get the number of inputs of a layer.
+    //!
+    int32_t getNbInputs() const noexcept
+    {
+        return mLayer->getNbInputs();
+    }
+
+    //!
+    //! \brief Get the layer input corresponding to the given index.
+    //!
+    //! \param index The index of the input tensor.
+    //!
+    //! \return The input tensor, or nullptr if the index is out of range or the tensor is optional
+    //! (\ref ISliceLayer).
+    //!
+    ITensor* getInput(int32_t index) const noexcept
+    {
+        return mLayer->getInput(index);
+    }
+
+    //!
+    //! \brief Get the number of outputs of a layer.
+    //!
+    int32_t getNbOutputs() const noexcept
+    {
+        return mLayer->getNbOutputs();
+    }
+
+    //!
+    //! \brief Get the layer output corresponding to the given index.
+    //!
+    //! \return The indexed output tensor, or nullptr if the index is out of range or the tensor is optional.
+    //!
+    ITensor* getOutput(int32_t index) const noexcept
+    {
+        return mLayer->getOutput(index);
+    }
+
+    //!
+    //! \brief Replace an input of this layer with a specific tensor.
+    //!
+    //! \param index The index of the input to modify.
+    //! \param tensor The new input tensor.
+    //!
+    //! Except for IFillLayer, ILoopOutputLayer, INMSLayer, IResizeLayer, IShuffleLayer, and ISliceLayer,
+    //! this method cannot change the number of inputs to a layer. The index argument must be
+    //! less than the value of getNbInputs().
+    //!
+    //! See comments for overloads of setInput() for layers with special behavior.
+    //!
+    void setInput(int32_t index, ITensor& tensor) noexcept
+    {
+        return mLayer->setInput(index, tensor);
+    }
+
+    //!
+    //! \brief Set the preferred or required computational precision of this layer in a weakly-typed network.
+    //!
+    //! Setting the precision directs TensorRT to choose an implementation that runs at this computational precision.
+    //! TensorRT could still choose a non-conforming fastest implementation that ignores the requested precision.
+    //! To force choosing an implementation with the requested precision, set exactly one of the following flags,
+    //! which differ in what happens if no such implementation exists:
+    //!
+    //! * BuilderFlag::kOBEY_PRECISION_CONSTRAINTS - build fails with an error message.
+    //!
+    //! * BuilderFlag::kPREFER_PRECISION_CONSTRAINTS - TensorRT falls back to an
+    //!   implementation without the requested precision.
+    //!
+    //! If precision is not set, or falling back, TensorRT will select the layer computational precision
+    //! and layer input type based on global performance considerations and the flags specified to the builder.
+    //!
+    //! For an IIdentityLayer: If it casts to/from float/half/int8/uint8, the precision must be one of those types,
+    //! otherwise it must be either the input or output type.
+    //!
+    //! Strongly-typed networks reject calls to method setPrecision. In strongly-typed networks, the computation
+    //! precision is typically controlled by casting the input tensors to the desired type.
+    //!
+    //! \param dataType The computational precision.
+    //!
+    //! \see getPrecision() precisionIsSet() resetPrecision()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    //!
+    TRT_DEPRECATED void setPrecision(DataType dataType) noexcept
+    {
+        mLayer->setPrecision(dataType);
+    }
+
+    //!
+    //! \brief Get the computational precision of this layer.
+    //!
+    //! \return The computational precision.
+    //!
+    //! \see setPrecision() precisionIsSet() resetPrecision()
+    //!
+    DataType getPrecision() const noexcept
+    {
+        return mLayer->getPrecision();
+    }
+
+    //!
+    //! \brief Whether the computational precision has been set for this layer.
+    //!
+    //! \return Whether the computational precision has been explicitly set.
+    //!
+    //! \see setPrecision() getPrecision() resetPrecision()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    //!
+    TRT_DEPRECATED bool precisionIsSet() const noexcept
+    {
+        return mLayer->precisionIsSet();
+    }
+
+    //!
+    //! \brief Reset the computational precision for this layer.
+    //!
+    //! \see setPrecision() getPrecision() precisionIsSet()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    //!
+    TRT_DEPRECATED void resetPrecision() noexcept
+    {
+        mLayer->resetPrecision();
+    }
+
+    //!
+    //! \brief Set the output type of this layer in a weakly-typed network.
+    //!
+    //! Setting the output type constrains TensorRT to choose implementations which generate output data with the
+    //! given type. If it is not set, TensorRT will select output type based on layer computational precision. TensorRT
+    //! could still choose non-conforming output type based on fastest implementation. To force choosing the requested
+    //! output type, set exactly one of the following flags, which differ in what happens if no such implementation
+    //! exists:
+    //!
+    //! * BuilderFlag::kOBEY_PRECISION_CONSTRAINTS - build fails with an error message.
+    //!
+    //! * BuilderFlag::kPREFER_PRECISION_CONSTRAINTS - TensorRT falls back to an
+    //!   implementation with a non-conforming output type.
+    //!
+    //! In case layer precision is not specified, or falling back, the output type depends on the
+    //! chosen implementation, based on performance considerations and the flags specified to the builder.
+    //!
+    //! This method cannot be used to set the data type of the second output tensor of the TopK layer. The data type of
+    //! the second output tensor of the TopK layer is always Int32. Also the output type of all layers that are shape
+    //! operations must be DataType::kINT32, and all attempts to set the output type to some other data type will be
+    //! ignored except for issuing an error message.
+    //!
+    //! Note that the layer output type is generally not identical to the data type of the output tensor, as TensorRT
+    //! may insert implicit reformatting operations to convert the former to the latter. Calling layer->setOutputType(i,
+    //! type) has no effect on the data type of the i-th output tensor of layer, and users need to call
+    //! layer->getOutput(i)->setType(type) to change the tensor data type. This is particularly relevant if the tensor
+    //! is marked as a network output, since only setType() [but not setOutputType()] will affect the data
+    //! representation in the corresponding output binding.
+    //!
+    //! Strongly-typed networks reject calls to method setOutputType. Instead, the output type can be set
+    //! only for layers that define method setToType(). Those layers are:
+    //!
+    //! * ICastLayer
+    //! * IDequantizeLayer
+    //! * IDynamicQuantizeLayer
+    //! * IFillLayer
+    //! * IQuantizeLayer
+    //!
+    //! \param index The index of the output to set.
+    //! \param dataType The type of the output.
+    //!
+    //! \see getOutputType() outputTypeIsSet() resetOutputType()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    //!
+    TRT_DEPRECATED void setOutputType(int32_t index, DataType dataType) noexcept
+    {
+        mLayer->setOutputType(index, dataType);
+    }
+
+    //!
+    //! \brief Get the output type of this layer.
+    //!
+    //! \param index The index of the output.
+    //!
+    //! \return The output precision. If no precision has been set, DataType::kFLOAT will be returned,
+    //!         unless the output type is inherently DataType::kINT32.
+    //!
+    //! \see setOutputType() outputTypeIsSet() resetOutputType()
+    //!
+    DataType getOutputType(int32_t index) const noexcept
+    {
+        return mLayer->getOutputType(index);
+    }
+
+    //!
+    //! \brief Whether the output type has been set for this layer.
+    //!
+    //! \param index The index of the output.
+    //!
+    //! \return Whether the output type has been explicitly set.
+    //!
+    //! \see setOutputType() getOutputType() resetOutputType()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    //!
+    TRT_DEPRECATED bool outputTypeIsSet(int32_t index) const noexcept
+    {
+        return mLayer->outputTypeIsSet(index);
+    }
+
+    //!
+    //! \brief Reset the output type for this layer.
+    //!
+    //! \param index The index of the output.
+    //!
+    //! \see setOutputType() getOutputType() outputTypeIsSet()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    //!
+    TRT_DEPRECATED void resetOutputType(int32_t index) noexcept
+    {
+        return mLayer->resetOutputType(index);
+    }
+
+    //!
+    //! \brief Set the metadata for this layer.
+    //!
+    //! The metadata is emitted in the JSON returned by IEngineInspector with
+    //! ProfilingVerbosity set to kDETAILED.
+    //!
+    //! \param metadata The per-layer metadata.
+    //!
+    //! \warning The metadata string must be null-terminated and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getMetadata()
+    //! \see getLayerInformation()
+    //!
+    void setMetadata(char const* metadata) noexcept
+    {
+        mLayer->setMetadata(metadata);
+    }
+
+    //!
+    //! \brief Get the metadata of the layer.
+    //!
+    //! \return The metadata as a null-terminated C-style string. If setMetadata() has not been called,
+    //!         an empty string "" will be returned as a default value.
+    //!
+    //! \see setMetadata()
+    //!
+    char const* getMetadata() const noexcept
+    {
+        return mLayer->getMetadata();
+    }
+
+protected:
+    virtual ~ILayer() noexcept = default;
+    apiv::VLayer* mLayer; //!< Implementation object to which every public method of this class forwards.
+};
+
+//!
+//! \enum PaddingMode
+//!
+//! \brief Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,
+//! padding mode takes precedence if setPaddingMode() and setPrePadding() are also used.
+//!
+//! There are two padding styles, EXPLICIT and SAME with each style having two variants.
+//! The EXPLICIT style determines if the final sampling location is used or not.
+//! The SAME style determines if the asymmetry in the padding is on the pre or post padding.
+//!
+//! \code
+//! Shorthand:
+//!     I = dimensions of input image.
+//!     B = prePadding, before the image data.
+//!     A = postPadding, after the image data.
+//!     P = delta between input and output
+//!     S = stride
+//!     F = filter
+//!     O = output
+//!     D = dilation
+//!     M = I + B + A ; The image data plus any padding
+//!     DK = 1 + D * (F - 1)
+//! \endcode
+//!
+//! Formulas for Convolution:
+//!     - EXPLICIT_ROUND_DOWN:
+//! \code
+//!         O = floor((M - DK) / S) + 1
+//! \endcode
+//!     - EXPLICIT_ROUND_UP:
+//! \code
+//!         O = ceil((M - DK) / S) + 1
+//! \endcode
+//!     - SAME_UPPER:
+//! \code
+//!         O = ceil(I / S)
+//!         P = floor((I - 1) / S) * S + DK - I;
+//!         B = floor(P / 2)
+//!         A = P - B
+//! \endcode
+//!     - SAME_LOWER:
+//! \code
+//!         O = ceil(I / S)
+//!         P = floor((I - 1) / S) * S + DK - I;
+//!         A = floor(P / 2)
+//!         B = P - A
+//! \endcode
+//!
+//! Formulas for Deconvolution:
+//!     - EXPLICIT_ROUND_DOWN:
+//!     - EXPLICIT_ROUND_UP:
+//! \code
+//!         O = (I - 1) * S + DK - (B + A)
+//! \endcode
+//!     - SAME_UPPER:
+//! \code
+//!         O = min(I * S, (I - 1) * S + DK)
+//!         P = max(DK - S, 0)
+//!         B = floor(P / 2)
+//!         A = P - B
+//! \endcode
+//!     - SAME_LOWER:
+//! \code
+//!         O = min(I * S, (I - 1) * S + DK)
+//!         P = max(DK - S, 0)
+//!         A = floor(P / 2)
+//!         B = P - A
+//! \endcode
+//!
+//! Formulas for Pooling:
+//!     - EXPLICIT_ROUND_DOWN:
+//! \code
+//!         O = floor((M - F) / S) + 1
+//! \endcode
+//!     - EXPLICIT_ROUND_UP:
+//! \code
+//!         O = ceil((M - F) / S) + 1
+//! \endcode
+//!     - SAME_UPPER:
+//! \code
+//!         O = ceil(I / S)
+//!         P = floor((I - 1) / S) * S + F - I;
+//!         B = floor(P / 2)
+//!         A = P - B
+//! \endcode
+//!     - SAME_LOWER:
+//! \code
+//!         O = ceil(I / S)
+//!         P = floor((I - 1) / S) * S + F - I;
+//!         A = floor(P / 2)
+//!         B = P - A
+//! \endcode
+//!
+//! Pooling Example 1:
+//! \code
+//!     Given I = {6, 6}, B = {3, 3}, A = {2, 2}, S = {2, 2}, F = {3, 3}. What is O?
+//!     (B, A can be calculated for SAME_UPPER and SAME_LOWER mode)
+//! \endcode
+//!
+//! - EXPLICIT_ROUND_DOWN:
+//! \code
+//!     Computation:
+//!         M = {6, 6} + {3, 3} + {2, 2} ==> {11, 11}
+//!         O ==> floor((M - F) / S) + 1
+//!           ==> floor(({11, 11} - {3, 3}) / {2, 2}) + {1, 1}
+//!           ==> floor({8, 8} / {2, 2}) + {1, 1}
+//!           ==> {5, 5}
+//! \endcode
+//! - EXPLICIT_ROUND_UP:
+//! \code
+//!     Computation:
+//!         M = {6, 6} + {3, 3} + {2, 2} ==> {11, 11}
+//!         O ==> ceil((M - F) / S) + 1
+//!           ==> ceil(({11, 11} - {3, 3}) / {2, 2}) + {1, 1}
+//!           ==> ceil({8, 8} / {2, 2}) + {1, 1}
+//!           ==> {5, 5}
+//! \endcode
+//!     The sample points are {0, 2, 4, 6, 8} in each dimension.
+//!
+//! - SAME_UPPER:
+//! \code
+//!     Computation:
+//!         I = {6, 6}
+//!         S = {2, 2}
+//!         O = ceil(I / S) = {3, 3}
+//!         P = floor((I - 1) / S) * S + F - I
+//!             ==> floor(({6, 6} - {1, 1}) / {2, 2}) * {2, 2} + {3, 3} - {6, 6}
+//!             ==> {4, 4} + {3, 3} - {6, 6}
+//!             ==> {1, 1}
+//!         B = floor({1, 1} / {2, 2})
+//!             ==> {0, 0}
+//!         A = {1, 1} - {0, 0}
+//!             ==> {1, 1}
+//! \endcode
+//! - SAME_LOWER:
+//! \code
+//!     Computation:
+//!         I = {6, 6}
+//!         S = {2, 2}
+//!         O = ceil(I / S) = {3, 3}
+//!         P = floor((I - 1) / S) * S + F - I
+//!           ==> {1, 1}
+//!         A = floor({1, 1} / {2, 2})
+//!           ==> {0, 0}
+//!         B = {1, 1} - {0, 0}
+//!           ==> {1, 1}
+//! \endcode
+//!     The sample points are {0, 2, 4} in each dimension.
+//!     SAME_UPPER has {O0, O1, O2, pad} in output in each dimension.
+//!     SAME_LOWER has {pad, O0, O1, O2} in output in each dimension.
+//!
+//! Pooling Example 2:
+//! \code
+//!     Given I = {6, 6}, B = {3, 3}, A = {3, 3}, S = {2, 2}, F = {3, 3}. What is O?
+//! \endcode
+//!
+enum class PaddingMode : int32_t
+{
+    kEXPLICIT_ROUND_DOWN = 0, //!< Use explicit padding, rounding output size down (floor in the output formulas).
+    kEXPLICIT_ROUND_UP = 1,   //!< Use explicit padding, rounding output size up (ceil in the output formulas).
+    kSAME_UPPER = 2,          //!< Use SAME padding, with prePadding <= postPadding (B = floor(P / 2), A = P - B).
+    kSAME_LOWER = 3,          //!< Use SAME padding, with prePadding >= postPadding (A = floor(P / 2), B = P - A).
+};
+
+namespace impl
+{
+//!
+//! Number of enumerators in the PaddingMode enum (its values run from 0 through 3).
+//!
+//! \see PaddingMode
+//!
+template <>
+struct EnumMaxImpl<PaddingMode>
+{
+    static constexpr int32_t kVALUE = 4;
+};
+} // namespace impl
+
+//!
+//! \class IConvolutionLayer
+//!
+//! \brief A convolution layer in a network definition.
+//!
+//! This layer performs a correlation operation between 3 or 4 dimensional filter with a 4 or 5 dimensional tensor to
+//! produce another 4 or 5 dimensional tensor.
+//!
+//! An optional bias argument is supported, which adds a per-channel constant to each value in the output.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IConvolutionLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the number of output maps for the convolution.
+    //!
+    //! If executing this layer on DLA, the number of output maps must be in the range [1,8192].
+    //!
+    //! \see getNbOutputMaps()
+    //!
+    void setNbOutputMaps(int64_t nbOutputMaps) noexcept
+    {
+        mImpl->setNbOutputMaps(nbOutputMaps);
+    }
+
+    //!
+    //! \brief Get the number of output maps for the convolution.
+    //!
+    //! \see setNbOutputMaps()
+    //!
+    int64_t getNbOutputMaps() const noexcept
+    {
+        return mImpl->getNbOutputMaps();
+    }
+
+    //!
+    //! \brief Set the number of groups for a convolution.
+    //!
+    //! The input tensor channels are divided into \p nbGroups groups, and a convolution is executed for each group,
+    //! using a filter per group. The results of the group convolutions are concatenated to form the output.
+    //!
+    //! \note When using groups in int8 mode, the size of the groups (i.e. the channel count divided by the group
+    //! count) must be a multiple of 4 for both input and output.
+    //!
+    //! Default: 1
+    //!
+    //! If executing this layer on DLA, the max number of groups is 8192.
+    //!
+    //! \see getNbGroups()
+    //!
+    void setNbGroups(int64_t nbGroups) noexcept
+    {
+        mImpl->setNbGroups(nbGroups);
+    }
+
+    //!
+    //! \brief Get the number of groups of the convolution.
+    //!
+    //! \see setNbGroups()
+    //!
+    int64_t getNbGroups() const noexcept
+    {
+        return mImpl->getNbGroups();
+    }
+
+    //!
+    //! \brief Set the kernel weights for the convolution.
+    //!
+    //! The weights are specified as a contiguous array in \p GKCRS order, where \p G is the number of groups, \p K
+    //! the number of output feature maps, \p C the number of input channels, and \p R and \p S are the height and
+    //! width of the filter.
+    //!
+    //! \see getKernelWeights()
+    //!
+    void setKernelWeights(Weights weights) noexcept
+    {
+        mImpl->setKernelWeights(weights);
+    }
+
+    //!
+    //! \brief Get the kernel weights of the convolution.
+    //!
+    //! \see setKernelWeights()
+    //!
+    Weights getKernelWeights() const noexcept
+    {
+        return mImpl->getKernelWeights();
+    }
+
+    //!
+    //! \brief Set the bias weights for the convolution.
+    //!
+    //! Bias is optional. To omit bias, set the count value of the weights structure to zero.
+    //!
+    //! The bias is applied per-channel, so the number of weights (if non-zero) must be equal to the number of output
+    //! feature maps.
+    //!
+    //! \see getBiasWeights()
+    //!
+    void setBiasWeights(Weights weights) noexcept
+    {
+        mImpl->setBiasWeights(weights);
+    }
+
+    //!
+    //! \brief Get the bias weights for the convolution.
+    //!
+    //! \see setBiasWeights()
+    //!
+    Weights getBiasWeights() const noexcept
+    {
+        return mImpl->getBiasWeights();
+    }
+
+    //!
+    //! \brief Set the multi-dimension pre-padding of the convolution.
+    //!
+    //! The start of the input will be zero-padded by this number of elements in each dimension.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
+    //! [0,31], and the padding must be less than the kernel size.
+    //!
+    //! \see getPrePadding()
+    //!
+    void setPrePadding(Dims const& padding) noexcept
+    {
+        mImpl->setPrePadding(padding);
+    }
+
+    //!
+    //! \brief Get the pre-padding.
+    //!
+    //! \see setPrePadding()
+    //!
+    Dims getPrePadding() const noexcept
+    {
+        return mImpl->getPrePadding();
+    }
+
+    //!
+    //! \brief Set the multi-dimension post-padding of the convolution.
+    //!
+    //! The end of the input will be zero-padded by this number of elements in each dimension.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
+    //! [0,31], and the padding must be less than the kernel size.
+    //!
+    //! \see getPostPadding()
+    //!
+    void setPostPadding(Dims const& padding) noexcept
+    {
+        mImpl->setPostPadding(padding);
+    }
+
+    //!
+    //! \brief Get the post-padding.
+    //!
+    //! \see setPostPadding()
+    //!
+    Dims getPostPadding() const noexcept
+    {
+        return mImpl->getPostPadding();
+    }
+
+    //!
+    //! \brief Set the padding mode.
+    //!
+    //! Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used.
+    //!
+    //! Default: kEXPLICIT_ROUND_DOWN
+    //!
+    //! \see getPaddingMode()
+    //!
+    void setPaddingMode(PaddingMode paddingMode) noexcept
+    {
+        mImpl->setPaddingMode(paddingMode);
+    }
+
+    //!
+    //! \brief Get the padding mode.
+    //!
+    //! Default: kEXPLICIT_ROUND_DOWN
+    //!
+    //! \see setPaddingMode()
+    //!
+    PaddingMode getPaddingMode() const noexcept
+    {
+        return mImpl->getPaddingMode();
+    }
+
+    //!
+    //! \brief Set the multi-dimension kernel size of the convolution.
+    //!
+    //! If executing this layer on DLA, only support 2D kernel size, both height and width of kernel size must be in the
+    //! range [1,32].
+    //!
+    //! \see getKernelSizeNd()
+    //!
+    void setKernelSizeNd(Dims const& kernelSize) noexcept
+    {
+        mImpl->setKernelSizeNd(kernelSize);
+    }
+
+    //!
+    //! \brief Get the multi-dimension kernel size of the convolution.
+    //!
+    //! \see setKernelSizeNd()
+    //!
+    Dims getKernelSizeNd() const noexcept
+    {
+        return mImpl->getKernelSizeNd();
+    }
+
+    //!
+    //! \brief Set the multi-dimension stride of the convolution.
+    //!
+    //! Default: (1, 1, ..., 1)
+    //!
+    //! If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range
+    //! [1,8].
+    //!
+    //! \see getStrideNd()
+    //!
+    void setStrideNd(Dims const& stride) noexcept
+    {
+        mImpl->setStrideNd(stride);
+    }
+
+    //!
+    //! \brief Get the multi-dimension stride of the convolution.
+    //!
+    //! \see setStrideNd()
+    //!
+    Dims getStrideNd() const noexcept
+    {
+        return mImpl->getStrideNd();
+    }
+
+    //!
+    //! \brief Set the multi-dimension padding of the convolution.
+    //!
+    //! The input will be zero-padded by this number of elements in each dimension.
+    //! Padding is symmetric.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
+    //! [0,31], and the padding must be less than the kernel size.
+    //!
+    //! \see getPaddingNd() setPrePadding() setPostPadding()
+    //!
+    void setPaddingNd(Dims const& padding) noexcept
+    {
+        mImpl->setPaddingNd(padding);
+    }
+
+    //!
+    //! \brief Get the multi-dimension padding of the convolution.
+    //!
+    //! If the padding is asymmetric, the pre-padding is returned.
+    //!
+    //! \see setPaddingNd()
+    //!
+    Dims getPaddingNd() const noexcept
+    {
+        return mImpl->getPaddingNd();
+    }
+
+    //!
+    //! \brief Set the multi-dimension dilation of the convolution.
+    //!
+    //! Default: (1, 1, ..., 1)
+    //!
+    //! If executing this layer on DLA, only support 2D dilation, both height and width of dilation must be in the range [1,32].
+    //!
+    //! \see getDilationNd()
+    //!
+    void setDilationNd(Dims const& dilation) noexcept
+    {
+        mImpl->setDilationNd(dilation);
+    }
+
+    //!
+    //! \brief Get the multi-dimension dilation of the convolution.
+    //!
+    //! \see setDilationNd()
+    //!
+    Dims getDilationNd() const noexcept
+    {
+        return mImpl->getDilationNd();
+    }
+
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor
+    //!
+    //! \param index the index of the input to modify.
+    //! \param tensor the new input tensor
+    //!
+    //! The indices are as follows:
+    //!
+    //! Input 0 is the input activation tensor.
+    //! Input 1 is the kernel tensor. If used, the kernel weights parameter must be set to empty weights.
+    //! Input 2 is the bias tensor. If used, the bias parameter must be set to empty weights.
+    //!
+    //! \see getKernelWeights(), setKernelWeights(), getBiasWeights(), setBiasWeights()
+    //!
+    using ILayer::setInput;
+
+protected:
+    virtual ~IConvolutionLayer() noexcept = default;
+    apiv::VConvolutionLayer* mImpl; //!< Implementation object that the setters and getters above forward to.
+};
+
+//!
+//! \class IActivationLayer
+//!
+//! \brief An Activation layer in a network definition.
+//!
+//! This layer applies a per-element activation function to its input.
+//!
+//! The output has the same shape as the input.
+//!
+//! The input is a shape tensor if the output is a shape tensor.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IActivationLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the type of activation to be performed.
+    //!
+    //! On the DLA, the valid activation types are kRELU, kSIGMOID, kTANH, and kCLIP.
+    //!
+    //! \see getActivationType(), ActivationType
+    //!
+    void setActivationType(ActivationType type) noexcept
+    {
+        mImpl->setActivationType(type);
+    }
+
+    //!
+    //! \brief Get the type of activation to be performed.
+    //!
+    //! \see setActivationType(), ActivationType
+    //!
+    ActivationType getActivationType() const noexcept
+    {
+        return mImpl->getActivationType();
+    }
+
+    //!
+    //! \brief Set the alpha parameter (must be finite).
+    //!
+    //! This parameter is used by the following activations:
+    //! LeakyRelu, Elu, Selu, Softplus, Clip, HardSigmoid, ScaledTanh,
+    //! ThresholdedRelu.
+    //!
+    //! It is ignored by the other activations.
+    //!
+    //! \see getAlpha(), setBeta()
+    void setAlpha(float alpha) noexcept
+    {
+        mImpl->setAlpha(alpha);
+    }
+
+    //!
+    //! \brief Set the beta parameter (must be finite).
+    //!
+    //! This parameter is used by the following activations:
+    //! Selu, Softplus, Clip, HardSigmoid, ScaledTanh.
+    //!
+    //! It is ignored by the other activations.
+    //!
+    //! \see getBeta(), setAlpha()
+    void setBeta(float beta) noexcept
+    {
+        mImpl->setBeta(beta);
+    }
+
+    //!
+    //! \brief Get the alpha parameter.
+    //!
+    //! \see getBeta(), setAlpha()
+    float getAlpha() const noexcept
+    {
+        return mImpl->getAlpha();
+    }
+
+    //!
+    //! \brief Get the beta parameter.
+    //!
+    //! \see getAlpha(), setBeta()
+    float getBeta() const noexcept
+    {
+        return mImpl->getBeta();
+    }
+
+protected:
+    virtual ~IActivationLayer() noexcept = default;
+    apiv::VActivationLayer* mImpl; //!< Implementation object that the setters and getters above forward to.
+};
+
+//!
+//! \enum PoolingType
+//!
+//! \brief The type of pooling to perform in a pooling layer.
+//!
+enum class PoolingType : int32_t
+{
+    kMAX = 0,              //!< Maximum over elements
+    kAVERAGE = 1,          //!< Average over elements. Whether padding counts toward the average is selected by IPoolingLayer::setAverageCountExcludesPadding()
+    kMAX_AVERAGE_BLEND = 2 //!< Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
+};
+
+namespace impl
+{
+//!
+//! Number of enumerators in the PoolingType enum (its values run from 0 through 2).
+//!
+//! \see PoolingType
+//!
+template <>
+struct EnumMaxImpl<PoolingType>
+{
+    static constexpr int32_t kVALUE = 3;
+};
+} // namespace impl
+
+//! \class IPoolingLayer
+//!
+//! \brief A Pooling layer in a network definition.
+//!
+//! The layer applies a reduction operation within a window over the input.
+//!
+//! \warning When running pooling layer with DeviceType::kDLA in Int8 mode, the dynamic ranges
+//! for input and output tensors must be equal.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IPoolingLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the type of pooling to be performed.
+    //!
+    //! DLA only supports kMAX and kAVERAGE pooling types.
+    //!
+    //! \see getPoolingType(), PoolingType
+    //!
+    void setPoolingType(PoolingType type) noexcept
+    {
+        mImpl->setPoolingType(type);
+    }
+
+    //!
+    //! \brief Get the type of pooling to be performed.
+    //!
+    //! \see setPoolingType(), PoolingType
+    //!
+    PoolingType getPoolingType() const noexcept
+    {
+        return mImpl->getPoolingType();
+    }
+
+    //!
+    //! \brief Set the blending factor for the max_average_blend mode:
+    //! max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool
+    //! blendFactor is a user value in [0,1] with the default value of 0.0
+    //! This value only applies for the kMAX_AVERAGE_BLEND mode.
+    //!
+    //! Since DLA does not support kMAX_AVERAGE_BLEND, blendFactor is ignored on the DLA.
+    //!
+    //! \see getBlendFactor()
+    //!
+    void setBlendFactor(float blendFactor) noexcept
+    {
+        mImpl->setBlendFactor(blendFactor);
+    }
+
+    //!
+    //! \brief Get the blending factor for the max_average_blend mode:
+    //! max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool
+    //! blendFactor is a user value in [0,1] with the default value of 0.0
+    //! In modes other than kMAX_AVERAGE_BLEND, blendFactor is ignored.
+    //!
+    //! \see setBlendFactor()
+    //!
+    float getBlendFactor() const noexcept
+    {
+        return mImpl->getBlendFactor();
+    }
+
+    //!
+    //! \brief Set whether average pooling uses as a denominator the overlap area between the window
+    //! and the unpadded input.
+    //! If this is not set, the denominator is the overlap between the pooling window and the padded input.
+    //!
+    //! Default: true
+    //!
+    //! \see getAverageCountExcludesPadding()
+    //!
+    void setAverageCountExcludesPadding(bool exclusive) noexcept
+    {
+        mImpl->setAverageCountExcludesPadding(exclusive);
+    }
+
+    //!
+    //! \brief Get whether average pooling uses as a denominator the overlap area between the window
+    //! and the unpadded input.
+    //!
+    //! \see setAverageCountExcludesPadding()
+    //!
+    bool getAverageCountExcludesPadding() const noexcept
+    {
+        return mImpl->getAverageCountExcludesPadding();
+    }
+
+    //!
+    //! \brief Set the multi-dimension pre-padding for pooling.
+    //!
+    //! The start of the input will be padded by this number of elements in each dimension.
+    //! Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
+    //! [0,7].
+    //!
+    //! \see getPrePadding()
+    //!
+    void setPrePadding(Dims const& padding) noexcept
+    {
+        mImpl->setPrePadding(padding);
+    }
+
+    //!
+    //! \brief Get the pre-padding.
+    //!
+    //! \see setPrePadding()
+    //!
+    Dims getPrePadding() const noexcept
+    {
+        return mImpl->getPrePadding();
+    }
+
+    //!
+    //! \brief Set the multi-dimension post-padding for pooling.
+    //!
+    //! The end of the input will be padded by this number of elements in each dimension.
+    //! Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
+    //! [0,7].
+    //!
+    //! \see getPostPadding()
+    //!
+    void setPostPadding(Dims const& padding) noexcept
+    {
+        mImpl->setPostPadding(padding);
+    }
+
+    //!
+    //! \brief Get the post-padding.
+    //!
+    //! \see setPostPadding()
+    //!
+    Dims getPostPadding() const noexcept
+    {
+        return mImpl->getPostPadding();
+    }
+
+    //!
+    //! \brief Set the padding mode.
+    //!
+    //! Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used.
+    //!
+    //! Default: kEXPLICIT_ROUND_DOWN
+    //!
+    //! \see getPaddingMode()
+    void setPaddingMode(PaddingMode paddingMode) noexcept
+    {
+        mImpl->setPaddingMode(paddingMode);
+    }
+
+    //!
+    //! \brief Get the padding mode.
+    //!
+    //! Default: kEXPLICIT_ROUND_DOWN
+    //!
+    //! \see setPaddingMode()
+    PaddingMode getPaddingMode() const noexcept
+    {
+        return mImpl->getPaddingMode();
+    }
+
+    //!
+    //! \brief Set the multi-dimension window size for pooling.
+    //!
+    //! If executing this layer on DLA, only support 2D window size, both height and width of window size must be in the
+    //! range [1,8].
+    //!
+    //! \see getWindowSizeNd()
+    //!
+    void setWindowSizeNd(Dims const& windowSize) noexcept
+    {
+        mImpl->setWindowSizeNd(windowSize);
+    }
+
+    //!
+    //! \brief Get the multi-dimension window size for pooling.
+    //!
+    //! \see setWindowSizeNd()
+    //!
+    Dims getWindowSizeNd() const noexcept
+    {
+        return mImpl->getWindowSizeNd();
+    }
+
+    //!
+    //! \brief Set the multi-dimension stride for pooling.
+    //!
+    //! Default: (1, 1, ..., 1)
+    //!
+    //! If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range
+    //! [1,16].
+    //!
+    //! \see getStrideNd()
+    //!
+    void setStrideNd(Dims const& stride) noexcept
+    {
+        mImpl->setStrideNd(stride);
+    }
+
+    //!
+    //! \brief Get the multi-dimension stride for pooling.
+    //!
+    //! \see setStrideNd()
+    //!
+    Dims getStrideNd() const noexcept
+    {
+        return mImpl->getStrideNd();
+    }
+
+    //!
+    //! \brief Set the multi-dimension padding for pooling.
+    //!
+    //! The input will be padded by this number of elements in each dimension.
+    //! Padding is symmetric.
+    //! Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //! If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range
+    //! [0,7].
+    //!
+    //! \see getPaddingNd() setPrePadding() setPostPadding()
+    //!
+    void setPaddingNd(Dims const& padding) noexcept
+    {
+        mImpl->setPaddingNd(padding);
+    }
+
+    //!
+    //! \brief Get the multi-dimension padding for pooling.
+    //!
+    //! If the padding is asymmetric, the pre-padding is returned.
+    //!
+    //! \see setPaddingNd()
+    //!
+    Dims getPaddingNd() const noexcept
+    {
+        return mImpl->getPaddingNd();
+    }
+
+protected:
+    virtual ~IPoolingLayer() noexcept = default;
+    apiv::VPoolingLayer* mImpl; //!< Implementation object that the setters and getters above forward to.
+};
+
+//!
+//! \class ILRNLayer
+//!
+//! \brief A LRN layer in a network definition.
+//!
+//! The output size is the same as the input size.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ILRNLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the LRN window size.
+    //!
+    //! The window size must be odd and in the range of [1, 15].
+    //!
+    //! If executing this layer on the DLA, only values in the set, [3, 5, 7, 9], are valid.
+    //!
+    //! \see getWindowSize()
+    //!
+    void setWindowSize(int64_t windowSize) noexcept
+    {
+        mImpl->setWindowSize(windowSize);
+    }
+
+    //!
+    //! \brief Get the LRN window size.
+    //!
+    //! \see setWindowSize()
+    //!
+    int64_t getWindowSize() const noexcept
+    {
+        return mImpl->getWindowSize();
+    }
+
+    //!
+    //! \brief Set the LRN alpha value.
+    //!
+    //! The valid range is [-1e20, 1e20].
+    //!
+    //! \see getAlpha()
+    //!
+    void setAlpha(float alpha) noexcept
+    {
+        mImpl->setAlpha(alpha);
+    }
+
+    //!
+    //! \brief Get the LRN alpha value.
+    //!
+    //! \see setAlpha()
+    //!
+    float getAlpha() const noexcept
+    {
+        return mImpl->getAlpha();
+    }
+
+    //!
+    //! \brief Set the LRN beta value.
+    //!
+    //! The valid range is [0.01, 1e5].
+    //!
+    //! \see getBeta()
+    //!
+    void setBeta(float beta) noexcept
+    {
+        mImpl->setBeta(beta);
+    }
+
+    //!
+    //! \brief Get the LRN beta value.
+    //!
+    //! \see setBeta()
+    //!
+    float getBeta() const noexcept
+    {
+        return mImpl->getBeta();
+    }
+
+    //!
+    //! \brief Set the LRN K value.
+    //!
+    //! The valid range is [1e-5, 1e10].
+    //!
+    //! \see getK()
+    //!
+    void setK(float k) noexcept
+    {
+        mImpl->setK(k);
+    }
+
+    //!
+    //! \brief Get the LRN K value.
+    //!
+    //! \see setK()
+    //!
+    float getK() const noexcept
+    {
+        return mImpl->getK();
+    }
+
+protected:
+    virtual ~ILRNLayer() noexcept = default;
+    apiv::VLRNLayer* mImpl; //!< Implementation object that the setters and getters above forward to.
+};
+
+//!
+//! \brief Controls how shift, scale and power are applied in a Scale layer.
+//!
+//! \see IScaleLayer
+//!
+enum class ScaleMode : int32_t
+{
+    kUNIFORM = 0,    //!< Identical coefficients across all elements of the tensor.
+    kCHANNEL = 1,    //!< Per-channel coefficients, indexed along the channel axis (see IScaleLayer::setChannelAxis()).
+    kELEMENTWISE = 2 //!< Elementwise coefficients, indexed by the channel axis and the axes after it.
+};
+
+//!
+//! Number of enumerators in the ScaleMode enum (its values run from 0 through 2).
+//!
+//! \see ScaleMode
+//!
+template <>
+constexpr inline int32_t EnumMax<ScaleMode>() noexcept
+{
+    return 3;
+}
+
+//!
+//! \class IScaleLayer
+//!
+//! \brief A Scale layer in a network definition.
+//!
+//! This layer applies a per-element computation to its input:
+//!
+//! \p output = (\p input * \p scale + \p shift) ^ \p power
+//!
+//! The coefficients can be applied on a per-tensor, per-channel, or per-element basis.
+//!
+//! \note If the number of weights is 0, then a default value is used for shift, power, and scale.
+//!       The default shift is 0, the default power is 1, and the default scale is 1.
+//!
+//! The output size is the same as the input size.
+//!
+//! \note The input tensor is required to have at least 4 dimensions.
+//!
+//! A scale layer may be used as an INT8 quantization node in a graph, if the output is constrained to INT8 and
+//! the input to FP32. Quantization rounds ties to even, and clamps to [-128, 127].
+//!
+//! \see ScaleMode
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IScaleLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the scale mode.
+    //!
+    //! \see getMode()
+    //!
+    void setMode(ScaleMode mode) noexcept
+    {
+        mImpl->setMode(mode);
+    }
+
+    //!
+    //! \brief Get the scale mode.
+    //!
+    //! \see setMode()
+    //!
+    ScaleMode getMode() const noexcept
+    {
+        return mImpl->getMode();
+    }
+
+    //!
+    //! \brief Set the shift value.
+    //!
+    //! \see getShift()
+    //!
+    void setShift(Weights shift) noexcept
+    {
+        mImpl->setShift(shift);
+    }
+
+    //!
+    //! \brief Get the shift value.
+    //!
+    //! \see setShift()
+    //!
+    Weights getShift() const noexcept
+    {
+        return mImpl->getShift();
+    }
+
+    //!
+    //! \brief Set the scale value.
+    //!
+    //! \see getScale()
+    //!
+    void setScale(Weights scale) noexcept
+    {
+        mImpl->setScale(scale);
+    }
+
+    //!
+    //! \brief Get the scale value.
+    //!
+    //! \see setScale()
+    //!
+    Weights getScale() const noexcept
+    {
+        return mImpl->getScale();
+    }
+
+    //!
+    //! \brief Set the power value.
+    //!
+    //! \see getPower()
+    //!
+    void setPower(Weights power) noexcept
+    {
+        mImpl->setPower(power);
+    }
+
+    //!
+    //! \brief Get the power value.
+    //!
+    //! \see setPower()
+    //!
+    Weights getPower() const noexcept
+    {
+        return mImpl->getPower();
+    }
+
+    //!
+    //! \brief Get the channel axis.
+    //!
+    //! \return channelAxis parameter passed to addScaleNd() or set by setChannelAxis()
+    //!
+    //! The value is the index of the channel axis in the input tensor's dimensions.
+    //! Scaling happens along the channel axis when ScaleMode::kCHANNEL is enabled.
+    //!
+    //! \see addScaleNd() setChannelAxis()
+    //!
+    int32_t getChannelAxis() const noexcept
+    {
+        return mImpl->getChannelAxis();
+    }
+
+    //!
+    //! \brief Set the channel axis.
+    //!
+    //! The value is the index of the channel axis in the input tensor's dimensions.
+    //!
+    //! For ScaleMode::kCHANNEL, there can be distinct scale, shift, and power weights for each channel coordinate.
+    //! For ScaleMode::kELEMENTWISE, there can be distinct scale, shift, and power weights for each combination of
+    //! coordinates from the channel axis and axes after it.
+    //!
+    //! For example, suppose the input tensor has dimensions [10,20,30,40] and the channel axis is 1.
+    //! Let [n,c,h,w] denote an input coordinate.
+    //! For ScaleMode::kCHANNEL, the scale, shift, and power weights are indexed by c.
+    //! For ScaleMode::kELEMENTWISE, the scale, shift, and power weights are indexed by [c,h,w].
+    //!
+    //! \see addScaleNd() getChannelAxis()
+    //!
+    void setChannelAxis(int32_t channelAxis) noexcept
+    {
+        mImpl->setChannelAxis(channelAxis);
+    }
+
+protected:
+    virtual ~IScaleLayer() noexcept = default;
+    apiv::VScaleLayer* mImpl; //!< Implementation object that the setters and getters above forward to.
+};
+
+//!
+//! \class ISoftMaxLayer
+//!
+//! \brief A Softmax layer in a network definition.
+//!
+//! This layer applies a per-channel softmax to its input.
+//!
+//! The output size is the same as the input size.
+//!
+//! The following constraints must be satisfied to execute this layer on DLA:
+//! * Axis must be one of the channel or spatial dimensions.
+//! * There are two classes of supported input sizes:
+//!     1. Non-axis, non-batch dimensions are all 1 and the axis dimension is at most 8192.
+//!        This is the recommended case for using softmax since it is the most accurate.
+//!     2. At least one non-axis, non-batch dimension greater than 1 and the axis dimension is at most 1024.
+//!        Note that in this case, there may be some approximation error as the axis dimension size approaches
+//!        the upper bound. See the TensorRT Developer Guide for more details on the approximation error.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ISoftMaxLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the axis along which softmax is computed. Currently, only one axis can be set.
+    //!
+    //! The axis is specified by setting the bit corresponding to the axis to 1.
+    //! For example, consider an NCHW tensor as input.
+    //!
+    //! Bit 0 corresponds to the N dimension boolean.
+    //! Bit 1 corresponds to the C dimension boolean.
+    //! Bit 2 corresponds to the H dimension boolean.
+    //! Bit 3 corresponds to the W dimension boolean.
+    //! By default, softmax is performed on the axis which is the number of axes minus three. It is 0 if
+    //! there are fewer than 3 axes. For example, if the input is NCHW, the default axis is C. If the input
+    //! is NHW, then the default axis is N.
+    //!
+    //! For example, to perform softmax on axis R of an NPQRCHW input, set bit 3.
+    //!
+    //! \param axes The axis along which softmax is computed.
+    //!        Here axes is a bitmap. For example, when doing softmax along axis 0, bit 0 is set to 1, axes = 1 << axis
+    //!        = 1.
+    //!
+    void setAxes(uint32_t axes) noexcept
+    {
+        mImpl->setAxes(axes);
+    }
+
+    //!
+    //! \brief Get the axis along which softmax occurs.
+    //!
+    //! \see setAxes(), which documents the bitmap encoding of the axis.
+    //!
+    uint32_t getAxes() const noexcept
+    {
+        return mImpl->getAxes();
+    }
+
+protected:
+    virtual ~ISoftMaxLayer() noexcept = default;
+    apiv::VSoftMaxLayer* mImpl;
+};
+
+//!
+//! \class IConcatenationLayer
+//!
+//! \brief A concatenation layer in a network definition.
+//!
+//! The output dimension along the concatenation axis is the sum of the corresponding input dimensions.
+//! Every other output dimension is the same as the corresponding dimension of the inputs.
+//!
+//! \warning All tensors must have the same dimensions except along the concatenation axis.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IConcatenationLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the axis along which concatenation occurs.
+    //!
+    //! The default axis is the number of tensor dimensions minus three, or zero if the tensor has fewer than three
+    //! dimensions. For example, for a tensor with dimensions NCHW, the default axis is C (index 4 - 3 = 1).
+    //!
+    //! When running this layer on the DLA, the concatenation axis must be the third to last axis, e.g. C if tensor
+    //! dimensions are NCHW.
+    //!
+    //! \param axis The index of the axis along which concatenation occurs.
+    //!
+    void setAxis(int32_t axis) noexcept
+    {
+        mImpl->setAxis(axis);
+    }
+
+    //!
+    //! \brief Get the axis along which concatenation occurs.
+    //!
+    //! \see setAxis()
+    //!
+    int32_t getAxis() const noexcept
+    {
+        return mImpl->getAxis();
+    }
+
+protected:
+    virtual ~IConcatenationLayer() noexcept = default;
+    apiv::VConcatenationLayer* mImpl;
+};
+
+//!
+//! \class IDeconvolutionLayer
+//!
+//! \brief A deconvolution layer in a network definition.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IDeconvolutionLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the number of output feature maps for the deconvolution.
+    //!
+    //! If executing this layer on DLA, the number of output maps must be in the range [1,8192].
+    //!
+    //! \see getNbOutputMaps()
+    //!
+    void setNbOutputMaps(int64_t nbOutputMaps) noexcept
+    {
+        mImpl->setNbOutputMaps(nbOutputMaps);
+    }
+
+    //!
+    //! \brief Get the number of output feature maps for the deconvolution.
+    //!
+    //! \see setNbOutputMaps()
+    //!
+    int64_t getNbOutputMaps() const noexcept
+    {
+        return mImpl->getNbOutputMaps();
+    }
+
+    //!
+    //! \brief Set the number of groups for a deconvolution.
+    //!
+    //! The input tensor channels are divided into \p nbGroups groups, and a deconvolution is executed for each group,
+    //! using a filter per group. The results of the group convolutions are concatenated to form the output.
+    //!
+    //! If executing this layer on DLA, nbGroups must be one.
+    //!
+    //! \note When using groups in int8 mode, the size of the groups (i.e. the channel count divided by the group count)
+    //! must be a multiple of 4 for both input and output.
+    //!
+    //! Default: 1
+    //!
+    //! \see getNbGroups()
+    //!
+    void setNbGroups(int64_t nbGroups) noexcept
+    {
+        mImpl->setNbGroups(nbGroups);
+    }
+
+    //!
+    //! \brief Get the number of groups for a deconvolution.
+    //!
+    //! \see setNbGroups()
+    //!
+    int64_t getNbGroups() const noexcept
+    {
+        return mImpl->getNbGroups();
+    }
+
+    //!
+    //! \brief Set the kernel weights for the deconvolution.
+    //!
+    //! The weights are specified as a contiguous array in \p CKRS order, where \p C is the number of
+    //! input channels, \p K is the number of output feature maps, and \p R and \p S are the height and width
+    //! of the filter.
+    //!
+    //! \see getKernelWeights()
+    //!
+    void setKernelWeights(Weights weights) noexcept
+    {
+        mImpl->setKernelWeights(weights);
+    }
+
+    //!
+    //! \brief Get the kernel weights for the deconvolution.
+    //!
+    //! \see setKernelWeights()
+    //!
+    Weights getKernelWeights() const noexcept
+    {
+        return mImpl->getKernelWeights();
+    }
+
+    //!
+    //! \brief Set the bias weights for the deconvolution.
+    //!
+    //! Bias is optional. To omit bias, set the count value of the weights structure to zero.
+    //!
+    //! The bias is applied per-feature-map, so the number of weights (if non-zero) must be equal to the number of
+    //! output feature maps.
+    //!
+    //! \see getBiasWeights()
+    //!
+    void setBiasWeights(Weights weights) noexcept
+    {
+        mImpl->setBiasWeights(weights);
+    }
+
+    //!
+    //! \brief Get the bias weights for the deconvolution.
+    //!
+    //! \see setBiasWeights()
+    //!
+    Weights getBiasWeights() const noexcept
+    {
+        return mImpl->getBiasWeights();
+    }
+
+    //!
+    //! \brief Set the multi-dimension pre-padding of the deconvolution.
+    //!
+    //! The output will be trimmed by this number of elements on the start of every dimension.
+    //! In other words, it resembles the inverse of a convolution layer with this padding size.
+    //! Negative padding is not supported.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //!
+    //! \see getPrePadding()
+    //!
+    void setPrePadding(Dims const& padding) noexcept
+    {
+        mImpl->setPrePadding(padding);
+    }
+
+    //!
+    //! \brief Get the pre-padding.
+    //!
+    //! \see setPrePadding()
+    //!
+    Dims getPrePadding() const noexcept
+    {
+        return mImpl->getPrePadding();
+    }
+
+    //!
+    //! \brief Set the multi-dimension post-padding of the deconvolution.
+    //!
+    //! The output will be trimmed by this number of elements on the end of every dimension.
+    //! In other words, it resembles the inverse of a convolution layer with this padding size.
+    //! Negative padding is not supported.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //!
+    //! \see getPostPadding()
+    //!
+    void setPostPadding(Dims const& padding) noexcept
+    {
+        mImpl->setPostPadding(padding);
+    }
+
+    //!
+    //! \brief Get the post-padding.
+    //!
+    //! \see setPostPadding()
+    //!
+    Dims getPostPadding() const noexcept
+    {
+        return mImpl->getPostPadding();
+    }
+
+    //!
+    //! \brief Set the padding mode.
+    //!
+    //! Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used.
+    //!
+    //! Default: kEXPLICIT_ROUND_DOWN
+    //!
+    //! \see getPaddingMode()
+    //!
+    void setPaddingMode(PaddingMode paddingMode) noexcept
+    {
+        mImpl->setPaddingMode(paddingMode);
+    }
+
+    //!
+    //! \brief Get the padding mode.
+    //!
+    //! Default: kEXPLICIT_ROUND_DOWN
+    //!
+    //! \see setPaddingMode()
+    //!
+    PaddingMode getPaddingMode() const noexcept
+    {
+        return mImpl->getPaddingMode();
+    }
+
+    //!
+    //! \brief Set the multi-dimension kernel size of the deconvolution.
+    //!
+    //! If executing this layer on DLA, there are two restrictions:
+    //! 1) Only 2D Kernel is supported.
+    //! 2) Kernel height and width must be in the range [1,32] or the combinations of [64, 96, 128] in one
+    //! dimension and 1 in the other dimensions, i.e. [1x64] or [64x1] are valid, but not [64x64].
+    //!
+    //! \see getKernelSizeNd()
+    //!
+    void setKernelSizeNd(Dims const& kernelSize) noexcept
+    {
+        mImpl->setKernelSizeNd(kernelSize);
+    }
+
+    //!
+    //! \brief Get the multi-dimension kernel size of the deconvolution.
+    //!
+    //! \see setKernelSizeNd()
+    //!
+    Dims getKernelSizeNd() const noexcept
+    {
+        return mImpl->getKernelSizeNd();
+    }
+
+    //!
+    //! \brief Set the multi-dimension stride of the deconvolution.
+    //!
+    //! Default: (1, 1, ..., 1)
+    //!
+    //! If executing this layer on DLA, there are two restrictions:
+    //! 1) Only 2D Stride is supported.
+    //! 2) Stride height and width must be in the range [1,32] or the combinations of [64, 96, 128] in one
+    //! dimension and 1 in the other dimensions, i.e. [1x64] or [64x1] are valid, but not [64x64].
+    //!
+    //! \see getStrideNd()
+    //!
+    void setStrideNd(Dims const& stride) noexcept
+    {
+        mImpl->setStrideNd(stride);
+    }
+
+    //!
+    //! \brief Get the multi-dimension stride of the deconvolution.
+    //!
+    //! \see setStrideNd()
+    //!
+    Dims getStrideNd() const noexcept
+    {
+        return mImpl->getStrideNd();
+    }
+
+    //!
+    //! \brief Set the multi-dimension padding of the deconvolution.
+    //!
+    //! The output will be trimmed by this number of elements on both sides of every dimension.
+    //! In other words, it resembles the inverse of a convolution layer with this padding size.
+    //! Padding is symmetric, and negative padding is not supported.
+    //!
+    //! Default: (0, 0, ..., 0)
+    //!
+    //! If executing this layer on DLA, padding must be 0.
+    //!
+    //! \see getPaddingNd(), setPrePadding(), setPostPadding()
+    //!
+    void setPaddingNd(Dims const& padding) noexcept
+    {
+        mImpl->setPaddingNd(padding);
+    }
+
+    //!
+    //! \brief Get the multi-dimension padding of the deconvolution.
+    //!
+    //! If the padding is asymmetric, the pre-padding is returned.
+    //!
+    //! \see setPaddingNd()
+    //!
+    Dims getPaddingNd() const noexcept
+    {
+        return mImpl->getPaddingNd();
+    }
+
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor.
+    //!
+    //! \param index the index of the input to modify.
+    //! \param tensor the new input tensor.
+    //!
+    //! Input 0 is the input activation tensor.
+    //! Input 1 is the kernel tensor. If used, the kernel weights parameter must be set to empty weights.
+    //! Input 2 is the bias tensor. If used, the bias parameter must be set to empty weights.
+    //!
+    //! \see getKernelWeights(), setKernelWeights(), getBiasWeights(), setBiasWeights()
+    //!
+    using ILayer::setInput;
+
+    //!
+    //! \brief Set the multi-dimension dilation of the deconvolution.
+    //!
+    //! Default: (1, 1, ..., 1)
+    //!
+    //! \see getDilationNd()
+    //!
+    void setDilationNd(Dims const& dilation) noexcept
+    {
+        mImpl->setDilationNd(dilation);
+    }
+
+    //!
+    //! \brief Get the multi-dimension dilation of the deconvolution.
+    //!
+    //! \see setDilationNd()
+    //!
+    Dims getDilationNd() const noexcept
+    {
+        return mImpl->getDilationNd();
+    }
+
+protected:
+    virtual ~IDeconvolutionLayer() noexcept = default;
+    apiv::VDeconvolutionLayer* mImpl;
+};
+
+//!
+//! \enum ElementWiseOperation
+//!
+//! \brief Enumerates the binary operations that may be performed by an ElementWise layer.
+//!
+//! Operations kAND, kOR, and kXOR must have inputs of DataType::kBOOL.
+//!
+//! All other operations must have inputs of floating-point type, DataType::kINT8, DataType::kINT32, or
+//! DataType::kINT64.
+//!
+//! \see IElementWiseLayer
+//!
+enum class ElementWiseOperation : int32_t
+{
+    kSUM = 0,       //!< Sum of the two elements.
+    kPROD = 1,      //!< Product of the two elements.
+    kMAX = 2,       //!< Maximum of the two elements.
+    kMIN = 3,       //!< Minimum of the two elements.
+    kSUB = 4,       //!< Subtract the second element from the first.
+    kDIV = 5,       //!< Divide the first element by the second.
+    kPOW = 6,       //!< The first element to the power of the second element.
+    kFLOOR_DIV = 7, //!< Floor division of the first element by the second.
+    kAND = 8,       //!< Logical AND of the two elements.
+    kOR = 9,        //!< Logical OR of the two elements.
+    kXOR = 10,      //!< Logical XOR of the two elements.
+    kEQUAL = 11,    //!< Check if the two elements are equal.
+    kGREATER = 12,  //!< Check if element in first tensor is greater than corresponding element in second tensor.
+    kLESS = 13      //!< Check if element in first tensor is less than corresponding element in second tensor.
+};
+
+namespace impl
+{
+//!
+//! Maximum number of elements in ElementWiseOperation enum (14: kSUM = 0 through kLESS = 13).
+//!
+//! \see ElementWiseOperation
+//!
+template <>
+struct EnumMaxImpl<ElementWiseOperation>
+{
+    static constexpr int32_t kVALUE = 14;
+};
+} // namespace impl
+
+//!
+//! \class IElementWiseLayer
+//!
+//! \brief An elementwise layer in a network definition.
+//!
+//! This layer applies a per-element binary operation between corresponding elements of two tensors.
+//!
+//! The input tensors must have the same rank. For each dimension, their lengths must
+//! match, or one of them must be one. In the latter case, the tensor is broadcast along that axis.
+//!
+//! The output tensor has the same rank as the inputs. For each output dimension,
+//! its length is equal to the lengths of the corresponding input dimensions if they match,
+//! otherwise it is equal to the length that is not one.
+//!
+//! \warning When running this layer on the DLA with Int8 data type, the dynamic ranges of two input tensors shall be
+//! equal. If the dynamic ranges are generated using calibrator, the largest value shall be used.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IElementWiseLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the binary operation for the layer.
+    //!
+    //! DLA supports only kSUM, kPROD, kMAX, kMIN, and kSUB.
+    //!
+    //! \param op The binary operation to apply to the two inputs.
+    //!
+    //! \see getOperation(), ElementWiseOperation
+    //!
+    void setOperation(ElementWiseOperation op) noexcept
+    {
+        mImpl->setOperation(op);
+    }
+
+    //!
+    //! \brief Get the binary operation for the layer.
+    //!
+    //! \return The binary operation reported by the implementation object.
+    //!
+    //! \see setOperation(), ElementWiseOperation
+    //!
+    ElementWiseOperation getOperation() const noexcept
+    {
+        return mImpl->getOperation();
+    }
+
+protected:
+    apiv::VElementWiseLayer* mImpl;
+    virtual ~IElementWiseLayer() noexcept = default;
+};
+
+//!
+//! \brief Controls which form of gathering an IGatherLayer performs.
+//!
+//! \see IGatherLayer
+//!
+enum class GatherMode : int32_t
+{
+    kDEFAULT = 0, //!< Similar to ONNX Gather
+    kELEMENT = 1, //!< Similar to ONNX GatherElements
+    kND = 2       //!< Similar to ONNX GatherND
+};
+
+//!
+//! Maximum number of elements in GatherMode enum (3: kDEFAULT = 0 through kND = 2).
+//!
+//! \see GatherMode
+//!
+template <>
+constexpr inline int32_t EnumMax<GatherMode>() noexcept
+{
+    return 3;
+}
+
+//!
+//! \class IGatherLayer
+//!
+//! \brief A Gather layer in a network definition. Supports several kinds of gathering.
+//!
+//! The Gather layer has two input tensors, Data and Indices, and an output tensor Output.
+//! Additionally, there are three parameters: mode, nbElementWiseDims, and axis that control
+//! how the indices are interpreted.
+//!
+//! * Data is a tensor of rank r >= 1 that stores the values to be gathered in Output.
+//! * Indices is a tensor of rank q that determines which locations in Data to gather.
+//!     * GatherMode::kDEFAULT: q >= 0
+//!     * GatherMode::kND:      q >= 1 and the last dimension of Indices must be a build time constant.
+//!     * GatherMode::kELEMENT: q = r
+//! * Output stores the gathered results. Its rank s depends on the mode:
+//!     * GatherMode::kDEFAULT: s = q + r - 1 - nbElementWiseDims
+//!     * GatherMode::kND:      s = q + r - indices.d[q-1] - 1 - nbElementWiseDims
+//!     * GatherMode::kELEMENT: s = q = r.
+//!
+//! The dimensions of the output likewise depend on the mode:
+//!
+//!     GatherMode::kDEFAULT:
+//!
+//!         First nbElementWiseDims of output are computed by applying broadcast rules to
+//!         first nbElementWiseDims of indices and data. Note that nbElementWiseDims <= 1.
+//!         Rest of dimensions are computed by copying dimensions of Data, and replacing
+//!         the dimension for axis gatherAxis with the dimensions of indices.
+//!
+//!     GatherMode::kND:
+//!         If indices.d[q-1] = r - nbElementWiseDims
+//!             output.d = [indices.d[0], ... , indices.d[q-2]]
+//!         Else if indices.d[q-1] < r - nbElementWiseDims
+//!             output.d = [indices.d[0], ... , indices.d[q-1], data.d[nbElementWiseDims + indices.d[q-1] + q],
+//!             data.d[r-1]]
+//!         Else
+//!             This is a build time error
+//!
+//!     GatherMode::kELEMENT:
+//!         The output dimensions match the dimensions of the indices tensor.
+//!
+//! The types of Data and Output must be the same, and Indices shall be DataType::kINT32 or DataType::kINT64.
+//!
+//! How the elements of Data are gathered depends on the mode:
+//!
+//!     GatherMode::kDEFAULT:
+//!         Each index in indices is used to index Data along axis gatherAxis.
+//!
+//!     GatherMode::kND:
+//!         Indices is a rank q integer tensor, best thought of as a rank (q-1) tensor of
+//!         indices into data, where each element defines a slice of data.
+//!         The operation can be formulated as output[i_1, ..., i_{q-1}] = data[indices[i_1, ..., i_{q-1}]]
+//!
+//!     GatherMode::kELEMENT:
+//!
+//!         Here "axis" denotes the result of getGatherAxis().
+//!         For each element X of indices:
+//!             Let J denote a sequence for the subscripts of X
+//!             Let K = sequence J with element [axis] replaced by X
+//!             output[J] = data[K]
+//!
+//! The handling of nbElementWiseDims depends on the mode:
+//!     * GatherMode::kDEFAULT: nbElementWiseDims <= 1. Broadcast is supported across the elementwise dimension if
+//!     present.
+//!     * GatherMode::kND:      0 <= nbElementWiseDims < rank(Data)-1. Broadcast is not supported across the elementwise
+//!     dimensions.
+//!     * GatherMode::kELEMENT: nbElementWiseDims = 0
+//!
+//! Notes:
+//! * For modes GatherMode::kND and GatherMode::kELEMENT, the first nbElementWiseDims dimensions of data and index must
+//! be equal. If not, an error will be reported at build time or run time.
+//! * If an axis of Data has dynamic length, using a negative index for it has undefined behavior.
+//! * No DLA support
+//! * Zero will be stored for OOB access
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IGatherLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the axis used by GatherMode::kELEMENT and GatherMode::kDEFAULT.
+    //! The axis must be less than the number of dimensions in the data input.
+    //! The axis defaults to 0.
+    //!
+    //! \warning Undefined behavior when used with GatherMode::kND.
+    //!
+    //! \see getGatherAxis()
+    //!
+    void setGatherAxis(int32_t axis) noexcept
+    {
+        mImpl->setGatherAxis(axis);
+    }
+
+    //!
+    //! \brief Get the axis to gather on.
+    //!
+    //! \warning Undefined behavior when used with GatherMode::kND.
+    //!
+    //! \see setGatherAxis()
+    //!
+    int32_t getGatherAxis() const noexcept
+    {
+        return mImpl->getGatherAxis();
+    }
+
+    //!
+    //! \brief Set the number of leading dimensions of indices tensor to be handled elementwise.
+    //!
+    //! The gathering of indexing starts from the dimension of data[NbElementWiseDims:].
+    //! The NbElementWiseDims must be less than the Rank of the data input.
+    //!
+    //! \param elementWiseDims number of dims to be handled as elementwise.
+    //!
+    //! Default: 0
+    //!
+    //! The value of nbElementWiseDims and GatherMode are checked during network validation:
+    //!
+    //! GatherMode::kDEFAULT: nbElementWiseDims can be 0 or 1.
+    //! GatherMode::kND: nbElementWiseDims can be between 0 and one less than rank(data).
+    //! GatherMode::kELEMENT: nbElementWiseDims must be 0.
+    //!
+    //! \see getNbElementWiseDims()
+    //!
+    void setNbElementWiseDims(int32_t elementWiseDims) noexcept
+    {
+        mImpl->setNbElementWiseDims(elementWiseDims);
+    }
+
+    //!
+    //! \brief Get the number of leading dimensions of indices tensor to be handled elementwise.
+    //!
+    //! \see setNbElementWiseDims()
+    //!
+    int32_t getNbElementWiseDims() const noexcept
+    {
+        return mImpl->getNbElementWiseDims();
+    }
+
+    //!
+    //! \brief Set the gather mode.
+    //!
+    //! \see getMode()
+    //!
+    void setMode(GatherMode mode) noexcept
+    {
+        mImpl->setMode(mode);
+    }
+
+    //!
+    //! \brief Get the gather mode.
+    //!
+    //! \see setMode()
+    //!
+    GatherMode getMode() const noexcept
+    {
+        return mImpl->getMode();
+    }
+
+protected:
+    apiv::VGatherLayer* mImpl;
+    virtual ~IGatherLayer() noexcept = default;
+};
+
+//!
+//! \class IPluginV2Layer
+//!
+//! \brief Layer type for IPluginV2 plugins.
+//!
+//! \see IPluginV2
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \deprecated Deprecated in TensorRT 10.8. Superseded by IPluginV3Layer.
+//!
+class TRT_DEPRECATED IPluginV2Layer : public ILayer
+{
+public:
+    //!
+    //! \brief Get the plugin for the layer, returned by reference.
+    //!
+    //! \see IPluginV2
+    //!
+    IPluginV2& getPlugin() noexcept
+    {
+        return mImpl->getPlugin();
+    }
+
+protected:
+    apiv::VPluginV2Layer* mImpl;
+    virtual ~IPluginV2Layer() noexcept = default;
+};
+
+//!
+//! \class IPluginV3Layer
+//!
+//! \brief Layer type for IPluginV3 plugins.
+//!
+//! \see IPluginV3
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IPluginV3Layer : public ILayer
+{
+public:
+    //!
+    //! \brief Get the plugin for the layer, returned by reference.
+    //!
+    //! \see IPluginV3
+    //!
+    IPluginV3& getPlugin() noexcept
+    {
+        return mImpl->getPlugin();
+    }
+
+protected:
+    apiv::VPluginV3Layer* mImpl;
+    virtual ~IPluginV3Layer() noexcept = default;
+};
+
+//!
+//! \enum UnaryOperation
+//!
+//! \brief Enumerates the unary operations that may be performed by a Unary layer.
+//!
+//! Operation kNOT must have inputs of DataType::kBOOL.
+//!
+//! Operations kSIGN and kABS must have inputs of floating-point type, DataType::kINT8, DataType::kINT32 or
+//! DataType::kINT64.
+//!
+//! Operation kISINF must have inputs of floating-point type.
+//!
+//! All other operations must have inputs of floating-point type.
+//!
+//! \see IUnaryLayer
+//!
+enum class UnaryOperation : int32_t
+{
+    kEXP = 0,    //!< Exponentiation.
+    kLOG = 1,    //!< Log (base e).
+    kSQRT = 2,   //!< Square root.
+    kRECIP = 3,  //!< Reciprocal.
+    kABS = 4,    //!< Absolute value.
+    kNEG = 5,    //!< Negation.
+    kSIN = 6,    //!< Sine.
+    kCOS = 7,    //!< Cosine.
+    kTAN = 8,    //!< Tangent.
+    kSINH = 9,   //!< Hyperbolic sine.
+    kCOSH = 10,  //!< Hyperbolic cosine.
+    kASIN = 11,  //!< Inverse sine.
+    kACOS = 12,  //!< Inverse cosine.
+    kATAN = 13,  //!< Inverse tangent.
+    kASINH = 14, //!< Inverse hyperbolic sine.
+    kACOSH = 15, //!< Inverse hyperbolic cosine.
+    kATANH = 16, //!< Inverse hyperbolic tangent.
+    kCEIL = 17,  //!< Ceiling.
+    kFLOOR = 18, //!< Floor.
+    kERF = 19,   //!< Gauss error function.
+    kNOT = 20,   //!< Logical NOT.
+    kSIGN = 21,  //!< Sign: if input > 0, output 1; if input < 0, output -1; if input == 0, output 0.
+    kROUND = 22, //!< Round to nearest even for floating-point data type.
+    kISINF = 23, //!< Return true if input value equals +/- infinity for floating-point data type.
+    kISNAN = 24, //!< Return true if input value is a NaN for floating-point data type.
+};
+
+//!
+//! Maximum number of elements in UnaryOperation enum (25: kEXP = 0 through kISNAN = 24).
+//!
+//! \see UnaryOperation
+//!
+template <>
+constexpr inline int32_t EnumMax<UnaryOperation>() noexcept
+{
+    return 25;
+}
+
+//!
+//! \class IUnaryLayer
+//!
+//! \brief Layer that represents a unary operation.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IUnaryLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the unary operation for the layer.
+    //!
+    //! When running this layer on DLA, only UnaryOperation::kABS is supported.
+    //!
+    //! \see getOperation(), UnaryOperation
+    //!
+    void setOperation(UnaryOperation op) noexcept
+    {
+        mImpl->setOperation(op);
+    }
+
+    //!
+    //! \brief Get the unary operation for the layer.
+    //!
+    //! \see setOperation(), UnaryOperation
+    //!
+    UnaryOperation getOperation() const noexcept
+    {
+        return mImpl->getOperation();
+    }
+
+protected:
+    apiv::VUnaryLayer* mImpl;
+    virtual ~IUnaryLayer() noexcept = default;
+};
+
+//!
+//! \enum ReduceOperation
+//!
+//! \brief Enumerates the reduce operations that may be performed by a Reduce layer.
+//!
+//! The table shows the result of reducing across an empty volume of a given type.
+//!
+//! Operation | kFLOAT and kHALF  | kINT32  | kINT8
+//! --------- | ----------------- | ------- | -----
+//! kSUM      | 0                 | 0       | 0
+//! kPROD     | 1                 | 1       | 1
+//! kMAX      | negative infinity | INT_MIN | -128
+//! kMIN      | positive infinity | INT_MAX | 127
+//! kAVG      | NaN               | 0       | -128
+//!
+//! The current version of TensorRT usually performs reduction for kINT8 via kFLOAT or kHALF.
+//! The kINT8 values show the quantized representations of the floating-point values.
+//!
+enum class ReduceOperation : int32_t
+{
+    kSUM = 0,  //!< Sum of the reduced elements.
+    kPROD = 1, //!< Product of the reduced elements.
+    kMAX = 2,  //!< Maximum of the reduced elements.
+    kMIN = 3,  //!< Minimum of the reduced elements.
+    kAVG = 4   //!< Average of the reduced elements.
+};
+
+//!
+//! Maximum number of elements in ReduceOperation enum (5: kSUM = 0 through kAVG = 4).
+//!
+//! \see ReduceOperation
+//!
+template <>
+constexpr inline int32_t EnumMax<ReduceOperation>() noexcept
+{
+    return 5;
+}
+
+//!
+//! \class IReduceLayer
+//!
+//! \brief Layer that represents a reduction across a non-bool tensor.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IReduceLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the reduce operation for the layer.
+    //!
+    //! \see getOperation(), ReduceOperation
+    //!
+    void setOperation(ReduceOperation op) noexcept
+    {
+        mImpl->setOperation(op);
+    }
+
+    //!
+    //! \brief Get the reduce operation for the layer.
+    //!
+    //! \see setOperation(), ReduceOperation
+    //!
+    ReduceOperation getOperation() const noexcept
+    {
+        return mImpl->getOperation();
+    }
+
+    //!
+    //! \brief Set the axes over which to reduce.
+    //!
+    //! \see getReduceAxes()
+    //!
+    void setReduceAxes(uint32_t reduceAxes) noexcept
+    {
+        mImpl->setReduceAxes(reduceAxes);
+    }
+
+    //!
+    //! \brief Get the axes over which to reduce for the layer.
+    //!
+    //! \see setReduceAxes()
+    //!
+    uint32_t getReduceAxes() const noexcept
+    {
+        return mImpl->getReduceAxes();
+    }
+
+    //!
+    //! \brief Set the boolean that specifies whether or not to keep the reduced dimensions for the layer.
+    //!
+    //! \see getKeepDimensions()
+    //!
+    void setKeepDimensions(bool keepDimensions) noexcept
+    {
+        mImpl->setKeepDimensions(keepDimensions);
+    }
+
+    //!
+    //! \brief Get the boolean that specifies whether or not to keep the reduced dimensions for the layer.
+    //!
+    //! \see setKeepDimensions()
+    //!
+    bool getKeepDimensions() const noexcept
+    {
+        return mImpl->getKeepDimensions();
+    }
+
+protected:
+    apiv::VReduceLayer* mImpl;
+    virtual ~IReduceLayer() noexcept = default;
+};
+
+//!
+//! \class IPaddingLayer
+//!
+//! \brief Layer that represents a padding operation.
+//!
+//! The padding layer adds zero-padding at the start and end of the input tensor. It supports padding
+//! only the last two dimensions. Applying negative padding results in cropping of the input.
+//!
+//! To pad across any subset of dimensions, use ISliceLayer with SampleMode::kFILL.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IPaddingLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the padding that is applied at the start of the tensor.
+    //!
+    //! Negative padding results in trimming the edge by the specified amount.
+    //!
+    //! \warning Only 2 dimensional padding is currently supported.
+    //!
+    //! \see getPrePaddingNd()
+    //!
+    void setPrePaddingNd(Dims const& padding) noexcept
+    {
+        mImpl->setPrePaddingNd(padding);
+    }
+
+    //!
+    //! \brief Get the padding that is applied at the start of the tensor.
+    //!
+    //! \warning Only 2 dimensional padding is currently supported.
+    //!
+    //! \see setPrePaddingNd()
+    //!
+    Dims getPrePaddingNd() const noexcept
+    {
+        return mImpl->getPrePaddingNd();
+    }
+
+    //!
+    //! \brief Set the padding that is applied at the end of the tensor.
+    //!
+    //! Negative padding results in trimming the edge by the specified amount.
+    //!
+    //! \warning Only 2 dimensional padding is currently supported.
+    //!
+    //! \see getPostPaddingNd()
+    //!
+    void setPostPaddingNd(Dims const& padding) noexcept
+    {
+        mImpl->setPostPaddingNd(padding);
+    }
+
+    //!
+    //! \brief Get the padding that is applied at the end of the tensor.
+    //!
+    //! \warning Only 2 dimensional padding is currently supported.
+    //!
+    //! \see setPostPaddingNd()
+    //!
+    Dims getPostPaddingNd() const noexcept
+    {
+        return mImpl->getPostPaddingNd();
+    }
+
+protected:
+    apiv::VPaddingLayer* mImpl;
+    virtual ~IPaddingLayer() noexcept = default;
+};
+
+//!
+//! \struct Permutation
+//!
+//! \brief Represents a permutation of dimensions.
+//!
+struct Permutation
+{
+    //!
+    //! The elements of the permutation.
+    //! The permutation is applied as inputDimensionIndex = permutation.order[outputDimensionIndex], so to
+    //! permute from CHW order to HWC order, the required permutation is [1, 2, 0], and to permute
+    //! from HWC to CHW, the required permutation is [2, 0, 1].
+    //!
+    int32_t order[Dims::MAX_DIMS];
+};
+
+//! \class IShuffleLayer
+//!
+//! \brief Layer type for shuffling data.
+//!
+//! This layer shuffles data by applying in sequence: a transpose operation, a reshape operation
+//! and a second transpose operation. The dimension types of the output are those of the reshape dimension.
+//!
+//! The layer has an optional second input. If present, it must be a 1D tensor of type Int32 or Int64,
+//! and the reshape dimensions are taken from it.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IShuffleLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the permutation applied by the first transpose operation.
+    //!
+    //! \param permutation The dimension permutation applied before the reshape.
+    //!
+    //! The default is the identity permutation.
+    //!
+    //! \see getFirstTranspose
+    //!
+    void setFirstTranspose(Permutation permutation) noexcept
+    {
+        mImpl->setFirstTranspose(permutation);
+    }
+
+    //!
+    //! \brief Get the permutation applied by the first transpose operation.
+    //!
+    //! \return The dimension permutation applied before the reshape.
+    //!
+    //! \see setFirstTranspose
+    //!
+    Permutation getFirstTranspose() const noexcept
+    {
+        return mImpl->getFirstTranspose();
+    }
+
+    //!
+    //! \brief Set the reshaped dimensions.
+    //!
+    //! \param dimensions The reshaped dimensions.
+    //!
+    //! Two special values can be used as dimensions.
+    //!
+    //! Value 0 copies the corresponding dimension from input. This special value
+    //! can be used more than once in the dimensions. If number of reshape
+    //! dimensions is less than input, 0s are resolved by aligning the most
+    //! significant dimensions of input.
+    //!
+    //! Value -1 infers that particular dimension by looking at input and rest
+    //! of the reshape dimensions. Note that only a maximum of one dimension is
+    //! permitted to be specified as -1.
+    //! Avoid using -1 if the input can have zero volume and any of the other
+    //! reshape dimensions can be zero (after resolving special treatment of 0),
+    //! because the solution for the -1 becomes indeterminate and TensorRT will report an error.
+    //!
+    //! The product of the new dimensions must be equal to the product of the old.
+    //!
+    //! If a second input had been used to create this layer, that input is reset to null by this method.
+    //!
+    void setReshapeDimensions(Dims const& dimensions) noexcept
+    {
+        mImpl->setReshapeDimensions(dimensions);
+    }
+
+    //!
+    //! \brief Get the reshaped dimensions.
+    //!
+    //! \return The reshaped dimensions.
+    //!
+    //! If a second input is present and non-null, or setReshapeDimensions has
+    //! not yet been called, this function returns Dims with nbDims == -1.
+    //!
+    Dims getReshapeDimensions() const noexcept
+    {
+        return mImpl->getReshapeDimensions();
+    }
+
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor.
+    //!
+    //! \param index the index of the input to modify.
+    //! \param tensor the new input tensor
+    //!
+    //! Sets the input tensor for the given index. The index must be 0 for a static shuffle layer.
+    //! A static shuffle layer is converted to a dynamic shuffle layer by calling setInput with an index 1.
+    //! A dynamic shuffle layer cannot be converted back to a static shuffle layer.
+    //!
+    //! For a dynamic shuffle layer, the values 0 and 1 are valid.
+    //! The indices in the dynamic case are as follows:
+    //!
+    //! - 0: Data or Shape tensor to be shuffled.
+    //! - 1: The dimensions for the reshape operation, as a 1D tensor of type Int32 or Int64.
+    //!
+    //! If this function is called with the value 1, then the function getNbInputs() changes
+    //! from returning 1 to 2.
+    //!
+    //! The reshape dimensions are treated identically to how they are treated if set statically
+    //! via setReshapeDimensions. In particular, a -1 is treated as a wildcard even if dynamically
+    //! supplied at runtime, and a 0 is treated as a placeholder if getZeroIsPlaceholder() = true,
+    //! which is the default. If the placeholder interpretation of 0 is unwanted because the
+    //! runtime dimension should be 0 when the reshape dimension is 0, be sure to call
+    //! setZeroIsPlaceholder(false) on the IShuffleLayer.
+    //!
+    //! \see setReshapeDimensions.
+    //!
+    using ILayer::setInput;
+
+    //!
+    //! \brief Set the permutation applied by the second transpose operation.
+    //!
+    //! \param permutation The dimension permutation applied after the reshape.
+    //!
+    //! The default is the identity permutation.
+    //!
+    //! The permutation is applied as inputDimensionIndex = permutation.order[outputDimensionIndex], so to
+    //! permute from CHW order to HWC order, the required permutation is [1, 2, 0].
+    //!
+    //! \see getSecondTranspose
+    //!
+    void setSecondTranspose(Permutation permutation) noexcept
+    {
+        mImpl->setSecondTranspose(permutation);
+    }
+
+    //!
+    //! \brief Get the permutation applied by the second transpose operation.
+    //!
+    //! \return The dimension permutation applied after the reshape.
+    //!
+    //! \see setSecondTranspose
+    //!
+    Permutation getSecondTranspose() const noexcept
+    {
+        return mImpl->getSecondTranspose();
+    }
+
+    //!
+    //! \brief Set meaning of 0 in reshape dimensions.
+    //!
+    //! If true, then a 0 in the reshape dimensions denotes copying the corresponding
+    //! dimension from the first input tensor.  If false, then a 0 in the reshape
+    //! dimensions denotes a zero-length dimension.
+    //!
+    //! Default: true
+    //!
+    //! \see getZeroIsPlaceholder()
+    //!
+    void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
+    {
+        mImpl->setZeroIsPlaceholder(zeroIsPlaceholder);
+    }
+
+    //!
+    //! \brief Get meaning of 0 in reshape dimensions.
+    //!
+    //! \return true if 0 is placeholder for corresponding input dimension,
+    //!         false if 0 denotes a zero-length dimension.
+    //!
+    //! \see setZeroIsPlaceholder
+    //!
+    bool getZeroIsPlaceholder() const noexcept
+    {
+        return mImpl->getZeroIsPlaceholder();
+    }
+
+protected:
+    apiv::VShuffleLayer* mImpl; //!< Implementation object that every accessor above forwards to.
+    virtual ~IShuffleLayer() noexcept = default;
+};
+
+//!
+//! \brief Controls how ISliceLayer and IGridSample handle out-of-bounds coordinates.
+//!
+//! \see ISliceLayer and IGridSample
+//!
+enum class SampleMode : int32_t
+{
+    kSTRICT_BOUNDS = 0,                            //!< Fail with an error when the coordinates are out of bounds.
+    kWRAP = 1,                                     //!< Coordinates wrap around periodically.
+    kCLAMP = 2,                                    //!< Out-of-bounds indices are clamped to the bounds.
+    kFILL = 3,                                     //!< Use the fill input value when coordinates are out of bounds.
+    kREFLECT = 4, //!< Coordinates reflect. The axis of reflection is the middle of the perimeter pixel and the
+                  //!< reflections are repeated indefinitely within the padded regions. Repeats values for a single
+                  //!< pixel and throws an error for zero pixels.
+};
+
+//!
+//! Maximum number of elements in SampleMode enum.
+//!
+//! \see SampleMode
+//!
+template <>
+constexpr inline int32_t EnumMax<SampleMode>() noexcept
+{
+    return 5; // SampleMode has five enumerators: kSTRICT_BOUNDS (0) through kREFLECT (4).
+}
+
+//!
+//! \brief Slices an input tensor into an output tensor based on the offset and strides.
+//!
+//! The slice layer has two variants, static and dynamic. Static slice specifies the start, size, and stride
+//! dimensions at layer creation time via Dims and can use the get/set accessor functions of the ISliceLayer.
+//! Static slice layers can also optionally specify axes through the get/set accessor functions of the ISliceLayer.
+//! Dynamic slice specifies one or more of start, size, stride, or axes as ITensors, by using ILayer::setInput to add
+//! a second, third, fourth, or sixth input respectively. The corresponding Dims are used if an input
+//! is missing or null.
+//!
+//! An application can determine if the ISliceLayer has a dynamic output shape based on whether
+//! the size or axes input is present and non-null.
+//!
+//! The slice layer selects for each dimension a start location from within the input tensor, and
+//! copies elements to the output tensor using the specified stride across the input tensor.
+//! Start, size, and stride tensors must be 1D tensors of type Int32 or Int64 if not specified via Dims.
+//!
+//! An example of using slice on a tensor:
+//! input = {{0, 2, 4}, {1, 3, 5}}
+//! start = {1, 0}
+//! size = {1, 2}
+//! stride = {1, 2}
+//! output = {{1, 5}}
+//!
+//! If axes are provided then start, size, and stride must have the same length as axes
+//! and specify a subset of dimensions to slice. If axes are not provided, start, size, and stride
+//! must be of the same length as the rank of the input tensor.
+//!
+//! An example of using slice on a tensor with axes specified:
+//! input = {{0, 2, 4}, {1, 3, 5}}
+//! start = {1}
+//! size = {2}
+//! stride = {1}
+//! axes = {1}
+//! output = {{2, 4}, {3, 5}}
+//!
+//! When the sampleMode is kCLAMP or kREFLECT, for each input dimension, if its size is 0 then the corresponding output
+//! dimension must be 0 too.
+//!
+//! When the sampleMode is kFILL, the fifth input to the slice layer is used to determine the value to fill in out-of-bound
+//! indices. It is an error to specify the fifth input in any other sampleMode.
+//!
+//! A slice layer can produce a shape tensor if the following conditions are met:
+//!
+//! * start, size, and stride are build time constants, either as static Dims or as constant input tensors.
+//! * axes, if provided, are build time constants, either as static Dims or as a constant input tensor.
+//! * The number of elements in the output tensor does not exceed 2 * Dims::MAX_DIMS.
+//!
+//! The input tensor is a shape tensor if the output is a shape tensor.
+//!
+//! The following constraints must be satisfied to execute this layer on DLA:
+//! * start, size, and stride are build time constants, either as static Dims or as constant input tensors.
+//! * axes, if provided, are build time constants, either as static Dims or as a constant input tensor.
+//! * sampleMode is kSTRICT_BOUNDS, kWRAP, or kFILL.
+//! * Strides are 1 for all dimensions.
+//! * Slicing is not performed on the first dimension.
+//! * The input tensor has four dimensions.
+//! * For kFILL sampleMode, the fill value input is a scalar output of an IConstantLayer with value 0 that is not
+//!   consumed by any other layer.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ISliceLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the start offset that the slice layer uses to create the output slice.
+    //!
+    //! \param start The start offset to read data from the input tensor.
+    //!
+    //! If a second input had been used to create this layer, that input is reset to null by this method.
+    //!
+    //! \see getStart
+    //!
+    void setStart(Dims const& start) noexcept
+    {
+        mImpl->setStart(start);
+    }
+
+    //!
+    //! \brief Get the start offset for the slice layer.
+    //!
+    //! \return The start offset, or an invalid Dims structure.
+    //!
+    //! If the second input is present and non-null,
+    //! this function returns a Dims with nbDims = -1.
+    //!
+    //! \see setStart
+    //!
+    Dims getStart() const noexcept
+    {
+        return mImpl->getStart();
+    }
+
+    //!
+    //! \brief Set the dimensions of the output slice.
+    //!
+    //! \param size The dimensions of the output slice.
+    //!
+    //! If a third input had been used to create this layer, that input is reset to null by this method.
+    //!
+    //! \see getSize
+    //!
+    void setSize(Dims const& size) noexcept
+    {
+        mImpl->setSize(size);
+    }
+
+    //!
+    //! \brief Get dimensions of the output slice.
+    //!
+    //! \return The output dimension, or an invalid Dims structure.
+    //!
+    //! If the third input is present and non-null,
+    //! this function returns a Dims with nbDims = -1.
+    //!
+    //! \see setSize
+    //!
+    Dims getSize() const noexcept
+    {
+        return mImpl->getSize();
+    }
+
+    //!
+    //! \brief Set the stride for computing the output slice data.
+    //!
+    //! \param stride The dimensions of the stride to compute the values to store in the output slice.
+    //!
+    //! If a fourth input had been used to create this layer, that input is reset to null by this method.
+    //!
+    //! \see getStride
+    //!
+    void setStride(Dims const& stride) noexcept
+    {
+        mImpl->setStride(stride);
+    }
+
+    //!
+    //! \brief Get the stride for the output slice.
+    //!
+    //! \return The slicing stride, or an invalid Dims structure.
+    //!
+    //! If the fourth input is present and non-null,
+    //! this function returns a Dims with nbDims = -1.
+    //!
+    //! \see setStride
+    //!
+    Dims getStride() const noexcept
+    {
+        return mImpl->getStride();
+    }
+
+    //!
+    //! \brief Set the slice mode.
+    //!
+    //! \see getMode()
+    //!
+    void setMode(SampleMode mode) noexcept
+    {
+        mImpl->setMode(mode);
+    }
+
+    //!
+    //! \brief Get the slice mode.
+    //!
+    //! \see setMode()
+    //!
+    SampleMode getMode() const noexcept
+    {
+        return mImpl->getMode();
+    }
+
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor.
+    //!
+    //! \param index the index of the input to modify.
+    //! \param tensor the new input tensor
+    //!
+    //! For a slice layer, the values 0-5 are valid.
+    //! The indices are as follows:
+    //!
+    //! - 0: Tensor to be sliced.
+    //! - 1: The start tensor to begin slicing, as a 1D tensor of type Int32 or Int64.
+    //! - 2: The size tensor of the resulting slice, as a 1D tensor of type Int32 or Int64.
+    //! - 3: The stride of the slicing operation, as a 1D tensor of type Int32 or Int64.
+    //! - 4: Value for the kFILL slice mode. The fill value data type should either be the same
+    //!      or be implicitly convertible to the input data type.
+    //!      Implicit data type conversion is supported among kFLOAT, kHALF, kINT8, and kFP8 data types.
+    //!      This input is disallowed for other modes.
+    //! - 5: The axes tensor indicating the corresponding axes that start, size, and stride
+    //!      should apply to, as a 1D tensor of type Int32 or Int64. Negative values for axes
+    //!      indicate indexing from the back of the input tensor. Values must be unique and be
+    //!      within the interval of [-rank(input), rank(input)-1].
+    //!
+    //! Using the corresponding setter resets the input to null.
+    //!
+    //! If this function is called with a value greater than 0, then the function getNbInputs() changes
+    //! from returning 1 to index + 1.
+    //!
+    using ILayer::setInput;
+
+    //!
+    //! \brief Set the axes for this ISliceLayer.
+    //!
+    //! \param axes The axes to which the start, size, and stride parameters of the slice apply.
+    //!
+    //! If a sixth input had been used to create this layer, that input is reset to null by this method.
+    //!
+    //! \see getAxes
+    //!
+    void setAxes(Dims const& axes) noexcept
+    {
+        mImpl->setAxes(axes);
+    }
+
+    //!
+    //! \brief Get the axes for this ISliceLayer.
+    //!
+    //! \return The axes to which the start, size, and stride parameters of this slice apply.
+    //!
+    //! If the sixth input is present and non-null,
+    //! this function returns a Dims with nbDims = -1.
+    //!
+    //! \see setAxes
+    //!
+    Dims getAxes() const noexcept
+    {
+        return mImpl->getAxes();
+    }
+
+protected:
+    apiv::VSliceLayer* mImpl; //!< Implementation object that every accessor above forwards to.
+    virtual ~ISliceLayer() noexcept = default;
+};
+
+//! \class IShapeLayer
+//!
+//! \brief Layer type for getting shape of a tensor.
+//!
+//! This layer sets the output to a 1D tensor of type Int64 with the dimensions of the input tensor.
+//!
+//! For example, if the input is a four-dimensional tensor (of any type) with
+//! dimensions [2,3,5,7], the output tensor is a one-dimensional Int64 tensor
+//! of length 4 containing the sequence 2, 3, 5, 7.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IShapeLayer : public ILayer
+{
+protected:
+    apiv::VShapeLayer* mImpl; //!< Implementation pointer; this class defines no inline accessors that use it.
+    virtual ~IShapeLayer() noexcept = default;
+};
+
+//!
+//! \enum TopKOperation
+//!
+//! \brief Enumerates the operations that may be performed by a TopK layer.
+//!
+enum class TopKOperation : int32_t
+{
+    kMAX = 0, //!< Maximum of the elements (selected via ITopKLayer::setOperation()).
+    kMIN = 1, //!< Minimum of the elements (selected via ITopKLayer::setOperation()).
+};
+
+//!
+//! Maximum number of elements in TopKOperation enum.
+//!
+//! \see TopKOperation
+//!
+template <>
+constexpr inline int32_t EnumMax<TopKOperation>() noexcept
+{
+    return 2; // TopKOperation has two enumerators: kMAX (0) and kMIN (1).
+}
+
+//!
+//! \class ITopKLayer
+//!
+//! \brief Layer that represents a TopK reduction.
+//!
+//! This layer can accept both static and dynamic k. Static k can be set through the addTopK() API function,
+//! or accessed using the getK() and setK() functions after layer creation. For dynamic k, use the setInput()
+//! method to pass in k as a tensor with index 1, which overrides the static k value in calculations.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ITopKLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the operation for the layer.
+    //!
+    //! \see getOperation(), TopKOperation
+    //!
+    void setOperation(TopKOperation op) noexcept
+    {
+        mImpl->setOperation(op);
+    }
+
+    //!
+    //! \brief Get the operation for the layer.
+    //!
+    //! \see setOperation(), TopKOperation
+    //!
+    TopKOperation getOperation() const noexcept
+    {
+        return mImpl->getOperation();
+    }
+
+    //!
+    //! \brief Set the static k value for the layer.
+    //!
+    //! Currently only values up to 3840 are supported.
+    //!
+    //! If a second input to this layer has been set, it will be reset to null by this method.
+    //!
+    //! \see getK()
+    //!
+    void setK(int32_t k) noexcept
+    {
+        mImpl->setK(k);
+    }
+
+    //!
+    //! \brief Get the k value for the layer.
+    //!
+    //! This function will return the static k value passed into addTopK(), or the value passed into setK().
+    //!
+    //! If a second layer input is present and non-null, this function returns -1.
+    //!
+    //! \see setK()
+    //!
+    int32_t getK() const noexcept
+    {
+        return mImpl->getK();
+    }
+
+    //!
+    //! \brief Set which axes to reduce for the layer.
+    //!
+    //! \see getReduceAxes()
+    //!
+    void setReduceAxes(uint32_t reduceAxes) noexcept
+    {
+        mImpl->setReduceAxes(reduceAxes);
+    }
+
+    //!
+    //! \brief Get the axes to reduce for the layer.
+    //!
+    //! \see setReduceAxes()
+    //!
+    uint32_t getReduceAxes() const noexcept
+    {
+        return mImpl->getReduceAxes();
+    }
+
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor.
+    //!
+    //! \param index The index of the input to modify.
+    //! \param tensor The new input tensor.
+    //!
+    //! For a TopK layer, the values 0-1 are valid.
+    //! The indices are as follows:
+    //!
+    //! - 0: Input data tensor.
+    //! - 1: A scalar Int32 tensor containing a positive value corresponding to the number of top
+    //!      elements to retrieve. Values larger than 3840 will result in a runtime error. If provided,
+    //!      this will override the static k value in calculations.
+    //!
+    using ILayer::setInput;
+
+protected:
+    apiv::VTopKLayer* mImpl; //!< Implementation object that every accessor above forwards to.
+    virtual ~ITopKLayer() noexcept = default;
+};
+
+//!
+//! \enum MatrixOperation
+//!
+//! \brief Enumerates the operations that may be performed on a tensor
+//!        by IMatrixMultiplyLayer before multiplication.
+//!
+enum class MatrixOperation : int32_t
+{
+    //! Treat the input tensor x as a matrix if it has two dimensions, or as a collection of
+    //! matrices if x has more than two dimensions, where the last two dimensions
+    //! are the matrix dimensions. x must have at least two dimensions.
+    kNONE = 0,
+
+    //! Like kNONE, but transpose the matrix dimensions.
+    kTRANSPOSE = 1,
+
+    //! Treat the input tensor x as a vector if it has one dimension, or as a collection of
+    //! vectors if x has more than one dimension. x must have at least one dimension.
+    //!
+    //! The first input tensor with dimensions [M,K] used with MatrixOperation::kVECTOR is equivalent to a tensor
+    //! with dimensions [M, 1, K] with MatrixOperation::kNONE, i.e. is treated as M row vectors of length K,
+    //! or dimensions [M, K, 1] with MatrixOperation::kTRANSPOSE.
+    //!
+    //! The second input tensor with dimensions [M,K] used with MatrixOperation::kVECTOR is equivalent to a tensor
+    //! with dimensions [M, K, 1] with MatrixOperation::kNONE, i.e. is treated as M column vectors of length K,
+    //! or dimensions [M, 1, K] with MatrixOperation::kTRANSPOSE.
+    kVECTOR = 2,
+};
+
+//!
+//! Maximum number of elements in MatrixOperation enum.
+//!
+//! \see MatrixOperation
+//!
+template <>
+constexpr inline int32_t EnumMax<MatrixOperation>() noexcept
+{
+    return 3; // MatrixOperation has three enumerators: kNONE (0), kTRANSPOSE (1), kVECTOR (2).
+}
+
+//!
+//! \class IMatrixMultiplyLayer
+//!
+//! \brief Layer that represents a Matrix Multiplication.
+//!
+//! Let A be op(getInput(0)) and B be op(getInput(1)) where
+//! op(x) denotes the corresponding MatrixOperation.
+//!
+//! When A and B are matrices or vectors, computes the inner product A * B:
+//!
+//!     matrix * matrix -> matrix
+//!     matrix * vector -> vector
+//!     vector * matrix -> vector
+//!     vector * vector -> scalar
+//!
+//! Inputs of higher rank are treated as collections of matrices or vectors.
+//! The output will be a corresponding collection of matrices, vectors, or scalars.
+//!
+//! For a dimension that is not one of the matrix or vector dimensions:
+//! If the dimension is 1 for one of the tensors but not the other tensor,
+//! the former tensor is broadcast along that dimension to match the dimension of the latter tensor.
+//! The number of these extra dimensions for A and B must match.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IMatrixMultiplyLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the operation for an input tensor.
+    //!
+    //! \param index Input tensor number (0 or 1).
+    //! \param op New operation.
+    //!
+    //! \see getOperation(), MatrixOperation
+    //!
+    void setOperation(int32_t index, MatrixOperation op) noexcept
+    {
+        mImpl->setOperation(index, op);
+    }
+
+    //!
+    //! \brief Get the operation for an input tensor.
+    //!
+    //! \param index Input tensor number (0 or 1).
+    //!
+    //! \see setOperation(), MatrixOperation
+    //!
+    MatrixOperation getOperation(int32_t index) const noexcept
+    {
+        return mImpl->getOperation(index);
+    }
+
+protected:
+    apiv::VMatrixMultiplyLayer* mImpl; //!< Implementation object that every accessor above forwards to.
+    virtual ~IMatrixMultiplyLayer() noexcept = default;
+};
+
+//! \class INonZeroLayer
+//!
+//! \brief A NonZero layer in a network.
+//!
+//! This layer gets the positions of elements that are non-zero in the input.
+//! For boolean input, "non-zero" means "true". Semantics are similar to ONNX NonZero.
+//!
+//! The input may have type kFLOAT, kHALF, kINT32, or kBOOL.
+//!
+//! The output is a matrix of type kINT32.
+//! For an input with dimensions [L1, L2, ..., Lm], the output has dimensions [m,n],
+//! where n is the number of non-zero elements. I.e., each column denotes an m-D position.
+//!
+//! The columns are lexically ordered.
+//! E.g., a column with [3,2,4,7] precedes a column with [3,2,5,6].
+//!
+//! Tip: "compress" can be implemented with INonZeroLayer+IShuffleLayer+Gather.
+//! For example, to compress a tensor x over axis k using mask vector v,
+//! use nonzero(v) to compute the subscripts, shuffle with reshape dimensions = [-1]
+//! to make the subscripts 1D, and then gather with the subscripts.
+//!
+class INonZeroLayer : public ILayer
+{
+protected:
+    virtual ~INonZeroLayer() noexcept = default;
+    apiv::VNonZeroLayer* mImpl; //!< Implementation pointer; this class defines no inline accessors that use it.
+};
+
+//!
+//! \class IRaggedSoftMaxLayer
+//!
+//! \brief A RaggedSoftmax layer in a network definition.
+//!
+//! This layer takes a ZxS input tensor and an additional Zx1 bounds tensor
+//! holding the lengths of the Z sequences.
+//!
+//! This layer computes a softmax across each of the Z sequences.
+//!
+//! The output tensor is of the same size as the input tensor.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IRaggedSoftMaxLayer : public ILayer
+{
+protected:
+    apiv::VRaggedSoftMaxLayer* mImpl; //!< Implementation pointer; this class defines no inline accessors that use it.
+    virtual ~IRaggedSoftMaxLayer() noexcept = default;
+};
+
+//! \class IIdentityLayer
+//!
+//! \brief A layer that represents the identity function.
+//!
+//! For a strongly typed network, the layer is an identity function, i.e. the output
+//! tensor elements are identical to the input tensor elements, possibly with a change
+//! in layout. For example, if a network consists of a single IIdentityLayer, the network
+//! input and output must have the same type, but the input can have NCHW layout and
+//! the output can have NHWC layout.
+//!
+//! If the network is weakly typed, the layer is additionally permitted some type conversions
+//! as described below.
+//!
+//! If the output type is explicitly specified via setOutputType, IIdentityLayer can be
+//! used to convert from one type to another. Other than conversions between the same
+//! type (kFLOAT -> kFLOAT for example), the only valid conversions are:
+//!
+//!     (kFLOAT | kHALF | kINT32 | kBOOL) -> (kFLOAT | kHALF | kINT32 | kBOOL)
+//!
+//!     (kFLOAT | kHALF) -> kUINT8
+//!
+//!     kUINT8 -> (kFLOAT | kHALF)
+//!
+//! Conversion also happens implicitly, without calling setOutputType, if the output
+//! tensor is a network output.
+//!
+//! Two types are compatible if they are identical, or are both in {kFLOAT, kHALF}.
+//! Implicit conversion between incompatible types, i.e. without using setOutputType,
+//! was recognized as incorrect as of TensorRT 8.4, but was retained for API compatibility
+//! within TensorRT 8.x releases. In TensorRT 10.0 onwards it is an error if the network
+//! output tensor type is incompatible with the layer output type. E.g., implicit conversion
+//! from kFLOAT to kINT32 is not allowed.
+//!
+//! To explicitly convert kFLOAT to kINT32:
+//!
+//! * Preferred: use ICastLayer.
+//!
+//! * Legacy alternative: use IIdentityLayer and setOutputType(DataType::kINT32).
+//!
+//! Similar advice applies for explicit conversion in the other direction.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IIdentityLayer : public ILayer
+{
+protected:
+    apiv::VIdentityLayer* mImpl; //!< Implementation pointer; this class defines no inline accessors that use it.
+    virtual ~IIdentityLayer() noexcept = default;
+};
+
+//! \class ICastLayer
+//!
+//! \brief A cast layer in a network.
+//!
+//! This layer casts a given tensor to the datatype specified by \p toType.
+//!
+class ICastLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set cast layer output type.
+    //!
+    //! \param toType The DataType of the output tensor.
+    //!
+    //! \see getToType()
+    //!
+    void setToType(DataType toType) noexcept
+    {
+        mImpl->setToType(toType);
+    }
+
+    //!
+    //! \brief Return cast layer output type.
+    //!
+    //! \return toType parameter set during layer creation or by setToType().
+    //! The return value is the output type of the cast layer.
+    //!
+    DataType getToType() const noexcept
+    {
+        return mImpl->getToType();
+    }
+
+protected:
+    apiv::VCastLayer* mImpl; //!< Implementation object that every accessor above forwards to.
+    virtual ~ICastLayer() noexcept = default;
+};
+
+//! \class IConstantLayer
+//!
+//! \brief Layer that represents a constant value.
+//!
+//! \note This layer does not support boolean types.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IConstantLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the weights for the layer.
+    //!
+    //! The output type is weights.type. If the network is weakly typed and the weights have a real type,
+    //! the output type might be different per TensorRT's type conversion rules.
+    //!
+    //! \see getWeights()
+    //!
+    void setWeights(Weights weights) noexcept
+    {
+        mImpl->setWeights(weights);
+    }
+
+    //!
+    //! \brief Get the weights for the layer.
+    //!
+    //! \see setWeights
+    //!
+    Weights getWeights() const noexcept
+    {
+        return mImpl->getWeights();
+    }
+
+    //!
+    //! \brief Set the dimensions for the layer.
+    //!
+    //! \param dimensions The dimensions of the layer.
+    //!
+    //! \see getDimensions
+    //!
+    void setDimensions(Dims const& dimensions) noexcept
+    {
+        mImpl->setDimensions(dimensions);
+    }
+
+    //!
+    //! \brief Get the dimensions for the layer.
+    //!
+    //! \return The dimensions for the layer.
+    //!
+    //! \see setDimensions
+    //!
+    Dims getDimensions() const noexcept
+    {
+        return mImpl->getDimensions();
+    }
+
+protected:
+    apiv::VConstantLayer* mImpl; //!< Implementation object that every accessor above forwards to.
+    virtual ~IConstantLayer() noexcept = default;
+};
+
+//!
+//! \class IParametricReLULayer
+//!
+//! \brief Layer that represents a parametric ReLU operation.
+//!
+//! When running this layer on DLA, the slopes input must be a build-time constant.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IParametricReLULayer : public ILayer
+{
+protected:
+    apiv::VParametricReLULayer* mImpl; //!< Implementation pointer; this class defines no inline accessors that use it.
+    virtual ~IParametricReLULayer() noexcept = default;
+};
+
+//! \enum InterpolationMode
+//!
+//! \brief Enumerates various modes of interpolation
+//!
+//!
+enum class InterpolationMode : int32_t
+{
+    kNEAREST = 0, //!< ND (0 < N <= 8) nearest neighbor resizing.
+    kLINEAR = 1,  //!< Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
+    kCUBIC = 2    //!< Supports bicubic (2D) interpolation.
+};
+
+namespace impl
+{
+//!
+//! Maximum number of elements in InterpolationMode enum.
+//!
+//! \see InterpolationMode
+//!
+template <>
+struct EnumMaxImpl<InterpolationMode>
+{
+    static constexpr int32_t kVALUE = 3; // Three enumerators: kNEAREST (0), kLINEAR (1), kCUBIC (2).
+};
+} // namespace impl
+
+//!
+//! \enum ResizeCoordinateTransformation
+//!
+//! \brief The resize coordinate transformation function.
+//!
+//! \see IResizeLayer::setCoordinateTransformation()
+//!
+enum class ResizeCoordinateTransformation : int32_t
+{
+    //! Think of each value in the tensor as a unit volume, and the coordinate is a point inside this volume.
+    //! The coordinate point is drawn as a star `(*)` in the diagrams below, and a range of several values has a length.
+    //! Define `x_origin` as the coordinate of axis x in the input tensor, `x_resized` as the coordinate of axis x in
+    //! the output tensor, `length_origin` as length of the input tensor in axis x, and `length_resize` as length of the
+    //! output tensor in axis x.
+    //!
+    //!     |<--------------length---------->|
+    //!     |    0     |    1     |    2     |    3     |
+    //!     *          *          *          *
+    //!
+    //!     x_origin = x_resized * (length_origin - 1) / (length_resize - 1)
+    //!
+    kALIGN_CORNERS = 0,
+
+    //!     |<--------------length--------------------->|
+    //!     |    0     |    1     |    2     |    3     |
+    //!     *          *          *          *
+    //!
+    //!     x_origin = x_resized * (length_origin / length_resize)
+    //!
+    kASYMMETRIC = 1,
+
+    //!     |<--------------length--------------------->|
+    //!     |    0     |    1     |    2     |    3     |
+    //!          *          *          *          *
+    //!
+    //!     x_origin = (x_resized + 0.5) * (length_origin / length_resize) - 0.5
+    //!
+    kHALF_PIXEL = 2,
+};
+
+namespace impl
+{
+//!
+//! Number of enumerators in the ResizeCoordinateTransformation enum (kALIGN_CORNERS, kASYMMETRIC, kHALF_PIXEL).
+//!
+//! \see ResizeCoordinateTransformation
+//!
+template <>
+struct EnumMaxImpl<ResizeCoordinateTransformation>
+{
+    static constexpr int32_t kVALUE = 3;
+};
+} // namespace impl
+
+//!
+//! \enum ResizeSelector
+//!
+//! \brief The coordinate selector used when resizing to a single-pixel output.
+//!
+//! \see IResizeLayer::setSelectorForSinglePixel()
+//!
+enum class ResizeSelector : int32_t
+{
+    //! Use the formula to map the original index.
+    kFORMULA = 0,
+
+    //! Select the upper left pixel.
+    kUPPER = 1,
+};
+
+namespace impl
+{
+//!
+//! Number of enumerators in the ResizeSelector enum (kFORMULA, kUPPER).
+//!
+//! \see ResizeSelector
+//!
+template <>
+struct EnumMaxImpl<ResizeSelector>
+{
+    static constexpr int32_t kVALUE = 2;
+};
+} // namespace impl
+
+//!
+//! \enum ResizeRoundMode
+//!
+//! \brief The rounding mode for nearest neighbor resize.
+//!
+//! \see IResizeLayer::setNearestRounding()
+//!
+enum class ResizeRoundMode : int32_t
+{
+    //! Round half up.
+    kHALF_UP = 0,
+
+    //! Round half down.
+    kHALF_DOWN = 1,
+
+    //! Round down (floor).
+    kFLOOR = 2,
+
+    //! Round up (ceil).
+    kCEIL = 3,
+};
+
+namespace impl
+{
+//!
+//! Number of enumerators in the ResizeRoundMode enum (kHALF_UP, kHALF_DOWN, kFLOOR, kCEIL).
+//!
+//! \see ResizeRoundMode
+//!
+template <>
+struct EnumMaxImpl<ResizeRoundMode>
+{
+    static constexpr int32_t kVALUE = 4;
+};
+} // namespace impl
+
+//! \class IResizeLayer
+//!
+//! \brief A resize layer in a network definition.
+//!
+//! Resize layer can be used for resizing a N-D tensor.
+//!
+//! Resize layer currently supports the following configurations:
+//!     -   InterpolationMode::kNEAREST - resizes last `m` dimensions of N-D, where 0 < m <= min(8, N) and N > 0
+//!     -   InterpolationMode::kLINEAR - resizes last `m` dimensions of N-D, where 0 < m <= min(3, N) and N > 0
+//!
+//! Default resize mode is InterpolationMode::kNEAREST.
+//!
+//! The coordinates in the output tensor are mapped to coordinates in the input tensor using a function set by calling
+//! setCoordinateTransformation(). The default for all InterpolationMode settings (nearest, linear, bilinear, etc.) is
+//! ResizeCoordinateTransformation::kASYMMETRIC.
+//!
+//! The resize layer provides two ways to resize tensor dimensions.
+//!     -   Set output dimensions directly. It can be done for static as well as dynamic resize layer.
+//!         Static resize layer requires output dimensions to be known at build-time.
+//!         Dynamic resize layer requires output dimensions to be set as one of the input tensors.
+//!     -   Set scales for resize. Each output dimension is calculated as floor(input dimension * scale).
+//!         Only static resize layer allows setting scales where the scales are known at build-time.
+//!
+//! If executing this layer on DLA, the following combinations of parameters are supported:
+//!
+//! - In kNEAREST mode:
+//!     * (ResizeCoordinateTransformation::kASYMMETRIC, ResizeSelector::kFORMULA, ResizeRoundMode::kFLOOR)
+//!     * (ResizeCoordinateTransformation::kHALF_PIXEL, ResizeSelector::kFORMULA, ResizeRoundMode::kHALF_DOWN)
+//!     * (ResizeCoordinateTransformation::kHALF_PIXEL, ResizeSelector::kFORMULA, ResizeRoundMode::kHALF_UP)
+//!
+//! - In kLINEAR mode:
+//!     * (ResizeCoordinateTransformation::kHALF_PIXEL, ResizeSelector::kFORMULA)
+//!     * (ResizeCoordinateTransformation::kHALF_PIXEL, ResizeSelector::kUPPER)
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IResizeLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the output dimensions.
+    //!
+    //! \param dimensions The output dimensions. Number of output dimensions must be the same as the number of input
+    //! dimensions.
+    //!
+    //! If executing this layer on DLA, setOutputDimensions() is not supported.
+    //!
+    //! If there is a second input, i.e. resize layer is dynamic,
+    //! calling setOutputDimensions() is an error and does not update the
+    //! dimensions.
+    //!
+    //! Output dimensions can be specified directly, or via scale factors relative to input dimensions.
+    //! Scales for resize can be provided using setScales().
+    //!
+    //! \see setScales
+    //! \see getOutputDimensions
+    //!
+    void setOutputDimensions(Dims const& dimensions) noexcept
+    {
+        return mImpl->setOutputDimensions(dimensions);
+    }
+
+    //!
+    //! \brief Get the output dimensions.
+    //!
+    //! \return The output dimensions.
+    //!
+    Dims getOutputDimensions() const noexcept
+    {
+        return mImpl->getOutputDimensions();
+    }
+
+    //!
+    //! \brief Set the resize scales.
+    //!
+    //! \param scales An array of resize scales.
+    //! \param nbScales Number of scales. Number of scales must be equal to the number of input dimensions.
+    //!
+    //! If executing this layer on DLA, there are three restrictions:
+    //! 1) nbScales has to be exactly 4.
+    //! 2) the first two elements in scales need to be exactly 1 (for unchanged batch and channel dimensions).
+    //! 3) The last two elements in scales, representing the scale values along height and width dimensions,
+    //! respectively, need to be integer values in the range of [1, 32] for kNEAREST mode and [1, 4] for kLINEAR.
+    //! Example of DLA-supported scales: {1, 1, 2, 2}.
+    //!
+    //! If there is a second input, i.e. resize layer is dynamic,
+    //! calling setScales() is an error and does not update the scales.
+    //!
+    //! Output dimensions are calculated as follows:
+    //! outputDims[i] = floor(inputDims[i] * scales[i])
+    //!
+    //! Output dimensions can be specified directly, or via scale factors relative to input dimensions.
+    //! Output dimensions can be provided directly using setOutputDimensions().
+    //!
+    //! \see setOutputDimensions
+    //! \see getScales
+    //!
+    void setScales(float const* scales, int32_t nbScales) noexcept
+    {
+        mImpl->setScales(scales, nbScales);
+    }
+
+    //!
+    //! \brief Copies resize scales to scales[0, ..., nbScales-1], where nbScales is the number of scales that were set.
+    //!
+    //! \param size The number of scales to get. If size != nbScales, no scales will be copied.
+    //!
+    //! \param scales Pointer to where to copy the scales. Scales will be copied only if
+    //!               size == nbScales and scales != nullptr.
+    //!
+    //! In case the size is not known consider using size = 0 and scales = nullptr. This method will return
+    //! the number of resize scales.
+    //!
+    //! \return The number of resize scales i.e. nbScales if scales were set.
+    //!         Return -1 in case no scales were set or resize layer is used in dynamic mode.
+    //!
+    int32_t getScales(int32_t size, float* scales) const noexcept
+    {
+        return mImpl->getScales(size, scales);
+    }
+
+    //!
+    //! \brief Set resize mode for an input tensor.
+    //!
+    //! Supported resize modes are Nearest Neighbor and Linear.
+    //!
+    //! \see InterpolationMode
+    //!
+    void setResizeMode(InterpolationMode interpolationMode) noexcept
+    {
+        mImpl->setResizeMode(interpolationMode);
+    }
+
+    //!
+    //! \brief Get resize mode for an input tensor.
+    //!
+    //! \return The resize mode.
+    //!
+    InterpolationMode getResizeMode() const noexcept
+    {
+        return mImpl->getResizeMode();
+    }
+
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor
+    //!
+    //! \param index the index of the input to modify.
+    //! \param tensor the new input tensor.
+    //!
+    //! Sets the input tensor for the given index. The index must be 0 for a static resize layer.
+    //! A static resize layer is converted to a dynamic resize layer by calling setInput with an index 1.
+    //! A dynamic resize layer cannot be converted back to a static resize layer.
+    //!
+    //! For a dynamic resize layer, the values 0 and 1 are valid.
+    //! The indices in the dynamic case are as follows:
+    //!
+    //! - 0: Execution tensor to be resized.
+    //! - 1: The output dimensions, as a 1D tensor of type Int32 or Int64.
+    //!
+    //! If this function is called with the value 1, then the function getNbInputs() changes
+    //! from returning 1 to 2.
+    //!
+    using ILayer::setInput;
+
+    //!
+    //! \brief Set coordinate transformation function.
+    //!
+    //! The function maps a coordinate in the output tensor to a coordinate in the input tensor.
+    //!
+    //! Default function is ResizeCoordinateTransformation::kASYMMETRIC.
+    //!
+    //! \see ResizeCoordinateTransformation
+    //!
+    void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept
+    {
+        mImpl->setCoordinateTransformation(coordTransform);
+    }
+
+    //!
+    //! \brief Get coordinate transformation function.
+    //!
+    //! \return The coordinate transformation function.
+    //!
+    ResizeCoordinateTransformation getCoordinateTransformation() const noexcept
+    {
+        return mImpl->getCoordinateTransformation();
+    }
+
+    //!
+    //! \brief Set coordinate selector function when resized to single pixel.
+    //!
+    //! When resizing to a single-pixel image, use this function to decide how to map the coordinate in the original
+    //! image.
+    //!
+    //! Default is ResizeSelector::kFORMULA.
+    //!
+    //! \see ResizeSelector
+    //!
+    void setSelectorForSinglePixel(ResizeSelector selector) noexcept
+    {
+        mImpl->setSelectorForSinglePixel(selector);
+    }
+
+    //!
+    //! \brief Get the coordinate selector function when resized to single pixel.
+    //!
+    //! \return The selector function.
+    //!
+    ResizeSelector getSelectorForSinglePixel() const noexcept
+    {
+        return mImpl->getSelectorForSinglePixel();
+    }
+
+    //!
+    //! \brief Set rounding mode for nearest neighbor resize.
+    //!
+    //! This value is used for nearest neighbor interpolation rounding. It is applied after coordinate transformation.
+    //!
+    //! Default is ResizeRoundMode::kFLOOR.
+    //!
+    //! \see ResizeRoundMode
+    //!
+    void setNearestRounding(ResizeRoundMode value) noexcept
+    {
+        mImpl->setNearestRounding(value);
+    }
+
+    //!
+    //! \brief Get rounding mode for nearest neighbor resize.
+    //!
+    //! \return The rounding mode.
+    //!
+    ResizeRoundMode getNearestRounding() const noexcept
+    {
+        return mImpl->getNearestRounding();
+    }
+
+    //!
+    //! \brief Set the coefficient 'A' used in cubic interpolation.
+    //!
+    //! Cubic uses the coefficient 'A' to calculate the weight of input pixels:
+    //!
+    //! <pre>
+    //! x := The relative distance between the sampled pixels and the input coordinates.
+    //!
+    //! weight(x) := for |x| <= 1, ((A + 2) * x - (A + 3)) * x * x + 1,
+    //!              for 1 < |x| < 2, ((A * x - 5 * A) * x + 8 * A) * x - 4 * A,
+    //!              others 0;
+    //! </pre>
+    //!
+    //! This attribute is valid only if "resize mode" is "cubic".
+    //!
+    //! The default value is -0.75.
+    //!
+    void setCubicCoeff(float A) noexcept
+    {
+        mImpl->setCubicCoeff(A);
+    }
+
+    //!
+    //! \brief Get the coefficient 'A' used in cubic interpolation.
+    //!
+    //! \see setCubicCoeff()
+    //!
+    float getCubicCoeff() const noexcept
+    {
+        return mImpl->getCubicCoeff();
+    }
+
+    //!
+    //! \brief Set the state for excluding outside pixels.
+    //!
+    //! If set to true, the weight of sampling locations outside the input tensor will be set to 0, and the weights
+    //! will be renormalized so that their sum is 1.0.
+    //!
+    //! The default value is false.
+    //!
+    void setExcludeOutside(bool excludeFlag) noexcept
+    {
+        mImpl->setExcludeOutside(excludeFlag);
+    }
+
+    //!
+    //! \brief Get the state for excluding outside pixels.
+    //!
+    //! \see setExcludeOutside()
+    //!
+    bool getExcludeOutside() const noexcept
+    {
+        return mImpl->getExcludeOutside();
+    }
+
+protected:
+    virtual ~IResizeLayer() noexcept = default;
+    apiv::VResizeLayer* mImpl;
+};
+
+//!
+//! \enum LoopOutput
+//!
+//! \brief Enum that describes kinds of loop outputs.
+//!
+enum class LoopOutput : int32_t
+{
+    //! Output value is the value of the tensor for the last iteration.
+    kLAST_VALUE = 0,
+
+    //! Output value is the concatenation of the values of the tensor for each iteration, in forward order.
+    kCONCATENATE = 1,
+
+    //! Output value is the concatenation of the values of the tensor for each iteration, in reverse order.
+    kREVERSE = 2
+};
+
+//!
+//! Maximum number of elements in LoopOutput enum.
+//!
+//! \see LoopOutput
+//!
+template <>
+constexpr inline int32_t EnumMax<LoopOutput>() noexcept
+{
+    return 3;
+}
+
+//!
+//! \enum TripLimit
+//!
+//! \brief Enum that describes kinds of trip limits.
+//!
+enum class TripLimit : int32_t
+{
+
+    kCOUNT = 0, //!< Tensor is a scalar of type kINT32 or kINT64 that contains the trip count.
+    kWHILE = 1  //!< Tensor is a scalar of type kBOOL. The loop terminates when the value is false.
+};
+
+//!
+//! Maximum number of elements in TripLimit enum.
+//!
+//! \see TripLimit
+//!
+template <>
+constexpr inline int32_t EnumMax<TripLimit>() noexcept
+{
+    return 2;
+}
+
+class ILoop;
+
+//!
+//! \class ILoopBoundaryLayer
+//!
+//! \brief This is a base class for Loop boundary layers.
+//!
+//! The loop boundary layers are used to define loops within a network, enabling the implementation
+//! of recurrences. The boundary layers for a loop are created by class ILoop.
+//!
+//! There are four kinds of boundary layers.
+//! * ITripLimitLayer: controls the number of loop iterations.
+//! * IIteratorLayer: iterates over an input tensor.
+//! * IRecurrenceLayer: returns an initial value or value from the previous loop iteration.
+//! * ILoopOutputLayer: generates an output tensor from the loop iterations.
+class ILoopBoundaryLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Get a pointer to ILoop associated with this boundary layer.
+    //!
+    ILoop* getLoop() const noexcept
+    {
+        return mBoundary->getLoop();
+    }
+
+protected:
+    virtual ~ILoopBoundaryLayer() noexcept = default;
+    apiv::VLoopBoundaryLayer* mBoundary;
+};
+
+//!
+//! \class IIfConditionalBoundaryLayer
+//!
+//! \brief This is a base class for Conditional boundary layers.
+//!
+//! Boundary layers are used to demarcate the boundaries of Conditionals.
+//!
+class IIfConditionalBoundaryLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Get a pointer to the IIfConditional associated with this boundary layer.
+    //!
+    IIfConditional* getConditional() const noexcept
+    {
+        return mBoundary->getConditional();
+    }
+
+protected:
+    virtual ~IIfConditionalBoundaryLayer() noexcept = default;
+    apiv::VConditionalBoundaryLayer* mBoundary; //!< Object that getConditional() forwards to.
+};
+
+//!
+//! \class IConditionLayer
+//!
+//! \brief This layer represents a condition input to an IIfConditional.
+//!
+class IConditionLayer : public IIfConditionalBoundaryLayer
+{
+public: // No public members are declared here beyond those inherited from the base class.
+protected:
+    virtual ~IConditionLayer() noexcept = default;
+    apiv::VConditionLayer* mImpl;
+};
+
+//!
+//! \class IIfConditionalOutputLayer
+//!
+//! \brief This layer represents an output of an IIfConditional.
+//!
+//! An IIfConditionalOutputLayer has two inputs and one output.
+//!
+//! \see IIfConditional::addOutput
+//!
+class IIfConditionalOutputLayer : public IIfConditionalBoundaryLayer
+{
+public: // No public members are declared here beyond those inherited from the base class.
+protected:
+    virtual ~IIfConditionalOutputLayer() noexcept = default;
+    apiv::VConditionalOutputLayer* mImpl;
+};
+
+//!
+//! \class IIfConditionalInputLayer
+//!
+//! \brief This layer represents an input to an IIfConditional.
+//!
+class IIfConditionalInputLayer : public IIfConditionalBoundaryLayer
+{
+public: // No public members are declared here beyond those inherited from the base class.
+protected:
+    virtual ~IIfConditionalInputLayer() noexcept = default;
+    apiv::VConditionalInputLayer* mImpl;
+};
+
+//!
+//! \class IIfConditional
+//!
+//! \brief Helper for constructing conditionally-executed subgraphs.
+//!
+//! An If-conditional conditionally executes part of the network according
+//! to the following pseudo-code:
+//!
+//! If condition is true then:
+//!     output = trueSubgraph(trueInputs);
+//! Else
+//!     output = falseSubgraph(falseInputs);
+//! Emit output
+//!
+//! Condition is a 0D boolean tensor (representing a scalar).
+//! trueSubgraph represents a network subgraph that is executed when condition evaluates to True.
+//! falseSubgraph represents a network subgraph that is executed when condition evaluates to False.
+//!
+//! The following constraints apply to If-conditionals:
+//! - Both the trueSubgraph and falseSubgraph must be defined.
+//! - Both subgraphs must have the same number of output tensors.
+//! - Corresponding output tensors from the true/false subgraphs have the same type and rank.
+//!
+//! The subgraphs may directly use tensors defined outside of the IIfConditional.
+class IIfConditional : public INoCopy
+{
+public:
+    //!
+    //! \brief Set the condition tensor for this If-Conditional construct.
+    //!
+    //! \param condition The condition tensor that will determine which subgraph to execute.
+    //!
+    //! \p condition tensor must be a 0D execution tensor (scalar) with type DataType::kBOOL.
+    //!
+    //! \see IConditionLayer
+    //!
+    IConditionLayer* setCondition(ITensor& condition) noexcept
+    {
+        return mImpl->setCondition(condition);
+    }
+
+    //!
+    //! \brief Add an If-conditional output.
+    //!
+    //! \param trueSubgraphOutput The output of the subgraph executed when the conditional evaluates to true.
+    //! \param falseSubgraphOutput The output of the subgraph executed when the conditional evaluates to false.
+    //!
+    //! Each output layer of an IIfConditional represents a single output of either the true-subgraph or the
+    //! false-subgraph of an IIfConditional, depending on which subgraph was executed.
+    //!
+    //! The ranks of the two tensors must be equal unless the condition is a build-time constant.
+    //!
+    //! \see IIfConditionalOutputLayer
+    //!
+    IIfConditionalOutputLayer* addOutput(ITensor& trueSubgraphOutput, ITensor& falseSubgraphOutput) noexcept
+    {
+        return mImpl->addOutput(trueSubgraphOutput, falseSubgraphOutput);
+    }
+
+    //!
+    //! \brief Add an If-conditional input.
+    //!
+    //! \param input An input to the conditional that can be used by either or both of the conditional's subgraphs.
+    //!
+    //! \see IIfConditionalInputLayer
+    //!
+    IIfConditionalInputLayer* addInput(ITensor& input) noexcept
+    {
+        return mImpl->addInput(input);
+    }
+
+    //!
+    //! \brief Set the name of the conditional.
+    //!
+    //! The name is used in error diagnostics.
+    //! This method copies the name string.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getName()
+    //!
+    void setName(char const* name) noexcept
+    {
+        mImpl->setName(name);
+    }
+
+    //!
+    //! \brief Return the name of the conditional.
+    //!
+    //! \see setName()
+    //!
+    char const* getName() const noexcept
+    {
+        return mImpl->getName();
+    }
+
+protected:
+    virtual ~IIfConditional() noexcept = default;
+    apiv::VIfConditional* mImpl; //!< Object that every public method of this class forwards to.
+};
+
+//!
+//! \class IRecurrenceLayer
+//!
+//! \brief A recurrence layer in a network definition.
+//!
+//! The recurrence layer allows a loop iteration to compute a result from a value computed in the previous iteration.
+//!
+class IRecurrenceLayer : public ILoopBoundaryLayer
+{
+public:
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor
+    //!
+    //! \param index the index of the input to modify.
+    //! \param tensor the new input tensor
+    //!
+    //! Sets the input tensor for the given index.
+    //!
+    //! For a recurrence layer, the values 0 and 1 are valid.
+    //! The indices are as follows:
+    //!
+    //! - 0: The initial value of the output tensor. The value must come from outside the loop.
+    //! - 1: The next value of the output tensor. The value usually comes from inside the loop, and must have the same
+    //! dimensions as input 0.
+    //!
+    //! If this function is called with the value 1, then the function getNbInputs() changes
+    //! from returning 1 to 2.
+    //!
+    using ILayer::setInput;
+
+protected:
+    virtual ~IRecurrenceLayer() noexcept = default;
+    apiv::VRecurrenceLayer* mImpl;
+};
+
+//!
+//! \class ILoopOutputLayer
+//!
+//! \brief An ILoopOutputLayer is the sole way to get output from a loop.
+//!
+//! The first input tensor must be defined inside the loop; the output tensor is outside the loop.
+//! The second input tensor, if present, must be defined outside the loop.
+//!
+//! If getLoopOutput() is kLAST_VALUE, a single input must be provided,
+//! and that input must be from an IRecurrenceLayer in the same loop.
+//!
+//! If getLoopOutput() is kCONCATENATE or kREVERSE, a second input must be provided.
+//! The second input must be a 0D shape tensor, defined before the loop commences,
+//! that specifies the concatenation length of the output.
+//!
+//! The output tensor has j more dimensions than the input tensor, where
+//! j == 0 if getLoopOutput() is kLAST_VALUE
+//! j == 1 if getLoopOutput() is kCONCATENATE or kREVERSE.
+//!
+class ILoopOutputLayer : public ILoopBoundaryLayer
+{
+public:
+    //!
+    //! \brief Get which kind a loop output has.
+    //!
+    LoopOutput getLoopOutput() const noexcept
+    {
+        return mImpl->getLoopOutput();
+    }
+
+    //!
+    //! \brief Set where to insert the concatenation axis. Ignored if getLoopOutput() is kLAST_VALUE.
+    //!
+    //! For example, if the input tensor has dimensions [b,c,d],
+    //! and getLoopOutput() is kCONCATENATE, the output has four dimensions.
+    //! Let a be the value of the second input.
+    //! setAxis(0) causes the output to have dimensions [a,b,c,d].
+    //! setAxis(1) causes the output to have dimensions [b,a,c,d].
+    //! setAxis(2) causes the output to have dimensions [b,c,a,d].
+    //! setAxis(3) causes the output to have dimensions [b,c,d,a].
+    //! The default axis is 0.
+    //!
+    void setAxis(int32_t axis) noexcept
+    {
+        mImpl->setAxis(axis);
+    }
+
+    //!
+    //! \brief Get axis being concatenated over.
+    //!
+    int32_t getAxis() const noexcept
+    {
+        return mImpl->getAxis();
+    }
+
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor
+    //!
+    //! \param index the index of the input to modify.
+    //! \param tensor the new input tensor
+    //!
+    //! Sets the input tensor for the given index. The index must be 0 for a kLAST_VALUE loop output layer.
+    //! Loop output layer is converted to a kCONCATENATE or kREVERSE loop output layer by calling setInput with an
+    //! index 1. A kCONCATENATE or kREVERSE loop output layer cannot be converted back to a kLAST_VALUE loop output
+    //! layer.
+    //!
+    //! For a kCONCATENATE or kREVERSE loop output layer, the values 0 and 1 are valid.
+    //! The indices in the kCONCATENATE or kREVERSE cases are as follows:
+    //!
+    //! - 0: Contribution to the output tensor.  The contribution must come from inside the loop.
+    //! - 1: The concatenation length scalar value, must come from outside the loop, as a 0D shape tensor of type Int32 or Int64.
+    //!
+    //! If this function is called with the value 1, then the function getNbInputs() changes
+    //! from returning 1 to 2.
+    //!
+    using ILayer::setInput;
+
+protected:
+    virtual ~ILoopOutputLayer() noexcept = default;
+    apiv::VLoopOutputLayer* mImpl;
+};
+
+//!
+//! \class ITripLimitLayer
+//!
+//! \brief A layer that represents a trip-count limiter.
+//!
+//! The trip limit layer sets the execution condition for loops, using kCOUNT to define the number of iterations or
+//! kWHILE for a conditional loop. A loop can have one of each kind of limit, in which case the loop exits when
+//! the trip count is reached or the condition becomes false.
+//!
+//! \see ILoop::addTripLimit()
+//!
+class ITripLimitLayer : public ILoopBoundaryLayer
+{
+public:
+    //!
+    //! \brief Get the kind of trip limit (TripLimit) reported for this layer.
+    //!
+    TripLimit getTripLimit() const noexcept
+    {
+        return mImpl->getTripLimit();
+    }
+
+protected:
+    virtual ~ITripLimitLayer() noexcept = default;
+    apiv::VTripLimitLayer* mImpl;
+};
+
+//!
+//! \class IIteratorLayer
+//!
+//! \brief A layer to do iterations.
+//!
+//! The iterator layer iterates over a tensor along the given axis and in the given direction.
+//! It enables each loop iteration to inspect a different slice of the tensor.
+//!
+//! \see ILoop::addIterator()
+//!
+class IIteratorLayer : public ILoopBoundaryLayer
+{
+public:
+    //!
+    //! \brief Set axis to iterate over.
+    //!
+    void setAxis(int32_t axis) noexcept
+    {
+        mImpl->setAxis(axis);
+    }
+
+    //!
+    //! \brief Get axis being iterated over.
+    //!
+    int32_t getAxis() const noexcept
+    {
+        return mImpl->getAxis();
+    }
+
+    //!
+    //! \brief Set iteration order to be reverse.
+    //!
+    //! For reverse=false, the layer is equivalent to addGather(tensor, I, 0) where I is a
+    //! scalar tensor containing the loop iteration number.
+    //! For reverse=true, the layer is equivalent to addGather(tensor, M-1-I, 0) where M is the trip count
+    //! computed from TripLimits of kind kCOUNT.
+    //! The default is reverse=false.
+    //!
+    void setReverse(bool reverse) noexcept
+    {
+        mImpl->setReverse(reverse);
+    }
+
+    //!
+    //! \brief Check if the iteration order is reverse.
+    //!
+    //! \return True if and only if the input is iterated in reverse order.
+    //!
+    bool getReverse() const noexcept
+    {
+        return mImpl->getReverse();
+    }
+
+protected:
+    virtual ~IIteratorLayer() noexcept = default;
+    apiv::VIteratorLayer* mImpl;
+};
+
+//!
+//! \class ILoop
+//!
+//! \brief Helper for creating a recurrent subgraph.
+//!
+//! An ILoop defines a loop within a network. It supports the implementation of recurrences,
+//! which are crucial for iterative computations, such as RNNs for natural language processing and
+//! time-series analysis.
+//!
+//! The subgraph may directly use tensors defined outside of the ILoop.
+class ILoop : public INoCopy
+{
+public:
+    //!
+    //! \brief Create a recurrence layer for this loop with initialValue as its first input.
+    //!
+    //! IRecurrenceLayer requires exactly two inputs.  The 2nd input must be added, via method
+    //! IRecurrenceLayer::setInput(1,...) before an Engine can be built.
+    //!
+    IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept
+    {
+        return mImpl->addRecurrence(initialValue);
+    }
+
+    //!
+    //! \brief Add a trip-count limiter, based on the given tensor.
+    //!
+    //! There may be at most one kCOUNT and one kWHILE limiter for a loop.
+    //! When both trip limits exist, the loop exits when the
+    //! count is reached or condition is falsified.
+    //! It is an error to not add at least one trip limiter.
+    //!
+    //! For kCOUNT, the input tensor must be available before the loop starts.
+    //!
+    //! For kWHILE, the input tensor must be the output of a subgraph that contains
+    //! only layers that are not ITripLimitLayer, IIteratorLayer or ILoopOutputLayer.
+    //! Any IRecurrenceLayers in the subgraph must belong to the same loop as the
+    //! ITripLimitLayer.  A trivial example of this rule is that the input to the kWHILE
+    //! is the output of an IRecurrenceLayer for the same loop.
+    //!
+    ITripLimitLayer* addTripLimit(ITensor& tensor, TripLimit limit) noexcept
+    {
+        return mImpl->addTripLimit(tensor, limit);
+    }
+
+    //!
+    //! \brief Return layer that subscripts tensor by loop iteration.
+    //!
+    //! For reverse=false, this is equivalent to addGather(tensor, I, 0) where I is a
+    //! scalar tensor containing the loop iteration number.
+    //! For reverse=true, this is equivalent to addGather(tensor, M-1-I, 0) where M is the trip count
+    //! computed from TripLimits of kind kCOUNT.
+    //!
+    IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept
+    {
+        return mImpl->addIterator(tensor, axis, reverse);
+    }
+
+    //!
+    //! \brief Make an output for this loop, based on the given tensor.
+    //!
+    //! axis is the axis for concatenation (if using outputKind of kCONCATENATE or kREVERSE).
+    //!
+    //! If outputKind is kCONCATENATE or kREVERSE, a second input specifying the
+    //! concatenation length must be added via method ILoopOutputLayer::setInput.
+    //!
+    ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept
+    {
+        return mImpl->addLoopOutput(tensor, outputKind, axis);
+    }
+
+    //!
+    //! \brief Set the name of the loop.
+    //!
+    //! The name is used in error diagnostics.
+    //! This method copies the name string.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getName()
+    //!
+    void setName(char const* name) noexcept
+    {
+        mImpl->setName(name);
+    }
+
+    //!
+    //! \brief Return the name of the loop.
+    //!
+    //! \see setName()
+    //!
+    char const* getName() const noexcept
+    {
+        return mImpl->getName();
+    }
+
+protected:
+    virtual ~ILoop() noexcept = default;
+    apiv::VLoop* mImpl;
+};
+
+//!
+//! \class ISelectLayer
+//!
+//! \brief Select elements from two data tensors based on a condition tensor.
+//!
+//! The select layer makes elementwise selections from two data tensors based on a condition tensor,
+//! behaving similarly to the `numpy.where` function with three parameters.
+//! The three input tensors must share the same rank. Multidirectional broadcasting is supported.
+//! The output tensor has the dimensions of the inputs AFTER applying the broadcast rule.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ISelectLayer : public ILayer
+{
+protected:
+    virtual ~ISelectLayer() noexcept = default; //!< Non-public destructor (declared in the protected section).
+    apiv::VSelectLayer* mImpl; //!< Implementation object; this class declares no accessors of its own.
+};
+
+//!
+//! \class IAssertionLayer
+//!
+//! \brief An assertion layer in a network
+//!
+//! The layer has a single input and no output. The input must be a boolean shape tensor.
+//! If any element of the input is provably false at build time, the network is rejected.
+//! If any element of the input is false at runtime for the supplied runtime dimensions,
+//! an error occurs, much the same as if any other runtime error (e.g. using IShuffleLayer
+//! to change the volume of a tensor) is handled.
+//!
+//! Asserting equality of input dimensions may help the optimizer.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IAssertionLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the message to print if the assertion fails.
+    //!
+    //! The message is used in error diagnostics.
+    //! This method copies the message string.
+    //!
+    //! \see getMessage()
+    //!
+    void setMessage(char const* message) noexcept
+    {
+        mImpl->setMessage(message);
+    }
+
+    //!
+    //! \brief Return the assertion message.
+    //!
+    //! \see setMessage()
+    //!
+    char const* getMessage() const noexcept
+    {
+        return mImpl->getMessage();
+    }
+
+protected:
+    virtual ~IAssertionLayer() noexcept = default;
+
+    apiv::VAssertionLayer* mImpl;
+};
+
+//!
+//! \enum FillOperation
+//!
+//! \brief Enumerates the tensor fill operations that may be performed by a fill layer.
+//!
+//! \see IFillLayer
+//!
+enum class FillOperation : int32_t
+{
+    //! Compute each value via an affine function of its indices.
+    //! For example, suppose the parameters for the IFillLayer are:
+    //!
+    //! * Dimensions = [3,4]
+    //! * Alpha = 1
+    //! * Beta = [100,10]
+    //!
+    //! Element [i,j] of the output is Alpha + Beta[0]*i + Beta[1]*j.
+    //! Thus the output matrix is:
+    //!
+    //!      1  11  21  31
+    //!    101 111 121 131
+    //!    201 211 221 231
+    //!
+    //! A static beta b is implicitly a 1D tensor, i.e. Beta = [b].
+    kLINSPACE = 0,
+
+    //! Randomly draw values from a uniform distribution; alpha is the minimum value and beta the maximal value.
+    kRANDOM_UNIFORM = 1,
+
+    //! Randomly draw values from a normal distribution; alpha is the mean and beta the standard deviation.
+    kRANDOM_NORMAL = 2
+};
+
+//!
+//! Maximum number of elements in FillOperation enum.
+//!
+//! \see FillOperation
+//!
+template <>
+constexpr inline int32_t EnumMax<FillOperation>() noexcept
+{
+    return 3; // Number of FillOperation enumerators: kLINSPACE, kRANDOM_UNIFORM, kRANDOM_NORMAL.
+}
+
+//!
+//! \class IFillLayer
+//!
+//! \brief Generate a tensor according to a specified mode.
+//!
+//! The fill layer generates a tensor with values that are drawn from a random distribution
+//! or an affine function of their indices, as specified by the FillOperation.
+//!
+//! When an IFillLayer is initially added to a network, all of its parameters are static.
+//! Each parameter may be changed to dynamic by setting a corresponding input.
+//! A parameter is considered dynamic even if that input is the output of an IConstantLayer.
+//! The inputs for each parameter are:
+//!
+//! - 0: Dimensions
+//! - 1: Alpha
+//! - 2: Beta
+//!
+//! The parameter Dimensions describes the shape of the output. If the Dimensions input is provided,
+//! it must be a 1D tensor of type Int32 or Int64 whose length is computable by constant folding.
+//!
+//! The meanings of Alpha and Beta depend on the mode, as described in IFillLayer::setAlpha(),
+//! IFillLayer::setBeta(), and IFillLayer::setInput(). Parameters Alpha and Beta must both be static
+//! or both be dynamic.
+//!
+//! An IFillLayer can produce a shape tensor if the following restrictions are met:
+//!
+//! * The FillOperation is kLINSPACE.
+//! * The output has type Int32, Int64, or Float.
+//! * The volume of the output is within the volume limit imposed on shape tensors.
+//! * If input 0 exists, the values of input 0 must be computable by constant folding.
+//!
+//! \see FillOperation
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IFillLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the output tensor's dimensions.
+    //!
+    //! \param dimensions The output tensor's dimensions.
+    //!
+    //! If the first input had been used to create this layer, that input is reset to null by this method.
+    //!
+    //! \see getDimensions
+    //!
+    void setDimensions(Dims const& dimensions) noexcept
+    {
+        mImpl->setDimensions(dimensions);
+    }
+
+    //!
+    //! \brief Get the output tensor's dimensions.
+    //!
+    //! \return The output tensor's dimensions, or an invalid Dims structure.
+    //!
+    //! If the first input is present and non-null,
+    //! this function returns a Dims with nbDims = -1.
+    //!
+    //! \see setDimensions
+    //!
+    Dims getDimensions() const noexcept
+    {
+        return mImpl->getDimensions();
+    }
+
+    //!
+    //! \brief Set the fill operation for the layer.
+    //!
+    //! \see getOperation(), FillOperation
+    //!
+    void setOperation(FillOperation op) noexcept
+    {
+        mImpl->setOperation(op);
+    }
+
+    //!
+    //! \brief Get the fill operation for the layer.
+    //!
+    //! \see setOperation(), FillOperation
+    //!
+    FillOperation getOperation() const noexcept
+    {
+        return mImpl->getOperation();
+    }
+
+    //!
+    //! \brief Set the alpha parameter.
+    //!
+    //! \param alpha has different meanings for each operator:
+    //!
+    //! Operation          | Usage
+    //! kLINSPACE          | the start value, defaults to 0.0;
+    //! kRANDOM_UNIFORM    | the minimum value, defaults to 0.0;
+    //! kRANDOM_NORMAL     | the mean of the normal distribution, default is 0.0;
+    //!
+    //! If input 1 exists, it is reset to null by this method.
+    //!
+    //! \see getAlpha, setAlphaInt64
+    //!
+    void setAlpha(double alpha) noexcept
+    {
+        mImpl->setAlpha(alpha);
+    }
+
+    //!
+    //! \brief Get the value of alpha parameter.
+    //!
+    //! \return A double value of alpha.
+    //!
+    //! If the second input is present and non-null,
+    //! this function returns -1.0.
+    //!
+    //! \see setAlpha, setAlphaInt64
+    //!
+    double getAlpha() const noexcept
+    {
+        return mImpl->getAlpha();
+    }
+
+    //!
+    //! \brief Set the beta parameter.
+    //!
+    //! \param beta has different meanings for each operator:
+    //!
+    //! Operation          | Usage
+    //! kLINSPACE          | the delta value, defaults to 1.0;
+    //! kRANDOM_UNIFORM    | the maximal value, defaults to 1.0;
+    //! kRANDOM_NORMAL     | the standard deviation of the normal distribution, default is 1.0;
+    //!
+    //! If input 2 exists, it is reset to null by this method.
+    //!
+    //! \see getBeta, setBetaInt64
+    //!
+    void setBeta(double beta) noexcept
+    {
+        mImpl->setBeta(beta);
+    }
+
+    //!
+    //! \brief Get the value of beta parameter.
+    //!
+    //! \return A double value of beta.
+    //!
+    //! If the third input is present and non-null,
+    //! this function returns -1.0.
+    //!
+    //! \see setBeta, setBetaInt64
+    //!
+    double getBeta() const noexcept
+    {
+        return mImpl->getBeta();
+    }
+
+    //!
+    //! \brief Replace an input of this layer with a specific tensor.
+    //!
+    //! \param index the index of the input to set.
+    //! \param tensor the new input tensor
+    //!
+    //! The three inputs correspond to these setters of IFillLayer:
+    //!
+    //! - 0: setDimensions
+    //! - 1: setAlpha
+    //! - 2: setBeta
+    //!
+    //! The following descriptions give more intuitive names for the inputs.
+    //!
+    //! Indices for kLINSPACE are:
+    //!
+    //! - 0: Shape, a 1D shape tensor, specifies the output tensor's dimensions.
+    //! - 1: Start, a scalar, specifies the start value.
+    //! - 2: Delta, a 1D tensor, specifies the delta value for each dimension.
+    //!
+    //! Indices for kRANDOM_UNIFORM are:
+    //!
+    //! - 0: Shape, a 1D shape tensor, specifies the output tensor's dimensions.
+    //! - 1: Minimum, a scalar, specifies the minimum random value.
+    //! - 2: Maximum, a scalar, specifies the maximal random value.
+    //!
+    //! Indices for kRANDOM_NORMAL are:
+    //!
+    //! - 0: Shape, a 1D shape tensor, specifies the output tensor's dimensions.
+    //! - 1: Mean, a scalar, specifies the mean of the normal distribution.
+    //! - 2: Scale, a scalar, specifies the standard deviation of the normal distribution.
+    //!
+    //! Using the corresponding setter resets the input to null.
+    //!
+    //! If either inputs 1 or 2 is non-null, then both must be non-null and have the same data type.
+    //!
+    //! If this function is called for an index greater or equal to getNbInputs(),
+    //! then afterwards getNbInputs() returns index + 1, and any missing intervening
+    //! inputs are set to null.
+    //!
+    using ILayer::setInput;
+
+    //!
+    //! \brief Set the alpha parameter with int64 datatype.
+    //!
+    //! \param alpha has different meanings for each operator:
+    //!
+    //! Operation          | Usage
+    //! kLINSPACE          | the start value, defaults to 0;
+    //! kRANDOM_UNIFORM    | the minimum value, defaults to 0;
+    //! kRANDOM_NORMAL     | the mean of the normal distribution, default is 0;
+    //!
+    //! If input 1 exists, it is reset to null by this method.
+    //!
+    //! \see getAlphaInt64
+    //!
+    void setAlphaInt64(int64_t alpha) noexcept
+    {
+        mImpl->setAlphaInt64(alpha);
+    }
+
+    //!
+    //! \brief Get the value of alpha parameter with int64 datatype.
+    //!
+    //! \return An int64 value of alpha.
+    //!
+    //! If the second input is present and non-null,
+    //! this function returns -1.
+    //!
+    //! \see setAlphaInt64
+    //!
+    int64_t getAlphaInt64() const noexcept
+    {
+        return mImpl->getAlphaInt64();
+    }
+
+    //!
+    //! \brief Set the beta parameter with int64 datatype.
+    //!
+    //! \param beta has different meanings for each operator:
+    //!
+    //! Operation          | Usage
+    //! kLINSPACE          | the delta value, defaults to 1;
+    //! kRANDOM_UNIFORM    | the maximal value, defaults to 1;
+    //! kRANDOM_NORMAL     | the standard deviation of the normal distribution, default is 1;
+    //!
+    //! If input 2 exists, it is reset to null by this method.
+    //!
+    //! \see getBetaInt64
+    //!
+    void setBetaInt64(int64_t beta) noexcept
+    {
+        mImpl->setBetaInt64(beta);
+    }
+
+    //!
+    //! \brief Get the value of beta parameter with int64 datatype.
+    //!
+    //! \return An int64 value of beta.
+    //!
+    //! If the third input is present and non-null,
+    //! this function returns -1.
+    //!
+    //! \see setBetaInt64
+    //!
+    int64_t getBetaInt64() const noexcept
+    {
+        return mImpl->getBetaInt64();
+    }
+
+    //!
+    //! \brief Return true if alpha/beta have type int64, false if they have type double.
+    //!
+    bool isAlphaBetaInt64() const noexcept
+    {
+        return mImpl->isAlphaBetaInt64();
+    }
+
+    //!
+    //! \brief Set the fill layer output type.
+    //!
+    //! \param toType The DataType of the output tensor.
+    //!
+    //! Set the output type of the fill layer. Valid values are DataType::kFLOAT, DataType::kINT32,
+    //! and DataType::kINT64.
+    //! If the network is strongly typed, setToType must be used to set the output type, and use of setOutputType
+    //! is an error. Otherwise, types passed to setOutputType and setToType must be the same.
+    //!
+    //! \see NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
+    //!
+    void setToType(DataType toType) noexcept
+    {
+        mImpl->setToType(toType);
+    }
+
+    //!
+    //! \brief Get the fill layer output type.
+    //!
+    //! \return toType parameter set during layer creation or by setToType().
+    //! The return value is the output type of the fill layer.
+    //! The default value is DataType::kFLOAT.
+    //!
+    DataType getToType() const noexcept
+    {
+        return mImpl->getToType();
+    }
+
+protected:
+    virtual ~IFillLayer() noexcept = default;
+    apiv::VFillLayer* mImpl;
+};
+
+//!
+//! \class IQuantizeLayer
+//!
+//! \brief A Quantize layer in a network definition.
+//!
+//! This layer accepts a floating-point data input tensor, and uses the scale and zeroPt inputs to
+//! quantize the data according to:
+//! \p output = clamp(round(\p input / \p scale) + \p zeroPt)
+//!
+//! Rounding type is rounding-to-nearest ties-to-even (https://en.wikipedia.org/wiki/Rounding#Round_half_to_even).
+//! Clamping range according to data type:
+//! - FP8: [-448, 448]
+//! - INT4: [-8, 7]
+//! - INT8: [-128, 127]
+//!
+//! The first input (index 0) is the tensor to be quantized.
+//! The second (index 1) and third (index 2) are the scale and zero point respectively.
+//! \p scale and \p zeroPt should have identical dimensions, and rank lower or equal to 2.
+//!
+//! The \p zeroPt tensor is optional, and if not set, will be assumed to be zero. Its data type must match the
+//! output data type. \p zeroPt must only contain zero-valued coefficients, because only symmetric quantization is
+//! supported.
+//! The \p scale value must be a scalar for per-tensor quantization, a 1D tensor for per-channel quantization, or the
+//! same rank as the input tensor for block quantization. All \p scale coefficients must have strictly positive values.
+//! The size of the 1D \p scale tensor must match the size of the quantization axis. For block quantization, the shape
+//! of \p scale tensor must match the shape of the input, except for the blocking dimension (the last or second to last
+//! dimension). The size of \p zeroPt must match the size of \p scale.
+//!
+//! The subgraph which terminates with the \p zeroPt tensor must be a build-time constant containing only zeros.
+//! The output type, if constrained, must be constrained to DataType::kINT8, DataType::kFP8, DataType::kINT4 or
+//! DataType::kFP4. The input type, if constrained, must be constrained to DataType::kFLOAT, DataType::kHALF, or
+//! DataType::kBF16. The output size is the same as the input size. The quantization axis is in reference to the input
+//! tensor's dimensions.
+//!
+//! IQuantizeLayer supports DataType::kFLOAT, DataType::kHALF, or DataType::kBF16 precision and will default to
+//! DataType::kFLOAT precision during instantiation. For strongly typed networks, if the scale data type is
+//! DataType::kHALF or DataType::kBF16, it must match the input data type. For MXFP8 quantization, the \p scale
+//! data type must be DataType::kE8M0.
+//!
+//! IQuantizeLayer supports DataType::kINT8, DataType::kFP8, DataType::kINT4 or DataType::kFP4 output.
+//!
+//! As an example of the operation of this layer, imagine a 4D NCHW activation input which can be quantized using a
+//! single scale coefficient (referred to as per-tensor quantization):
+//!     For each n in N:
+//!         For each c in C:
+//!             For each h in H:
+//!                 For each w in W:
+//!                     output[n,c,h,w] = clamp(round(\p input[n,c,h,w] / \p scale) + \p zeroPt)
+//!
+//! Per-channel quantization is supported only for weight inputs. Thus, Activations cannot be quantized per-channel.
+//! As an example of per-channel operation, imagine a 4D KCRS weights input and K (dimension 0) as the quantization
+//! axis. The scale is an array of coefficients, and must have the same size as the quantization axis.
+//!     For each k in K:
+//!         For each c in C:
+//!             For each r in R:
+//!                 For each s in S:
+//!                     output[k,c,r,s] = clamp(round(\p input[k,c,r,s] / \p scale[k]) + \p zeroPt[k])
+//!
+//! Block quantization is supported for output types DataType::kFP4, DataType::kFP8 and DataType::kINT4.
+//! As an example of blocked operation, imagine a 2D RS input with R (dimension 0) as the blocking axis and B as the
+//! block size. The scale is a 2D array of coefficients, with dimensions (R//B, S).
+//!     For each r in R:
+//!         For each s in S:
+//!             output[r,s] = clamp(round(\p input[r,s] / \p scale[r//B, s]) + \p zeroPt[r//B, s])
+//!
+//! \note Only symmetric quantization is supported.
+//! \note Currently the only allowed build-time constant \p zeroPt subgraphs are:
+//! 1. Constant -> Quantize
+//! 2. Constant -> Cast -> Quantize
+//!
+//! \note The input tensor for this layer must not be a scalar.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IQuantizeLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Get the quantization axis.
+    //!
+    //! \return axis parameter set by setAxis().
+    //! The return value is the index of the quantization axis in the input tensor's dimensions.
+    //! A value of -1 indicates per-tensor quantization.
+    //! The default value is -1.
+    //!
+    int32_t getAxis() const noexcept
+    {
+        return mImpl->getAxis();
+    }
+    //!
+    //! \brief Set the quantization axis.
+    //!
+    //! Set the index of the quantization axis (with reference to the input tensor's dimensions).
+    //! The axis must be a valid axis if the scale tensor has more than one coefficient.
+    //! The axis value is used only for per-axis (per-channel) quantization.
+    //!
+    void setAxis(int32_t axis) noexcept
+    {
+        mImpl->setAxis(axis);
+    }
+
+    //!
+    //! \brief Set the Quantize layer output type.
+    //!
+    //! \param toType The DataType of the output tensor.
+    //!
+    //! Set the output type of the quantize layer. Valid values are DataType::kINT8, DataType::kFP8, DataType::kINT4 and
+    //! DataType::kFP4. If the network is strongly typed, setToType must be used to set the output type, and use of
+    //! setOutputType is an error. Otherwise, types passed to setOutputType and setToType must be the same.
+    //!
+    //! \see NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
+    //!
+    void setToType(DataType toType) noexcept
+    {
+        mImpl->setToType(toType);
+    }
+
+    //!
+    //! \brief Return the Quantize layer output type.
+    //!
+    //! \return toType parameter set during layer creation or by setToType().
+    //! The return value is the output type of the quantize layer.
+    //! The default value is DataType::kINT8.
+    //!
+    DataType getToType() const noexcept
+    {
+        return mImpl->getToType();
+    }
+
+protected:
+    virtual ~IQuantizeLayer() noexcept = default;
+    apiv::VQuantizeLayer* mImpl; //!< Implementation object that every inline method of this class forwards to.
+};
+
+//!
+//! \class IDequantizeLayer
+//!
+//! \brief A Dequantize layer in a network definition.
+//!
+//! This layer accepts a quantized type input tensor, and uses the configured scale and zeroPt inputs to
+//! dequantize the input according to:
+//! \p output = (\p input - \p zeroPt) * \p scale
+//!
+//! The first input (index 0) is the tensor to be dequantized.
+//! The second (index 1) and third (index 2) are the scale and zero point respectively.
+//! \p scale and \p zeroPt should have identical dimensions, and a rank that is lower or equal to 2.
+//!
+//! The \p zeroPt tensor is optional, and if not set, will be assumed to be zero. Its data type must be identical to
+//! the input's data type. \p zeroPt must only contain zero-valued coefficients, because only symmetric quantization is
+//! supported.
+//! The \p scale value must be a scalar for per-tensor quantization, a 1D tensor for per-channel quantization, or the
+//! same rank as the input tensor for block quantization. All \p scale coefficients must have strictly positive values.
+//! The size of the 1D \p scale tensor must match the size of the quantization axis. For block quantization, the shape
+//! of \p scale tensor must match the shape of the input, except for one dimension (the last or second to last
+//! dimension) in which blocking occurs. The size of \p zeroPt must match the size of \p scale.
+//!
+//! The subgraph which terminates with the \p zeroPt tensor must be a build-time constant containing only zeros.
+//! The output type, if constrained, must be constrained to DataType::kFLOAT, DataType::kHALF, or DataType::kBF16. The
+//! input type, if constrained, must be constrained to DataType::kINT8, DataType::kFP8, DataType::kINT4 or
+//! DataType::kFP4. The output size is the same as the input size. The quantization axis is in reference to the input
+//! tensor's dimensions.
+//!
+//! IDequantizeLayer supports DataType::kINT8 (default), DataType::kFP8, DataType::kINT4 or DataType::kFP4. For strongly
+//! typed networks, \p input data type must be the same as \p zeroPt data type.
+//!
+//! IDequantizeLayer supports DataType::kFLOAT, DataType::kHALF, or DataType::kBF16 output. The output data type must
+//! be configured explicitly using \p setToType.
+//!
+//! As an example of the operation of this layer, imagine a 4D NCHW activation input which can be quantized using a
+//! single scale coefficient (referred to as per-tensor quantization):
+//!     For each n in N:
+//!         For each c in C:
+//!             For each h in H:
+//!                 For each w in W:
+//!                     output[n,c,h,w] = (\p input[n,c,h,w] - \p zeroPt) * \p scale
+//!
+//! Per-channel dequantization is supported only for input that is rooted at an IConstantLayer (i.e. weights).
+//! Activations cannot be quantized per-channel. As an example of per-channel operation, imagine a 4D KCRS weights input
+//! and K (dimension 0) as the quantization axis. The scale is an array of coefficients, which is the same size as the
+//! quantization axis.
+//!     For each k in K:
+//!         For each c in C:
+//!             For each r in R:
+//!                 For each s in S:
+//!                     output[k,c,r,s] = (\p input[k,c,r,s] - \p zeroPt[k]) * \p scale[k]
+//!
+//! Block dequantization is supported for input types DataType::kFP4, DataType::kFP8 and DataType::kINT4.
+//! As an example of blocked operation, imagine a 2D RS input with R (dimension 0) as the blocking axis and B as the
+//! block size. The scale is a 2D array of coefficients, with dimensions (R//B, S).
+//!     For each r in R:
+//!         For each s in S:
+//!             output[r,s] = (\p input[r,s] - \p zeroPt[r//B, s]) * \p scale[r//B, s]
+//!
+//! \note Only symmetric quantization is supported.
+//! \note Currently the only allowed build-time constant \p zeroPt subgraphs are:
+//! 1. Constant -> Quantize
+//! 2. Constant -> Cast -> Quantize
+//!
+//! \note The input tensor for this layer must not be a scalar.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IDequantizeLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Get the quantization axis.
+    //!
+    //! \return axis parameter set by setAxis().
+    //! The return value is the index of the quantization axis in the input tensor's dimensions.
+    //! A value of -1 indicates per-tensor quantization.
+    //! The default value is -1.
+    //!
+    int32_t getAxis() const noexcept
+    {
+        return mImpl->getAxis();
+    }
+    //!
+    //! \brief Set the quantization axis.
+    //!
+    //! Set the index of the quantization axis (with reference to the input tensor's dimensions).
+    //! The axis must be a valid axis if the scale tensor has more than one coefficient.
+    //! The axis value will be ignored if the scale tensor has exactly one coefficient (per-tensor quantization).
+    //!
+    void setAxis(int32_t axis) noexcept
+    {
+        mImpl->setAxis(axis);
+    }
+
+    //!
+    //! \brief Set the Dequantize layer output type.
+    //!
+    //! \param toType The DataType of the output tensor.
+    //!
+    //! Set the output type of the dequantize layer. Valid values are DataType::kFLOAT, DataType::kHALF and DataType::kBF16.
+    //! If the network is strongly typed, setToType must be used to set the output type, and use of setOutputType
+    //! is an error. Otherwise, types passed to setOutputType and setToType must be the same.
+    //!
+    //! \see NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
+    //!
+    void setToType(DataType toType) noexcept
+    {
+        mImpl->setToType(toType);
+    }
+
+    //!
+    //! \brief Return the Dequantize layer output type.
+    //!
+    //! \return toType parameter set during layer creation or by setToType().
+    //! The return value is the output type of the dequantize layer.
+    //! The default value is DataType::kFLOAT.
+    //!
+    DataType getToType() const noexcept
+    {
+        return mImpl->getToType();
+    }
+
+protected:
+    virtual ~IDequantizeLayer() noexcept = default;
+    apiv::VDequantizeLayer* mImpl;
+};
+
+//!
+//! \class IDynamicQuantizeLayer
+//!
+//! \brief A network layer to perform dynamic quantization.
+//!
+//! This layer accepts a floating-point input tensor and computes the block scale factors needed to
+//! quantize the input's data. It outputs the quantized tensor as its first output and
+//! the scale factors as its second output.
+//!
+//! Use ILayer::setInput to add an input for the double-quantization scale factor.
+//!
+//! \note Only symmetric quantization is supported.
+//! \note The input tensor for this layer must not be a scalar.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the
+//! API and ABI.
+//!
+class IDynamicQuantizeLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor
+    //!
+    //! \param index the index of the input to modify.
+    //! \param tensor the new input tensor
+    //!
+    //! Input 0 is the input activation tensor.
+    //! Input 1 is the double-quantization scale factor. This scale is used to quantize the
+    //! dynamically computed high-precision scale factors that are used to quantize the
+    //! activation data. Currently this input must be a positive scalar (a 0D tensor).
+    //!
+    using ILayer::setInput;
+
+    //!
+    //! \brief Set DynamicQuantizeLayer's quantized output type.
+    //!
+    //! \param toType The data type of the quantized output tensor.
+    //!
+    //! Set the type of the dynamic quantization layer's quantized output. If the network is strongly typed, setToType
+    //! must be used to set the output type, and use of setOutputType is an error. Otherwise, types passed to
+    //! setOutputType and setToType must be the same.
+    //! Valid values for \p toType are DataType::kFP4 (NVFP4 quantization) and DataType::kFP8 (MXFP8 quantization).
+    //!
+    //! \see NetworkDefinitionCreationFlag::kSTRONGLY_TYPED
+    //!
+    void setToType(DataType toType) noexcept
+    {
+        mImpl->setToType(toType);
+    }
+
+    //!
+    //! \brief Return DynamicQuantizeLayer's quantized output type.
+    //!
+    //! \return toType parameter set during layer creation or by setToType().
+    //!
+    //! The return value is the type of the quantized output tensor.
+    //! The default value is DataType::kFP4.
+    //!
+    DataType getToType() const noexcept
+    {
+        return mImpl->getToType();
+    }
+
+    //!
+    //! \brief Set the data type of the scale factors used to quantize the data.
+    //!
+    //! \param scaleType The scale factors data type.
+    //!
+    //! Set the scale-factors type.
+    //! Valid values are DataType::kFP8 (NVFP4 quantization) and DataType::kE8M0 (MXFP8 quantization).
+    //!
+    void setScaleType(DataType scaleType) noexcept
+    {
+        mImpl->setScaleType(scaleType);
+    }
+
+    //!
+    //! \brief Return the scale factors data type.
+    //!
+    //! \return scaleType parameter set during layer creation or by setScaleType().
+    //!
+    //! The return value is the type of the scale factors used to quantize the dynamic data.
+    //! The default value is DataType::kFP8.
+    //!
+    DataType getScaleType() const noexcept
+    {
+        return mImpl->getScaleType();
+    }
+
+    //!
+    //! \brief Set the axis along which block quantization occurs.
+    //!
+    //! The axis must be the last dimension or second to last dimension.
+    //! The input's shape along the axis must be constant.
+    //!
+    //! \see getAxis()
+    //!
+    void setAxis(int32_t axis) noexcept
+    {
+        mImpl->setAxis(axis);
+    }
+
+    //!
+    //! \brief Get the axis along which block quantization occurs.
+    //!
+    //! \see setAxis()
+    //!
+    int32_t getAxis() const noexcept
+    {
+        return mImpl->getAxis();
+    }
+
+    //!
+    //! \brief Set the size of the quantization block.
+    //!
+    //! \note The block size must divide the input in the blocked axis without remainder.
+    //! Valid values are 16 (NVFP4 quantization) and 32 (MXFP8 quantization).
+    //!
+    //! \see getBlockSize()
+    //!
+    void setBlockSize(int32_t size) noexcept
+    {
+        mImpl->setBlockSize(size);
+    }
+
+    //!
+    //! \brief Get the size of the quantization block.
+    //!
+    //! \see setBlockSize()
+    //!
+    int32_t getBlockSize() const noexcept
+    {
+        return mImpl->getBlockSize();
+    }
+
+protected:
+    virtual ~IDynamicQuantizeLayer() noexcept = default;
+    apiv::VDynamicQuantizeLayer* mImpl;
+};
+
+//!
+//! \class IEinsumLayer
+//!
+//! \brief An Einsum layer in a network
+//!
+//! This layer implements a summation over the elements of the inputs along dimensions specified by the equation
+//! parameter, based on the Einstein summation convention.
+//! The layer can have one or more inputs of rank >= 0. All the inputs must have type DataType::kFLOAT
+//! or DataType::kHALF, not necessarily the same. There is one output of type DataType::kFLOAT.
+//! The shape of the output tensor is determined by the equation.
+//!
+//! The equation specifies ASCII lower-case letters for each dimension in the inputs in the same order as the
+//! dimensions, separated by comma for each input. The dimensions labeled with the same subscript must match or be
+//! broadcastable. Repeated subscript labels in one input take the diagonal. Repeating a label across multiple inputs
+//! means that those axes will be multiplied. Omitting a label from the output means values along those axes will be
+//! summed. In implicit mode, the indices which appear once in the expression will be part of the output in increasing
+//! alphabetical order. In explicit mode, the output can be controlled by specifying output subscript labels by adding
+//! an arrow ('->') followed by subscripts for the output.
+//! For example, "ij,jk->ik" is equivalent to "ij,jk".
+//! Ellipsis ('...') can be used in place of subscripts to broadcast the dimensions.
+//! See the TensorRT Developer Guide for more details on equation syntax.
+//!
+//! Many common operations can be expressed using the Einsum equation.
+//! For example:
+//! Matrix Transpose:             ij->ji
+//! Sum:                          ij->
+//! Matrix-Matrix Multiplication: ik,kj->ij
+//! Dot Product:                  i,i->
+//! Matrix-Vector Multiplication: ik,k->i
+//! Batch Matrix Multiplication:  ijk,ikl->ijl
+//! Batch Diagonal:               ...ii->...i
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IEinsumLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the equation.
+    //! The equation is a comma-separated list of subscript labels, where each label refers to a
+    //! dimension of the corresponding tensor.
+    //!
+    //! \return true if the equation was syntactically valid and set successfully, false otherwise.
+    //!
+    //! \see getEquation()
+    //!
+    bool setEquation(char const* equation) noexcept
+    {
+        return mImpl->setEquation(equation);
+    }
+
+    //!
+    //! \brief Return the equation.
+    //!
+    //! \see setEquation()
+    //!
+    char const* getEquation() const noexcept
+    {
+        return mImpl->getEquation();
+    }
+
+protected:
+    virtual ~IEinsumLayer() noexcept = default;
+    apiv::VEinsumLayer* mImpl;
+};
+
+//!
+//! \enum ScatterMode
+//!
+//! \brief Control form of IScatterLayer: selects how Indices are interpreted when scattering.
+//!
+//! \see IScatterLayer
+//!
+enum class ScatterMode : int32_t
+{
+    kELEMENT = 0, //!< Similar to ONNX ScatterElements
+    kND = 1,      //!< Similar to ONNX ScatterND
+};
+
+//!
+//! \brief Maximum number of elements in ScatterMode enum.
+//!
+//! \see ScatterMode
+//!
+template <>
+constexpr inline int32_t EnumMax<ScatterMode>() noexcept
+{
+    return 2; // kELEMENT and kND
+}
+
+//!
+//! \class IScatterLayer
+//!
+//! \brief A scatter layer in a network definition. Supports several kinds of scattering.
+//!
+//! The Scatter layer has three input tensors: Data, Indices, and Updates, one output tensor
+//! Output, and a scatter mode. When kELEMENT mode is used an optional axis parameter is available.
+//! * Data is a tensor of rank r >= 1 that stores the values to be duplicated in Output.
+//! * Indices is a tensor of rank q that determines which locations in Output to write new
+//!   values to. Constraints on the rank q depend on the mode:
+//!       ScatterMode::kND: q >= 1
+//!       ScatterMode::kELEMENT: q must be the same as r
+//! * Updates is a tensor of rank s >= 1 that provides the data
+//!   to write to Output specified by its corresponding location in Indices.
+//!   Constraints on the rank of Updates depend on the mode:
+//!       ScatterMode::kND: s = r + q - shape(Indices)[-1] - 1
+//!       ScatterMode::kELEMENT: s = q = r
+//! * Output is a tensor with the same dimensions as Data that stores the resulting values of the
+//!   transformation. It must not be a shape tensor.
+//! The types of Data, Updates, and Output shall be the same, and Indices shall be of type DataType::kINT32 or
+//! DataType::kINT64.
+//!
+//! The output is computed by copying the data, and then updating elements of it based on indices.
+//! How Indices are interpreted depends upon the ScatterMode.
+//!
+//! ScatterMode::kND
+//!
+//!     The indices are interpreted as a tensor of rank q-1 of indexing tuples.
+//!     The axis parameter is ignored.
+//!
+//!     Given that data dims are {d_0,...,d_{r-1}} and indices dims are {i_0,...,i_{q-1}},
+//!     define k = i_{q-1} (the last dimension of indices); updates dims are {i_0,...,i_{q-2},d_k,...,d_{r-1}}
+//!     The updating can be computed by:
+//!         foreach slice in indices[i_0,...,i_{q-2}]
+//!             output[indices[slice]] = updates[slice]
+//!
+//! ScatterMode::kELEMENT
+//!
+//!     Here "axis" denotes the result of getAxis().
+//!
+//!     For each element X of indices:
+//!         Let J denote a sequence for the subscripts of X
+//!         Let K = sequence J with element [axis] replaced by X
+//!         output[K] = updates[J]
+//!
+//!     For example, if indices has dimensions [N,C,H,W] and axis is 2, then the updates happen as:
+//!
+//!         for n in [0,N)
+//!             for c in [0,C)
+//!                 for h in [0,H)
+//!                     for w in [0,W)
+//!                         output[n,c,indices[n,c,h,w],w] = updates[n,c,h,w]
+//!
+//! Writes to the same output element cause undefined behavior.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IScatterLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the scatter mode.
+    //!
+    //! \see getMode()
+    //!
+    void setMode(ScatterMode mode) noexcept
+    {
+        mImpl->setMode(mode);
+    }
+
+    //!
+    //! \brief Get the scatter mode.
+    //!
+    //! \see setMode()
+    //!
+    ScatterMode getMode() const noexcept
+    {
+        return mImpl->getMode();
+    }
+
+    //!
+    //! \brief Set the axis used by ScatterMode::kELEMENT.
+    //!
+    //! The axis defaults to 0.
+    //!
+    void setAxis(int32_t axis) noexcept
+    {
+        mImpl->setAxis(axis);
+    }
+
+    //!
+    //! \brief Get the axis.
+    //!
+    int32_t getAxis() const noexcept
+    {
+        return mImpl->getAxis();
+    }
+
+protected:
+    apiv::VScatterLayer* mImpl;
+    virtual ~IScatterLayer() noexcept = default;
+}; // class IScatterLayer
+
+//!
+//! \class IOneHotLayer
+//!
+//! \brief A OneHot layer in a network definition.
+//!
+//! The OneHot layer has three input tensors: Indices, Values, and Depth, one output tensor:
+//! Output, and an axis attribute.
+//! * Indices is an Int32 tensor that determines which locations in Output to set as on_value.
+//! * Values is a two-element (rank=1) tensor that consists of [off_value, on_value]
+//! * Depth is a 0D tensor of type Int32 or Int64, which contains the depth (number of classes) of the one-hot encoding.
+//!   The depth tensor must be a positive build-time constant.
+//! * Output is a tensor with rank = rank(indices)+1, where the added dimension contains the one-hot encoding.
+//!   The data type of Output is equal to the Values data type.
+//! * Axis is a scalar specifying to which dimension of the output one-hot encoding is added.
+//!   Valid range for axis is -rank(indices)-1 <= axis <= rank(indices).
+//!
+//! The output is computed by copying off_value to all output elements, then setting on_value on the indices
+//! specified by the indices tensor.
+//! When axis = 0:
+//! output[indices[i, j, k], i, j, k] = on_value for all i, j, k and off_value otherwise.
+//!
+//! When axis = -1:
+//! output[i, j, k, indices[i, j, k]] = on_value for all i, j, k and off_value otherwise.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IOneHotLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the axis parameter.
+    //!
+    //! \see IOneHotLayer
+    //!
+    void setAxis(int32_t axis) noexcept
+    {
+        mImpl->setAxis(axis);
+    }
+
+    //!
+    //! \brief Get the value of the axis parameter.
+    //!
+    int32_t getAxis() const noexcept
+    {
+        return mImpl->getAxis();
+    }
+
+protected:
+    apiv::VOneHotLayer* mImpl;
+    virtual ~IOneHotLayer() noexcept = default;
+}; // class IOneHotLayer
+
+//!
+//! \class IGridSampleLayer
+//!
+//! \brief A GridSample layer in a network definition.
+//!
+//! This layer uses an input tensor and a grid tensor to produce an interpolated output tensor.
+//! The input and grid tensors must be tensors of rank 4. The only supported SampleMode
+//! values are SampleMode::kCLAMP, SampleMode::kFILL, and SampleMode::kREFLECT.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IGridSampleLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the grid sample interpolation mode.
+    //!
+    //! \see getInterpolationMode()
+    //!
+    void setInterpolationMode(InterpolationMode mode) noexcept
+    {
+        mImpl->setInterpolationMode(mode);
+    }
+
+    //!
+    //! \brief Get the grid sample interpolation mode.
+    //!
+    //! \see setInterpolationMode()
+    //!
+    //! \return The value specified by setInterpolationMode(), or InterpolationMode::kLINEAR otherwise.
+    //!
+    InterpolationMode getInterpolationMode() const noexcept
+    {
+        return mImpl->getInterpolationMode();
+    }
+
+    //!
+    //! \brief Set the align corners mode.
+    //!
+    //! \see getAlignCorners()
+    //!
+    void setAlignCorners(bool alignCorners) noexcept
+    {
+        mImpl->setAlignCorners(alignCorners);
+    }
+
+    //!
+    //! \brief Get the align corners mode.
+    //!
+    //! \see setAlignCorners()
+    //!
+    //! \return The value specified by setAlignCorners(), or false otherwise.
+    //!
+    bool getAlignCorners() const noexcept
+    {
+        return mImpl->getAlignCorners();
+    }
+
+    //!
+    //! \brief Set the sample mode.
+    //!
+    //! \see getSampleMode()
+    //!
+    //! \return true if layer's sample mode was set to mode, false otherwise.
+    //!
+    bool setSampleMode(SampleMode mode) noexcept
+    {
+        return mImpl->setSampleMode(mode);
+    }
+
+    //!
+    //! \brief Get the sample mode.
+    //!
+    //! \see setSampleMode()
+    //!
+    //! \return The value specified by a successful call to setSampleMode(), or SampleMode::kFILL otherwise.
+    //!
+    SampleMode getSampleMode() const noexcept
+    {
+        return mImpl->getSampleMode();
+    }
+
+protected:
+    apiv::VGridSampleLayer* mImpl;
+    virtual ~IGridSampleLayer() noexcept = default;
+}; // class IGridSampleLayer
+
+//!
+//! \enum BoundingBoxFormat
+//!
+//! \brief Representation of bounding box data used for the Boxes input tensor in INMSLayer.
+//!
+//! \see INMSLayer
+//!
+enum class BoundingBoxFormat : int32_t
+{
+    //! (x1, y1, x2, y2) where (x1, y1) and (x2, y2) are any pair of diagonal corners.
+    kCORNER_PAIRS = 0,
+    //! (x_center, y_center, width, height) where (x_center, y_center) is the center point of the box.
+    kCENTER_SIZES = 1
+};
+
+//!
+//! \brief Maximum number of elements in BoundingBoxFormat enum.
+//!
+//! \see BoundingBoxFormat
+//!
+template <>
+constexpr inline int32_t EnumMax<BoundingBoxFormat>() noexcept
+{
+    return 2; // kCORNER_PAIRS and kCENTER_SIZES
+}
+
+//!
+//! \class INMSLayer
+//!
+//! \brief A non-maximum suppression layer in a network definition.
+//!
+//! The NMS algorithm iterates through a set of bounding boxes and their confidence scores, in decreasing
+//! order of score. Boxes are selected if their score is above a given threshold, and their
+//! intersection-over-union (IoU) with previously selected boxes is less than or equal to a given threshold.
+//! This layer implements NMS per batch item and per class.
+//!
+//! Per batch item, boxes are initially sorted by their scores without regard to class. Only boxes up to a maximum of the TopK limit are considered for selection (per batch).
+//! During selection, only overlapping boxes of the same class are compared, so that overlapping boxes of different classes do not suppress each other.
+//!
+//! For each batch item, the ordering of candidate bounding boxes with the same score is unspecified, but the ordering will be consistent across different runs for the same inputs.
+//!
+//! The layer has the following inputs, in order of input index:
+//!
+//! * Boxes contains the input bounding boxes. It is a linear tensor of type kFLOAT or kHALF. It has
+//!   shape [batchSize, numInputBoundingBoxes, numClasses, 4] if the boxes are per class, or
+//!   [batchSize, numInputBoundingBoxes, 4] if the same boxes are to be used for each class.
+//! * Scores contains the per-box scores. It is a linear tensor of the same type as Boxes. It has shape
+//!   [batchSize, numInputBoundingBoxes, numClasses].
+//! * MaxOutputBoxesPerClass is the maximum number of output boxes per batch item per class.
+//!   It is a scalar (0D tensor) of type kINT32.
+//! * IoUThreshold is the maximum IoU for selected boxes. It is a scalar (0D tensor) of type kFLOAT in the range
+//!   [0.0f, 1.0f]. It is an optional input with default 0.0f.
+//! * ScoreThreshold is the value that a box score must exceed in order to be selected. It is a scalar (0D tensor) of type kFLOAT. It is an optional
+//!   input with default 0.0f.
+//!
+//! The layer has the following outputs, in order of output index:
+//!
+//! * SelectedIndices contains the indices of the selected boxes. It is a linear tensor of type kINT32. It has shape
+//!   [NumOutputBoxes, 3]. Each row contains a (batchIndex, classIndex, boxIndex) tuple.
+//!   The output boxes are sorted in order of increasing batchIndex and then in order of decreasing score within each batchIndex.
+//!   For each batchIndex, the ordering of output boxes with the same score is unspecified.
+//!   If MaxOutputBoxesPerClass is a constant input, the maximum number of output boxes is
+//!   batchSize * numClasses * min(numInputBoundingBoxes, MaxOutputBoxesPerClass).
+//!   Otherwise, the maximum number of output boxes is batchSize * numClasses * numInputBoundingBoxes.
+//!   The maximum number of output boxes is used to determine the upper-bound on allocated memory for this output tensor.
+//! * NumOutputBoxes is the number of output boxes in SelectedIndices. It is a scalar (0D tensor) of type kINT32.
+//!
+//! \warning There is a hardware-dependent limit K such that only the K highest scoring boxes in each batch item
+//! will be considered for selection. The value of K is 2000 for SM 5.3 and 6.2 devices, and 5000 otherwise.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class INMSLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the bounding box format parameter for the layer.
+    //!
+    //! The default value for the bounding box format parameter is kCORNER_PAIRS.
+    //!
+    //! \see BoundingBoxFormat
+    //!
+    //! \see getBoundingBoxFormat()
+    //!
+    void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept
+    {
+        mImpl->setBoundingBoxFormat(fmt);
+    }
+
+    //!
+    //! \brief Get the bounding box format parameter for the layer.
+    //!
+    //! \see BoundingBoxFormat
+    //!
+    //! \see setBoundingBoxFormat()
+    //!
+    BoundingBoxFormat getBoundingBoxFormat() const noexcept
+    {
+        return mImpl->getBoundingBoxFormat();
+    }
+
+    //!
+    //! \brief Set the TopK box limit parameter for the layer.
+    //!
+    //! The TopK box limit is the maximum number of filtered boxes considered for selection per batch item.
+    //! The default value for the TopK box limit parameter is 2000 for SM 5.3 and 6.2 devices, and 5000 otherwise.
+    //! The TopK box limit must be less than or equal to {2000 for SM 5.3 and 6.2 devices, 5000 otherwise}.
+    //!
+    //! \see getTopKBoxLimit()
+    //!
+    void setTopKBoxLimit(int32_t limit) noexcept
+    {
+        mImpl->setTopKBoxLimit(limit);
+    }
+
+    //!
+    //! \brief Get the TopK box limit parameter for the layer.
+    //!
+    //! \see setTopKBoxLimit()
+    //!
+    int32_t getTopKBoxLimit() const noexcept
+    {
+        return mImpl->getTopKBoxLimit();
+    }
+
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor.
+    //!
+    //! \param index The index of the input to modify.
+    //! \param tensor The new input tensor.
+    //!
+    //! The indices are as follows:
+    //!
+    //! - 0: The required Boxes tensor.
+    //! - 1: The required Scores tensor.
+    //! - 2: The required MaxOutputBoxesPerClass tensor.
+    //! - 3: The optional IoUThreshold tensor.
+    //! - 4: The optional ScoreThreshold tensor.
+    //!
+    //! If this function is called for an index greater than or equal to getNbInputs(),
+    //! then afterwards getNbInputs() returns index + 1, and any missing intervening
+    //! inputs are set to null. Note that only optional inputs can be missing.
+    //!
+    using ILayer::setInput;
+
+protected:
+    apiv::VNMSLayer* mImpl;
+    virtual ~INMSLayer() noexcept = default;
+}; // class INMSLayer
+
+//!
+//! \class IReverseSequenceLayer
+//!
+//! \brief A ReverseSequence layer in a network definition.
+//!
+//! This layer performs batch-wise reversal, which slices the input tensor along the axis batchAxis. For the
+//! i-th slice, the operation reverses the first N elements, specified by the corresponding i-th value in
+//! sequenceLens, along sequenceAxis and keeps the remaining elements unchanged. The output tensor will have
+//! the same shape as the input tensor.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IReverseSequenceLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the batch axis. Default is 1.
+    //!
+    //! batchAxis should be between zero (inclusive) and the rank of input (exclusive), and different from
+    //! sequenceAxis. Otherwise, ErrorCode::kINVALID_ARGUMENT will be triggered.
+    //!
+    //! \see getBatchAxis()
+    //!
+    void setBatchAxis(int32_t batchAxis) noexcept
+    {
+        mImpl->setBatchAxis(batchAxis);
+    }
+
+    //!
+    //! \brief Return the batch axis. Return 1 if no batch axis was set.
+    //!
+    //! \see setBatchAxis()
+    //!
+    int32_t getBatchAxis() const noexcept
+    {
+        return mImpl->getBatchAxis();
+    }
+
+    //!
+    //! \brief Set the sequence axis. Default is 0.
+    //!
+    //! sequenceAxis should be between zero (inclusive) and the rank of input (exclusive), and different from
+    //! batchAxis. Otherwise, ErrorCode::kINVALID_ARGUMENT will be triggered.
+    //!
+    //! \see getSequenceAxis()
+    //!
+    void setSequenceAxis(int32_t sequenceAxis) noexcept
+    {
+        mImpl->setSequenceAxis(sequenceAxis);
+    }
+
+    //!
+    //! \brief Return the sequence axis. Return 0 if no sequence axis was set.
+    //!
+    //! \see setSequenceAxis()
+    //!
+    int32_t getSequenceAxis() const noexcept
+    {
+        return mImpl->getSequenceAxis();
+    }
+
+protected:
+    apiv::VReverseSequenceLayer* mImpl;
+    virtual ~IReverseSequenceLayer() noexcept = default;
+}; // class IReverseSequenceLayer
+
+//!
+//! \class INormalizationLayer
+//!
+//! \brief A normalization layer in a network definition.
+//!
+//! The normalization layer performs the following operation:
+//!
+//! X - input Tensor
+//! Y - output Tensor
+//! S - scale Tensor
+//! B - bias Tensor
+//!
+//! Y = (X - Mean(X, axes)) / Sqrt(Variance(X) + epsilon) * S + B
+//!
+//! Where Mean(X, axes) is a reduction over a set of axes, and Variance(X) = Mean((X - Mean(X, axes)) ^ 2, axes).
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class INormalizationLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the epsilon value used for the normalization calculation.
+    //!
+    //! The default value of \p eps is 1e-5F.
+    //!
+    //! \param eps The epsilon value used for the normalization calculation.
+    //!
+    void setEpsilon(float eps) noexcept
+    {
+        return mImpl->setEpsilon(eps);
+    }
+
+    //!
+    //! \brief Get the epsilon value used for the normalization calculation.
+    //!
+    //! \return The epsilon value used for the normalization calculation.
+    //!
+    float getEpsilon() const noexcept
+    {
+        return mImpl->getEpsilon();
+    }
+
+    //!
+    //! \brief Set the reduction axes for the normalization calculation.
+    //!
+    //! \param axesMask The axes used for the normalization calculation.
+    //!
+    void setAxes(uint32_t axesMask) noexcept
+    {
+        return mImpl->setAxes(axesMask);
+    }
+
+    //!
+    //! \brief Get the axes value used for the normalization calculation.
+    //!
+    //! \return The axes used for the normalization calculation.
+    //!
+    uint32_t getAxes() const noexcept
+    {
+        return mImpl->getAxes();
+    }
+
+    //!
+    //! \brief Set the number of groups used to split the channels in the normalization calculation.
+    //!
+    //! The input tensor channels are divided into \p nbGroups groups, and normalization is performed per group.
+    //! The channel dimension is considered to be the second dimension in a [N, C, H, W, ...] formatted tensor.
+    //!
+    //! The default \p nbGroups is 1.
+    //!
+    //! \warning It is an error to set \p nbGroups to a value that does not evenly divide into the number of channels
+    //! of the input tensor.
+    //!
+    //! \warning When \p nbGroups is != 1, it is expected that the provided axesMask will have all bits corresponding
+    //! to dimensions after the channel dimension set to 1, with all other bits set to 0.
+    //!
+    //! \param nbGroups The number of groups to split the channels into for the normalization calculation.
+    //!
+    void setNbGroups(int64_t nbGroups) noexcept
+    {
+        return mImpl->setNbGroups(nbGroups);
+    }
+
+    //!
+    //! \brief Get the number of groups used to split the channels for the normalization calculation.
+    //!
+    //! \return The number of groups used to split the channels for the normalization calculation.
+    //!
+    int64_t getNbGroups() const noexcept
+    {
+        return mImpl->getNbGroups();
+    }
+
+    //!
+    //! \brief Set the compute precision of this layer.
+    //!
+    //! \param type The datatype used for the compute precision of this layer.
+    //!
+    //! The method is used to avoid overflow errors by controlling the normalization computation in
+    //! mixed precision mode. The compute precision defaults to DataType::kFLOAT.
+    //! To override this default, use this method to set the desired compute precision.
+    //!
+    //! For a weakly typed network:
+    //!
+    //! * Method setOutputType() can still be called to control the output data type.
+    //!
+    //! * Method setPrecision() can still be called. The input data is cast to that precision before
+    //!   being cast to the compute precision.
+    //!
+    //! Strongly typed network rejects calls to this method since the compute precision is typically
+    //! controlled by casting the input tensors to the desired type.
+    //!
+    //! Only DataType::kFLOAT and DataType::kHALF are valid types for \p type.
+    //!
+    void setComputePrecision(DataType type) noexcept
+    {
+        return mImpl->setComputePrecision(type);
+    }
+
+    //!
+    //! \brief Get the compute precision of this layer.
+    //!
+    //! \return The datatype used for the compute precision of this layer.
+    //!
+    DataType getComputePrecision() const noexcept
+    {
+        return mImpl->getComputePrecision();
+    }
+
+protected:
+    apiv::VNormalizationLayer* mImpl;
+    virtual ~INormalizationLayer() noexcept = default;
+}; // class INormalizationLayer
+
+
+//!
+//! \class ISqueezeLayer
+//!
+//! \brief Layer that represents a squeeze operation, removing unit dimensions of the input tensor
+//! on a set of axes.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ISqueezeLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor.
+    //!
+    //! \param index The index of the input to modify.
+    //! \param tensor The new input tensor.
+    //!
+    //! For a Squeeze layer, the values 0-1 are valid for index.
+    //! The indices are as follows:
+    //!
+    //! - 0: Input data tensor.
+    //! - 1: The axes to remove. Must be resolvable to a constant Int32 or Int64 1D shape tensor.
+    //!
+    using ILayer::setInput;
+
+protected:
+    apiv::VSqueezeLayer* mImpl;
+    virtual ~ISqueezeLayer() noexcept = default;
+}; // class ISqueezeLayer
+
+//!
+//! \class IUnsqueezeLayer
+//!
+//! \brief Layer that represents an unsqueeze operation, which reshapes the input tensor by inserting unit-length dimensions at specified axes of the output.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IUnsqueezeLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Append or replace an input of this layer with a specific tensor.
+    //!
+    //! \param index The index of the input to modify.
+    //! \param tensor The new input tensor.
+    //!
+    //! For an Unsqueeze layer, the values 0-1 are valid for index.
+    //! The indices are as follows:
+    //!
+    //! - 0: Input data tensor.
+    //! - 1: The output axes at which unit-length dimensions are inserted. Must be resolvable to a constant Int32 or Int64 1D shape tensor.
+    //!
+    using ILayer::setInput;
+
+protected:
+    apiv::VUnsqueezeLayer* mImpl;
+    virtual ~IUnsqueezeLayer() noexcept = default;
+}; // class IUnsqueezeLayer
+
+//!
+//! \enum CumulativeOperation
+//!
+//! \brief Enumerates the cumulative operations that may be performed by an ICumulativeLayer.
+//!
+//! The table shows the initial value of each cumulative operation.
+//!
+//! Operation | kFLOAT, kHALF, kBF16 | kINT32, kINT64 |
+//! --------- | -------------------- | -------------- |
+//! kSUM      | +0.0                 | 0              |
+//!
+enum class CumulativeOperation : int32_t
+{
+    kSUM = 0, //!< Calculate cumulative sum.
+};
+
+namespace impl
+{
+
+//!
+//! \brief Maximum number of elements in CumulativeOperation enum.
+//!
+//! \see CumulativeOperation
+//!
+template <>
+struct EnumMaxImpl<CumulativeOperation>
+{
+    static constexpr int32_t kVALUE = 1; //!< kSUM is the only enumerator.
+};
+
+} // namespace impl
+
+//!
+//! \class ICumulativeLayer
+//!
+//! \brief Layer that represents a cumulative operation across a tensor.
+//!
+//! It computes successive reductions across an axis of a tensor. The output
+//! always has the same shape as the input.
+//!
+//! If the reduction operation is summation, then this is also known as
+//! prefix-sum or cumulative sum.
+//!
+//! The operation has forward vs. reverse variants, and inclusive vs. exclusive variants.
+//!
+//! For example, let the input be a vector x of length n and the output be vector y.
+//! Then y[j] = sum(x[...]) where ... denotes a sequence of indices from this table:
+//!
+//!           | forward   | reverse
+//! ----------|-----------| ---------
+//! inclusive | 0..j      |   j..n-1
+//! exclusive | 0..j-1    | j+1..n-1
+//!
+//! For multidimensional tensors, the reductions apply across a specified axis. For
+//! example, given a 2D input, a forward inclusive cumulative operation across axis 0 generates
+//! cumulative sums within each column.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ICumulativeLayer : public ILayer
+{
+public:
+    //!
+    //! \brief Set the cumulative operation for the layer.
+    //!
+    //! \param op The reduction operation to be performed.
+    //!
+    //! \return Whether \p op is valid and the operation was successfully set.
+    //!
+    //! \see getOperation(), CumulativeOperation
+    //!
+    bool setOperation(CumulativeOperation op) noexcept
+    {
+        return mImpl->setOperation(op);
+    }
+
+    //!
+    //! \brief Get the cumulative operation for the layer.
+    //!
+    //! \return The reduction operation to be performed.
+    //!
+    //! \see setOperation(), CumulativeOperation
+    //!
+    CumulativeOperation getOperation() const noexcept
+    {
+        return mImpl->getOperation();
+    }
+
+    //!
+    //! \brief Set whether it is an exclusive accumulation or inclusive accumulation.
+    //!
+    //! \param exclusive Whether the operation will exclude the element at the current index.
+    //!
+    //! \see getExclusive()
+    //!
+    void setExclusive(bool exclusive) noexcept
+    {
+        mImpl->setExclusive(exclusive);
+    }
+
+    //!
+    //! \brief Get whether it is exclusive accumulation or inclusive accumulation.
+    //!
+    //! \return Whether the operation will exclude the element at the current index.
+    //!
+    //! \see setExclusive()
+    //!
+    bool getExclusive() const noexcept
+    {
+        return mImpl->getExclusive();
+    }
+
+    //!
+    //! \brief Specify whether the cumulative operation should be applied backward.
+    //!
+    //! \param reverse Whether the cumulative operation will run in the reverse direction from the last element.
+    //!
+    //! \see getReverse()
+    //!
+    void setReverse(bool reverse) noexcept
+    {
+        mImpl->setReverse(reverse);
+    }
+
+    //!
+    //! \brief Get the boolean that specifies whether the cumulative operation should be applied backward.
+    //!
+    //! \return Whether the cumulative operation will run in the reverse direction from the last element.
+    //!
+    //! \see setReverse()
+    //!
+    bool getReverse() const noexcept
+    {
+        return mImpl->getReverse();
+    }
+
+protected:
+    apiv::VCumulativeLayer* mImpl;
+    virtual ~ICumulativeLayer() noexcept = default;
+}; // class ICumulativeLayer
+
+//!
+//! \class INetworkDefinition
+//!
+//! \brief A network definition for input to the builder.
+//!
+//! A network definition defines the structure of the network, and combined with an IBuilderConfig, is built
+//! into an engine using an IBuilder. An INetworkDefinition has all dimensions explicit (full dims mode) in the
+//! network definition. The other mode, i.e. the implicit batch size mode, has been deprecated.
+//!
+//! A network with implicit batch dimensions returns the dimensions of a layer without the implicit dimension,
+//! and instead the batch is specified at execute/enqueue time. If the network has all dimensions specified, then
+//! the first dimension follows elementwise broadcast rules: if it is 1 for some inputs and is some value N for all
+//! other inputs, then the first dimension of each output is N, and the inputs with 1 for the first dimension are
+//! broadcast. Having divergent batch sizes across inputs to a layer is not supported.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class INetworkDefinition : public INoCopy
+{
+public:
+    virtual ~INetworkDefinition() noexcept = default;
+
+    //!
+    //! \brief Add an input tensor to the network.
+    //!
+    //! Each input and output tensor must have a unique name.
+    //!
+    //! For networks with wildcard dimensions, the volume
+    //! is based on the maxima specified by an IOptimizationProfile. Dimensions are normally non-negative integers. The
+    //! exception is that in networks with all explicit dimensions, -1 can be used as a wildcard for a dimension to
+    //! be specified at runtime. Input tensors with such a wildcard must have a corresponding entry in the
+    //! IOptimizationProfiles indicating the permitted extrema, and the input dimensions must be set by
+    //! IExecutionContext::setInputShape. Different IExecutionContext instances can have different dimensions.
+    //! Wildcard dimensions are only supported for EngineCapability::kSTANDARD. They are not
+    //! supported in safety contexts. DLA does not support Wildcard dimensions.
+    //!
+    //! Tensor dimensions are specified independent of format.  For example, if a
+    //! tensor is formatted in "NHWC" or a vectorized format, the dimensions are
+    //! still specified in the order {N, C, H, W}. For 2D images with a channel
+    //! dimension, the last three dimensions are always {C,H,W}. For 3D images
+    //! with a channel dimension, the last four dimensions are always {C,D,H,W}.
+    //!
+    //! \param name The name of the tensor.
+    //! \param type The type of the data held in the tensor.
+    //! \param dimensions The dimensions of the tensor.
+    //!
+    //! \warning It is an error to specify a wildcard value on a dimension that is determined by trained parameters.
+    //!
+    //! \warning If run on DLA with explicit dimensions, only the leading dimension can be a wildcard, and the
+    //! provided profile must have the same minimum, optimum, and maximum dimensions.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see ITensor
+    //!
+    //! \return The new tensor or nullptr if there is an error.
+    //!
+    ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept
+    {
+        return mImpl->addInput(name, type, dimensions);
+    }
+
+    //!
+    //! \brief Mark a tensor as a network output.
+    //!
+    //! \param tensor The tensor to mark as an output tensor.
+    //!
+    //! \warning It is an error to mark a network input as an output.
+    //! \warning It is an error to mark a tensor inside an ILoop or an
+    //!          IIfConditional as an output.
+    //!
+    void markOutput(ITensor& tensor) noexcept
+    {
+        mImpl->markOutput(tensor); // Forwarded to mImpl; this wrapper is void, so no status reaches the caller.
+    }
+
+    //!
+    //! \brief Mark a tensor as a debug tensor.
+    //!
+    //! A debug tensor can be optionally emitted at runtime.
+    //! Note that tensor names are required to specify debug
+    //! tensors at runtime.
+    //!
+    //! \param tensor Tensor to be marked as debug
+    //!
+    //! \return True if tensor successfully marked (or was already marked), false otherwise.
+    //!
+    //! \see unmarkDebug(), IExecutionContext::setDebugListener(), ITensor::setName()
+    //!
+    bool markDebug(ITensor& tensor) noexcept
+    {
+        return mImpl->markDebug(tensor); // The bool status from mImpl is returned unmodified.
+    }
+
+    //!
+    //! \brief Unmark a tensor as a debug tensor.
+    //!
+    //! Remove the marking of a tensor as a debug tensor.
+    //!
+    //! \param tensor Tensor to be unmarked as debug.
+    //!
+    //! \return True if tensor successfully unmarked (or was already unmarked), false otherwise.
+    //!
+    //! \see markDebug(), IExecutionContext::setDebugListener()
+    //!
+    bool unmarkDebug(ITensor& tensor) noexcept
+    {
+        return mImpl->unmarkDebug(tensor); // The bool status from mImpl is returned unmodified.
+    }
+
+    //!
+    //! \brief Check if a tensor is marked as debug tensor.
+    //!
+    //! \return true if tensor is marked as debug tensor, false otherwise.
+    //!
+    bool isDebugTensor(ITensor const& tensor) const noexcept
+    {
+        return mImpl->isDebugTensor(tensor); // Const query answered entirely by mImpl; tensor is the tensor to test.
+    }
+
+    //!
+    //! \brief Mark unfused tensors as debug tensors.
+    //!
+    //! Debug tensors can be optionally emitted at runtime.
+    //! Tensors that are fused by the optimizer will not be emitted.
+    //! Tensors marked this way will not prevent fusion like markDebug() does, thus preserving performance.
+    //!
+    //! \warning Tensors marked this way cannot be detected by isDebugTensor().
+    //! \warning DebugListener can only get internal tensor names instead of the original tensor
+    //!          names in the NetworkDefinition for tensors marked this way. But the names correspond to the
+    //!          names obtained by IEngineInspector.
+    //! \warning There is no guarantee that all unfused tensors are marked.
+    //!
+    //! \return True if tensors were successfully marked (or were already marked), false otherwise.
+    //!
+    //! \see unmarkUnfusedTensorsAsDebugTensors(), markDebug(), IExecutionContext::setDebugListener()
+    //!
+    bool markUnfusedTensorsAsDebugTensors() noexcept
+    {
+        return mImpl->markUnfusedTensorsAsDebugTensors(); // No arguments; mImpl's bool status is passed through.
+    }
+
+    //!
+    //! \brief Undo the marking of unfused tensors as debug tensors.
+    //!
+    //! This has no effect on tensors marked by markDebug().
+    //!
+    //! \return True if tensor successfully unmarked (or was already unmarked), false otherwise.
+    //!
+    //! \see markUnfusedTensorsAsDebugTensors(), IExecutionContext::setDebugListener()
+    //!
+    bool unmarkUnfusedTensorsAsDebugTensors() noexcept
+    {
+        return mImpl->unmarkUnfusedTensorsAsDebugTensors(); // No arguments; mImpl's bool status is passed through.
+    }
+
+    //!
+    //! \brief Add an activation layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param type The type of activation function to apply.
+    //!
+    //! Note that the setAlpha() and setBeta() methods must be used on the
+    //! output for activations that require these parameters.
+    //!
+    //! \see IActivationLayer ActivationType
+    //!
+    //! \warning Int32 and Int64 are valid only for activation type kRELU.
+    //!
+    //! \return The new activation layer, or nullptr if it could not be created.
+    //!
+    IActivationLayer* addActivation(ITensor& input, ActivationType type) noexcept
+    {
+        return mImpl->addActivation(input, type); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Add a LRN layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param window The size of the window.
+    //! \param alpha The alpha value for the LRN computation.
+    //! \param beta The beta value for the LRN computation.
+    //! \param k The k value for the LRN computation.
+    //!
+    //! \see ILRNLayer
+    //! \warning Int32 tensors are not valid input tensors.
+    //!
+    //! \return The new LRN layer, or nullptr if it could not be created.
+    //!
+    ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept
+    {
+        return mImpl->addLRN(input, window, alpha, beta, k); // All five arguments forwarded in declaration order.
+    }
+
+    //!
+    //! \brief Add a Scale layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //!              This tensor must have at least 4 dimensions.
+    //! \param mode The scaling mode.
+    //! \param shift The shift value.
+    //! \param scale The scale value.
+    //! \param power The power value.
+    //!
+    //! If the weights are available, then the number of weights depends on the ScaleMode.
+    //! For ScaleMode::kUNIFORM, the number of weights equals 1.
+    //! For ScaleMode::kCHANNEL, the number of weights equals the channel dimension.
+    //! For ScaleMode::kELEMENTWISE, the number of weights equals the product of the last three dimensions of the input.
+    //!
+    //! \see addScaleNd
+    //! \see IScaleLayer
+    //! \warning Int32 tensors are not valid input tensors.
+    //!
+    //! \return The new Scale layer, or nullptr if it could not be created.
+    //!
+    IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
+    {
+        return mImpl->addScale(input, mode, shift, scale, power); // Weights are taken by value and handed to mImpl.
+    }
+
+    //!
+    //! \brief Add a SoftMax layer to the network.
+    //!
+    //! \see ISoftMaxLayer
+    //! \warning Int32 tensors are not valid input tensors.
+    //!
+    //! \return The new SoftMax layer, or nullptr if it could not be created.
+    //!
+    ISoftMaxLayer* addSoftMax(ITensor& input) noexcept
+    {
+        return mImpl->addSoftMax(input); // input is the tensor fed to the new layer; result comes straight from mImpl.
+    }
+
+    //!
+    //! \brief Add a concatenation layer to the network.
+    //!
+    //! \param inputs The input tensors to the layer.
+    //! \param nbInputs The number of input tensors.
+    //!
+    //! \see IConcatenationLayer
+    //!
+    //! \return The new concatenation layer, or nullptr if it could not be created.
+    //!
+    //! \warning All tensors must have the same dimensions except along the concatenation axis.
+    //!
+    IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept
+    {
+        return mImpl->addConcatenation(inputs, nbInputs); // Array pointer and count are not checked in this wrapper.
+    }
+
+    //!
+    //! \brief Add an elementwise layer to the network.
+    //!
+    //! \param input1 The first input tensor to the layer.
+    //! \param input2 The second input tensor to the layer.
+    //! \param op The binary operation that the layer applies.
+    //!
+    //! The input tensors must have the same rank and compatible type.
+    //! Two types are compatible if they are the same type or are both in the set {kFLOAT, kHALF}.
+    //! For each dimension, their lengths must match, or one of them must be one.
+    //! In the latter case, the tensor is broadcast along that axis.
+    //!
+    //! The output tensor has the same rank as the inputs.
+    //! For each dimension, its length is the maximum of the lengths of the
+    //! corresponding input dimension.
+    //!
+    //! The inputs are shape tensors if the output is a shape tensor.
+    //!
+    //! \see IElementWiseLayer
+    //!
+    //! \return The new elementwise layer, or nullptr if it could not be created.
+    //!
+    IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) noexcept
+    {
+        return mImpl->addElementWise(input1, input2, op); // Operand order (input1, input2) is preserved.
+    }
+
+    //!
+    //! \brief Add a unary layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param operation The operation to apply.
+    //!
+    //! \see IUnaryLayer
+    //!
+    //! Generally the input must have a floating-point type (or kINT8 as a quantized float),
+    //! except for the following operations:
+    //! * kSIGN accepts a floating-point or Int32 tensor.
+    //! * kNOT requires a Bool tensor.
+    //!
+    //! The input is a shape tensor if the output is a shape tensor.
+    //!
+    //! \return The new unary layer, or nullptr if it could not be created.
+    //!
+    IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept
+    {
+        return mImpl->addUnary(input, operation); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Add a shuffle layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //!
+    //! \see IShuffleLayer
+    //!
+    //! \return The new shuffle layer, or nullptr if it could not be created.
+    //!
+    IShuffleLayer* addShuffle(ITensor& input) noexcept
+    {
+        return mImpl->addShuffle(input); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Add a OneHot layer to the network.
+    //!
+    //! \param indices - tensor containing indices where on_value should be set.
+    //! \param values - a 2-element tensor, consisting of [off_value, on_value].
+    //! \param depth - a shape tensor containing the width of the added one-hot dimension.
+    //! \param axis - the axis to add the one-hot encoding to.
+    //!
+    //! \see IOneHotLayer
+    //!
+    //! \return The new OneHot layer, or nullptr if it could not be created.
+    //!
+    IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept
+    {
+        return mImpl->addOneHot(indices, values, depth, axis); // All four arguments forwarded in declaration order.
+    }
+
+    //!
+    //! \brief Get the number of layers in the network.
+    //!
+    //! \return The number of layers in the network.
+    //!
+    //! \see getLayer()
+    //!
+    int32_t getNbLayers() const noexcept
+    {
+        return mImpl->getNbLayers(); // Const query: the count reported by mImpl is returned unchanged.
+    }
+
+    //!
+    //! \brief Get the layer specified by the given index.
+    //!
+    //! \param index The index of the layer.
+    //!
+    //! \return The layer, or nullptr if the index is out of range.
+    //!
+    //! \see getNbLayers()
+    //!
+    ILayer* getLayer(int32_t index) const noexcept
+    {
+        return mImpl->getLayer(index); // No range check here; index is handed to mImpl as given.
+    }
+
+    //!
+    //! \brief Get the number of inputs in the network.
+    //!
+    //! \return The number of inputs in the network.
+    //!
+    //! \see getInput()
+    //!
+    int32_t getNbInputs() const noexcept
+    {
+        return mImpl->getNbInputs(); // Const query: the count reported by mImpl is returned unchanged.
+    }
+
+    //!
+    //! \brief Get the input tensor specified by the given index.
+    //!
+    //! \param index The index of the input tensor.
+    //!
+    //! \return The input tensor, or nullptr if the index is out of range.
+    //!
+    //! \note adding inputs invalidates indexing here
+    //!
+    //! \see getNbInputs()
+    //!
+    ITensor* getInput(int32_t index) const noexcept
+    {
+        return mImpl->getInput(index); // No range check here; index is handed to mImpl as given.
+    }
+
+    //!
+    //! \brief Get the number of outputs in the network.
+    //!
+    //! The outputs include those marked by markOutput or markOutputForShapes.
+    //!
+    //! \return The number of outputs in the network.
+    //!
+    //! \see getOutput()
+    //!
+    int32_t getNbOutputs() const noexcept
+    {
+        return mImpl->getNbOutputs(); // Const query: the count reported by mImpl is returned unchanged.
+    }
+
+    //!
+    //! \brief Get the output tensor specified by the given index.
+    //!
+    //! \param index The index of the output tensor.
+    //!
+    //! \return The output tensor, or nullptr if the index is out of range.
+    //!
+    //! \note adding inputs invalidates indexing here
+    //!
+    //! \see getNbOutputs()
+    //!
+    ITensor* getOutput(int32_t index) const noexcept
+    {
+        return mImpl->getOutput(index); // No range check here; index is handed to mImpl as given.
+    }
+
+    //!
+    //! \brief Add a reduce layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param operation The reduction operation to perform.
+    //! \param reduceAxes The reduction dimensions.
+    //!        The bit in position i of bitmask reduceAxes corresponds to explicit dimension i of the result.
+    //!        E.g., the least significant bit corresponds to the first explicit dimension and the next to least
+    //!        significant bit corresponds to the second explicit dimension.
+    //! \param keepDimensions The boolean that specifies whether or not to keep the reduced dimensions in the
+    //! output of the layer.
+    //!
+    //! The reduce layer works by performing an operation specified by \p operation to reduce the tensor \p input
+    //! across the axes specified by \p reduceAxes.
+    //!
+    //! \see IReduceLayer
+    //!
+    //! \warning If output is an Int32 or Int64 shape tensor, ReduceOperation::kAVG is unsupported.
+    //!
+    //! \return The new reduce layer, or nullptr if it could not be created.
+    //!
+    IReduceLayer* addReduce(
+        ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
+    {
+        return mImpl->addReduce(input, operation, reduceAxes, keepDimensions); // reduceAxes goes through as the raw bitmask.
+    }
+
+    //!
+    //! \brief Add a TopK layer to the network.
+    //!
+    //! The TopK layer has two outputs of the same dimensions. The first contains data values,
+    //! the second contains index positions for the values. Output values are sorted, largest first
+    //! for operation kMAX and smallest first for operation kMIN.
+    //!
+    //! Currently only values of K up to 3840 are supported.
+    //!
+    //! \param input The input tensor to the layer.
+    //!
+    //! \param op Operation to perform.
+    //!
+    //! \param k The number of elements to keep. For dynamic k, use the setInput() method to pass in k as a tensor
+    //!        instead, which will override the static k value passed here in calculations.
+    //!
+    //! \param reduceAxes The reduction dimensions.
+    //!        The bit in position i of bitmask reduceAxes corresponds to explicit dimension i of the result.
+    //!        E.g., the least significant bit corresponds to the first explicit dimension and the next to least
+    //!        significant bit corresponds to the second explicit dimension.
+    //!
+    //!        Currently reduceAxes must specify exactly one dimension, and it must be one of the last four dimensions.
+    //!
+    //! \see ITopKLayer
+    //!
+    //! \return The new TopK layer, or nullptr if it could not be created.
+    //!
+    ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
+    {
+        return mImpl->addTopK(input, op, k, reduceAxes); // k and reduceAxes are not validated in this wrapper.
+    }
+
+    //!
+    //! \brief Add gather with mode GatherMode::kDEFAULT and specified axis and nbElementWiseDims=0.
+    //!
+    //! \param data The tensor to gather values from.
+    //! \param indices The tensor to get indices from to populate the output tensor.
+    //! \param axis The axis in the data tensor to gather on.
+    //!
+    //! \see IGatherLayer
+    //!
+    //! \return The new gather layer, or nullptr if it could not be created.
+    //!
+    IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept
+    {
+        return mImpl->addGather(data, indices, axis); // Axis-based variant; see addGatherV2 for the mode-based one.
+    }
+
+    //!
+    //! \brief Add gather with specified mode, axis=0 and nbElementWiseDims=0.
+    //!
+    //! \param data The tensor to gather values from.
+    //! \param indices The tensor to get indices from to populate the output tensor.
+    //! \param mode The gather mode.
+    //!
+    //! \see IGatherLayer
+    //!
+    //! \return The new gather layer, or nullptr if it could not be created.
+    //!
+    IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept
+    {
+        return mImpl->addGatherV2(data, indices, mode); // Mode-based variant: a GatherMode is forwarded, no axis.
+    }
+
+    //!
+    //! \brief Add a RaggedSoftMax layer to the network.
+    //!
+    //! \param input The ZxS input tensor.
+    //! \param bounds The Zx1 bounds tensor.
+    //!
+    //! \see IRaggedSoftMaxLayer
+    //!
+    //! \warning The bounds tensor cannot have the last dimension be the wildcard character.
+    //! \warning Int32 tensors are not valid input tensors.
+    //! \warning The input and bounds tensors should be 3D tensors.
+    //!
+    //! \return The new RaggedSoftMax layer, or nullptr if it could not be created.
+    //!
+    IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) noexcept
+    {
+        return mImpl->addRaggedSoftMax(input, bounds); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Add a MatrixMultiply layer to the network.
+    //!
+    //! \param input0 The first input tensor (commonly A).
+    //! \param op0 The operation to apply to input0.
+    //! \param input1 The second input tensor (commonly B).
+    //! \param op1 The operation to apply to input1.
+    //!
+    //! The inputs are shape tensors if the output is a shape tensor.
+    //!
+    //! \see IMatrixMultiplyLayer
+    //!
+    //! \warning Int32 tensors are not valid input tensors.
+    //!
+    //! \return The new matrix multiply layer, or nullptr if it could not be created.
+    //!
+    IMatrixMultiplyLayer* addMatrixMultiply(
+        ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept
+    {
+        return mImpl->addMatrixMultiply(input0, op0, input1, op1); // Each operand stays paired with its own operation.
+    }
+
+    //!
+    //! \brief Add a nonzero layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //!
+    //! \see INonZeroLayer
+    //!
+    //! \return The new nonzero layer, or nullptr if it could not be created.
+    //!
+    INonZeroLayer* addNonZero(ITensor& input) noexcept
+    {
+        return mImpl->addNonZero(input); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Add a constant layer to the network.
+    //!
+    //! \param dimensions The dimensions of the constant.
+    //! \param weights The constant value, represented as weights.
+    //!
+    //! \see IConstantLayer
+    //!
+    //! \return The new constant layer, or nullptr if it could not be created.
+    //!
+    //! If weights.type is DataType::kINT32, the output is a tensor of 32-bit indices.
+    //! Otherwise the output is a tensor of real values and the output type will
+    //! follow TensorRT's normal precision rules.
+    //!
+    //! If a wildcard dimension is used, the volume of the runtime dimensions must equal
+    //! the number of weights specified.
+    //!
+    //! \warning DataType::kUINT8 not supported.
+    //!
+    IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept
+    {
+        return mImpl->addConstant(dimensions, weights); // weights is taken by value and handed straight to mImpl.
+    }
+
+    //!
+    //! \brief Add an identity layer.
+    //!
+    //! \param input The input tensor to the layer.
+    //!
+    //! \see IIdentityLayer
+    //!
+    //! \return The new identity layer, or nullptr if it could not be created.
+    //!
+    IIdentityLayer* addIdentity(ITensor& input) noexcept
+    {
+        return mImpl->addIdentity(input); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Add a cast layer.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param toType The DataType of the output tensor
+    //!
+    //! \see ICastLayer
+    //!
+    //! \return The new cast layer, or nullptr if it could not be created.
+    //!
+    ICastLayer* addCast(ITensor& input, DataType toType) noexcept
+    {
+        return mImpl->addCast(input, toType); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Remove a tensor from the network definition.
+    //!
+    //! \param tensor the tensor to remove
+    //!
+    //! It is illegal to remove a tensor that is the input or output of a layer.
+    //! If this method is called with such a tensor, a warning will be emitted to the log
+    //! and the call will be ignored. Its intended use is to remove detached tensors after
+    //! e.g. concatenating two networks with ILayer::setInput().
+    //!
+    void removeTensor(ITensor& tensor) noexcept
+    {
+        mImpl->removeTensor(tensor); // Forwarded to mImpl; this wrapper is void, so no status reaches the caller.
+    }
+
+    //!
+    //! \brief Unmark a tensor as a network output.
+    //!
+    //! \param tensor The tensor to unmark as an output tensor.
+    //!
+    //! \see markOutput()
+    //!
+    void unmarkOutput(ITensor& tensor) noexcept
+    {
+        mImpl->unmarkOutput(tensor); // Forwarded to mImpl; this wrapper is void, so no status reaches the caller.
+    }
+
+    //!
+    //! \brief Add a plugin layer to the network using the IPluginV2 interface.
+    //!
+    //! \param inputs The input tensors to the layer.
+    //! \param nbInputs The number of input tensors.
+    //! \param plugin The layer plugin.
+    //!
+    //! \see IPluginV2Layer
+    //!
+    //! \warning Dimension wildcards are only supported with IPluginV2DynamicExt or IPluginV2IOExt plugins.
+    //! \warning Int32 tensors are not valid input tensors.
+    //!
+    //! \return The new plugin layer, or nullptr if it could not be created.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.8. Superseded by addPluginV3.
+    //!
+    TRT_DEPRECATED IPluginV2Layer* addPluginV2(ITensor* const* inputs, int32_t nbInputs, IPluginV2& plugin) noexcept
+    {
+        return mImpl->addPluginV2(inputs, nbInputs, plugin); // Marked TRT_DEPRECATED but still forwards to mImpl.
+    }
+
+    //!
+    //! \brief Add a plugin layer implementing the IPluginV3 interface to the network.
+    //!
+    //! \param inputs The input tensors to the layer.
+    //! \param nbInputs The number of input tensors.
+    //! \param shapeInputs Shape tensor inputs to the layer.
+    //! \param nbShapeInputs The number of shape tensor inputs.
+    //! \param plugin The layer plugin.
+    //!
+    //! \see IPluginV3Layer
+    //!
+    //! \return The new plugin layer, or nullptr if it could not be created.
+    //!
+    IPluginV3Layer* addPluginV3(ITensor* const* inputs, int32_t nbInputs, ITensor* const* shapeInputs,
+        int32_t nbShapeInputs, IPluginV3& plugin) noexcept
+    {
+        return mImpl->addPluginV3(inputs, nbInputs, shapeInputs, nbShapeInputs, plugin); // Both arrays and counts go through unchecked here.
+    }
+
+    //!
+    //! \brief Add a slice layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param start The start offset
+    //! \param size The output dimension
+    //! \param stride The slicing stride
+    //!
+    //! Positive, negative, zero stride values, and combinations of them in different dimensions are allowed.
+    //!
+    //! \see ISliceLayer
+    //!
+    //! \return The new slice layer, or nullptr if it could not be created.
+    //!
+    ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept
+    {
+        return mImpl->addSlice(input, start, size, stride); // start, size and stride are forwarded by reference, in that order.
+    }
+
+    //!
+    //! \brief Sets the name of the network.
+    //!
+    //! \param name The name to assign to this network.
+    //!
+    //! Set the name of the network so that it can be associated with a built
+    //! engine. The \p name must be a null-terminated C-style string.
+    //! TensorRT makes no use of this string except storing it as part of the engine
+    //! so that it may be retrieved at runtime.
+    //! A name unique to the builder will be generated by default.
+    //!
+    //! This method copies the name string.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see INetworkDefinition::getName(), ISafeCudaEngine::getName()
+    //!
+    //! \return none
+    //!
+    void setName(char const* name) noexcept
+    {
+        mImpl->setName(name); // Only the pointer is forwarded here; this wrapper itself does not copy the string.
+    }
+
+    //!
+    //! \brief Returns the name associated with the network.
+    //!
+    //! The memory pointed to by getName() is owned by the INetworkDefinition object.
+    //!
+    //! \see INetworkDefinition::setName()
+    //!
+    //! \return A null-terminated C-style string representing the name of the network.
+    //!
+    char const* getName() const noexcept
+    {
+        return mImpl->getName(); // Const query: the pointer obtained from mImpl is returned unchanged.
+    }
+
+    //!
+    //! \brief Add a shape layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //!
+    //! \see IShapeLayer
+    //!
+    //! \warning addShape is only supported when hasImplicitBatchDimension() is false.
+    //!
+    //! \return The new shape layer, or nullptr if it could not be created.
+    //!
+    IShapeLayer* addShape(ITensor& input) noexcept
+    {
+        return mImpl->addShape(input); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Query whether the network was created with an implicit batch dimension.
+    //!
+    //! \return Always false since TensorRT 10.0 does not support an implicit batch dimension.
+    //!
+    //! \see createNetworkV2
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
+    //!
+    TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
+    {
+        return mImpl->hasImplicitBatchDimension(); // Still asks mImpl rather than hard-coding the documented "false".
+    }
+
+    //!
+    //! \brief Get the network definition creation flags for this network definition object. Defaults to 0.
+    //!
+    //! \return The network definition creation options as a bitmask.
+    //!
+    NetworkDefinitionCreationFlags getFlags() const noexcept
+    {
+        return mImpl->getFlags(); // Const query: the flag bitmask from mImpl is returned unchanged.
+    }
+
+    //!
+    //! \brief Returns true if the network definition creation flag is set
+    //!
+    //! \see getFlags()
+    //!
+    //! \return True if flag is set, false if unset.
+    //!
+    bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
+    {
+        return mImpl->getFlag(networkDefinitionCreationFlag); // The argument names the single flag to test.
+    }
+
+    //!
+    //! \brief Enable tensor's value to be computed by IExecutionContext::getShapeBinding.
+    //!
+    //! \return True if successful, false if tensor is already marked as an output.
+    //!
+    //! The tensor must be of type DataType::kINT32 and have no more than one dimension.
+    //!
+    //! \warning The tensor must have dimensions that can be determined to be constants at build time.
+    //!
+    //! \warning It is an error to mark a network input as a shape output.
+    //!
+    //!
+    bool markOutputForShapes(ITensor& tensor) noexcept
+    {
+        return mImpl->markOutputForShapes(tensor); // tensor is the tensor to mark; mImpl's bool status is passed through.
+    }
+
+    //!
+    //! \brief Undo markOutputForShapes.
+    //!
+    //! \warning inputs to addShape cannot contain wildcard dimension values.
+    //!
+    //! \return True if successful, false if tensor is not marked as an output.
+    //!
+    bool unmarkOutputForShapes(ITensor& tensor) noexcept
+    {
+        return mImpl->unmarkOutputForShapes(tensor); // tensor is the tensor to unmark; mImpl's bool status is passed through.
+    }
+
+    //!
+    //! \brief Add a parametric ReLU layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param slope The slope tensor to the layer. This tensor should be unidirectionally broadcastable
+    //!        to the input tensor.
+    //!
+    //! \see IParametricReLULayer
+    //!
+    //! \warning Tensors of type Int32, Int64, Bool, or UInt8 are not allowed as inputs.
+    //!
+    //! \return The new parametric ReLU layer, or nullptr if it could not be created.
+    //!
+    IParametricReLULayer* addParametricReLU(ITensor& input, ITensor& slope) noexcept
+    {
+        return mImpl->addParametricReLU(input, slope); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Add a multi-dimension convolution layer to the network.
+    //!
+    //! \param input The input tensor to the convolution.
+    //! \param nbOutputMaps The number of output feature maps for the convolution.
+    //! \param kernelSize The multi-dimensions of the convolution kernel.
+    //! \param kernelWeights The kernel weights for the convolution.
+    //! \param biasWeights The bias weights for the convolution. Weights{} represents no bias.
+    //!
+    //! \see IConvolutionLayer
+    //!
+    //! \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor.
+    //! \warning Int32 tensors are not valid input tensors.
+    //! \warning Only 2D or 3D convolution is supported.
+    //!
+    //! \return The new convolution layer, or nullptr if it could not be created.
+    //!
+    IConvolutionLayer* addConvolutionNd(
+        ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
+    {
+        return mImpl->addConvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights); // Weights by value, kernelSize by reference.
+    }
+
+    //!
+    //! \brief Add a multi-dimension pooling layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param type The type of pooling to apply.
+    //! \param windowSize The size of the pooling window.
+    //!
+    //! \see IPoolingLayer PoolingType
+    //!
+    //! \warning Int32 tensors are not valid input tensors.
+    //! \warning Only 2D or 3D pooling is supported.
+    //!
+    //! \return The new pooling layer, or nullptr if it could not be created.
+    //!
+    IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept
+    {
+        return mImpl->addPoolingNd(input, type, windowSize); // Pure forwarder: the layer pointer from mImpl is returned as-is.
+    }
+
+    //!
+    //! \brief Add a multi-dimension deconvolution layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param nbOutputMaps The number of output feature maps.
+    //! \param kernelSize The multi-dimensions of the deconvolution kernel.
+    //! \param kernelWeights The kernel weights for the deconvolution.
+    //! \param biasWeights The bias weights for the deconvolution. Weights{} represents no bias.
+    //!
+    //! \see IDeconvolutionLayer
+    //!
+    //! \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor.
+    //! \warning Int32 tensors are not valid input tensors.
+    //! \warning Only 2D or 3D deconvolution is supported.
+    //!
+    //! \return The new deconvolution layer, or nullptr if it could not be created.
+    //!
+    IDeconvolutionLayer* addDeconvolutionNd(
+        ITensor& input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
+    {
+        return mImpl->addDeconvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights); // NOTE(review): kernelSize is by value here, Dims const& in addConvolutionNd.
+    }
+
+    //!
+    //! \brief Add a multi-dimension scale layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param mode The scaling mode.
+    //! \param shift The shift value.
+    //! \param scale The scale value.
+    //! \param power The power value.
+    //! \param channelAxis The channel axis.
+    //!
+    //! If the weights are available, then the number of weights depends on the ScaleMode.
+    //! For ScaleMode::kUNIFORM, the number of weights equals 1.
+    //! For ScaleMode::kCHANNEL, the number of weights equals the channel dimension.
+    //! For ScaleMode::kELEMENTWISE, the number of weights equals the product of all input dimensions at channelAxis and
+    //! beyond.
+    //!
+    //! For example, if the inputs dimensions are [A,B,C,D,E,F], and channelAxis=2:
+    //! For ScaleMode::kUNIFORM, the number of weights is equal to 1.
+    //! For ScaleMode::kCHANNEL, the number of weights is C.
+    //! For ScaleMode::kELEMENTWISE, the number of weights is C*D*E*F.
+    //!
+    //! channelAxis can also be set explicitly using setChannelAxis().
+    //!
+    //! \see IScaleLayer
+    //! \see setChannelAxis()
+    //!
+    //! \warning Int32 tensors are not valid input tensors.
+    //! \warning Only 2D or 3D scale is supported.
+    //!
+    //! \return The new Scale layer, or nullptr if it could not be created.
+    //!
+    IScaleLayer* addScaleNd(
+        ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
+    {
+        return mImpl->addScaleNd(input, mode, shift, scale, power, channelAxis); // Same argument order as addScale, plus channelAxis.
+    }
+
+    //!
+    //! \brief Add a resize layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //!
+    //! \see IResizeLayer
+    //!
+    //! \warning Int32 tensors are not valid input tensors.
+    //!
+    //! \return The new resize layer, or nullptr if it could not be created.
+    //!
+    IResizeLayer* addResize(ITensor& input) noexcept
+    {
+        return mImpl->addResize(input); // Only the input is passed; no resize settings are supplied here.
+    }
+
+    //!
+    //! \brief Add a loop to the network.
+    //!
+    //! An ILoop provides a way to specify a recurrent subgraph.
+    //!
+    //! \return Pointer to the ILoop that can be used to add loop-boundary layers for the loop.
+    //!
+    //! \see ILoop
+    //!
+    ILoop* addLoop() noexcept
+    {
+        return mImpl->addLoop();
+    }
+
+    //!
+    //! \brief Add an if-then-else conditional to the network.
+    //!
+    //! An IIfConditional provides a way to conditionally execute parts of the network.
+    //!
+    //! \return Pointer to the IIfConditional that can be used to add conditional-boundary layers
+    //!         for the if-then-else.
+    //!
+    //! \see IIfConditional
+    //!
+    IIfConditional* addIfConditional() noexcept
+    {
+        return mImpl->addIfConditional();
+    }
+
+    //!
+    //! \brief Add a select layer to the network.
+    //!
+    //! \param condition The condition tensor to the layer. Must have type DataType::kBOOL.
+    //! \param thenInput The "then" input tensor to the layer.
+    //! \param elseInput The "else" input tensor to the layer.
+    //!
+    //! All three input tensors must have the same rank, and along each axis
+    //! must have the same length or a length of one. If the length is one, the tensor
+    //! is broadcast along that axis. The output tensor has the dimensions of the inputs after
+    //! the broadcast rule is applied. For example, given:
+    //!
+    //!    dimensions of condition:  [1,1,5,9]
+    //!    dimensions of thenInput:  [1,1,5,9]
+    //!    dimensions of elseInput:  [1,3,1,9]
+    //!
+    //! the output dimensions are [1,3,5,9], and the output contents are defined by:
+    //!
+    //!      output[0,i,j,k] = condition[0,0,j,k] ? thenInput[0,0,j,k] : elseInput[0,i,0,k]
+    //!
+    //! The output dimensions are not necessarily the max of the input dimensions if any input
+    //! is an empty tensor. For example, if in the preceding example, 5 is changed to 0:
+    //!
+    //!    dimensions of condition:  [1,1,0,9]
+    //!    dimensions of thenInput:  [1,1,0,9]
+    //!    dimensions of elseInput:  [1,3,1,9]
+    //!
+    //! then the output dimensions are [1,3,0,9].
+    //!
+    //! The inputs are shape tensors if the output is a shape tensor.
+    //!
+    //! \see ISelectLayer
+    //!
+    //! \return The new select layer, or nullptr if it could not be created.
+    ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept
+    {
+        return mImpl->addSelect(condition, thenInput, elseInput);
+    }
+
+    //!
+    //! \brief Add an assertion layer to the network.
+    //!
+    //! \param condition The input tensor to the layer.
+    //! \param message A message to print if the assertion fails.
+    //!
+    //! The input tensor must be a boolean shape tensor.
+    //!
+    //! \see IAssertionLayer
+    //!
+    //! \return The new assertion layer, or nullptr if it could not be created.
+    //!
+    IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept
+    {
+        return mImpl->addAssertion(condition, message);
+    }
+
+    //!
+    //! \brief Add a fill layer to the network.
+    //!
+    //! \param dimensions The output tensor dimensions if input 0 is missing.
+    //! \param op The fill operation that the layer applies.
+    //!
+    //! \warning For FillOperation::kLINSPACE, dimensions.nbDims must be 1 for static start/delta. If delta is provided
+    //! as a 1D tensor, the length of delta must match dimensions.nbDims.
+    //!
+    //! This layer is non-deterministic across subsequent calls as the same inputs will produce different
+    //! output tensors if \p op is either FillOperation::kRANDOM_UNIFORM or FillOperation::kRANDOM_NORMAL
+    //! due to random state being shared across calls. The output tensors generated are deterministic when
+    //! starting from the same initial state.
+    //!
+    //! \see IFillLayer
+    //!
+    //! \return The new fill layer, or nullptr if it could not be created.
+    //!
+    //! \deprecated Deprecated in TensorRT 9.0. Superseded by three-argument addFill.
+    //!
+    TRT_DEPRECATED IFillLayer* addFill(Dims const& dimensions, FillOperation op) noexcept
+    {
+        return mImpl->addFill(dimensions, op);
+    }
+
+    //!
+    //! \brief Add a fill layer to the network.
+    //!
+    //! \param dimensions The output tensor dimensions if input 0 is missing.
+    //! \param op The fill operation that the layer applies.
+    //! \param outputType Optional output tensor data type, must be DataType::kFLOAT, DataType::kHALF, DataType::kINT32,
+    //! or DataType::kINT64. This parameter is only used for static alpha/beta. Future calls to set output type using
+    //! setToType or setOutputType must be consistent.
+    //!
+    //! \warning For FillOperation::kLINSPACE, dimensions.nbDims must be 1 for static start/delta. If delta is provided
+    //! as a 1D tensor, the length of delta must match dimensions.nbDims.
+    //!
+    //! This layer is non-deterministic across subsequent calls as the same inputs will produce different
+    //! output tensors if \p op is either FillOperation::kRANDOM_UNIFORM or FillOperation::kRANDOM_NORMAL
+    //! due to random state being shared across calls. The output tensors generated are deterministic when
+    //! starting from the same initial state.
+    //!
+    //! \see IFillLayer
+    //!
+    //! \return The new fill layer, or nullptr if it could not be created.
+    //!
+    IFillLayer* addFill(Dims const& dimensions, FillOperation op, DataType outputType) noexcept
+    {
+        return mImpl->addFillV2(dimensions, op, outputType); // Dispatches to addFillV2, not addFill.
+    }
+
+    //!
+    //! \brief Add a padding layer to the network. Only 2D padding is currently supported.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param prePadding The padding to apply to the start of the tensor.
+    //! \param postPadding The padding to apply to the end of the tensor.
+    //!
+    //! \see IPaddingLayer
+    //!
+    //! \return The new padding layer, or nullptr if it could not be created.
+    //!
+    IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept
+    {
+        return mImpl->addPaddingNd(input, prePadding, postPadding); // Arguments are forwarded unchanged.
+    }
+
+    //!
+    //! \brief Associate a name with all current uses of the given weights.
+    //!
+    //! The name must be set after the Weights are used in the network.
+    //! Lookup is associative. The name applies to all Weights with matching
+    //! type, value pointer, and count. If Weights with a matching value
+    //! pointer, but different type or count exist in the network, an
+    //! error message is issued, the name is rejected, and false is returned.
+    //! If the name has already been used for other weights,
+    //! false is returned. A nullptr causes the weights to become unnamed,
+    //! i.e. clears any previous name.
+    //!
+    //! \param weights The weights to be named.
+    //! \param name The name to associate with the weights.
+    //!
+    //! \return true on success.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    bool setWeightsName(Weights weights, char const* name) noexcept
+    {
+        return mImpl->setWeightsName(weights, name);
+    }
+
+    //!
+    //! \brief Set the ErrorRecorder for this interface.
+    //!
+    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
+    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+    //! a recorder has been registered.
+    //!
+    //! If an error recorder is not set, messages will be sent to the global log stream.
+    //!
+    //! \param recorder The error recorder to register with this interface.
+    //!
+    //! \see getErrorRecorder()
+    //!
+    void setErrorRecorder(IErrorRecorder* recorder) noexcept
+    {
+        mImpl->setErrorRecorder(recorder);
+    }
+
+    //!
+    //! \brief Get the ErrorRecorder assigned to this interface.
+    //!
+    //! Retrieves the assigned error recorder object for the given class.
+    //! A nullptr will be returned if setErrorRecorder has not been called.
+    //!
+    //! \return A pointer to the IErrorRecorder object that has been registered.
+    //!
+    //! \see setErrorRecorder()
+    //!
+    IErrorRecorder* getErrorRecorder() const noexcept
+    {
+        return mImpl->getErrorRecorder();
+    }
+
+    //!
+    //! \brief Add a dequantization layer to the network.
+    //!
+    //! \param input The input tensor to be dequantized.
+    //! \param scale A tensor with the scale value.
+    //!
+    //! \see IDequantizeLayer
+    //!
+    //! \p input tensor data type must be DataType::kINT8 or DataType::kFP8.
+    //! \p scale tensor data type must be DataType::kFLOAT. The subgraph which terminates with the \p scale tensor must
+    //! be a build-time constant.
+    //!
+    //! \return The new dequantization layer, or nullptr if it could not be created.
+    //!
+    //! \deprecated Deprecated in TensorRT 9.0. Superseded by three-argument addDequantize.
+    //!
+    TRT_DEPRECATED IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale) noexcept
+    {
+        return mImpl->addDequantize(input, scale);
+    }
+
+    //!
+    //! \brief Add a dequantization layer to the network.
+    //!
+    //! \param input The input tensor to be dequantized.
+    //! \param scale A tensor with the scale value.
+    //! \param outputType Output tensor data type.
+    //!
+    //! \see IDequantizeLayer
+    //!
+    //! \p input tensor data type must be DataType::kINT8, DataType::kFP8, DataType::kINT4 or DataType::kFP4.
+    //! \p scale tensor data type must be one of the following: DataType::kFLOAT (default), DataType::kHALF,
+    //! DataType::kBF16 or DataType::kE8M0 (for MXFP8 quantization).
+    //! \p outputType output tensor data type must be DataType::kFLOAT (default), DataType::kHALF or DataType::kBF16.
+    //! Future calls to set output type using setToType or setOutputType must be consistent. For strongly typed
+    //! networks, if the scale type is DataType::kHALF or DataType::kBF16 the output type must match.
+    //!
+    //! \return The new dequantization layer, or nullptr if it could not be created.
+    //!
+    IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
+    {
+        return mImpl->addDequantizeV2(input, scale, outputType);
+    }
+
+    //!
+    //! \brief Add a Scatter layer to the network with specified mode and axis=0.
+    //!
+    //! \param data The input tensor to be updated with additional values.
+    //! \param indices Indices of the elements to be updated.
+    //! \param updates Values to be used for updates.
+    //! \param mode Scatter mode.
+    //!
+    //! \see IScatterLayer
+    //!
+    //! \p indices tensor data type must be DataType::kINT32.
+    //! \p updates tensor data type must be the same as \p data.
+    //!
+    //! \return The new Scatter layer, or nullptr if it could not be created.
+    //!
+    IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept
+    {
+        return mImpl->addScatter(data, indices, updates, mode);
+    }
+
+    //!
+    //! \brief Add a quantization layer to the network.
+    //!
+    //! \param input The input tensor to be quantized.
+    //! \param scale A tensor with the scale value.
+    //!
+    //! \see IQuantizeLayer
+    //!
+    //! \p input tensor data type must be DataType::kFLOAT or DataType::kHALF.
+    //! \p scale tensor data type must be DataType::kFLOAT. The subgraph which terminates with the \p scale tensor must
+    //! be a build-time constant.
+    //!
+    //! \return The new quantization layer, or nullptr if it could not be created.
+    //!
+    //! \deprecated Deprecated in TensorRT 9.0. Superseded by three-argument addQuantize.
+    //!
+    TRT_DEPRECATED IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale) noexcept
+    {
+        return mImpl->addQuantize(input, scale); // Two-argument form: no output type is passed.
+    }
+
+    //!
+    //! \brief Add a quantization layer to the network.
+    //!
+    //! \param input The input tensor to be quantized.
+    //! \param scale A tensor with the scale value.
+    //! \param outputType Output tensor data type.
+    //!
+    //! \see IQuantizeLayer
+    //!
+    //! \p input tensor data type must be DataType::kFLOAT, DataType::kHALF or DataType::kBF16.
+    //! \p scale tensor data type must be one of the following: DataType::kFLOAT (default), DataType::kHALF,
+    //! DataType::kBF16 or DataType::kE8M0 (for MXFP8 quantization).
+    //! \p outputType output tensor data type must be DataType::kINT8 (default), DataType::kFP8, DataType::kINT4 or
+    //! DataType::kFP4.
+    //! Future calls to set output type using setToType or setOutputType must be consistent. For strongly typed
+    //! networks, if the scale type is DataType::kHALF or DataType::kBF16 the output type must match.
+    //!
+    //! \return The new quantization layer, or nullptr if it could not be created.
+    //!
+    IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
+    {
+        return mImpl->addQuantizeV2(input, scale, outputType); // Dispatches to addQuantizeV2, not addQuantize.
+    }
+
+    //!
+    //! \brief Add a dynamic quantization layer to the network.
+    //!
+    //! This layer performs dynamic block quantization of its input tensor and outputs the
+    //! quantized data and the computed block scale-factors.
+    //! The block size is currently limited to 16 and the size of the blocked axis must be divisible by 16.
+    //! NOTE(review): "limited to 16" disagrees with \p blockSize below, which also lists 32 - confirm which holds.
+    //! \param input The input tensor to be quantized. Its data type must be one of DataType::kFLOAT,
+    //! DataType::kHALF, or DataType::kBF16. Currently only 2D and 3D inputs are supported.
+    //! \param axis The axis that is sliced into blocks. The axis must be the last or second to last dimension.
+    //! \param blockSize The number of elements that are quantized using a shared scale factor.
+    //! Valid values are 16 (NVFP4 quantization) and 32 (MXFP8 quantization).
+    //! \param outputType The data type of the quantized output tensor, must be DataType::kFP4 (NVFP4 quantization) or
+    //! DataType::kFP8 (MXFP8 quantization). Future calls to set output type using setToType or setOutputType must be
+    //! consistent.
+    //! \param scaleType The data type of the scale factor used for quantizing the input data, must be DataType::kFP8
+    //! (NVFP4 quantization) or DataType::kE8M0 (MXFP8 quantization).
+    //!
+    //! \return The new dynamic quantization layer, or nullptr if it could not be created.
+    //!
+    //! \see IDynamicQuantizeLayer
+    //!
+    IDynamicQuantizeLayer* addDynamicQuantize(
+        ITensor& input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
+    {
+        return mImpl->addDynamicQuantize(input, axis, blockSize, outputType, scaleType);
+    }
+
+    //!
+    //! \brief Add an Einsum layer to the network.
+    //!
+    //! \param inputs The input tensors to the layer.
+    //! \param nbInputs The number of input tensors.
+    //! \param equation The equation of the layer.
+    //! \see IEinsumLayer
+    //!
+    //! \return The new Einsum layer, or nullptr if it could not be created.
+    //!
+    IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept
+    {
+        return mImpl->addEinsum(inputs, nbInputs, equation);
+    }
+
+    //!
+    //! \brief Add a GridSample layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param grid The grid tensor to the layer.
+    //!
+    //! \see IGridSampleLayer
+    //!
+    //! Creates a GridSample layer with InterpolationMode::kLINEAR, unaligned corners,
+    //! and SampleMode::kFILL for 4d-shape input tensors.
+    //!
+    //! \return The new GridSample layer, or nullptr if it could not be created.
+    //!
+    IGridSampleLayer* addGridSample(ITensor& input, ITensor& grid) noexcept
+    {
+        return mImpl->addGridSample(input, grid);
+    }
+
+    //!
+    //! \brief Add a non-maximum suppression layer to the network.
+    //!
+    //! \param boxes The input boxes tensor to the layer.
+    //!
+    //! \param scores The input scores tensor to the layer.
+    //!
+    //! \param maxOutputBoxesPerClass The input maxOutputBoxesPerClass tensor to the layer.
+    //!
+    //! \see INMSLayer
+    //!
+    //! \return The new NMS layer, or nullptr if it could not be created.
+    //!
+    INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept
+    {
+        return mImpl->addNMS(boxes, scores, maxOutputBoxesPerClass); // All three tensors are forwarded unchanged.
+    }
+
+    //!
+    //! \brief Add a ReverseSequence layer to the network.
+    //!
+    //! \param input The input tensor to the layer. Must have rank >= 2.
+    //!
+    //! \param sequenceLens 1D tensor specifying lengths of sequences to reverse in a batch. The length of the
+    //!        sequenceLens tensor must be equal to the size of the dimension in input tensor specified by batchAxis.
+    //!
+    //! \see IReverseSequenceLayer
+    //!
+    //! \return The new ReverseSequence layer, or nullptr if it could not be created.
+    //!
+    IReverseSequenceLayer* addReverseSequence(ITensor& input, ITensor& sequenceLens) noexcept
+    {
+        return mImpl->addReverseSequence(input, sequenceLens); // Note: no batchAxis argument is passed here.
+    }
+
+    //!
+    //! \brief Add a normalization layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param scale The scale tensor used to scale the normalized output.
+    //! \param bias The bias tensor added to the scaled, normalized output.
+    //! \param axesMask The axes on which to perform mean calculations.
+    //!        The bit in position i of bitmask axesMask corresponds to explicit dimension i of the result.
+    //!        E.g., the least significant bit corresponds to the first explicit dimension and the next to least
+    //!        significant bit corresponds to the second explicit dimension.
+    //!
+    //! The normalization layer works by performing normalization of the tensor \p input on the specified \p axesMask.
+    //! The result is then scaled by multiplying with \p scale and adding \p bias.
+    //!
+    //! The shape of \p scale and \p bias are expected to be the same, and must have the same rank and be
+    //! unidirectionally broadcastable to the shape of \p input.
+    //!
+    //! \see INormalizationLayer
+    //!
+    //! \return The new normalization layer, or nullptr if it could not be created.
+    //!
+    INormalizationLayer* addNormalization(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
+    {
+        return mImpl->addNormalization(input, scale, bias, axesMask);
+    }
+
+    //!
+    //! \brief Add a cumulative layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param axis The axis tensor to apply the cumulative operation on. Currently, it must be a build-time constant 0D
+    //! shape tensor and must be in the range [-rank(input), rank(input)-1]. Negative values count from the back.
+    //! \param operation The reduction operation to perform.
+    //! \param exclusive The boolean that specifies whether it is an exclusive cumulative or inclusive cumulative.
+    //! \param reverse The boolean that specifies whether the cumulative operation should be applied backward.
+    //!
+    //! The cumulative layer works by performing the specified cumulative \p operation to the tensor \p input
+    //! on the axis specified by \p axis.
+    //!
+    //! \see ICumulativeLayer
+    //!
+    //! \return The new cumulative layer, or nullptr if it could not be created.
+    //!
+    ICumulativeLayer* addCumulative(ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
+    {
+        return mImpl->addCumulative(input, axis, operation, exclusive, reverse);
+    }
+
+    //!
+    //! \brief Return the builder from which this INetworkDefinition was created.
+    //!
+    //! \see IBuilder::createNetworkV2
+    //!
+    //! \return The builder.
+    virtual IBuilder& getBuilder() const noexcept
+    {
+        return mImpl->getBuilder();
+    }
+
+    //!
+    //! \brief Mark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
+    //!
+    //! \param name The name of the weights to mark.
+    //!
+    //! \return True if the weights were successfully marked as refittable, false if the weights do not exist or cannot
+    //! be refitted.
+    //!
+    bool markWeightsRefittable(char const* name) noexcept
+    {
+        return mImpl->markWeightsRefittable(name);
+    }
+
+    //!
+    //! \brief Unmark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
+    //!
+    //! \param name The name of the weights to unmark.
+    //!
+    //! \return True if the weights were successfully marked as unrefittable, false if the weights do not exist.
+    //!
+    bool unmarkWeightsRefittable(char const* name) noexcept
+    {
+        return mImpl->unmarkWeightsRefittable(name);
+    }
+
+    //!
+    //! \brief Whether the weights have been marked as refittable.
+    //!
+    //! \param name The name of the weights to check.
+    //!
+    //! \return True if the weights are marked as refittable, false if the weights do not exist or are marked as
+    //! non-refittable.
+    //!
+    bool areWeightsMarkedRefittable(char const* name) const noexcept
+    {
+        return mImpl->areWeightsMarkedRefittable(name);
+    }
+
+    //!
+    //! \brief Add a squeeze layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param axes The axes along which to remove unit dimensions.
+    //!
+    //! \see ISqueezeLayer
+    //!
+    //! Axes must be resolvable to a constant Int32 or Int64 1D shape tensor.
+    //! Values in axes must be unique and in the range of [-r, r-1], where r is the rank of the input tensor.
+    //! For each axis value, the corresponding dimension in the input tensor must be one.
+    //!
+    //! \return The new Squeeze layer, or nullptr if it could not be created.
+    //!
+    ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept
+    {
+        return mImpl->addSqueeze(input, axes);
+    }
+
+    //!
+    //! \brief Add an unsqueeze layer to the network.
+    //!
+    //! \param input The input tensor to the layer.
+    //! \param axes The axes at which to add unit dimensions.
+    //!
+    //! \see IUnsqueezeLayer
+    //!
+    //! Axes must be resolvable to a constant Int32 or Int64 shape tensor.
+    //! Values in axes must be unique and in the range of [-r_final, r_final-1], where r_final
+    //! is the sum of rank(input) and len(axes).
+    //!
+    //! r_final must be less than Dims::MAX_DIMS.
+    //!
+    //! \return The new Unsqueeze layer, or nullptr if it could not be created.
+    //!
+    IUnsqueezeLayer* addUnsqueeze(ITensor& input, ITensor& axes) noexcept
+    {
+        return mImpl->addUnsqueeze(input, axes);
+    }
+
+protected:
+    apiv::VNetworkDefinition* mImpl;
+};
+
+//!
+//! \enum CalibrationAlgoType
+//!
+//! \brief Version of calibration algorithm to use.
+//!
+//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+//!
+enum class CalibrationAlgoType : int32_t
+{
+    kLEGACY_CALIBRATION TRT_DEPRECATED_ENUM = 0,    //!< Legacy calibration
+    kENTROPY_CALIBRATION TRT_DEPRECATED_ENUM = 1,   //!< Legacy entropy calibration
+    kENTROPY_CALIBRATION_2 TRT_DEPRECATED_ENUM = 2, //!< Entropy calibration
+    kMINMAX_CALIBRATION TRT_DEPRECATED_ENUM = 3,    //!< MinMax calibration
+};
+
+//!
+//! Maximum number of elements in CalibrationAlgoType enum.
+//!
+//! \see CalibrationAlgoType
+//!
+template <>
+constexpr inline int32_t EnumMax<CalibrationAlgoType>() noexcept
+{
+    return 4;
+}
+
+//!
+//! \class IInt8Calibrator
+//!
+//! \brief Application-implemented interface for calibration.
+//!
+//! Calibration is a step performed by the builder when deciding suitable scale factors for 8-bit inference.
+//!
+//! It must also provide a method for retrieving representative images which the calibration process can use to examine
+//! the distribution of activations. It may optionally implement a method for caching the calibration result for reuse
+//! on subsequent runs.
+//!
+//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+//!
+class TRT_DEPRECATED IInt8Calibrator : public IVersionedInterface
+{
+public:
+    //!
+    //! \brief Get the batch size used for calibration batches.
+    //!
+    //! \return The batch size.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
+    //!
+    TRT_DEPRECATED virtual int32_t getBatchSize() const noexcept = 0;
+
+    //!
+    //! \brief Get a batch of input for calibration.
+    //!
+    //! The batch size of the input must match the batch size returned by getBatchSize().
+    //!
+    //! \param bindings An array of pointers to device memory that must be updated to point to device memory
+    //! containing each network input data.
+    //! \param names The names of the network input for each pointer in the binding array.
+    //! \param nbBindings The number of pointers in the bindings array.
+    //!
+    //! \return False if there are no more batches for calibration.
+    //!
+    //! \see getBatchSize()
+    //!
+    virtual bool getBatch(void* bindings[], char const* names[], int32_t nbBindings) noexcept = 0;
+
+    //!
+    //! \brief Load a calibration cache.
+    //!
+    //! Calibration is potentially expensive, so it can be useful to generate the calibration data once, then use it on
+    //! subsequent builds of the network. The cache includes the regression cutoff and quantile values used to generate
+    //! it, and will not be used if these do not match the settings of the current calibrator. However, the network
+    //! should also be recalibrated if its structure changes, or the input data set changes, and it is the
+    //! responsibility of the application to ensure this.
+    //!
+    //! \param length The length of the cached data, that should be set by the called function. If there is no data,
+    //! this should be zero.
+    //!
+    //! \return A pointer to the cache, or nullptr if there is no data.
+    //!
+    virtual void const* readCalibrationCache(std::size_t& length) noexcept = 0;
+
+    //!
+    //! \brief Save a calibration cache.
+    //!
+    //! \param ptr A pointer to the data to cache.
+    //! \param length The length in bytes of the data to cache.
+    //!
+    //! \see readCalibrationCache()
+    //!
+    virtual void writeCalibrationCache(void const* ptr, std::size_t length) noexcept = 0;
+
+    //!
+    //! \brief Get the algorithm used by this calibrator.
+    //!
+    //! \return The algorithm used by the calibrator.
+    //!
+    virtual CalibrationAlgoType getAlgorithm() noexcept = 0;
+
+    ~IInt8Calibrator() noexcept override = default;
+};
+
+namespace v_1_0
+{
+class TRT_DEPRECATED IInt8EntropyCalibrator : public IInt8Calibrator
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IInt8EntropyCalibrator", 1, 0};
+    }
+
+    //!
+    //! \brief Signal that this is the entropy calibrator.
+    //!
+    CalibrationAlgoType getAlgorithm() noexcept override
+    {
+        return CalibrationAlgoType::kENTROPY_CALIBRATION;
+    }
+
+    ~IInt8EntropyCalibrator() noexcept override = default;
+};
+} // namespace v_1_0
+
+//!
+//! \class IInt8EntropyCalibrator
+//!
+//! \brief Entropy calibrator.
+//!
+//! This is the Legacy Entropy calibrator. It is less complicated than the legacy calibrator and
+//! produces better results.
+//!
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IInt8EntropyCalibrator, not
+//!       v_1_0::IInt8EntropyCalibrator
+//!
+//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+//!
+using IInt8EntropyCalibrator = v_1_0::IInt8EntropyCalibrator;
+
+namespace v_1_0
+{
+class TRT_DEPRECATED IInt8EntropyCalibrator2 : public IInt8Calibrator
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IInt8EntropyCalibrator2", 1, 0};
+    }
+
+    //!
+    //! \brief Signal that this is the entropy calibrator 2.
+    //!
+    CalibrationAlgoType getAlgorithm() noexcept override
+    {
+        return CalibrationAlgoType::kENTROPY_CALIBRATION_2;
+    }
+
+    ~IInt8EntropyCalibrator2() noexcept override = default;
+};
+} // namespace v_1_0
+
+//!
+//! \class IInt8EntropyCalibrator2
+//!
+//! \brief Entropy calibrator 2.
+//!
+//! This is the preferred calibrator. This is the required calibrator for DLA, as it supports per
+//! activation tensor scaling.
+//!
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IInt8EntropyCalibrator2, not
+//!       v_1_0::IInt8EntropyCalibrator2
+//!
+//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+//!
+using IInt8EntropyCalibrator2 = v_1_0::IInt8EntropyCalibrator2;
+
+namespace v_1_0
+{
+class TRT_DEPRECATED IInt8MinMaxCalibrator : public IInt8Calibrator
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IInt8MinMaxCalibrator", 1, 0};
+    }
+
+    //!
+    //! \brief Signal that this is the MinMax Calibrator.
+    //!
+    CalibrationAlgoType getAlgorithm() noexcept override
+    {
+        return CalibrationAlgoType::kMINMAX_CALIBRATION;
+    }
+
+    ~IInt8MinMaxCalibrator() noexcept override = default;
+};
+} // namespace v_1_0
+
+//!
+//! \class IInt8MinMaxCalibrator
+//!
+//! \brief MinMax Calibrator.
+//!
+//! It supports per activation tensor scaling.
+//!
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IInt8MinMaxCalibrator, not
+//!       v_1_0::IInt8MinMaxCalibrator
+//!
+//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+//!
+using IInt8MinMaxCalibrator = v_1_0::IInt8MinMaxCalibrator;
+
+namespace v_1_0
+{
+class TRT_DEPRECATED IInt8LegacyCalibrator : public IInt8Calibrator
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IInt8Calibrator", 1, 0}; // NOTE(review): not "IInt8LegacyCalibrator" - confirm intended.
+    }
+
+    //!
+    //! \brief Signal that this is the legacy calibrator.
+    //!
+    CalibrationAlgoType getAlgorithm() noexcept override
+    {
+        return CalibrationAlgoType::kLEGACY_CALIBRATION;
+    }
+
+    //!
+    //! \brief The quantile (between 0 and 1) that will be used to select the region maximum when the quantile method
+    //! is in use.
+    //!
+    //! See the user guide for more details on how the quantile is used.
+    //!
+    virtual double getQuantile() const noexcept = 0;
+
+    //!
+    //! \brief The fraction (between 0 and 1) of the maximum used to define the regression cutoff when using regression
+    //! to determine the region maximum.
+    //!
+    //! See the user guide for more details on how the regression cutoff is used.
+    //!
+    virtual double getRegressionCutoff() const noexcept = 0;
+
+    //!
+    //! \brief Load a histogram.
+    //!
+    //! Histogram generation is potentially expensive, so it can be useful to generate the histograms once, then use
+    //! them when exploring the space of calibrations. The histograms should be regenerated if the network structure
+    //! changes, or the input data set changes, and it is the responsibility of the application to ensure this.
+    //!
+    //! \param length The length of the cached data, that should be set by the called function. If there is no data,
+    //! this should be zero.
+    //!
+    //! \return A pointer to the cache, or nullptr if there is no data.
+    //!
+    virtual void const* readHistogramCache(std::size_t& length) noexcept = 0;
+
+    //!
+    //! \brief Save a histogram cache.
+    //!
+    //! \param ptr A pointer to the data to cache.
+    //! \param length The length in bytes of the data to cache.
+    //!
+    //! \see readHistogramCache()
+    //!
+    virtual void writeHistogramCache(void const* ptr, std::size_t length) noexcept = 0;
+
+    ~IInt8LegacyCalibrator() noexcept override = default;
+};
+} // namespace v_1_0
+
+//!
+//! \class IInt8LegacyCalibrator
+//!
+//! \brief Legacy calibrator.
+//!
+//! This calibrator requires user parameterization,
+//! and is provided as a fallback option if the other calibrators yield poor results.
+//!
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IInt8LegacyCalibrator, not
+//!       v_1_0::IInt8LegacyCalibrator
+//!
+//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+//!
+using IInt8LegacyCalibrator = v_1_0::IInt8LegacyCalibrator;
+
+//!
+//! \class IAlgorithmIOInfo
+//!
+//! \brief Carries information about an input or output of the algorithm.
+//!        The IAlgorithmIOInfo of every input and output, together with IAlgorithmVariant, identifies the algorithm
+//!        variation and can be used to select or reproduce an algorithm using IAlgorithmSelector::selectAlgorithms().
+//! \see IAlgorithmVariant, IAlgorithm, IAlgorithmSelector::selectAlgorithms()
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
+//!
+class TRT_DEPRECATED IAlgorithmIOInfo : public INoCopy
+{
+public:
+    //!
+    //! \brief Return the DataType of the input/output of the algorithm.
+    //!
+    //! \return the data type.
+    //!
+    DataType getDataType() const noexcept
+    {
+        return mImpl->getDataType();
+    }
+
+    //!
+    //! \brief Return the strides of the input/output tensor of the algorithm.
+    //! For vectorized formats, strides are given in units of vectors.
+    //!
+    //! \return the strides of the tensor.
+    //!
+    Dims getStrides() const noexcept
+    {
+        return mImpl->getStrides();
+    }
+
+    //!
+    //! \brief Return the index of the vectorized dimension or -1 for non-vectorized formats.
+    //!
+    //! \return the index of the vectorized dimension.
+    //!
+    int64_t getVectorizedDim() const noexcept
+    {
+        return mImpl->getVectorizedDim();
+    }
+
+    //!
+    //! \brief Return the number of components per element.
+    //! This is always 1 for non-vectorized formats.
+    //!
+    //! \return the number of components per element.
+    //!
+    int64_t getComponentsPerElement() const noexcept
+    {
+        return mImpl->getComponentsPerElement();
+    }
+
+protected:
+    virtual ~IAlgorithmIOInfo() noexcept = default;
+    apiv::VAlgorithmIOInfo* mImpl; //!< Implementation object that every accessor above forwards to.
+};
+
+//!
+//! \class IAlgorithmVariant
+//!
+//! \brief Provides a unique 128-bit identifier, which along with the input and output information
+//!        denotes the variation of algorithm and can be used to select or reproduce an algorithm,
+//!        using IAlgorithmSelector::selectAlgorithms().
+//! \see IAlgorithmIOInfo, IAlgorithm, IAlgorithmSelector::selectAlgorithms()
+//! \note A single implementation can have multiple tactics.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
+//!
+class TRT_DEPRECATED IAlgorithmVariant : public INoCopy
+{
+public:
+    //!
+    //! \brief Return the implementation of the algorithm.
+    //!
+    int64_t getImplementation() const noexcept
+    {
+        return mImpl->getImplementation();
+    }
+
+    //!
+    //! \brief Return the tactic of the algorithm.
+    //!
+    int64_t getTactic() const noexcept
+    {
+        return mImpl->getTactic();
+    }
+
+protected:
+    virtual ~IAlgorithmVariant() noexcept = default;
+    apiv::VAlgorithmVariant* mImpl; //!< Implementation object that every accessor above forwards to.
+};
+
+//!
+//! \class IAlgorithmContext
+//!
+//! \brief Describes the context and requirements that could be fulfilled by one or more instances of IAlgorithm.
+//! \see IAlgorithm
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
+//!
+class TRT_DEPRECATED IAlgorithmContext : public INoCopy
+{
+public:
+    //!
+    //! \brief Return the name of the algorithm node.
+    //!
+    //! This is a unique identifier for the IAlgorithmContext.
+    //!
+    char const* getName() const noexcept
+    {
+        return mImpl->getName();
+    }
+
+    //!
+    //! \brief Get the minimum / optimum / maximum dimensions for an input or output tensor.
+    //!
+    //! \param index Index of the input or output of the algorithm. Incremental numbers are assigned to indices of
+    //!              the inputs and the outputs.
+    //! \param select Which of the minimum, optimum, or maximum dimensions to be queried.
+    //!
+    Dims getDimensions(int32_t index, OptProfileSelector select) const noexcept
+    {
+        return mImpl->getDimensions(index, select);
+    }
+
+    //!
+    //! \brief Return the number of inputs of the algorithm.
+    //!
+    int32_t getNbInputs() const noexcept
+    {
+        return mImpl->getNbInputs();
+    }
+
+    //!
+    //! \brief Return the number of outputs of the algorithm.
+    //!
+    int32_t getNbOutputs() const noexcept
+    {
+        return mImpl->getNbOutputs();
+    }
+
+protected:
+    virtual ~IAlgorithmContext() noexcept = default;
+    apiv::VAlgorithmContext* mImpl; //!< Implementation object that every accessor above forwards to.
+};
+
+//!
+//! \class IAlgorithm
+//!
+//! \brief Describes a variation of execution of a layer.
+//!        An algorithm is represented by IAlgorithmVariant and the IAlgorithmIOInfo for each of its inputs and outputs.
+//!        An algorithm can be selected or reproduced using IAlgorithmSelector::selectAlgorithms().
+//!
+//! \see IAlgorithmIOInfo, IAlgorithmVariant, IAlgorithmSelector::selectAlgorithms()
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
+//!
+class TRT_DEPRECATED IAlgorithm : public INoCopy
+{
+public:
+    //!
+    //! \brief Returns the algorithm variant.
+    //!
+    IAlgorithmVariant const& getAlgorithmVariant() const noexcept
+    {
+        return mImpl->getAlgorithmVariant();
+    }
+
+    //!
+    //! \brief The time in milliseconds to execute the algorithm.
+    //!
+    float getTimingMSec() const noexcept
+    {
+        return mImpl->getTimingMSec();
+    }
+
+    //!
+    //! \brief The size of the GPU temporary memory in bytes which the algorithm uses at execution time.
+    //!
+    std::size_t getWorkspaceSize() const noexcept
+    {
+        return mImpl->getWorkspaceSize();
+    }
+
+    //!
+    //! \brief Returns the format of an Algorithm input or output. Algorithm inputs are incrementally numbered first,
+    //!        followed by algorithm outputs.
+    //!
+    //! \param index Index of the input or output of the algorithm. Incremental numbers are assigned to indices of
+    //!              the inputs and the outputs.
+    //!
+    //! \return a pointer to an IAlgorithmIOInfo interface or nullptr if index is out of range.
+    //!
+    IAlgorithmIOInfo const* getAlgorithmIOInfoByIndex(int32_t index) const noexcept
+    {
+        return mImpl->getAlgorithmIOInfoByIndex(index);
+    }
+
+protected:
+    virtual ~IAlgorithm() noexcept = default;
+    apiv::VAlgorithm* mImpl; //!< Implementation object that every accessor above forwards to.
+}; // IAlgorithm
+
+namespace v_1_0
+{
+class TRT_DEPRECATED IAlgorithmSelector : public IVersionedInterface
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IAlgorithmSelector", 1, 0};
+    }
+    //!
+    //! \brief Select Algorithms for a layer from the given list of algorithm choices.
+    //!
+    //! \return The number of choices selected; every index written to selection lies in [0, nbChoices-1].
+    //! \param context The context for which the algorithm choices are valid.
+    //! \param choices The list of algorithm choices to select for implementation of this layer.
+    //! \param nbChoices Number of algorithm choices.
+    //! \param selection The user writes indices of selected choices into the selection buffer, of size nbChoices.
+    //!
+    //! \note TensorRT uses its default algorithm selection to choose from the list provided.
+    //!       If return value is 0, TensorRT's default algorithm selection is used unless
+    //!       BuilderFlag::kREJECT_EMPTY_ALGORITHMS is set.
+    //!       The list of choices is valid only for this specific algorithm context.
+    //!
+    virtual int32_t selectAlgorithms(IAlgorithmContext const& context, IAlgorithm const* const* choices,
+        int32_t nbChoices, int32_t* selection) noexcept = 0;
+
+    //!
+    //! \brief Called by TensorRT to report choices it made.
+    //!
+    //! \note For a given optimization profile, this call comes after all calls to selectAlgorithms.
+    //! algoChoices[i] is the choice that TensorRT made for algoContexts[i], for i in [0, nbAlgorithms-1].
+    //!
+    //! \param algoContexts The list of all algorithm contexts.
+    //! \param algoChoices The list of algorithm choices made by TensorRT.
+    //! \param nbAlgorithms The size of algoContexts as well as algoChoices.
+    //!
+    virtual void reportAlgorithms(IAlgorithmContext const* const* algoContexts, IAlgorithm const* const* algoChoices,
+        int32_t nbAlgorithms) noexcept = 0;
+
+    virtual ~IAlgorithmSelector() noexcept = default;
+}; // IAlgorithmSelector
+} // namespace v_1_0
+
+//!
+//! \class IAlgorithmSelector
+//!
+//! \brief Interface implemented by the application for selecting and reporting algorithms of a layer provided by the
+//!        builder.
+//! \note A layer in the context of algorithm selection may be different from ILayer in INetworkDefinition.
+//!       For example, an algorithm might be implementing a conglomeration of multiple ILayers in INetworkDefinition.
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IAlgorithmSelector, not
+//!       v_1_0::IAlgorithmSelector.
+//!
+//! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
+//!
+using IAlgorithmSelector = v_1_0::IAlgorithmSelector;
+
+//!
+//! \brief Represents one or more QuantizationFlag values combined using binary OR
+//! operations.
+//!
+//! \see IBuilderConfig::getQuantizationFlags(), IBuilderConfig::setQuantizationFlags()
+//!
+using QuantizationFlags = uint32_t;
+
+//!
+//! \enum QuantizationFlag
+//!
+//! \brief List of valid flags for quantizing the network to int8.
+//!
+//! \see IBuilderConfig::setQuantizationFlag(), IBuilderConfig::getQuantizationFlag()
+//!
+//! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+//!
+enum class QuantizationFlag : int32_t
+{
+    //! Run the int8 calibration pass before layer fusion. Only valid for IInt8LegacyCalibrator and
+    //! IInt8EntropyCalibrator. The builder always runs the int8 calibration pass before layer fusion for
+    //! IInt8MinMaxCalibrator and IInt8EntropyCalibrator2. Disabled by default.
+    kCALIBRATE_BEFORE_FUSION TRT_DEPRECATED_ENUM = 0
+};
+
+//!
+//! \brief Maximum number of quantization flags in the QuantizationFlag enum.
+//!
+//! \see QuantizationFlag
+//!
+template <>
+constexpr inline int32_t EnumMax<QuantizationFlag>() noexcept
+{
+    return 1;
+}
+
+//!
+//! \enum RuntimePlatform
+//!
+//! \brief Describes the intended runtime platform (operating system and CPU architecture) for the execution of the
+//!        TensorRT engine. TensorRT provides support for cross-platform engine compatibility when the target runtime
+//!        platform is different from the build platform.
+//!
+//! \note The cross-platform engine will not be able to run on the host platform it was built on.
+//!
+//! \note When building a cross-platform engine that also requires version forward compatibility,
+//!       kEXCLUDE_LEAN_RUNTIME must be set to exclude the target platform lean runtime.
+//!
+//! \note The cross-platform engine might have performance differences compared to the natively built engine on the
+//!       target platform.
+//!
+//! \see IBuilderConfig::setRuntimePlatform(), IBuilderConfig::getRuntimePlatform()
+//!
+enum class RuntimePlatform : int32_t
+{
+    //! No requirement for cross-platform compatibility. The engine constructed by TensorRT can only run on the
+    //! identical platform it was built on.
+    kSAME_AS_BUILD = 0,
+
+    //! Designates the target platform for engine execution as a Windows AMD64 system. Currently this flag can only be
+    //! enabled when building engines on Linux AMD64 platforms.
+    kWINDOWS_AMD64 = 1,
+};
+
+namespace impl
+{
+//!
+//! \brief Maximum number of elements in the RuntimePlatform enum.
+//!
+//! \see RuntimePlatform
+//!
+template <>
+struct EnumMaxImpl<RuntimePlatform>
+{
+    static constexpr int32_t kVALUE = 2;
+};
+} // namespace impl
+
+//!
+//! \brief Represents one or more BuilderFlag values combined using binary OR
+//! operations, e.g., 1U << BuilderFlag::kFP16 | 1U << BuilderFlag::kDEBUG.
+//!
+//! \see IBuilderConfig::setFlags(), IBuilderConfig::getFlags()
+//!
+using BuilderFlags = uint32_t;
+
+//!
+//! \enum BuilderFlag
+//!
+//! \brief List of valid modes that the builder can enable when creating an engine from a network definition.
+//!
+//! \see IBuilderConfig::setFlags(), IBuilderConfig::getFlags()
+//!
+enum class BuilderFlag : int32_t
+{
+    //! Enable FP16 layer selection, with FP32 fallback.
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    kFP16 TRT_DEPRECATED_ENUM = 0,
+
+    //! Enable Int8 layer selection, with FP32 fallback, and with FP16 fallback if kFP16 is also specified.
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    kINT8 TRT_DEPRECATED_ENUM = 1,
+
+    //! Enable debugging of layers via synchronizing after every layer.
+    kDEBUG = 2,
+
+    //! Enable layers marked to execute on GPU if layer cannot execute on DLA.
+    kGPU_FALLBACK = 3,
+
+    //! Enable building a refittable engine.
+    kREFIT = 4,
+
+    //! Disable reuse of timing information across identical layers.
+    kDISABLE_TIMING_CACHE = 5,
+
+    //! Allow (but not require) computations on tensors of type DataType::kFLOAT to use TF32.
+    //! TF32 computes inner products by rounding the inputs to 10-bit mantissas before
+    //! multiplying, but accumulates the sum using 23-bit mantissas. Enabled by default.
+    kTF32 = 6,
+
+    //! Allow the builder to examine weights and use optimized functions when weights have suitable sparsity.
+    kSPARSE_WEIGHTS = 7,
+
+    //! Change the allowed parameters in the EngineCapability::kSTANDARD flow to
+    //! match the restrictions that EngineCapability::kSAFETY check against for DeviceType::kGPU
+    //! and EngineCapability::kDLA_STANDALONE check against the DeviceType::kDLA case. This flag
+    //! is forced to true if EngineCapability::kSAFETY at build time if it is unset.
+    //!
+    //! This flag is only supported in NVIDIA Drive(R) products.
+    kSAFETY_SCOPE = 8,
+
+    //! Require that layers execute in specified precisions. Build fails otherwise.
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    kOBEY_PRECISION_CONSTRAINTS TRT_DEPRECATED_ENUM = 9,
+
+    //! Prefer that layers execute in specified precisions.
+    //! Fall back (with warning) to another precision if build would otherwise fail.
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    kPREFER_PRECISION_CONSTRAINTS TRT_DEPRECATED_ENUM = 10,
+
+    //! Require that no reformats be inserted between a layer and a network I/O tensor
+    //! for which ITensor::setAllowedFormats was called.
+    //! Build fails if a reformat is required for functional correctness.
+    //! \deprecated Deprecated in TensorRT 10.7. Unneeded API.
+    kDIRECT_IO TRT_DEPRECATED_ENUM = 11,
+
+    //! Fail if IAlgorithmSelector::selectAlgorithms returns an empty set of algorithms.
+    //! \deprecated Deprecated in TensorRT 10.10. Unneeded API due to IAlgorithmSelector deprecation.
+    kREJECT_EMPTY_ALGORITHMS TRT_DEPRECATED_ENUM = 12,
+
+    //! Restrict to lean runtime operators to provide version forward compatibility
+    //! for the plan.
+    //!
+    //! This flag is only supported by NVIDIA Volta and later GPUs.
+    //! This flag is not supported in NVIDIA Drive(R) products.
+    kVERSION_COMPATIBLE = 13,
+
+    //! Exclude lean runtime from the plan when version forward compatibility is enabled.
+    //! By default, this flag is unset, so the lean runtime will be included in the plan.
+    //!
+    //! If BuilderFlag::kVERSION_COMPATIBLE is not set then the value of this flag will be ignored.
+    kEXCLUDE_LEAN_RUNTIME = 14,
+
+    //! Enable plugins with FP8 input/output.
+    //!
+    //! This flag is not supported when HardwareCompatibilityLevel::kAMPERE_PLUS is enabled.
+    //!
+    //! \see HardwareCompatibilityLevel
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    kFP8 TRT_DEPRECATED_ENUM = 15,
+
+    //! Emit error when a tactic being timed is not present in the timing cache.
+    //! This flag has an effect only when IBuilderConfig has an associated ITimingCache.
+    kERROR_ON_TIMING_CACHE_MISS = 16,
+
+    //! Enable DataType::kBF16 layer selection, with FP32 fallback.
+    //! This flag is only supported by NVIDIA Ampere and later GPUs.
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    kBF16 TRT_DEPRECATED_ENUM = 17,
+
+    //! Disable caching of JIT-compilation results during engine build.
+    //! By default, JIT-compiled code will be serialized as part of the timing cache, which may significantly increase
+    //! the cache size. Setting this flag prevents the code from being serialized. This flag has an effect only when
+    //! BuilderFlag::kDISABLE_TIMING_CACHE is not set.
+    kDISABLE_COMPILATION_CACHE = 18,
+
+    //! Strip the refittable weights from the engine plan file.
+    kSTRIP_PLAN = 19,
+
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by kSTRIP_PLAN.
+    kWEIGHTLESS TRT_DEPRECATED_ENUM = kSTRIP_PLAN,
+
+    //! Create a refittable engine under the assumption that the refit weights will be identical to those provided at
+    //! build time. The resulting engine will have the same performance as a non-refittable one. All refittable weights
+    //! can be refitted through the refit API, but if the refit weights are not identical to the build-time weights,
+    //! behavior is undefined. When used alongside 'kSTRIP_PLAN', this flag will result in a small plan file for which
+    //! weights are later supplied via refitting. This enables use of a single set of weights with different inference
+    //! backends, or with TensorRT plans for multiple GPU architectures.
+    kREFIT_IDENTICAL = 20,
+
+    //!
+    //! \brief Enable weight streaming for the current engine.
+    //!
+    //! Weight streaming from the host enables execution of models that do not fit
+    //! in GPU memory by allowing TensorRT to intelligently stream network weights
+    //! from the CPU DRAM. Please see ICudaEngine::getMinimumWeightStreamingBudget
+    //! for the default memory budget when this flag is enabled.
+    //!
+    //! Enabling this feature changes the behavior of
+    //! IRuntime::deserializeCudaEngine to allocate the entire network's weights
+    //! on the CPU DRAM instead of GPU memory. Then,
+    //! ICudaEngine::createExecutionContext will determine the optimal split of
+    //! weights between the CPU and GPU and place weights accordingly.
+    //!
+    //! Future TensorRT versions may enable this flag by default.
+    //!
+    //! \warning Enabling this flag may marginally increase build time.
+    //!
+    //! \warning Enabling this feature will significantly increase the latency of
+    //!          ICudaEngine::createExecutionContext.
+    //!
+    //! \see IRuntime::deserializeCudaEngine,
+    //!      ICudaEngine::getMinimumWeightStreamingBudget,
+    //!      ICudaEngine::setWeightStreamingBudget
+    //!
+    kWEIGHT_STREAMING = 21,
+
+    //! Enable plugins with INT4 input/output.
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    kINT4 TRT_DEPRECATED_ENUM = 22,
+
+    //! Enable building a refittable engine and provide fine-grained control. This allows
+    //! control over which weights are refittable or not using INetworkDefinition::markWeightsRefittable and
+    //! INetworkDefinition::unmarkWeightsRefittable. By default, all weights are non-refittable when this flag is
+    //! enabled. This flag cannot be used together with kREFIT or kREFIT_IDENTICAL.
+    kREFIT_INDIVIDUAL = 23,
+
+    //! Disable floating-point optimizations: 0*x => 0, x-x => 0, or x/x => 1. These identities are
+    //! not true when x is a NaN or Inf, and thus might hide propagation or generation of NaNs. This flag is typically
+    //! used in combination with kSPARSE_WEIGHTS.
+    //! There are three valid sparsity configurations.
+    //! 1. Disable all sparsity. Both kSPARSE_WEIGHTS and kSTRICT_NANS are unset.
+    //! 2. Enable sparsity only where it does not affect propagation/generation of NaNs. Both kSPARSE_WEIGHTS and
+    //! kSTRICT_NANS are set.
+    //! 3. Enable all sparsity. kSPARSE_WEIGHTS is set and kSTRICT_NANS is unset.
+    kSTRICT_NANS = 24,
+
+    //! Enable memory monitor during build time.
+    kMONITOR_MEMORY = 25,
+
+    //! Enable plugins with FP4 input/output.
+    //! \deprecated Deprecated in TensorRT 10.12. Superseded by strong typing.
+    kFP4 TRT_DEPRECATED_ENUM = 26,
+
+    //! Enable editable timing cache.
+    kEDITABLE_TIMING_CACHE = 27,
+
+    //! Enable distributive independence.
+    //! When BuilderFlag::kDISTRIBUTIVE_INDEPENDENCE is set and a layer documents axis i of an output as a distributive
+    //! axis, then the layer behaves exactly as if each evaluation across axis i was done using identical operations.
+    //! The definition of distributive axis is as follows:
+    //! For IMatrixMultiplyLayer:
+    //! All axes that are not one of the vector or matrix dimensions are distributive axes.
+    //! For layers that perform reduction:
+    //! All non-reduction axes are distributive axes.
+    //! For layers that perform einsum:
+    //! Let n be the leftmost reduction axis. The axes to the left of n are distributive axes.
+    kDISTRIBUTIVE_INDEPENDENCE = 28,
+};
+
+//!
+//! \brief Maximum number of builder flags in the BuilderFlag enum.
+//!
+//! \see BuilderFlag
+//!
+template <>
+constexpr inline int32_t EnumMax<BuilderFlag>() noexcept
+{
+    return 29;
+}
+
+namespace v_1_0
+{
+//!
+//! \struct TimingCacheKey
+//!
+//! \brief The key to retrieve timing cache entries.
+//!
+//! TimingCacheKey has two types of representation: binary and string. The conversion rule from binary to string is:
+//! 1) Convert each uint8_t element in the binary key into two hexadecimal ascii chars, e.g. 0xab -> "ab".
+//! 2) Concat the ascii chars of all elements in sequence. The result should have exactly 32 chars.
+//! 3) Add prefix "0x" to the string produced in step 2.
+//!
+//! \see ITimingCache::query(), ITimingCache::update()
+//!
+struct TimingCacheKey
+{
+    uint8_t data[16];
+};
+
+//!
+//! \struct TimingCacheValue
+//!
+//! \brief The values in the cache entry.
+//!
+//! \see ITimingCache::query(), ITimingCache::update()
+//!
+struct TimingCacheValue
+{
+    //! Hash of the selected tactic.
+    uint64_t tacticHash;
+    //! Timing of this tactic in milliseconds. Negative numbers and NaN are invalid values.
+    float timingMSec;
+    //! UINT64_MAX represents the invalid tactic hash.
+    static constexpr uint64_t kINVALID_TACTIC_HASH = UINT64_MAX;
+};
+} // namespace v_1_0
+
+//!
+//! \class ITimingCache
+//!
+//! \brief Class to handle tactic timing info collected from builder.
+//!
+//! The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances
+//! to reduce the builder wallclock time.
+//!
+//! \warning It is a known issue that the same timing cache may not guarantee stable engine build reproducibility
+//!          in all cases.
+//!
+//! \see IBuilderConfig
+//!
+class ITimingCache : public INoCopy
+{
+public:
+    virtual ~ITimingCache() noexcept = default;
+
+    //!
+    //! \brief Serialize a timing cache to an IHostMemory object.
+    //!
+    //! This function allows serialization of the current timing cache.
+    //!
+    //! \return A pointer to an IHostMemory object that contains a serialized timing cache.
+    //!
+    //! \see IHostMemory
+    //!
+    nvinfer1::IHostMemory* serialize() const noexcept
+    {
+        return mImpl->serialize();
+    }
+
+    //!
+    //! \brief Combine input timing cache into local instance.
+    //!
+    //! This function allows combining entries in the input timing cache into the local cache object.
+    //!
+    //! \param inputCache The input timing cache.
+    //! \param ignoreMismatch Whether or not to allow cache verification header mismatch.
+    //!
+    //! \return True if combined successfully, false otherwise.
+    //!
+    //! Append entries in input cache to local cache. Conflicting entries will be skipped.
+    //! The input cache must be generated by a TensorRT build of the exact same version, otherwise
+    //! combine will be skipped and return false.
+    //! ignoreMismatch must be set to true if combining a timing cache created from a
+    //! different device.
+    //!
+    //! \warning Combining caches generated from devices with different device properties may
+    //!          lead to functional/performance bugs!
+    //!
+    bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept
+    {
+        return mImpl->combine(inputCache, ignoreMismatch);
+    }
+
+    //!
+    //! \brief Empty the timing cache.
+    //!
+    //! \return True if reset successfully, false otherwise.
+    //!
+    bool reset() noexcept
+    {
+        return mImpl->reset();
+    }
+
+    //!
+    //! \brief Query cache keys from the timing cache.
+    //!
+    //! This function queries the entry count and writes the keys out.
+    //!
+    //! \param keyBuffer The buffer to store keys.
+    //! \param capacity The capacity of the buffer.
+    //!
+    //! \return The count of entries in the cache and fill keys if keyBuffer is non-null.
+    //!         If an error occurs, -1 will be returned.
+    //!
+    //! Query the count of entries in the cache and write out cache keys if keyBuffer is provided.
+    //! Any key entries exceeding the capacity of the keyBuffer will not be copied.
+    //!
+    int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept
+    {
+        return mImpl->queryKeys(keyBuffer, capacity);
+    }
+
+    //!
+    //! \brief Query value in a cache entry.
+    //!
+    //! The function queries the value in a specific cache entry.
+    //!
+    //! \param key The query key.
+    //!
+    //! \return Cache value if the key exists, otherwise an invalid value.
+    //!
+    //! Query the value of the given cache key. If the key exists, write the value out,
+    //! otherwise return an invalid value.
+    //!
+    TimingCacheValue query(TimingCacheKey const& key) const noexcept
+    {
+        return mImpl->query(key);
+    }
+
+    //!
+    //! \brief Update values in a cache entry.
+    //!
+    //! The function updates the value in a specific cache entry.
+    //!
+    //! \param key The key to the entry to be updated.
+    //! \param value New cache value.
+    //!
+    //! \return True if update succeeds, otherwise false.
+    //!
+    //! Update the value of the given cache key. If the key does not exist, return false.
+    //! If the key exists and the new tactic timing is NaN, delete the cache entry and
+    //! return true. If tactic timing is not NaN and the new value is valid, override the
+    //! cache value and return true. False is returned when the new value is invalid.
+    //! If this layer cannot use the new tactic, build errors will be reported when
+    //! building the next engine.
+    //!
+    bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept
+    {
+        return mImpl->update(key, value);
+    }
+
+protected:
+    apiv::VTimingCache* mImpl; //!< Implementation object that every method above forwards to.
+};
+
+//!
+//! \enum MemoryPoolType
+//!
+//! \brief The type for memory pools used by TensorRT.
+//!
+//! \see IBuilderConfig::setMemoryPoolLimit, IBuilderConfig::getMemoryPoolLimit
+//!
+enum class MemoryPoolType : int32_t
+{
+    //!
+    //! kWORKSPACE is used by TensorRT to store intermediate buffers within an operation.
+    //! This defaults to max device memory. Set to a smaller value to restrict tactics that use over the
+    //! threshold en masse. For more targeted removal of tactics, use the IAlgorithmSelector
+    //! interface.
+    //!
+    kWORKSPACE = 0,
+
+    //!
+    //! kDLA_MANAGED_SRAM is a fast software managed RAM used by DLA to communicate within a layer.
+    //! The size of this pool must be at least 4 KiB and must be a power of 2.
+    //! This defaults to 1 MiB.
+    //! Orin has a capacity of 1 MiB per core.
+    //!
+    kDLA_MANAGED_SRAM = 1,
+
+    //!
+    //! kDLA_LOCAL_DRAM is host RAM used by DLA to share intermediate tensor data across operations.
+    //! The size of this pool must be at least 4 KiB and must be a power of 2.
+    //! This defaults to 1 GiB.
+    //!
+    kDLA_LOCAL_DRAM = 2,
+
+    //!
+    //! kDLA_GLOBAL_DRAM is host RAM used by DLA to store weights and metadata for execution.
+    //! The size of this pool must be at least 4 KiB and must be a power of 2.
+    //! This defaults to 512 MiB.
+    //!
+    kDLA_GLOBAL_DRAM = 3,
+
+    //!
+    //! kTACTIC_DRAM is the device DRAM used by the optimizer to
+    //! run tactics. On embedded devices, where host and device memory are unified, this includes all host
+    //! memory required by TensorRT to build the network up to the point of each memory allocation.
+    //! This defaults to 75% of totalGlobalMem as reported by cudaGetDeviceProperties when
+    //! cudaGetDeviceProperties.embedded is true, and 100% otherwise.
+    //!
+    kTACTIC_DRAM = 4,
+
+    //!
+    //! kTACTIC_SHARED_MEMORY defines the maximum sum of shared memory reserved by the driver and
+    //! used for executing CUDA kernels. Adjust this value to restrict tactics that exceed the
+    //! specified threshold en masse. The default value is device max capability. This value must
+    //! be less than 1 GiB.
+    //!
+    //! The driver reserved shared memory can be queried from cuDeviceGetAttribute(&reservedShmem,
+    //! CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK).
+    //!
+    //! Updating this flag will override the shared memory limit set by \ref HardwareCompatibilityLevel,
+    //! which defaults to 48 KiB - reservedShmem.
+    //!
+    kTACTIC_SHARED_MEMORY = 5,
+};
+
+//!
+//! Number of enumerators in the MemoryPoolType enum, i.e. one more than its largest value (kTACTIC_SHARED_MEMORY).
+//!
+//! \see MemoryPoolType
+//!
+template <>
+constexpr inline int32_t EnumMax<MemoryPoolType>() noexcept
+{
+    return 6;
+}
+
+//!
+//! \enum PreviewFeature
+//!
+//! \brief Define preview features
+//!
+//! Preview Features have been fully tested but are not yet as stable as other features in TensorRT.
+//! They are provided as opt-in features for at least one release.
+//!
+enum class PreviewFeature : int32_t
+{
+    //!
+    //! Allows optimization profiles to be shared across execution contexts.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. The default value for this flag is on and can not be changed.
+    //!
+    kPROFILE_SHARING_0806 TRT_DEPRECATED_ENUM = 0,
+
+    //!
+    //! Allows plugin I/O to be aliased when using IPluginV3OneBuildV2
+    //!
+    kALIASED_PLUGIN_IO_10_03 = 1,
+
+    //!
+    //! Allows IExecutionContext::updateDeviceMemorySizeForShapes to resize runner internal activation memory.
+    //! Using this feature can reduce runtime memory requirement when the actual input tensor shapes are smaller than
+    //! the maximum input tensor dimensions.
+    //!
+    kRUNTIME_ACTIVATION_RESIZE_10_10 = 2
+};
+
+namespace impl
+{
+//!
+//! Maximum number of elements in PreviewFeature enum.
+//!
+//! \see PreviewFeature
+//!
+template <>
+struct EnumMaxImpl<PreviewFeature>
+{
+    static constexpr int32_t kVALUE = 3;
+};
+} // namespace impl
+
+//!
+//! \enum HardwareCompatibilityLevel
+//!
+//! \brief Describes requirements of compatibility with GPU architectures other than that of the GPU on which the engine
+//! was built.
+//!
+//! \warning Note that compatibility with future hardware depends on CUDA forward compatibility support.
+//!
+enum class HardwareCompatibilityLevel : int32_t
+{
+    //! Do not require hardware compatibility with GPU architectures other than that of the GPU on which the engine was
+    //! built.
+    kNONE = 0,
+
+    //! Require that the engine is compatible with Ampere and newer GPUs. This will limit the combined usage of driver
+    //! reserved and backend kernel max shared memory to 48KiB, may reduce the number of available tactics for each
+    //! layer, and may prevent some fusions from occurring. Thus this can decrease the performance, especially for tf32
+    //! models.
+    //! This option will disable cuDNN, cuBLAS, and cuBLASLt as tactic sources.
+    //!
+    //! This option is only supported for engines built on NVIDIA Ampere and later GPUs.
+    //!
+    //! The driver reserved shared memory can be queried from cuDeviceGetAttribute(&reservedShmem,
+    //! CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK).
+    //!
+    kAMPERE_PLUS = 1,
+
+    //! Require that the engine is compatible with GPUs that have the same Compute Capability
+    //! (https://developer.nvidia.com/cuda-gpus) as the one it was built on. This may decrease the performance compared
+    //! to an engine with no compatibility.
+    //!
+    //! This option will disable cuDNN, cuBLAS, and cuBLASLt as tactic sources.
+    //!
+    //! This option is only supported for engines built on NVIDIA Turing and later GPUs.
+    //!
+    kSAME_COMPUTE_CAPABILITY = 2,
+};
+
+namespace impl
+{
+//!
+//! Maximum number of elements in HardwareCompatibilityLevel enum.
+//!
+//! \see HardwareCompatibilityLevel
+//!
+template <>
+struct EnumMaxImpl<HardwareCompatibilityLevel>
+{
+    static constexpr int32_t kVALUE = 3;
+};
+} // namespace impl
+
+
+//!
+//! \enum TilingOptimizationLevel
+//!
+//! \brief Define the optimization levels for Tiling
+//!
+//! TensorRT will try tiling optimization for on-chip caching if non-zero level is set.
+//! This level determines how much effort TensorRT would take to find a better solution for performance.
+//!
+enum class TilingOptimizationLevel : int32_t
+{
+    //! Do not apply any tiling strategy.
+    kNONE = 0,
+
+    //! Use a fast algorithm and heuristic based strategy. Slightly increases engine build time.
+    kFAST = 1,
+
+    //! Increase search space and use a mixed heuristic/profiling strategy.
+    //! Moderately increases engine build time.
+    kMODERATE = 2,
+
+    //! Increase search space even wider. Significantly increases engine build time.
+    kFULL = 3
+
+};
+
+namespace impl
+{
+//!
+//! Maximum number of elements in TilingOptimizationLevel enum.
+//!
+//! \see TilingOptimizationLevel
+//!
+template <>
+struct EnumMaxImpl<TilingOptimizationLevel>
+{
+    static constexpr int32_t kVALUE = 4;
+};
+} // namespace impl
+
+namespace v_1_0
+{
+class IProgressMonitor : public IVersionedInterface
+{
+public:
+    IProgressMonitor() = default;
+    virtual ~IProgressMonitor() noexcept = default;
+
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IProgressMonitor", 1, 0};
+    }
+
+    //!
+    //! \brief Signal that a phase of the optimizer has started.
+    //!
+    //! \param phaseName The name of this phase for tracking purposes.
+    //! \param parentPhase The parent phase that this phase belongs to, or nullptr if there is no parent.
+    //! \param nbSteps The number of steps that are involved in this phase.
+    //!
+    //! The phaseStart function signals to the application that the current phase is beginning, and that it has a
+    //! certain number of steps to perform. If \p parentPhase is nullptr, then the phaseStart is beginning an
+    //! independent phase, and if \p parentPhase is specified, then the current phase, specified by \p phaseName, is
+    //! within the scope of the parent phase. \p nbSteps will always be a positive number. The phaseStart function
+    //! implies that the first step is being executed. TensorRT will signal when each step is complete.
+    //!
+    //! Phase names are human readable English strings which are unique within a single phase hierarchy but which can be
+    //! reused once the previous instance has completed. Phase names and their hierarchies may change between versions
+    //! of TensorRT.
+    //!
+    //! \see phaseFinish
+    //!
+    virtual void phaseStart(char const* phaseName, char const* parentPhase, int32_t nbSteps) noexcept = 0;
+
+    //!
+    //! \brief Signal that a step of an optimizer phase has finished.
+    //!
+    //! \param phaseName The name of the innermost phase being executed.
+    //! \param step The step number that was completed.
+    //!
+    //! The stepComplete function signals to the application that TensorRT has finished the current \p step for the
+    //! phase \p phaseName, and will move on to the next step if there is one. The application can return false for
+    //! TensorRT to exit the build early. The step value will increase on subsequent calls in the range [0, nbSteps).
+    //!
+    //! \return true to continue to the next step or false to stop the build.
+    //!
+    virtual bool stepComplete(char const* phaseName, int32_t step) noexcept = 0;
+
+    //!
+    //! \brief Signal that a phase of the optimizer has finished.
+    //!
+    //! \param phaseName The name of the phase that has finished.
+    //!
+    //! The phaseFinish function signals to the application that the phase is complete. This function may be called
+    //! before all steps in the range [0, nbSteps) have been reported to stepComplete. This scenario can be triggered by
+    //! error handling, internal optimizations, or when stepComplete returns false to request cancellation of the build.
+    //!
+    //! \see phaseStart
+    //!
+    virtual void phaseFinish(char const* phaseName) noexcept = 0;
+
+}; // class IProgressMonitor
+} // namespace v_1_0
+
+//!
+//! \class IProgressMonitor
+//!
+//! \brief Application-implemented progress reporting interface for TensorRT.
+//!
+//! The IProgressMonitor is a user-defined object that TensorRT uses to report back when an internal algorithm has
+//! started or finished a phase to help provide feedback on the progress of the optimizer.
+//!
+//! The phaseStart function is triggered when a phase is entered and the phaseFinish function is triggered when that
+//! phase is exited. Each phase consists of one or more steps. When each step is completed, the stepComplete
+//! function is triggered. This will allow an application using the builder to communicate progress relative to when the
+//! optimization step is expected to complete.
+//!
+//! The implementation of IProgressMonitor must be thread-safe so that it can be called from multiple internal threads.
+//! The lifetime of the IProgressMonitor must exceed the lifetime of all TensorRT objects that use it.
+//!
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IProgressMonitor, not
+//!       v_1_0::IProgressMonitor
+//!
+using IProgressMonitor = v_1_0::IProgressMonitor;
+
+//!
+//! \class IBuilderConfig
+//!
+//! \brief Holds properties for configuring a builder to produce an engine.
+//!
+//! \see BuilderFlags
+//!
+class IBuilderConfig : public INoCopy
+{
+public:
+    virtual ~IBuilderConfig() noexcept = default;
+
+    //!
+    //! \brief Set the number of averaging iterations used when timing layers.
+    //!
+    //! When timing layers, the builder minimizes over a set of average times for layer execution. This parameter
+    //! controls the number of iterations used in averaging.
+    //!
+    //! \see getAvgTimingIterations()
+    //!
+    virtual void setAvgTimingIterations(int32_t avgTiming) noexcept
+    {
+        mImpl->setAvgTimingIterations(avgTiming);
+    }
+
+    //!
+    //! \brief Query the number of averaging iterations.
+    //!
+    //! By default the number of averaging iterations is 1.
+    //!
+    //! \see setAvgTimingIterations()
+    //!
+    int32_t getAvgTimingIterations() const noexcept
+    {
+        return mImpl->getAvgTimingIterations();
+    }
+
+    //!
+    //! \brief Configure the builder to target specified EngineCapability flow.
+    //!
+    //! The flow means a sequence of API calls that allow an application to set up a runtime, engine,
+    //! and execution context in order to run inference.
+    //!
+    //! The supported flows are specified in the EngineCapability enum.
+    //!
+    void setEngineCapability(EngineCapability capability) noexcept
+    {
+        mImpl->setEngineCapability(capability);
+    }
+
+    //!
+    //! \brief Query EngineCapability flow configured for the builder.
+    //!
+    //! By default it returns EngineCapability::kSTANDARD.
+    //!
+    //! \see setEngineCapability()
+    //!
+    EngineCapability getEngineCapability() const noexcept
+    {
+        return mImpl->getEngineCapability();
+    }
+
+    //!
+    //! \brief Set Int8 Calibration interface.
+    //!
+    //! The calibrator is to minimize the information loss during the INT8 quantization process.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED void setInt8Calibrator(IInt8Calibrator* calibrator) noexcept
+    {
+        mImpl->setInt8Calibrator(calibrator);
+    }
+
+    //!
+    //! \brief Get Int8 Calibration interface.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED IInt8Calibrator* getInt8Calibrator() const noexcept
+    {
+        return mImpl->getInt8Calibrator();
+    }
+
+    //!
+    //! \brief Set the build mode flags to turn on builder options for this network.
+    //!
+    //! The flags are listed in the BuilderFlags enum.
+    //! The flags set configuration options to build the network.
+    //!
+    //! \param builderFlags The build option for an engine.
+    //!
+    //! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
+    //!
+    //! \see getFlags()
+    //!
+    void setFlags(BuilderFlags builderFlags) noexcept
+    {
+        mImpl->setFlags(builderFlags);
+    }
+
+    //!
+    //! \brief Get the build mode flags for this builder config. Defaults to 0.
+    //!
+    //! \return The build options as a bitmask.
+    //!
+    //! \see setFlags()
+    //!
+    BuilderFlags getFlags() const noexcept
+    {
+        return mImpl->getFlags();
+    }
+
+    //!
+    //! \brief Clear a single build mode flag.
+    //!
+    //! Removes the input builder mode flag from the already enabled flags.
+    //!
+    //! \see setFlags()
+    //!
+    void clearFlag(BuilderFlag builderFlag) noexcept
+    {
+        mImpl->clearFlag(builderFlag);
+    }
+
+    //!
+    //! \brief Set a single build mode flag.
+    //!
+    //! Add the input builder mode flag to the already enabled flags.
+    //!
+    //! \see setFlags()
+    //!
+    void setFlag(BuilderFlag builderFlag) noexcept
+    {
+        mImpl->setFlag(builderFlag);
+    }
+
+    //!
+    //! \brief Returns true if the build mode flag is set
+    //!
+    //! \see getFlags()
+    //!
+    //! \return True if flag is set, false if unset.
+    //!
+    bool getFlag(BuilderFlag builderFlag) const noexcept
+    {
+        return mImpl->getFlag(builderFlag);
+    }
+
+    //!
+    //! \brief Set the device that this layer must execute on.
+    //!
+    //! \param layer The layer whose device type is being set.
+    //! \param deviceType The device type that this layer must execute on.
+    //! If DeviceType is not set or is reset, TensorRT will use the default DeviceType (see setDefaultDeviceType()).
+    //!
+    //! \note The device type for a layer must be compatible with the safety flow (if specified).
+    //! For example a layer cannot be marked for DLA execution while the builder is configured for kSAFETY.
+    //!
+    //! \see getDeviceType()
+    //!
+    void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept
+    {
+        mImpl->setDeviceType(layer, deviceType);
+    }
+
+    //!
+    //! \brief Get the device that this layer executes on.
+    //!
+    //! \return Returns DeviceType of the layer.
+    //!
+    DeviceType getDeviceType(ILayer const* layer) const noexcept
+    {
+        return mImpl->getDeviceType(layer);
+    }
+
+    //!
+    //! \brief Query whether the DeviceType has been explicitly set for this layer.
+    //!
+    //! \return True if the device type is not default.
+    //!
+    //! \see setDeviceType() getDeviceType() resetDeviceType()
+    //!
+    bool isDeviceTypeSet(ILayer const* layer) const noexcept
+    {
+        return mImpl->isDeviceTypeSet(layer);
+    }
+
+    //!
+    //! \brief Reset the DeviceType for this layer.
+    //!
+    //! \see setDeviceType() getDeviceType() isDeviceTypeSet()
+    //!
+    void resetDeviceType(ILayer const* layer) noexcept
+    {
+        mImpl->resetDeviceType(layer);
+    }
+
+    //!
+    //! \brief Checks if a layer can run on DLA.
+    //!
+    //! \return True if the layer can run on DLA, false otherwise.
+    //!
+    bool canRunOnDLA(ILayer const* layer) const noexcept
+    {
+        return mImpl->canRunOnDLA(layer);
+    }
+
+    //!
+    //! \brief Sets the DLA core used by the network. Defaults to -1.
+    //!
+    //! \param dlaCore The DLA core to execute the engine on, in the range [0, getNbDLACores()).
+    //!
+    //! This function is used to specify which DLA core to use via indexing, if multiple DLA cores are available.
+    //!
+    //! \warning If getNbDLACores() returns 0, then this function does nothing.
+    //!
+    //! \see IRuntime::setDLACore() getDLACore()
+    //!
+    void setDLACore(int32_t dlaCore) noexcept
+    {
+        mImpl->setDLACore(dlaCore);
+    }
+
+    //!
+    //! \brief Get the DLA core that the engine executes on.
+    //!
+    //! \return assigned DLA core or -1 for DLA not present or unset.
+    //!
+    int32_t getDLACore() const noexcept
+    {
+        return mImpl->getDLACore();
+    }
+
+    //!
+    //! \brief Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on
+    //! this device will run on it, unless setDeviceType is used to override the default DeviceType for a layer.
+    //!
+    //! \see getDefaultDeviceType()
+    //!
+    void setDefaultDeviceType(DeviceType deviceType) noexcept
+    {
+        mImpl->setDefaultDeviceType(deviceType);
+    }
+
+    //!
+    //! \brief Get the default DeviceType which was set by setDefaultDeviceType.
+    //!
+    //! By default it returns DeviceType::kGPU.
+    //!
+    DeviceType getDefaultDeviceType() const noexcept
+    {
+        return mImpl->getDefaultDeviceType();
+    }
+
+    //!
+    //! \brief Resets the builder configuration to defaults.
+    //!
+    //! Useful for initializing a builder config object to its original state.
+    //!
+    void reset() noexcept
+    {
+        mImpl->reset();
+    }
+
+    //!
+    //! \brief Set the CUDA stream that is used to profile this network.
+    //!
+    //! \param stream The CUDA stream used for profiling by the builder.
+    //!
+    //! \see getProfileStream()
+    //!
+    void setProfileStream(const cudaStream_t stream) noexcept
+    {
+        mImpl->setProfileStream(stream);
+    }
+
+    //!
+    //! \brief Get the CUDA stream that is used to profile this network.
+    //!
+    //! \return The CUDA stream set by setProfileStream, nullptr if setProfileStream has not been called.
+    //!
+    //! \see setProfileStream()
+    //!
+    cudaStream_t getProfileStream() const noexcept
+    {
+        return mImpl->getProfileStream();
+    }
+
+    //!
+    //! \brief Add an optimization profile.
+    //!
+    //! This function must be called at least once if the network has dynamic or shape input tensors.
+    //! This function may be called at most once when building a refittable engine, as more than
+    //! a single optimization profile is not supported for refittable engines.
+    //!
+    //! \param profile The new optimization profile, which must satisfy profile->isValid() == true
+    //!
+    //! \return The index of the optimization profile (starting from 0) if the input is valid, or -1 if the input is
+    //!         not valid.
+    //!
+    int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept
+    {
+        return mImpl->addOptimizationProfile(profile);
+    }
+
+    //!
+    //! \brief Get number of optimization profiles.
+    //!
+    //! This is one higher than the index of the last optimization profile that has been defined (or
+    //! zero, if none has been defined yet).
+    //!
+    //! \return The number of the optimization profiles.
+    //!
+    int32_t getNbOptimizationProfiles() const noexcept
+    {
+        return mImpl->getNbOptimizationProfiles();
+    }
+
+    //!
+    //! \brief Set verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
+    //!
+    //! Control how much layer information will be exposed in NVTX annotations and IEngineInspector.
+    //!
+    //! \see ProfilingVerbosity, getProfilingVerbosity(), IEngineInspector
+    //!
+    void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept
+    {
+        mImpl->setProfilingVerbosity(verbosity);
+    }
+
+    //!
+    //! \brief Get verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
+    //!
+    //! Get the current setting of verbosity level of layer information exposed in
+    //! NVTX annotations and IEngineInspector. Default value is ProfilingVerbosity::kLAYER_NAMES_ONLY.
+    //!
+    //! \see ProfilingVerbosity, setProfilingVerbosity(), IEngineInspector
+    //!
+    ProfilingVerbosity getProfilingVerbosity() const noexcept
+    {
+        return mImpl->getProfilingVerbosity();
+    }
+
+    //!
+    //! \brief Set Algorithm Selector.
+    //!
+    //! \param selector The algorithm selector to be set in the build config.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
+    //!
+    TRT_DEPRECATED void setAlgorithmSelector(IAlgorithmSelector* selector) noexcept
+    {
+        mImpl->setAlgorithmSelector(selector);
+    }
+
+    //!
+    //! \brief Get Algorithm Selector.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.8. Please use editable mode in ITimingCache instead.
+    //!
+    TRT_DEPRECATED IAlgorithmSelector* getAlgorithmSelector() const noexcept
+    {
+        return mImpl->getAlgorithmSelector();
+    }
+
+    //!
+    //! \brief Add a calibration profile.
+    //!
+    //! Calibration optimization profile must be set if int8 calibration is used to set scales for a network with
+    //! runtime dimensions.
+    //!
+    //! \param profile The new calibration profile, which must satisfy profile->isValid() == true or be nullptr.
+    //! MIN and MAX values will be overwritten by kOPT.
+    //!
+    //! \return True if the calibration profile was set correctly.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED bool setCalibrationProfile(IOptimizationProfile const* profile) noexcept
+    {
+        return mImpl->setCalibrationProfile(profile);
+    }
+
+    //!
+    //! \brief Get the current calibration profile.
+    //!
+    //! \return A pointer to the current calibration profile or nullptr if calibration profile is unset.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED IOptimizationProfile const* getCalibrationProfile() const noexcept
+    {
+        return mImpl->getCalibrationProfile();
+    }
+
+    //!
+    //! \brief Set the quantization flags.
+    //!
+    //! The flags are listed in the QuantizationFlag enum.
+    //! The flags set configuration options to quantize the network in int8.
+    //!
+    //! \param flags The quantization flags.
+    //!
+    //! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
+    //!
+    //! \see getQuantizationFlags()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED void setQuantizationFlags(QuantizationFlags flags) noexcept
+    {
+        mImpl->setQuantizationFlags(flags);
+    }
+
+    //!
+    //! \brief Get the quantization flags.
+    //!
+    //! \return The quantization flags as a bitmask.
+    //!
+    //! \see setQuantizationFlags()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED QuantizationFlags getQuantizationFlags() const noexcept
+    {
+        return mImpl->getQuantizationFlags();
+    }
+
+    //!
+    //! \brief Clear a single quantization flag.
+    //!
+    //! Removes the input quantization flag from the already enabled quantization flags.
+    //!
+    //! \see setQuantizationFlags()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED void clearQuantizationFlag(QuantizationFlag flag) noexcept
+    {
+        mImpl->clearQuantizationFlag(flag);
+    }
+
+    //!
+    //! \brief Set a single quantization flag.
+    //!
+    //! Add the input quantization flag to the already enabled quantization flags.
+    //!
+    //! \see setQuantizationFlags()
+    //!
+    //! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED void setQuantizationFlag(QuantizationFlag flag) noexcept
+    {
+        mImpl->setQuantizationFlag(flag);
+    }
+
+    //!
+    //! \brief Returns true if the quantization flag is set.
+    //!
+    //! \see getQuantizationFlags()
+    //!
+    //! \return True if quantization flag is set, false if unset.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.10. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED bool getQuantizationFlag(QuantizationFlag flag) const noexcept
+    {
+        return mImpl->getQuantizationFlag(flag);
+    }
+
+    //!
+    //! \brief Set tactic sources.
+    //!
+    //! This bitset controls which tactic sources TensorRT is allowed to use for tactic
+    //! selection.
+    //!
+    //! Multiple tactic sources may be combined with a bitwise OR operation. For example,
+    //! to enable cublas and cublasLt as tactic sources, use a value of:
+    //!
+    //! 1U << static_cast<uint32_t>(TacticSource::kCUBLAS) | 1U <<
+    //! static_cast<uint32_t>(TacticSource::kCUBLAS_LT)
+    //!
+    //! \see getTacticSources
+    //!
+    //! \return true if the tactic sources in the build configuration were updated.
+    //!         The tactic sources in the build configuration will not be updated if the provided value is invalid.
+    //!
+    bool setTacticSources(TacticSources tacticSources) noexcept
+    {
+        return mImpl->setTacticSources(tacticSources);
+    }
+
+    //!
+    //! \brief Get tactic sources.
+    //!
+    //! Get the tactic sources currently set in the engine build
+    //! configuration.
+    //!
+    //! \see setTacticSources()
+    //!
+    //! \return tactic sources
+    //!
+    TacticSources getTacticSources() const noexcept
+    {
+        return mImpl->getTacticSources();
+    }
+
+    //!
+    //! \brief Create timing cache
+    //!
+    //! Create ITimingCache instance from serialized raw data. The created timing cache doesn't belong to
+    //! a specific IBuilderConfig. It can be shared by multiple builder instances. Call setTimingCache()
+    //! before launching a builder to attach cache to builder instance.
+    //! The lifetime of the ITimingCache must exceed the lifetime of all builders that use it.
+    //!
+    //! \param blob A pointer to the raw data that contains serialized timing cache
+    //! \param size The size in bytes of the serialized timing cache. Size 0 means create a new cache from scratch
+    //!
+    //! \see setTimingCache
+    //!
+    //! \return the pointer to ITimingCache created
+    //!
+    nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept
+    {
+        return mImpl->createTimingCache(blob, size);
+    }
+
+    //!
+    //! \brief Attach a timing cache to IBuilderConfig
+    //!
+    //! The timing cache has a verification header to make sure the provided cache can be used in the current
+    //! environment. A failure will be reported if the CUDA device property in the provided cache is different from the
+    //! current environment. ignoreMismatch = true skips strict verification and allows loading a cache created from a
+    //! different device.
+    //!
+    //! The cache must not be destroyed until after the engine is built.
+    //!
+    //! \param cache the timing cache to be used
+    //! \param ignoreMismatch whether or not to allow using a cache that contains a different CUDA device property
+    //!
+    //! \return true if set successfully, false otherwise
+    //!
+    //! \warning Using cache generated from devices with different CUDA device properties may lead to
+    //!          functional/performance bugs.
+    //!
+    bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept
+    {
+        return mImpl->setTimingCache(cache, ignoreMismatch);
+    }
+
+    //!
+    //! \brief Get the pointer to the timing cache from current IBuilderConfig
+    //!
+    //! \return pointer to the timing cache used in current IBuilderConfig
+    //!
+    nvinfer1::ITimingCache const* getTimingCache() const noexcept
+    {
+        return mImpl->getTimingCache();
+    }
+
+    //!
+    //! \brief Set the memory size for the memory pool.
+    //!
+    //! TensorRT layers access different memory pools depending on the operation.
+    //! This function sets in the IBuilderConfig the size limit, specified by \p poolSize,
+    //! for the corresponding memory pool, specified by \p pool.
+    //! TensorRT will build a plan file that is constrained by these limits or report
+    //! which constraint caused the failure.
+    //!
+    //! If the size of the pool, specified by \p poolSize, fails to meet the size requirements
+    //! for the pool, this function does nothing and emits the recoverable error,
+    //! ErrorCode::kINVALID_ARGUMENT, to the registered IErrorRecorder.
+    //!
+    //! If the size of the pool is larger than the maximum possible value for the
+    //! configuration, this function does nothing and emits ErrorCode::kUNSUPPORTED_STATE.
+    //!
+    //! If the pool does not exist on the requested device type when building
+    //! the network, a warning is emitted to the logger, and the memory pool
+    //! value is ignored.
+    //!
+    //! Refer to MemoryPoolType to see the size requirements for each pool.
+    //!
+    //! \param pool The memory pool to limit the available memory for.
+    //! \param poolSize The size of the pool in bytes.
+    //!
+    //! \see getMemoryPoolLimit, MemoryPoolType
+    //!
+    void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
+    {
+        mImpl->setMemoryPoolLimit(pool, poolSize);
+    }
+
+    //!
+    //! \brief Get the memory size limit of the memory pool.
+    //!
+    //! Retrieve the memory size limit of the corresponding pool in bytes.
+    //! If setMemoryPoolLimit for the pool has not been called, this returns the default
+    //! value used by TensorRT. This default value is not necessarily the maximum possible
+    //! value for that configuration.
+    //!
+    //! \param pool The memory pool to get the limit for.
+    //!
+    //! \returns The size of the memory limit, in bytes, for the corresponding pool.
+    //!
+    //! \see setMemoryPoolLimit
+    //!
+    std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
+    {
+        return mImpl->getMemoryPoolLimit(pool);
+    }
+
+    //!
+    //! \brief Enable or disable a specific preview feature
+    //!
+    //! Allows enabling or disabling experimental features, which are not enabled by default in the
+    //! current release.
+    //!
+    //! Refer to PreviewFeature for additional information, and a list of the available features.
+    //!
+    //! \param feature the feature to enable / disable
+    //! \param enable true for enable, false for disable
+    //!
+    //! \see PreviewFeature, getPreviewFeature
+    //!
+    void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
+    {
+        mImpl->setPreviewFeature(feature, enable);
+    }
+
+    //!
+    //! \brief Get status of preview feature
+    //!
+    //! \param feature the feature to query
+    //!
+    //! \returns true if the \p feature is enabled, false otherwise
+    //!
+    //! \see PreviewFeature, setPreviewFeature
+    //!
+    bool getPreviewFeature(PreviewFeature feature) const noexcept
+    {
+        return mImpl->getPreviewFeature(feature);
+    }
+
+    //!
+    //! \brief Set the builder optimization level.
+    //!
+    //! Set the builder optimization level. Setting a higher optimization
+    //! level allows the optimizer to spend more time searching for optimization opportunities. The
+    //! resulting engine may have better performance compared to an engine built with a lower optimization level.
+    //!
+    //! The default optimization level is 3. Valid values include integers from 0 to the maximum optimization level,
+    //! which is currently 5. Setting it to greater than the maximum level results in behavior identical to the
+    //! maximum level.
+    //!
+    //! Each builder optimization level is described below:
+    //!
+    //! - Level 0: This enables the fastest compilation by disabling dynamic kernel generation and selecting the first
+    //!   tactic that succeeds in execution. This will also not respect a timing cache.
+    //! - Level 1: Available tactics are sorted by heuristics, but only the top are tested to select the best. If a
+    //!   dynamic kernel is generated its compile optimization is low.
+    //! - Level 2: Available tactics are sorted by heuristics, but only the fastest tactics are tested to select the
+    //!   best.
+    //! - Level 3: Apply heuristics to see if a static precompiled kernel is applicable or if a new one has to be
+    //!   compiled dynamically.
+    //! - Level 4: Always compiles a dynamic kernel.
+    //! - Level 5: Always compiles a dynamic kernel and compares it to static kernels.
+    //!
+    //! \param level The optimization level to set. Must be non-negative.
+    //!
+    //! \see getBuilderOptimizationLevel
+    //!
+    void setBuilderOptimizationLevel(int32_t level) noexcept
+    {
+        mImpl->setBuilderOptimizationLevel(level);
+    }
+
+    //!
+    //! \brief Get the builder optimization level.
+    //!
+    //! \returns The current builder optimization level.
+    //! \note This getter is not const-qualified, so it cannot be called through a const IBuilderConfig.
+    //! \see setBuilderOptimizationLevel
+    //!
+    int32_t getBuilderOptimizationLevel() noexcept
+    {
+        return mImpl->getBuilderOptimizationLevel();
+    }
+
+    //!
+    //! \brief Set the hardware compatibility level.
+    //!
+    //! Hardware compatibility allows an engine to run on GPU architectures other than that of the GPU where the
+    //! engine was built.
+    //!
+    //! The default hardware compatibility level is HardwareCompatibilityLevel::kNONE.
+    //!
+    //! \param hardwareCompatibilityLevel The level of hardware compatibility.
+    //!
+    //! \see getHardwareCompatibilityLevel()
+    //!
+    void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
+    {
+        mImpl->setHardwareCompatibilityLevel(hardwareCompatibilityLevel);
+    }
+
+    //!
+    //! \brief Get the hardware compatibility level.
+    //!
+    //! \return The level of hardware compatibility; the documented default is
+    //!         HardwareCompatibilityLevel::kNONE.
+    //!
+    //! \see setHardwareCompatibilityLevel()
+    //!
+    HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
+    {
+        return mImpl->getHardwareCompatibilityLevel();
+    }
+
+    //!
+    //! \brief Set the plugin libraries to be serialized with version-compatible engines.
+    //!
+    //! Each entry in the list of libraries must be unique.
+    //!
+    //! \param paths Array holding the paths of the plugin libraries.
+    //! \param nbPaths The number of paths in \p paths.
+    //!
+    void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept
+    {
+        mImpl->setPluginsToSerialize(paths, nbPaths);
+    }
+
+    //!
+    //! \brief Get the plugin library path to be serialized with version-compatible engines.
+    //!
+    //! \param index Index of the plugin library path in the list. Should be in the range
+    //! `[0, getNbPluginsToSerialize())`.
+    //!
+    //! \return The path to the plugin library.
+    //!
+    char const* getPluginToSerialize(int32_t index) const noexcept
+    {
+        return mImpl->getPluginToSerialize(index);
+    }
+
+    //!
+    //! \brief Get the number of plugin library paths to be serialized with version-compatible engines.
+    //!
+    //! \return The number of paths; valid indices for getPluginToSerialize() are `[0, getNbPluginsToSerialize())`.
+    //!
+    int32_t getNbPluginsToSerialize() const noexcept
+    {
+        return mImpl->getNbPluginsToSerialize();
+    }
+
+    //!
+    //! \brief Set the maximum number of auxiliary streams that TRT is allowed to use.
+    //!
+    //! If the network contains operators that can run in parallel, TRT can execute them using auxiliary streams
+    //! in addition to the one provided to the IExecutionContext::enqueueV3() call.
+    //!
+    //! The default maximum number of auxiliary streams is determined by the heuristics in TensorRT on whether enabling
+    //! multi-stream would improve the performance. This behavior can be overridden by calling this API to set the
+    //! maximum number of auxiliary streams explicitly. Set this to 0 to enforce single-stream inference.
+    //!
+    //! The resulting engine may use fewer auxiliary streams than the maximum if the network does not contain enough
+    //! parallelism or if TensorRT determines that using more auxiliary streams does not help improve the performance.
+    //!
+    //! \note Allowing more auxiliary streams does not always give better performance since there will be
+    //! synchronization overhead between streams. Using CUDA graphs at runtime can help reduce the overhead caused by
+    //! cross-stream synchronizations.
+    //!
+    //! \note Using more auxiliary streams leads to more memory usage at runtime since some activation memory blocks
+    //! will not be able to be reused.
+    //!
+    //! \param nbStreams The maximum number of auxiliary streams that TRT is allowed to use.
+    //!
+    //! \see getMaxAuxStreams(), ICudaEngine::getNbAuxStreams(), IExecutionContext::setAuxStreams()
+    //!
+    void setMaxAuxStreams(int32_t nbStreams) noexcept
+    {
+        mImpl->setMaxAuxStreams(nbStreams);
+    }
+
+    //!
+    //! \brief Get the maximum number of auxiliary streams that TRT is allowed to use.
+    //! \return The maximum number of auxiliary streams that TRT is allowed to use.
+    //! \see setMaxAuxStreams()
+    //!
+    int32_t getMaxAuxStreams() const noexcept
+    {
+        return mImpl->getMaxAuxStreams();
+    }
+
+    //!
+    //! \brief Sets the progress monitor for building a network.
+    //!
+    //! The progress monitor signals to the application when different phases of
+    //! the compiler are being executed. Setting to nullptr unsets the monitor so
+    //! that the application is not signaled.
+    //!
+    //! \param monitor The progress monitor to assign to the IBuilderConfig.
+    //!
+    //! \see IBuilderConfig::getProgressMonitor
+    //!
+    void setProgressMonitor(IProgressMonitor* monitor) noexcept
+    {
+        mImpl->setProgressMonitor(monitor);
+    }
+
+    //!
+    //! \brief Get the progress monitor assigned to this IBuilderConfig.
+    //! \return The progress monitor set by the application or nullptr.
+    //! \see IBuilderConfig::setProgressMonitor
+    //!
+    IProgressMonitor* getProgressMonitor() const noexcept
+    {
+        return mImpl->getProgressMonitor();
+    }
+
+    //!
+    //! \brief Set the target platform for runtime execution.
+    //!
+    //! Cross-platform compatibility allows an engine to be built and executed on different platforms.
+    //!
+    //! The default cross-platform target is RuntimePlatform::kSAME_AS_BUILD.
+    //!
+    //! \param runtimePlatform The platform on which the built engine is intended to run.
+    //!
+    //! \see IBuilderConfig::getRuntimePlatform(), RuntimePlatform
+    //!
+    void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
+    {
+        mImpl->setRuntimePlatform(runtimePlatform);
+    }
+
+    //!
+    //! \brief Get the target platform for runtime execution.
+    //!
+    //! \return The target platform for runtime execution; the documented default is RuntimePlatform::kSAME_AS_BUILD.
+    //!
+    //! \see IBuilderConfig::setRuntimePlatform()
+    //!
+    RuntimePlatform getRuntimePlatform() const noexcept
+    {
+        return mImpl->getRuntimePlatform();
+    }
+
+    //!
+    //! \brief Set the maximum number of tactics to time when there is a choice of tactics.
+    //!
+    //! This function controls the number of tactics timed when there are multiple tactics to choose from.
+    //! \param maxNbTactics The maximum number of tactics to time.
+    //! \see getMaxNbTactics()
+    //!
+    void setMaxNbTactics(int32_t maxNbTactics) noexcept
+    {
+        mImpl->setMaxNbTactics(maxNbTactics);
+    }
+
+    //!
+    //! \brief Query the maximum number of tactics timed when there is a choice.
+    //!
+    //! By default the value is -1, indicating TensorRT can determine the number of tactics based on its own heuristic.
+    //! \return The maximum number of tactics to time.
+    //! \see setMaxNbTactics()
+    //!
+    int32_t getMaxNbTactics() const noexcept
+    {
+        return mImpl->getMaxNbTactics();
+    }
+
+    //!
+    //! \brief Set the tiling optimization level.
+    //!
+    //! Tiling allows TensorRT to try an on-chip caching strategy.
+    //!
+    //! The default tiling optimization level is TilingOptimizationLevel::kNONE.
+    //!
+    //! \param level The level of tiling optimization.
+    //!
+    //! \return True if successful, false otherwise.
+    //!
+    bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept
+    {
+        return mImpl->setTilingOptimizationLevel(level);
+    }
+
+    //!
+    //! \brief Get the tiling optimization level.
+    //!
+    //! \return The level of tiling optimization.
+    //!
+    //! \see setTilingOptimizationLevel()
+    //!
+    TilingOptimizationLevel getTilingOptimizationLevel() const noexcept
+    {
+        return mImpl->getTilingOptimizationLevel();
+    }
+
+    //!
+    //! \brief Set the L2 cache usage limit for tiling optimization.
+    //!
+    //! Parameter for tiling optimization. This API only takes effect when TilingOptimizationLevel is not kNONE.
+    //! \note If setL2LimitForTiling() has not been called, TensorRT chooses a default value between 0 and the L2
+    //! capacity size.
+    //!
+    //! \param size The size of the L2 cache usage limit for tiling optimization.
+    //!
+    //! \return True if successful, false otherwise.
+    //!
+    bool setL2LimitForTiling(int64_t size) noexcept
+    {
+        return mImpl->setL2LimitForTiling(size);
+    }
+
+    //!
+    //! \brief Get the L2 cache usage limit for tiling optimization.
+    //!
+    //! \return The L2 cache usage limit for tiling optimization.
+    //!
+    //! \see setL2LimitForTiling()
+    //!
+    int64_t getL2LimitForTiling() const noexcept
+    {
+        return mImpl->getL2LimitForTiling();
+    }
+
+    //!
+    //! \brief Set a config string for remote auto tuning.
+    //!
+    //! Remote auto-tuning is supported only for engines built with EngineCapability::kSAFETY.
+    //!
+    //! \param config The config string to be used during remote auto tuning.
+    //! \return True if successful, false otherwise.
+    //! \see getRemoteAutoTuningConfig()
+    //!
+    bool setRemoteAutoTuningConfig(char const* config) noexcept
+    {
+        return mImpl->setRemoteAutoTuningConfig(config);
+    }
+
+    //!
+    //! \brief Get the config string for remote auto tuning.
+    //!
+    //! \return The current config string for remote auto tuning, or nullptr if not set.
+    //!
+    char const* getRemoteAutoTuningConfig() const noexcept
+    {
+        return mImpl->getRemoteAutoTuningConfig();
+    }
+
+protected:
+    apiv::VBuilderConfig* mImpl;
+};
+
+//!
+//! \brief Represents one or more NetworkDefinitionCreationFlag flags
+//! using binary OR operations.
+//!  e.g., 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kSTRONGLY_TYPED)
+//!
+//! \see IBuilder::createNetworkV2
+//!
+using NetworkDefinitionCreationFlags = uint32_t;
+
+//!
+//! \enum NetworkDefinitionCreationFlag
+//!
+//! \brief List of immutable network properties expressed at network creation time.
+//! Used with createNetworkV2(): each enumerator is a bit position in the NetworkDefinitionCreationFlags mask.
+//!
+//! \see IBuilder::createNetworkV2
+//!
+enum class NetworkDefinitionCreationFlag : int32_t
+{
+    //! Ignored because networks are always "explicit batch" in TensorRT 10.0.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0.
+    kEXPLICIT_BATCH TRT_DEPRECATED_ENUM = 0,
+
+    //! Mark the network to be strongly typed.
+    //! Every tensor in the network has a data type defined in the network following only type inference rules and the
+    //! inputs/operator annotations. Setting layer precision and layer output types is not allowed, and the network
+    //! output types will be inferred based on the input types and the type inference rules.
+    kSTRONGLY_TYPED = 1,
+    //! If set, for a Python plugin with both AOT and JIT implementations, the JIT implementation will be used.
+    //! Any plugin-specific JIT/AOT specification may override this.
+    //! Cannot be used in conjunction with NetworkDefinitionCreationFlag::kPREFER_AOT_PYTHON_PLUGINS.
+    kPREFER_JIT_PYTHON_PLUGINS = 2,
+
+    //! If set, for a Python plugin with both AOT and JIT implementations, the AOT implementation will be used.
+    //! Any plugin-specific JIT/AOT specification may override this.
+    //! Cannot be used in conjunction with NetworkDefinitionCreationFlag::kPREFER_JIT_PYTHON_PLUGINS.
+    kPREFER_AOT_PYTHON_PLUGINS = 3,
+};
+
+//!
+//! Number of enumerators in the NetworkDefinitionCreationFlag enum (values 0 through 3).
+//!
+//! \see NetworkDefinitionCreationFlag
+//!
+template <>
+constexpr inline int32_t EnumMax<NetworkDefinitionCreationFlag>() noexcept
+{
+    return 4;
+}
+
+//!
+//! \class IBuilder
+//!
+//! \brief Builds an engine from a network definition.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IBuilder : public INoCopy
+{
+public:
+    virtual ~IBuilder() noexcept = default;
+
+    //!
+    //! \brief Determine whether the platform has fast native fp16.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
+    //!
+    TRT_DEPRECATED bool platformHasFastFp16() const noexcept
+    {
+        return mImpl->platformHasFastFp16();
+    }
+
+    //!
+    //! \brief Determine whether the platform has fast native int8.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
+    //!
+    TRT_DEPRECATED bool platformHasFastInt8() const noexcept
+    {
+        return mImpl->platformHasFastInt8();
+    }
+
+    //!
+    //! \brief Get the maximum batch size DLA can support.
+    //! For any tensor the total volume of index dimensions combined (dimensions other than CHW) with the requested
+    //! batch size should not exceed the value returned by this function.
+    //!
+    //! \warning getMaxDLABatchSize does not work with dynamic shapes.
+    //!
+    int32_t getMaxDLABatchSize() const noexcept
+    {
+        return mImpl->getMaxDLABatchSize();
+    }
+
+    //!
+    //! \brief Return the number of DLA engines available to this builder.
+    //!
+    int32_t getNbDLACores() const noexcept
+    {
+        return mImpl->getNbDLACores();
+    }
+
+    //!
+    //! \brief Set the GPU allocator.
+    //!
+    //! \param allocator The GPU allocator to be used by the builder. All GPU memory acquired will use this
+    //! allocator. If nullptr is passed, the default allocator will be used.
+    //!
+    //! Default: allocateAsync uses cudaMallocAsync if cudaDevAttrMemoryPoolsSupported returns true, otherwise falls
+    //! back to cudaMalloc. allocate always uses cudaMalloc.
+    //!
+    //! \note This allocator will be passed to any engines created via the builder; thus the lifetime of the allocator
+    //! must span the lifetime of those engines as well as that of the builder. If nullptr is passed, the default
+    //! allocator will be used.
+    //!
+    void setGpuAllocator(IGpuAllocator* allocator) noexcept
+    {
+        mImpl->setGpuAllocator(allocator);
+    }
+
+    //!
+    //! \brief Create a builder configuration object.
+    //!
+    //! The caller owns the new IBuilderConfig, which must be destroyed with operator delete
+    //! before this IBuilder is destroyed. Destroying this IBuilder before destroying the
+    //! IBuilderConfig causes undefined behavior.
+    //!
+    //! \see IBuilderConfig
+    //!
+    nvinfer1::IBuilderConfig* createBuilderConfig() noexcept
+    {
+        return mImpl->createBuilderConfig();
+    }
+
+    //!
+    //! \brief Create a network definition object.
+    //!
+    //! Creates a network definition object with immutable properties specified using the flags parameter.
+    //!
+    //! createNetworkV2 supports creating a network with properties from NetworkDefinitionCreationFlags.
+    //!
+    //! createNetworkV2 supports dynamic shapes and explicit batch dimensions by default.
+    //!
+    //! createNetworkV2 with NetworkDefinitionCreationFlag::kSTRONGLY_TYPED flag supports creating a strongly typed plan
+    //! where tensor data types are inferred from network input types and operator type specification.
+    //!
+    //! The caller owns the new INetworkDefinition, which must be destroyed with operator delete
+    //! before this IBuilder is destroyed. Destroying this IBuilder before destroying the
+    //! INetworkDefinition causes undefined behavior.
+    //!
+    //! \param flags Bitset of NetworkDefinitionCreationFlags specifying network properties combined with bitwise OR,
+    //!              e.g., 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kSTRONGLY_TYPED).
+    //!
+    //! \see INetworkDefinition, NetworkDefinitionCreationFlags
+    //!
+    nvinfer1::INetworkDefinition* createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept
+    {
+        return mImpl->createNetworkV2(flags);
+    }
+
+    //!
+    //! \brief Create a new optimization profile.
+    //!
+    //! If the network has any dynamic input tensors, the appropriate calls to setDimensions() must be made.
+    //! Likewise, if there are any shape input tensors, the appropriate calls to setShapeValues() are required.
+    //! The builder retains ownership of the created optimization profile and returns a raw pointer, i.e. the users
+    //! must not attempt to delete the returned pointer.
+    //!
+    //! \see IOptimizationProfile
+    //!
+    nvinfer1::IOptimizationProfile* createOptimizationProfile() noexcept
+    {
+        return mImpl->createOptimizationProfile();
+    }
+
+    //!
+    //! \brief Set the ErrorRecorder for this interface.
+    //!
+    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
+    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+    //! a recorder has been registered.
+    //!
+    //! If an error recorder is not set, messages will be sent to the global log stream.
+    //!
+    //! \param recorder The error recorder to register with this interface.
+    //!
+    //! \see getErrorRecorder()
+    //!
+    void setErrorRecorder(IErrorRecorder* recorder) noexcept
+    {
+        mImpl->setErrorRecorder(recorder);
+    }
+
+    //!
+    //! \brief Get the ErrorRecorder assigned to this interface.
+    //!
+    //! Retrieves the assigned error recorder object for the given class.
+    //! A nullptr will be returned if setErrorRecorder has not been called.
+    //!
+    //! \return A pointer to the IErrorRecorder object that has been registered.
+    //!
+    //! \see setErrorRecorder()
+    //!
+    IErrorRecorder* getErrorRecorder() const noexcept
+    {
+        return mImpl->getErrorRecorder();
+    }
+
+    //!
+    //! \brief Resets the builder state to default values.
+    //!
+    void reset() noexcept
+    {
+        mImpl->reset();
+    }
+
+    //!
+    //! \brief Determine whether the platform has TF32 support.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.5. Please query data type support from CUDA directly.
+    //!
+    TRT_DEPRECATED bool platformHasTf32() const noexcept
+    {
+        return mImpl->platformHasTf32();
+    }
+
+    //!
+    //! \brief Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
+    //!
+    //! This function allows building and serialization of a network without creating an engine.
+    //!
+    //! \param network Network definition.
+    //! \param config Builder configuration.
+    //!
+    //! \return A pointer to an IHostMemory object that contains a serialized network.
+    //!
+    //! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
+    //!
+    //! \see INetworkDefinition, IBuilderConfig, IHostMemory
+    //!
+    nvinfer1::IHostMemory* buildSerializedNetwork(INetworkDefinition& network, IBuilderConfig& config) noexcept
+    {
+        return mImpl->buildSerializedNetwork(network, config);
+    }
+
+    //!
+    //! \brief Builds and serializes a network into stream for the given INetworkDefinition and IBuilderConfig.
+    //!
+    //! This function allows building and serialization of a network without creating an engine. The engine is
+    //! finally serialized into the writer stream.
+    //!
+    //! \param network Network definition.
+    //! \param config Builder configuration.
+    //! \param writer Output writer stream.
+    //!
+    //! \return True if the build succeeds, false otherwise.
+    //!
+    //! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
+    //!
+    //! \see INetworkDefinition, IBuilderConfig, IStreamWriter
+    //!
+    bool buildSerializedNetworkToStream(
+        INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept
+    {
+        return mImpl->buildSerializedNetworkToStream(network, config, writer);
+    }
+
+    //!
+    //! \brief Builds a network for the given INetworkDefinition and IBuilderConfig.
+    //!
+    //! \param network Network definition.
+    //! \param config Builder configuration.
+    //!
+    //! \return A pointer to an ICudaEngine object that contains an engine.
+    //!
+    //! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
+    //!
+    //! \note This function does not support \p BuilderFlag::kVERSION_COMPATIBLE.
+    //! Please use \p buildSerializedNetwork to get a version compatible engine.
+    //!
+    //! \see INetworkDefinition, IBuilderConfig, ICudaEngine
+    //!
+    nvinfer1::ICudaEngine* buildEngineWithConfig(INetworkDefinition& network, IBuilderConfig& config) noexcept
+    {
+        return mImpl->buildEngineWithConfig(network, config);
+    }
+
+    //!
+    //! \brief Checks that a network is within the scope of the IBuilderConfig settings.
+    //!
+    //! \param network The network definition to check for configuration compliance.
+    //! \param config The configuration of the builder to use when checking \p network.
+    //!
+    //! Given an INetworkDefinition, \p network, and an IBuilderConfig, \p config, check if
+    //! the network falls within the constraints of the builder configuration based on the
+    //! EngineCapability, BuilderFlag, and DeviceType. If the network is within the constraints,
+    //! then the function returns true, and false if a violation occurs. This function reports
+    //! the conditions that are violated to the registered ErrorRecorder.
+    //!
+    //! \return True if network is within the scope of the restrictions specified by the builder config,
+    //! false otherwise.
+    //!
+    //! \note This function will synchronize the CUDA stream returned by \p config.getProfileStream() before returning.
+    //!
+    bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept
+    {
+        return mImpl->isNetworkSupported(network, config);
+    }
+
+    //!
+    //! \brief Get the logger with which the builder was created.
+    //!
+    //! \return The logger.
+    //!
+    ILogger* getLogger() const noexcept
+    {
+        return mImpl->getLogger();
+    }
+
+    //!
+    //! \brief Set the maximum number of threads.
+    //!
+    //! \param maxThreads The maximum number of threads that can be used by the builder.
+    //!
+    //! \return True if successful, false otherwise.
+    //!
+    //! The default value is 1 and includes the current thread.
+    //! A value greater than 1 permits TensorRT to use multi-threaded algorithms.
+    //! A value less than 1 triggers a kINVALID_ARGUMENT error.
+    //!
+    bool setMaxThreads(int32_t maxThreads) noexcept
+    {
+        return mImpl->setMaxThreads(maxThreads);
+    }
+
+    //!
+    //! \brief Get the maximum number of threads that can be used by the builder.
+    //!
+    //! Retrieves the maximum number of threads that can be used by the builder.
+    //!
+    //! \return The maximum number of threads that can be used by the builder.
+    //!
+    //! \see setMaxThreads()
+    //!
+    int32_t getMaxThreads() const noexcept
+    {
+        return mImpl->getMaxThreads();
+    }
+
+    //!
+    //! \brief Get the local plugin registry that can be used by the builder.
+    //!
+    //! \return The local plugin registry that can be used by the builder.
+    //!
+    IPluginRegistry& getPluginRegistry() noexcept
+    {
+        return mImpl->getPluginRegistry();
+    }
+
+protected:
+    apiv::VBuilder* mImpl;
+};
+
+} // namespace nvinfer1
+
+//!
+//! Internal C entry point for creating IBuilder; called by nvinfer1::createInferBuilder() with NV_TENSORRT_VERSION.
+//! @private
+//!
+extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int32_t version) noexcept;
+
+namespace nvinfer1
+{
+namespace
+{
+
+//!
+//! \brief Create an instance of an IBuilder class.
+//!
+//! \param logger The logging class for the builder.
+//! \return The pointer returned by createInferBuilder_INTERNAL(), cast to IBuilder*.
+//! \note Unnamed namespace avoids linkage surprises when linking objects built with different versions of this header.
+//!
+inline IBuilder* createInferBuilder(ILogger& logger) noexcept
+{
+    return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
+}
+
+} // namespace
+
+//!
+//! \brief Return the plugin registry for building a Standard engine, or nullptr if no registry exists.
+//!
+//! Also returns nullptr if the input argument is not EngineCapability::kSTANDARD.
+//! Engine capabilities EngineCapability::kSTANDARD and EngineCapability::kSAFETY have distinct plugin registries.
+//! Use IPluginRegistry::registerCreator from the registry to register plugins.
+//! Plugins registered in a registry associated with a specific engine capability are only available when
+//! building engines with that engine capability.
+//!
+//! There is no plugin registry for EngineCapability::kDLA_STANDALONE.
+//!
+extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getBuilderPluginRegistry(
+    nvinfer1::EngineCapability capability) noexcept;
+
+namespace safe
+{
+//! Forward declaration
+class IPluginRegistry;
+} // namespace safe
+
+//!
+//! \brief Return the plugin registry for building a Safety engine, or nullptr if no registry exists.
+//!
+//! Also returns nullptr if the input argument is not EngineCapability::kSAFETY.
+//! When building a Standard engine, use nvinfer1::getBuilderPluginRegistry().
+//! Use safe::IPluginRegistry::registerCreator from the registry to register plugins.
+//! \deprecated This entry point is declared with TRT_DEPRECATED_API.
+extern "C" TRT_DEPRECATED_API nvinfer1::safe::IPluginRegistry* getBuilderSafePluginRegistry(
+    nvinfer1::EngineCapability capability) noexcept;
+
+} // namespace nvinfer1
+
+#endif // NV_INFER_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferImpl.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferImpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..3da6aed13ddb28cefda0372a8a1e94f9306ed4b2
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferImpl.h
@@ -0,0 +1,1325 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_IMPL_H
+#define NV_INFER_IMPL_H
+
+#include "NvInferLegacyDims.h"
+#include "NvInferRuntimeCommon.h"
+
+// @cond SuppressDoxyWarnings
+
+namespace nvinfer1
+{
+
+class ILogger;
+
+namespace v_1_0
+{
+class IProgressMonitor;
+} // namespace v_1_0
+using IProgressMonitor = v_1_0::IProgressMonitor;
+
+namespace v_1_0
+{
+class IAlgorithmSelector;
+} // namespace v_1_0
+using IAlgorithmSelector = v_1_0::IAlgorithmSelector;
+
+namespace v_1_0
+{
+class IProfiler;
+} // namespace v_1_0
+using IProfiler = v_1_0::IProfiler;
+
+namespace v_1_0
+{
+class IOutputAllocator;
+} // namespace v_1_0
+using IOutputAllocator = v_1_0::IOutputAllocator;
+
+namespace v_1_0
+{
+class IDebugListener;
+} // namespace v_1_0
+using IDebugListener = v_1_0::IDebugListener;
+
+class IActivationLayer;
+class IAlgorithm;
+class IAlgorithmContext;
+class IAlgorithmIOInfo;
+class IAlgorithmVariant;
+class IAssertionLayer;
+class IBuilder;
+class IBuilderConfig;
+class IConcatenationLayer;
+class IConditionLayer;
+class IConstantLayer;
+class IConvolutionLayer;
+class ICudaEngine;
+class ICumulativeLayer;
+class IDeconvolutionLayer;
+class IDequantizeLayer;
+class IDimensionExpr;
+class IDynamicQuantizeLayer;
+class IEinsumLayer;
+class IElementWiseLayer;
+class IEngineInspector;
+class IExecutionContext;
+class IFillLayer;
+class IGatherLayer;
+class IGridSampleLayer;
+class IHostMemory;
+class IIdentityLayer;
+class ICastLayer;
+class IIfConditional;
+class IIfConditionalInputLayer;
+class IIfConditionalOutputLayer;
+class IInt8Calibrator;
+class IIteratorLayer;
+class ILayer;
+class ILoop;
+class ILoopOutputLayer;
+class ILRNLayer;
+class IMatrixMultiplyLayer;
+class INetworkDefinition;
+class INormalizationLayer;
+class INMSLayer;
+class INonZeroLayer;
+class IOneHotLayer;
+class IOptimizationProfile;
+class IPaddingLayer;
+class IParametricReLULayer;
+class IPlugin;
+class IPluginExt;
+class IPluginFactory;
+class IPluginLayer;
+class IPluginRegistry;
+class IPluginV2Layer;
+class IRuntimeConfig;
+
+namespace v_1_0
+{
+class IPluginV3;
+} // namespace v_1_0
+using IPluginV3 = v_1_0::IPluginV3;
+
+namespace v_1_0
+{
+class IStreamReader;
+class IStreamWriter;
+} // namespace v_1_0
+using IStreamReader = v_1_0::IStreamReader;
+using IStreamWriter = v_1_0::IStreamWriter;
+namespace v_1_0
+{
+class IStreamReaderV2;
+} // namespace v_1_0
+using IStreamReaderV2 = v_1_0::IStreamReaderV2;
+
+class IPluginV3Layer;
+class IPoolingLayer;
+class IQuantizeLayer;
+class IRaggedSoftMaxLayer;
+class IRecurrenceLayer;
+class IReduceLayer;
+class IRefitter;
+class IResizeLayer;
+class IReverseSequenceLayer;
+class IRuntime;
+class IScaleLayer;
+class IScatterLayer;
+class ISelectLayer;
+class ISerializationConfig;
+class IShapeLayer;
+class IShuffleLayer;
+class ISliceLayer;
+class ISoftMaxLayer;
+class ISqueezeLayer;
+class ITensor;
+
+namespace v_1_0
+{
+struct TimingCacheKey;
+struct TimingCacheValue;
+} // namespace v_1_0
+using TimingCacheKey = v_1_0::TimingCacheKey;
+using TimingCacheValue = v_1_0::TimingCacheValue;
+
+class ITimingCache;
+class ITopKLayer;
+class ITripLimitLayer;
+class IUnaryLayer;
+class IUnsqueezeLayer;
+struct Permutation;
+class Weights;
+
+enum class ActivationType : int32_t; // Opaque declarations of scoped enums, all with underlying type int32_t; enumerators are defined elsewhere.
+enum class BoundingBoxFormat : int32_t;
+enum class BuilderFlag : int32_t;
+enum class CalibrationAlgoType : int32_t;
+enum class CumulativeOperation : int32_t;
+enum class DeviceType : int32_t;
+enum class DimensionOperation : int32_t; // Operator selector for VExprBuilder::operation.
+enum class ElementWiseOperation : int32_t;
+enum class EngineCapability : int32_t;
+enum class FillOperation : int32_t;
+enum class GatherMode : int32_t;
+enum class LayerInformationFormat : int32_t; // Format selector for the VEngineInspector getters.
+enum class LayerType : int32_t;
+enum class LoopOutput : int32_t;
+enum class MatrixOperation : int32_t;
+enum class MemoryPoolType : int32_t;
+enum class NetworkDefinitionCreationFlag : int32_t;
+enum class OptProfileSelector : int32_t; // Selector argument of the optimization-profile getters/setters below.
+enum class PaddingMode : int32_t;
+enum class PoolingType : int32_t;
+enum class ProfilingVerbosity : int32_t;
+enum class QuantizationFlag : int32_t;
+enum class ReduceOperation : int32_t;
+enum class ResizeCoordinateTransformation : int32_t;
+enum class InterpolationMode : int32_t; // Used by VResizeLayer::setResizeMode / getResizeMode.
+enum class ResizeRoundMode : int32_t;
+enum class ResizeSelector : int32_t;
+enum class ScaleMode : int32_t;
+enum class ScatterMode : int32_t;
+enum class SampleMode : int32_t; // Used by VSliceLayer::setMode / getMode.
+enum class SerializationFlag : int32_t;
+enum class TensorIOMode : int32_t;
+enum class TensorLocation : int32_t;
+enum class TopKOperation : int32_t;
+enum class TripLimit : int32_t;
+enum class UnaryOperation : int32_t;
+enum class WeightsRole : int32_t;
+enum class PreviewFeature : int32_t;
+enum class HardwareCompatibilityLevel : int32_t;
+enum class ExecutionContextAllocationStrategy : int32_t; // Argument of VCudaEngine::createExecutionContext.
+enum class RuntimePlatform : int32_t;
+enum class TilingOptimizationLevel : int32_t;
+
+
+using TacticSources = uint32_t; // Each alias in this group is a plain uint32_t; presumably a bitmask of similarly named enum values — confirm.
+using TensorFormats = uint32_t; // Used by VTensor::setAllowedFormats / getAllowedFormats.
+using BuilderFlags = uint32_t;
+using NetworkDefinitionCreationFlags = uint32_t;
+using QuantizationFlags = uint32_t;
+using TempfileControlFlags = uint32_t; // Used by VRuntime::setTempfileControlFlags / getTempfileControlFlags.
+using SerializationFlags = uint32_t;
+
+//!
+//! \file NvInferImpl.h
+//!
+//! This file contains definitions for API methods that cross the shared library boundary. These
+//! methods must not be called directly by applications; they should only be called through the
+//! API classes.
+//!
+
+namespace apiv
+{
+
+class VRoot // Common polymorphic base of the apiv interfaces declared below.
+{
+public:
+    virtual ~VRoot() noexcept = default; // Virtual destructor, so implementations can be destroyed through a VRoot pointer.
+};
+
+class VHostMemory : public VRoot // Abstract accessors for a typed memory buffer (presumably the implementation side of IHostMemory — confirm).
+{
+public:
+    virtual void* data() const noexcept = 0; // Pointer to the buffer contents.
+    virtual std::size_t size() const noexcept = 0; // Buffer size; the unit (bytes vs. elements) is not stated here — confirm.
+    virtual DataType type() const noexcept = 0; // Data type associated with the buffer.
+};
+
+class VDimensionExpr : public VRoot // Abstract queries on a dimension expression; unlike most interfaces here, these methods are not declared noexcept.
+{
+public:
+    virtual bool isConstant() const = 0; // Whether the expression is a constant.
+    virtual int64_t getConstantValue() const = 0; // Constant value; presumably only meaningful when isConstant() is true — confirm.
+    virtual bool isSizeTensor() const = 0; // Whether the expression is a size tensor (see VExprBuilder::declareSizeTensor).
+};
+
+class VExprBuilder : public VRoot // Abstract factory for IDimensionExpr objects; these methods are not declared noexcept.
+{
+public:
+    virtual IDimensionExpr const* constant(int64_t value) = 0; // Expression for a constant value.
+    virtual IDimensionExpr const* operation(
+        DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second)
+        = 0; // Expression combining two operands with the binary operator `op`.
+    virtual IDimensionExpr const* declareSizeTensor(
+        int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
+        = 0; // Declares a size tensor for an output index; `opt`/`upper` look like optimum and upper-bound values — confirm.
+};
+
+class VRuntime : public VRoot // Pure-virtual implementation interface paired with IRuntime (see getPImpl); keep the member order unchanged.
+{
+public:
+    virtual IRuntime* getPImpl() noexcept = 0; // Returns the associated IRuntime API object.
+    virtual nvinfer1::ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept = 0; // Overload reading from an in-memory blob of `size`.
+    virtual nvinfer1::ICudaEngine* deserializeCudaEngine(IStreamReader& streamReader) noexcept = 0; // Overload reading through an IStreamReader.
+    virtual void setDLACore(int32_t dlaCore) noexcept = 0;
+    virtual int32_t getDLACore() const noexcept = 0;
+    virtual int32_t getNbDLACores() const noexcept = 0;
+    virtual void setGpuAllocator(IGpuAllocator* allocator) noexcept = 0;
+    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+    virtual ILogger* getLogger() const noexcept = 0;
+    virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0; // Returns bool, unlike most setters here; presumably a success flag — confirm.
+    virtual int32_t getMaxThreads() const noexcept = 0;
+    virtual void setTemporaryDirectory(char const*) noexcept = 0; // Parameter is unnamed in this declaration.
+    virtual char const* getTemporaryDirectory() const noexcept = 0;
+    virtual void setTempfileControlFlags(TempfileControlFlags) noexcept = 0; // Parameter is unnamed; TempfileControlFlags is a uint32_t alias.
+    virtual TempfileControlFlags getTempfileControlFlags() const noexcept = 0;
+    virtual IPluginRegistry& getPluginRegistry() noexcept = 0; // Returned by reference, so never null.
+    virtual void setPluginRegistryParent(IPluginRegistry* parent) noexcept = 0;
+    virtual IRuntime* loadRuntime(char const* path) noexcept = 0; // Returns another IRuntime obtained from `path`.
+    virtual void setEngineHostCodeAllowed(bool allowed) noexcept = 0;
+    virtual bool getEngineHostCodeAllowed() const noexcept = 0;
+    // Added in TensorRT version 10.7
+    virtual nvinfer1::ICudaEngine* deserializeCudaEngineV2(IStreamReaderV2& streamReader) noexcept = 0; // Variant taking an IStreamReaderV2; declared last in the class.
+};
+
+class VRefitter : public VRoot // Pure-virtual implementation interface paired with IRefitter (see getPImpl); keep the member order unchanged.
+{
+public:
+    virtual IRefitter* getPImpl() noexcept = 0; // Returns the associated IRefitter API object.
+    virtual bool setWeights(char const* layerName, WeightsRole role, const Weights weights) noexcept = 0; // Top-level const on a by-value parameter does not change the function signature.
+    virtual bool refitCudaEngine() noexcept = 0;
+    virtual int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept = 0; // Fills caller-provided arrays sized by `size`; meaning of the return value is not shown here — confirm.
+    virtual int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept = 0; // Same calling shape as getMissing.
+    virtual bool setDynamicRange(char const* tensorName, float min, float max) noexcept = 0;
+    virtual float getDynamicRangeMin(char const* tensorName) const noexcept = 0;
+    virtual float getDynamicRangeMax(char const* tensorName) const noexcept = 0;
+    virtual int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept = 0;
+    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+    virtual bool setNamedWeights(char const* name, Weights weights) noexcept = 0; // Keyed by a weights name rather than by (layerName, role).
+    virtual int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept = 0;
+    virtual int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept = 0;
+    virtual ILogger* getLogger() const noexcept = 0;
+    virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0;
+    virtual int32_t getMaxThreads() const noexcept = 0;
+    virtual bool setNamedWeightsWithLocation(char const* name, Weights weights, TensorLocation location) noexcept = 0; // As setNamedWeights, with an explicit TensorLocation.
+    virtual Weights getNamedWeights(char const* weightsName) const noexcept = 0;
+    virtual TensorLocation getWeightsLocation(char const* weightsName) const noexcept = 0;
+    virtual bool unsetNamedWeights(char const* weightsName) noexcept = 0;
+    virtual void setWeightsValidation(bool weightsValidation) noexcept = 0;
+    virtual bool getWeightsValidation() const noexcept = 0;
+    virtual bool refitCudaEngineAsync(cudaStream_t stream) noexcept = 0; // Variant of refitCudaEngine that takes a CUDA stream.
+    virtual Weights getWeightsPrototype(char const* weightsName) const noexcept = 0;
+};
+
+class VOptimizationProfile : public VRoot // Pure-virtual interface for per-input dimension and shape-value settings; keep the member order unchanged.
+{
+public:
+    virtual bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept = 0; // `select` picks which profile entry is set.
+    virtual Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept = 0;
+    virtual bool setShapeValues(
+        char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept = 0; // 32-bit values; see setShapeValuesV2 for the int64_t form.
+    virtual int32_t getNbShapeValues(char const* inputName) const noexcept = 0;
+    virtual int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept = 0; // 32-bit values; see getShapeValuesV2.
+    virtual bool setExtraMemoryTarget(float target) noexcept = 0;
+    virtual float getExtraMemoryTarget() const noexcept = 0;
+    virtual bool isValid() const noexcept = 0;
+    // Added in TensorRT 10.11
+    TRT_NODISCARD virtual bool setShapeValuesV2(
+        char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept = 0; // Same parameters as setShapeValues but with int64_t values; TRT_NODISCARD is a macro defined outside this chunk.
+    TRT_NODISCARD virtual int64_t const* getShapeValuesV2(
+        char const* inputName, OptProfileSelector select) const noexcept = 0; // int64_t counterpart of getShapeValues.
+};
+
+class VCudaEngine : public VRoot // Pure-virtual implementation interface paired with ICudaEngine (see getPImpl); keep the member order unchanged.
+{
+public:
+    virtual ICudaEngine* getPImpl() noexcept = 0; // Returns the associated ICudaEngine API object.
+    virtual int32_t getNbLayers() const noexcept = 0;
+    virtual IHostMemory* serialize() const noexcept = 0; // See serializeWithConfig for the variant taking an ISerializationConfig.
+    virtual IExecutionContext* createExecutionContext(ExecutionContextAllocationStrategy strategy) noexcept = 0;
+    virtual IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept = 0;
+    virtual size_t getDeviceMemorySize() const noexcept = 0; // size_t result; getDeviceMemorySizeV2 returns int64_t.
+    virtual bool isRefittable() const noexcept = 0;
+    virtual char const* getName() const noexcept = 0;
+    virtual int32_t getNbOptimizationProfiles() const noexcept = 0;
+    virtual int32_t const* getProfileTensorValues(
+        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0; // 32-bit values; see getProfileTensorValuesV2.
+    virtual EngineCapability getEngineCapability() const noexcept = 0;
+    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+    virtual bool hasImplicitBatchDimension() const noexcept = 0;
+    virtual TacticSources getTacticSources() const noexcept = 0; // TacticSources is a uint32_t alias.
+    virtual ProfilingVerbosity getProfilingVerbosity() const noexcept = 0;
+    virtual IEngineInspector* createEngineInspector() const noexcept = 0;
+    virtual Dims getTensorShape(char const* tensorName) const noexcept = 0; // The getTensor* queries below are keyed by tensor name.
+    virtual DataType getTensorDataType(char const* tensorName) const noexcept = 0;
+    virtual TensorLocation getTensorLocation(char const* tensorName) const noexcept = 0;
+    virtual bool isShapeInferenceIO(char const* tensorName) const noexcept = 0;
+    virtual TensorIOMode getTensorIOMode(char const* tensorName) const noexcept = 0;
+    virtual int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept = 0;
+    virtual int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept = 0;
+    virtual TensorFormat getTensorFormat(char const* tensorName) const noexcept = 0;
+    virtual char const* getTensorFormatDesc(char const* tensorName) const noexcept = 0;
+    virtual int32_t getTensorVectorizedDim(char const* tensorName) const noexcept = 0;
+    virtual Dims getProfileShape(
+        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0;
+    virtual int32_t getNbIOTensors() const noexcept = 0;
+    virtual char const* getIOTensorName(int32_t index) const noexcept = 0;
+    virtual HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept = 0;
+    virtual int32_t getNbAuxStreams() const noexcept = 0;
+
+    virtual int32_t getTensorBytesPerComponentV2(char const* tensorName, int32_t profileIndex) const noexcept = 0; // The V2 queries in this group add a profileIndex parameter.
+    virtual int32_t getTensorComponentsPerElementV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
+    virtual TensorFormat getTensorFormatV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
+    virtual char const* getTensorFormatDescV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
+    virtual int32_t getTensorVectorizedDimV2(char const* tensorName, int32_t profileIndex) const noexcept = 0;
+
+    virtual ISerializationConfig* createSerializationConfig() noexcept = 0;
+    virtual IHostMemory* serializeWithConfig(ISerializationConfig& config) const noexcept = 0;
+
+    virtual size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept = 0; // size_t result; the ...ForProfileV2 form returns int64_t.
+    virtual IRefitter* createRefitter(ILogger& logger) noexcept = 0;
+
+    virtual bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept = 0;
+    virtual int64_t getWeightStreamingBudget() const noexcept = 0;
+    virtual int64_t getMinimumWeightStreamingBudget() const noexcept = 0;
+    virtual int64_t getStreamableWeightsSize() const noexcept = 0;
+
+    virtual bool isDebugTensor(char const* name) const noexcept = 0;
+
+    // Added in TensorRT 10.1
+    virtual bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept = 0; // Same signature shape as setWeightStreamingBudget; the semantic difference is not visible here.
+    virtual int64_t getWeightStreamingBudgetV2() const noexcept = 0;
+    virtual int64_t getWeightStreamingAutomaticBudget() const noexcept = 0;
+    virtual int64_t getWeightStreamingScratchMemorySize() const noexcept = 0;
+    virtual int64_t getDeviceMemorySizeV2() const noexcept = 0; // int64_t counterpart of getDeviceMemorySize.
+    virtual int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept = 0; // int64_t counterpart of getDeviceMemorySizeForProfile.
+    // Added in TensorRT 10.11
+    TRT_NODISCARD virtual int64_t const* getProfileTensorValuesV2(
+        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0; // int64_t counterpart of getProfileTensorValues.
+    TRT_NODISCARD virtual IExecutionContext* createExecutionContextWithRuntimeConfig(
+        IRuntimeConfig* runtimeConfig) noexcept = 0; // Context creation driven by an IRuntimeConfig (see createRuntimeConfig).
+    TRT_NODISCARD virtual IRuntimeConfig* createRuntimeConfig() noexcept = 0;
+};
+
+class VExecutionContext : public VRoot // Pure-virtual implementation interface paired with IExecutionContext (see getPImpl); keep the member order unchanged.
+{
+public:
+    virtual IExecutionContext* getPImpl() noexcept = 0; // Returns the associated IExecutionContext API object.
+    virtual void setDebugSync(bool sync) noexcept = 0;
+    virtual bool getDebugSync() const noexcept = 0;
+    virtual void setProfiler(IProfiler*) noexcept = 0; // Parameter is unnamed in this declaration.
+    virtual IProfiler* getProfiler() const noexcept = 0;
+    virtual ICudaEngine const& getEngine() const noexcept = 0; // Returned by const reference, so never null.
+    virtual void setName(char const* name) noexcept = 0;
+    virtual char const* getName() const noexcept = 0;
+    virtual void setDeviceMemory(void* memory) noexcept = 0; // Pointer only; setDeviceMemoryV2 also takes a size.
+    virtual int32_t getOptimizationProfile() const noexcept = 0;
+    virtual bool allInputDimensionsSpecified() const noexcept = 0;
+    virtual bool allInputShapesSpecified() const noexcept = 0;
+    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+    virtual bool executeV2(void* const* bindings) noexcept = 0; // Takes an array of binding pointers and no stream; compare enqueueV3.
+    virtual bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept = 0;
+    virtual void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept = 0;
+    virtual bool getEnqueueEmitsProfile() const noexcept = 0;
+    virtual bool reportToProfiler() const noexcept = 0;
+    virtual bool setInputShape(char const* tensorName, Dims const& dims) noexcept = 0;
+    virtual Dims getTensorShape(char const* tensorName) const noexcept = 0;
+    virtual Dims getTensorStrides(char const* tensorName) const noexcept = 0;
+    virtual bool setTensorAddress(char const* tensorName, void* data) noexcept = 0; // Mutable pointer; the input-specific setter below takes void const*.
+    virtual void const* getTensorAddress(char const* tensorName) const noexcept = 0;
+    virtual bool setInputTensorAddress(char const* tensorName, void const* data) noexcept = 0;
+    virtual bool setOutputTensorAddress(char const* tensorName, void* data) noexcept = 0;
+    virtual int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept = 0; // Writes up to nbMaxNames names into tensorNames; meaning of the return value is not shown here — confirm.
+    virtual bool setInputConsumedEvent(cudaEvent_t event) noexcept = 0;
+    virtual cudaEvent_t getInputConsumedEvent() const noexcept = 0;
+    virtual void* getOutputTensorAddress(char const* tensorName) const noexcept = 0;
+    virtual bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept = 0;
+    virtual IOutputAllocator* getOutputAllocator(char const* name) noexcept = 0; // Not const-qualified, unlike most getters in this class.
+    virtual int64_t getMaxOutputSize(char const* tensorName) const noexcept = 0;
+    virtual bool setTemporaryStorageAllocator(IGpuAllocator* allocator) noexcept = 0;
+    virtual IGpuAllocator* getTemporaryStorageAllocator() const noexcept = 0;
+    virtual bool enqueueV3(cudaStream_t stream) noexcept = 0; // Takes only a stream; tensor addresses are supplied through the set*TensorAddress calls.
+    virtual void setPersistentCacheLimit(size_t size) noexcept = 0;
+    virtual size_t getPersistentCacheLimit() const noexcept = 0;
+    virtual bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept = 0;
+    virtual ProfilingVerbosity getNvtxVerbosity() const noexcept = 0;
+    virtual void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept = 0; // Array of nbStreams CUDA streams.
+    virtual bool setDebugListener(IDebugListener* listener) noexcept = 0;
+    virtual IDebugListener* getDebugListener() noexcept = 0;
+    virtual bool setTensorDebugState(char const* name, bool flag) noexcept = 0;
+    virtual bool getDebugState(char const* name) const noexcept = 0;
+    virtual bool setAllTensorsDebugState(bool flag) noexcept = 0;
+    virtual size_t updateDeviceMemorySizeForShapes() noexcept = 0;
+    virtual void setDeviceMemoryV2(void* memory, int64_t size) noexcept = 0; // Variant of setDeviceMemory with an explicit size.
+    TRT_NODISCARD virtual IRuntimeConfig* getRuntimeConfig() const noexcept = 0; // TRT_NODISCARD is a macro defined outside this chunk.
+    virtual bool setUnfusedTensorsDebugState(bool flag) noexcept = 0;
+    virtual bool getUnfusedTensorsDebugState() const noexcept = 0;
+};
+
+class VEngineInspector : public VRoot // Pure-virtual implementation interface paired with IEngineInspector (see getPImpl).
+{
+public:
+    virtual IEngineInspector* getPImpl() noexcept = 0; // Returns the associated IEngineInspector API object.
+    virtual bool setExecutionContext(IExecutionContext const* context) noexcept = 0; // Context is taken as a pointer-to-const.
+    virtual IExecutionContext const* getExecutionContext() const noexcept = 0;
+    virtual char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept = 0; // Text for one layer, in the requested format.
+    virtual char const* getEngineInformation(LayerInformationFormat format) const noexcept = 0; // Text for the whole engine, in the requested format.
+    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+};
+
+class VTensor : public VRoot // Pure-virtual property interface for a tensor (presumably the implementation side of ITensor — confirm).
+{
+public:
+    virtual void setName(char const* name) noexcept = 0;
+    virtual char const* getName() const noexcept = 0;
+    virtual void setDimensions(Dims const& dimensions) noexcept = 0;
+    virtual Dims getDimensions() const noexcept = 0; // Returned by value.
+    virtual void setType(DataType type) noexcept = 0;
+    virtual DataType getType() const noexcept = 0;
+    virtual bool setDynamicRange(float min, float max) noexcept = 0; // Returns bool, unlike the other setters here; presumably a success flag — confirm.
+    virtual bool isNetworkInput() const noexcept = 0;
+    virtual bool isNetworkOutput() const noexcept = 0;
+    virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept = 0;
+    virtual bool getBroadcastAcrossBatch() const noexcept = 0;
+    virtual TensorLocation getLocation() const noexcept = 0;
+    virtual void setLocation(TensorLocation location) noexcept = 0;
+    virtual bool dynamicRangeIsSet() const noexcept = 0;
+    virtual void resetDynamicRange() noexcept = 0;
+    virtual float getDynamicRangeMin() const noexcept = 0;
+    virtual float getDynamicRangeMax() const noexcept = 0;
+    virtual void setAllowedFormats(TensorFormats formats) noexcept = 0; // TensorFormats is a uint32_t alias.
+    virtual TensorFormats getAllowedFormats() const noexcept = 0;
+    virtual bool isShapeTensor() const noexcept = 0;
+    virtual bool isExecutionTensor() const noexcept = 0;
+    virtual void setDimensionName(int32_t index, char const* name) noexcept = 0; // Name attached to the dimension at `index`.
+    virtual char const* getDimensionName(int32_t index) const noexcept = 0;
+};
+
+class VLayer : public VRoot // Pure-virtual interface for properties common to layers: type, name, inputs/outputs, precision, output types, metadata.
+{
+public:
+    virtual LayerType getType() const noexcept = 0;
+    virtual void setName(char const* name) noexcept = 0;
+    virtual char const* getName() const noexcept = 0;
+    virtual int32_t getNbInputs() const noexcept = 0;
+    virtual ITensor* getInput(int32_t index) const noexcept = 0; // Returns a pointer, so a null result is possible in principle — confirm when.
+    virtual int32_t getNbOutputs() const noexcept = 0;
+    virtual ITensor* getOutput(int32_t index) const noexcept = 0;
+    virtual void setInput(int32_t index, ITensor& tensor) noexcept = 0; // Tensor is taken by reference; there is no matching setOutput.
+    virtual void setPrecision(DataType dataType) noexcept = 0;
+    virtual DataType getPrecision() const noexcept = 0;
+    virtual bool precisionIsSet() const noexcept = 0;
+    virtual void resetPrecision() noexcept = 0;
+    virtual void setOutputType(int32_t index, DataType dataType) noexcept = 0; // Output types are tracked per output index.
+    virtual DataType getOutputType(int32_t index) const noexcept = 0;
+    virtual bool outputTypeIsSet(int32_t index) const noexcept = 0;
+    virtual void resetOutputType(int32_t index) noexcept = 0;
+    virtual void setMetadata(char const* docString) noexcept = 0; // Metadata is a C string.
+    virtual char const* getMetadata() const noexcept = 0;
+};
+
+class VConvolutionLayer : public VRoot // Pure-virtual parameter interface for a convolution layer; same member list as VDeconvolutionLayer.
+{
+public:
+    virtual void setNbOutputMaps(int64_t nbOutputMaps) noexcept = 0; // Counts are 64-bit here.
+    virtual int64_t getNbOutputMaps() const noexcept = 0;
+    virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
+    virtual int64_t getNbGroups() const noexcept = 0;
+    virtual void setKernelWeights(Weights weights) noexcept = 0; // Weights is passed and returned by value.
+    virtual Weights getKernelWeights() const noexcept = 0;
+    virtual void setBiasWeights(Weights weights) noexcept = 0;
+    virtual Weights getBiasWeights() const noexcept = 0;
+    virtual void setPrePadding(Dims const&  padding) noexcept = 0; // Pre- and post-padding are set separately; setPaddingNd below takes a single Dims.
+    virtual Dims getPrePadding() const noexcept = 0;
+    virtual void setPostPadding(Dims const& padding) noexcept = 0;
+    virtual Dims getPostPadding() const noexcept = 0;
+    virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
+    virtual PaddingMode getPaddingMode() const noexcept = 0;
+    virtual void setKernelSizeNd(Dims const& kernelSize) noexcept = 0; // The *Nd members take a Dims value.
+    virtual Dims getKernelSizeNd() const noexcept = 0;
+    virtual void setStrideNd(Dims const& stride) noexcept = 0;
+    virtual Dims getStrideNd() const noexcept = 0;
+    virtual void setPaddingNd(Dims const& padding) noexcept = 0;
+    virtual Dims getPaddingNd() const noexcept = 0;
+    virtual void setDilationNd(Dims const& dilation) noexcept = 0;
+    virtual Dims getDilationNd() const noexcept = 0;
+};
+
+class VActivationLayer : public VRoot // Pure-virtual parameter interface for an activation layer: a type plus float alpha/beta.
+{
+public:
+    virtual void setActivationType(ActivationType type) noexcept = 0;
+    virtual ActivationType getActivationType() const noexcept = 0;
+    virtual void setAlpha(float alpha) noexcept = 0; // Both setters are declared before both getters; keep this order.
+    virtual void setBeta(float beta) noexcept = 0;
+    virtual float getAlpha() const noexcept = 0;
+    virtual float getBeta() const noexcept = 0;
+};
+
+class VPoolingLayer : public VRoot // Pure-virtual parameter interface for a pooling layer.
+{
+public:
+    virtual void setPoolingType(PoolingType type) noexcept = 0;
+    virtual PoolingType getPoolingType() const noexcept = 0;
+    virtual void setBlendFactor(float blendFactor) noexcept = 0;
+    virtual float getBlendFactor() const noexcept = 0;
+    virtual void setAverageCountExcludesPadding(bool exclusive) noexcept = 0; // Per the name, controls whether padding is excluded from the averaging count.
+    virtual bool getAverageCountExcludesPadding() const noexcept = 0;
+    virtual void setPrePadding(Dims const& padding) noexcept = 0; // Pre- and post-padding are set separately; setPaddingNd below takes a single Dims.
+    virtual Dims getPrePadding() const noexcept = 0;
+    virtual void setPostPadding(Dims const& padding) noexcept = 0;
+    virtual Dims getPostPadding() const noexcept = 0;
+    virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
+    virtual PaddingMode getPaddingMode() const noexcept = 0;
+    virtual void setWindowSizeNd(Dims const& windowSize) noexcept = 0; // The *Nd members take a Dims value.
+    virtual Dims getWindowSizeNd() const noexcept = 0;
+    virtual void setStrideNd(Dims const& stride) noexcept = 0;
+    virtual Dims getStrideNd() const noexcept = 0;
+    virtual void setPaddingNd(Dims const& padding) noexcept = 0;
+    virtual Dims getPaddingNd() const noexcept = 0;
+};
+
+class VLRNLayer : public VRoot // Pure-virtual parameter interface for an LRN layer: window size plus float alpha, beta and k.
+{
+public:
+    virtual void setWindowSize(int64_t windowSize) noexcept = 0; // Window size is 64-bit.
+    virtual int64_t getWindowSize() const noexcept = 0;
+    virtual void setAlpha(float alpha) noexcept = 0;
+    virtual float getAlpha() const noexcept = 0;
+    virtual void setBeta(float beta) noexcept = 0;
+    virtual float getBeta() const noexcept = 0;
+    virtual void setK(float k) noexcept = 0; // k is a float here; VTopKLayer::setK takes int32_t.
+    virtual float getK() const noexcept = 0;
+};
+
+class VScaleLayer : public VRoot // Pure-virtual parameter interface for a scale layer: mode, shift/scale/power weights, channel axis.
+{
+public:
+    virtual void setMode(ScaleMode mode) noexcept = 0;
+    virtual ScaleMode getMode() const noexcept = 0;
+    virtual void setShift(Weights shift) noexcept = 0; // Weights are passed and returned by value.
+    virtual Weights getShift() const noexcept = 0;
+    virtual void setScale(Weights scale) noexcept = 0;
+    virtual Weights getScale() const noexcept = 0;
+    virtual void setPower(Weights power) noexcept = 0;
+    virtual Weights getPower() const noexcept = 0;
+    virtual int32_t getChannelAxis() const noexcept = 0; // Getter is declared before the setter for this pair; keep this order.
+    virtual void setChannelAxis(int32_t channelAxis) noexcept = 0;
+};
+
+class VSoftMaxLayer : public VRoot // Pure-virtual parameter interface for a softmax layer.
+{
+public:
+    virtual void setAxes(uint32_t axes) noexcept = 0; // Axes are passed as one uint32_t; presumably a bitmask of axis indices — confirm.
+    virtual uint32_t getAxes() const noexcept = 0;
+};
+
+class VConcatenationLayer : public VRoot // Pure-virtual parameter interface for a concatenation layer.
+{
+public:
+    virtual void setAxis(int32_t axis) noexcept = 0; // A single signed axis index.
+    virtual int32_t getAxis() const noexcept = 0;
+};
+
+class VDeconvolutionLayer : public VRoot // Pure-virtual parameter interface for a deconvolution layer; same member list as VConvolutionLayer.
+{
+public:
+    virtual void setNbOutputMaps(int64_t nbOutputMaps) noexcept = 0; // Counts are 64-bit here.
+    virtual int64_t getNbOutputMaps() const noexcept = 0;
+    virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
+    virtual int64_t getNbGroups() const noexcept = 0;
+    virtual void setKernelWeights(Weights weights) noexcept = 0; // Weights is passed and returned by value.
+    virtual Weights getKernelWeights() const noexcept = 0;
+    virtual void setBiasWeights(Weights weights) noexcept = 0;
+    virtual Weights getBiasWeights() const noexcept = 0;
+    virtual void setPrePadding(Dims const& padding) noexcept = 0; // Pre- and post-padding are set separately; setPaddingNd below takes a single Dims.
+    virtual Dims getPrePadding() const noexcept = 0;
+    virtual void setPostPadding(Dims const& padding) noexcept = 0;
+    virtual Dims getPostPadding() const noexcept = 0;
+    virtual void setPaddingMode(PaddingMode paddingMode) noexcept = 0;
+    virtual PaddingMode getPaddingMode() const noexcept = 0;
+    virtual void setKernelSizeNd(Dims const& kernelSize) noexcept = 0; // The *Nd members take a Dims value.
+    virtual Dims getKernelSizeNd() const noexcept = 0;
+    virtual void setStrideNd(Dims const& stride) noexcept = 0;
+    virtual Dims getStrideNd() const noexcept = 0;
+    virtual void setPaddingNd(Dims const& padding) noexcept = 0;
+    virtual Dims getPaddingNd() const noexcept = 0;
+    virtual void setDilationNd(Dims const& dilation) noexcept = 0;
+    virtual Dims getDilationNd() const noexcept = 0;
+};
+
+class VElementWiseLayer : public VRoot // Pure-virtual parameter interface for an elementwise layer.
+{
+public:
+    virtual void setOperation(ElementWiseOperation op) noexcept = 0; // Selects the ElementWiseOperation to apply.
+    virtual ElementWiseOperation getOperation() const noexcept = 0;
+};
+
+class VGatherLayer : public VRoot // Pure-virtual parameter interface for a gather layer: axis, elementwise-dimension count, mode.
+{
+public:
+    virtual void setGatherAxis(int32_t axis) noexcept = 0;
+    virtual int32_t getGatherAxis() const noexcept = 0;
+    virtual void setNbElementWiseDims(int32_t k) noexcept = 0; // `k` is the number of elementwise dimensions, per the method name.
+    virtual int32_t getNbElementWiseDims() const noexcept = 0;
+    virtual void setMode(GatherMode mode) noexcept = 0;
+    virtual GatherMode getMode() const noexcept = 0;
+};
+
+class VPluginLayer : public VRoot // Pure-virtual interface exposing the IPlugin attached to a plugin layer.
+{
+public:
+    virtual IPlugin& getPlugin() noexcept = 0; // Returned by reference, so never null.
+};
+
+class VPluginV2Layer : public VRoot // Pure-virtual interface exposing the IPluginV2 attached to a plugin layer.
+{
+public:
+    virtual IPluginV2& getPlugin() noexcept = 0; // Returned by reference, so never null.
+};
+
+class VPluginV3Layer : public VRoot // Pure-virtual interface exposing the IPluginV3 attached to a plugin layer.
+{
+public:
+    virtual IPluginV3& getPlugin() noexcept = 0; // Returned by reference; IPluginV3 is the alias for v_1_0::IPluginV3 declared earlier in the file.
+};
+
+class VUnaryLayer : public VRoot // Pure-virtual parameter interface for a unary layer.
+{
+public:
+    virtual void setOperation(UnaryOperation op) noexcept = 0; // Selects the UnaryOperation to apply.
+    virtual UnaryOperation getOperation() const noexcept = 0;
+};
+
+class VReduceLayer : public VRoot // Pure-virtual parameter interface for a reduce layer: operation, axes, keep-dimensions flag.
+{
+public:
+    virtual void setOperation(ReduceOperation op) noexcept = 0;
+    virtual ReduceOperation getOperation() const noexcept = 0;
+    virtual void setReduceAxes(uint32_t reduceAxes) noexcept = 0; // Axes are passed as one uint32_t; presumably a bitmask of axis indices — confirm.
+    virtual uint32_t getReduceAxes() const noexcept = 0;
+    virtual void setKeepDimensions(bool keepDimensions) noexcept = 0;
+    virtual bool getKeepDimensions() const noexcept = 0;
+};
+
+class VPaddingLayer : public VRoot // Pure-virtual parameter interface for a padding layer: separate pre- and post-padding, each a Dims.
+{
+public:
+    virtual void setPrePaddingNd(Dims const& padding) noexcept = 0;
+    virtual Dims getPrePaddingNd() const noexcept = 0; // Returned by value.
+    virtual void setPostPaddingNd(Dims const& padding) noexcept = 0;
+    virtual Dims getPostPaddingNd() const noexcept = 0;
+};
+
+class VShuffleLayer : public VRoot // Pure-virtual parameter interface for a shuffle layer: first transpose, reshape dimensions, second transpose.
+{
+public:
+    virtual void setFirstTranspose(Permutation const& permutation) noexcept = 0;
+    virtual Permutation const& getFirstTranspose() const noexcept = 0; // Returns a reference, not a copy; its lifetime is tied to the implementation object — confirm before storing it.
+    virtual void setReshapeDimensions(Dims const& dimensions) noexcept = 0;
+    virtual Dims getReshapeDimensions() const noexcept = 0; // Returned by value, unlike the Permutation getters.
+    virtual void setSecondTranspose(Permutation const& permutation) noexcept = 0;
+    virtual Permutation const& getSecondTranspose() const noexcept = 0; // Returns a reference, as getFirstTranspose does.
+    virtual void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept = 0; // Per the name, controls whether a 0 in the reshape dimensions is treated as a placeholder.
+    virtual bool getZeroIsPlaceholder() const noexcept = 0;
+};
+
+class VSliceLayer : public VRoot // Pure-virtual parameter interface for a slice layer: start, size, stride, mode, axes.
+{
+public:
+    virtual void setStart(Dims const& start) noexcept = 0; // start, size, stride and axes are all Dims values.
+    virtual Dims getStart() const noexcept = 0;
+    virtual void setSize(Dims const& size) noexcept = 0;
+    virtual Dims getSize() const noexcept = 0;
+    virtual void setStride(Dims const& stride) noexcept = 0;
+    virtual Dims getStride() const noexcept = 0;
+    virtual void setMode(SampleMode mode) noexcept = 0; // Mode is a SampleMode value.
+    virtual SampleMode getMode() const noexcept = 0;
+    virtual void setAxes(Dims const& axes) noexcept = 0; // Axes are a Dims here, not a uint32_t as in VSoftMaxLayer/VReduceLayer.
+    virtual Dims getAxes() const noexcept = 0;
+};
+
+class VShapeLayer : public VRoot // Declares no members of its own; only the VRoot virtual destructor is inherited.
+{
+public:
+};
+
+class VTopKLayer : public VRoot // Pure-virtual parameter interface for a top-k layer: operation, k, reduce axes.
+{
+public:
+    virtual void setOperation(TopKOperation op) noexcept = 0;
+    virtual TopKOperation getOperation() const noexcept = 0;
+    virtual void setK(int32_t k) noexcept = 0; // k is a 32-bit integer here.
+    virtual int32_t getK() const noexcept = 0;
+    virtual void setReduceAxes(uint32_t reduceAxes) noexcept = 0; // Axes are passed as one uint32_t; presumably a bitmask of axis indices — confirm.
+    virtual uint32_t getReduceAxes() const noexcept = 0;
+};
+
+class VMatrixMultiplyLayer : public VRoot // Pure-virtual parameter interface for a matrix-multiply layer.
+{
+public:
+    virtual void setOperation(int32_t index, MatrixOperation op) noexcept = 0; // Operation is stored per `index`; presumably the index of the input operand — confirm.
+    virtual MatrixOperation getOperation(int32_t index) const noexcept = 0;
+};
+
+class VNonZeroLayer : public VRoot // Declares no members of its own; only the VRoot virtual destructor is inherited.
+{
+public:
+};
+
+class VRaggedSoftMaxLayer : public VRoot // Declares no members of its own; only the VRoot virtual destructor is inherited.
+{
+public:
+};
+
+class VIdentityLayer : public VRoot // Declares no members of its own; only the VRoot virtual destructor is inherited.
+{
+public:
+};
+
+class VCastLayer : public VRoot // Pure-virtual parameter interface for a cast layer.
+{
+public:
+    virtual void setToType(DataType toType) noexcept = 0; // Target DataType of the cast.
+    virtual DataType getToType() const noexcept = 0;
+};
+
+class VConstantLayer : public VRoot // Pure-virtual parameter interface for a constant layer: weights plus dimensions.
+{
+public:
+    virtual void setWeights(Weights weights) noexcept = 0; // Weights is passed and returned by value.
+    virtual Weights getWeights() const noexcept = 0;
+    virtual void setDimensions(Dims const& dimensions) noexcept = 0;
+    virtual Dims getDimensions() const noexcept = 0;
+};
+
+class VParametricReLULayer : public VRoot // Declares no members of its own; only the VRoot virtual destructor is inherited.
+{
+public:
+};
+
+class VResizeLayer : public VRoot // Pure-virtual parameter interface for a resize layer.
+{
+public:
+    virtual void setOutputDimensions(Dims const& dimensions) noexcept = 0;
+    virtual Dims getOutputDimensions() const noexcept = 0;
+    virtual void setScales(float const* scales, int32_t nbScales) noexcept = 0; // Array of nbScales floats.
+    virtual int32_t getScales(int32_t size, float* scales) const noexcept = 0; // Writes into a caller buffer of capacity `size`; meaning of the return value is not shown here — confirm.
+    virtual void setResizeMode(InterpolationMode interpolationMode) noexcept = 0; // The mode type is InterpolationMode although the method is named ...ResizeMode.
+    virtual InterpolationMode getResizeMode() const noexcept = 0;
+    virtual void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept = 0;
+    virtual ResizeCoordinateTransformation getCoordinateTransformation() const noexcept = 0;
+    virtual void setSelectorForSinglePixel(ResizeSelector selector) noexcept = 0;
+    virtual ResizeSelector getSelectorForSinglePixel() const noexcept = 0;
+    virtual void setNearestRounding(ResizeRoundMode value) noexcept = 0;
+    virtual ResizeRoundMode getNearestRounding() const noexcept = 0;
+    virtual void setCubicCoeff(float value) noexcept = 0;
+    virtual float getCubicCoeff() const noexcept = 0;
+    virtual void setExcludeOutside(bool value) noexcept = 0;
+    virtual bool getExcludeOutside() const noexcept = 0;
+};
+
+class VLoopBoundaryLayer : public VRoot // Pure-virtual interface exposing the ILoop a loop boundary layer belongs to.
+{
+public:
+    virtual ILoop* getLoop() const noexcept = 0; // Returns a pointer to the ILoop.
+};
+
+class VRecurrenceLayer : public VRoot // Declares no members of its own; only the VRoot virtual destructor is inherited.
+{
+public:
+};
+
+class VLoopOutputLayer : public VRoot // Pure-virtual parameter interface for a loop output layer.
+{
+public:
+    virtual LoopOutput getLoopOutput() const noexcept = 0; // Read-only here: there is no setter (the kind is passed to VLoop::addLoopOutput).
+    virtual void setAxis(int32_t axis) noexcept = 0;
+    virtual int32_t getAxis() const noexcept = 0;
+};
+
+class VTripLimitLayer : public VRoot // Pure-virtual interface exposing the TripLimit kind of a trip-limit layer.
+{
+public:
+    virtual TripLimit getTripLimit() const noexcept = 0; // Read-only here: there is no setter (the limit is passed to VLoop::addTripLimit).
+};
+
+class VIteratorLayer : public VRoot // Pure-virtual parameter interface for an iterator layer: axis and reverse flag.
+{
+public:
+    virtual void setAxis(int32_t axis) noexcept = 0;
+    virtual int32_t getAxis() const noexcept = 0;
+    virtual void setReverse(bool reverse) noexcept = 0; // Same two properties that VLoop::addIterator accepts at creation.
+    virtual bool getReverse() const noexcept = 0;
+};
+class VLoop : public VRoot // Pure-virtual interface for adding loop-related layers and naming the loop.
+{
+public:
+    virtual IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept = 0;
+    virtual ITripLimitLayer* addTripLimit(ITensor& tensor, TripLimit limit) noexcept = 0;
+    virtual IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept = 0; // Default arguments on a virtual are resolved from the static type at the call site.
+    virtual ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept = 0; // Same caveat for the default `axis`.
+    virtual void setName(char const* name) noexcept = 0;
+    virtual char const* getName() const noexcept = 0;
+};
+
+class VConditionalBoundaryLayer : public VRoot // Pure-virtual interface exposing the IIfConditional a conditional boundary layer belongs to.
+{
+public:
+    virtual IIfConditional* getConditional() const noexcept = 0; // Returns a pointer to the IIfConditional.
+};
+
+class VConditionLayer : public VRoot
+{
+public:
+};
+
+class VConditionalInputLayer : public VRoot
+{
+public:
+};
+
+class VConditionalOutputLayer : public VRoot
+{
+public:
+};
+
+class VIfConditional : public VRoot
+{
+public:
+    virtual IConditionLayer* setCondition(ITensor& tensor) noexcept = 0;
+    virtual IIfConditionalInputLayer* addInput(ITensor& tensor) noexcept = 0;
+    virtual IIfConditionalOutputLayer* addOutput(ITensor& trueTensor, ITensor& falseTensor) noexcept = 0;
+    virtual void setName(char const* name) noexcept = 0;
+    virtual char const* getName() const noexcept = 0;
+};
+
+class VSelectLayer : public VRoot
+{
+};
+
+class VAssertionLayer : public VRoot
+{
+public:
+    virtual void setMessage(char const* message) noexcept = 0;
+    virtual char const* getMessage() const noexcept = 0;
+};
+
+class VFillLayer : public VRoot
+{
+public:
+    virtual void setDimensions(Dims const& dimensions) noexcept = 0;
+    virtual Dims getDimensions() const noexcept = 0;
+    virtual void setOperation(FillOperation op) noexcept = 0;
+    virtual FillOperation getOperation() const noexcept = 0;
+    virtual void setAlpha(double alpha) noexcept = 0;
+    virtual double getAlpha() const noexcept = 0;
+    virtual void setBeta(double beta) noexcept = 0;
+    virtual double getBeta() const noexcept = 0;
+    virtual void setAlphaInt64(int64_t alpha) noexcept = 0;
+    virtual int64_t getAlphaInt64() const noexcept = 0;
+    virtual void setBetaInt64(int64_t beta) noexcept = 0;
+    virtual int64_t getBetaInt64() const noexcept = 0;
+    virtual bool isAlphaBetaInt64() const noexcept = 0;
+    virtual DataType getToType() const noexcept = 0;
+    virtual void setToType(DataType toType) noexcept = 0;
+};
+
+class VQuantizeLayer : public VRoot
+{
+public:
+    virtual int32_t getAxis() const noexcept = 0;
+    virtual void setAxis(int32_t axis) noexcept = 0;
+    virtual DataType getToType() const noexcept = 0;
+    virtual void setToType(DataType toType) noexcept = 0;
+};
+
+class VDequantizeLayer : public VRoot
+{
+public:
+    virtual int32_t getAxis() const noexcept = 0;
+    virtual void setAxis(int32_t axis) noexcept = 0;
+    virtual DataType getToType() const noexcept = 0;
+    virtual void setToType(DataType toType) noexcept = 0;
+};
+
+class VDynamicQuantizeLayer : public VRoot // Pure-virtual accessors: axis, block size, scale type and output type.
+{
+public:
+    virtual int32_t getAxis() const noexcept = 0;
+    virtual void setAxis(int32_t axis) noexcept = 0;
+    virtual int32_t getBlockSize() const noexcept = 0;
+    virtual void setBlockSize(int32_t blockSize) noexcept = 0; // parameter was misnamed "axis"
+    virtual DataType getScaleType() const noexcept = 0;
+    virtual void setScaleType(DataType scaleType) noexcept = 0; // parameter was misnamed "axis"
+    virtual DataType getToType() const noexcept = 0;
+    virtual void setToType(DataType toType) noexcept = 0;
+};
+
+class VScatterLayer : public VRoot // Pure-virtual accessors for the scatter mode and the axis.
+{
+public:
+    virtual void setMode(ScatterMode mode) noexcept = 0;
+    virtual ScatterMode getMode() const noexcept = 0;
+    virtual void setAxis(int32_t axis) noexcept = 0;
+    virtual int32_t getAxis() const noexcept = 0;
+}; // class VScatterLayer
+
+class VEinsumLayer : public VRoot
+{
+public:
+    virtual bool setEquation(char const* equation) noexcept = 0;
+    virtual char const* getEquation() const noexcept = 0;
+};
+
+class VOneHotLayer : public VRoot
+{
+public:
+    virtual int32_t getAxis() const noexcept = 0;
+    virtual void setAxis(int32_t axis) noexcept = 0;
+}; // class VOneHotLayer
+
+class VGridSampleLayer : public VRoot
+{
+public:
+    virtual void setInterpolationMode(InterpolationMode mode) noexcept = 0;
+    virtual InterpolationMode getInterpolationMode() const noexcept = 0;
+    virtual void setAlignCorners(bool alignCorners) noexcept = 0;
+    virtual bool getAlignCorners() const noexcept = 0;
+    virtual bool setSampleMode(SampleMode mode) noexcept = 0;
+    virtual SampleMode getSampleMode() const noexcept = 0;
+}; // class VGridSampleLayer
+
+class VNMSLayer : public VRoot
+{
+public:
+    virtual void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept = 0;
+    virtual BoundingBoxFormat getBoundingBoxFormat() const noexcept = 0;
+    virtual void setTopKBoxLimit(int32_t limit) noexcept = 0;
+    virtual int32_t getTopKBoxLimit() const noexcept = 0;
+}; // class VNMSLayer
+
+class VReverseSequenceLayer : public VRoot
+{
+public:
+    virtual void setBatchAxis(int32_t batchAxis) noexcept = 0;
+    virtual int32_t getBatchAxis() const noexcept = 0;
+
+    virtual void setSequenceAxis(int32_t sequenceAxis) noexcept = 0;
+    virtual int32_t getSequenceAxis() const noexcept = 0;
+}; // class VReverseSequenceLayer
+
+class VNormalizationLayer : public VRoot
+{
+public:
+    virtual void setEpsilon(float eps) noexcept = 0;
+    virtual float getEpsilon() const noexcept = 0;
+    virtual void setAxes(uint32_t axesMask) noexcept = 0;
+    virtual uint32_t getAxes() const noexcept = 0;
+    virtual void setNbGroups(int64_t nbGroups) noexcept = 0;
+    virtual int64_t getNbGroups() const noexcept = 0;
+    virtual void setComputePrecision(DataType type) noexcept = 0;
+    virtual DataType getComputePrecision() const noexcept = 0;
+}; // class VNormalizationLayer
+
+class VSqueezeLayer : public VRoot
+{
+};
+
+class VUnsqueezeLayer : public VRoot
+{
+};
+
+class VCumulativeLayer : public VRoot
+{
+public:
+    virtual bool setOperation(CumulativeOperation op) noexcept = 0;
+    virtual CumulativeOperation getOperation() const noexcept = 0;
+    virtual void setExclusive(bool exclusive) noexcept = 0;
+    virtual bool getExclusive() const noexcept = 0;
+    virtual void setReverse(bool reverse) noexcept = 0;
+    virtual bool getReverse() const noexcept = 0;
+}; // class VCumulativeLayer
+
+
+class VNetworkDefinition : public VRoot
+{
+public:
+    virtual ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept = 0;
+    virtual void markOutput(ITensor& tensor) noexcept = 0;
+    virtual IActivationLayer* addActivation(ITensor& input, ActivationType type) noexcept = 0;
+    virtual ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept = 0;
+    virtual IScaleLayer* addScale(
+        ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept = 0;
+    virtual ISoftMaxLayer* addSoftMax(ITensor& input) noexcept = 0;
+    virtual IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept = 0;
+    virtual IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) noexcept = 0;
+    virtual IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept = 0;
+    virtual IShuffleLayer* addShuffle(ITensor& input) noexcept = 0;
+    virtual int32_t getNbLayers() const noexcept = 0;
+    virtual ILayer* getLayer(int32_t index) const noexcept = 0;
+    virtual int32_t getNbInputs() const noexcept = 0;
+    virtual ITensor* getInput(int32_t index) const noexcept = 0;
+    virtual int32_t getNbOutputs() const noexcept = 0;
+    virtual ITensor* getOutput(int32_t index) const noexcept = 0;
+    virtual IReduceLayer* addReduce(
+        ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
+        = 0;
+    virtual ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept = 0;
+    virtual IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept = 0;
+    virtual IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) noexcept = 0;
+    virtual IMatrixMultiplyLayer* addMatrixMultiply(
+        ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept = 0;
+    virtual IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept = 0;
+    virtual IIdentityLayer* addIdentity(ITensor& input) noexcept = 0;
+    virtual void removeTensor(ITensor& tensor) noexcept = 0;
+    virtual void unmarkOutput(ITensor& tensor) noexcept = 0;
+    virtual IPluginV2Layer* addPluginV2(ITensor* const* inputs, int32_t nbInputs, IPluginV2& plugin) noexcept = 0;
+    virtual IPluginV3Layer* addPluginV3(ITensor* const* inputs, int32_t nbInputs, ITensor* const* shapeInputs,
+        int32_t nbShapeInputs, IPluginV3& plugin) noexcept = 0;
+    virtual ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept = 0;
+    virtual void setName(char const* name) noexcept = 0;
+    virtual char const* getName() const noexcept = 0;
+    virtual IShapeLayer* addShape(ITensor& input) noexcept = 0;
+    virtual bool hasImplicitBatchDimension() const noexcept = 0;
+    virtual bool markOutputForShapes(ITensor& tensor) noexcept = 0;
+    virtual bool unmarkOutputForShapes(ITensor& tensor) noexcept = 0;
+    virtual IParametricReLULayer* addParametricReLU(ITensor& input, ITensor& slope) noexcept = 0;
+    virtual IConvolutionLayer* addConvolutionNd(
+        ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
+        = 0;
+    virtual IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept = 0;
+    virtual IDeconvolutionLayer* addDeconvolutionNd(
+        ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
+        = 0;
+    virtual IScaleLayer* addScaleNd(
+        ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept = 0;
+    virtual IResizeLayer* addResize(ITensor& input) noexcept = 0;
+    virtual ILoop* addLoop() noexcept = 0;
+    virtual ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept = 0;
+    virtual IFillLayer* addFill(Dims const& dimensions, FillOperation op) noexcept = 0;
+    virtual IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept = 0;
+    virtual bool setWeightsName(Weights weights, char const* name) noexcept = 0;
+    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+    virtual IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale) noexcept = 0;
+    virtual IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale) noexcept = 0;
+    virtual IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept = 0;
+    virtual IIfConditional* addIfConditional() noexcept = 0;
+    virtual IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept = 0;
+    virtual IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept = 0;
+    virtual IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept = 0;
+    virtual IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept = 0;
+    virtual INonZeroLayer* addNonZero(ITensor& input) noexcept = 0;
+    virtual IGridSampleLayer* addGridSample(ITensor& input, ITensor& grid) noexcept = 0;
+    virtual INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept = 0;
+    virtual IReverseSequenceLayer* addReverseSequence(ITensor& input, ITensor& sequenceLens) noexcept = 0;
+    virtual INormalizationLayer* addNormalization(
+        ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept = 0;
+    virtual ICastLayer* addCast(ITensor& input, DataType toType) noexcept = 0;
+    virtual IBuilder& getBuilder() const noexcept = 0;
+    virtual NetworkDefinitionCreationFlags getFlags() const noexcept = 0;
+    virtual bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept = 0;
+    virtual IQuantizeLayer* addQuantizeV2(ITensor& input, ITensor& scale, DataType outputType) noexcept = 0;
+    virtual IDequantizeLayer* addDequantizeV2(ITensor& input, ITensor& scale, DataType outputType) noexcept = 0;
+    virtual IFillLayer* addFillV2(Dims const& dimensions, FillOperation op, DataType outputType) noexcept = 0;
+    virtual bool markDebug(ITensor& tensor) noexcept = 0;
+    virtual bool unmarkDebug(ITensor& tensor) noexcept = 0;
+    virtual bool isDebugTensor(ITensor const& tensor) const noexcept = 0;
+    virtual bool markWeightsRefittable(char const* name) noexcept = 0;
+    virtual bool unmarkWeightsRefittable(char const* name) noexcept = 0;
+    virtual bool areWeightsMarkedRefittable(char const* name) const noexcept = 0;
+    virtual ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept = 0;
+    virtual IUnsqueezeLayer* addUnsqueeze(ITensor& input, ITensor& axes) noexcept = 0;
+    virtual IDynamicQuantizeLayer* addDynamicQuantize(
+        ITensor& input, int32_t axis, int32_t blockSize, DataType toType, DataType scaleType) noexcept = 0;
+    virtual ICumulativeLayer* addCumulative(
+        ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept = 0;
+    virtual bool markUnfusedTensorsAsDebugTensors() noexcept = 0;
+    virtual bool unmarkUnfusedTensorsAsDebugTensors() noexcept = 0;
+};
+
+class VAlgorithmIOInfo : public VRoot
+{
+public:
+    virtual DataType getDataType() const noexcept = 0;
+    virtual Dims getStrides() const noexcept = 0;
+    virtual int64_t getVectorizedDim() const noexcept = 0;
+    virtual int64_t getComponentsPerElement() const noexcept = 0;
+};
+
+class VAlgorithmVariant : public VRoot
+{
+public:
+    virtual int64_t getImplementation() const noexcept = 0;
+    virtual int64_t getTactic() const noexcept = 0;
+};
+
+class VAlgorithmContext : public VRoot
+{
+public:
+    virtual char const* getName() const noexcept = 0;
+    virtual Dims getDimensions(int32_t index, OptProfileSelector select) const noexcept = 0;
+    virtual int32_t getNbInputs() const noexcept = 0;
+    virtual int32_t getNbOutputs() const noexcept = 0;
+};
+
+class VAlgorithm : public VRoot
+{
+public:
+    virtual IAlgorithmVariant const& getAlgorithmVariant() const noexcept = 0;
+    virtual float getTimingMSec() const noexcept = 0;
+    virtual std::size_t getWorkspaceSize() const noexcept = 0;
+    virtual IAlgorithmIOInfo const* getAlgorithmIOInfoByIndex(int32_t index) const noexcept = 0;
+};
+
+
+class VTimingCache : public VRoot
+{
+public:
+    virtual nvinfer1::IHostMemory* serialize() const noexcept = 0;
+    virtual bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept = 0;
+    virtual bool reset() noexcept = 0;
+    virtual int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept = 0;
+    virtual TimingCacheValue query(TimingCacheKey const& key) const noexcept = 0;
+    virtual bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept = 0;
+};
+
+class VBuilderConfig : public VRoot // Pure-virtual interface of builder-configuration getters and setters.
+{
+public:
+    virtual void setAvgTimingIterations(int32_t avgTiming) noexcept = 0;
+    virtual int32_t getAvgTimingIterations() const noexcept = 0;
+    virtual void setEngineCapability(EngineCapability capability) noexcept = 0;
+    virtual EngineCapability getEngineCapability() const noexcept = 0;
+    virtual void setInt8Calibrator(IInt8Calibrator* calibrator) noexcept = 0;
+    virtual IInt8Calibrator* getInt8Calibrator() const noexcept = 0;
+    virtual void setFlags(BuilderFlags builderFlags) noexcept = 0;
+    virtual BuilderFlags getFlags() const noexcept = 0;
+    virtual void clearFlag(BuilderFlag builderFlag) noexcept = 0;
+    virtual void setFlag(BuilderFlag builderFlag) noexcept = 0;
+    virtual bool getFlag(BuilderFlag builderFlag) const noexcept = 0;
+    virtual void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept = 0;
+    virtual DeviceType getDeviceType(ILayer const* layer) const noexcept = 0;
+    virtual bool isDeviceTypeSet(ILayer const* layer) const noexcept = 0;
+    virtual void resetDeviceType(ILayer const* layer) noexcept = 0;
+    virtual bool canRunOnDLA(ILayer const* layer) const noexcept = 0;
+    virtual void setDLACore(int32_t dlaCore) noexcept = 0;
+    virtual int32_t getDLACore() const noexcept = 0;
+    virtual void setDefaultDeviceType(DeviceType deviceType) noexcept = 0;
+    virtual DeviceType getDefaultDeviceType() const noexcept = 0;
+    virtual void reset() noexcept = 0;
+    virtual void setProfileStream(const cudaStream_t stream) noexcept = 0; // NOTE(review): top-level const on a by-value parameter has no effect in a declaration.
+    virtual cudaStream_t getProfileStream() const noexcept = 0;
+    virtual int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept = 0;
+    virtual int32_t getNbOptimizationProfiles() const noexcept = 0;
+    virtual void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept = 0;
+    virtual ProfilingVerbosity getProfilingVerbosity() const noexcept = 0;
+    virtual void setAlgorithmSelector(IAlgorithmSelector* selector) noexcept = 0;
+    virtual IAlgorithmSelector* getAlgorithmSelector() const noexcept = 0;
+    virtual bool setCalibrationProfile(IOptimizationProfile const* profile) noexcept = 0;
+    virtual IOptimizationProfile const* getCalibrationProfile() noexcept = 0; // NOTE(review): the only getter here that is not const-qualified; left as declared because the qualifier is part of the virtual signature.
+    virtual void setQuantizationFlags(QuantizationFlags flags) noexcept = 0;
+    virtual QuantizationFlags getQuantizationFlags() const noexcept = 0;
+    virtual void clearQuantizationFlag(QuantizationFlag flag) noexcept = 0;
+    virtual void setQuantizationFlag(QuantizationFlag flag) noexcept = 0;
+    virtual bool getQuantizationFlag(QuantizationFlag flag) const noexcept = 0;
+    virtual bool setTacticSources(TacticSources tacticSources) noexcept = 0;
+    virtual TacticSources getTacticSources() const noexcept = 0;
+    virtual nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept = 0;
+    virtual bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept = 0;
+    virtual nvinfer1::ITimingCache const* getTimingCache() const noexcept = 0;
+    virtual void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept = 0;
+    virtual std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept = 0;
+    virtual void setPreviewFeature(PreviewFeature feature, bool enable) noexcept = 0;
+    virtual bool getPreviewFeature(PreviewFeature feature) const noexcept = 0;
+    virtual void setBuilderOptimizationLevel(int32_t level) noexcept = 0;
+    virtual int32_t getBuilderOptimizationLevel() const noexcept = 0;
+    virtual void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept = 0;
+    virtual HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept = 0;
+    virtual void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept = 0;
+    virtual char const* getPluginToSerialize(int32_t index) const noexcept = 0;
+    virtual int32_t getNbPluginsToSerialize() const noexcept = 0;
+    virtual void setMaxAuxStreams(int32_t nbStreams) noexcept = 0;
+    virtual int32_t getMaxAuxStreams() const noexcept = 0;
+    virtual void setProgressMonitor(IProgressMonitor* monitor) noexcept = 0;
+    virtual IProgressMonitor* getProgressMonitor() const noexcept = 0;
+    virtual void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept = 0;
+    virtual RuntimePlatform getRuntimePlatform() const noexcept = 0;
+    virtual void setMaxNbTactics(int32_t maxTactics) noexcept = 0;
+    virtual int32_t getMaxNbTactics() const noexcept = 0;
+    virtual bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept = 0;
+    virtual TilingOptimizationLevel getTilingOptimizationLevel() const noexcept = 0;
+    virtual bool setL2LimitForTiling(int64_t size) noexcept = 0;
+    virtual int64_t getL2LimitForTiling() const noexcept = 0;
+    virtual bool setRemoteAutoTuningConfig(char const* config) noexcept = 0;
+    virtual char const* getRemoteAutoTuningConfig() const noexcept = 0;
+};
+
+class VSerializationConfig : public VRoot
+{
+public:
+    virtual bool setFlags(SerializationFlags serializationFlags) noexcept = 0;
+    virtual SerializationFlags getFlags() const noexcept = 0;
+    virtual bool clearFlag(SerializationFlag serializationFlag) noexcept = 0;
+    virtual bool setFlag(SerializationFlag serializationFlag) noexcept = 0;
+    virtual bool getFlag(SerializationFlag serializationFlag) const noexcept = 0;
+};
+
+class VBuilder : public VRoot
+{
+public:
+    virtual bool platformHasFastFp16() const noexcept = 0;
+    virtual bool platformHasFastInt8() const noexcept = 0;
+    virtual int32_t getMaxDLABatchSize() const noexcept = 0;
+    virtual int32_t getNbDLACores() const noexcept = 0;
+    virtual void setGpuAllocator(IGpuAllocator* allocator) noexcept = 0;
+    virtual nvinfer1::IBuilderConfig* createBuilderConfig() noexcept = 0;
+    virtual nvinfer1::INetworkDefinition* createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept = 0;
+    virtual nvinfer1::IOptimizationProfile* createOptimizationProfile() noexcept = 0;
+    virtual void setErrorRecorder(IErrorRecorder* recorder) noexcept = 0;
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+    virtual void reset() noexcept = 0;
+    virtual bool platformHasTf32() const noexcept = 0;
+    virtual nvinfer1::IHostMemory* buildSerializedNetwork(
+        INetworkDefinition& network, IBuilderConfig& config) noexcept = 0;
+    virtual bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept = 0;
+    virtual ILogger* getLogger() const noexcept = 0;
+    virtual bool setMaxThreads(int32_t maxThreads) noexcept = 0;
+    virtual int32_t getMaxThreads() const noexcept = 0;
+    virtual IPluginRegistry& getPluginRegistry() noexcept = 0;
+    virtual ICudaEngine* buildEngineWithConfig(INetworkDefinition& network, IBuilderConfig& config) noexcept = 0;
+    virtual bool buildSerializedNetworkToStream(
+        INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept = 0;
+};
+
+class VRuntimeConfig : public VRoot
+{
+public:
+    virtual IRuntimeConfig* getPImpl() noexcept = 0;
+    virtual void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept = 0;
+    virtual ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept = 0;
+};
+
+
+} // namespace apiv
+} // namespace nvinfer1
+
+// @endcond
+
+#endif // NV_INFER_RUNTIME_IMPL_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferLegacyDims.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferLegacyDims.h
new file mode 100644
index 0000000000000000000000000000000000000000..d1b94e38deee74586cd94922fde5f4affd540f9b
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferLegacyDims.h
@@ -0,0 +1,206 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_LEGACY_DIMS_H
+#define NV_INFER_LEGACY_DIMS_H
+
+#define NV_INFER_INTERNAL_INCLUDE 1
+#include "NvInferRuntimeBase.h" // IWYU pragma: export
+#undef NV_INFER_INTERNAL_INCLUDE
+
+//!
+//! \file NvInferLegacyDims.h
+//!
+//! This file contains declarations of legacy dimensions types which use channel
+//! semantics in their names, and declarations on which those types rely.
+//!
+
+//!
+//! \namespace nvinfer1
+//!
+//! \brief The TensorRT API version 1 namespace.
+//!
+namespace nvinfer1
+{
+//!
+//! \class Dims2
+//!
+//! \brief Descriptor for two-dimensional data.
+//!
+class Dims2 : public Dims
+{
+public:
+    //!
+    //! \brief Default constructor: a rank-2 descriptor with both extents zero.
+    //!
+    Dims2() : Dims2(0, 0) {}
+
+    //!
+    //! \brief Build a rank-2 descriptor from two explicit extents.
+    //!
+    //! Every slot of d beyond the first two is set to zero.
+    //!
+    //! \param d0 Value stored in d[0].
+    //! \param d1 Value stored in d[1].
+    //!
+    Dims2(int64_t d0, int64_t d1)
+    {
+        nbDims = 2;
+        d[0] = d0;
+        d[1] = d1;
+        int64_t slot{nbDims};
+        while (slot < Dims::MAX_DIMS)
+        {
+            d[slot++] = 0;
+        }
+    }
+};
+
+//!
+//! \class DimsHW
+//!
+//! \brief Descriptor for two-dimensional spatial data.
+//!
+class DimsHW : public Dims2
+{
+public:
+    //!
+    //! \brief Default constructor.
+    //!
+    //! Defers entirely to the Dims2 default constructor.
+    //!
+    DimsHW() : Dims2() {}
+
+    //!
+    //! \brief Build a DimsHW from a height and a width.
+    //!
+    //! \param height Forwarded to Dims2 as the first element.
+    //! \param width Forwarded to Dims2 as the second element.
+    //!
+    DimsHW(int64_t height, int64_t width) : Dims2(height, width) {}
+
+    //!
+    //! \brief Mutable access to the height.
+    //!
+    //! \return A reference to d[0]; assigning through it changes the height.
+    //!
+    int64_t& h()
+    {
+        int64_t& height = d[0];
+        return height;
+    }
+
+    //!
+    //! \brief Read the height.
+    //!
+    //! \return The value of d[0].
+    //!
+    int64_t h() const
+    {
+        int64_t const height = d[0];
+        return height;
+    }
+
+    //!
+    //! \brief Mutable access to the width.
+    //!
+    //! \return A reference to d[1]; assigning through it changes the width.
+    //!
+    int64_t& w()
+    {
+        int64_t& width = d[1];
+        return width;
+    }
+
+    //!
+    //! \brief Read the width.
+    //!
+    //! \return The value of d[1].
+    //!
+    int64_t w() const
+    {
+        int64_t const width = d[1];
+        return width;
+    }
+};
+
+//!
+//! \class Dims3
+//!
+//! \brief Descriptor for three-dimensional data.
+//!
+class Dims3 : public Dims2
+{
+public:
+    //!
+    //! \brief Default constructor: a rank-3 descriptor with all three extents zero.
+    //!
+    Dims3() : Dims3(0, 0, 0) {}
+
+    //!
+    //! \brief Build a rank-3 descriptor from three explicit extents.
+    //!
+    //! The first two extents are set by the Dims2 base constructor; this
+    //! constructor then raises nbDims to 3 and stores the third extent.
+    //!
+    //! \param d0 Value stored in d[0].
+    //! \param d1 Value stored in d[1].
+    //! \param d2 Value stored in d[2].
+    //!
+    Dims3(int64_t d0, int64_t d1, int64_t d2)
+        : Dims2(d0, d1)
+    {
+        d[2] = d2;
+        nbDims = 3;
+    }
+};
+
+//!
+//! \class Dims4
+//!
+//! \brief Descriptor for four-dimensional data.
+//!
+class Dims4 : public Dims3
+{
+public:
+    //!
+    //! \brief Default constructor: a rank-4 descriptor with all four extents zero.
+    //!
+    Dims4() : Dims4(0, 0, 0, 0) {}
+
+    //!
+    //! \brief Build a rank-4 descriptor from four explicit extents.
+    //!
+    //! The first three extents are set by the Dims3 base constructor; this
+    //! constructor then raises nbDims to 4 and stores the fourth extent.
+    //!
+    //! \param d0 Value stored in d[0].
+    //! \param d1 Value stored in d[1].
+    //! \param d2 Value stored in d[2].
+    //! \param d3 Value stored in d[3].
+    //!
+    Dims4(int64_t d0, int64_t d1, int64_t d2, int64_t d3)
+        : Dims3(d0, d1, d2)
+    {
+        d[3] = d3;
+        nbDims = 4;
+    }
+};
+
+} // namespace nvinfer1
+
+#endif // NV_INFER_LEGACY_DIMS_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPlugin.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPlugin.h
new file mode 100644
index 0000000000000000000000000000000000000000..006cc555de22c9ad164b10811d0bf09f9c7602c7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPlugin.h
@@ -0,0 +1,42 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_PLUGIN_H
+#define NV_INFER_PLUGIN_H
+
+#include "NvInfer.h"
+#include "NvInferPluginUtils.h"
+//!
+//! \file NvInferPlugin.h
+//!
+//! This is the API for the Nvidia provided TensorRT plugins.
+//!
+
+extern "C"
+{
+    //!
+    //! \brief Initialize and register all the existing TensorRT plugins to the Plugin Registry with an optional
+    //! namespace. The plugin library author should ensure that this function name is unique to the library. This
+    //! function should be called once before accessing the Plugin Registry.
+    //! \param logger Logger object to print plugin registration information
+    //! \param libNamespace Namespace used to register all the plugins in this library
+    //!
+    TENSORRTAPI bool initLibNvInferPlugins(void* logger, char const* libNamespace);
+
+} // extern "C"
+
+#endif // NV_INFER_PLUGIN_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPluginBase.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPluginBase.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f3b2676ef4f57bab8c69a5346fe47f73fd8ff93
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPluginBase.h
@@ -0,0 +1,291 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_PLUGIN_BASE_H
+#define NV_INFER_PLUGIN_BASE_H
+
+#if !defined(NV_INFER_INTERNAL_INCLUDE)
+static_assert(false, "Do not directly include this file. Include NvInferRuntime.h or NvInferPluginUtils.h");
+#endif
+
+#define NV_INFER_INTERNAL_INCLUDE 1
+#include "NvInferRuntimeBase.h" // IWYU pragma: export
+#undef NV_INFER_INTERNAL_INCLUDE
+namespace nvinfer1
+{
+
+//!
+//! \enum PluginFieldType
+//!
+//! \brief The possible field types for custom layer.
+//!
+enum class PluginFieldType : int32_t
+{
+    //! FP16 field type.
+    kFLOAT16 = 0,
+    //! FP32 field type.
+    kFLOAT32 = 1,
+    //! FP64 field type.
+    kFLOAT64 = 2,
+    //! INT8 field type.
+    kINT8 = 3,
+    //! INT16 field type.
+    kINT16 = 4,
+    //! INT32 field type.
+    kINT32 = 5,
+    //! char field type.
+    kCHAR = 6,
+    //! nvinfer1::Dims field type.
+    kDIMS = 7,
+    //! Unknown field type.
+    kUNKNOWN = 8,
+    //! BF16 field type.
+    kBF16 = 9,
+    //! INT64 field type.
+    kINT64 = 10,
+    //! FP8 field type.
+    kFP8 = 11,
+    //! INT4 field type.
+    kINT4 = 12,
+    //! FP4 field type.
+    kFP4 = 13,
+};
+
+//!
+//! \class PluginField
+//!
+//! \brief Structure containing plugin attribute field names and associated data
+//! This information can be parsed to decode necessary plugin metadata
+//!
+//!
+class PluginField
+{
+public:
+    //! Plugin field attribute name
+    AsciiChar const* name;
+    //! Plugin field attribute data
+    void const* data;
+    //! Plugin field attribute type
+    PluginFieldType type;
+    //! Number of data entries in the Plugin attribute
+    int32_t length;
+
+    PluginField(AsciiChar const* const name_ = nullptr, void const* const data_ = nullptr,
+        PluginFieldType const type_ = PluginFieldType::kUNKNOWN, int32_t const length_ = 0) noexcept
+        : name(name_)
+        , data(data_)
+        , type(type_)
+        , length(length_)
+    {
+    }
+};
+
+//!
+//! \struct PluginFieldCollection
+//!
+//! \brief Plugin field collection struct.
+//!
+struct PluginFieldCollection
+{
+    //! Number of PluginField entries.
+    int32_t nbFields{};
+    //! Pointer to PluginField entries.
+    PluginField const* fields{};
+};
+
+//!
+//! \enum TensorRTPhase
+//!
+//! \brief Indicates a phase of operation of TensorRT
+//!
+enum class TensorRTPhase : int32_t
+{
+    //! Build phase of TensorRT
+    kBUILD = 0,
+    //! Execution phase of TensorRT
+    kRUNTIME = 1
+};
+
+//!
+//! \enum PluginCapabilityType
+//!
+//! \brief Enumerates the different capability types an IPluginV3 object may have
+//!
+enum class PluginCapabilityType : int32_t
+{
+    //! Core capability. Every IPluginV3 object must have this.
+    kCORE = 0,
+    //! Build capability. IPluginV3 objects provided to TensorRT build phase must have this.
+    kBUILD = 1,
+    //! Runtime capability. IPluginV3 objects provided to TensorRT build and execution phases must have this.
+    kRUNTIME = 2
+};
+
+namespace v_1_0
+{
+class IPluginCapability : public IVersionedInterface
+{
+};
+
+class IPluginResource : public IVersionedInterface
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IPluginResource", 1, 0};
+    }
+    //!
+    //! \brief Free the underlying resource
+    //!
+    //! This will only be called for IPluginResource objects that were produced from IPluginResource::clone()
+    //!
+    //! The IPluginResource object on which release() is called must still be in a clone-able state
+    //! after release() returns
+    //!
+    //! \return 0 for success, else non-zero
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: No; this method is not required to be thread-safe
+    //!
+    virtual int32_t release() noexcept = 0;
+
+    //!
+    //! \brief Clone the resource object
+    //!
+    //! \note Resource initialization (if any) may be skipped for non-cloned objects since only clones will be
+    //! registered by TensorRT
+    //!
+    //! \return Pointer to cloned object. nullptr if there was an issue.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes; this method is required to be thread-safe and may be called from multiple threads.
+    //!
+    virtual IPluginResource* clone() noexcept = 0;
+
+    ~IPluginResource() noexcept override = default;
+
+    IPluginResource() = default;
+    IPluginResource(IPluginResource const&) = default;
+    IPluginResource(IPluginResource&&) = default;
+    IPluginResource& operator=(IPluginResource const&) & = default;
+    IPluginResource& operator=(IPluginResource&&) & = default;
+}; // class IPluginResource
+
+class IPluginCreatorInterface : public IVersionedInterface
+{
+public:
+    ~IPluginCreatorInterface() noexcept override = default;
+
+protected:
+    IPluginCreatorInterface() = default;
+    IPluginCreatorInterface(IPluginCreatorInterface const&) = default;
+    IPluginCreatorInterface(IPluginCreatorInterface&&) = default;
+    IPluginCreatorInterface& operator=(IPluginCreatorInterface const&) & = default;
+    IPluginCreatorInterface& operator=(IPluginCreatorInterface&&) & = default;
+};
+
+class IPluginV3 : public IVersionedInterface
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN", 1, 0};
+    }
+
+    //! \brief Return a pointer to plugin object implementing the specified PluginCapabilityType.
+    //!
+    //! \note IPluginV3 objects added for the build phase (through addPluginV3()) must return valid objects for
+    //! PluginCapabilityType::kCORE, PluginCapabilityType::kBUILD and PluginCapabilityType::kRUNTIME.
+    //!
+    //! \note IPluginV3 objects added for the runtime phase must return valid objects for
+    //! PluginCapabilityType::kCORE and PluginCapabilityType::kRUNTIME.
+    //!
+    //! \see TensorRTPhase
+    //! \see IPluginCreatorV3One::createPlugin()
+    //!
+    virtual IPluginCapability* getCapabilityInterface(PluginCapabilityType type) noexcept = 0;
+
+    //!
+    //! \brief Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object with
+    //! these parameters. The cloned object must be in a fully initialized state.
+    //!
+    //! \note The cloned object must return valid objects through getCapabilityInterface() for at least the same
+    //! PluginCapabilityTypes as the original object.
+    //!
+    //! \return A cloned plugin object in an initialized state with the same parameters as the current object.
+    //!         nullptr must be returned if the cloning fails.
+    //!
+    virtual IPluginV3* clone() noexcept = 0;
+};
+} // namespace v_1_0
+
+//!
+//! \class IPluginResource
+//!
+//! \brief Interface for plugins to define custom resources that could be shared through the plugin registry
+//!
+//! \see IPluginRegistry::acquirePluginResource
+//! \see IPluginRegistry::releasePluginResource
+//!
+using IPluginResource = v_1_0::IPluginResource;
+
+//!
+//! \class IPluginCreatorInterface
+//!
+//! \brief Base class for all plugin creator versions.
+//!
+//! \see IPluginCreator and IPluginRegistry
+//!
+using IPluginCreatorInterface = v_1_0::IPluginCreatorInterface;
+
+//!
+//! \class IPluginV3
+//!
+//! \brief Plugin class for the V3 generation of user-implemented layers.
+//!
+//! IPluginV3 acts as a wrapper around the plugin capability interfaces that define the actual behavior of the plugin.
+//!
+//! \see IPluginCapability
+//! \see IPluginCreatorV3One
+//! \see IPluginRegistry
+//!
+using IPluginV3 = v_1_0::IPluginV3;
+
+//!
+//! \class IPluginCapability
+//!
+//! \brief Base class for plugin capability interfaces
+//!
+//!  IPluginCapability represents a split in TensorRT V3 plugins to sub-objects that expose different types of
+//!  capabilities a plugin may have, as opposed to a single interface which defines all capabilities and behaviors of a
+//!  plugin.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \see PluginCapabilityType
+//!
+using IPluginCapability = v_1_0::IPluginCapability;
+} // namespace nvinfer1
+
+#endif /* NV_INFER_PLUGIN_BASE_H */
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPluginUtils.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPluginUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..bfc924e58ab69336b61e35c4f44b9ead582b8185
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferPluginUtils.h
@@ -0,0 +1,204 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_PLUGIN_UTILS_H
+#define NV_INFER_PLUGIN_UTILS_H
+
+#include "NvInferRuntimeCommon.h"
+
+//!
+//! \file NvInferPluginUtils.h
+//!
+//! This is the API for the Nvidia provided TensorRT plugin utilities.
+//! It lists all the parameters utilized by the TensorRT plugins.
+//!
+
+namespace nvinfer1
+{
+namespace plugin
+{
+
+//!
+//! \struct PriorBoxParameters
+//!
+//! \brief The PriorBox plugin layer generates the prior boxes of designated sizes and aspect ratios across all
+//! dimensions (H x W).
+//!
+//! PriorBoxParameters defines a set of parameters for creating the PriorBox plugin layer.
+//!
+struct PriorBoxParameters
+{
+    float *minSize;          //!< Minimum box size in pixels. Cannot be nullptr.
+    float *maxSize;          //!< Maximum box size in pixels. Can be nullptr.
+    float *aspectRatios;     //!< Aspect ratios of the boxes. Can be nullptr.
+    int32_t numMinSize;      //!< Number of elements in minSize. Must be larger than 0.
+    int32_t numMaxSize;      //!< Number of elements in maxSize. Can be 0 or same as numMinSize.
+    int32_t numAspectRatios; //!< Number of elements in aspectRatios. Can be 0.
+    bool flip;               //!< If true, will flip each aspect ratio. For example,
+                             //!< if there is an aspect ratio "r", the aspect ratio "1.0/r" will be generated as well.
+    bool clip;               //!< If true, will clip the prior so that it is within [0,1].
+    float variance[4];       //!< Variance for adjusting the prior boxes.
+    int32_t imgH;            //!< Image height. If 0, then the H dimension of the data tensor will be used.
+    int32_t imgW;            //!< Image width. If 0, then the W dimension of the data tensor will be used.
+    float stepH;             //!< Step in H. If 0, then (float)imgH/h will be used where h is the H dimension of the 1st input tensor.
+    float stepW;             //!< Step in W. If 0, then (float)imgW/w will be used where w is the W dimension of the 1st input tensor.
+    float offset;            //!< Offset to the top left corner of each cell.
+};
+
+//!
+//! \struct RPROIParams
+//!
+//! \brief RPROIParams is used to create the RPROIPlugin instance.
+//!
+struct RPROIParams
+{
+    int32_t poolingH;          //!< Height of the output in pixels after ROI pooling on feature map.
+    int32_t poolingW;          //!< Width of the output in pixels after ROI pooling on feature map.
+    int32_t featureStride;     //!< Feature stride; ratio of input image size to feature map size.
+                               //!< Assuming that max pooling layers in the neural network use square filters.
+    int32_t preNmsTop;         //!< Number of proposals to keep before applying NMS.
+    int32_t nmsMaxOut;         //!< Number of remaining proposals after applying NMS.
+    int32_t anchorsRatioCount; //!< Number of anchor box ratios.
+    int32_t anchorsScaleCount; //!< Number of anchor box scales.
+    float iouThreshold;        //!< IoU (Intersection over Union) threshold used for the NMS step.
+    float minBoxSize;          //!< Minimum allowed bounding box size before scaling, used for anchor box calculation.
+    float spatialScale;        //!< Spatial scale between the input image and the last feature map.
+};
+
+//!
+//! \struct GridAnchorParameters
+//!
+//! \brief The Anchor Generator plugin layer generates the prior boxes of designated sizes and aspect ratios across all dimensions (H x W).
+//! GridAnchorParameters defines a set of parameters for creating the plugin layer for all feature maps.
+//!
+struct GridAnchorParameters
+{
+    float minSize;           //!< Scale of anchors corresponding to finest resolution.
+    float maxSize;           //!< Scale of anchors corresponding to coarsest resolution.
+    float* aspectRatios;     //!< List of aspect ratios to place on each grid point.
+    int32_t numAspectRatios; //!< Number of elements in aspectRatios.
+    int32_t H;               //!< Height of feature map to generate anchors for.
+    int32_t W;               //!< Width of feature map to generate anchors for.
+    float variance[4];       //!< Variance for adjusting the prior boxes.
+};
+
+//!
+//! \enum CodeTypeSSD
+//!
+//! \brief The type of encoding used for decoding the bounding boxes and loc_data.
+//!
+//! \deprecated Deprecated in TensorRT 10.0. DetectionOutput plugin is deprecated.
+//!
+enum class CodeTypeSSD : int32_t
+{
+    CORNER TRT_DEPRECATED_ENUM = 0,      //!< Use box corners.
+    CENTER_SIZE TRT_DEPRECATED_ENUM = 1, //!< Use box centers and size.
+    CORNER_SIZE TRT_DEPRECATED_ENUM = 2, //!< Use box corners and size.
+    TF_CENTER TRT_DEPRECATED_ENUM = 3    //!< Use box centers and size but flip x and y coordinates.
+};
+
+//!
+//! \struct DetectionOutputParameters
+//!
+//! \brief The DetectionOutput plugin layer generates the detection output
+//! based on location and confidence predictions by doing non maximum suppression.
+//!
+//! This plugin first decodes the bounding boxes based on the anchors generated.
+//! It then performs non_max_suppression on the decoded bounding boxes.
+//! DetectionOutputParameters defines a set of parameters for creating the DetectionOutput plugin layer.
+//!
+//! \deprecated Deprecated in TensorRT 10.0. DetectionOutput plugin is deprecated.
+//!
+struct TRT_DEPRECATED DetectionOutputParameters
+{
+    bool shareLocation;           //!< If true, bounding boxes are shared among different classes.
+    bool varianceEncodedInTarget; //!< If true, variance is encoded in target.
+                                  //!< Otherwise we need to adjust the predicted offset accordingly.
+    int32_t backgroundLabelId;    //!< Background label ID. If there is no background class, set it as -1.
+    int32_t numClasses;           //!< Number of classes to be predicted.
+    int32_t topK;                 //!< Number of boxes per image with top confidence scores that are fed
+                                  //!< into the NMS algorithm.
+    int32_t keepTopK;             //!< Number of total bounding boxes to be kept per image after NMS step.
+    float confidenceThreshold;    //!< Only consider detections whose confidences are larger than a threshold.
+    float nmsThreshold;           //!< Threshold to be used in NMS.
+    CodeTypeSSD codeType;         //!< Type of coding method for bbox.
+    int32_t inputOrder[3];        //!< Specifies the order of inputs {loc_data, conf_data, priorbox_data}.
+    bool confSigmoid;             //!< Set to true to calculate sigmoid of confidence scores.
+    bool isNormalized;            //!< Set to true if bounding box data is normalized by the network.
+    bool isBatchAgnostic{true};   //!< Defaults to true. Set to false if prior boxes are unique per batch.
+};
+
+//!
+//! \brief When performing yolo9000, softmaxTree helps to apply softmax to the confidence scores,
+//! so that each element gets its precise classification through the word-tree structured classification definition.
+//!
+struct softmaxTree
+{
+    int32_t* leaf;        //!< NOTE(review): presumably a per-node leaf flag (layout resembles darknet's tree struct) - confirm.
+    int32_t n;            //!< Presumably the number of nodes in the tree - confirm.
+    int32_t* parent;      //!< Presumably the parent index of each node - confirm.
+    int32_t* child;       //!< Presumably the first-child index of each node - confirm.
+    int32_t* group;       //!< Presumably the group index of each node - confirm.
+    char** name;          //!< Presumably the label of each node - confirm.
+    int32_t groups;       //!< Presumably the number of groups - confirm.
+    int32_t* groupSize;   //!< Presumably the number of nodes in each group - confirm.
+    int32_t* groupOffset; //!< Presumably the index of the first node of each group - confirm.
+};
+
+//!
+//! \brief The Region plugin layer performs region proposal calculation.
+//!
+//! Generate 5 bounding boxes per cell (for yolo9000, generate 3 bounding boxes per cell).
+//! For each box, calculate the probabilities of object detections from 80 pre-defined classifications
+//! (yolo9000 has 9418 pre-defined classifications, and these 9418 items are organized as a word-tree structure).
+//! RegionParameters defines a set of parameters for creating the Region plugin layer.
+//!
+struct RegionParameters
+{
+    int32_t num;         //!< Number of predicted bounding boxes for each grid cell.
+    int32_t coords;      //!< Number of coordinates for a bounding box.
+    int32_t classes;     //!< Number of classifications to be predicted.
+    softmaxTree* smTree; //!< Helping structure to do softmax on confidence scores.
+};
+
+//!
+//! \brief The NMSParameters are used by the BatchedNMSPlugin for performing
+//! the non_max_suppression operation over boxes for object detection networks.
+//!
+//! \deprecated Deprecated in TensorRT 10.0. BatchedNMSPlugin plugin is deprecated.
+//!
+struct TRT_DEPRECATED NMSParameters
+{
+    bool shareLocation;        //!< If set to true, the boxes inputs are shared across all classes.
+                               //!< If set to false, the boxes input should account for per class box data.
+    int32_t backgroundLabelId; //!< Label ID for the background class.
+                               //!< If there is no background class, set it as -1
+    int32_t numClasses;        //!< Number of classes in the network.
+    int32_t topK;              //!< Number of bounding boxes to be fed into the NMS step.
+    int32_t keepTopK;          //!< Number of total bounding boxes to be kept per image after NMS step.
+                               //!< Should be less than or equal to the topK value.
+    float scoreThreshold;      //!< Scalar threshold for score (low scoring boxes are removed).
+    float iouThreshold;        //!< A scalar threshold for IOU (new boxes that have high IOU overlap
+                               //!< with previously selected boxes are removed).
+    bool isNormalized;         //!< Set to false, if the box coordinates are not normalized,
+                               //!< i.e. not in the range [0,1]. Defaults to false.
+};
+
+} // namespace plugin
+} // namespace nvinfer1
+
+#endif // NV_INFER_PLUGIN_UTILS_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntime.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntime.h
new file mode 100644
index 0000000000000000000000000000000000000000..f56edeb387840b87c146a32288538ac07f9b4bea
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntime.h
@@ -0,0 +1,5663 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_RUNTIME_H
+#define NV_INFER_RUNTIME_H
+
+//!
+//! \file NvInferRuntime.h
+//!
+//! This is the top-level API file for TensorRT extended runtime library.
+//!
+
+#include "NvInferImpl.h"
+#define NV_INFER_INTERNAL_INCLUDE 1
+#include "NvInferPluginBase.h" // IWYU pragma: export
+#undef NV_INFER_INTERNAL_INCLUDE
+#include "NvInferRuntimeCommon.h"
+
+namespace nvinfer1
+{
+
+class IExecutionContext; //!< Forward declaration of IExecutionContext for use by other interfaces.
+class ICudaEngine;       //!< Forward declaration of ICudaEngine for use by other interfaces.
+class IPluginFactory;    //!< Forward declaration of IPluginFactory for use by other interfaces.
+class IEngineInspector;  //!< Forward declaration of IEngineInspector for use by other interfaces.
+
+//!
+//! \class INoCopy
+//!
+//! \brief Base class for all TensorRT interfaces that are implemented by the TensorRT libraries
+//!
+//! Objects of such classes are not movable or copyable, and should only be manipulated
+//! via pointers.
+//!
+
+class INoCopy
+{
+protected:
+    INoCopy() = default;                               //!< Constructible only from derived classes.
+    virtual ~INoCopy() = default;                      //!< Protected: code outside the hierarchy cannot delete via INoCopy*.
+    INoCopy(INoCopy const& other) = delete;            //!< Not copy-constructible.
+    INoCopy& operator=(INoCopy const& other) = delete; //!< Not copy-assignable.
+    INoCopy(INoCopy&& other) = delete;                 //!< Not move-constructible.
+    INoCopy& operator=(INoCopy&& other) = delete;      //!< Not move-assignable.
+};
+
+//!
+//! \enum EngineCapability
+//!
+//! \brief List of supported engine capability flows.
+//!
+//! \details The EngineCapability determines the restrictions of a network during build time and what runtime
+//! it targets. When BuilderFlag::kSAFETY_SCOPE is not set (by default), EngineCapability::kSTANDARD does not provide
+//! any restrictions on functionality and the resulting serialized engine can be executed with TensorRT's standard
+//! runtime APIs in the nvinfer1 namespace. EngineCapability::kSAFETY provides a restricted subset of network
+//! operations that are safety certified and the resulting serialized engine can be executed with TensorRT's safe
+//! runtime APIs in the nvinfer1::safe namespace. EngineCapability::kDLA_STANDALONE provides a restricted subset of
+//! network operations that are DLA compatible and the resulting serialized engine can be executed using standalone
+//! DLA runtime APIs. See sampleCudla for an example of integrating cuDLA APIs with TensorRT APIs.
+//!
+enum class EngineCapability : int32_t
+{
+    //!
+    //! Standard: TensorRT flow without targeting the safety runtime.
+    //! This flow supports both DeviceType::kGPU and DeviceType::kDLA.
+    //!
+    kSTANDARD = 0,
+
+    //!
+    //! Safety: TensorRT flow with restrictions targeting the safety runtime.
+    //! See safety documentation for list of supported layers and formats.
+    //! This flow supports only DeviceType::kGPU.
+    //!
+    //! This flag is only supported in NVIDIA Drive(R) products.
+    kSAFETY = 1,
+
+    //!
+    //! DLA Standalone: TensorRT flow with restrictions targeting external, to TensorRT, DLA runtimes.
+    //! See DLA documentation for list of supported layers and formats.
+    //! This flow supports only DeviceType::kDLA.
+    //!
+    kDLA_STANDALONE = 2,
+};
+
+namespace impl
+{
+//! Maximum number of elements in EngineCapability enum. \see EngineCapability
+template <>
+struct EnumMaxImpl<EngineCapability>
+{
+    static constexpr int32_t kVALUE = 3;
+};
+} // namespace impl
+
+//!
+//! \class Weights
+//!
+//! \brief An array of weights used as a layer parameter.
+//!
+//! When using the DLA, the cumulative size of all Weights used in a network
+//! must be less than 512MB in size. If the build option kGPU_FALLBACK is specified,
+//! then multiple DLA sub-networks may be generated from the single original network.
+//!
+//! The weights are held by reference until the engine has been built. Therefore the data referenced
+//! by \p values field should be preserved until the build is complete.
+//!
+//! The term "empty weights" refers to Weights with no weight coefficients ( \p count == 0 and \p values == nullptr).
+//!
+class Weights
+{
+public:
+    DataType type;      //!< The type of the weights.
+    void const* values; //!< The weight values, in a contiguous array.
+    int64_t count;      //!< The number of weights in the array.
+};
+
+//!
+//! \class IHostMemory
+//!
+//! \brief Class to handle library allocated memory that is accessible to the user.
+//!
+//! The memory allocated via the host memory object is owned by the library and will
+//! be de-allocated when the IHostMemory object is destroyed.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IHostMemory : public INoCopy
+{
+public:
+    virtual ~IHostMemory() noexcept = default;
+
+    //! A pointer to the raw data that is owned by the library.
+    void* data() const noexcept
+    {
+        return mImpl->data();
+    }
+
+    //! The size in bytes of the data that was allocated.
+    std::size_t size() const noexcept
+    {
+        return mImpl->size();
+    }
+
+    //! The type of the memory that was allocated.
+    DataType type() const noexcept
+    {
+        return mImpl->type();
+    }
+
+protected:
+    apiv::VHostMemory* mImpl; //!< Implementation object; data(), size() and type() forward to it.
+};
+
+//!
+//! \enum DimensionOperation
+//!
+//! \brief An operation on two IDimensionExpr, which represent integer expressions used in dimension computations.
+//!
+//! For example, given two IDimensionExpr x and y and an IExprBuilder& eb,
+//! eb.operation(DimensionOperation::kSUM, x, y) creates a representation of x+y.
+//!
+//! \see IDimensionExpr, IExprBuilder
+//!
+enum class DimensionOperation : int32_t
+{
+    kSUM = 0,       //!< Sum of the two operands.
+    kPROD = 1,      //!< Product of the two operands.
+    kMAX = 2,       //!< Maximum of the two operands.
+    kMIN = 3,       //!< Minimum of the two operands.
+    kSUB = 4,       //!< Subtract the second element from the first.
+    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
+    kLESS = 6,      //!< 1 if first operand is less than second operand, 0 otherwise.
+    kFLOOR_DIV = 7, //!< Floor division of the first element by the second.
+    kCEIL_DIV = 8   //!< Division rounding up
+};
+
+//! Maximum number of elements in DimensionOperation enum. \see DimensionOperation
+template <>
+constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
+{
+    return 9;
+}
+
+//!
+//! \enum TensorLocation
+//!
+//! \brief The location for tensor data storage, device or host.
+//!
+enum class TensorLocation : int32_t
+{
+    kDEVICE = 0, //!< Data stored on device.
+    kHOST = 1,   //!< Data stored on host.
+};
+
+namespace impl
+{
+//! Maximum number of elements in TensorLocation enum. \see TensorLocation
+template <>
+struct EnumMaxImpl<TensorLocation>
+{
+    static constexpr int32_t kVALUE = 2;
+};
+} // namespace impl
+
+//!
+//! \class IDimensionExpr
+//!
+//! \brief An IDimensionExpr represents an integer expression constructed from constants,
+//! input dimensions, and binary operations.  These expressions can be used
+//! in overrides of IPluginV2DynamicExt::getOutputDimensions or IPluginV3OneBuild::getOutputShapes() to define output
+//! dimensions in terms of input dimensions.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \see DimensionOperation, IPluginV2DynamicExt::getOutputDimensions, IPluginV3OneBuild::getOutputShapes()
+//!
+class IDimensionExpr : public INoCopy
+{
+public:
+    //!
+    //! \brief Return true if expression is a build-time constant.
+    //!
+    bool isConstant() const noexcept
+    {
+        return mImpl->isConstant();
+    }
+
+    //!
+    //! \brief Get the value of the constant.
+    //!
+    //! If isConstant(), returns value of the constant.
+    //! If !isConstant(), return std::numeric_limits<int64_t>::min().
+    //!
+    int64_t getConstantValue() const noexcept
+    {
+        return mImpl->getConstantValue();
+    }
+
+protected:
+    apiv::VDimensionExpr* mImpl; //!< Implementation object that the accessors of this class forward to.
+    virtual ~IDimensionExpr() noexcept = default; //!< Protected, so client code cannot delete an expression directly.
+
+public:
+    //!
+    //! \brief Return true if this denotes the value of a size tensor.
+    //!
+    //! \return True if this was created with method IExprBuilder::declareSizeTensor, false otherwise
+    //!
+    bool isSizeTensor() const noexcept
+    {
+        return mImpl->isSizeTensor();
+    }
+};
+
+//!
+//! \class IExprBuilder
+//!
+//! \brief Object for constructing IDimensionExpr.
+//!
+//! There is no public way to construct an IExprBuilder.  It appears as an argument to
+//! method IPluginV2DynamicExt::getOutputDimensions() and IPluginV3OneBuild::getOutputShapes().  Overrides of that
+//! method can use that IExprBuilder argument to construct expressions that define output dimensions in terms of input
+//! dimensions.
+//!
+//! Clients should assume that any values constructed by the IExprBuilder are destroyed
+//! after IPluginV2DynamicExt::getOutputDimensions() or IPluginV3OneBuild::getOutputShapes() returns.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \see IDimensionExpr
+//!
+class IExprBuilder : public INoCopy
+{
+public:
+    //!
+    //! \brief Return pointer to IDimensionExpr for given value.
+    //!
+    IDimensionExpr const* constant(int64_t value) noexcept
+    {
+        return mImpl->constant(value);
+    }
+
+    //!
+    //! \brief Get the operation.
+    //!
+    //! Return pointer to IDimensionExpr that represents the given operation applied to first and second.
+    //! Returns nullptr if op is not a valid DimensionOperation.
+    //!
+    IDimensionExpr const* operation(
+        DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
+    {
+        return mImpl->operation(op, first, second);
+    }
+
+protected:
+    apiv::VExprBuilder* mImpl; //!< Implementation object that the builder methods of this class forward to.
+    virtual ~IExprBuilder() noexcept = default; //!< Protected, so client code cannot delete a builder directly.
+
+public:
+    //!
+    //! \brief Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bound.
+    //!
+    //! A size tensor allows a plugin to have output dimensions that cannot be computed solely from input dimensions.
+    //! For example, suppose a plugin implements the equivalent of INonZeroLayer for 2D input. The plugin can
+    //! have one output for the indices of non-zero elements, and a second output containing the number of non-zero
+    //! elements. Suppose the input has size [M,N] and has K non-zero elements. The plugin can write K to the second
+    //! output. When telling TensorRT that the first output has shape [2,K], plugin uses IExprBuilder::constant() and
+    //! IExprBuilder::declareSizeTensor(1,...) to create the IDimensionExpr that respectively denote 2 and K.
+    //!
+    //! TensorRT also needs to know the value of K to use for auto-tuning and an upper bound on K so that it can
+    //! allocate memory for the output tensor. In the example, suppose typically half of the plugin's input elements
+    //! are non-zero, and all the elements might be non-zero. Then using M*N/2 might be a good expression for the opt
+    //! parameter, and M*N for the upper bound. IDimensionExpr objects for these expressions can be constructed from
+    //! the IDimensionExpr objects for the input dimensions.
+    //!
+    //! \param outputIndex index of a plugin output that is a size tensor.
+    //! \param opt formula for computing auto-tuning value. Must not depend on a size tensor.
+    //! \param upper Upper bound on the size tensor.
+    //!
+    //! \return IDimensionExpr denoting the value of the size tensor.
+    //!
+    //! \see IPluginV3OneBuild::getOutputShapes()
+    //!
+    IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
+    {
+        return mImpl->declareSizeTensor(outputIndex, opt, upper);
+    }
+};
+
+//!
+//! \class DimsExprs
+//!
+//! \brief Analog of class Dims with expressions instead of constants for the dimensions.
+//!
+class DimsExprs
+{
+public:
+    int32_t nbDims;                          //!< The number of dimensions.
+    IDimensionExpr const* d[Dims::MAX_DIMS]; //!< Expression for the extent of each dimension.
+};
+
+//!
+//! \struct DynamicPluginTensorDesc
+//!
+//! \brief Summarizes tensors that a plugin might see for an input or output.
+//!
+struct DynamicPluginTensorDesc
+{
+    //! Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime dimension.
+    PluginTensorDesc desc;
+
+    //! Lower bounds on the tensor's dimensions.
+    Dims min;
+
+    //! Upper bounds on the tensor's dimensions.
+    Dims max;
+
+    //! Optimum value of the tensor's dimensions specified for auto-tuning.
+    Dims opt;
+};
+
+//!
+//! \class IPluginV2DynamicExt
+//!
+//! \brief Similar to IPluginV2Ext, but with support for dynamic shapes.
+//!
+//! Clients should override the public methods, including the following inherited methods:
+//!
+//! * virtual int32_t getNbOutputs() const noexcept = 0;
+//!
+//! * virtual DataType getOutputDataType(int32_t index, DataType const* inputTypes,
+//!                                      int32_t nbInputs) const noexcept = 0;
+//!
+//! * virtual size_t getSerializationSize() const noexcept = 0;
+//!
+//! * virtual void serialize(void* buffer) const noexcept = 0;
+//!
+//! * virtual void destroy() noexcept = 0;
+//!
+//! * virtual void setPluginNamespace(char const* pluginNamespace) noexcept = 0;
+//!
+//! * virtual char const* getPluginNamespace() const noexcept = 0;
+//!
+//! For weakly typed networks, the inputTypes will always be DataType::kFLOAT or DataType::kINT32,
+//! and the returned type is canonicalized to DataType::kFLOAT if it is DataType::kHALF or DataType::kINT8.
+//! For strongly typed networks, inputTypes are inferred from previous operations, and getOutputDataType
+//! specifies the returned type based on the inputTypes.
+//! Details about the floating-point precision are elicited later by method supportsFormatCombination.
+//!
+//! \deprecated Deprecated in TensorRT 10.0. Please implement IPluginV3 instead.
+//!
+class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext
+{
+public:
+    IPluginV2DynamicExt* clone() const noexcept override = 0;
+
+    //!
+    //! \brief Get expressions for computing dimensions of an output tensor from dimensions of the input tensors.
+    //!
+    //! \param outputIndex The index of the output tensor
+    //! \param inputs Expressions for dimensions of the input tensors
+    //! \param nbInputs The number of input tensors
+    //! \param exprBuilder Object for generating new expressions
+    //!
+    //! This function is called by the implementations of IBuilder during analysis of the network.
+    //!
+    //! Example #1: A plugin has a single output that transposes the last two dimensions of the plugin's single input.
+    //! The body of the override of getOutputDimensions can be:
+    //!
+    //!     DimsExprs output(inputs[0]);
+    //!     std::swap(output.d[output.nbDims-1], output.d[output.nbDims-2]);
+    //!     return output;
+    //!
+    //! Example #2: A plugin concatenates its two inputs along the first dimension.
+    //! The body of the override of getOutputDimensions can be:
+    //!
+    //!     DimsExprs output(inputs[0]);
+    //!     output.d[0] = exprBuilder.operation(DimensionOperation::kSUM, *inputs[0].d[0], *inputs[1].d[0]);
+    //!     return output;
+    //!
+    virtual DimsExprs getOutputDimensions(
+        int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
+
+    //!
+    //! \brief Limit on number of format combinations accepted.
+    //!
+    static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
+
+    //!
+    //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
+    //!
+    //! For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1).
+    //! Using this numbering, pos is an index into inOut, where 0 <= pos < nbInputs+nbOutputs.
+    //!
+    //! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
+    //! by inOut[pos].format and inOut[pos].type.  The override should return true if that format/datatype at inOut[pos]
+    //! are supported by the plugin.  If support is conditional on other input/output formats/datatypes, the plugin can
+    //! make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values
+    //! that the plugin supports.  The override should not inspect inOut[pos+1..nbInputs+nbOutputs-1],
+    //! which will have invalid values.  In other words, the decision for pos must be based on inOut[0..pos] only.
+    //!
+    //! Some examples:
+    //!
+    //! * A definition for a plugin that supports only FP16 NCHW:
+    //!
+    //!         return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kHALF;
+    //!
+    //! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
+    //!   and FP32 NCHW for its single output:
+    //!
+    //!         return inOut[pos].format == TensorFormat::kLINEAR && (inOut[pos].type == (pos < 2 ? DataType::kHALF :
+    //!         DataType::kFLOAT));
+    //!
+    //! * A definition for a "polymorphic" plugin with two inputs and one output that supports
+    //!   any format or type, but the inputs and output must have the same format and type:
+    //!
+    //!         return pos == 0 || (inOut[pos].format == inOut[0].format && inOut[pos].type == inOut[0].type);
+    //!
+    //! Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT combinations.
+    //!
+    virtual bool supportsFormatCombination(
+        int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
+
+    //!
+    //! \brief Configure the plugin.
+    //!
+    //! configurePlugin() can be called multiple times in both the build and execution phases. The build phase happens
+    //! before initialize() is called and only occurs during creation of an engine by IBuilder. The execution phase
+    //! happens after initialize() is called and occurs during both creation of an engine by IBuilder and execution
+    //! of an engine by IExecutionContext.
+    //!
+    //! Build phase:
+    //! IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for profiling but not for any
+    //! specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of
+    //! input and output formats, along with the bound of possible dimensions. The min and max value of the
+    //! DynamicPluginTensorDesc correspond to the kMIN and kMAX value of the current profile that the plugin is being
+    //! profiled for, with the desc.dims field corresponding to the dimensions of plugin specified at network creation.
+    //! Wildcard dimensions will exist during this phase in the desc.dims field.
+    //!
+    //! Execution phase:
+    //! IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for execution with
+    //! specific dimensions. This provides an opportunity for the plugin to change algorithmic choices based on the
+    //! explicit input dimensions stored in the desc.dims field.
+    //!  * IBuilder will call this function once per profile, with desc.dims resolved to the values specified by
+    //!    the kOPT field of the current profile.
+    //!    Wildcard dimensions will not exist during this phase.
+    //!  * IExecutionContext will call this during the next call to enqueue[V2]() or execute[V2]() if:
+    //!    - The batch size is changed from previous call of execute()/enqueue() if hasImplicitBatchDimension() returns
+    //!      true.
+    //!    - The optimization profile is changed via setOptimizationProfileAsync().
+    //!    - An input execution binding is changed via setInputShape().
+    //! \warning The execution phase is timing critical during IExecutionContext but is not part of the timing loop when
+    //! called from IBuilder. Performance bottlenecks of configurePlugin won't show up during engine building but will
+    //! be visible during execution after calling functions that trigger layer resource updates.
+    //!
+    //! \param in The input tensors attributes that are used for configuration.
+    //! \param nbInputs Number of input tensors.
+    //! \param out The output tensors attributes that are used for configuration.
+    //! \param nbOutputs Number of output tensors.
+    //!
+    virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
+        DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
+
+    //!
+    //! \brief Find the workspace size required by the layer.
+    //!
+    //! This function is called after the plugin is configured, and possibly during execution.
+    //! The result should be a sufficient workspace size to deal with inputs and outputs of the given size
+    //! or any smaller problem.
+    //!
+    //! \return The workspace size.
+    //!
+    virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
+        int32_t nbOutputs) const noexcept = 0;
+
+    //!
+    //! \brief Execute the layer.
+    //!
+    //! \param inputDesc how to interpret the memory for the input tensors.
+    //! \param outputDesc how to interpret the memory for the output tensors.
+    //! \param inputs The memory for the input tensors.
+    //! \param outputs The memory for the output tensors.
+    //! \param workspace Workspace for execution.
+    //! \param stream The stream in which to execute the kernels.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination).
+    //!
+    virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
+        void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
+
+protected:
+    //!
+    //! \brief Return the API version with which this plugin was built. The
+    //!  upper byte is reserved by TensorRT and is used to differentiate this from IPluginV2.
+    //!
+    //! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
+    //! plugins.
+    //!
+    int32_t getTensorRTVersion() const noexcept override
+    {
+        return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
+    }
+
+    virtual ~IPluginV2DynamicExt() noexcept {}
+
+private:
+    // The following are obsolete base class methods; they are sealed with final and must not be implemented or used.
+
+    //!
+    //! \brief Set plugin configuration. Obsolete overload; this stub does nothing.
+    //!
+    void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
+        bool const*, PluginFormat, int32_t) noexcept override final
+    {
+    }
+
+    //!
+    //! \brief Check if provided data type is supported. Obsolete overload; this stub always returns false.
+    //!
+    bool supportsFormat(DataType, PluginFormat) const noexcept override final
+    {
+        return false;
+    }
+
+    //!
+    //! \brief Get output dimensions. Obsolete overload; this stub returns Dims{-1, {}}.
+    //!
+    Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
+    {
+        return Dims{-1, {}};
+    }
+
+    //!
+    //! \brief Is output broadcast across batch.
+    //!
+    //! \warning Expected to return false as implicit batch support was removed in TensorRT 10.0.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
+    //!
+    TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
+    {
+        return false;
+    }
+
+    //!
+    //! \brief Can input be broadcast across batch.
+    //!
+    //! \warning Expected to return false as implicit batch support was removed in TensorRT 10.0.
+    //! \note (review) This stub returns true, which does not match the warning above; confirm the intent upstream.
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
+    //!
+    TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
+    {
+        return true;
+    }
+
+    //!
+    //! \brief Get required workspace size in bytes. Obsolete overload; this stub always returns 0.
+    //!
+    size_t getWorkspaceSize(int32_t) const noexcept override final
+    {
+        return 0;
+    }
+
+    //!
+    //! \brief Run inference. Obsolete overload; this stub always returns 1.
+    //!
+    int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
+    {
+        return 1;
+    }
+};
+
+namespace v_1_0
+{
+class IStreamReader : public IVersionedInterface
+{
+public:
+    //!
+    //! TensorRT never calls the destructor for an IStreamReader defined by the
+    //! application.
+    //!
+    ~IStreamReader() override = default;
+    IStreamReader() = default;
+
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //! \note Declared override rather than final, so the "must not override" rule is not compiler-enforced.
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IStreamReader", 1, 0};
+    }
+
+    //!
+    //! \brief Read the next nbBytes bytes of the stream into destination.
+    //!
+    //! \param destination The memory to write to.
+    //! \param nbBytes The number of bytes to read.
+    //!
+    //! \returns The number of bytes read. Negative values will be considered an automatic error.
+    //!
+    virtual int64_t read(void* destination, int64_t nbBytes) = 0;
+
+protected:
+    IStreamReader(IStreamReader const&) = default;
+    IStreamReader(IStreamReader&&) = default;
+    IStreamReader& operator=(IStreamReader const&) & = default;
+    IStreamReader& operator=(IStreamReader&&) & = default;
+};
+
+class IStreamWriter : public IVersionedInterface
+{
+public:
+    //!
+    //! TensorRT never calls the destructor for an IStreamWriter defined by the
+    //! application.
+    //!
+    ~IStreamWriter() override = default;
+    IStreamWriter() = default;
+
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept final
+    {
+        return InterfaceInfo{"IStreamWriter", 1, 0};
+    }
+
+    //!
+    //! \brief Write nbBytes of data into the stream.
+    //!
+    //! \param data The data to be written to the stream.
+    //! \param nbBytes The number of bytes to write.
+    //!
+    //! \returns The number of bytes written. A value that is negative or less than nbBytes indicates that an error
+    //! occurred and TensorRT will give up on writing to the stream.
+    //!
+    virtual int64_t write(void const* data, int64_t nbBytes) = 0;
+
+protected:
+    IStreamWriter(IStreamWriter const&) = default;
+    IStreamWriter(IStreamWriter&&) = default;
+    IStreamWriter& operator=(IStreamWriter const&) & = default;
+    IStreamWriter& operator=(IStreamWriter&&) & = default;
+};
+} // namespace v_1_0
+
+//!
+//! \class IStreamReader
+//!
+//! \brief Application-implemented class for reading data in a stream-based manner.
+//!
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamReader, not
+//!       v_1_0::IStreamReader.
+//!
+using IStreamReader = v_1_0::IStreamReader;
+
+//!
+//! \class IStreamWriter
+//!
+//! \brief Application-implemented class for writing data in a stream-based manner.
+//!
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamWriter, not
+//!       v_1_0::IStreamWriter.
+//!
+using IStreamWriter = v_1_0::IStreamWriter;
+
+//!
+//! \enum SeekPosition
+//! \brief Controls the seek mode of IStreamReaderV2.
+//!
+enum class SeekPosition : int32_t
+{
+    //! The offset is relative to the beginning of the stream.
+    kSET = 0,
+
+    //! The offset is relative to the current position in the stream.
+    kCUR = 1,
+
+    //! The offset is relative to the end of the stream.
+    kEND = 2,
+};
+
+namespace v_1_0
+{
+class IStreamReaderV2 : public IVersionedInterface
+{
+public:
+    //!
+    //! TensorRT never calls the destructor for an IStreamReaderV2 defined by the
+    //! application.
+    //!
+    ~IStreamReaderV2() override = default;
+    IStreamReaderV2() = default;
+
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //! \note Declared override rather than final, so the "must not override" rule is not compiler-enforced.
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IStreamReaderV2", 1, 0};
+    }
+
+    //!
+    //! \brief Read the next number of bytes in the stream asynchronously.
+    //!
+    //! \param destination The memory to write to. Call cudaPointerGetAttributes to get the memory location.
+    //! \param nbBytes The number of bytes to read.
+    //! \param stream The CUDA stream used to do the copy.
+    //!
+    //! \returns The number of bytes read. Negative values indicate an unrecoverable error.
+    //! A zero indicates that the end of the stream has been reached.
+    //!
+    virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;
+
+    //!
+    //! \brief Sets the position of the stream to the given offset.
+    //!
+    //! \param offset The number of bytes to offset from the position selected by where.
+    //! \param where The position from where the offset is added. \see SeekPosition
+    //!
+    //! \returns True if the position is updated successfully.
+    //!
+    virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;
+
+protected:
+    IStreamReaderV2(IStreamReaderV2 const&) = default;
+    IStreamReaderV2(IStreamReaderV2&&) = default;
+    IStreamReaderV2& operator=(IStreamReaderV2 const&) & = default;
+    IStreamReaderV2& operator=(IStreamReaderV2&&) & = default;
+};
+} // namespace v_1_0
+
+//!
+//! \class IStreamReaderV2
+//!
+//! \brief Application-implemented class for reading data in a stream-based manner asynchronously. Intended for use with
+//! the GDS API for optimizing load times.
+//!
+//! \note To ensure compatibility of source code with future versions of TensorRT, use IStreamReaderV2, not
+//!       v_1_0::IStreamReaderV2.
+//!
+using IStreamReaderV2 = v_1_0::IStreamReaderV2;
+
+//!
+//! \class IPluginResourceContext
+//!
+//! \brief Interface for plugins to access per context resources provided by TensorRT
+//!
+//! There is no public way to construct an IPluginResourceContext. It appears as an argument to
+//! IPluginV3OneRuntime::attachToContext(). Overrides of that method can use the IPluginResourceContext object to access
+//! any available per context resources.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \see IPluginV3OneRuntime::attachToContext()
+//!
+class IPluginResourceContext
+{
+public:
+    //! \brief Get the GPU allocator associated with the resource context.
+    //!
+    //! \see IPluginV3OneRuntime::attachToContext()
+    //!
+    virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;
+
+    //! \brief Get the error recorder associated with the resource context.
+    //!
+    //! \see IPluginV3OneRuntime::attachToContext()
+    //!
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+    virtual ~IPluginResourceContext() noexcept = default;
+
+protected:
+    IPluginResourceContext() = default;
+    IPluginResourceContext(IPluginResourceContext const&) = default;
+    IPluginResourceContext(IPluginResourceContext&&) = default;
+    IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
+    IPluginResourceContext& operator=(IPluginResourceContext&&) & = default;
+};
+
+namespace v_1_0
+{
+class IPluginV3OneCore : public IPluginCapability
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //! \note Declared override rather than final, so the "must not override" rule is not compiler-enforced.
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
+    }
+
+    //!
+    //! \brief Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
+    //!
+    //! \see IPluginCreatorV3One::getPluginName()
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
+    //! NULL terminator.
+    //!
+    virtual AsciiChar const* getPluginName() const noexcept = 0;
+
+    //!
+    //! \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin creator.
+    //!
+    //! \see IPluginCreatorV3One::getPluginVersion()
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
+    //! NULL terminator.
+    //!
+    virtual AsciiChar const* getPluginVersion() const noexcept = 0;
+
+    //!
+    //! \brief Return the namespace of the plugin object. Should match the plugin namespace returned by the
+    //! corresponding plugin creator.
+    //!
+    //! \see IPluginCreatorV3One::getPluginNamespace()
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
+    //! NULL terminator.
+    //!
+    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
+};
+
+class IPluginV3OneBuild : public IPluginCapability
+{
+public:
+    //!
+    //! \brief The default maximum number of format combinations that will be timed by TensorRT during the build phase
+    //!
+    //! \see getFormatCombinationLimit
+    //!
+    static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
+
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
+    }
+
+    //!
+    //! \brief Configure the plugin.
+    //!
+    //! configurePlugin() can be called multiple times in the build phase during creation of an engine by IBuilder.
+    //!
+    //! configurePlugin() is called when a plugin is being prepared for profiling but not for any
+    //! specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of
+    //! input and output formats, along with the bound of possible dimensions. The min, opt and max value of the
+    //! DynamicPluginTensorDesc correspond to the kMIN, kOPT and kMAX value of the current profile that the plugin is
+    //! being profiled for, with the desc.dims field corresponding to the dimensions of plugin specified at network
+    //! creation. Wildcard dimensions may exist during this phase in the desc.dims field.
+    //!
+    //! \param in The input tensors attributes that are used for configuration.
+    //! \param nbInputs Number of input tensors.
+    //! \param out The output tensors attributes that are used for configuration.
+    //! \param nbOutputs Number of output tensors.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination, if invoked by TensorRT).
+    //!
+    virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
+        DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
+
+    //!
+    //! \brief Provide the data types of the plugin outputs if the input tensors have the data types provided.
+    //!
+    //! \param outputTypes Pre-allocated array to which the output data types should be written.
+    //! \param nbOutputs The number of output tensors. This matches the value returned from getNbOutputs().
+    //! \param inputTypes The input data types.
+    //! \param nbInputs The number of input tensors.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
+    //! through the error recorder.
+    //!
+    //! \note Provide `DataType::kFLOAT`s if the layer has no inputs. The data type for any size tensor outputs must be
+    //! `DataType::kINT32`. The returned data types must each have a format that is supported by the plugin.
+    //!
+    //! \warning DataType:kBOOL and DataType::kUINT8 are not supported.
+    //!
+    virtual int32_t getOutputDataTypes(
+        DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;
+
+    //!
+    //! \brief Provide expressions for computing dimensions of the output tensors from dimensions of the input tensors.
+    //!
+    //! \param inputs Expressions for dimensions of the input tensors
+    //! \param nbInputs The number of input tensors
+    //! \param shapeInputs Expressions for values of the shape tensor inputs
+    //! \param nbShapeInputs The number of shape tensor inputs
+    //! \param outputs Pre-allocated array to which the output dimensions must be written
+    //! \param nbOutputs Number of outputs.
+    //! \param exprBuilder Object for generating new dimension expressions
+    //!
+    //! \note Any size tensor outputs must be declared to be 0D.
+    //!
+    //! \note The declaration of shapeInputs as DimsExprs is slightly abusive, because the "dimensions"
+    //!       are actually the values of the shape tensor. For example, if the input shape tensor
+    //!       is a 2x3 matrix, the DimsExprs will have six "dimensions": the three values from the first
+    //!       row of the matrix followed by the three values from the second row of the matrix.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination). Returned code will be reported
+    //! through the error recorder.
+    //!
+    virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
+        int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
+
+    //!
+    //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
+    //!
+    //! For this method inputs are numbered 0.. (nbInputs - 1) and outputs are numbered nbInputs.. (nbInputs + nbOutputs
+    //! - 1). Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs + nbOutputs - 1.
+    //!
+    //! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
+    //! by inOut[pos].format and inOut[pos].type.  The override should return true if that format/datatype at inOut[pos]
+    //! are supported by the plugin.  If support is conditional on other input/output formats/datatypes, the plugin can
+    //! make its result conditional on the formats/datatypes in inOut[0.. pos - 1], which will be set to values
+    //! that the plugin supports.  The override should not inspect inOut[pos1.. nbInputs + nbOutputs - 1],
+    //! which will have invalid values.  In other words, the decision for pos must be based on inOut[0..pos] only.
+    //!
+    //! Some examples:
+    //!
+    //! * A definition for a plugin that supports only FP16 NCHW:
+    //!
+    //!         return inOut.format[pos] == TensorFormat::kLINEAR && inOut.type[pos] == DataType::kHALF;
+    //!
+    //! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
+    //!   and FP32 NCHW for its single output:
+    //!
+    //!         return inOut.format[pos] == TensorFormat::kLINEAR && (inOut.type[pos] == pos < 2 ?  DataType::kHALF :
+    //!         DataType::kFLOAT);
+    //!
+    //! * A definition for a "polymorphic" plugin with two inputs and one output that supports
+    //!   any format or type, but the inputs and output must have the same format and type:
+    //!
+    //!         return pos == 0 || (inOut.format[pos] == inOut.format[0] && inOut.type[pos] == inOut.type[0]);
+    //!
+    //! \warning TensorRT will stop querying once it finds getFormatCombinationLimit() of combinations.
+    //!
+    //! \see getFormatCombinationLimit
+    //!
+    virtual bool supportsFormatCombination(
+        int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
+
+    //!
+    //! \brief Get the number of outputs from the plugin.
+    //!
+    //! \return The number of outputs, which must be a positive integer.
+    //!
+    virtual int32_t getNbOutputs() const noexcept = 0;
+
+    //!
+    //! \brief Find the workspace size required by the layer.
+    //!
+    //! This function is called after the plugin is configured, and possibly during execution.
+    //! The result should be a sufficient workspace size to deal with inputs and outputs of the given size
+    //! or any smaller problem.
+    //!
+    //! \return The workspace size.
+    //!
+    virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
+        DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
+    {
+        return 0;
+    }
+
+    //!
+    //! \brief Query for any custom tactics that the plugin intends to use
+    //!
+    //! This method queries for the set of tactics T(f) supported by the plugin for the format combination f indicated
+    //! by the immediately preceding call to configurePlugin(). It is guaranteed to be called after configurePlugin().
+    //!
+    //! For each format combination provided through configurePlugin(), up to a maximum of getFormatCombinationLimit(),
+    //! the plugin will be timed for each tactic advertised through this method for that format combination. i.e. The
+    //! plugin will be timed \f$N = \sum_{i=0}^{i<getFormatCombinationLimit()} (T(f[i]))\f$ times. If \f$N = 1\f$, the
+    //! plugin may not be timed. In pseudocode, the timing protocol appears as the following:
+    //!
+    //! counter = 0
+    //! for each supported format combination
+    //!     ++counter
+    //!     if counter > getFormatCombinationLimit()
+    //!         goto done
+    //!     configurePlugin(...)
+    //!     for each tactic in getValidTactics(...)
+    //!         time tactic
+    //! done:
+    //!
+    //!
+    //! \param tactics Pre-allocated buffer to which the tactic values should be written
+    //! \param nbTactics The number of tactics advertised through getNbTactics()
+    //!
+    //! \note The provided tactic values must be unique and non-zero. The tactic value 0 is reserved for the default
+    //! tactic attached to each format combination.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
+    //! through the error recorder.
+    //!
+    virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
+    {
+        return 0;
+    }
+
+    //!
+    //! \brief Query for the number of custom tactics the plugin intends to use
+    //!
+    virtual int32_t getNbTactics() noexcept
+    {
+        return 0; // Default: the plugin advertises no custom tactics.
+    }
+
+    //!
+    //! \brief Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creation.
+    //!
+    //! \return Suffix to use for timing cache ID, considering only the creation state of the plugin.
+    //!         Returning nullptr will disable timing caching for the plugin altogether.
+    //!
+    //! \note If timing caching is enabled for the plugin (by returning non-null), the I/O shape and format information
+    //! will be automatically considered to form the prefix of the timing cache ID. Therefore, only other factors
+    //! determining the creation state of the plugin, such as its attribute values, should be considered to compose the
+    //! return value.
+    //!
+    virtual char const* getTimingCacheID() noexcept
+    {
+        return nullptr; // Default: no suffix, which disables timing caching for the plugin.
+    }
+
+    //!
+    //! \brief Return the maximum number of format combinations that will be timed by TensorRT during the build phase
+    //!
+    virtual int32_t getFormatCombinationLimit() noexcept
+    {
+        return kDEFAULT_FORMAT_COMBINATION_LIMIT; // Default limit; override to time more or fewer combinations.
+    }
+
+    //!
+    //! \brief Query for a string representing the configuration of the plugin. May be called anytime after
+    //! plugin creation.
+    //!
+    //! \return A string representing the plugin's creation state, especially with regard to its attribute values.
+    //!
+    virtual char const* getMetadataString() noexcept
+    {
+        return nullptr; // Default: the plugin provides no metadata string.
+    }
+};
+
+class IPluginV3OneRuntime : public IPluginCapability
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
+    }
+
+    //!
+    //! \brief Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised, this
+    //! will have a value of 0, which is designated as the default tactic.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
+    //! through the error recorder.
+    //!
+    virtual int32_t setTactic(int32_t /*tactic*/) noexcept
+    {
+        return 0; // Default: the tactic value is ignored and success (0) is reported.
+    }
+
+    //!
+    //! \brief Called when a plugin is being prepared for execution for specific dimensions. This could
+    //! happen multiple times in the execution phase, both during creation of an engine by IBuilder and execution of an
+    //! engine by IExecutionContext.
+    //!  * IBuilder will call this function once per profile, with `in` resolved to the values specified by the
+    //!  kOPT field of the current profile.
+    //!  * IExecutionContext will call this during the next invocation of enqueueV3() or executeV2() if:
+    //!    - The optimization profile is changed via setOptimizationProfile() or setOptimizationProfileAsync().
+    //!    - An input binding is changed via setInputTensorAddress() or setTensorAddress() or setInputShape().
+    //! \warning The execution phase is timing critical during IExecutionContext but is not part of the timing loop when
+    //! called from IBuilder. Performance bottlenecks of onShapeChange() will not show up during engine building but
+    //! will be visible during execution if any triggering functions are called.
+    //!
+    //! \param in The input tensors attributes that are used for configuration.
+    //! \param nbInputs Number of input tensors.
+    //! \param out The output tensors attributes that are used for configuration.
+    //! \param nbOutputs Number of output tensors.
+    //!
+    virtual int32_t onShapeChange(
+        PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
+
+    //!
+    //! \brief Execute the layer.
+    //!
+    //! \param inputDesc how to interpret the memory for the input tensors.
+    //! \param outputDesc how to interpret the memory for the output tensors.
+    //! \param inputs The memory for the input tensors.
+    //! \param outputs The memory for the output tensors.
+    //! \param workspace Workspace for execution.
+    //! \param stream The stream in which to execute the kernels.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported
+    //! through the error recorder.
+    //!
+    virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
+        void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
+
+    //!
+    //! \brief Clone the plugin, attach the cloned plugin object to an execution context and grant the cloned plugin
+    //! access to some context resources.
+    //!
+    //! This function is called automatically for each plugin when a new execution context is created. The plugin may
+    //! use resources provided by the IPluginResourceContext until the plugin is deleted by TensorRT.
+    //!
+    //! If the plugin needs per-context resources, they can be allocated here.
+    //!
+    //! \param context A resource context that exposes methods to get access to execution context specific resources.
+    //!                A different resource context is guaranteed for each different execution context to which the
+    //!                plugin is attached.
+    //! \see IPluginResourceContext
+    //!
+    //! \note This method should clone the entire IPluginV3 object, not just the runtime interface.
+    //!
+    //! \return A clone of the IPluginV3 object on whose runtime interface this method is invoked, which has been
+    //! attached to the provided resource context.
+    //!
+    virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;
+
+    //!
+    //! \brief Get the plugin fields which should be serialized.
+    //!
+    //! \note The set of plugin fields returned does not necessarily need to match that advertised through
+    //! getFieldNames() of the corresponding plugin creator.
+    //!
+    //! \note To serialize arbitrary plugin data, use a PluginField of
+    //! PluginFieldType::kUNKNOWN, with the length of the PluginField set to the correct number of bytes.
+    //!
+    virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
+};
+} // namespace v_1_0
+
+namespace v_2_0
+{
+
+class IPluginV3OneBuild : public v_1_0::IPluginV3OneBuild
+{
+public:
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 2, 0}; // NOTE(review): presumably {kind, major, minor} - confirm.
+    }
+
+    //!
+    //! \brief Communicates to TensorRT that the output at the specified output index is aliased to the input at the
+    //! returned index
+    //!
+    //! Enables read-modify-write behavior in plugins. TensorRT may insert copies to facilitate this capability.
+    //!
+    //! \return An integer denoting the index of the input which is aliased to the output at outputIndex.
+    //!         Returning -1 indicates that the output is not aliased to any input. Otherwise, the valid range for
+    //!         return value is [0, nbInputs - 1].
+    //!
+    //! \note A given plugin input can only be aliased to a single plugin output.
+    //!
+    //! \note This API will only be called and have an effect when PreviewFeature::kALIASED_PLUGIN_IO_10_03 is turned
+    //! on.
+    //!
+    //! \warning If an input is not shallow copyable, a copy inserted by TensorRT may not work as intended. Therefore,
+    //!          using this feature with tensors requiring deep copies is not supported.
+    //!
+    //! \warning If a given tensor is requested to be aliased by two different plugins, this may result in divergent
+    //! copies of the tensor after writes from each plugin. e.g. In the below example, t1 and t2 could be divergent.
+    //!
+    //!        +-----+            +--------+
+    //!     +->|Copy +--> t* ---->|Plugin0 +--> t1
+    //!     |  +-----+            +--------+
+    //!     t
+    //!     |  +-----+            +--------+
+    //!     +->|Copy +--> t** --->|Plugin1 +--> t2
+    //!        +-----+            +--------+
+    //!
+    virtual int32_t getAliasedInput(int32_t /*outputIndex*/) noexcept
+    {
+        return -1; // Default: no output is aliased to any input.
+    }
+};
+
+} // namespace v_2_0
+
+//!
+//! \class IPluginV3OneCore
+//!
+//! \brief A plugin capability interface that enables the core capability (PluginCapabilityType::kCORE).
+//!
+//! \see IPluginCapability
+//! \see PluginCapabilityType
+//! \see IPluginV3::getCapabilityInterface()
+//!
+using IPluginV3OneCore = v_1_0::IPluginV3OneCore;
+
+//!
+//! \class IPluginV3OneBuild
+//!
+//! \brief A plugin capability interface that enables the build capability (PluginCapabilityType::kBUILD). Exposes
+//! methods that allow the expression of the build time properties and behavior of a plugin.
+//!
+//! \see IPluginCapability
+//! \see PluginCapabilityType
+//! \see IPluginV3::getCapabilityInterface()
+//!
+using IPluginV3OneBuild = v_1_0::IPluginV3OneBuild; // v_2_0 revision: see IPluginV3OneBuildV2.
+
+//!
+//! \class IPluginV3OneRuntime
+//!
+//! \brief A plugin capability interface that enables the runtime capability (PluginCapabilityType::kRUNTIME). Exposes
+//! methods that allow the expression of the runtime properties and behavior of a plugin.
+//!
+//! \see IPluginCapability
+//! \see PluginCapabilityType
+//! \see IPluginV3::getCapabilityInterface()
+//!
+using IPluginV3OneRuntime = v_1_0::IPluginV3OneRuntime;
+
+//!
+//! \class IPluginV3OneBuildV2
+//!
+//! \brief A plugin capability interface that extends IPluginV3OneBuild by providing I/O aliasing functionality.
+//!
+//! \see IPluginV3OneBuild, IPluginV3OneBuildV2::getAliasedInput()
+//!
+using IPluginV3OneBuildV2 = v_2_0::IPluginV3OneBuild;
+
+namespace v_1_0
+{
+class IProfiler
+{
+public:
+    //!
+    //! \brief Layer time reporting callback.
+    //!
+    //! \param layerName The name of the layer, set when constructing the network definition. If the engine is built
+    //!                  with profiling verbosity set to kNONE, the layerName is the decimal index of the layer.
+    //! \param ms The time, in milliseconds, taken to execute the layer.
+    //!
+    virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
+
+    virtual ~IProfiler() noexcept {} // Virtual, so implementations can be destroyed through an IProfiler pointer.
+};
+} // namespace v_1_0
+
+//!
+//! \class IProfiler
+//!
+//! \brief Application-implemented interface for profiling.
+//!
+//! When this class is added to an execution context, the profiler will be called once per layer for each invocation of
+//! executeV2()/enqueueV3().
+//!
+//! Running inference with the profiler enabled is not recommended when inference execution time is critical, since
+//! the profiler may affect execution time negatively.
+//!
+using IProfiler = v_1_0::IProfiler;
+
+//!
+//! \enum WeightsRole
+//!
+//! \brief How a layer uses particular Weights.
+//!
+//! The power weights of an IScaleLayer are omitted. Refitting those is not supported.
+//!
+enum class WeightsRole : int32_t
+{
+    kKERNEL = 0,   //!< Kernel for IConvolutionLayer or IDeconvolutionLayer
+    kBIAS = 1,     //!< Bias for IConvolutionLayer or IDeconvolutionLayer
+    kSHIFT = 2,    //!< Shift part of IScaleLayer
+    kSCALE = 3,    //!< Scale part of IScaleLayer
+    kCONSTANT = 4, //!< Weights for IConstantLayer
+    kANY = 5,      //!< Any other weights role
+};
+
+//! Number of enumerators in the WeightsRole enum (kKERNEL through kANY). \see WeightsRole
+template <>
+constexpr inline int32_t EnumMax<WeightsRole>() noexcept
+{
+    return 6; // Must be kept in step with the enumerator list above.
+}
+
+//!
+//! \enum DeviceType
+//!
+//! \brief The device that this layer/network will execute on.
+//!
+enum class DeviceType : int32_t
+{
+    kGPU = 0, //!< GPU Device
+    kDLA = 1, //!< DLA Core
+};
+
+//! Number of enumerators in the DeviceType enum (kGPU and kDLA). \see DeviceType
+template <>
+constexpr inline int32_t EnumMax<DeviceType>() noexcept
+{
+    return 2; // Must be kept in step with the enumerator list above.
+}
+
+//!
+//! \enum TempfileControlFlag
+//!
+//! \brief Flags used to control TensorRT's behavior when creating executable temporary files.
+//!
+//! On some platforms the TensorRT runtime may need to create files in a temporary directory or use platform-specific
+//! APIs to create files in-memory to load temporary DLLs that implement runtime code. These flags allow the
+//! application to explicitly control TensorRT's use of these files. Disallowing them will preclude the use of certain
+//! TensorRT APIs for deserializing and loading lean runtimes.
+//!
+enum class TempfileControlFlag : int32_t
+{
+    //! Allow creating and loading files in-memory (or unnamed files).
+    kALLOW_IN_MEMORY_FILES = 0,
+
+    //! Allow creating and loading named files in a temporary directory on the filesystem.
+    //!
+    //! \see IRuntime::setTemporaryDirectory()
+    kALLOW_TEMPORARY_FILES = 1,
+};
+
+//! Number of enumerators in the TempfileControlFlag enum. \see TempfileControlFlag
+template <>
+constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
+{
+    return 2; // Must be kept in step with the enumerator list above.
+}
+
+//!
+//! \brief Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operations.
+//!
+//! \see TempfileControlFlag,
+//!      IRuntime::setTempfileControlFlags(),
+//!      IRuntime::getTempfileControlFlags()
+using TempfileControlFlags = uint32_t;
+
+//!
+//! \enum TensorFormat
+//!
+//! \brief Format of the input/output tensors.
+//!
+//! This enum is used by both plugins and network I/O tensors.
+//!
+//! \see IPluginV2::supportsFormat(), safe::ICudaEngine::getBindingFormat()
+//!
+//! Many of the formats are **vector-major** or **vector-minor**. These formats specify
+//! a <em>vector dimension</em> and <em>scalars per vector</em>.
+//! For example, suppose that the tensor has dimensions [M,N,C,H,W],
+//! the vector dimension is C and there are V scalars per vector.
+//!
+//! * A **vector-major** format splits the vectorized dimension into two axes in the
+//!   memory layout. The vectorized dimension is replaced by an axis of length ceil(C/V)
+//!   and a new dimension of length V is appended. For the example tensor, the memory layout
+//!   is equivalent to an array with dimensions [M][N][ceil(C/V)][H][W][V].
+//!   Tensor coordinate (m,n,c,h,w) maps to array location [m][n][c/V][h][w][c\%V].
+//!
+//! * A **vector-minor** format moves the vectorized dimension to become the last axis
+//!   in the memory layout. For the example tensor, the memory layout is equivalent to an
+//!   array with dimensions [M][N][H][W][ceil(C/V)*V]. Tensor coordinate (m,n,c,h,w) maps
+//!   to array location [m][n][h][w][c].
+//!
+//! In interfaces that refer to "components per element", that's the value of V above.
+//!
+//! For more information about data formats, see the topic "Data Format Description" located in the
+//! TensorRT Developer Guide.
+//! https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/advanced.html#i-o-formats
+//!
+enum class TensorFormat : int32_t
+{
+    //! Memory layout is similar to an array in C or C++.
+    //! The stride of each dimension is the product of the dimensions after it.
+    //! The last dimension has unit stride.
+    //!
+    //! This format supports all TensorRT types.
+    //! For DLA usage, the tensor sizes are limited to C,H,W in the range [1,8192].
+    kLINEAR = 0,
+
+    //! Vector-major format with two scalars per vector.
+    //! Vector dimension is third to last.
+    //!
+    //! This format requires FP16 or BF16 and at least three dimensions.
+    kCHW2 = 1,
+
+    //! Vector-minor format with eight scalars per vector.
+    //! Vector dimension is third to last.
+    //! This format requires FP16 or BF16 and at least three dimensions.
+    kHWC8 = 2,
+
+    //! Vector-major format with four scalars per vector.
+    //! Vector dimension is third to last.
+    //!
+    //! This format requires INT8 and at least three dimensions.
+    //! For INT8, the length of the vector dimension must be a build-time constant.
+    //!
+    //! Deprecated usage:
+    //!
+    //! If running on the DLA, this format can be used for acceleration
+    //! with the caveat that C must be less than or equal to 4.
+    //! If used as DLA input and the build option kGPU_FALLBACK is not specified,
+    //! it needs to meet the line stride requirement of the DLA format. Column stride in
+    //! bytes must be a multiple of 64 on Orin.
+    kCHW4 = 3,
+
+    //! Vector-major format with 16 scalars per vector.
+    //! Vector dimension is third to last.
+    //!
+    //! This format is only supported by DLA and requires FP16 and at least three dimensions.
+    //! This format maps to the native feature format for FP16,
+    //! and the tensor sizes are limited to C,H,W in the range [1,8192].
+    kCHW16 = 4,
+
+    //! Vector-major format with 32 scalars per vector.
+    //! Vector dimension is third to last.
+    //!
+    //! This format requires INT8, FP32, or FP16 and at least three dimensions.
+    //!
+    //! For DLA usage, this format maps to the native feature format for INT8,
+    //! and the tensor sizes are limited to C,H,W in the range [1,8192].
+    kCHW32 = 5,
+
+    //! Vector-minor format with eight scalars per vector.
+    //! Vector dimension is fourth to last.
+    //!
+    //! This format requires FP16 or BF16 and at least four dimensions.
+    kDHWC8 = 6,
+
+    //! Vector-major format with 32 scalars per vector.
+    //! Vector dimension is fourth to last.
+    //!
+    //! This format requires FP16 or INT8 and at least four dimensions.
+    kCDHW32 = 7,
+
+    //! Vector-minor format where channel dimension is third to last and unpadded.
+    //!
+    //! This format requires either FP32 or UINT8 and at least three dimensions.
+    kHWC = 8,
+
+    //! DLA planar format. For a tensor with dimension {N, C, H, W}, the W axis
+    //! always has unit stride. The stride for stepping along the H axis is
+    //! rounded up to 64 bytes.
+    //!
+    //! The memory layout is equivalent to a C array with dimensions
+    //! [N][C][H][roundUp(W, 64/elementSize)] where elementSize is
+    //! 2 for FP16 and 1 for INT8, with the tensor coordinates (n, c, h, w)
+    //! mapping to array subscript [n][c][h][w].
+    kDLA_LINEAR = 9,
+
+    //! DLA image format. For a tensor with dimension {N, C, H, W} the C axis
+    //! always has unit stride. The stride for stepping along the H axis is rounded up
+    //! to 64 bytes on Orin. C can only be 1, 3 or 4.
+    //! If C == 1, it will map to grayscale format.
+    //! If C == 3 or C == 4, it will map to color image format. And if C == 3,
+    //! the stride for stepping along the W axis needs to be padded to 4 in elements.
+    //!
+    //! When C is {1, 3, 4}, then C' is {1, 4, 4} respectively,
+    //! the memory layout is equivalent to a C array with dimensions
+    //! [N][H][roundUp(W, 64/C'/elementSize)][C'] on Orin
+    //! where elementSize is 2 for FP16
+    //! and 1 for INT8. The tensor coordinates (n, c, h, w) map to array
+    //! subscript [n][h][w][c].
+    kDLA_HWC4 = 10,
+
+    //! Vector-minor format with 16 scalars per vector.
+    //! Vector dimension is third to last.
+    //!
+    //! This requires FP16, INT8 or FP8 and at least three dimensions.
+    kHWC16 = 11,
+
+    //! Vector-minor format with one scalar per vector.
+    //! Vector dimension is fourth to last.
+    //!
+    //! This format requires FP32 and at least four dimensions.
+    kDHWC = 12
+};
+
+namespace impl
+{
+//! Number of enumerators in the TensorFormat enum (kLINEAR through kDHWC). \see TensorFormat
+template <>
+struct EnumMaxImpl<TensorFormat>
+{
+    //! kVALUE is the number of enumerators in TensorFormat, i.e. one more than the largest value (kDHWC = 12).
+    static constexpr int32_t kVALUE = 13;
+};
+} // namespace impl
+
+//!
+//! \enum AllocatorFlag
+//!
+//! \brief Allowed type of memory allocation.
+//!
+enum class AllocatorFlag : int32_t
+{
+    //! TensorRT may call reallocate() on this allocation.
+    kRESIZABLE = 0,
+};
+
+namespace impl
+{
+//! Number of enumerators in the AllocatorFlag enum. \see AllocatorFlag
+template <>
+struct EnumMaxImpl<AllocatorFlag>
+{
+    //! kVALUE is the number of enumerators in AllocatorFlag (currently only kRESIZABLE).
+    static constexpr int32_t kVALUE = 1;
+};
+} // namespace impl
+
+using AllocatorFlags = uint32_t; // NOTE(review): presumably a bitmask of AllocatorFlag values - confirm.
+
+//! DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD.
+//! The name v_1_0 may change in future versions of TensorRT.
+
+//!
+//! \class ILogger
+//!
+//! \brief Application-implemented logging interface for the builder, refitter and runtime.
+//!
+//! The logger used to create an instance of IBuilder, IRuntime or IRefitter is used for all objects created through
+//! that interface. The logger must be valid until all objects created are released.
+//!
+//! The Logger object implementation must be thread safe. All locking and synchronization is pushed to the
+//! interface implementation and TensorRT does not hold any synchronization primitives when calling the interface
+//! functions.
+//!
+class ILogger
+{
+public:
+    //!
+    //! \enum Severity
+    //!
+    //! \brief The severity corresponding to a log message.
+    //!
+    enum class Severity : int32_t
+    {
+        //! An internal error has occurred. Execution is unrecoverable.
+        kINTERNAL_ERROR = 0,
+        //! An application error has occurred.
+        kERROR = 1,
+        //! An application error has been discovered, but TensorRT has recovered or fallen back to a default.
+        kWARNING = 2,
+        //! Informational messages with instructional information.
+        kINFO = 3,
+        //! Verbose messages with debugging information.
+        kVERBOSE = 4,
+    };
+
+    //!
+    //! \brief A callback implemented by the application to handle logging messages.
+    //!
+    //! \param severity The severity of the message.
+    //! \param msg A null-terminated log message.
+    //!
+    //! \warning Loggers used in the safety certified runtime must set a maximum message length and truncate
+    //!          messages exceeding this length. It is up to the implementer of the derived class to define
+    //!          a suitable limit that will prevent buffer overruns, resource exhaustion, and other security
+    //!          vulnerabilities in their implementation. The TensorRT safety certified runtime will never
+    //!          emit messages longer than 1024 bytes.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime, or if the same logger is used
+    //!                  for multiple runtimes, builders, or refitters.
+    //!
+    virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
+
+    ILogger() = default;
+    virtual ~ILogger() = default;
+
+protected:
+    // @cond SuppressDoxyWarnings
+    ILogger(ILogger const&) = default;
+    ILogger(ILogger&&) = default;
+    ILogger& operator=(ILogger const&) & = default;
+    ILogger& operator=(ILogger&&) & = default;
+    // @endcond
+};
+
+namespace impl
+{
+//! Number of enumerators in the ILogger::Severity enum (kINTERNAL_ERROR through kVERBOSE). \see ILogger::Severity
+template <>
+struct EnumMaxImpl<ILogger::Severity>
+{
+    //! kVALUE is the number of enumerators in ILogger::Severity, i.e. one more than the largest value (kVERBOSE = 4).
+    static constexpr int32_t kVALUE = 5;
+};
+} // namespace impl
+
+namespace v_1_0
+{
+
+class IGpuAllocator : public IVersionedInterface
+{
+public:
+    //!
+    //! \brief A thread-safe callback implemented by the application to handle acquisition of GPU memory.
+    //!
+    //! \param size The size of the memory block required (in bytes).
+    //! \param alignment The required alignment of memory. Alignment will be zero
+    //!        or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
+    //!        Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
+    //!        An alignment value of zero indicates any alignment is acceptable.
+    //! \param flags Reserved for future use. In the current release, 0 will be passed.
+    //!
+    //! \return If the allocation was successful, the start address of a device memory block of the requested size.
+    //! If an allocation request of size 0 is made, nullptr must be returned.
+    //! If an allocation request cannot be satisfied, nullptr must be returned.
+    //! If a non-null address is returned, it is guaranteed to have the specified alignment.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
+    //! requests.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by allocateAsync
+    //!
+    TRT_DEPRECATED virtual void* allocate(
+        uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
+
+    ~IGpuAllocator() override = default;
+    IGpuAllocator() = default;
+
+    //!
+    //! \brief A thread-safe callback implemented by the application to resize an existing allocation.
+    //!
+    //! Only allocations which were allocated with AllocatorFlag::kRESIZABLE will be resized.
+    //!
+    //! Options are one of:
+    //! * resize in place leaving min(oldSize, newSize) bytes unchanged and return the original address
+    //! * move min(oldSize, newSize) bytes to a new location of sufficient size and return its address
+    //! * return nullptr, to indicate that the request could not be fulfilled.
+    //!
+    //! If nullptr is returned, TensorRT will assume that resize() is not implemented, and that the
+    //! allocation at baseAddr is still valid.
+    //!
+    //! This method is made available for use cases where delegating the resize
+    //! strategy to the application provides an opportunity to improve memory management.
+    //! One possible implementation is to allocate a large virtual device buffer and
+    //! progressively commit physical memory with cuMemMap. CU_MEM_ALLOC_GRANULARITY_RECOMMENDED
+    //! is suggested in this case.
+    //!
+    //! TensorRT may call realloc to increase the buffer by relatively small amounts.
+    //!
+    //! \param baseAddr the address of the original allocation, which will have been returned by previously calling
+    //!        allocate() or reallocate() on the same object.
+    //! \param alignment The alignment used by the original allocation. This will be the same value that was previously
+    //!        passed to the allocate() or reallocate() call that returned baseAddr.
+    //! \param newSize The new memory size required (in bytes).
+    //!
+    //! \return The address of the reallocated memory, or nullptr. If a non-null address is returned, it is
+    //!         guaranteed to have the specified alignment.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
+    //! requests.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    //!
+    virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
+    {
+        return nullptr;
+    }
+
+    //!
+    //! \brief A thread-safe callback implemented by the application to handle release of GPU memory.
+    //!
+    //! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
+    //!
+    //! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
+    //! allocator object.
+    //!
+    //! \return True if the acquired memory is released successfully.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
+    //! requests.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by deallocateAsync
+    //!
+    TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
+
+    //!
+    //! \brief A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU memory.
+    //!
+    //! The default behavior is to call method allocate(), which is synchronous and thus loses
+    //! any performance benefits of asynchronous allocation. If you want the benefits of asynchronous
+    //! allocation, see discussion of IGpuAsyncAllocator vs. IGpuAllocator in the documentation
+    //! for nvinfer1::IGpuAllocator.
+    //!
+    //! \param size The size of the memory block required (in bytes).
+    //! \param alignment The required alignment of memory. Alignment will be zero
+    //!        or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
+    //!        Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
+    //!        An alignment value of zero indicates any alignment is acceptable.
+    //! \param flags Reserved for future use. In the current release, 0 will be passed.
+    //! \param stream specifies the cudaStream for asynchronous usage.
+    //!
+    //! \return If the allocation was successful, the start address of a device memory block of the requested size.
+    //! If an allocation request of size 0 is made, nullptr must be returned.
+    //! If an allocation request cannot be satisfied, nullptr must be returned.
+    //! If a non-null address is returned, it is guaranteed to have the specified alignment.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
+    //! requests.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    //!
+    virtual void* allocateAsync(
+        uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
+    {
+        return allocate(size, alignment, flags);
+    }
+    //!
+    //! \brief A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
+    //!
+    //! The default behavior is to call method deallocate(), which is synchronous and thus loses
+    //! any performance benefits of asynchronous deallocation. If you want the benefits of asynchronous
+    //! deallocation, see discussion of IGpuAsyncAllocator vs. IGpuAllocator in the documentation
+    //! for nvinfer1::IGpuAllocator.
+    //!
+    //! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
+    //!
+    //! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
+    //! allocator object.
+    //! \param stream specifies the cudaStream for asynchronous usage.
+    //!
+    //! \return True if the acquired memory is released successfully.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
+    //! requests.
+    //!
+    //! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
+    //! albeit doing so will lose the performance advantage of asynchronous deallocation.
+    //! Either way, it is critical that it not actually free the memory until the current
+    //! stream position is reached.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    //!
+    virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
+    {
+        // Default implementation: the stream argument is ignored and the release is delegated to deallocate().
+        bool const released = deallocate(memory);
+        return released;
+    }
+
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        // Interface kind "IGpuAllocator", version 1.0.
+        InterfaceInfo info = {"IGpuAllocator", 1, 0};
+        return info;
+    }
+
+protected:
+    // @cond SuppressDoxyWarnings
+    // Copy operations: defaulted and reachable only from derived classes.
+    IGpuAllocator(IGpuAllocator const&) = default;
+    IGpuAllocator& operator=(IGpuAllocator const&) & = default;
+    // Move operations: defaulted and reachable only from derived classes.
+    IGpuAllocator(IGpuAllocator&&) = default;
+    IGpuAllocator& operator=(IGpuAllocator&&) & = default;
+    // @endcond
+};
+
+} // namespace v_1_0
+
+//!
+//! \class IGpuAllocator
+//!
+//! \brief Application-implemented class for controlling allocation on the GPU.
+//!
+//! \warning The lifetime of an IGpuAllocator object must exceed that of all objects that use it.
+//!
+//! This class is intended as a base class for allocators that implement synchronous allocation.
+//! If you want the benefits of asynchronous allocation, you can do either of the following:
+//!
+//! * Derive your class from IGpuAllocator and override all four of its virtual methods
+//!   for allocation/deallocation, including the two deprecated methods.
+//!
+//! * Derive your class from IGpuAsyncAllocator and override its two pure virtual
+//!   methods for allocation/deallocation.
+//!
+//! The latter style is preferred because it does not tie code to deprecated methods.
+//!
+//! \see IGpuAsyncAllocator
+//!
+using IGpuAllocator = v_1_0::IGpuAllocator;
+
+//!
+//! \class IRuntime
+//!
+//! \brief Allows a serialized functionally unsafe engine to be deserialized.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IRuntime : public INoCopy
+{
+public:
+    virtual ~IRuntime() noexcept = default;
+
+    //!
+    //! \brief Sets the DLA core used by the network. Defaults to -1.
+    //!
+    //! \param dlaCore The DLA core to execute the engine on, in the range [0,getNbDLACores()).
+    //!
+    //! This function is used to specify which DLA core to use via indexing, if multiple DLA cores are available.
+    //!
+    //! \warning If getNbDLACores() returns 0, then this function does nothing.
+    //!
+    //! \see getDLACore()
+    //!
+    void setDLACore(int32_t dlaCore) noexcept
+    {
+        mImpl->setDLACore(dlaCore);
+    }
+
+    //!
+    //! \brief Get the DLA core that the engine executes on.
+    //!
+    //! \return assigned DLA core or -1 for DLA not present or unset.
+    //!
+    int32_t getDLACore() const noexcept
+    {
+        return mImpl->getDLACore();
+    }
+
+    //!
+    //! \brief Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
+    //!
+    int32_t getNbDLACores() const noexcept
+    {
+        return mImpl->getNbDLACores();
+    }
+
+    //!
+    //! \brief Set the GPU allocator.
+    //!
+    //! \param allocator The GPU allocator to be used by the runtime. All GPU memory acquired will use this
+    //! allocator.
+    //!
+    //! Default: allocateAsync uses cudaMallocAsync if cudaDevAttrMemoryPoolsSupported returns true, otherwise falls
+    //! back to cudaMalloc. allocate always uses cudaMalloc.
+    //!
+    //! If nullptr is passed, the default allocator will be used.
+    //!
+    void setGpuAllocator(IGpuAllocator* allocator) noexcept
+    {
+        mImpl->setGpuAllocator(allocator);
+    }
+
+    //!
+    //! \brief Set the ErrorRecorder for this interface
+    //!
+    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
+    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+    //! a recorder has been registered.
+    //!
+    //! If an error recorder is not set, messages will be sent to the global log stream.
+    //!
+    //! \param recorder The error recorder to register with this interface.
+    //!
+    //! \see getErrorRecorder()
+    //!
+    void setErrorRecorder(IErrorRecorder* recorder) noexcept
+    {
+        mImpl->setErrorRecorder(recorder);
+    }
+
+    //!
+    //! \brief Get the ErrorRecorder assigned to this interface.
+    //!
+    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
+    //! an error handler has not been set.
+    //!
+    //! \return A pointer to the IErrorRecorder object that has been registered.
+    //!
+    //! \see setErrorRecorder()
+    //!
+    IErrorRecorder* getErrorRecorder() const noexcept
+    {
+        return mImpl->getErrorRecorder();
+    }
+
+    //!
+    //! \brief Deserialize an engine from host memory.
+    //!
+    //! If an error recorder has been set for the runtime, it will also be passed to the engine.
+    //!
+    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
+    //! behavior.
+    //!
+    //! \param blob The memory that holds the serialized engine.
+    //! \param size The size of the memory.
+    //!
+    //! \return The engine, or nullptr if it could not be deserialized.
+    //!
+    ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
+    {
+        return mImpl->deserializeCudaEngine(blob, size);
+    }
+
+    //!
+    //! \brief Deserialize an engine from a stream.
+    //!
+    //! If an error recorder has been set for the runtime, it will also be passed to the
+    //! engine.
+    //!
+    //! This deserialization path will reduce host memory usage when weight streaming is enabled.
+    //!
+    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
+    //! behavior.
+    //!
+    //! \param streamReader A read-only stream from which TensorRT will deserialize a
+    //!        previously serialized engine.
+    //!
+    //! \return The engine, or nullptr if it could not be deserialized.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.7. Superseded by deserializeCudaEngine that takes an IStreamReaderV2
+    //! instead of IStreamReader.
+    //!
+    TRT_DEPRECATED ICudaEngine* deserializeCudaEngine(IStreamReader& streamReader)
+    {
+        return mImpl->deserializeCudaEngine(streamReader);
+    }
+
+    //!
+    //! \brief Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and
+    //! device pointers.
+    //!
+    //! If an error recorder has been set for the runtime, it will also be passed to the
+    //! engine.
+    //!
+    //! This deserialization path will reduce engine load time when applied with GDS (GPU Direct storage), or when
+    //! weight streaming is enabled.
+    //!
+    //! \warning Destroying the IRuntime before destroying all associated ICudaEngine instances results in undefined
+    //! behavior.
+    //!
+    //! \param streamReader A read-only stream from which TensorRT will deserialize a previously serialized engine.
+    //!
+    //! \return The engine, or nullptr if it could not be deserialized. The pointer may not be valid immediately after
+    //! the function returns.
+    //!
+    ICudaEngine* deserializeCudaEngine(IStreamReaderV2& streamReader)
+    {
+        return mImpl->deserializeCudaEngineV2(streamReader);
+    }
+
+    //!
+    //! \brief Get the logger with which the runtime was created.
+    //!
+    //! \return The logger.
+    //!
+    ILogger* getLogger() const noexcept
+    {
+        return mImpl->getLogger();
+    }
+
+    //!
+    //! \brief Set the maximum number of threads.
+    //!
+    //! \param maxThreads The maximum number of threads that can be used by the runtime.
+    //! \return True if successful, false otherwise.
+    //!
+    //! The default value is 1 and includes the current thread.
+    //! A value greater than 1 permits TensorRT to use multi-threaded algorithms.
+    //! A value less than 1 triggers a kINVALID_ARGUMENT error.
+    //!
+    bool setMaxThreads(int32_t maxThreads) noexcept
+    {
+        return mImpl->setMaxThreads(maxThreads);
+    }
+
+    //!
+    //! \brief Get the maximum number of threads that can be used by the runtime.
+    //!
+    //! Retrieves the maximum number of threads that can be used by the runtime.
+    //!
+    //! \return The maximum number of threads that can be used by the runtime.
+    //!
+    //! \see setMaxThreads()
+    //!
+    int32_t getMaxThreads() const noexcept
+    {
+        return mImpl->getMaxThreads();
+    }
+
+    //!
+    //! \brief Set the directory that will be used by this runtime for temporary files.
+    //!
+    //! On some platforms the TensorRT runtime may need to create and use temporary files
+    //! with read/write/execute permissions to implement runtime functionality.
+    //!
+    //! \param path Path to the temporary directory for use, or nullptr.
+    //!
+    //! If path is nullptr, then TensorRT will use platform-specific heuristics to pick
+    //! a default temporary directory if required:
+    //!
+    //! - On UNIX/Linux platforms, TensorRT will first try the TMPDIR environment variable, then fall back to /tmp
+    //! - On Windows, TensorRT will try the TEMP environment variable.
+    //!
+    //! See the TensorRT Developer Guide for more information.
+    //!
+    //! The default value is nullptr.
+    //!
+    //! \warning If path is not nullptr, it must be a non-empty string representing a relative
+    //! or absolute path in the format expected by the host operating system.
+    //!
+    //! \warning The string path must be null-terminated, and be at most 4096 bytes including the
+    //! terminator. Note that the operating system may have stricter path length requirements.
+    //!
+    //! \warning The process using TensorRT must have rwx permissions for the temporary directory,
+    //! and the directory shall be configured to disallow other users from modifying created files
+    //! (e.g. on Linux, if the directory is shared with other users, the sticky bit must be set).
+    //!
+    //! \see getTemporaryDirectory()
+    //!
+    void setTemporaryDirectory(char const* path) noexcept
+    {
+        return mImpl->setTemporaryDirectory(path);
+    }
+
+    //!
+    //! \brief Get the directory that will be used by this runtime for temporary files.
+    //!
+    //! \returns A path to the temporary directory in use, or nullptr if no path is specified.
+    //!
+    //! \see setTemporaryDirectory()
+    char const* getTemporaryDirectory() const noexcept
+    {
+        return mImpl->getTemporaryDirectory();
+    }
+
+    //!
+    //! \brief Set the tempfile control flags for this runtime.
+    //!
+    //! \param flags The flags to set.
+    //!
+    //! The default value is all flags set, i.e.
+    //!
+    //! (1U << static_cast<uint32_t>(kALLOW_IN_MEMORY_FILES)) | (1U << static_cast<uint32_t>(kALLOW_TEMPORARY_FILES))
+    //!
+    //! \see TempfileControlFlag, TempfileControlFlags, getTempfileControlFlags()
+    //!
+    void setTempfileControlFlags(TempfileControlFlags flags) noexcept
+    {
+        return mImpl->setTempfileControlFlags(flags);
+    }
+
+    //!
+    //! \brief Get the tempfile control flags for this runtime.
+    //!
+    //! \return The flags currently set.
+    //!
+    //! \see TempfileControlFlag, TempfileControlFlags, setTempfileControlFlags()
+    //!
+    TempfileControlFlags getTempfileControlFlags() const noexcept
+    {
+        return mImpl->getTempfileControlFlags();
+    }
+
+    //!
+    //! \brief Get the local plugin registry that can be used by the runtime.
+    //!
+    //! \return The local plugin registry that can be used by the runtime.
+    //!
+    IPluginRegistry& getPluginRegistry() noexcept
+    {
+        return mImpl->getPluginRegistry();
+    }
+
+    //!
+    //! \brief Load IRuntime from the file.
+    //!
+    //! This method loads a runtime library from a shared library file. The runtime can then be used to execute
+    //! a plan file built with BuilderFlag::kVERSION_COMPATIBLE and BuilderFlag::kEXCLUDE_LEAN_RUNTIME both set
+    //! and built with the same version of TensorRT as the loaded runtime library.
+    //!
+    //! \param path Path to the runtime lean library.
+    //!
+    //! \return The runtime library, or nullptr if it could not be loaded.
+    //!
+    //! \warning The path string must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    IRuntime* loadRuntime(char const* path) noexcept
+    {
+        return mImpl->loadRuntime(path);
+    }
+
+    //!
+    //! \brief Set whether the runtime is allowed to deserialize engines with host executable code.
+    //!
+    //! \param allowed Whether the runtime is allowed to deserialize engines with host executable code.
+    //!
+    //! The default value is false.
+    //!
+    void setEngineHostCodeAllowed(bool allowed) noexcept
+    {
+        return mImpl->setEngineHostCodeAllowed(allowed);
+    }
+
+    //!
+    //! \brief Get whether the runtime is allowed to deserialize engines with host executable code.
+    //!
+    //! \return Whether the runtime is allowed to deserialize engines with host executable code.
+    //!
+    bool getEngineHostCodeAllowed() const noexcept
+    {
+        return mImpl->getEngineHostCodeAllowed();
+    }
+
+protected:
+    apiv::VRuntime* mImpl;
+};
+
+//!
+//! \class IRefitter
+//!
+//! \brief Updates weights in an engine.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IRefitter : public INoCopy
+{
+public:
+    virtual ~IRefitter() noexcept = default;
+
+    //!
+    //! \brief Specify new weights for a layer of given name.
+    //! Returns true on success, or false if new weights are rejected.
+    //! Possible reasons for rejection are:
+    //!
+    //! * There is no such layer by that name.
+    //! * The layer does not have weights with the specified role.
+    //! * The count of weights is inconsistent with the layer's original specification.
+    //! * The type of weights is inconsistent with the layer's original specification.
+    //!
+    //! Modifying the weights before method refitCudaEngine or refitCudaEngineAsync returns will result in undefined
+    //! behavior.
+    //!
+    //! \warning The string layerName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
+    {
+        return mImpl->setWeights(layerName, role, weights);
+    }
+
+    //!
+    //! \brief Refits associated engine.
+    //!
+    //! \return True on success, or false if new weights validation fails or getMissingWeights() != 0 before the call.
+    //! If false is returned, a subset of weights may have been refitted.
+    //!
+    //! The behavior is undefined if the engine has pending enqueued work.
+    //! Provided weights on CPU or GPU can be unset and released, or updated after refitCudaEngine returns.
+    //!
+    //! IExecutionContexts associated with the engine remain valid for use afterwards. There is no need to set the same
+    //! weights repeatedly for multiple refit calls as the weights memory can be updated directly instead.
+    //!
+    bool refitCudaEngine() noexcept
+    {
+        return mImpl->refitCudaEngine();
+    }
+
+    //!
+    //! \brief Get description of missing weights.
+    //!
+    //! For example, if some Weights have been set, but the engine was optimized
+    //! in a way that combines weights, any unsupplied Weights in the combination
+    //! are considered missing.
+    //!
+    //! \param size The number of items that can be safely written to a non-null layerNames or roles.
+    //! \param layerNames Where to write the layer names.
+    //! \param roles Where to write the weights roles.
+    //!
+    //! \return The number of missing Weights.
+    //!
+    //! If layerNames!=nullptr, each written pointer points to a string owned by
+    //! the engine being refit, and becomes invalid when the engine is destroyed.
+    //!
+    int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
+    {
+        return mImpl->getMissing(size, layerNames, roles);
+    }
+
+    //!
+    //! \brief Get description of all weights that could be refit.
+    //!
+    //! \param size The number of items that can be safely written to a non-null layerNames or roles.
+    //! \param layerNames Where to write the layer names.
+    //! \param roles Where to write the weights roles.
+    //!
+    //! \return The number of Weights that could be refit.
+    //!
+    //! If layerNames!=nullptr, each written pointer points to a string owned by
+    //! the engine being refit, and becomes invalid when the engine is destroyed.
+    //!
+    int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
+    {
+        return mImpl->getAll(size, layerNames, roles);
+    }
+
+    //!
+    //! \brief Update dynamic range for a tensor.
+    //!
+    //! \param tensorName The name of an ITensor in the network.
+    //! \param min The minimum of the dynamic range for the tensor.
+    //! \param max The maximum of the dynamic range for the tensor.
+    //!
+    //! \return True if successful; false otherwise.
+    //!
+    //! Returns false if there is no Int8 engine tensor derived from
+    //! a network tensor of that name.  If successful, then getMissing
+    //! may report that some weights need to be supplied.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept
+    {
+        return mImpl->setDynamicRange(tensorName, min, max);
+    }
+
+    //!
+    //! \brief Get minimum of dynamic range.
+    //!
+    //! \return Minimum of dynamic range.
+    //!
+    //! If the dynamic range was never set, returns the minimum computed during calibration.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED float getDynamicRangeMin(char const* tensorName) const noexcept
+    {
+        return mImpl->getDynamicRangeMin(tensorName);
+    }
+
+    //!
+    //! \brief Get maximum of dynamic range.
+    //!
+    //! \return Maximum of dynamic range.
+    //!
+    //! If the dynamic range was never set, returns the maximum computed during calibration.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED float getDynamicRangeMax(char const* tensorName) const noexcept
+    {
+        return mImpl->getDynamicRangeMax(tensorName);
+    }
+
+    //!
+    //! \brief Get names of all tensors that have refittable dynamic ranges.
+    //!
+    //! \param size The number of items that can be safely written to a non-null tensorNames.
+    //! \param tensorNames Where to write the tensor names.
+    //!
+    //! \return The number of tensors that have refittable dynamic ranges.
+    //!
+    //! If tensorNames!=nullptr, each written pointer points to a string owned by
+    //! the engine being refit, and becomes invalid when the engine is destroyed.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by explicit quantization.
+    //!
+    TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
+    {
+        return mImpl->getTensorsWithDynamicRange(size, tensorNames);
+    }
+
+    //!
+    //! \brief Set the ErrorRecorder for this interface
+    //!
+    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
+    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+    //! a recorder has been registered.
+    //!
+    //! If an error recorder is not set, messages will be sent to the global log stream.
+    //!
+    //! \param recorder The error recorder to register with this interface.
+    //!
+    //! \see getErrorRecorder()
+    //!
+    void setErrorRecorder(IErrorRecorder* recorder) noexcept
+    {
+        mImpl->setErrorRecorder(recorder);
+    }
+
+    //!
+    //! \brief Get the ErrorRecorder assigned to this interface.
+    //!
+    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
+    //! an error handler has not been set.
+    //!
+    //! \return A pointer to the IErrorRecorder object that has been registered.
+    //!
+    //! \see setErrorRecorder()
+    //!
+    IErrorRecorder* getErrorRecorder() const noexcept
+    {
+        return mImpl->getErrorRecorder();
+    }
+
+    //!
+    //! \brief Specify new weights of given name.
+    //!
+    //! \param name The name of the weights to be refit.
+    //! \param weights The new weights to associate with the name.
+    //!
+    //! Returns true on success, or false if new weights are rejected.
+    //! Possible reasons for rejection are:
+    //!
+    //! * The name of weights is nullptr or does not correspond to any refittable weights.
+    //! * The count of the weights is inconsistent with the count returned from calling getWeightsPrototype() with the
+    //! same name.
+    //! * The type of the weights is inconsistent with the type returned from calling getWeightsPrototype() with the
+    //! same name.
+    //!
+    //! Modifying the weights before method refitCudaEngine or refitCudaEngineAsync returns will result in undefined
+    //! behavior.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    bool setNamedWeights(char const* name, Weights weights) noexcept
+    {
+        return mImpl->setNamedWeights(name, weights);
+    }
+
+    //!
+    //! \brief Get names of missing weights.
+    //!
+    //! For example, if some Weights have been set, but the engine was optimized
+    //! in a way that combines weights, any unsupplied Weights in the combination
+    //! are considered missing.
+    //!
+    //! \param size The number of weights names that can be safely written to.
+    //! \param weightsNames The names of the weights to be updated, or nullptr for unnamed weights.
+    //!
+    //! \return The number of missing Weights.
+    //!
+    //! If weightsNames!=nullptr, each written pointer points to a string owned by
+    //! the engine being refit, and becomes invalid when the engine is destroyed.
+    //!
+    int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
+    {
+        return mImpl->getMissingWeights(size, weightsNames);
+    }
+
+    //!
+    //! \brief Get names of all weights that could be refit.
+    //!
+    //! \param size The number of weights names that can be safely written to.
+    //! \param weightsNames The names of the weights to be updated, or nullptr for unnamed weights.
+    //!
+    //! \return The number of Weights that could be refit.
+    //!
+    //! If weightsNames!=nullptr, each written pointer points to a string owned by
+    //! the engine being refit, and becomes invalid when the engine is destroyed.
+    //!
+    int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
+    {
+        return mImpl->getAllWeights(size, weightsNames);
+    }
+
+    //!
+    //! \brief Get the logger with which the refitter was created.
+    //!
+    //! \return The logger.
+    //!
+    ILogger* getLogger() const noexcept
+    {
+        return mImpl->getLogger();
+    }
+
+    //!
+    //! \brief Set the maximum number of threads.
+    //!
+    //! \param maxThreads The maximum number of threads that can be used by the refitter.
+    //!
+    //! \return True if successful, false otherwise.
+    //!
+    //! The default value is 1 and includes the current thread.
+    //! A value greater than 1 permits TensorRT to use multi-threaded algorithms.
+    //! A value less than 1 triggers a kINVALID_ARGUMENT error.
+    //!
+    bool setMaxThreads(int32_t maxThreads) noexcept
+    {
+        return mImpl->setMaxThreads(maxThreads);
+    }
+
+    //!
+    //! \brief Get the maximum number of threads that can be used by the refitter.
+    //!
+    //! Retrieves the maximum number of threads that can be used by the refitter.
+    //!
+    //! \return The maximum number of threads that can be used by the refitter.
+    //!
+    //! \see setMaxThreads()
+    //!
+    int32_t getMaxThreads() const noexcept
+    {
+        return mImpl->getMaxThreads();
+    }
+
+    //!
+    //! \brief Specify new weights on a specified device of given name.
+    //!
+    //! \param name The name of the weights to be refitted.
+    //! \param weights The new weights on the specified device.
+    //! \param location The location (host vs. device) of the new weights.
+    //!
+    //! \return True on success, or false if new weights are rejected.
+    //! Possible reasons for rejection are:
+    //!
+    //! * The name of the weights is nullptr or does not correspond to any refittable weights.
+    //! * The count of the weights is inconsistent with the count returned from calling getWeightsPrototype() with the
+    //! same name.
+    //! * The type of the weights is inconsistent with the type returned from calling getWeightsPrototype() with the
+    //! same name.
+    //!
+    //! It is allowed to provide some weights on CPU and others on GPU.
+    //! Modifying the weights before the method refitCudaEngine() or refitCudaEngineAsync() completes will result in
+    //! undefined behavior.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
+    {
+        return mImpl->setNamedWeightsWithLocation(name, weights, location);
+    }
+
+    //!
+    //! \brief Get weights associated with the given name.
+    //!
+    //! \param weightsName The name of the weights to be refitted.
+    //!
+    //! \return Weights associated with the given name.
+    //!
+    //! If the weights were never set, returns null weights and reports an error to the refitter errorRecorder.
+    //!
+    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    Weights getNamedWeights(char const* weightsName) const noexcept
+    {
+        return mImpl->getNamedWeights(weightsName);
+    }
+
+    //!
+    //! \brief Get location for the weights associated with the given name.
+    //!
+    //! \param weightsName The name of the weights to be refitted.
+    //!
+    //! \return Location for the weights associated with the given name.
+    //!
+    //! If the weights were never set, returns TensorLocation::kHOST and reports an error to the refitter errorRecorder.
+    //!
+    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    TensorLocation getWeightsLocation(char const* weightsName) const noexcept
+    {
+        return mImpl->getWeightsLocation(weightsName);
+    }
+
+    //!
+    //! \brief Unset weights associated with the given name.
+    //!
+    //! \param weightsName The name of the weights to be refitted.
+    //!
+    //! \return False if the weights were never set, returns true otherwise.
+    //!
+    //! Unset weights before releasing them.
+    //!
+    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    bool unsetNamedWeights(char const* weightsName) noexcept
+    {
+        return mImpl->unsetNamedWeights(weightsName);
+    }
+
+    //!
+    //! \brief Set whether to validate weights during refitting.
+    //!
+    //! \param weightsValidation Indicate whether to validate weights during refitting.
+    //!
+    //! When set to true, TensorRT will validate weights during FP32 to FP16/BF16 weights conversions or
+    //! sparsifying weights in the refit call. If provided weights are not proper for some weights transformations,
+    //! TensorRT will issue a warning and continue the transformation for minor issues (such as overflow during
+    //! narrowing conversion), or issue an error and stop the refitting process for severe issues (such as sparsifying
+    //! dense weights). By default the flag is true. Set the flag to false for faster refitting performance.
+    //!
+    void setWeightsValidation(bool weightsValidation) noexcept
+    {
+        return mImpl->setWeightsValidation(weightsValidation);
+    }
+
+    //!
+    //! \brief Get whether to validate weights values during refitting.
+    //!
+    bool getWeightsValidation() const noexcept
+    {
+        return mImpl->getWeightsValidation();
+    }
+
+    //!
+    //! \brief Enqueue weights refitting of the associated engine on the given stream.
+    //!
+    //! \param stream The stream to enqueue the weights updating task.
+    //!
+    //! \return True on success, or false if new weights validation fails or getMissingWeights() != 0 before the call.
+    //! If false is returned, a subset of weights may have been refitted.
+    //!
+    //! The behavior is undefined if the engine has pending enqueued work on a different stream from the provided one.
+    //! Provided weights on CPU can be unset and released, or updated after refitCudaEngineAsync returns.
+    //! Freeing or updating of the provided weights on GPU can be enqueued on the same stream after refitCudaEngineAsync
+    //! returns.
+    //!
+    //! IExecutionContexts associated with the engine remain valid for use afterwards. There is no need to set the same
+    //! weights repeatedly for multiple refit calls as the weights memory can be updated directly instead. The weights
+    //! updating task should use the same stream as the one used for the refit call.
+    //!
+    bool refitCudaEngineAsync(cudaStream_t stream) noexcept
+    {
+        return mImpl->refitCudaEngineAsync(stream);
+    }
+
+    //!
+    //! \brief Get the Weights prototype associated with the given name.
+    //!
+    //! \param weightsName The name of the weights to be refitted.
+    //!
+    //! \return Weights prototype associated with the given name.
+    //!
+    //! The type and count of weights prototype is the same as weights used for engine building. The values property
+    //! is nullptr for weights prototypes. The count of the weights prototype is -1 when the name of the weights is
+    //! nullptr or does not correspond to any refittable weights.
+    //!
+    //! \warning The string weightsName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    Weights getWeightsPrototype(char const* weightsName) const noexcept
+    {
+        return mImpl->getWeightsPrototype(weightsName);
+    }
+
+protected:
+    apiv::VRefitter* mImpl;
+};
+
+//!
+//! \enum OptProfileSelector
+//!
+//! \brief When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dimensions),
+//!        select whether we are interested in the minimum, optimum, or maximum values for these parameters.
+//!        The minimum and maximum specify the permitted range that is supported at runtime, while the optimum value
+//!        is used for the kernel selection. This should be the "typical" value that is expected to occur at runtime.
+//!
+//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValuesV2(), IOptimizationProfile::setShapeValues()
+//!
+enum class OptProfileSelector : int32_t
+{
+    kMIN = 0, //!< This is used to set or get the minimum permitted value for dynamic dimensions etc.
+    kOPT = 1, //!< This is used to set or get the value that is used in the optimization (kernel selection).
+    kMAX = 2  //!< This is used to set or get the maximum permitted value for dynamic dimensions etc.
+};
+
+//!
+//! \brief Number of different values of OptProfileSelector enum.
+//!
+//! \see OptProfileSelector
+//!
+template <>
+constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
+{
+    return static_cast<int32_t>(OptProfileSelector::kMAX) + 1; // kMIN, kOPT, kMAX => 3 values
+}
+
+//!
+//! \class IOptimizationProfile
+//! \brief Optimization profile for dynamic input dimensions and shape tensors.
+//!
+//! When building an ICudaEngine from an INetworkDefinition that has dynamically resizable inputs (at least
+//! one input tensor has one or more of its dimensions specified as -1) or shape input tensors, users need to specify
+//! at least one optimization profile. Optimization profiles are numbered 0, 1, ...
+//! The first optimization profile that has been defined (with index 0) will be used by the ICudaEngine whenever no
+//! optimization profile has been selected explicitly. If none of the inputs are dynamic, the default optimization
+//! profile will be generated automatically unless it is explicitly provided by the user (this is possible but not
+//! required in this case). If more than a single optimization profile is defined, users may set a target for how
+//! much additional weight space should be maximally allocated to each additional profile (as a fraction of the
+//! maximum, unconstrained memory).
+//!
+//! Users set optimum input tensor dimensions, as well as minimum and maximum input tensor dimensions. The builder
+//! selects the kernels that result in the lowest runtime for the optimum input tensor dimensions, and are valid for
+//! all input tensor sizes in the valid range between minimum and maximum dimensions. A runtime error will be raised
+//! if the input tensor dimensions fall outside the valid range for this profile. Likewise, users provide minimum,
+//! optimum, and maximum values for all shape tensor input values.
+//!
+//! \see IBuilderConfig::addOptimizationProfile()
+//!
+class IOptimizationProfile : public INoCopy
+{
+public:
+    //!
+    //! \brief Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
+    //!
+    //! This function must be called three times (for the minimum, optimum, and maximum) for any network input tensor
+    //! that has dynamic dimensions. If minDims, optDims, and maxDims are the minimum, optimum, and maximum dimensions,
+    //! and networkDims are the dimensions for this input tensor that are provided to the INetworkDefinition object,
+    //! then the following conditions must all hold:
+    //!
+    //! (1) minDims.nbDims == optDims.nbDims == maxDims.nbDims == networkDims.nbDims
+    //! (2) 0 <= minDims.d[i] <= optDims.d[i] <= maxDims.d[i] for i = 0, ..., networkDims.nbDims-1
+    //! (3) if networkDims.d[i] != -1, then minDims.d[i] == optDims.d[i] == maxDims.d[i] == networkDims.d[i]
+    //!
+    //! This function may (but need not) be called for an input tensor that does not have dynamic dimensions. In this
+    //! case, the third argument must always equal networkDims.
+    //!
+    //! \param inputName The input tensor name
+    //! \param select Whether to set the minimum, optimum, or maximum dimensions
+    //! \param dims The minimum, optimum, or maximum dimensions for this input tensor
+    //!
+    //! \return false if an inconsistency was detected (e.g. the rank does not match another dimension that was
+    //!         previously set for the same input), true if no inconsistency was detected. Note that inputs can be
+    //!         validated only partially; a full validation is performed at engine build time.
+    //!
+    //! \warning If run on DLA, minimum, optimum, and maximum dimensions must be the same.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
+    {
+        return mImpl->setDimensions(inputName, select, dims);
+    }
+
+    //!
+    //! \brief Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
+    //!
+    //! If the dimensions have not been previously set via setDimensions(), return an invalid Dims with nbDims == -1.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
+    {
+        return mImpl->getDimensions(inputName, select);
+    }
+
+    //!
+    //! \brief Set the minimum / optimum / maximum values for an input shape tensor.
+    //!
+    //! This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true).
+    //! This implies that the dimensions of t are fixed at network definition time and the volume does not exceed 64.
+    //! This function must not be called for any input tensor that is not a shape tensor.
+    //!
+    //! Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1
+    //! if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the
+    //! minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for
+    //! i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals.
+    //!
+    //! Shape tensors are tensors that contribute to shape calculations in some way. While input shape tensors can be
+    //! type kINT32 or kINT64, the values used to set the minimum, optimum, and maximum values must fit in int32_t.
+    //!
+    //! Examples:
+    //!
+    //! * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard.
+    //!   The corresponding minVal[i] should be -1.
+    //!
+    //! * A shape tensor used as the stride input to ISliceLayer can contain any valid strides.
+    //!   The values could be positive, negative, or zero.
+    //!
+    //! * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can
+    //!   contain any non-positive values that yield a valid slice operation.
+    //!
+    //! Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization.
+    //!
+    //! \param inputName The input tensor name
+    //! \param select Whether to set the minimum, optimum, or maximum input values.
+    //! \param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements.
+    //!               For multidimensional tensors, the array is in row-major order.
+    //! \param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1)
+    //!
+    //! \return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same
+    //!         tensor), else true. As for setDimensions(), a full validation can only be performed at engine build
+    //!         time.
+    //!
+    //! \warning If run on DLA, minimum, optimum, and maximum shape values must be the same.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \warning When setShapeValuesV2 is called after setShapeValues, a following call to getShapeValues will
+    //! return nullptr. Vice versa, a call to setShapeValues undoes the effects of setShapeValuesV2.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.11. Superseded by setShapeValuesV2().
+    //!
+    TRT_DEPRECATED bool setShapeValues(
+        char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
+    {
+        return mImpl->setShapeValues(inputName, select, values, nbValues);
+    }
+
+    //!
+    //! \brief Get the number of values for an input shape tensor.
+    //!
+    //! This will return the number of shape values if setShapeValues() has been called before for this input tensor.
+    //! Otherwise, return -1.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    int32_t getNbShapeValues(char const* inputName) const noexcept
+    {
+        return mImpl->getNbShapeValues(inputName);
+    }
+
+    //!
+    //! \brief Get the minimum / optimum / maximum values for an input shape tensor.
+    //!
+    //! If the shape values have not been set previously with setShapeValues(), this returns nullptr.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.11. Superseded by getShapeValuesV2().
+    //!
+    TRT_DEPRECATED int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
+    {
+        return mImpl->getShapeValues(inputName, select);
+    }
+
+    //!
+    //! \brief Set a target for extra GPU memory that may be used by this profile.
+    //!
+    //! \param target Additional memory that the builder should aim to maximally allocate for this profile, as a
+    //!        fraction of the memory it would use if the user did not impose any constraints on memory. This
+    //!        unconstrained case is the default; it corresponds to target == 1.0. If target == 0.0, the builder
+    //!        aims to create the new optimization profile without allocating any additional weight memory.
+    //!        Valid inputs lie between 0.0 and 1.0. This parameter is only a hint, and TensorRT does not guarantee
+    //!        that the target will be reached. This parameter is ignored for the first (default) optimization profile
+    //!        that is defined.
+    //!
+    //! \return true if the input is in the valid range (between 0 and 1 inclusive), else false.
+    //!
+    bool setExtraMemoryTarget(float target) noexcept
+    {
+        return mImpl->setExtraMemoryTarget(target);
+    }
+
+    //!
+    //! \brief Get the extra memory target that has been defined for this profile.
+    //!
+    //! This defaults to 1.0F.
+    //!
+    //! \return The valid value set by setExtraMemoryTarget or 1.0F.
+    //!
+    float getExtraMemoryTarget() const noexcept
+    {
+        return mImpl->getExtraMemoryTarget();
+    }
+
+    //!
+    //! \brief Check whether the optimization profile can be passed to an IBuilderConfig object.
+    //!
+    //! This function performs partial validation, by e.g. checking that whenever one of the minimum, optimum, or
+    //! maximum dimensions of a tensor have been set, the others have also been set and have the same rank, as
+    //! well as checking that the optimum dimensions are always at least as large as the minimum dimensions, and
+    //! that the maximum dimensions are at least as large as the optimum dimensions. Some validation steps require
+    //! knowledge of the network definition and are deferred to engine build time.
+    //!
+    //!
+    //! \return true if the optimization profile is valid and may be passed to an IBuilderConfig, else false.
+    //!
+    bool isValid() const noexcept
+    {
+        return mImpl->isValid();
+    }
+
+    //!
+    //! \brief Set the minimum / optimum / maximum values for an input shape tensor.
+    //!
+    //! This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true).
+    //! This implies that the dimensions of t are fixed at network definition time and the volume does not exceed 64.
+    //! This function must not be called for any input tensor that is not a shape tensor.
+    //!
+    //! Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1
+    //! if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the
+    //! minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for
+    //! i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals.
+    //!
+    //! Shape tensors are tensors that contribute to shape calculations in some way. While input shape tensors can be
+    //! type kINT32 or kINT64, the values used to set the minimum, optimum, and maximum values must fit in int64_t.
+    //!
+    //! Examples:
+    //!
+    //! * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard.
+    //!   The corresponding minVal[i] should be -1.
+    //!
+    //! * A shape tensor used as the stride input to ISliceLayer can contain any valid strides.
+    //!   The values could be positive, negative, or zero.
+    //!
+    //! * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can
+    //!   contain any non-positive values that yield a valid slice operation.
+    //!
+    //! Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization.
+    //!
+    //! \param inputName The input tensor name
+    //! \param select Whether to set the minimum, optimum, or maximum input values.
+    //! \param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements.
+    //!               For multidimensional tensors, the array is in row-major order.
+    //! \param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1)
+    //!
+    //! \return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same
+    //!         tensor), else true. As for setDimensions(), a full validation can only be performed at engine build
+    //!         time.
+    //!
+    //! \warning If run on DLA, minimum, optimum, and maximum shape values must be the same.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \warning When setShapeValues is called after setShapeValuesV2, the input shape values are overwritten as
+    //! 32-bit values and getShapeValuesV2 returns nullptr.
+    //!
+    bool setShapeValuesV2(
+        char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
+    {
+        return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
+    }
+
+    //!
+    //! \brief Get the minimum / optimum / maximum values for an input shape tensor.
+    //!
+    //! If the shape values have not been set previously with setShapeValuesV2(), this returns nullptr.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
+    {
+        return mImpl->getShapeValuesV2(inputName, select);
+    }
+
+protected:
+    apiv::VOptimizationProfile* mImpl;
+    virtual ~IOptimizationProfile() noexcept = default;
+};
+
+//!
+//! \enum TacticSource
+//!
+//! \brief List of tactic sources for TensorRT.
+//!
+//! \see TacticSources, IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources()
+//!
+enum class TacticSource : int32_t
+{
+    //! cuBLAS tactics. Disabled by default.
+    //! \note Disabling kCUBLAS will cause the cuBLAS handle passed to plugins in attachToContext to be null.
+    //! \deprecated Deprecated in TensorRT 10.0.
+    kCUBLAS TRT_DEPRECATED_ENUM = 0,
+
+    //! cuBLAS LT tactics. Disabled by default.
+    //! \deprecated Deprecated in TensorRT 9.0.
+    kCUBLAS_LT TRT_DEPRECATED_ENUM = 1,
+
+    //! cuDNN tactics. Disabled by default.
+    //! \note Disabling kCUDNN will cause the cuDNN handle passed to plugins in attachToContext to be null.
+    //! \deprecated Deprecated in TensorRT 10.0.
+    kCUDNN TRT_DEPRECATED_ENUM = 2,
+
+    //! Enables convolution tactics implemented with edge mask tables. These tactics tradeoff memory for performance by
+    //! consuming additional memory space proportional to the input size.
+    //! Enabled by default.
+    kEDGE_MASK_CONVOLUTIONS = 3,
+
+    //! Enables convolution tactics implemented with source-code JIT fusion. The engine building time may increase
+    //! when this is enabled. Enabled by default.
+    kJIT_CONVOLUTIONS = 4,
+};
+
+template <>
+constexpr inline int32_t EnumMax<TacticSource>() noexcept
+{
+    return static_cast<int32_t>(TacticSource::kJIT_CONVOLUTIONS) + 1; // last enumerator is 4 => 5 values
+} //!< Maximum number of tactic sources in TacticSource enum. \see TacticSource
+
+//!
+//! \brief Represents a collection of one or more TacticSource values
+//! combined using bitwise-OR operations.
+//!
+//! \see IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources()
+//!
+using TacticSources = uint32_t;
+
+//!
+//! \enum ProfilingVerbosity
+//!
+//! \brief List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
+//!
+//! \see IBuilderConfig::setProfilingVerbosity(),
+//!      IBuilderConfig::getProfilingVerbosity(),
+//!      IEngineInspector
+//!
+enum class ProfilingVerbosity : int32_t
+{
+    kLAYER_NAMES_ONLY = 0, //!< Print only the layer names. This is the default setting.
+    kNONE = 1,             //!< Do not print any layer information.
+    kDETAILED = 2,         //!< Print detailed layer information including layer names and layer parameters.
+};
+
+//! Maximum number of profile verbosity levels in ProfilingVerbosity enum. \see ProfilingVerbosity
+template <>
+constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
+{
+    return static_cast<int32_t>(ProfilingVerbosity::kDETAILED) + 1; // last enumerator is 2 => 3 values
+}
+
+//!
+//! \brief Represents one or more SerializationFlag values using binary OR
+//! operations, e.g., 1U << static_cast<uint32_t>(SerializationFlag::kEXCLUDE_LEAN_RUNTIME)
+//!
+//! \see ISerializationConfig::setFlags(), ISerializationConfig::getFlags()
+//!
+using SerializationFlags = uint32_t;
+
+//!
+//! \enum SerializationFlag
+//!
+//! \brief List of valid flags that the engine can enable when serializing the bytes.
+//!
+//! \see ISerializationConfig::setFlags(), ISerializationConfig::getFlags()
+//!
+enum class SerializationFlag : int32_t
+{
+    kEXCLUDE_WEIGHTS = 0,      //!< Exclude the weights that can be refitted.
+    kEXCLUDE_LEAN_RUNTIME = 1, //!< Exclude the lean runtime.
+};
+
+//! Maximum number of serialization flags in SerializationFlag enum. \see SerializationFlag
+template <>
+constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
+{
+    return static_cast<int32_t>(SerializationFlag::kEXCLUDE_LEAN_RUNTIME) + 1; // last enumerator is 1 => 2 values
+}
+
+//!
+//! \class ISerializationConfig
+//!
+//! \brief Holds properties for configuring an engine to serialize the binary.
+//!
+//! \see SerializationFlag
+//!
+class ISerializationConfig : public INoCopy
+{
+public:
+    virtual ~ISerializationConfig() noexcept = default;
+
+    //!
+    //! \brief Set the serialization flags to turn on for this config.
+    //!
+    //! The flags are listed in the SerializationFlag enum.
+    //!
+    //! \param serializationFlags The serialization flags for an engine.
+    //!
+    //! \note This function will override the previously set flags, rather than bitwise ORing the new flag.
+    //!
+    //! \see getFlags()
+    //!
+    bool setFlags(SerializationFlags serializationFlags) noexcept
+    {
+        return mImpl->setFlags(serializationFlags);
+    }
+
+    //!
+    //! \brief Get the serialization flags for this config.
+    //!
+    //! \return The serialization flags as a bitmask.
+    //!
+    //! \see setFlags()
+    //!
+    SerializationFlags getFlags() const noexcept
+    {
+        return mImpl->getFlags();
+    }
+
+    //!
+    //! \brief Clear a serialization flag.
+    //!
+    //! Clears the serialization flag from the config.
+    //!
+    //! \see setFlags()
+    //!
+    bool clearFlag(SerializationFlag serializationFlag) noexcept
+    {
+        return mImpl->clearFlag(serializationFlag);
+    }
+
+    //!
+    //! \brief Set a serialization flag.
+    //!
+    //! Add the input serialization flag to the already enabled flags.
+    //!
+    //! \see setFlags()
+    //!
+    bool setFlag(SerializationFlag serializationFlag) noexcept
+    {
+        return mImpl->setFlag(serializationFlag);
+    }
+
+    //!
+    //! \brief Returns true if the serialization flag is set.
+    //!
+    //! \see getFlags()
+    //!
+    //! \return True if flag is set, false if unset.
+    //!
+    bool getFlag(SerializationFlag serializationFlag) const noexcept
+    {
+        return mImpl->getFlag(serializationFlag);
+    }
+
+protected:
+    apiv::VSerializationConfig* mImpl;
+};
+
+//!
+//! \enum ExecutionContextAllocationStrategy
+//!
+//! \brief Different memory allocation behaviors for IExecutionContext.
+//!
+//! IExecutionContext requires a block of device memory for internal activation tensors during inference. The user can
+//! either let the execution context manage the memory in various ways or allocate the memory themselves.
+//!
+//! \see ICudaEngine::createExecutionContext()
+//! \see IExecutionContext::setDeviceMemory()
+//!
+enum class ExecutionContextAllocationStrategy : int32_t
+{
+    kSTATIC = 0,            //!< Default static allocation with the maximum size across all profiles.
+    kON_PROFILE_CHANGE = 1, //!< Reallocate for a profile when it's selected.
+    kUSER_MANAGED = 2,      //!< The user supplies custom allocation to the execution context.
+};
+
+//!
+//! \brief Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
+//!
+//! \see ExecutionContextAllocationStrategy
+//!
+template <>
+constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
+{
+    return static_cast<int32_t>(ExecutionContextAllocationStrategy::kUSER_MANAGED) + 1; // last enumerator is 2 => 3
+}
+
+
+//! \class IRuntimeConfig
+//!
+//! \brief A class for runtime configuration. This class is used during execution context creation.
+//!
+//! \see IRuntime, IBuilderConfig
+//!
+class IRuntimeConfig : public INoCopy
+{
+public:
+    virtual ~IRuntimeConfig() noexcept = default;
+
+    //!
+    //! \brief Select the allocation strategy for the execution context. The default is kSTATIC.
+    //!
+    //! \param strategy The allocation strategy to apply; forwarded unchanged to the implementation object.
+    //!
+    void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
+    {
+        mImpl->setExecutionContextAllocationStrategy(strategy);
+    }
+
+    //!
+    //! \brief Query the allocation strategy for the execution context.
+    //!
+    //! \return The allocation strategy reported by the implementation object.
+    //!
+    ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
+    {
+        return mImpl->getExecutionContextAllocationStrategy();
+    }
+
+
+protected:
+    apiv::VRuntimeConfig* mImpl;
+}; // class IRuntimeConfig
+
+//!
+//! \class ICudaEngine
+//!
+//! \brief An engine for executing inference on a built network, with functionally unsafe features.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class ICudaEngine : public INoCopy
+{
+public:
+    virtual ~ICudaEngine() noexcept = default;
+
+    //!
+    //! \brief Get shape of an input or output tensor.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \return shape of the tensor, with -1 in place of each dynamic runtime dimension,
+    //!         or Dims{-1, {}} if the provided name does not map to an input or output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    Dims getTensorShape(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorShape(tensorName);
+    }
+
+    //!
+    //! \brief Determine the required data type for a buffer from its tensor name.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \return The type of the data in the buffer, or DataType::kFLOAT if the provided name does not map to an input or
+    //! output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    DataType getTensorDataType(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorDataType(tensorName);
+    }
+
+    //!
+    //! \brief Get the number of layers in the network.
+    //!
+    //! The number of layers in the network is not necessarily the number in the original network definition, as layers
+    //! may be combined or eliminated as the engine is optimized. This value can be useful when building per-layer
+    //! tables, such as when aggregating profiling data over a number of executions.
+    //!
+    //! \return The number of layers in the network.
+    //!
+    int32_t getNbLayers() const noexcept
+    {
+        return mImpl->getNbLayers();
+    }
+
+    //!
+    //! \brief Serialize the engine into an IHostMemory object.
+    //!
+    //! \return An IHostMemory object that contains the serialized engine.
+    //!
+    //! The serialized engine may be deserialized with IRuntime::deserializeCudaEngine().
+    //!
+    //! \see IRuntime::deserializeCudaEngine()
+    //!
+    IHostMemory* serialize() const noexcept
+    {
+        return mImpl->serialize();
+    }
+
+    //!
+    //! \brief Create an execution context and specify the strategy for allocating internal activation memory.
+    //!
+    //! The default value for the allocation strategy is ExecutionContextAllocationStrategy::kSTATIC, which means the
+    //! context will pre-allocate a block of device memory that is sufficient for all profiles. The newly created
+    //! execution context will be assigned optimization profile 0. If an error recorder has been set for the engine, it
+    //! will also be passed to the execution context.
+    //!
+    //! \see IExecutionContext
+    //! \see IExecutionContext::setOptimizationProfileAsync()
+    //! \see ExecutionContextAllocationStrategy
+    //!
+    IExecutionContext* createExecutionContext(
+        ExecutionContextAllocationStrategy strategy = ExecutionContextAllocationStrategy::kSTATIC) noexcept
+    {
+        return mImpl->createExecutionContext(strategy);
+    }
+
+    //!
+    //! \brief Get whether an input or output tensor must be on GPU or CPU.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \return TensorLocation::kDEVICE if tensorName must be on GPU, or TensorLocation::kHOST if on CPU, or
+    //! TensorLocation::kDEVICE if the provided name does not map to an input or output tensor.
+    //!
+    //! The location is established at build time. E.g. shape tensors inputs are typically required to be on the CPU.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    TensorLocation getTensorLocation(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorLocation(tensorName);
+    }
+
+    //!
+    //! \brief True if tensor is required as input for shape calculations or is output from shape calculations.
+    //!
+    //! Return true for either of the following conditions:
+    //!
+    //! * The tensor is a network input, and its value is required for IExecutionContext::getTensorShape()
+    //!   to return the shape of a network output.
+    //!
+    //! * The tensor is a network output, and IExecutionContext::inferShapes() will compute its values.
+    //!
+    //! For example, if a network uses an input tensor "foo" as an addend to an IElementWiseLayer
+    //! that computes the "reshape dimensions" for IShuffleLayer, then isShapeInferenceIO("foo") == true.
+    //! If the network copies said input tensor "foo" to an output "bar", then
+    //! isShapeInferenceIO("bar") == true and IExecutionContext::inferShapes() will write to "bar".
+    //!
+    bool isShapeInferenceIO(char const* tensorName) const noexcept
+    {
+        return mImpl->isShapeInferenceIO(tensorName);
+    }
+
+    //!
+    //! \brief Determine whether a tensor is an input or output tensor.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \return kINPUT if tensorName is an input, kOUTPUT if tensorName is an output, or kNONE if neither.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorIOMode(tensorName);
+    }
+
+    //!
+    //! \brief Create an execution context without any device memory allocated.
+    //!
+    //! The device memory for execution of this context must be supplied by the application.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by createExecutionContext() with parameter.
+    //!
+    TRT_DEPRECATED IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept
+    {
+        return mImpl->createExecutionContextWithoutDeviceMemory();
+    }
+
+    //!
+    //! \brief Create an execution context with TensorRT JIT runtime config.
+    //!
+    //! \param runtimeConfig The runtime config for TensorRT JIT.
+    //!
+    //! \see IRuntimeConfig
+    //!
+    IExecutionContext* createExecutionContext(IRuntimeConfig* runtimeConfig) noexcept
+    {
+        return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
+    }
+
+    //!
+    //! \brief Create a runtime config for TensorRT JIT.
+    //!        The caller is responsible for ownership of the returned IRuntimeConfig object.
+    //!
+    //! \return An IRuntimeConfig object.
+    //!
+    //! \see IRuntimeConfig
+    //!
+    IRuntimeConfig* createRuntimeConfig() noexcept
+    {
+        return mImpl->createRuntimeConfig();
+    }
+
+    //!
+    //! \brief Return the maximum device memory required by the context over all profiles.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeV2().
+    //!
+    //! \see IExecutionContext::setDeviceMemory()
+    //!
+    TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
+    {
+        return mImpl->getDeviceMemorySize();
+    }
+
+    //!
+    //! \brief Return the maximum device memory required by the context for a profile.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeForProfileV2(int32_t).
+    //!
+    //! \see IExecutionContext::setDeviceMemoryV2()
+    //!
+    TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
+    {
+        return mImpl->getDeviceMemorySizeForProfile(profileIndex);
+    }
+
+    //!
+    //! \brief Return the maximum device memory required by the context over all profiles.
+    //!
+    //! This API is stateful, so its call returns different values based on the following calls:
+    //! * setWeightStreamingBudget()
+    //! * setWeightStreamingBudgetV2()
+    //!
+    //! \see IExecutionContext::setDeviceMemoryV2()
+    //! \see setWeightStreamingBudget()
+    //! \see setWeightStreamingBudgetV2()
+    //!
+    int64_t getDeviceMemorySizeV2() const noexcept
+    {
+        return mImpl->getDeviceMemorySizeV2();
+    }
+
+    //!
+    //! \brief Return the maximum device memory required by the context for a profile.
+    //!
+    //! This API is stateful, so its call returns different values based on the following calls:
+    //! * setWeightStreamingBudget()
+    //! * setWeightStreamingBudgetV2()
+    //!
+    //! \see IExecutionContext::setDeviceMemoryV2()
+    //! \see setWeightStreamingBudget()
+    //! \see setWeightStreamingBudgetV2()
+    //!
+    int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
+    {
+        return mImpl->getDeviceMemorySizeForProfileV2(profileIndex); // int64_t result; index forwarded unchecked
+    }
+
+    //!
+    //! \brief Return true if an engine can be refit.
+    //!
+    //! \see nvinfer1::createInferRefitter()
+    //!
+    bool isRefittable() const noexcept
+    {
+        return mImpl->isRefittable(); // the answer comes entirely from the implementation object
+    }
+
+    //!
+    //! \brief Return the number of bytes per component of an element, or -1 if the
+    //! tensor is not vectorized or provided name does not map to an input or output tensor.
+    //!
+    //! The vector component size is returned if getTensorVectorizedDim(tensorName) != -1.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
+    //! multiple profiles in the engine, use getTensorBytesPerComponent with profileIndex when there are multiple
+    //! profiles.
+    //!
+    //! \see getTensorVectorizedDim()
+    //! \see getTensorBytesPerComponent(tensorName, profileIndex)
+    //!
+    int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorBytesPerComponent(tensorName); // name-only overload; no profile index is passed on
+    }
+
+    //!
+    //! \brief Return the number of bytes per component of an element of the given profile, or -1 if the tensor is not
+    //! vectorized or provided name does not map to an input or output tensor.
+    //!
+    //! The vector component size is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //! \param profileIndex The profile index to query
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getTensorVectorizedDim(tensorName, profileIndex)
+    //!
+    int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
+    {
+        return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex); // overload maps to the impl's V2 method
+    }
+
+    //!
+    //! \brief Return the number of components included in one element, or -1 if tensor is
+    //! not vectorized or if the provided name does not map to an input or output tensor.
+    //!
+    //! The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName) != -1.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //! \warning The function can only return the result of profile 0, and issues a warning message when there
+    //! are multiple profiles in the engine, use getTensorComponentsPerElement with profileIndex when there are
+    //! multiple profiles.
+    //!
+    //! \see getTensorVectorizedDim()
+    //! \see getTensorComponentsPerElement(tensorName, profileIndex)
+    //!
+    int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorComponentsPerElement(tensorName); // name-only overload; no profile index is passed on
+    }
+
+    //!
+    //! \brief Return the number of components included in one element of given profile, or -1 if tensor is not
+    //! vectorized or the provided name does not map to an input or output tensor.
+    //!
+    //! The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //! \param profileIndex The profile index to query
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getTensorVectorizedDim(tensorName, profileIndex)
+    //!
+    int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
+    {
+        return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex); // overload maps to the impl's V2 method
+    }
+
+    //!
+    //! \brief Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or
+    //! output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //! \warning This API can only return the tensor format of profile 0, and issues a warning message when there are
+    //! multiple profiles in the engine, use getTensorFormat with profileIndex when there are multiple profiles.
+    //!
+    //! \see getTensorFormat(tensorName, profileIndex)
+    //!
+    TensorFormat getTensorFormat(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorFormat(tensorName); // name-only overload; no profile index is passed on
+    }
+
+    //!
+    //! \brief Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map to
+    //! an input or output tensor.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //! \param profileIndex The profile index to query the format for.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
+    {
+        return mImpl->getTensorFormatV2(tensorName, profileIndex); // overload maps to the impl's V2 method
+    }
+
+    //!
+    //! \brief Return the human readable description of the tensor format, or empty string if the provided name does not
+    //! map to an input or output tensor.
+    //!
+    //! The description includes the order, vectorization, data type, and strides.
+    //! Examples are shown as follows:
+    //!   Example 1: kCHW + FP32
+    //!     "Row-major linear FP32 format"
+    //!   Example 2: kCHW2 + FP16
+    //!     "Two-wide channel vectorized row-major FP16 format"
+    //!   Example 3: kHWC8 + FP16 + Line Stride = 32
+    //!     "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0"
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
+    //! multiple profiles in the engine, use getTensorFormatDesc with profileIndex when there are multiple profiles.
+    //!
+    char const* getTensorFormatDesc(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorFormatDesc(tensorName); // the implementation's string pointer is returned as-is
+    }
+
+    //!
+    //! \brief Return the human readable description of the tensor format of given profile, or empty string if the
+    //! provided name does not map to an input or output tensor.
+    //!
+    //! The description includes the order, vectorization, data type, and strides.
+    //! Examples are shown as follows:
+    //!   Example 1: kCHW + FP32
+    //!     "Row-major linear FP32 format"
+    //!   Example 2: kCHW2 + FP16
+    //!     "Two-wide channel vectorized row-major FP16 format"
+    //!   Example 3: kHWC8 + FP16 + Line Stride = 32
+    //!     "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0"
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //! \param profileIndex The profile index to query the format for.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
+    {
+        return mImpl->getTensorFormatDescV2(tensorName, profileIndex); // overload maps to the impl's V2 method
+    }
+
+    //!
+    //! \brief Return the dimension index that the buffer is vectorized, or -1 if the provided name does not
+    //! map to an input or output tensor.
+    //!
+    //! Specifically -1 is returned if scalars per vector is 1.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //! \warning The function can only return the result of profile 0, and issues a warning message when there are
+    //!  multiple profiles in the engine, use getTensorVectorizedDim with profileIndex when there are multiple profiles.
+    //!
+    int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorVectorizedDim(tensorName); // name-only overload; no profile index is passed on
+    }
+
+    //!
+    //! \brief Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name
+    //! does not map to an input or output tensor.
+    //!
+    //! Specifically -1 is returned if scalars per vector is 1.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //! \param profileIndex The profile index to query the format for.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
+    {
+        return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex); // overload maps to the impl's V2 method
+    }
+
+    //!
+    //! \brief Returns the name of the network associated with the engine.
+    //!
+    //! The name is set during network creation and is retrieved after
+    //! building or deserialization.
+    //!
+    //! \see INetworkDefinition::setName(), INetworkDefinition::getName()
+    //!
+    //! \return A null-terminated C-style string representing the name of the network.
+    //!
+    char const* getName() const noexcept
+    {
+        return mImpl->getName(); // the implementation's string pointer is returned without copying
+    }
+
+    //!
+    //! \brief Get the number of optimization profiles defined for this engine.
+    //!
+    //! \return Number of optimization profiles. It is always at least 1.
+    //!
+    //! \see IExecutionContext::setOptimizationProfileAsync()
+    int32_t getNbOptimizationProfiles() const noexcept
+    {
+        return mImpl->getNbOptimizationProfiles(); // count is reported by the implementation, not cached here
+    }
+
+    //!
+    //! \brief Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimization
+    //! profile.
+    //!
+    //! \param tensorName The name of an input tensor.
+    //!
+    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
+    //!
+    //! \param select Whether to query the minimum, optimum, or maximum dimensions for this input tensor.
+    //!
+    //! \return The minimum / optimum / maximum dimensions for an input tensor in this profile.
+    //!         If the profileIndex is invalid or provided name does not map to an input tensor, return Dims{-1, {}}
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
+    {
+        return mImpl->getProfileShape(tensorName, profileIndex, select); // all three arguments forwarded unchanged
+    }
+
+    //!
+    //! \brief Get the minimum / optimum / maximum values (not dimensions) for an input tensor given
+    //! its name under an optimization profile. These correspond to the values set using
+    //! IOptimizationProfile::setShapeValues when the engine was built.
+    //!
+    //! \param tensorName The name of an input tensor.
+    //!
+    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
+    //!
+    //! \param select Whether to query the minimum, optimum, or maximum values for this input tensor.
+    //!
+    //! \return The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is
+    //! invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return
+    //! nullptr.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.11. Superseded by getProfileTensorValuesV2().
+    //! \warning If input shapes are set with setShapeValuesV2, getProfileTensorValues will return nullptr
+    //!
+    TRT_DEPRECATED int32_t const* getProfileTensorValues(
+        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
+    {
+        return mImpl->getProfileTensorValues(tensorName, profileIndex, select); // deprecated int32_t variant
+    }
+
+    //!
+    //! \brief Determine what execution capability this engine has.
+    //!
+    //! If the engine has EngineCapability::kSTANDARD, then all engine functionality is valid.
+    //! If the engine has EngineCapability::kSAFETY, then only the functionality in safe engine is valid.
+    //! If the engine has EngineCapability::kDLA_STANDALONE, then only serialize, destroy, and const-accessor functions
+    //! are valid.
+    //!
+    //! \return The EngineCapability flag that the engine was built for.
+    //!
+    EngineCapability getEngineCapability() const noexcept
+    {
+        return mImpl->getEngineCapability(); // value is reported by the implementation object
+    }
+
+    //!
+    //! \brief Set the ErrorRecorder for this interface
+    //!
+    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
+    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+    //! a recorder has been registered.
+    //!
+    //! If an error recorder is not set, messages will be sent to the global log stream.
+    //!
+    //! \param recorder The error recorder to register with this interface.
+    //!
+    //! \see getErrorRecorder()
+    //!
+    void setErrorRecorder(IErrorRecorder* recorder) noexcept
+    {
+        return mImpl->setErrorRecorder(recorder); // 'return' of a call in a void function; callee presumably void - confirm
+    }
+
+    //!
+    //! \brief Get the ErrorRecorder assigned to this interface.
+    //!
+    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
+    //! an error handler has not been set.
+    //!
+    //! \return A pointer to the IErrorRecorder object that has been registered.
+    //!
+    //! \see setErrorRecorder()
+    //!
+    IErrorRecorder* getErrorRecorder() const noexcept
+    {
+        return mImpl->getErrorRecorder(); // the implementation's pointer is returned as-is
+    }
+
+    //!
+    //! \brief Query whether the engine was built with an implicit batch dimension.
+    //!
+    //! \return Always false since TensorRT 10.0 does not support an implicit batch dimension.
+    //!
+    //! \see createNetworkV2
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.
+    //!
+    TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
+    {
+        return mImpl->hasImplicitBatchDimension(); // deprecated; the result is still obtained from mImpl
+    }
+
+    //!
+    //! \brief Return the tactic sources required by this engine.
+    //!
+    //! The value returned is equal to zero or more tactics sources set
+    //! at build time via setTacticSources() in IBuilderConfig. Sources
+    //! set by the latter but not returned by \ref ICudaEngine::getTacticSources
+    //! do not reduce overall engine execution time, and can be removed from
+    //! future builds to reduce build time.
+    //!
+    //! \see IBuilderConfig::setTacticSources()
+    //!
+    TacticSources getTacticSources() const noexcept
+    {
+        return mImpl->getTacticSources(); // value is reported by the implementation object
+    }
+
+    //!
+    //! \brief Return the \ref ProfilingVerbosity the builder config was set to when the engine was built.
+    //!
+    //! \return the profiling verbosity the builder config was set to when the engine was built.
+    //!
+    //! \see IBuilderConfig::setProfilingVerbosity()
+    //!
+    ProfilingVerbosity getProfilingVerbosity() const noexcept
+    {
+        return mImpl->getProfilingVerbosity(); // value is reported by the implementation object
+    }
+
+    //!
+    //! \brief Create a new engine inspector which prints the layer information in an engine or an execution context.
+    //!
+    //! \see IEngineInspector.
+    //!
+    IEngineInspector* createEngineInspector() const noexcept
+    {
+        return mImpl->createEngineInspector(); // const method; the implementation's pointer is returned as-is
+    }
+
+    //!
+    //! \brief Return number of IO tensors.
+    //!
+    //! It is the number of input and output tensors for the network from which the engine was built.
+    //! The names of the IO tensors can be discovered by calling getIOTensorName(i) for i in 0 to getNbIOTensors()-1.
+    //!
+    //! \see getIOTensorName()
+    //!
+    int32_t getNbIOTensors() const noexcept
+    {
+        return mImpl->getNbIOTensors(); // count is reported by the implementation, not cached here
+    }
+
+    //!
+    //! \brief Return name of an IO tensor.
+    //!
+    //! \param index value between 0 and getNbIOTensors()-1
+    //!
+    //! \see getNbIOTensors()
+    //!
+    char const* getIOTensorName(int32_t index) const noexcept
+    {
+        return mImpl->getIOTensorName(index); // index is not range-checked at this layer
+    }
+
+    //!
+    //! \brief Return the hardware compatibility level of this engine.
+    //!
+    //! \return The level of hardware compatibility
+    //!        of this engine.
+    //!
+    HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
+    {
+        return mImpl->getHardwareCompatibilityLevel(); // value is reported by the implementation object
+    }
+
+    //!
+    //! \brief Return the number of auxiliary streams used by this engine.
+    //!
+    //! This number will be less than or equal to the maximum allowed number of auxiliary streams set by
+    //! IBuilderConfig::setMaxAuxStreams() API call when the engine was built.
+    //!
+    //! \return the number of auxiliary streams used by this engine.
+    //!
+    //! \see IBuilderConfig::setMaxAuxStreams(), IExecutionContext::setAuxStreams()
+    //!
+    int32_t getNbAuxStreams() const noexcept
+    {
+        return mImpl->getNbAuxStreams(); // count is reported by the implementation, not cached here
+    }
+
+    //!
+    //! \brief Create a serialization configuration object.
+    //!
+    //! \see ISerializationConfig
+    //!
+    ISerializationConfig* createSerializationConfig() noexcept
+    {
+        return mImpl->createSerializationConfig(); // pure forward; the implementation's pointer is returned as-is
+    }
+
+    //!
+    //! \brief Serialize the network to a stream with the provided SerializationConfig.
+    //!
+    //! \return An IHostMemory object that contains the serialized engine.
+    //!
+    //! The network may be deserialized with IRuntime::deserializeCudaEngine().
+    //! Serializing plan file with SerializationFlag::kEXCLUDE_WEIGHTS requires building the engine with kREFIT,
+    //! kREFIT_IDENTICAL or kREFIT_INDIVIDUAL.
+    //!
+    //! \see IRuntime::deserializeCudaEngine()
+    //!
+    IHostMemory* serializeWithConfig(ISerializationConfig& config) const noexcept
+    {
+        return mImpl->serializeWithConfig(config); // config is handed to the implementation by reference
+    }
+
+    //!
+    //! \brief Limit the maximum amount of GPU memory usable for network weights
+    //! in bytes.
+    //!
+    //! \param gpuMemoryBudget  This parameter may take on 3 types of values:
+    //!  -1: Allows TensorRT to choose the budget according to the streamable weights size.
+    //!      Free CUDA memory will be queried at createExecutionContext() and accordingly:
+    //!       * If streamable weights all fit: weight streaming is not required and disabled.
+    //!       * Otherwise: Budget is set to getMinimumWeightStreamingBudget
+    //!   0: (default) Disables weight streaming. The execution may fail if the network is too large for GPU memory.
+    //!  >0: The maximum bytes of GPU memory that weights can occupy. It must be bounded by
+    //!      [getMinimumWeightStreamingBudget, free GPU memory].
+    //!
+    //! By setting a weight limit, users can expect a GPU memory usage reduction
+    //! of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur
+    //! when gpuMemoryBudget is set to getMinimumWeightStreamingBudget(). Creating additional
+    //! IExecutionContexts will increase memory usage by O(getMinimumWeightStreamingBudget()).
+    //!
+    //! Streaming larger amounts of memory will likely result in lower performance
+    //! except in some boundary cases where streaming weights allows the user to
+    //! run larger batch sizes. The higher throughput offsets the increased
+    //! latency in these cases. Tuning the value of the memory limit is
+    //! recommended for best performance.
+    //!
+    //! \warning GPU memory for the weights is allocated in this call and will be deallocated by enabling weight
+    //!          streaming or destroying the ICudaEngine.
+    //!
+    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
+    //!
+    //! \warning The weights streaming budget cannot be modified while there are active IExecutionContexts.
+    //!
+    //! \return true if the memory limit is valid and the call was successful, false otherwise.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by setWeightStreamingBudgetV2().
+    //!
+    //! \see BuilderFlag::kWEIGHT_STREAMING
+    //! \see getWeightStreamingBudget()
+    //! \see getMinimumWeightStreamingBudget()
+    //! \see getStreamableWeightsSize()
+    //!
+    TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
+    {
+        return mImpl->setWeightStreamingBudget(gpuMemoryBudget); // deprecated V1 setter; budget is not checked here
+    }
+
+    //!
+    //! \brief Returns the current weight streaming device memory budget in bytes.
+    //!
+    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
+    //!
+    //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudget() for the possible
+    //!          values.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by getWeightStreamingBudgetV2().
+    //!
+    //! \see BuilderFlag::kWEIGHT_STREAMING
+    //! \see setWeightStreamingBudget()
+    //! \see getMinimumWeightStreamingBudget()
+    //! \see getStreamableWeightsSize()
+    //!
+    TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
+    {
+        return mImpl->getWeightStreamingBudget(); // deprecated V1 getter; plain forward to mImpl
+    }
+
+    //!
+    //! \brief The minimum number of bytes of GPU memory required by network
+    //! weights for successful weight streaming.
+    //!
+    //! This is a positive integer for engines with streamable weights because a
+    //! staging buffer on the GPU is required to temporarily hold the streamed
+    //! weights. The size of the staging buffer is determined by TensorRT and must
+    //! be at least as large as the size of the largest streamable weight in the
+    //! network.
+    //!
+    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
+    //!
+    //! \returns The minimum number of bytes of GPU memory required for streaming.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. The minimum budget is 0 in the V2 APIs.
+    //!
+    //! \see setWeightStreamingBudget()
+    //!
+    TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
+    {
+        return mImpl->getMinimumWeightStreamingBudget(); // deprecated V1 query; plain forward to mImpl
+    }
+
+    //!
+    //! \brief Get the total size in bytes of all streamable weights.
+    //!
+    //! The set of streamable weights is a subset of all network weights. The
+    //! total size may exceed free GPU memory.
+    //!
+    //! \returns The total size in bytes of all streamable weights.
+    //!          Returns 0 if BuilderFlag::kWEIGHT_STREAMING is unset during engine building.
+    //!
+    //! \see setWeightStreamingBudget()
+    //!
+    int64_t getStreamableWeightsSize() const noexcept
+    {
+        return mImpl->getStreamableWeightsSize(); // value is reported by the implementation object
+    }
+
+    //!
+    //! \brief Limit the maximum amount of GPU memory usable for network weights in bytes.
+    //!
+    //! \param gpuMemoryBudget This parameter must be a non-negative value.
+    //!   0: Only small amounts of scratch memory will be required to run the model.
+    //!  >= getStreamableWeightsSize (default): Disables weight streaming.
+    //!       The execution may fail if the network is too large for GPU memory.
+    //!
+    //! By setting a weight limit, users can expect a GPU memory usage reduction on the order
+    //! of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur
+    //! when gpuMemoryBudget is set to 0. Each IExecutionContext will require getWeightStreamingScratchMemorySize()
+    //! bytes of additional device memory if the engine is streaming its weights (budget < getStreamableWeightsSize()).
+    //!
+    //! Streaming larger amounts of memory will likely result in lower performance
+    //! except in some boundary cases where streaming weights allows the user to
+    //! run larger batch sizes. The higher throughput offsets the increased
+    //! latency in these cases. Tuning the value of the memory limit is
+    //! recommended for best performance.
+    //!
+    //! \warning GPU memory for the weights is allocated in this call and will be deallocated by enabling weight
+    //! streaming or destroying the ICudaEngine.
+    //!
+    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
+    //!
+    //! \warning The weights streaming budget cannot be modified while there are active IExecutionContexts.
+    //!
+    //! \warning Using the V2 weight streaming APIs with V1 APIs (setWeightStreamingBudget(),
+    //!          getWeightStreamingBudget(), getMinimumWeightStreamingBudget()) leads to undefined behavior.
+    //!
+    //! \return true if the memory limit is valid and the call was successful, false otherwise.
+    //!
+    //! \see BuilderFlag::kWEIGHT_STREAMING
+    //! \see getWeightStreamingBudgetV2()
+    //! \see getWeightStreamingScratchMemorySize()
+    //! \see getWeightStreamingAutomaticBudget()
+    //! \see getStreamableWeightsSize()
+    //!
+    bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
+    {
+        return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget); // budget is not range-checked at this layer
+    }
+
+    //!
+    //! \brief Returns the current weight streaming device memory budget in bytes.
+    //!
+    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
+    //!
+    //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudgetV2() for the possible
+    //!          return values. Returns getStreamableWeightsSize() if weight streaming is disabled.
+    //!
+    //! \see BuilderFlag::kWEIGHT_STREAMING
+    //! \see setWeightStreamingBudgetV2()
+    //! \see getMinimumWeightStreamingBudget()
+    //! \see getStreamableWeightsSize()
+    //!
+    int64_t getWeightStreamingBudgetV2() const noexcept
+    {
+        return mImpl->getWeightStreamingBudgetV2(); // value is reported by the implementation object
+    }
+
+    //!
+    //! \brief TensorRT automatically determines a device memory budget for the model to run. The budget is close to the
+    //! current free memory size, leaving some space for other memory needs in the user's application. If the budget
+    //! exceeds the size obtained from getStreamableWeightsSize(), it is capped to that size, effectively disabling
+    //! weight streaming. Since TensorRT lacks information about the user's allocations, the remaining memory size might
+    //! be larger than required, leading to wasted memory, or smaller than required, causing an out-of-memory error. For
+    //! optimal memory allocation, it is recommended to manually calculate and set the budget.
+    //!
+    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
+    //!
+    //! \warning The return value may change between TensorRT minor versions.
+    //!
+    //! \warning Setting the returned budget with V1 APIs (setWeightStreamingBudget()) will lead to undefined behavior.
+    //! Please use V2 APIs.
+    //!
+    //! \returns The weight streaming budget in bytes. Please set with setWeightStreamingBudgetV2().
+    //!
+    //! \see BuilderFlag::kWEIGHT_STREAMING
+    //! \see setWeightStreamingBudgetV2()
+    //!
+    int64_t getWeightStreamingAutomaticBudget() const noexcept
+    {
+        return mImpl->getWeightStreamingAutomaticBudget(); // computed by the implementation; nothing is set here
+    }
+
+    //!
+    //! \brief Returns the size of the scratch memory required by the current weight streaming budget.
+    //!
+    //! Weight streaming requires small amounts of scratch memory on the GPU to stage CPU weights right before
+    //! execution. This value is typically much smaller than the total streamable weights size. Each IExecutionContext
+    //! will then allocate this additional memory or the user can provide the additional memory through
+    //! getDeviceMemorySizeV2() and IExecutionContext::setDeviceMemoryV2().
+    //!
+    //! The return value of this call depends on
+    //!    1. setWeightStreamingBudget()
+    //!    2. setWeightStreamingBudgetV2()
+    //!
+    //! \warning BuilderFlag::kWEIGHT_STREAMING must be set during engine building.
+    //!
+    //! \returns The weight streaming scratch memory in bytes. Returns 0 if weight streaming is disabled.
+    //!
+    //! \see BuilderFlag::kWEIGHT_STREAMING
+    //! \see setWeightStreamingBudgetV2()
+    //! \see getStreamableWeightsSize()
+    //! \see getDeviceMemorySizeV2()
+    //! \see getDeviceMemorySizeForProfileV2()
+    //! \see IExecutionContext::setDeviceMemoryV2()
+    //!
+    int64_t getWeightStreamingScratchMemorySize() const noexcept
+    {
+        return mImpl->getWeightStreamingScratchMemorySize(); // value is reported by the implementation object
+    }
+
+    //!
+    //! \brief Check if a tensor is marked as a debug tensor.
+    //!
+    //! Determine whether the given name corresponds to a debug tensor.
+    //!
+    //! \returns True if tensor is a debug tensor, false otherwise.
+    //!
+    //! \see INetworkDefinition::markDebug
+    //!
+    bool isDebugTensor(char const* name) const noexcept
+    {
+        return mImpl->isDebugTensor(name); // name pointer is forwarded without a null check at this layer
+    }
+
+    //!
+    //! \brief Get the minimum / optimum / maximum values (not dimensions) for an input tensor given
+    //! its name under an optimization profile. These correspond to the values set using
+    //! IOptimizationProfile::setShapeValuesV2 when the engine was built.
+    //!
+    //! \param tensorName The name of an input tensor.
+    //!
+    //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
+    //!
+    //! \param select Whether to query the minimum, optimum, or maximum values for this input tensor.
+    //!
+    //! \return The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is
+    //! invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return
+    //! nullptr.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \warning If input shapes are set with setShapeValues, getProfileTensorValuesV2 will return nullptr
+    //!
+    int64_t const* getProfileTensorValuesV2(
+        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
+    {
+        return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select); // int64_t counterpart of the V1 call
+    }
+
+protected:
+    apiv::VCudaEngine* mImpl; //!< Implementation object that the inline methods above forward to.
+};
+
+namespace v_1_0
+{
+class IOutputAllocator : public IVersionedInterface
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return {"IOutputAllocator", 1, 0}; // name, then presumably major/minor version (cf. v_1_0) - confirm
+    }
+
+    //!
+    //! \brief Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
+    //!        If the requested memory size exceeds the currentMemory size, the currentMemory can be freed as well.
+    //!        If currentMemory is known to be big enough, one option is to return currentMemory.
+    //!
+    //! \param tensorName name of the output tensor.
+    //! \param currentMemory points to the address set by IExecutionContext::setTensorAddress.
+    //! \param size number of bytes required. Always positive, even for an empty tensor.
+    //! \param alignment required alignment of the allocation.
+    //!
+    //! \return A pointer to memory to use for the output tensor or nullptr.
+    //!
+    //!
+    //! To preallocate memory and have the engine fail if the preallocation is not big enough,
+    //! use IExecutionContext::setTensorAddress to set a pointer to the preallocated memory,
+    //! and have reallocateOutput return nullptr if that memory is not big enough.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by reallocateOutputAsync with cudaStream_t argument
+    //!
+    TRT_DEPRECATED virtual void* reallocateOutput(
+        char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept
+    {
+        return nullptr;
+    }
+
+    //!
+    //! \brief Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
+    //!        If the requested memory size exceeds the currentMemory size, the currentMemory can be freed as well.
+    //!        If currentMemory is known to be big enough, one option is to return currentMemory.
+    //!
+    //! \param tensorName name of the output tensor.
+    //! \param currentMemory points to the address set by IExecutionContext::setTensorAddress.
+    //! \param size number of bytes required. Always positive, even for an empty tensor.
+    //! \param alignment required alignment of the allocation.
+    //! \param stream The stream in which to execute the kernels.
+    //!
+    //! \return A pointer to memory to use for the output tensor or nullptr.
+    //!
+    //! To preallocate memory and have the engine fail if the preallocation is not big enough,
+    //! use IExecutionContext::setTensorAddress to set a pointer to the preallocated memory,
+    //! and have reallocateOutputAsync return nullptr if that memory is not big enough.
+    //!
+    //! The default definition exists for sake of backward compatibility with earlier versions of TensorRT.
+    //! Eventually this method will become a pure virtual method that requires an override, and method
+    //! reallocateOutput() will disappear. Code moving away from TensorRT 9.x should override method
+    //! reallocateOutputAsync() and NOT override method reallocateOutput().
+    //!
+    virtual void* reallocateOutputAsync(
+        char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
+    {
+        return reallocateOutput(tensorName, currentMemory, size, alignment);
+    }
+
+    //!
+    //! \brief Called by TensorRT when the shape of the output tensor is known.
+    //!
+    //! Called by TensorRT sometime between when it calls reallocateOutput and enqueueV3 returns.
+    //!
+    //! \param dims dimensions of the output
+    //! \param tensorName name of the tensor
+    //!
+    virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
+};
+} // namespace v_1_0
+
+//!
+//! \class IOutputAllocator
+//!
+//! \brief User-implemented callback interface invoked from IExecutionContext::enqueueV3()
+//!
+//! \see IExecutionContext::enqueueV3()
+//!
+using IOutputAllocator = v_1_0::IOutputAllocator;
+
+namespace v_1_0
+{
+class IDebugListener : public IVersionedInterface
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return {"IDebugListener", 1, 0};
+    }
+
+    //!
+    //! \brief Callback function that is called when a debug tensor's value is updated and the debug state of the tensor
+    //! is set to true. Content in the given address is only guaranteed to be valid for the duration of the callback.
+    //!
+    //! \param addr pointer to the buffer holding the tensor content.
+    //! \param location TensorLocation of the tensor.
+    //! \param type data type of the tensor.
+    //! \param shape shape of the tensor.
+    //! \param name name of the tensor.
+    //! \param stream CUDA stream object.
+    //!
+    //! \return True on success, false otherwise.
+    //!
+    virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
+        char const* name, cudaStream_t stream)
+        = 0;
+
+    ~IDebugListener() override = default;
+};
+} // namespace v_1_0
+
+//!
+//! \class IDebugListener
+//!
+//! \brief User-implemented callback for notification when value of a debug tensor is updated.
+//!
+using IDebugListener = v_1_0::IDebugListener;
+
+//!
+//! \class IExecutionContext
+//!
+//! \brief Context for executing inference using an engine, with functionally unsafe features.
+//!
+//! Multiple execution contexts may exist for one ICudaEngine instance, allowing the same
+//! engine to be used for the execution of multiple batches simultaneously. If the engine supports
+//! dynamic shapes, each execution context in concurrent use must use a separate optimization profile.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+class IExecutionContext : public INoCopy
+{
+public:
+    virtual ~IExecutionContext() noexcept = default;
+
+    //!
+    //! \brief Set the debug sync flag.
+    //!
+    //! If this flag is set to true, the engine will log the successful execution for each kernel during executeV2(). It
+    //! has no effect when using enqueueV3().
+    //!
+    //! \see getDebugSync()
+    //!
+    void setDebugSync(bool sync) noexcept
+    {
+        mImpl->setDebugSync(sync);
+    }
+
+    //!
+    //! \brief Get the debug sync flag.
+    //!
+    //! \see setDebugSync()
+    //!
+    bool getDebugSync() const noexcept
+    {
+        return mImpl->getDebugSync();
+    }
+
+    //!
+    //! \brief Set the profiler.
+    //!
+    //! \see IProfiler getProfiler()
+    //!
+    void setProfiler(IProfiler* profiler) noexcept
+    {
+        mImpl->setProfiler(profiler);
+    }
+
+    //!
+    //! \brief Get the profiler.
+    //!
+    //! \see IProfiler setProfiler()
+    //!
+    IProfiler* getProfiler() const noexcept
+    {
+        return mImpl->getProfiler();
+    }
+
+    //!
+    //! \brief Get the associated engine.
+    //!
+    //! \see ICudaEngine
+    //!
+    ICudaEngine const& getEngine() const noexcept
+    {
+        return mImpl->getEngine();
+    }
+
+    //!
+    //! \brief Set the name of the execution context.
+    //!
+    //! This method copies the name string.
+    //!
+    //! \warning The string name must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getName()
+    //!
+    void setName(char const* name) noexcept
+    {
+        mImpl->setName(name);
+    }
+
+    //!
+    //! \brief Return the name of the execution context.
+    //!
+    //! \see setName()
+    //!
+    char const* getName() const noexcept
+    {
+        return mImpl->getName();
+    }
+
+    //!
+    //! \brief Set the device memory for use by this execution context.
+    //!
+    //! The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size
+    //! must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and
+    //! getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the
+    //! reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of
+    //! enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns.
+    //! Releasing or otherwise using the memory for other purposes, including using it in another execution context
+    //! running in parallel, during this time will result in undefined behavior.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.1. Superseded by setDeviceMemoryV2().
+    //!
+    //! \warning Weight streaming related scratch memory will be allocated by TensorRT if the memory is set by this API.
+    //!          Please use setDeviceMemoryV2() instead.
+    //!
+    //! \see ICudaEngine::getDeviceMemorySize()
+    //! \see ICudaEngine::getDeviceMemorySizeForProfile()
+    //! \see ExecutionContextAllocationStrategy
+    //! \see ICudaEngine::createExecutionContext()
+    //! \see ICudaEngine::createExecutionContextWithoutDeviceMemory()
+    //!
+    void setDeviceMemory(void* memory) noexcept
+    {
+        mImpl->setDeviceMemory(memory);
+    }
+
+    //!
+    //! \brief Set the device memory and its corresponding size for use by this execution context.
+    //!
+    //! The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size
+    //! must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and
+    //! getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the
+    //! reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of
+    //! enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns.
+    //! Releasing or otherwise using the memory for other purposes, including using it in another execution context
+    //! running in parallel, during this time will result in undefined behavior.
+    //!
+    //! \see ICudaEngine::getDeviceMemorySizeV2()
+    //! \see ICudaEngine::getDeviceMemorySizeForProfileV2()
+    //! \see ExecutionContextAllocationStrategy
+    //! \see ICudaEngine::createExecutionContext()
+    //! \see ICudaEngine::createExecutionContextWithoutDeviceMemory()
+    //!
+    void setDeviceMemoryV2(void* memory, int64_t size) noexcept
+    {
+        mImpl->setDeviceMemoryV2(memory, size);
+    }
+
+    //!
+    //! \brief Return the strides of the buffer for the given tensor name.
+    //!
+    //! The strides are in units of elements, not components or bytes.
+    //! For example, for TensorFormat::kHWC8, a stride of one spans 8 scalars.
+    //!
+    //! Note that strides can be different for different execution contexts
+    //! with dynamic shapes.
+    //!
+    //! If the provided name does not map to an input or output tensor, or there are dynamic dimensions that have not
+    //! been set yet, return Dims{-1, {}}
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    Dims getTensorStrides(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorStrides(tensorName);
+    }
+
+public:
+    //!
+    //! \brief Get the index of the currently selected optimization profile.
+    //!
+    //! If the profile index has not been set yet (implicitly to 0 if no other execution context has been set to
+    //! profile 0, or explicitly for all subsequent contexts), an invalid value of -1 will be returned
+    //! and all calls to enqueueV3()/executeV2() will fail until a valid profile index has been set.
+    //! This behavior is deprecated in TensorRT 8.6, all profiles will default to optimization
+    //! profile 0 and -1 will no longer be returned.
+    //!
+    int32_t getOptimizationProfile() const noexcept
+    {
+        return mImpl->getOptimizationProfile();
+    }
+
+    //!
+    //! \brief Set shape of given input.
+    //!
+    //! \param tensorName The name of an input tensor.
+    //! \param dims The shape of an input tensor.
+    //!
+    //! \return True on success, false if the provided name does not map to an input tensor, or if some other error
+    //! occurred.
+    //!
+    //! Each dimension must agree with the network dimension unless the latter was -1.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    bool setInputShape(char const* tensorName, Dims const& dims) noexcept
+    {
+        return mImpl->setInputShape(tensorName, dims);
+    }
+
+    //!
+    //! \brief Return the shape of the given input or output.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! Return Dims{-1, {}} if the provided name does not map to an input or output tensor.
+    //! Otherwise return the shape of the input or output tensor.
+    //!
+    //! A dimension in an input tensor will have a -1 wildcard value if all the following are true:
+    //!  * setInputShape() has not yet been called for this tensor
+    //!  * The dimension is a runtime dimension that is not implicitly constrained to be a single value.
+    //!
+    //! A dimension in an output tensor will have a -1 wildcard value if the dimension depends
+    //! on values of execution tensors OR if all the following are true:
+    //!  * It is a runtime dimension.
+    //!  * setInputShape() has NOT been called for some input tensor(s) with a runtime shape.
+    //!  * setTensorAddress() has NOT been called for some input tensor(s) with isShapeInferenceIO() = true.
+    //!
+    //! An output tensor may also have -1 wildcard dimensions if its shape depends on values of tensors supplied to
+    //! enqueueV3().
+    //!
+    //! If the request is for the shape of an output tensor with runtime dimensions,
+    //! all input tensors with isShapeInferenceIO() = true should have their value already set,
+    //! since these values might be needed to compute the output shape.
+    //!
+    //! Examples of an input dimension that is implicitly constrained to a single value:
+    //! * The optimization profile specifies equal min and max values.
+    //! * The dimension is named and only one value meets the optimization profile requirements
+    //!   for dimensions with that name.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    Dims getTensorShape(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorShape(tensorName);
+    }
+
+    //!
+    //! \brief Whether all dynamic dimensions of input tensors have been specified.
+    //!
+    //! \return True if all dynamic dimensions of input tensors have been specified
+    //!         by calling setInputShape().
+    //!
+    //! Trivially true if the network has no dynamically shaped input tensors.
+    //!
+    //! Does not work with name-based interfaces, e.g. IExecutionContext::setInputShape(). Use
+    //! IExecutionContext::inferShapes() instead.
+    //!
+    bool allInputDimensionsSpecified() const noexcept
+    {
+        return mImpl->allInputDimensionsSpecified();
+    }
+
+    //!
+    //! \brief Whether all input shape bindings have been specified.
+    //!
+    //! \return True if all input shape bindings have been specified by setInputShapeBinding().
+    //!
+    //! Trivially true if the network has no input shape bindings.
+    //!
+    //! Does not work with name-based interfaces, e.g. IExecutionContext::setInputShape(). Use
+    //! IExecutionContext::inferShapes() instead.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. setInputShapeBinding() has been removed since TensorRT 10.0.
+    //!
+    TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
+    {
+        return mImpl->allInputShapesSpecified();
+    }
+
+    //!
+    //! \brief Set the ErrorRecorder for this interface
+    //!
+    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
+    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+    //! a recorder has been registered.
+    //!
+    //! If an error recorder is not set, messages will be sent to the global log stream.
+    //!
+    //! \param recorder The error recorder to register with this interface.
+    //!
+    //! \see getErrorRecorder()
+    //!
+    void setErrorRecorder(IErrorRecorder* recorder) noexcept
+    {
+        mImpl->setErrorRecorder(recorder);
+    }
+
+    //!
+    //! \brief Get the ErrorRecorder assigned to this interface.
+    //!
+    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
+    //! an error recorder has not been set.
+    //!
+    //! \return A pointer to the IErrorRecorder object that has been registered.
+    //!
+    //! \see setErrorRecorder()
+    //!
+    IErrorRecorder* getErrorRecorder() const noexcept
+    {
+        return mImpl->getErrorRecorder();
+    }
+
+    //!
+    //! \brief Synchronously execute a network.
+    //!
+    //! This method requires an array of input and output buffers. The mapping
+    //! from indices to tensor names can be queried using ICudaEngine::getIOTensorName().
+    //!
+    //! \param bindings An array of pointers to input and output buffers for the network.
+    //!
+    //! \return True if execution succeeded.
+    //!
+    //! \see ICudaEngine::getIOTensorName()
+    //!
+    bool executeV2(void* const* bindings) noexcept
+    {
+        return mImpl->executeV2(bindings);
+    }
+
+    //!
+    //! \brief Select an optimization profile for the current context with async
+    //! semantics.
+    //!
+    //! \param profileIndex Index of the profile. The value must lie between 0 and
+    //!        getEngine().getNbOptimizationProfiles() - 1.
+    //!
+    //! \param stream A CUDA stream on which the cudaMemcpyAsyncs may be
+    //! enqueued.
+    //!
+    //! When an optimization profile is switched via this API, TensorRT may
+    //! require that data is copied via cudaMemcpyAsync. It is the
+    //! application's responsibility to guarantee that synchronization between
+    //! the profile sync stream and the enqueue stream occurs.
+    //!
+    //! The selected profile will be used in subsequent calls to executeV2()/enqueueV3().
+    //! If the associated CUDA engine has inputs with dynamic shapes, the optimization profile must
+    //! be set with its corresponding profileIndex before calling execute or enqueue. The newly created execution
+    //! context will be assigned optimization profile 0.
+    //!
+    //! If the associated CUDA engine does not have inputs with dynamic shapes,
+    //! this method need not be called, in which case the default profile index
+    //! of 0 will be used.
+    //!
+    //! setOptimizationProfileAsync() must be called before calling
+    //! setInputShape() for all dynamic input tensors or input shape
+    //! tensors, which in turn must be called before
+    //! executeV2()/enqueueV3().
+    //!
+    //! \warning This function will trigger layer resource updates on the next call of
+    //!          executeV2()/enqueueV3(), possibly resulting in performance bottlenecks.
+    //!
+    //! \warning Not synchronizing the stream used at enqueue with the stream
+    //! used to set optimization profile asynchronously using this API will
+    //! result in undefined behavior.
+    //!
+    //! \return true if the call succeeded, else false (e.g. input out of range).
+    //!
+    //! \see ICudaEngine::getNbOptimizationProfiles()
+    bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
+    {
+        return mImpl->setOptimizationProfileAsync(profileIndex, stream);
+    }
+
+    //!
+    //! \brief Set whether enqueue emits layer timing to the profiler
+    //!
+    //! If set to true (default), enqueue is synchronous and does layer timing profiling implicitly if
+    //! there is a profiler attached.
+    //! If set to false, enqueue will be asynchronous if there is a profiler attached. An extra method
+    //! reportToProfiler() needs to be called to obtain the profiling data and report to the profiler attached.
+    //!
+    //! \see IExecutionContext::getEnqueueEmitsProfile()
+    //! \see IExecutionContext::reportToProfiler()
+    //!
+    void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
+    {
+        mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
+    }
+
+    //!
+    //! \brief Get the enqueueEmitsProfile state.
+    //!
+    //! \return The enqueueEmitsProfile state.
+    //!
+    //! \see IExecutionContext::setEnqueueEmitsProfile()
+    //!
+    bool getEnqueueEmitsProfile() const noexcept
+    {
+        return mImpl->getEnqueueEmitsProfile();
+    }
+
+    //!
+    //! \brief Calculate layer timing info for the current optimization profile in IExecutionContext
+    //! and update the profiler after one iteration of inference launch.
+    //!
+    //! If IExecutionContext::getEnqueueEmitsProfile() returns true, the enqueue function will calculate layer timing
+    //! implicitly if a profiler is provided. This function returns true and does nothing.
+    //!
+    //! If IExecutionContext::getEnqueueEmitsProfile() returns false, the enqueue function will record the CUDA event
+    //! timers if a profiler is provided. But it will not perform the layer timing calculation.
+    //! IExecutionContext::reportToProfiler() needs to be called explicitly to calculate layer timing for the previous
+    //! inference launch.
+    //!
+    //! In the CUDA graph launch scenario, it will record the same set of CUDA events
+    //! as in regular enqueue functions if the graph is captured from an IExecutionContext with profiler enabled.
+    //! This function needs to be called after graph launch to report the layer timing info to the profiler.
+    //!
+    //! \warning Profiling CUDA graphs is only available from CUDA 11.1 onwards.
+    //! \warning reportToProfiler uses the stream of the previous enqueue call, so the stream must be live; otherwise
+    //! behavior is undefined.
+    //!
+    //! \return true if the call succeeded, else false (e.g. profiler not provided, in CUDA graph capture mode, etc.)
+    //!
+    //! \see IExecutionContext::setEnqueueEmitsProfile()
+    //! \see IExecutionContext::getEnqueueEmitsProfile()
+    //!
+    bool reportToProfiler() const noexcept
+    {
+        return mImpl->reportToProfiler();
+    }
+
+    //!
+    //! \brief Set memory address for given input or output tensor.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //! \param data The pointer (void*) to the data owned by the user.
+    //!
+    //! \return True on success, false if error occurred.
+    //!
+    //! An address defaults to nullptr.
+    //! Pass data=nullptr to reset to the default state.
+    //!
+    //! Return false if the provided name does not map to an input or output tensor.
+    //!
+    //! If an input pointer has type (void const*), use setInputTensorAddress() instead.
+    //!
+    //! Before calling enqueueV3(), each input must have a non-null address and
+    //! each output must have a non-null address or an IOutputAllocator to set it later.
+    //!
+    //! If the TensorLocation of the tensor is kHOST:
+    //! - The pointer must point to a host buffer of sufficient size.
+    //! - Data representing shape values is not copied until enqueueV3 is invoked.
+    //!
+    //! If the TensorLocation of the tensor is kDEVICE:
+    //! - The pointer must point to a device buffer of sufficient size and alignment, or
+    //! - Be nullptr if the tensor is an output tensor that will be allocated by IOutputAllocator.
+    //!
+    //! If getTensorShape(name) reports a -1 for any dimension of an output after all
+    //! input shapes have been set, use setOutputAllocator() to associate an IOutputAllocator
+    //! to which the dimensions will be reported when known.
+    //!
+    //! Calling both setTensorAddress and setOutputAllocator() for the same output is allowed,
+    //! and can be useful for preallocating memory, and then reallocating if it's not big enough.
+    //!
+    //! The pointer must have at least 256-byte alignment.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see setInputTensorAddress() setOutputTensorAddress() getTensorShape() setOutputAllocator() IOutputAllocator
+    //!
+    bool setTensorAddress(char const* tensorName, void* data) noexcept
+    {
+        return mImpl->setTensorAddress(tensorName, data);
+    }
+
+    //!
+    //! \brief Get memory address bound to given input or output tensor, or nullptr if the provided name does not map to
+    //! an input or output tensor.
+    //!
+    //! \param tensorName The name of an input or output tensor.
+    //!
+    //! Use method getOutputTensorAddress() if a non-const pointer for an output tensor is required.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getOutputTensorAddress()
+    //!
+    void const* getTensorAddress(char const* tensorName) const noexcept
+    {
+        return mImpl->getTensorAddress(tensorName);
+    }
+
+    //!
+    //! \brief Set the memory address for a given output tensor.
+    //!
+    //! \param tensorName The name of an output tensor.
+    //! \param data The pointer to the buffer to which to write the output.
+    //!
+    //! \return True on success, false if the provided name does not map to an output tensor, does not meet alignment
+    //! requirements, or some other error occurred.
+    //!
+    //! Output addresses can also be set using method setTensorAddress. This method is provided for applications which
+    //! prefer to use different methods for setting input and output tensors.
+    //!
+    //! See setTensorAddress() for alignment and data type constraints.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see setTensorAddress()
+    //!
+    bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
+    {
+        return mImpl->setOutputTensorAddress(tensorName, data);
+    }
+
+    //!
+    //! \brief Set memory address for given input.
+    //!
+    //! \param tensorName The name of an input tensor.
+    //! \param data The pointer (void const*) to the const data owned by the user.
+    //!
+    //! \return True on success, false if the provided name does not map to an input tensor, does not meet alignment
+    //! requirements, or some other error occurred.
+    //!
+    //! Input addresses can also be set using method setTensorAddress, which requires a (void*).
+    //!
+    //! See description of method setTensorAddress() for alignment and data type constraints.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see setTensorAddress()
+    //!
+    bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
+    {
+        return mImpl->setInputTensorAddress(tensorName, data);
+    }
+
+    //!
+    //! \brief Get memory address for given output.
+    //!
+    //! \param tensorName The name of an output tensor.
+    //!
+    //! \return Raw output data pointer (void*) for given output tensor, or nullptr if the provided name does not map to
+    //! an output tensor.
+    //!
+    //! If only a (void const*) pointer is needed, an alternative is to call method getTensorAddress().
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see getTensorAddress()
+    //!
+    void* getOutputTensorAddress(char const* tensorName) const noexcept
+    {
+        return mImpl->getOutputTensorAddress(tensorName);
+    }
+
+    //!
+    //! \brief Run shape calculations.
+    //!
+    //! \param nbMaxNames Maximum number of names to write to tensorNames.
+    //!        When the return value is a positive value n and tensorNames != nullptr,
+    //!        the names of min(n,nbMaxNames) insufficiently specified input tensors are
+    //!        written to tensorNames.
+    //!
+    //! \param tensorNames Buffer in which to place names of insufficiently specified input tensors.
+    //!
+    //! \return 0 on success.
+    //!         Positive value n if n input tensors were not sufficiently specified.
+    //!         -1 for other errors.
+    //!
+    //! An input tensor is insufficiently specified if either of the following is true:
+    //!
+    //! * It has dynamic dimensions and its runtime dimensions have not yet
+    //!   been specified via IExecutionContext::setInputShape.
+    //!
+    //! * isShapeInferenceIO(t)=true and the tensor's address has not yet been set.
+    //!
+    //! If an output tensor has isShapeInferenceIO(t)=true and its address has been specified,
+    //! then its value is written.
+    //!
+    //! Returns -1 if tensorNames == nullptr and nbMaxNames != 0.
+    //! Returns -1 if nbMaxNames < 0.
+    //! Returns -1 if a tensor's dimensions are invalid, e.g. a tensor ends up with a negative dimension.
+    //!
+    int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
+    {
+        return mImpl->inferShapes(nbMaxNames, tensorNames);
+    }
+
+    //!
+    //! \brief Recompute the internal activation buffer sizes based on the current input shapes, and return the total
+    //! amount of memory required.
+    //!
+    //! Users can allocate the device memory based on the size returned and provide the memory to TRT with
+    //! IExecutionContext::setDeviceMemory(). All input shapes and the optimization profile to use must be specified
+    //! before calling this function; otherwise the partition will be invalidated.
+    //!
+    //! \return Total amount of memory required on success, 0 if error occurred.
+    //!
+    //! \see IExecutionContext::setDeviceMemory()
+    //!
+    size_t updateDeviceMemorySizeForShapes() noexcept
+    {
+        return mImpl->updateDeviceMemorySizeForShapes();
+    }
+
+    //!
+    //! \brief Mark input as consumed.
+    //!
+    //! \param event The CUDA event that is triggered after all input tensors have been consumed.
+    //!
+    //! \warning The set event must be valid during the inference.
+    //!
+    //! \return True on success, false if error occurred.
+    //!
+    //! Passing event==nullptr removes whatever event was set, if any.
+    //!
+    bool setInputConsumedEvent(cudaEvent_t event) noexcept
+    {
+        return mImpl->setInputConsumedEvent(event);
+    }
+
+    //!
+    //! \brief The event associated with consuming the input.
+    //!
+    //! \return The CUDA event, or nullptr if the event is not set yet.
+    //!
+    cudaEvent_t getInputConsumedEvent() const noexcept
+    {
+        return mImpl->getInputConsumedEvent();
+    }
+
+    //!
+    //! \brief Set output allocator to use for output tensor of given name.
+    //! Pass nullptr to outputAllocator to unset.
+    //! The allocator is called by enqueueV3().
+    //!
+    //! \param tensorName The name of an output tensor.
+    //! \param outputAllocator IOutputAllocator for the tensors.
+    //!
+    //! \return True on success, false if the provided name does not map to an output, or if some other error occurred.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see enqueueV3() IOutputAllocator
+    //!
+    bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
+    {
+        return mImpl->setOutputAllocator(tensorName, outputAllocator);
+    }
+
+    //!
+    //! \brief Get output allocator associated with output tensor of given name, or nullptr if the provided name does
+    //! not map to an output tensor.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \see IOutputAllocator
+    //!
+    IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
+    {
+        return mImpl->getOutputAllocator(tensorName); // nullptr if tensorName is not an output tensor
+    }
+
+    //!
+    //! \brief Get upper bound on an output tensor's size, in bytes, based on
+    //! the current optimization profile and input dimensions.
+    //!
+    //! If the profile or input dimensions are not yet set, or the provided name
+    //! does not map to an output, returns -1.
+    //!
+    //! \param tensorName The name of an output tensor.
+    //!
+    //! \return Upper bound in bytes.
+    //!
+    //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    int64_t getMaxOutputSize(char const* tensorName) const noexcept
+    {
+        return mImpl->getMaxOutputSize(tensorName); // -1 if profile/input dims are unset or name is not an output
+    }
+
+    //!
+    //! \brief Specify allocator to use for internal temporary storage.
+    //!
+    //! This allocator is used only by enqueueV3() for temporary storage whose size cannot be
+    //! predicted ahead of enqueueV3(). It is not used for output tensors, because memory
+    //! for those is allocated by the allocator set by setOutputAllocator().
+    //! All memory allocated is freed by the time enqueueV3() returns.
+    //!
+    //! \param allocator pointer to allocator to use. Pass nullptr to revert to using TensorRT's
+    //!        default allocator.
+    //!
+    //! \return True on success, false if error occurred.
+    //!
+    //! \see enqueueV3() setOutputAllocator()
+    //!
+    bool setTemporaryStorageAllocator(IGpuAllocator* allocator) noexcept
+    {
+        return mImpl->setTemporaryStorageAllocator(allocator); // nullptr reverts to TensorRT's default allocator
+    }
+
+    //!
+    //! \brief Get allocator set by setTemporaryStorageAllocator.
+    //!
+    //! Returns a nullptr if a nullptr was passed with setTemporaryStorageAllocator().
+    //!
+    IGpuAllocator* getTemporaryStorageAllocator() const noexcept
+    {
+        return mImpl->getTemporaryStorageAllocator(); // nullptr if nullptr was passed to the setter
+    }
+
+    //!
+    //! \brief Enqueue inference on a stream.
+    //!
+    //! \param stream A CUDA stream on which the inference kernels will be enqueued.
+    //!
+    //! \return True if the kernels were enqueued successfully, false otherwise.
+    //!
+    //! Modifying or releasing memory that has been registered for the tensors before stream
+    //! synchronization or the event passed to setInputConsumedEvent has been triggered results in undefined
+    //! behavior.
+    //! Input tensors can be released after the event passed to setInputConsumedEvent has been triggered, whereas
+    //! output tensors require stream synchronization.
+    //!
+    //! \warning Using default stream may lead to performance issues due to additional cudaDeviceSynchronize() calls by
+    //!          TensorRT to ensure correct synchronizations. Please use non-default stream instead.
+    //!
+    //! \warning If the Engine is streaming weights, enqueueV3 will become synchronous, and
+    //!          the graph will not be capturable.
+    //!
+    bool enqueueV3(cudaStream_t stream) noexcept
+    {
+        return mImpl->enqueueV3(stream); // true means the kernels were enqueued, not that they have finished
+    }
+
+    //!
+    //! \brief Set the maximum size for persistent cache usage.
+    //!
+    //! This function sets the maximum persistent L2 cache that this execution context may use for activation caching.
+    //! Activation caching is not supported on all architectures - see "How TensorRT uses Memory" in the developer guide
+    //! for details.
+    //!
+    //! \param size the size of persistent cache limitation in bytes.
+    //! The default is 0 Bytes.
+    //!
+    //! \see getPersistentCacheLimit
+    void setPersistentCacheLimit(size_t size) noexcept
+    {
+        mImpl->setPersistentCacheLimit(size); // size is in bytes; no status is reported back to the caller
+    }
+
+    //!
+    //! \brief Get the maximum size for persistent cache usage.
+    //!
+    //! \returns The size of the persistent cache limit
+    //!
+    //! \see setPersistentCacheLimit
+    size_t getPersistentCacheLimit() const noexcept
+    {
+        return mImpl->getPersistentCacheLimit(); // forwarded to the implementation object
+    }
+
+    //!
+    //! \brief Set the verbosity of the NVTX markers in the execution context.
+    //!
+    //! Building with kDETAILED verbosity will generally increase latency in enqueueV3(). Call this method
+    //! to select NVTX verbosity in this execution context at runtime.
+    //!
+    //! The default is the verbosity with which the engine was built, and the verbosity may not be raised above that
+    //! level.
+    //!
+    //! This function does not affect how IEngineInspector interacts with the engine.
+    //!
+    //! \param verbosity The verbosity of the NVTX markers.
+    //!
+    //! \return True if the NVTX verbosity is set successfully. False if the provided verbosity level is higher than the
+    //! profiling verbosity of the corresponding engine.
+    //!
+    //! \see getNvtxVerbosity()
+    //! \see ICudaEngine::getProfilingVerbosity()
+    //!
+    bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
+    {
+        return mImpl->setNvtxVerbosity(verbosity); // false if verbosity exceeds the engine's profiling verbosity
+    }
+
+    //!
+    //! \brief Get the NVTX verbosity of the execution context.
+    //!
+    //! \return The current NVTX verbosity of the execution context.
+    //!
+    //! \see setNvtxVerbosity()
+    //!
+    ProfilingVerbosity getNvtxVerbosity() const noexcept
+    {
+        return mImpl->getNvtxVerbosity(); // forwarded to the implementation object
+    }
+
+    //!
+    //! \brief Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
+    //!
+    //! If set, TensorRT will launch the kernels that are supposed to run on the auxiliary streams using the streams
+    //! provided by the user with this API. If this API is not called before the enqueueV3() call, then TensorRT will
+    //! use the auxiliary streams created by TensorRT internally.
+    //!
+    //! TensorRT will always insert event synchronizations between the main stream provided via enqueueV3() call and the
+    //! auxiliary streams:
+    //!  - At the beginning of the enqueueV3() call, TensorRT will make sure that all the auxiliary streams wait on
+    //!    the activities on the main stream.
+    //!  - At the end of the enqueueV3() call, TensorRT will make sure that the main stream waits on the activities on
+    //!    all the auxiliary streams.
+    //!
+    //! \param auxStreams The pointer to an array of cudaStream_t with the array length equal to nbStreams.
+    //! \param nbStreams The number of auxiliary streams provided. If nbStreams is greater than
+    //!        `engine->getNbAuxStreams()`, then only the first `engine->getNbAuxStreams()` streams will be used. If
+    //!        `nbStreams` is less than `engine->getNbAuxStreams()`, such as setting `nbStreams` to 0, then TensorRT
+    //!        will use the provided streams for the first `nbStreams` auxiliary streams, and will create additional
+    //!        streams internally for the rest of the auxiliary streams.
+    //!
+    //! \note The provided auxiliary streams must not be the default stream and must all be different to avoid
+    //!       deadlocks.
+    //!
+    //! \see enqueueV3(), IBuilderConfig::setMaxAuxStreams(), ICudaEngine::getNbAuxStreams()
+    //!
+    void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
+    {
+        mImpl->setAuxStreams(auxStreams, nbStreams); // no status is returned; auxStreams holds nbStreams entries
+    }
+
+    //!
+    //! \brief Set DebugListener for this execution context.
+    //!
+    //! \param listener DebugListener for this execution context.
+    //!
+    //! \return True on success, false on failure.
+    //!
+    bool setDebugListener(IDebugListener* listener) noexcept
+    {
+        return mImpl->setDebugListener(listener); // forwarded to the implementation object
+    }
+
+    //!
+    //! \brief Get the DebugListener of this execution context.
+    //!
+    //! \return DebugListener of this execution context.
+    //!
+    IDebugListener* getDebugListener() noexcept
+    {
+        return mImpl->getDebugListener(); // note: this getter is not const-qualified
+    }
+
+    //!
+    //! \brief Set debug state of tensor given the tensor name.
+    //!
+    //! Turn the debug state of a tensor on or off.
+    //! A tensor with the parameter tensor name must exist in the network, and the tensor must have
+    //! been marked as a debug tensor during build time. Otherwise, the call fails (it is noexcept: nothing is thrown).
+    //!
+    //! \param name Name of target tensor.
+    //!
+    //! \param flag True if turning on debug state, false if turning off debug state of tensor
+    //! The default is off.
+    //!
+    //! \return True if successful, false otherwise.
+    //!
+    bool setTensorDebugState(char const* name, bool flag) noexcept
+    {
+        return mImpl->setTensorDebugState(name, flag); // flag: true = debug state on, false = off
+    }
+
+    //!
+    //! \brief Get the debug state.
+    //!
+    //! \param name Name of target tensor.
+    //!
+    //! \return true if there is a debug tensor with the given name and it has debug state turned on.
+    //!
+    bool getDebugState(char const* name) const noexcept
+    {
+        return mImpl->getDebugState(name); // true only if a debug tensor with this name exists and is turned on
+    }
+
+    //!
+    //! \brief Get the runtime config object used during execution context creation.
+    //!
+    //! \return The runtime config object.
+    //!
+    IRuntimeConfig* getRuntimeConfig() const noexcept
+    {
+        return mImpl->getRuntimeConfig(); // forwarded to the implementation object
+    }
+
+    //! \brief Turn the debug state of all debug tensors on or off.
+    //!
+    //! \param flag true if turning on debug state, false if turning off debug state.
+    //!
+    //! \return true if successful, false otherwise.
+    //!
+    //! The default is off.
+    //!
+    bool setAllTensorsDebugState(bool flag) noexcept
+    {
+        return mImpl->setAllTensorsDebugState(flag); // one flag for all debug tensors: true = on, false = off
+    }
+
+    //!
+    //! \brief Turn the debug state of unfused tensors on or off.
+    //!
+    //! The default is off.
+    //!
+    //! \param flag true if turning on debug state, false if turning off debug state.
+    //!
+    //! \return true if successful, false otherwise.
+    //!
+    //! \see INetworkDefinition::markUnfusedTensorsAsDebugTensors()
+    //!
+    bool setUnfusedTensorsDebugState(bool flag) noexcept
+    {
+        return mImpl->setUnfusedTensorsDebugState(flag); // flag: true = on, false = off
+    }
+
+    //!
+    //! \brief Get the debug state of unfused tensors.
+    //!
+    //! \return true if unfused tensors debug state is on. False if unfused tensors debug state is off.
+    //!
+    bool getUnfusedTensorsDebugState() const noexcept
+    {
+        return mImpl->getUnfusedTensorsDebugState(); // true = on, false = off
+    }
+
+protected:
+    apiv::VExecutionContext* mImpl;
+}; // class IExecutionContext
+
+//!
+//! \enum LayerInformationFormat
+//!
+//! \brief The format in which the IEngineInspector prints the layer information.
+//!
+//! \see IEngineInspector::getLayerInformation(), IEngineInspector::getEngineInformation()
+//!
+enum class LayerInformationFormat : int32_t
+{
+    kONELINE = 0, //!< Print layer information in one line per layer.
+    kJSON = 1,    //!< Print layer information in JSON format.
+};
+
+//! Maximum number of layer information formats in LayerInformationFormat enum.
+//! \see LayerInformationFormat
+template <>
+constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
+{
+    return 2; // kONELINE and kJSON; keep in step with the enumerator list above
+}
+
+//!
+//! \class IEngineInspector
+//!
+//! \brief An engine inspector which prints out the layer information of an engine or an execution context.
+//!
+//! The amount of printed information depends on the profiling verbosity setting of the builder config when the engine
+//! is built:
+//! - ProfilingVerbosity::kLAYER_NAMES_ONLY: only layer names will be printed.
+//! - ProfilingVerbosity::kNONE: no layer information will be printed.
+//! - ProfilingVerbosity::kDETAILED: layer names and layer parameters will be printed.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \see ProfilingVerbosity, IEngineInspector
+//!
+class IEngineInspector : public INoCopy
+{
+public:
+    virtual ~IEngineInspector() noexcept = default;
+
+    //!
+    //! \brief Set an execution context as the inspection source.
+    //!
+    //! Setting the execution context and specifying all the input shapes allows the inspector
+    //! to calculate concrete dimensions for any dynamic shapes and display their format information.
+    //! Otherwise, values dependent on input shapes will be displayed as -1 and format information
+    //! will not be shown.
+    //!
+    //! \param context The execution context to inspect. Passing nullptr removes any association with a context.
+    //!
+    //! \return True if the action succeeds, false otherwise.
+    //!
+    bool setExecutionContext(IExecutionContext const* context) noexcept
+    {
+        return mImpl->setExecutionContext(context);
+    }
+
+    //!
+    //! \brief Get the context currently being inspected.
+    //!
+    //! \return The pointer to the context currently being inspected.
+    //!
+    //! \see setExecutionContext()
+    //!
+    IExecutionContext const* getExecutionContext() const noexcept
+    {
+        return mImpl->getExecutionContext();
+    }
+
+    //!
+    //! \brief Get a string describing the information about a specific layer in the current engine or the execution
+    //!        context.
+    //!
+    //! \param layerIndex the index of the layer. It must lie in range [0, engine.getNbLayers()).
+    //!
+    //! \param format the format the layer information should be printed in.
+    //!
+    //! \return A null-terminated C-style string describing the information about a specific layer in the current
+    //!         engine or the execution context.
+    //!
+    //! \warning The content of the returned string may change when another execution context has
+    //!          been set, or when another getLayerInformation() or getEngineInformation() has been called.
+    //!
+    //! \warning In a multi-threaded environment, this function must be protected from other threads changing the
+    //!          inspection source. If the inspection source changes, the data that is being pointed to can change.
+    //!          Copy the string to another buffer before releasing the lock in order to guarantee consistency.
+    //!
+    //! \see LayerInformationFormat
+    //!
+    char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
+    {
+        return mImpl->getLayerInformation(layerIndex, format);
+    }
+
+    //!
+    //! \brief Get a string describing the information about all the layers in the current engine or the execution
+    //!        context.
+    //!
+    //! \param format the format the layer information should be printed in.
+    //!
+    //! \return A null-terminated C-style string describing the information about all the layers in the current
+    //!         engine or the execution context.
+    //!
+    //! \warning The content of the returned string may change when another execution context has
+    //!          been set, or when another getLayerInformation() or getEngineInformation() has been called.
+    //!
+    //! \warning In a multi-threaded environment, this function must be protected from other threads changing the
+    //!          inspection source. If the inspection source changes, the data that is being pointed to can change.
+    //!          Copy the string to another buffer before releasing the lock in order to guarantee consistency.
+    //!
+    //! \see LayerInformationFormat
+    //!
+    char const* getEngineInformation(LayerInformationFormat format) const noexcept
+    {
+        return mImpl->getEngineInformation(format);
+    }
+
+    //!
+    //! \brief Set the ErrorRecorder for this interface.
+    //!
+    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
+    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+    //! a recorder has been registered.
+    //!
+    //! If an error recorder is not set, messages will be sent to the global log stream.
+    //!
+    //! \param recorder The error recorder to register with this interface.
+    //!
+    //! \see getErrorRecorder()
+    //!
+    void setErrorRecorder(IErrorRecorder* recorder) noexcept
+    {
+        mImpl->setErrorRecorder(recorder);
+    }
+
+    //!
+    //! \brief Get the ErrorRecorder assigned to this interface.
+    //!
+    //! Retrieves the assigned error recorder object for the given class. A nullptr will be returned if
+    //! an error recorder has not been set.
+    //!
+    //! \return A pointer to the IErrorRecorder object that has been registered.
+    //!
+    //! \see setErrorRecorder()
+    //!
+    IErrorRecorder* getErrorRecorder() const noexcept
+    {
+        return mImpl->getErrorRecorder();
+    }
+
+protected:
+    apiv::VEngineInspector* mImpl; //!< Implementation object; every method above forwards to it.
+}; // class IEngineInspector
+
+} // namespace nvinfer1
+
+//!
+//! Internal C entry point for creating IRuntime.
+//! @private
+//!
+extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
+
+//!
+//! Internal C entry point for creating IRefitter.
+//! @private
+//!
+extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
+
+//!
+//! \brief Return the plugin registry
+//!
+extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getPluginRegistry() noexcept;
+
+//!
+//! \brief Return the logger object.
+//! \note The global logger is used only by standalone functions which have no associated builder, runtime
+//! or refitter.
+//!
+extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
+
+namespace nvinfer1
+{
+namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
+          // header.
+{
+//!
+//! \brief Create an instance of an IRuntime class.
+//!
+//! \param logger The logging class for the runtime.
+//!
+inline IRuntime* createInferRuntime(ILogger& logger) noexcept
+{
+    return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION)); // void* -> IRuntime*
+}
+
+//!
+//! \brief Create an instance of an IRefitter class.
+//!
+//! \param engine The engine class for the refitter.
+//! \param logger The logging class for the refitter.
+//!
+inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
+{
+    return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION)); // void* cast
+}
+
+} // namespace
+
+//!
+//! \brief Register the plugin creator to the registry
+//! The static registry object will be instantiated when the plugin library is
+//! loaded. This static object will register all creators available in the
+//! library to the registry.
+//!
+//! \warning Statically registering plugins should be avoided in the automotive
+//!  safety context as the application developer should first register an error recorder
+//!  with the plugin registry via IPluginRegistry::setErrorRecorder() before using
+//!  IPluginRegistry::registerCreator() or other methods.
+//!
+template <typename T>
+class PluginRegistrar
+{
+public:
+    PluginRegistrar()
+    {
+        getPluginRegistry()->registerCreator(instance, ""); // second argument is ""; any result is discarded
+    }
+
+private:
+    //! Creator instance of type T that the constructor hands to the plugin registry.
+    T instance{};
+};
+
+} // namespace nvinfer1
+
+#define REGISTER_TENSORRT_PLUGIN(name)                                                                                 \
+    static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {} // no ';' here: the macro user supplies it
+
+namespace nvinfer1
+{
+//!
+//! \class ILoggerFinder
+//!
+//! \brief A virtual base class to find a logger.
+//! Allows a plugin to find an instance of a logger if it needs to emit a log message.
+//! A pointer to an instance of this class is passed to a plugin shared library on initialization when that plugin
+//! is serialized as part of a version-compatible plan. See the plugin chapter in the developer guide for details.
+//!
+class ILoggerFinder
+{
+public:
+    //!
+    //! \brief Get the logger used by the engine or execution context which called the plugin method.
+    //!
+    //! \warning Must be called from the thread in which the plugin method was called.
+    //!
+    //! \return A pointer to the logger.
+    //!
+    virtual ILogger* findLogger() = 0;
+
+protected:
+    virtual ~ILoggerFinder() = default; // protected: outside code cannot delete through this interface
+};
+
+//! DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD.
+//! The name v_1_0 may change in future versions of TensorRT.
+namespace v_1_0
+{
+
+class IGpuAsyncAllocator : public IGpuAllocator
+{
+public:
+    IGpuAsyncAllocator() = default;
+    ~IGpuAsyncAllocator() override = default;
+
+    //!
+    //! \brief A thread-safe callback implemented by the application to handle stream-ordered asynchronous
+    //!        acquisition of GPU memory.
+    //!
+    //! \param size The size of the memory block required (in bytes).
+    //! \param alignment The required alignment of memory. Alignment will be zero
+    //!        or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
+    //!        Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
+    //!        An alignment value of zero indicates any alignment is acceptable.
+    //! \param flags Reserved for future use. In the current release, 0 will be passed.
+    //!
+    //! \param stream Specifies the CUDA stream for the asynchronous allocation. If nullptr or 0 is
+    //!        passed, the default stream will be used.
+    //!
+    //! \return If the allocation was successful, the start address of a device memory block of the requested size.
+    //!         If an allocation request of size 0 is made, nullptr must be returned.
+    //!         If an allocation request cannot be satisfied, nullptr must be returned.
+    //!         If a non-null address is returned, it is guaranteed to have the specified alignment.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync
+    //! requests.
+    //!
+    //! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
+    //! albeit doing so will lose the performance advantage of asynchronous allocation.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    //!
+    void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
+        cudaStream_t /*stream*/) noexcept override = 0;
+
+    //!
+    //! \brief A thread-safe callback implemented by the application to handle stream-ordered asynchronous
+    //! release of GPU memory.
+    //!
+    //! TensorRT may pass a nullptr to this function if it was previously returned by allocateAsync() or allocate().
+    //!
+    //! \param memory A memory address that was previously returned by an allocateAsync(), allocate() or reallocate()
+    //! call of the same allocator object.
+    //!
+    //! \param stream Specifies the CUDA stream for the asynchronous deallocation. If nullptr or 0 is
+    //!        passed, the default stream will be used.
+    //!
+    //! \return True if the acquired memory is released successfully.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync
+    //! requests.
+    //!
+    //! \note The implementation is not required to be asynchronous. It is permitted to synchronize,
+    //! albeit doing so will lose the performance advantage of asynchronous deallocation.
+    //! Either way, it is critical that it not actually free the memory until the current
+    //! stream position is reached.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
+
+    //!
+    //! \brief A thread-safe callback implemented by the application to handle acquisition of GPU memory.
+    //!
+    //! \param size The size of the memory block required (in bytes).
+    //! \param alignment The required alignment of memory. Alignment will be zero
+    //!        or a power of 2 not exceeding the alignment guaranteed by cudaMalloc.
+    //!        Thus this allocator can be safely implemented with cudaMalloc/cudaFree.
+    //!        An alignment value of zero indicates any alignment is acceptable.
+    //! \param flags Reserved for future use. In the current release, 0 will be passed.
+    //!
+    //! \return If the allocation was successful, the start address of a device memory block of the requested size.
+    //!         If an allocation request of size 0 is made, nullptr must be returned.
+    //!         If an allocation request cannot be satisfied, nullptr must be returned.
+    //!         If a non-null address is returned, it is guaranteed to have the specified alignment.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocateAsync/deallocateAsync/reallocate
+    //! requests.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by allocateAsync
+    //!
+    TRT_DEPRECATED void* allocate(
+        uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
+    {
+        return allocateAsync(size, alignment, flags, nullptr); // nullptr stream: the default stream is used
+    }
+
+    //!
+    //! \brief A thread-safe callback implemented by the application to handle release of GPU memory.
+    //!
+    //! TensorRT may pass a nullptr to this function if it was previously returned by allocate().
+    //!
+    //! \param memory A memory address that was previously returned by an allocate() or reallocate() call of the same
+    //! allocator object.
+    //!
+    //! \return True if the acquired memory is released successfully.
+    //!
+    //! \note The implementation must guarantee thread safety for concurrent allocate/reallocate/deallocate
+    //! requests.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads.
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by deallocateAsync
+    //!
+    TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
+    {
+        return deallocateAsync(memory, nullptr); // nullptr stream: the default stream is used
+    }
+
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return {"IGpuAllocator", 1, 0}; // NOTE(review): reports the base "IGpuAllocator" name; presumably intended
+    }
+};
+
+class IPluginCreatorV3One : public IPluginCreatorInterface
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
+    }
+
+    //!
+    //! \brief Return a plugin object. Return nullptr in case of error.
+    //!
+    //! \param name A NULL-terminated name string of length 1024 or less, including the NULL terminator.
+    //! \param fc A pointer to a collection of fields needed for constructing the plugin.
+    //! \param phase The TensorRT phase in which the plugin is being created.
+    //!
+    //! When the phase is TensorRTPhase::kRUNTIME, the PluginFieldCollection provided for serialization by the plugin's
+    //! runtime interface will be passed as fc.
+    //!
+    //! \note The returned plugin object must be in an initialized state.
+    //!
+    //! \note If invoked by the user (e.g. with TensorRTPhase::kBUILD, to add to the network definition with
+    //! addPluginV3()), it is the user's responsibility to delete the plugin object. If invoked by TensorRT (e.g. during
+    //! engine deserialization), TensorRT will delete any objects it creates.
+    //!
+    virtual IPluginV3* createPlugin(
+        AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;
+
+    //!
+    //! \brief Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in the
+    //! TensorRT build phase.
+    //!
+    //! \see PluginFieldCollection
+    //!
+    virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
+
+    //!
+    //! \brief Return the plugin name.
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
+    //! the NULL terminator.
+    //!
+    virtual AsciiChar const* getPluginName() const noexcept = 0;
+
+    //!
+    //! \brief Return the plugin version.
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
+    //! the NULL terminator.
+    //!
+    virtual AsciiChar const* getPluginVersion() const noexcept = 0;
+
+    //!
+    //! \brief Return the plugin namespace.
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
+    //! the NULL terminator.
+    //!
+    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
+
+    IPluginCreatorV3One() = default;
+    virtual ~IPluginCreatorV3One() = default;
+
+protected:
+    IPluginCreatorV3One(IPluginCreatorV3One const&) = default; // copy/move are reserved for derived classes
+    IPluginCreatorV3One(IPluginCreatorV3One&&) = default;
+    IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
+    IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
+};
+
+} // namespace v_1_0
+
+//!
+//! \class IGpuAsyncAllocator
+//!
+//! \brief Application-implemented class for controlling asynchronous (stream ordered) memory allocation on the GPU.
+//!
+//! \warning The lifetime of an IGpuAsyncAllocator object must exceed that of all objects that use it.
+//!
+//! The advantage of deriving from IGpuAsyncAllocator instead of IGpuAllocator is that you only have
+//! to override two methods: allocateAsync() and deallocateAsync() to implement an allocator with
+//! asynchronous capability, whereas deriving from IGpuAllocator requires overriding four methods,
+//! including two deprecated methods.
+//!
+//! \see IGpuAllocator
+using IGpuAsyncAllocator = v_1_0::IGpuAsyncAllocator;
+
+//!
+//! \class IPluginCreatorV3One
+//!
+//! \brief A plugin creator class capable of producing IPluginV3 objects.
+//!
+//! \see IPluginV3
+//! \see IPluginRegistry
+//!
+using IPluginCreatorV3One = v_1_0::IPluginCreatorV3One;
+
+} // namespace nvinfer1
+
+//!
+//! \brief Return the library major version number.
+//!
+extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
+//!
+//! \brief Return the library minor version number.
+//!
+extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
+//!
+//! \brief Return the library patch version number.
+//!
+extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
+//!
+//! \brief Return the library build version number.
+//!
+extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
+
+#endif // NV_INFER_RUNTIME_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimeBase.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimeBase.h
new file mode 100644
index 0000000000000000000000000000000000000000..bd021865dff470fafcc1f493b0330435675aad4a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimeBase.h
@@ -0,0 +1,686 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_RUNTIME_BASE_H
+#define NV_INFER_RUNTIME_BASE_H
+
+#include "NvInferVersion.h"
+#include <cstddef>
+#include <cstdint>
+#include <cuda_runtime_api.h>
+
+// Items that are marked as deprecated will be removed in a future release.
+#if __cplusplus >= 201402L
+#define TRT_DEPRECATED [[deprecated]]
+#define TRT_DEPRECATED_BECAUSE(REASON) [[deprecated(REASON)]]
+#define TRT_DEPRECATED_ENUM TRT_DEPRECATED
+#ifdef _MSC_VER
+#define TRT_DEPRECATED_API __declspec(dllexport)
+#else
+#define TRT_DEPRECATED_API [[deprecated]] __attribute__((visibility("default")))
+#endif
+#else
+#ifdef _MSC_VER
+#define TRT_DEPRECATED
+#define TRT_DEPRECATED_ENUM
+#define TRT_DEPRECATED_API __declspec(dllexport)
+#else
+#define TRT_DEPRECATED __attribute__((deprecated))
+#define TRT_DEPRECATED_ENUM
+#define TRT_DEPRECATED_API __attribute__((deprecated, visibility("default")))
+#endif
+#define TRT_DEPRECATED_BECAUSE(REASON) TRT_DEPRECATED
+#endif
+
+//! A stand-in for `[[nodiscard]]` and `[[nodiscard(REASON)]]` that works with older compilers.
+#if __cplusplus >= 201907L
+#define TRT_NODISCARD [[nodiscard]]
+#define TRT_NODISCARD_BECAUSE(REASON) [[nodiscard(REASON)]]
+#elif __cplusplus >= 201603L
+#define TRT_NODISCARD [[nodiscard]]
+#define TRT_NODISCARD_BECAUSE(REASON) [[nodiscard]]
+#else
+#define TRT_NODISCARD
+#define TRT_NODISCARD_BECAUSE(REASON)
+#endif
+
+// Defines which symbols are exported
+#ifdef TENSORRT_BUILD_LIB
+#ifdef _MSC_VER
+#define TENSORRTAPI __declspec(dllexport)
+#else
+#define TENSORRTAPI __attribute__((visibility("default")))
+#endif
+#else
+#define TENSORRTAPI
+#endif
+#define TRTNOEXCEPT
+//!
+//! \file NvInferRuntimeBase.h
+//!
+//! This file contains common definitions, data structures and interfaces shared between the standard and safe runtime.
+//!
+//! \warning Do not directly include this file. Instead include one of:
+//! * NvInferRuntime.h (for the standard runtime)
+//! * NvInferPluginUtils.h (for plugin utilities)
+//!
+#if !defined(NV_INFER_INTERNAL_INCLUDE)
+static_assert(false, "Do not directly include this file. Include NvInferRuntime.h or NvInferPluginUtils.h");
+#endif
+
+//! Forward declare some CUDA types to avoid an include dependency.
+
+extern "C"
+{
+    //! Forward declaration of cublasContext to use in other interfaces.
+    struct cublasContext;
+    //! Forward declaration of cudnnContext to use in other interfaces.
+    struct cudnnContext;
+}
+
+//! Construct a single integer denoting TensorRT version.
+//! Usable in preprocessor expressions.
+#define NV_TENSORRT_VERSION_INT(major, minor, patch) ((major) *10000L + (minor) *100L + (patch) *1L)
+
+//! TensorRT version as a single integer.
+//! Usable in preprocessor expressions.
+#define NV_TENSORRT_VERSION NV_TENSORRT_VERSION_INT(NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH)
+
+//!
+//! \namespace nvinfer1
+//!
+//! \brief The TensorRT API version 1 namespace.
+//!
+namespace nvinfer1
+{
+//! char_t is the type used by TensorRT to represent all valid characters.
+using char_t = char;
+
+//! AsciiChar is the type used by TensorRT to represent valid ASCII characters.
+//! This type is widely used in automotive safety context.
+using AsciiChar = char_t;
+
+//! Forward declare IErrorRecorder for use in other interfaces.
+namespace v_1_0
+{
+class IErrorRecorder;
+} // namespace v_1_0
+using IErrorRecorder = v_1_0::IErrorRecorder;
+
+namespace impl
+{
+//! Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
+template <typename T>
+struct EnumMaxImpl;
+} // namespace impl
+
+//! Maximum number of elements in an enumeration type.
+template <typename T>
+constexpr int32_t EnumMax() noexcept
+{
+    return impl::EnumMaxImpl<T>::kVALUE;
+}
+
+//!
+//! \enum DataType
+//! \brief The type of weights and tensors.
+//!
+enum class DataType : int32_t
+{
+    //! 32-bit floating point format.
+    kFLOAT = 0,
+
+    //! IEEE 16-bit floating-point format -- has a 5 bit exponent and 11 bit significand.
+    kHALF = 1,
+
+    //! Signed 8-bit integer representing a quantized floating-point value.
+    kINT8 = 2,
+
+    //! Signed 32-bit integer format.
+    kINT32 = 3,
+
+    //! 8-bit boolean. 0 = false, 1 = true, other values undefined.
+    kBOOL = 4,
+
+    //! Unsigned 8-bit integer format.
+    //! Cannot be used to represent quantized floating-point values.
+    //! Use the IdentityLayer to convert kUINT8 network-level inputs to {kFLOAT, kHALF} prior
+    //! to use with other TensorRT layers, or to convert intermediate output
+    //! before kUINT8 network-level outputs from {kFLOAT, kHALF} to kUINT8.
+    //! kUINT8 conversions are only supported for {kFLOAT, kHALF}.
+    //! kUINT8 to {kFLOAT, kHALF} conversion will convert the integer values
+    //! to equivalent floating point values.
+    //! {kFLOAT, kHALF} to kUINT8 conversion will convert the floating point values
+    //! to integer values by truncating towards zero. This conversion has undefined behavior for
+    //! floating point values outside the range [0.0F, 256.0F) after truncation.
+    //! kUINT8 conversions are not supported for {kINT8, kINT32, kBOOL}.
+    kUINT8 = 5,
+
+    //! Signed 8-bit floating point with
+    //! 1 sign bit, 4 exponent bits, 3 mantissa bits, and exponent-bias 7.
+    kFP8 = 6,
+
+    //! Brain float -- has an 8 bit exponent and 8 bit significand.
+    kBF16 = 7,
+
+    //! Signed 64-bit integer type.
+    kINT64 = 8,
+
+    //! Signed 4-bit integer type.
+    kINT4 = 9,
+
+    //! 4-bit floating point type
+    //! 1 bit sign, 2 bit exponent, 1 bit mantissa
+    kFP4 = 10,
+
+    //! Unsigned representation of exponent-only 8-bit floating point type for quantization scales
+    kE8M0 = 11,
+};
+
+namespace impl
+{
+//! Maximum number of elements in DataType enum. \see DataType
+template <>
+struct EnumMaxImpl<DataType>
+{
+    //! Declaration of kVALUE that represents the maximum number of elements in the DataType enum.
+    static constexpr int32_t kVALUE = 12;
+};
+} // namespace impl
+
+//!
+//! \class Dims64
+//! \brief Structure to define the dimensions of a tensor.
+//!
+//! TensorRT can also return an "invalid dims" structure. This structure is
+//! represented by nbDims == -1 and d[i] == 0 for all i.
+//!
+//! TensorRT can also return an "unknown rank" dims structure. This structure is
+//! represented by nbDims == -1 and d[i] == -1 for all i.
+//!
+class Dims64
+{
+public:
+    //! The maximum rank (number of dimensions) supported for a tensor.
+    static constexpr int32_t MAX_DIMS{8};
+
+    //! The rank (number of dimensions).
+    int32_t nbDims;
+
+    //! The extent of each dimension.
+    int64_t d[MAX_DIMS];
+};
+
+//!
+//! Alias for Dims64.
+//!
+using Dims = Dims64;
+
+using InterfaceKind = char const*;
+
+//!
+//! \class InterfaceInfo
+//!
+//! \brief Version information associated with a TRT interface
+//!
+class InterfaceInfo
+{
+public:
+    InterfaceKind kind; //!< C-string naming the interface, e.g. "IErrorRecorder".
+    int32_t major;      //!< Major version of the interface.
+    int32_t minor;      //!< Minor version of the interface.
+};
+
+//!
+//! \enum APILanguage
+//!
+//! \brief Programming language used in the implementation of a TRT interface
+//!
+enum class APILanguage : int32_t
+{
+    kCPP = 0,   //!< The interface implementation is written in C++.
+    kPYTHON = 1 //!< The interface implementation is written in Python.
+};
+
+namespace impl
+{
+//! Maximum number of elements in APILanguage enum. \see APILanguage
+template <>
+struct EnumMaxImpl<APILanguage>
+{
+    //! Declaration of kVALUE that represents the maximum number of elements in the APILanguage enum.
+    static constexpr int32_t kVALUE = 2;
+};
+} // namespace impl
+
+//!
+//! \class IVersionedInterface
+//!
+//! \brief An Interface class for version control.
+//!
+class IVersionedInterface
+{
+public:
+    //!
+    //! \brief The language used to build the implementation of this Interface.
+    //!
+    //! Applications must not override this method.
+    //!
+    virtual APILanguage getAPILanguage() const noexcept
+    {
+        return APILanguage::kCPP;
+    }
+
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    virtual InterfaceInfo getInterfaceInfo() const noexcept = 0;
+
+    virtual ~IVersionedInterface() noexcept = default;
+
+protected:
+    IVersionedInterface() = default;
+    IVersionedInterface(IVersionedInterface const&) = default;
+    IVersionedInterface(IVersionedInterface&&) = default;
+    IVersionedInterface& operator=(IVersionedInterface const&) & = default;
+    IVersionedInterface& operator=(IVersionedInterface&&) & = default;
+};
+
+//!
+//! \enum ErrorCode
+//!
+//! \brief Error codes that can be returned by TensorRT during execution.
+//!
+enum class ErrorCode : int32_t
+{
+    //!
+    //! Execution completed successfully.
+    //!
+    kSUCCESS = 0,
+
+    //!
+    //! An error that does not fall into any other category. This error is included for forward compatibility.
+    //!
+    kUNSPECIFIED_ERROR = 1,
+
+    //!
+    //! A non-recoverable TensorRT error occurred. TensorRT is in an invalid internal state when this error is
+    //! emitted and any further calls to TensorRT will result in undefined behavior.
+    //!
+    kINTERNAL_ERROR = 2,
+
+    //!
+    //! An argument passed to the function is invalid in isolation.
+    //! This is a violation of the API contract.
+    //!
+    kINVALID_ARGUMENT = 3,
+
+    //!
+    //! An error occurred when comparing the state of an argument relative to other arguments. For example, the
+    //! dimensions for concat differ between two tensors outside of the channel dimension. This error is triggered
+    //! when an argument is correct in isolation, but not relative to other arguments. This helps to distinguish
+    //! the simple errors from the more complex errors.
+    //! This is a violation of the API contract.
+    //!
+    kINVALID_CONFIG = 4,
+
+    //!
+    //! An error occurred when performing an allocation of memory on the host or the device.
+    //! A memory allocation error is normally fatal, but in the case where the application provided its own memory
+    //! allocation routine, it is possible to increase the pool of available memory and resume execution.
+    //!
+    kFAILED_ALLOCATION = 5,
+
+    //!
+    //! One, or more, of the components that TensorRT relies on did not initialize correctly.
+    //! This is a system setup issue.
+    //!
+    kFAILED_INITIALIZATION = 6,
+
+    //!
+    //! An error occurred during execution that caused TensorRT to end prematurely, either an asynchronous error,
+    //! user cancellation, or other execution errors reported by CUDA/DLA. In a dynamic system, the
+    //! data can be thrown away and the next frame can be processed or execution can be retried.
+    //! This is either an execution error or a memory error.
+    //!
+    kFAILED_EXECUTION = 7,
+
+    //!
+    //! An error occurred during execution that caused the data to become corrupted, but execution finished. Examples
+    //! of this error are NaN squashing or integer overflow. In a dynamic system, the data can be thrown away and the
+    //! next frame can be processed or execution can be retried.
+    //! This is either a data corruption error, an input error, or a range error.
+    //! This is not used in safety but may be used in standard.
+    //!
+    kFAILED_COMPUTATION = 8,
+
+    //!
+    //! TensorRT was put into a bad state by an incorrect sequence of function calls. An example of an invalid state is
+    //! specifying a layer to be DLA only without GPU fallback, and that layer is not supported by DLA. This can occur
+    //! in situations where a service is optimistically executing networks for multiple different configurations
+    //! without checking proper error configurations, and instead throwing away bad configurations caught by TensorRT.
+    //! This is a violation of the API contract, but can be recoverable.
+    //!
+    //! Example of a recovery:
+    //! GPU fallback is disabled and conv layer with large filter(63x63) is specified to run on DLA. This will fail due
+    //! to DLA not supporting the large kernel size. This can be recovered by either turning on GPU fallback
+    //! or setting the layer to run on the GPU.
+    //!
+    kINVALID_STATE = 9,
+
+    //!
+    //! An error occurred due to the network not being supported on the device due to constraints of the hardware or
+    //! system. An example is running an unsafe layer in a safety certified context, or a resource requirement for the
+    //! current network is greater than the capabilities of the target device. The network is otherwise correct, but
+    //! the network and hardware combination is problematic. This can be recoverable.
+    //! Examples:
+    //!  * Scratch space requests larger than available device memory and can be recovered by increasing allowed
+    //!    workspace size.
+    //!  * Tensor size exceeds the maximum element count and can be recovered by reducing the maximum batch size.
+    //!
+    kUNSUPPORTED_STATE = 10,
+
+};
+
+namespace impl
+{
+//! Maximum number of elements in ErrorCode enum. \see ErrorCode
+template <>
+struct EnumMaxImpl<ErrorCode>
+{
+    //! Declaration of kVALUE that represents the maximum number of elements in the ErrorCode enum.
+    static constexpr int32_t kVALUE = 11;
+};
+} // namespace impl
+
+namespace v_1_0
+{
+class IErrorRecorder : public IVersionedInterface
+{
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"IErrorRecorder", 1, 0};
+    }
+
+    //!
+    //! \brief A typedef of a C-style string for reporting error descriptions.
+    //!
+    using ErrorDesc = char const*;
+
+    //!
+    //! \brief The length limit for an error description in bytes, excluding the '\0' string terminator.
+    //!        Only applicable to safe runtime.
+    //!        General error recorder implementation can use any size appropriate for the use case.
+    //!
+    static constexpr size_t kMAX_DESC_LENGTH{127U};
+
+    //!
+    //! \brief A typedef of a 32-bit integer for reference counting.
+    //!
+    using RefCount = int32_t;
+
+    IErrorRecorder() = default;
+    ~IErrorRecorder() noexcept override = default;
+
+    // Public API used to retrieve information from the error recorder.
+
+    //!
+    //! \brief Return the number of errors
+    //!
+    //! Determines the number of errors that occurred between the current point in execution
+    //! and the last time that the clear() was executed. Due to the possibility of asynchronous
+    //! errors occurring, a TensorRT API can return correct results, but still register errors
+    //! with the Error Recorder. The value of getNbErrors() must increment by 1 after each reportError()
+    //! call until clear() is called, or the maximum number of errors that can be stored is exceeded.
+    //!
+    //! \return Returns the number of errors detected, or 0 if there are no errors.
+    //!         If the upper bound of errors that can be stored is exceeded, the upper bound value must
+    //!         be returned.
+    //!
+    //! For example, if the error recorder can store up to 16 error descriptions but reportError() has
+    //! been called 20 times, getNbErrors() must return 16.
+    //!
+    //! \see clear(), hasOverflowed()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual int32_t getNbErrors() const noexcept = 0;
+
+    //!
+    //! \brief Returns the ErrorCode enumeration.
+    //!
+    //! \param errorIdx A 32-bit integer that indexes into the error array.
+    //!
+    //! The errorIdx specifies what error code from 0 to getNbErrors()-1 that the application
+    //! wants to analyze and return the error code enum.
+    //!
+    //! \return Returns the enum corresponding to errorIdx if errorIdx is in range (between 0 and getNbErrors()-1).
+    //!         ErrorCode::kUNSPECIFIED_ERROR must be returned if errorIdx is not in range.
+    //!
+    //! \see getErrorDesc(), ErrorCode
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual ErrorCode getErrorCode(int32_t errorIdx) const noexcept = 0;
+
+    //!
+    //! \brief Returns a null-terminated C-style string description of the error.
+    //!
+    //! \param errorIdx A 32-bit integer that indexes into the error array.
+    //!
+    //! For the error specified by the idx value, return the string description of the error. The
+    //! error string is a null-terminated C-style string. In the safety context there is a
+    //! constant length requirement to remove any dynamic memory allocations and the error message
+    //! will be truncated if it exceeds kMAX_DESC_LENGTH bytes.
+    //! The format of the string is "<EnumAsStr> - <Description>".
+    //!
+    //! \return Returns a string representation of the error along with a description of the error if errorIdx is in
+    //!         range (between 0 and getNbErrors()-1). An empty string will be returned if errorIdx is not in range.
+    //!
+    //! \see getErrorCode()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept = 0;
+
+    //!
+    //! \brief Determine if the error stack has overflowed.
+    //!
+    //! In the case when the number of errors is large, this function is used to query if one or more
+    //! errors have been dropped due to lack of storage capacity. This is especially important in the
+    //! automotive safety case where the internal error handling mechanisms cannot allocate memory.
+    //!
+    //! \return true if errors have been dropped due to overflowing the error stack.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual bool hasOverflowed() const noexcept = 0;
+
+    //!
+    //! \brief Clear the error stack on the error recorder.
+    //!
+    //! Removes all the tracked errors by the error recorder.  The implementation must guarantee that after
+    //! this function is called, and as long as no error occurs, the next call to getNbErrors will return
+    //! zero and hasOverflowed will return false.
+    //!
+    //! \see getNbErrors(), hasOverflowed()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual void clear() noexcept = 0;
+
+    // API used by TensorRT to report Error information to the application.
+
+    //!
+    //! \brief Report an error to the error recorder with the corresponding enum and description.
+    //!
+    //! \param val  The error code enum that is being reported.
+    //! \param desc The string description of the error, which will be a NULL-terminated string.
+    //!             For safety use cases its length is limited to kMAX_DESC_LENGTH bytes
+    //!             (excluding the NULL terminator) and descriptions that exceed this limit will be silently truncated.
+    //!
+    //! Report an error to the user that has a given value and human readable description. The function returns false
+    //! if processing can continue, which implies that the reported error is not fatal. This does not guarantee that
+    //! processing continues, but provides a hint to TensorRT.
+    //! The desc C-string data is only valid during the call to reportError and may be immediately deallocated by the
+    //! caller when reportError returns. The implementation must not store the desc pointer in the ErrorRecorder object
+    //! or otherwise access the data from desc after reportError returns.
+    //!
+    //! \return True if the error is determined to be fatal and processing of the current function must end.
+    //!
+    //! \warning If the error recorder's maximum number of storable errors is exceeded, the error description will be
+    //!          silently dropped and the value returned by getNbErrors() will not be incremented. However, the return
+    //!          value will still signal whether the error must be considered fatal.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual bool reportError(ErrorCode val, ErrorDesc desc) noexcept = 0;
+
+    //!
+    //! \brief Increments the refcount for the current ErrorRecorder.
+    //!
+    //! Increments the reference count for the object by one and returns the current value.  This reference count allows
+    //! the application to know that an object inside of TensorRT has taken a reference to the ErrorRecorder.  TensorRT
+    //! guarantees that every call to IErrorRecorder::incRefCount() will be paired with a call to
+    //! IErrorRecorder::decRefCount() when the reference is released.  It is undefined behavior to destruct the
+    //! ErrorRecorder when incRefCount() has been called without a corresponding decRefCount().
+    //!
+    //! \return The reference counted value after the increment completes.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual RefCount incRefCount() noexcept = 0;
+
+    //!
+    //! \brief Decrements the refcount for the current ErrorRecorder.
+    //!
+    //! Decrements the reference count for the object by one and returns the current value.  This reference count allows
+    //! the application to know that an object inside of TensorRT has taken a reference to the ErrorRecorder.  TensorRT
+    //! guarantees that every call to IErrorRecorder::decRefCount() will be preceded by a call to
+    //! IErrorRecorder::incRefCount().  It is undefined behavior to destruct the ErrorRecorder when incRefCount() has been
+    //! called without a corresponding decRefCount().
+    //!
+    //! \return The reference counted value after the decrement completes.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual RefCount decRefCount() noexcept = 0;
+
+protected:
+    // @cond SuppressDoxyWarnings
+    IErrorRecorder(IErrorRecorder const&) = default;
+    IErrorRecorder(IErrorRecorder&&) = default;
+    IErrorRecorder& operator=(IErrorRecorder const&) & = default;
+    IErrorRecorder& operator=(IErrorRecorder&&) & = default;
+    // @endcond
+}; // class IErrorRecorder
+} // namespace v_1_0
+
+//!
+//! \class IErrorRecorder
+//!
+//! \brief Reference counted application-implemented error reporting interface for TensorRT objects.
+//!
+//! The error reporting mechanism is a user-defined object that interacts with the internal state of the object
+//! that it is assigned to in order to determine information about abnormalities in execution. The error recorder
+//! gets both an error enum that is more descriptive than pass/fail and also a string description that gives more
+//! detail on the exact failure modes. In the safety context, the error strings are all limited to 128 bytes
+//! or less in length, including the NULL terminator.
+//!
+//! The ErrorRecorder gets passed along to any class that is created from another class that has an ErrorRecorder
+//! assigned to it. For example, assigning an ErrorRecorder to an IBuilder allows all INetwork's, ILayer's, and
+//! ITensor's to use the same error recorder. Some objects have their own ErrorRecorder accessor functions;
+//! this allows registering a different error recorder or de-registering the error recorder for that specific
+//! object.
+//!
+//! ErrorRecorder objects that are used in the safety runtime must define an implementation-dependent upper limit
+//! of errors whose information can be stored, and drop errors above this upper limit. The limit must fit in int32_t.
+//! The IErrorRecorder::hasOverflowed() method is used to signal that one or more errors have been dropped.
+//!
+//! The ErrorRecorder object implementation must be thread safe. All locking and synchronization is pushed to the
+//! interface implementation and TensorRT does not hold any synchronization primitives when calling the interface
+//! functions.
+//!
+//! The lifetime of the ErrorRecorder object must exceed the lifetime of all TensorRT objects that use it.
+//!
+using IErrorRecorder = v_1_0::IErrorRecorder;
+
+//!
+//! \enum TensorIOMode
+//!
+//! \brief Definition of tensor IO Mode.
+//!
+enum class TensorIOMode : int32_t
+{
+    //! Tensor is not an input or output.
+    kNONE = 0,
+
+    //! Tensor is input to the engine.
+    kINPUT = 1,
+
+    //! Tensor is output by the engine.
+    kOUTPUT = 2
+};
+
+namespace impl
+{
+//! Maximum number of elements in TensorIOMode enum. \see TensorIOMode
+template <>
+struct EnumMaxImpl<TensorIOMode>
+{
+    //! Declaration of kVALUE that represents the maximum number of elements in the TensorIOMode enum.
+    static constexpr int32_t kVALUE = 3;
+};
+} // namespace impl
+} // namespace nvinfer1
+
+//!
+//! \brief Return the library version number.
+//!
+//! The format is as for NV_TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH
+//!
+extern "C" TENSORRTAPI int32_t getInferLibVersion() noexcept;
+
+#endif // NV_INFER_RUNTIME_BASE_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimeCommon.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimeCommon.h
new file mode 100644
index 0000000000000000000000000000000000000000..110e14ea044c1bb78ea0a414523d31dce2ed1d20
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimeCommon.h
@@ -0,0 +1,322 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_RUNTIME_COMMON_H
+#define NV_INFER_RUNTIME_COMMON_H
+
+//!
+//! \file NvInferRuntimeCommon.h
+//!
+//! This file provides the nvinfer1::IPluginRegistry interface, which will be moved to the NvInferRuntime.h header
+//! in a future release.
+//!
+//! \warning This file will be removed in a future release.
+//!
+//! \warning Do not directly include this file. Instead include NvInferRuntime.h
+//!
+#define NV_INFER_INTERNAL_INCLUDE 1
+#include "NvInferPluginBase.h"
+#undef NV_INFER_INTERNAL_INCLUDE
+#include "NvInferRuntimePlugin.h"
+
+namespace nvinfer1
+{
+//!
+//! \class IPluginRegistry
+//!
+//! \brief Single registration point for all plugins in an application. It is
+//! used to find plugin implementations during engine deserialization.
+//! Internally, the plugin registry is considered to be a singleton so all
+//! plugins in an application are part of the same global registry.
+//! Note that the plugin registry is only supported for plugins of type
+//! IPluginV2, each of which should also have a corresponding IPluginCreator implementation.
+//!
+//! \see IPluginV2 and IPluginCreator
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+//! \warning In the automotive safety context, be sure to call IPluginRegistry::setErrorRecorder() to register
+//! an error recorder with the registry before using other methods in the registry.
+//!
+class IPluginRegistry
+{
+public:
+    //!
+    //! \brief Pointer for plugin library handle.
+    //!
+    using PluginLibraryHandle = void*;
+
+    //!
+    //! \brief Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the same
+    //! name, version, and namespace is already registered.
+    //!
+    //! \warning The string pluginNamespace must be 1024 bytes or less including the NULL terminator and must be NULL
+    //! terminated.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes; calls to this method will be synchronized by a mutex.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by
+    //! IPluginRegistry::registerCreator(IPluginCreatorInterface&, AsciiChar const* const).
+    //!
+    TRT_DEPRECATED virtual bool registerCreator(
+        IPluginCreator& creator, AsciiChar const* const pluginNamespace) noexcept = 0;
+
+    //!
+    //! \brief Return all the registered plugin creators and the number of
+    //! registered plugin creators. Returns nullptr if none found.
+    //!
+    //! \warning If any plugin creators are registered or deregistered after calling this function, the returned pointer
+    //! is not guaranteed to be valid thereafter.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: No
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by IPluginRegistry::getAllCreators(int32_t* const).
+    //!
+    TRT_DEPRECATED virtual IPluginCreator* const* getPluginCreatorList(int32_t* const numCreators) const noexcept = 0;
+
+    //!
+    //! \brief Return plugin creator based on plugin name, version, and
+    //! namespace associated with plugin during network creation.
+    //!
+    //! \warning The strings pluginName, pluginVersion, and pluginNamespace must be 1024 bytes or less including the
+    //! NULL terminator and must be NULL terminated.
+    //!
+    //! \warning Returns nullptr if a plugin creator with matching name, version, and namespace is found, but is not a
+    //! descendent of IPluginCreator
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by IPluginRegistry::getCreator(AsciiChar const* const,
+    //! AsciiChar const* const, AsciiChar const* const).
+    //!
+    TRT_DEPRECATED virtual IPluginCreator* getPluginCreator(AsciiChar const* const pluginName,
+        AsciiChar const* const pluginVersion, AsciiChar const* const pluginNamespace = "") noexcept = 0;
+
+    // @cond SuppressDoxyWarnings
+    IPluginRegistry() = default;
+    IPluginRegistry(IPluginRegistry const&) = delete;
+    IPluginRegistry(IPluginRegistry&&) = delete;
+    IPluginRegistry& operator=(IPluginRegistry const&) & = delete;
+    IPluginRegistry& operator=(IPluginRegistry&&) & = delete;
+    // @endcond
+
+protected:
+    virtual ~IPluginRegistry() noexcept = default;
+
+public:
+    //!
+    //! \brief Set the ErrorRecorder for this interface
+    //!
+    //! Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution.
+    //! This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+    //! recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+    //! a recorder has been registered.
+    //!
+    //! \param recorder The error recorder to register with this interface.
+    //!
+    //! \see getErrorRecorder()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: No
+    //!
+    virtual void setErrorRecorder(IErrorRecorder* const recorder) noexcept = 0;
+
+    //!
+    //! \brief Get the ErrorRecorder assigned to this interface.
+    //!
+    //! Retrieves the assigned error recorder object for the given class. A default error recorder does not exist,
+    //! so a nullptr will be returned if setErrorRecorder has not been called, or an ErrorRecorder has not been
+    //! inherited.
+    //!
+    //! \return A pointer to the IErrorRecorder object that has been registered.
+    //!
+    //! \see setErrorRecorder()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes
+    //!
+    virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
+
+    //!
+    //! \brief Deregister a previously registered plugin creator implementing IPluginCreator.
+    //!
+    //! Since there may be a desire to limit the number of plugins,
+    //! this function provides a mechanism for removing plugin creators registered in TensorRT.
+    //! The plugin creator that is specified by \p creator is removed from TensorRT and no longer tracked.
+    //!
+    //! \return True if the plugin creator was deregistered, false if it was not found in the registry or otherwise
+    //! could not be deregistered.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Superseded by
+    //! IPluginRegistry::deregisterCreator(IPluginCreatorInterface const&).
+    //!
+    TRT_DEPRECATED virtual bool deregisterCreator(IPluginCreator const& creator) noexcept = 0;
+
+    //!
+    //! \brief Return whether the parent registry will be searched if a plugin is not found in this registry.
+    //! Parent search is enabled (true) by default.
+    //!
+    //! \return bool variable indicating whether parent search is enabled.
+    //!
+    //! \see setParentSearchEnabled
+    //!
+    virtual bool isParentSearchEnabled() const = 0;
+
+    //!
+    //! \brief Set whether the parent registry will be searched if a plugin is not found in this registry.
+    //!
+    //! \param enabled The bool variable indicating whether parent search is enabled.
+    //!
+    //! \see isParentSearchEnabled
+    //!
+    virtual void setParentSearchEnabled(bool const enabled) = 0;
+
+    //!
+    //! \brief Load and register a shared library of plugins.
+    //!
+    //! \param pluginPath the plugin library path.
+    //!
+    //! \return The loaded plugin library handle. The call will fail and return
+    //! nullptr if any of the plugins are already registered.
+    //!
+    virtual PluginLibraryHandle loadLibrary(AsciiChar const* pluginPath) noexcept = 0;
+
+    //!
+    //! \brief Deregister plugins associated with a library. Any resources acquired when the library
+    //! was loaded will be released.
+    //!
+    //! \param handle the plugin library handle to deregister.
+    //!
+    virtual void deregisterLibrary(PluginLibraryHandle handle) noexcept = 0;
+
+    //!
+    //! \brief Register a plugin creator. Returns false if a plugin creator with the same type
+    //! is already registered.
+    //!
+    //! \warning The string pluginNamespace must be 1024 bytes or less including the NULL terminator and must be NULL
+    //! terminated.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes; calls to this method will be synchronized by a mutex.
+    //!
+    virtual bool registerCreator(IPluginCreatorInterface& creator, AsciiChar const* const pluginNamespace) noexcept = 0;
+
+    //!
+    //! \brief Return all registered plugin creators. Returns nullptr if none found.
+    //!
+    //! \warning If any plugin creators are registered or deregistered after calling this function, the returned pointer
+    //! is not guaranteed to be valid thereafter.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: No
+    //!
+    virtual IPluginCreatorInterface* const* getAllCreators(int32_t* const numCreators) const noexcept = 0;
+
+    //!
+    //! \brief Return a registered plugin creator based on plugin name, version, and namespace associated with the
+    //! plugin during network creation.
+    //!
+    //! \warning The strings pluginName, pluginVersion, and pluginNamespace must be 1024 bytes or less including the
+    //! NULL terminator and must be NULL terminated.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes
+    //!
+    virtual IPluginCreatorInterface* getCreator(AsciiChar const* const pluginName, AsciiChar const* const pluginVersion,
+        AsciiChar const* const pluginNamespace = "") noexcept = 0;
+
+    //!
+    //! \brief Deregister a previously registered plugin creator.
+    //!
+    //! Since there may be a desire to limit the number of plugins,
+    //! this function provides a mechanism for removing plugin creators registered in TensorRT.
+    //! The plugin creator that is specified by \p creator is removed from TensorRT and no longer tracked.
+    //!
+    //! \return True if the plugin creator was deregistered, false if it was not found in the registry or otherwise
+    //! could not be deregistered.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes
+    //!
+    virtual bool deregisterCreator(IPluginCreatorInterface const& creator) noexcept = 0;
+
+    //!
+    //! \brief Get a plugin resource
+    //! \param key Key for identifying the resource. Cannot be null.
+    //! \param resource A plugin resource object. The object will only need to be valid until this method returns, as
+    //! only a clone of this object will be registered by TRT. Cannot be null.
+    //!
+    //! \return Registered plugin resource object
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes; calls to this method will be synchronized by a mutex.
+    //!
+    virtual IPluginResource* acquirePluginResource(AsciiChar const* key, IPluginResource* resource) noexcept = 0;
+
+    //!
+    //! \brief Decrement reference count for the resource with this key.
+    //!        If reference count goes to zero after decrement, release() will be invoked on the resource, the key will
+    //!        be deregistered and the resource object will be deleted.
+    //!
+    //! \param key Key that was used to register the resource. Cannot be null.
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes; calls to this method will be synchronized by a mutex.
+    //!
+    virtual int32_t releasePluginResource(AsciiChar const* key) noexcept = 0;
+
+    //!
+    //! \brief Return all registered plugin creators by searching starting from the current registry and following
+    //! parent registries recursively as long as isParentSearchEnabled() returns true.
+    //!
+    //! \param[out] numCreators Pointer to an integer where the number of registered plugin creators will be stored.
+    //!
+    //! \return A pointer to an array of IPluginCreatorInterface pointers. Returns nullptr if no creators are found.
+    //!
+    //! \warning If any plugin creators are registered or deregistered after calling this function, the returned pointer
+    //! is not guaranteed to remain valid.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: No
+    //!
+    virtual IPluginCreatorInterface* const* getAllCreatorsRecursive(int32_t* const numCreators) noexcept = 0;
+};
+
+} // namespace nvinfer1
+
+#endif /* NV_INFER_RUNTIME_COMMON_H */
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimePlugin.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimePlugin.h
new file mode 100644
index 0000000000000000000000000000000000000000..e374a69427edaf110f1b5d727bb0b775e18fc7c1
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferRuntimePlugin.h
@@ -0,0 +1,980 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_INFER_RUNTIME_PLUGIN_H
+#define NV_INFER_RUNTIME_PLUGIN_H
+
+#define NV_INFER_INTERNAL_INCLUDE 1
+#include "NvInferPluginBase.h"
+#undef NV_INFER_INTERNAL_INCLUDE
+
+//!
+//! \file NvInferRuntimePlugin.h
+//!
+//! This file contains common definitions, data structures and interfaces that relate to plugins and are shared
+//! between the standard and safe runtime.
+//!
+//! \warning Do not directly include this file. Instead include NvInferRuntime.h
+//!
+
+//!
+//! \namespace nvinfer1
+//!
+//! \brief The TensorRT API version 1 namespace.
+//!
+namespace nvinfer1
+{
+
+enum class TensorFormat : int32_t;
+namespace v_1_0
+{
+class IGpuAllocator;
+} // namespace v_1_0
+using IGpuAllocator = v_1_0::IGpuAllocator;
+
+//!
+//! \brief PluginFormat is reserved for backward compatibility.
+//!
+//! \see IPluginV2::supportsFormat()
+//!
+using PluginFormat = TensorFormat;
+
+//!
+//! \brief Bit in the plugin version that marks a plugin (or plugin creator) as Python-based.
+//!
+static constexpr int32_t kPLUGIN_VERSION_PYTHON_BIT = 0x40;
+
+//!
+//! \struct PluginTensorDesc
+//!
+//! \brief Fields that a plugin might see for an input or output.
+//!
+//! Scale is only valid when data type is DataType::kINT8. TensorRT will set
+//! the value to -1.0F if it is invalid.
+//!
+//! \see IPluginV2IOExt::supportsFormatCombination
+//! \see IPluginV2IOExt::configurePlugin
+//!
+struct PluginTensorDesc
+{
+    //! Dimensions.
+    Dims dims;
+    //! Data type. \warning DataType::kBOOL and DataType::kUINT8 are not supported.
+    DataType type;
+    //! Tensor format.
+    TensorFormat format;
+    //! Scale for INT8 data type.
+    float scale;
+};
+
+//!
+//! \enum PluginVersion
+//!
+//! \brief Definition of plugin versions.
+//!
+//! Tag for plug-in versions.  Used in upper byte of getTensorRTVersion().
+//!
+//! \deprecated Deprecated in TensorRT 10.10. PluginVersion is used only in relation to IPluginV2-descendent plugin
+//! interfaces, which are all deprecated.
+//!
+enum class PluginVersion : uint8_t
+{
+    //! IPluginV2
+    kV2 TRT_DEPRECATED_ENUM = 0,
+    //! IPluginV2Ext
+    kV2_EXT TRT_DEPRECATED_ENUM = 1,
+    //! IPluginV2IOExt
+    kV2_IOEXT TRT_DEPRECATED_ENUM = 2,
+    //! IPluginV2DynamicExt
+    kV2_DYNAMICEXT TRT_DEPRECATED_ENUM = 3,
+    //! IPluginV2DynamicExt-based Python plugins
+    kV2_DYNAMICEXT_PYTHON TRT_DEPRECATED_ENUM = kPLUGIN_VERSION_PYTHON_BIT | 3
+};
+
+//!
+//! \enum PluginCreatorVersion
+//!
+//! \brief Enum to identify version of the plugin creator.
+//!
+//! \deprecated Deprecated in TensorRT 10.10. PluginCreatorVersion is used only in relation to plugin creators based
+//! off IPluginCreator, which is deprecated.
+//!
+enum class PluginCreatorVersion : int32_t
+{
+    //! IPluginCreator
+    kV1 TRT_DEPRECATED_ENUM = 0,
+    //! IPluginCreator-based Python plugin creators
+    kV1_PYTHON TRT_DEPRECATED_ENUM = kPLUGIN_VERSION_PYTHON_BIT
+};
+
+//!
+//! \class IPluginV2
+//!
+//! \brief Plugin class for user-implemented layers.
+//!
+//! Plugins are a mechanism for applications to implement custom layers. When
+//! combined with IPluginCreator it provides a mechanism to register plugins and
+//! look up the Plugin Registry during de-serialization.
+//!
+//! \see IPluginCreator
+//! \see IPluginRegistry
+//!
+//! \deprecated Deprecated in TensorRT 8.5. Implement IPluginV3 instead.
+//!
+class TRT_DEPRECATED IPluginV2
+{
+public:
+    //!
+    //! \brief Return the API version with which this plugin was built.
+    //!
+    //! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
+    //! plugins.
+    //!
+    //! \return The TensorRT version in the format (major * 100 + minor) * 100 + patch.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
+    //!
+    virtual int32_t getTensorRTVersion() const noexcept
+    {
+        return NV_TENSORRT_VERSION;
+    }
+
+    //!
+    //! \brief Return the plugin type. Should match the plugin name returned by the corresponding plugin creator
+    //!
+    //! \see IPluginCreator::getPluginName()
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
+    //! NULL terminator.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual AsciiChar const* getPluginType() const noexcept = 0;
+
+    //!
+    //! \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin creator
+    //!
+    //! \see IPluginCreator::getPluginVersion()
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
+    //! NULL terminator.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual AsciiChar const* getPluginVersion() const noexcept = 0;
+
+    //!
+    //! \brief Get the number of outputs from the layer.
+    //!
+    //! \return The number of outputs, which is a positive integer.
+    //!
+    //! This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called
+    //! prior to any call to initialize().
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual int32_t getNbOutputs() const noexcept = 0;
+
+    //!
+    //! \brief Get the dimension of an output tensor.
+    //!
+    //! \param index The index of the output tensor. Will lie in the valid range (between 0 and getNbOutputs()-1
+    //! inclusive).
+    //! \param inputs The input tensor dimensions. Will be the start address of a Dims array of length nbInputDims.
+    //! \param nbInputDims The number of input tensors. Will be a non-negative integer.
+    //!
+    //! \return The output tensor dimensions if the index is in the valid range.
+    //!         An invalid value of Dims{-1, {}} must be returned if the index is not in the valid range.
+    //!
+    //! This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called
+    //! prior to any call to initialize().
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    //! \note In any non-IPluginV2DynamicExt plugin, batch size must not be included in the returned dimensions,
+    //! even if the plugin is expected to be run in a network with explicit batch mode enabled.
+    //! Please see the TensorRT Developer Guide for more details on how plugin inputs and outputs behave.
+    //!
+    virtual Dims getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept = 0;
+
+    //!
+    //! \brief Check format support.
+    //!
+    //! \param type DataType requested.
+    //! \param format PluginFormat requested.
+    //!
+    //! \return true if the plugin supports the type-format combination.
+    //!
+    //! This function is called by the implementations of INetworkDefinition, IBuilder, and
+    //! safe::ICudaEngine/ICudaEngine. In particular, it is called when creating an engine and when deserializing an
+    //! engine.
+    //!
+    //! \warning For the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32
+    //! will not be passed in; this is to keep backward compatibility with the TensorRT 5.x series. Use
+    //! IPluginV2IOExt or IPluginV2DynamicExt for other PluginFormats.
+    //!
+    //! \warning DataType::kBOOL and DataType::kUINT8 are not supported.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual bool supportsFormat(DataType type, PluginFormat format) const noexcept = 0;
+
+    //!
+    //! \brief Configure the layer.
+    //!
+    //! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
+    //! algorithm choices on the basis of its weights, dimensions, and maximum batch size.
+    //!
+    //! \param inputDims The input tensor dimensions. Will be the start address of a Dims array of length nbInputs.
+    //! \param nbInputs The number of inputs. Will be a non-negative integer.
+    //! \param outputDims The output tensor dimensions. Will be the start address of a Dims array of length nbOutputs.
+    //! \param nbOutputs The number of outputs. Will be a positive integer identical to the return value of
+    //! getNbOutputs().
+    //! \param type The data type selected for the engine.
+    //! \param format The format selected for the engine.
+    //! \param maxBatchSize The maximum batch size. Will be a positive integer.
+    //!
+    //! The dimensions passed here do not include the outermost batch size (i.e. for 2D image networks, they will be
+    //! 3-dimensional CHW dimensions).
+    //!
+    //! \warning For the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32
+    //! will not be passed in; this is to keep backward compatibility with the TensorRT 5.x series. Use
+    //! IPluginV2IOExt or IPluginV2DynamicExt for other PluginFormats.
+    //!
+    //! \warning DataType::kBOOL and DataType::kUINT8 are not supported.
+    //!
+    //! \see clone()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin. However, TensorRT
+    //!                  will not call this method from two threads simultaneously on a given clone of a plugin.
+    //!
+    virtual void configureWithFormat(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,
+        DataType type, PluginFormat format, int32_t maxBatchSize) noexcept
+        = 0;
+
+    //!
+    //! \brief Initialize the layer for execution. This is called when the engine is created.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination).
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when using multiple
+    //!                  execution contexts using this plugin.
+    //!
+    virtual int32_t initialize() noexcept = 0;
+
+    //!
+    //! \brief Release resources acquired during plugin layer initialization. This is called when the engine is
+    //! destroyed.
+    //!
+    //! \see initialize()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when using multiple
+    //!                  execution contexts using this plugin. However, TensorRT will not call this method from
+    //!                  two threads simultaneously on a given clone of a plugin.
+    //!
+    virtual void terminate() noexcept = 0;
+
+    //!
+    //! \brief Find the workspace size required by the layer.
+    //!
+    //! This function is called during engine startup, after initialize(). The workspace size returned must be
+    //! sufficient for any batch size up to the maximum.
+    //!
+    //! \param maxBatchSize The maximum batch size, which will be a positive integer.
+    //!
+    //! \return The workspace size in bytes, i.e. the device memory size that the plugin requires for its internal
+    //! computations.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin. However, TensorRT
+    //!                  will not call this method from two threads simultaneously on a given clone of a plugin.
+    //!
+    virtual size_t getWorkspaceSize(int32_t maxBatchSize) const noexcept = 0;
+
+    //!
+    //! \brief Execute the layer.
+    //!
+    //! \param batchSize The number of inputs in the batch.
+    //! \param inputs The memory for the input tensors. Will be an array of device addresses corresponding to input
+    //!        tensors of length nbInputs, where nbInputs is the second parameter passed to configureWithFormat().
+    //!        The i-th input tensor will have the dimensions inputDims[i], where inputDims is the first parameter
+    //!        that was passed to configureWithFormat().
+    //! \param outputs The memory for the output tensors. Will be an array of device addresses corresponding to output
+    //!        tensors of length getNbOutputs().
+    //! \param workspace Workspace for execution. Will be the start address of a device buffer whose length will be at
+    //!        least getWorkspaceSize(batchSize).
+    //! \param stream The stream in which to execute the kernels. This will be a valid CUDA stream.
+    //!
+    //! \return 0 for success, else non-zero (which will cause engine termination).
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when multiple execution contexts are used during runtime.
+    //!
+    virtual int32_t enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace,
+        cudaStream_t stream) noexcept
+        = 0;
+
+    //!
+    //! \brief Find the size of the serialization buffer required to store the plugin configuration in a binary file.
+    //!
+    //! \return The size of the serialization buffer in bytes.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual size_t getSerializationSize() const noexcept = 0;
+
+    //!
+    //! \brief Serialize the layer.
+    //!
+    //! \param buffer A pointer to a host buffer to serialize data. Size of buffer will be at least as large as the
+    //! value returned by getSerializationSize.
+    //!
+    //! \see getSerializationSize()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual void serialize(void* buffer) const noexcept = 0;
+
+    //!
+    //! \brief Destroy the plugin object. This will be called when the network, builder or engine is destroyed.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual void destroy() noexcept = 0;
+
+    //!
+    //! \brief Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object with
+    //! these parameters.
+    //!
+    //! The TensorRT runtime calls clone() to clone the plugin when an execution context is created for an engine,
+    //! after the engine has been created.  The runtime does not call initialize() on the cloned plugin,
+    //! so the cloned plugin must be created in an initialized state.
+    //!
+    //! \return A cloned plugin object in an initialized state with the same parameters as the current object.
+    //!         nullptr must be returned if the cloning fails, e.g. because of resource exhaustion.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when creating multiple
+    //!                  execution contexts.
+    //!
+    virtual IPluginV2* clone() const noexcept = 0;
+
+    //!
+    //! \brief Set the namespace that this plugin object belongs to. Ideally, all plugin
+    //! objects from the same plugin library must have the same namespace.
+    //!
+    //! \param pluginNamespace The namespace for the plugin object.
+    //!
+    //! \warning The string pluginNamespace will be NULL-terminated and have a length of 1024 bytes or less including the
+    //! NULL terminator.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept = 0;
+
+    //!
+    //! \brief Return the namespace of the plugin object.
+    //!
+    //! \return The namespace string that was passed to setPluginNamespace(), possibly after truncation to 1024 bytes
+    //! if a longer string was passed. An empty string must be returned as default value.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
+
+    // @cond SuppressDoxyWarnings
+    IPluginV2() = default;
+    virtual ~IPluginV2() noexcept = default;
+// @endcond
+
+protected:
+// @cond SuppressDoxyWarnings
+    IPluginV2(IPluginV2 const&) = default;
+    IPluginV2(IPluginV2&&) = default;
+    IPluginV2& operator=(IPluginV2 const&) & = default;
+    IPluginV2& operator=(IPluginV2&&) & = default;
+// @endcond
+};
+
+//!
+//! \class IPluginV2Ext
+//!
+//! \brief Plugin class for user-implemented layers.
+//!
+//! Plugins are a mechanism for applications to implement custom layers. This
+//! interface provides additional capabilities to the IPluginV2 interface by
+//! supporting different output data types and broadcast across batches.
+//!
+//! \see IPluginV2
+//!
+//! \deprecated Deprecated in TensorRT 8.5. Implement IPluginV3 instead.
+//!
+class TRT_DEPRECATED IPluginV2Ext : public IPluginV2
+{
+public:
+    //!
+    //! \brief Return the DataType of the plugin output at the requested index.
+    //!
+    //! \param index The output tensor index in the valid range between 0 and getNbOutputs()-1.
+    //! \param inputTypes The data types of the input tensors, stored in an array of length nbInputs.
+    //! \param nbInputs The number of input tensors. Will be a non-negative integer.
+    //!
+    //! \return The data type of the output tensor with the provided index if the input tensors have the data types
+    //! provided in inputTypes, provided the output tensor index is in the valid range. DataType::kFLOAT must be
+    //! returned if the index is not in the valid range.
+    //!
+    //! The default behavior must be to return the type of the first input, or DataType::kFLOAT if the layer has no
+    //! inputs. The returned data type must have a format that is supported by the plugin.
+    //!
+    //! \see supportsFormat()
+    //!
+    //! \warning DataType::kBOOL and DataType::kUINT8 are not supported.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual nvinfer1::DataType getOutputDataType(
+        int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept
+        = 0;
+
+    //!
+    //! \brief Return true if the output tensor is broadcast across a batch.
+    //!
+    //! \param outputIndex The index of the output tensor, which will be in the valid range between 0 and
+    //! getNbOutputs()-1.
+    //! \param inputIsBroadcasted A boolean array of length nbInputs. The i-th element will be true if and only if
+    //! the tensor for the ith input is broadcast across a batch.
+    //! \param nbInputs The number of inputs. Will be a non-negative integer.
+    //!
+    //! The values in inputIsBroadcasted refer to broadcasting at the semantic level,
+    //! i.e. are unaffected by whether method canBroadcastInputAcrossBatch requests
+    //! physical replication of the values.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
+    //!
+    TRT_DEPRECATED virtual bool isOutputBroadcastAcrossBatch(
+        int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept
+        = 0;
+
+    //!
+    //! \brief Return true if the plugin can use an input tensor that is broadcast across batch without replication.
+    //!
+    //! \param inputIndex Index of input that could be broadcast. Will be in the valid range between 0 and
+    //! nbInputs - 1 where nbInputs is the maximum number of input tensors supported by this plugin.
+    //!
+    //! \return true if the index is in the valid range and the plugin is able to broadcast a single copy of this
+    //! input tensor across the batch. False otherwise.
+    //!
+    //! For each input whose tensor is semantically broadcast across a batch,
+    //! TensorRT calls this method before calling configurePlugin.
+    //! If canBroadcastInputAcrossBatch returns true, TensorRT will not replicate the input tensor;
+    //! i.e., there will be a single copy that the plugin must share across the batch.
+    //! If it returns false, TensorRT will replicate the input tensor
+    //! so that it appears like a non-broadcasted tensor.
+    //!
+    //! This method is called only for inputs that can be broadcast.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    //! \deprecated Deprecated in TensorRT 10.0. Implicit batch support is removed in TensorRT 10.0.
+    //!
+    TRT_DEPRECATED virtual bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept = 0;
+
+    //!
+    //! \brief Configure the layer with input and output data types.
+    //!
+    //! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
+    //! algorithm choices on the basis of its weights, dimensions, data types and maximum batch size.
+    //!
+    //! \param inputDims The input tensor dimensions. Will be an array of length nbInputs.
+    //! \param nbInputs The number of inputs. Will be a non-negative integer.
+    //! \param outputDims The output tensor dimensions. Will be an array of length nbOutputs.
+    //! \param nbOutputs The number of outputs. Will be a positive integer.
+    //! \param inputTypes The data types selected for the plugin inputs. Will be an array of length nbInputs.
+    //! \param outputTypes The data types selected for the plugin outputs. Will be an array of length nbOutputs.
+    //! \param inputIsBroadcast True for each input that the plugin must broadcast across the batch.
+    //!                         Will be an array of length nbInputs.
+    //! \param outputIsBroadcast True for each output that TensorRT will broadcast across the batch.
+    //!                          Will be an array of length nbOutputs.
+    //! \param floatFormat The format selected for the engine for the floating point inputs/outputs.
+    //! \param maxBatchSize The maximum batch size. Will be a positive integer.
+    //!
+    //! The dimensions passed here do not include the outermost batch size (i.e. for 2D image networks, they will be
+    //! 3-dimensional CHW dimensions). When inputIsBroadcast or outputIsBroadcast is true, the outermost batch size for
+    //! that input or output must be treated as if it is one.
+    //! Index 'i' of inputIsBroadcast is true only if the input is semantically broadcast across the batch and
+    //! calling canBroadcastInputAcrossBatch with argument 'i' returns true.
+    //! Index 'i' of outputIsBroadcast is true only if calling isOutputBroadcastAcrossBatch with argument 'i'
+    //! returns true.
+    //!
+    //! \warning for the floatFormat field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and
+    //! PluginFormat::kCHW32 will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use
+    //! IPluginV2IOExt or IPluginV2DynamicExt for other PluginFormats.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin. However, TensorRT
+    //!                  will not call this method from two threads simultaneously on a given clone of a plugin.
+    //!
+    virtual void configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,
+        DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast,
+        bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept
+        = 0;
+
+    IPluginV2Ext() = default;
+    ~IPluginV2Ext() override = default;
+
+    //!
+    //! \brief Attach the plugin object to an execution context and grant the plugin the access to some context
+    //! resources.
+    //!
+    //! \param cudnn The cuDNN context handle of the execution context. Will be a valid cuDNN context handle, or
+    //!              nullptr if TacticSource::kCUDNN is disabled.
+    //! \param cublas The cuBLAS context handle of the execution context. Will be a valid cuBLAS context handle, or
+    //!               nullptr if TacticSource::kCUBLAS is disabled.
+    //! \param allocator The allocator used by the execution context
+    //!
+    //! This function is called automatically for each plugin when a new execution context is created. If the context
+    //! was created without resources, this method is not called until the resources are assigned. It is also called if
+    //! new resources are assigned to the context.
+    //!
+    //! If the plugin needs per-context resource, it can be allocated here.
+    //! The plugin can also get context-owned cuDNN and cuBLAS context here.
+    //!
+    //! \note The TacticSource::kCUDNN and TacticSource::kCUBLAS flags are disabled by default.
+    //! The allocator pointer is unique to each building or execution context instance having overlapping lifetimes.
+    //! It can be used as a key to manage resources across plugin instances sharing the same context.
+    //! Plugins attached to different contexts will have different handles as their execution will not overlap.
+    //!
+    //! \see TacticSources
+    //! \see getPluginCudnnHandle(void* executionContextIdentifier)
+    //! \see getPluginCublasHandle(void* executionContextIdentifier)
+    //!
+    //! \note In the automotive safety context, the cuDNN and cuBLAS parameters will be nullptr because cuDNN and cuBLAS
+    //!       are not used by the safe runtime.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual void attachToContext(
+        cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* /*allocator*/) noexcept
+    {
+    }
+
+    //!
+    //! \brief Detach the plugin object from its execution context.
+    //!
+    //! This function is called automatically for each plugin when an execution context is destroyed or the context
+    //! resources are unassigned from the context.
+    //!
+    //! If the plugin owns per-context resource, it can be released here.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual void detachFromContext() noexcept {}
+
+    //!
+    //! \brief Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin
+    //! object with these parameters. If the source plugin is pre-configured with configurePlugin(), the returned object
+    //! must also be pre-configured. The returned object must allow attachToContext() with a new execution context.
+    //! Cloned plugin objects can share the same per-engine immutable resource (e.g. weights) with the source object
+    //! (e.g. via ref-counting) to avoid duplication.
+    //!
+    //! \return A pointer to a cloned plugin object if cloning was successful, otherwise nullptr.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    IPluginV2Ext* clone() const noexcept override = 0;
+
+protected:
+    // @cond SuppressDoxyWarnings
+    IPluginV2Ext(IPluginV2Ext const&) = default;
+    IPluginV2Ext(IPluginV2Ext&&) = default;
+    IPluginV2Ext& operator=(IPluginV2Ext const&) & = default;
+    IPluginV2Ext& operator=(IPluginV2Ext&&) & = default;
+// @endcond
+
+    //!
+    //! \brief Return the API version with which this plugin was built. The
+    //! upper byte is reserved by TensorRT and is used to differentiate this from IPluginV2.
+    //!
+    //! \return In the lower three bytes, the TensorRT version in the format
+    //!         (major * 100 + minor) * 100 + patch.
+    //!         In the upper byte, the value 1.
+    //!
+    //! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
+    //! plugins.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
+    //!
+    int32_t getTensorRTVersion() const noexcept override
+    {
+        return static_cast<int32_t>((static_cast<uint32_t>(PluginVersion::kV2_EXT) << 24U)
+            | (static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU));
+    }
+
+    //!
+    //! \brief Derived classes must not implement this. In a C++11 API it would be override final.
+    //!
+    //! IPluginV2Ext::configureWithFormat() is a NOP operation for all classes derived from IPluginV2Ext.
+    //! These classes call configurePlugin() instead.
+    //!
+    void configureWithFormat(Dims const* /*inputDims*/, int32_t /*nbInputs*/, Dims const* /*outputDims*/,
+        int32_t /*nbOutputs*/, DataType /*type*/, PluginFormat /*format*/, int32_t /*maxBatchSize*/) noexcept override
+    {
+    }
+};
+
+//!
+//! \class IPluginV2IOExt
+//!
+//! \brief Plugin class for user-implemented layers.
+//!
+//! Plugins are a mechanism for applications to implement custom layers. This interface provides additional
+//! capabilities to the IPluginV2Ext interface by extending different I/O data types and tensor formats.
+//!
+//! \see IPluginV2Ext
+//!
+//! \deprecated Deprecated in TensorRT 10.0. Implement IPluginV3 instead.
+//!
+class TRT_DEPRECATED IPluginV2IOExt : public IPluginV2Ext
+{
+public:
+    //!
+    //! \brief Configure the layer.
+    //!
+    //! This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make
+    //! algorithm choices on the basis of the provided I/O PluginTensorDesc.
+    //!
+    //! \param in The input tensors attributes that are used for configuration.
+    //! \param nbInput Number of input tensors.
+    //! \param out The output tensors attributes that are used for configuration.
+    //! \param nbOutput Number of output tensors.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin. However, TensorRT
+    //!                  will not call this method from two threads simultaneously on a given clone of a plugin.
+    //!
+    virtual void configurePlugin(
+        PluginTensorDesc const* in, int32_t nbInput, PluginTensorDesc const* out, int32_t nbOutput) noexcept
+        = 0;
+
+    //!
+    //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
+    //!
+    //! For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1).
+    //! Using this numbering, pos is an index into inOut, where 0 <= pos < nbInputs+nbOutputs.
+    //!
+    //! TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified
+    //! by inOut[pos].format and inOut[pos].type. The override must return true if that format/datatype at inOut[pos]
+    //! are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can
+    //! make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values
+    //! that the plugin supports. The override must not inspect inOut[pos+1..nbInputs+nbOutputs-1],
+    //! which will have invalid values.  In other words, the decision for pos must be based on inOut[0..pos] only.
+    //!
+    //! Some examples:
+    //!
+    //! * A definition for a plugin that supports only FP16 NCHW:
+    //!
+    //!         return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kHALF;
+    //!
+    //! * A definition for a plugin that supports only FP16 NCHW for its two inputs,
+    //!   and FP32 NCHW for its single output:
+    //!
+    //!         return inOut[pos].format == TensorFormat::kLINEAR &&
+    //!                (inOut[pos].type == (pos < 2 ? DataType::kHALF : DataType::kFLOAT));
+    //!
+    //! * A definition for a "polymorphic" plugin with two inputs and one output that supports
+    //!   any format or type, but the inputs and output must have the same format and type:
+    //!
+    //!         return pos == 0 || (inOut[pos].format == inOut[0].format && inOut[pos].type == inOut[0].type);
+    //!
+    //! Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin.
+    //!
+    virtual bool supportsFormatCombination(
+        int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) const noexcept
+        = 0;
+
+    // @cond SuppressDoxyWarnings
+    IPluginV2IOExt() = default;
+    ~IPluginV2IOExt() override = default;
+// @endcond
+
+protected:
+// @cond SuppressDoxyWarnings
+    IPluginV2IOExt(IPluginV2IOExt const&) = default;
+    IPluginV2IOExt(IPluginV2IOExt&&) = default;
+    IPluginV2IOExt& operator=(IPluginV2IOExt const&) & = default;
+    IPluginV2IOExt& operator=(IPluginV2IOExt&&) & = default;
+// @endcond
+
+    //!
+    //! \brief Return the API version with which this plugin was built. The upper byte is reserved by TensorRT and is
+    //! used to differentiate this from IPluginV2 and IPluginV2Ext.
+    //!
+    //! Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with
+    //! plugins.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, the implementation provided here is safe to call from any thread.
+    //!
+    int32_t getTensorRTVersion() const noexcept override
+    {
+        return static_cast<int32_t>((static_cast<uint32_t>(PluginVersion::kV2_IOEXT) << 24U)
+            | (static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU));
+    }
+
+private:
+    // Following are obsolete base class methods, and must not be implemented or used.
+
+    //!
+    //! \brief Set plugin configuration.
+    //!
+    void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
+        bool const*, PluginFormat, int32_t) noexcept final
+    {
+    }
+
+    //!
+    //! \brief Check if provided data type is supported.
+    //!
+    bool supportsFormat(DataType, PluginFormat) const noexcept final
+    {
+        return false;
+    }
+};
+
+namespace v_1_0
+{
+class TRT_DEPRECATED IPluginCreator : public IPluginCreatorInterface
+{
+public:
+    //!
+    //! \brief Return the plugin name.
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
+    //! the NULL terminator.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when deserializing
+    //!                  multiple engines concurrently sharing plugins.
+    //!
+    virtual AsciiChar const* getPluginName() const noexcept = 0;
+
+    //!
+    //! \brief Return the plugin version.
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including
+    //! the NULL terminator.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when deserializing
+    //!                  multiple engines concurrently sharing plugins.
+    //!
+    virtual AsciiChar const* getPluginVersion() const noexcept = 0;
+
+    //!
+    //! \brief Return a list of fields that need to be passed to createPlugin.
+    //!
+    //! \see PluginFieldCollection
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when deserializing
+    //!                  multiple engines concurrently sharing plugins.
+    //!
+    virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
+
+    //!
+    //! \brief Return a plugin object. Return nullptr in case of error.
+    //!
+    //! \param name A NULL-terminated name string of length 1024 bytes or less, including the NULL terminator.
+    //! \param fc A pointer to a collection of fields needed for constructing the plugin.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when deserializing
+    //!                  multiple engines concurrently sharing plugins.
+    //!
+    virtual IPluginV2* createPlugin(AsciiChar const* name, PluginFieldCollection const* fc) noexcept = 0;
+
+    //!
+    //! \brief Called during deserialization of plugin layer. Return a plugin object.
+    //!
+    //! \param name A NULL-terminated name string of length 1024 bytes or less, including the NULL terminator.
+    //! \param serialData The start address of a byte array with the serialized plugin representation.
+    //! \param serialLength The length in bytes of the byte array with the serialized plugin representation.
+    //!
+    //! \return A deserialized plugin object.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when deserializing
+    //!                  multiple engines concurrently sharing plugins.
+    //!
+    virtual IPluginV2* deserializePlugin(AsciiChar const* name, void const* serialData, size_t serialLength) noexcept
+        = 0;
+
+    //!
+    //! \brief Set the namespace of the plugin creator based on the plugin
+    //! library it belongs to. This can be set while registering the plugin creator.
+    //!
+    //! \param pluginNamespace A NULL-terminated namespace string of length 1024 bytes or less, including the NULL terminator.
+    //!
+    //! \see IPluginRegistry::registerCreator()
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when deserializing
+    //!                  multiple engines concurrently sharing plugins.
+    //!
+    virtual void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept = 0;
+
+    //!
+    //! \brief Return the namespace of the plugin creator object.
+    //!
+    //! \warning The string returned must be NULL-terminated and have a length of 1024 bytes or less including the
+    //! NULL terminator.
+    //!
+    //! \usage
+    //! - Allowed context for the API call
+    //!   - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads
+    //!                  when building networks on multiple devices sharing the same plugin or when deserializing
+    //!                  multiple engines concurrently sharing plugins.
+    //!
+    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
+
+    IPluginCreator() = default;
+    ~IPluginCreator() override = default;
+
+protected:
+    // @cond SuppressDoxyWarnings
+    IPluginCreator(IPluginCreator const&) = default;
+    IPluginCreator(IPluginCreator&&) = default;
+    IPluginCreator& operator=(IPluginCreator const&) & = default;
+    IPluginCreator& operator=(IPluginCreator&&) & = default;
+    // @endcond
+public:
+    //!
+    //! \brief Return version information associated with this interface. Applications must not override this method.
+    //!
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN CREATOR_V1", 1, 0};
+    }
+};
+} // namespace v_1_0
+
+//!
+//! \class IPluginCreator
+//!
+//! \brief Plugin creator class for user implemented layers.
+//!
+//! \see IPlugin and IPluginFactory
+//!
+//! \deprecated Deprecated in TensorRT 10.0. Please implement IPluginCreatorV3One
+//! along with IPluginV3 plugins instead.
+//!
+using IPluginCreator = v_1_0::IPluginCreator;
+
+} // namespace nvinfer1
+
+#endif // NV_INFER_RUNTIME_PLUGIN_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferVersion.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferVersion.h
new file mode 100644
index 0000000000000000000000000000000000000000..82700373bb61437f09026b4ff333a5ce2824f87d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvInferVersion.h
@@ -0,0 +1,45 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//!
+//! \file NvInferVersion.h
+//!
+//! Defines the TensorRT version
+//!
+#ifndef NV_INFER_VERSION_H
+#define NV_INFER_VERSION_H
+
+#define TRT_MAJOR_ENTERPRISE 10 //!< Major version value; NV_TENSORRT_MAJOR expands to this.
+#define TRT_MINOR_ENTERPRISE 13 //!< Minor version value; NV_TENSORRT_MINOR expands to this.
+#define TRT_PATCH_ENTERPRISE 0 //!< Patch version value; NV_TENSORRT_PATCH expands to this.
+#define TRT_BUILD_ENTERPRISE 35 //!< Build number value; NV_TENSORRT_BUILD expands to this.
+#define NV_TENSORRT_MAJOR TRT_MAJOR_ENTERPRISE //!< TensorRT major version.
+#define NV_TENSORRT_MINOR TRT_MINOR_ENTERPRISE //!< TensorRT minor version.
+#define NV_TENSORRT_PATCH TRT_PATCH_ENTERPRISE //!< TensorRT patch version.
+#define NV_TENSORRT_BUILD TRT_BUILD_ENTERPRISE //!< TensorRT build number.
+
+#define NV_TENSORRT_LWS_MAJOR 0 //!< TensorRT LWS major version.
+#define NV_TENSORRT_LWS_MINOR 0 //!< TensorRT LWS minor version.
+#define NV_TENSORRT_LWS_PATCH 0 //!< TensorRT LWS patch version.
+
+#define NV_TENSORRT_RELEASE_TYPE_EARLY_ACCESS 0         //!< An early access release
+#define NV_TENSORRT_RELEASE_TYPE_RELEASE_CANDIDATE 1    //!< A release candidate
+#define NV_TENSORRT_RELEASE_TYPE_GENERAL_AVAILABILITY 2 //!< A final release
+
+#define NV_TENSORRT_RELEASE_TYPE NV_TENSORRT_RELEASE_TYPE_GENERAL_AVAILABILITY //!< TensorRT release type
+
+#endif // NV_INFER_VERSION_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvOnnxConfig.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvOnnxConfig.h
new file mode 100644
index 0000000000000000000000000000000000000000..8a222aa774d21cedec2a26738ee2e018b45fe6cd
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvOnnxConfig.h
@@ -0,0 +1,198 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NV_OnnxConfig_H
+#define NV_OnnxConfig_H
+
+#include "NvInfer.h"
+
+namespace nvonnxparser
+{
+
+//!
+//! \mainpage
+//!
+//! This is the API documentation for the Configuration Manager for Open Neural Network Exchange (ONNX) Parser for Nvidia TensorRT Inference Engine.
+//! It provides information on individual functions, classes
+//! and methods. Use the index on the left to navigate the documentation.
+//!
+//! Please see the accompanying user guide and samples for higher-level information and general advice on using ONNX Parser and TensorRT.
+//!
+
+//!
+//! \file NvOnnxConfig.h
+//!
+//! This is the API file for the Configuration Manager for ONNX Parser for Nvidia TensorRT.
+//!
+
+//!
+//! \class IOnnxConfig
+//! \brief Configuration Manager Class.
+//!
+class IOnnxConfig
+{
+public:
+    virtual ~IOnnxConfig() noexcept = default;
+    //!
+    //! \typedef Verbosity
+    //!
+    //! \brief Defines Verbosity level.
+    //!
+    typedef int32_t Verbosity;
+
+    //!
+    //! \brief Set the Model Data Type.
+    //!
+    //! Sets the Model DataType, one of the following: float -d 32 (default), half precision -d 16, and int8 -d 8 data
+    //! types.
+    //!
+    //! \see getModelDtype()
+    //!
+    virtual void setModelDtype(const nvinfer1::DataType) noexcept = 0;
+
+    //!
+    //! \brief Get the Model Data Type.
+    //!
+    //! \return the data type of the model.
+    //!
+    //! \see setModelDtype() and DataType
+    //!
+    virtual nvinfer1::DataType getModelDtype() const noexcept = 0;
+
+    //!
+    //! \brief Get the Model FileName.
+    //!
+    //! \return Return the Model Filename, as a null-terminated C-style string.
+    //!
+    //! \see setModelFileName()
+    //!
+    virtual char const* getModelFileName() const noexcept = 0;
+
+    //!
+    //! \brief Set the Model File Name.
+    //!
+    //! The Model File name contains the Network Description in ONNX pb format.
+    //!
+    //! This method copies the name string.
+    //!
+    //! \param onnxFilename The name.
+    //!
+    //! \see getModelFileName()
+    //!
+    virtual void setModelFileName(char const* onnxFilename) noexcept = 0;
+
+    //!
+    //! \brief Get the Verbosity Level.
+    //!
+    //! \return The Verbosity Level.
+    //!
+    //! \see addVerbosity(), reduceVerbosity()
+    //!
+    virtual Verbosity getVerbosityLevel() const noexcept = 0;
+
+    //!
+    //! \brief Increase the Verbosity Level.
+    //!
+    //! This function returns nothing; query the resulting level with getVerbosityLevel().
+    //!
+    //! \see reduceVerbosity(), setVerbosityLevel(Verbosity)
+    //!
+    virtual void addVerbosity() noexcept = 0;
+
+    //!
+    //! \brief Reduce the Verbosity Level.
+    //!
+    //! \see addVerbosity(), setVerbosityLevel(Verbosity)
+    //!
+    virtual void reduceVerbosity() noexcept = 0;
+
+    //!
+    //! \brief Set to specific verbosity Level.
+    //!
+    //! \see addVerbosity(), reduceVerbosity()
+    //!
+    virtual void setVerbosityLevel(Verbosity) noexcept = 0;
+
+    //!
+    //! \brief Returns the File Name of the Network Description as a Text File.
+    //!
+    //! \return Return the name of the file containing the network description converted to a plain text, used for
+    //! debugging purposes.
+    //!
+    //! \see setTextFileName()
+    //!
+    virtual char const* getTextFileName() const noexcept = 0;
+
+    //!
+    //! \brief Set the File Name of the Network Description as a Text File.
+    //!
+    //! This API allows setting a file name for the network description in plain text, equivalent of the ONNX protobuf.
+    //!
+    //! This method copies the name string.
+    //!
+    //! \param textFileName Name of the file.
+    //!
+    //! \see getTextFileName()
+    //!
+    virtual void setTextFileName(char const* textFileName) noexcept = 0;
+
+    //!
+    //! \brief Get the File Name of the Network Description as a Text File, including the weights.
+    //!
+    //! \return Return the name of the file containing the network description converted to a plain text, used for
+    //! debugging purposes.
+    //!
+    //! \see setFullTextFileName()
+    //!
+    virtual char const* getFullTextFileName() const noexcept = 0;
+
+    //!
+    //! \brief Set the File Name of the Network Description as a Text File, including the weights.
+    //!
+    //! This API allows setting a file name for the network description in plain text, equivalent of the ONNX protobuf.
+    //!
+    //! This method copies the name string.
+    //!
+    //! \param fullTextFileName Name of the file.
+    //!
+    //! \see getFullTextFileName()
+    //!
+    virtual void setFullTextFileName(char const* fullTextFileName) noexcept = 0;
+
+    //!
+    //! \brief Get whether the layer information will be printed.
+    //!
+    //! \return Returns whether the layer information will be printed.
+    //!
+    //! \see setPrintLayerInfo()
+    //!
+    virtual bool getPrintLayerInfo() const noexcept = 0;
+
+    //!
+    //! \brief Set whether the layer information will be printed.
+    //!
+    //! \see getPrintLayerInfo()
+    //!
+    virtual void setPrintLayerInfo(bool) noexcept = 0;
+
+}; // class IOnnxConfig
+
+TENSORRTAPI IOnnxConfig* createONNXConfig(); //!< Returns a pointer to an IOnnxConfig; ownership of the result is not documented in this header.
+
+} // namespace nvonnxparser
+
+#endif
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvOnnxParser.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvOnnxParser.h
new file mode 100644
index 0000000000000000000000000000000000000000..6bbe304c05661f9396c1d659c2009e6a9ce4a2d5
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/include/NvOnnxParser.h
@@ -0,0 +1,645 @@
+/*
+ * Copyright (c) 1993-2024, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NV_ONNX_PARSER_H
+#define NV_ONNX_PARSER_H
+
+#include "NvInfer.h"
+#include <stddef.h>
+#include <string>
+#include <vector>
+
+//!
+//! \file NvOnnxParser.h
+//!
+//! This is the API for the ONNX Parser
+//!
+
+#define NV_ONNX_PARSER_MAJOR 0 //!< ONNX parser major version.
+#define NV_ONNX_PARSER_MINOR 1 //!< ONNX parser minor version.
+#define NV_ONNX_PARSER_PATCH 0 //!< ONNX parser patch version.
+
+static constexpr int32_t NV_ONNX_PARSER_VERSION
+    = ((NV_ONNX_PARSER_MAJOR * 10000) + (NV_ONNX_PARSER_MINOR * 100) + NV_ONNX_PARSER_PATCH); //!< Encoded as MAJOR * 10000 + MINOR * 100 + PATCH.
+
+//!
+//! \typedef SubGraph_t
+//!
+//! \brief The data structure containing the parsing capability of
+//! a set of nodes in an ONNX graph, stored as a (std::vector<size_t>, bool) pair.
+//!
+typedef std::pair<std::vector<size_t>, bool> SubGraph_t;
+
+//!
+//! \typedef SubGraphCollection_t
+//!
+//! \brief The data structure containing all SubGraph_t partitioned
+//! out of an ONNX graph, stored as a std::vector of SubGraph_t.
+//!
+typedef std::vector<SubGraph_t> SubGraphCollection_t;
+
+//!
+//! \namespace nvonnxparser
+//!
+//! \brief The TensorRT ONNX parser API namespace
+//!
+namespace nvonnxparser
+{
+
+template <typename T>
+constexpr inline int32_t EnumMax() noexcept; //!< Declared only; explicit specializations for ErrorCode and OnnxParserFlag follow in this header.
+
+//!
+//! \enum ErrorCode
+//!
+//! \brief The type of error that the parser or refitter may return
+//!
+enum class ErrorCode : int
+{
+    kSUCCESS = 0,
+    kINTERNAL_ERROR = 1,
+    kMEM_ALLOC_FAILED = 2,
+    kMODEL_DESERIALIZE_FAILED = 3,
+    kINVALID_VALUE = 4,
+    kINVALID_GRAPH = 5,
+    kINVALID_NODE = 6,
+    kUNSUPPORTED_GRAPH = 7,
+    kUNSUPPORTED_NODE = 8,
+    kUNSUPPORTED_NODE_ATTR = 9,
+    kUNSUPPORTED_NODE_INPUT = 10,
+    kUNSUPPORTED_NODE_DATATYPE = 11,
+    kUNSUPPORTED_NODE_DYNAMIC = 12,
+    kUNSUPPORTED_NODE_SHAPE = 13,
+    kREFIT_FAILED = 14 //!< Largest enumerator value; equal to the value returned by EnumMax<ErrorCode>().
+};
+
+//!
+//! Returns 14, the value of the last ErrorCode enumerator (kREFIT_FAILED). NOTE(review): ErrorCode has 15 enumerators, so confirm callers do not treat this as a count.
+//!
+//! \see ErrorCode
+//!
+template <>
+constexpr inline int32_t EnumMax<ErrorCode>() noexcept
+{
+    return 14;
+}
+
+//!
+//! \brief Represents one or more OnnxParserFlag values using binary OR
+//! operations, e.g., 1U << static_cast<uint32_t>(OnnxParserFlag::kNATIVE_INSTANCENORM)
+//!
+//! \see IParser::setFlags() and IParser::getFlags()
+//!
+using OnnxParserFlags = uint32_t;
+
+enum class OnnxParserFlag : int32_t
+{
+    //! Parse the ONNX model into the INetworkDefinition with the intention of using TensorRT's native layer
+    //! implementation over the plugin implementation for InstanceNormalization nodes.
+    //! This flag is required when building version-compatible or hardware-compatible engines.
+    //! This flag is set to be ON by default. Its value (0) is the bit position used in an OnnxParserFlags mask.
+    kNATIVE_INSTANCENORM = 0,
+    //! Enable UINT8 as a quantization data type and asymmetric quantization with non-zero zero-point values
+    //! in Quantize and Dequantize nodes. This flag is set to be OFF by default.
+    //! The resulting engine must be built targeting DLA version >= 3.16. Its value (1) is the bit position used in an OnnxParserFlags mask.
+    kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA = 1,
+};
+
+//!
+//! Number of flags defined in the OnnxParserFlag enum (currently 2).
+//!
+//! \see OnnxParserFlag
+//!
+template <>
+constexpr inline int32_t EnumMax<OnnxParserFlag>() noexcept
+{
+    return 2;
+}
+
+//!
+//! \class IParserError
+//!
+//! \brief an object containing information about an error
+//!
+class IParserError
+{
+public:
+    //!
+    //!\brief the error code.
+    //!
+    virtual ErrorCode code() const = 0;
+    //!
+    //!\brief description of the error.
+    //!
+    virtual char const* desc() const = 0;
+    //!
+    //!\brief source file in which the error occurred.
+    //!
+    virtual char const* file() const = 0;
+    //!
+    //!\brief source line at which the error occurred.
+    //!
+    virtual int line() const = 0;
+    //!
+    //!\brief source function in which the error occurred.
+    //!
+    virtual char const* func() const = 0;
+    //!
+    //!\brief index of the ONNX model node in which the error occurred.
+    //!
+    virtual int node() const = 0;
+    //!
+    //!\brief name of the node in which the error occurred.
+    //!
+    virtual char const* nodeName() const = 0;
+    //!
+    //!\brief name of the node operation in which the error occurred.
+    //!
+    virtual char const* nodeOperator() const = 0;
+    //!
+    //!\brief A list of the local function names, from the top level down, constituting the current
+    //!             stack trace in which the error occurred. A top-level node that is not inside any
+    //!             local function would return a nullptr.
+    //!
+    virtual char const* const* localFunctionStack() const = 0;
+    //!
+    //!\brief The size of the stack of local functions at the point where the error occurred.
+    //!             A top-level node that is not inside any local function would correspond to
+    //!             a stack size of 0.
+    //!
+    virtual int32_t localFunctionStackSize() const = 0;
+
+protected:
+    virtual ~IParserError() {}
+};
+
+//!
+//! \class IParser
+//!
+//! \brief an object for parsing ONNX models into a TensorRT network definition
+//!
+//! \warning If the ONNX model has a graph output with the same name as a graph input,
+//!          the output will be renamed by prepending "__".
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IParser
+{
+public:
+    //!
+    //! \brief Parse a serialized ONNX model into the TensorRT network.
+    //!         This method has very limited diagnostics. If parsing the serialized model
+    //!         fails for any reason (e.g. unsupported IR version, unsupported opset, etc.)
+    //!         it is the user's responsibility to intercept and report the error.
+    //!         To obtain a better diagnostic, use the parseFromFile method below.
+    //!
+    //! \param serialized_onnx_model Pointer to the serialized ONNX model. Can be freed after this function returns.
+    //! \param serialized_onnx_model_size Size of the serialized ONNX model
+    //!        in bytes
+    //! \param model_path Absolute path to the model file for loading external weights if required
+    //! \return true if the model was parsed successfully
+    //! \see getNbErrors() getError()
+    //!
+    virtual bool parse(
+        void const* serialized_onnx_model, size_t serialized_onnx_model_size, const char* model_path = nullptr) noexcept
+        = 0;
+
+    //!
+    //! \brief Parse an ONNX model file, which can be a binary protobuf or a text ONNX model.
+    //!         Calls the parse method internally.
+    //!
+    //! \param onnxModelFile Name of the ONNX model file.
+    //! \param verbosity Verbosity level.
+    //!
+    //! \return true if the model was parsed successfully
+    //!
+    //!
+    virtual bool parseFromFile(const char* onnxModelFile, int verbosity) noexcept = 0;
+
+    //!
+    //! [DEPRECATED] Deprecated in TensorRT 10.1. See supportsModelV2.
+    //!
+    //! \brief Check whether TensorRT supports a particular ONNX model.
+    //!        If the function returns True, one can proceed to engine building
+    //!        without having to call \p parse or \p parseFromFile.
+    //!
+    //! \param serialized_onnx_model Pointer to the serialized ONNX model. Can be freed after this function returns.
+    //! \param serialized_onnx_model_size Size of the serialized ONNX model
+    //!        in bytes
+    //! \param sub_graph_collection Container to hold supported subgraphs
+    //! \param model_path Absolute path to the model file for loading external weights if required
+    //! \return true if the model is supported
+    //!
+    TRT_DEPRECATED virtual bool supportsModel(void const* serialized_onnx_model, size_t serialized_onnx_model_size,
+        SubGraphCollection_t& sub_graph_collection, const char* model_path = nullptr) noexcept = 0;
+
+    //!
+    //! [DEPRECATED] Deprecated in TensorRT 10.13. See loadInitializer().
+    //!
+    //!\brief Parse a serialized ONNX model into the TensorRT network
+    //! with consideration of user provided weights
+    //!
+    //! \param serialized_onnx_model Pointer to the serialized ONNX model. Can be freed after this function returns.
+    //! \param serialized_onnx_model_size Size of the serialized ONNX model
+    //!        in bytes
+    //! \return true if the model was parsed successfully
+    //! \see getNbErrors() getError()
+    //!
+    TRT_DEPRECATED virtual bool parseWithWeightDescriptors(
+        void const* serialized_onnx_model, size_t serialized_onnx_model_size) noexcept = 0;
+
+    //!
+    //!\brief Returns whether the specified operator may be supported by the
+    //!         parser.
+    //!
+    //! Note that a result of true does not guarantee that the operator will be
+    //! supported in all cases (i.e., this function may return false-positives).
+    //!
+    //! \param op_name The name of the ONNX operator to check for support
+    //!
+    virtual bool supportsOperator(const char* op_name) const noexcept = 0;
+
+    //!
+    //!\brief Get the number of errors that occurred during prior calls to
+    //!         \p parse
+    //!
+    //! \see getError() clearErrors() IParserError
+    //!
+    virtual int getNbErrors() const noexcept = 0;
+
+    //!
+    //!\brief Get an error that occurred during prior calls to \p parse
+    //!
+    //! \see getNbErrors() clearErrors() IParserError
+    //!
+    virtual IParserError const* getError(int index) const noexcept = 0;
+
+    //!
+    //!\brief Clear errors from prior calls to \p parse
+    //!
+    //! \see getNbErrors() getError() IParserError
+    //!
+    virtual void clearErrors() noexcept = 0;
+
+    virtual ~IParser() noexcept = default;
+
+    //!
+    //! \brief Query the plugin libraries needed to implement operations used by the parser in a version-compatible
+    //! engine.
+    //!
+    //! This provides a list of plugin libraries on the filesystem needed to implement operations
+    //! in the parsed network.  If you are building a version-compatible engine using this network,
+    //! provide this list to IBuilderConfig::setPluginsToSerialize to serialize these plugins along
+    //! with the version-compatible engine, or, if you want to ship these plugin libraries externally
+    //! to the engine, ensure that IPluginRegistry::loadLibrary is used to load these libraries in the
+    //! appropriate runtime before deserializing the corresponding engine.
+    //!
+    //! \param[out] nbPluginLibs Returns the number of plugin libraries in the array, or -1 if there was an error.
+    //! \return Array of `nbPluginLibs` C-strings describing plugin library paths on the filesystem if nbPluginLibs > 0,
+    //! or nullptr otherwise.  This array is owned by the IParser, and the pointers in the array are only valid until
+    //! the next call to parse(), supportsModel(), parseFromFile(), or parseWithWeightDescriptors().
+    //!
+    virtual char const* const* getUsedVCPluginLibraries(int64_t& nbPluginLibs) const noexcept = 0;
+
+    //!
+    //! \brief Set the parser flags.
+    //!
+    //! The flags are listed in the OnnxParserFlag enum.
+    //!
+    //! \param onnxParserFlags The flags used when parsing an ONNX model.
+    //!
+    //! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
+    //!
+    //! \see getFlags()
+    //!
+    virtual void setFlags(OnnxParserFlags onnxParserFlags) noexcept = 0;
+
+    //!
+    //! \brief Get the parser flags. Defaults to 0 (NOTE(review): OnnxParserFlag documents kNATIVE_INSTANCENORM as ON by default - confirm).
+    //!
+    //! \return The parser flags as a bitmask.
+    //!
+    //! \see setFlags()
+    //!
+    virtual OnnxParserFlags getFlags() const noexcept = 0;
+
+    //!
+    //! \brief clear a parser flag.
+    //!
+    //! clears the parser flag from the enabled flags.
+    //!
+    //! \see setFlags()
+    //!
+    virtual void clearFlag(OnnxParserFlag onnxParserFlag) noexcept = 0;
+
+    //!
+    //! \brief Set a single parser flag.
+    //!
+    //! Add the input parser flag to the already enabled flags.
+    //!
+    //! \see setFlags()
+    //!
+    virtual void setFlag(OnnxParserFlag onnxParserFlag) noexcept = 0;
+
+    //!
+    //! \brief Returns true if the parser flag is set
+    //!
+    //! \see getFlags()
+    //!
+    //! \return True if flag is set, false if unset.
+    //!
+    virtual bool getFlag(OnnxParserFlag onnxParserFlag) const noexcept = 0;
+
+    //!
+    //!\brief Return the i-th output ITensor object for the ONNX layer "name".
+    //!
+    //! Return the i-th output ITensor object for the ONNX layer "name".
+    //! If "name" is not found or i is out of range, return nullptr.
+    //! In the case of multiple nodes sharing the same name this function will return
+    //! the output tensors of the first instance of the node in the ONNX graph.
+    //!
+    //! \param name The name of the ONNX layer.
+    //!
+    //! \param i The index of the output. i must be in range [0, layer.num_outputs).
+    //!
+    virtual nvinfer1::ITensor const* getLayerOutputTensor(char const* name, int64_t i) noexcept = 0;
+
+    //!
+    //! \brief Check whether TensorRT supports a particular ONNX model.
+    //!            If the function returns True, one can proceed to engine building
+    //!            without having to call \p parse or \p parseFromFile.
+    //!            Results can be queried through \p getNbSubgraphs, \p isSubgraphSupported,
+    //!            \p getSubgraphNodes.
+    //!
+    //! \param serializedOnnxModel Pointer to the serialized ONNX model. Can be freed after this function returns.
+    //! \param serializedOnnxModelSize Size of the serialized ONNX model in bytes
+    //! \param modelPath Absolute path to the model file for loading external weights if required
+    //! \return true if the model is supported
+    //!
+    virtual bool supportsModelV2(
+        void const* serializedOnnxModel, size_t serializedOnnxModelSize, char const* modelPath = nullptr) noexcept = 0;
+
+    //!
+    //! \brief Get the number of subgraphs. Calling this function before calling \p supportsModelV2 results in undefined
+    //! behavior.
+    //!
+    //!
+    //! \return Number of subgraphs.
+    //!
+    virtual int64_t getNbSubgraphs() noexcept = 0;
+
+    //!
+    //! \brief Returns whether the subgraph is supported. Calling this function before calling \p supportsModelV2
+    //! results in undefined behavior.
+    //!
+    //!
+    //! \param index Index of the subgraph.
+    //! \return Whether the subgraph is supported.
+    //!
+    virtual bool isSubgraphSupported(int64_t const index) noexcept = 0;
+
+    //!
+    //! \brief Get the nodes of the specified subgraph. Calling this function before calling \p supportsModelV2 results
+    //! in undefined behavior.
+    //!
+    //!
+    //! \param index Index of the subgraph.
+    //! \param subgraphLength Returns the length of the subgraph as reference.
+    //!
+    //! \return Pointer to the subgraph nodes array. This pointer is owned by the Parser.
+    //!
+    virtual int64_t* getSubgraphNodes(int64_t const index, int64_t& subgraphLength) noexcept = 0;
+
+    //!
+    //! \brief Load a serialized ONNX model into the parser. Unlike the parse(), parseFromFile(), or
+    //! parseWithWeightDescriptors() functions, this function does not immediately convert the model into a TensorRT
+    //! INetworkDefinition. Using this function allows users to provide their own initializers for the ONNX model
+    //! through the loadInitializer() function.
+    //!
+    //! Only one model can be loaded at a time. Subsequent calls to loadModelProto() will result in an error.
+    //!
+    //! To begin the conversion of the model into a TensorRT INetworkDefinition, use parseModelProto().
+    //!
+    //! \param serializedOnnxModel Pointer to the serialized ONNX model. Can be freed after this function returns.
+    //! \param serializedOnnxModelSize Size of the serialized ONNX model in bytes.
+    //! \param modelPath Absolute path to the model file for loading external weights if required.
+    //! \return true if the model was loaded successfully
+    //! \see getNbErrors() getError()
+    //!
+    virtual bool loadModelProto(
+        void const* serializedOnnxModel, size_t serializedOnnxModelSize, char const* modelPath = nullptr) noexcept = 0;
+
+    //!
+    //! \brief Prompt the ONNX parser to load an initializer with user-provided binary data.
+    //! The lifetime of the data must exceed the lifetime of the parser.
+    //!
+    //! All user-provided initializers must be provided prior to calling parseModelProto().
+    //!
+    //! This function can be called multiple times to specify the names of multiple initializers.
+    //!
+    //! Calling this function with an initializer previously specified will overwrite the previous instance.
+    //!
+    //!
+    //! This function will return false if initializer validation fails. Possible validation errors are:
+    //! * This function was called prior to loadModelProto().
+    //! * The requested initializer was not found in the model.
+    //! * The size of the data provided is different from the corresponding initializer in the model.
+    //!
+    //! \param name Name of the initializer.
+    //! \param data Binary data containing the values of the initializer.
+    //! \param size Size of the initializer in bytes.
+    //! \return true if the initializer was loaded successfully
+    //! \see loadModelProto()
+    //!
+    virtual bool loadInitializer(char const* name, void const* data, size_t size) noexcept = 0;
+
+    //! \brief Begin the parsing and conversion process of the loaded ONNX model into a TensorRT INetworkDefinition.
+    //!
+    //! \return true if conversion was successful
+    //! \see getNbErrors() getError() loadModelProto() loadInitializer()
+    //!
+    virtual bool parseModelProto() noexcept = 0;
+};
+
+//!
+//! \class IParserRefitter
+//!
+//! \brief An interface designed to refit weights from an ONNX model.
+//!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
+class IParserRefitter
+{
+public:
+    //!
+    //! \brief Load a serialized ONNX model from memory and perform weight refit.
+    //!
+    //! \param serializedOnnxModel Pointer to the serialized ONNX model
+    //! \param serializedOnnxModelSize Size of the serialized ONNX model
+    //!        in bytes
+    //! \param modelPath Absolute path to the model file for loading external weights if required
+    //! \return true if all the weights in the engine were refit successfully.
+    //!
+    //! The serialized ONNX model must be identical to the one used to generate the engine
+    //! that will be refit.
+    //!
+    virtual bool refitFromBytes(
+        void const* serializedOnnxModel, size_t serializedOnnxModelSize, char const* modelPath = nullptr) noexcept
+        = 0;
+
+    //!
+    //! \brief Load and parse an ONNX model from disk and perform weight refit.
+    //!
+    //! \param onnxModelFile Path to the ONNX model to load from disk.
+    //!
+    //! \return true if the model was loaded successfully, and if all the weights in the engine were refit successfully.
+    //!
+    //! The provided ONNX model must be identical to the one used to generate the engine
+    //! that will be refit.
+    //!
+    virtual bool refitFromFile(char const* onnxModelFile) noexcept = 0;
+
+    //!
+    //!\brief Get the number of errors that occurred during prior calls to \p refitFromBytes or \p refitFromFile
+    //!
+    //! \see getError() IParserError
+    //!
+    virtual int32_t getNbErrors() const noexcept = 0;
+
+    //!
+    //!\brief Get an error that occurred during prior calls to \p refitFromBytes or \p refitFromFile
+    //!
+    //! \see getNbErrors() IParserError
+    //!
+    virtual IParserError const* getError(int32_t index) const noexcept = 0;
+
+    //!
+    //!\brief Clear errors from prior calls to \p refitFromBytes or \p refitFromFile
+    //!
+    //! \see getNbErrors() getError() IParserError
+    //!
+    virtual void clearErrors() = 0;
+
+    virtual ~IParserRefitter() noexcept = default;
+
+    //!
+    //! \brief Load a serialized ONNX model into the refitter. Unlike the refitFromBytes() or refitFromFile()
+    //! functions, this function does not immediately begin the refit process. Using this function
+    //! allows users to provide their own initializers for the ONNX model through the loadInitializer() function.
+    //!
+    //! Only one model can be loaded at a time. Subsequent calls to loadModelProto() will result in an error.
+    //!
+    //! To begin the refit process, use refitModelProto().
+    //!
+    //! \param serializedOnnxModel Pointer to the serialized ONNX model. Can be freed after this function returns.
+    //! \param serializedOnnxModelSize Size of the serialized ONNX model in bytes.
+    //! \param modelPath Absolute path to the model file for loading external weights if required.
+    //! \return true if the model was loaded successfully
+    //! \see getNbErrors() getError()
+    //!
+    virtual bool loadModelProto(
+        void const* serializedOnnxModel, size_t serializedOnnxModelSize, char const* modelPath = nullptr) noexcept = 0;
+
+    //!
+    //! \brief Prompt the ONNX refitter to load an initializer with user-provided binary data.
+    //! The lifetime of the data must exceed the lifetime of the refitter.
+    //!
+    //! All user-provided initializers must be provided prior to calling refitModelProto().
+    //!
+    //! This function can be called multiple times to specify the names of multiple initializers.
+    //!
+    //! Calling this function with an initializer previously specified will overwrite the previous instance.
+    //!
+    //! This function will return false if initializer validation fails. Possible validation errors are:
+    //! * This function was called prior to loadModelProto().
+    //! * The requested initializer was not found in the model.
+    //! * The size of the data provided is different from the corresponding initializer in the model.
+    //!
+    //! \param name Name of the initializer.
+    //! \param data Binary data containing the values of the initializer.
+    //! \param size Size of the initializer in bytes.
+    //! \return true if the initializer was loaded successfully
+    //! \see loadModelProto()
+    //!
+    virtual bool loadInitializer(char const* name, void const* data, size_t size) noexcept = 0;
+
+    //! \brief Begin the refit process from the loaded ONNX model.
+    //!
+    //! \return true if refit was successful
+    //! \see getNbErrors() getError() loadModelProto()
+    //!
+    virtual bool refitModelProto() noexcept = 0;
+};
+
+} // namespace nvonnxparser
+
+extern "C" TENSORRTAPI void* createNvOnnxParser_INTERNAL(void* network, void* logger, int version) noexcept; //!< Internal entry point; called by nvonnxparser::createParser() below.
+extern "C" TENSORRTAPI void* createNvOnnxParserRefitter_INTERNAL(
+    void* refitter, void* logger, int32_t version) noexcept; //!< Internal entry point; called by nvonnxparser::createParserRefitter() below.
+extern "C" TENSORRTAPI int getNvOnnxParserVersion() noexcept; //!< Returns a parser version as an int; presumably encoded like NV_ONNX_PARSER_VERSION - confirm.
+
+namespace nvonnxparser
+{
+
+namespace
+{
+
+//!
+//! \brief Create a new parser object that writes into \p network
+//!
+//! \param network The network definition that the parser will write to
+//! \param logger The logger to use
+//! \return a new parser object or NULL if an error occurred
+//!
+//! Constant input dimensions should not be changed after parsing, because
+//! correctness of the translation may rely on them. Changing a dynamic input
+//! dimension (one that translates to -1 in TensorRT) to a constant is okay if
+//! the constant is consistent with the model. Each instance of the parser is
+//! designed to only parse one ONNX model once.
+//!
+//! \see IParser
+inline IParser* createParser(nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger) noexcept
+{
+    void* const opaqueParser = createNvOnnxParser_INTERNAL(&network, &logger, NV_ONNX_PARSER_VERSION);
+    return static_cast<IParser*>(opaqueParser);
+}
+
+//!
+//! \brief Create a new ONNX parser-refitter object that works through \p refitter
+//!
+//! \param refitter The Refitter object used to refit the model
+//! \param logger The logger to use
+//! \return a new ParserRefitter object or NULL if an error occurred
+//!
+//! \see IParserRefitter
+//!
+inline IParserRefitter* createParserRefitter(nvinfer1::IRefitter& refitter, nvinfer1::ILogger& logger) noexcept
+{
+    void* const opaqueRefitter = createNvOnnxParserRefitter_INTERNAL(&refitter, &logger, NV_ONNX_PARSER_VERSION);
+    return static_cast<IParserRefitter*>(opaqueRefitter);
+}
+
+} // namespace
+
+} // namespace nvonnxparser
+
+#endif // NV_ONNX_PARSER_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/include/impl/NvInferPythonPlugin.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/include/impl/NvInferPythonPlugin.h
new file mode 100644
index 0000000000000000000000000000000000000000..d703ba52cf9865b813b360ac05eb63acef06fc69
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/include/impl/NvInferPythonPlugin.h
@@ -0,0 +1,595 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_PYTHON_IMPL_PLUGIN_H
+#define TRT_PYTHON_IMPL_PLUGIN_H
+
+#include "NvInfer.h"
+
+//!
+//! \file NvInferPythonPlugin.h
+//!
+//! This file contains definitions for supporting the `tensorrt.plugin` Python module
+//!
+//! \warning None of the definitions here are part of the TensorRT C++ API and may not follow semantic versioning rules.
+//! TensorRT clients must not utilize them directly.
+//!
+
+namespace nvinfer1
+{
+
+//! \enum PluginArgType
+//! \brief Numeric type of an extra kernel input argument in an AOT Python plugin
+enum class PluginArgType : int32_t
+{
+    //! Integer argument
+    kINT = 0,
+};
+
+//! \enum PluginArgDataType
+//! \brief Data type of an extra kernel input argument in an AOT Python plugin
+enum class PluginArgDataType : int32_t
+{
+    //! 8-bit signed integer
+    kINT8 = 0,
+    //! 16-bit signed integer
+    kINT16 = 1,
+    //! 32-bit signed integer
+    kINT32 = 2,
+};
+//! \class ISymExpr
+//! \brief Generic interface for a scalar symbolic expression implementable by a Python plugin / TensorRT Python backend
+class ISymExpr
+{
+public:
+    //! \brief Get the type of the symbolic expression
+    virtual PluginArgType getType() const noexcept = 0;
+    //! \brief Get the data type of the symbolic expression
+    virtual PluginArgDataType getDataType() const noexcept = 0;
+    //! \brief Underlying symbolic expression
+    virtual void* getExpr() noexcept = 0;
+};
+
+//! Impl class for ISymExprs
+class ISymExprsImpl
+{
+public:
+    virtual ISymExpr* getSymExpr(int32_t index) const noexcept = 0;
+    virtual bool setSymExpr(int32_t index, ISymExpr* symExpr) noexcept = 0;
+    virtual int32_t getNbSymExprs() const noexcept = 0;
+    virtual bool setNbSymExprs(int32_t count) noexcept = 0;
+
+    virtual ~ISymExprsImpl() noexcept = default;
+};
+
+//! \class ISymExprs
+//! \brief Allows for a sequence of symbolic expressions to be communicated to the TensorRT backend
+//! \note Clients must not implement this class.
+//! \see ISymExpr
+class ISymExprs
+{
+public:
+    //! \brief Get the symbolic expression at the given index
+    //! \return A pointer to the symbolic expression or nullptr if the index is out of range
+    ISymExpr* getSymExpr(int32_t index) const noexcept
+    {
+        return mImpl->getSymExpr(index);
+    }
+
+    //! \brief Set the symbolic expression at the given index
+    //! \return true if the index is in range and the symbolic expression was set successfully, false otherwise
+    bool setSymExpr(int32_t index, ISymExpr* symExpr) noexcept
+    {
+        return mImpl->setSymExpr(index, symExpr);
+    }
+
+    //! \brief Get the number of symbolic expressions
+    int32_t getNbSymExprs() const noexcept
+    {
+        return mImpl->getNbSymExprs();
+    }
+
+    //! \brief Set the number of symbolic expressions
+    //! \return true if the number of symbolic expressions was set successfully, false otherwise
+    bool setNbSymExprs(int32_t count) noexcept
+    {
+        return mImpl->setNbSymExprs(count);
+    }
+
+protected:
+    ISymExprsImpl* mImpl{nullptr};
+    virtual ~ISymExprs() noexcept = default;
+};
+
+
+//! \enum QuickPluginCreationRequest
+//! \brief Communicates preference when a quickly deployable plugin is to be added to the network
+enum class QuickPluginCreationRequest : int32_t
+{
+    //! No preference specified
+    kUNKNOWN = 0,
+    //! JIT plugin is preferred
+    kPREFER_JIT = 1,
+    //! AOT plugin is preferred
+    kPREFER_AOT = 2,
+    //! JIT plugin must be used. TensorRT should fail if a JIT implementation cannot be found.
+    kSTRICT_JIT = 3,
+    //! AOT plugin must be used. TensorRT should fail if an AOT implementation cannot be found.
+    kSTRICT_AOT = 4,
+};
+
+//! Impl class for IKernelLaunchParams
+class IKernelLaunchParamsImpl
+{
+public:
+    virtual ISymExpr* getGridX() noexcept = 0;
+    virtual bool setGridX(ISymExpr* gridX) noexcept = 0;
+
+    virtual ISymExpr* getGridY() noexcept = 0;
+    virtual bool setGridY(ISymExpr* gridY) noexcept = 0;
+
+    virtual ISymExpr* getGridZ() noexcept = 0;
+    virtual bool setGridZ(ISymExpr* gridZ) noexcept = 0;
+
+    virtual ISymExpr* getBlockX() noexcept = 0;
+    virtual bool setBlockX(ISymExpr* blockX) noexcept = 0;
+
+    virtual ISymExpr* getBlockY() noexcept = 0;
+    virtual bool setBlockY(ISymExpr* blockY) noexcept = 0;
+
+    virtual ISymExpr* getBlockZ() noexcept = 0;
+    virtual bool setBlockZ(ISymExpr* blockZ) noexcept = 0;
+
+    virtual ISymExpr* getSharedMem() noexcept = 0;
+    virtual bool setSharedMem(ISymExpr* sharedMem) noexcept = 0;
+
+    virtual ~IKernelLaunchParamsImpl() noexcept = default;
+};
+
+//! \class IKernelLaunchParams
+//! \brief Allows for kernel launch parameters to be communicated to the TensorRT backend
+//! \note Clients must not implement this class.
+class IKernelLaunchParams
+{
+public:
+    //! \brief Get the X dimension of the grid
+    ISymExpr* getGridX() noexcept
+    {
+        return mImpl->getGridX();
+    }
+
+    //! \brief Set the X dimension of the grid
+    //! \return true if the grid's X dimension was set successfully, false otherwise
+    bool setGridX(ISymExpr* gridX) noexcept
+    {
+        return mImpl->setGridX(gridX);
+    }
+
+    //! \brief Get the Y dimension of the grid
+    ISymExpr* getGridY() noexcept
+    {
+        return mImpl->getGridY();
+    }
+
+    //! \brief Set the Y dimension of the grid
+    //! \return true if the grid's Y dimension was set successfully, false otherwise
+    bool setGridY(ISymExpr* gridY) noexcept
+    {
+        return mImpl->setGridY(gridY);
+    }
+
+    //! \brief Get the Z dimension of the grid
+    ISymExpr* getGridZ() noexcept
+    {
+        return mImpl->getGridZ();
+    }
+
+    //! \brief Set the Z dimension of the grid
+    //! \return true if the grid's Z dimension was set successfully, false otherwise
+    bool setGridZ(ISymExpr* gridZ) noexcept
+    {
+        return mImpl->setGridZ(gridZ);
+    }
+
+    //! \brief Get the X dimension of each thread block
+    ISymExpr* getBlockX() noexcept
+    {
+        return mImpl->getBlockX();
+    }
+
+    //! \brief Set the X dimension of each thread block
+    //! \return true if each thread block's X dimension was set successfully, false otherwise
+    bool setBlockX(ISymExpr* blockX) noexcept
+    {
+        return mImpl->setBlockX(blockX);
+    }
+
+    //! \brief Get the Y dimension of each thread block
+    ISymExpr* getBlockY() noexcept
+    {
+        return mImpl->getBlockY();
+    }
+
+    //! \brief Set the Y dimension of each thread block
+    //! \return true if each thread block's Y dimension was set successfully, false otherwise
+    bool setBlockY(ISymExpr* blockY) noexcept
+    {
+        return mImpl->setBlockY(blockY);
+    }
+
+    //! \brief Get the Z dimension of each thread block
+    ISymExpr* getBlockZ() noexcept
+    {
+        return mImpl->getBlockZ();
+    }
+
+    //! \brief Set the Z dimension of each thread block
+    //! \return true if each thread block's Z dimension was set successfully, false otherwise
+    bool setBlockZ(ISymExpr* blockZ) noexcept
+    {
+        return mImpl->setBlockZ(blockZ);
+    }
+
+    //! \brief Get the dynamic shared-memory per thread block in bytes
+    ISymExpr* getSharedMem() noexcept
+    {
+        return mImpl->getSharedMem();
+    }
+
+    //! \brief Set the dynamic shared-memory per thread block in bytes
+    //! \return true if the dynamic shared-memory per thread block was set successfully, false otherwise
+    bool setSharedMem(ISymExpr* sharedMem) noexcept
+    {
+        return mImpl->setSharedMem(sharedMem);
+    }
+
+protected:
+    IKernelLaunchParamsImpl* mImpl{nullptr};
+    virtual ~IKernelLaunchParams() noexcept = default;
+};
+
+namespace v_1_0
+{
+
+class IPluginV3QuickCore : public IPluginCapability
+{
+public:
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN_V3QUICK_CORE", 1, 0};
+    }
+
+    virtual AsciiChar const* getPluginName() const noexcept = 0;
+
+    virtual AsciiChar const* getPluginVersion() const noexcept = 0;
+
+    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
+};
+
+class IPluginV3QuickBuild : public IPluginCapability
+{
+public:
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN_V3QUICK_BUILD", 1, 0};
+    }
+
+    //!
+    //! \brief Provide the data types of the plugin outputs if the input tensors have the data types provided.
+    //!
+    //! \param outputTypes Pre-allocated array to which the output data types should be written.
+    //! \param nbOutputs The number of output tensors. This matches the value returned from getNbOutputs().
+    //! \param inputTypes The input data types.
+    //! \param inputRanks Ranks of the input tensors
+    //! \param nbInputs The number of input tensors.
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t getOutputDataTypes(DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes,
+        int32_t const* inputRanks, int32_t nbInputs) const noexcept = 0;
+
+    //!
+    //! \brief Provide expressions for computing dimensions of the output tensors from dimensions of the input tensors.
+    //!
+    //! \param inputs Expressions for dimensions of the input tensors
+    //! \param nbInputs The number of input tensors
+    //! \param shapeInputs Expressions for values of the shape tensor inputs
+    //! \param nbShapeInputs The number of shape tensor inputs
+    //! \param outputs Pre-allocated array to which the output dimensions must be written
+    //! \param exprBuilder Object for generating new dimension expressions
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
+        int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
+
+    //!
+    //! \brief Configure the plugin. Behaves similarly to `IPluginV3OneBuild::configurePlugin()`
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
+        DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
+
+    //!
+    //! \brief Get number of format combinations supported by the plugin for the I/O characteristics indicated by
+    //! `inOut`.
+    //!
+    virtual int32_t getNbSupportedFormatCombinations(
+        DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
+
+    //!
+    //! \brief Write all format combinations supported by the plugin for the I/O characteristics indicated by `inOut` to
+    //! `supportedCombinations`. It is guaranteed to have sufficient memory allocated for (nbInputs + nbOutputs) *
+    //! getNbSupportedFormatCombinations() `PluginTensorDesc`s.
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t getSupportedFormatCombinations(DynamicPluginTensorDesc const* inOut, int32_t nbInputs,
+        int32_t nbOutputs, PluginTensorDesc* supportedCombinations, int32_t nbFormatCombinations) noexcept = 0;
+
+    //!
+    //! \brief Get the number of outputs from the plugin.
+    //!
+    virtual int32_t getNbOutputs() const noexcept = 0;
+
+    //!
+    //! \brief Communicates to TensorRT that the output at the specified output index is aliased to the input at the
+    //! returned index. Behaves similarly to `v_2_0::IPluginV3OneBuild::getAliasedInput()`.
+    //!
+    virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
+    {
+        return -1;
+    }
+
+    //!
+    //! \brief Query for any custom tactics that the plugin intends to use specific to the I/O characteristics indicated
+    //! by the immediately preceding call to `configurePlugin()`.
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
+    {
+        return 0;
+    }
+
+    //!
+    //! \brief Query for number of custom tactics related to the `getValidTactics()` call.
+    //!
+    virtual int32_t getNbTactics() noexcept
+    {
+        return 0;
+    }
+
+    //!
+    //! \brief Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creation.
+    //!
+    virtual char const* getTimingCacheID() noexcept
+    {
+        return nullptr;
+    }
+
+    //!
+    //! \brief Query for a string representing the configuration of the plugin. May be called anytime after
+    //! plugin creation.
+    //!
+    virtual char const* getMetadataString() noexcept
+    {
+        return nullptr;
+    }
+};
+
+class IPluginV3QuickAOTBuild : public IPluginV3QuickBuild
+{
+public:
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN_V3QUICKAOT_BUILD", 1, 0};
+    }
+
+    //! \brief Get the launch parameters for the kernel to be used for the specified input and output types/formats and
+    //! any corresponding custom tactics.
+    //!        If custom tactics are being advertised by the plugin, the corresponding tactic is the one specified by
+    //!        the immediately preceding call to setTactic().
+    //!
+    //! \param inputs Expressions for dimensions of the input tensors
+    //! \param inOut The input and output tensors' attributes
+    //! \param nbInputs The number of input tensors
+    //! \param nbOutputs The number of output tensors
+    //! \param launchParams Interface which allows the specification of kernel launch parameters as symbolic expressions
+    //! of the input dimensions
+    //! \param extraArgs Interface which allows the specification of any scalar arguments to be
+    //! passed to the kernel, as symbolic expressions of the input dimensions
+    //! \param exprBuilder Object for generating new symbolic expressions
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t getLaunchParams(DimsExprs const* inputs, DynamicPluginTensorDesc const* inOut, int32_t nbInputs,
+        int32_t nbOutputs, IKernelLaunchParams* launchParams, ISymExprs* extraArgs,
+        IExprBuilder& exprBuilder) noexcept = 0;
+
+    //!
+    //! \brief Get the compiled form for the kernel to be used for the specified input and output types/formats and any
+    //! corresponding custom tactics.
+    //!        If custom tactics are being advertised by the plugin, the corresponding tactic is the one specified by
+    //!        the immediately preceding call to setTactic().
+    //!
+    //! \param in The input tensors' attributes that are used for configuration.
+    //! \param nbInputs Number of input tensors.
+    //! \param out The output tensors' attributes that are used for configuration.
+    //! \param nbOutputs Number of output tensors.
+    //! \param kernelName The name for the kernel.
+    //! \param compiledKernel Compiled form of the kernel.
+    //! \param compiledKernelSize The size of the compiled kernel.
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t getKernel(PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out,
+        int32_t nbOutputs, const char** kernelName, char** compiledKernel, int32_t* compiledKernelSize) noexcept = 0;
+
+    //!
+    //! \brief Set the tactic to be used in the subsequent call to enqueue(). Behaves similarly to
+    //! IPluginV3OneRuntime::setTactic()
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t setTactic(int32_t tactic) noexcept
+    {
+        return 0;
+    }
+};
+
+class IPluginV3QuickRuntime : public IPluginCapability
+{
+public:
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN_V3QUICK_RUNTIME", 1, 0};
+    }
+
+    //!
+    //! \brief Set the tactic to be used in the subsequent call to enqueue(). Behaves similarly to
+    //! `IPluginV3OneRuntime::setTactic()`.
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t setTactic(int32_t tactic) noexcept
+    {
+        return 0;
+    }
+
+    //!
+    //! \brief Execute the plugin.
+    //!
+    //! \param inputDesc how to interpret the memory for the input tensors.
+    //! \param outputDesc how to interpret the memory for the output tensors.
+    //! \param inputs The memory for the input tensors.
+    //! \param inputStrides Strides for input tensors.
+    //! \param outputStrides Strides for output tensors.
+    //! \param outputs The memory for the output tensors.
+    //! \param nbInputs Number of input tensors.
+    //! \param nbOutputs Number of output tensors.
+    //! \param stream The stream in which to execute the kernels.
+    //!
+    //! \return 0 for success, else non-zero
+    //!
+    virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
+        void const* const* inputs, void* const* outputs, Dims const* inputStrides, Dims const* outputStrides,
+        int32_t nbInputs, int32_t nbOutputs, cudaStream_t stream) noexcept = 0;
+
+    //!
+    //! \brief Get the plugin fields which should be serialized.
+    //!
+    virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
+};
+
+class IPluginCreatorV3Quick : public IPluginCreatorInterface
+{
+public:
+    InterfaceInfo getInterfaceInfo() const noexcept override
+    {
+        return InterfaceInfo{"PLUGIN CREATOR_V3QUICK", 1, 0};
+    }
+
+    //!
+    //! \brief Return a plugin object. Return nullptr in case of error.
+    //!
+    //! \param name A NULL-terminated name string of length 1024 or less, including the NULL terminator.
+    //! \param nspace A NULL-terminated namespace string of length 1024 or less, including the NULL terminator.
+    //! \param fc A pointer to a collection of fields needed for constructing the plugin.
+    //! \param phase The TensorRT phase in which the plugin is being created
+    //! \param quickPluginCreationRequest Whether a JIT or AOT plugin should be created
+    //!
+    virtual IPluginV3* createPlugin(AsciiChar const* name, AsciiChar const* nspace, PluginFieldCollection const* fc,
+        TensorRTPhase phase, QuickPluginCreationRequest quickPluginCreationRequest) noexcept = 0;
+
+    //!
+    //! \brief Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in the
+    //! TensorRT build phase.
+    //!
+    virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
+
+    virtual AsciiChar const* getPluginName() const noexcept = 0;
+
+    virtual AsciiChar const* getPluginVersion() const noexcept = 0;
+
+    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
+
+    IPluginCreatorV3Quick() = default;
+    virtual ~IPluginCreatorV3Quick() = default;
+
+protected:
+    IPluginCreatorV3Quick(IPluginCreatorV3Quick const&) = default;
+    IPluginCreatorV3Quick(IPluginCreatorV3Quick&&) = default;
+    IPluginCreatorV3Quick& operator=(IPluginCreatorV3Quick const&) & = default;
+    IPluginCreatorV3Quick& operator=(IPluginCreatorV3Quick&&) & = default;
+};
+
+} // namespace v_1_0
+
+//!
+//! \class IPluginV3QuickCore
+//!
+//! \brief Provides core capability (`IPluginCapability::kCORE`) for quickly-deployable TRT plugins
+//!
+//! \warning This class is strictly for the purpose of supporting quickly-deployable TRT Python plugins and is not part
+//! of the public TensorRT C++ API. Users must not inherit from this class.
+//!
+using IPluginV3QuickCore = v_1_0::IPluginV3QuickCore;
+
+//!
+//! \class IPluginV3QuickBuild
+//!
+//! \brief Provides build capability (`IPluginCapability::kBUILD`) for quickly-deployable TRT plugins
+//!
+//! \warning This class is strictly for the purpose of supporting quickly-deployable TRT Python plugins and is not part
+//! of the public TensorRT C++ API. Users must not inherit from this class.
+//!
+using IPluginV3QuickBuild = v_1_0::IPluginV3QuickBuild;
+
+//!
+//! \class IPluginV3QuickAOTBuild
+//!
+//! \brief Provides additional build capabilities for AOT quickly-deployable TRT plugins. Descends from
+//! IPluginV3QuickBuild.
+//!
+//! \warning This class is strictly for the purpose of supporting quickly-deployable TRT Python plugins and is not part
+//! of the public TensorRT C++ API. Users must not inherit from this class.
+//!
+using IPluginV3QuickAOTBuild = v_1_0::IPluginV3QuickAOTBuild;
+
+//!
+//! \class IPluginV3QuickRuntime
+//!
+//! \brief Provides runtime capability (`IPluginCapability::kRUNTIME`) for JIT quickly-deployable TRT plugins
+//!
+//! \warning This class is strictly for the purpose of supporting quickly-deployable TRT Python plugins and is not part
+//! of the public TensorRT C++ API. Users must not inherit from this class.
+//!
+using IPluginV3QuickRuntime = v_1_0::IPluginV3QuickRuntime;
+
+//!
+//! \class IPluginCreatorV3Quick
+//!
+//! \warning This class is strictly for the purpose of supporting quickly-deployable TRT Python plugins and is not part
+//! of the public TensorRT C++ API. Users must not inherit from this class.
+//!
+using IPluginCreatorV3Quick = v_1_0::IPluginCreatorV3Quick;
+
+} // namespace nvinfer1
+
+#endif // TRT_PYTHON_IMPL_PLUGIN_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp310-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp310-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..bbd08e32fa1f23aa028be087d08bb0d2e856a2f3
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp310-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:976bc26fe4c4ba3a687271a7a81582555b7ec1d4735a17f9f9fa27eaa0ad9eca
+size 1179562
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp311-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp311-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..213253be0977cd41807a8e1d711238e9a02e069c
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp311-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb438c5f2060fb0568f37858c2abdca4c943e4bad7d3b83359f61686b0f698d0
+size 1179959
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp312-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp312-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..8898d48a7b09719d21b10bb5c41121d25e56b39e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp312-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30b8731625971ccc8807da0fee7a76f554f5c180bb6328dc76c22bedc3d85e8d
+size 1182996
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp313-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp313-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..168a7fd264634fde413870062f3f15883436e34d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp313-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66d71ec2d0cff0b3f4408fc9080a0dc4a33ab6bb98265572c456f789528639de
+size 1182970
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp38-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp38-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..422cc1a0f60065c0c6269017cea3d2b8a762d5b4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp38-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfdc5830108a0709f78ee8d995da7e0d982f430f7ca8b6e9a11a8324584da689
+size 1177964
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp39-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp39-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..2b5cdbeac6a96218d133a69745abaa337d1e40e8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt-10.13.0.35-cp39-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:974e29cd1dc7f582ec42783497b921e021c511de3a087e7468749982fcc3289d
+size 1179896
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp310-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp310-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..f5b2ac2725c8d9a482230f2c3ed8f771819e242a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp310-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9217eb2bc79b9a662b52494a19ad85e7981aa17798c4800349969b1b124c448a
+size 725889
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp311-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp311-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..cce979c893e8a6c4fe7ca5f4c17207bbf00dfa47
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp311-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88c75c1fca690794c32155e4d28c258561cd1172c1a5592fc59614f085f74233
+size 724572
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp312-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp312-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..0135b031940309bc14ece054607f6ad16f3154d2
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp312-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a48f3dc6ac9795b71c858b15cf1f45ddcd5b2775b62965cc73773d7ed79483e
+size 727568
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp313-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp313-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..cb92344c196ff7f1ec49235a9a54f20906df89de
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp313-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56cacce55cdd97252364e67540c69f540ee85a54520fbf4927581314be99e8b2
+size 727593
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp38-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp38-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..ef3f7b8b21a4dc96f4ab7021a30691e001608e10
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp38-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:540ddf51a15c1f6894eabdab1785dfe334d720be59fef1767ad86fec36860bfa
+size 724839
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp39-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp39-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..2ca433059c51eaa3b943a38ba8aed1115e3668eb
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_dispatch-10.13.0.35-cp39-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5222a8b65364650836cccf8c77601c218aad7149810264b251e1dd27a82bc4e
+size 725175
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp310-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp310-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..b1abf79787ce32d3a2563136a0a8cb0583f8e5e1
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp310-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4525afe9c65633f3b84328bca1bed155e10da2e5c1d01b0692d39933cc5fcb84
+size 725741
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp311-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp311-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..3d0244da0ff2a831593041f4616d9067649e96d7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp311-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e675039063d43b96d82cb9691289babbebc5998eb6617f88a99f1a583acdfb
+size 724404
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp312-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp312-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..ca5207fc1223baf057bdd2d5f0fb21a28f7bdf18
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp312-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec2b53bd14d9e8f2c20c285921c7da519a843bf677c8b1cce594e653225fb266
+size 727410
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp313-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp313-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..5022ed0a49f10e3c04875e04f2805f31d07bbb39
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp313-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7e86357c08f2b20504855c31bdeb734d7e3d41f86edc500406ef063eb15e57e
+size 727457
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp38-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp38-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..97331a2e8a0e675a0dfdc26216b69a47b949bd16
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp38-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffcd2dd35ea2eaabcad2d22e94db76de982b96efe39f60f6f6b595ec0f0391af
+size 724692
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp39-none-linux_x86_64.whl b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp39-none-linux_x86_64.whl
new file mode 100644
index 0000000000000000000000000000000000000000..e987762d55aad328b0a9d26bed09d5acd3328a4b
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/python/tensorrt_lean-10.13.0.35-cp39-none-linux_x86_64.whl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0338629a9d0a6fa80d0231f1623d3501e12892d31cb0cc55dda8b994ec4bc128
+size 724994
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..2e54644af67427734d6106b65956cd6877e443d9
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/Makefile
@@ -0,0 +1,103 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+SHELL = /bin/bash -o pipefail
+TARGET ?= $(shell uname -m)
+LIBDIR ?= lib
+VERBOSE ?= 0
+ifeq ($(VERBOSE), 1)
+  AT =
+else
+  AT = @
+endif
+
+CUDA_TRIPLE = x86_64-linux
+DLSW_TRIPLE = x86_64-linux-gnu
+ifeq ($(TARGET), aarch64)
+  ifeq ($(ARMSERVER), 1)
+    CUDA_TRIPLE = sbsa-linux
+  else
+    CUDA_TRIPLE = aarch64-linux
+  endif
+  DLSW_TRIPLE = aarch64-linux-gnu
+endif
+ifeq ($(TARGET), qnx)
+  CUDA_TRIPLE = aarch64-qnx
+  DLSW_TRIPLE = aarch64-unknown-nto-qnx
+endif
+ifeq ($(TARGET), ppc64le)
+  CUDA_TRIPLE = ppc64le-linux
+  DLSW_TRIPLE = ppc64le-linux
+endif
+export TARGET
+export LIBDIR
+export VERBOSE
+export CUDA_TRIPLE
+export DLSW_TRIPLE
+
+ifeq ($(SAFE_PDK), 1)
+  # Only dlaSafetyRuntime can currently be executed with the safety PDK.
+  samples := dlaSafetyRuntime
+else
+  samples := \
+          sampleCharRNN \
+          sampleDynamicReshape \
+          sampleEditableTimingCache \
+          sampleINT8API \
+          sampleNamedDimensions \
+          sampleOnnxMNIST \
+          sampleOnnxMnistCoordConvAC \
+          sampleProgressMonitor \
+          sampleIOFormats \
+          trtexec
+
+  ifneq ($(TRT_WINML), 1)
+    # TRT_WINML build does not support custom plugins.
+    samples += sampleNonZeroPlugin
+  endif
+
+  ifeq ($(SAFETY_SAMPLE_BUILD), 1)
+    ifneq ($(ABITYPE), qnx-safe)
+      samples += trtSafeExec
+      samples += sampleSafeMNIST
+      samples += sampleSafePluginV3
+    endif
+  endif
+
+  ifeq ($(ENABLE_DLA), 1)
+    samples += sampleCudla
+  endif
+
+  # Skip the standard samples, since they have non-safety-certified dependencies.
+  ifeq ($(QNX_SAFE_BUILD), 1)
+    samples = trtSafeExec
+    samples += sampleSafeMNIST
+    samples += sampleSafePluginV3
+  endif
+endif
+
+.PHONY: all clean help
+all:
+	$(AT)$(foreach sample, $(samples), $(MAKE) -C $(sample) &&) :
+
+clean:
+	$(AT)$(foreach sample, $(samples), $(MAKE) clean -C $(sample) &&) :
+
+help:
+	$(AT)echo "Sample building help menu."
+	$(AT)echo "Samples:"
+	$(AT)$(foreach sample, $(samples), echo -e "\t$(sample)" &&) :
+	$(AT)echo -e "\nCommands:"
+	$(AT)echo -e "\tall - build all samples."
+	$(AT)echo -e "\tclean - clean all samples."
+	$(AT)echo -e "\nVariables:"
+	$(AT)echo -e "\tTARGET - Specify the target to build for."
+	$(AT)echo -e "\tVERBOSE - Specify verbose output."
+	$(AT)echo -e "\tCUDA_INSTALL_DIR - Directory where cuda installs to."
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/Makefile.config b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/Makefile.config
new file mode 100644
index 0000000000000000000000000000000000000000..a62db463211c8e1d087f70497f50f533abbd3fe8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/Makefile.config
@@ -0,0 +1,402 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+.SUFFIXES:
+CUDA_TRIPLE ?= x86_64-linux
+DLSW_TRIPLE ?= x86_64-linux-gnu
+PROTOBUF_TRIPLE ?= x86_64-linux-gnu
+SAFE_PDK ?= 0
+CPP_STANDARD ?= 17
+TARGET ?= $(shell uname -m)
+CUSTOM_LD_FLAGS ?=
+USE_STUB_EXTERNALS ?= 0
+TRT_STATIC ?= 0
+GENCODES ?=
+
+ifeq ($(CUDA_INSTALL_DIR), )
+  CUDA_INSTALL_DIR ?= /usr/local/cuda
+  $(warning CUDA_INSTALL_DIR variable is not specified, using $(CUDA_INSTALL_DIR) by default, use CUDA_INSTALL_DIR=<cuda_directory> to change.)
+endif
+
+ifeq ($(ENABLE_DLA), 1)
+  ifneq ($(PDK_DIR), )
+    PDK_LIB_DIR ?= $(PDK_DIR)/lib-target
+    PDK_INC_DIR ?= $(PDK_DIR)/include
+  endif
+  ifeq ($(PDK_LIB_DIR), )
+    PDK_LIB_DIR ?= /usr/lib
+    $(warning DLA is enabled and PDK_LIB_DIR is not specified, using $(PDK_LIB_DIR) by default, use PDK_LIB_DIR=<pdk_directory> to change.)
+  endif
+  ifeq ($(PDK_INC_DIR), )
+    PDK_INC_DIR ?= /usr/include
+    $(warning DLA is enabled and PDK_INC_DIR is not specified, using $(PDK_INC_DIR) by default, use PDK_INC_DIR=<pdk_directory> to change.)
+  endif
+endif
+
+ifeq ($(TRT_LIB_DIR), )
+  TRT_LIB_DIR ?= ../../lib
+  $(warning TRT_LIB_DIR is not specified, searching $(TRT_LIB_DIR), ../../lib, ../lib by default, use TRT_LIB_DIR=<trt_lib_directory> to change.)
+endif
+
+CUDA_LIBDIR = lib
+ifeq ($(TARGET), aarch64)
+  ifeq ($(shell uname -m), aarch64)
+    CUDA_LIBDIR = lib64
+    CC = g++
+  else
+    CC = aarch64-linux-gnu-g++
+  endif
+  CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64 -ccbin $(CC)
+else ifeq ($(TARGET), x86_64)
+  CUDA_LIBDIR = lib64
+  CC = g++
+  CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64
+else ifeq ($(TARGET), ppc64le)
+  CUDA_LIBDIR = lib64
+  CC = g++
+  CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64
+else ifeq ($(TARGET), qnx)
+  ifndef QNX_VERSION
+    $(error QNX_VERSION is not set)
+  endif
+  ifeq ($(USE_QCC), 1)
+    CC = ${QNX_HOST}/usr/bin/q++ -V$(QNX_GCC_VERSION),gcc_ntoaarch64le
+    COMMON_FLAGS += -D_QNX_SOURCE -D__aarch64__ -V$(QNX_GCC_VERSION),gcc_ntoaarch64le
+    COMMON_LD_FLAGS += -D_QNX_SOURCE -D__aarch64__ -V$(QNX_GCC_VERSION),gcc_ntoaarch64le
+    CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64 -ccbin ${QNX_HOST}/usr/bin/q++ --qpp-config=$(QNX_GCC_VERSION),gcc_ntoaarch64le --target-directory aarch64-qnx -lineinfo
+  else
+    CC = ${QNX_HOST}/usr/bin/aarch64-unknown-nto-qnx$(QNX_VERSION)-g++
+    CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64 -ccbin $(CC)
+  endif
+else ########
+  $(error Auto-detection of platform failed. Please specify one of the following arguments to make: TARGET=[aarch64|x86_64|qnx])
+endif
+
+CC_MAJOR_VER = $(shell $(CC) -dumpversion | sed -e "s/\..*//")
+ifeq ($(CC_MAJOR_VER), 4)
+  CPP_STANDARD := 17
+endif
+
+# When the GCC major version is < 11 on x86_64 Linux, the Red Hat Developer Toolset 11 non-shared libstdc++ library
+# (libstdc++_nonshared.a) is needed to statically link TensorRT.
+# For more details, see https://docs.nvidia.com/deeplearning/tensorrt/sample-support-guide/index.html#building-samples-limitations.
+ifeq ($(TRT_STATIC), 1)
+  ifeq ($(TARGET), x86_64)
+    ifeq ($(shell expr $(CC_MAJOR_VER) \< 11), 1)
+      ifeq ($(REDHAT_DEVTOOL_LIB_DIR), )
+        REDHAT_DEVTOOL_LIB_DIR ?= $(TRT_LIB_DIR)
+        $(warning REDHAT_DEVTOOL_LIB_DIR is not specified, searching $(TRT_LIB_DIR), ../../lib, ../lib by default, use REDHAT_DEVTOOL_LIB_DIR=<redhat_devtool_lib_directory> to change.)
+      endif
+
+      REDHAT_DEVTOOL_LIB = $(shell test -e "$(REDHAT_DEVTOOL_LIB_DIR)/libstdc++_nonshared.a" && echo -lstdc++_nonshared)
+      ifeq ($(REDHAT_DEVTOOL_LIB), )
+        REDHAT_DEVTOOL_LIB = $(shell test -e "../lib/libstdc++_nonshared.a" && echo -lstdc++_nonshared)
+      endif
+      ifeq ($(REDHAT_DEVTOOL_LIB), )
+        REDHAT_DEVTOOL_LIB = $(shell test -e "../../lib/libstdc++_nonshared.a" && echo -lstdc++_nonshared)
+      endif
+      ifeq ($(REDHAT_DEVTOOL_LIB), )
+        $(warning libstdc++_nonshared.a cannot be found in $(REDHAT_DEVTOOL_LIB_DIR), ../../lib, ../lib. Trying to compile without libstdc++_nonshared.a. \
+          For more details, see https://docs.nvidia.com/deeplearning/tensorrt/sample-support-guide/index.html#building-samples-limitations.)
+      endif
+    endif
+  endif
+endif
+
+ifeq ($(VERBOSE), 1) # echo recipe commands only when VERBOSE=1, since samples/Makefile exports VERBOSE=0 by default
+  AT =
+else
+  AT = @
+endif
+
+AR = ar cr
+ECHO = @echo
+
+SHELL = /bin/sh
+
+ROOT_PATH = ../..
+ifeq ($(SAFE_PDK), 1)
+  OUT_PATH = $(ROOT_PATH)/bin/safety
+else
+  OUT_PATH = $(ROOT_PATH)/bin
+endif
+OUTDIR = $(OUT_PATH)
+
+define concat
+$1$2$3$4$5$6$7$8
+endef
+
+ifneq ($(USE_QCC), 1)
+# Usage: $(call make-depend,source-file,object-file,depend-file)
+define make-depend
+  $(AT)$(CC) -MM -MF $3 -MP -MT $2 $(COMMON_FLAGS) $1
+endef
+# Usage: $(call make-cuda-depend,source-file,object-file,depend-file,flags)
+define make-cuda-depend
+  $(AT)$(CUCC) -M -MT $2 $(CUFLAGS) $4 $1 > $3
+endef
+endif
+
+USE_NVRTC_STATIC = 0
+ifneq ("$(wildcard $(CUDA_INSTALL_DIR)/targets/$(CUDA_TRIPLE)/lib*/libnvrtc_static.a)", "")
+  USE_NVRTC_STATIC = 1
+endif
+USE_PTXJIT_STATIC = 0
+ifneq ("$(wildcard $(CUDA_INSTALL_DIR)/targets/$(CUDA_TRIPLE)/lib*/libnvptxcompiler_static.a)", "")
+  USE_PTXJIT_STATIC = 1
+endif
+
+
+# When TRT_STATIC is set, pick the static libraries for all components. Samples are compiled with static libraries
+ifeq ($(TRT_STATIC), 1)
+  USE_CUDART_STATIC = 1
+  ifneq ($(USE_CUGFX), 1)
+      CUDA_LIBS = -lcudart_static
+  else
+      CUDA_LIBS = -lcugfx_dll
+  endif
+  ifeq ($(USE_NVRTC_STATIC), 1)
+    CUDA_LIBS += -lnvrtc_static -lnvrtc-builtins_static
+  endif
+  ifeq ($(USE_PTXJIT_STATIC), 1)
+    CUDA_LIBS += -lnvptxcompiler_static
+    ifneq ("$(wildcard $(CUDA_INSTALL_DIR)/targets/$(CUDA_TRIPLE)/lib*/libnvJitLink_static.a)", "")
+      CUDA_LIBS += -lnvJitLink_static
+    endif
+  endif
+  NVINFER_LIB = -Wl,--whole-archive -lnvinfer_static -Wl,--no-whole-archive
+  ifeq ($(TRT_WINML), 1)
+    NVINFER_PLUGIN_LIB =
+  else
+    NVINFER_PLUGIN_LIB = -Wl,--whole-archive -lnvinfer_plugin_static -Wl,--no-whole-archive
+  endif
+  NVONNXPARSERS_LIB = -Wl,--whole-archive -lnvonnxparser_static -Wl,--no-whole-archive
+  PROTO_LIB =
+  STUBS_DIR = -L"$(TRT_LIB_DIR)/stubs" -Wl,-rpath-link="$(TRT_LIB_DIR)/stubs"
+else
+  CUDA_LIBS = -lcudart
+  NVINFER_LIB = -lnvinfer
+  ifeq ($(TRT_WINML), 1)
+    NVINFER_PLUGIN_LIB =
+  else
+    NVINFER_PLUGIN_LIB = -lnvinfer_plugin
+  endif
+  NVONNXPARSERS_LIB = -lnvonnxparser
+  PROTO_LIBDIR =
+  STUBS_DIR =
+endif
+
+#########################
+INCPATHS =
+LIBPATHS =
+COMMON_LIBS =
+
+# Add extra libraries if TRT_STATIC is enabled
+
+ifeq ($(TRT_STATIC), 1)
+  COMMON_LIBS += -lculibos -lcuda
+  # -mcmodel=large increases the distance for jump instructions to allow larger binaries to be created, which is needed when all libraries are statically linked
+  COMMON_LD_FLAGS += -mcmodel=large
+  ifneq ($(REDHAT_DEVTOOL_LIB), )
+    COMMON_LIBS += $(REDHAT_DEVTOOL_LIB)
+    LIBPATHS += -L"$(REDHAT_DEVTOOL_LIB_DIR)"
+  endif
+  # Append the _static suffix to the output binary names (only when the sample has set them)
+  ifneq ($(OUTNAME_RELEASE), )
+    OUTNAME_RELEASE := $(OUTNAME_RELEASE)_static
+  endif
+  ifneq ($(OUTNAME_DEBUG), )
+    OUTNAME_DEBUG := $(OUTNAME_DEBUG)_static
+  endif
+endif
+
+# add cross compile directories
+ifneq ($(shell uname -m), $(TARGET))
+  INCPATHS += -I"/usr/include/$(DLSW_TRIPLE)" -I"$(CUDA_INSTALL_DIR)/targets/$(CUDA_TRIPLE)/include"
+  LIBPATHS += -L"../lib/stubs" -L"../../lib/stubs" -L"/usr/lib/$(DLSW_TRIPLE)/stubs" -L"/usr/lib/$(DLSW_TRIPLE)"
+  LIBPATHS += -L"$(CUDA_INSTALL_DIR)/targets/$(CUDA_TRIPLE)/$(CUDA_LIBDIR)/stubs" -L"$(CUDA_INSTALL_DIR)/targets/$(CUDA_TRIPLE)/$(CUDA_LIBDIR)"
+endif
+INCPATHS += -I"../common" -I"../utils" -I".." -I"$(CUDA_INSTALL_DIR)/include" -I"../include" -I"../../include" -I"../../parsers/onnxOpenSource"
+LIBPATHS += -L"$(CUDA_INSTALL_DIR)/$(CUDA_LIBDIR)" -Wl,-rpath-link="$(CUDA_INSTALL_DIR)/$(CUDA_LIBDIR)"
+LIBPATHS += -L"../lib" -L"../../lib" -L"$(TRT_LIB_DIR)" -Wl,-rpath-link="$(TRT_LIB_DIR)" $(STUBS_DIR)
+
+# libnvinfer_safe.so links to neither standard nor safe PDK, while libnvinfer.so depends on standard PDK when DLA is enabled.
+ifeq ($(SAFE_PDK), 0)
+  ifneq ($(ENABLE_DLA), 0)
+    LIBPATHS += -L"$(PDK_LIB_DIR)" -Wl,-rpath-link="$(PDK_LIB_DIR)"
+  endif
+endif
+
+# Each entry ends with the delimiter ';' so that a name that is a substring of another entry cannot match by accident.
+USE_PDK_LISTS := dla_safety_runtime; sample_nvmedia;
+
+# add required PDK headers/libraries
+ifeq ($(ENABLE_DLA), 1)
+  ifeq ($(TARGET), qnx)
+    LIBPATHS += -L"$(QNX_TARGET)/aarch64le/lib"
+    LIBPATHS += -L"$(QNX_TARGET)/aarch64le/lib/gcc/$(QNX_GCC_VERSION)"
+    LIBPATHS += -L"$(QNX_TARGET)/aarch64le/usr/lib"
+    INCPATHS += -I"$(QNX_TARGET)/usr/include"
+  endif
+
+  PDK_LISTS_FILTER := $(OUTNAME_RELEASE);
+
+  PDK_LIBLIST :=
+  HAS_NVSCIBUF_LIB = $(shell ls $(PDK_LIB_DIR)/libnvscibuf.so 2> /dev/null | wc -l)
+  ifeq ($(HAS_NVSCIBUF_LIB), 1)
+    PDK_LIBLIST += -lnvscibuf
+  endif
+  HAS_NVMEDIA_TENSOR_LIB = $(shell ls $(PDK_LIB_DIR)/libnvmedia_tensor.so 2> /dev/null | wc -l)
+  ifeq ($(HAS_NVMEDIA_TENSOR_LIB), 1)
+    PDK_LIBLIST += -lnvmedia_tensor
+  endif
+  HAS_NVMEDIA_DLA_LIB = $(shell ls $(PDK_LIB_DIR)/libnvmedia_dla.so 2> /dev/null | wc -l)
+  ifeq ($(HAS_NVMEDIA_DLA_LIB), 1)
+    PDK_LIBLIST += -lnvmedia_dla
+  endif
+
+  ifneq ($(findstring $(PDK_LISTS_FILTER),$(USE_PDK_LISTS)), )
+    LIBLIST += $(PDK_LIBLIST)
+  else ifeq ($(TRT_STATIC), 1)
+    LIBLIST += $(PDK_LIBLIST)
+    LIBLIST += -lEGL
+    HAS_NVDLA_COMPILER_LIB = $(shell ls $(PDK_LIB_DIR)/libnvdla_compiler.so 2> /dev/null | wc -l)
+    ifeq ($(HAS_NVDLA_COMPILER_LIB), 1)
+      LIBLIST += -lnvdla_compiler
+    endif
+  endif
+
+    LIBLIST += -lcudla
+    COMMON_LIBS += $(LIBLIST)
+    LIBPATHS += -L"$(PDK_LIB_DIR)" -Wl,-rpath-link=$(PDK_LIB_DIR) -Wl,--unresolved-symbols=ignore-in-shared-libs
+    INCPATHS += -I"$(PDK_INC_DIR)"
+endif
+
+.SUFFIXES:
+vpath %.h $(EXTRA_DIRECTORIES)
+vpath %.cpp $(EXTRA_DIRECTORIES)
+
+COMMON_FLAGS += -Wall -Wno-deprecated-declarations -std=c++$(CPP_STANDARD) $(INCPATHS)
+COMMON_FLAGS += -D_REENTRANT
+COMMON_FLAGS += -DTRT_STATIC=$(TRT_STATIC)
+
+ifeq ($(TARGET), qnx)
+  COMMON_FLAGS += -D_POSIX_C_SOURCE=200112L -D_QNX_SOURCE -D_FILE_OFFSET_BITS=64 -fpermissive
+endif
+
+COMMON_LD_FLAGS += $(LIBPATHS) -L$(OUTDIR)
+COMMON_LD_FLAGS += $(CUSTOM_LD_FLAGS)
+
+ifdef SAMPLE_DIR_NAME
+  OBJDIR = $(call concat,$(OUTDIR),/chobj/,$(SAMPLE_DIR_NAME),/,$(SAMPLE_DIR_NAME))
+  DOBJDIR = $(call concat,$(OUTDIR),/dchobj/,$(SAMPLE_DIR_NAME),/,$(SAMPLE_DIR_NAME))
+else
+  OBJDIR = $(call concat,$(OUTDIR),/chobj)
+  DOBJDIR = $(call concat,$(OUTDIR),/dchobj)
+endif
+
+COMMON_LIBS += $(CUDA_LIBS)
+
+ifneq ($(TARGET), qnx)
+  COMMON_LIBS += -lrt -ldl -lpthread
+endif
+
+ifeq ($(TRT_STATIC), 1)
+  COMMON_LIBS += -lz
+endif
+
+COMMON_LIBS_FOR_EXECUTABLE := $(filter-out -lcudart_static, $(COMMON_LIBS))
+ifeq ($(USE_CUDART_STATIC), 1)
+  COMMON_LIBS_FOR_EXECUTABLE += $(CUDA_LIBS)
+endif
+
+ifeq ($(SAFE_PDK), 1)
+  LIBS = $(COMMON_LIBS_FOR_EXECUTABLE) $(PROTO_LIB)
+  DLIBS = $(COMMON_LIBS_FOR_EXECUTABLE) $(PROTO_LIB)
+else ifeq ($(OUTNAME_RELEASE), trtexec)
+  LIBS = $(COMMON_LIBS_FOR_EXECUTABLE) $(PROTO_LIB)
+  DLIBS = $(COMMON_LIBS_FOR_EXECUTABLE) $(PROTO_LIB)
+else
+  LIBS = $(NVINFER_LIB) $(NVINFER_PLUGIN_LIB) $(NVONNXPARSERS_LIB) $(COMMON_LIBS_FOR_EXECUTABLE) $(PROTO_LIB)
+  DLIBS = $(NVINFER_LIB) $(NVINFER_PLUGIN_LIB) $(NVONNXPARSERS_LIB) $(COMMON_LIBS_FOR_EXECUTABLE) $(PROTO_LIB)
+endif
+
+OBJS = $(patsubst %.cpp, $(OBJDIR)/%.o, $(wildcard *.cpp $(addsuffix /*.cpp, $(EXTRA_DIRECTORIES))))
+DOBJS = $(patsubst %.cpp, $(DOBJDIR)/%.o, $(wildcard *.cpp $(addsuffix /*.cpp, $(EXTRA_DIRECTORIES))))
+CUOBJS = $(patsubst %.cu, $(OBJDIR)/%.o, $(wildcard *.cu $(addsuffix /*.cu, $(EXTRA_DIRECTORIES))))
+CUDOBJS = $(patsubst %.cu, $(DOBJDIR)/%.o, $(wildcard *.cu $(addsuffix /*.cu, $(EXTRA_DIRECTORIES))))
+
+CFLAGS = $(COMMON_FLAGS)
+CFLAGSD = $(COMMON_FLAGS) -g
+LFLAGS = $(COMMON_LD_FLAGS)
+LFLAGSD = $(COMMON_LD_FLAGS)
+
+CUFLAGS += $(GENCODES) $(INCPATHS)
+CUFLAGSD = $(CUFLAGS) -g
+
+all: debug release
+
+release: $(OUTDIR)/$(OUTNAME_RELEASE)
+debug: $(OUTDIR)/$(OUTNAME_DEBUG)
+
+test: test_debug test_release
+
+test_debug:
+	$(AT)cd $(OUTDIR) && ./$(OUTNAME_DEBUG)
+
+test_release:
+	$(AT)cd $(OUTDIR) && ./$(OUTNAME_RELEASE)
+
+$(OUTDIR)/$(OUTNAME_RELEASE): $(OBJS) $(CUOBJS)
+	$(ECHO) Linking: $@
+	$(AT)$(CC) -o $@ $(LFLAGS) -Wl,--start-group $(LIBS) $^ -Wl,--end-group -Wl,--no-relax
+
+$(OUTDIR)/$(OUTNAME_DEBUG): $(DOBJS) $(CUDOBJS)
+	$(ECHO) Linking: $@
+	$(AT)$(CC) -o $@ $(LFLAGSD) -Wl,--start-group $(DLIBS) $^ -Wl,--end-group -Wl,--no-relax
+
+$(OBJDIR)/%.o: %.cpp
+	$(AT)if [ ! -d $(OBJDIR) ]; then mkdir -p $(OBJDIR); fi
+	$(foreach XDIR, $(EXTRA_DIRECTORIES), if [ ! -d $(OBJDIR)/$(XDIR) ]; then mkdir -p $(OBJDIR)/$(XDIR); fi &&) :
+	$(call make-depend,$<,$@,$(subst .o,.d,$@))
+	$(ECHO) Compiling: $<
+	$(AT)$(CC) $(CFLAGS) -c -o $@ $<
+
+$(DOBJDIR)/%.o: %.cpp # debug C++ objects: every EXTRA_DIRECTORIES entry gets its own subdirectory under DOBJDIR
+	$(AT)if [ ! -d $(DOBJDIR) ]; then mkdir -p $(DOBJDIR); fi
+	$(foreach XDIR, $(EXTRA_DIRECTORIES), if [ ! -d $(DOBJDIR)/$(XDIR) ]; then mkdir -p $(DOBJDIR)/$(XDIR); fi &&) :
+	$(call make-depend,$<,$@,$(subst .o,.d,$@))
+	$(ECHO) Compiling: $<
+	$(AT)$(CC) $(CFLAGSD) -c -o $@ $<
+
+######################################################################### CU
+$(OBJDIR)/%.o: %.cu
+	$(AT)if [ ! -d $(OBJDIR) ]; then mkdir -p $(OBJDIR); fi
+	$(foreach XDIR, $(EXTRA_DIRECTORIES), if [ ! -d $(OBJDIR)/$(XDIR) ]; then mkdir -p $(OBJDIR)/$(XDIR); fi &&) :
+	$(call make-cuda-depend,$<,$@,$(subst .o,.d,$@))
+	$(ECHO) Compiling CUDA release: $<
+	$(AT)$(CUCC) $(CUFLAGS) -c -o $@ $<
+
+$(DOBJDIR)/%.o: %.cu
+	$(AT)if [ ! -d $(DOBJDIR) ]; then mkdir -p $(DOBJDIR); fi
+	$(foreach XDIR, $(EXTRA_DIRECTORIES), if [ ! -d $(DOBJDIR)/$(XDIR) ]; then mkdir -p $(DOBJDIR)/$(XDIR); fi &&) :
+	$(call make-cuda-depend,$<,$@,$(subst .o,.d,$@))
+	$(ECHO) Compiling CUDA debug: $<
+	$(AT)$(CUCC) $(CUFLAGSD) -c -o $@ $<
+
+clean:
+	$(ECHO) Cleaning...
+	$(foreach XDIR, $(EXTRA_DIRECTORIES), if [ -d $(OBJDIR)/$(XDIR) ]; then rm -rf $(OBJDIR)/$(XDIR); fi &&) :
+	$(foreach XDIR, $(EXTRA_DIRECTORIES), if [ -d $(DOBJDIR)/$(XDIR) ]; then rm -rf $(DOBJDIR)/$(XDIR); fi &&) :
+	$(AT)rm -rf $(OBJDIR) $(DOBJDIR) $(OUTDIR)/$(OUTNAME_RELEASE) $(OUTDIR)/$(OUTNAME_DEBUG)
+
+ifneq ($(MAKECMDGOALS), clean)
+  -include $(OBJDIR)/*.d $(DOBJDIR)/*.d
+endif
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/BatchStream.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/BatchStream.h
new file mode 100644
index 0000000000000000000000000000000000000000..d12596e2c71e1771285f1254ac4196e7f58eb7e8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/BatchStream.h
@@ -0,0 +1,381 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BATCH_STREAM_H
+#define BATCH_STREAM_H
+
+#include "NvInfer.h"
+#include "common.h"
+#include <algorithm>
+#include <stdio.h>
+#include <vector>
+
+class IBatchStream // Abstract interface for a stream of input batches and their labels.
+{
+public:
+    virtual void reset(int firstBatch) = 0;     //!< Restart the stream so that it is positioned at batch firstBatch.
+    virtual bool next() = 0;                    //!< Advance to the next batch; returns false if there is no batch left.
+    virtual void skip(int skipCount) = 0;       //!< Move the stream forward by skipCount batches.
+    virtual float* getBatch() = 0;              //!< Pointer to the float data of the current batch.
+    virtual float* getLabels() = 0;             //!< Pointer to the float labels of the current batch.
+    virtual int getBatchesRead() const = 0;     //!< Current value of the implementation's batch counter.
+    virtual int getBatchSize() const = 0;       //!< Batch size the stream was constructed with.
+    virtual nvinfer1::Dims getDims() const = 0; //!< Dimensions reported by the implementation for its data.
+}; // NOTE(review): no virtual destructor is declared, so do not delete an implementation through an IBatchStream*.
+
+class MNISTBatchStream : public IBatchStream
+{
+public:
+    MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile, const std::string& labelsFile,
+        const std::vector<std::string>& directories)
+        : mBatchSize{batchSize}
+        , mMaxBatches{maxBatches}
+        , mDims{3, {1, 28, 28}} //!< We already know the dimensions of MNIST images.
+    {
+        readDataFile(samplesCommon::locateFile(dataFile, directories));
+        readLabelsFile(samplesCommon::locateFile(labelsFile, directories));
+    }
+
+    void reset(int firstBatch) override
+    {
+        mBatchCount = firstBatch;
+    }
+
+    bool next() override
+    {
+        if (mBatchCount >= mMaxBatches)
+        {
+            return false;
+        }
+        ++mBatchCount;
+        return true;
+    }
+
+    void skip(int skipCount) override
+    {
+        mBatchCount += skipCount;
+    }
+
+    float* getBatch() override
+    {
+        return mData.data() + (mBatchCount * mBatchSize * samplesCommon::volume(mDims));
+    }
+
+    float* getLabels() override
+    {
+        return mLabels.data() + (mBatchCount * mBatchSize);
+    }
+
+    int getBatchesRead() const override
+    {
+        return mBatchCount;
+    }
+
+    int getBatchSize() const override
+    {
+        return mBatchSize;
+    }
+
+    nvinfer1::Dims getDims() const override
+    {
+        return nvinfer1::Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
+    }
+
+private:
+    //! Loads an MNIST image file (magic number 2051) into mData as floats normalized to [0, 1].
+    void readDataFile(const std::string& dataFilePath)
+    {
+        std::ifstream file{dataFilePath.c_str(), std::ios::binary};
+        ASSERT(file.is_open() && "Unable to open the MNIST image file");
+        int magicNumber{0}, numImages{0}, imageH{0}, imageW{0}; // zero-initialized so a failed read is never indeterminate
+        file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
+        // All values in the MNIST files are big endian.
+        magicNumber = samplesCommon::swapEndianness(magicNumber);
+        ASSERT(magicNumber == 2051 && "Magic Number does not match the expected value for an MNIST image set");
+        // Read number of images and dimensions
+        file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
+        file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
+        file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
+        numImages = samplesCommon::swapEndianness(numImages);
+        imageH = samplesCommon::swapEndianness(imageH);
+        imageW = samplesCommon::swapEndianness(imageW);
+        ASSERT(file && numImages >= 0 && imageH >= 0 && imageW >= 0 && "Corrupt header in the MNIST image file");
+        // The MNIST data is made up of unsigned bytes, so we need to cast to float and normalize.
+        int numElements = numImages * imageH * imageW;
+        std::vector<uint8_t> rawData(numElements);
+        file.read(reinterpret_cast<char*>(rawData.data()), numElements * sizeof(uint8_t));
+        ASSERT(file && "MNIST image file is shorter than its header claims"); // read() sets failbit on a short read
+        mData.resize(numElements);
+        std::transform(
+            rawData.begin(), rawData.end(), mData.begin(), [](uint8_t val) { return static_cast<float>(val) / 255.F; });
+    }
+
+    //! Loads an MNIST label file (magic number 2049) into mLabels, one float per label.
+    void readLabelsFile(const std::string& labelsFilePath)
+    {
+        std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
+        ASSERT(file.is_open() && "Unable to open the MNIST labels file");
+        int magicNumber{0}, numImages{0}; // zero-initialized so a failed read is never indeterminate
+        file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
+        // All values in the MNIST files are big endian.
+        magicNumber = samplesCommon::swapEndianness(magicNumber);
+        ASSERT(magicNumber == 2049 && "Magic Number does not match the expected value for an MNIST labels file");
+        file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
+        numImages = samplesCommon::swapEndianness(numImages);
+        ASSERT(file && numImages >= 0 && "Corrupt header in the MNIST labels file");
+        std::vector<uint8_t> rawLabels(numImages);
+        file.read(reinterpret_cast<char*>(rawLabels.data()), numImages * sizeof(uint8_t));
+        ASSERT(file && "MNIST labels file is shorter than its header claims"); // read() sets failbit on a short read
+        mLabels.assign(rawLabels.begin(), rawLabels.end()); // each label byte is converted to float
+    }
+
+    int mBatchSize{0};
+    int mBatchCount{0}; //!< The batch that will be read on the next invocation of next()
+    int mMaxBatches{0};
+    nvinfer1::Dims mDims{};
+    std::vector<float> mData{};
+    std::vector<float> mLabels{};
+};
+
+class BatchStream : public IBatchStream
+{
+public:
+    BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::string const& suffix,
+        std::vector<std::string> const& directories)
+        : mBatchSize(batchSize)
+        , mMaxBatches(maxBatches)
+        , mPrefix(prefix)
+        , mSuffix(suffix)
+        , mDataDir(directories)
+    {
+        std::ifstream file(
+            samplesCommon::locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(), std::ios::binary);
+        ASSERT(file.good());
+        int d[4];
+        file.read(reinterpret_cast<char*>(d), 4 * sizeof(int32_t));
+        mDims.nbDims = 4;  // The number of dimensions.
+        mDims.d[0] = d[0]; // Batch Size
+        mDims.d[1] = d[1]; // Channels
+        mDims.d[2] = d[2]; // Height
+        mDims.d[3] = d[3]; // Width
+        ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && mDims.d[3] > 0);
+
+        mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
+        mBatch.resize(mBatchSize * mImageSize, 0);
+        mLabels.resize(mBatchSize, 0);
+        mFileBatch.resize(mDims.d[0] * mImageSize, 0);
+        mFileLabels.resize(mDims.d[0], 0);
+    }
+
+    BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::vector<std::string> const& directories)
+        : BatchStream(batchSize, maxBatches, prefix, ".batch", directories)
+    {
+    }
+
+    BatchStream(int batchSize, int maxBatches, nvinfer1::Dims const& dims, std::string const& listFile,
+        std::vector<std::string> const& directories)
+        : mBatchSize(batchSize)
+        , mMaxBatches(maxBatches)
+        , mDims(dims)
+        , mListFile(listFile)
+        , mDataDir(directories)
+    {
+        mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
+        mBatch.resize(mBatchSize * mImageSize, 0);
+        mLabels.resize(mBatchSize, 0);
+        mFileBatch.resize(mDims.d[0] * mImageSize, 0);
+        mFileLabels.resize(mDims.d[0], 0);
+    }
+
+    // Resets data members
+    void reset(int firstBatch) override
+    {
+        mBatchCount = 0;
+        mFileCount = 0;
+        mFileBatchPos = mDims.d[0];
+        skip(firstBatch);
+    }
+
+    // Advance to next batch and return true, or return false if there is no batch left.
+    bool next() override
+    {
+        if (mBatchCount == mMaxBatches)
+        {
+            return false;
+        }
+
+        for (int64_t csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize)
+        {
+            ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
+            if (mFileBatchPos == mDims.d[0] && !update())
+            {
+                return false;
+            }
+
+            // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer.
+            csize = std::min<int64_t>(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
+            std::copy_n(
+                getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize);
+            std::copy_n(getFileLabels() + mFileBatchPos, csize, getLabels() + batchPos);
+        }
+        mBatchCount++;
+        return true;
+    }
+
+    // Skips skipCount batches; the value reported by getBatchesRead() is left unchanged.
+    void skip(int skipCount) override
+    {
+        if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0])
+        {
+            mFileCount += skipCount * mBatchSize / mDims.d[0]; // whole files are skipped: only advance the file index
+            return;
+        }
+
+        int x = mBatchCount; // next() increments mBatchCount, so save it here and restore it after the loop
+        for (int i = 0; i < skipCount; i++)
+        {
+            next(); // the return value is ignored
+        }
+        mBatchCount = x;
+    }
+
+    float* getBatch() override
+    {
+        return mBatch.data();
+    }
+
+    float* getLabels() override
+    {
+        return mLabels.data();
+    }
+
+    int getBatchesRead() const override
+    {
+        return mBatchCount;
+    }
+
+    int getBatchSize() const override
+    {
+        return mBatchSize;
+    }
+
+    nvinfer1::Dims getDims() const override
+    {
+        return mDims;
+    }
+
+private:
+    float* getFileBatch()
+    {
+        return mFileBatch.data();
+    }
+
+    float* getFileLabels()
+    {
+        return mFileLabels.data();
+    }
+
+    bool update()
+    {
+        if (mListFile.empty())
+        {
+            std::string inputFileName
+                = samplesCommon::locateFile(mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
+            std::ifstream file(inputFileName.c_str(), std::ios::binary);
+            if (!file)
+            {
+                return false;
+            }
+            int d[4];
+            file.read(reinterpret_cast<char*>(d), 4 * sizeof(int32_t));
+            ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && mDims.d[3] == d[3]);
+            file.read(reinterpret_cast<char*>(getFileBatch()), sizeof(float) * mDims.d[0] * mImageSize);
+            file.read(reinterpret_cast<char*>(getFileLabels()), sizeof(float) * mDims.d[0]);
+        }
+        else
+        {
+            std::vector<std::string> fNames;
+            std::ifstream file(samplesCommon::locateFile(mListFile, mDataDir), std::ios::binary);
+            if (!file)
+            {
+                return false;
+            }
+
+            sample::gLogInfo << "Batch #" << mFileCount << std::endl;
+            file.seekg(((mBatchCount * mBatchSize)) * 7);
+
+            for (int i = 1; i <= mBatchSize; i++)
+            {
+                std::string sName;
+                std::getline(file, sName);
+                sName = sName + ".ppm";
+                sample::gLogInfo << "Calibrating with file " << sName << std::endl;
+                fNames.emplace_back(sName);
+            }
+
+            mFileCount++;
+
+            const int imageC = 3;
+            const int imageH = 300;
+            const int imageW = 300;
+            std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(fNames.size());
+            for (uint32_t i = 0; i < fNames.size(); ++i)
+            {
+                readPPMFile(samplesCommon::locateFile(fNames[i], mDataDir), ppms[i]);
+            }
+
+            std::vector<float> data(samplesCommon::volume(mDims));
+            const float scale = 2.0 / 255.0;
+            const float bias = 1.0;
+            long int volChl = mDims.d[2] * mDims.d[3];
+
+            // Normalize input data
+            for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3]; i < mBatchSize; ++i)
+            {
+                for (int c = 0; c < mDims.d[1]; ++c)
+                {
+                    for (int j = 0; j < volChl; ++j)
+                    {
+                        data[i * volImg + c * volChl + j] = scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
+                    }
+                }
+            }
+
+            std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
+        }
+
+        mFileBatchPos = 0;
+        return true;
+    }
+
+    int64_t mBatchSize{0};             //!< Number of images in each batch produced by next()
+    int mMaxBatches{0};                //!< next() returns false once mBatchCount reaches this value
+    int mBatchCount{0};                //!< Number of batches produced so far
+    int mFileCount{0};                 //!< Index used to name the next batch file; incremented on every update()
+    int mFileBatchPos{0};              //!< Read position, in images, inside the file buffer
+    int mImageSize{0};                 //!< Elements per image: d[1] * d[2] * d[3]
+    std::vector<float> mBatch;         //!< Data for the batch
+    std::vector<float> mLabels;        //!< Labels for the batch
+    std::vector<float> mFileBatch;     //!< Image data loaded by the most recent update()
+    std::vector<float> mFileLabels;    //!< Labels read from the most recently loaded batch file
+    std::string mPrefix;               //!< Batch file name prefix
+    std::string mSuffix;               //!< Batch file name suffix
+    nvinfer1::Dims mDims;              //!< Input dimensions
+    std::string mListFile;             //!< File name of the list of image names
+    std::vector<std::string> mDataDir; //!< Directories where the files can be found
+};
+
+#endif
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/EntropyCalibrator.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/EntropyCalibrator.h
new file mode 100644
index 0000000000000000000000000000000000000000..67a0130ee5e08250a2ae34a514334ca2d9550c7e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/EntropyCalibrator.h
@@ -0,0 +1,136 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ENTROPY_CALIBRATOR_H
+#define ENTROPY_CALIBRATOR_H
+
+#include "BatchStream.h"
+#include "NvInfer.h"
+
+//! \class EntropyCalibratorImpl
+//!
+//! \brief Implements common functionality for Entropy calibrators.
+//!
+template <typename TBatchStream>
+class EntropyCalibratorImpl
+{
+public:
+    EntropyCalibratorImpl(TBatchStream const& stream, int firstBatch, std::string const& networkName,
+        const char* inputBlobName, bool readCache = true)
+        : mStream{stream} // keeps a private copy of the caller's batch stream
+        , mCalibrationTableName("CalibrationTable" + networkName)
+        , mInputBlobName(inputBlobName) // only the pointer is stored: the string must outlive this object
+        , mReadCache(readCache)
+    {
+        nvinfer1::Dims dims = mStream.getDims();
+        mInputCount = samplesCommon::volume(dims);
+        CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); // one float per element of getDims()
+        mStream.reset(firstBatch);
+    }
+
+    virtual ~EntropyCalibratorImpl()
+    {
+        CHECK(cudaFree(mDeviceInput));
+    }
+
+    EntropyCalibratorImpl(EntropyCalibratorImpl const&) = delete; // sole owner of mDeviceInput: no double cudaFree
+    EntropyCalibratorImpl& operator=(EntropyCalibratorImpl const&) = delete;
+
+    int getBatchSize() const noexcept { return mStream.getBatchSize(); }
+
+    bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept
+    {
+        if (!mStream.next()) // no further batch available from the stream
+        {
+            return false;
+        }
+        CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice));
+        ASSERT(!strcmp(names[0], mInputBlobName)); // only a single input, bound at index 0, is handled
+        bindings[0] = mDeviceInput;
+        return true;
+    }
+
+    const void* readCalibrationCache(size_t& length) noexcept
+    {
+        mCalibrationCache.clear();
+        std::ifstream input(mCalibrationTableName, std::ios::binary);
+        input >> std::noskipws; // keep whitespace bytes: the cache is copied verbatim
+        if (mReadCache && input.good())
+        {
+            std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
+                std::back_inserter(mCalibrationCache));
+        }
+        length = mCalibrationCache.size();
+        return length ? mCalibrationCache.data() : nullptr; // nullptr when nothing was read
+    }
+
+    void writeCalibrationCache(const void* cache, size_t length) noexcept
+    {
+        std::ofstream output(mCalibrationTableName, std::ios::binary);
+        output.write(reinterpret_cast<const char*>(cache), length);
+    }
+
+private:
+    TBatchStream mStream;              // copy of the stream passed to the constructor
+    size_t mInputCount;                // number of floats in one batch, from volume(getDims())
+    std::string mCalibrationTableName; // "CalibrationTable" + networkName; cache file path
+    const char* mInputBlobName;        // non-owning; compared against names[0] in getBatch()
+    bool mReadCache{true};             // when false, readCalibrationCache() always yields nullptr
+    void* mDeviceInput{nullptr};       // cudaMalloc'd in the constructor, cudaFree'd in the destructor
+    std::vector<char> mCalibrationCache;
+};
+
+//! \class Int8EntropyCalibrator2
+//!
+//! \brief Implements Entropy calibrator 2 on top of EntropyCalibratorImpl.
+//!  CalibrationAlgoType is kENTROPY_CALIBRATION_2.
+//!  Every override below only forwards to the EntropyCalibratorImpl<TBatchStream> member.
+template <typename TBatchStream>
+class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2
+{
+public:
+    Int8EntropyCalibrator2(TBatchStream const& stream, int32_t firstBatch, const char* networkName,
+        const char* inputBlobName, bool readCache = true)
+        : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) // networkName converts to std::string
+    {
+    }
+
+    int getBatchSize() const noexcept override
+    {
+        return mImpl.getBatchSize();
+    }
+
+    bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override
+    {
+        return mImpl.getBatch(bindings, names, nbBindings); // false once the stream has no next batch
+    }
+
+    const void* readCalibrationCache(size_t& length) noexcept override
+    {
+        return mImpl.readCalibrationCache(length); // nullptr with length 0 when no cache was read
+    }
+
+    void writeCalibrationCache(const void* cache, size_t length) noexcept override
+    {
+        mImpl.writeCalibrationCache(cache, length); // written to the file "CalibrationTable<networkName>"
+    }
+
+private:
+    EntropyCalibratorImpl<TBatchStream> mImpl; //!< Holds the stream copy, the device input buffer and the cache
+};
+
+#endif // ENTROPY_CALIBRATOR_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/ErrorRecorder.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/ErrorRecorder.h
new file mode 100644
index 0000000000000000000000000000000000000000..91ddb176f5b562b78f4688d85c025857ceb565e8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/ErrorRecorder.h
@@ -0,0 +1,138 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ERROR_RECORDER_H
+#define ERROR_RECORDER_H
+#include "NvInferRuntimeBase.h"
+#include "logger.h"
+#include <atomic>
+#include <cstdint>
+#include <exception>
+#include <mutex>
+#include <vector>
+
+using nvinfer1::IErrorRecorder;
+using nvinfer1::ErrorCode;
+
+//!
+//! A simple implementation of the IErrorRecorder interface for
+//! use by samples. This interface also can be used as a reference
+//! implementation.
+//! The sample Error recorder is based on a vector that pairs the error
+//! code and the error string into a single element. It also uses
+//! standard mutex's and atomics in order to make sure that the code
+//! works in a multi-threaded environment.
+//!
+class SampleErrorRecorder : public IErrorRecorder
+{
+    using errorPair = std::pair<ErrorCode, std::string>;
+    using errorStack = std::vector<errorPair>;
+
+public:
+    SampleErrorRecorder() = default;
+
+    ~SampleErrorRecorder() noexcept override {}
+    int32_t getNbErrors() const noexcept final
+    {
+        std::lock_guard<std::mutex> guard(mStackLock); // readers take the same lock as reportError()/clear()
+        return static_cast<int32_t>(mErrorStack.size());
+    }
+    ErrorCode getErrorCode(int32_t errorIdx) const noexcept final
+    {
+        std::lock_guard<std::mutex> guard(mStackLock);
+        return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first;
+    }
+    IErrorRecorder::ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final
+    {
+        std::lock_guard<std::mutex> guard(mStackLock); // the returned pointer is only stable until the stack changes
+        return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str();
+    }
+    // This class can never overflow since we have dynamic resize via std::vector usage.
+    bool hasOverflowed() const noexcept final { return false; }
+
+    // Empty the errorStack.
+    void clear() noexcept final
+    {
+        try
+        {
+            // grab a lock so that there is no addition while clearing.
+            std::lock_guard<std::mutex> guard(mStackLock);
+            mErrorStack.clear();
+        }
+        catch (const std::exception& e)
+        {
+            sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
+        }
+    }
+
+    //! Simple helper function that reports whether no error has been recorded.
+    bool empty() const noexcept
+    {
+        std::lock_guard<std::mutex> guard(mStackLock);
+        return mErrorStack.empty();
+    }
+
+    bool reportError(ErrorCode val, IErrorRecorder::ErrorDesc desc) noexcept final
+    {
+        try
+        {
+            std::lock_guard<std::mutex> guard(mStackLock);
+            sample::gLogError << "Error[" << static_cast<int32_t>(val) << "]: " << desc << std::endl;
+            mErrorStack.push_back(errorPair(val, desc)); // desc is copied into a std::string
+        }
+        catch (const std::exception& e)
+        {
+            sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
+        }
+        // All errors are considered fatal.
+        return true;
+    }
+
+    // Atomically increment or decrement the ref counter.
+    IErrorRecorder::RefCount incRefCount() noexcept final
+    {
+        return ++mRefCount;
+    }
+    IErrorRecorder::RefCount decRefCount() noexcept final
+    {
+        return --mRefCount;
+    }
+
+private:
+    // Simple helper functions. They do not lock: callers must already hold mStackLock.
+    const errorPair& operator[](size_t index) const noexcept
+    {
+        return mErrorStack[index];
+    }
+
+    bool invalidIndexCheck(int32_t index) const noexcept
+    {
+        // A negative index converts to a huge size_t, so one unsigned comparison rejects both bad ranges.
+        size_t sIndex = index;
+        return sIndex >= mErrorStack.size();
+    }
+    // Mutex to hold when touching mErrorStack; mutable so that const accessors can lock it.
+    mutable std::mutex mStackLock;
+
+    // Reference count of the class. Destruction of the class when mRefCount
+    // is not zero causes undefined behavior.
+    std::atomic<int32_t> mRefCount{0};
+
+    // The error stack that holds the errors recorded by TensorRT.
+    errorStack mErrorStack;
+};     // class SampleErrorRecorder
+#endif // ERROR_RECORDER_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/argsParser.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/argsParser.h
new file mode 100644
index 0000000000000000000000000000000000000000..1f0b9025c9f92f5f7d8b738926189c3110dedca3
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/argsParser.h
@@ -0,0 +1,162 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TENSORRT_ARGS_PARSER_H
+#define TENSORRT_ARGS_PARSER_H
+
+#ifdef _MSC_VER
+#include "getOptWin.h"
+#else
+#include <getopt.h>
+#endif
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace samplesCommon
+{
+
+//!
+//! \brief The SampleParams structure groups the basic parameters required by
+//!        all sample networks.
+//!
+struct SampleParams
+{
+    int32_t batchSize{1};              //!< Number of inputs in a batch
+    int32_t dlaCore{-1};               //!< Specify the DLA core to run network on.
+    bool int8{false};                  //!< Allow running the network in Int8 mode.
+    bool fp16{false};                  //!< Allow running the network in FP16 mode.
+    bool bf16{false};                  //!< Allow running the network in BF16 mode.
+    std::vector<std::string> dataDirs; //!< Directory paths where sample data files are stored
+    std::vector<std::string> inputTensorNames; //!< Presumably the network's input tensor names (not used in this file)
+    std::vector<std::string> outputTensorNames; //!< Presumably the network's output tensor names (not used here)
+    std::string timingCacheFile; //!< Path to timing cache file
+};
+
+//!
+//! \brief The OnnxSampleParams structure groups the additional parameters required by
+//!         networks that use ONNX
+//!
+struct OnnxSampleParams : public SampleParams
+{
+    std::string onnxFileName; //!< File name of the network's ONNX model
+};
+
+//!
+//! \brief Struct to maintain command-line arguments.
+//!
+struct Args
+{
+    bool runInInt8{false}; //!< Set by --int8 / -i
+    bool runInFp16{false}; //!< Set by --fp16
+    bool runInBf16{false}; //!< Set by --bf16
+    bool help{false}; //!< Set by --help / -h
+    int32_t useDLACore{-1}; //!< Value given to --useDLACore
+    int32_t batch{1}; //!< Value given to --batch
+    std::vector<std::string> dataDirs; //!< One entry per --datadir / -d occurrence
+    std::string saveEngine; //!< Value given to --saveEngine
+    std::string loadEngine; //!< Value given to --loadEngine
+    bool rowOrder{true}; //!< Cleared by --columnOrder
+    std::string timingCacheFile; //!< Value given to --timingCacheFile
+};
+
+//!
+//! \brief Populates the Args struct with the provided command-line parameters.
+//!
+//! \throw std::invalid_argument or std::out_of_range (from std::stoi) if --useDLACore/--batch is not a valid int
+//!
+//! \return boolean If return value is true, execution can continue, otherwise program should exit
+//!
+inline bool parseArgs(Args& args, int32_t argc, char* argv[])
+{
+    while (1)
+    {
+        int32_t arg;
+        static struct option long_options[]
+            = {{"help", no_argument, 0, 'h'}, {"datadir", required_argument, 0, 'd'}, {"int8", no_argument, 0, 'i'},
+                {"fp16", no_argument, 0, 'f'}, {"bf16", no_argument, 0, 'z'}, {"columnOrder", no_argument, 0, 'c'},
+                {"saveEngine", required_argument, 0, 's'}, {"loadEngine", required_argument, 0, 'o'},
+                {"useDLACore", required_argument, 0, 'u'}, {"batch", required_argument, 0, 'b'},
+                {"timingCacheFile", required_argument, 0, 't'}, {nullptr, 0, nullptr, 0}};
+        int32_t option_index = 0;
+        arg = getopt_long(argc, argv, "hd:iu:", long_options, &option_index); // -u takes a value, like --useDLACore
+        if (arg == -1) // all options consumed
+        {
+            break;
+        }
+
+        switch (arg)
+        {
+        case 'h': args.help = true; return true; // stop parsing as soon as help is requested
+        case 'd':
+            if (optarg)
+            {
+                args.dataDirs.push_back(optarg);
+            }
+            else
+            {
+                std::cerr << "ERROR: --datadir requires option argument" << std::endl;
+                return false;
+            }
+            break;
+        case 's':
+            if (optarg)
+            {
+                args.saveEngine = optarg;
+            }
+            break;
+        case 'o':
+            if (optarg)
+            {
+                args.loadEngine = optarg;
+            }
+            break;
+        case 'i': args.runInInt8 = true; break;
+        case 'f': args.runInFp16 = true; break;
+        case 'z': args.runInBf16 = true; break;
+        case 'c': args.rowOrder = false; break;
+        case 'u':
+            if (optarg)
+            {
+                args.useDLACore = std::stoi(optarg); // may throw on non-numeric input
+            }
+            break;
+        case 'b':
+            if (optarg)
+            {
+                args.batch = std::stoi(optarg); // may throw on non-numeric input
+            }
+            break;
+        case 't':
+            if (optarg)
+            {
+                args.timingCacheFile = optarg;
+            }
+            else
+            {
+                std::cerr << "ERROR: --timingCacheFile requires option argument" << std::endl;
+                return false;
+            }
+            break;
+        default: return false; // unknown option or missing required value ('?' from getopt_long)
+        }
+    }
+    return true;
+}
+
+} // namespace samplesCommon
+
+#endif // TENSORRT_ARGS_PARSER_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/bfloat16.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/bfloat16.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8222826ae40f38395e1c7b96cbbc3fd8045c8df9
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/bfloat16.cpp
@@ -0,0 +1,60 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bfloat16.h"
+#include <cstring>
+
+namespace sample
+{
+
+BFloat16::operator float() const
+{
+    static_assert(sizeof(float) == sizeof(uint32_t), "");
+    uint32_t const widened = static_cast<uint32_t>(mRep) << 16; // stored bits form the upper half of the FP32 word
+    float result{0.F};
+    std::memcpy(&result, &widened, sizeof(result));
+    return result;
+}
+
+BFloat16::BFloat16(float x)
+{
+    static_assert(sizeof(uint32_t) == sizeof(float), "");
+    uint32_t bits{0};
+    std::memcpy(&bits, &x, sizeof(float));
+
+    // FP32 format: 1 sign bit, 8 bit exponent, 23 bit mantissa
+    // BF16 format: 1 sign bit, 8 bit exponent, 7 bit mantissa
+    constexpr uint32_t exponent = 0xFFU << 23; // Mask for exponent
+    if ((bits & exponent) != exponent) // exponent not all 1s
+    {
+        // x is finite - round to even
+        bits += 0x7FFFU + (bits >> 16 & 1);
+    }
+    else if ((bits & 0x007FFFFFU) != 0)
+    {
+        // x is NaN - set the top mantissa bit so that dropping the low 16 bits cannot leave an infinity pattern
+        bits |= 0x00400000U;
+    }
+    mRep = static_cast<uint16_t>(bits >> 16); // infinities fall through unchanged
+}
+
+BFloat16 operator+(BFloat16 x, BFloat16 y)
+{
+    return BFloat16(float(x) + float(y)); // add in FP32, then convert the sum back to BFloat16
+}
+
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/bfloat16.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/bfloat16.h
new file mode 100644
index 0000000000000000000000000000000000000000..0d0ab92229633962f9f62ce1f7518a0d6a6912d8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/bfloat16.h
@@ -0,0 +1,46 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+namespace sample
+{
+
+//! Implements "Brain Floating Point": like an IEEE FP32,
+//! but the significand is only 7 bits instead of 23 bits.
+class BFloat16
+{
+public:
+    //! Creates a value whose stored representation is zero.
+    BFloat16() = default;
+
+    //! Converts from FP32.
+    //! Rounds to even if there is a tie.
+    BFloat16(float x);
+
+    //! Converts back to FP32.
+    operator float() const;
+
+private:
+    //! Value stored in BFloat16 representation.
+    uint16_t mRep{0};
+};
+BFloat16 operator+(BFloat16 x, BFloat16 y);
+
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/buffers.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/buffers.h
new file mode 100644
index 0000000000000000000000000000000000000000..57ed4b387105f7f3d0a665b551e4526241817364
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/buffers.h
@@ -0,0 +1,456 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TENSORRT_BUFFERS_H
+#define TENSORRT_BUFFERS_H
+
+#include "NvInfer.h"
+#include "common.h"
+#include "half.h"
+#include <cassert>
+#include <cuda_runtime_api.h>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <new>
+#include <numeric>
+#include <string>
+#include <vector>
+
+namespace samplesCommon
+{
+
+//!
+//! \brief  The GenericBuffer class is a templated class for buffers.
+//!
+//! \details This templated RAII (Resource Acquisition Is Initialization) class handles the allocation,
+//!          deallocation, querying of buffers on both the device and the host.
+//!          It can handle data of arbitrary types because it stores byte buffers.
+//!          The template parameters AllocFunc and FreeFunc are used for the
+//!          allocation and deallocation of the buffer.
+//!          AllocFunc must be a functor that takes in (void** ptr, size_t size)
+//!          and returns bool. ptr is a pointer to where the allocated buffer address should be stored.
+//!          size is the amount of memory in bytes to allocate.
+//!          The boolean indicates whether or not the memory allocation was successful.
+//!          FreeFunc must be a functor that takes in (void* ptr) and returns void.
+//!          ptr is the allocated buffer address. It must work with nullptr input.
+//!
+template <typename AllocFunc, typename FreeFunc>
+class GenericBuffer
+{
+public:
+    //!
+    //! \brief Construct an empty buffer.
+    //!
+    GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
+        : mSize(0)
+        , mCapacity(0)
+        , mType(type)
+        , mBuffer(nullptr)
+    {
+    }
+
+    //!
+    //! \brief Construct a buffer holding size elements of the given type. Throws std::bad_alloc on failure.
+    //!
+    GenericBuffer(size_t size, nvinfer1::DataType type)
+        : mSize(size)
+        , mCapacity(size)
+        , mType(type)
+    {
+        if (!allocFn(&mBuffer, this->nbBytes())) // byte count is derived from (type, size)
+        {
+            throw std::bad_alloc();
+        }
+    }
+
+    GenericBuffer(GenericBuffer&& buf)
+        : mSize(buf.mSize)
+        , mCapacity(buf.mCapacity)
+        , mType(buf.mType)
+        , mBuffer(buf.mBuffer)
+    {
+        buf.mSize = 0; // leave the source empty so its destructor frees nothing
+        buf.mCapacity = 0;
+        buf.mType = nvinfer1::DataType::kFLOAT;
+        buf.mBuffer = nullptr;
+    }
+
+    GenericBuffer& operator=(GenericBuffer&& buf)
+    {
+        if (this != &buf)
+        {
+            freeFn(mBuffer); // release our own storage before adopting buf's
+            mSize = buf.mSize;
+            mCapacity = buf.mCapacity;
+            mType = buf.mType;
+            mBuffer = buf.mBuffer;
+            buf.mType = nvinfer1::DataType::kFLOAT; // Reset buf to the same empty state the move constructor leaves.
+            buf.mSize = 0;
+            buf.mCapacity = 0;
+            buf.mBuffer = nullptr;
+        }
+        return *this;
+    }
+
+    //!
+    //! \brief Returns pointer to underlying array.
+    //!
+    void* data()
+    {
+        return mBuffer;
+    }
+
+    //!
+    //! \brief Returns pointer to underlying array.
+    //!
+    const void* data() const
+    {
+        return mBuffer;
+    }
+
+    //!
+    //! \brief Returns the size (in number of elements) of the buffer.
+    //!
+    size_t size() const
+    {
+        return mSize;
+    }
+
+    //!
+    //! \brief Returns the size (in bytes) of the buffer.
+    //!
+    size_t nbBytes() const
+    {
+        return samplesCommon::getNbBytes(mType, size());
+    }
+
+    //! \brief Resizes the buffer; reallocates (discarding contents) only if newSize exceeds the current capacity.
+    void resize(size_t newSize)
+    {
+        if (mCapacity < newSize)
+        {
+            freeFn(mBuffer);
+            mBuffer = nullptr; // forget the freed block so a failed allocFn cannot lead to a second free
+            mSize = mCapacity = 0;
+            if (!allocFn(&mBuffer, samplesCommon::getNbBytes(mType, newSize)))
+            {
+                throw std::bad_alloc{};
+            }
+            mCapacity = newSize;
+        }
+        mSize = newSize;
+    }
+
+    //!
+    //! \brief Overload of resize that accepts Dims
+    //!
+    void resize(const nvinfer1::Dims& dims)
+    {
+        return this->resize(samplesCommon::volume(dims));
+    }
+
+    ~GenericBuffer()
+    {
+        freeFn(mBuffer);
+    }
+
+private:
+    size_t mSize{0}, mCapacity{0}; // both counted in elements, not bytes
+    nvinfer1::DataType mType;
+    void* mBuffer;
+    AllocFunc allocFn;
+    FreeFunc freeFn;
+};
+
+class DeviceAllocator
+{
+public:
+    //! Allocation functor for GenericBuffer backed by cudaMalloc.
+    //! Stores the address of a size-byte allocation in *ptr.
+    //! \return true when cudaMalloc reports cudaSuccess, false otherwise.
+    bool operator()(void** ptr, size_t size) const { return cudaSuccess == cudaMalloc(ptr, size); }
+};
+
+class DeviceFree
+{
+public:
+    //! Deallocation functor for GenericBuffer backed by cudaFree.
+    //! Hands ptr to cudaFree.
+    //! The status returned by cudaFree is deliberately ignored.
+    void operator()(void* ptr) const { static_cast<void>(cudaFree(ptr)); }
+};
+
+class HostAllocator
+{
+public:
+    //! Allocation functor backed by malloc: *ptr receives the result, and the return value is true unless it is null.
+    bool operator()(void** ptr, size_t size) const
+    {
+        return (*ptr = malloc(size)) != nullptr;
+    }
+};
+
+class HostFree
+{
+public:
+    //! Deallocation functor for GenericBuffer backed by free.
+    //! Releases memory obtained from HostAllocator.
+    //! ptr may be nullptr.
+    void operator()(void* ptr) const { free(ptr); }
+};
+
+using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>; //!< Buffer allocated with cudaMalloc / cudaFree
+using HostBuffer = GenericBuffer<HostAllocator, HostFree>; //!< Buffer allocated with malloc / free
+
+//!
+//! \brief  The ManagedBuffer class groups together a pair of corresponding device and host buffers.
+//!
+class ManagedBuffer
+{
+public:
+    DeviceBuffer deviceBuffer; //!< Device-side half of the pair (GenericBuffer using DeviceAllocator/DeviceFree)
+    HostBuffer hostBuffer; //!< Host-side half of the pair (GenericBuffer using HostAllocator/HostFree)
+};
+
+//!
+//! \brief  The BufferManager class handles host and device buffer allocation and deallocation.
+//!
+//! \details This RAII class handles host and device buffer allocation and deallocation,
+//!          memcpy between host and device buffers to aid with inference,
+//!          and debugging dumps to validate inference. The BufferManager class is meant to be
+//!          used to simplify buffer management and any interactions between buffers and the engine.
+//!
+class BufferManager
+{
+public:
+    static const size_t kINVALID_SIZE_VALUE = ~size_t(0); //!< All bits set; returned by size() for unknown names
+
+    //!
+    //! \brief Create a BufferManager for handling buffer interactions with engine, when the I/O tensor volumes
+    //! are provided. volumes[i] is used as the element count of the i-th I/O tensor.
+    //!
+    BufferManager(
+        std::shared_ptr<nvinfer1::ICudaEngine> engine, std::vector<int64_t> const& volumes, int32_t batchSize = 0)
+        : mEngine(engine)
+        , mBatchSize(batchSize)
+    {
+        // Create host and device buffers
+        for (int32_t i = 0; i < mEngine->getNbIOTensors(); i++)
+        {
+            auto const name = engine->getIOTensorName(i);
+            mNames[name] = i; // remember which managed buffer belongs to this tensor name
+
+            nvinfer1::DataType type = mEngine->getTensorDataType(name);
+
+            std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
+            manBuf->deviceBuffer = DeviceBuffer(volumes[i], type); // volumes is indexed without a bounds check
+            manBuf->hostBuffer = HostBuffer(volumes[i], type);
+            void* deviceBuffer = manBuf->deviceBuffer.data();
+            mDeviceBindings.emplace_back(deviceBuffer);
+            mManagedBuffers.emplace_back(std::move(manBuf));
+        }
+    }
+
+    //!
+    //! \brief Create a BufferManager for handling buffer interactions with engine.
+    //!        Tensor shapes are read from context when it is given, otherwise from the engine.
+    BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine, int32_t const batchSize = 0,
+        nvinfer1::IExecutionContext const* context = nullptr)
+        : mEngine(engine)
+        , mBatchSize(batchSize)
+    {
+        // Create host and device buffers
+        for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
+        {
+            auto const name = engine->getIOTensorName(i);
+            mNames[name] = i;
+
+            auto dims = context ? context->getTensorShape(name) : mEngine->getTensorShape(name);
+            size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize); // batch factor only w/o context
+            nvinfer1::DataType type = mEngine->getTensorDataType(name);
+            int32_t vecDim = mEngine->getTensorVectorizedDim(name);
+            if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
+            {
+                int32_t scalarsPerVec = mEngine->getTensorComponentsPerElement(name);
+                dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec); // round the vectorized dim up to whole vectors
+                vol *= scalarsPerVec;
+            }
+            vol *= samplesCommon::volume(dims);
+            std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
+            manBuf->deviceBuffer = DeviceBuffer(vol, type);
+            manBuf->hostBuffer = HostBuffer(vol, type);
+            void* deviceBuffer = manBuf->deviceBuffer.data();
+            mDeviceBindings.emplace_back(deviceBuffer);
+            mManagedBuffers.emplace_back(std::move(manBuf));
+        }
+    }
+
+    //!
+    //! \brief Returns a vector of device buffers that you can use directly as
+    //!        bindings for the execute and enqueue methods of IExecutionContext.
+    //!
+    std::vector<void*>& getDeviceBindings()
+    {
+        return mDeviceBindings;
+    }
+
+    //!
+    //! \brief Returns a vector of device buffers.
+    //!
+    std::vector<void*> const& getDeviceBindings() const
+    {
+        return mDeviceBindings;
+    }
+
+    //!
+    //! \brief Returns the device buffer corresponding to tensorName.
+    //!        Returns nullptr if no such tensor can be found.
+    //!
+    void* getDeviceBuffer(std::string const& tensorName) const
+    {
+        return getBuffer(false, tensorName);
+    }
+
+    //!
+    //! \brief Returns the host buffer corresponding to tensorName.
+    //!        Returns nullptr if no such tensor can be found.
+    //!
+    void* getHostBuffer(std::string const& tensorName) const
+    {
+        return getBuffer(true, tensorName);
+    }
+
+    //!
+    //! \brief Returns the size in bytes of the host and device buffers that correspond to tensorName.
+    //!        Returns kINVALID_SIZE_VALUE if no such tensor can be found.
+    //!
+    size_t size(std::string const& tensorName) const
+    {
+        auto record = mNames.find(tensorName);
+        if (record == mNames.end())
+            return kINVALID_SIZE_VALUE;
+        return mManagedBuffers[record->second]->hostBuffer.nbBytes();
+    }
+
+    //!
+    //! \brief Templated print function that dumps buffers of arbitrary type to std::ostream.
+    //!        bufSize is in bytes and must be a multiple of sizeof(T).
+    //!        rowCount parameter controls how many elements are on each line (1 means one element per line).
+    //!
+    template <typename T>
+    void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount)
+    {
+        assert(rowCount != 0);
+        assert(bufSize % sizeof(T) == 0);
+        T* typedBuf = static_cast<T*>(buf);
+        size_t numItems = bufSize / sizeof(T);
+        for (int32_t i = 0; i < static_cast<int>(numItems); i++)
+        {
+            // Handle rowCount == 1 case
+            if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
+                os << typedBuf[i] << std::endl;
+            else if (rowCount == 1) // last element: no trailing newline
+                os << typedBuf[i];
+            // Handle rowCount > 1 case
+            else if (i % rowCount == 0) // first element of a row: no leading space
+                os << typedBuf[i];
+            else if (i % rowCount == rowCount - 1) // last element of a row: end the line
+                os << " " << typedBuf[i] << std::endl;
+            else
+                os << " " << typedBuf[i];
+        }
+    }
+
+    //!
+    //! \brief Copy the contents of input host buffers to input device buffers synchronously.
+    //!
+    void copyInputToDevice()
+    {
+        memcpyBuffers(true, false, false);
+    }
+
+    //!
+    //! \brief Copy the contents of output device buffers to output host buffers synchronously.
+    //!
+    void copyOutputToHost()
+    {
+        memcpyBuffers(false, true, false);
+    }
+
+    //!
+    //! \brief Copy the contents of input host buffers to input device buffers asynchronously.
+    //!
+    void copyInputToDeviceAsync(cudaStream_t const& stream = 0)
+    {
+        memcpyBuffers(true, false, true, stream);
+    }
+
+    //!
+    //! \brief Copy the contents of output device buffers to output host buffers asynchronously.
+    //!
+    void copyOutputToHostAsync(cudaStream_t const& stream = 0)
+    {
+        memcpyBuffers(false, true, true, stream);
+    }
+
+    ~BufferManager() = default;
+
+private:
+    void* getBuffer(bool const isHost, std::string const& tensorName) const
+    {
+        auto record = mNames.find(tensorName);
+        if (record == mNames.end())
+            return nullptr; // unknown tensor name
+        return (isHost ? mManagedBuffers[record->second]->hostBuffer.data()
+                       : mManagedBuffers[record->second]->deviceBuffer.data());
+    }
+
+    bool tenosrIsInput(const std::string& tensorName) const // NOTE(review): "tenosr" is a typo for "tensor"
+    {
+        return mEngine->getTensorIOMode(tensorName.c_str()) == nvinfer1::TensorIOMode::kINPUT;
+    }
+
+    void memcpyBuffers(bool const copyInput, bool const deviceToHost, bool const async, cudaStream_t const& stream = 0)
+    {
+        for (auto const& n : mNames) // n.first is the tensor name, n.second its index into mManagedBuffers
+        {
+            void* dstPtr = deviceToHost ? mManagedBuffers[n.second]->hostBuffer.data()
+                                        : mManagedBuffers[n.second]->deviceBuffer.data();
+            void const* srcPtr = deviceToHost ? mManagedBuffers[n.second]->deviceBuffer.data()
+                                              : mManagedBuffers[n.second]->hostBuffer.data();
+            size_t const byteSize = mManagedBuffers[n.second]->hostBuffer.nbBytes();
+            const cudaMemcpyKind memcpyType = deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
+            if ((copyInput && tenosrIsInput(n.first)) || (!copyInput && !tenosrIsInput(n.first))) // inputs xor outputs
+            {
+                if (async)
+                    CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
+                else
+                    CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
+            }
+        }
+    }
+
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine;              //!< The pointer to the engine
+    int mBatchSize;                                              //!< The batch size for legacy networks, 0 otherwise.
+    std::vector<std::unique_ptr<ManagedBuffer>> mManagedBuffers; //!< The vector of pointers to managed buffers
+    std::vector<void*> mDeviceBindings;              //!< The vector of device buffers needed for engine execution
+    std::unordered_map<std::string, int32_t> mNames; //!< The map of tensor name and index pairs
+};
+
+} // namespace samplesCommon
+
+#endif // TENSORRT_BUFFERS_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/common.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/common.h
new file mode 100644
index 0000000000000000000000000000000000000000..c0b1d321af6a29dc4cccd7ed6daacbec9e9fbd06
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/common.h
@@ -0,0 +1,1110 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_COMMON_H
+#define TENSORRT_COMMON_H
+#include "NvInfer.h"
+#include "NvInferPlugin.h"
+#include "logger.h"
+#include "sampleEntrypoints.h"
+#include "utils/cacheUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <cmath>
+#include <cstring>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <fstream>
+#include <functional>
+#include <iomanip>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <new>
+#include <numeric>
+#include <ratio>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#ifdef _MSC_VER
+// For loadLibrary
+// Needed so that the max/min definitions in windows.h do not conflict with std::max/min.
+#define NOMINMAX
+#include <windows.h>
+#undef NOMINMAX
+#else
+#include <dlfcn.h>
+#endif
+
+#ifdef _MSC_VER
+#define FN_NAME __FUNCTION__
+#else
+#define FN_NAME __func__
+#endif
+
+#if defined(__aarch64__) || defined(__QNX__)
+#define ENABLE_DLA_API 1
+#endif
+
+using namespace nvinfer1;
+
+//! If \p status evaluates to false, log \p errMsg together with the file, function and line to
+//! sample::gLogError and return \p val from the enclosing function.
+//! Wrapped in do { } while (0) so that it behaves like a single statement.
+#define CHECK_RETURN_W_MSG(status, val, errMsg)                                                                        \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        if (!(status))                                                                                                 \
+        {                                                                                                              \
+            sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " << FN_NAME << "(), line "         \
+                              << __LINE__ << std::endl;                                                                \
+            return val;                                                                                                \
+        }                                                                                                              \
+    } while (0)
+
+// Always-on assertion: unlike <cassert>'s assert(), nothing in this definition depends on NDEBUG.
+// On failure the stringified condition is logged to sample::gLogError and the process exits with
+// EXIT_FAILURE (it does not throw). Any previous definition of ASSERT is discarded first.
+#undef ASSERT
+#define ASSERT(condition)                                                                                              \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        if (!(condition))                                                                                              \
+        {                                                                                                              \
+            sample::gLogError << "Assertion failure: " << #condition << std::endl;                                     \
+            exit(EXIT_FAILURE);                                                                                        \
+        }                                                                                                              \
+    } while (0)
+
+//! Same as CHECK_RETURN_W_MSG with an empty message prefix.
+#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "")
+
+// Check the result of a CUDA runtime call. On any result other than cudaSuccess the error string
+// is written to \p stream (an output stream expression such as std::cerr) and the process exits.
+//
+// The \p status expression is evaluated exactly once and cached: callers pass the CUDA call itself
+// (e.g. CHECK(cudaMalloc(...))), so evaluating it a second time to build the message would re-run
+// the failing call.
+#undef CHECK_WITH_STREAM
+#define CHECK_WITH_STREAM(status, stream)                                                                              \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        auto const checkedCudaStatus_ = (status);                                                                      \
+        if (checkedCudaStatus_ != cudaSuccess)                                                                         \
+        {                                                                                                              \
+            stream << "Cuda failure at " << __FILE__ << ":" << __LINE__ << ": "                                        \
+                   << cudaGetErrorString(checkedCudaStatus_) << std::endl;                                             \
+            exit(EXIT_FAILURE);                                                                                        \
+        }                                                                                                              \
+    } while (0)
+
+// Convenience form of CHECK_WITH_STREAM that reports to std::cerr.
+#undef CHECK
+#define CHECK(status) CHECK_WITH_STREAM(status, std::cerr)
+
+// Binary size literals for floating-point values: 1.5_GiB, 256.0_MiB, 4.0_KiB yield a byte count.
+// The suffix is written directly after the "" token: the spaced form `operator"" _GiB` is
+// deprecated and turns `_GiB` into a stand-alone identifier starting with underscore + uppercase,
+// which is reserved.
+
+//! \return \p val gibibytes expressed in bytes.
+constexpr long double operator""_GiB(long double val)
+{
+    return val * (1 << 30);
+}
+//! \return \p val mebibytes expressed in bytes.
+constexpr long double operator""_MiB(long double val)
+{
+    return val * (1 << 20);
+}
+//! \return \p val kibibytes expressed in bytes.
+constexpr long double operator""_KiB(long double val)
+{
+    return val * (1 << 10);
+}
+
+//! Minimal nvinfer1::IProfiler implementation that accumulates, per layer name, the total reported
+//! time and the number of reports, and can print the result as a table.
+struct SimpleProfiler : public nvinfer1::IProfiler
+{
+    //! Accumulated statistics for one layer.
+    struct Record
+    {
+        float time{0}; //!< Sum of all reported times, in ms.
+        int count{0};  //!< Number of times the layer was reported.
+    };
+
+    //! Profiler callback: add \p ms to the layer's total and bump its invocation count.
+    //! Layers are remembered in the order in which they are first reported.
+    void reportLayerTime(const char* layerName, float ms) noexcept override
+    {
+        Record& rec = recordFor(layerName);
+        rec.count++;
+        rec.time += ms;
+    }
+
+    //! \param name         Label printed in the report header and footer.
+    //! \param srcProfilers Optional profilers whose records are summed into this one.
+    //!
+    //! Both the statistics and the layer ordering of the sources are merged; the printed table is
+    //! driven by the ordered name list, so without the names a merged profiler would print no rows.
+    SimpleProfiler(const char* name, const std::vector<SimpleProfiler>& srcProfilers = std::vector<SimpleProfiler>())
+        : mName(name)
+    {
+        for (const auto& srcProfiler : srcProfilers)
+        {
+            for (const auto& layerName : srcProfiler.mLayerNames)
+            {
+                const auto srcIt = srcProfiler.mProfile.find(layerName);
+                if (srcIt == srcProfiler.mProfile.end())
+                {
+                    continue;
+                }
+                Record& dst = recordFor(layerName);
+                dst.time += srcIt->second.time;
+                dst.count += srcIt->second.count;
+            }
+        }
+    }
+
+    //! Print one row per layer (share of total time, invocation count, accumulated ms) followed by
+    //! the total runtime. The stream's format flags and precision are restored before the footer.
+    friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value)
+    {
+        out << "========== " << value.mName << " profile ==========" << std::endl;
+        float totalTime = 0;
+        std::string layerNameStr = "TensorRT layer name";
+        int maxLayerNameLength = std::max(static_cast<int>(layerNameStr.size()), 70);
+        for (const auto& elem : value.mProfile)
+        {
+            totalTime += elem.second.time;
+            maxLayerNameLength = std::max(maxLayerNameLength, static_cast<int>(elem.first.size()));
+        }
+
+        auto old_settings = out.flags();
+        auto old_precision = out.precision();
+        // Output header
+        {
+            out << std::setfill(' ') << std::setw(maxLayerNameLength) << layerNameStr << " ";
+            out << std::setw(12) << "Runtime, "
+                << "%"
+                << " ";
+            out << std::setw(12) << "Invocations"
+                << " ";
+            out << std::setw(12) << "Runtime, ms" << std::endl;
+        }
+        for (const std::string& layerName : value.mLayerNames)
+        {
+            const Record& elem = value.mProfile.at(layerName);
+            // Avoid printing nan/inf when nothing took measurable time.
+            const float percent = totalTime > 0.0F ? (elem.time * 100.0F / totalTime) : 0.0F;
+            out << std::setw(maxLayerNameLength) << layerName << " ";
+            out << std::setw(12) << std::fixed << std::setprecision(1) << percent << "%"
+                << " ";
+            out << std::setw(12) << elem.count << " ";
+            out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time << std::endl;
+        }
+        out.flags(old_settings);
+        out.precision(old_precision);
+        out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl;
+
+        return out;
+    }
+
+private:
+    //! Return the record for \p layerName, creating it (and appending the name to the ordered
+    //! list) on first use. Keeps mLayerNames and mProfile in sync with a single map lookup.
+    Record& recordFor(const std::string& layerName)
+    {
+        const auto inserted = mProfile.emplace(layerName, Record{});
+        if (inserted.second)
+        {
+            mLayerNames.push_back(layerName);
+        }
+        return inserted.first->second;
+    }
+
+    std::string mName;                      //!< Label used in the printed report.
+    std::vector<std::string> mLayerNames;   //!< Layer names in first-seen order.
+    std::map<std::string, Record> mProfile; //!< Statistics keyed by layer name.
+};
+
+namespace samplesCommon
+{
+using nvinfer1::utils::loadCacheFile;
+using nvinfer1::utils::buildTimingCacheFromFile;
+using nvinfer1::utils::saveCacheFile;
+using nvinfer1::utils::updateTimingCacheFile;
+
+//! Take ownership of \p obj in a std::shared_ptr.
+//! \throw std::runtime_error if \p obj is null.
+template <typename T>
+inline std::shared_ptr<T> infer_object(T* obj)
+{
+    if (obj != nullptr)
+    {
+        return std::shared_ptr<T>(obj);
+    }
+    throw std::runtime_error("Failed to create object");
+}
+
+// Swaps endianness of an integral type.
+//
+// The bytes are moved with std::memcpy rather than by dereferencing a reinterpret_cast'ed byte
+// array: reading a T through a uint8_t[] violates strict aliasing and the array carries no
+// alignment guarantee for T.
+template <typename T, typename std::enable_if_t<std::is_integral<T>::value, int> = 0>
+inline T swapEndianness(const T& value)
+{
+    uint8_t bytes[sizeof(T)];
+    std::memcpy(bytes, &value, sizeof(T));
+    std::reverse(bytes, bytes + sizeof(T));
+    T swapped;
+    std::memcpy(&swapped, bytes, sizeof(T));
+    return swapped;
+}
+
+//! Base class describing a host allocation: pointer, element count as passed to the constructor,
+//! and element data type. It never allocates or frees mData itself; derived classes are expected
+//! to set it (the constructor only initialises it to nullptr).
+class HostMemory
+{
+public:
+    HostMemory() = delete;
+    virtual ~HostMemory() = default;
+
+    //! \return the stored data pointer.
+    virtual void* data() const noexcept
+    {
+        return mData;
+    }
+    //! \return the size value given at construction.
+    virtual std::size_t size() const noexcept
+    {
+        return mSize;
+    }
+    //! \return the data type given at construction.
+    virtual nvinfer1::DataType type() const noexcept
+    {
+        return mType;
+    }
+
+protected:
+    //! Only derived classes may construct; mData starts out as nullptr.
+    HostMemory(std::size_t size, nvinfer1::DataType type)
+        : mData{nullptr}
+        , mSize(size)
+        , mType(type)
+    {
+    }
+
+    void* mData;
+    std::size_t mSize;
+    nvinfer1::DataType mType;
+};
+
+//! HostMemory that owns a heap array of \p size elements of type ElemType and reports \p dataType
+//! as its element type.
+template <typename ElemType, nvinfer1::DataType dataType>
+class TypedHostMemory : public HostMemory
+{
+public:
+    //! Allocate (without value-initialising) \p size elements.
+    explicit TypedHostMemory(std::size_t size)
+        : HostMemory(size, dataType)
+    {
+        mData = new ElemType[size];
+    }
+
+    // The array is owned through a raw pointer, so an implicit member-wise copy would make two
+    // objects delete[] the same allocation. Forbid copying instead.
+    TypedHostMemory(TypedHostMemory const&) = delete;
+    TypedHostMemory& operator=(TypedHostMemory const&) = delete;
+
+    ~TypedHostMemory() noexcept override
+    {
+        delete[] static_cast<ElemType*>(mData);
+    }
+
+    //! \return the buffer as a typed pointer.
+    ElemType* raw() noexcept
+    {
+        return static_cast<ElemType*>(data());
+    }
+};
+
+using FloatMemory = TypedHostMemory<float, nvinfer1::DataType::kFLOAT>;
+using HalfMemory = TypedHostMemory<uint16_t, nvinfer1::DataType::kHALF>;
+using ByteMemory = TypedHostMemory<uint8_t, nvinfer1::DataType::kINT8>;
+
+//! Allocate \p memSize bytes with cudaMalloc.
+//! A CUDA error (via CHECK) or a null result terminates the process, so the returned pointer is
+//! never null.
+inline void* safeCudaMalloc(size_t memSize)
+{
+    void* devicePtr;
+    CHECK(cudaMalloc(&devicePtr, memSize));
+    if (devicePtr != nullptr)
+    {
+        return devicePtr;
+    }
+    std::cerr << "Out of memory" << std::endl;
+    exit(EXIT_FAILURE);
+}
+
+//! \return true when the TENSORRT_DEBUG environment variable exists (its value is not inspected).
+inline bool isDebug()
+{
+    return std::getenv("TENSORRT_DEBUG") != nullptr;
+}
+
+//! Deleter functor that releases an object with a plain `delete`.
+struct InferDeleter
+{
+    //! Destroy \p obj; deleting a null pointer is a no-op.
+    template <typename T>
+    void operator()(T* obj) const
+    {
+        delete obj;
+    }
+};
+
+template <typename T>
+using SampleUniquePtr = std::unique_ptr<T>;
+
+//! Deleter for a heap-allocated cudaStream_t: destroys the stream (ignoring the result) and then
+//! frees the storage. A null pointer is ignored.
+static auto StreamDeleter = [](cudaStream_t* pStream) {
+    if (pStream == nullptr)
+    {
+        return;
+    }
+    static_cast<void>(cudaStreamDestroy(*pStream));
+    delete pStream;
+};
+
+//! Create a non-blocking CUDA stream owned by a unique_ptr.
+//! \return the owning pointer, or an empty pointer if stream creation failed.
+inline std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> makeCudaStream()
+{
+    // Value-initialise the handle so it never holds an indeterminate value.
+    std::unique_ptr<cudaStream_t, decltype(StreamDeleter)> pStream(new cudaStream_t{}, StreamDeleter);
+    if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) != cudaSuccess)
+    {
+        // No stream was created, so only the storage must be freed. reset() would run
+        // StreamDeleter and hand a handle that was never created to cudaStreamDestroy.
+        delete pStream.release();
+    }
+
+    return pStream;
+}
+
+//! Return the indices of [begin, end) ordered so that the referenced elements have descending
+//! absolute value. The relative order of elements with equal magnitude is unspecified
+//! (std::sort is not stable).
+template <class Iter>
+std::vector<size_t> argMagnitudeSort(Iter begin, Iter end)
+{
+    std::vector<size_t> order(end - begin);
+    std::iota(order.begin(), order.end(), 0);
+    auto const largerMagnitudeFirst
+        = [&begin](size_t lhs, size_t rhs) { return std::abs(begin[rhs]) < std::abs(begin[lhs]); };
+    std::sort(order.begin(), order.end(), largerMagnitudeFirst);
+    return order;
+}
+
+//! Append every non-empty line of \p fileName to \p refVector.
+//! \return false (after printing an error to stdout) if the file cannot be opened, true otherwise.
+inline bool readReferenceFile(const std::string& fileName, std::vector<std::string>& refVector)
+{
+    std::ifstream infile(fileName);
+    if (!infile.is_open())
+    {
+        std::cout << "ERROR: readReferenceFile: Attempting to read from a file that is not open." << std::endl;
+        return false;
+    }
+    for (std::string line; std::getline(infile, line);)
+    {
+        if (!line.empty())
+        {
+            refVector.push_back(line);
+        }
+    }
+    infile.close();
+    return true;
+}
+
+//! Map the \p topK largest-magnitude entries of \p output to their labels in \p refVector.
+//!
+//! \return at most \p topK labels, strongest first; fewer when \p output has fewer than \p topK
+//!         entries (the original loop indexed past the end of the index vector in that case).
+//! \throw std::out_of_range if a selected index has no corresponding label in \p refVector.
+template <typename T>
+std::vector<std::string> classify(
+    const std::vector<std::string>& refVector, const std::vector<T>& output, const size_t topK)
+{
+    const auto inds = samplesCommon::argMagnitudeSort(output.cbegin(), output.cend());
+    const size_t count = std::min(topK, inds.size());
+    std::vector<std::string> result;
+    result.reserve(count);
+    for (size_t k = 0; k < count; ++k)
+    {
+        // at() turns a label/score size mismatch into an exception instead of an out-of-bounds read.
+        result.push_back(refVector.at(inds[k]));
+    }
+    return result;
+}
+
+// Returns the indices of the k highest-magnitude elements of v, strongest first.
+// The result always has exactly k entries: when k exceeds v.size() the tail is filled with
+// value-initialised (zero) indices by std::vector::resize.
+template <typename T>
+std::vector<size_t> topKMagnitudes(const std::vector<T>& v, const size_t k)
+{
+    auto ranked = samplesCommon::argMagnitudeSort(v.cbegin(), v.cend());
+    ranked.resize(k);
+    return ranked;
+}
+
+//! Replace the contents of \p out with all whitespace-separated values of type T parsed from
+//! \p fileName. \p size is only a capacity hint; it does not limit how many values are read.
+//! \return false (after printing an error to stdout) if the file cannot be opened, true otherwise.
+template <typename T>
+bool readASCIIFile(const std::string& fileName, const size_t size, std::vector<T>& out)
+{
+    std::ifstream infile(fileName);
+    if (!infile.is_open())
+    {
+        std::cout << "ERROR readASCIIFile: Attempting to read from a file that is not open." << std::endl;
+        return false;
+    }
+    out.clear();
+    out.reserve(size);
+    std::istream_iterator<T> first(infile);
+    std::istream_iterator<T> const last;
+    out.assign(first, last);
+    infile.close();
+    return true;
+}
+
+//! Write every element of \p in to \p fileName, one per line.
+//! \return false (after printing an error to stdout) if the file cannot be opened, true otherwise.
+template <typename T>
+bool writeASCIIFile(const std::string& fileName, const std::vector<T>& in)
+{
+    std::ofstream outfile(fileName);
+    if (!outfile.is_open())
+    {
+        std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is not open." << std::endl;
+        return false;
+    }
+    for (const auto& value : in)
+    {
+        outfile << value << "\n";
+    }
+    outfile.close();
+    return true;
+}
+
+//! Print the TensorRT version this file was compiled against (the NV_TENSORRT_* macros) to stdout
+//! as "major.minor.patch.build".
+inline void print_version()
+{
+    std::cout << "  TensorRT version: " << NV_TENSORRT_MAJOR << "." << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH
+              << "." << NV_TENSORRT_BUILD << std::endl;
+}
+
+//! \return everything after the last '.' in \p filepath.
+//! When there is no '.', find_last_of yields npos, npos + 1 wraps to 0, and the whole string is
+//! returned unchanged.
+inline std::string getFileType(const std::string& filepath)
+{
+    const std::string::size_type lastDot = filepath.find_last_of(".");
+    return filepath.substr(lastDot + 1);
+}
+
+//! \return a copy of \p inp with every character converted by tolower().
+inline std::string toLower(const std::string& inp)
+{
+    std::string out = inp;
+    // tolower() requires a value representable as unsigned char (or EOF); passing a plain char
+    // that is negative (any byte >= 0x80 where char is signed) is undefined behaviour.
+    std::transform(out.begin(), out.end(), out.begin(),
+        [](unsigned char c) { return static_cast<char>(::tolower(c)); });
+    return out;
+}
+
+//! \return the largest of the \p size floats starting at \p buffer.
+//! The preconditions (non-null buffer, size > 0) are checked with assert() only, so they are not
+//! enforced when NDEBUG is defined.
+inline float getMaxValue(const float* buffer, int64_t size)
+{
+    assert(buffer != nullptr);
+    assert(size > 0);
+    return *std::max_element(buffer, buffer + size);
+}
+
+// Ensures that every tensor used by a network has a dynamic range set.
+//
+// All tensors in a network must have a dynamic range specified if a calibrator is not used.
+// This function is just a utility to globally fill in missing scales and zero-points for the entire network.
+//
+// If a tensor does not have a dynamic range set, it is assigned inRange or outRange as follows:
+//
+// * If the tensor is the input to a layer or output of a pooling node, its dynamic range is derived from inRange.
+// * Otherwise its dynamic range is derived from outRange.
+//
+// The default parameter values are intended to demonstrate, for final layers in the network,
+// cases where dynamic ranges are asymmetric.
+//
+// The default parameter values are chosen arbitrarily. Range values should be chosen such that
+// we avoid underflow or overflow. Range values should also be non-zero to avoid a uniform zero-scale tensor.
+//! Give every tensor of \p network that has no dynamic range yet a symmetric one:
+//! [-inRange, inRange] for layer inputs and pooling outputs, [-outRange, outRange] for all other
+//! layer outputs. A rejected range terminates the process through ASSERT.
+inline void setAllDynamicRanges(nvinfer1::INetworkDefinition* network, float inRange = 2.0F, float outRange = 4.0F)
+{
+    // First pass: every layer input gets a scale.
+    for (int layerIdx = 0; layerIdx < network->getNbLayers(); layerIdx++)
+    {
+        auto layer = network->getLayer(layerIdx);
+        for (int inputIdx = 0; inputIdx < layer->getNbInputs(); inputIdx++)
+        {
+            nvinfer1::ITensor* input{layer->getInput(inputIdx)};
+            // Optional inputs are nullptr here and are from RNN layers.
+            if (input == nullptr || input->dynamicRangeIsSet())
+            {
+                continue;
+            }
+            ASSERT(input->setDynamicRange(-inRange, inRange));
+        }
+    }
+
+    // Second pass: every layer output gets a scale.
+    // Tensors that are also inputs to layers are ignored here
+    // since the first pass already assigned scales to them.
+    for (int layerIdx = 0; layerIdx < network->getNbLayers(); layerIdx++)
+    {
+        auto layer = network->getLayer(layerIdx);
+        for (int outputIdx = 0; outputIdx < layer->getNbOutputs(); outputIdx++)
+        {
+            nvinfer1::ITensor* output{layer->getOutput(outputIdx)};
+            // Optional outputs are nullptr here and are from RNN layers.
+            if (output == nullptr || output->dynamicRangeIsSet())
+            {
+                continue;
+            }
+            if (layer->getType() != nvinfer1::LayerType::kPOOLING)
+            {
+                ASSERT(output->setDynamicRange(-outRange, outRange));
+            }
+            else
+            {
+                // Pooling must have the same input and output scales.
+                ASSERT(output->setDynamicRange(-inRange, inRange));
+            }
+        }
+    }
+}
+
+//! When the builder config \p c has the kINT8 flag set, warn and assign the default dummy dynamic
+//! ranges to every tensor of network \p n; otherwise do nothing.
+inline void setDummyInt8DynamicRanges(const nvinfer1::IBuilderConfig* c, nvinfer1::INetworkDefinition* n)
+{
+    if (!c->getFlag(nvinfer1::BuilderFlag::kINT8))
+    {
+        return;
+    }
+
+    // Set dummy per-tensor dynamic range since Int8 mode is requested.
+    sample::gLogWarning << "Int8 calibrator not provided. Generating dummy per-tensor dynamic range. Int8 accuracy "
+                           "is not guaranteed."
+                        << std::endl;
+    setAllDynamicRanges(n);
+}
+
+//! Configure \p config to build for DLA core \p useDLACore; a negative core index is a no-op.
+//!
+//! Sets kGPU_FALLBACK when \p allowGPUFallback is true, enables kFP16 unless kINT8 is already
+//! set, and makes DLA the default device type. A platform without DLA cores is reported on
+//! stderr and trips an assert() (which is compiled out under NDEBUG).
+inline void enableDLA(
+    nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, int useDLACore, bool allowGPUFallback = true)
+{
+    if (useDLACore < 0)
+    {
+        return;
+    }
+
+    if (builder->getNbDLACores() == 0)
+    {
+        std::cerr << "Trying to use DLA core " << useDLACore << " on a platform that doesn't have any DLA cores"
+                  << std::endl;
+        assert("Error: use DLA core on a platfrom that doesn't have any DLA cores" && false);
+    }
+
+    if (allowGPUFallback)
+    {
+        config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
+    }
+
+    // User has not requested INT8 Mode.
+    // By default run in FP16 mode. FP32 mode is not permitted.
+    bool const int8Requested = config->getFlag(nvinfer1::BuilderFlag::kINT8);
+    if (!int8Requested)
+    {
+        config->setFlag(nvinfer1::BuilderFlag::kFP16);
+    }
+
+    config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
+    config->setDLACore(useDLACore);
+}
+
+//! Scan the command line (skipping argv[0]) for the first "--useDLACore=<n>" argument.
+//! \return the parsed core index, or -1 if no such argument is present.
+//! \throw whatever std::stoi throws when the text after '=' is not a valid integer.
+inline int32_t parseDLA(int32_t argc, char** argv)
+{
+    constexpr char kOPTION_PREFIX[] = "--useDLACore=";
+    constexpr size_t kPREFIX_LENGTH = sizeof(kOPTION_PREFIX) - 1; // 13, excludes the terminator
+
+    for (int32_t argIdx = 1; argIdx < argc; argIdx++)
+    {
+        char const* const arg = argv[argIdx];
+        if (strncmp(arg, kOPTION_PREFIX, kPREFIX_LENGTH) != 0)
+        {
+            continue;
+        }
+        return std::stoi(arg + kPREFIX_LENGTH);
+    }
+    return -1;
+}
+
+//! \return the number of bytes occupied by \p vol elements of data type \p t.
+//!
+//! 4-bit types (kINT4, kFP4) use half a byte per element, rounded up to a whole byte.
+//! kFP8 and kE8M0 are only sized when CUDA_VERSION is recent enough; otherwise, and for any
+//! enumerator not listed, ASSERT terminates the process.
+inline size_t getNbBytes(nvinfer1::DataType t, int64_t vol) noexcept
+{
+    switch (t)
+    {
+    case nvinfer1::DataType::kINT64: return 8 * vol;
+    case nvinfer1::DataType::kINT32:
+    case nvinfer1::DataType::kFLOAT: return 4 * vol;
+    case nvinfer1::DataType::kBF16:
+    case nvinfer1::DataType::kHALF: return 2 * vol;
+    case nvinfer1::DataType::kBOOL:
+    case nvinfer1::DataType::kUINT8:
+    case nvinfer1::DataType::kINT8: return vol;
+    case nvinfer1::DataType::kFP8:
+#if CUDA_VERSION < 11060
+        // ASSERT(false) exits the process, so control never falls through to the next case.
+        ASSERT(false && "FP8 is not supported");
+#else
+        return vol;
+#endif
+    case nvinfer1::DataType::kE8M0:
+#if CUDA_VERSION < 12080
+        ASSERT(false && "E8M0 is not supported");
+#else
+        return vol;
+#endif // CUDA_VERSION < 12080
+    case nvinfer1::DataType::kINT4:
+    case nvinfer1::DataType::kFP4: return (vol + 1) / 2;
+    }
+    // Reached only for a value outside the enumerators handled above; ASSERT exits, so the
+    // function never actually returns from here.
+    ASSERT(false && "Unknown element type");
+}
+
+// Return least integer no less than exact value of m/n.
+// Only participates in overload resolution for integral A and B; the result has type A.
+// n must be positive (enforced with ASSERT). The rounding is done as (m + n - 1) / n, so the
+// stated result holds for non-negative m for which m + n - 1 does not overflow.
+template <typename A, typename B>
+inline auto divUp(A m, B n) -> typename std::enable_if_t<std::is_integral<A>::value && std::is_integral<B>::value, A>
+{
+    ASSERT(n > 0);
+    return (m + n - 1) / n;
+}
+
+//! \return the product of the first d.nbDims extents of \p d (1 when nbDims is 0).
+//! No check is made for negative extents.
+inline int64_t volume(nvinfer1::Dims const& d)
+{
+    return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, std::multiplies<int64_t>{});
+}
+
+//! \return the product of the extents of \p dims in the half-open index range [start, stop)
+//!         (1 when the range is empty).
+//!
+//! ASSERT terminates the process unless 0 <= start <= stop <= dims.nbDims and every extent in
+//! the range is non-negative.
+inline int64_t volume(nvinfer1::Dims const& dims, int32_t start, int32_t stop)
+{
+    ASSERT(start >= 0);
+    ASSERT(start <= stop);
+    ASSERT(stop <= dims.nbDims);
+    // Compare the extents as int64_t, the type they are multiplied as below; taking them as
+    // int32_t would narrow a 64-bit extent before the sign test.
+    ASSERT(std::all_of(dims.d + start, dims.d + stop, [](int64_t x) { return x >= 0; }));
+    return std::accumulate(dims.d + start, dims.d + stop, int64_t{1}, std::multiplies<int64_t>{});
+}
+
+//! Locate path to file, given its filename or filepath suffix and possible dirs it might lie in.
+//! For each directory the candidate path is also tried with up to MAX_DEPTH - 1 leading "../"
+//! components, i.e. relative to ancestors of the current working directory.
+//!
+//! \param filepathSuffix file name or trailing part of the path to look for.
+//! \param directories    candidate directories, searched in order; may be empty.
+//! \param reportError    when true, failing to find the file prints "&&&& FAILED" and exits.
+//! \return the first path that could be opened, or an empty string when nothing was found and
+//!         \p reportError is false.
+inline std::string locateFile(
+    const std::string& filepathSuffix, const std::vector<std::string>& directories, bool reportError = true)
+{
+    const int MAX_DEPTH{10};
+    bool found{false};
+    std::string filepath;
+
+    for (auto& dir : directories)
+    {
+        if (!dir.empty() && dir.back() != '/')
+        {
+#ifdef _MSC_VER
+            filepath = dir + "\\" + filepathSuffix;
+#else
+            filepath = dir + "/" + filepathSuffix;
+#endif
+        }
+        else
+        {
+            filepath = dir + filepathSuffix;
+        }
+
+        for (int i = 0; i < MAX_DEPTH && !found; i++)
+        {
+            const std::ifstream checkFile(filepath);
+            found = checkFile.is_open();
+            if (found)
+            {
+                break;
+            }
+
+            filepath = "../" + filepath; // Try again in parent dir
+        }
+
+        if (found)
+        {
+            break;
+        }
+
+        filepath.clear();
+    }
+
+    // Could not find the file
+    if (filepath.empty())
+    {
+        // Join by index rather than with directories.front()/begin() + 1, which are undefined
+        // behaviour when the caller passes an empty directory list.
+        std::string dirList;
+        for (size_t i = 0; i < directories.size(); ++i)
+        {
+            if (i > 0)
+            {
+                dirList += "\n\t";
+            }
+            dirList += directories[i];
+        }
+        std::cout << "Could not find " << filepathSuffix << " in data directories:\n\t" << dirList << std::endl;
+
+        if (reportError)
+        {
+            std::cout << "&&&& FAILED" << std::endl;
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    return filepath;
+}
+
+//! Read inH * inW bytes of pixel data from the PGM file \p fileName into \p buffer.
+//!
+//! The four header tokens (magic, width, height, max value) are parsed and discarded; the
+//! amount read is determined solely by \p inH and \p inW, so \p buffer must hold at least
+//! inH * inW bytes. A file that cannot be opened terminates the process through ASSERT.
+inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int32_t inH, int32_t inW)
+{
+    std::ifstream infile(fileName, std::ifstream::binary);
+    ASSERT(infile.is_open() && "Attempting to read from a file that is not open.");
+    std::string magic, w, h, max;
+    infile >> magic >> w >> h >> max;
+    // Skip the single separator byte between the header and the pixel data.
+    infile.seekg(1, infile.cur);
+    infile.read(reinterpret_cast<char*>(buffer), inH * inW);
+}
+//! PPM image with a fixed-capacity pixel buffer of C * H * W bytes.
+//! h, w and max are the values parsed from the file header by readPPMFile; they are independent
+//! of the compile-time H and W.
+template <int C, int H, int W>
+struct PPM
+{
+    std::string magic, fileName;
+    int h, w, max;
+    uint8_t buffer[C * H * W];
+};
+
+// vPPM (variable-sized PPM): same fields as PPM, but the pixel data lives in a std::vector so
+// the image dimensions do not have to be known at compile time.
+struct vPPM
+{
+    std::string magic, fileName;
+    int h, w, max;
+    std::vector<uint8_t> buffer;
+};
+
+//! Axis-aligned box given by two corners. writePPMFileWithBBox draws y1 as the top border, y2 as
+//! the bottom border, x1 as the left border and x2 as the right border, in pixel coordinates.
+struct BBox
+{
+    float x1, y1, x2, y2;
+};
+
+//! Read the PPM file \p filename into the fixed-capacity image \p ppm.
+//!
+//! The process is terminated through ASSERT (the same always-on check readPGMFile uses, instead
+//! of an assert() that disappears under NDEBUG) when the file cannot be opened, the header
+//! cannot be parsed, or the image described by the header does not fit into ppm.buffer.
+template <int C, int H, int W>
+void readPPMFile(const std::string& filename, samplesCommon::PPM<C, H, W>& ppm)
+{
+    ppm.fileName = filename;
+    std::ifstream infile(filename, std::ifstream::binary);
+    ASSERT(infile.is_open() && "Attempting to read from a file that is not open.");
+    infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
+    ASSERT(!infile.fail() && "Malformed PPM header.");
+    ASSERT(ppm.w >= 0 && ppm.h >= 0 && "Negative PPM dimensions.");
+
+    // The header comes from the file, the capacity from the template arguments: check the
+    // former against the latter before writing into the fixed-size array.
+    int64_t const nbBytes = int64_t{ppm.w} * int64_t{ppm.h} * 3;
+    ASSERT(nbBytes <= static_cast<int64_t>(sizeof(ppm.buffer)) && "PPM image does not fit into the buffer.");
+
+    // Skip the single separator byte between the header and the pixel data.
+    infile.seekg(1, infile.cur);
+    infile.read(reinterpret_cast<char*>(ppm.buffer), nbBytes);
+}
+
+//! Read a PPM file, located through locateFile(\p filename, \p input_dir), into \p ppm.
+//!
+//! ppm.buffer is replaced (not appended to) and sized to exactly w * h * 3 bytes as announced by
+//! the header. A header that yields no positive size leaves the buffer empty and reads nothing.
+inline void readPPMFile(const std::string& filename, vPPM& ppm, std::vector<std::string>& input_dir)
+{
+    ppm.fileName = filename;
+    std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary);
+    infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max;
+    // Skip the single separator byte between the header and the pixel data.
+    infile.seekg(1, infile.cur);
+
+    // Compute the size in size_t so that large images cannot overflow an int product.
+    size_t const nbBytes
+        = (ppm.w > 0 && ppm.h > 0) ? static_cast<size_t>(ppm.w) * static_cast<size_t>(ppm.h) * 3U : 0U;
+
+    // assign() discards previous contents; pushing zeros onto a reused vPPM would leave stale
+    // bytes behind the freshly read image.
+    ppm.buffer.assign(nbBytes, 0);
+
+    if (nbBytes > 0)
+    {
+        infile.read(reinterpret_cast<char*>(ppm.buffer.data()), static_cast<std::streamsize>(nbBytes));
+    }
+}
+
+//! Draw a one-pixel red outline of \p bbox into \p ppm (modifying it) and write the image to
+//! "./<filename>" as a binary P6 file.
+//!
+//! The corner coordinates are rounded to the nearest pixel and clamped to [0, W-1] x [0, H-1].
+template <int C, int H, int W>
+void writePPMFileWithBBox(const std::string& filename, PPM<C, H, W>& ppm, const BBox& bbox)
+{
+    std::ofstream outfile("./" + filename, std::ofstream::binary);
+    assert(!outfile.fail());
+    outfile << "P6"
+            << "\n"
+            << ppm.w << " " << ppm.h << "\n"
+            << ppm.max << "\n";
+
+    // Round the float coordinate itself: casting to int first truncates it and turns the
+    // rounding into a no-op.
+    auto round = [](float x) -> int { return int(std::floor(x + 0.5F)); };
+    const int x1 = std::min(std::max(0, round(bbox.x1)), W - 1);
+    const int x2 = std::min(std::max(0, round(bbox.x2)), W - 1);
+    const int y1 = std::min(std::max(0, round(bbox.y1)), H - 1);
+    const int y2 = std::min(std::max(0, round(bbox.y2)), H - 1);
+
+    // Set the RGB triple at (row, col) to pure red.
+    auto paintRed = [&ppm](int row, int col) {
+        const int base = (row * ppm.w + col) * 3;
+        ppm.buffer[base] = 255;
+        ppm.buffer[base + 1] = 0;
+        ppm.buffer[base + 2] = 0;
+    };
+
+    for (int x = x1; x <= x2; ++x)
+    {
+        paintRed(y1, x); // bbox top border
+        paintRed(y2, x); // bbox bottom border
+    }
+
+    for (int y = y1; y <= y2; ++y)
+    {
+        paintRed(y, x1); // bbox left border
+        paintRed(y, x2); // bbox right border
+    }
+
+    outfile.write(reinterpret_cast<char*>(ppm.buffer), ppm.w * ppm.h * 3);
+}
+
+//! Draw a one-pixel red outline for every box in \p dets into a private copy of \p ppm (it is
+//! taken by value, so the caller's image is untouched) and write the result to "./<filename>"
+//! as a binary P6 file.
+//!
+//! All coordinates are clamped to the image, so boxes that extend past the image edges are
+//! clipped instead of writing outside the pixel buffer. Nothing is drawn when the image has no
+//! area or the buffer is smaller than w * h * 3 bytes.
+inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, std::vector<BBox>& dets)
+{
+    std::ofstream outfile("./" + filename, std::ofstream::binary);
+    assert(!outfile.fail());
+    outfile << "P6"
+            << "\n"
+            << ppm.w << " " << ppm.h << "\n"
+            << ppm.max << "\n";
+    auto round = [](float x) -> int { return int(std::floor(x + 0.5F)); };
+
+    const size_t expectedBytes
+        = (ppm.w > 0 && ppm.h > 0) ? static_cast<size_t>(ppm.w) * static_cast<size_t>(ppm.h) * 3U : 0U;
+    const bool drawable = expectedBytes > 0 && ppm.buffer.size() >= expectedBytes;
+
+    if (drawable)
+    {
+        // Set the RGB triple at (row, col) to pure red; callers pass in-range coordinates only.
+        auto paintRed = [&ppm](int row, int col) {
+            const size_t base = (static_cast<size_t>(row) * static_cast<size_t>(ppm.w) + static_cast<size_t>(col)) * 3U;
+            ppm.buffer[base] = 255;
+            ppm.buffer[base + 1] = 0;
+            ppm.buffer[base + 2] = 0;
+        };
+        auto clampCol = [&ppm](int col) { return std::min(std::max(0, col), ppm.w - 1); };
+        auto clampRow = [&ppm](int row) { return std::min(std::max(0, row), ppm.h - 1); };
+
+        for (const auto& bbox : dets)
+        {
+            const int top = clampRow(round(bbox.y1));
+            const int bottom = clampRow(round(bbox.y2));
+            const int left = clampCol(round(bbox.x1));
+            const int right = clampCol(round(bbox.x2));
+
+            // Horizontal edges span [int(x1), int(x2)), clipped to the image width.
+            const int xBegin = std::max(0, int(bbox.x1));
+            const int xEnd = std::min(ppm.w, int(bbox.x2));
+            for (int x = xBegin; x < xEnd; ++x)
+            {
+                paintRed(top, x);    // bbox top border
+                paintRed(bottom, x); // bbox bottom border
+            }
+
+            // Vertical edges span [int(y1), int(y2)), clipped to the image height.
+            const int yBegin = std::max(0, int(bbox.y1));
+            const int yEnd = std::min(ppm.h, int(bbox.y2));
+            for (int y = yBegin; y < yEnd; ++y)
+            {
+                paintRed(y, left);  // bbox left border
+                paintRed(y, right); // bbox right border
+            }
+        }
+    }
+
+    // Never read past the end of the buffer, even if it is shorter than the header claims.
+    const size_t bytesToWrite = std::min(expectedBytes, ppm.buffer.size());
+    if (bytesToWrite > 0)
+    {
+        outfile.write(reinterpret_cast<char*>(ppm.buffer.data()), static_cast<std::streamsize>(bytesToWrite));
+    }
+}
+
+//! Common interface for timers that accumulate elapsed time in milliseconds.
+//! start()/stop() do nothing here; derived classes override them and add to mMs.
+class TimerBase
+{
+public:
+    // The class is meant to be derived from and used polymorphically, so destroying a derived
+    // timer through a TimerBase pointer must be well defined.
+    virtual ~TimerBase() = default;
+
+    virtual void start() {}
+    virtual void stop() {}
+    //! \return accumulated time in microseconds.
+    float microseconds() const noexcept
+    {
+        return mMs * 1000.F;
+    }
+    //! \return accumulated time in milliseconds.
+    float milliseconds() const noexcept
+    {
+        return mMs;
+    }
+    //! \return accumulated time in seconds.
+    float seconds() const noexcept
+    {
+        return mMs / 1000.F;
+    }
+    //! Discard the accumulated time.
+    void reset() noexcept
+    {
+        mMs = 0.F;
+    }
+
+protected:
+    float mMs{0.0F}; //!< Accumulated time in milliseconds.
+};
+
+//! Timer based on a pair of CUDA events recorded on the stream given at construction.
+//! Every CUDA call is wrapped in CHECK, so any CUDA error terminates the process.
+class GpuTimer : public TimerBase
+{
+public:
+    //! Create the start/stop events; \p stream is the stream they will be recorded on.
+    explicit GpuTimer(cudaStream_t stream)
+        : mStream(stream)
+    {
+        CHECK(cudaEventCreate(&mStart));
+        CHECK(cudaEventCreate(&mStop));
+    }
+
+    ~GpuTimer()
+    {
+        CHECK(cudaEventDestroy(mStart));
+        CHECK(cudaEventDestroy(mStop));
+    }
+
+    //! Record the start event on the stream.
+    void start() override
+    {
+        CHECK(cudaEventRecord(mStart, mStream));
+    }
+
+    //! Record the stop event, wait for it, and add the start-to-stop time to the total.
+    void stop() override
+    {
+        CHECK(cudaEventRecord(mStop, mStream));
+        CHECK(cudaEventSynchronize(mStop));
+        float elapsedMs{0.0F};
+        CHECK(cudaEventElapsedTime(&elapsedMs, mStart, mStop));
+        mMs += elapsedMs;
+    }
+
+private:
+    cudaEvent_t mStart, mStop;
+    cudaStream_t mStream;
+}; // class GpuTimer
+
+//! Host-side timer measuring with the given std::chrono-style \p Clock.
+template <typename Clock>
+class CpuTimer : public TimerBase
+{
+public:
+    using clock_type = Clock;
+
+    //! Remember the current time as the start of the interval.
+    void start() override
+    {
+        mStart = Clock::now();
+    }
+
+    //! Add the time elapsed since the last start() to the total, in milliseconds.
+    void stop() override
+    {
+        mStop = Clock::now();
+        std::chrono::duration<float, std::milli> const elapsed{mStop - mStart};
+        mMs += elapsed.count();
+    }
+
+private:
+    std::chrono::time_point<Clock> mStart, mStop;
+}; // class CpuTimer
+
+using PreciseCpuTimer = CpuTimer<std::chrono::high_resolution_clock>;
+
+//! Split \p str at every occurrence of \p delimiter.
+//! Empty fields are kept: "" yields {""}, "a," yields {"a", ""} and ",a" yields {"", "a"}.
+inline std::vector<std::string> splitString(std::string str, char delimiter = ',')
+{
+    std::vector<std::string> tokens;
+    std::string::size_type tokenStart = 0;
+    for (;;)
+    {
+        std::string::size_type const delimPos = str.find(delimiter, tokenStart);
+        if (delimPos == std::string::npos)
+        {
+            // Last field: everything that remains (possibly nothing).
+            tokens.emplace_back(str.substr(tokenStart));
+            break;
+        }
+        tokens.emplace_back(str.substr(tokenStart, delimPos - tokenStart));
+        tokenStart = delimPos + 1;
+    }
+    return tokens;
+}
+
+//! \return the third-from-last extent of \p d, or 1 if \p d has fewer than three dimensions.
+//! (Presumably the channel count of a ...CHW shape -- the layout itself is not checked here.)
+inline int getC(nvinfer1::Dims const& d)
+{
+    return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1;
+}
+
+//! \return the second-from-last extent of \p d, or 1 if \p d has fewer than two dimensions.
+//! (Presumably the height of a ...HW shape -- the layout itself is not checked here.)
+inline int getH(const nvinfer1::Dims& d)
+{
+    return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1;
+}
+
+//! \return the last extent of \p d, or 1 if \p d has no dimensions.
+//! (Presumably the width of a ...HW shape -- the layout itself is not checked here.)
+inline int getW(const nvinfer1::Dims& d)
+{
+    return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1;
+}
+
+//! Platform-agnostic wrapper around dynamic libraries.
+//! The library is opened in the constructor and closed in the destructor; the object can be
+//! neither copied nor moved.
+class DynamicLibrary
+{
+public:
+    //! Open the library \p name (LoadLibraryA on Windows, dlopen with RTLD_LAZY elsewhere).
+    //! \throw std::runtime_error if the library cannot be opened.
+    explicit DynamicLibrary(std::string const& name)
+        : mLibName{name}
+    {
+#if defined(_WIN32)
+        mHandle = LoadLibraryA(name.c_str());
+#else // defined(_WIN32)
+        int32_t flags{RTLD_LAZY};
+#if ENABLE_ASAN
+        // https://github.com/google/sanitizers/issues/89
+        // asan doesn't handle module unloading correctly and there are no plans on doing
+        // so. In order to get proper stack traces, don't delete the shared library on
+        // close so that asan can resolve the symbols correctly.
+        flags |= RTLD_NODELETE;
+#endif // ENABLE_ASAN
+
+        mHandle = dlopen(name.c_str(), flags);
+#endif // defined(_WIN32)
+
+        if (mHandle == nullptr)
+        {
+            std::string errorStr{};
+#if !defined(_WIN32)
+            // dlerror() may return a null pointer, which must not be used to build a std::string.
+            char const* const reason = dlerror();
+            errorStr = std::string{" due to "} + std::string{reason != nullptr ? reason : "unknown error"};
+#endif
+            throw std::runtime_error("Unable to open library: " + name + errorStr);
+        }
+    }
+
+    DynamicLibrary(DynamicLibrary const&) = delete;
+    DynamicLibrary(DynamicLibrary const&&) = delete;
+
+    //!
+    //! Retrieve a function symbol from the loaded library.
+    //!
+    //! \return the loaded symbol on success
+    //! \throw std::invalid_argument if loading the symbol failed.
+    //! \throw std::runtime_error if the library handle is null.
+    //!
+    template <typename Signature>
+    std::function<Signature> symbolAddress(char const* name)
+    {
+        if (mHandle == nullptr)
+        {
+            throw std::runtime_error("Handle to library is nullptr.");
+        }
+        void* ret;
+        // Use the same platform test as the constructor and destructor so that the handle is
+        // always queried with the API family that created it.
+#if defined(_WIN32)
+        ret = reinterpret_cast<void*>(GetProcAddress(static_cast<HMODULE>(mHandle), name));
+#else
+        ret = dlsym(mHandle, name);
+#endif
+        if (ret == nullptr)
+        {
+            std::string const kERROR_MSG(mLibName + ": error loading symbol: " + std::string(name));
+            throw std::invalid_argument(kERROR_MSG);
+        }
+        return reinterpret_cast<Signature*>(ret);
+    }
+
+    //! Close the library. A failure to close is logged and otherwise ignored: ASSERT would call
+    //! exit() from inside a destructor and never throws, so a try/catch around it could not
+    //! report anything.
+    ~DynamicLibrary()
+    {
+        try
+        {
+#if defined(_WIN32)
+            bool const closed = static_cast<bool>(FreeLibrary(static_cast<HMODULE>(mHandle)));
+#else
+            bool const closed = dlclose(mHandle) == 0;
+#endif
+            if (!closed)
+            {
+                sample::gLogError << "Unable to close library: " << mLibName << std::endl;
+            }
+        }
+        catch (...)
+        {
+            // Logging itself failed; a destructor must not let the exception escape.
+        }
+    }
+
+private:
+    std::string mLibName{}; //!< Name of the DynamicLibrary
+    void* mHandle{};        //!< Handle to the DynamicLibrary
+};
+
+inline std::unique_ptr<DynamicLibrary> loadLibrary(std::string const& path)
+{
+    // Plain `new` instead of std::make_unique, which only exists from C++14 on (C++11 builds are still supported).
+    return std::unique_ptr<DynamicLibrary>{new DynamicLibrary(path)};
+}
+
+//! Represents the compute capability of a device.
+//! This pertains to virtual architectures represented by the intermediate PTX format.
+//! This is distinct from the SM version.
+//! See https://forums.developer.nvidia.com/t/how-should-i-use-correctly-the-sm-xx-and-compute-xx/219160
+struct ComputeCapability
+{
+    int32_t major{};
+    int32_t minor{};
+
+    //! \return the compute capability of the CUDA device with the given \p deviceIndex.
+    [[nodiscard]] static ComputeCapability forDevice(int32_t deviceIndex)
+    {
+        int32_t ccMajor{0};
+        int32_t ccMinor{0};
+        CHECK(cudaDeviceGetAttribute(&ccMajor, cudaDevAttrComputeCapabilityMajor, deviceIndex));
+        CHECK(cudaDeviceGetAttribute(&ccMinor, cudaDevAttrComputeCapabilityMinor, deviceIndex));
+        // Report 12.1 as 12.0: dependencies do not support 12.1 yet, and 12.1 can reuse the 12.0 cubins,
+        // which saves library size and compile time.
+        bool const isTwelveOne = (ccMajor == 12) && (ccMinor == 1);
+        int32_t const reportedMinor = isTwelveOne ? 0 : ccMinor;
+
+        ComputeCapability const result{ccMajor, reportedMinor};
+        return result;
+    }
+};
+
+inline int32_t getSMVersion()
+{
+    int32_t currentDevice = 0;
+    CHECK(cudaGetDevice(&currentDevice));
+    // Pack as (major << 8) | minor, e.g. a reported capability of 8.6 becomes 0x0806.
+    auto const capability = ComputeCapability::forDevice(currentDevice);
+    return (capability.major << 8) | capability.minor;
+}
+
+inline bool isSMSafe()
+{
+    int32_t const sm = getSMVersion(); // encoded as (major << 8) | minor, so 0x0806 stands for 8.6
+    return !(sm != 0x0705 && sm != 0x0800 && sm != 0x0806 && sm != 0x0807);
+}
+
+inline int32_t getMaxPersistentCacheSize()
+{
+    int32_t currentDevice{};
+    CHECK(cudaGetDevice(&currentDevice));
+
+    // Stays 0 when built against a CUDA runtime older than 11.3, because the attribute is then never queried.
+    int32_t maxPersistingL2{};
+#if CUDART_VERSION >= 11030
+    CHECK(cudaDeviceGetAttribute(&maxPersistingL2, cudaDevAttrMaxPersistingL2CacheSize, currentDevice));
+#endif
+    return maxPersistingL2;
+}
+
+inline bool isDataTypeSupported(nvinfer1::DataType dataType)
+{
+    // NOTE(review): `dataType` is never inspected; the result only reports whether createBuilder()
+    // yielded a builder. Confirm whether a per-type capability query was intended here.
+    auto const builder = SampleUniquePtr<nvinfer1::IBuilder>(createBuilder());
+
+    // Contextual conversion to bool, i.e. the same test as `!builder` with the branches swapped.
+    bool const haveBuilder = builder ? true : false;
+    return haveBuilder;
+}
+} // namespace samplesCommon
+
+inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims)
+{
+    // Prints the extents as "(d0, d1, ...)"; nothing appears between the parentheses when nbDims <= 0.
+    os << "(";
+    for (int axis = 0; axis < dims.nbDims; ++axis)
+    {
+        os << ((axis == 0) ? "" : ", ") << dims.d[axis];
+    }
+    return os << ")";
+}
+
+[[nodiscard]] inline std::string genFilenameSafeString(std::string_view s)
+{
+    constexpr std::string_view kALLOWED{"._-,"};
+    constexpr size_t kMAX_FILENAME_LENGTH = 150; // Leave some margin due to Windows path length limitation
+    constexpr size_t kELLIPSIS_LENGTH = 3;       // Length of "..."
+    // Appends `part` to `out`, replacing each character that is neither alphanumeric nor in kALLOWED with '_'.
+    auto appendSanitized = [&kALLOWED](std::string& out, std::string_view part) {
+        for (char const ch : part)
+        {
+            bool const keep = std::isalnum(static_cast<unsigned char>(ch)) || kALLOWED.find(ch) != std::string_view::npos;
+            out.push_back(keep ? ch : '_');
+        }
+    };
+    std::string sanitized;
+    if (s.length() > kMAX_FILENAME_LENGTH)
+    {
+        size_t const keptPerSide = (kMAX_FILENAME_LENGTH - kELLIPSIS_LENGTH) / 2; // head and tail kept around "..."
+        sanitized.reserve(kMAX_FILENAME_LENGTH);
+        appendSanitized(sanitized, s.substr(0, keptPerSide));
+        sanitized += "...";
+        appendSanitized(sanitized, s.substr(s.length() - keptPerSide));
+        return sanitized;
+    }
+    sanitized.reserve(s.size());
+    appendSanitized(sanitized, s);
+    return sanitized;
+}
+
+#endif // TENSORRT_COMMON_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/debugTensorWriter.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/debugTensorWriter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f6207c07cb0205f4f31efa43f37f9088eb8926e5
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/debugTensorWriter.cpp
@@ -0,0 +1,923 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "debugTensorWriter.h"
+#include "common.h"
+#include <algorithm>
+#include <cuda_bf16.h>
+#include <cuda_fp16.h>
+#if CUDA_VERSION >= 11060
+#include <cuda_fp8.h>
+#endif
+#if CUDA_VERSION >= 12070
+#include <cuda_fp4.h>
+#endif
+#include <cuda_runtime_api.h>
+#include <numeric>
+namespace sample
+{
+
+namespace
+{
+
+class Int4 // Holder for one 4-bit integer element, kept in an int8_t.
+{
+public:
+    Int4() = default;
+    explicit Int4(int8_t val)
+        : mValue(val)
+    {
+    }
+
+    operator int64_t() const // lets an Int4 be converted with static_cast<int64_t>(...)
+    {
+        return static_cast<int64_t>(mValue);
+    }
+
+private:
+    int8_t mValue{};
+};
+
+class Int4x2 // One byte holding two packed 4-bit integers.
+{
+public:
+    using StorageType = uint8_t;
+
+    Int4x2() = default;
+    explicit Int4x2(StorageType val)
+        : mRep(val)
+    {
+    }
+
+    // Get a single element: index 0 is the low nibble, index 1 the high nibble, each sign-extended via the shifts.
+    inline Int4 element(int32_t index) const
+    {
+        ASSERT(index == 0 || index == 1);
+        return Int4(index == 0 ? static_cast<int8_t>(mRep << 4) >> 4 : static_cast<int8_t>(mRep) >> 4);
+    }
+
+private:
+    StorageType mRep{};
+};
+
+#if CUDA_VERSION >= 12070
+using Fp4 = __nv_fp4_e2m1;
+
+class Fp4x2 // One byte holding two packed 4-bit floating-point values.
+{
+public:
+    using StorageType = uint8_t;
+
+    Fp4x2() = default;
+    explicit Fp4x2(StorageType val)
+        : mRep(val)
+    {
+    }
+
+    // Get a single element: index 0 takes the low nibble, index 1 the high nibble; the bits are reinterpreted as Fp4.
+    inline Fp4 element(int32_t index) const
+    {
+        ASSERT(index == 0 || index == 1);
+        // NOTE(review): the shifts sign-extend, so the upper four bits of `bits` may be set - confirm Fp4 ignores them.
+        int8_t bits = index == 0 ? static_cast<int8_t>(mRep << 4) >> 4 : static_cast<int8_t>(mRep) >> 4;
+        Fp4 fp4_el = *reinterpret_cast<Fp4*>(&bits); // NOTE(review): type-punning through a pointer cast
+        return fp4_el;
+    }
+
+private:
+    StorageType mRep{};
+};
+#endif
+
+// Iterator over tensor elements addressed by logical index; also handles packed data (int4 and fp4, two per byte).
+template <typename T>
+class DataIterator
+{
+public: // value_type is what operator* yields: Int4/Fp4 for the packed wrappers, T itself otherwise
+#if CUDA_VERSION >= 12070
+    using value_type
+        = std::conditional_t<std::is_same_v<T, Int4x2>, Int4, std::conditional_t<std::is_same_v<T, Fp4x2>, Fp4, T>>;
+#else
+    using value_type = std::conditional_t<std::is_same_v<T, Int4x2>, Int4, T>;
+#endif
+
+    DataIterator(void const* data, int64_t volume, int64_t index = 0) // `index` is the starting element index
+        : mData(static_cast<uint8_t const*>(data))
+        , mVolume(volume)
+        , mIndex(index)
+    {
+    }
+
+    value_type operator*() const // reads the element at the current index; performs no bounds check
+    {
+        if constexpr (std::is_same_v<T, Int4x2>)
+        {
+            // For Int4x2, each byte contains two 4-bit integers: byte mIndex / 2, element mIndex % 2
+            Int4x2 packed(mData[mIndex / 2]);
+            return packed.element(mIndex % 2);
+        }
+#if CUDA_VERSION >= 12070
+        else if constexpr (std::is_same_v<T, Fp4x2>)
+        {
+            // For Fp4x2, each byte contains two 4-bit floating point numbers: byte mIndex / 2, element mIndex % 2
+            Fp4x2 packed(mData[mIndex / 2]);
+            return packed.element(mIndex % 2);
+        }
+#endif
+        else
+        {
+            return reinterpret_cast<T const*>(mData)[mIndex]; // unpacked types: plain array indexing
+        }
+    }
+
+    DataIterator& operator++()
+    {
+        ++mIndex;
+        return *this;
+    }
+
+    DataIterator operator++(int)
+    {
+        DataIterator tmp = *this;
+        ++mIndex;
+        return tmp;
+    }
+
+    bool operator==(DataIterator const& other) const // compares the index only, not the underlying buffer
+    {
+        return mIndex == other.mIndex;
+    }
+
+    bool operator!=(DataIterator const& other) const // compares the index only, not the underlying buffer
+    {
+        return mIndex != other.mIndex;
+    }
+
+    DataIterator operator+(int64_t n) const // returns a copy advanced by n elements
+    {
+        DataIterator tmp = *this;
+        tmp.mIndex += n;
+        return tmp;
+    }
+
+private:
+    uint8_t const* mData; // start of the buffer, viewed as bytes
+    int64_t mVolume;      // stored, but not read by any member of this class
+    int64_t mIndex;       // current element index (counts 4-bit elements for the packed types)
+};
+
+template <typename T>
+class DataRange // Pairs a buffer with an element count so that it can be used in a range-based for loop.
+{
+public:
+    using iterator = DataIterator<T>;
+    using value_type = typename iterator::value_type;
+
+    DataRange(void const* data, int64_t volume) // `volume` is the number of elements, not bytes
+        : mData(data)
+        , mVolume(volume)
+    {
+    }
+
+    iterator begin() const // iterator at element index 0
+    {
+        return iterator(mData, mVolume, 0);
+    }
+    iterator end() const // iterator at element index mVolume (one past the last element)
+    {
+        return iterator(mData, mVolume, mVolume);
+    }
+
+private:
+    void const* mData;
+    int64_t mVolume;
+};
+
+template <typename T>
+static constexpr bool isFloatingPoint // true for the element types that the writers below convert to float
+    = std::is_floating_point_v<T> || std::is_same_v<T, half> || std::is_same_v<T, nv_bfloat16>
+#if CUDA_VERSION >= 11060
+    || std::is_same_v<T, __nv_fp8_e4m3>
+#endif
+#if CUDA_VERSION >= 12070
+    || std::is_same_v<T, Fp4> || std::is_same_v<T, Fp4x2>
+#endif
+    ;
+
+constexpr int32_t kFLOATING_POINT_PRECISION = 6;
+constexpr int32_t kFLOATING_POINT_WIDTH = 13;
+
+std::string_view getDataTypeString(nvinfer1::DataType type) // upper-case label for `type`, used in logs and JSON
+{
+    switch (type)
+    {
+    case nvinfer1::DataType::kBOOL: return "BOOL";
+    case nvinfer1::DataType::kINT4: return "INT4";
+    case nvinfer1::DataType::kINT8: return "INT8";
+    case nvinfer1::DataType::kINT32: return "INT32";
+    case nvinfer1::DataType::kINT64: return "INT64";
+    case nvinfer1::DataType::kUINT8: return "UINT8";
+    case nvinfer1::DataType::kFP4: return "FP4";
+    case nvinfer1::DataType::kFP8: return "FP8";
+    case nvinfer1::DataType::kE8M0: return "E8M0";
+    case nvinfer1::DataType::kHALF: return "HALF";
+    case nvinfer1::DataType::kBF16: return "BF16";
+    case nvinfer1::DataType::kFLOAT: return "FLOAT";
+    }
+    return "UNKNOWN"; // reached only for a value that matches none of the cases above
+}
+
+template <typename T>
+void printTensorElements(T const* data, int64_t volume, std::ofstream& f) // writes the "elements" preview line
+{
+    f << "        \"elements\": \"";
+    constexpr int32_t kPRINT_ELEMENTS_COUNT = 10; // at most this many elements are shown
+    int64_t firstHalf = std::min(static_cast<int64_t>(kPRINT_ELEMENTS_COUNT / 2), volume);
+    int64_t secondHalf = (volume > kPRINT_ELEMENTS_COUNT)
+        ? kPRINT_ELEMENTS_COUNT / 2
+        : std::max(static_cast<int64_t>(0), volume - kPRINT_ELEMENTS_COUNT / 2);
+
+    auto printElement = [&f](auto value) { // floating-point types print as float, everything else as int64_t
+        if constexpr (isFloatingPoint<T>)
+        {
+            f << static_cast<float>(value);
+        }
+        else
+        {
+            f << static_cast<int64_t>(value);
+        }
+    };
+
+    DataRange<T> range(data, volume);
+    auto it = range.begin();
+
+    // Print the leading elements (up to kPRINT_ELEMENTS_COUNT / 2 of them), comma separated
+    std::string delimiter = "";
+    for (int64_t i = 0; i < firstHalf; ++i)
+    {
+        f << delimiter;
+        printElement(*it++);
+        delimiter = ", ";
+    }
+
+    // Add ellipsis only when some elements in the middle are skipped
+    f << (volume > kPRINT_ELEMENTS_COUNT ? ", ..." : "");
+
+    // Print the trailing elements; secondHalf is 0 when the leading loop already covered the whole tensor
+    it = range.begin() + (volume - secondHalf);
+    for (int64_t i = volume - secondHalf; i < volume; ++i)
+    {
+        f << ", ";
+        printElement(*it++);
+    }
+
+    f << "\"" << std::endl;
+}
+
+// Write the "min", "max" and "avg" members for a host tensor of \p volume elements, then its "elements"
+// preview. Floating-point types are reduced as float (non-finite results are quoted), all others as int64_t.
+template <typename T>
+void processTensorSummary(void const* addr_host, int64_t volume, std::ofstream& f)
+{
+    DataRange<T> range(addr_host, volume);
+    if (volume <= 0)
+    {
+        // An empty tensor has no statistics. Emit JSON null rather than the sentinel limits and a 0/0
+        // average, which the integer path would otherwise write as a bare `nan` (not valid JSON).
+        f << "        \"min\": null," << std::endl;
+        f << "        \"max\": null," << std::endl;
+        f << "        \"avg\": null," << std::endl;
+    }
+    else if constexpr (isFloatingPoint<T>)
+    {
+        float minVal = std::numeric_limits<float>::max();
+        float maxVal = std::numeric_limits<float>::lowest();
+        double sum = 0.0;
+        for (auto value : range)
+        {
+            float val = static_cast<float>(value);
+            minVal = std::min(minVal, val);
+            maxVal = std::max(maxVal, val);
+            sum += val;
+        }
+        float avgVal = sum / volume;
+        // nan and inf turn into string in json
+        auto valueToStr = [](float val) -> std::string {
+            char const* const quote = std::isfinite(val) ? "" : "\"";
+            std::stringstream ss;
+            ss << quote << val << quote;
+            return ss.str();
+        };
+        f << "        \"min\": " << valueToStr(minVal) << "," << std::endl;
+        f << "        \"max\": " << valueToStr(maxVal) << "," << std::endl;
+        f << "        \"avg\": " << valueToStr(avgVal) << "," << std::endl;
+    }
+    else
+    {
+        // For integer types, use int64_t for min/max calculation. The sum is accumulated in double so that
+        // large INT64 tensors cannot overflow a signed accumulator (undefined behavior).
+        int64_t minVal = std::numeric_limits<int64_t>::max();
+        int64_t maxVal = std::numeric_limits<int64_t>::lowest();
+        double sum = 0.0;
+        for (auto value : range)
+        {
+            int64_t val = static_cast<int64_t>(value);
+            minVal = std::min(minVal, val);
+            maxVal = std::max(maxVal, val);
+            sum += static_cast<double>(val);
+        }
+        double avgVal = sum / static_cast<double>(volume);
+        f << "        \"min\": " << minVal << "," << std::endl;
+        f << "        \"max\": " << maxVal << "," << std::endl;
+        f << "        \"avg\": " << avgVal << "," << std::endl;
+    }
+
+    printTensorElements<T>(static_cast<T const*>(addr_host), volume, f);
+}
+
+std::string getCurrentTimeString()
+{
+    // Current local time rendered with the pattern below; "%z" appends the offset from UTC.
+    auto const timestamp = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
+    std::ostringstream formatted;
+    formatted << std::put_time(std::localtime(&timestamp), "%Y-%m-%dT%H:%M:%S%z");
+    return formatted.str();
+}
+
+// Write the sub-tensor that starts at element index \p offset as nested, bracketed rows.
+// \p offset and \p stride count logical elements, not bytes, and \p data always points at the start of
+// the tensor: for packed types (Int4x2/Fp4x2) two elements share a byte, so the pointer itself must
+// never be advanced by an element count.
+template <typename T>
+void writeTensorStringRecursive(T const* data, nvinfer1::Dims const& shape, int32_t currentDim, int64_t offset,
+    int64_t stride, std::ofstream& f, bool isFirstElement = true, int32_t indent = 0, int32_t maxWidth = 0)
+{
+    if (currentDim == shape.nbDims - 1)
+    {
+        // Last dimension - print elements in a row
+        f << std::string(indent, ' ') << "[";
+        // Start the iterator at element `offset` of the whole tensor instead of offsetting the pointer.
+        DataIterator<T> it(data, offset + shape.d[currentDim], offset);
+        for (int32_t i = 0; i < shape.d[currentDim]; ++i)
+        {
+            f << (i > 0 ? " " : "");
+            if constexpr (isFloatingPoint<T>)
+            {
+                f << std::scientific << std::setprecision(kFLOATING_POINT_PRECISION) << std::setw(kFLOATING_POINT_WIDTH)
+                  << std::right << static_cast<float>(*it++);
+            }
+            else
+            {
+                f << std::setw(maxWidth) << static_cast<int64_t>(*it++);
+            }
+        }
+        f << "]" << std::endl;
+    }
+    else
+    {
+        // For higher dimensions, print each slice
+        f << std::string(indent, ' ') << "[" << std::endl;
+        for (int32_t i = 0; i < shape.d[currentDim]; ++i)
+        {
+            writeTensorStringRecursive(data, shape, currentDim + 1, offset + i * stride,
+                stride / shape.d[currentDim + 1], f, i == 0, indent + 1, maxWidth);
+        }
+        f << std::string(indent, ' ') << "]" << std::endl;
+    }
+}
+
+template <typename T>
+int32_t getMaxWidthInDimension(
+    T const* data, nvinfer1::Dims const& shape, int32_t currentDim, int64_t offset, int64_t stride)
+{
+    int32_t maxWidth = 0;
+    if (currentDim == shape.nbDims - 1)
+    {
+        // Last dimension - check each element. Index from the tensor start: packed types hold two elements per byte.
+        DataIterator<T> it(data, offset + shape.d[currentDim], offset);
+        for (int64_t i = 0; i < shape.d[currentDim]; ++i, ++it)
+        {
+            std::stringstream ss;
+            ss << static_cast<int64_t>(*it);
+            maxWidth = std::max(maxWidth, static_cast<int32_t>(ss.str().length()));
+        }
+    }
+    else
+    {
+        // For higher dimensions, check each slice
+        for (int64_t i = 0; i < shape.d[currentDim]; ++i)
+        {
+            maxWidth = std::max(maxWidth,
+                getMaxWidthInDimension(
+                    data, shape, currentDim + 1, offset + i * stride, stride / shape.d[currentDim + 1]));
+        }
+    }
+    return maxWidth;
+}
+
+template <typename T>
+void writeTensorString( // dumps the tensor as nested, bracketed text rows into `fileName`
+    T const* data, nvinfer1::Dims const& shape, std::string_view tensorName, std::string const& fileName)
+{
+    sample::gLogVerbose << "Writing debug tensor '" << tensorName << "' to file '" << fileName << "'" << std::endl;
+
+    std::ofstream f(fileName, std::ios::out);
+    if (!f)
+    {
+        sample::gLogError << "Cannot open file for write: " << fileName << std::endl;
+        return; // the failure is only logged; the caller gets no error indication
+    }
+
+    if (shape.nbDims == 0)
+    {
+        f << "[]"; // a shape without dimensions is written as an empty list
+        return;
+    }
+
+    int64_t totalElements = 1;
+    for (int32_t i = 0; i < shape.nbDims; ++i)
+    {
+        totalElements *= shape.d[i];
+    }
+
+    if (totalElements == 0)
+    {
+        f << "[]"; // returning here also guarantees that no extent is 0 in the divisions below
+        return;
+    }
+
+    // Calculate stride for the first dimension: the number of elements in one slice along dimension 0
+    int64_t stride = totalElements / shape.d[0];
+
+    // Calculate max width for proper alignment only for non-floating point types
+    int32_t maxWidth = 0;
+    if constexpr (!isFloatingPoint<T>)
+    {
+        maxWidth = getMaxWidthInDimension(data, shape, 0, 0, stride);
+    }
+
+    writeTensorStringRecursive(data, shape, 0, 0, stride, f, true, 0, maxWidth);
+    f << std::endl;
+}
+
+std::string writeStringFile(void const* addr_host, nvinfer1::DataType type, nvinfer1::Dims const& shape,
+    std::string const& tensorName, std::string const& prefix) // returns the file name used, or "" if unsupported
+{
+    std::string fileName = genFilenameSafeString(prefix + tensorName + ".str"); // sanitized "<prefix><name>.str"
+
+    switch (type) // dispatch to the writer instantiated for the matching element type
+    {
+    case nvinfer1::DataType::kBOOL:
+        writeTensorString(static_cast<bool const*>(addr_host), shape, tensorName, fileName);
+        break;
+    case nvinfer1::DataType::kINT4:
+        writeTensorString(reinterpret_cast<Int4x2 const*>(addr_host), shape, tensorName, fileName);
+        break;
+    case nvinfer1::DataType::kINT8:
+        writeTensorString(static_cast<int8_t const*>(addr_host), shape, tensorName, fileName);
+        break;
+    case nvinfer1::DataType::kINT32:
+        writeTensorString(static_cast<int32_t const*>(addr_host), shape, tensorName, fileName);
+        break;
+    case nvinfer1::DataType::kINT64:
+        writeTensorString(static_cast<int64_t const*>(addr_host), shape, tensorName, fileName);
+        break;
+    case nvinfer1::DataType::kUINT8:
+        writeTensorString(static_cast<uint8_t const*>(addr_host), shape, tensorName, fileName);
+        break;
+    case nvinfer1::DataType::kFP4:
+#if CUDA_VERSION >= 12070
+        writeTensorString(static_cast<Fp4x2 const*>(addr_host), shape, tensorName, fileName);
+        break;
+#else
+        sample::gLogWarning << "Unsupported data type kFP4 for tensor string dump in this CUDA version." << std::endl;
+        return "";
+#endif
+    case nvinfer1::DataType::kFP8:
+#if CUDA_VERSION >= 11060
+        writeTensorString(static_cast<__nv_fp8_e4m3 const*>(addr_host), shape, tensorName, fileName);
+        break;
+#else
+        sample::gLogWarning << "Unsupported data type kFP8 for tensor string dump in this CUDA version." << std::endl;
+        return "";
+#endif
+    case nvinfer1::DataType::kE8M0:
+        sample::gLogWarning << "Unsupported data type kE8M0 for tensor string dump." << std::endl;
+        return "";
+    case nvinfer1::DataType::kHALF:
+        writeTensorString(static_cast<half const*>(addr_host), shape, tensorName, fileName);
+        break;
+    case nvinfer1::DataType::kBF16:
+        writeTensorString(static_cast<nv_bfloat16 const*>(addr_host), shape, tensorName, fileName);
+        break;
+    case nvinfer1::DataType::kFLOAT:
+        writeTensorString(static_cast<float const*>(addr_host), shape, tensorName, fileName);
+        break;
+    }
+    return fileName; // also returned when the file could not be opened (writeTensorString only logs that)
+}
+
+std::string escapeJsonString(std::string_view str)
+{
+    // RFC 8259 forbids raw control characters (below 0x20) inside a JSON string. Characters with a short
+    // escape use it (kNAMES[i] is the letter for kESCAPED[i]); any other control one becomes a 'u00XX' escape.
+    constexpr std::string_view kESCAPED{"\\\"\b\f\n\r\t"};
+    constexpr std::string_view kNAMES{"\\\"bfnrt"};
+    constexpr std::string_view kHEX{"0123456789abcdef"};
+
+    std::string result;
+    result.reserve(str.length());
+    for (char c : str)
+    {
+        auto const code = static_cast<unsigned char>(c);
+        auto const pos = kESCAPED.find(c);
+        result += (pos != std::string_view::npos) ? std::string{'\\', kNAMES[pos]}
+            : (code < 0x20)                       ? std::string{'\\', 'u', '0', '0', kHEX[code >> 4], kHEX[code & 0xF]}
+                                                  : std::string(1, c);
+    }
+    return result;
+}
+
+template <typename U, typename T>
+std::vector<U> convertBufferTo(T const* data, int64_t volume)
+{
+    // Element-wise static_cast of `volume` elements into a freshly allocated vector of U.
+    std::vector<U> converted(volume);
+    auto source = DataRange<T>(data, volume).begin();
+    for (auto& slot : converted)
+    {
+        slot = static_cast<U>(*source++);
+    }
+    return converted;
+}
+
+} // namespace
+
+DebugTensorWriter::DebugTensorWriter(std::unordered_map<std::string, std::string> const& debugTensorFileNames,
+    std::vector<std::string> const& debugTensorFormats, std::string const& engineName, std::string const& cmdline)
+    : mDebugTensorFileNames(debugTensorFileNames)
+    , mDebugTensorFormats(debugTensorFormats)
+    , mEngineName(engineName)
+    , mCmdline(cmdline)
+{
+    // The summary file is only created when the "summary" format was requested.
+    auto const& formats = mDebugTensorFormats;
+    if (std::find(formats.begin(), formats.end(), "summary") == formats.end())
+    {
+        return;
+    }
+    mSummaryFileName = "tensor_summary.json";
+    mSummaryFile.open(mSummaryFileName, std::ios::out);
+    if (!mSummaryFile.is_open())
+    {
+        sample::gLogError << "Failed to open tensor summary file: " << mSummaryFileName << std::endl;
+        return;
+    }
+    sample::gLogInfo << "Writing tensor summary to file: " << mSummaryFileName << std::endl;
+    writeSummaryHeader();
+}
+
+DebugTensorWriter::~DebugTensorWriter()
+{
+    if (!mSummaryFile.is_open())
+    {
+        return; // no summary file is open, so there is nothing to finish
+    }
+    writeSummaryFooter(); // complete the JSON document before closing the file itself
+    mSummaryFile.close();
+}
+
+void DebugTensorWriter::writeSummaryHeader()
+{
+    // Engine name and command line are caller-supplied text (paths, quotes), so both are JSON-escaped.
+    mSummaryFile << "{" << std::endl << "  \"metadata\": {" << std::endl;
+    mSummaryFile << "    \"title\": \"Tensor Summary Report\"," << std::endl;
+    mSummaryFile << "    \"time_generated\": \"" << getCurrentTimeString() << "\"," << std::endl;
+    mSummaryFile << "    \"engine_name\": \"" << escapeJsonString(mEngineName) << "\"," << std::endl;
+    mSummaryFile << "    \"command_line\": \"" << escapeJsonString(mCmdline) << "\"" << std::endl;
+    mSummaryFile << "  }," << std::endl;
+    mSummaryFile << "  \"tensors\": [" << std::endl;
+}
+
+void DebugTensorWriter::writeSummaryFooter()
+{
+    // Close the "tensors" array first, then the top-level JSON object.
+    mSummaryFile << std::endl << "  ]" << std::endl << "}" << std::endl;
+}
+
+void DebugTensorWriter::writeSummary(std::string_view name, nvinfer1::Dims const& shape, nvinfer1::DataType type,
+    int64_t volume, void const* addr_host, std::string_view assignedFileName, std::string_view numpyFileName,
+    std::string_view stringFileName, std::string_view rawFileName)
+{
+    // Add comma separator if not the first tensor
+    if (!mFirstTensor)
+    {
+        mSummaryFile << "," << std::endl;
+    }
+    mFirstTensor = false;
+
+    // Write tensor information (the name is JSON-escaped, like the file names below)
+    mSummaryFile << "  {\n"
+                 << "    \"name\": \"" << escapeJsonString(name) << "\",\n"
+                 << "    \"shape\": [";
+
+    for (int32_t i = 0; i < shape.nbDims; ++i)
+    {
+        if (i > 0)
+        {
+            mSummaryFile << ", ";
+        }
+        mSummaryFile << shape.d[i];
+    }
+
+    mSummaryFile << "],\n"
+                 << "    \"type\": \"" << getDataTypeString(type) << "\",\n";
+
+    // Write statistics (the object stays empty for types that have no summary support)
+    mSummaryFile << "    \"statistics\": {\n";
+
+    switch (type)
+    {
+    case nvinfer1::DataType::kBOOL: processTensorSummary<bool>(addr_host, volume, mSummaryFile); break;
+    case nvinfer1::DataType::kINT4: processTensorSummary<Int4x2>(addr_host, volume, mSummaryFile); break;
+    case nvinfer1::DataType::kINT8: processTensorSummary<int8_t>(addr_host, volume, mSummaryFile); break;
+    case nvinfer1::DataType::kINT32: processTensorSummary<int32_t>(addr_host, volume, mSummaryFile); break;
+    case nvinfer1::DataType::kINT64: processTensorSummary<int64_t>(addr_host, volume, mSummaryFile); break;
+    case nvinfer1::DataType::kUINT8: processTensorSummary<uint8_t>(addr_host, volume, mSummaryFile); break;
+    case nvinfer1::DataType::kFP4:
+#if CUDA_VERSION >= 12070
+        processTensorSummary<Fp4x2>(addr_host, volume, mSummaryFile);
+#else
+        sample::gLogWarning << "Unsupported data type kFP4 for tensor '" << name
+                            << "' summary dump in this CUDA version." << std::endl;
+#endif
+        break;
+    case nvinfer1::DataType::kFP8:
+#if CUDA_VERSION >= 11060
+        processTensorSummary<__nv_fp8_e4m3>(addr_host, volume, mSummaryFile);
+        break;
+#else
+        sample::gLogWarning << "Unsupported data type kFP8 for tensor '" << name
+                            << "' summary dump in this CUDA version." << std::endl;
+#endif
+        break;
+    case nvinfer1::DataType::kE8M0:
+        sample::gLogWarning << "Unsupported data type kE8M0 for tensor '" << name << "' summary dump." << std::endl;
+        break;
+    case nvinfer1::DataType::kHALF: processTensorSummary<half>(addr_host, volume, mSummaryFile); break;
+    case nvinfer1::DataType::kBF16: processTensorSummary<nv_bfloat16>(addr_host, volume, mSummaryFile); break;
+    case nvinfer1::DataType::kFLOAT: processTensorSummary<float>(addr_host, volume, mSummaryFile); break;
+    }
+
+    mSummaryFile << "    }";
+
+    // Write file information only if at least one file exists
+    if (!assignedFileName.empty() || !numpyFileName.empty() || !stringFileName.empty() || !rawFileName.empty())
+    {
+        mSummaryFile << ",\n    \"files\": {\n";
+        std::string delimiter = "";
+
+        if (!assignedFileName.empty())
+        {
+            mSummaryFile << delimiter << "      \"assigned\": \"" << escapeJsonString(assignedFileName) << "\"";
+            delimiter = ",\n";
+        }
+
+        if (!numpyFileName.empty())
+        {
+            mSummaryFile << delimiter << "      \"numpy\": \"" << escapeJsonString(numpyFileName) << "\"";
+            delimiter = ",\n";
+        }
+
+        if (!stringFileName.empty())
+        {
+            mSummaryFile << delimiter << "      \"string\": \"" << escapeJsonString(stringFileName) << "\"";
+            delimiter = ",\n";
+        }
+
+        if (!rawFileName.empty())
+        {
+            mSummaryFile << delimiter << "      \"raw\": \"" << escapeJsonString(rawFileName) << "\"";
+        }
+
+        mSummaryFile << "\n    }";
+    }
+
+    mSummaryFile << "\n  }";
+}
+
+bool writeNumpyFile(void const* addr_host, std::string_view dtype, nvinfer1::Dims const& shape, int64_t size,
+    std::string_view tensorName, std::string const& fileName) // `size` is the payload length in bytes
+{
+    sample::gLogVerbose << "Writing debug tensor '" << tensorName << "' to numpy file '" << fileName << "'"
+                        << std::endl;
+
+    std::ofstream f(fileName, std::ios::out | std::ios::binary);
+    if (!f)
+    {
+        sample::gLogError << "Cannot open file for write: " << fileName << std::endl;
+        return false;
+    }
+
+    // Write numpy magic string and version (1.0)
+    char magic[] = {'\x93', 'N', 'U', 'M', 'P', 'Y'};
+    char version[] = {'\x01', '\x00'};
+    f.write(magic, sizeof(magic));
+    f.write(version, sizeof(version));
+
+    // Construct header: a dict literal with the dtype string, C (row-major) order flag and the shape tuple
+    std::stringstream header;
+    header << "{'descr': '" << dtype << "', 'fortran_order': False, 'shape': (";
+
+    for (int32_t i = 0; i < shape.nbDims; i++)
+    {
+        header << shape.d[i];
+        header << ", ";
+    }
+    header << "), }";
+
+    // Pad so that the 10 preamble bytes plus the header, including its final newline, total a multiple of 16
+    std::string headerStr = header.str();
+    int32_t headerLen = 10 + headerStr.length(); // 6 magic + 2 version + 2 length-field bytes
+    int32_t padding = 16 - ((headerLen + 1) % 16);
+    headerStr.append(padding, ' ');
+    headerStr += '\n';
+
+    // Write header length and header
+    uint16_t headerSize = headerStr.length();
+    f.write(reinterpret_cast<char*>(&headerSize), sizeof(uint16_t)); // host byte order; NOTE(review): confirm LE-only
+    f.write(headerStr.c_str(), headerSize);
+
+    // Write data
+    f.write(static_cast<char const*>(addr_host), size);
+    f.close();
+
+    return true; // NOTE(review): write failures after a successful open are not reported
+}
+
+std::string writeNumpy(nvinfer1::DataType type, void const* addr_host, int64_t volume, nvinfer1::Dims const& shape,
+    std::string const& name, std::string const& prefix) // returns the .npy file name, or "" if nothing was written
+{
+    std::string fileName = prefix + name;
+    std::string_view dtype = ""; // numpy 'descr' string; left empty when the type cannot be dumped
+    void const* data = addr_host;
+    int64_t size = samplesCommon::getNbBytes(type, volume);
+    std::vector<float> floatBuffer; // owns converted data; must outlive the writeNumpyFile call below
+    std::vector<int8_t> int8Buffer; // owns converted data; must outlive the writeNumpyFile call below
+
+    auto convertToFloat = [&](std::vector<float> const& buffer) { // redirect the dump to a float copy
+        sample::gLogWarning << "Converting " << getDataTypeString(type) << " to float for numpy dump of tensor '"
+                            << name << "'." << std::endl;
+        dtype = "<f4";
+        data = buffer.data();
+        size = volume * sizeof(float);
+        fileName += "_to_float";
+    };
+
+    auto convertToInt8 = [&](std::vector<int8_t> const& buffer) { // redirect the dump to an int8 copy
+        sample::gLogWarning << "Converting " << getDataTypeString(type) << " to int8 for numpy dump of tensor '" << name
+                            << "'." << std::endl;
+        dtype = "<i1";
+        data = buffer.data();
+        size = volume * sizeof(int8_t);
+        fileName += "_to_int8";
+    };
+
+    switch (type) // types without a dtype string assigned here are first converted to float or int8
+    {
+    case nvinfer1::DataType::kBOOL: dtype = "|b1"; break;
+    case nvinfer1::DataType::kINT4:
+        int8Buffer = convertBufferTo<int8_t>(reinterpret_cast<Int4x2 const*>(addr_host), volume);
+        convertToInt8(int8Buffer);
+        break;
+    case nvinfer1::DataType::kINT8: dtype = "<i1"; break;
+    case nvinfer1::DataType::kINT32: dtype = "<i4"; break;
+    case nvinfer1::DataType::kINT64: dtype = "<i8"; break;
+    case nvinfer1::DataType::kUINT8: dtype = "|u1"; break;
+    case nvinfer1::DataType::kFP4:
+#if CUDA_VERSION >= 12070
+        floatBuffer = convertBufferTo<float>(static_cast<Fp4x2 const*>(addr_host), volume);
+        convertToFloat(floatBuffer);
+#else
+        sample::gLogWarning << "Unsupported data type kFP4 for tensor '" << name << "' numpy dump in this CUDA version."
+                            << std::endl;
+        return "";
+#endif
+        break;
+    case nvinfer1::DataType::kFP8:
+#if CUDA_VERSION >= 11060
+        floatBuffer = convertBufferTo<float>(static_cast<__nv_fp8_e4m3 const*>(addr_host), volume);
+        convertToFloat(floatBuffer);
+#else
+        sample::gLogWarning << "Unsupported data type kFP8 for tensor '" << name << "' numpy dump in this CUDA version."
+                            << std::endl;
+        return "";
+#endif
+        break;
+    case nvinfer1::DataType::kE8M0:
+        sample::gLogWarning << "Unsupported data type kE8M0 for tensor '" << name << "' numpy dump." << std::endl;
+        return "";
+    case nvinfer1::DataType::kHALF: dtype = "<f2"; break;
+    case nvinfer1::DataType::kBF16:
+        floatBuffer = convertBufferTo<float>(static_cast<nv_bfloat16 const*>(addr_host), volume);
+        convertToFloat(floatBuffer);
+        break;
+    case nvinfer1::DataType::kFLOAT: dtype = "<f4"; break;
+    }
+
+    if (!dtype.empty())
+    {
+
+        fileName += ".npy";
+        fileName = genFilenameSafeString(fileName);
+        writeNumpyFile(data, dtype, shape, size, name, fileName); // result ignored: the name is returned regardless
+        return fileName;
+    }
+    return "";
+}
+
+bool DebugTensorWriter::processDebugTensor(void const* addr, nvinfer1::TensorLocation location, nvinfer1::DataType type,
+    nvinfer1::Dims const& shape, char const* name, cudaStream_t stream) // always returns true
+{
+    CHECK(cudaStreamSynchronize(stream)); // wait for work queued on `stream` before reading the tensor
+    // Store data from callback.
+    auto volume = std::accumulate(shape.d, shape.d + shape.nbDims, 1LL, std::multiplies<int64_t>{});
+    int64_t size = samplesCommon::getNbBytes(type, volume);
+    std::vector<char> hostDataOut; // staging buffer, only filled for device tensors
+    void const* addrHost = nullptr;
+    if (location == nvinfer1::TensorLocation::kDEVICE)
+    {
+        hostDataOut.resize(size);
+        CHECK(cudaMemcpy(hostDataOut.data(), addr, size, cudaMemcpyDeviceToHost));
+        addrHost = hostDataOut.data();
+    }
+    else
+    {
+        addrHost = addr; // any other location: the pointer is read directly as host memory
+    }
+
+    std::string assignedFileName;
+    std::string numpyFileName;
+    std::string rawFileName;
+    std::string stringFileName;
+    auto it = mDebugTensorFileNames.find(name); // explicit per-tensor file: raw bytes under the exact given name
+    if (it != mDebugTensorFileNames.end())
+    {
+        assignedFileName = it->second;
+        std::ofstream f(assignedFileName, std::ios::out | std::ios::binary);
+        ASSERT(f && "Cannot open file for write");
+        sample::gLogVerbose << "Writing debug tensor '" << name << "' to file '" << assignedFileName << "'"
+                            << std::endl;
+        f.write(static_cast<char const*>(addrHost), size);
+        f.close();
+    }
+
+    std::stringstream ss; // prefix for generated names: zero-padded 4-digit tensor index plus '_', e.g. "0003_"
+    ss << std::setw(4) << std::setfill('0') << mTensorIndex << "_";
+    std::string prefix = ss.str();
+
+    if (std::find(mDebugTensorFormats.begin(), mDebugTensorFormats.end(), "raw") != mDebugTensorFormats.end())
+    {
+        rawFileName = genFilenameSafeString(prefix + name + ".raw");
+        sample::gLogVerbose << "Writing debug tensor '" << name << "' to raw file '" << rawFileName << "'" << std::endl;
+        std::ofstream f(rawFileName, std::ios::out | std::ios::binary);
+        ASSERT(f && "Cannot open file for write");
+        f.write(static_cast<char const*>(addrHost), size);
+        f.close();
+    }
+
+    if (std::find(mDebugTensorFormats.begin(), mDebugTensorFormats.end(), "numpy") != mDebugTensorFormats.end())
+    {
+        numpyFileName = writeNumpy(type, addrHost, volume, shape, name, prefix);
+    }
+
+    if (std::find(mDebugTensorFormats.begin(), mDebugTensorFormats.end(), "string") != mDebugTensorFormats.end())
+    {
+        stringFileName = writeStringFile(addrHost, type, shape, name, prefix);
+    }
+
+    if (std::find(mDebugTensorFormats.begin(), mDebugTensorFormats.end(), "summary") != mDebugTensorFormats.end()
+        && mSummaryFile.is_open())
+    {
+        writeSummary(name, shape, type, volume, addrHost, assignedFileName, numpyFileName, stringFileName, rawFileName);
+        mSummaryFile.flush(); // flush after every tensor so the entries written so far reach the file
+    }
+
+    mTensorIndex++; // incremented for every processed tensor, whether or not any file was written
+    return true;
+}
+
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/debugTensorWriter.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/debugTensorWriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..4123216d9c126cfa30296bdc011402752e251087
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/debugTensorWriter.h
@@ -0,0 +1,59 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_DEBUG_TENSOR_WRITER_H
+#define TENSORRT_DEBUG_TENSOR_WRITER_H
+
+#include "NvInferRuntime.h"
+#include <fstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+namespace sample
+{
+
+//! Debug-tensor listener used by the TensorRT samples.
+//!
+//! Derives from nvinfer1::IDebugListener and overrides processDebugTensor(). Only declarations live
+//! here; the member functions are defined in debugTensorWriter.cpp.
+class DebugTensorWriter : public nvinfer1::IDebugListener
+{
+public:
+    //! \param debugTensorFileNames NOTE(review): presumably tensor name -> output file name, kept in
+    //!        mDebugTensorFileNames -- confirm against the constructor definition.
+    //! \param debugTensorFormats NOTE(review): presumably kept in mDebugTensorFormats -- confirm.
+    //! \param engineName Optional, defaults to "".
+    //! \param cmdline Optional, defaults to "".
+    DebugTensorWriter(std::unordered_map<std::string, std::string> const& debugTensorFileNames,
+        std::vector<std::string> const& debugTensorFormats, std::string const& engineName = "",
+        std::string const& cmdline = "");
+    ~DebugTensorWriter() override;
+
+    //! Override of the nvinfer1::IDebugListener callback.
+    //! NOTE(review): the meaning of each argument is defined by IDebugListener in NvInferRuntime.h,
+    //! which is not visible from this header -- consult that interface's documentation.
+    bool processDebugTensor(void const* addr, nvinfer1::TensorLocation location, nvinfer1::DataType type,
+        nvinfer1::Dims const& shape, char const* name, cudaStream_t stream) override;
+
+private:
+    void writeSummaryHeader();
+    void writeSummaryFooter();
+    //! Writes one summary entry. The *FileName arguments name the files already written for this
+    //! tensor in the corresponding formats.
+    void writeSummary(std::string_view name, nvinfer1::Dims const& shape, nvinfer1::DataType type, int64_t volume,
+        void const* addr_host, std::string_view assignedFileName, std::string_view numpyFileName,
+        std::string_view stringFileName, std::string_view rawFileName);
+
+    std::unordered_map<std::string, std::string> mDebugTensorFileNames;
+    //! Requested output formats; the implementation searches it for "raw", "numpy", "string" and "summary".
+    std::vector<std::string> mDebugTensorFormats;
+    std::string mSummaryFileName;
+    //! Summary stream; a summary entry is written, and the stream flushed, only while it is open.
+    std::ofstream mSummaryFile;
+    bool mFirstTensor{true}; //!< NOTE(review): usage is not visible from this header -- see the .cpp.
+    std::string mEngineName;
+    std::string mCmdline;
+    //! Running tensor counter: rendered as a zero-padded 4-digit file-name prefix and incremented
+    //! once per processed tensor.
+    int32_t mTensorIndex{0};
+};
+
+} // namespace sample
+
+#endif // TENSORRT_DEBUG_TENSOR_WRITER_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/dumpTFWts.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/dumpTFWts.py
new file mode 100644
index 0000000000000000000000000000000000000000..70770fbd802e963d6de0170b9f0dd117e4f88726
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/dumpTFWts.py
@@ -0,0 +1,124 @@
+#!/usr/bin/python
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Script to dump TensorFlow weights in TRT v1 and v2 dump format.
+# The V1 format is for TensorRT 4.0. The V2 format is for TensorRT 4.0 and later.
+
+import sys
+import struct
+import argparse
+
+try:
+    import tensorflow as tf
+    from tensorflow.python import pywrap_tensorflow
+except ImportError as err:
+    sys.stderr.write("""Error: Failed to import module ({})""".format(err))
+    sys.exit()
+
+parser = argparse.ArgumentParser(description="TensorFlow Weight Dumper")
+
+parser.add_argument(
+    "-m",
+    "--model",
+    required=True,
+    help="The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908",
+)
+parser.add_argument("-o", "--output", required=True, help="The weight file to dump all the weights to.")
+parser.add_argument("-1", "--wtsv1", required=False, default=False, type=bool, help="Dump the weights in the wts v1.")
+
+opt = parser.parse_args()
+
+if opt.wtsv1:
+    print("Outputting the trained weights in TensorRT's wts v1 format. This format is documented as:")
+    print("Line 0: <number of buffers in the file>")
+    print("Line 1-Num: [buffer name] [buffer type] [buffer size] <hex values>")
+else:
+    print("Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:")
+    print("Line 0: <number of buffers in the file>")
+    print("Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. (1, 2, 3)}] <buffer shaped size bytes of data>")
+
+inputbase = opt.model
+outputbase = opt.output
+
+
+def float_to_hex(f):
+    return hex(struct.unpack("<I", struct.pack("<f", f))[0])
+
+
+def getTRTType(tensor):
+    if tf.as_dtype(tensor.dtype) == tf.float32:
+        return 0
+    if tf.as_dtype(tensor.dtype) == tf.float16:
+        return 1
+    if tf.as_dtype(tensor.dtype) == tf.int8:
+        return 2
+    if tf.as_dtype(tensor.dtype) == tf.int32:
+        return 3
+    print("Tensor data type of %s is not supported in TensorRT" % (tensor.dtype))
+    sys.exit()
+
+
+try:
+    # Open output file
+    if opt.wtsv1:
+        outputFileName = outputbase + ".wts"
+    else:
+        outputFileName = outputbase + ".wts2"
+    outputFile = open(outputFileName, "w")
+
+    # read vars from checkpoint
+    reader = pywrap_tensorflow.NewCheckpointReader(inputbase)
+    var_to_shape_map = reader.get_variable_to_shape_map()
+
+    # Record count of weights
+    count = 0
+    for key in sorted(var_to_shape_map):
+        count += 1
+    outputFile.write("%s\n" % (count))
+
+    # Dump the weights in either v1 or v2 format
+    for key in sorted(var_to_shape_map):
+        tensor = reader.get_tensor(key)
+        file_key = key.replace("/", "_")
+        typeOfElem = getTRTType(tensor)
+        val = tensor.shape
+        if opt.wtsv1:
+            val = tensor.size
+        print("%s %s %s " % (file_key, typeOfElem, val))
+        flat_tensor = tensor.flatten()
+        outputFile.write("%s 0 %s " % (file_key, val))
+        if opt.wtsv1:
+            for weight in flat_tensor:
+                hexval = float_to_hex(float(weight))
+                outputFile.write("%s " % (hexval[2:]))
+        else:
+            outputFile.write(flat_tensor.tobytes())
+        outputFile.write("\n")
+    outputFile.close()
+
+except Exception as e:  # pylint: disable=broad-except
+    print(str(e))
+    if "corrupted compressed block contents" in str(e):
+        print("It's likely that your checkpoint file has been compressed " "with SNAPPY.")
+        if "Data loss" in str(e) and (any([e in inputbase for e in [".index", ".meta", ".data"]])):
+            proposed_file = ".".join(inputbase.split(".")[0:-1])
+            v2_file_error_template = """
+           It's likely that this is a V2 checkpoint and you need to provide the filename
+           *prefix*.  Try removing the '.' and extension.  Try:
+           inspect checkpoint --file_name = {}"""
+            print(v2_file_error_template.format(proposed_file))
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getOptions.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getOptions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..19cd328117b406a4c0f37527314d524c19ce96f8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getOptions.cpp
@@ -0,0 +1,248 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "getOptions.h"
+#include "logger.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstring>
+#include <set>
+
+namespace nvinfer1
+{
+namespace utility
+{
+
+//! Matching for TRTOptions is defined as follows:
+//!
+//! If A and B both have longName set, A matches B if and only if A.longName ==
+//! B.longName and (A.shortName == B.shortName if both have short name set).
+//!
+//! If A only has shortName set and B only has longName set, then A does not
+//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
+//! the definition of a TRTOption in the input to getOptions. As such, if the
+//! definition only has shortName set, it will never be equal to a TRTOption
+//! that does not have shortName set (and same for longName).
+//!
+//! If A and B both have shortName set but B does not have longName set, A
+//! matches B if and only if A.shortName == B.shortName.
+//!
+//! If A has neither long or short name set, A matches B if and only if B has
+//! neither long or short name set.
+bool matches(const TRTOption& a, const TRTOption& b)
+{
+    const bool bothHaveLongNames = !a.longName.empty() && !b.longName.empty();
+    if (!bothHaveLongNames)
+    {
+        // At most one long name is available, so only the short names can be
+        // compared. If exactly one short name is unset they differ anyway.
+        return a.shortName == b.shortName;
+    }
+
+    if (a.longName != b.longName)
+    {
+        return false;
+    }
+
+    // Long names agree; short names only get a say when both are set.
+    const bool bothHaveShortNames = (a.shortName != 0) && (b.shortName != 0);
+    return !bothHaveShortNames || (a.shortName == b.shortName);
+}
+
+//! getTRTOptionIndex returns the index of a TRTOption in a vector of
+//! TRTOptions, -1 if not found.
+int getTRTOptionIndex(const std::vector<TRTOption>& options, const TRTOption& opt)
+{
+    // Locate the first definition (in declaration order) that opt matches.
+    const auto hit = std::find_if(
+        options.begin(), options.end(), [&opt](const TRTOption& candidate) { return matches(opt, candidate); });
+    if (hit == options.end())
+    {
+        return -1;
+    }
+    return static_cast<int>(hit - options.begin());
+}
+
+//! validateTRTOption will return a string containing an error message if options
+//! contain non-numeric characters, or if there are duplicate option names found.
+//! Otherwise, returns the empty string.
+std::string validateTRTOption(
+    const std::set<char>& seenShortNames, const std::set<std::string>& seenLongNames, const TRTOption& opt)
+{
+    if (opt.shortName != 0)
+    {
+        if (!std::isalnum(opt.shortName))
+        {
+            return "Short name '" + std::to_string(opt.shortName) + "' is non-alphanumeric";
+        }
+
+        if (seenShortNames.find(opt.shortName) != seenShortNames.end())
+        {
+            return "Short name '" + std::to_string(opt.shortName) + "' is a duplicate";
+        }
+    }
+
+    if (!opt.longName.empty())
+    {
+        for (const char& c : opt.longName)
+        {
+            if (!std::isalnum(c) && c != '-' && c != '_')
+            {
+                return "Long name '" + opt.longName + "' contains characters that are not '-', '_', or alphanumeric";
+            }
+        }
+
+        if (seenLongNames.find(opt.longName) != seenLongNames.end())
+        {
+            return "Long name '" + opt.longName + "' is a duplicate";
+        }
+    }
+    return "";
+}
+
+//! validateTRTOptions checks every option in order with validateTRTOption,
+//! treating the names of all earlier options as already seen. On the first
+//! failure it returns that error message wrapped together with the index of
+//! the offending option; if every option passes it returns the empty string.
+std::string validateTRTOptions(const std::vector<TRTOption>& options)
+{
+    std::set<char> shortNamesSoFar;
+    std::set<std::string> longNamesSoFar;
+
+    size_t position = 0;
+    for (const TRTOption& option : options)
+    {
+        const std::string problem = validateTRTOption(shortNamesSoFar, longNamesSoFar, option);
+        if (!problem.empty())
+        {
+            return "Error '" + problem + "' at TRTOption " + std::to_string(position);
+        }
+
+        // Remember this option's names so that later options can be checked for duplicates.
+        shortNamesSoFar.insert(option.shortName);
+        longNamesSoFar.insert(option.longName);
+        ++position;
+    }
+    return "";
+}
+
+//! parseArgs parses an argument list and returns a TRTParsedArgs with the
+//! fields set accordingly. Assumes that options is validated.
+//!
+//! argv[0] is skipped. Arguments without a leading hyphen are collected as
+//! positional arguments. Arguments that look like options but match no entry
+//! in options are silently skipped. For a value-required option written as
+//! --name=value the text after '=' is used when it is non-empty; otherwise the
+//! next argument is consumed as the value, even if it starts with a hyphen (a
+//! warning is logged in that case).
+//!
+//! ErrMsg will be set if:
+//!     - an argument is null (including the argument consumed as a value)
+//!     - an argument is empty
+//!     - an argument does not have option (i.e. "-" and "--")
+//!     - a short argument has more than 1 character
+//!     - the last argument in the list requires a value
+TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector<TRTOption>& options)
+{
+    TRTParsedArgs parsedArgs;
+    parsedArgs.values.resize(options.size());
+
+    for (int i = 1; i < argc; ++i) // index of current command-line argument
+    {
+        if (argv[i] == nullptr)
+        {
+            return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
+        }
+
+        const std::string argStr(argv[i]);
+        if (argStr.empty())
+        {
+            return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
+        }
+
+        // No starting hyphen means it is a positional argument
+        if (argStr[0] != '-')
+        {
+            parsedArgs.positionalArgs.push_back(argStr);
+            continue;
+        }
+
+        if (argStr == "-" || argStr == "--")
+        {
+            return TRTParsedArgs{"Argument does not specify an option at index " + std::to_string(i)};
+        }
+
+        // ' ' is a placeholder short name that no validated option can have, so
+        // an argument that ends up with no usable name matches nothing.
+        TRTOption opt{' ', "", false, ""};
+        std::string value;
+        if (argStr[1] != '-')
+        {
+            // If only 1 hyphen, char after is the flag.
+            // Must only have 1 char after the hyphen
+            if (argStr.size() > 2)
+            {
+                return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i)};
+            }
+            opt.shortName = argStr[1];
+        }
+        else
+        {
+            opt.longName = argStr.substr(2);
+
+            // We need to support --foo=bar syntax, so look for '='
+            const size_t eqIndex = opt.longName.find('=');
+            if (eqIndex < opt.longName.size())
+            {
+                value = opt.longName.substr(eqIndex + 1);
+                opt.longName = opt.longName.substr(0, eqIndex);
+            }
+
+            // A long-form argument carries no short name. Leaving the ' '
+            // placeholder in place made matches() compare it against the short
+            // name of any option defined with BOTH names (e.g. {'f', "foo"}),
+            // so "--foo" never matched such an option. Clearing it makes the
+            // comparison fall back to the long name alone. The placeholder is
+            // kept when the long name is empty (e.g. "--=x") so that such an
+            // argument still matches nothing.
+            // NOTE(review): a long argument can now match an option defined
+            // with neither name; validateTRTOptions does not reject those.
+            if (!opt.longName.empty())
+            {
+                opt.shortName = '\0';
+            }
+        }
+
+        const int idx = getTRTOptionIndex(options, opt);
+        if (idx < 0)
+        {
+            // Unknown option: ignored.
+            continue;
+        }
+
+        if (!options[idx].valueRequired)
+        {
+            // Flag-style option: only count the occurrence.
+            parsedArgs.values[idx].first += 1;
+            continue;
+        }
+
+        if (value.empty())
+        {
+            // No inline "=value": the next argument is the value.
+            if (i + 1 >= argc)
+            {
+                return TRTParsedArgs{"Last argument requires value, but none given"};
+            }
+
+            // Constructing a std::string from a null pointer is undefined behavior.
+            if (argv[i + 1] == nullptr)
+            {
+                return TRTParsedArgs{"Null argument at index " + std::to_string(i + 1)};
+            }
+
+            const std::string nextArg(argv[i + 1]);
+            if (nextArg.size() >= 1 && nextArg[0] == '-')
+            {
+                sample::gLogWarning << "Warning: Using '" << nextArg << "' as a value for '" << argStr
+                                    << "', Should this be its own flag?" << std::endl;
+            }
+
+            value = nextArg;
+            i += 1; // Next argument already consumed
+        }
+
+        // For value-required options, occurrences == number of values.
+        parsedArgs.values[idx].second.push_back(value);
+        parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
+    }
+    return parsedArgs;
+}
+
+//! Validates the option table and, if it is well formed, parses argv against
+//! it. A validation failure is reported through the errMsg of the result
+//! without looking at argv.
+TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector<TRTOption>& options)
+{
+    const std::string validationError = validateTRTOptions(options);
+    if (validationError.empty())
+    {
+        return parseArgs(argc, argv, options);
+    }
+    return TRTParsedArgs{validationError};
+}
+} // namespace utility
+} // namespace nvinfer1
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getOptions.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getOptions.h
new file mode 100644
index 0000000000000000000000000000000000000000..4bbf9e2754a9f07a867d2d480aceee289f83e7f3
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getOptions.h
@@ -0,0 +1,128 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_GET_OPTIONS_H
+#define TRT_GET_OPTIONS_H
+
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace nvinfer1
+{
+namespace utility
+{
+
+//! TRTOption defines a command line option. At least 1 of shortName and longName
+//! must be defined.
+//! If bool initialization is undefined behavior on your system, valueRequired
+//! must also be explicitly defined.
+//! helpText is optional.
+struct TRTOption
+{
+    //! Option name in short (single hyphen) form (i.e. -a, -b); 0 when the option has no short form
+    char shortName;
+    //! Option name in long (double hyphen) form (i.e. --foo, --bar); empty when the option has no long form
+    std::string longName;
+    bool valueRequired;   //!< True if a value is needed for an option (i.e. -N 4, --foo bar)
+    std::string helpText; //!< Text to show when printing out the command usage
+};
+
+//! TRTParsedArgs is returned by getOptions after it has parsed a command line
+//! argument list (argv).
+//!
+//! errMsg is a string containing an error message if any errors occurred. If it
+//! is empty, no errors occurred.
+//!
+//! values stores a vector of pairs for each option (ordered by order in the
+//! input). Each pair contains an int (the number of occurrences) and a vector
+//! of strings (a list of values). The user should know which of these to use,
+//! and which options required values. For non-value options, only occurrences is
+//! populated. For value-required options, occurrences == # of values. Values do
+//! not need to be unique.
+//!
+//! positionalArgs stores additional arguments that are passed in without an
+//! option (these must not start with a hyphen).
+struct TRTParsedArgs
+{
+    std::string errMsg; //!< Empty if no errors occurred, otherwise a description of the error
+    //! One entry per option, in the same order as the options passed to getOptions:
+    //! first = number of occurrences, second = values collected for the option.
+    std::vector<std::pair<int, std::vector<std::string>>> values;
+    std::vector<std::string> positionalArgs; //!< Arguments that did not start with a hyphen, in order
+};
+
+//! Parse the input arguments passed to main() and extract options as well as
+//! positional arguments.
+//!
+//! Options are supposed to be passed to main() with a preceding hyphen '-'.
+//!
+//! If there is a single preceding hyphen, there should be exactly 1 character
+//! after the hyphen, which is interpreted as the option.
+//!
+//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
+//! is interpreted as the option.
+//!
+//! If the option requires a value, the next argument is used as the value.
+//!
+//! Positional arguments must not start with a hyphen.
+//!
+//! If an argument requires a value, the next argument is interpreted as the
+//! value, even if it is the form of a valid option (i.e. --foo --bar will store
+//! "--bar" as a value for option "foo" if "foo" requires a value).
+//! We also support --name=value syntax. In this case, 'value' would be used as
+//! the value, NOT the next argument.
+//!
+//! For options:
+//!   { { 'a', "", false },
+//!     { 'b', "", false },
+//!     { 0, "cee", false },
+//!     { 'd', "", true },
+//!     { 'e', "", true },
+//!     { 'f', "foo", true } }
+//!
+//! ./main hello world -a -a --cee -d 12 -f 34
+//! and
+//! ./main hello world -a -a --cee -d 12 --foo 34
+//!
+//! will result in:
+//!
+//! TRTParsedArgs {
+//!      errMsg: "",
+//!      values: { { 2, {} },
+//!                { 0, {} },
+//!                { 1, {} },
+//!                { 1, {"12"} },
+//!                { 0, {} },
+//!                { 1, {"34"} } }
+//!      positionalArgs: {"hello", "world"},
+//! }
+//!
+//! Non-POSIX behavior:
+//!      - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
+//!        option must have its own hyphen prefix.
+//!      - Does not support -e12 as a shorthand for "-e 12". A short option's
+//!        value MUST be passed as a separate argument; only long options
+//!        accept the attached --name=value form.
+//!
+//! @param[in] argc The number of arguments passed to main (including the
+//!            file name, which is disregarded)
+//! @param[in] argv The arguments passed to main (including the file name,
+//!            which is disregarded)
+//! @param[in] options List of TRTOptions to parse
+//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
+//!         the fields.
+TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector<TRTOption>& options);
+} // namespace utility
+} // namespace nvinfer1
+
+#endif // TRT_GET_OPTIONS_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getopt.c b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getopt.c
new file mode 100644
index 0000000000000000000000000000000000000000..c1da08b5b84d60aba78cc4e5ebe4a5248192e5c3
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getopt.c
@@ -0,0 +1,568 @@
+/*	$OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $	*/
+/*	$NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $	*/
+
+/*
+ * Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Sponsored in part by the Defense Advanced Research Projects
+ * Agency (DARPA) and Air Force Research Laboratory, Air Force
+ * Materiel Command, USAF, under agreement number F39502-99-1-0512.
+ */
+/*-
+ * Copyright (c) 2000 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Dieter Baron and Thomas Klausner.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "getoptWin.h"
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <windows.h>
+
+#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
+
+#ifdef REPLACE_GETOPT
+int opterr = 1;   /* if error message should be printed */
+int optind = 1;   /* index into parent argv vector */
+int optopt = '?'; /* character checked for validity */
+#undef optreset   /* see getopt.h */
+#define optreset __mingw_optreset
+int optreset; /* reset getopt */
+char* optarg; /* argument associated with option */
+#endif
+
+#define PRINT_ERROR ((opterr) && (*options != ':'))
+
+#define FLAG_PERMUTE 0x01  /* permute non-options to the end of argv */
+#define FLAG_ALLARGS 0x02  /* treat non-options as args to option "-1" */
+#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
+
+/* return values */
+#define BADCH (int) '?'
+#define BADARG ((*options == ':') ? (int) ':' : (int) '?')
+#define INORDER (int) 1
+
+#ifndef __CYGWIN__
+#define __progname __argv[0]
+#else
+extern char __declspec(dllimport) * __progname;
+#endif
+
+#ifdef __CYGWIN__
+static char EMSG[] = "";
+#else
+#define EMSG ""
+#endif
+
+static int getopt_internal(int, char* const*, char const*, const struct option*, int*, int);
+static int parse_long_options(char* const*, char const*, const struct option*, int*, int);
+static int gcd(int, int);
+static void permute_args(int, int, int, char* const*);
+
+static char* place = EMSG; /* option letter processing */
+
+/* XXX: set optreset to 1 rather than these two */
+static int nonopt_start = -1; /* first non option argument (for permute) */
+static int nonopt_end = -1;   /* first option after non options (for permute) */
+
+/* Error messages */
+static char const recargchar[] = "option requires an argument -- %c";
+static char const recargstring[] = "option requires an argument -- %s";
+static char const ambig[] = "ambiguous option -- %.*s";
+static char const noarg[] = "option doesn't take an argument -- %.*s";
+static char const illoptchar[] = "unknown option -- %c";
+static char const illoptstring[] = "unknown option -- %s";
+
+/*
+ * Write "<program name>: <formatted message>\n" to stderr. The message part
+ * is omitted when fmt is NULL.
+ */
+static void _vwarnx(char const* fmt, va_list ap)
+{
+    fprintf(stderr, "%s: ", __progname);
+    if (fmt)
+    {
+        vfprintf(stderr, fmt, ap);
+    }
+    fputc('\n', stderr);
+}
+
+/*
+ * printf-style front end for _vwarnx(): collects the variadic arguments and
+ * forwards them together with fmt.
+ */
+static void warnx(char const* fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    _vwarnx(fmt, args);
+    va_end(args);
+}
+
+/*
+ * Compute the greatest common divisor of a and b.
+ */
+static int gcd(int a, int b)
+{
+    /* Euclid's algorithm: replace (a, b) by (b, a mod b) until the remainder vanishes. */
+    for (;;)
+    {
+        int const rem = a % b;
+
+        if (rem == 0)
+        {
+            return b;
+        }
+        a = b;
+        b = rem;
+    }
+}
+
+/*
+ * Exchange the block from nonopt_start to nonopt_end with the block
+ * from nonopt_end to opt_end (keeping the same order of arguments
+ * in each block).
+ */
+static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
+{
+    int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
+    char* swap;
+
+    /*
+     * compute lengths of blocks and number and size of cycles
+     */
+    /* nnonopts: length of the first block, [panonopt_start, panonopt_end) */
+    nnonopts = panonopt_end - panonopt_start;
+    /* nopts: length of the second block, [panonopt_end, opt_end) */
+    nopts = opt_end - panonopt_end;
+    /*
+     * NOTE(review): gcd() divides by its second argument, so this relies on
+     * nopts being non-zero -- confirm that every caller guarantees it.
+     */
+    ncycle = gcd(nnonopts, nopts);
+    cyclelen = (opt_end - panonopt_start) / ncycle;
+
+    /*
+     * The exchange is done in place, one cycle at a time: each of the ncycle
+     * cycles starts at cstart and performs cyclelen swaps against the slot
+     * that pos walks through.
+     */
+    for (i = 0; i < ncycle; i++)
+    {
+        cstart = panonopt_end + i;
+        pos = cstart;
+        for (j = 0; j < cyclelen; j++)
+        {
+            /* step pos back by the first block's length, or forward by the second's */
+            if (pos >= panonopt_end)
+                pos -= nnonopts;
+            else
+                pos += nopts;
+            swap = nargv[pos];
+            /* nargv is declared char* const*; the casts below remove that const to store into it */
+            /* LINTED const cast */
+            ((char**) nargv)[pos] = nargv[cstart];
+            /* LINTED const cast */
+            ((char**) nargv)[cstart] = swap;
+        }
+    }
+}
+
+/*
+ * parse_long_options --
+ *	Parse long options in argc/argv argument vector.
+ *
+ * The candidate option text is taken from the file-scope `place` pointer and
+ * optind is advanced past it before matching. An exact name match wins
+ * immediately; otherwise a prefix (abbreviation) is accepted, and several
+ * prefix matches are only treated as ambiguous when their has_arg/flag/val
+ * differ.
+ *
+ * On a match, *idx (when idx is non-NULL) receives the index into
+ * long_options, and the return value is 0 if the option has a flag pointer
+ * (val is stored through it) or the option's val otherwise.
+ * Returns BADCH for an ambiguous or unknown option and BADARG for a missing
+ * or unexpected argument.
+ * Returns -1 if short_too is set and the option does not match long_options.
+ */
+static int parse_long_options(
+    char* const* nargv, char const* options, const struct option* long_options, int* idx, int short_too)
+{
+    char *current_argv, *has_equal;
+    size_t current_argv_len;
+    int i, ambiguous, match;
+
+/* True when two long_options entries would be handled the same way. */
+#define IDENTICAL_INTERPRETATION(_x, _y)                                                                               \
+    (long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag    \
+        && long_options[(_x)].val == long_options[(_y)].val)
+
+    current_argv = place;
+    match = -1;
+    ambiguous = 0;
+
+    /* consume the current argv element; undone below on the error paths that decrement optind */
+    optind++;
+
+    if ((has_equal = strchr(current_argv, '=')) != NULL)
+    {
+        /* argument found (--option=arg) */
+        current_argv_len = has_equal - current_argv;
+        has_equal++;
+    }
+    else
+        current_argv_len = strlen(current_argv);
+
+    /* the long_options table is terminated by an entry whose name is NULL */
+    for (i = 0; long_options[i].name; i++)
+    {
+        /* find matching long option */
+        if (strncmp(current_argv, long_options[i].name, current_argv_len))
+            continue;
+
+        if (strlen(long_options[i].name) == current_argv_len)
+        {
+            /* exact match */
+            match = i;
+            ambiguous = 0;
+            break;
+        }
+        /*
+         * If this is a known short option, don't allow
+         * a partial match of a single character.
+         */
+        if (short_too && current_argv_len == 1)
+            continue;
+
+        if (match == -1) /* partial match */
+            match = i;
+        else if (!IDENTICAL_INTERPRETATION(i, match))
+            ambiguous = 1;
+    }
+    if (ambiguous)
+    {
+        /* ambiguous abbreviation */
+        if (PRINT_ERROR)
+            warnx(ambig, (int) current_argv_len, current_argv);
+        optopt = 0;
+        return (BADCH);
+    }
+    if (match != -1)
+    { /* option found */
+        if (long_options[match].has_arg == no_argument && has_equal)
+        {
+            /* "--option=arg" was given for an option that takes no argument */
+            if (PRINT_ERROR)
+                warnx(noarg, (int) current_argv_len, current_argv);
+            /*
+             * XXX: GNU sets optopt to val regardless of flag
+             */
+            if (long_options[match].flag == NULL)
+                optopt = long_options[match].val;
+            else
+                optopt = 0;
+            return (BADARG);
+        }
+        if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
+        {
+            if (has_equal)
+                optarg = has_equal;
+            else if (long_options[match].has_arg == required_argument)
+            {
+                /*
+                 * optional argument doesn't use next nargv
+                 */
+                optarg = nargv[optind++];
+            }
+        }
+        if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
+        {
+            /*
+             * Missing argument; leading ':' indicates no error
+             * should be generated.
+             */
+            if (PRINT_ERROR)
+                warnx(recargstring, current_argv);
+            /*
+             * XXX: GNU sets optopt to val regardless of flag
+             */
+            if (long_options[match].flag == NULL)
+                optopt = long_options[match].val;
+            else
+                optopt = 0;
+            /* give back the slot that was consumed while looking for the argument */
+            --optind;
+            return (BADARG);
+        }
+    }
+    else
+    { /* unknown option */
+        if (short_too)
+        {
+            /* undo the optind++ above and report "no long match" with -1 */
+            --optind;
+            return (-1);
+        }
+        if (PRINT_ERROR)
+            warnx(illoptstring, current_argv);
+        optopt = 0;
+        return (BADCH);
+    }
+    if (idx)
+        *idx = match;
+    if (long_options[match].flag)
+    {
+        /* flag-style option: store val through the pointer and return 0 */
+        *long_options[match].flag = long_options[match].val;
+        return (0);
+    }
+    else
+        return (long_options[match].val);
+#undef IDENTICAL_INTERPRETATION
+}
+
+/*
+ * getopt_internal -- engine shared by getopt(), getopt_long() and getopt_long_only().
+ *	Parse argc/argv argument vector.  Returns an option value, -1 when scanning stops, INORDER for a non-option under FLAG_ALLARGS, or BADCH/BADARG on error.
+ */
+static int getopt_internal(
+    int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx, int flags)
+{
+    char const* oli; /* option letter list index */
+    int optchar, short_too;
+    static int posixly_correct = -1; /* -1 = POSIXLY_CORRECT not looked up yet */
+
+    if (options == NULL)
+        return (-1);
+
+    /*
+     * XXX Some GNU programs (like cvs) set optind to 0 instead of
+     * XXX using optreset.  Work around this braindamage.
+     */
+    if (optind == 0)
+        optind = optreset = 1;
+
+    /*
+     * Disable GNU extensions if POSIXLY_CORRECT is set or options
+     * string begins with a '+'.
+     *
+     * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
+     *                 optreset != 0 for GNU compatibility.
+     */
+    if (posixly_correct == -1 || optreset != 0)
+        posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
+    if (*options == '-')
+        flags |= FLAG_ALLARGS;
+    else if (posixly_correct || *options == '+')
+        flags &= ~FLAG_PERMUTE;
+    if (*options == '+' || *options == '-')
+        options++;
+
+    optarg = NULL;
+    if (optreset)
+        nonopt_start = nonopt_end = -1;
+start:
+    if (optreset || !*place)
+    { /* update scanning pointer */
+        optreset = 0;
+        if (optind >= nargc)
+        { /* end of argument vector */
+            place = EMSG;
+            if (nonopt_end != -1)
+            {
+                /* do permutation, if we have to */
+                permute_args(nonopt_start, nonopt_end, optind, nargv);
+                optind -= nonopt_end - nonopt_start;
+            }
+            else if (nonopt_start != -1)
+            {
+                /*
+                 * If we skipped non-options, set optind
+                 * to the first of them.
+                 */
+                optind = nonopt_start;
+            }
+            nonopt_start = nonopt_end = -1;
+            return (-1);
+        }
+        if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
+        {
+            place = EMSG; /* found non-option */
+            if (flags & FLAG_ALLARGS)
+            {
+                /*
+                 * GNU extension:
+                 * return non-option as argument to option 1
+                 */
+                optarg = nargv[optind++];
+                return (INORDER);
+            }
+            if (!(flags & FLAG_PERMUTE))
+            {
+                /*
+                 * If no permutation wanted, stop parsing
+                 * at first non-option.
+                 */
+                return (-1);
+            }
+            /* do permutation */
+            if (nonopt_start == -1)
+                nonopt_start = optind;
+            else if (nonopt_end != -1)
+            {
+                permute_args(nonopt_start, nonopt_end, optind, nargv);
+                nonopt_start = optind - (nonopt_end - nonopt_start);
+                nonopt_end = -1;
+            }
+            optind++;
+            /* process next argument */
+            goto start;
+        }
+        if (nonopt_start != -1 && nonopt_end == -1)
+            nonopt_end = optind; /* an option follows the skipped non-options: close that run here */
+
+        /*
+         * If we have "-" do nothing, if "--" we are done.
+         */
+        if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
+        {
+            optind++;
+            place = EMSG;
+            /*
+             * We found an option (--), so if we skipped
+             * non-options, we have to permute.
+             */
+            if (nonopt_end != -1)
+            {
+                permute_args(nonopt_start, nonopt_end, optind, nargv);
+                optind -= nonopt_end - nonopt_start;
+            }
+            nonopt_start = nonopt_end = -1;
+            return (-1);
+        }
+    }
+
+    /*
+     * Check long options if:
+     *  1) we were passed some
+     *  2) the arg is not just "-"
+     *  3) either the arg starts with -- or we are getopt_long_only()
+     */
+    if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
+    {
+        short_too = 0;
+        if (*place == '-')
+            place++; /* --foo long option */
+        else if (*place != ':' && strchr(options, *place) != NULL)
+            short_too = 1; /* could be short option too */
+
+        optchar = parse_long_options(nargv, options, long_options, idx, short_too);
+        if (optchar != -1)
+        {
+            place = EMSG;
+            return (optchar);
+        }
+    }
+
+    if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
+        || (oli = strchr(options, optchar)) == NULL)
+    {
+        /*
+         * If the user specified "-" and '-' isn't listed in
+         * options, return -1 (non-option) as per POSIX.
+         * Otherwise, it is an unknown option character (or ':').
+         */
+        if (optchar == (int) '-' && *place == '\0')
+            return (-1);
+        if (!*place)
+            ++optind; /* that was the last character of this argv element: move on to the next */
+        if (PRINT_ERROR)
+            warnx(illoptchar, optchar);
+        optopt = optchar;
+        return (BADCH);
+    }
+    if (long_options != NULL && optchar == 'W' && oli[1] == ';')
+    {
+        /* "W;" in options: the text after -W (attached or in the next argv element) is parsed as a long option */
+        if (*place) /* no space */
+            /* NOTHING */;
+        else if (++optind >= nargc)
+        { /* no arg */
+            place = EMSG;
+            if (PRINT_ERROR)
+                warnx(recargchar, optchar);
+            optopt = optchar;
+            return (BADARG);
+        }
+        else /* white space */
+            place = nargv[optind];
+        optchar = parse_long_options(nargv, options, long_options, idx, 0);
+        place = EMSG;
+        return (optchar);
+    }
+    if (*++oli != ':')
+    { /* doesn't take argument */
+        if (!*place)
+            ++optind;
+    }
+    else
+    { /* takes an argument; a second ':' in options marks it as optional */
+        optarg = NULL;
+        if (*place) /* no white space */
+            optarg = place;
+        else if (oli[1] != ':')
+        { /* arg not optional */
+            if (++optind >= nargc)
+            { /* no arg */
+                place = EMSG;
+                if (PRINT_ERROR)
+                    warnx(recargchar, optchar);
+                optopt = optchar;
+                return (BADARG);
+            }
+            else
+                optarg = nargv[optind];
+        }
+        place = EMSG;
+        ++optind;
+    }
+    /* dump back option letter */
+    return (optchar);
+}
+
+#ifdef REPLACE_GETOPT
+/*
+ * getopt --
+ *	Parse argc/argv argument vector (short options only; compiled only when REPLACE_GETOPT is defined).
+ *
+ * [eventually this will replace the BSD getopt]
+ */
+int getopt(int nargc, char* const* nargv, char const* options)
+{
+
+    /*
+     * We don't pass FLAG_PERMUTE to getopt_internal() since
+     * the BSD getopt(3) (unlike GNU) has never done this.
+     *
+     * Furthermore, since many privileged programs call getopt()
+     * before dropping privileges it makes sense to keep things
+     * as simple (and bug-free) as possible.
+     */
+    return (getopt_internal(nargc, nargv, options, NULL, NULL, 0)); /* no long options, no index output, no flags */
+}
+#endif /* REPLACE_GETOPT */
+
+/*
+ * getopt_long --
+ *	Parse argc/argv argument vector, accepting long options; forwards to getopt_internal() with FLAG_PERMUTE.
+ */
+int getopt_long(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx)
+{
+
+    return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
+}
+
+/*
+ * getopt_long_only --
+ *	Like getopt_long(), but also passes FLAG_LONGONLY so a single-dash argument is tried as a long option first.
+ */
+int getopt_long_only(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx)
+{
+
+    return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getoptWin.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getoptWin.h
new file mode 100644
index 0000000000000000000000000000000000000000..a1dc6ffa9fba7ac2b18f34586432addb8a9b8e5f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/getoptWin.h
@@ -0,0 +1,124 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __GETOPT_H__
+/**
+ * DISCLAIMER
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is a part of the w64 mingw-runtime package.
+ *
+ * The w64 mingw-runtime package and its code is distributed in the hope that it
+ * will be useful but WITHOUT ANY WARRANTY.  ALL WARRANTIES, EXPRESSED OR
+ * IMPLIED ARE HEREBY DISCLAIMED.  This includes but is not limited to
+ * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#define __GETOPT_H__
+
+/* All the headers include this file. */
+#include <crtdefs.h>
+
+#if defined(WINGETOPT_SHARED_LIB)
+#if defined(BUILDING_WINGETOPT_DLL)
+#define WINGETOPT_API __declspec(dllexport)
+#else
+#define WINGETOPT_API __declspec(dllimport)
+#endif
+#else
+#define WINGETOPT_API
+#endif
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    WINGETOPT_API extern int optind; /* index of next argv element to scan; first non-option once parsing ends */
+    WINGETOPT_API extern int optopt; /* single option character, as parsed     */
+    WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
+    /* (user may set to zero, to suppress)    */
+
+    WINGETOPT_API extern char* optarg; /* pointer to argument of current option  */
+
+    extern int getopt(int nargc, char* const* nargv, char const* options); /* short options only. NOTE(review): not marked WINGETOPT_API, unlike the variables above - confirm for DLL builds */
+
+#ifdef _BSD_SOURCE
+/*
+ * BSD adds the non-standard `optreset' feature, for reinitialisation
+ * of `getopt' parsing.  We support this feature, for applications which
+ * proclaim their BSD heritage, before including this header; however,
+ * to maintain portability, developers are advised to avoid it.
+ */
+#define optreset __mingw_optreset
+    extern int optreset;
+#endif
+#ifdef __cplusplus
+}
+#endif
+/*
+ * POSIX requires the `getopt' API to be specified in `unistd.h';
+ * thus, `unistd.h' includes this header.  However, we do not want
+ * to expose the `getopt_long' or `getopt_long_only' APIs, when
+ * included in this manner.  Thus, close the standard __GETOPT_H__
+ * declarations block, and open an additional __GETOPT_LONG_H__
+ * specific block, only when *not* __UNISTD_H_SOURCED__, in which
+ * to declare the extended API.
+ */
+#endif /* !defined(__GETOPT_H__) */
+
+#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
+#define __GETOPT_LONG_H__
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+    struct option /* specification for a long form option... */
+    {
+        char const* name; /* option name, without leading hyphens */
+        int has_arg;      /* no_argument, required_argument or optional_argument */
+        int* flag;        /* if non-NULL, *flag is set to val and the parser returns 0 */
+        int val;          /* value returned by the parser, or stored in *flag when flag is non-NULL */
+    };
+
+    enum /* permitted values for the `has_arg' field of struct option */
+    {
+        no_argument = 0,   /* option never takes an argument; "--opt=value" is rejected */
+        required_argument, /* argument required: taken from "--opt=value" or from the next argv element */
+        optional_argument  /* argument optional: only taken from "--opt=value", never from the next argv element */
+    };
+
+    extern int getopt_long(
+        int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx); /* short options plus "--name" long options */
+    extern int getopt_long_only(
+        int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx); /* as getopt_long(), but "-name" may also match a long option */
+/*
+ * Previous MinGW implementation had...
+ */
+#ifndef HAVE_DECL_GETOPT
+/*
+ * ...for the long form API only; keep this for compatibility.
+ */
+#define HAVE_DECL_GETOPT 1
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/half.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/half.h
new file mode 100644
index 0000000000000000000000000000000000000000..dc808e72c8a3a6770cef184931a66b1581090ef7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/half.h
@@ -0,0 +1,4303 @@
+// half - IEEE 754-based half-precision floating point library.
+//
+// Copyright (c) 2012-2017 Christian Rau <rauy@users.sourceforge.net>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+// documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+// Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Version 1.12.0
+
+/// \file
+/// Main header file for half precision functionality.
+
+#ifndef HALF_HALF_HPP
+#define HALF_HALF_HPP
+
+/// Combined gcc version number.
+#define HALF_GNUC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+// check C++11 language features
+#if defined(__clang__) // clang
+#if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
+#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
+#endif
+#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
+#define HALF_ENABLE_CPP11_CONSTEXPR 1
+#endif
+#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
+#define HALF_ENABLE_CPP11_NOEXCEPT 1
+#endif
+#if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
+#define HALF_ENABLE_CPP11_USER_LITERALS 1
+#endif
+#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG)
+#define HALF_ENABLE_CPP11_LONG_LONG 1
+#endif
+/*#elif defined(__INTEL_COMPILER)								//Intel C++
+    #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)		????????
+        #define HALF_ENABLE_CPP11_STATIC_ASSERT 1
+    #endif
+    #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)			????????
+        #define HALF_ENABLE_CPP11_CONSTEXPR 1
+    #endif
+    #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)			????????
+        #define HALF_ENABLE_CPP11_NOEXCEPT 1
+    #endif
+    #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG)			????????
+        #define HALF_ENABLE_CPP11_LONG_LONG 1
+    #endif*/
+#elif defined(__GNUC__) // gcc
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L
+#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
+#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
+#endif
+#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
+#define HALF_ENABLE_CPP11_CONSTEXPR 1
+#endif
+#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
+#define HALF_ENABLE_CPP11_NOEXCEPT 1
+#endif
+#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
+#define HALF_ENABLE_CPP11_USER_LITERALS 1
+#endif
+#if !defined(HALF_ENABLE_CPP11_LONG_LONG)
+#define HALF_ENABLE_CPP11_LONG_LONG 1
+#endif
+#endif
+#elif defined(_MSC_VER) // Visual C++
+#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR)
+#define HALF_ENABLE_CPP11_CONSTEXPR 1
+#endif
+#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT)
+#define HALF_ENABLE_CPP11_NOEXCEPT 1
+#endif
+#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS)
+#define HALF_ENABLE_CPP11_USER_LITERALS 1
+#endif
+#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT)
+#define HALF_ENABLE_CPP11_STATIC_ASSERT 1
+#endif
+#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG)
+#define HALF_ENABLE_CPP11_LONG_LONG 1
+#endif
+#define HALF_POP_WARNINGS 1
+#pragma warning(push)
+#pragma warning(disable : 4099 4127 4146) // struct vs class, constant in if, negative unsigned
+#endif
+
+// check C++11 library features
+#include <utility>
+#if defined(_LIBCPP_VERSION) // libc++
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103
+#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS
+#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
+#endif
+#ifndef HALF_ENABLE_CPP11_CSTDINT
+#define HALF_ENABLE_CPP11_CSTDINT 1
+#endif
+#ifndef HALF_ENABLE_CPP11_CMATH
+#define HALF_ENABLE_CPP11_CMATH 1
+#endif
+#ifndef HALF_ENABLE_CPP11_HASH
+#define HALF_ENABLE_CPP11_HASH 1
+#endif
+#endif
+#elif defined(__GLIBCXX__) // libstdc++
+#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103
+#ifdef __clang__
+#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS)
+#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
+#endif
+#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT)
+#define HALF_ENABLE_CPP11_CSTDINT 1
+#endif
+#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH)
+#define HALF_ENABLE_CPP11_CMATH 1
+#endif
+#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH)
+#define HALF_ENABLE_CPP11_HASH 1
+#endif
+#else
+#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT)
+#define HALF_ENABLE_CPP11_CSTDINT 1
+#endif
+#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH)
+#define HALF_ENABLE_CPP11_CMATH 1
+#endif
+#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH)
+#define HALF_ENABLE_CPP11_HASH 1
+#endif
+#endif
+#endif
+#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++
+#if _CPPLIB_VER >= 520
+#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS
+#define HALF_ENABLE_CPP11_TYPE_TRAITS 1
+#endif
+#ifndef HALF_ENABLE_CPP11_CSTDINT
+#define HALF_ENABLE_CPP11_CSTDINT 1
+#endif
+#ifndef HALF_ENABLE_CPP11_HASH
+#define HALF_ENABLE_CPP11_HASH 1
+#endif
+#endif
+#if _CPPLIB_VER >= 610
+#ifndef HALF_ENABLE_CPP11_CMATH
+#define HALF_ENABLE_CPP11_CMATH 1
+#endif
+#endif
+#endif
+#undef HALF_GNUC_VERSION
+
+// support constexpr
+#if HALF_ENABLE_CPP11_CONSTEXPR
+#define HALF_CONSTEXPR constexpr
+#define HALF_CONSTEXPR_CONST constexpr
+#else
+#define HALF_CONSTEXPR
+#define HALF_CONSTEXPR_CONST const
+#endif
+
+// support noexcept
+#if HALF_ENABLE_CPP11_NOEXCEPT
+#define HALF_NOEXCEPT noexcept
+#define HALF_NOTHROW noexcept
+#else
+#define HALF_NOEXCEPT
+#define HALF_NOTHROW throw()
+#endif
+
+#include <algorithm>
+#include <climits>
+#include <cmath>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#if HALF_ENABLE_CPP11_TYPE_TRAITS
+#include <type_traits>
+#endif
+#if HALF_ENABLE_CPP11_CSTDINT
+#include <cstdint>
+#endif
+#if HALF_ENABLE_CPP11_HASH
+#include <functional>
+#endif
+
+/// Default rounding mode.
+/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and `float`s as
+/// well as for the half_cast() if not specifying a rounding mode explicitly. It can be redefined (before including
+/// half.hpp) to one of the standard rounding modes using their respective constants or the equivalent values of
+/// `std::float_round_style`:
+///
+/// `std::float_round_style`         | value | rounding
+/// ---------------------------------|-------|-------------------------
+/// `std::round_indeterminate`       | -1    | fastest
+/// `std::round_toward_zero`         | 0     | toward zero
+/// `std::round_to_nearest`          | 1     | to nearest (default)
+/// `std::round_toward_infinity`     | 2     | toward positive infinity
+/// `std::round_toward_neg_infinity` | 3     | toward negative infinity
+///
+/// By default this is set to `1` (`std::round_to_nearest`). Setting it to `-1` (`std::round_indeterminate`) uses
+/// truncation (round toward zero, but with overflows set to infinity) and is the fastest rounding mode possible. It
+/// can even be set to `std::numeric_limits<float>::round_style` to synchronize the rounding mode with that of the
+/// underlying single-precision implementation.
+#ifndef HALF_ROUND_STYLE
+#define HALF_ROUND_STYLE 1 // = std::round_to_nearest
+#endif
+
+/// Tie-breaking behaviour for round to nearest.
+/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. By default this
+/// is defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way
+/// cases (and thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more
+/// IEEE-conformant behaviour is needed.
+#ifndef HALF_ROUND_TIES_TO_EVEN
+#define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero
+#endif
+
+/// Value signaling overflow.
+/// In correspondence with `HUGE_VAL[F|L]` from `<cmath>` this symbol expands to a positive value signaling the overflow
+/// of an operation, in particular it just evaluates to positive infinity.
+#define HUGE_VALH std::numeric_limits<half_float::half>::infinity()
+
+/// Fast half-precision fma function.
+/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate
+/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all
+/// arithmetic operations, this is in fact always the case.
+#define FP_FAST_FMAH 1
+
+#ifndef FP_ILOGB0
+#define FP_ILOGB0 INT_MIN
+#endif
+#ifndef FP_ILOGBNAN
+#define FP_ILOGBNAN INT_MAX
+#endif
+#ifndef FP_SUBNORMAL
+#define FP_SUBNORMAL 0
+#endif
+#ifndef FP_ZERO
+#define FP_ZERO 1
+#endif
+#ifndef FP_NAN
+#define FP_NAN 2
+#endif
+#ifndef FP_INFINITE
+#define FP_INFINITE 3
+#endif
+#ifndef FP_NORMAL
+#define FP_NORMAL 4
+#endif
+
+/// Main namespace for half precision functionality.
+/// This namespace contains all the functionality provided by the library.
+namespace half_float
+{
+class half;
+
+#if HALF_ENABLE_CPP11_USER_LITERALS
+/// Library-defined half-precision literals.
+/// Import this namespace to enable half-precision floating point literals:
+/// ~~~~{.cpp}
+/// using namespace half_float::literal;
+/// half_float::half h = 4.2_h;
+/// ~~~~
+namespace literal
+{
+half operator"" _h(long double);
+}
+#endif
+
+/// \internal
+/// \brief Implementation details.
+namespace detail
+{
+#if HALF_ENABLE_CPP11_TYPE_TRAITS // <type_traits> is available: thin wrappers over the standard traits
+/// Conditional type: `type` is \a T when \a B is true, otherwise \a F.
+template <bool B, typename T, typename F>
+struct conditional : std::conditional<B, T, F>
+{
+};
+
+/// Helper for tag dispatching.
+template <bool B>
+struct bool_type : std::integral_constant<bool, B>
+{
+};
+using std::false_type;
+using std::true_type;
+
+/// Type traits for floating point types.
+template <typename T>
+struct is_float : std::is_floating_point<T>
+{
+};
+#else // no <type_traits>: hand-written equivalents of the definitions above
+/// Conditional type: `type` is \a T for a true condition; the `false` specialization below selects \a F instead.
+template <bool, typename T, typename>
+struct conditional
+{
+    typedef T type;
+};
+template <typename T, typename F>
+struct conditional<false, T, F>
+{
+    typedef F type;
+};
+
+/// Helper for tag dispatching (empty tag types distinguished only by their bool parameter).
+template <bool>
+struct bool_type
+{
+};
+typedef bool_type<true> true_type;
+typedef bool_type<false> false_type;
+
+/// Type traits for floating point types: false by default, true for float, double and long double (cv-qualifiers ignored).
+template <typename>
+struct is_float : false_type
+{
+};
+template <typename T>
+struct is_float<const T> : is_float<T>
+{
+};
+template <typename T>
+struct is_float<volatile T> : is_float<T>
+{
+};
+template <typename T>
+struct is_float<const volatile T> : is_float<T>
+{
+};
+template <>
+struct is_float<float> : true_type
+{
+};
+template <>
+struct is_float<double> : true_type
+{
+};
+template <>
+struct is_float<long double> : true_type
+{
+};
+#endif // HALF_ENABLE_CPP11_TYPE_TRAITS
+
+/// Type traits for floating point bits: `type` names an unsigned integer type associated with \a T (primary template: `unsigned char`; cv-qualifiers are ignored).
+template <typename T>
+struct bits
+{
+    typedef unsigned char type;
+};
+template <typename T>
+struct bits<const T> : bits<T>
+{
+};
+template <typename T>
+struct bits<volatile T> : bits<T>
+{
+};
+template <typename T>
+struct bits<const volatile T> : bits<T>
+{
+};
+
+#if HALF_ENABLE_CPP11_CSTDINT // <cstdint> is available: use the least-width typedefs
+/// Unsigned integer of (at least) 16 bits width.
+typedef std::uint_least16_t uint16;
+
+/// Unsigned integer of (at least) 32 bits width.
+template <>
+struct bits<float>
+{
+    typedef std::uint_least32_t type;
+};
+
+/// Unsigned integer of (at least) 64 bits width.
+template <>
+struct bits<double>
+{
+    typedef std::uint_least64_t type;
+};
+#else // no <cstdint>: pick builtin integer types by their width
+/// Unsigned integer of (at least) 16 bits width.
+typedef unsigned short uint16;
+
+/// Unsigned integer of (at least) 32 bits width: `unsigned int` if it has 32 value bits, otherwise `unsigned long`.
+template <>
+struct bits<float> : conditional<std::numeric_limits<unsigned int>::digits >= 32, unsigned int, unsigned long>
+{
+};
+
+#if HALF_ENABLE_CPP11_LONG_LONG
+/// Unsigned integer of (at least) 64 bits width: `unsigned long` if it has 64 value bits, otherwise `unsigned long long`.
+template <>
+struct bits<double> : conditional<std::numeric_limits<unsigned long>::digits >= 64, unsigned long, unsigned long long>
+{
+};
+#else
+/// Best-effort integer for double bits: `unsigned long`, which is NOT guaranteed to be 64 bits wide without `long long`.
+template <>
+struct bits<double>
+{
+    typedef unsigned long type;
+};
+#endif
+#endif
+
+/// Tag type for binary construction (empty struct, carries no data).
+struct binary_t
+{
+};
+
+/// Tag for binary construction. NOTE(review): presumably passed to a raw-bits constructor defined later in this header - confirm.
+HALF_CONSTEXPR_CONST binary_t binary = binary_t();
+
+/// Temporary half-precision expression.
+/// This class represents a half-precision expression which just stores a single-precision value internally.
+struct expr
+{
+    /// Conversion constructor (explicit, so a `float` is never implicitly turned into an `expr`).
+    /// \param f single-precision value to convert
+    explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {}
+
+    /// Implicit conversion to single-precision.
+    /// \return single precision value representing expression value
+    HALF_CONSTEXPR operator float() const HALF_NOEXCEPT
+    {
+        return value_;
+    }
+
+private:
+    /// Internal expression value stored in single-precision.
+    float value_;
+};
+
+/// SFINAE helper for generic half-precision functions.
+/// This class template has to be specialized for each valid combination of argument types to provide a corresponding
+/// `type` member equivalent to \a T. The specializations below cover one, two and three arguments of type half or expr.
+/// \tparam T type to return
+template <typename T, typename, typename = void, typename = void>
+struct enable // primary template: no `type` member, so unsupported argument combinations drop out of overload resolution
+{
+};
+template <typename T>
+struct enable<T, half, void, void> // one argument
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, expr, void, void>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, half, half, void> // two arguments
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, half, expr, void>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, expr, half, void>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, expr, expr, void>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, half, half, half> // three arguments
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, half, half, expr>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, half, expr, half>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, half, expr, expr>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, expr, half, half>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, expr, half, expr>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, expr, expr, half>
+{
+    typedef T type;
+};
+template <typename T>
+struct enable<T, expr, expr, expr>
+{
+    typedef T type;
+};
+
+/// Return type for specialized generic 2-argument half-precision functions.
+/// This class template has to be specialized for each valid combination of argument types to provide a corresponding
+/// `type` member denoting the appropriate return type: half for two half arguments, expr for the other half/expr mixes.
+/// \tparam T first argument type
+/// \tparam U second argument type
+template <typename T, typename U>
+struct result : enable<expr, T, U>
+{
+};
+template <>
+struct result<half, half>
+{
+    typedef half type;
+};
+
+/// \name Classification helpers
+/// \{
+
+/// Check for infinity (positive or negative).
+/// \tparam T argument type (builtin floating point type)
+/// \param arg value to query
+/// \retval true if infinity
+/// \retval false else
+template <typename T>
+bool builtin_isinf(T arg)
+{
+#if HALF_ENABLE_CPP11_CMATH // C++11 <cmath> classification functions are available
+    return std::isinf(arg);
+#elif defined(_MSC_VER) // MSVC CRT: neither finite nor NaN means infinite
+    return !::_finite(static_cast<double>(arg)) && !::_isnan(static_cast<double>(arg));
+#else // portable fallback: compare against both infinities
+    return arg == std::numeric_limits<T>::infinity() || arg == -std::numeric_limits<T>::infinity();
+#endif
+}
+
+/// Check for NaN.
+/// \tparam T argument type (builtin floating point type)
+/// \param arg value to query
+/// \retval true if not a number
+/// \retval false else
+template <typename T>
+bool builtin_isnan(T arg)
+{
+#if HALF_ENABLE_CPP11_CMATH // C++11 <cmath> classification functions are available
+    return std::isnan(arg);
+#elif defined(_MSC_VER) // MSVC CRT: _isnan() returns an int, hence the != 0
+    return ::_isnan(static_cast<double>(arg)) != 0;
+#else // portable fallback: under IEEE 754 only NaN compares unequal to itself
+    return arg != arg;
+#endif
+}
+
+/// Check sign (a zero with its sign bit set also counts as negative).
+/// \tparam T argument type (builtin floating point type)
+/// \param arg value to query
+/// \retval true if signbit set
+/// \retval false else
+template <typename T>
+bool builtin_signbit(T arg)
+{
+#if HALF_ENABLE_CPP11_CMATH // C++11 <cmath> classification functions are available
+    return std::signbit(arg);
+#else // portable fallback: for a zero, 1/arg is negative exactly when arg is -0 (assumes IEEE 754 signed zeros)
+    return arg < T() || (arg == T() && T(1) / arg < T());
+#endif
+}
+
+/// \}
+/// \name Conversion
+/// \{
+
+/// Convert IEEE single-precision to half-precision using lookup tables indexed by the sign and exponent bits.
+/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding (no correction after truncation)
+/// \param value single-precision value (the unnamed `true_type` argument only selects this overload)
+/// \return binary representation of half-precision value
+template <std::float_round_style R>
+uint16 float2half_impl(float value, true_type)
+{
+    typedef bits<float>::type uint32;
+    uint32 bits; // filled by the memcpy below; "= *reinterpret_cast<uint32*>(&value)" would violate strict aliasing
+    std::memcpy(&bits, &value, sizeof(float));
+    /*			uint16 hbits = (bits>>16) & 0x8000;
+                bits &= 0x7FFFFFFF;
+                int exp = bits >> 23;
+                if(exp == 255)
+                    return hbits | 0x7C00 | (0x3FF&-static_cast<unsigned>((bits&0x7FFFFF)!=0));
+                if(exp > 142)
+                {
+                    if(R == std::round_toward_infinity)
+                        return hbits | 0x7C00 - (hbits>>15);
+                    if(R == std::round_toward_neg_infinity)
+                        return hbits | 0x7BFF + (hbits>>15);
+                    return hbits | 0x7BFF + (R!=std::round_toward_zero);
+                }
+                int g, s;
+                if(exp > 112)
+                {
+                    g = (bits>>12) & 1;
+                    s = (bits&0xFFF) != 0;
+                    hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF);
+                }
+                else if(exp > 101)
+                {
+                    int i = 125 - exp;
+                    bits = (bits&0x7FFFFF) | 0x800000;
+                    g = (bits>>i) & 1;
+                    s = (bits&((1L<<i)-1)) != 0;
+                    hbits |= bits >> (i+1);
+                }
+                else
+                {
+                    g = 0;
+                    s = bits != 0;
+                }
+                if(R == std::round_to_nearest)
+                    #if HALF_ROUND_TIES_TO_EVEN
+                        hbits += g & (s|hbits);
+                    #else
+                        hbits += g;
+                    #endif
+                else if(R == std::round_toward_infinity)
+                    hbits += ~(hbits>>15) & (s|g);
+                else if(R == std::round_toward_neg_infinity)
+                    hbits += (hbits>>15) & (g|s);
+    */
+    static const uint16 base_table[512] = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008,
+        0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000,
+        0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800,
+        0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
+        0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
+        0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
+        0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
+        0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
+        0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
+        0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
+        0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
+        0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000,
+        0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800,
+        0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00,
+        0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
+        0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
+        0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
+        0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
+        0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
+        0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
+        0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
+        0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00};
+    static const unsigned char shift_table[512] = {24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+        13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+        13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+        24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13};
+    uint16 hbits = base_table[bits >> 23] + static_cast<uint16>((bits & 0x7FFFFF) >> shift_table[bits >> 23]); // truncated result
+    if (R == std::round_to_nearest) // add the first discarded mantissa bit (forced to 1 for exponent field 102) unless hbits has all exponent bits set
+        hbits += (((bits & 0x7FFFFF) >> (shift_table[bits >> 23] - 1)) | (((bits >> 23) & 0xFF) == 102))
+            & ((hbits & 0x7C00) != 0x7C00)
+#if HALF_ROUND_TIES_TO_EVEN
+            & (((((static_cast<uint32>(1) << (shift_table[bits >> 23] - 1)) - 1) & bits) != 0) | hbits)
+#endif
+            ;
+    else if (R == std::round_toward_zero) // step an infinity pattern back by one when the table shift is even (24); entries with shift 13 are left alone
+        hbits -= ((hbits & 0x7FFF) == 0x7C00) & ~shift_table[bits >> 23];
+    else if (R == std::round_toward_infinity) // +1 for results below 0x7C00 that dropped nonzero bits or have index 1..102; -1 for 0xFC00 unless index is 511
+        hbits += ((((bits & 0x7FFFFF & ((static_cast<uint32>(1) << (shift_table[bits >> 23])) - 1)) != 0)
+                      | (((bits >> 23) <= 102) & ((bits >> 23) != 0)))
+                     & (hbits < 0x7C00))
+            - ((hbits == 0xFC00) & ((bits >> 23) != 511));
+    else if (R == std::round_toward_neg_infinity) // mirror image: +1 only with the sign bit set and hbits below 0xFC00; -1 for 0x7C00 unless index is 255
+        hbits += ((((bits & 0x7FFFFF & ((static_cast<uint32>(1) << (shift_table[bits >> 23])) - 1)) != 0)
+                      | (((bits >> 23) <= 358) & ((bits >> 23) != 256)))
+                     & (hbits < 0xFC00) & (hbits >> 15))
+            - ((hbits == 0x7C00) & ((bits >> 23) != 255));
+    return hbits;
+}
+
+/// Convert IEEE double-precision to half-precision by direct bit manipulation (no lookup tables).
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding (no correction after truncation)
+/// \param value double-precision value (the unnamed `true_type` argument only selects this overload)
+/// \return binary representation of half-precision value
+template <std::float_round_style R>
+uint16 float2half_impl(double value, true_type)
+{
+    typedef bits<float>::type uint32;
+    typedef bits<double>::type uint64;
+    uint64 bits; // filled by the memcpy below; "= *reinterpret_cast<uint64*>(&value)" would violate strict aliasing
+    std::memcpy(&bits, &value, sizeof(double));
+    uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; // upper and lower 32-bit words of the pattern
+    uint16 hbits = (hi >> 16) & 0x8000;             // sign bit moved to its half-precision position
+    hi &= 0x7FFFFFFF;                               // drop the sign
+    int exp = hi >> 20;                             // biased exponent field
+    if (exp == 2047) // infinity or NaN: the mantissa becomes all ones when any source mantissa bit is set
+        return hbits | 0x7C00 | (0x3FF & -static_cast<unsigned>((bits & 0xFFFFFFFFFFFFF) != 0));
+    if (exp > 1038) // overflow: yields 0x7C00 or 0x7BFF (plus sign) depending on rounding mode and sign
+    {
+        if (R == std::round_toward_infinity)
+            return hbits | 0x7C00 - (hbits >> 15);
+        if (R == std::round_toward_neg_infinity)
+            return hbits | 0x7BFF + (hbits >> 15);
+        return hbits | 0x7BFF + (R != std::round_toward_zero);
+    }
+    int g, s = lo != 0; // g: first discarded bit, s: set when any lower discarded bit is nonzero
+    if (exp > 1008) // exponent field is stored explicitly together with the top 10 mantissa bits
+    {
+        g = (hi >> 9) & 1;
+        s |= (hi & 0x1FF) != 0;
+        hbits |= ((exp - 1008) << 10) | ((hi >> 10) & 0x3FF);
+    }
+    else if (exp > 997) // no exponent bits are set: the mantissa with its implicit bit made explicit is shifted down
+    {
+        int i = 1018 - exp;
+        hi = (hi & 0xFFFFF) | 0x100000;
+        g = (hi >> i) & 1;
+        s |= (hi & ((1L << i) - 1)) != 0;
+        hbits |= hi >> (i + 1);
+    }
+    else // nothing is kept; only the "any bit set" information is recorded in s
+    {
+        g = 0;
+        s |= hi != 0;
+    }
+    if (R == std::round_to_nearest)
+#if HALF_ROUND_TIES_TO_EVEN
+        hbits += g & (s | hbits);
+#else
+        hbits += g;
+#endif
+    else if (R == std::round_toward_infinity)
+        hbits += ~(hbits >> 15) & (s | g); // only when the sign bit is clear
+    else if (R == std::round_toward_neg_infinity)
+        hbits += (hbits >> 15) & (g | s); // only when the sign bit is set
+    return hbits;
+}
+
+/// Convert non-IEEE floating point to half-precision using only `<cmath>` decomposition (frexp/ldexp/modf).
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding (no correction after truncation)
+/// \tparam T source type (builtin floating point type)
+/// \param value floating point value (the trailing `...` makes this the fallback overload)
+/// \return binary representation of half-precision value
+template <std::float_round_style R, typename T>
+uint16 float2half_impl(T value, ...)
+{
+    uint16 hbits = static_cast<unsigned>(builtin_signbit(value)) << 15; // start with just the sign bit
+    if (value == T())
+        return hbits; // zero keeps its sign
+    if (builtin_isnan(value))
+        return hbits | 0x7FFF; // all exponent and mantissa bits set
+    if (builtin_isinf(value))
+        return hbits | 0x7C00; // all exponent bits set, mantissa clear
+    int exp;
+    std::frexp(value, &exp); // only the binary exponent is used; the returned fraction is discarded
+    if (exp > 16) // overflow: yields 0x7C00 or 0x7BFF (plus sign) depending on rounding mode and sign
+    {
+        if (R == std::round_toward_infinity)
+            return hbits | (0x7C00 - (hbits >> 15));
+        else if (R == std::round_toward_neg_infinity)
+            return hbits | (0x7BFF + (hbits >> 15));
+        return hbits | (0x7BFF + (R != std::round_toward_zero));
+    }
+    if (exp < -13)
+        value = std::ldexp(value, 24); // no exponent bits are set: scale by 2^24 so the integer part is the mantissa
+    else
+    {
+        value = std::ldexp(value, 11 - exp); // integer part now has 11 bits; its top bit carries into the exponent field below
+        hbits |= ((exp + 13) << 10);
+    }
+    T ival, frac = std::modf(value, &ival); // ival: bits to keep, frac: discarded remainder (same sign as value)
+    hbits += static_cast<uint16>(std::abs(static_cast<int>(ival)));
+    if (R == std::round_to_nearest)
+    {
+        frac = std::abs(frac);
+#if HALF_ROUND_TIES_TO_EVEN
+        hbits += (frac > T(0.5)) | ((frac == T(0.5)) & hbits);
+#else
+        hbits += frac >= T(0.5);
+#endif
+    }
+    else if (R == std::round_toward_infinity)
+        hbits += frac > T(); // only positive remainders move the magnitude up
+    else if (R == std::round_toward_neg_infinity)
+        hbits += frac < T(); // only negative remainders move the magnitude up
+    return hbits;
+}
+
+/// Convert a builtin floating point value to half-precision, picking the implementation by tag dispatch.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+/// \tparam T source type (builtin floating point type)
+/// \param value floating point value
+/// \return binary representation of half-precision value
+template <std::float_round_style R, typename T>
+uint16 float2half(T value)
+{
+    typedef bool_type<(std::numeric_limits<T>::is_iec559 && sizeof(typename bits<T>::type) == sizeof(T))> ieee_tag;
+    return float2half_impl<R>(value, ieee_tag());
+}
+
+/// Convert integer to half-precision floating point.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+/// \tparam S `true` if value negative, `false` else
+/// \tparam T type to convert (builtin integer type)
+/// \param value integral value whose sign matches \a S (negative for `S == true`); its magnitude is what gets encoded
+/// \return binary representation of half-precision value (sign bit taken from \a S)
+template <std::float_round_style R, bool S, typename T>
+uint16 int2half_impl(T value)
+{
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+    static_assert(std::is_integral<T>::value, "int to half conversion only supports builtin integer types");
+#endif
+    // Never negate `value` itself: -(value + 1) cannot overflow, so the most negative value of T is handled as well.
+    bool const overflow = S ? (-(value + 1) >= 0xFFFF) : (value > 0xFFFF); // magnitude exceeds 0xFFFF
+    uint16 bits = S << 15;
+    if (overflow)
+    {
+        if (R == std::round_toward_infinity)
+            bits |= 0x7C00 - S;
+        else if (R == std::round_toward_neg_infinity)
+            bits |= 0x7BFF + S;
+        else
+            bits |= 0x7BFF + (R != std::round_toward_zero);
+    }
+    else if (value)
+    {
+        // The magnitude lies in 1..0xFFFF here, so it always fits the 32-bit working type.
+        uint32_t const mag = S ? static_cast<uint32_t>(-(value + 1)) + 1u : static_cast<uint32_t>(value);
+        uint32_t m = mag, exp = 24;
+        for (; m < 0x400; m <<= 1, --exp) {} // scale small magnitudes up to 11 significant bits
+        for (; m > 0x7FF; m >>= 1, ++exp) {} // truncate large magnitudes down to 11 significant bits
+        bits |= (exp << 10) + m; // the leading bit of m (0x400) carries into the exponent field
+        if (exp > 24) // bits were shifted out above, so rounding may apply
+        {
+            if (R == std::round_to_nearest)
+                bits += (mag >> (exp - 25)) & 1
+#if HALF_ROUND_TIES_TO_EVEN
+                    & (((((1 << (exp - 25)) - 1) & mag) != 0) | bits)
+#endif
+                    ;
+            else if (R == std::round_toward_infinity)
+                bits += ((mag & ((1 << (exp - 24)) - 1)) != 0) & !S;
+            else if (R == std::round_toward_neg_infinity)
+                bits += ((mag & ((1 << (exp - 24)) - 1)) != 0) & S;
+        }
+    }
+    return bits;
+}
+
+/// Convert an integer of either sign to half-precision, choosing the sign-specific implementation at runtime.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+/// \tparam T type to convert (builtin integer type)
+/// \param value integral value
+/// \return binary representation of half-precision value
+template <std::float_round_style R, typename T>
+uint16 int2half(T value)
+{
+    return (value >= 0) ? int2half_impl<R, false>(value) : int2half_impl<R, true>(value);
+}
+
+/// Convert half-precision to IEEE single-precision.
+/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf).
+/// \param value binary representation of half-precision value
+/// \return single-precision value
+inline float half2float_impl(uint16 value, float, true_type)
+{
+    typedef bits<float>::type uint32;
+    /*			uint32 bits = static_cast<uint32>(value&0x8000) << 16;
+                int abs = value & 0x7FFF;
+                if(abs)
+                {
+                    bits |= 0x38000000 << static_cast<unsigned>(abs>=0x7C00);
+                    for(; abs<0x400; abs<<=1,bits-=0x800000) ;
+                    bits += static_cast<uint32>(abs) << 13;
+                }
+    */
+    static const uint32 mantissa_table[2048] = {0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000,
+        0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000,
+        0x35700000, 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000,
+        0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, 0x36000000,
+        0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000,
+        0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, 0x36400000, 0x36440000, 0x36480000,
+        0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000,
+        0x36700000, 0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000,
+        0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000,
+        0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000,
+        0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
+        0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000,
+        0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000,
+        0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000,
+        0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
+        0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000,
+        0x370D0000, 0x370E0000, 0x370F0000, 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
+        0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000,
+        0x371F0000, 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
+        0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, 0x37300000,
+        0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
+        0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, 0x37400000, 0x37410000, 0x37420000,
+        0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000,
+        0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000,
+        0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000,
+        0x375E0000, 0x375F0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000,
+        0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
+        0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000,
+        0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000,
+        0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000,
+        0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
+        0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000,
+        0x378E8000, 0x378F0000, 0x378F8000, 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
+        0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000,
+        0x37978000, 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000,
+        0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, 0x37A00000,
+        0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000,
+        0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, 0x37A80000, 0x37A88000, 0x37A90000,
+        0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000,
+        0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000,
+        0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000,
+        0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000,
+        0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,
+        0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000,
+        0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000,
+        0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000,
+        0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000,
+        0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000,
+        0x37D68000, 0x37D70000, 0x37D78000, 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000,
+        0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000,
+        0x37DF8000, 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000,
+        0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, 0x37E80000,
+        0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000,
+        0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, 0x37F00000, 0x37F08000, 0x37F10000,
+        0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000,
+        0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000,
+        0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000,
+        0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000,
+        0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,
+        0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000,
+        0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000,
+        0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000,
+        0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000,
+        0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000,
+        0x380F4000, 0x380F8000, 0x380FC000, 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000,
+        0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000,
+        0x3813C000, 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000,
+        0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, 0x38180000,
+        0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000,
+        0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, 0x381C0000, 0x381C4000, 0x381C8000,
+        0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000,
+        0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000,
+        0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000,
+        0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000,
+        0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,
+        0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000,
+        0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000,
+        0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000,
+        0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000,
+        0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000,
+        0x38334000, 0x38338000, 0x3833C000, 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000,
+        0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000,
+        0x3837C000, 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000,
+        0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, 0x383C0000,
+        0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000,
+        0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, 0x38400000, 0x38404000, 0x38408000,
+        0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000,
+        0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000,
+        0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000,
+        0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000,
+        0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,
+        0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000,
+        0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000,
+        0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000,
+        0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000,
+        0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000,
+        0x38574000, 0x38578000, 0x3857C000, 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000,
+        0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000,
+        0x385BC000, 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000,
+        0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, 0x38600000,
+        0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000,
+        0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, 0x38640000, 0x38644000, 0x38648000,
+        0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000,
+        0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000,
+        0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000,
+        0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000,
+        0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,
+        0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000,
+        0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000,
+        0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000,
+        0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000,
+        0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000,
+        0x387B4000, 0x387B8000, 0x387BC000, 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000,
+        0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000,
+        0x387FC000, 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000,
+        0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, 0x38020000,
+        0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000,
+        0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, 0x38040000, 0x38042000, 0x38044000,
+        0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
+        0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000,
+        0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000,
+        0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000,
+        0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000,
+        0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000,
+        0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000,
+        0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000,
+        0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000,
+        0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000,
+        0x380FA000, 0x380FC000, 0x380FE000, 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000,
+        0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000,
+        0x3811E000, 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000,
+        0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, 0x38140000,
+        0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000,
+        0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, 0x38160000, 0x38162000, 0x38164000,
+        0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
+        0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000,
+        0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000,
+        0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000,
+        0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000,
+        0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000,
+        0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000,
+        0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000,
+        0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
+        0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000,
+        0x3821A000, 0x3821C000, 0x3821E000, 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000,
+        0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000,
+        0x3823E000, 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000,
+        0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, 0x38260000,
+        0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000,
+        0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, 0x38280000, 0x38282000, 0x38284000,
+        0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
+        0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000,
+        0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000,
+        0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000,
+        0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000,
+        0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000,
+        0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000,
+        0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000,
+        0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
+        0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000,
+        0x3833A000, 0x3833C000, 0x3833E000, 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000,
+        0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000,
+        0x3835E000, 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000,
+        0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, 0x38380000,
+        0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000,
+        0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, 0x383A0000, 0x383A2000, 0x383A4000,
+        0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000,
+        0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000,
+        0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000,
+        0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000,
+        0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000,
+        0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000,
+        0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000,
+        0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000,
+        0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
+        0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000,
+        0x3845A000, 0x3845C000, 0x3845E000, 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000,
+        0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000,
+        0x3847E000, 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000,
+        0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, 0x384A0000,
+        0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000,
+        0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, 0x384C0000, 0x384C2000, 0x384C4000,
+        0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000,
+        0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000,
+        0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000,
+        0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000,
+        0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000,
+        0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000,
+        0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000,
+        0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000,
+        0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
+        0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000,
+        0x3857A000, 0x3857C000, 0x3857E000, 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000,
+        0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000,
+        0x3859E000, 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000,
+        0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, 0x385C0000,
+        0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000,
+        0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, 0x385E0000, 0x385E2000, 0x385E4000,
+        0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000,
+        0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000,
+        0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000,
+        0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000,
+        0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000,
+        0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000,
+        0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000,
+        0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000,
+        0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
+        0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000,
+        0x3869A000, 0x3869C000, 0x3869E000, 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000,
+        0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000,
+        0x386BE000, 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000,
+        0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, 0x386E0000,
+        0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000,
+        0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, 0x38700000, 0x38702000, 0x38704000,
+        0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
+        0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000,
+        0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000,
+        0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000,
+        0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000,
+        0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000,
+        0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000,
+        0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000,
+        0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000,
+        0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000,
+        0x387BA000, 0x387BC000, 0x387BE000, 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000,
+        0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000,
+        0x387DE000, 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000,
+        0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000};
+    static const uint32 exponent_table[64] = {0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
+        0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000,
+        0x07800000, 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000,
+        0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, 0x80000000,
+        0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
+        0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, 0x88000000, 0x88800000, 0x89000000,
+        0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000,
+        0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000};
+    static const unsigned short offset_table[64] = {0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
+        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
+        1024, 1024, 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
+        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024};
+    uint32 bits = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] + exponent_table[value >> 10];
+    //			return *reinterpret_cast<float*>(&bits);			//violating strict aliasing!
+    float out;
+    std::memcpy(&out, &bits, sizeof(float));
+    return out;
+}
+
+/// Convert half-precision to IEEE double-precision.
+/// \param value binary representation of half-precision value
+/// \return double-precision value
+inline double half2float_impl(uint16 value, double, true_type)
+{
+    typedef bits<float>::type uint32;
+    typedef bits<double>::type uint64;
+    uint32 hi = static_cast<uint32>(value & 0x8000) << 16; // sign bit moved to bit 31 of the upper word
+    int abs = value & 0x7FFF;                              // exponent and mantissa bits without the sign
+    if (abs)
+    {
+        hi |= 0x3F000000 << static_cast<unsigned>(abs >= 0x7C00); // 0x7E000000 for Inf/NaN, 0x3F000000 otherwise
+        for (; abs < 0x400; abs <<= 1, hi -= 0x100000) // zero exponent field: shift until bit 10 is set, lowering hi
+            ;
+        hi += static_cast<uint32>(abs) << 10; // add the half's exponent and mantissa bits, shifted into position
+    }
+    uint64 bits = static_cast<uint64>(hi) << 32; // upper word only; the lower 32 bits stay zero
+    // memcpy is used instead of *reinterpret_cast<double*>(&bits), which would violate strict aliasing.
+    double out;
+    std::memcpy(&out, &bits, sizeof(double));
+    return out;
+}
+
+/// Convert half-precision to non-IEEE floating point.
+/// Generic fallback that builds the value arithmetically with `std::ldexp` instead of manipulating bits.
+/// \tparam T floating point type to convert to
+/// \param value binary representation of half-precision value
+/// \return floating point value
+template <typename T>
+T half2float_impl(uint16 value, T, ...)
+{
+    typedef std::numeric_limits<T> limits;
+    const int magnitude = value & 0x7FFF;
+    const bool negative = (value & 0x8000) != 0;
+    T result;
+    if (magnitude > 0x7C00)
+    {
+        // NaN: use T's quiet NaN when it has one, otherwise a value-initialized T.
+        result = limits::has_quiet_NaN ? limits::quiet_NaN() : T();
+    }
+    else if (magnitude == 0x7C00)
+    {
+        // Infinity: use T's infinity when it has one, otherwise its largest finite value.
+        result = limits::has_infinity ? limits::infinity() : limits::max();
+    }
+    else if (magnitude > 0x3FF)
+    {
+        // Non-zero exponent field: restore the implicit leading one and scale by the exponent.
+        const int significand = (magnitude & 0x3FF) | 0x400;
+        const int exponent = (magnitude >> 10) - 25;
+        result = std::ldexp(static_cast<T>(significand), exponent);
+    }
+    else
+    {
+        // Zero exponent field: the mantissa bits are scaled by a fixed 2^-24.
+        result = std::ldexp(static_cast<T>(magnitude), -24);
+    }
+    return negative ? -result : result;
+}
+
+/// Convert half-precision to floating point.
+/// \tparam T floating point type to convert to
+/// \param value binary representation of half-precision value
+/// \return floating point value
+template <typename T>
+T half2float(uint16 value)
+{
+    // Compile-time tag: true when T is IEC 559 conformant and has the same size as its bits<T>::type integer.
+    typedef bool_type<(std::numeric_limits<T>::is_iec559 && sizeof(typename bits<T>::type) == sizeof(T))> ieee_tag;
+    return half2float_impl(value, T(), ieee_tag());
+}
+
+/// Convert half-precision floating point to integer.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+/// \tparam E `true` for round to even, `false` for round away from zero
+/// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign
+/// bits) \param value binary representation of half-precision value \return integral value
+template <std::float_round_style R, bool E, typename T>
+T half2int_impl(uint16 value)
+{
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+    static_assert(std::is_integral<T>::value, "half to int conversion only supports builtin integer types");
+#endif
+    uint32_t e = value & 0x7FFF; // exponent and mantissa bits without the sign
+    if (e >= 0x7C00)             // Inf or NaN: saturate to T's limits according to the sign
+        return (value & 0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
+    if (e < 0x3800) // magnitude below 0.5: result is 0, or +/-1 under the directed rounding modes
+    {
+        if (R == std::round_toward_infinity)
+            return T(~(value >> 15) & (e != 0)); // 1 for positive non-zero input, otherwise 0
+        else if (R == std::round_toward_neg_infinity)
+            return -T(value > 0x8000); // -1 for negative non-zero input, otherwise 0
+        return T();
+    }
+    uint32_t m = (value & 0x3FF) | 0x400; // mantissa with the implicit leading one
+    e >>= 10;                             // biased exponent field
+    if (e < 25)                           // fractional bits present: add the rounding bias, then shift them out
+    {
+        if (R == std::round_to_nearest)
+            m += (1 << (24 - e)) - (~(m >> (25 - e)) & E); // add one half; minus 1 when E and the integer part is even
+        else if (R == std::round_toward_infinity)
+            m += ((value >> 15) - 1) & ((1 << (25 - e)) - 1U); // positive input: add all-ones fraction to round up
+        else if (R == std::round_toward_neg_infinity)
+            m += -(value >> 15) & ((1 << (25 - e)) - 1U); // negative input: add all-ones fraction to grow magnitude
+        m >>= 25 - e;
+    }
+    else
+        m <<= e - 25; // no fractional bits: scale the mantissa up
+    return (value & 0x8000) ? -static_cast<T>(m) : static_cast<T>(m);
+}
+
+/// Convert half-precision floating point to integer.
+/// Tie handling follows the library-wide `HALF_ROUND_TIES_TO_EVEN` setting.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+/// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign
+/// bits)
+/// \param value binary representation of half-precision value
+/// \return integral value
+template <std::float_round_style R, typename T>
+T half2int(uint16 value)
+{
+    const T converted = half2int_impl<R, HALF_ROUND_TIES_TO_EVEN, T>(value);
+    return converted;
+}
+
+/// Convert half-precision floating point to integer using round-to-nearest-away-from-zero.
+/// \tparam T type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign
+/// bits)
+/// \param value binary representation of half-precision value
+/// \return integral value
+template <typename T>
+T half2int_up(uint16 value)
+{
+    // Second template argument false: ties are rounded away from zero rather than to even.
+    const T converted = half2int_impl<std::round_to_nearest, false, T>(value);
+    return converted;
+}
+
+/// Round half-precision number to nearest integer value.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+/// \tparam E `true` for round to even, `false` for round away from zero
+/// \param value binary representation of half-precision value
+/// \return half-precision bits for nearest integral value
+template <std::float_round_style R, bool E>
+uint16 round_half_impl(uint16 value)
+{
+    uint32_t e = value & 0x7FFF; // exponent and mantissa bits without the sign
+    uint16 result = value;       // returned unchanged when e >= 0x6400
+    if (e < 0x3C00)              // magnitude below 1: result is a signed zero or +/-1
+    {
+        result &= 0x8000; // keep only the sign
+        if (R == std::round_to_nearest)
+            result |= 0x3C00U & -(e >= (0x3800 + E)); // 1.0 when magnitude >= 0.5 (strictly above 0.5 when E)
+        else if (R == std::round_toward_infinity)
+            result |= 0x3C00U & -(~(value >> 15) & (e != 0)); // 1.0 for positive non-zero input
+        else if (R == std::round_toward_neg_infinity)
+            result |= 0x3C00U & -(value > 0x8000); // 1.0 magnitude (sign kept) for negative non-zero input
+    }
+    else if (e < 0x6400) // exponent field below 25: fractional mantissa bits remain
+    {
+        e = 25 - (e >> 10);           // number of fractional mantissa bits
+        uint32_t mask = (1 << e) - 1; // covers exactly those fractional bits
+        if (R == std::round_to_nearest)
+            result += (1 << (e - 1)) - (~(result >> e) & E); // add one half; minus 1 when E and integer part is even
+        else if (R == std::round_toward_infinity)
+            result += mask & ((value >> 15) - 1); // positive input: add all-ones fraction to round up
+        else if (R == std::round_toward_neg_infinity)
+            result += mask & -(value >> 15); // negative input: add all-ones fraction to grow magnitude
+        result &= ~mask; // clear the fractional bits
+    }
+    return result;
+}
+
+/// Round half-precision number to nearest integer value.
+/// Tie handling follows the library-wide `HALF_ROUND_TIES_TO_EVEN` setting.
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding
+/// \param value binary representation of half-precision value
+/// \return half-precision bits for nearest integral value
+template <std::float_round_style R>
+uint16 round_half(uint16 value)
+{
+    const uint16 rounded = round_half_impl<R, HALF_ROUND_TIES_TO_EVEN>(value);
+    return rounded;
+}
+
+/// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero.
+/// \param value binary representation of half-precision value
+/// \return half-precision bits for nearest integral value
+inline uint16 round_half_up(uint16 value)
+{
+    // Second template argument false: ties are rounded away from zero rather than to even.
+    const uint16 rounded = round_half_impl<std::round_to_nearest, false>(value);
+    return rounded;
+}
+/// \}
+
+struct functions;
+template <typename>
+struct unary_specialized;
+template <typename, typename>
+struct binary_specialized;
+template <typename, typename, std::float_round_style>
+struct half_caster;
+} // namespace detail
+
+/// Half-precision floating point type.
+/// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and
+/// conversions. It is implicitly convertible to single-precision floating point, which makes arithmetic expressions and
+/// functions with mixed-type operands take the most precise operand type. Additionally all arithmetic operations
+/// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to
+/// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic
+/// expressions are kept in single-precision as long as possible (while of course still maintaining a strong
+/// half-precision type).
+///
+/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and
+/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which
+/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the
+/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not necessarily have to be
+/// of exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will
+/// most probably not involve any additional "magic" or padding beyond the simple binary representation of the underlying
+/// 16-bit IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16
+/// bits if your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the
+/// case on nearly any reasonable platform.
+///
+/// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable
+/// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation.
+class half
+{
+    friend struct detail::functions;
+    friend struct detail::unary_specialized<half>;
+    friend struct detail::binary_specialized<half, half>;
+    template <typename, typename, std::float_round_style>
+    friend struct detail::half_caster;
+    friend class std::numeric_limits<half>;
+#if HALF_ENABLE_CPP11_HASH
+    friend struct std::hash<half>;
+#endif
+#if HALF_ENABLE_CPP11_USER_LITERALS
+    friend half literal::operator"" _h(long double);
+#endif
+
+public:
+    /// Default constructor.
+    /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics
+    /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics.
+    HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {}
+
+    /// Constructor from a half expression.
+    /// Rounds the expression's single-precision value to half-precision using \c round_style.
+    /// \param rhs half expression to copy from
+    half(detail::expr rhs)
+        : data_(detail::float2half<round_style>(static_cast<float>(rhs)))
+    {
+    }
+
+    /// Conversion constructor.
+    /// \param rhs float to convert
+    explicit half(float rhs)
+        : data_(detail::float2half<round_style>(rhs))
+    {
+    }
+
+    /// Conversion to single-precision.
+    /// \return single precision value representing this half's value
+    operator float() const
+    {
+        return detail::half2float<float>(data_);
+    }
+
+    /// Assignment operator.
+    /// Casts the expression to \c float and assigns that value to this half.
+    /// \param rhs half expression to copy from
+    /// \return reference to this half
+    half& operator=(detail::expr rhs)
+    {
+        return *this = static_cast<float>(rhs);
+    }
+
+    /// Arithmetic assignment.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to add
+    /// \return reference to this half
+    template <typename T>
+    typename detail::enable<half&, T>::type operator+=(T rhs)
+    {
+        return *this += static_cast<float>(rhs);
+    }
+
+    /// Arithmetic assignment.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to subtract
+    /// \return reference to this half
+    template <typename T>
+    typename detail::enable<half&, T>::type operator-=(T rhs)
+    {
+        return *this -= static_cast<float>(rhs);
+    }
+
+    /// Arithmetic assignment.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to multiply with
+    /// \return reference to this half
+    template <typename T>
+    typename detail::enable<half&, T>::type operator*=(T rhs)
+    {
+        return *this *= static_cast<float>(rhs);
+    }
+
+    /// Arithmetic assignment.
+    /// \tparam T type of concrete half expression
+    /// \param rhs half expression to divide by
+    /// \return reference to this half
+    template <typename T>
+    typename detail::enable<half&, T>::type operator/=(T rhs)
+    {
+        return *this /= static_cast<float>(rhs);
+    }
+
+    /// Assignment operator.
+    /// \param rhs single-precision value to copy from
+    /// \return reference to this half
+    half& operator=(float rhs)
+    {
+        data_ = detail::float2half<round_style>(rhs);
+        return *this;
+    }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to add
+    /// \return reference to this half
+    half& operator+=(float rhs)
+    {
+        data_ = detail::float2half<round_style>(detail::half2float<float>(data_) + rhs);
+        return *this;
+    }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to subtract
+    /// \return reference to this half
+    half& operator-=(float rhs)
+    {
+        data_ = detail::float2half<round_style>(detail::half2float<float>(data_) - rhs);
+        return *this;
+    }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to multiply with
+    /// \return reference to this half
+    half& operator*=(float rhs)
+    {
+        data_ = detail::float2half<round_style>(detail::half2float<float>(data_) * rhs);
+        return *this;
+    }
+
+    /// Arithmetic assignment.
+    /// \param rhs single-precision value to divide by
+    /// \return reference to this half
+    half& operator/=(float rhs)
+    {
+        data_ = detail::float2half<round_style>(detail::half2float<float>(data_) / rhs);
+        return *this;
+    }
+
+    /// Prefix increment.
+    /// \return incremented half value
+    half& operator++()
+    {
+        return *this += 1.0F;
+    }
+
+    /// Prefix decrement.
+    /// \return decremented half value
+    half& operator--()
+    {
+        return *this -= 1.0F;
+    }
+
+    /// Postfix increment.
+    /// \return non-incremented half value
+    half operator++(int)
+    {
+        half out(*this);
+        ++*this;
+        return out;
+    }
+
+    /// Postfix decrement.
+    /// \return non-decremented half value
+    half operator--(int)
+    {
+        half out(*this);
+        --*this;
+        return out;
+    }
+
+private:
+    /// Rounding mode to use (taken from the HALF_ROUND_STYLE macro)
+    static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
+
+    /// Constructor from a raw bit pattern; no conversion or rounding is applied.
+    /// \param bits binary representation to set half to
+    HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {}
+
+    /// Internal binary representation
+    detail::uint16 data_;
+};
+
+#if HALF_ENABLE_CPP11_USER_LITERALS
+namespace literal
+{
+/// Half literal.
+/// Although the result is a genuine half-precision value, the conversion is too involved for a half literal to be
+/// a constant expression.
+/// \param value literal value
+/// \return half with given value (if representable)
+inline half operator"" _h(long double value)
+{
+    // Round the literal to half-precision bits first, then wrap them without any further conversion.
+    const detail::uint16 encoded = detail::float2half<half::round_style>(value);
+    return half(detail::binary, encoded);
+}
+} // namespace literal
+#endif
+
+namespace detail
+{
+/// Wrapper implementing unspecialized half-precision functions.
+struct functions
+{
+    /// Addition implementation.
+    /// \param x left-hand summand
+    /// \param y right-hand summand
+    /// \return Half-precision sum stored in single-precision
+    static expr plus(float x, float y)
+    {
+        const float sum = x + y;
+        return expr(sum);
+    }
+
+    /// Subtraction implementation.
+    /// \param x minuend
+    /// \param y subtrahend
+    /// \return Half-precision difference stored in single-precision
+    static expr minus(float x, float y)
+    {
+        const float difference = x - y;
+        return expr(difference);
+    }
+
+    /// Multiplication implementation.
+    /// \param x first factor
+    /// \param y second factor
+    /// \return Half-precision product stored in single-precision
+    static expr multiplies(float x, float y)
+    {
+        const float product = x * y;
+        return expr(product);
+    }
+
+    /// Division implementation.
+    /// \param x dividend
+    /// \param y divisor
+    /// \return Half-precision quotient stored in single-precision
+    static expr divides(float x, float y)
+    {
+        const float quotient = x / y;
+        return expr(quotient);
+    }
+
+    /// Output implementation.
+    /// Writes the value with the stream's regular single-precision insertion operator.
+    /// \param out stream to write to
+    /// \param arg value to write
+    /// \return reference to stream
+    template <typename charT, typename traits>
+    static std::basic_ostream<charT, traits>& write(std::basic_ostream<charT, traits>& out, float arg)
+    {
+        out << arg;
+        return out;
+    }
+
+    /// Input implementation.
+    /// Extracts a single-precision value and assigns it to \a arg; \a arg is left untouched if extraction fails.
+    /// \param in stream to read from
+    /// \param arg half to read into
+    /// \return reference to stream
+    template <typename charT, typename traits>
+    static std::basic_istream<charT, traits>& read(std::basic_istream<charT, traits>& in, half& arg)
+    {
+        float parsed;
+        in >> parsed;
+        if (!in.fail())
+            arg = parsed;
+        return in;
+    }
+
+    /// Modulo implementation.
+    /// \param x dividend
+    /// \param y divisor
+    /// \return Half-precision division remainder stored in single-precision
+    static expr fmod(float x, float y)
+    {
+        const float rest = std::fmod(x, y);
+        return expr(rest);
+    }
+
+    /// Remainder implementation.
+    /// Uses `std::remainder` when C++11 `<cmath>` is enabled, otherwise a manual fallback built on `std::fmod`.
+    /// \param x first operand
+    /// \param y second operand
+    /// \return Half-precision division remainder stored in single-precision
+    static expr remainder(float x, float y)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::remainder(x, y));
+#else
+        const float quietNaN = std::numeric_limits<float>::quiet_NaN();
+        if (builtin_isnan(x) || builtin_isnan(y))
+            return expr(quietNaN);
+        const float absX = std::fabs(x);
+        const float absY = std::fabs(y);
+        // A dividend of 65536 or more, or a divisor below 2^-24, yields NaN.
+        if (absX >= 65536.0f || absY < std::ldexp(1.0f, -24))
+            return expr(quietNaN);
+        // A divisor of 65536 or more leaves the dividend unchanged.
+        if (absY >= 65536.0f)
+            return expr(x);
+        const bool negative = builtin_signbit(x);
+        if (absX == absY)
+            return expr(negative ? -0.0f : 0.0f);
+        // Reduce modulo 2|y|, then fold into [-|y|/2, |y|/2]; an exact tie at |y|/2 is left as is.
+        const float halfY = 0.5f * absY;
+        float rest = std::fmod(absX, absY + absY);
+        if (rest > halfY)
+        {
+            rest -= absY;
+            if (rest >= halfY)
+                rest -= absY;
+        }
+        return expr(negative ? -rest : rest);
+#endif
+    }
+
+    /// Remainder implementation.
+    /// Uses `std::remquo` when C++11 `<cmath>` is enabled, otherwise a manual fallback built on `std::fmod`.
+    /// The fallback always writes \a quo: it is 0 on the NaN paths and when \a y is too large to divide \a x.
+    /// \param x first operand
+    /// \param y second operand
+    /// \param quo address to store quotient bits at (dereferenced, so it must be valid)
+    /// \return Half-precision division remainder stored in single-precision
+    static expr remquo(float x, float y, int* quo)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::remquo(x, y, quo));
+#else
+        // Default the quotient to 0 so that no early return leaves *quo unwritten. 0 is also the correct quotient
+        // when |y| >= 65536 and x is returned unchanged.
+        *quo = 0;
+        if (builtin_isnan(x) || builtin_isnan(y))
+            return expr(std::numeric_limits<float>::quiet_NaN());
+        bool sign = builtin_signbit(x), qsign = static_cast<bool>(sign ^ builtin_signbit(y));
+        float ax = std::fabs(x), ay = std::fabs(y);
+        // A dividend of 65536 or more, or a divisor below 2^-24, yields NaN.
+        if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24))
+            return expr(std::numeric_limits<float>::quiet_NaN());
+        // A divisor of 65536 or more leaves the dividend unchanged (quotient 0, set above).
+        if (ay >= 65536.0f)
+            return expr(x);
+        if (ax == ay)
+            return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f);
+        // Reduce modulo 8|y| so the three low quotient bits can be recovered step by step.
+        ax = std::fmod(ax, 8.0f * ay);
+        int cquo = 0;
+        if (ax >= 4.0f * ay)
+        {
+            ax -= 4.0f * ay;
+            cquo += 4;
+        }
+        if (ax >= 2.0f * ay)
+        {
+            ax -= 2.0f * ay;
+            cquo += 2;
+        }
+        // Fold into [-|y|/2, |y|/2]; an exact tie at |y|/2 is left as is.
+        float y2 = 0.5f * ay;
+        if (ax > y2)
+        {
+            ax -= ay;
+            ++cquo;
+            if (ax >= y2)
+            {
+                ax -= ay;
+                ++cquo;
+            }
+        }
+        return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax);
+#endif
+    }
+
+    /// Positive difference implementation (std::fdim when HALF_ENABLE_CPP11_CMATH is nonzero).
+    /// \param x first operand
+    /// \param y second operand
+    /// \return Positive difference stored in single-precision
+    static expr fdim(float x, float y)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::fdim(x, y));
+#else
+        return expr((x <= y) ? 0.0f : (x - y)); // zero when x <= y, otherwise x - y
+#endif
+    }
+
+    /// Multiply-add implementation (std::fma only when HALF_ENABLE_CPP11_CMATH and FP_FAST_FMAF are both available).
+    /// \param x first operand
+    /// \param y second operand
+    /// \param z third operand
+    /// \return \a x * \a y + \a z stored in single-precision
+    static expr fma(float x, float y, float z)
+    {
+#if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF)
+        return expr(std::fma(x, y, z));
+#else
+        return expr(x * y + z); // plain float multiply followed by a float add
+#endif
+    }
+
+    /// Get NaN.
+    /// \return Half-precision quiet NaN (bit pattern 0x7FFF)
+    static half nanh()
+    {
+        return half(binary, 0x7FFF);
+    }
+
+    /// Exponential implementation (forwards to std::exp).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr exp(float arg)
+    {
+        return expr(std::exp(arg));
+    }
+
+    /// Exponential minus one implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr expm1(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::expm1(arg));
+#else
+        return expr(static_cast<float>(std::exp(static_cast<double>(arg)) - 1.0)); // exp(arg) - 1 in double
+#endif
+    }
+
+    /// Binary exponential implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr exp2(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::exp2(arg));
+#else
+        return expr(static_cast<float>(std::exp(arg * 0.69314718055994530941723212145818))); // exp(arg * ln 2)
+#endif
+    }
+
+    /// Natural logarithm implementation (forwards to std::log).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr log(float arg)
+    {
+        return expr(std::log(arg));
+    }
+
+    /// Common logarithm implementation (forwards to std::log10).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr log10(float arg)
+    {
+        return expr(std::log10(arg));
+    }
+
+    /// Natural logarithm of one plus the argument.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr log1p(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::log1p(arg));
+#else
+        return expr(static_cast<float>(std::log(1.0 + arg))); // the sum is formed in double before the log
+#endif
+    }
+
+    /// Binary logarithm implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr log2(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::log2(arg));
+#else
+        return expr(static_cast<float>(std::log(static_cast<double>(arg)) * 1.4426950408889634073599246810019));
+#endif
+    }
+
+    /// Square root implementation (forwards to std::sqrt).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr sqrt(float arg)
+    {
+        return expr(std::sqrt(arg));
+    }
+
+    /// Cube root implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr cbrt(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::cbrt(arg));
+#else
+        if (builtin_isnan(arg) || builtin_isinf(arg)) // NaN and infinities are returned unchanged
+            return expr(arg);
+        return expr(builtin_signbit(arg) ? -static_cast<float>(std::pow(-static_cast<double>(arg), 1.0 / 3.0))
+                                         : static_cast<float>(std::pow(static_cast<double>(arg), 1.0 / 3.0)));
+#endif
+    }
+
+    /// Hypotenuse implementation.
+    /// \param x first argument
+    /// \param y second argument
+    /// \return function value stored in single-precision
+    static expr hypot(float x, float y)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::hypot(x, y));
+#else
+        return expr((builtin_isinf(x) || builtin_isinf(y)) // any infinite operand gives +infinity
+                ? std::numeric_limits<float>::infinity()
+                : static_cast<float>(std::sqrt(static_cast<double>(x) * x + static_cast<double>(y) * y)));
+#endif
+    }
+
+    /// Power implementation (forwards to std::pow).
+    /// \param base value to exponentiate
+    /// \param exp power to exponentiate to
+    /// \return function value stored in single-precision
+    static expr pow(float base, float exp)
+    {
+        return expr(std::pow(base, exp));
+    }
+
+    /// Sine implementation (forwards to std::sin).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr sin(float arg)
+    {
+        return expr(std::sin(arg));
+    }
+
+    /// Cosine implementation (forwards to std::cos).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr cos(float arg)
+    {
+        return expr(std::cos(arg));
+    }
+
+    /// Tangent implementation (forwards to std::tan).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr tan(float arg)
+    {
+        return expr(std::tan(arg));
+    }
+
+    /// Arc sine implementation (forwards to std::asin).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr asin(float arg)
+    {
+        return expr(std::asin(arg));
+    }
+
+    /// Arc cosine implementation (forwards to std::acos).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr acos(float arg)
+    {
+        return expr(std::acos(arg));
+    }
+
+    /// Arc tangent implementation (forwards to std::atan).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr atan(float arg)
+    {
+        return expr(std::atan(arg));
+    }
+
+    /// Two-argument arc tangent implementation (forwards to std::atan2 with the same argument order).
+    /// \param x first argument
+    /// \param y second argument
+    /// \return function value stored in single-precision
+    static expr atan2(float x, float y)
+    {
+        return expr(std::atan2(x, y));
+    }
+
+    /// Hyperbolic sine implementation (forwards to std::sinh).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr sinh(float arg)
+    {
+        return expr(std::sinh(arg));
+    }
+
+    /// Hyperbolic cosine implementation (forwards to std::cosh).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr cosh(float arg)
+    {
+        return expr(std::cosh(arg));
+    }
+
+    /// Hyperbolic tangent implementation (forwards to std::tanh).
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr tanh(float arg)
+    {
+        return expr(std::tanh(arg));
+    }
+
+    /// Hyperbolic area sine implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr asinh(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::asinh(arg));
+#else
+        return expr((arg == -std::numeric_limits<float>::infinity()) // negative infinity is returned unchanged
+                ? arg
+                : static_cast<float>(std::log(arg + std::sqrt(arg * arg + 1.0)))); // log(arg + sqrt(arg^2 + 1))
+#endif
+    }
+
+    /// Hyperbolic area cosine implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr acosh(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::acosh(arg));
+#else
+        return expr((arg < -1.0f) ? std::numeric_limits<float>::quiet_NaN() // explicit NaN below -1
+                                  : static_cast<float>(std::log(arg + std::sqrt(arg * arg - 1.0))));
+#endif
+    }
+
+    /// Hyperbolic area tangent implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr atanh(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::atanh(arg));
+#else
+        return expr(static_cast<float>(0.5 * std::log((1.0 + arg) / (1.0 - arg)))); // 0.5 * log((1+arg)/(1-arg))
+#endif
+    }
+
+    /// Error function implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr erf(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::erf(arg));
+#else
+        return expr(static_cast<float>(erf(static_cast<double>(arg)))); // private double-precision erf overload
+#endif
+    }
+
+    /// Complementary error function implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr erfc(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::erfc(arg));
+#else
+        return expr(static_cast<float>(1.0 - erf(static_cast<double>(arg)))); // 1 - erf via the private overload
+#endif
+    }
+
+    /// Gamma logarithm implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr lgamma(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::lgamma(arg));
+#else
+        if (builtin_isinf(arg)) // either infinity yields +infinity
+            return expr(std::numeric_limits<float>::infinity());
+        if (arg < 0.0f)
+        {
+            float i, f = std::modf(-arg, &i); // f: fractional part of |arg|
+            if (f == 0.0f)                    // negative integer: +infinity
+                return expr(std::numeric_limits<float>::infinity());
+            return expr(static_cast<float>(1.1447298858494001741434273513531 // ln(pi) - ln|sin(pi f)| - lgamma(1 - arg)
+                - std::log(std::abs(std::sin(3.1415926535897932384626433832795 * f))) - lgamma(1.0 - arg)));
+        }
+        return expr(static_cast<float>(lgamma(static_cast<double>(arg)))); // private double-precision overload
+#endif
+    }
+
+    /// Gamma function implementation.
+    /// \param arg function argument
+    /// \return function value stored in single-precision
+    static expr tgamma(float arg)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::tgamma(arg));
+#else
+        if (arg == 0.0f) // zero: infinity carrying the sign of the zero
+            return builtin_signbit(arg) ? expr(-std::numeric_limits<float>::infinity())
+                                        : expr(std::numeric_limits<float>::infinity());
+        if (arg < 0.0f)
+        {
+            float i, f = std::modf(-arg, &i); // i, f: integral and fractional parts of |arg|
+            if (f == 0.0f)                    // negative integer: NaN
+                return expr(std::numeric_limits<float>::quiet_NaN());
+            double value = 3.1415926535897932384626433832795 // pi / (sin(pi f) * gamma(1 - arg))
+                / (std::sin(3.1415926535897932384626433832795 * f) * std::exp(lgamma(1.0 - arg)));
+            return expr(static_cast<float>((std::fmod(i, 2.0f) == 0.0f) ? -value : value)); // sign from parity of i
+        }
+        if (builtin_isinf(arg)) // only +infinity reaches here; returned unchanged
+            return expr(arg);
+        return expr(static_cast<float>(std::exp(lgamma(static_cast<double>(arg))))); // exp(lgamma(arg))
+#endif
+    }
+
+    /// Floor implementation (round_half with std::round_toward_neg_infinity on the raw bits).
+    /// \param arg value to round
+    /// \return rounded value
+    static half floor(half arg)
+    {
+        return half(binary, round_half<std::round_toward_neg_infinity>(arg.data_));
+    }
+
+    /// Ceiling implementation (round_half with std::round_toward_infinity on the raw bits).
+    /// \param arg value to round
+    /// \return rounded value
+    static half ceil(half arg)
+    {
+        return half(binary, round_half<std::round_toward_infinity>(arg.data_));
+    }
+
+    /// Truncation implementation (round_half with std::round_toward_zero on the raw bits).
+    /// \param arg value to round
+    /// \return rounded value
+    static half trunc(half arg)
+    {
+        return half(binary, round_half<std::round_toward_zero>(arg.data_));
+    }
+
+    /// Nearest integer implementation (delegates to round_half_up on the raw bits).
+    /// \param arg value to round
+    /// \return rounded value
+    static half round(half arg)
+    {
+        return half(binary, round_half_up(arg.data_));
+    }
+
+    /// Nearest integer implementation returning long (delegates to detail::half2int_up).
+    /// \param arg value to round
+    /// \return rounded value
+    static long lround(half arg)
+    {
+        return detail::half2int_up<long>(arg.data_);
+    }
+
+    /// Nearest integer implementation using the rounding mode half::round_style.
+    /// \param arg value to round
+    /// \return rounded value
+    static half rint(half arg)
+    {
+        return half(binary, round_half<half::round_style>(arg.data_));
+    }
+
+    /// Nearest integer implementation returning long, using the rounding mode half::round_style.
+    /// \param arg value to round
+    /// \return rounded value
+    static long lrint(half arg)
+    {
+        return detail::half2int<half::round_style, long>(arg.data_);
+    }
+
+#if HALF_ENABLE_CPP11_LONG_LONG
+    /// Nearest integer implementation returning long long (delegates to detail::half2int_up).
+    /// \param arg value to round
+    /// \return rounded value
+    static long long llround(half arg)
+    {
+        return detail::half2int_up<long long>(arg.data_);
+    }
+
+    /// Nearest integer implementation returning long long, using the rounding mode half::round_style.
+    /// \param arg value to round
+    /// \return rounded value
+    static long long llrint(half arg)
+    {
+        return detail::half2int<half::round_style, long long>(arg.data_);
+    }
+#endif
+
+    /// Decompression implementation: splits a value into significand and power-of-two exponent.
+    /// \param arg number to decompress
+    /// \param exp address to store exponent at (0 is stored for zero, infinity and NaN)
+    /// \return normalized significand (\a arg itself for zero, infinity and NaN)
+    static half frexp(half arg, int* exp)
+    {
+        int m = arg.data_ & 0x7FFF, e = -14;
+        if (m >= 0x7C00 || !m) // infinity/NaN or zero: nothing to decompose
+            return *exp = 0, arg;
+        for (; m < 0x400; m <<= 1, --e) // shift a subnormal up until bit 10 is set
+            ;
+        return *exp = e + (m >> 10), half(binary, (arg.data_ & 0x8000) | 0x3800 | (m & 0x3FF)); // exponent field 14
+    }
+
+    /// Decompression implementation: splits a value into integral and fractional parts.
+    /// \param arg number to decompress
+    /// \param iptr address to store integer part at
+    /// \return fractional part
+    static half modf(half arg, half* iptr)
+    {
+        uint32_t e = arg.data_ & 0x7FFF;
+        if (e >= 0x6400) // exponent field >= 25: arg is its own integer part; fraction is signed zero (NaN kept as is)
+            return *iptr = arg, half(binary, arg.data_ & (0x8000U | -(e > 0x7C00)));
+        if (e < 0x3C00) // magnitude bits below 0x3C00: integer part is signed zero, arg is the fraction
+            return iptr->data_ = arg.data_ & 0x8000, arg;
+        e >>= 10;
+        uint32_t mask = (1 << (25 - e)) - 1, m = arg.data_ & mask; // mask selects the fractional significand bits
+        iptr->data_ = arg.data_ & ~mask;
+        if (!m) // no fractional bits set: fraction is signed zero
+            return half(binary, arg.data_ & 0x8000);
+        for (; m < 0x400; m <<= 1, --e) // renormalize the fractional bits
+            ;
+        return half(binary, static_cast<uint16>((arg.data_ & 0x8000) | (e << 10) | (m & 0x3FF)));
+    }
+
+    /// Scaling implementation: multiplies a value by a power of two on the raw bit pattern.
+    /// \param arg number to scale
+    /// \param exp power of two to scale by (any long value; magnitudes beyond 64 already saturate the result)
+    /// \return scaled number
+    static half scalbln(half arg, long exp)
+    {
+        uint32_t m = arg.data_ & 0x7FFF;
+        if (m >= 0x7C00 || !m) // infinity/NaN or zero: returned unchanged
+            return arg;
+        exp = (exp > 64) ? 64 : ((exp < -64) ? -64 : exp); // clamp: same result, but the sums below cannot overflow
+        for (; m < 0x400; m <<= 1, --exp) {}               // normalize a subnormal significand
+        exp += static_cast<long>(m >> 10);                 // add the exponent field of the normalized value
+        uint16 value = arg.data_ & 0x8000;
+        if (exp > 30) // overflow: saturate according to the rounding mode
+        {
+            if (half::round_style == std::round_toward_zero)
+                value |= 0x7BFF;
+            else if (half::round_style == std::round_toward_infinity)
+                value |= 0x7C00 - (value >> 15);
+            else if (half::round_style == std::round_toward_neg_infinity)
+                value |= 0x7BFF + (value >> 15);
+            else
+                value |= 0x7C00;
+        }
+        else if (exp > 0) // result has a nonzero exponent field
+            value |= (exp << 10) | (m & 0x3FF);
+        else if (exp > -11) // result is subnormal: shift the significand right with rounding
+        {
+            m = (m & 0x3FF) | 0x400;
+            if (half::round_style == std::round_to_nearest)
+            {
+                m += 1 << -exp;
+#if HALF_ROUND_TIES_TO_EVEN
+                m -= (m >> (1 - exp)) & 1;
+#endif
+            }
+            else if (half::round_style == std::round_toward_infinity)
+                m += ((value >> 15) - 1) & ((1 << (1 - exp)) - 1U);
+            else if (half::round_style == std::round_toward_neg_infinity)
+                m += -(value >> 15) & ((1 << (1 - exp)) - 1U);
+            value |= m >> (1 - exp);
+        }
+        else if (half::round_style == std::round_toward_infinity) // total underflow, directed rounding
+            value -= (value >> 15) - 1;
+        else if (half::round_style == std::round_toward_neg_infinity)
+            value += value >> 15;
+        return half(binary, value);
+    }
+
+    /// Exponent implementation returning an int.
+    /// \param arg number to query
+    /// \return floating point exponent (FP_ILOGB0 for zero, FP_ILOGBNAN for NaN, INT_MAX for infinity)
+    static int ilogb(half arg)
+    {
+        int abs = arg.data_ & 0x7FFF;
+        if (!abs)
+            return FP_ILOGB0;
+        if (abs < 0x7C00) // finite and nonzero
+        {
+            int exp = (abs >> 10) - 15; // remove the exponent bias
+            if (abs < 0x400)            // subnormal: lower the exponent until bit 9 is set
+                for (; abs < 0x200; abs <<= 1, --exp)
+                    ;
+            return exp;
+        }
+        if (abs > 0x7C00)
+            return FP_ILOGBNAN;
+        return INT_MAX;
+    }
+
+    /// Exponent implementation returning a half.
+    /// \param arg number to query
+    /// \return floating point exponent (0xFC00 pattern for zero, \a arg for NaN, 0x7C00 pattern for infinity)
+    static half logb(half arg)
+    {
+        int abs = arg.data_ & 0x7FFF;
+        if (!abs)
+            return half(binary, 0xFC00);
+        if (abs < 0x7C00) // finite and nonzero
+        {
+            int exp = (abs >> 10) - 15; // remove the exponent bias
+            if (abs < 0x400)            // subnormal: lower the exponent until bit 9 is set
+                for (; abs < 0x200; abs <<= 1, --exp)
+                    ;
+            uint16 bits = (exp < 0) << 15; // sign bit of the resulting half
+            if (exp)
+            {
+                uint32_t m = std::abs(exp) << 6, e = 18; // encode |exp| as a half bit pattern
+                for (; m < 0x400; m <<= 1, --e)
+                    ;
+                bits |= (e << 10) + m;
+            }
+            return half(binary, bits);
+        }
+        if (abs > 0x7C00)
+            return arg;
+        return half(binary, 0x7C00);
+    }
+
+    /// Enumeration implementation: steps the bit pattern of \a from by one towards \a to.
+    /// \param from number to increase/decrease
+    /// \param to direction to enumerate into
+    /// \return next representable number
+    static half nextafter(half from, half to)
+    {
+        uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
+        if (fabs > 0x7C00) // from is NaN
+            return from;
+        if (tabs > 0x7C00 || from.data_ == to.data_ || !(fabs | tabs)) // to is NaN, identical bits, or both zero
+            return to;
+        if (!fabs) // from is zero: magnitude 1 with the sign of to
+            return half(binary, (to.data_ & 0x8000) + 1);
+        bool lt = ((fabs == from.data_) ? static_cast<int>(fabs) : -static_cast<int>(fabs))
+            < ((tabs == to.data_) ? static_cast<int>(tabs) : -static_cast<int>(tabs));
+        return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast<unsigned>(lt)) << 1) - 1); // +1 or -1
+    }
+
+    /// Enumeration implementation with a long double direction.
+    /// \param from number to increase/decrease
+    /// \param to direction to enumerate into
+    /// \return next representable number
+    static half nexttoward(half from, long double to)
+    {
+        if (isnan(from))
+            return from;
+        long double lfrom = static_cast<long double>(from);
+        if (builtin_isnan(to) || lfrom == to) // NaN direction or already equal: return to converted to half
+            return half(static_cast<float>(to));
+        if (!(from.data_ & 0x7FFF)) // from is zero: magnitude 1 with the sign of to
+            return half(binary, (static_cast<detail::uint16>(builtin_signbit(to)) << 15) + 1);
+        return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast<unsigned>(lfrom < to)) << 1) - 1); // +1 or -1
+    }
+
+    /// Sign implementation: magnitude bits of \a x combined with the sign bit of \a y.
+    /// \param x first operand
+    /// \param y second operand
+    /// \return composed value
+    static half copysign(half x, half y)
+    {
+        return half(binary, x.data_ ^ ((x.data_ ^ y.data_) & 0x8000));
+    }
+
+    /// Classification implementation.
+    /// \param arg value to classify
+    /// \return one of FP_ZERO, FP_SUBNORMAL, FP_NORMAL, FP_INFINITE or FP_NAN,
+    /// selected from the magnitude bits of \a arg
+    static int fpclassify(half arg)
+    {
+        uint32_t abs = arg.data_ & 0x7FFF;
+        return abs
+            ? ((abs > 0x3FF) ? ((abs >= 0x7C00) ? ((abs > 0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) : FP_SUBNORMAL)
+            : FP_ZERO;
+    }
+
+    /// Classification implementation: tests that the exponent field is not all ones.
+    /// \param arg value to classify
+    /// \retval true if finite number
+    /// \retval false else
+    static bool isfinite(half arg)
+    {
+        return (arg.data_ & 0x7C00) != 0x7C00;
+    }
+
+    /// Classification implementation: tests for magnitude bits equal to 0x7C00.
+    /// \param arg value to classify
+    /// \retval true if infinite number
+    /// \retval false else
+    static bool isinf(half arg)
+    {
+        return (arg.data_ & 0x7FFF) == 0x7C00;
+    }
+
+    /// Classification implementation: tests for magnitude bits above 0x7C00.
+    /// \param arg value to classify
+    /// \retval true if not a number
+    /// \retval false else
+    static bool isnan(half arg)
+    {
+        return (arg.data_ & 0x7FFF) > 0x7C00;
+    }
+
+    /// Classification implementation: exponent field neither all zeros nor all ones.
+    /// \param arg value to classify
+    /// \retval true if normal number
+    /// \retval false else
+    static bool isnormal(half arg)
+    {
+        return ((arg.data_ & 0x7C00) != 0) & ((arg.data_ & 0x7C00) != 0x7C00);
+    }
+
+    /// Sign bit implementation: tests bit 15 of the raw pattern.
+    /// \param arg value to check
+    /// \retval true if signed
+    /// \retval false if unsigned
+    static bool signbit(half arg)
+    {
+        return (arg.data_ & 0x8000) != 0;
+    }
+
+    /// Comparison implementation: identical bits or two zeros of any sign, and \a x not NaN.
+    /// \param x first operand
+    /// \param y second operand
+    /// \retval true if operands equal
+    /// \retval false else
+    static bool isequal(half x, half y)
+    {
+        return (x.data_ == y.data_ || !((x.data_ | y.data_) & 0x7FFF)) && !isnan(x);
+    }
+
+    /// Comparison implementation: differing bits that are not both zeros, or \a x is NaN.
+    /// \param x first operand
+    /// \param y second operand
+    /// \retval true if operands not equal
+    /// \retval false else
+    static bool isnotequal(half x, half y)
+    {
+        return (x.data_ != y.data_ && ((x.data_ | y.data_) & 0x7FFF)) || isnan(x);
+    }
+
+    /// Quiet "greater than" test carried out on the raw bit patterns.
+    /// \param x first operand
+    /// \param y second operand
+    /// \retval true if \a x > \a y
+    /// \retval false otherwise, including when either operand is NaN
+    static bool isgreater(half x, half y)
+    {
+        const int xmag = x.data_ & 0x7FFF, ymag = y.data_ & 0x7FFF;
+        if (xmag > 0x7C00 || ymag > 0x7C00) return false; // a NaN operand compares unordered
+        return ((x.data_ == xmag) ? xmag : -xmag) > ((y.data_ == ymag) ? ymag : -ymag); // signed-magnitude keys
+    }
+
+    /// Quiet "greater than or equal" test carried out on the raw bit patterns.
+    /// \param x first operand
+    /// \param y second operand
+    /// \retval true if \a x >= \a y
+    /// \retval false otherwise, including when either operand is NaN
+    static bool isgreaterequal(half x, half y)
+    {
+        const int xmag = x.data_ & 0x7FFF, ymag = y.data_ & 0x7FFF;
+        if (xmag > 0x7C00 || ymag > 0x7C00) return false; // a NaN operand compares unordered
+        return ((x.data_ == xmag) ? xmag : -xmag) >= ((y.data_ == ymag) ? ymag : -ymag); // signed-magnitude keys
+    }
+
+    /// Quiet "less than" test carried out on the raw bit patterns.
+    /// \param x first operand
+    /// \param y second operand
+    /// \retval true if \a x < \a y
+    /// \retval false otherwise, including when either operand is NaN
+    static bool isless(half x, half y)
+    {
+        const int xmag = x.data_ & 0x7FFF, ymag = y.data_ & 0x7FFF;
+        if (xmag > 0x7C00 || ymag > 0x7C00) return false; // a NaN operand compares unordered
+        return ((x.data_ == xmag) ? xmag : -xmag) < ((y.data_ == ymag) ? ymag : -ymag); // signed-magnitude keys
+    }
+
+    /// Quiet "less than or equal" test carried out on the raw bit patterns.
+    /// \param x first operand
+    /// \param y second operand
+    /// \retval true if \a x <= \a y
+    /// \retval false otherwise, including when either operand is NaN
+    static bool islessequal(half x, half y)
+    {
+        const int xmag = x.data_ & 0x7FFF, ymag = y.data_ & 0x7FFF;
+        if (xmag > 0x7C00 || ymag > 0x7C00) return false; // a NaN operand compares unordered
+        return ((x.data_ == xmag) ? xmag : -xmag) <= ((y.data_ == ymag) ? ymag : -ymag); // signed-magnitude keys
+    }
+
+    /// Quiet "less or greater" test carried out on the raw bit patterns.
+    /// \param x first operand
+    /// \param y second operand
+    /// \retval true if either \a x < \a y or \a x > \a y
+    /// \retval false otherwise, including when either operand is NaN
+    static bool islessgreater(half x, half y)
+    {
+        const int xmag = x.data_ & 0x7FFF, ymag = y.data_ & 0x7FFF;
+        const bool ordered = xmag <= 0x7C00 && ymag <= 0x7C00; // false as soon as one operand is NaN
+        const int xkey = (x.data_ == xmag) ? xmag : -xmag;     // signed key: +0 and -0 both map to 0
+        const int ykey = (y.data_ == ymag) ? ymag : -ymag;
+        return ordered && xkey != ykey;
+    }
+
+    /// Comparison implementation: tests whether at least one operand is NaN.
+    /// \param x first operand
+    /// \param y second operand
+    /// \retval true if operands are unordered
+    /// \retval false else
+    static bool isunordered(half x, half y)
+    {
+        return isnan(x) || isnan(y);
+    }
+
+private:
+    static double erf(double arg) // closed-form approximation used by the fallback branches of erf/erfc above
+    {
+        if (builtin_isinf(arg)) // infinities map to -1 / +1
+            return (arg < 0.0) ? -1.0 : 1.0;
+        double x2 = arg * arg, ax2 = 0.147 * x2, // the constant 1.2732... below is 4/pi
+               value = std::sqrt(1.0 - std::exp(-x2 * (1.2732395447351626861510701069801 + ax2) / (1.0 + ax2)));
+        return builtin_signbit(arg) ? -value : value; // result takes the sign of arg
+    }
+
+    static double lgamma(double arg) // series evaluation used by the fallback branches of lgamma/tgamma above
+    {
+        double v = 1.0;
+        for (; arg < 8.0; ++arg) // shift arg up to at least 8, collecting the skipped factors in v
+            v *= arg;
+        double w = 1.0 / (arg * arg); // the polynomial below is evaluated in powers of 1/arg^2
+        return (((((((-0.02955065359477124183006535947712 * w + 0.00641025641025641025641025641026) * w
+                        + -0.00191752691752691752691752691753)
+                           * w
+                       + 8.4175084175084175084175084175084e-4)
+                          * w
+                      + -5.952380952380952380952380952381e-4)
+                         * w
+                     + 7.9365079365079365079365079365079e-4)
+                        * w
+                    + -0.00277777777777777777777777777778)
+                       * w
+                   + 0.08333333333333333333333333333333)
+            / arg
+            + 0.91893853320467274178032973640562 - std::log(v) - arg + (arg - 0.5) * std::log(arg); // log(v) undoes the shift
+    }
+};
+
+/// Wrapper for unary half-precision functions needing specialization for individual argument types.
+/// \tparam T argument type (this primary template operates directly on half bit patterns)
+template <typename T>
+struct unary_specialized
+{
+    /// Negation implementation (flips the sign bit).
+    /// \param arg value to negate
+    /// \return negated value
+    static HALF_CONSTEXPR half negate(half arg)
+    {
+        return half(binary, arg.data_ ^ 0x8000);
+    }
+
+    /// Absolute value implementation (clears the sign bit).
+    /// \param arg function argument
+    /// \return absolute value
+    static half fabs(half arg)
+    {
+        return half(binary, arg.data_ & 0x7FFF);
+    }
+};
+template <>
+struct unary_specialized<expr> // specialization for expression operands, which arrive as float
+{
+    static HALF_CONSTEXPR expr negate(float arg) // negation in single precision
+    {
+        return expr(-arg);
+    }
+    static expr fabs(float arg) // absolute value via std::fabs
+    {
+        return expr(std::fabs(arg));
+    }
+};
+
+/// Wrapper for binary half-precision functions needing specialization for individual argument types.
+/// \tparam T first argument type
+/// \tparam U second argument type
+template <typename T, typename U>
+struct binary_specialized
+{
+    /// Minimum implementation (a NaN operand yields the other operand in the fallback path).
+    /// \param x first operand
+    /// \param y second operand
+    /// \return minimum value
+    static expr fmin(float x, float y)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::fmin(x, y));
+#else
+        if (builtin_isnan(x))
+            return expr(y);
+        if (builtin_isnan(y))
+            return expr(x);
+        return expr(std::min(x, y));
+#endif
+    }
+
+    /// Maximum implementation (a NaN operand yields the other operand in the fallback path).
+    /// \param x first operand
+    /// \param y second operand
+    /// \return maximum value
+    static expr fmax(float x, float y)
+    {
+#if HALF_ENABLE_CPP11_CMATH
+        return expr(std::fmax(x, y));
+#else
+        if (builtin_isnan(x))
+            return expr(y);
+        if (builtin_isnan(y))
+            return expr(x);
+        return expr(std::max(x, y));
+#endif
+    }
+};
+template <>
+struct binary_specialized<half, half>
+{
+    static half fmin(half x, half y) // minimum on raw bit patterns; a NaN operand yields the other operand
+    {
+        const int xmag = x.data_ & 0x7FFF, ymag = y.data_ & 0x7FFF;
+        // Signed-magnitude keys compare like the encoded values (+0 and -0 tie).
+        const int xkey = (x.data_ == xmag) ? xmag : -xmag, ykey = (y.data_ == ymag) ? ymag : -ymag;
+        if (xmag > 0x7C00)
+            return y;
+        return (ymag > 0x7C00 || xkey <= ykey) ? x : y;
+    }
+    static half fmax(half x, half y) // maximum on raw bit patterns; a NaN operand yields the other operand
+    {
+        const int xmag = x.data_ & 0x7FFF, ymag = y.data_ & 0x7FFF;
+        // Signed-magnitude keys compare like the encoded values (+0 and -0 tie).
+        const int xkey = (x.data_ == xmag) ? xmag : -xmag, ykey = (y.data_ == ymag) ? ymag : -ymag;
+        if (xmag > 0x7C00)
+            return y;
+        return (ymag > 0x7C00 || xkey >= ykey) ? x : y;
+    }
+};
+
+/// Helper class for half casts.
+/// This class template has to be specialized for all valid cast arguments to define an appropriate static `cast`
+/// member function. The primary template is intentionally empty.
+/// \tparam T destination type
+/// \tparam U source type
+/// \tparam R rounding mode to use
+template <typename T, typename U, std::float_round_style R = (std::float_round_style)(HALF_ROUND_STYLE)>
+struct half_caster
+{
+};
+template <typename U, std::float_round_style R>
+struct half_caster<half, U, R> // casts from an arithmetic type U to half
+{
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+    static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
+#endif
+
+    static half cast(U arg) // dispatches on is_float<U> to the matching cast_impl overload
+    {
+        return cast_impl(arg, is_float<U>());
+    }
+
+private:
+    static half cast_impl(U arg, true_type) // floating-point source: float2half with rounding mode R
+    {
+        return half(binary, float2half<R>(arg));
+    }
+    static half cast_impl(U arg, false_type) // non-floating-point source: int2half with rounding mode R
+    {
+        return half(binary, int2half<R>(arg));
+    }
+};
+template <typename T, std::float_round_style R>
+struct half_caster<T, half, R> // casts from half to an arithmetic type T
+{
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+    static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
+#endif
+
+    static T cast(half arg) // dispatches on is_float<T> to the matching cast_impl overload
+    {
+        return cast_impl(arg, is_float<T>());
+    }
+
+private:
+    static T cast_impl(half arg, true_type) // floating-point destination: half2float on the raw bits
+    {
+        return half2float<T>(arg.data_);
+    }
+    static T cast_impl(half arg, false_type) // non-floating-point destination: half2int with rounding mode R
+    {
+        return half2int<R, T>(arg.data_);
+    }
+};
+template <typename T, std::float_round_style R>
+struct half_caster<T, expr, R> // casts from a half expression to an arithmetic type T
+{
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+    static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
+#endif
+
+    static T cast(expr arg) // NOTE(review): relies on expr converting implicitly to float / half - confirm
+    {
+        return cast_impl(arg, is_float<T>());
+    }
+
+private:
+    static T cast_impl(float arg, true_type) // floating-point destination: plain static_cast from float
+    {
+        return static_cast<T>(arg);
+    }
+    static T cast_impl(half arg, false_type) // non-floating-point destination: half2int with rounding mode R
+    {
+        return half2int<R, T>(arg.data_);
+    }
+};
+template <std::float_round_style R>
+struct half_caster<half, half, R> // identity cast: half to half
+{
+    static half cast(half arg) // returns the argument unchanged
+    {
+        return arg;
+    }
+};
+template <std::float_round_style R>
+struct half_caster<half, expr, R> : half_caster<half, half, R> // reuses the half-to-half identity cast
+{
+};
+
+/// \name Comparison operators
+/// \{
+
+/// Comparison for equality (forwards to functions::isequal).
+/// \param x first operand
+/// \param y second operand
+/// \retval true if operands are equal
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator==(T x, U y)
+{
+    return functions::isequal(x, y);
+}
+
+/// Comparison for inequality (forwards to functions::isnotequal).
+/// \param x first operand
+/// \param y second operand
+/// \retval true if operands are not equal
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator!=(T x, U y)
+{
+    return functions::isnotequal(x, y);
+}
+
+/// Comparison for less than (forwards to functions::isless).
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x is less than \a y
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator<(T x, U y)
+{
+    return functions::isless(x, y);
+}
+
+/// Comparison for greater than (forwards to functions::isgreater).
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x is greater than \a y
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator>(T x, U y)
+{
+    return functions::isgreater(x, y);
+}
+
+/// Comparison for less equal.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x less equal \a y
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator<=(T x, U y)
+{
+    return functions::islessequal(x, y);
+}
+
+/// Comparison for greater equal.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x greater equal \a y
+/// \retval false else
+template <typename T, typename U>
+typename enable<bool, T, U>::type operator>=(T x, U y)
+{
+    return functions::isgreaterequal(x, y);
+}
+
+/// \}
+/// \name Arithmetic operators
+/// \{
+
+/// Add halfs.
+/// \param x left operand
+/// \param y right operand
+/// \return sum of half expressions
+template <typename T, typename U>
+typename enable<expr, T, U>::type operator+(T x, U y)
+{
+    return functions::plus(x, y);
+}
+
+/// Subtract halfs.
+/// \param x left operand
+/// \param y right operand
+/// \return difference of half expressions
+template <typename T, typename U>
+typename enable<expr, T, U>::type operator-(T x, U y)
+{
+    return functions::minus(x, y);
+}
+
+/// Multiply halfs.
+/// \param x left operand
+/// \param y right operand
+/// \return product of half expressions
+template <typename T, typename U>
+typename enable<expr, T, U>::type operator*(T x, U y)
+{
+    return functions::multiplies(x, y);
+}
+
+/// Divide halfs.
+/// \param x left operand
+/// \param y right operand
+/// \return quotient of half expressions
+template <typename T, typename U>
+typename enable<expr, T, U>::type operator/(T x, U y)
+{
+    return functions::divides(x, y);
+}
+
+/// Identity.
+/// \param arg operand
+/// \return unchanged operand
+template <typename T>
+HALF_CONSTEXPR typename enable<T, T>::type operator+(T arg)
+{
+    return arg;
+}
+
+/// Negation.
+/// \param arg operand
+/// \return negated operand
+template <typename T>
+HALF_CONSTEXPR typename enable<T, T>::type operator-(T arg)
+{
+    return unary_specialized<T>::negate(arg);
+}
+
+/// \}
+/// \name Input and output
+/// \{
+
+/// Output operator.
+/// \param out output stream to write into
+/// \param arg half expression to write
+/// \return reference to output stream
+template <typename T, typename charT, typename traits>
+typename enable<std::basic_ostream<charT, traits>&, T>::type operator<<(std::basic_ostream<charT, traits>& out, T arg)
+{
+    return functions::write(out, arg);
+}
+
+/// Input operator.
+/// \param in input stream to read from
+/// \param arg half to read into
+/// \return reference to input stream
+template <typename charT, typename traits>
+std::basic_istream<charT, traits>& operator>>(std::basic_istream<charT, traits>& in, half& arg)
+{
+    return functions::read(in, arg);
+}
+
+/// \}
+/// \name Basic mathematical operations
+/// \{
+
+/// Absolute value.
+/// \param arg operand
+/// \return absolute value of \a arg
+//		template<typename T> typename enable<T,T>::type abs(T arg) { return unary_specialized<T>::fabs(arg); }
+inline half abs(half arg)
+{
+    return unary_specialized<half>::fabs(arg);
+}
+inline expr abs(expr arg)
+{
+    return unary_specialized<expr>::fabs(arg);
+}
+
+/// Absolute value.
+/// \param arg operand
+/// \return absolute value of \a arg
+//		template<typename T> typename enable<T,T>::type fabs(T arg) { return unary_specialized<T>::fabs(arg); }
+inline half fabs(half arg)
+{
+    return unary_specialized<half>::fabs(arg);
+}
+inline expr fabs(expr arg)
+{
+    return unary_specialized<expr>::fabs(arg);
+}
+
+/// Remainder of division.
+/// \param x first operand
+/// \param y second operand
+/// \return remainder of floating point division.
+//		template<typename T,typename U> typename enable<expr,T,U>::type fmod(T x, U y) { return functions::fmod(x, y); }
+inline expr fmod(half x, half y)
+{
+    return functions::fmod(x, y);
+}
+inline expr fmod(half x, expr y)
+{
+    return functions::fmod(x, y);
+}
+inline expr fmod(expr x, half y)
+{
+    return functions::fmod(x, y);
+}
+inline expr fmod(expr x, expr y)
+{
+    return functions::fmod(x, y);
+}
+
+/// Remainder of division.
+/// \param x first operand
+/// \param y second operand
+/// \return remainder of floating point division.
+//		template<typename T,typename U> typename enable<expr,T,U>::type remainder(T x, U y) { return
+// functions::remainder(x, y); }
+inline expr remainder(half x, half y)
+{
+    return functions::remainder(x, y);
+}
+inline expr remainder(half x, expr y)
+{
+    return functions::remainder(x, y);
+}
+inline expr remainder(expr x, half y)
+{
+    return functions::remainder(x, y);
+}
+inline expr remainder(expr x, expr y)
+{
+    return functions::remainder(x, y);
+}
+
+/// Remainder of division.
+/// \param x first operand
+/// \param y second operand
+/// \param quo address to store some bits of quotient at
+/// \return remainder of floating point division.
+//		template<typename T,typename U> typename enable<expr,T,U>::type remquo(T x, U y, int *quo) { return
+// functions::remquo(x, y, quo); }
+inline expr remquo(half x, half y, int* quo)
+{
+    return functions::remquo(x, y, quo);
+}
+inline expr remquo(half x, expr y, int* quo)
+{
+    return functions::remquo(x, y, quo);
+}
+inline expr remquo(expr x, half y, int* quo)
+{
+    return functions::remquo(x, y, quo);
+}
+inline expr remquo(expr x, expr y, int* quo)
+{
+    return functions::remquo(x, y, quo);
+}
+
+/// Fused multiply add.
+/// \param x first operand
+/// \param y second operand
+/// \param z third operand
+/// \return ( \a x * \a y ) + \a z rounded as one operation.
+//		template<typename T,typename U,typename V> typename enable<expr,T,U,V>::type fma(T x, U y, V z) { return
+// functions::fma(x, y, z); }
+inline expr fma(half x, half y, half z)
+{
+    return functions::fma(x, y, z);
+}
+inline expr fma(half x, half y, expr z)
+{
+    return functions::fma(x, y, z);
+}
+inline expr fma(half x, expr y, half z)
+{
+    return functions::fma(x, y, z);
+}
+inline expr fma(half x, expr y, expr z)
+{
+    return functions::fma(x, y, z);
+}
+inline expr fma(expr x, half y, half z)
+{
+    return functions::fma(x, y, z);
+}
+inline expr fma(expr x, half y, expr z)
+{
+    return functions::fma(x, y, z);
+}
+inline expr fma(expr x, expr y, half z)
+{
+    return functions::fma(x, y, z);
+}
+inline expr fma(expr x, expr y, expr z)
+{
+    return functions::fma(x, y, z);
+}
+
+/// Maximum of half expressions.
+/// \param x first operand
+/// \param y second operand
+/// \return maximum of operands
+//		template<typename T,typename U> typename result<T,U>::type fmax(T x, U y) { return
+// binary_specialized<T,U>::fmax(x, y); }
+inline half fmax(half x, half y)
+{
+    return binary_specialized<half, half>::fmax(x, y);
+}
+inline expr fmax(half x, expr y)
+{
+    return binary_specialized<half, expr>::fmax(x, y);
+}
+inline expr fmax(expr x, half y)
+{
+    return binary_specialized<expr, half>::fmax(x, y);
+}
+inline expr fmax(expr x, expr y)
+{
+    return binary_specialized<expr, expr>::fmax(x, y);
+}
+
+/// Minimum of half expressions.
+/// \param x first operand
+/// \param y second operand
+/// \return minimum of operands
+//		template<typename T,typename U> typename result<T,U>::type fmin(T x, U y) { return
+// binary_specialized<T,U>::fmin(x, y); }
+inline half fmin(half x, half y)
+{
+    return binary_specialized<half, half>::fmin(x, y);
+}
+inline expr fmin(half x, expr y)
+{
+    return binary_specialized<half, expr>::fmin(x, y);
+}
+inline expr fmin(expr x, half y)
+{
+    return binary_specialized<expr, half>::fmin(x, y);
+}
+inline expr fmin(expr x, expr y)
+{
+    return binary_specialized<expr, expr>::fmin(x, y);
+}
+
+/// Positive difference.
+/// \param x first operand
+/// \param y second operand
+/// \return \a x - \a y or 0 if difference negative
+//		template<typename T,typename U> typename enable<expr,T,U>::type fdim(T x, U y) { return functions::fdim(x, y); }
+inline expr fdim(half x, half y)
+{
+    return functions::fdim(x, y);
+}
+inline expr fdim(half x, expr y)
+{
+    return functions::fdim(x, y);
+}
+inline expr fdim(expr x, half y)
+{
+    return functions::fdim(x, y);
+}
+inline expr fdim(expr x, expr y)
+{
+    return functions::fdim(x, y);
+}
+
+/// Get NaN value.
+/// \return quiet NaN
+inline half nanh(const char*)
+{
+    return functions::nanh();
+}
+
+/// \}
+/// \name Exponential functions
+/// \{
+
+/// Exponential function.
+/// \param arg function argument
+/// \return e raised to \a arg
+//		template<typename T> typename enable<expr,T>::type exp(T arg) { return functions::exp(arg); }
+inline expr exp(half arg)
+{
+    return functions::exp(arg);
+}
+inline expr exp(expr arg)
+{
+    return functions::exp(arg);
+}
+
+/// Exponential minus one.
+/// \param arg function argument
+/// \return e raised to \a arg subtracted by 1
+//		template<typename T> typename enable<expr,T>::type expm1(T arg) { return functions::expm1(arg); }
+inline expr expm1(half arg)
+{
+    return functions::expm1(arg);
+}
+inline expr expm1(expr arg)
+{
+    return functions::expm1(arg);
+}
+
+/// Binary exponential.
+/// \param arg function argument
+/// \return 2 raised to \a arg
+//		template<typename T> typename enable<expr,T>::type exp2(T arg) { return functions::exp2(arg); }
+inline expr exp2(half arg)
+{
+    return functions::exp2(arg);
+}
+inline expr exp2(expr arg)
+{
+    return functions::exp2(arg);
+}
+
+/// Natural logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg to base e
+//		template<typename T> typename enable<expr,T>::type log(T arg) { return functions::log(arg); }
+inline expr log(half arg)
+{
+    return functions::log(arg);
+}
+inline expr log(expr arg)
+{
+    return functions::log(arg);
+}
+
+/// Common logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg to base 10
+//		template<typename T> typename enable<expr,T>::type log10(T arg) { return functions::log10(arg); }
+inline expr log10(half arg)
+{
+    return functions::log10(arg);
+}
+inline expr log10(expr arg)
+{
+    return functions::log10(arg);
+}
+
+/// Natural logarithm of one plus argument.
+/// \param arg function argument
+/// \return logarithm of \a arg plus 1 to base e
+//		template<typename T> typename enable<expr,T>::type log1p(T arg) { return functions::log1p(arg); }
+inline expr log1p(half arg)
+{
+    return functions::log1p(arg);
+}
+inline expr log1p(expr arg)
+{
+    return functions::log1p(arg);
+}
+
+/// Binary logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg to base 2
+//		template<typename T> typename enable<expr,T>::type log2(T arg) { return functions::log2(arg); }
+inline expr log2(half arg)
+{
+    return functions::log2(arg);
+}
+inline expr log2(expr arg)
+{
+    return functions::log2(arg);
+}
+
+/// \}
+/// \name Power functions
+/// \{
+
+/// Square root.
+/// \param arg function argument
+/// \return square root of \a arg
+//		template<typename T> typename enable<expr,T>::type sqrt(T arg) { return functions::sqrt(arg); }
+inline expr sqrt(half arg)
+{
+    return functions::sqrt(arg);
+}
+inline expr sqrt(expr arg)
+{
+    return functions::sqrt(arg);
+}
+
+/// Cubic root.
+/// \param arg function argument
+/// \return cubic root of \a arg
+//		template<typename T> typename enable<expr,T>::type cbrt(T arg) { return functions::cbrt(arg); }
+inline expr cbrt(half arg)
+{
+    return functions::cbrt(arg);
+}
+inline expr cbrt(expr arg)
+{
+    return functions::cbrt(arg);
+}
+
+/// Hypotenuse function.
+/// \param x first argument
+/// \param y second argument
+/// \return square root of sum of squares without internal over- or underflows
+//		template<typename T,typename U> typename enable<expr,T,U>::type hypot(T x, U y) { return functions::hypot(x, y);
+//}
+inline expr hypot(half x, half y)
+{
+    return functions::hypot(x, y);
+}
+inline expr hypot(half x, expr y)
+{
+    return functions::hypot(x, y);
+}
+inline expr hypot(expr x, half y)
+{
+    return functions::hypot(x, y);
+}
+inline expr hypot(expr x, expr y)
+{
+    return functions::hypot(x, y);
+}
+
+/// Power function.
+/// \param base first argument
+/// \param exp second argument
+/// \return \a base raised to \a exp
+//		template<typename T,typename U> typename enable<expr,T,U>::type pow(T base, U exp) { return functions::pow(base,
+// exp); }
+inline expr pow(half base, half exp)
+{
+    return functions::pow(base, exp);
+}
+inline expr pow(half base, expr exp)
+{
+    return functions::pow(base, exp);
+}
+inline expr pow(expr base, half exp)
+{
+    return functions::pow(base, exp);
+}
+inline expr pow(expr base, expr exp)
+{
+    return functions::pow(base, exp);
+}
+
+/// \}
+/// \name Trigonometric functions
+/// \{
+
+/// Sine function.
+/// \param arg function argument
+/// \return sine value of \a arg
+//		template<typename T> typename enable<expr,T>::type sin(T arg) { return functions::sin(arg); }
+inline expr sin(half arg)
+{
+    return functions::sin(arg);
+}
+inline expr sin(expr arg)
+{
+    return functions::sin(arg);
+}
+
+/// Cosine function.
+/// \param arg function argument
+/// \return cosine value of \a arg
+//		template<typename T> typename enable<expr,T>::type cos(T arg) { return functions::cos(arg); }
+inline expr cos(half arg)
+{
+    return functions::cos(arg);
+}
+inline expr cos(expr arg)
+{
+    return functions::cos(arg);
+}
+
+/// Tangent function.
+/// \param arg function argument
+/// \return tangent value of \a arg
+//		template<typename T> typename enable<expr,T>::type tan(T arg) { return functions::tan(arg); }
+inline expr tan(half arg)
+{
+    return functions::tan(arg);
+}
+inline expr tan(expr arg)
+{
+    return functions::tan(arg);
+}
+
+/// Arc sine.
+/// \param arg function argument
+/// \return arc sine value of \a arg
+//		template<typename T> typename enable<expr,T>::type asin(T arg) { return functions::asin(arg); }
+inline expr asin(half arg)
+{
+    return functions::asin(arg);
+}
+inline expr asin(expr arg)
+{
+    return functions::asin(arg);
+}
+
+/// Arc cosine function.
+/// \param arg function argument
+/// \return arc cosine value of \a arg
+//		template<typename T> typename enable<expr,T>::type acos(T arg) { return functions::acos(arg); }
+inline expr acos(half arg)
+{
+    return functions::acos(arg);
+}
+inline expr acos(expr arg)
+{
+    return functions::acos(arg);
+}
+
+/// Arc tangent function.
+/// \param arg function argument
+/// \return arc tangent value of \a arg
+//		template<typename T> typename enable<expr,T>::type atan(T arg) { return functions::atan(arg); }
+inline expr atan(half arg)
+{
+    return functions::atan(arg);
+}
+inline expr atan(expr arg)
+{
+    return functions::atan(arg);
+}
+
+/// Arc tangent function.
+/// \param x first argument
+/// \param y second argument
+/// \return arc tangent value
+//		template<typename T,typename U> typename enable<expr,T,U>::type atan2(T x, U y) { return functions::atan2(x, y);
+//}
+inline expr atan2(half x, half y)
+{
+    return functions::atan2(x, y);
+}
+inline expr atan2(half x, expr y)
+{
+    return functions::atan2(x, y);
+}
+inline expr atan2(expr x, half y)
+{
+    return functions::atan2(x, y);
+}
+inline expr atan2(expr x, expr y)
+{
+    return functions::atan2(x, y);
+}
+
+/// \}
+/// \name Hyperbolic functions
+/// \{
+
+/// Hyperbolic sine.
+/// \param arg function argument
+/// \return hyperbolic sine value of \a arg
+//		template<typename T> typename enable<expr,T>::type sinh(T arg) { return functions::sinh(arg); }
+inline expr sinh(half arg)
+{
+    return functions::sinh(arg);
+}
+inline expr sinh(expr arg)
+{
+    return functions::sinh(arg);
+}
+
+/// Hyperbolic cosine.
+/// \param arg function argument
+/// \return hyperbolic cosine value of \a arg
+//		template<typename T> typename enable<expr,T>::type cosh(T arg) { return functions::cosh(arg); }
+inline expr cosh(half arg)
+{
+    return functions::cosh(arg);
+}
+inline expr cosh(expr arg)
+{
+    return functions::cosh(arg);
+}
+
+/// Hyperbolic tangent.
+/// \param arg function argument
+/// \return hyperbolic tangent value of \a arg
+//		template<typename T> typename enable<expr,T>::type tanh(T arg) { return functions::tanh(arg); }
+inline expr tanh(half arg)
+{
+    return functions::tanh(arg);
+}
+inline expr tanh(expr arg)
+{
+    return functions::tanh(arg);
+}
+
+/// Hyperbolic area sine.
+/// \param arg function argument
+/// \return area sine value of \a arg
+//		template<typename T> typename enable<expr,T>::type asinh(T arg) { return functions::asinh(arg); }
+inline expr asinh(half arg)
+{
+    return functions::asinh(arg);
+}
+inline expr asinh(expr arg)
+{
+    return functions::asinh(arg);
+}
+
+/// Hyperbolic area cosine.
+/// \param arg function argument
+/// \return area cosine value of \a arg
+//		template<typename T> typename enable<expr,T>::type acosh(T arg) { return functions::acosh(arg); }
+inline expr acosh(half arg)
+{
+    return functions::acosh(arg);
+}
+inline expr acosh(expr arg)
+{
+    return functions::acosh(arg);
+}
+
+/// Hyperbolic area tangent.
+/// \param arg function argument
+/// \return area tangent value of \a arg
+//		template<typename T> typename enable<expr,T>::type atanh(T arg) { return functions::atanh(arg); }
+inline expr atanh(half arg)
+{
+    return functions::atanh(arg);
+}
+inline expr atanh(expr arg)
+{
+    return functions::atanh(arg);
+}
+
+/// \}
+/// \name Error and gamma functions
+/// \{
+
+/// Error function.
+/// \param arg function argument
+/// \return error function value of \a arg
+//		template<typename T> typename enable<expr,T>::type erf(T arg) { return functions::erf(arg); }
+inline expr erf(half arg)
+{
+    return functions::erf(arg);
+}
+inline expr erf(expr arg)
+{
+    return functions::erf(arg);
+}
+
+/// Complementary error function.
+/// \param arg function argument
+/// \return 1 minus error function value of \a arg
+//		template<typename T> typename enable<expr,T>::type erfc(T arg) { return functions::erfc(arg); }
+inline expr erfc(half arg)
+{
+    return functions::erfc(arg);
+}
+inline expr erfc(expr arg)
+{
+    return functions::erfc(arg);
+}
+
+/// Natural logarithm of gamma function.
+/// \param arg function argument
+/// \return natural logarithm of gamma function for \a arg
+//		template<typename T> typename enable<expr,T>::type lgamma(T arg) { return functions::lgamma(arg); }
+inline expr lgamma(half arg)
+{
+    return functions::lgamma(arg);
+}
+inline expr lgamma(expr arg)
+{
+    return functions::lgamma(arg);
+}
+
+/// Gamma function.
+/// \param arg function argument
+/// \return gamma function value of \a arg
+//		template<typename T> typename enable<expr,T>::type tgamma(T arg) { return functions::tgamma(arg); }
+inline expr tgamma(half arg)
+{
+    return functions::tgamma(arg);
+}
+inline expr tgamma(expr arg)
+{
+    return functions::tgamma(arg);
+}
+
+/// \}
+/// \name Rounding
+/// \{
+
+/// Nearest integer not less than half value.
+/// \param arg half to round
+/// \return nearest integer not less than \a arg
+//		template<typename T> typename enable<half,T>::type ceil(T arg) { return functions::ceil(arg); }
+inline half ceil(half arg)
+{
+    return functions::ceil(arg);
+}
+inline half ceil(expr arg)
+{
+    return functions::ceil(arg);
+}
+
+/// Nearest integer not greater than half value.
+/// \param arg half to round
+/// \return nearest integer not greater than \a arg
+//		template<typename T> typename enable<half,T>::type floor(T arg) { return functions::floor(arg); }
+inline half floor(half arg)
+{
+    return functions::floor(arg);
+}
+inline half floor(expr arg)
+{
+    return functions::floor(arg);
+}
+
+/// Nearest integer not greater in magnitude than half value.
+/// \param arg half to round
+/// \return nearest integer not greater in magnitude than \a arg
+//		template<typename T> typename enable<half,T>::type trunc(T arg) { return functions::trunc(arg); }
+inline half trunc(half arg)
+{
+    return functions::trunc(arg);
+}
+inline half trunc(expr arg)
+{
+    return functions::trunc(arg);
+}
+
+/// Nearest integer.
+/// \param arg half to round
+/// \return nearest integer, rounded away from zero in half-way cases
+//		template<typename T> typename enable<half,T>::type round(T arg) { return functions::round(arg); }
+inline half round(half arg)
+{
+    return functions::round(arg);
+}
+inline half round(expr arg)
+{
+    return functions::round(arg);
+}
+
+/// Nearest integer.
+/// \param arg half to round
+/// \return nearest integer, rounded away from zero in half-way cases
+//		template<typename T> typename enable<long,T>::type lround(T arg) { return functions::lround(arg); }
+inline long lround(half arg)
+{
+    return functions::lround(arg);
+}
+inline long lround(expr arg)
+{
+    return functions::lround(arg);
+}
+
+/// Nearest integer using half's internal rounding mode.
+/// \param arg half expression to round
+/// \return nearest integer using default rounding mode
+//		template<typename T> typename enable<half,T>::type nearbyint(T arg) { return functions::nearbyint(arg); }
+inline half nearbyint(half arg)
+{
+    return functions::rint(arg);
+}
+inline half nearbyint(expr arg)
+{
+    return functions::rint(arg);
+}
+
+/// Nearest integer using half's internal rounding mode.
+/// \param arg half expression to round
+/// \return nearest integer using default rounding mode
+//		template<typename T> typename enable<half,T>::type rint(T arg) { return functions::rint(arg); }
+inline half rint(half arg)
+{
+    return functions::rint(arg);
+}
+inline half rint(expr arg)
+{
+    return functions::rint(arg);
+}
+
+/// Nearest integer using half's internal rounding mode.
+/// \param arg half expression to round
+/// \return nearest integer using default rounding mode
+//		template<typename T> typename enable<long,T>::type lrint(T arg) { return functions::lrint(arg); }
+inline long lrint(half arg)
+{
+    return functions::lrint(arg);
+}
+inline long lrint(expr arg)
+{
+    return functions::lrint(arg);
+}
+#if HALF_ENABLE_CPP11_LONG_LONG
+/// Nearest integer.
+/// \param arg half to round
+/// \return nearest integer, rounded away from zero in half-way cases
+//		template<typename T> typename enable<long long,T>::type llround(T arg) { return functions::llround(arg); }
+inline long long llround(half arg)
+{
+    return functions::llround(arg);
+}
+inline long long llround(expr arg)
+{
+    return functions::llround(arg);
+}
+
+/// Nearest integer using half's internal rounding mode.
+/// \param arg half expression to round
+/// \return nearest integer using default rounding mode
+//		template<typename T> typename enable<long long,T>::type llrint(T arg) { return functions::llrint(arg); }
+inline long long llrint(half arg)
+{
+    return functions::llrint(arg);
+}
+inline long long llrint(expr arg)
+{
+    return functions::llrint(arg);
+}
+#endif
+
+/// \}
+/// \name Floating point manipulation
+/// \{
+
+/// Decompress floating point number.
+/// \param arg number to decompress
+/// \param exp address to store exponent at
+/// \return significand in range [0.5, 1)
+//		template<typename T> typename enable<half,T>::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); }
+inline half frexp(half arg, int* exp)
+{
+    return functions::frexp(arg, exp);
+}
+inline half frexp(expr arg, int* exp)
+{
+    return functions::frexp(arg, exp);
+}
+
+/// Multiply by power of two.
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+//		template<typename T> typename enable<half,T>::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp);
+//}
+inline half ldexp(half arg, int exp)
+{
+    return functions::scalbln(arg, exp);
+}
+inline half ldexp(expr arg, int exp)
+{
+    return functions::scalbln(arg, exp);
+}
+
+/// Extract integer and fractional parts.
+/// \param arg number to decompress
+/// \param iptr address to store integer part at
+/// \return fractional part
+//		template<typename T> typename enable<half,T>::type modf(T arg, half *iptr) { return functions::modf(arg, iptr);
+//}
+inline half modf(half arg, half* iptr)
+{
+    return functions::modf(arg, iptr);
+}
+inline half modf(expr arg, half* iptr)
+{
+    return functions::modf(arg, iptr);
+}
+
+/// Multiply by power of two.
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+//		template<typename T> typename enable<half,T>::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp);
+//}
+inline half scalbn(half arg, int exp)
+{
+    return functions::scalbln(arg, exp);
+}
+inline half scalbn(expr arg, int exp)
+{
+    return functions::scalbln(arg, exp);
+}
+
+/// Multiply by power of two.
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+//		template<typename T> typename enable<half,T>::type scalbln(T arg, long exp) { return functions::scalbln(arg,
+// exp);
+//}
+inline half scalbln(half arg, long exp)
+{
+    return functions::scalbln(arg, exp);
+}
+inline half scalbln(expr arg, long exp)
+{
+    return functions::scalbln(arg, exp);
+}
+
+/// Extract exponent.
+/// \param arg number to query
+/// \return floating point exponent
+/// \retval FP_ILOGB0 for zero
+/// \retval FP_ILOGBNAN for NaN
+/// \retval INT_MAX for infinity
+//		template<typename T> typename enable<int,T>::type ilogb(T arg) { return functions::ilogb(arg); }
+inline int ilogb(half arg)
+{
+    return functions::ilogb(arg);
+}
+inline int ilogb(expr arg)
+{
+    return functions::ilogb(arg);
+}
+
+/// Extract exponent.
+/// \param arg number to query
+/// \return floating point exponent
+//		template<typename T> typename enable<half,T>::type logb(T arg) { return functions::logb(arg); }
+inline half logb(half arg)
+{
+    return functions::logb(arg);
+}
+inline half logb(expr arg)
+{
+    return functions::logb(arg);
+}
+
+/// Next representable value.
+/// \param from value to compute next representable value for
+/// \param to direction towards which to compute next value
+/// \return next representable value after \a from in direction towards \a to
+//		template<typename T,typename U> typename enable<half,T,U>::type nextafter(T from, U to) { return
+// functions::nextafter(from, to); }
+inline half nextafter(half from, half to)
+{
+    return functions::nextafter(from, to);
+}
+inline half nextafter(half from, expr to)
+{
+    return functions::nextafter(from, to);
+}
+inline half nextafter(expr from, half to)
+{
+    return functions::nextafter(from, to);
+}
+inline half nextafter(expr from, expr to)
+{
+    return functions::nextafter(from, to);
+}
+
+/// Next representable value.
+/// \param from value to compute next representable value for
+/// \param to direction towards which to compute next value
+/// \return next representable value after \a from in direction towards \a to
+//		template<typename T> typename enable<half,T>::type nexttoward(T from, long double to) { return
+// functions::nexttoward(from, to); }
+inline half nexttoward(half from, long double to)
+{
+    return functions::nexttoward(from, to);
+}
+inline half nexttoward(expr from, long double to)
+{
+    return functions::nexttoward(from, to);
+}
+
+/// Take sign.
+/// \param x value to change sign for
+/// \param y value to take sign from
+/// \return value equal to \a x in magnitude and to \a y in sign
+//		template<typename T,typename U> typename enable<half,T,U>::type copysign(T x, U y) { return
+// functions::copysign(x, y); }
+inline half copysign(half x, half y)
+{
+    return functions::copysign(x, y);
+}
+inline half copysign(half x, expr y)
+{
+    return functions::copysign(x, y);
+}
+inline half copysign(expr x, half y)
+{
+    return functions::copysign(x, y);
+}
+inline half copysign(expr x, expr y)
+{
+    return functions::copysign(x, y);
+}
+
+/// \}
+/// \name Floating point classification
+/// \{
+
+/// Classify floating point value.
+/// \param arg number to classify
+/// \retval FP_ZERO for positive and negative zero
+/// \retval FP_SUBNORMAL for subnormal numbers
+/// \retval FP_INFINITE for positive and negative infinity
+/// \retval FP_NAN for NaNs
+/// \retval FP_NORMAL for all other (normal) values
+//		template<typename T> typename enable<int,T>::type fpclassify(T arg) { return functions::fpclassify(arg); }
+inline int fpclassify(half arg)
+{
+    return functions::fpclassify(arg);
+}
+inline int fpclassify(expr arg)
+{
+    return functions::fpclassify(arg);
+}
+
+/// Check if finite number.
+/// \param arg number to check
+/// \retval true if neither infinity nor NaN
+/// \retval false else
+//		template<typename T> typename enable<bool,T>::type isfinite(T arg) { return functions::isfinite(arg); }
+inline bool isfinite(half arg)
+{
+    return functions::isfinite(arg);
+}
+inline bool isfinite(expr arg)
+{
+    return functions::isfinite(arg);
+}
+
+/// Check for infinity.
+/// \param arg number to check
+/// \retval true for positive or negative infinity
+/// \retval false else
+//		template<typename T> typename enable<bool,T>::type isinf(T arg) { return functions::isinf(arg); }
+inline bool isinf(half arg)
+{
+    return functions::isinf(arg);
+}
+inline bool isinf(expr arg)
+{
+    return functions::isinf(arg);
+}
+
+/// Check for NaN.
+/// \param arg number to check
+/// \retval true for NaNs
+/// \retval false else
+//		template<typename T> typename enable<bool,T>::type isnan(T arg) { return functions::isnan(arg); }
+inline bool isnan(half arg)
+{
+    return functions::isnan(arg);
+}
+inline bool isnan(expr arg)
+{
+    return functions::isnan(arg);
+}
+
+/// Check if normal number.
+/// \param arg number to check
+/// \retval true if normal number
+/// \retval false if either subnormal, zero, infinity or NaN
+//		template<typename T> typename enable<bool,T>::type isnormal(T arg) { return functions::isnormal(arg); }
+inline bool isnormal(half arg)
+{
+    return functions::isnormal(arg);
+}
+inline bool isnormal(expr arg)
+{
+    return functions::isnormal(arg);
+}
+
+/// Check sign.
+/// \param arg number to check
+/// \retval true for negative number
+/// \retval false for positive number
+//		template<typename T> typename enable<bool,T>::type signbit(T arg) { return functions::signbit(arg); }
+inline bool signbit(half arg)
+{
+    return functions::signbit(arg);
+}
+inline bool signbit(expr arg)
+{
+    return functions::signbit(arg);
+}
+
+/// \}
+/// \name Comparison
+/// \{
+
+/// Comparison for greater than.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x greater than \a y
+/// \retval false else
+//		template<typename T,typename U> typename enable<bool,T,U>::type isgreater(T x, U y) { return
+// functions::isgreater(x, y); }
+inline bool isgreater(half x, half y)
+{
+    return functions::isgreater(x, y);
+}
+inline bool isgreater(half x, expr y)
+{
+    return functions::isgreater(x, y);
+}
+inline bool isgreater(expr x, half y)
+{
+    return functions::isgreater(x, y);
+}
+inline bool isgreater(expr x, expr y)
+{
+    return functions::isgreater(x, y);
+}
+
+/// Comparison for greater equal.
+/// \param x first operand
+/// \param y second operand
+/// \retval true if \a x greater equal \a y
+/// \retval false else
+//		template<typename T,typename U> typename enable<bool,T,U>::type isgreaterequal(T x, U y) { return
+// functions::isgreaterequal(x, y); }
+inline bool isgreaterequal(half x, half y)
+{
+    return functions::isgreaterequal(x, y);
+}
+inline bool isgreaterequal(half x, expr y)
+{
+    return functions::isgreaterequal(x, y);
+}
+inline bool isgreaterequal(expr x, half y)
+{
+    return functions::isgreaterequal(x, y);
+}
+inline bool isgreaterequal(expr x, expr y)
+{
+    return functions::isgreaterequal(x, y);
+}
+
+/// Comparison for less than. Every overload forwards to functions::isless().
+/// \param x first operand (`half` or `expr`)
+/// \param y second operand (`half` or `expr`)
+/// \retval true if \a x is less than \a y
+/// \retval false otherwise
+//		template<typename T,typename U> typename enable<bool,T,U>::type isless(T x, U y) { return functions::isless(x,
+// y);
+//}
+inline bool isless(half x, half y)
+{
+    return functions::isless(x, y);
+}
+inline bool isless(half x, expr y)
+{
+    return functions::isless(x, y);
+}
+inline bool isless(expr x, half y)
+{
+    return functions::isless(x, y);
+}
+inline bool isless(expr x, expr y)
+{
+    return functions::isless(x, y);
+}
+
+/// Comparison for less than or equal. Every overload forwards to functions::islessequal().
+/// \param x first operand (`half` or `expr`)
+/// \param y second operand (`half` or `expr`)
+/// \retval true if \a x is less than or equal to \a y
+/// \retval false otherwise
+//		template<typename T,typename U> typename enable<bool,T,U>::type islessequal(T x, U y) { return
+// functions::islessequal(x, y); }
+inline bool islessequal(half x, half y)
+{
+    return functions::islessequal(x, y);
+}
+inline bool islessequal(half x, expr y)
+{
+    return functions::islessequal(x, y);
+}
+inline bool islessequal(expr x, half y)
+{
+    return functions::islessequal(x, y);
+}
+inline bool islessequal(expr x, expr y)
+{
+    return functions::islessequal(x, y);
+}
+
+/// Comparison for less or greater. Every overload forwards to functions::islessgreater().
+/// \param x first operand (`half` or `expr`)
+/// \param y second operand (`half` or `expr`)
+/// \retval true if \a x is either less than or greater than \a y
+/// \retval false otherwise
+//		template<typename T,typename U> typename enable<bool,T,U>::type islessgreater(T x, U y) { return
+// functions::islessgreater(x, y); }
+inline bool islessgreater(half x, half y)
+{
+    return functions::islessgreater(x, y);
+}
+inline bool islessgreater(half x, expr y)
+{
+    return functions::islessgreater(x, y);
+}
+inline bool islessgreater(expr x, half y)
+{
+    return functions::islessgreater(x, y);
+}
+inline bool islessgreater(expr x, expr y)
+{
+    return functions::islessgreater(x, y);
+}
+
+/// Check if unordered. Every overload forwards to functions::isunordered().
+/// \param x first operand (`half` or `expr`)
+/// \param y second operand (`half` or `expr`)
+/// \retval true if unordered (one or two NaN operands)
+/// \retval false otherwise
+//		template<typename T,typename U> typename enable<bool,T,U>::type isunordered(T x, U y) { return
+// functions::isunordered(x, y); }
+inline bool isunordered(half x, half y)
+{
+    return functions::isunordered(x, y);
+}
+inline bool isunordered(half x, expr y)
+{
+    return functions::isunordered(x, y);
+}
+inline bool isunordered(expr x, half y)
+{
+    return functions::isunordered(x, y);
+}
+inline bool isunordered(expr x, expr y)
+{
+    return functions::isunordered(x, y);
+}
+/// \}
+/// \name Casting
+/// \{
+
+/// Cast to or from half-precision floating point number.
+/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
+/// directly, without any roundtrip over `float` that a `static_cast` would otherwise do. This overload takes no
+/// rounding-mode template argument and uses the default rounding mode; it forwards to half_caster<T, U>::cast().
+///
+/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
+/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
+/// error and casting between [half](\ref half_float::half)s is just a no-op.
+/// \tparam T destination type (half or built-in arithmetic type)
+/// \tparam U source type (half or built-in arithmetic type)
+/// \param arg value to cast
+/// \return \a arg converted to destination type
+template <typename T, typename U>
+T half_cast(U arg)
+{
+    return half_caster<T, U>::cast(arg);
+}
+
+/// Cast to or from half-precision floating point number with an explicit rounding mode.
+/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
+/// directly using the rounding mode \a R, without any roundtrip over `float` that a `static_cast` would otherwise do.
+///
+/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
+/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
+/// error and casting between [half](\ref half_float::half)s is just a no-op.
+/// \tparam T destination type (half or built-in arithmetic type)
+/// \tparam R rounding mode to use, passed on as the third argument of half_caster
+/// \tparam U source type (half or built-in arithmetic type)
+/// \param arg value to cast
+/// \return \a arg converted to destination type
+template <typename T, std::float_round_style R, typename U>
+T half_cast(U arg)
+{
+    return half_caster<T, U, R>::cast(arg);
+}
+/// \}
+} // namespace detail
+
+using detail::operator==;
+using detail::operator!=;
+using detail::operator<;
+using detail::operator>;
+using detail::operator<=;
+using detail::operator>=;
+using detail::operator+;
+using detail::operator-;
+using detail::operator*;
+using detail::operator/;
+using detail::operator<<;
+using detail::operator>>;
+
+using detail::abs;
+using detail::acos;
+using detail::acosh;
+using detail::asin;
+using detail::asinh;
+using detail::atan;
+using detail::atan2;
+using detail::atanh;
+using detail::cbrt;
+using detail::ceil;
+using detail::cos;
+using detail::cosh;
+using detail::erf;
+using detail::erfc;
+using detail::exp;
+using detail::exp2;
+using detail::expm1;
+using detail::fabs;
+using detail::fdim;
+using detail::floor;
+using detail::fma;
+using detail::fmax;
+using detail::fmin;
+using detail::fmod;
+using detail::hypot;
+using detail::lgamma;
+using detail::log;
+using detail::log10;
+using detail::log1p;
+using detail::log2;
+using detail::lrint;
+using detail::lround;
+using detail::nanh;
+using detail::nearbyint;
+using detail::pow;
+using detail::remainder;
+using detail::remquo;
+using detail::rint;
+using detail::round;
+using detail::sin;
+using detail::sinh;
+using detail::sqrt;
+using detail::tan;
+using detail::tanh;
+using detail::tgamma;
+using detail::trunc;
+#if HALF_ENABLE_CPP11_LONG_LONG
+using detail::llrint;
+using detail::llround;
+#endif
+using detail::copysign;
+using detail::fpclassify;
+using detail::frexp;
+using detail::ilogb;
+using detail::isfinite;
+using detail::isgreater;
+using detail::isgreaterequal;
+using detail::isinf;
+using detail::isless;
+using detail::islessequal;
+using detail::islessgreater;
+using detail::isnan;
+using detail::isnormal;
+using detail::isunordered;
+using detail::ldexp;
+using detail::logb;
+using detail::modf;
+using detail::nextafter;
+using detail::nexttoward;
+using detail::scalbln;
+using detail::scalbn;
+using detail::signbit;
+
+using detail::half_cast;
+} // namespace half_float
+
+/// Extensions to the C++ standard library.
+namespace std
+{
+/// Numeric limits for half-precision floats.
+/// Because of the underlying single-precision implementation of many operations, it inherits some properties from
+/// `std::numeric_limits<float>`; every member not redeclared below comes from that base class.
+template <>
+class numeric_limits<half_float::half> : public numeric_limits<float>
+{
+public:
+    /// Supports signed values.
+    static HALF_CONSTEXPR_CONST bool is_signed = true;
+
+    /// Is not exact.
+    static HALF_CONSTEXPR_CONST bool is_exact = false;
+
+    /// Doesn't provide modulo arithmetic.
+    static HALF_CONSTEXPR_CONST bool is_modulo = false;
+
+    /// IEEE conformant.
+    static HALF_CONSTEXPR_CONST bool is_iec559 = true;
+
+    /// Supports infinity.
+    static HALF_CONSTEXPR_CONST bool has_infinity = true;
+
+    /// Supports quiet NaNs.
+    static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
+
+    /// Supports subnormal values.
+    static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
+
+    /// Rounding mode.
+    /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying
+    /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding
+    /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the
+    /// single-precision rounding mode.
+    static HALF_CONSTEXPR_CONST float_round_style round_style
+        = (std::numeric_limits<float>::round_style == half_float::half::round_style) ? half_float::half::round_style
+                                                                                     : round_indeterminate;
+
+    /// Significant binary digits (radix-2 digits of the significand).
+    static HALF_CONSTEXPR_CONST int digits = 11;
+
+    /// Significant decimal digits.
+    static HALF_CONSTEXPR_CONST int digits10 = 3;
+
+    /// Required decimal digits to represent all possible values.
+    static HALF_CONSTEXPR_CONST int max_digits10 = 5;
+
+    /// Number base.
+    static HALF_CONSTEXPR_CONST int radix = 2;
+
+    /// One more than smallest exponent.
+    static HALF_CONSTEXPR_CONST int min_exponent = -13;
+
+    /// Smallest normalized representable power of 10.
+    static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
+
+    /// One more than largest exponent.
+    static HALF_CONSTEXPR_CONST int max_exponent = 16;
+
+    /// Largest finitely representable power of 10.
+    static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
+
+    /// Smallest positive normal value (bit pattern 0x0400).
+    static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x0400);
+    }
+
+    /// Lowest (most negative) finite value: the bit pattern of max() with the sign bit set.
+    static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0xFBFF);
+    }
+
+    /// Largest finite value (bit pattern 0x7BFF).
+    static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x7BFF);
+    }
+
+    /// Difference between 1 and the next representable value.
+    static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x1400);
+    }
+
+    /// Maximum rounding error: 0x3800 (0.5) when round_style is round-to-nearest, 0x3C00 (1.0) otherwise.
+    static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, (round_style == std::round_to_nearest) ? 0x3800 : 0x3C00);
+    }
+
+    /// Positive infinity.
+    static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x7C00);
+    }
+
+    /// Quiet NaN.
+    static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x7FFF);
+    }
+
+    /// Signalling NaN.
+    static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x7DFF);
+    }
+
+    /// Smallest positive subnormal value.
+    static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x0001);
+    }
+};
+
+#if HALF_ENABLE_CPP11_HASH
+/// Hash function for half-precision floats, computed from the raw 16-bit representation.
+/// This is only defined if C++11 `std::hash` is supported and enabled.
+template <>
+struct hash<half_float::half> //: unary_function<half_float::half,size_t>
+{
+    /// Type of function argument.
+    typedef half_float::half argument_type;
+
+    /// Function return type.
+    typedef size_t result_type;
+
+    /// Compute hash function.
+    /// \param arg half to hash
+    /// \return hash of the raw bits; the mask zeroes the pattern 0x8000 (negative zero) so it hashes like 0x0000
+    result_type operator()(argument_type arg) const
+    {
+        return hash<half_float::detail::uint16>()(static_cast<unsigned>(arg.data_) & -(arg.data_ != 0x8000));
+    }
+};
+#endif
+} // namespace std
+
+#undef HALF_CONSTEXPR
+#undef HALF_CONSTEXPR_CONST
+#undef HALF_NOEXCEPT
+#undef HALF_NOTHROW
+#ifdef HALF_POP_WARNINGS
+#pragma warning(pop)
+#undef HALF_POP_WARNINGS
+#endif
+
+#endif
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logger.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logger.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..909ec0bbd9ade33a96980f42409787782042ea35
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logger.cpp
@@ -0,0 +1,41 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "logger.h"
+#include "ErrorRecorder.h"
+#include "logging.h"
+using namespace nvinfer1;
+SampleErrorRecorder gRecorder;
+namespace sample
+{
+Logger gLogger{Logger::Severity::kINFO};
+LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)};
+LogStreamConsumer gLogInfo{LOG_INFO(gLogger)};
+LogStreamConsumer gLogWarning{LOG_WARN(gLogger)};
+LogStreamConsumer gLogError{LOG_ERROR(gLogger)};
+LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)};
+
+void setReportableSeverity(Logger::Severity severity)
+{
+    // Update the Logger itself, then every global stream consumer from verbose to fatal.
+    gLogger.setReportableSeverity(severity);
+    for (LogStreamConsumer* consumer : {&gLogVerbose, &gLogInfo, &gLogWarning, &gLogError, &gLogFatal})
+    {
+        consumer->setReportableSeverity(severity);
+    }
+}
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logger.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logger.h
new file mode 100644
index 0000000000000000000000000000000000000000..8205e4572ee028f8be09d516f3f5b1948dc1cd18
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logger.h
@@ -0,0 +1,37 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LOGGER_H
+#define LOGGER_H
+
+#include "logging.h"
+
+class SampleErrorRecorder;
+extern SampleErrorRecorder gRecorder;
+namespace sample
+{
+extern Logger gLogger;
+extern LogStreamConsumer gLogVerbose;
+extern LogStreamConsumer gLogInfo;
+extern LogStreamConsumer gLogWarning;
+extern LogStreamConsumer gLogError;
+extern LogStreamConsumer gLogFatal;
+
+void setReportableSeverity(Logger::Severity severity);
+} // namespace sample
+
+#endif // LOGGER_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logging.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logging.h
new file mode 100644
index 0000000000000000000000000000000000000000..f62fee8200146765f0fc3ae987975b086256b5ad
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/logging.h
@@ -0,0 +1,587 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TENSORRT_LOGGING_H
+#define TENSORRT_LOGGING_H
+
+#include "NvInferRuntime.h"
+#include "sampleOptions.h"
+#include <cassert>
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <mutex>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+namespace sample
+{
+
+using Severity = nvinfer1::ILogger::Severity;
+
+//! \brief std::stringbuf that emits its buffered text to an output stream as a timestamped, prefixed message.
+class LogStreamConsumerBuffer : public std::stringbuf
+{
+public:
+    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
+        : mOutput(stream)
+        , mPrefix(prefix)
+        , mShouldLog(shouldLog)
+    {
+    }
+
+    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
+        : mOutput(other.mOutput)
+        , mPrefix(other.mPrefix)
+        , mShouldLog(other.mShouldLog)
+    {
+    }
+    LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
+    LogStreamConsumerBuffer() = delete;
+    LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
+    LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
+
+    ~LogStreamConsumerBuffer() override
+    {
+        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
+        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
+        // if the pointer to the beginning is not equal to the pointer to the current position,
+        // call putOutput() to log the output to the stream
+        if (pbase() != pptr())
+        {
+            putOutput();
+        }
+    }
+
+    //!
+    //! synchronizes the stream buffer and returns 0 on success
+    //! synchronizing the stream buffer consists of inserting the buffer contents into the stream,
+    //! resetting the buffer and flushing the stream
+    //!
+    int32_t sync() override
+    {
+        putOutput();
+        return 0;
+    }
+
+    void putOutput()
+    {
+        if (mShouldLog)
+        {
+            // Build the timestamp with std::strftime rather than streaming std::setw/std::setfill into mOutput:
+            // std::setfill is sticky and would leave '0' as the fill character of the caller's stream.
+            char stamp[32] = "";
+            std::time_t const timestamp = std::time(nullptr);
+            // std::localtime() may return nullptr; the timestamp is then left empty instead of dereferencing it
+            if (std::tm const* const tmLocal = std::localtime(&timestamp))
+            {
+                std::strftime(stamp, sizeof(stamp), "[%m/%d/%Y-%H:%M:%S] ", tmLocal);
+            }
+            // std::stringbuf::str() gets the string contents of the buffer
+            // insert the buffer contents preceded by the timestamp and the prefix into the stream
+            mOutput << stamp << mPrefix << str();
+        }
+        // set the buffer to empty
+        str("");
+        // flush the stream
+        mOutput.flush();
+    }
+
+    void setShouldLog(bool shouldLog)
+    {
+        mShouldLog = shouldLog;
+    }
+
+private:
+    std::ostream& mOutput; //!< stream that receives the formatted messages
+    std::string mPrefix;   //!< text inserted between the timestamp and the message
+    bool mShouldLog{};     //!< when false, putOutput() discards the buffered text
+}; // class LogStreamConsumerBuffer
+
+//!
+//! \class LogStreamConsumerBase
+//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
+//!
+class LogStreamConsumerBase
+{
+public:
+    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
+        : mBuffer(stream, prefix, shouldLog)
+    {
+    }
+
+protected:
+    std::mutex mLogMutex; //!< exposed to callers through LogStreamConsumer::getMutex()
+    LogStreamConsumerBuffer mBuffer; //!< buffer built from the constructor's (stream, prefix, shouldLog) arguments
+}; // class LogStreamConsumerBase
+
+//!
+//! \class LogStreamConsumer
+//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
+//!  Order of base classes is LogStreamConsumerBase and then std::ostream.
+//!  This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
+//!  in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
+//!  This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
+//!  Please do not change the order of the parent classes.
+//!
+class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
+{
+public:
+    //!
+    //! \brief Creates a LogStreamConsumer which logs messages with level severity.
+    //!  A message is logged only when severity <= reportableSeverity; otherwise it is discarded.
+    //!
+    LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity, nvinfer1::ILogger::Severity severity)
+        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
+        , std::ostream(&mBuffer) // links the stream buffer with the stream
+        , mShouldLog(severity <= reportableSeverity)
+        , mSeverity(severity)
+    {
+    }
+
+    LogStreamConsumer(LogStreamConsumer&& other) noexcept // builds a fresh buffer from other's severity and flag
+        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
+        , std::ostream(&mBuffer) // links the stream buffer with the stream
+        , mShouldLog(other.mShouldLog)
+        , mSeverity(other.mSeverity)
+    {
+    }
+    LogStreamConsumer(const LogStreamConsumer& other) = delete;
+    LogStreamConsumer() = delete;
+    ~LogStreamConsumer() override = default;
+    LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
+    LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
+
+    void setReportableSeverity(Severity reportableSeverity) // re-evaluates the flag and mirrors it into the buffer
+    {
+        mShouldLog = mSeverity <= reportableSeverity;
+        mBuffer.setShouldLog(mShouldLog);
+    }
+
+    std::mutex& getMutex() // mutex inherited from LogStreamConsumerBase
+    {
+        return mLogMutex;
+    }
+
+    bool getShouldLog() const // true when this consumer's severity passes the reportable threshold
+    {
+        return mShouldLog;
+    }
+
+private:
+    static std::ostream& severityOstream(Severity severity) // kINFO and higher enum values use cout, the rest cerr
+    {
+        return severity >= Severity::kINFO ? std::cout : std::cerr;
+    }
+
+    static std::string severityPrefix(Severity severity) // bracketed one-letter tag for each severity
+    {
+        switch (severity)
+        {
+        case Severity::kINTERNAL_ERROR: return "[F] ";
+        case Severity::kERROR: return "[E] ";
+        case Severity::kWARNING: return "[W] ";
+        case Severity::kINFO: return "[I] ";
+        case Severity::kVERBOSE: return "[V] ";
+        default: assert(0); return "";
+        }
+    }
+
+    bool mShouldLog; //!< copy of the flag also held by mBuffer
+    Severity mSeverity; //!< severity of every message written through this consumer
+}; // class LogStreamConsumer
+
+template <typename T>
+LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj)
+{
+    if (!logger.getShouldLog())
+    {
+        return logger; // suppressed: drop the value without locking or touching the stream
+    }
+    std::lock_guard<std::mutex> lock(logger.getMutex());
+    static_cast<std::ostream&>(logger) << obj; // std::ostream insertion, not a recursive call to this overload
+    return logger;
+}
+
+//!
+//! Overload for stream manipulators such as std::endl, applied under the consumer's mutex
+//!
+inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, std::ostream& (*f)(std::ostream&) )
+{
+    if (!logger.getShouldLog())
+    {
+        return logger;
+    }
+    std::lock_guard<std::mutex> lock(logger.getMutex());
+    static_cast<std::ostream&>(logger) << f;
+    return logger;
+}
+
+inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, const nvinfer1::Dims& dims)
+{
+    if (!logger.getShouldLog())
+    {
+        return logger;
+    }
+    std::lock_guard<std::mutex> lock(logger.getMutex());
+    for (int32_t axis = 0; axis < dims.nbDims; ++axis)
+    {
+        static_cast<std::ostream&>(logger) << (axis == 0 ? "" : "x") << dims.d[axis]; // "x"-separated extents
+    }
+    return logger;
+}
+
+//!
+//! \class Logger
+//!
+//! \brief Class which manages logging of TensorRT tools and samples
+//!
+//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
+//! and supports logging two types of messages:
+//!
+//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
+//! - Test pass/fail messages
+//!
+//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
+//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
+//!
+//! In the future, this class could be extended to support dumping test results to a file in some standard format
+//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
+//!
+//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
+//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
+//! library and messages coming from the sample.
+//!
+//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
+//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
+//! object.
+//!
+class Logger : public nvinfer1::ILogger
+{
+public:
+    explicit Logger(Severity severity = Severity::kWARNING)
+        : mReportableSeverity(severity)
+    {
+    }
+
+    //!
+    //! \enum TestResult
+    //! \brief Represents the state of a given test
+    //!
+    enum class TestResult
+    {
+        kRUNNING, //!< The test is running
+        kPASSED,  //!< The test passed
+        kFAILED,  //!< The test failed
+        kWAIVED   //!< The test was waived
+    };
+
+    //!
+    //! \brief Forward-compatible method for retrieving the nvinfer1::ILogger associated with this Logger
+    //! \return The nvinfer1::ILogger associated with this Logger
+    //!
+    //! TODO Once all samples are updated to use this method to register the logger with TensorRT,
+    //! we can eliminate the inheritance of Logger from ILogger
+    //!
+    nvinfer1::ILogger& getTRTLogger() noexcept
+    {
+        return *this;
+    }
+
+    //!
+    //! \brief Implementation of the nvinfer1::ILogger::log() virtual method
+    //!
+    //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
+    //! inheritance from nvinfer1::ILogger
+    //!
+    void log(Severity severity, const char* msg) noexcept override
+    {
+        LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
+    }
+
+    //!
+    //! \brief Method for controlling the verbosity of logging output
+    //!
+    //! \param severity The logger will only emit messages that have severity of this level or higher.
+    //!
+    void setReportableSeverity(Severity severity) noexcept
+    {
+        mReportableSeverity = severity;
+    }
+
+    //!
+    //! \brief Opaque handle that holds logging information for a particular test
+    //!
+    //! This object is an opaque handle to information used by the Logger to print test results.
+    //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
+    //! with Logger::reportTest{Start,End}().
+    //!
+    class TestAtom
+    {
+    public:
+        TestAtom(TestAtom&&) = default;
+
+        std::string getCmdline() const
+        {
+            return mCmdline;
+        }
+
+    private:
+        friend class Logger;
+
+        TestAtom(bool started, const std::string& name, const std::string& cmdline)
+            : mStarted(started)
+            , mName(name)
+            , mCmdline(cmdline)
+        {
+        }
+
+        bool mStarted;
+        std::string mName;
+        std::string mCmdline;
+    };
+
+    //!
+    //! \brief Define a test for logging
+    //!
+    //! \param[in] name The name of the test. This should be a string starting with
+    //!                  "TensorRT" and containing dot-separated strings containing
+    //!                  the characters [A-Za-z0-9_].
+    //!                  For example, "TensorRT.sample_googlenet"
+    //! \param[in] cmdline The command line used to reproduce the test
+    //!
+    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
+    //!
+    static TestAtom defineTest(const std::string& name, const std::string& cmdline)
+    {
+        return TestAtom(false, name, cmdline);
+    }
+
+    //!
+    //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
+    //!        as input
+    //!
+    //! \param[in] name The name of the test
+    //! \param[in] argc The number of command-line arguments
+    //! \param[in] argv The array of command-line arguments (given as C strings)
+    //!
+    //! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
+    //!
+    static TestAtom defineTest(const std::string& name, int32_t argc, char const* const* argv)
+    {
+        // Append TensorRT version as info
+        const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "] [b"
+            + std::to_string(NV_TENSORRT_BUILD) + "]";
+        auto cmdline = genCmdlineString(argc, argv);
+        return defineTest(vname, cmdline);
+    }
+
+    //!
+    //! \brief Report that a test has started.
+    //!
+    //! \pre reportTestStart() has not been called yet for the given testAtom
+    //!
+    //! \param[in] testAtom The handle to the test that has started
+    //!
+    static void reportTestStart(TestAtom& testAtom)
+    {
+        assert(!testAtom.mStarted); // validate the precondition before any output, like reportTestEnd()
+        reportTestResult(testAtom, TestResult::kRUNNING);
+        testAtom.mStarted = true;
+    }
+
+    //!
+    //! \brief Report that a test has ended.
+    //!
+    //! \pre reportTestStart() has been called for the given testAtom
+    //!
+    //! \param[in] testAtom The handle to the test that has ended
+    //! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
+    //!                   TestResult::kFAILED, TestResult::kWAIVED
+    //!
+    static void reportTestEnd(TestAtom const& testAtom, TestResult result)
+    {
+        assert(result != TestResult::kRUNNING);
+        assert(testAtom.mStarted);
+        reportTestResult(testAtom, result);
+    }
+
+    static int32_t reportPass(TestAtom const& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kPASSED);
+        return EXIT_SUCCESS; // value intended to be returned from main()
+    }
+
+    static int32_t reportFail(TestAtom const& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kFAILED);
+        return EXIT_FAILURE;
+    }
+
+    static int32_t reportWaive(TestAtom const& testAtom)
+    {
+        reportTestEnd(testAtom, TestResult::kWAIVED);
+        return EXIT_SUCCESS; // a waived test yields the same exit code as a passed one
+    }
+
+    static int32_t reportTest(TestAtom const& testAtom, bool pass)
+    {
+        return pass ? reportPass(testAtom) : reportFail(testAtom);
+    }
+
+    Severity getReportableSeverity() const // threshold last set by the constructor or setReportableSeverity()
+    {
+        return mReportableSeverity;
+    }
+
+private:
+    //!
+    //! \brief returns an appropriate string for prefixing a log message with the given severity
+    //!
+    static const char* severityPrefix(Severity severity)
+    {
+        switch (severity)
+        {
+        case Severity::kINTERNAL_ERROR: return "[F] ";
+        case Severity::kERROR: return "[E] ";
+        case Severity::kWARNING: return "[W] ";
+        case Severity::kINFO: return "[I] ";
+        case Severity::kVERBOSE: return "[V] ";
+        default: assert(0); return "";
+        }
+    }
+
+    //!
+    //! \brief returns an appropriate string for prefixing a test result message with the given result
+    //!
+    static const char* testResultString(TestResult result)
+    {
+        switch (result)
+        {
+        case TestResult::kRUNNING: return "RUNNING";
+        case TestResult::kPASSED: return "PASSED";
+        case TestResult::kFAILED: return "FAILED";
+        case TestResult::kWAIVED: return "WAIVED";
+        default: assert(0); return "";
+        }
+    }
+
+    //!
+    //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
+    //!
+    static std::ostream& severityOstream(Severity severity)
+    {
+        return severity >= Severity::kINFO ? std::cout : std::cerr;
+    }
+
+    //!
+    //! \brief method that implements logging test results
+    //!
+    static void reportTestResult(TestAtom const& testAtom, TestResult result)
+    {
+        severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
+                                         << testAtom.mCmdline << std::endl;
+    }
+
+    //!
+    //! \brief generate a command line string from the given (argc, argv) values
+    //!        Note: It simply joins the arguments without proper escaping. If spaces is part
+    //!        of an argument, they will be joined with single space.
+    //!
+    static std::string genCmdlineString(int32_t argc, char const* const* argv)
+    {
+        std::stringstream ss;
+        for (int32_t i = 0; i < argc; i++)
+        {
+            if (i > 0)
+            {
+                ss << " ";
+            }
+            ss << argv[i];
+        }
+        return ss.str();
+    }
+
+    Severity mReportableSeverity;
+}; // class Logger
+
+namespace
+{
+//!
+//! \brief Builds a LogStreamConsumer for messages of severity kVERBOSE, using the logger's current threshold.
+//!
+//! Example usage:
+//!
+//!     LOG_VERBOSE(logger) << "hello world" << std::endl;
+inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
+{
+    Severity const threshold = logger.getReportableSeverity();
+    return LogStreamConsumer(threshold, Severity::kVERBOSE);
+}
+
+//!
+//! \brief Builds a LogStreamConsumer for messages of severity kINFO, using the logger's current threshold.
+//!
+//! Example usage:
+//!
+//!     LOG_INFO(logger) << "hello world" << std::endl;
+inline LogStreamConsumer LOG_INFO(const Logger& logger)
+{
+    Severity const threshold = logger.getReportableSeverity();
+    return LogStreamConsumer(threshold, Severity::kINFO);
+}
+
+//!
+//! \brief Builds a LogStreamConsumer for messages of severity kWARNING, using the logger's current threshold.
+//!
+//! Example usage:
+//!
+//!     LOG_WARN(logger) << "hello world" << std::endl;
+inline LogStreamConsumer LOG_WARN(const Logger& logger)
+{
+    Severity const threshold = logger.getReportableSeverity();
+    return LogStreamConsumer(threshold, Severity::kWARNING);
+}
+
+//!
+//! \brief Builds a LogStreamConsumer for messages of severity kERROR, using the logger's current threshold.
+//!
+//! Example usage:
+//!
+//!     LOG_ERROR(logger) << "hello world" << std::endl;
+inline LogStreamConsumer LOG_ERROR(const Logger& logger)
+{
+    Severity const threshold = logger.getReportableSeverity();
+    return LogStreamConsumer(threshold, Severity::kERROR);
+}
+
+//!
+//! \brief Builds a LogStreamConsumer for messages of severity kINTERNAL_ERROR ("fatal" severity),
+//!        using the logger's current threshold.
+//!
+//! Example usage:
+//!
+//!     LOG_FATAL(logger) << "hello world" << std::endl;
+inline LogStreamConsumer LOG_FATAL(const Logger& logger)
+{
+    Severity const threshold = logger.getReportableSeverity();
+    return LogStreamConsumer(threshold, Severity::kINTERNAL_ERROR);
+}
+} // anonymous namespace
+} // namespace sample
+#endif // TENSORRT_LOGGING_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/parserOnnxConfig.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/parserOnnxConfig.h
new file mode 100644
index 0000000000000000000000000000000000000000..67ee6c717167032833bda37904e315ff8cc905e4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/parserOnnxConfig.h
@@ -0,0 +1,145 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PARSER_ONNX_CONFIG_H
+#define PARSER_ONNX_CONFIG_H
+
+#include <cstring>
+#include <iostream>
+#include <string>
+
+#include "NvInfer.h"
+#include "NvOnnxConfig.h"
+#include "NvOnnxParser.h"
+
+#define ONNX_DEBUG 1
+
+/**
+ * \class ParserOnnxConfig
+ * \brief Concrete IOnnxConfig implementation that keeps the ONNX parser settings in plain members.
+ *
+ * \note The file-name setters copy their argument and store an empty string when given a null pointer.
+ *       Constructor/destructor tracing is printed only when isDebug() returns true.
+ */
+
+class ParserOnnxConfig : public nvonnxparser::IOnnxConfig
+{
+
+protected:
+    std::string mModelFilename{};    //!< value behind getModelFileName()
+    std::string mTextFilename{};     //!< value behind getTextFileName()
+    std::string mFullTextFilename{}; //!< value behind getFullTextFileName()
+    nvinfer1::DataType mModelDtype;
+    nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
+    bool mPrintLayercInfo;
+
+public:
+    ParserOnnxConfig()
+        : mModelDtype(nvinfer1::DataType::kFLOAT)
+        , mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING))
+        , mPrintLayercInfo(false)
+    {
+#ifdef ONNX_DEBUG
+        if (isDebug()) // virtual, but inside the constructor this always resolves to ParserOnnxConfig::isDebug()
+        {
+            std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
+        }
+#endif
+    }
+
+    ~ParserOnnxConfig() override
+    {
+#ifdef ONNX_DEBUG
+        if (isDebug()) // same remark as in the constructor: no dispatch to a derived override here
+        {
+            std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
+        }
+#endif
+    }
+
+public:
+    void setModelDtype(const nvinfer1::DataType modelDtype) noexcept override
+    {
+        mModelDtype = modelDtype;
+    }
+
+    nvinfer1::DataType getModelDtype() const noexcept override
+    {
+        return mModelDtype;
+    }
+
+    const char* getModelFileName() const noexcept override
+    {
+        return mModelFilename.c_str();
+    } //!< the returned pointer refers to the stored string and is invalidated by the next setModelFileName()
+    void setModelFileName(const char* onnxFilename) noexcept override
+    {
+        mModelFilename = (onnxFilename != nullptr) ? onnxFilename : ""; // std::string(nullptr) is undefined
+    } //!< stores a copy of \p onnxFilename; a null pointer is stored as ""
+    nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept override
+    {
+        return mVerbosity;
+    }
+    void addVerbosity() noexcept override
+    {
+        ++mVerbosity;
+    } //!< increments the stored verbosity by one, without any upper bound check
+    void reduceVerbosity() noexcept override
+    {
+        --mVerbosity;
+    } //!< decrements the stored verbosity by one, without any lower bound check
+    void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept override
+    {
+        mVerbosity = verbosity;
+    }
+
+    const char* getTextFileName() const noexcept override
+    {
+        return mTextFilename.c_str();
+    }
+    void setTextFileName(const char* textFilename) noexcept override
+    {
+        mTextFilename = (textFilename != nullptr) ? textFilename : "";
+    } //!< stores a copy of \p textFilename; a null pointer is stored as ""
+    const char* getFullTextFileName() const noexcept override
+    {
+        return mFullTextFilename.c_str();
+    }
+    void setFullTextFileName(const char* fullTextFilename) noexcept override
+    {
+        mFullTextFilename = (fullTextFilename != nullptr) ? fullTextFilename : "";
+    } //!< stores a copy of \p fullTextFilename; a null pointer is stored as ""
+    bool getPrintLayerInfo() const noexcept override
+    {
+        return mPrintLayercInfo;
+    }
+    void setPrintLayerInfo(bool src) noexcept override
+    {
+        mPrintLayercInfo = src;
+    } //!< set the boolean variable corresponding to the Layer Info, see getPrintLayerInfo()
+
+    virtual bool isDebug() const noexcept //!< true when ONNX_DEBUG is non-zero at build time and set in the environment
+    {
+#if ONNX_DEBUG
+        return (std::getenv("ONNX_DEBUG") ? true : false);
+#else
+        return false;
+#endif
+    }
+}; // class ParserOnnxConfig
+
+#endif
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleConfig.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleConfig.h
new file mode 100644
index 0000000000000000000000000000000000000000..801a268a40be8b516fa349063a0116b4d61c0cac
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleConfig.h
@@ -0,0 +1,331 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SampleConfig_H
+#define SampleConfig_H
+
+#include <cstring>
+#include <iostream>
+#include <string>
+
+#include "NvInfer.h"
+#include "NvOnnxConfig.h"
+//! \brief IOnnxConfig implementation that keeps every option in a plain member behind a getter/setter pair.
+class SampleConfig : public nvonnxparser::IOnnxConfig
+{
+public:
+    enum class InputDataFormat : int
+    {
+        kASCII = 0,
+        kPPM = 1
+    };
+
+private:
+    std::string mModelFilename;
+    std::string mEngineFilename;
+    std::string mTextFilename;
+    std::string mFullTextFilename;
+    std::string mImageFilename;
+    std::string mReferenceFilename;
+    std::string mOutputFilename;
+    std::string mCalibrationFilename;
+    std::string mTimingCacheFilename;
+    int64_t mLabel{-1};
+    int64_t mMaxBatchSize{32};
+    int64_t mCalibBatchSize{0};
+    int64_t mMaxNCalibBatch{0};
+    int64_t mFirstCalibBatch{0};
+    int64_t mUseDLACore{-1};
+    nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
+    bool mTF32{true};
+    Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
+    bool mPrintLayercInfo{false};
+    bool mDebugBuilder{false};
+    InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
+    uint64_t mTopK{0};
+    float mFailurePercentage{-1.0F};
+    float mTolerance{0.0F};
+    float mAbsTolerance{1e-5F};
+
+public:
+    SampleConfig()
+    {
+#ifdef ONNX_DEBUG
+        if (isDebug())
+        {
+            std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
+        }
+#endif
+    }
+
+    ~SampleConfig() override
+    {
+#ifdef ONNX_DEBUG
+        if (isDebug())
+        {
+            std::cout << "SampleConfig::dtor(): " << this << std::endl;
+        }
+#endif
+    }
+
+    void setModelDtype(const nvinfer1::DataType mdt) noexcept override
+    {
+        mModelDtype = mdt;
+    }
+
+    nvinfer1::DataType getModelDtype() const noexcept override
+    {
+        return mModelDtype;
+    }
+
+    bool getTF32() const noexcept
+    {
+        return mTF32;
+    }
+
+    void setTF32(bool enabled) noexcept
+    {
+        mTF32 = enabled;
+    }
+
+    const char* getModelFileName() const noexcept override
+    {
+        return mModelFilename.c_str();
+    }
+
+    void setModelFileName(const char* onnxFilename) noexcept override
+    {
+        mModelFilename = (onnxFilename != nullptr) ? onnxFilename : ""; // std::string(nullptr) is undefined
+    } //!<  stores a copy; like every file-name setter of this class, a null pointer is stored as ""
+    Verbosity getVerbosityLevel() const noexcept override
+    {
+        return mVerbosity;
+    }
+    void addVerbosity() noexcept override
+    {
+        ++mVerbosity;
+    }
+    void reduceVerbosity() noexcept override
+    {
+        --mVerbosity;
+    }
+    void setVerbosityLevel(Verbosity v) noexcept override
+    {
+        mVerbosity = v;
+    }
+    const char* getEngineFileName() const noexcept
+    {
+        return mEngineFilename.c_str();
+    }
+    void setEngineFileName(const char* engineFilename) noexcept
+    {
+        mEngineFilename = (engineFilename != nullptr) ? engineFilename : "";
+    }
+    const char* getTextFileName() const noexcept override
+    {
+        return mTextFilename.c_str();
+    }
+    void setTextFileName(const char* textFilename) noexcept override
+    {
+        mTextFilename = (textFilename != nullptr) ? textFilename : "";
+    }
+    const char* getFullTextFileName() const noexcept override
+    {
+        return mFullTextFilename.c_str();
+    }
+    void setFullTextFileName(const char* fullTextFilename) noexcept override
+    {
+        mFullTextFilename = (fullTextFilename != nullptr) ? fullTextFilename : "";
+    }
+    void setLabel(int64_t label) noexcept
+    {
+        mLabel = label;
+    } //!<  set the Label
+
+    int64_t getLabel() const noexcept
+    {
+        return mLabel;
+    } //!<  get the Label
+
+    bool getPrintLayerInfo() const noexcept override
+    {
+        return mPrintLayercInfo;
+    }
+
+    void setPrintLayerInfo(bool b) noexcept override
+    {
+        mPrintLayercInfo = b;
+    } //!< set the boolean variable corresponding to the Layer Info, see getPrintLayerInfo()
+
+    void setMaxBatchSize(int64_t maxBatchSize) noexcept
+    {
+        mMaxBatchSize = maxBatchSize;
+    } //!<  set the Max Batch Size
+    int64_t getMaxBatchSize() const noexcept
+    {
+        return mMaxBatchSize;
+    } //!<  get the Max Batch Size
+
+    void setCalibBatchSize(int64_t CalibBatchSize) noexcept
+    {
+        mCalibBatchSize = CalibBatchSize;
+    } //!<  set the calibration batch size
+    int64_t getCalibBatchSize() const noexcept
+    {
+        return mCalibBatchSize;
+    } //!<  get calibration batch size
+
+    void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept
+    {
+        mMaxNCalibBatch = MaxNCalibBatch;
+    } //!<  set Max Number of Calibration Batches
+    int64_t getMaxNCalibBatch() const noexcept
+    {
+        return mMaxNCalibBatch;
+    } //!<  get the Max Number of Calibration Batches
+
+    void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept
+    {
+        mFirstCalibBatch = FirstCalibBatch;
+    } //!<  set the first calibration batch
+    int64_t getFirstCalibBatch() const noexcept
+    {
+        return mFirstCalibBatch;
+    } //!<  get the first calibration batch
+
+    void setUseDLACore(int64_t UseDLACore) noexcept
+    {
+        mUseDLACore = UseDLACore;
+    } //!<  set the DLA core to use
+    int64_t getUseDLACore() const noexcept
+    {
+        return mUseDLACore;
+    } //!<  get the DLA core to use
+
+    void setDebugBuilder() noexcept
+    {
+        mDebugBuilder = true;
+    } //!<  enable the Debug info, while building the engine.
+    bool getDebugBuilder() const noexcept
+    {
+        return mDebugBuilder;
+    } //!<  get the boolean variable, corresponding to the debug builder
+
+    const char* getImageFileName() const noexcept //!<  get the Image file name (PPM or ASCII)
+    {
+        return mImageFilename.c_str();
+    }
+    void setImageFileName(const char* imageFilename) noexcept //!<  set the Image file name (PPM or ASCII)
+    {
+        mImageFilename = (imageFilename != nullptr) ? imageFilename : "";
+    }
+    const char* getReferenceFileName() const noexcept //!<  get reference file name
+    {
+        return mReferenceFilename.c_str();
+    }
+    void setReferenceFileName(const char* referenceFilename) noexcept //!<  set reference file name
+    {
+        mReferenceFilename = (referenceFilename != nullptr) ? referenceFilename : "";
+    }
+
+    void setInputDataFormat(InputDataFormat idt) noexcept
+    {
+        mInputDataFormat = idt;
+    } //!<  specifies expected data format of the image file (PPM or ASCII)
+    InputDataFormat getInputDataFormat() const noexcept
+    {
+        return mInputDataFormat;
+    } //!<  returns the expected data format of the image file.
+
+    const char* getOutputFileName() const noexcept //!<  get the name of the file to save the results
+    {
+        return mOutputFilename.c_str();
+    }
+    void setOutputFileName(const char* outputFilename) noexcept //!<  set the name of the file to save the results
+    {
+        mOutputFilename = (outputFilename != nullptr) ? outputFilename : "";
+    }
+
+    const char* getCalibrationFileName() const noexcept
+    {
+        return mCalibrationFilename.c_str();
+    } //!<  get the name of the file containing the list of image files for int8 calibration
+    void setCalibrationFileName(const char* calibrationFilename) noexcept //!<  set the int8 calibration list file name
+    {
+        mCalibrationFilename = (calibrationFilename != nullptr) ? calibrationFilename : "";
+    }
+
+    uint64_t getTopK() const noexcept
+    {
+        return mTopK;
+    }
+    void setTopK(uint64_t topK) noexcept
+    {
+        mTopK = topK;
+    } //!<  If this option is specified, return the K top probabilities.
+
+    float getFailurePercentage() const noexcept
+    {
+        return mFailurePercentage;
+    }
+
+    void setFailurePercentage(float f) noexcept
+    {
+        mFailurePercentage = f;
+    }
+
+    float getAbsoluteTolerance() const noexcept
+    {
+        return mAbsTolerance;
+    }
+
+    void setAbsoluteTolerance(float a) noexcept
+    {
+        mAbsTolerance = a;
+    }
+
+    float getTolerance() const noexcept
+    {
+        return mTolerance;
+    }
+
+    void setTolerance(float t) noexcept
+    {
+        mTolerance = t;
+    }
+
+    const char* getTimingCacheFilename() const noexcept
+    {
+        return mTimingCacheFilename.c_str();
+    }
+
+    void setTimingCacheFileName(const char* timingCacheFilename) noexcept
+    {
+        mTimingCacheFilename = (timingCacheFilename != nullptr) ? timingCacheFilename : "";
+    }
+
+    bool isDebug() const noexcept //!< true only when ONNX_DEBUG is non-zero at build time and set in the environment
+    {
+#if ONNX_DEBUG
+        return (std::getenv("ONNX_DEBUG") ? true : false);
+#else
+        return false;
+#endif
+    }
+}; // class SampleConfig
+
+#endif
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleDevice.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleDevice.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5eedfc1e28d9f2357c81041298e79ac363a1db13
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleDevice.cpp
@@ -0,0 +1,126 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sampleDevice.h"
+
+#include <iomanip>
+
+namespace sample
+{
+
+// Build the GPU UUID string in the layout nvidia-smi uses: "GPU" followed by five dash-prefixed hex groups.
+std::string getUuidString(cudaUUID_t uuid)
+{
+    constexpr int32_t kUUID_SIZE = sizeof(cudaUUID_t);
+    static_assert(kUUID_SIZE == 16, "Unexpected size for cudaUUID_t!");
+
+    // Consecutive entries delimit one group of bytes: [0,4) [4,6) [6,8) [8,10) [10,16).
+    std::vector<int32_t> const groupBounds = {0, 4, 6, 8, 10, kUUID_SIZE};
+    std::ostringstream text;
+    text << "GPU" << std::hex << std::setfill('0');
+    for (size_t group = 0; group + 1 < groupBounds.size(); ++group)
+    {
+        text << "-";
+        for (int32_t pos = groupBounds[group]; pos < groupBounds[group + 1]; ++pos)
+        {
+            text << std::setw(2) << static_cast<int32_t>(static_cast<uint8_t>(uuid.bytes[pos]));
+        }
+    }
+    return text.str();
+}
+// Lists every visible GPU on os, then selects `device` with cudaSetDevice() and prints its details; exits on failure.
+void setCudaDevice(int32_t device, std::ostream& os)
+{
+    os << "=== Device Information ===" << std::endl;
+
+    // Get the number of visible GPUs.
+    int32_t nbDevices{-1};
+    CHECK(cudaGetDeviceCount(&nbDevices));
+
+    if (nbDevices <= 0)
+    {
+        os << "Cannot find any available devices (GPUs)!" << std::endl;
+        exit(EXIT_FAILURE);
+    }
+
+    // Print out the GPU name and UUID of each GPU.
+    os << "Available Devices: " << std::endl;
+    cudaDeviceProp properties; // only assigned in the loop below, and only when deviceIdx == device
+    for (int32_t deviceIdx = 0; deviceIdx < nbDevices; ++deviceIdx)
+    {
+        cudaDeviceProp tempProperties;
+        CHECK(cudaGetDeviceProperties(&tempProperties, deviceIdx));
+
+        // clang-format off
+        os << "  Device " << deviceIdx << ": \"" << tempProperties.name << "\" UUID: "
+           << getUuidString(tempProperties.uuid) << std::endl;
+        // clang-format on
+
+        // Record the properties of the desired GPU.
+        if (deviceIdx == device)
+        {
+            properties = tempProperties;
+        }
+    }
+
+    // Exit with error if the requested device ID does not exist; this also keeps `properties` from being read unset.
+    if (device < 0 || device >= nbDevices)
+    {
+        os << "Cannot find device ID " << device << "!" << std::endl;
+        exit(EXIT_FAILURE);
+    }
+
+    // Set to the corresponding GPU.
+    CHECK(cudaSetDevice(device));
+
+    // clang-format off
+    os << "Selected Device: "      << properties.name                                               << std::endl;
+    os << "Selected Device ID: "   << device                                                        << std::endl;
+    os << "Selected Device UUID: " << getUuidString(properties.uuid)                                << std::endl;
+    os << "Compute Capability: "   << properties.major << "." << properties.minor                   << std::endl;
+    os << "SMs: "                  << properties.multiProcessorCount                                << std::endl;
+    os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB"                   << std::endl;
+    os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB"       << std::endl;
+    os << "Memory Bus Width: "     << properties.memoryBusWidth << " bits"
+                        << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
+    int32_t clockRate = 0; // NOTE(review): presumably reported in kHz, given the division by 1e6 for GHz - confirm
+    int32_t memoryClockRate = 0;
+    CHECK(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device));
+    CHECK(cudaDeviceGetAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, device));
+    os << "Application Compute Clock Rate: "   << clockRate / 1000000.0F << " GHz"       << std::endl;
+    os << "Application Memory Clock Rate: "    << memoryClockRate / 1000000.0F << " GHz" << std::endl;
+    os << std::endl;
+    os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is "
+                                                                         << "currently running at." << std::endl;
+    // clang-format on
+}
+
+int32_t getCudaDriverVersion()
+{
+    int32_t version{-1}; // -1 is what the caller sees if the query below leaves the output untouched
+    CHECK(cudaDriverGetVersion(&version)); // CHECK is defined elsewhere; NOTE(review): presumably aborts on error
+    return version;
+}
+
+int32_t getCudaRuntimeVersion()
+{
+    int32_t version{-1}; // -1 is what the caller sees if the query below leaves the output untouched
+    CHECK(cudaRuntimeGetVersion(&version)); // CHECK is defined elsewhere; NOTE(review): presumably aborts on error
+    return version;
+}
+
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleDevice.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleDevice.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a5000bdc946f4a0d0b5cd21f2b33ee262c9e86b
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleDevice.h
@@ -0,0 +1,591 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_DEVICE_H
+#define TRT_SAMPLE_DEVICE_H
+
+#include <cassert>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <iostream>
+#include <thread>
+
+#include "common.h"
+#include "sampleUtils.h"
+
+namespace sample
+{
+
+class TrtCudaEvent;
+
+namespace
+{
+
+void cudaSleep(void* sleep)
+{
+    float const delayMs = *static_cast<float*>(sleep); // the payload is a float holding a number of milliseconds
+    std::this_thread::sleep_for(std::chrono::duration<float, std::milli>(delayMs));
+}
+} // namespace
+
+//!
+//! \class TrtCudaStream
+//! \brief Managed CUDA stream: created in the constructor and destroyed in the destructor.
+//!
+class TrtCudaStream
+{
+public:
+    TrtCudaStream()
+    {
+        CHECK(cudaStreamCreate(&mStream));
+    }
+    // Copy and move are deleted, so the handle created above is only ever destroyed by this one object.
+    TrtCudaStream(const TrtCudaStream&) = delete;
+
+    TrtCudaStream& operator=(const TrtCudaStream&) = delete;
+
+    TrtCudaStream(TrtCudaStream&&) = delete;
+
+    TrtCudaStream& operator=(TrtCudaStream&&) = delete;
+
+    ~TrtCudaStream()
+    {
+        CHECK(cudaStreamDestroy(mStream));
+    }
+
+    cudaStream_t get() const
+    {
+        return mStream;
+    } //!< \return the raw cudaStream_t handle; it is still destroyed by this object
+
+    void synchronize()
+    {
+        CHECK(cudaStreamSynchronize(mStream));
+    } //!< calls cudaStreamSynchronize() on the stream
+
+    void wait(TrtCudaEvent& event); //!< defined after TrtCudaEvent, whose full definition it needs
+
+    void sleep(float* ms)
+    {
+        CHECK(cudaLaunchHostFunc(mStream, cudaSleep, ms));
+    } //!< hands cudaSleep and \p ms to cudaLaunchHostFunc(); \p ms is read when cudaSleep runs, so keep it valid
+
+private:
+    cudaStream_t mStream{};
+};
+
+//!
+//! \class TrtCudaEvent
+//! \brief Managed CUDA event: created in the constructor and destroyed in the destructor.
+//!
+class TrtCudaEvent
+{
+public:
+    explicit TrtCudaEvent(bool blocking = true)
+    {
+        const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
+        CHECK(cudaEventCreateWithFlags(&mEvent, flags));
+    } //!< \p blocking selects cudaEventBlockingSync; otherwise cudaEventDefault is used
+
+    TrtCudaEvent(const TrtCudaEvent&) = delete;
+
+    TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
+
+    TrtCudaEvent(TrtCudaEvent&&) = delete;
+
+    TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
+
+    ~TrtCudaEvent()
+    {
+        CHECK(cudaEventDestroy(mEvent));
+    }
+
+    cudaEvent_t get() const
+    {
+        return mEvent;
+    }
+
+    void record(const TrtCudaStream& stream)
+    {
+        CHECK(cudaEventRecord(mEvent, stream.get()));
+    } //!< calls cudaEventRecord() for this event on \p stream
+
+    void synchronize()
+    {
+        CHECK(cudaEventSynchronize(mEvent));
+    }
+
+    // Returns the elapsed time in milliseconds from event e (start) to this event (end), i.e. "end - start".
+    float operator-(const TrtCudaEvent& e) const
+    {
+        float time{0};
+        CHECK(cudaEventElapsedTime(&time, e.get(), get()));
+        return time;
+    }
+
+private:
+    cudaEvent_t mEvent{};
+};
+
+inline void TrtCudaStream::wait(TrtCudaEvent& event)
+{
+    CHECK(cudaStreamWaitEvent(mStream, event.get(), 0)); // last argument is the flags value; 0 is passed
+}
+
+//!
+//! \class TrtCudaGraph
+//! \brief Managed CUDA graph: captures the work of a stream into an executable graph that can be launched.
+//!
+class TrtCudaGraph
+{
+public:
+    explicit TrtCudaGraph() = default;
+    TrtCudaGraph(const TrtCudaGraph&) = delete;
+    TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
+    TrtCudaGraph(TrtCudaGraph&&) = delete;
+    TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
+
+    ~TrtCudaGraph()
+    {
+        if (mGraphExec)
+        {
+            cudaGraphExecDestroy(mGraphExec);
+        }
+    }
+
+    void beginCapture(TrtCudaStream& stream)
+    {
+        CHECK(cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
+    } //!< starts capturing on \p stream with cudaStreamCaptureModeThreadLocal
+
+    bool launch(TrtCudaStream& stream)
+    {
+        return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
+    } //!< \return true if cudaGraphLaunch() of the instantiated graph on \p stream reported cudaSuccess
+
+    void endCapture(TrtCudaStream& stream)
+    {
+        CHECK(cudaStreamEndCapture(stream.get(), &mGraph));
+        if (mGraphExec) // an earlier capture left an executable graph behind: destroy it instead of leaking it
+        {
+            CHECK(cudaGraphExecDestroy(mGraphExec));
+            mGraphExec = nullptr;
+        }
+        CHECK(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
+        CHECK(cudaGraphDestroy(mGraph));
+        mGraph = nullptr; // never keep the handle of a destroyed graph, same as in endCaptureOnError()
+    } //!< ends the capture on \p stream and instantiates the executable graph that launch() uses
+
+    void endCaptureOnError(TrtCudaStream& stream)
+    {
+        // There are two possibilities why stream capture would fail:
+        // (1) stream is in cudaErrorStreamCaptureInvalidated state; the returning mGraph should be nullptr.
+        // (2) TRT reports a failure; the returning mGraph is not nullptr, but it should not be used.
+        const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
+        if (ret == cudaErrorStreamCaptureInvalidated)
+        {
+            assert(mGraph == nullptr);
+        }
+        else
+        {
+            CHECK(ret);
+            assert(mGraph != nullptr);
+            CHECK(cudaGraphDestroy(mGraph));
+            mGraph = nullptr;
+        }
+        // Clean up any CUDA error.
+        cudaGetLastError();
+        sample::gLogWarning << "The CUDA graph capture on the stream has failed." << std::endl;
+    } //!< ends a failed capture: discards any captured graph, clears the CUDA error and logs a warning
+
+private:
+    cudaGraph_t mGraph{};
+    cudaGraphExec_t mGraphExec{};
+};
+
+//!
+//! \class TrtCudaBuffer
+//! \brief Move-only holder of a raw buffer that is obtained with functor \p A and released with functor \p D.
+//!
+template <typename A, typename D>
+class TrtCudaBuffer
+{
+public:
+    TrtCudaBuffer() = default;
+    TrtCudaBuffer(const TrtCudaBuffer&) = delete;
+    TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
+
+    TrtCudaBuffer(size_t size)
+    {
+        A()(&mPtr, size);
+        mSize = size;
+    } //!< allocates \p size bytes immediately through a default-constructed \p A
+
+    TrtCudaBuffer(TrtCudaBuffer&& rhs)
+        : mPtr{rhs.mPtr}
+        , mSize{rhs.mSize}
+    {
+        rhs.mPtr = nullptr;
+        rhs.mSize = 0;
+    } //!< takes over the buffer of \p rhs and leaves \p rhs empty
+
+    TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs)
+    {
+        if (this == &rhs)
+        {
+            return *this;
+        }
+        reset(rhs.mPtr, rhs.mSize);
+        rhs.mPtr = nullptr;
+        rhs.mSize = 0;
+        return *this;
+    } //!< releases the current buffer, then takes over the buffer of \p rhs; self-assignment does nothing
+
+    ~TrtCudaBuffer()
+    {
+        reset();
+    }
+
+    void allocate(size_t size)
+    {
+        reset();
+        A()(&mPtr, size);
+        mSize = size;
+    } //!< releases the current buffer (if any) and allocates a new one of \p size bytes
+
+    void reset(void* ptr = nullptr, size_t size = 0)
+    {
+        if (mPtr != nullptr)
+        {
+            D()(mPtr);
+        }
+        mPtr = ptr;
+        mSize = size;
+    } //!< releases the current buffer (if any) through \p D and adopts \p ptr / \p size in its place
+
+    void* get() const
+    {
+        return mPtr;
+    }
+
+    size_t getSize() const
+    {
+        return mSize;
+    }
+
+private:
+    void* mPtr{nullptr};
+    size_t mSize{0};
+};
+
+struct DeviceAllocator
+{
+    void operator()(void** ptr, size_t size)
+    {
+        CHECK(cudaMalloc(ptr, size));
+    } //!< allocates \p size bytes with cudaMalloc() and stores the address in \p *ptr
+};
+
+struct DeviceDeallocator
+{
+    void operator()(void* ptr)
+    {
+        CHECK(cudaFree(ptr));
+    } //!< releases \p ptr with cudaFree(); TrtManagedBuffer also uses it for cudaMallocManaged() memory
+};
+
+struct ManagedAllocator
+{
+    void operator()(void** ptr, size_t size)
+    {
+        CHECK(cudaMallocManaged(ptr, size));
+    } //!< allocates \p size bytes with cudaMallocManaged() and stores the address in \p *ptr
+};
+
+struct HostAllocator
+{
+    //! Attempts to allocate size bytes on host, pointing *ptr to the start.
+    //! First attempts to allocate pinned memory using cudaMallocHost(ptr, size), failing that, warns to gLogWarning and
+    //! falls back to ::operator new(size) to allocate pageable memory. If that still fails, an exception may be thrown.
+    void operator()(void** ptr, size_t size)
+    {
+        // Try allocating pinned host memory.
+        cudaError_t ret = cudaMallocHost(ptr, size);
+
+        // If we cannot allocate pinned host memory, allocate pageable host memory instead and print a warning.
+        if (ret != cudaSuccess)
+        {
+            // Clean up the last cuda error.
+            (void) cudaGetLastError();
+            // The failing status is still held in ret, so the warning below can name the original error.
+            sample::gLogWarning << "cudaMallocHost() call with ptr=" << ptr << " and size=" << size
+                                << " returns a cuda error: " << cudaGetErrorString(ret) << std::endl;
+            sample::gLogWarning << "Allocate pageable host memory instead of pinned host memory. H2D and D2H copy "
+                                   "latencies may become longer."
+                                << std::endl;
+            *ptr = ::operator new(size);
+            // Memory obtained on this path is released by HostDeallocator through ::operator delete().
+            // Make sure there is no remaining cuda error at this point.
+            CHECK(cudaGetLastError());
+        }
+    }
+};
+
+struct HostDeallocator
+{
+    //! Attempts to deallocate the host memory allocated by HostAllocator.
+    //! It first checks if ptr is a pinned or pageable host memory. If pinned, call cudaFreeHost() to free it. If
+    //! pageable, call ::operator delete() to free it. If ptr is neither of them, an error is printed and the program
+    //! exits.
+    void operator()(void* ptr)
+    {
+        // Check if the host memory pointer is pinned or pageable.
+        cudaPointerAttributes attrs;
+        CHECK(cudaPointerGetAttributes(&attrs, ptr));
+        // NOTE(review): relies on the query above succeeding for plain (unregistered) host pointers - confirm.
+        // If pinned, call cudaFreeHost() to deallocate it.
+        if (attrs.type == cudaMemoryTypeHost)
+        {
+            CHECK(cudaFreeHost(ptr));
+        }
+        // If pageable, delete it directly.
+        else if (attrs.type == cudaMemoryTypeUnregistered)
+        {
+            ::operator delete(ptr);
+        }
+        // The host memory pointer should not be of any other types.
+        else
+        {
+            sample::gLogError << "Unexpected cuda memory type:" << static_cast<int32_t>(attrs.type) << std::endl;
+            exit(EXIT_FAILURE);
+        }
+    }
+};
+
+using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>; // cudaMalloc() / cudaFree()
+using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>; // cudaMallocManaged() / cudaFree()
+// Host buffer: cudaMallocHost() with a ::operator new() fallback, released by HostDeallocator.
+using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
+
+//!
+//! \class IMirroredBuffer
+//! \brief Interface for coupled host and device buffers
+//!
+class IMirroredBuffer
+{
+public:
+    //!
+    //! Allocate memory for the mirrored buffer given the size
+    //! of the allocation.
+    //!
+    virtual void allocate(size_t size) = 0;
+
+    //!
+    //! Get the pointer to the device side buffer.
+    //!
+    //! \return pointer to device memory or nullptr if uninitialized.
+    //!
+    virtual void* getDeviceBuffer() const = 0;
+
+    //!
+    //! Get the pointer to the host side buffer.
+    //!
+    //! \return pointer to host memory or nullptr if uninitialized.
+    //!
+    virtual void* getHostBuffer() const = 0;
+
+    //!
+    //! Copy the memory from host to device.
+    //!
+    virtual void hostToDevice(TrtCudaStream& stream) = 0;
+
+    //!
+    //! Copy the memory from device to host.
+    //!
+    virtual void deviceToHost(TrtCudaStream& stream) = 0;
+
+    //!
+    //! Interface to get the size of the memory
+    //!
+    //! \return the size of memory allocated.
+    //!
+    virtual size_t getSize() const = 0;
+
+    //!
+    //! Virtual destructor declaration
+    //!
+    virtual ~IMirroredBuffer() = default;
+
+}; // class IMirroredBuffer
+
+//!
+//! Class to have a separate memory buffer for discrete device and host allocations.
+//! The two buffers are only brought in sync by explicit hostToDevice() / deviceToHost() calls.
+class DiscreteMirroredBuffer : public IMirroredBuffer
+{
+public:
+    void allocate(size_t size) override
+    {
+        mSize = size;
+        mHostBuffer.allocate(size);
+        mDeviceBuffer.allocate(size);
+    } //!< (re)allocates both the host and the device buffer with \p size bytes each
+
+    void* getDeviceBuffer() const override
+    {
+        return mDeviceBuffer.get();
+    }
+
+    void* getHostBuffer() const override
+    {
+        return mHostBuffer.get();
+    }
+
+    void hostToDevice(TrtCudaStream& stream) override
+    {
+        CHECK(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize, cudaMemcpyHostToDevice, stream.get()));
+    } //!< issues cudaMemcpyAsync() of mSize bytes from the host buffer to the device buffer on \p stream
+
+    void deviceToHost(TrtCudaStream& stream) override
+    {
+        CHECK(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize, cudaMemcpyDeviceToHost, stream.get()));
+    } //!< issues cudaMemcpyAsync() of mSize bytes from the device buffer to the host buffer on \p stream
+
+    size_t getSize() const override
+    {
+        return mSize;
+    } //!< \return the size passed to the most recent allocate() call, 0 before the first one
+
+private:
+    size_t mSize{0};
+    TrtHostBuffer mHostBuffer;
+    TrtDeviceBuffer mDeviceBuffer;
+}; // class DiscreteMirroredBuffer
+
+//!
+//! Class to have a unified memory buffer for embedded devices.
+//! One TrtManagedBuffer backs both the host and the device view, so both getters return the same pointer.
+class UnifiedMirroredBuffer : public IMirroredBuffer
+{
+public:
+    void allocate(size_t size) override
+    {
+        mSize = size;
+        mBuffer.allocate(size);
+    } //!< (re)allocates the single managed buffer with \p size bytes
+
+    void* getDeviceBuffer() const override
+    {
+        return mBuffer.get();
+    }
+
+    void* getHostBuffer() const override
+    {
+        return mBuffer.get();
+    }
+
+    void hostToDevice(TrtCudaStream& /*stream*/) override
+    {
+        // Does nothing since we are using unified memory.
+    }
+
+    void deviceToHost(TrtCudaStream& /*stream*/) override
+    {
+        // Does nothing since we are using unified memory.
+    }
+
+    size_t getSize() const override
+    {
+        return mSize;
+    } //!< \return the size passed to the most recent allocate() call, 0 before the first one
+
+private:
+    size_t mSize{0};
+    TrtManagedBuffer mBuffer;
+}; // class UnifiedMirroredBuffer
+
+//!
+//! Class to allocate memory for outputs with data-dependent shapes. The sizes of those are unknown so pre-allocation is
+//! not possible.
+//!
+class OutputAllocator : public nvinfer1::IOutputAllocator
+{
+public:
+    //! Construct, using buffer as the backing storage:
+    explicit OutputAllocator(std::unique_ptr<IMirroredBuffer> buffer)
+        : mBuffer{std::move(buffer)}
+    {
+        ASSERT(mBuffer);
+    }
+
+    ~OutputAllocator() override = default;
+    //! Grow the backing storage when \p size exceeds every size requested so far, then return its device pointer.
+    void* reallocateOutput(
+        char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override
+    {
+        // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
+        // even for empty tensors, so allocate a dummy byte.
+        size = std::max(size, static_cast<uint64_t>(1));
+        if (size > mSize)
+        {
+            mBuffer->allocate(roundUp(size, alignment));
+            mSize = size;
+        }
+        return mBuffer->getDeviceBuffer();
+    }
+
+    //! IMirroredBuffer does not implement Async allocation, hence this is just a wrap around
+    void* reallocateOutputAsync(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment,
+        cudaStream_t /*stream*/) noexcept override
+    {
+        return reallocateOutput(tensorName, currentMemory, size, alignment);
+    }
+    //! Remember \p dims so that getFinalDims() can return it.
+    void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override
+    {
+        mFinalDims = dims;
+    }
+
+    IMirroredBuffer* getBuffer()
+    {
+        return mBuffer.get();
+    } //!< \return the mirrored buffer used as backing storage; it stays owned by this allocator
+    //! \return the Dims last passed to notifyShape(), or value-initialized Dims if it was never called.
+    nvinfer1::Dims getFinalDims()
+    {
+        return mFinalDims;
+    }
+
+private:
+    std::unique_ptr<IMirroredBuffer> mBuffer;
+    uint64_t mSize{};
+    nvinfer1::Dims mFinalDims{}; // value-initialized so getFinalDims() never reads indeterminate members
+};
+
+//! Set the GPU to run the inference on: lists the visible GPUs on \p os and exits if \p device is not among them.
+void setCudaDevice(int32_t device, std::ostream& os);
+
+//! Get the CUDA version of the current CUDA driver, as reported by cudaDriverGetVersion().
+int32_t getCudaDriverVersion();
+
+//! Get the CUDA version of the current CUDA runtime, as reported by cudaRuntimeGetVersion().
+int32_t getCudaRuntimeVersion();
+
+
+} // namespace sample
+
+#endif // TRT_SAMPLE_DEVICE_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEngines.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEngines.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ce1e4a5f6c4b930dde33427be942ae26b801bb13
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEngines.cpp
@@ -0,0 +1,1921 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <chrono>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <random>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+
+#include "ErrorRecorder.h"
+#include "common.h"
+#include "logger.h"
+#include "sampleDevice.h"
+#include "sampleEngines.h"
+#include "sampleOptions.h"
+#include "sampleUtils.h"
+
+#if ENABLE_UNIFIED_BUILDER
+#include "NvInferConsistency.h"
+#include "safeErrorRecorder.h"
+#endif
+
+using namespace nvinfer1;
+
+namespace sample
+{
+
+namespace
+{
+class FileStreamWriter final : public nvinfer1::IStreamWriter // IStreamWriter that appends to a binary file
+{
+protected:
+    std::ofstream mStream; // output file, opened in binary mode by the constructor
+    int64_t mTotalWrittenSize; // sum of nbBytes over write() calls that got past the stream check
+
+public:
+    FileStreamWriter(std::string const& path) // open failures are not reported here; write() checks the stream
+        : mStream(path, std::ios::binary)
+        , mTotalWrittenSize(0)
+    {
+    }
+
+    virtual int64_t write(void const* data, int64_t nbBytes) final // returns nbBytes; -1 is the guard's value
+    {
+        SMP_RETVAL_IF_FALSE(
+            (mStream.is_open() && mStream.good()), "Cannot write to FileStreamWriter", -1, sample::gLogError);
+        auto const* src = reinterpret_cast<char const*>(data);
+        mStream.write(src, nbBytes); // NOTE(review): stream state is not re-checked after this write
+        mTotalWrittenSize += nbBytes;
+        return nbBytes;
+    }
+
+    int64_t finalize() // closes the file and returns the accumulated byte count
+    {
+        mStream.close();
+        return mTotalWrittenSize;
+    }
+};
+
+std::map<std::string, float> readScalesFromCalibrationCache(std::string const& calibrationFile)
+{
+    std::map<std::string, float> tensorScales; // tensor name -> scale; returned empty if the file cannot be opened
+    std::ifstream cache{calibrationFile};
+    if (!cache.is_open())
+    {
+        sample::gLogError << "[TRT] Can not open provided calibration cache file" << std::endl;
+        return tensorScales;
+    }
+    std::string line;
+    while (std::getline(cache, line))
+    {
+        auto colonPos = line.find_last_of(':'); // lines without any ':' are ignored
+        if (colonPos != std::string::npos)
+        {
+            // Up to 8 hex digits starting two characters after the last ':' hold the bit pattern of a float scale.
+            int32_t scalesAsInt = std::stoi(line.substr(colonPos + 2, 8), nullptr, 16); // throws on malformed text
+            auto const tensorName = line.substr(0, colonPos); // everything before the last ':'
+            tensorScales[tensorName] = *reinterpret_cast<float*>(&scalesAsInt); // reinterpret bits, no conversion
+        }
+    }
+    cache.close();
+    return tensorScales;
+}
+} // namespace
+
+nvinfer1::ICudaEngine* LazilyDeserializedEngine::get()
+{
+    SMP_RETVAL_IF_FALSE(
+        !mIsSafe, "Safe mode is enabled, but trying to get standard engine!", nullptr, sample::gLogError);
+
+    if (mEngine == nullptr) // deserialize only when no engine is held yet; otherwise reuse mEngine
+    {
+        SMP_RETVAL_IF_FALSE(getAsyncFileReader().isOpen() || getFileReader().isOpen() || !getBlob().empty(),
+            "Engine is empty. Nothing to deserialize!", nullptr, sample::gLogError);
+        using time_point = std::chrono::time_point<std::chrono::high_resolution_clock>;
+        using duration = std::chrono::duration<float>;
+        time_point const deserializeStartTime{std::chrono::high_resolution_clock::now()};
+
+        if (mLeanDLLPath.empty())
+        {
+            mRuntime.reset(createRuntime());
+        }
+        else
+        {
+            mParentRuntime.reset(createRuntime()); // the parent runtime is kept alive in a member
+            ASSERT(mParentRuntime != nullptr);
+
+            mRuntime.reset(mParentRuntime->loadRuntime(mLeanDLLPath.c_str()));
+        }
+        ASSERT(mRuntime != nullptr);
+        if (mVersionCompatible)
+        {
+            // Application needs to opt into allowing deserialization of engines with embedded lean runtime.
+            mRuntime->setEngineHostCodeAllowed(true);
+        }
+
+        if (!mTempdir.empty())
+        {
+            mRuntime->setTemporaryDirectory(mTempdir.c_str());
+        }
+        // NOTE(review): mRuntime is already dereferenced above, so the null check two lines below comes too late.
+        mRuntime->setTempfileControlFlags(mTempfileControls);
+        SMP_RETVAL_IF_FALSE(mRuntime != nullptr, "runtime creation failed", nullptr, sample::gLogError);
+        if (mDLACore != -1) // -1 means no DLA core was requested
+        {
+            mRuntime->setDLACore(mDLACore);
+        }
+        mRuntime->setErrorRecorder(&gRecorder);
+        for (auto const& pluginPath : mDynamicPlugins)
+        {
+            mRuntime->getPluginRegistry().loadLibrary(pluginPath.c_str()); // return value is not checked
+        }
+
+        if (getAsyncFileReader().isOpen()) // source priority: async file reader, file reader, in-memory blob
+        {
+            mEngine.reset(mRuntime->deserializeCudaEngine(getAsyncFileReader()));
+        }
+        else if (getFileReader().isOpen())
+        {
+            mEngine.reset(mRuntime->deserializeCudaEngine(getFileReader()));
+        }
+        else
+        {
+            auto const& engineBlob = getBlob();
+            mEngine.reset(mRuntime->deserializeCudaEngine(engineBlob.data, engineBlob.size));
+        }
+        SMP_RETVAL_IF_FALSE(mEngine != nullptr, "Engine deserialization failed", nullptr, sample::gLogError);
+
+        time_point const deserializeEndTime{std::chrono::high_resolution_clock::now()};
+        sample::gLogInfo << "Engine deserialized in " << duration(deserializeEndTime - deserializeStartTime).count()
+                         << " sec." << std::endl;
+    }
+
+    return mEngine.get(); // non-owning pointer; ownership stays with mEngine
+}
+
+nvinfer1::ICudaEngine* LazilyDeserializedEngine::release()
+{
+    return mEngine.release();
+}
+
+bool LazilyDeserializedEngine::checkDLASafe()
+{
+    ASSERT(sample::hasSafeRuntime());
+
+    SMP_RETVAL_IF_FALSE(mDLACore == -1, "Safe DLA engine built with kDLA_STANDALONE should not be run via TRT!", false,
+        sample::gLogError);
+
+    return true;
+}
+
+void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector<IOFormat> const& inputFormats,
+    std::vector<IOFormat> const& outputFormats, std::string const& calibrationFile)
+{
+    auto const tensorScales = readScalesFromCalibrationCache(calibrationFile);
+    bool const broadcastInputFormats = broadcastIOFormats(inputFormats, network.getNbInputs());
+    for (int32_t i = 0, n = network.getNbInputs(); i < n; ++i)
+    {
+        int32_t formatIdx = broadcastInputFormats ? 0 : i;
+        if (!inputFormats.empty() && inputFormats[formatIdx].first == DataType::kINT8)
+        {
+            auto* input = network.getInput(i);
+            auto const calibScale = tensorScales.at(input->getName());
+            input->setDynamicRange(-127 * calibScale, 127 * calibScale);
+        }
+    }
+    bool const broadcastOutputFormats = broadcastIOFormats(outputFormats, network.getNbOutputs());
+    for (int32_t i = 0, n = network.getNbOutputs(); i < n; ++i)
+    {
+        int32_t formatIdx = broadcastOutputFormats ? 0 : i;
+        if (!outputFormats.empty() && outputFormats[formatIdx].first == DataType::kINT8)
+        {
+            auto* output = network.getOutput(i);
+            auto const calibScale = tensorScales.at(output->getName());
+            output->setDynamicRange(-127 * calibScale, 127 * calibScale);
+        }
+    }
+}
+
+//!
+//! \brief Generate a network definition for a given model
+//!
+//! \param[in] model Model options for this network
+//! \param[in,out] network Network storing the parsed results
+//! \param[in,out] err Error stream
+//! \param[out] vcPluginLibrariesUsed If not nullptr, will be populated with paths to VC plugin libraries required by
+//! the parsed network.
+//!
+//! \return Parser The parser used to initialize the network and that holds the weights for the network, or an invalid
+//! parser (the returned parser converts to false if tested)
+//!
+//! Constant input dimensions in the model must not be changed in the corresponding
+//! network definition, because its correctness may rely on the constants.
+//!
+//! \see Parser::operator bool()
+//!
+Parser modelToNetwork(ModelOptions const& model, BuildOptions const& build, nvinfer1::INetworkDefinition& network,
+    std::ostream& err, std::vector<std::string>* vcPluginLibrariesUsed)
+{
+    sample::gLogInfo << "Start parsing network model." << std::endl;
+    auto const tBegin = std::chrono::high_resolution_clock::now();
+
+    Parser parser;
+    switch (model.baseModel.format)
+    {
+    case ModelFormat::kONNX:
+    {
+        using namespace nvonnxparser;
+        parser.onnxParser.reset(createONNXParser(network));
+        ASSERT(parser.onnxParser != nullptr);
+        // kNATIVE_INSTANCENORM is ON by default in the parser and must be cleared to use the plugin implementation.
+        if (build.pluginInstanceNorm)
+        {
+            parser.onnxParser->clearFlag(OnnxParserFlag::kNATIVE_INSTANCENORM);
+        }
+        if (build.enableUInt8AsymmetricQuantizationDLA)
+        {
+            parser.onnxParser->setFlag(OnnxParserFlag::kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA);
+        }
+        if (!parser.onnxParser->parseFromFile(
+                model.baseModel.model.c_str(), static_cast<int>(sample::gLogger.getReportableSeverity())))
+        {
+            err << "Failed to parse onnx file" << std::endl;
+            parser.onnxParser.reset();
+        }
+        if (vcPluginLibrariesUsed && parser.onnxParser.get())
+        {
+            int64_t nbPluginLibs;
+            char const* const* pluginLibArray = parser.onnxParser->getUsedVCPluginLibraries(nbPluginLibs);
+            if (nbPluginLibs >= 0)
+            {
+                vcPluginLibrariesUsed->reserve(nbPluginLibs);
+                for (int64_t i = 0; i < nbPluginLibs; ++i)
+                {
+                    sample::gLogInfo << "Using VC plugin library " << pluginLibArray[i] << std::endl;
+                    vcPluginLibrariesUsed->emplace_back(std::string{pluginLibArray[i]});
+                }
+            }
+            else
+            {
+                sample::gLogWarning << "Failure to query VC plugin libraries required by parsed ONNX network"
+                                    << std::endl;
+            }
+        }
+        break;
+    }
+    case ModelFormat::kANY: break;
+    }
+
+    auto const tEnd = std::chrono::high_resolution_clock::now();
+    float const parseTime = std::chrono::duration<float>(tEnd - tBegin).count();
+
+    sample::gLogInfo << "Finished parsing network model. Parse time: " << parseTime << std::endl;
+    return parser;
+}
+
+namespace
+{
+
+class RndInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2
+{
+public:
+    RndInt8Calibrator(int32_t batches, std::vector<int64_t>& elemCount, std::string const& cacheFile,
+        nvinfer1::INetworkDefinition const& network, std::ostream& err);
+
+    ~RndInt8Calibrator() override
+    {
+        for (auto& elem : mInputDeviceBuffers)
+        {
+            CHECK_WITH_STREAM(cudaFree(elem.second), mErr);
+        }
+    }
+
+    bool getBatch(void* bindings[], char const* names[], int32_t nbBindings) noexcept override;
+
+    int32_t getBatchSize() const noexcept override
+    {
+        return 1;
+    }
+
+    void const* readCalibrationCache(size_t& length) noexcept override;
+
+    void writeCalibrationCache(void const*, size_t) noexcept override {}
+
+private:
+    int32_t mBatches{};
+    int32_t mCurrentBatch{};
+    std::string mCacheFile;
+    std::map<std::string, void*> mInputDeviceBuffers;
+    std::vector<char> mCalibrationCache;
+    std::ostream& mErr;
+};
+
+RndInt8Calibrator::RndInt8Calibrator(int32_t batches, std::vector<int64_t>& elemCount, std::string const& cacheFile,
+    INetworkDefinition const& network, std::ostream& err)
+    : mBatches(batches)
+    , mCurrentBatch(0)
+    , mCacheFile(cacheFile)
+    , mErr(err)
+{
+    std::ifstream tryCache(cacheFile, std::ios::binary);
+    if (tryCache.good())
+    {
+        return;
+    }
+
+    std::default_random_engine generator;
+    std::uniform_real_distribution<float> distribution(-1.0F, 1.0F);
+    auto gen = [&generator, &distribution]() { return distribution(generator); };
+
+    for (int32_t i = 0; i < network.getNbInputs(); i++)
+    {
+        auto* input = network.getInput(i);
+        std::vector<float> rnd_data(elemCount[i]);
+        std::generate_n(rnd_data.begin(), elemCount[i], gen);
+
+        void* data;
+        CHECK_WITH_STREAM(cudaMalloc(&data, elemCount[i] * sizeof(float)), mErr);
+        CHECK_WITH_STREAM(
+            cudaMemcpy(data, rnd_data.data(), elemCount[i] * sizeof(float), cudaMemcpyHostToDevice), mErr);
+
+        mInputDeviceBuffers.insert(std::make_pair(input->getName(), data));
+    }
+}
+
+bool RndInt8Calibrator::getBatch(void* bindings[], char const* names[], int32_t nbBindings) noexcept
+{
+    if (mCurrentBatch >= mBatches) // every requested batch has already been handed out
+    {
+        return false;
+    }
+
+    for (int32_t i = 0; i < nbBindings; ++i)
+    {
+        bindings[i] = mInputDeviceBuffers[names[i]]; // operator[] inserts and yields nullptr for an unknown name
+    }
+
+    ++mCurrentBatch; // the device buffers are not refreshed between batches
+
+    return true;
+}
+
+void const* RndInt8Calibrator::readCalibrationCache(size_t& length) noexcept
+{
+    mCalibrationCache.clear();
+    std::ifstream input(mCacheFile, std::ios::binary);
+    input >> std::noskipws;
+    if (input.good())
+    {
+        std::copy(
+            std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(mCalibrationCache));
+    }
+
+    length = mCalibrationCache.size();
+    return !mCalibrationCache.empty() ? mCalibrationCache.data() : nullptr;
+}
+
+bool setTensorDynamicRange(INetworkDefinition const& network, float inRange = 2.0F, float outRange = 4.0F)
+{
+    // Ensure that all layer inputs have a dynamic range.
+    for (int32_t l = 0; l < network.getNbLayers(); l++)
+    {
+        auto* layer = network.getLayer(l);
+        for (int32_t i = 0; i < layer->getNbInputs(); i++)
+        {
+            ITensor* input{layer->getInput(i)};
+            // Optional inputs are nullptr here and are from RNN layers.
+            if (input && !input->dynamicRangeIsSet())
+            {
+                // Concat should propagate dynamic range from outputs to inputs to avoid
+                // Re-quantization during the concatenation
+                auto dynRange = (layer->getType() == LayerType::kCONCATENATION) ? outRange : inRange;
+                if (!input->setDynamicRange(-dynRange, dynRange))
+                {
+                    return false;
+                }}
+        }
+        for (int32_t o = 0; o < layer->getNbOutputs(); o++)
+        {
+            ITensor* output{layer->getOutput(o)};
+            // Optional outputs are nullptr here and are from RNN layers.
+            if (output && !output->dynamicRangeIsSet())
+            {
+                // Pooling must have the same input and output dynamic range.
+                if (layer->getType() == LayerType::kPOOLING)
+                {
+                    if (!output->setDynamicRange(-inRange, inRange))
+                    {
+                        return false;
+                    }
+                }
+                else
+                {
+                    if (!output->setDynamicRange(-outRange, outRange))
+                    {
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+    return true;
+}
+
+bool isNonActivationType(nvinfer1::DataType const type)
+{
+    return type == nvinfer1::DataType::kINT32 || type == nvinfer1::DataType::kINT64 || type == nvinfer1::DataType::kBOOL
+        || type == nvinfer1::DataType::kUINT8;
+}
+void setLayerPrecisions(INetworkDefinition& network, LayerPrecisions const& layerPrecisions)
+{
+    bool hasLayerPrecisionSkipped{false};
+    for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx)
+    {
+        auto* layer = network.getLayer(layerIdx);
+        auto const layerName = layer->getName();
+        auto exactMatch = layerPrecisions.find(layerName);
+        auto plausibleMatch = findPlausible(layerPrecisions, layerName);
+        if (exactMatch != layerPrecisions.end())
+        {
+            sample::gLogInfo << "Set layer " << layerName << " to precision " << exactMatch->second << std::endl;
+            layer->setPrecision(exactMatch->second);
+        }
+        else if (plausibleMatch != layerPrecisions.end())
+        {
+            if (isNonActivationType(layer->getPrecision()))
+            {
+                hasLayerPrecisionSkipped = true;
+                sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because the "
+                                    << " default layer precision is of non-activation type." << std::endl;
+                continue;
+            }
+            if (layer->getType() == nvinfer1::LayerType::kCONSTANT
+                && (isNonActivationType(static_cast<IConstantLayer*>(layer)->getWeights().type)))
+            {
+                hasLayerPrecisionSkipped = true;
+                sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this "
+                                    << "constant layer has weights of non-activation type." << std::endl;
+                continue;
+            }
+            if (layer->getNbInputs() >= 1 && layer->getInput(0)->isShapeTensor())
+            {
+                hasLayerPrecisionSkipped = true;
+                sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this layer "
+                                    << "operates on a shape tensor." << std::endl;
+                continue;
+            }
+            if (layer->getNbInputs() >= 1 && isNonActivationType(layer->getInput(0)->getType())
+                && layer->getNbOutputs() >= 1 && isNonActivationType(layer->getOutput(0)->getType()))
+            {
+                hasLayerPrecisionSkipped = true;
+                sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this "
+                                    << "layer has input and output of non-activation type." << std::endl;
+                continue;
+            }
+            // All heuristics passed. Set the layer precision.
+            sample::gLogInfo << "Set layer " << layerName << " to precision " << plausibleMatch->second << std::endl;
+            layer->setPrecision(plausibleMatch->second);
+        }
+    }
+
+    if (hasLayerPrecisionSkipped)
+    {
+        sample::gLogInfo << "Skipped setting precisions for some layers. Check verbose logs for more details."
+                         << std::endl;
+    }
+}
+
+void setLayerOutputTypes(INetworkDefinition& network, LayerOutputTypes const& layerOutputTypes)
+{
+    // Applies the --layerOutputTypes spec. An exact layer-name entry is applied as given (std::invalid_argument
+    // on an output-count mismatch); otherwise an entry from findPlausible() is applied where no skip check fires.
+    bool hasLayerOutputTypeSkipped{false};
+    for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx)
+    {
+        auto* layer = network.getLayer(layerIdx);
+        auto const layerName = layer->getName();
+        auto const nbOutputs = layer->getNbOutputs();
+        auto exactMatch = layerOutputTypes.find(layerName);
+        auto plausibleMatch = findPlausible(layerOutputTypes, layerName);
+        if (exactMatch != layerOutputTypes.end())
+        {
+            auto const& outputTypes = exactMatch->second;
+            bool const isBroadcast = (outputTypes.size() == 1);
+            if (!isBroadcast && static_cast<int32_t>(outputTypes.size()) != nbOutputs)
+            {
+                sample::gLogError << "Layer " << layerName << " has " << nbOutputs << " outputs but "
+                                  << outputTypes.size() << " output types are given in --layerOutputTypes flag."
+                                  << std::endl;
+                throw std::invalid_argument("Invalid --layerOutputTypes flag.");
+            }
+            for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx)
+            {
+                auto const outputType = outputTypes.at(isBroadcast ? 0 : outputIdx);
+                sample::gLogInfo << "Set output " << outputIdx << " of layer " << layerName << " to type " << outputType
+                                 << std::endl;
+                layer->setOutputType(outputIdx, outputType);
+            }
+        }
+        else if (plausibleMatch != layerOutputTypes.end())
+        {
+            auto const& outputTypes = plausibleMatch->second;
+            bool const isBroadcast = (outputTypes.size() == 1);
+
+            // We should not set the layer output types if its default precision is INT32 or Bool.
+            if (layer->getPrecision() == nvinfer1::DataType::kINT32
+                || layer->getPrecision() == nvinfer1::DataType::kBOOL)
+            {
+                hasLayerOutputTypeSkipped = true;
+                sample::gLogVerbose << "Skipped setting output types for layer " << layerName << " because the "
+                                    << " default layer precision is INT32 or Bool." << std::endl;
+                continue;
+            }
+            // We should not set the constant layer output types if its weights are in INT32.
+            if (layer->getType() == nvinfer1::LayerType::kCONSTANT
+                && static_cast<IConstantLayer*>(layer)->getWeights().type == nvinfer1::DataType::kINT32)
+            {
+                hasLayerOutputTypeSkipped = true;
+                sample::gLogVerbose << "Skipped setting output types for layer " << layerName << " because this "
+                                    << "constant layer has INT32 weights." << std::endl;
+                continue;
+            }
+            for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx)
+            {
+                // We should not set the output type if the output is a shape tensor.
+                if (layer->getOutput(0)->isShapeTensor()) // NOTE(review): always tests output 0 - confirm intent
+                {
+                    hasLayerOutputTypeSkipped = true;
+                    sample::gLogVerbose << "Skipped setting output type for output " << outputIdx << " of layer "
+                                        << layerName << " because it is a shape tensor." << std::endl;
+                    continue;
+                }
+
+                auto const outputType = outputTypes.at(isBroadcast ? 0 : outputIdx); // one entry => all outputs
+                sample::gLogInfo << "Set output " << outputIdx << " of layer " << layerName << " to type " << outputType
+                                 << std::endl;
+                layer->setOutputType(outputIdx, outputType); // the matched entry's type, same as logged above
+            }
+        }
+    }
+
+    if (hasLayerOutputTypeSkipped)
+    {
+        sample::gLogInfo << "Skipped setting output types for some layers. Check verbose logs for more details."
+                         << std::endl;
+    }
+}
+
+void setLayerDeviceTypes(
+    INetworkDefinition const& network, IBuilderConfig& config, LayerDeviceTypes const& layerDeviceTypes)
+{
+    for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx)
+    {
+        auto* layer = network.getLayer(layerIdx);
+        auto const layerName = layer->getName();
+        auto match = findPlausible(layerDeviceTypes, layerName);
+        if (match != layerDeviceTypes.end())
+        {
+            DeviceType const deviceType = match->second;
+            sample::gLogInfo << "Set layer " << layerName << " to device type " << deviceType << std::endl;
+            config.setDeviceType(layer, deviceType);
+        }
+    }
+}
+
+void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors)
+{
+    for (int64_t inputIndex = 0; inputIndex < network.getNbInputs(); ++inputIndex)
+    {
+        auto* t = network.getInput(inputIndex);
+        auto const tensorName = t->getName();
+        if (debugTensors.count(tensorName) > 0)
+        {
+            network.markDebug(*t);
+        }
+    }
+    for (int64_t layerIndex = 0; layerIndex < network.getNbLayers(); ++layerIndex)
+    {
+        auto* layer = network.getLayer(layerIndex);
+        for (int64_t outputIndex = 0; outputIndex < layer->getNbOutputs(); ++outputIndex)
+        {
+            auto* t = layer->getOutput(outputIndex);
+            auto const tensorName = t->getName();
+            if (debugTensors.count(tensorName) > 0)
+            {
+                network.markDebug(*t);
+            }
+        }
+    }
+}
+void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build)
+{
+    auto const roundToBytes = [](double const size, bool fromMB = true) { // option value -> bytes (truncating)
+        return static_cast<size_t>(size * (fromMB ? 1.0_MiB : 1.0_KiB));
+    };
+    if (build.workspace >= 0) // as for every pool below, a negative value leaves the limit untouched
+    {
+        config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, roundToBytes(build.workspace));
+    }
+    if (build.dlaSRAM >= 0)
+    {
+        size_t const sizeInBytes = roundToBytes(build.dlaSRAM);
+        size_t sizeInPowerOf2{1}; // an exponent (shift count), not a byte count
+        // Round sizeInBytes down to a power of two; the exponent is capped at 30 to rule out overflow and endless loops.
+        while (sizeInPowerOf2 < 31 && (static_cast<size_t>(1) << sizeInPowerOf2) <= sizeInBytes)
+        {
+            ++sizeInPowerOf2;
+        }
+        --sizeInPowerOf2; // the loop overshoots by one; sizes below 2 bytes end up at exponent 0
+        if (sizeInPowerOf2 == 30) // also hit by a request of exactly 2^30 bytes
+        {
+            sample::gLogWarning
+                << "User-specified DLA managed SRAM size is too large and has been clipped to 2^30 bytes. "
+                << "Please make sure that this is the intended managed SRAM size." << std::endl;
+        }
+        config.setMemoryPoolLimit(MemoryPoolType::kDLA_MANAGED_SRAM, static_cast<size_t>(1) << sizeInPowerOf2);
+    }
+    if (build.dlaLocalDRAM >= 0)
+    {
+        config.setMemoryPoolLimit(MemoryPoolType::kDLA_LOCAL_DRAM, roundToBytes(build.dlaLocalDRAM));
+    }
+    if (build.dlaGlobalDRAM >= 0)
+    {
+        config.setMemoryPoolLimit(MemoryPoolType::kDLA_GLOBAL_DRAM, roundToBytes(build.dlaGlobalDRAM));
+    }
+    if (build.tacticSharedMem >= 0) // this option is scaled by 1.0_KiB, the others by 1.0_MiB
+    {
+        config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, roundToBytes(build.tacticSharedMem, false));
+    }
+}
+
+void setPreviewFeatures(IBuilderConfig& config, BuildOptions const& build)
+{
+    auto const setFlag = [&](PreviewFeature feat) {
+        int32_t featVal = static_cast<int32_t>(feat);
+        if (build.previewFeatures.find(featVal) != build.previewFeatures.end())
+        {
+            config.setPreviewFeature(feat, build.previewFeatures.at(featVal));
+        }
+    };
+    setFlag(PreviewFeature::kALIASED_PLUGIN_IO_10_03);
+    setFlag(PreviewFeature::kRUNTIME_ACTIVATION_RESIZE_10_10);
+}
+
+[[nodiscard]] bool setupTilingSettings(BuildOptions const& build, IBuilderConfig& config, std::ostream& err)
+{
+    if (!config.setTilingOptimizationLevel(static_cast<TilingOptimizationLevel>(build.tilingOptimizationLevel)))
+    {
+        err << "Can not set tilingOptimizationLevel(" << build.tilingOptimizationLevel << ")" << std::endl;
+        return false;
+    }
+
+    if (build.l2LimitForTiling != -1)
+    {
+        if (!config.setL2LimitForTiling(build.l2LimitForTiling))
+        {
+            err << "Can not set l2LimitForTiling(" << build.l2LimitForTiling << ")" << std::endl;
+            return false;
+        }
+    }
+
+    return true;
+}
+
+bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, IBuilder& builder,
+    INetworkDefinition& network, IBuilderConfig& config, std::unique_ptr<nvinfer1::IInt8Calibrator>& calibrator,
+    std::ostream& err, std::vector<std::vector<int8_t>>& sparseWeights)
+{
+    std::vector<IOptimizationProfile*> profiles{};
+    profiles.resize(build.optProfiles.size());
+    for (auto& profile : profiles)
+    {
+        profile = builder.createOptimizationProfile();
+    }
+
+    bool hasDynamicShapes{false};
+
+    bool broadcastInputFormats = broadcastIOFormats(build.inputFormats, network.getNbInputs());
+
+    // Check if the provided input tensor names match the input tensors of the engine.
+    // Throw an error if the provided input tensor names cannot be found because it implies a potential typo.
+    for (auto const& shapes : build.optProfiles)
+    {
+        for (auto const& shape : shapes)
+        {
+            bool tensorNameFound{false};
+            for (int32_t i = 0; i < network.getNbInputs(); ++i)
+            {
+                if (matchStringWithOneWildcard(shape.first, network.getInput(i)->getName()))
+                {
+                    tensorNameFound = true;
+                    break;
+                }
+            }
+            if (!tensorNameFound)
+            {
+                sample::gLogError << "Cannot find input tensor with name \"" << shape.first << "\" in the network "
+                                  << "inputs! Please make sure the input tensor names are correct." << std::endl;
+                return false;
+            }
+        }
+    }
+
+    for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++)
+    {
+        // Set formats and data types of inputs
+        auto* input = network.getInput(i);
+        if (!build.inputFormats.empty())
+        {
+            int32_t inputFormatIndex = broadcastInputFormats ? 0 : i;
+            input->setType(build.inputFormats[inputFormatIndex].first);
+            input->setAllowedFormats(build.inputFormats[inputFormatIndex].second);
+        }
+
+        auto const dims = input->getDimensions();
+        auto const isScalar = dims.nbDims == 0;
+        auto const isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; })
+            || input->isShapeTensor();
+        if (isDynamicInput)
+        {
+            hasDynamicShapes = true;
+            for (size_t i = 0; i < build.optProfiles.size(); i++)
+            {
+                auto const& optShapes = build.optProfiles[i];
+                auto profile = profiles[i];
+                auto const tensorName = input->getName();
+                auto shape = findPlausible(optShapes, tensorName);
+                ShapeRange shapes{};
+
+                // If no shape is provided, set dynamic dimensions to 1.
+                if (shape == optShapes.end())
+                {
+                    constexpr int32_t kDEFAULT_DIMENSION{1};
+                    std::vector<int64_t> staticDims;
+                    if (input->isShapeTensor())
+                    {
+                        if (isScalar)
+                        {
+                            staticDims.push_back(1);
+                        }
+                        else
+                        {
+                            staticDims.resize(dims.d[0]);
+                            std::fill(staticDims.begin(), staticDims.end(), kDEFAULT_DIMENSION);
+                        }
+                    }
+                    else
+                    {
+                        staticDims.resize(dims.nbDims);
+                        std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(),
+                            [&](int dimension) { return dimension > 0 ? dimension : kDEFAULT_DIMENSION; });
+                    }
+                    sample::gLogWarning << "Dynamic dimensions required for input: " << tensorName
+                                        << ", but no shapes were provided. Automatically overriding shape to: "
+                                        << staticDims << std::endl;
+                    std::fill(shapes.begin(), shapes.end(), staticDims);
+                }
+                else
+                {
+                    shapes = shape->second;
+                }
+
+                std::vector<int64_t> profileDims{};
+                if (input->isShapeTensor())
+                {
+                    profileDims = shapes[static_cast<size_t>(OptProfileSelector::kMIN)];
+                    SMP_RETVAL_IF_FALSE(profile->setShapeValuesV2(tensorName, OptProfileSelector::kMIN,
+                                            profileDims.data(), static_cast<int>(profileDims.size())),
+                        "Error in set shape values MIN", false, err);
+                    profileDims = shapes[static_cast<size_t>(OptProfileSelector::kOPT)];
+                    SMP_RETVAL_IF_FALSE(profile->setShapeValuesV2(tensorName, OptProfileSelector::kOPT,
+                                            profileDims.data(), static_cast<int>(profileDims.size())),
+                        "Error in set shape values OPT", false, err);
+                    profileDims = shapes[static_cast<size_t>(OptProfileSelector::kMAX)];
+                    SMP_RETVAL_IF_FALSE(profile->setShapeValuesV2(tensorName, OptProfileSelector::kMAX,
+                                            profileDims.data(), static_cast<int>(profileDims.size())),
+                        "Error in set shape values MAX", false, err);
+                    sample::gLogInfo << "Set input shape tensor " << tensorName << " for optimization profile " << i
+                                     << " to:"
+                                     << " MIN=" << shapes[static_cast<size_t>(OptProfileSelector::kMIN)]
+                                     << " OPT=" << shapes[static_cast<size_t>(OptProfileSelector::kOPT)]
+                                     << " MAX=" << shapes[static_cast<size_t>(OptProfileSelector::kMAX)] << std::endl;
+                }
+                else
+                {
+                    profileDims = shapes[static_cast<size_t>(OptProfileSelector::kMIN)];
+                    SMP_RETVAL_IF_FALSE(
+                        profile->setDimensions(tensorName, OptProfileSelector::kMIN, toDims(profileDims)),
+                        "Error in set dimensions to profile MIN", false, err);
+                    profileDims = shapes[static_cast<size_t>(OptProfileSelector::kOPT)];
+                    SMP_RETVAL_IF_FALSE(
+                        profile->setDimensions(tensorName, OptProfileSelector::kOPT, toDims(profileDims)),
+                        "Error in set dimensions to profile OPT", false, err);
+                    profileDims = shapes[static_cast<size_t>(OptProfileSelector::kMAX)];
+                    SMP_RETVAL_IF_FALSE(
+                        profile->setDimensions(tensorName, OptProfileSelector::kMAX, toDims(profileDims)),
+                        "Error in set dimensions to profile MAX", false, err);
+                    sample::gLogInfo << "Set shape of input tensor " << tensorName << " for optimization profile " << i
+                                     << " to:"
+                                     << " MIN=" << shapes[static_cast<size_t>(OptProfileSelector::kMIN)]
+                                     << " OPT=" << shapes[static_cast<size_t>(OptProfileSelector::kOPT)]
+                                     << " MAX=" << shapes[static_cast<size_t>(OptProfileSelector::kMAX)] << std::endl;
+                }
+            }
+        }
+    }
+
+    for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++)
+    {
+        auto* output = network.getOutput(i);
+        auto const dims = output->getDimensions();
+        // A shape tensor output with known static dimensions may have dynamic shape values inside it.
+        auto const isDynamicOutput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; })
+            || output->isShapeTensor();
+        if (isDynamicOutput)
+        {
+            hasDynamicShapes = true;
+        }
+    }
+
+    if (!hasDynamicShapes && !build.optProfiles[0].empty())
+    {
+        sample::gLogError << "Static model does not take explicit shapes since the shape of inference tensors will be "
+                             "determined by the model itself"
+                          << std::endl;
+        return false;
+    }
+
+    if (hasDynamicShapes)
+    {
+        for (auto profile : profiles)
+        {
+            SMP_RETVAL_IF_FALSE(profile->isValid(), "Required optimization profile is invalid", false, err);
+            SMP_RETVAL_IF_FALSE(
+                config.addOptimizationProfile(profile) != -1, "Error in add optimization profile", false, err);
+        }
+    }
+
+    bool broadcastOutputFormats = broadcastIOFormats(build.outputFormats, network.getNbOutputs(), false);
+
+    for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++)
+    {
+        // Set formats and data types of outputs
+        auto* output = network.getOutput(i);
+        if (!build.outputFormats.empty())
+        {
+            int32_t outputFormatIndex = broadcastOutputFormats ? 0 : i;
+            output->setType(build.outputFormats[outputFormatIndex].first);
+            output->setAllowedFormats(build.outputFormats[outputFormatIndex].second);
+        }
+    }
+
+    setMemoryPoolLimits(config, build);
+
+    setPreviewFeatures(config, build);
+
+    if (build.builderOptimizationLevel != defaultBuilderOptimizationLevel)
+    {
+        config.setBuilderOptimizationLevel(build.builderOptimizationLevel);
+    }
+
+    if (build.maxTactics != defaultMaxTactics)
+    {
+        config.setMaxNbTactics(build.maxTactics);
+    }
+
+    if (build.timingCacheMode == TimingCacheMode::kDISABLE)
+    {
+        config.setFlag(BuilderFlag::kDISABLE_TIMING_CACHE);
+    }
+
+    if (build.disableCompilationCache)
+    {
+        config.setFlag(BuilderFlag::kDISABLE_COMPILATION_CACHE);
+    }
+
+    if (build.errorOnTimingCacheMiss)
+    {
+        config.setFlag(BuilderFlag::kERROR_ON_TIMING_CACHE_MISS);
+    }
+
+    if (!build.tf32)
+    {
+        config.clearFlag(BuilderFlag::kTF32);
+    }
+
+    if (build.refittable)
+    {
+        config.setFlag(BuilderFlag::kREFIT);
+    }
+
+    if (build.stripWeights)
+    {
+        // The kREFIT_IDENTICAL is enabled by default when kSTRIP_PLAN is on.
+        config.setFlag(BuilderFlag::kSTRIP_PLAN);
+    }
+
+    if (build.versionCompatible)
+    {
+        config.setFlag(BuilderFlag::kVERSION_COMPATIBLE);
+    }
+    std::vector<char const*> pluginPaths;
+    for (auto const& pluginPath : sys.setPluginsToSerialize)
+    {
+        sample::gLogVerbose << "Setting plugin to serialize: " << pluginPath << std::endl;
+        pluginPaths.push_back(pluginPath.c_str());
+    }
+    if (!pluginPaths.empty())
+    {
+        config.setPluginsToSerialize(pluginPaths.data(), pluginPaths.size());
+    }
+    if (build.excludeLeanRuntime)
+    {
+        config.setFlag(BuilderFlag::kEXCLUDE_LEAN_RUNTIME);
+    }
+
+    if (build.sparsity != SparsityFlag::kDISABLE)
+    {
+        config.setFlag(BuilderFlag::kSPARSE_WEIGHTS);
+        if (build.sparsity == SparsityFlag::kFORCE)
+        {
+            sparsify(network, sparseWeights);
+        }
+    }
+
+    if (build.enableMonitorMemory)
+    {
+        config.setFlag(BuilderFlag::kMONITOR_MEMORY);
+    }
+
+    if (build.distributiveIndependence)
+    {
+        config.setFlag(BuilderFlag::kDISTRIBUTIVE_INDEPENDENCE);
+    }
+
+    config.setProfilingVerbosity(build.profilingVerbosity);
+    config.setAvgTimingIterations(build.avgTiming);
+    if (build.fp16)
+    {
+        config.setFlag(BuilderFlag::kFP16);
+    }
+    if (build.int8)
+    {
+        config.setFlag(BuilderFlag::kINT8);
+    }
+    if (build.bf16)
+    {
+        config.setFlag(BuilderFlag::kBF16);
+    }
+
+    SMP_RETVAL_IF_FALSE(!(build.int8 && build.fp8), "FP8 and INT8 precisions have been specified", false, err);
+
+    if (build.fp8)
+    {
+        config.setFlag(BuilderFlag::kFP8);
+    }
+
+    if (build.int4)
+    {
+        config.setFlag(BuilderFlag::kINT4);
+    }
+
+    if (build.int8 && !build.fp16)
+    {
+        sample::gLogInfo
+            << "FP32 and INT8 precisions have been specified - more performance might be enabled by additionally "
+               "specifying --fp16 or --best"
+            << std::endl;
+    }
+    auto isInt8 = [](IOFormat const& format) { return format.first == DataType::kINT8; };
+    auto int8IO = std::count_if(build.inputFormats.begin(), build.inputFormats.end(), isInt8)
+        + std::count_if(build.outputFormats.begin(), build.outputFormats.end(), isInt8);
+
+    auto hasQDQLayers = [](INetworkDefinition& network) {
+        // Determine if our network has QDQ layers.
+        auto const nbLayers = network.getNbLayers();
+        for (int32_t i = 0; i < nbLayers; i++)
+        {
+            auto const& layer = network.getLayer(i);
+            if (layer->getType() == LayerType::kQUANTIZE || layer->getType() == LayerType::kDEQUANTIZE)
+            {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    if (!hasQDQLayers(network) && (build.int8 || int8IO) && build.calibration.empty())
+    {
+        // Explicitly set int8 scales if no calibrator is provided and if I/O tensors use int8,
+        // because auto calibration does not support this case.
+        SMP_RETVAL_IF_FALSE(setTensorDynamicRange(network), "Error in set tensor dynamic range.", false, err);
+    }
+    else if (build.int8)
+    {
+        if (!hasQDQLayers(network) && int8IO)
+        {
+            try
+            {
+                // Set dynamic ranges of int8 inputs / outputs to match scales loaded from calibration cache
+                // TODO http://nvbugs/3262234 Change the network validation so that this workaround can be removed
+                setTensorScalesFromCalibration(network, build.inputFormats, build.outputFormats, build.calibration);
+            }
+            catch (std::exception&)
+            {
+                sample::gLogError
+                    << "Int8IO was specified but impossible to read tensor scales from provided calibration cache file"
+                    << std::endl;
+                return false;
+            }
+        }
+
+        IOptimizationProfile* profileCalib{nullptr};
+        if (!build.shapesCalib.empty())
+        {
+            profileCalib = builder.createOptimizationProfile();
+            for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++)
+            {
+                auto* input = network.getInput(i);
+                Dims profileDims{};
+                auto const tensorName = input->getName();
+                auto shape = findPlausible(build.shapesCalib, tensorName);
+
+                if (shape == build.shapesCalib.end())
+                {
+                    std::ostringstream msg;
+                    msg << "Calibration profile for tensor " << tensorName << " cannot be found!";
+                    throw std::invalid_argument(msg.str());
+                }
+
+                auto shapesCalib = shape->second;
+                profileDims = toDims(shapesCalib[static_cast<size_t>(OptProfileSelector::kOPT)]);
+                // Here we check only kMIN as all profileDims are the same.
+                SMP_RETVAL_IF_FALSE(profileCalib->setDimensions(tensorName, OptProfileSelector::kMIN, profileDims),
+                    "Error in set dimensions to calibration profile OPT", false, err);
+                profileCalib->setDimensions(tensorName, OptProfileSelector::kOPT, profileDims);
+                profileCalib->setDimensions(tensorName, OptProfileSelector::kMAX, profileDims);
+                sample::gLogInfo << "Set calibration profile for input tensor " << tensorName << " to " << profileDims
+                                 << std::endl;
+            }
+            SMP_RETVAL_IF_FALSE(profileCalib->isValid(), "Calibration profile is invalid", false, err);
+            SMP_RETVAL_IF_FALSE(
+                config.setCalibrationProfile(profileCalib), "Error in set calibration profile", false, err);
+        }
+
+        std::vector<int64_t> elemCount{};
+        for (int i = 0; i < network.getNbInputs(); i++)
+        {
+            auto* input = network.getInput(i);
+            auto const dims = input->getDimensions();
+            auto const isDynamicInput
+                = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; });
+
+            if (profileCalib)
+            {
+                elemCount.push_back(volume(profileCalib->getDimensions(input->getName(), OptProfileSelector::kOPT)));
+            }
+            else if (!profiles.empty() && isDynamicInput)
+            {
+                elemCount.push_back(
+                    volume(profiles[build.calibProfile]->getDimensions(input->getName(), OptProfileSelector::kOPT)));
+            }
+            else
+            {
+                elemCount.push_back(volume(input->getDimensions()));
+            }
+        }
+
+        calibrator.reset(new RndInt8Calibrator(1, elemCount, build.calibration, network, err));
+        config.setInt8Calibrator(calibrator.get());
+    }
+
+    if (build.directIO)
+    {
+        config.setFlag(BuilderFlag::kDIRECT_IO);
+    }
+
+    switch (build.precisionConstraints)
+    {
+    case PrecisionConstraints::kNONE:
+        // It's the default for TensorRT.
+        break;
+    case PrecisionConstraints::kOBEY: config.setFlag(BuilderFlag::kOBEY_PRECISION_CONSTRAINTS); break;
+    case PrecisionConstraints::kPREFER: config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); break;
+    }
+
+    if (!build.layerPrecisions.empty() && build.precisionConstraints != PrecisionConstraints::kNONE)
+    {
+        setLayerPrecisions(network, build.layerPrecisions);
+    }
+
+    if (!build.layerOutputTypes.empty() && build.precisionConstraints != PrecisionConstraints::kNONE)
+    {
+        setLayerOutputTypes(network, build.layerOutputTypes);
+    }
+
+    if (!build.layerDeviceTypes.empty())
+    {
+        setLayerDeviceTypes(network, config, build.layerDeviceTypes);
+    }
+
+    if (!build.debugTensors.empty())
+    {
+        markDebugTensors(network, build.debugTensors);
+    }
+
+    if (build.markUnfusedTensorsAsDebugTensors)
+    {
+        network.markUnfusedTensorsAsDebugTensors();
+    }
+
+    if (build.safe && sys.DLACore == -1)
+    {
+        config.setEngineCapability(EngineCapability::kSAFETY);
+    }
+
+    if (build.restricted)
+    {
+        config.setFlag(BuilderFlag::kSAFETY_SCOPE);
+    }
+
+    if (sys.DLACore != -1)
+    {
+        if (sys.DLACore < builder.getNbDLACores())
+        {
+            config.setDefaultDeviceType(DeviceType::kDLA);
+            config.setDLACore(sys.DLACore);
+            config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS);
+            if (build.buildDLAStandalone)
+            {
+                config.setEngineCapability(EngineCapability::kDLA_STANDALONE);
+            }
+            if (build.allowGPUFallback)
+            {
+                config.setFlag(BuilderFlag::kGPU_FALLBACK);
+            }
+            else
+            {
+                // Reformatting runs on GPU, so avoid I/O reformatting.
+                config.setFlag(BuilderFlag::kDIRECT_IO);
+            }
+            if (!build.int8)
+            {
+                config.setFlag(BuilderFlag::kFP16);
+            }
+        }
+        else
+        {
+            err << "Cannot create DLA engine, " << sys.DLACore << " not available" << std::endl;
+            return false;
+        }
+    }
+    if (build.enabledTactics || build.disabledTactics)
+    {
+        TacticSources tacticSources = config.getTacticSources();
+        tacticSources |= build.enabledTactics;
+        tacticSources &= ~build.disabledTactics;
+        config.setTacticSources(tacticSources);
+    }
+
+    config.setHardwareCompatibilityLevel(build.hardwareCompatibilityLevel);
+
+
+    config.setRuntimePlatform(build.runtimePlatform);
+
+    if (build.maxAuxStreams != defaultMaxAuxStreams)
+    {
+        config.setMaxAuxStreams(build.maxAuxStreams);
+    }
+
+    if (build.allowWeightStreaming)
+    {
+        config.setFlag(BuilderFlag::kWEIGHT_STREAMING);
+    }
+
+    if (!setupTilingSettings(build, config, err))
+    {
+        return false;
+    }
+
+    config.setRemoteAutoTuningConfig(build.remoteAutoTuningConfig.c_str());
+
+    return true;
+}
+
+} // namespace
+
+//!
+//! \brief Create a serialized engine for a network definition
+//!
+//! Configures the builder through setupNetworkAndConfig(), builds the engine from env.network and stores the
+//! serialized bytes in env.engine. When the engine is to be saved and is neither a safety nor a DLA standalone
+//! engine, the builder writes directly to the file named by build.engine and the bytes are read back from it.
+//!
+//! \return Whether the engine creation succeeds or fails.
+//!
+bool networkToSerializedEngine(
+    BuildOptions const& build, SystemOptions const& sys, IBuilder& builder, BuildEnvironment& env, std::ostream& err)
+{
+    std::unique_ptr<IBuilderConfig> config{builder.createBuilderConfig()};
+    std::unique_ptr<nvinfer1::IInt8Calibrator> calibrator;
+    std::vector<std::vector<int8_t>> sparseWeights;
+    SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", false, err);
+
+    bool const configReady
+        = setupNetworkAndConfig(build, sys, builder, *env.network, *config, calibrator, err, sparseWeights);
+    SMP_RETVAL_IF_FALSE(configReady, "Network And Config setup failed", false, err);
+
+    bool const useGlobalTimingCache = (build.timingCacheMode == TimingCacheMode::kGLOBAL);
+
+    // Try to load cache from file. Create a fresh cache if the file doesn't exist.
+    // Declared at function scope so the returned handle stays alive until this function returns.
+    std::unique_ptr<ITimingCache> loadedTimingCache{};
+    if (useGlobalTimingCache)
+    {
+        loadedTimingCache
+            = samplesCommon::buildTimingCacheFromFile(gLogger.getTRTLogger(), *config, build.timingCacheFile);
+    }
+
+    // CUDA stream used for profiling by the builder.
+    auto profileStream = samplesCommon::makeCudaStream();
+    SMP_RETVAL_IF_FALSE(profileStream != nullptr, "Cuda stream creation failed", false, err);
+    config->setProfileStream(*profileStream);
+
+    auto const buildStart = std::chrono::high_resolution_clock::now();
+
+    bool const streamEngineToFile = !build.safe && !build.buildDLAStandalone && build.save;
+    if (streamEngineToFile)
+    {
+        // Let the builder write the plan straight into the engine file ...
+        auto const enginePath = build.engine;
+        FileStreamWriter streamWriter(enginePath);
+        SMP_RETVAL_IF_FALSE(builder.buildSerializedNetworkToStream(*env.network, *config, streamWriter),
+            "Engine could not be created from network", false, err);
+        auto const engineSize = streamWriter.finalize();
+
+        // ... then read the bytes back so that env.engine holds them as a blob.
+        std::vector<uint8_t> engineBytes(engineSize, 0);
+        std::ifstream engineInput(enginePath, std::ios::binary);
+        SMP_RETVAL_IF_FALSE(
+            (engineInput.is_open() && engineInput.good()), "Failed to open engine file for reading", false, err);
+        engineInput.read(reinterpret_cast<char*>(engineBytes.data()), engineSize);
+        SMP_RETVAL_IF_FALSE((!engineInput.fail()), "Error when reading engine file", false, err);
+        engineInput.close();
+        sample::gLogInfo << "Created engine with size: " << (engineSize / 1.0_MiB) << " MiB" << std::endl;
+        env.engine.setBlob(std::move(engineBytes));
+    }
+    else
+    {
+        // Build into host memory owned by TensorRT.
+        std::unique_ptr<IHostMemory> serializedEngine{builder.buildSerializedNetwork(*env.network, *config)};
+        SMP_RETVAL_IF_FALSE(serializedEngine != nullptr, "Engine could not be created from network", false, err);
+        sample::gLogInfo << "Created engine with size: " << (serializedEngine->size() / 1.0_MiB) << " MiB" << std::endl;
+
+        // The consistency check is only attempted for safety engines when it was requested.
+        if (build.safe && build.consistency && !checkSafeEngine(serializedEngine->data(), serializedEngine->size()))
+        {
+            sample::gLogError << "Consistency validation is not supported." << std::endl;
+            return false;
+        }
+        env.engine.setBlob(serializedEngine);
+    }
+
+    auto const buildStop = std::chrono::high_resolution_clock::now();
+    float const buildSeconds = std::chrono::duration<float>(buildStop - buildStart).count();
+    sample::gLogInfo << "Engine built in " << buildSeconds << " sec." << std::endl;
+
+    if (useGlobalTimingCache)
+    {
+        // Hand the timing cache currently attached to the config to the cache-file update helper.
+        auto configTimingCache = config->getTimingCache();
+        samplesCommon::updateTimingCacheFile(gLogger.getTRTLogger(), build.timingCacheFile, configTimingCache, builder);
+    }
+
+    return true;
+}
+
+//!
+//! \brief Parse a given model, create a network and an engine.
+//!
+//! Creates the builder and network in env, loads the plugin libraries listed in sys.dynamicPlugins, parses the
+//! model and finally builds the serialized engine. For version-compatible builds, plugin libraries reported by
+//! the parser are appended to sys.dynamicPlugins (and to sys.setPluginsToSerialize unless the lean runtime is
+//! excluded), which is why sys is taken by non-const reference.
+//!
+//! \return true on success, false after reporting the failing step on err.
+//!
+bool modelToBuildEnv(
+    ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err)
+{
+    env.builder.reset(createBuilder());
+    SMP_RETVAL_IF_FALSE(env.builder != nullptr, "Builder creation failed", false, err);
+    env.builder->setErrorRecorder(&gRecorder);
+
+    auto networkFlags = 0U;
+    if (build.stronglyTyped)
+    {
+        networkFlags = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
+    }
+
+    // User-supplied plugin libraries are registered before the network is created.
+    for (auto const& userPlugin : sys.dynamicPlugins)
+    {
+        env.builder->getPluginRegistry().loadLibrary(userPlugin.c_str());
+    }
+    env.network.reset(env.builder->createNetworkV2(networkFlags));
+
+    std::vector<std::string> vcPluginLibrariesUsed;
+    SMP_RETVAL_IF_FALSE(env.network != nullptr, "Network creation failed", false, err);
+
+    // The parser is only given a list to fill when a version-compatible engine was requested.
+    std::vector<std::string>* const parsedLibsOut = build.versionCompatible ? &vcPluginLibrariesUsed : nullptr;
+    env.parser = modelToNetwork(model, build, *env.network, err, parsedLibsOut);
+    SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false, err);
+
+    bool const adoptParsedPluginLibs
+        = build.versionCompatible && !sys.ignoreParsedPluginLibs && !vcPluginLibrariesUsed.empty();
+    if (adoptParsedPluginLibs)
+    {
+        sample::gLogInfo << "The following plugin libraries were identified by the parser as required for a "
+                            "version-compatible engine:"
+                         << std::endl;
+        for (auto const& parsedLib : vcPluginLibrariesUsed)
+        {
+            sample::gLogInfo << "    " << parsedLib << std::endl;
+        }
+
+        if (!build.excludeLeanRuntime)
+        {
+            sample::gLogInfo << "These libraries will be added to --setPluginsToSerialize since --excludeLeanRuntime "
+                                "was not specified."
+                             << std::endl;
+            for (auto const& parsedLib : vcPluginLibrariesUsed)
+            {
+                sys.setPluginsToSerialize.push_back(parsedLib);
+            }
+        }
+
+        sample::gLogInfo << "These libraries will be added to --dynamicPlugins for use at inference time." << std::endl;
+        for (auto const& parsedLib : vcPluginLibrariesUsed)
+        {
+            sys.dynamicPlugins.push_back(parsedLib);
+        }
+
+        // Implicitly-added plugins from ONNX parser should be loaded into plugin registry as well.
+        for (auto const& parsedLib : vcPluginLibrariesUsed)
+        {
+            env.builder->getPluginRegistry().loadLibrary(parsedLib.c_str());
+        }
+
+        sample::gLogInfo << "Use --ignoreParsedPluginLibs to disable this behavior." << std::endl;
+    }
+
+    bool const engineBuilt = networkToSerializedEngine(build, sys, *env.builder, env, err);
+    SMP_RETVAL_IF_FALSE(engineBuilt, "Building engine failed", false, err);
+    return true;
+}
+
+namespace
+{
+//!
+//! \brief Shared implementation of the "ask for the count, then fill the buffers" query used with IRefitter.
+//!
+//! \param query Callable invoked as query(size, layerNames, roles). It is first called with (0, nullptr, nullptr)
+//!        and its return value is used as the number of items; it is then called again with buffers of that size.
+//!
+//! \return The layer names (a null name becomes an empty string) and the weights roles, index-aligned.
+//!
+template <typename QueryFn>
+std::pair<std::vector<std::string>, std::vector<WeightsRole>> collectLayerWeightsRoles(QueryFn&& query)
+{
+    // Get number of refittable items.
+    auto const nbItems = query(0, nullptr, nullptr);
+    std::vector<char const*> layerNames(nbItems);
+    // Allocate buffers for the items and get them.
+    std::vector<nvinfer1::WeightsRole> weightsRoles(nbItems);
+    query(nbItems, layerNames.data(), weightsRoles.data());
+
+    // Copy the C strings into owning std::string objects so the result does not alias the name pointers.
+    std::vector<std::string> layerNameStrs(nbItems);
+    std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(),
+        [](char const* name) { return name == nullptr ? std::string{} : std::string{name}; });
+
+    // Move the vectors into the result instead of copying them.
+    return {std::move(layerNameStrs), std::move(weightsRoles)};
+}
+
+//!
+//! \brief Collect the names and weights roles reported by refitter.getAll().
+//!
+std::pair<std::vector<std::string>, std::vector<WeightsRole>> getLayerWeightsRolePair(IRefitter& refitter)
+{
+    return collectLayerWeightsRoles([&refitter](int32_t size, char const** layerNames, WeightsRole* roles) {
+        return refitter.getAll(size, layerNames, roles);
+    });
+}
+
+//!
+//! \brief Collect the names and weights roles reported by refitter.getMissing().
+//!
+std::pair<std::vector<std::string>, std::vector<WeightsRole>> getMissingLayerWeightsRolePair(IRefitter& refitter)
+{
+    return collectLayerWeightsRoles([&refitter](int32_t size, char const** layerNames, WeightsRole* roles) {
+        return refitter.getMissing(size, layerNames, roles);
+    });
+}
+} // namespace
+
+//!
+//! \brief Open the engine file at filepath with the file reader owned by env.engine.
+//!
+//! \return true if the reader opened the file; otherwise false, after writing an error to err.
+//!
+bool loadStreamingEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, std::ostream& err)
+{
+    auto& fileReader = env.engine.getFileReader();
+    bool const opened = fileReader.open(filepath);
+    SMP_RETVAL_IF_FALSE(opened, "", false, err << "Error opening engine file: " << filepath);
+    return true;
+}
+
+//!
+//! \brief Open the engine file at filepath with the async file reader owned by env.engine.
+//!
+//! \return true if the reader opened the file; otherwise false, after writing an error to err.
+//!
+bool loadAsyncStreamingEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, std::ostream& err)
+{
+    auto& asyncFileReader = env.engine.getAsyncFileReader();
+    bool const opened = asyncFileReader.open(filepath);
+    SMP_RETVAL_IF_FALSE(opened, "", false, err << "Error opening engine file: " << filepath);
+    return true;
+}
+
+
+//!
+//! \brief Read the whole engine file at filepath into memory and store it as the blob of env.engine.
+//!
+//! \param filepath Path of the serialized engine file.
+//! \param env Build environment whose engine receives the loaded bytes.
+//! \param err Stream that receives error messages.
+//! \param enableConsistency When true, the loaded bytes are passed to checkSafeEngine() and loading fails if
+//!        that check returns false.
+//!
+//! \return true on success, false if the file cannot be opened, sized or read, or if the consistency check fails.
+//!
+bool loadEngineToBuildEnv(
+    std::string const& filepath, BuildEnvironment& env, std::ostream& err, bool const enableConsistency)
+{
+    auto const tBegin = std::chrono::high_resolution_clock::now();
+    std::ifstream engineFile(filepath, std::ios::binary);
+    SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error opening engine file: " << filepath);
+
+    // Determine the file size by seeking to the end.
+    engineFile.seekg(0, std::ifstream::end);
+    int64_t const fsize = engineFile.tellg();
+    // tellg() returns -1 when the position cannot be determined. Reject it here: passed on unchecked, the
+    // negative value would be converted to an enormous element count by the vector constructor below.
+    SMP_RETVAL_IF_FALSE(
+        engineFile.good() && fsize >= 0, "", false, err << "Error loading engine file: " << filepath);
+    engineFile.seekg(0, std::ifstream::beg);
+
+    std::vector<uint8_t> engineBlob(fsize);
+    engineFile.read(reinterpret_cast<char*>(engineBlob.data()), fsize);
+    SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error loading engine file: " << filepath);
+    auto const tEnd = std::chrono::high_resolution_clock::now();
+    float const loadTime = std::chrono::duration<float>(tEnd - tBegin).count();
+    sample::gLogInfo << "Engine loaded in " << loadTime << " sec." << std::endl;
+    sample::gLogInfo << "Loaded engine with size: " << (fsize / 1.0_MiB) << " MiB" << std::endl;
+
+    if (enableConsistency)
+    {
+        if (!checkSafeEngine(engineBlob.data(), fsize))
+        {
+            sample::gLogError << "Consistency validation is not enabled." << std::endl;
+            return false;
+        }
+    }
+
+    // Ownership of the bytes moves into the engine wrapper.
+    env.engine.setBlob(std::move(engineBlob));
+
+    return true;
+}
+
+//!
+//! \brief Log the TensorRT version stored in the header of the plan held by env.engine.
+//!
+//! The first kPLAN_SIZE bytes are taken from the open file reader, else from the open async file reader, else
+//! from the in-memory blob. The header must start with the plan tag, and only serialization version 0 is decoded.
+//!
+//! \return true if the version was logged, false if the header could not be read or is not recognized.
+//!
+bool printPlanVersion(BuildEnvironment& env, std::ostream& err)
+{
+    constexpr int64_t kPLAN_SIZE{28};
+    std::vector<uint8_t> headerBuf(kPLAN_SIZE);
+    auto header = headerBuf.data();
+
+    auto& fileReader = env.engine.getFileReader();
+    auto& asyncFileReader = env.engine.getAsyncFileReader();
+    if (fileReader.isOpen())
+    {
+        SMP_RETVAL_IF_FALSE(
+            fileReader.read(headerBuf.data(), kPLAN_SIZE) == kPLAN_SIZE, "Failed to read plan file", false, err);
+    }
+    else if (asyncFileReader.isOpen())
+    {
+        SMP_RETVAL_IF_FALSE(asyncFileReader.read(headerBuf.data(), kPLAN_SIZE, cudaStream_t{}) == kPLAN_SIZE,
+            "Failed to read plan file", false, err);
+    }
+    else
+    {
+        // No reader is open: inspect the in-memory blob directly instead of the local buffer.
+        SMP_RETVAL_IF_FALSE(env.engine.getBlob().data != nullptr, "Plan file is empty", false, err);
+        SMP_RETVAL_IF_FALSE(env.engine.getBlob().size >= kPLAN_SIZE, "Plan file is incorrect", false, err);
+        header = static_cast<uint8_t*>(env.engine.getBlob().data);
+    }
+    auto const headerWords = reinterpret_cast<uint32_t*>(header);
+
+    //! Correct TensorRT plan file starts with this tag
+    constexpr uint32_t kPLAN_FILE_TAG{0x74727466U};
+    SMP_RETVAL_IF_FALSE(headerWords[0] == kPLAN_FILE_TAG, "Failed to verify a plan tag.", false, err);
+
+    // The second 32-bit word is treated as the serialization version; only version 0 is understood here.
+    if (headerWords[1] == 0U)
+    {
+        // Blob index to store the plan version may depend on the serialization version.
+        sample::gLogInfo << "Plan was created with TensorRT version " << static_cast<int32_t>(header[24]) << "."
+                         << static_cast<int32_t>(header[25]) << "." << static_cast<int32_t>(header[26]) << "."
+                         << static_cast<int32_t>(header[27]) << std::endl;
+        return true;
+    }
+
+    sample::gLogError << "Serialization version is not supported." << std::endl;
+    return false;
+}
+
+//!
+//! \brief Log every (layer name, weights role) pair reported by a refitter created for the engine.
+//!
+//! Logs an error and returns without further output if the refitter cannot be created.
+//!
+void dumpRefittable(nvinfer1::ICudaEngine& engine)
+{
+    std::unique_ptr<IRefitter> refitter{createRefitter(engine)};
+    if (!refitter)
+    {
+        sample::gLogError << "Failed to create a refitter." << std::endl;
+        return;
+    }
+
+    auto const refittableItems = getLayerWeightsRolePair(*refitter);
+    auto const& names = refittableItems.first;
+    auto const& roles = refittableItems.second;
+    // One log line per refittable item: "<layer name> <weights role>".
+    for (size_t idx = 0, count = names.size(); idx < count; ++idx)
+    {
+        sample::gLogInfo << names[idx] << " " << roles[idx] << std::endl;
+    }
+}
+
+//!
+//! \brief Load the engine file named by engine through a temporary build environment.
+//!
+//! \return The pointer released from the environment's engine wrapper, or nullptr if loading the file failed.
+//!
+ICudaEngine* loadEngine(std::string const& engine, int32_t DLACore, std::ostream& err)
+{
+    BuildEnvironment env(/* isSafe */ false, /* versionCompatible */ false, DLACore, "", getTempfileControlDefaults());
+    // Consistency checking is disabled for this path.
+    if (!loadEngineToBuildEnv(engine, env, err, false))
+    {
+        return nullptr;
+    }
+    return env.engine.release();
+}
+
+//!
+//! \brief Serialize the engine and write the bytes to fileName.
+//!
+//! \param engine Engine to serialize.
+//! \param fileName Path of the output file (opened in binary mode, existing content is replaced).
+//! \param err Stream that receives error messages.
+//!
+//! \return true if serialization succeeded and all bytes were written and flushed, false otherwise.
+//!
+bool saveEngine(ICudaEngine const& engine, std::string const& fileName, std::ostream& err)
+{
+    // Serialize first so that a serialization failure does not create or truncate the output file.
+    std::unique_ptr<IHostMemory> serializedEngine{engine.serialize()};
+    if (serializedEngine == nullptr)
+    {
+        err << "Engine serialization failed" << std::endl;
+        return false;
+    }
+
+    std::ofstream engineFile(fileName, std::ios::binary);
+    if (!engineFile)
+    {
+        err << "Cannot open engine file: " << fileName << std::endl;
+        return false;
+    }
+
+    engineFile.write(static_cast<char const*>(serializedEngine->data()), serializedEngine->size());
+    // Close before checking the status so that errors raised while flushing buffered data are reported as well.
+    engineFile.close();
+    return !engineFile.fail();
+}
+
+//!
+//! \brief Fill env with an engine, either loaded from build.engine or built from the model.
+//!
+//! When build.load is set, safety engines are read fully into memory while other engines are only opened
+//! through the (async) streaming file reader. Otherwise the model is parsed and built. With
+//! build.getPlanVersionOnly (and build.load) only the plan version is printed. With build.save the engine blob
+//! is written to build.engine; for non-safety engines the blob is then released and the file is re-opened
+//! through the streaming reader.
+//!
+//! \return true on success, false after reporting the failing step on err.
+//!
+bool getEngineBuildEnv(
+    ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err)
+{
+    bool createEngineSuccess{false};
+
+    if (build.load)
+    {
+        if (build.safe)
+        {
+            createEngineSuccess = loadEngineToBuildEnv(build.engine, env, err, build.safe && build.consistency);
+        }
+        else
+        {
+            if (build.asyncFileReader)
+            {
+                createEngineSuccess = loadAsyncStreamingEngineToBuildEnv(build.engine, env, err);
+            }
+            else
+            {
+                createEngineSuccess = loadStreamingEngineToBuildEnv(build.engine, env, err);
+            }
+        }
+    }
+    else
+    {
+        createEngineSuccess = modelToBuildEnv(model, build, sys, env, err);
+    }
+
+    SMP_RETVAL_IF_FALSE(createEngineSuccess, "Failed to create engine from model or file.", false, err);
+
+    if (build.getPlanVersionOnly && build.load)
+    {
+        SMP_RETVAL_IF_FALSE(printPlanVersion(env, err), "Failed to get plan file version.", false, err);
+        return true;
+    }
+
+    if (build.save)
+    {
+        std::ofstream engineFile(build.engine, std::ios::binary);
+        auto& engineBlob = env.engine.getBlob();
+        engineFile.write(static_cast<char const*>(engineBlob.data), engineBlob.size);
+        // Close (which flushes) before checking the stream state: an error that only shows up while flushing
+        // buffered data must be caught before the blob is released and the file is re-opened below.
+        engineFile.close();
+        SMP_RETVAL_IF_FALSE(!engineFile.fail(), "Saving engine to file failed.", false, err);
+        if (!build.safe)
+        {
+            // Drop the in-memory copy and continue with the file on disk via a streaming reader.
+            env.engine.releaseBlob();
+            if (build.asyncFileReader)
+            {
+                SMP_RETVAL_IF_FALSE(loadAsyncStreamingEngineToBuildEnv(build.engine, env, err),
+                    "Reading engine file via async stream reader failed.", false, err);
+            }
+            else
+            {
+                SMP_RETVAL_IF_FALSE(loadStreamingEngineToBuildEnv(build.engine, env, err),
+                    "Reading engine file via stream reader failed.", false, err);
+            }
+        }
+    }
+
+    return true;
+}
+
+//!
+//! \brief Collect the (role, weights) pairs of a layer that are candidates for refitting.
+//!
+//! There is no getWeightsName API, so each set of weights is identified by its WeightsRole.
+//! Only constant, convolution, deconvolution and scale layers contribute entries. Every other layer type,
+//! and constant layers whose data type does not support refit, yield an empty vector.
+//!
+//! \param l The layer to inspect.
+//!
+//! \return The (role, weights) pairs of the layer, possibly empty.
+//!
+std::vector<std::pair<WeightsRole, Weights>> getAllRefitWeightsForLayer(ILayer const& l)
+{
+    std::vector<std::pair<WeightsRole, Weights>> refitCandidates;
+
+    // Both switches list every enumerator explicitly instead of using a default label.
+    switch (l.getType())
+    {
+    case LayerType::kCONSTANT:
+    {
+        auto const constantWeights = static_cast<nvinfer1::IConstantLayer const&>(l).getWeights();
+        switch (constantWeights.type)
+        {
+        case DataType::kFLOAT:
+        case DataType::kHALF:
+        case DataType::kBF16:
+        case DataType::kINT8:
+        case DataType::kINT32:
+        case DataType::kINT64: refitCandidates.emplace_back(WeightsRole::kCONSTANT, constantWeights); break;
+        case DataType::kBOOL:
+        case DataType::kUINT8:
+        case DataType::kFP8:
+        case DataType::kINT4:
+        case DataType::kFP4:
+        case DataType::kE8M0:
+            // Refit not supported for these types.
+            break;
+        }
+        break;
+    }
+    case LayerType::kCONVOLUTION:
+    {
+        auto const& conv = static_cast<nvinfer1::IConvolutionLayer const&>(l);
+        refitCandidates.emplace_back(WeightsRole::kKERNEL, conv.getKernelWeights());
+        refitCandidates.emplace_back(WeightsRole::kBIAS, conv.getBiasWeights());
+        break;
+    }
+    case LayerType::kDECONVOLUTION:
+    {
+        auto const& deconv = static_cast<nvinfer1::IDeconvolutionLayer const&>(l);
+        refitCandidates.emplace_back(WeightsRole::kKERNEL, deconv.getKernelWeights());
+        refitCandidates.emplace_back(WeightsRole::kBIAS, deconv.getBiasWeights());
+        break;
+    }
+    case LayerType::kSCALE:
+    {
+        auto const& scale = static_cast<nvinfer1::IScaleLayer const&>(l);
+        refitCandidates.emplace_back(WeightsRole::kSCALE, scale.getScale());
+        refitCandidates.emplace_back(WeightsRole::kSHIFT, scale.getShift());
+        break;
+    }
+    case LayerType::kACTIVATION:
+    case LayerType::kASSERTION:
+    case LayerType::kCAST:
+    case LayerType::kCONCATENATION:
+    case LayerType::kCONDITION:
+    case LayerType::kCONDITIONAL_INPUT:
+    case LayerType::kCONDITIONAL_OUTPUT:
+    case LayerType::kCUMULATIVE:
+    case LayerType::kDEQUANTIZE:
+    case LayerType::kDYNAMIC_QUANTIZE:
+    case LayerType::kEINSUM:
+    case LayerType::kELEMENTWISE:
+    case LayerType::kFILL:
+    case LayerType::kGATHER:
+    case LayerType::kGRID_SAMPLE:
+    case LayerType::kIDENTITY:
+    case LayerType::kITERATOR:
+    case LayerType::kLOOP_OUTPUT:
+    case LayerType::kLRN:
+    case LayerType::kMATRIX_MULTIPLY:
+    case LayerType::kNMS:
+    case LayerType::kNON_ZERO:
+    case LayerType::kNORMALIZATION:
+    case LayerType::kONE_HOT:
+    case LayerType::kPADDING:
+    case LayerType::kPARAMETRIC_RELU:
+    case LayerType::kPLUGIN:
+    case LayerType::kPLUGIN_V2:
+    case LayerType::kPLUGIN_V3:
+    case LayerType::kPOOLING:
+    case LayerType::kQUANTIZE:
+    case LayerType::kRAGGED_SOFTMAX:
+    case LayerType::kRECURRENCE:
+    case LayerType::kREDUCE:
+    case LayerType::kRESIZE:
+    case LayerType::kREVERSE_SEQUENCE:
+    case LayerType::kSCATTER:
+    case LayerType::kSELECT:
+    case LayerType::kSHAPE:
+    case LayerType::kSHUFFLE:
+    case LayerType::kSLICE:
+    case LayerType::kSOFTMAX:
+    case LayerType::kSQUEEZE:
+    case LayerType::kTOPK:
+    case LayerType::kTRIP_LIMIT:
+    case LayerType::kUNARY:
+    case LayerType::kUNSQUEEZE:
+        // No weights are collected for these layer types.
+        break;
+    }
+    return refitCandidates;
+}
+
+//!
+//! \brief Refit \p engine with the weights found in \p network and log the average refit time.
+//!
+//! The weights are set once, a synchronous warm-up refit is run, and then kLOOP asynchronous refits are
+//! enqueued on one stream and timed together.
+//!
+//! \param network Network whose layers supply the weights used for refitting.
+//! \param engine Engine the refitter is created for.
+//! \param multiThreading If true, the refitter is allowed to use up to 10 threads.
+//!
+//! \return true if every step succeeded; false if the refitter could not be created or configured, a weight
+//!         could not be set, refittable weights are missing, or a refit call failed.
+//!
+bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, bool multiThreading)
+{
+    using time_point = std::chrono::time_point<std::chrono::steady_clock>;
+    using durationMs = std::chrono::duration<float, std::milli>;
+
+    auto const nbLayers = network.getNbLayers();
+    std::unique_ptr<IRefitter> refitter{createRefitter(engine)};
+    // createRefitter() can return nullptr (for example when the refitter entrypoint is compiled out), so fail
+    // cleanly here instead of dereferencing a null pointer below.
+    if (refitter == nullptr)
+    {
+        sample::gLogError << "Failed to create refitter." << std::endl;
+        return false;
+    }
+    // Set max threads that can be used by refitter.
+    if (multiThreading && !refitter->setMaxThreads(10))
+    {
+        sample::gLogError << "Failed to set max threads to refitter." << std::endl;
+        return false;
+    }
+    auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter);
+    // We use std::string instead of char const* since we can have copies of layer names.
+    std::set<std::pair<std::string, WeightsRole>> layerRoleSet;
+
+    auto const& layerNames = layerWeightsRolePair.first;
+    auto const& weightsRoles = layerWeightsRolePair.second;
+
+    // Zip the parallel name/role sequences into a set for fast membership tests.
+    std::transform(layerNames.begin(), layerNames.end(), weightsRoles.begin(),
+        std::inserter(layerRoleSet, layerRoleSet.begin()),
+        [](std::string const& layerName, WeightsRole const role) { return std::make_pair(layerName, role); });
+
+    auto const isRefittable = [&layerRoleSet](char const* layerName, WeightsRole const role) {
+        return layerRoleSet.find(std::make_pair(layerName, role)) != layerRoleSet.end();
+    };
+
+    // Hand every refittable (layer, role) weight of the network to the refitter; stop at the first failure.
+    auto const setWeights = [&] {
+        for (int32_t i = 0; i < nbLayers; i++)
+        {
+            auto const layer = network.getLayer(i);
+            auto const roleWeightsVec = getAllRefitWeightsForLayer(*layer);
+            for (auto const& roleWeights : roleWeightsVec)
+            {
+                if (isRefittable(layer->getName(), roleWeights.first))
+                {
+                    bool const success = refitter->setWeights(layer->getName(), roleWeights.first, roleWeights.second);
+                    if (!success)
+                    {
+                        return false;
+                    }
+                }
+            }
+        }
+        return true;
+    };
+
+    // Log every (layer, role) the refitter still misses; returns true only when nothing is missing.
+    auto const reportMissingWeights = [&] {
+        auto const& missingPair = getMissingLayerWeightsRolePair(*refitter);
+        auto const& layerNames = missingPair.first;
+        auto const& weightsRoles = missingPair.second;
+        for (size_t i = 0; i < layerNames.size(); ++i)
+        {
+            sample::gLogError << "Missing (" << layerNames[i] << ", " << weightsRoles[i] << ") for refitting."
+                              << std::endl;
+        }
+        return layerNames.empty();
+    };
+
+    // Skip weights validation since we are confident that the new weights are similar to the weights used to build
+    // engine.
+    refitter->setWeightsValidation(false);
+
+    // Warm up and report missing weights
+    // We only need to set weights for the first time and that can be reused in later refitting process.
+    bool const success = setWeights() && reportMissingWeights() && refitter->refitCudaEngine();
+    if (!success)
+    {
+        return false;
+    }
+
+    TrtCudaStream stream;
+    constexpr int32_t kLOOP = 10;
+    time_point const refitStartTime{std::chrono::steady_clock::now()};
+    for (int32_t l = 0; l < kLOOP; l++)
+    {
+        if (!refitter->refitCudaEngineAsync(stream.get()))
+        {
+            return false;
+        }
+    }
+    // The refits were only enqueued; wait for them before taking the end timestamp.
+    stream.synchronize();
+    time_point const refitEndTime{std::chrono::steady_clock::now()};
+
+    sample::gLogInfo << "Engine refitted"
+                     << " in " << durationMs(refitEndTime - refitStartTime).count() / kLOOP << " ms." << std::endl;
+    return true;
+}
+
+namespace
+{
+//!
+//! \brief Try to dlopen the safe runtime library.
+//!
+//! \return The library handle, or nullptr if loading failed. On Windows no load is attempted.
+//!
+void* initSafeRuntime()
+{
+    // Currently libnvinfer_safe_debug.so for samplesCommon::isDebug() is not ready.
+#if defined(_WIN32)
+    return nullptr;
+#else
+#if SANITIZER_BUILD
+    int const openFlags = RTLD_LAZY | RTLD_NODELETE;
+#else
+    // RTLD_GLOBAL is used for symbol resolution of subsequently loaded plugin libraries
+    int const openFlags = RTLD_LAZY | RTLD_GLOBAL;
+#endif
+    return dlopen("libnvinfer_safe.so", openFlags);
+#endif
+}
+
+//!
+//! \brief Try to dlopen the consistency checker library.
+//!
+//! \return The library handle, or nullptr if loading failed. On Windows no load is attempted.
+//!
+void* initConsistencyCheckerLibrary()
+{
+#if defined(_WIN32)
+    return nullptr;
+#else
+#if SANITIZER_BUILD
+    int const openFlags = RTLD_LAZY | RTLD_NODELETE;
+#else
+    int const openFlags = RTLD_LAZY;
+#endif
+    return dlopen("libnvinfer_checker_shared.so", openFlags);
+#endif
+}
+
+#if !defined(_WIN32)
+//! Deleter that closes a non-null library handle with dlclose().
+struct DllDeleter
+{
+    void operator()(void* handle)
+    {
+        if (handle == nullptr)
+        {
+            return;
+        }
+        dlclose(handle);
+    }
+};
+//! Handles of the optional libraries, opened during static initialization of this translation unit.
+const std::unique_ptr<void, DllDeleter> safeRuntimeLibrary{initSafeRuntime()};
+const std::unique_ptr<void, DllDeleter> consistencyCheckerLibrary{initConsistencyCheckerLibrary()};
+#endif
+} // namespace
+
+//! \return true if the safe runtime library handle is non-null; always false on Windows.
+bool hasSafeRuntime()
+{
+#if !defined(_WIN32)
+    return static_cast<bool>(safeRuntimeLibrary);
+#else
+    return false;
+#endif
+}
+
+//! \return true if the consistency checker library handle is non-null; always false on Windows.
+bool hasConsistencyChecker()
+{
+#if !defined(_WIN32)
+    return static_cast<bool>(consistencyCheckerLibrary);
+#else
+    return false;
+#endif
+}
+
+#if ENABLE_UNIFIED_BUILDER
+
+//!
+//! \brief Create a consistency checker for a serialized engine via the dynamically loaded checker library.
+//!
+//! \param recorder Recorder handed to the checker factory.
+//! \param serializedEngine Pointer to the serialized engine data.
+//! \param engineSize Size of the serialized engine in bytes.
+//!
+//! \return The checker produced by the factory, or nullptr when the input is null or not positive in size, on
+//!         Windows, when the safe runtime or the checker library is not loaded, when the factory symbol is
+//!         not found, or when the factory does not report ErrorCode::kSUCCESS.
+//!
+nvinfer2::safe::consistency::IConsistencyChecker* createConsistencyChecker(
+    sample::SampleSafeRecorder& recorder, void const* serializedEngine, int32_t const engineSize) noexcept
+{
+    nvinfer2::safe::consistency::IConsistencyChecker* checker{nullptr};
+
+    // Reject negative sizes too: they would wrap to a huge value when converted to size_t for the factory.
+    if (serializedEngine == nullptr || engineSize <= 0)
+    {
+        return checker;
+    }
+
+#if !defined(_WIN32)
+    constexpr char symbolName[] = "createConsistencyChecker";
+    typedef ErrorCode (*CreateCheckerFn)(nvinfer2::safe::consistency::IConsistencyChecker * &checker,
+        sample::SampleSafeRecorder & recorder, void const* data, size_t size);
+    // The factory is looked up in the checker library, so that library must be loaded as well as the safe
+    // runtime. With a null handle dlsym() would not search the checker library (glibc treats a null handle
+    // as RTLD_DEFAULT, i.e. a global symbol search).
+    if (hasSafeRuntime() && hasConsistencyChecker())
+    {
+        auto createFn = reinterpret_cast<CreateCheckerFn>(dlsym(consistencyCheckerLibrary.get(), symbolName));
+        if (createFn != nullptr)
+        {
+            ErrorCode errorCode = createFn(checker, recorder, serializedEngine, static_cast<size_t>(engineSize));
+            if (errorCode != ErrorCode::kSUCCESS)
+            {
+                return nullptr;
+            }
+        }
+    }
+#endif
+    return checker;
+}
+#endif
+
+//!
+//! \brief Run the consistency checker on a serialized engine.
+//!
+//! \param serializedEngine Pointer to the serialized engine data.
+//! \param engineSize Size of the serialized engine in bytes.
+//!
+//! \return true only if the checker library is loaded, a checker could be created and validation passed.
+//!         Always false when built without ENABLE_UNIFIED_BUILDER.
+//!
+bool checkSafeEngine(void const* serializedEngine, int64_t const engineSize)
+{
+    if (!hasConsistencyChecker())
+    {
+        sample::gLogError << "Cannot perform consistency check because the checker is not loaded.." << std::endl;
+        return false;
+    }
+
+#if ENABLE_UNIFIED_BUILDER
+    // createConsistencyChecker() takes the size as int32_t. Reject values that cannot be represented instead
+    // of letting them be narrowed silently into a wrong size.
+    constexpr int64_t kMAX_CHECKER_ENGINE_SIZE{0x7FFFFFFF};
+    if (engineSize < 0 || engineSize > kMAX_CHECKER_ENGINE_SIZE)
+    {
+        sample::gLogError << "Serialized engine size " << engineSize
+                          << " is not supported by the consistency checker." << std::endl;
+        return false;
+    }
+
+    sample::SampleSafeRecorder recorder{nvinfer2::safe::Severity::kINFO};
+
+    auto checker = std::unique_ptr<nvinfer2::safe::consistency::IConsistencyChecker>(
+        createConsistencyChecker(recorder, serializedEngine, static_cast<int32_t>(engineSize)));
+    if (checker.get() == nullptr)
+    {
+        sample::gLogError << "Failed to create consistency checker." << std::endl;
+        return false;
+    }
+    sample::gLogInfo << "Start consistency checking." << std::endl;
+    if (!checker->validate())
+    {
+        sample::gLogError << "Consistency validation failed." << std::endl;
+        return false;
+    }
+    sample::gLogInfo << "Consistency validation passed." << std::endl;
+    return true;
+#else
+    return false;
+#endif
+}
+
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEngines.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEngines.h
new file mode 100644
index 0000000000000000000000000000000000000000..7cde51d72b816731f6d11cda70e76ba19f2edbdb
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEngines.h
@@ -0,0 +1,354 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_ENGINES_H
+#define TRT_SAMPLE_ENGINES_H
+
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+#include "sampleOptions.h"
+#include "sampleUtils.h"
+#include "streamReader.h"
+#include <iostream>
+#include <vector>
+
+namespace sample
+{
+
+//!
+//! \brief Holder for the model parser. Converts to true when an ONNX parser is held.
+//!
+struct Parser
+{
+    std::unique_ptr<nvonnxparser::IParser> onnxParser;
+
+    operator bool() const
+    {
+        return static_cast<bool>(onnxParser);
+    }
+};
+
+//!
+//! \brief Helper struct to facilitate engine serialization and deserialization.
+//!
+//! It is a plain (pointer, size) view and does not own the underlying memory.
+//!
+struct EngineBlob
+{
+    //! Wrap \p engineSize bytes starting at \p engineData without taking ownership.
+    EngineBlob(void* engineData, size_t engineSize)
+        : data{engineData}
+        , size{engineSize}
+    {
+    }
+    //! Start of the serialized engine data.
+    void* data{};
+    //! Number of bytes of serialized engine data.
+    size_t size{};
+    //! \return true when the blob holds zero bytes.
+    bool empty() const
+    {
+        return !(size > 0);
+    }
+};
+
+//!
+//! \brief A helper class to hold a serialized engine (std or safe) and only deserialize it when being accessed.
+//!
+//! The serialized engine is kept either in a byte vector or in an IHostMemory object; at most one of the
+//! two is populated after a call to setBlob().
+//!
+class LazilyDeserializedEngine
+{
+public:
+    //!
+    //! \brief Delete default constructor to make sure isSafe and DLACore are always set.
+    //!
+    LazilyDeserializedEngine() = delete;
+
+    //!
+    //! \brief Constructor of LazilyDeserializedEngine.
+    //!
+    //! Stores the given options and creates both stream readers up front.
+    //!
+    LazilyDeserializedEngine(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir,
+        nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath)
+        : mIsSafe(isSafe)
+        , mVersionCompatible(versionCompatible)
+        , mDLACore(DLACore)
+        , mTempdir(tempdir)
+        , mTempfileControls(tempfileControls)
+        , mLeanDLLPath(leanDLLPath)
+    {
+        // Only one of these is relevant for any given trtexec call.
+        // Enabled using --asyncFileReader flag.
+        mAsyncFileReader = std::make_unique<samplesCommon::AsyncStreamReader>();
+        // Enabled using --load flag.
+        mFileReader = std::make_unique<samplesCommon::FileStreamReader>();
+    }
+
+    //!
+    //! \brief Move from another LazilyDeserializedEngine.
+    //!
+    LazilyDeserializedEngine(LazilyDeserializedEngine&& other) = default;
+
+    //!
+    //! \brief Delete copy constructor.
+    //!
+    LazilyDeserializedEngine(LazilyDeserializedEngine const& other) = delete;
+
+    //!
+    //! \brief Get the pointer to the ICudaEngine. Triggers deserialization if not already done so.
+    //!
+    nvinfer1::ICudaEngine* get();
+
+    //! \overload nvinfer1::ICudaEngine* get();
+    [[nodiscard]] nvinfer1::ICudaEngine* operator->()
+    {
+        return this->get();
+    }
+
+    //!
+    //! \brief Get the pointer to the ICudaEngine and release the ownership.
+    //!
+    nvinfer1::ICudaEngine* release();
+
+    //!
+    //! \brief Check Safe DLA engine built with kDLA_STANDALONE should not be run via TRT
+    //!
+    bool checkDLASafe();
+
+    //!
+    //! \brief Get the underlying blob storing serialized engine.
+    //!
+    //! Asserts that neither file reader is open and that a non-empty blob is stored.
+    //!
+    //! \return A non-owning view of the stored serialized engine.
+    //!
+    EngineBlob const getBlob() const
+    {
+        ASSERT((!mFileReader || !mFileReader->isOpen())
+            && "Attempting to access the blob when there is an open file reader!");
+        ASSERT((!mAsyncFileReader || !mAsyncFileReader->isOpen())
+            && "Attempting to access the blob when there is an open async file reader!");
+        if (!mEngineBlob.empty())
+        {
+            // EngineBlob exposes a mutable pointer, hence the const_cast on the vector storage.
+            return EngineBlob{const_cast<void*>(static_cast<void const*>(mEngineBlob.data())), mEngineBlob.size()};
+        }
+        if (mEngineBlobHostMemory != nullptr && mEngineBlobHostMemory->size() > 0)
+        {
+            return EngineBlob{mEngineBlobHostMemory->data(), mEngineBlobHostMemory->size()};
+        }
+        ASSERT(false && "Attempting to access an empty engine!");
+        return EngineBlob{nullptr, 0};
+    }
+
+    //!
+    //! \brief Set the underlying blob storing the serialized engine without duplicating IHostMemory.
+    //!
+    //! Takes ownership of \p data (leaving it empty) and discards any previously deserialized engine.
+    //!
+    void setBlob(std::unique_ptr<nvinfer1::IHostMemory>& data)
+    {
+        ASSERT(data.get() && data->size() > 0);
+        mEngineBlobHostMemory = std::move(data);
+        // getBlob() prefers the vector-backed blob, so drop it to avoid returning a stale engine.
+        std::vector<uint8_t>{}.swap(mEngineBlob);
+        mEngine.reset();
+    }
+
+    //!
+    //! \brief Set the underlying blob storing the serialized engine without duplicating vector memory.
+    //!
+    //! Discards any previously deserialized engine.
+    //!
+    void setBlob(std::vector<uint8_t>&& engineBlob)
+    {
+        mEngineBlob = std::move(engineBlob);
+        // Keep a single source of truth for the serialized engine.
+        mEngineBlobHostMemory.reset();
+        mEngine.reset();
+    }
+
+    //!
+    //! \brief Release the underlying blob without deleting the deserialized engine.
+    //!
+    void releaseBlob()
+    {
+        // clear() would keep the vector's capacity allocated; swapping with an empty vector frees the storage.
+        std::vector<uint8_t>{}.swap(mEngineBlob);
+        mEngineBlobHostMemory.reset();
+    }
+
+    //!
+    //! \brief Get the file stream reader used for deserialization
+    //!
+    samplesCommon::FileStreamReader& getFileReader()
+    {
+        ASSERT(mFileReader);
+        return *mFileReader;
+    }
+
+    //!
+    //! \brief Get the async stream reader used for deserialization
+    //!
+    samplesCommon::AsyncStreamReader& getAsyncFileReader()
+    {
+        ASSERT(mAsyncFileReader);
+        return *mAsyncFileReader;
+    }
+
+
+    //!
+    //! \brief Get if safe mode is enabled.
+    //!
+    bool isSafe() const
+    {
+        return mIsSafe;
+    }
+
+    //!
+    //! \brief Store the list of dynamic plugins.
+    //!
+    void setDynamicPlugins(std::vector<std::string> const& dynamicPlugins)
+    {
+        mDynamicPlugins = dynamicPlugins;
+    }
+
+private:
+    bool mIsSafe{false};
+    bool mVersionCompatible{false};
+    int32_t mDLACore{-1};
+    //! Serialized engine held as a byte vector (see setBlob(std::vector<uint8_t>&&)).
+    std::vector<uint8_t> mEngineBlob;
+    std::unique_ptr<samplesCommon::FileStreamReader> mFileReader;
+    std::unique_ptr<samplesCommon::AsyncStreamReader> mAsyncFileReader;
+
+
+    // Directly use the host memory of a serialized engine instead of duplicating the engine in CPU memory.
+    std::unique_ptr<nvinfer1::IHostMemory> mEngineBlobHostMemory;
+
+    std::string mTempdir{};
+    nvinfer1::TempfileControlFlags mTempfileControls{getTempfileControlDefaults()};
+    std::string mLeanDLLPath{};
+    std::vector<std::string> mDynamicPlugins;
+
+    //! \name Owned TensorRT objects
+    //! Per TensorRT object lifetime requirements as outlined in the developer guide,
+    //! the runtime must remain live while any engines created by the runtime are live.
+    //! DO NOT ADJUST the declaration order here: runtime -> (engine).
+    //! Destruction occurs in reverse declaration order: (engine) -> runtime.
+    //!@{
+
+    //! The runtime used to track parent of mRuntime if one exists.
+    //! Needed to load mRuntime if lean.so is supplied through file system path.
+    std::unique_ptr<nvinfer1::IRuntime> mParentRuntime{};
+
+    //! The runtime that is used to deserialize the engine.
+    std::unique_ptr<nvinfer1::IRuntime> mRuntime{};
+
+    //! If mIsSafe is false, this points to the deserialized std engine
+    std::unique_ptr<nvinfer1::ICudaEngine> mEngine{};
+
+    //!@}
+};
+
+//!
+//! \brief Bundle of the objects used while creating an engine: builder, network, parser, the engine itself
+//!        and the command line string.
+//!
+//! The struct can be neither default-constructed, copied nor moved.
+//!
+struct BuildEnvironment
+{
+    BuildEnvironment() = delete;
+    BuildEnvironment(BuildEnvironment const& other) = delete;
+    BuildEnvironment(BuildEnvironment&& other) = delete;
+    //! Forwards the engine-related options to the LazilyDeserializedEngine member and stores \p cmdline.
+    //! builder, network and parser are left empty.
+    BuildEnvironment(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir,
+        nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath = "",
+        std::string const& cmdline = "")
+        : engine(isSafe, versionCompatible, DLACore, tempdir, tempfileControls, leanDLLPath)
+        , cmdline(cmdline)
+    {
+    }
+
+    //! \name Owned TensorRT objects
+    //! Per TensorRT object lifetime requirements as outlined in the developer guide,
+    //! factory objects must remain live while the objects created by those factories
+    //! are live (with the exception of builder -> engine).
+    //! DO NOT ADJUST the declaration order here: builder -> network -> parser.
+    //! Destruction occurs in reverse declaration order: parser -> network -> builder.
+    //!@{
+
+    //! The builder used to build the engine.
+    std::unique_ptr<nvinfer1::IBuilder> builder;
+
+    //! The network used by the builder.
+    std::unique_ptr<nvinfer1::INetworkDefinition> network;
+
+    //! The parser used to specify the network.
+    Parser parser;
+
+    //! The engine, held as a serialized blob and deserialized lazily on access.
+    LazilyDeserializedEngine engine;
+
+    //! The command line string.
+    std::string cmdline;
+    //!@}
+};
+
+//!
+//! \brief Log refittable layers and weights of a refittable engine
+//!
+void dumpRefittable(nvinfer1::ICudaEngine& engine);
+
+//!
+//! \brief Load a serialized engine
+//!
+//! \return Pointer to the engine loaded or nullptr if the operation failed
+//!
+nvinfer1::ICudaEngine* loadEngine(std::string const& engine, int32_t DLACore, std::ostream& err);
+
+//!
+//! \brief Save an engine into a file
+//!
+//! \return boolean Return true if the engine was successfully saved
+//!
+bool saveEngine(nvinfer1::ICudaEngine const& engine, std::string const& fileName, std::ostream& err);
+
+//!
+//! \brief Create an engine from model or serialized file, and optionally save engine
+//!
+//! The resulting engine is stored in \p env.
+//!
+//! \return boolean Return true if the engine was successfully created (and saved or reloaded when requested)
+//!
+bool getEngineBuildEnv(
+    ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err);
+
+//!
+//! \brief Create a serialized network
+//!
+//! \return Pointer to a host memory for a serialized network
+//!
+nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys,
+    nvinfer1::IBuilder& builder, nvinfer1::INetworkDefinition& network, std::ostream& err);
+
+//!
+//! \brief Transfer model to a serialized network
+//!
+//! \return Pointer to a host memory for a serialized network
+//!
+nvinfer1::IHostMemory* modelToSerialized(
+    const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err);
+
+//!
+//! \brief Serialize network and save it into a file
+//!
+//! \return boolean Return true if the network was successfully serialized and saved
+//!
+bool serializeAndSave(
+    const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err);
+
+//!
+//! \brief Refit the engine with the weights of the network and log the average time per refit
+//!
+//! \return boolean Return true if setting the weights and all refit calls succeeded
+//!
+bool timeRefit(const nvinfer1::INetworkDefinition& network, nvinfer1::ICudaEngine& engine, bool multiThreading);
+
+//!
+//! \brief Set tensor scales from a calibration table
+//!
+void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector<IOFormat> const& inputFormats,
+    std::vector<IOFormat> const& outputFormats, std::string const& calibrationFile);
+
+//!
+//! \brief Check if safe runtime is loaded.
+//!
+bool hasSafeRuntime();
+
+//!
+//! \brief Run consistency check on serialized engine.
+//!
+//! \return boolean Return true if the consistency validation passed
+//!
+bool checkSafeEngine(void const* serializedEngine, int64_t const engineSize);
+
+//!
+//! \brief Load an engine file into the build environment through the stream reader
+//!
+//! \return boolean Return true on success; callers treat false as a failure to read the engine file
+//!
+bool loadStreamingEngineToBuildEnv(std::string const& engine, BuildEnvironment& env, std::ostream& err);
+
+//!
+//! \brief Load an engine file into the build environment
+//!
+//! NOTE(review): enableConsistency presumably requests a consistency check of the loaded engine; confirm
+//! against the definition.
+//!
+//! \return boolean Return true on success
+//!
+bool loadEngineToBuildEnv(
+    std::string const& engine, BuildEnvironment& env, std::ostream& err, bool const enableConsistency);
+} // namespace sample
+
+#endif // TRT_SAMPLE_ENGINES_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEntrypoints.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEntrypoints.h
new file mode 100644
index 0000000000000000000000000000000000000000..e77cd443ddb2bf0e8f7cf8e7d0e4d75fd6001f79
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleEntrypoints.h
@@ -0,0 +1,101 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_ENTRYPOINTS_H
+#define TRT_SAMPLE_ENTRYPOINTS_H
+
+//! \file sampleEntrypoints.h
+//!
+//! Declares and conditionally defines entrypoints needed to create base TensorRT objects, depending
+//! on whether the given sample uses TRT at link time or dynamically.  Since common code is built once
+//! and shared across all samples (both link-time and dynamic TRT), it does not define these entrypoints,
+//! so each sample must define them individually.
+//!
+//! Samples that use TRT at link time can define DEFINE_TRT_ENTRYPOINTS before including this header to
+//! pick up the definitions here.
+
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+#include "logger.h"
+
+// Entrypoints that every sample must provide, either by defining DEFINE_TRT_ENTRYPOINTS before including this
+// header or by supplying its own definitions.
+extern nvinfer1::IBuilder* createBuilder();
+extern nvinfer1::IRuntime* createRuntime();
+extern nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine);
+
+extern nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network);
+
+// By default this header only declares the entrypoints; the definitions below are opt-in.
+#if !defined(DEFINE_TRT_ENTRYPOINTS)
+#define DEFINE_TRT_ENTRYPOINTS 0
+#endif
+
+// Allow opting out of individual entrypoints that are unused by the sample
+// An entrypoint whose macro is 0 is still defined below, but returns a null pointer.
+#if !defined(DEFINE_TRT_BUILDER_ENTRYPOINT)
+#define DEFINE_TRT_BUILDER_ENTRYPOINT 1
+#endif
+#if !defined(DEFINE_TRT_RUNTIME_ENTRYPOINT)
+#define DEFINE_TRT_RUNTIME_ENTRYPOINT 1
+#endif
+#if !defined(DEFINE_TRT_REFITTER_ENTRYPOINT)
+#define DEFINE_TRT_REFITTER_ENTRYPOINT 1
+#endif
+#if !defined(DEFINE_TRT_ONNX_PARSER_ENTRYPOINT)
+#define DEFINE_TRT_ONNX_PARSER_ENTRYPOINT 1
+#endif
+// NOTE(review): DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT is not referenced by any definition in this header.
+#if !defined(DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT)
+#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 1
+#endif
+
+#if DEFINE_TRT_ENTRYPOINTS
+//! \return A builder created with the sample logger, or nullptr when DEFINE_TRT_BUILDER_ENTRYPOINT is 0.
+nvinfer1::IBuilder* createBuilder()
+{
+#if !DEFINE_TRT_BUILDER_ENTRYPOINT
+    return nullptr;
+#else
+    return nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger());
+#endif
+}
+
+//! \return A runtime created with the sample logger, or nullptr when DEFINE_TRT_RUNTIME_ENTRYPOINT is 0.
+nvinfer1::IRuntime* createRuntime()
+{
+#if !DEFINE_TRT_RUNTIME_ENTRYPOINT
+    return nullptr;
+#else
+    return nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger());
+#endif
+}
+
+//! \return A refitter for \p engine created with the sample logger, or nullptr when
+//!         DEFINE_TRT_REFITTER_ENTRYPOINT is 0.
+nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine)
+{
+#if !DEFINE_TRT_REFITTER_ENTRYPOINT
+    return nullptr;
+#else
+    return nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger());
+#endif
+}
+
+//! \return An ONNX parser for \p network created with the sample logger, or nullptr when
+//!         DEFINE_TRT_ONNX_PARSER_ENTRYPOINT is 0.
+nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network)
+{
+#if !DEFINE_TRT_ONNX_PARSER_ENTRYPOINT
+    return nullptr;
+#else
+    return nvonnxparser::createParser(network, sample::gLogger.getTRTLogger());
+#endif
+}
+
+#endif // DEFINE_TRT_ENTRYPOINTS
+
+#endif // TRT_SAMPLE_ENTRYPOINTS_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleInference.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleInference.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cea7afd6203affa7eec36e8e7aa3179ddb9d5dd1
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleInference.cpp
@@ -0,0 +1,2197 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <array>
+#include <chrono>
+#include <cstring>
+#include <cuda.h>
+#include <cuda_profiler_api.h>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <mutex>
+#include <thread>
+#include <utility>
+#include <vector>
+
+#if defined(__QNX__)
+#include <sys/neutrino.h>
+#include <sys/syspage.h>
+#endif
+
+#include "NvInferRuntime.h"
+#include "bfloat16.h"
+#include "common.h"
+#include "debugTensorWriter.h"
+#include "logger.h"
+#include "sampleDevice.h"
+#include "sampleEngines.h"
+#include "sampleInference.h"
+#include "sampleOptions.h"
+#include "sampleReporting.h"
+#include "sampleUtils.h"
+#include <cuda.h>
+
+#if CUDA_VERSION >= 11060
+#include <cuda_fp8.h>
+#endif
+
+using namespace nvinfer1;
+#if ENABLE_UNIFIED_BUILDER
+using namespace nvinfer2::safe;
+// Provide a weak default definition that can be overridden
+__attribute__((weak)) std::shared_ptr<sample::SampleSafeRecorder> gSafeRecorder
+    = std::make_shared<sample::SampleSafeRecorder>(nvinfer2::safe::Severity::kINFO);
+#endif
+
+namespace sample
+{
+#if !TRT_STATIC
+//! Map a runtime mode onto the name of the library that implements it.
+//! \throw std::runtime_error when \p mode is none of the handled enumerators.
+std::string const& getRuntimeLibraryName(RuntimeMode const mode)
+{
+    if (mode == RuntimeMode::kFULL)
+    {
+        return kNVINFER_LIBNAME;
+    }
+    if (mode == RuntimeMode::kDISPATCH)
+    {
+        return kNVINFER_DISPATCH_LIBNAME;
+    }
+    if (mode == RuntimeMode::kLEAN)
+    {
+        return kNVINFER_LEAN_LIBNAME;
+    }
+    if (mode == RuntimeMode::kSAFE)
+    {
+        return kNVINFER_SAFE_LIBNAME;
+    }
+    throw std::runtime_error("Unknown runtime mode");
+}
+
+#endif // !TRT_STATIC
+
+#if ENABLE_UNIFIED_BUILDER
+namespace safe
+{
+namespace
+{
+//! Call signatures of the entry points looked up in the safe runtime library.
+using CreateTRTGraphFn = nvinfer1::ErrorCode(
+    nvinfer2::safe::ITRTGraph*&, void const*, int64_t, ISafeRecorder&, bool, ISafeMemAllocator*);
+using DestroyTRTGraphFn = nvinfer1::ErrorCode(nvinfer2::safe::ITRTGraph* graph);
+using GetSafePluginRegistryFn = nvinfer2::safe::ISafePluginRegistry*(ISafeRecorder& recorder);
+
+//! Callable wrappers for those entry points. They start out empty and are
+//! assigned by initNvinferSafe() when the safe runtime is selected.
+std::function<CreateTRTGraphFn> pcreateTRTGraphInternal{};
+std::function<DestroyTRTGraphFn> pdestroyTRTGraphInternal{};
+std::function<GetSafePluginRegistryFn> pgetSafePluginRegistryInternal{};
+} // namespace
+
+//! Tracks the runtime used for the execution of trtexec.
+//! It must be a global variable because of how the library-init function APIs are organized.
+RuntimeMode gUseRuntime = RuntimeMode::kSAFE;
+
+//!
+//! \brief Initialize the NVIDIA Inference Safe Runtime library
+//!
+//! This function dynamically loads the Safe TensorRT runtime library and initializes
+//! function pointers for safe TensorRT operations. It is used to set up the safe runtime
+//! environment for inference with safety-certified TensorRT engines.
+//!
+//! The function performs the following operations:
+//! - Dynamically loads the safe TensorRT runtime library
+//! - Retrieves and stores function pointers for:
+//!   - createTRTGraph: Creates a safe TRT graph from serialized engine data
+//!   - destroyTRTGraph: Destroys a safe TRT graph and releases resources
+//!   - getSafePluginRegistry: Gets the safe plugin registry for loading plugins
+//!
+//! \return true if the safe runtime library was successfully loaded and initialized,
+//!         false otherwise (e.g., in static builds or if library loading fails)
+//!
+bool initNvinferSafe()
+{
+#if TRT_STATIC
+    // Nothing is loaded dynamically in static builds.
+    return false;
+#else
+    // Function-local static so the library handle persists across calls.
+    static LibraryPtr libnvinfersafePtr{};
+
+    // Callback handed to initLibrary(): resolves the three safe-runtime entry
+    // points, but only when the safe runtime is the one selected.
+    auto resolveSymbols = [](samplesCommon::DynamicLibrary* lib) {
+        if (gUseRuntime != RuntimeMode::kSAFE)
+        {
+            return;
+        }
+
+        pcreateTRTGraphInternal = lib->symbolAddress<nvinfer2::safe::ErrorCode(nvinfer2::safe::ITRTGraph*&,
+            void const*, int64_t, ISafeRecorder&, bool, ISafeMemAllocator*)>("createTRTGraph");
+
+        pdestroyTRTGraphInternal
+            = lib->symbolAddress<nvinfer2::safe::ErrorCode(nvinfer2::safe::ITRTGraph * graph)>("destroyTRTGraph");
+
+        pgetSafePluginRegistryInternal
+            = lib->symbolAddress<nvinfer2::safe::ISafePluginRegistry*(ISafeRecorder & recorder)>(
+                "getSafePluginRegistry");
+    };
+
+    auto const& libraryName = sample::getRuntimeLibraryName(gUseRuntime);
+    return initLibrary(libnvinfersafePtr, libraryName, resolveSymbols);
+#endif // TRT_STATIC
+}
+
+//!
+//! \brief Create a safe TRT graph from serialized engine data
+//!
+//! This function creates a safe TRT graph from serialized engine data. It is used to create
+//! a safe TRT graph for inference with safety-certified TensorRT engines.
+//!
+nvinfer1::ErrorCode createSafeTRTGraph(nvinfer2::safe::ITRTGraph*& graph, void const* blob, int64_t size,
+    ISafeRecorder& recorder, bool useManaged, ISafeMemAllocator* allocator)
+{
+    // Make sure the safe runtime is initialized before forwarding the call.
+    bool const runtimeReady = initNvinferSafe();
+    if (runtimeReady)
+    {
+        ASSERT(pcreateTRTGraphInternal != nullptr);
+        return pcreateTRTGraphInternal(graph, blob, size, recorder, useManaged, allocator);
+    }
+    return nvinfer1::ErrorCode::kINTERNAL_ERROR;
+}
+
+//!
+//! \brief Destroy a safe TRT graph and release resources
+//!
+//! This function destroys a safe TRT graph and releases the associated resources. It is used to clean up
+//! the safe TRT graph after inference with safety-certified TensorRT engines.
+//!
+nvinfer1::ErrorCode destroySafeTRTGraph(nvinfer2::safe::ITRTGraph*& graph)
+{
+    // Make sure the safe runtime is initialized before forwarding the call.
+    bool const runtimeReady = initNvinferSafe();
+    if (runtimeReady)
+    {
+        ASSERT(pdestroyTRTGraphInternal != nullptr);
+        return pdestroyTRTGraphInternal(graph);
+    }
+    return nvinfer1::ErrorCode::kINTERNAL_ERROR;
+}
+
+//!
+//! \brief Get the safe plugin registry for loading plugins
+//!
+//! This function retrieves the safe plugin registry for loading plugins. It is used to get the safe plugin registry
+//! for loading plugins with safety-certified TensorRT engines.
+//!
+nvinfer2::safe::ISafePluginRegistry* getSafePluginRegistry(ISafeRecorder& recorder)
+{
+    // Without an initialized safe runtime there is no registry to hand out.
+    bool const runtimeReady = initNvinferSafe();
+    if (runtimeReady)
+    {
+        ASSERT(pgetSafePluginRegistryInternal != nullptr);
+        return pgetSafePluginRegistryInternal(recorder);
+    }
+    return nullptr;
+}
+
+namespace
+{
+//! Wrap \p ptr and \p bufferSize in a TypedArray whose element type matches \p type.
+//! FLOAT, HALF, INT32 and INT8 are handled; any other type logs an error and
+//! yields a default-constructed TypedArray.
+nvinfer2::safe::TypedArray createTypedArray(void* const ptr, DataType const type, uint64_t bufferSize)
+{
+    using Array = nvinfer2::safe::TypedArray;
+    switch (type)
+    {
+    case DataType::kINT8: return Array(static_cast<int8_t*>(ptr), bufferSize);
+    case DataType::kINT32: return Array(static_cast<int32_t*>(ptr), bufferSize);
+    case DataType::kHALF: return Array(static_cast<nvinfer2::safe::half_t*>(ptr), bufferSize);
+    case DataType::kFLOAT: return Array(static_cast<float*>(ptr), bufferSize);
+    default: break;
+    }
+    sample::gLogError << "Invalid tensor DataType encountered." << std::endl;
+    return Array{};
+}
+} // namespace
+} // namespace safe
+#endif
+
+//! Verify that every key of \p map names (possibly via a single wildcard) an input tensor of \p engine.
+//!
+//! Only the first \p endBindingIndex I/O tensors of the engine are considered.
+//! \return true when all keys match an input tensor; false, after logging an error, on the first key
+//!         that matches none.
+template <class TMapType, class TEngineType>
+bool validateTensorNames(TMapType const& map, TEngineType const* engine, int32_t const endBindingIndex)
+{
+    // A key with no matching input tensor most likely means a typo on the command line.
+    auto const matchesEngineInput = [engine, endBindingIndex](auto const& requestedName) {
+        for (int32_t idx = 0; idx < endBindingIndex; ++idx)
+        {
+            auto const engineName = engine->getIOTensorName(idx);
+            if (engine->getTensorIOMode(engineName) != nvinfer1::TensorIOMode::kINPUT)
+            {
+                continue;
+            }
+            if (matchStringWithOneWildcard(requestedName, engineName))
+            {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    for (auto const& entry : map)
+    {
+        if (matchesEngineInput(entry.first))
+        {
+            continue;
+        }
+        sample::gLogError << "Cannot find input tensor with name \"" << entry.first << "\" in the engine bindings! "
+                          << "Please make sure the input tensor names are correct." << std::endl;
+        return false;
+    }
+    return true;
+}
+
+//! \brief Callable that registers every engine I/O tensor with each bindings object.
+//!
+//! For each of the first `endBindingIndex` I/O tensors the closure gathers a TensorInfo
+//! (through the engine-specific getTensorInfo() specialization) and adds it to every
+//! element of the bindings vector. Input tensors that have an entry in the inputs map
+//! are bound with that entry's value; all other tensors are bound without one.
+//!
+//! The closure keeps references to the inputs map and the bindings vector, so both
+//! must outlive it.
+template <class TEngineType>
+class FillBindingClosure
+{
+private:
+    using InputsMap = std::unordered_map<std::string, std::string>;
+    using BindingsVector = std::vector<std::unique_ptr<BindingsStd>>;
+
+    TEngineType const* mEngine;
+    nvinfer1::IExecutionContext const* mContext;
+    InputsMap const& inputs;
+    BindingsVector& bindings;
+    int32_t batch;
+    int32_t endBindingIndex;
+    int32_t profileIndex;
+
+    //! Add one tensor to every bindings object and log what was done for each of them.
+    void fillOneBinding(TensorInfo const& tensorInfo)
+    {
+        auto const name = tensorInfo.name;
+        auto const* bindingInOutStr = tensorInfo.isInput ? "Input" : "Output";
+        // The lookup depends only on the tensor, not on the bindings object,
+        // so it is done once instead of once per loop iteration.
+        auto const input = findPlausible(inputs, name);
+        bool const hasInputFile = tensorInfo.isInput && input != inputs.end();
+        for (auto& binding : bindings)
+        {
+            if (hasInputFile)
+            {
+                sample::gLogInfo << "Using values loaded from " << input->second << " for input " << name << std::endl;
+                binding->addBinding(tensorInfo, input->second);
+            }
+            else
+            {
+                if (tensorInfo.isInput)
+                {
+                    sample::gLogInfo << "Using random values for input " << name << std::endl;
+                }
+                binding->addBinding(tensorInfo);
+            }
+            if (tensorInfo.isDynamic)
+            {
+                sample::gLogInfo << bindingInOutStr << " binding for " << name
+                                 << " is dynamic and will be created during execution using OutputAllocator."
+                                 << std::endl;
+            }
+            else
+            {
+                sample::gLogInfo << bindingInOutStr << " binding for " << name << " with dimensions " << tensorInfo.dims
+                                 << " is created." << std::endl;
+            }
+        }
+    }
+
+    //! Validate the keys of the inputs map against the engine, then bind tensors [0, endBindingIndex).
+    //! \return false (after logging) when a key matches no engine input, true otherwise.
+    bool fillAllBindings(int32_t batch, int32_t endBindingIndex)
+    {
+        if (!validateTensorNames(inputs, mEngine, endBindingIndex))
+        {
+            sample::gLogError << "Invalid tensor names found in --loadInputs flag." << std::endl;
+            return false;
+        }
+        for (int32_t b = 0; b < endBindingIndex; b++)
+        {
+            TensorInfo tensorInfo;
+            tensorInfo.bindingIndex = b;
+            getTensorInfo(tensorInfo);
+            tensorInfo.updateVolume(batch);
+            fillOneBinding(tensorInfo);
+        }
+        return true;
+    }
+
+    //! Fill \p tensorInfo for the tensor at tensorInfo.bindingIndex; defined per engine type.
+    void getTensorInfo(TensorInfo& tensorInfo);
+
+public:
+    FillBindingClosure(TEngineType const* _engine, nvinfer1::IExecutionContext const* _context,
+        InputsMap const& _inputs, BindingsVector& _bindings, int32_t _batch, int32_t _endBindingIndex,
+        int32_t _profileIndex)
+        : mEngine(_engine)
+        , mContext(_context)
+        , inputs(_inputs)
+        , bindings(_bindings)
+        , batch(_batch)
+        , endBindingIndex(_endBindingIndex)
+        , profileIndex(_profileIndex)
+    {
+    }
+
+    //! Run the closure. \return the result of fillAllBindings().
+    bool operator()()
+    {
+        return fillAllBindings(batch, endBindingIndex);
+    }
+};
+
+//! Fill \p tensorInfo for the I/O tensor at tensorInfo.bindingIndex of a standard engine.
+//!
+//! Shape and strides are read from the execution context; components per element,
+//! vectorized dimension, I/O mode and data type are read from the engine.
+template <>
+void FillBindingClosure<nvinfer1::ICudaEngine>::getTensorInfo(TensorInfo& tensorInfo)
+{
+    auto const b = tensorInfo.bindingIndex;
+    auto const name = mEngine->getIOTensorName(b);
+    tensorInfo.name = name;
+    tensorInfo.dims = mContext->getTensorShape(name);
+    // The predicate takes int64_t so that a 64-bit extent is compared as-is; an int32_t
+    // parameter would truncate the value before the comparison with -1.
+    tensorInfo.isDynamic = std::any_of(
+        tensorInfo.dims.d, tensorInfo.dims.d + tensorInfo.dims.nbDims, [](int64_t dim) { return dim == -1; });
+    tensorInfo.comps = mEngine->getTensorComponentsPerElement(name, profileIndex);
+    tensorInfo.strides = mContext->getTensorStrides(name);
+    tensorInfo.vectorDimIndex = mEngine->getTensorVectorizedDim(name, profileIndex);
+    tensorInfo.isInput = mEngine->getTensorIOMode(name) == TensorIOMode::kINPUT;
+    tensorInfo.dataType = mEngine->getTensorDataType(name);
+}
+
+namespace
+{
+//! Provide device memory to each execution context according to the requested strategy.
+//!
+//! With the static strategy nothing is allocated here and only the engine's device
+//! memory size is logged. With the profile or runtime strategy a buffer is allocated
+//! per stream and handed to the context.
+//! \return false (after logging) when the strategy is not recognized, true otherwise.
+bool allocateContextMemory(InferenceEnvironmentStd& iEnv, InferenceOptions const& inference)
+{
+    auto* engine = iEnv.engine.get();
+    iEnv.deviceMemory.resize(inference.infStreams);
+    auto const strategy = inference.memoryAllocationStrategy;
+    // Context memory is allocated this late because the runtime strategy needs the
+    // actual input shapes to be set first.
+    for (int32_t streamIdx = 0; streamIdx < inference.infStreams; ++streamIdx)
+    {
+        auto const& context = iEnv.contexts.at(streamIdx);
+        if (strategy == MemoryAllocationStrategy::kSTATIC)
+        {
+            sample::gLogInfo << "Created execution context with device memory size: "
+                             << (engine->getDeviceMemorySize() / 1.0_MiB) << " MiB" << std::endl;
+            continue;
+        }
+
+        size_t sizeToAlloc{0};
+        const char* allocReason{nullptr};
+        if (strategy == MemoryAllocationStrategy::kPROFILE)
+        {
+            sizeToAlloc = engine->getDeviceMemorySizeForProfile(inference.optProfileIndex);
+            allocReason = "current profile";
+        }
+        else if (strategy == MemoryAllocationStrategy::kRUNTIME)
+        {
+            sizeToAlloc = context->updateDeviceMemorySizeForShapes();
+            allocReason = "current input shapes";
+        }
+        else
+        {
+            sample::gLogError << "Unrecognizable memory allocation strategy." << std::endl;
+            return false;
+        }
+
+        auto& buffer = iEnv.deviceMemory.at(streamIdx);
+        buffer = TrtDeviceBuffer(sizeToAlloc);
+        context->setDeviceMemoryV2(buffer.get(), buffer.getSize());
+        sample::gLogInfo << "Maximum device memory size across all profiles: "
+                         << (engine->getDeviceMemorySizeV2() / 1.0_MiB) << " MiB" << std::endl;
+        sample::gLogInfo << "Only allocated device memory enough for " << allocReason << ": "
+                         << (sizeToAlloc / 1.0_MiB) << " MiB" << std::endl;
+    }
+    return true;
+}
+
+//! \brief Pack the values of shapeData so that its storage can be read as an array of int32_t.
+//!
+//! After the call, reading shapeData.data() as a sequence of int32_t yields the
+//! original elements of shapeData, in their original order. The vector is shrunk to
+//! (n + 1) / 2 elements, which is the number of int64_t slots needed to hold n
+//! int32_t values.
+//!
+//! Every element must be representable as int32_t; otherwise the ASSERT fails.
+void contractInt64ToInt32(std::vector<int64_t>& shapeData)
+{
+    int64_t const size = shapeData.size();
+    for (int64_t const& val : shapeData)
+    {
+        ASSERT(val <= std::numeric_limits<int32_t>::max() && val >= std::numeric_limits<int32_t>::min()
+            && "Value out of range for int32_t conversion");
+    }
+    // Narrow in place through a byte view. std::memcpy keeps the stores well-defined,
+    // whereas std::copy into the start of its own source range through an int32_t*
+    // alias breaks both the overlap precondition of std::copy and the aliasing rules.
+    auto* const bytes = reinterpret_cast<unsigned char*>(shapeData.data());
+    for (int64_t i = 0; i < size; ++i)
+    {
+        // Element i is still intact when it is read: the narrowed values written so far
+        // occupy bytes [0, 4 * i), which end at or before the start of element i.
+        auto const narrowed = static_cast<int32_t>(shapeData[static_cast<size_t>(i)]);
+        std::memcpy(bytes + static_cast<size_t>(i) * sizeof(narrowed), &narrowed, sizeof(narrowed));
+    }
+    shapeData.resize((size + 1) / 2);
+}
+
+} // namespace
+
+
+//! Dispatch inference set-up to the safe or the standard implementation.
+//!
+//! When built with ENABLE_UNIFIED_BUILDER and \p iEnv is flagged as safe, the
+//! environment is treated as an InferenceEnvironmentSafe; in every other case it is
+//! treated as an InferenceEnvironmentStd.
+//! \return the result of the selected set-up function.
+bool setUpInference(InferenceEnvironmentBase& iEnv, InferenceOptions const& inference, SystemOptions const& system)
+{
+#if ENABLE_UNIFIED_BUILDER
+    if (iEnv.safe)
+    {
+        auto& safeEnv = static_cast<InferenceEnvironmentSafe&>(iEnv);
+        return setUpSafeInference(safeEnv, inference, system);
+    }
+#endif
+
+    auto& stdEnv = static_cast<InferenceEnvironmentStd&>(iEnv);
+    return setUpStdInference(stdEnv, inference, system);
+}
+
+#if ENABLE_UNIFIED_BUILDER
+//! Fill \p tensorInfo for the I/O tensor at tensorInfo.bindingIndex of a safe graph.
+//!
+//! The tensor name and descriptor are queried from \p safeGraph; shape, strides, I/O
+//! mode and data type are copied from the descriptor.
+//! \param profileIndex Not referenced by this function.
+//! NOTE(review): the status results of getIOTensorName() and getIOTensorDescriptor()
+//! are not inspected here — confirm whether failures need handling.
+void getSafeTensorInfo(uint32_t profileIndex, nvinfer2::safe::ITRTGraph* safeGraph, TensorInfo& tensorInfo)
+{
+    nvinfer2::safe::TensorDescriptor desc;
+    auto const b = tensorInfo.bindingIndex;
+    const char* name = nullptr;
+    safeGraph->getIOTensorName(name, b);
+    tensorInfo.name = name;
+    safeGraph->getIOTensorDescriptor(desc, name);
+    tensorInfo.dims = desc.shape;
+    // The predicate takes int64_t so that a 64-bit extent is compared as-is; an int32_t
+    // parameter would truncate the value before the comparison with -1.
+    tensorInfo.isDynamic = std::any_of(
+        tensorInfo.dims.d, tensorInfo.dims.d + tensorInfo.dims.nbDims, [](int64_t dim) { return dim == -1; });
+    tensorInfo.strides = desc.stride;
+    tensorInfo.isInput = desc.ioMode == TensorIOMode::kINPUT;
+    tensorInfo.dataType = desc.dataType;
+}
+
+//! \brief Prepare a safe-runtime inference environment.
+//!
+//! Creates a safe TRT graph from the serialized engine blob, clones it once per
+//! inference stream, and adds every I/O tensor to each stream's bindings.
+//!
+//! \param iEnv Environment that receives the cloned graphs and the bindings.
+//! \param inference Inference options (stream count, input files, memory flags, profile index).
+//! \param system Not referenced by this function.
+//! \return true on success; false (after logging) when the blob is empty, the DLA
+//!         check fails, the graph cannot be created, or a clone yields no graph.
+bool setUpSafeInference(InferenceEnvironmentSafe& iEnv, InferenceOptions const& inference, SystemOptions const& system)
+{
+    int32_t device{};
+    CHECK(cudaGetDevice(&device));
+
+    cudaDeviceProp properties;
+    CHECK(cudaGetDeviceProperties(&properties, device));
+    int32_t const isIntegrated{properties.integrated};
+
+    ASSERT(sample::hasSafeRuntime());
+    ASSERT(sample::safe::initNvinferSafe());
+
+    auto safeEngineBlob = iEnv.engine.getBlob();
+    SMP_RETVAL_IF_FALSE(safeEngineBlob.data != nullptr, "Engine blob is empty.", false, sample::gLogError);
+    SMP_RETVAL_IF_FALSE(iEnv.engine.checkDLASafe(),
+        "Safe DLA engine built with kDLA_STANDALONE should not be infered in TRT!", false, sample::gLogError);
+
+    std::unique_ptr<nvinfer2::safe::ITRTGraph> safeGraph;
+
+    // Use managed memory on integrated devices when transfers are skipped
+    // and when it is explicitly requested on the commandline.
+    bool useManagedMemory{(inference.skipTransfers && isIntegrated) || inference.useManaged};
+
+    // NOTE(review): the graph is created with a hard-coded `true` for the managed-memory
+    // argument rather than useManagedMemory — confirm that this is intended.
+    nvinfer2::safe::ITRTGraph* tempGraph = nullptr;
+    auto const createStatus = sample::safe::createSafeTRTGraph(
+        tempGraph, safeEngineBlob.data, safeEngineBlob.size, *gSafeRecorder, true, nullptr);
+    // Take ownership first so that whatever was returned is released on every path.
+    safeGraph.reset(tempGraph);
+
+    // Release serialized blob to save memory space.
+    iEnv.engine.releaseBlob();
+
+    // Stop here on failure: everything below dereferences safeGraph.
+    if (createStatus != nvinfer2::safe::ErrorCode::kSUCCESS || safeGraph == nullptr)
+    {
+        sample::gLogError << "Create Safe TRT Graph Failed." << std::endl;
+        return false;
+    }
+
+    for (int32_t s = 0; s < inference.infStreams; ++s)
+    {
+        nvinfer2::safe::ITRTGraph* clonedGraph{nullptr};
+
+        safeGraph->clone(clonedGraph, *gSafeRecorder); // return errorcode
+        if (clonedGraph == nullptr)
+        {
+            sample::gLogError << "Clone Safe TRT Graph Failed for stream " << s << "." << std::endl;
+            return false;
+        }
+        iEnv.mClonedGraphs.emplace_back(clonedGraph);
+        iEnv.bindings.emplace_back(std::make_unique<BindingsSafe>(useManagedMemory));
+    }
+
+    int64_t endBindingIndex = 0;
+    safeGraph->getNbIOTensors(endBindingIndex);
+
+    for (int32_t b = 0; b < endBindingIndex; b++)
+    {
+        TensorInfo tensorInfo;
+        tensorInfo.bindingIndex = b;
+        getSafeTensorInfo(inference.optProfileIndex, safeGraph.get(), tensorInfo);
+        tensorInfo.updateVolume(1);
+        auto const name = tensorInfo.name;
+        auto const* bindingInOutStr = tensorInfo.isInput ? "Input" : "Output";
+        // The lookup depends only on the tensor, so it is done once per tensor
+        // instead of once per bindings object.
+        auto const input = findPlausible(inference.inputs, name);
+        bool const hasInputFile = tensorInfo.isInput && input != inference.inputs.end();
+        for (auto& binding : iEnv.bindings)
+        {
+            if (hasInputFile)
+            {
+                sample::gLogInfo << "Using values loaded from " << input->second << " for input " << name << std::endl;
+                binding->addBinding(tensorInfo, input->second);
+            }
+            else
+            {
+                if (tensorInfo.isInput)
+                {
+                    sample::gLogInfo << "Using random values for input " << name << std::endl;
+                }
+                binding->addBinding(tensorInfo);
+            }
+            if (tensorInfo.isDynamic)
+            {
+                sample::gLogInfo << bindingInOutStr << " binding for " << name
+                                 << " is dynamic and will be created during execution using OutputAllocator."
+                                 << std::endl;
+            }
+            else
+            {
+                sample::gLogInfo << bindingInOutStr << " binding for " << name << " with dimensions " << tensorInfo.dims
+                                 << " is created." << std::endl;
+            }
+        }
+    }
+    return true;
+}
+#endif
+
+//! \brief Prepare a standard (non-safe) inference environment.
+//!
+//! In order, this function: applies the weight streaming budget (when the engine has
+//! streamable weights), creates one execution context and one bindings object per
+//! inference stream with the requested optimization profile, attaches the profiler,
+//! sets input shapes and shape-tensor values, enables debug tensor dumping when
+//! requested, allocates context memory and finally fills the bindings.
+//!
+//! \param iEnv Environment that owns the engine and receives contexts, bindings,
+//!        shape-tensor values and the debug listener.
+//! \param inference Inference options taken from the command line.
+//! \param system Not referenced by this function.
+//! \return true on success, false (after logging) when any step fails.
+bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& inference, SystemOptions const& system)
+{
+    int32_t device{};
+    CHECK(cudaGetDevice(&device));
+
+    cudaDeviceProp properties;
+    CHECK(cudaGetDeviceProperties(&properties, device));
+    int32_t const isIntegrated{properties.integrated};
+    // Use managed memory on integrated devices when transfers are skipped
+    // and when it is explicitly requested on the commandline.
+    bool useManagedMemory{(inference.skipTransfers && isIntegrated) || inference.useManaged};
+
+    using FillStdBindings = FillBindingClosure<nvinfer1::ICudaEngine>;
+
+    auto* engine = iEnv.engine.get();
+    SMP_RETVAL_IF_FALSE(engine != nullptr, "Got invalid engine!", false, sample::gLogError);
+
+    // Release serialized blob to save memory space.
+    iEnv.engine.releaseBlob();
+
+
+    // Setup weight streaming if enabled
+    if (engine->getStreamableWeightsSize() > 0)
+    {
+        auto const& budget = inference.weightStreamingBudget;
+        int64_t wsBudget = budget.bytes;
+        if (budget.percent != 100.0)
+        {
+            double const percent = budget.percent;
+            ASSERT(percent < 100.0);
+            auto const max = engine->getStreamableWeightsSize();
+            wsBudget = (max >= 0) ? (percent / 100) * (max) : WeightStreamingBudget::kDISABLE;
+        }
+
+        // Remember what was requested. The sentinel values are replaced by concrete byte
+        // counts just below, so switching on wsBudget afterwards could never reach the
+        // kDISABLE / kAUTOMATIC cases.
+        int64_t const requestedBudget = wsBudget;
+
+        if (wsBudget == WeightStreamingBudget::kDISABLE)
+        {
+            wsBudget = engine->getStreamableWeightsSize();
+        }
+        else if (wsBudget == WeightStreamingBudget::kAUTOMATIC)
+        {
+            wsBudget = engine->getWeightStreamingAutomaticBudget();
+        }
+        ASSERT(wsBudget >= 0);
+        bool success = engine->setWeightStreamingBudgetV2(wsBudget);
+        SMP_RETVAL_IF_FALSE(success, "Failed to set weight streaming limit!", false, sample::gLogError);
+        switch (requestedBudget)
+        {
+        case WeightStreamingBudget::kDISABLE:
+        {
+            sample::gLogInfo << "Weight streaming has been disabled at runtime." << std::endl;
+            break;
+        }
+
+        case WeightStreamingBudget::kAUTOMATIC:
+        {
+            sample::gLogInfo << "The weight streaming budget will automatically be chosen by TensorRT." << std::endl;
+            break;
+        }
+        default:
+        {
+            sample::gLogInfo << "Weight streaming is enabled with a device memory limit of " << wsBudget << " bytes."
+                             << std::endl;
+            break;
+        }
+        }
+    }
+
+    int32_t const nbOptProfiles = engine->getNbOptimizationProfiles();
+
+    if (inference.optProfileIndex >= nbOptProfiles)
+    {
+        sample::gLogError << "Selected profile index " << inference.optProfileIndex
+                          << " exceeds the number of profiles that the engine holds. " << std::endl;
+        return false;
+    }
+
+    if (nbOptProfiles > 1 && !inference.setOptProfile)
+    {
+        sample::gLogWarning << nbOptProfiles
+                            << " profiles detected but not set. Running with profile 0. Please use "
+                               "--dumpOptimizationProfile to see all available profiles."
+                            << std::endl;
+    }
+
+    cudaStream_t setOptProfileStream;
+    CHECK(cudaStreamCreate(&setOptProfileStream));
+
+    // The per-stream set-up runs inside a lambda so that every exit path, including the
+    // failure ones, reaches the cudaStreamDestroy() call that follows it.
+    bool const contextsCreated = [&]() -> bool {
+        for (int32_t s = 0; s < inference.infStreams; ++s)
+        {
+            //! \return the `ExecutionContextAllocationStrategy` to use for the given allocation strategy.
+            auto getExecutionContextAllocationStrategy = [](MemoryAllocationStrategy strategy) {
+                return strategy == MemoryAllocationStrategy::kSTATIC
+                    // Let TRT pre-allocate and manage the memory.
+                    ? ExecutionContextAllocationStrategy::kSTATIC
+                    // Allocate based on the current profile or runtime shapes.
+                    : ExecutionContextAllocationStrategy::kUSER_MANAGED;
+            };
+
+            // Owned locally until it is handed to iEnv, so that a failure below does not leak it.
+            std::unique_ptr<IExecutionContext> ec{engine->createExecutionContext(
+                getExecutionContextAllocationStrategy(inference.memoryAllocationStrategy))};
+            if (ec == nullptr)
+            {
+                sample::gLogError << "Unable to create execution context for stream " << s << "." << std::endl;
+                return false;
+            }
+            ec->setNvtxVerbosity(inference.nvtxVerbosity);
+
+            int32_t const persistentCacheLimit
+                = samplesCommon::getMaxPersistentCacheSize() * inference.persistentCacheRatio;
+            sample::gLogInfo << "Setting persistentCacheLimit to " << persistentCacheLimit << " bytes." << std::endl;
+            ec->setPersistentCacheLimit(persistentCacheLimit);
+
+            auto setProfile = ec->setOptimizationProfileAsync(inference.optProfileIndex, setOptProfileStream);
+            CHECK(cudaStreamSynchronize(setOptProfileStream));
+
+            if (!setProfile)
+            {
+                sample::gLogError << "Set optimization profile failed. " << std::endl;
+                if (inference.infStreams > 1)
+                {
+                    sample::gLogError
+                        << "Please ensure that the engine is built with preview feature profileSharing0806 enabled. "
+                        << std::endl;
+                }
+                return false;
+            }
+
+            iEnv.contexts.emplace_back(ec.release());
+            iEnv.bindings.emplace_back(std::make_unique<BindingsStd>(useManagedMemory));
+        }
+        return true;
+    }();
+
+    CHECK(cudaStreamDestroy(setOptProfileStream));
+
+    if (!contextsCreated)
+    {
+        return false;
+    }
+
+    if (iEnv.profiler)
+    {
+        iEnv.contexts.front()->setProfiler(iEnv.profiler.get());
+        // Always run reportToProfiler() after enqueue launch
+        iEnv.contexts.front()->setEnqueueEmitsProfile(false);
+    }
+
+    int32_t const endBindingIndex = engine->getNbIOTensors();
+
+    // Make sure that the tensor names provided in command-line args actually exist in any of the engine bindings
+    // to avoid silent typos.
+    if (!validateTensorNames(inference.shapes, engine, endBindingIndex))
+    {
+        sample::gLogError << "Invalid tensor names found in --shapes flag." << std::endl;
+        return false;
+    }
+
+    for (int32_t b = 0; b < endBindingIndex; ++b)
+    {
+        auto const& name = engine->getIOTensorName(b);
+        auto const& mode = engine->getTensorIOMode(name);
+        if (mode == TensorIOMode::kINPUT)
+        {
+            Dims const dims = iEnv.contexts.front()->getTensorShape(name);
+            bool const isShapeInferenceIO = engine->isShapeInferenceIO(name);
+            // int64_t parameter: a 64-bit extent must not be truncated before the comparison.
+            bool const hasRuntimeDim = std::any_of(dims.d, dims.d + dims.nbDims, [](int64_t dim) { return dim == -1; });
+            auto const shape = findPlausible(inference.shapes, name);
+            if (hasRuntimeDim || isShapeInferenceIO)
+            {
+                // Set shapeData to either dimensions of the input (if it has a dynamic shape)
+                // or set to values of the input (if it is an input shape tensor).
+                std::vector<int64_t> shapeData;
+
+                if (shape == inference.shapes.end())
+                {
+                    // No information provided. Use default value for missing data.
+                    constexpr int32_t kDEFAULT_VALUE = 1;
+                    if (isShapeInferenceIO)
+                    {
+                        // Set shape tensor to all ones.
+                        shapeData.assign(volume(dims, 0, dims.nbDims), kDEFAULT_VALUE);
+                        sample::gLogWarning << "Values missing for input shape tensor: " << name
+                                            << ". Automatically setting values to: " << shapeData << std::endl;
+                    }
+                    else
+                    {
+                        // Use default value for unspecified runtime dimensions.
+                        shapeData.resize(dims.nbDims);
+                        // int64_t parameter: keep known extents intact instead of narrowing them.
+                        std::transform(dims.d, dims.d + dims.nbDims, shapeData.begin(),
+                            [&](int64_t dimension) { return dimension >= 0 ? dimension : kDEFAULT_VALUE; });
+                        sample::gLogWarning << "Shape missing for input with dynamic shape: " << name
+                                            << ". Automatically setting shape to: " << shapeData << std::endl;
+                    }
+                }
+                else if (inference.inputs.count(shape->first) && isShapeInferenceIO)
+                {
+                    // Load shape tensor from file.
+                    int64_t const size = volume(dims, 0, dims.nbDims);
+                    shapeData.resize(size);
+                    auto const& filename = inference.inputs.at(shape->first);
+                    auto dst = reinterpret_cast<char*>(shapeData.data());
+                    loadFromFile(filename, dst, size * sizeof(decltype(shapeData)::value_type));
+                }
+                else
+                {
+                    shapeData = shape->second;
+                }
+
+                // Printable copy of the values: for INT32 shape tensors shapeData is repacked
+                // in place below and would no longer print as the original numbers.
+                std::vector<int64_t> const shapeValuesForLog = shapeData;
+
+                int64_t* shapeTensorData{nullptr};
+                if (isShapeInferenceIO)
+                {
+                    // Save the data in iEnv, in a way that its address does not change
+                    // before enqueueV3 is called.
+                    DataType const type = engine->getTensorDataType(name);
+                    switch (type)
+                    {
+                    case DataType::kINT64: break;
+                    case DataType::kINT32: contractInt64ToInt32(shapeData); break;
+                    default:
+                        sample::gLogError << "Shape tensor " << name << " has unexpected type " << type << std::endl;
+                        return false;
+                    }
+                    iEnv.inputShapeTensorValues.emplace_back(shapeData);
+                    shapeTensorData = iEnv.inputShapeTensorValues.back().data();
+                }
+
+                for (auto& c : iEnv.contexts)
+                {
+                    if (isShapeInferenceIO)
+                    {
+                        sample::gLogInfo << "Set input shape tensor " << name << " to: " << shapeValuesForLog
+                                         << std::endl;
+                        if (!c->setTensorAddress(name, shapeTensorData))
+                        {
+                            return false;
+                        }
+                    }
+                    else
+                    {
+                        sample::gLogInfo << "Set shape of input tensor " << name << " to: " << shapeValuesForLog
+                                         << std::endl;
+                        if (!c->setInputShape(name, toDims(shapeData)))
+                        {
+                            return false;
+                        }
+                    }
+                }
+            }
+            else if (nbOptProfiles && shape != inference.shapes.end())
+            {
+                // Check if the provided shape matches the static dimensions in the engine.
+                for (auto& c : iEnv.contexts)
+                {
+                    if (!c->setInputShape(name, toDims(shape->second)))
+                    {
+                        sample::gLogError << "The engine was built with static shapes for input tensor " << name
+                                          << " but the provided shapes do not match the static shapes!" << std::endl;
+                        return false;
+                    }
+                }
+            }
+        }
+    }
+
+    // Create Debug Listener and turn on debug states if client requested dumping debug tensors.
+    if (!inference.debugTensorFileNames.empty() || !inference.dumpAlldebugTensorFormats.empty())
+    {
+        iEnv.listener = std::make_unique<DebugTensorWriter>(
+            inference.debugTensorFileNames, inference.dumpAlldebugTensorFormats, engine->getName(), iEnv.cmdline);
+        iEnv.contexts.front()->setDebugListener(iEnv.listener.get());
+        for (auto const& s : inference.debugTensorFileNames)
+        {
+            iEnv.contexts.front()->setTensorDebugState(s.first.c_str(), true);
+        }
+        if (!inference.dumpAlldebugTensorFormats.empty())
+        {
+            iEnv.contexts.front()->setUnfusedTensorsDebugState(true);
+        }
+    }
+
+    if (!allocateContextMemory(iEnv, inference))
+    {
+        return false;
+    }
+
+    auto const* context = iEnv.contexts.front().get();
+    bool fillBindingsSuccess = FillStdBindings(
+        engine, context, inference.inputs, iEnv.bindings, 1, endBindingIndex, inference.optProfileIndex)();
+
+
+    return fillBindingsSuccess;
+}
+
+TaskInferenceEnvironment::TaskInferenceEnvironment(std::string engineFile, InferenceOptions const& inference,
+    ReportingOptions const& reporting, int32_t deviceId, int32_t DLACore, int32_t bs)
+    : iOptions(inference)
+    , rOptions(reporting)
+    , device(deviceId)
+    , batch(bs)
+{ // Stores the options, loads engineFile through a temporary build environment and sets up standard inference.
+    BuildEnvironment bEnv(/* isSafe */ false, /* versionCompatible */ false, DLACore, "", getTempfileControlDefaults());
+    loadEngineToBuildEnv(engineFile, bEnv, sample::gLogError, false); // NOTE(review): any result is not inspected.
+    iEnv = std::make_unique<InferenceEnvironmentStd>(bEnv);
+    // Select the target device, then set up inference with system options that mirror device and DLACore.
+    CHECK(cudaSetDevice(device));
+    SystemOptions system{};
+    system.device = device;
+    system.DLACore = DLACore;
+    if (!setUpStdInference(*iEnv, iOptions, system))
+    {
+        sample::gLogError << "Inference set up failed" << std::endl; // Only logged; construction still completes.
+    }
+}
+namespace
+{
+
+#if defined(__QNX__)
+using TimePoint = double;
+#else
+using TimePoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
+#endif
+
+TimePoint getCurrentTime()
+{ // Current host time as a TimePoint: milliseconds as double on QNX, a high_resolution_clock time_point elsewhere.
+#if defined(__QNX__)
+    // Scale the cycle counter by the cycles-per-second rate to obtain a timestamp in ms.
+    uint64_t const cyclesNow = ClockCycles();
+    uint64_t const cyclesPerSec = SYSPAGE_ENTRY(qtime)->cycles_per_sec;
+    return static_cast<TimePoint>(cyclesNow) * 1000. / cyclesPerSec;
+#else
+    return std::chrono::high_resolution_clock::now();
+#endif
+}
+
+//!
+//! \struct SyncStruct
+//! \brief Threads synchronization structure: the common start references and the mutex shared by worker threads
+//!
+struct SyncStruct
+{
+    std::mutex mutex; // Held by workers while they append to the shared trace or set the environment error flag.
+    TrtCudaStream mainStream; // Stream on which the callers issue the sleep and record gpuStart.
+    TrtCudaEvent gpuStart{cudaEventBlockingSync}; // GPU-side start reference; each iteration's input stream waits on it.
+    TimePoint cpuStart{}; // Host timestamp taken by the callers right before gpuStart is recorded.
+    float sleep{}; // Value whose address is handed to mainStream.sleep(); units are not visible here.
+};
+
+struct Enqueue // Common base of the enqueue functors that act on an execution context.
+{
+    explicit Enqueue(nvinfer1::IExecutionContext& context)
+        : mContext(context)
+    {
+    }
+
+    nvinfer1::IExecutionContext& mContext; // Non-owning reference to the context passed at construction.
+};
+
+#if ENABLE_UNIFIED_BUILDER
+struct SafeEnqueue // Common base of the enqueue functors that act on a safe ITRTGraph.
+{
+    explicit SafeEnqueue(nvinfer2::safe::ITRTGraph& graph)
+        : mGraph(graph)
+    {
+    }
+
+    nvinfer2::safe::ITRTGraph& mGraph; // Non-owning reference to the graph passed at construction.
+};
+#endif
+
+//!
+//! \class EnqueueExplicit
+//! \brief Functor to enqueue inference with explicit batch
+//!
+class EnqueueExplicit : private Enqueue
+{
+    // Binds the tensor addresses at construction; the call operator then issues enqueueV3() on the given stream.
+public:
+    explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, BindingsStd const& bindings)
+        : Enqueue(context)
+        , mBindings(bindings)
+    {
+        ASSERT(mBindings.setTensorAddresses(mContext)); // NOTE(review): call lives inside ASSERT; confirm it always runs.
+    }
+
+    bool operator()(TrtCudaStream& stream) const // Returns the enqueueV3() result, or false on std::exception.
+    {
+        try
+        {
+            bool const result = mContext.enqueueV3(stream.get());
+            // Collecting layer timing info from current profile index of execution context, except under capturing
+            // mode.
+            if (!isStreamCapturing(stream) && mContext.getProfiler() && !mContext.getEnqueueEmitsProfile()
+                && !mContext.reportToProfiler())
+            {
+                gLogWarning << "Failed to collect layer timing info from previous enqueueV3()" << std::endl;
+            }
+            return result; // A failed profiler report only warns; it does not change the returned status.
+        }
+        catch (const std::exception&)
+        {
+            return false;
+        }
+        return false; // Not reached: both the try and the catch paths return.
+    }
+
+private:
+    // Helper function to check if a stream is in capturing mode.
+    bool isStreamCapturing(TrtCudaStream& stream) const
+    {
+        cudaStreamCaptureStatus status{cudaStreamCaptureStatusNone};
+        CHECK(cudaStreamIsCapturing(stream.get(), &status));
+        return status != cudaStreamCaptureStatusNone;
+    }
+
+    BindingsStd const& mBindings; // Non-owning reference to the bindings passed at construction.
+};
+
+#if ENABLE_UNIFIED_BUILDER
+//!
+//! \class EnqueueExplicitSafe
+//! \brief Functor to safeEnqueue inference with explicit batch
+//!
+class EnqueueExplicitSafe : private SafeEnqueue
+{
+    // Binds the tensor addresses on the graph at construction; the call operator runs executeAsync() on the stream.
+public:
+    explicit EnqueueExplicitSafe(nvinfer2::safe::ITRTGraph& graph, BindingsSafe const& bindings)
+        : SafeEnqueue(graph)
+        , mBindings(bindings)
+    {
+        ASSERT(mBindings.setTensorAddresses(graph)); // NOTE(review): call lives inside ASSERT; confirm it always runs.
+    }
+
+    bool operator()(TrtCudaStream& stream) const // True only when executeAsync() reports kSUCCESS.
+    {
+        try
+        {
+            bool const result = (mGraph.executeAsync(stream.get()) == nvinfer1::ErrorCode::kSUCCESS);
+            return result;
+        }
+        catch (const std::exception&)
+        {
+            return false; // A std::exception is reported as a failed enqueue.
+        }
+        return false; // Not reached: both the try and the catch paths return.
+    }
+
+private:
+    BindingsSafe const& mBindings; // Non-owning reference to the bindings passed at construction.
+};
+#endif
+
+//!
+//! \class EnqueueGraph
+//! \brief Functor to enqueue inference from CUDA Graph
+//!
+class EnqueueGraph
+{
+    // Launches a previously captured graph instead of calling enqueue on the context directly.
+public:
+    explicit EnqueueGraph(nvinfer1::IExecutionContext& context, TrtCudaGraph& graph)
+        : mGraph(graph)
+        , mContext(context)
+    {
+    }
+
+    bool operator()(TrtCudaStream& stream) const // Returns whether the graph launch succeeded.
+    {
+        if (mGraph.launch(stream))
+        {
+            // Collecting layer timing info from current profile index of execution context
+            if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && !mContext.reportToProfiler())
+            {
+                gLogWarning << "Failed to collect layer timing info from previous CUDA graph launch" << std::endl;
+            }
+            return true; // A failed profiler report only warns; the launch itself succeeded.
+        }
+        return false;
+    }
+
+    TrtCudaGraph& mGraph; // Non-owning reference to the captured graph.
+    nvinfer1::IExecutionContext& mContext; // Used here only for the profiler queries above.
+};
+
+#if ENABLE_UNIFIED_BUILDER
+//!
+//! \class EnqueueGraphSafe
+//! \brief Functor to enqueue inference from CUDA Graph
+//!
+class EnqueueGraphSafe
+{
+    // Safe-runtime counterpart: the call operator runs executeAsync() on the referenced ITRTGraph.
+public:
+    explicit EnqueueGraphSafe(nvinfer2::safe::ITRTGraph& graph)
+        : mGraph(graph)
+    {
+    }
+
+    bool operator()(TrtCudaStream& stream) const // True only when executeAsync() reports kSUCCESS.
+    {
+        return mGraph.executeAsync(stream.get()) == nvinfer1::ErrorCode::kSUCCESS;
+    }
+
+    nvinfer2::safe::ITRTGraph& mGraph; // Non-owning reference to the graph passed at construction.
+};
+#endif
+
+using EnqueueFunction = std::function<bool(TrtCudaStream&)>;
+
+enum class StreamType : int32_t // Index into MultiStream; one stream per stage of an iteration.
+{
+    kINPUT = 0, // Stream passed to transferInputToDevice().
+    kCOMPUTE = 1, // Stream passed to the enqueue function.
+    kOUTPUT = 2, // Stream passed to transferOutputToHost().
+    kNUM = 3 // Number of stream types; sizes the MultiStream array.
+};
+
+enum class EventType : int32_t // Index into MultiEvent; _S/_E are recorded before/after the matching stage.
+{
+    kINPUT_S = 0, // Recorded on the input stream before the input transfer.
+    kINPUT_E = 1, // Recorded on the input stream after the input transfer.
+    kCOMPUTE_S = 2, // Recorded on the compute stream before the enqueue call.
+    kCOMPUTE_E = 3, // Recorded on the compute stream after the enqueue call.
+    kOUTPUT_S = 4, // Recorded on the output stream before the output transfer.
+    kOUTPUT_E = 5, // Recorded on the output stream after the output transfer.
+    kNUM = 6 // Number of event types; sizes the MultiEvent array.
+};
+
+using MultiStream = std::array<TrtCudaStream, static_cast<int32_t>(StreamType::kNUM)>;
+
+using MultiEvent = std::array<std::unique_ptr<TrtCudaEvent>, static_cast<int32_t>(EventType::kNUM)>;
+
+using EnqueueTimes = std::array<TimePoint, 2>;
+
+//!
+//! \class IterationBase
+//! \brief Inference iteration and streams management
+//!
+class IterationBase
+{
+    // Holds three streams (input/compute/output) and, per slot (mDepth slots), a set of events and enqueue times.
+public:
+    explicit IterationBase(int32_t id, InferenceOptions const& inference, BindingsBase& bindings)
+        : mBindings(bindings)
+        , mStreamId(id)
+        , mDepth(1 + inference.overlap) // One slot, plus inference.overlap additional ones.
+        , mActive(mDepth)
+        , mEvents(mDepth)
+        , mEnqueueTimes(mDepth)
+    {
+        for (auto& eventsAtDepth : mEvents) // Create every event of every slot up front.
+        {
+            std::generate(eventsAtDepth.begin(), eventsAtDepth.end(),
+                [&] { return std::make_unique<TrtCudaEvent>(!inference.spin); });
+        }
+    }
+
+    bool query(bool skipTransfers) // Enqueues one iteration into the current slot; false only if the enqueue fails.
+    {
+        if (mActive[mNext]) // The slot still holds an unsynchronized iteration: nothing is enqueued.
+        {
+            return true;
+        }
+
+        if (!skipTransfers)
+        {
+            record(EventType::kINPUT_S, StreamType::kINPUT);
+            setInputData(false);
+            record(EventType::kINPUT_E, StreamType::kINPUT);
+            wait(EventType::kINPUT_E, StreamType::kCOMPUTE); // Wait for input DMA before compute
+        }
+
+        record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE);
+        recordEnqueueTime(); // Host timestamp before the enqueue call.
+        if (!mEnqueue(getStream(StreamType::kCOMPUTE)))
+        {
+            return false;
+        }
+        recordEnqueueTime(); // Host timestamp after the enqueue call.
+        record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE);
+
+        if (!skipTransfers)
+        {
+            wait(EventType::kCOMPUTE_E, StreamType::kOUTPUT); // Wait for compute before output DMA
+            record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
+            fetchOutputData(false);
+            record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
+        }
+
+        mActive[mNext] = true;
+        moveNext();
+        return true;
+    }
+
+    float sync( // Completes the current slot if active, appends its trace and returns its compute start minus gpuStart.
+        TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, std::vector<InferenceTrace>& trace, bool skipTransfers)
+    {
+        if (mActive[mNext])
+        {
+            if (skipTransfers) // Block on the last event that was recorded for this slot.
+            {
+                getEvent(EventType::kCOMPUTE_E).synchronize();
+            }
+            else
+            {
+                getEvent(EventType::kOUTPUT_E).synchronize();
+            }
+            trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers));
+            mActive[mNext] = false;
+            return getEvent(EventType::kCOMPUTE_S) - gpuStart;
+        }
+        return 0; // Inactive slot: nothing to wait for.
+    }
+
+    void syncAll( // Drains every slot: sync() followed by moveNext(), mDepth times.
+        TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, std::vector<InferenceTrace>& trace, bool skipTransfers)
+    {
+        for (int32_t d = 0; d < mDepth; ++d)
+        {
+            sync(cpuStart, gpuStart, trace, skipTransfers);
+            moveNext();
+        }
+    }
+
+    void wait(TrtCudaEvent& gpuStart) // Makes the input stream wait on the given start event.
+    {
+        getStream(StreamType::kINPUT).wait(gpuStart);
+    }
+
+    void setInputData(bool sync) // Issues the input transfer on the input stream; optionally blocks until it is done.
+    {
+        mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
+        // additional sync to avoid overlapping with inference execution.
+        if (sync)
+        {
+            getStream(StreamType::kINPUT).synchronize();
+        }
+    }
+
+    void fetchOutputData(bool sync) // Issues the output transfer on the output stream; optionally blocks until done.
+    {
+        mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
+        // additional sync to avoid overlapping with inference execution.
+        if (sync)
+        {
+            getStream(StreamType::kOUTPUT).synchronize();
+        }
+    }
+
+protected:
+    void moveNext() // Stays at 0 for depth 1, toggles 0 <-> 1 for depth 2.
+    {
+        mNext = mDepth - 1 - mNext; // NOTE(review): mirrors the index, so it would not cycle through depths above 2.
+    }
+
+    TrtCudaStream& getStream(StreamType t)
+    {
+        return mStream[static_cast<int32_t>(t)];
+    }
+
+    TrtCudaEvent& getEvent(EventType t) // Event of the given type in the current slot.
+    {
+        return *mEvents[mNext][static_cast<int32_t>(t)];
+    }
+
+    void record(EventType e, StreamType s) // Records the current slot's event e on stream s.
+    {
+        getEvent(e).record(getStream(s));
+    }
+
+    void recordEnqueueTime() // Successive calls alternate between writing index 0 (start) and index 1 (end).
+    {
+        mEnqueueTimes[mNext][enqueueStart] = getCurrentTime();
+        enqueueStart = 1 - enqueueStart;
+    }
+
+    TimePoint getEnqueueTime(bool start) // start == true reads index 0, otherwise index 1, of the current slot.
+    {
+        return mEnqueueTimes[mNext][start ? 0 : 1];
+    }
+
+    void wait(EventType e, StreamType s) // Makes stream s wait on the current slot's event e.
+    {
+        getStream(s).wait(getEvent(e));
+    }
+
+    InferenceTrace getTrace(TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, bool skipTransfers)
+    { // With skipTransfers, input start/end collapse to compute start and output start/end to compute end.
+        float is
+            = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_S) - gpuStart;
+        float ie
+            = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_E) - gpuStart;
+        float os
+            = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_S) - gpuStart;
+        float oe
+            = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_E) - gpuStart;
+
+        return InferenceTrace(mStreamId,
+            std::chrono::duration<float, std::milli>(getEnqueueTime(true) - cpuStart).count(),
+            std::chrono::duration<float, std::milli>(getEnqueueTime(false) - cpuStart).count(), is, ie,
+            getEvent(EventType::kCOMPUTE_S) - gpuStart, getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe);
+    }
+
+    BindingsBase& mBindings; // Non-owning reference to the bindings used for the transfers.
+
+    TrtCudaGraph mGraph; // Graph object used by derived classes when CUDA graph capture is requested.
+    EnqueueFunction mEnqueue; // Set by derived classes; invoked by query() on the compute stream.
+
+    int32_t mStreamId{0};
+    int32_t mNext{0}; // Index of the current slot.
+    int32_t mDepth{2}; // default to double buffer to hide DMA transfers
+
+    std::vector<bool> mActive; // Per slot: true between a successful query() and the matching sync().
+    MultiStream mStream;
+    std::vector<MultiEvent> mEvents; // Per slot: one event per EventType.
+
+    int32_t enqueueStart{0}; // Which EnqueueTimes entry the next recordEnqueueTime() call writes.
+    std::vector<EnqueueTimes> mEnqueueTimes; // Per slot: host timestamps around the enqueue call.
+};
+
+//!
+//! \class IterationStd
+//! \brief Inference iteration and streams management for standard inference
+//!
+class IterationStd : public IterationBase
+{
+public:
+    explicit IterationStd( // Builds the base, then installs the enqueue function for this execution context.
+        int32_t id, InferenceOptions const& inference, nvinfer1::IExecutionContext& context, BindingsStd& bindings)
+        : IterationBase(id, inference, bindings)
+    {
+        createEnqueueFunction(inference, context, bindings);
+    }
+
+private:
+    void createEnqueueFunction( // Installs EnqueueExplicit; with inference.graph, tries to replace it by a graph launch.
+        InferenceOptions const& inference, nvinfer1::IExecutionContext& context, BindingsStd& bindings)
+    {
+        mEnqueue = EnqueueFunction(EnqueueExplicit(context, bindings));
+        if (inference.graph)
+        {
+            sample::gLogInfo << "Capturing CUDA graph for the current execution context" << std::endl;
+
+            TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
+            // Avoid capturing initialization calls by executing the enqueue function at least
+            // once before starting CUDA graph capture.
+            auto const ret = mEnqueue(stream);
+            if (!ret)
+            {
+                throw std::runtime_error("Inference enqueue failed."); // Propagates out of the constructor.
+            }
+            stream.synchronize();
+
+            mGraph.beginCapture(stream);
+            // The built TRT engine may contain operations that are not permitted under CUDA graph capture mode.
+            // When the stream is capturing, the enqueue call may return false if the current CUDA graph capture fails.
+            if (mEnqueue(stream))
+            {
+                mGraph.endCapture(stream);
+                mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph)); // From now on query() launches the graph.
+                sample::gLogInfo << "Successfully captured CUDA graph for the current execution context" << std::endl;
+            }
+            else
+            {
+                mGraph.endCaptureOnError(stream); // Capture failed: mEnqueue stays the plain EnqueueExplicit functor.
+                // Ensure any CUDA error has been cleaned up.
+                CHECK(cudaGetLastError());
+                sample::gLogWarning << "The built TensorRT engine contains operations that are not permitted under "
+                                       "CUDA graph capture mode."
+                                    << std::endl;
+                sample::gLogWarning << "The specified --useCudaGraph flag has been ignored. The inference will be "
+                                       "launched without using CUDA graph launch."
+                                    << std::endl;
+            }
+        }
+    }
+};
+
+#if ENABLE_UNIFIED_BUILDER
+//!
+//! \class IterationSafe
+//! \brief Inference iteration and streams management for safe inference
+//!
+class IterationSafe : public IterationBase
+{
+public:
+    explicit IterationSafe( // Builds the base, then installs the enqueue function for this safe graph.
+        int32_t id, InferenceOptions const& inference, nvinfer2::safe::ITRTGraph& graph, BindingsSafe& bindings)
+        : IterationBase(id, inference, bindings)
+    {
+        createEnqueueFunction(inference, graph, bindings);
+    }
+
+private:
+    void createEnqueueFunction( // Installs EnqueueExplicitSafe; with inference.graph, tries a capture first.
+        InferenceOptions const& inference, nvinfer2::safe::ITRTGraph& graph, BindingsSafe& bindings)
+    {
+        mEnqueue = EnqueueFunction(EnqueueExplicitSafe(graph, bindings));
+        if (inference.graph)
+        {
+            sample::gLogInfo << "Capturing CUDA graph for the current execution context" << std::endl;
+
+            TrtCudaStream& stream = getStream(StreamType::kCOMPUTE);
+            // Avoid capturing initialization calls by executing the enqueue function at least
+            // once before starting CUDA graph capture.
+            auto const ret = mEnqueue(stream);
+            if (!ret)
+            {
+                throw std::runtime_error("Inference enqueue failed."); // Propagates out of the constructor.
+            }
+            stream.synchronize();
+
+            mGraph.beginCapture(stream);
+            // The built TRT engine may contain operations that are not permitted under CUDA graph capture mode.
+            // When the stream is capturing, the enqueue call may return false if the current CUDA graph capture fails.
+            if (mEnqueue(stream))
+            {
+                mGraph.endCapture(stream);
+                mEnqueue = EnqueueFunction(EnqueueGraphSafe(graph)); // The new functor calls executeAsync() on graph.
+                sample::gLogInfo << "Successfully captured CUDA graph for the current execution context" << std::endl;
+            }
+            else
+            {
+                mGraph.endCaptureOnError(stream); // Capture failed: mEnqueue stays the EnqueueExplicitSafe functor.
+                // Ensure any CUDA error has been cleaned up.
+                CHECK(cudaGetLastError());
+                sample::gLogWarning << "The built TensorRT engine contains operations that are not permitted under "
+                                       "CUDA graph capture mode."
+                                    << std::endl;
+                sample::gLogWarning << "The specified --useCudaGraph flag has been ignored. The inference will be "
+                                       "launched without using CUDA graph launch."
+                                    << std::endl;
+            }
+        }
+    }
+};
+#endif
+
+bool inferenceLoop(std::vector<std::unique_ptr<IterationBase>>& iStreams, TimePoint const& cpuStart,
+    TrtCudaEvent const& gpuStart, int iterations, float maxDurationMs, float warmupMs,
+    std::vector<InferenceTrace>& trace, bool skipTransfers, float idleMs)
+{ // Repeats query()/sync() over all streams until both budgets are met; returns false as soon as a query() fails.
+    float durationMs = 0; // Largest value returned by sync() so far.
+    int32_t skip = 0; // Iterations attributed to warm-up; they extend the iteration budget.
+
+    if (maxDurationMs == -1.F) // Endless mode: this branch only leaves the function when a query() fails.
+    {
+        sample::gLogWarning << "--duration=-1 is specified, inference will run in an endless loop until"
+                            << " aborted with CTRL-C (SIGINT)" << std::endl;
+        while (true)
+        {
+            for (auto& s : iStreams)
+            {
+                if (!s->query(skipTransfers))
+                {
+                    return false;
+                }
+            }
+            for (auto& s : iStreams)
+            {
+                s->sync(cpuStart, gpuStart, trace, skipTransfers);
+            }
+        }
+    }
+
+    for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs; ++i) // Runs while either budget is open.
+    {
+        for (auto& s : iStreams)
+        {
+            if (!s->query(skipTransfers))
+            {
+                return false;
+            }
+        }
+        for (auto& s : iStreams)
+        {
+            durationMs = std::max(durationMs, s->sync(cpuStart, gpuStart, trace, skipTransfers));
+        }
+        if (durationMs < warmupMs) // Warming up
+        {
+            if (durationMs) // Skip complete iterations
+            {
+                ++skip;
+            }
+            continue; // Warm-up iterations also bypass the idle sleep below.
+        }
+        if (idleMs != 0.F)
+        {
+            std::this_thread::sleep_for(std::chrono::duration<float, std::milli>(idleMs));
+        }
+    }
+    for (auto& s : iStreams) // Drain whatever is still in flight in every slot.
+    {
+        s->syncAll(cpuStart, gpuStart, trace, skipTransfers);
+    }
+    return true;
+}
+
+void inferenceExecution(InferenceOptions const& inference, InferenceEnvironmentBase& iEnv, SyncStruct& sync,
+    int32_t const threadIdx, int32_t const streamsPerThread, int32_t device, std::vector<InferenceTrace>& trace,
+    ReportingOptions const& reporting) noexcept
+{ // Thread body: builds this thread's iterations, runs inferenceLoop() and merges the local trace into trace.
+    try
+    {
+        float warmupMs = inference.warmup;
+        float durationMs = -1.F; // -1 is passed through unchanged and selects the endless mode of inferenceLoop().
+        if (inference.duration != -1.F)
+        {
+            durationMs = inference.duration * 1000.F + warmupMs; // inference.duration is scaled by 1000 to get ms.
+        }
+
+        CHECK(cudaSetDevice(device));
+
+#if ENABLE_UNIFIED_BUILDER
+        if (iEnv.safe)
+        {
+            //! Function to make one iteration:
+            auto makeIteration = [&](int32_t s) -> std::unique_ptr<IterationSafe> {
+                int32_t const streamId{threadIdx * streamsPerThread + s};
+                auto iteration = std::make_unique<IterationSafe>(streamId, inference,
+                    *static_cast<InferenceEnvironmentSafe&>(iEnv).mClonedGraphs[streamId],
+                    *static_cast<InferenceEnvironmentSafe&>(iEnv).bindings[streamId]);
+                if (inference.skipTransfers)
+                {
+                    iteration->setInputData(true); // Upload inputs once, synchronously, since the loop will not.
+                }
+                return iteration;
+            };
+
+            std::vector<std::unique_ptr<IterationBase>> iStreams;
+            for (int32_t s = 0; s < streamsPerThread; ++s)
+            {
+                iStreams.emplace_back(makeIteration(s));
+            }
+
+            for (auto& s : iStreams)
+            {
+                s->wait(sync.gpuStart);
+            }
+            std::vector<InferenceTrace> localTrace;
+            if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart, inference.iterations, durationMs, warmupMs,
+                    localTrace, inference.skipTransfers, inference.idle))
+            {
+                std::lock_guard<std::mutex> lock{sync.mutex};
+                iEnv.error = true;
+            }
+            if (inference.skipTransfers) // NOTE(review): unlike the standard path, not gated on reporting options.
+            {
+                for (auto& s : iStreams)
+                {
+                    s->fetchOutputData(true);
+                }
+            }
+            std::lock_guard<std::mutex> lock{sync.mutex};
+            trace.insert(trace.end(), localTrace.begin(), localTrace.end());
+            return;
+        }
+#endif
+
+        //! Function to make one iteration:
+        auto makeIteration = [&](int32_t s) -> std::unique_ptr<IterationStd> {
+            int32_t const streamId{threadIdx * streamsPerThread + s};
+            auto iteration = std::make_unique<IterationStd>(streamId, inference,
+                *static_cast<InferenceEnvironmentStd&>(iEnv).getContext(streamId),
+                *static_cast<InferenceEnvironmentStd&>(iEnv).bindings[streamId]);
+            if (inference.skipTransfers)
+            {
+                iteration->setInputData(true); // Upload inputs once, synchronously, since the loop will not.
+            }
+            return iteration;
+        };
+
+        std::vector<std::unique_ptr<IterationBase>> iStreams;
+        for (int32_t s = 0; s < streamsPerThread; ++s)
+        {
+            iStreams.emplace_back(makeIteration(s));
+        }
+
+        for (auto& s : iStreams) // Every input stream waits on the shared GPU start event.
+        {
+            s->wait(sync.gpuStart);
+        }
+
+        std::vector<InferenceTrace> localTrace; // Filled without locking; merged under the mutex at the end.
+        if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart, inference.iterations, durationMs, warmupMs,
+                localTrace, inference.skipTransfers, inference.idle))
+        {
+            std::lock_guard<std::mutex> lock{sync.mutex};
+            iEnv.error = true;
+        }
+
+        auto const needOutput = reporting.output || !reporting.exportOutput.empty();
+        if (inference.skipTransfers && needOutput) // Outputs were never copied in the loop; fetch them once if needed.
+        {
+            for (auto& s : iStreams)
+            {
+                s->fetchOutputData(true);
+            }
+        }
+
+        {
+            std::lock_guard<std::mutex> lock{sync.mutex};
+            trace.insert(trace.end(), localTrace.begin(), localTrace.end());
+        }
+    }
+    catch (...) // The function is noexcept: any exception is converted into the shared error flag.
+    {
+        std::lock_guard<std::mutex> lock{sync.mutex};
+        iEnv.error = true;
+    }
+}
+
+inline std::thread makeThread(InferenceOptions const& inference, InferenceEnvironmentBase& iEnv, SyncStruct& sync,
+    int32_t threadIdx, int32_t streamsPerThread, int32_t device, std::vector<InferenceTrace>& trace,
+    ReportingOptions const& reporting)
+{ // Starts a thread running inferenceExecution(); reference arguments are wrapped so they are not copied.
+    return std::thread(inferenceExecution, std::cref(inference), std::ref(iEnv), std::ref(sync), threadIdx,
+        streamsPerThread, device, std::ref(trace), std::cref(reporting));
+}
+
+} // namespace
+
+bool runInference(InferenceOptions const& inference, InferenceEnvironmentBase& iEnv, int32_t device,
+    std::vector<InferenceTrace>& trace, ReportingOptions const& reporting)
+{ // Runs inference on worker threads, leaves trace sorted by h2dStart and returns !iEnv.error.
+    CHECK(cudaProfilerStart());
+
+    trace.resize(0); // Results of an earlier run are discarded.
+
+    SyncStruct sync;
+    sync.sleep = inference.sleep;
+    sync.mainStream.sleep(&sync.sleep);
+    sync.cpuStart = getCurrentTime();
+    sync.gpuStart.record(sync.mainStream); // Recorded after the sleep issued on the same stream.
+
+    // When multiple streams are used, trtexec can run inference in two modes:
+    // (1) if inference.threads is true, then run each stream on each thread.
+    // (2) if inference.threads is false, then run all streams on the same thread.
+    int32_t const numThreads = inference.threads ? inference.infStreams : 1;
+    int32_t const streamsPerThread = inference.threads ? 1 : inference.infStreams;
+
+    std::vector<std::thread> threads;
+    for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx)
+    {
+        threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx, streamsPerThread, device, trace, reporting));
+    }
+    for (auto& th : threads)
+    {
+        th.join();
+    }
+    CHECK(cudaProfilerStop());
+
+    auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; };
+    std::sort(trace.begin(), trace.end(), cmpTrace); // Threads append in arbitrary order; sort by h2dStart.
+
+
+    return !iEnv.error;
+}
+
+bool runMultiTasksInference(std::vector<std::unique_ptr<TaskInferenceEnvironment>>& tEnvList)
+{ // Runs every task environment on its own thread; returns true only if no task's iEnv->error is set.
+    CHECK(cudaProfilerStart());
+    cudaSetDeviceFlags(cudaDeviceScheduleSpin); // NOTE(review): the returned status is not checked.
+
+    SyncStruct sync;
+    sync.sleep = 0;
+    sync.mainStream.sleep(&sync.sleep);
+    sync.cpuStart = getCurrentTime();
+    sync.gpuStart.record(sync.mainStream);
+
+    std::vector<std::thread> threads;
+    for (size_t i = 0; i < tEnvList.size(); ++i)
+    {
+        auto& tEnv = tEnvList[i]; // Each task uses its own options, environment, device and trace vector.
+        threads.emplace_back(makeThread(
+            tEnv->iOptions, *(tEnv->iEnv), sync, /*threadIdx*/ 0, /*streamsPerThread*/ 1, tEnv->device, tEnv->trace,
+            tEnv->rOptions));
+    }
+    for (auto& th : threads)
+    {
+        th.join();
+    }
+
+    CHECK(cudaProfilerStop());
+
+    auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; };
+    for (auto& tEnv : tEnvList) // Each task's trace is sorted by h2dStart independently.
+    {
+        std::sort(tEnv->trace.begin(), tEnv->trace.end(), cmpTrace);
+    }
+
+    return std::none_of(tEnvList.begin(), tEnvList.end(),
+        [](std::unique_ptr<TaskInferenceEnvironment>& tEnv) { return tEnv->iEnv->error; });
+}
+
+namespace
+{
+size_t reportGpuMemory() // Logs free/total GPU memory; returns the bytes allocated since the previous call.
+{
+    static size_t prevFree{0}; // Free bytes seen by the previous call; 0 is treated as "no previous call".
+    size_t free{0};
+    size_t total{0};
+    size_t newlyAllocated{0}; // Stays 0 on the first call and whenever free memory did not shrink.
+    CHECK(cudaMemGetInfo(&free, &total));
+    sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB";
+    if (prevFree != 0)
+    {
+        newlyAllocated = (prevFree > free) ? (prevFree - free) : 0; // Clamp: size_t would wrap if memory was freed.
+        sample::gLogInfo << ", newly allocated GPU memory = " << newlyAllocated / 1024.0_MiB << " GiB";
+    }
+    sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB" << std::endl;
+    prevFree = free;
+    return newlyAllocated;
+}
+} // namespace
+
+//! Returns true if deserialization is slower than expected or fails, including in any of the timed iterations.
+bool timeDeserialize(InferenceEnvironmentBase& iEnv, SystemOptions const& sys)
+{
+    constexpr int32_t kNB_ITERS{20};
+    std::unique_ptr<IRuntime> rt{createRuntime()};
+    std::unique_ptr<ICudaEngine> engine;
+
+    SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError);
+
+    auto timeDeserializeFn = [&]() -> float {
+        engine.reset(nullptr); // Release the previous engine so that every run starts from the same state.
+        auto startClock = std::chrono::high_resolution_clock::now();
+        SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError);
+
+        for (auto const& pluginPath : sys.dynamicPlugins)
+        {
+            rt->getPluginRegistry().loadLibrary(pluginPath.c_str());
+        }
+        auto& reader = iEnv.engine.getFileReader();
+        auto& asyncReader = iEnv.engine.getAsyncFileReader();
+        ASSERT(reader.isOpen() || asyncReader.isOpen());
+        if (asyncReader.isOpen()) // The async reader is used whenever it is open; each reader is reset before use.
+        {
+            asyncReader.reset();
+            engine.reset(rt->deserializeCudaEngine(asyncReader));
+        }
+        else
+        {
+            reader.reset();
+            engine.reset(rt->deserializeCudaEngine(reader));
+        }
+        auto endClock = std::chrono::high_resolution_clock::now();
+        // Return the elapsed time in milliseconds, or NAN if deserialization failed.
+        return engine ? std::chrono::duration<float, std::milli>(endClock - startClock).count() : NAN;
+    };
+
+    // Warmup the caches to make sure that cache thrashing isn't throwing off the results
+    sample::gLogInfo << "Begin deserialization warmup..." << std::endl;
+    for (int32_t i = 0, e = 2; i < e; ++i)
+    {
+        timeDeserializeFn();
+    }
+    sample::gLogInfo << "Begin deserialization engine timing..." << std::endl;
+    float const first = timeDeserializeFn();
+
+    // Check if first deserialization succeeded.
+    if (std::isnan(first))
+    {
+        sample::gLogError << "Engine deserialization failed." << std::endl;
+        return true;
+    }
+
+    sample::gLogInfo << "First deserialization time = " << first << " milliseconds" << std::endl;
+
+    // Record initial gpu memory state.
+    reportGpuMemory();
+
+    float totalTime{0.F};
+    for (int32_t i = 0; i < kNB_ITERS; ++i)
+    {
+        totalTime += timeDeserializeFn();
+    }
+    if (std::isnan(totalTime)) // NAN propagates through the sum, so any failed timed iteration ends up here.
+    {
+        sample::gLogError << "Engine deserialization failed." << std::endl;
+        return true;
+    }
+    auto const averageTime = totalTime / kNB_ITERS;
+    // reportGpuMemory sometimes reports zero after a single deserialization of a small engine,
+    // so use the size of memory for all the iterations.
+    auto const totalEngineSizeGpu = reportGpuMemory();
+    sample::gLogInfo << "Total deserialization time = " << totalTime << " milliseconds in " << kNB_ITERS
+                     << " iterations, average time = " << averageTime << " milliseconds, first time = " << first
+                     << " milliseconds." << std::endl;
+    sample::gLogInfo << "Deserialization Bandwidth = " << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s" << std::endl;
+
+    // If the first deserialization is more than tolerance slower than
+    // the average deserialization, return true, which means an error occurred.
+    // The tolerance is set to 2x since the deserialization time is quick and susceptible
+    // to caching issues causing problems in the first timing.
+    auto const tolerance = 2.0F;
+    bool const isSlowerThanExpected = first > averageTime * tolerance;
+    if (isSlowerThanExpected)
+    {
+        sample::gLogInfo << "First deserialization time divided by average time is " << (first / averageTime)
+                         << ". Exceeds tolerance of " << tolerance << "x." << std::endl;
+    }
+    return isSlowerThanExpected;
+}
+
+std::string getLayerInformation(
+    nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context, nvinfer1::LayerInformationFormat format)
+{
+    // A runtime object is created first and lives until the function returns; the inspector comes from the engine.
+    std::unique_ptr<IRuntime> const runtimeHolder{createRuntime()};
+    std::unique_ptr<IEngineInspector> const engineInspector{engine->createEngineInspector()};
+    if (context)
+    {
+        engineInspector->setExecutionContext(context); // Only attach a context when the caller supplied one.
+    }
+    return engineInspector->getEngineInformation(format);
+}
+
+void Binding::fill(std::string const& fileName)
+{ // Loads the file's contents into the host buffer; the buffer's size is passed as the amount to load.
+    loadFromFile(fileName, static_cast<char*>(buffer->getHostBuffer()), buffer->getSize());
+}
+
+void Binding::fill() // Fills the host buffer with generated values whose range depends on dataType.
+{
+    switch (dataType)
+    {
+    case nvinfer1::DataType::kBOOL:
+    {
+        fillBuffer<bool>(buffer->getHostBuffer(), volume, 0, 1);
+        break;
+    }
+    case nvinfer1::DataType::kINT32:
+    {
+        fillBuffer<int32_t>(buffer->getHostBuffer(), volume, -128, 127);
+        break;
+    }
+    case nvinfer1::DataType::kINT64:
+    {
+        fillBuffer<int64_t>(buffer->getHostBuffer(), volume, -128, 127);
+        break;
+    }
+    case nvinfer1::DataType::kINT8:
+    {
+        fillBuffer<int8_t>(buffer->getHostBuffer(), volume, -128, 127);
+        break;
+    }
+    case nvinfer1::DataType::kFLOAT:
+    {
+        fillBuffer<float>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
+        break;
+    }
+    case nvinfer1::DataType::kHALF:
+    {
+        fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
+        break;
+    }
+    case nvinfer1::DataType::kBF16:
+    {
+        fillBuffer<BFloat16>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
+        break;
+    }
+    case nvinfer1::DataType::kUINT8:
+    {
+        fillBuffer<uint8_t>(buffer->getHostBuffer(), volume, 0, 255);
+        break;
+    }
+    case nvinfer1::DataType::kFP8:
+    {
+#if CUDA_VERSION < 11060
+        ASSERT(false && "FP8 is not supported");
+#else
+        fillBuffer<__nv_fp8_e4m3>(buffer->getHostBuffer(), volume, -1.0F, 1.0F);
+#endif
+        break; // Explicit break in both configurations: never fall through into the INT4 fill below.
+    }
+    case nvinfer1::DataType::kINT4:
+    {
+        // int4 is implemented as packing two elements into a single byte,
+        // so all possible bit patterns of the two int4 elements coincides with all possible bit patterns of
+        // an uint8.
+        fillBuffer<uint8_t>(buffer->getHostBuffer(), volume, 0, 255);
+        break;
+    }
+    case DataType::kFP4: ASSERT(false && "FP4 is not supported"); break;
+    case DataType::kE8M0: ASSERT(false && "E8M0 is not supported"); break;
+    }
+}
+
+void Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, int32_t spv,
+    std::string const separator /*= " "*/) const
+{ // Write the binding's host-side values to `os` through dumpBuffer<T>, with T selected by dataType.
+    void* outputBuffer{};
+    if (outputAllocator != nullptr)
+    {
+        outputBuffer = outputAllocator->getBuffer()->getHostBuffer();
+        // Overwrite dimensions with those reported by the output allocator.
+        dims = outputAllocator->getFinalDims();
+        os << "Final shape is " << dims << " reported by the output allocator." << std::endl;
+    }
+    else
+    {
+        outputBuffer = buffer->getHostBuffer();
+    }
+    switch (dataType)
+    {
+    case nvinfer1::DataType::kBOOL:
+    {
+        dumpBuffer<bool>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kINT32:
+    {
+        dumpBuffer<int32_t>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kINT8:
+    {
+        dumpBuffer<int8_t>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kFLOAT:
+    {
+        dumpBuffer<float>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kHALF:
+    {
+        dumpBuffer<__half>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kBF16:
+    {
+        dumpBuffer<BFloat16>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kUINT8:
+    {
+        dumpBuffer<uint8_t>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kINT64:
+    {
+        dumpBuffer<int64_t>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kFP8:
+#if CUDA_VERSION < 11060
+        ASSERT(false && "FP8 is not supported"); break; // break: never run the INT4 case if ASSERT returns.
+#else
+    {
+        dumpBuffer<__nv_fp8_e4m3>(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+#endif
+    case nvinfer1::DataType::kINT4:
+    {
+        dumpInt4Buffer(outputBuffer, separator, os, dims, strides, vectorDim, spv);
+        break;
+    }
+    case nvinfer1::DataType::kFP4: ASSERT(false && "FP4 is not supported"); break;
+    case nvinfer1::DataType::kE8M0: ASSERT(false && "E8M0 is not supported"); break;
+    }
+}
+
+void BindingsBase::addBinding(TensorInfo const& tensorInfo, std::string const& fileName /*= ""*/)
+{
+    // Register tensorInfo under its binding index, set up its storage and, for inputs, populate the host data.
+    auto const b = tensorInfo.bindingIndex;
+    // A negative index would wrap to a huge size_t below and grow both vectors until allocation fails.
+    ASSERT(b >= 0 && "Binding index must be non-negative");
+    while (mBindings.size() <= static_cast<size_t>(b))
+    {
+        mBindings.emplace_back();
+        mDevicePointers.emplace_back();
+    }
+    mNames[tensorInfo.name] = b;
+    mBindings[b].isInput = tensorInfo.isInput;
+    mBindings[b].volume = tensorInfo.vol;
+    mBindings[b].dataType = tensorInfo.dataType;
+    //! Make a UnifiedMirroredBuffer if useManaged, or a DiscreteMirroredBuffer otherwise.
+    auto makeBuffer = [](bool useManaged) -> std::unique_ptr<IMirroredBuffer> {
+        if (useManaged)
+        {
+            return std::make_unique<UnifiedMirroredBuffer>();
+        }
+        return std::make_unique<DiscreteMirroredBuffer>();
+    };
+    if (tensorInfo.isDynamic)
+    {
+        ASSERT(!tensorInfo.isInput); // Only output shape can be possibly unknown because of DDS.
+        if (mBindings[b].outputAllocator == nullptr)
+        {
+            mBindings[b].outputAllocator = std::make_unique<OutputAllocator>(makeBuffer(mUseManaged));
+        }
+    }
+    else
+    {
+        if (mBindings[b].buffer == nullptr)
+        {
+            mBindings[b].buffer = makeBuffer(mUseManaged);
+        }
+        // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
+        // even for empty tensors, so allocate a dummy byte.
+        if (tensorInfo.vol == 0)
+        {
+            mBindings[b].buffer->allocate(1);
+        }
+        else
+        {
+            mBindings[b].buffer->allocate(samplesCommon::getNbBytes(tensorInfo.dataType, tensorInfo.vol));
+        }
+        mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer();
+    }
+    if (tensorInfo.isInput)
+    {
+        if (fileName.empty())
+        {
+            fill(b);
+        }
+        else
+        {
+            fill(b, fileName);
+        }
+    }
+}
+
+void** BindingsBase::getDeviceBuffers()
+{
+    return mDevicePointers.data(); // Indexed by binding index, as filled in by addBinding.
+}
+
+void BindingsBase::transferInputToDevice(TrtCudaStream& stream)
+{ // Call hostToDevice(stream) on the buffer of every registered input binding.
+    for (auto const& nameAndIndex : mNames)
+    {
+        if (mBindings[nameAndIndex.second].isInput)
+        {
+            mBindings[nameAndIndex.second].buffer->hostToDevice(stream);
+        }
+    }
+}
+
+void BindingsBase::transferOutputToHost(TrtCudaStream& stream)
+{ // Call deviceToHost(stream) for every output binding, whichever object owns its storage.
+    for (auto& entry : mNames)
+    {
+        auto& binding = mBindings[entry.second];
+        if (binding.isInput)
+        {
+            continue;
+        }
+        if (binding.outputAllocator == nullptr)
+        {
+            binding.buffer->deviceToHost(stream);
+            continue;
+        }
+        binding.outputAllocator->getBuffer()->deviceToHost(stream); // Storage is held by the output allocator.
+    }
+}
+
+void BindingsStd::dumpBindingValues(nvinfer1::IExecutionContext const& context, int32_t binding, std::ostream& os,
+    std::string const& separator /*= " "*/, int32_t batch /*= 1*/) const
+{
+    // Gather the tensor's layout metadata by name and let the Binding print itself; `batch` is not used.
+    auto const& engine = context.getEngine();
+    auto const ioName = engine.getIOTensorName(binding);
+    Dims const shape = context.getTensorShape(ioName);
+    Dims const tensorStrides = context.getTensorStrides(ioName);
+    int32_t const vecDim = engine.getTensorVectorizedDim(ioName);
+    mBindings[binding].dump(os, shape, tensorStrides, vecDim, engine.getTensorComponentsPerElement(ioName), separator);
+}
+
+namespace
+{
+// File-local helper: the shape that `context` reports for the tensor called `name`.
+Dims getBindingDimensions(nvinfer1::IExecutionContext const& context, std::string const& name)
+{
+    return context.getTensorShape(name.c_str());
+}
+} // namespace
+
+void BindingsStd::dumpRawBindingToFiles(nvinfer1::IExecutionContext const& context, std::ostream& os) const
+{ // Write the host bytes of every binding to its own .raw file and log each file name to `os`.
+    os << "Dumping I/O Bindings to RAW Files:" << std::endl;
+    for (auto const& n : mNames)
+    {
+        auto name = n.first;
+        auto bIndex = n.second;
+        auto const& binding = mBindings[bIndex];
+        void* outputBuffer{};
+        if (binding.outputAllocator != nullptr) // The host data lives in the output allocator's buffer if present.
+        {
+            outputBuffer = binding.outputAllocator->getBuffer()->getHostBuffer();
+        }
+        else
+        {
+            outputBuffer = binding.buffer->getHostBuffer();
+        }
+        // Render the shape as dot-separated extents (dims 1, 3, 224 become "1.3.224").
+        Dims dims = getBindingDimensions(context, name);
+        std::string dimsStr;
+        std::string dotStr;
+
+        for (int32_t i = 0; i < dims.nbDims; i++)
+        {
+            dimsStr += dotStr + std::to_string(dims.d[i]);
+            dotStr = "."; // Empty before the first extent, "." afterwards.
+        }
+
+        std::string const bindingTypeStr = (binding.isInput ? "input" : "output");
+        // File name: <name>.<input|output>.<dims>.<datatype>.raw, the whole string run through genFilenameSafeString.
+        std::stringstream fileNameStream;
+        fileNameStream << name << "." << bindingTypeStr << "." << dimsStr << "." << binding.dataType << ".raw";
+        std::string fileName = genFilenameSafeString(fileNameStream.str());
+
+        os << "Writing file for " << bindingTypeStr << " binding " << name << " (with datatype " << binding.dataType
+           << " and dimensions " << dimsStr << ") to " << fileName << std::endl;
+        // NOTE(review): size comes from binding.volume even for output-allocator buffers — confirm they match.
+        std::ofstream f(fileName, std::ios::out | std::ios::binary);
+        ASSERT(f && "Cannot open file for write");
+        f.write(static_cast<char*>(outputBuffer), samplesCommon::getNbBytes(binding.dataType, binding.volume));
+        f.close(); // NOTE(review): the stream state after write/close is not checked.
+    }
+}
+
+void BindingsStd::dumpBindingDimensions(
+    std::string const& name, nvinfer1::IExecutionContext const& context, std::ostream& os) const
+{ // Stream the shape that `context` reports for tensor `name` to `os`.
+    auto const dims = context.getTensorShape(name.c_str());
+    // Do not add a newline terminator, because the caller may be outputting a JSON string.
+    os << dims;
+}
+
+std::unordered_map<std::string, int> BindingsBase::getBindings(std::function<bool(Binding const&)> predicate) const
+{
+    std::unordered_map<std::string, int> selected; // name -> index of every binding accepted by `predicate`
+    for (auto const& nameAndIndex : mNames)
+    {
+        if (!predicate(mBindings[nameAndIndex.second]))
+        {
+            continue;
+        }
+        selected.insert(nameAndIndex);
+    }
+    return selected;
+}
+
+bool BindingsStd::setTensorAddresses(nvinfer1::IExecutionContext& context) const
+{ // Register each device-located tensor with `context`; stops and returns false at the first rejection.
+    for (auto const& entry : mNames)
+    {
+        char const* const tensorName = entry.first.c_str();
+        if (context.getEngine().getTensorLocation(tensorName) != TensorLocation::kDEVICE)
+        {
+            continue;
+        }
+        auto const& binding = mBindings[entry.second];
+        bool accepted{false};
+        if (binding.outputAllocator != nullptr)
+        {
+            accepted = context.setOutputAllocator(tensorName, binding.outputAllocator.get());
+        }
+        else
+        {
+            accepted = context.setTensorAddress(tensorName, mDevicePointers[entry.second]);
+        }
+        if (!accepted)
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+#if ENABLE_UNIFIED_BUILDER
+namespace
+{
+Dims getBindingDimensions(ITRTGraph& graph, std::string const& name)
+{ // Shape of I/O tensor `name`, read from the descriptor that `graph` fills in.
+    nvinfer2::safe::TensorDescriptor desc;
+    graph.getIOTensorDescriptor(desc, name.c_str()); // NOTE(review): the call's result is not checked.
+    return desc.shape;
+}
+} // namespace
+
+void BindingsSafe::dumpBindingDimensions(std::string const& name, ITRTGraph const& graph, std::ostream& os) const
+{
+    // Do not add a newline terminator, because the caller may be outputting a JSON string.
+    os << getBindingDimensions(const_cast<ITRTGraph&>(graph), name); // The helper takes a non-const graph.
+}
+
+void BindingsSafe::dumpBindingValues(ITRTGraph const& graph, int32_t binding, std::ostream& os,
+    std::string const& separator /*= " "*/, int32_t batch /*= 1*/) const
+{ // Print the values of I/O tensor number `binding` of `graph` to `os`; `batch` is not used.
+    char const* tensorName{nullptr}; // Initialized so a failed name lookup cannot leave an indeterminate pointer.
+    graph.getIOTensorName(tensorName, binding);
+    nvinfer2::safe::TensorDescriptor desc;
+    graph.getIOTensorDescriptor(desc, tensorName);
+    Dims dims = desc.shape;
+    Dims strides = desc.stride;
+    // int32_t vectorDim = desc.vectorizedDim;
+    // int32_t const spv = desc.componentsPerVector;
+    // Vectorization info is not forwarded: -1 is passed for both vectorDim and spv.
+    mBindings[binding].dump(os, dims, strides, -1, -1, separator);
+}
+
+void BindingsSafe::dumpRawBindingToFiles(ITRTGraph& graph, std::ostream& os) const
+{ // Write the host bytes of every binding to its own .raw file and log each file name to `os`.
+    os << "Dumping I/O Bindings to RAW Files:" << std::endl;
+    for (auto const& n : mNames)
+    {
+        auto name = n.first;
+        auto bIndex = n.second;
+        auto const& binding = mBindings[bIndex];
+        void* outputBuffer{};
+        if (binding.outputAllocator != nullptr) // The host data lives in the output allocator's buffer if present.
+        {
+            outputBuffer = binding.outputAllocator->getBuffer()->getHostBuffer();
+        }
+        else
+        {
+            outputBuffer = binding.buffer->getHostBuffer();
+        }
+        // Render the shape as dot-separated extents (dims 1, 3, 224 become "1.3.224").
+        Dims dims = getBindingDimensions(graph, name);
+        std::string dimsStr;
+        std::string dotStr;
+
+        for (int32_t i = 0; i < dims.nbDims; i++)
+        {
+            dimsStr += dotStr + std::to_string(dims.d[i]);
+            dotStr = "."; // Empty before the first extent, "." afterwards.
+        }
+
+        std::string const bindingTypeStr = (binding.isInput ? "input" : "output");
+        // File name: <name>.<input|output>.<dims>.<datatype>.raw; only the tensor name is sanitized here.
+        std::stringstream fileName;
+        fileName << genFilenameSafeString(name) << "." << bindingTypeStr << "." << dimsStr << "." << binding.dataType
+                 << ".raw";
+
+        os << "Writing file for " << bindingTypeStr << " binding " << name << " (with datatype " << binding.dataType
+           << " and dimensions " << dimsStr << ") to " << fileName.str() << std::endl;
+        // NOTE(review): size comes from binding.volume even for output-allocator buffers — confirm they match.
+        std::ofstream f(fileName.str(), std::ios::out | std::ios::binary);
+        ASSERT(f && "Cannot open file for write");
+        f.write(static_cast<char*>(outputBuffer), samplesCommon::getNbBytes(binding.dataType, binding.volume));
+        f.close(); // NOTE(review): the stream state after write/close is not checked.
+    }
+}
+
+bool BindingsSafe::setTensorAddresses(ITRTGraph& graph) const
+{ // Give `graph` an address for each I/O tensor whose memory placement is kGPU or kNONE.
+    for (auto const& b : mNames)
+    {
+        auto const name = b.first.c_str();
+        nvinfer2::safe::TensorDescriptor desc;
+        graph.getIOTensorDescriptor(desc, name); // NOTE(review): the call's result is not checked.
+        bool onGpu = desc.memPlacement == nvinfer2::safe::MemoryPlacement::kGPU
+            || desc.memPlacement == nvinfer2::safe::MemoryPlacement::kNONE;
+        if (onGpu)
+        {
+            if (mBindings[b.second].outputAllocator != nullptr)
+            {
+                nvinfer2::safe::TypedArray tensor = safe::createTypedArray( // NOTE(review): see argument below.
+                    mBindings[b.second].outputAllocator->getBuffer(), desc.dataType, desc.sizeInBytes);
+                graph.setIOTensorAddress(name, tensor); // NOTE(review): getBuffer() above vs. a device pointer below.
+            }
+            else
+            {
+                nvinfer2::safe::TypedArray tensor
+                    = safe::createTypedArray(mDevicePointers[b.second], desc.dataType, desc.sizeInBytes);
+                graph.setIOTensorAddress(name, tensor); // NOTE(review): the call's result is not checked.
+            }
+        }
+    }
+    return true; // No failure path exists in this function, so callers always receive true.
+}
+#endif
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleInference.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleInference.h
new file mode 100644
index 0000000000000000000000000000000000000000..a715695db6f0ebbdd1250b49804440777d3092a6
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleInference.h
@@ -0,0 +1,488 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_INFERENCE_H
+#define TRT_SAMPLE_INFERENCE_H
+
+#include "debugTensorWriter.h"
+#include "sampleDevice.h"
+#include "sampleEngines.h"
+#include "sampleReporting.h"
+#include "sampleUtils.h"
+
+#include <functional>
+#include <iostream>
+#include <list>
+#include <memory>
+#include <string>
+#include <vector>
+
+#if ENABLE_UNIFIED_BUILDER
+#include "safeCudaAllocator.h"
+#endif
+namespace sample
+{
+using LibraryPtr = std::unique_ptr<samplesCommon::DynamicLibrary>;
+
+std::string const TRT_NVINFER_NAME = "nvinfer";
+std::string const TRT_ONNXPARSER_NAME = "nvonnxparser";
+std::string const TRT_LIB_SUFFIX = "";
+
+#if !TRT_STATIC
+#if defined(_WIN32)
+std::string const kNVINFER_PLUGIN_LIBNAME
+    = std::string{"nvinfer_plugin_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"};
+std::string const kNVINFER_LIBNAME = std::string(TRT_NVINFER_NAME) + std::string{"_"}
+    + std::to_string(NV_TENSORRT_MAJOR) + TRT_LIB_SUFFIX + std::string{".dll"};
+std::string const kNVINFER_SAFE_LIBNAME
+    = std::string{"nvinfer_safe_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"};
+std::string const kNVONNXPARSER_LIBNAME = std::string(TRT_ONNXPARSER_NAME) + std::string{"_"}
+    + std::to_string(NV_TENSORRT_MAJOR) + TRT_LIB_SUFFIX + std::string{".dll"};
+std::string const kNVINFER_LEAN_LIBNAME
+    = std::string{"nvinfer_lean_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"};
+std::string const kNVINFER_DISPATCH_LIBNAME
+    = std::string{"nvinfer_dispatch_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"};
+#else
+std::string const kNVINFER_PLUGIN_LIBNAME = std::string{"libnvinfer_plugin.so."} + std::to_string(NV_TENSORRT_MAJOR);
+std::string const kNVINFER_LIBNAME
+    = std::string{"lib"} + std::string(TRT_NVINFER_NAME) + std::string{".so."} + std::to_string(NV_TENSORRT_MAJOR);
+std::string const kNVINFER_SAFE_LIBNAME = std::string{"libnvinfer_safe.so."} + std::to_string(NV_TENSORRT_MAJOR);
+std::string const kNVONNXPARSER_LIBNAME
+    = std::string{"lib"} + std::string(TRT_ONNXPARSER_NAME) + std::string{".so."} + std::to_string(NV_TENSORRT_MAJOR);
+std::string const kNVINFER_LEAN_LIBNAME = std::string{"libnvinfer_lean.so."} + std::to_string(NV_TENSORRT_MAJOR);
+std::string const kNVINFER_DISPATCH_LIBNAME
+    = std::string{"libnvinfer_dispatch.so."} + std::to_string(NV_TENSORRT_MAJOR);
+#endif
+
+std::string const& getRuntimeLibraryName(RuntimeMode const mode);
+
+template <typename FetchPtrs>
+bool initLibrary(LibraryPtr& libPtr, std::string const& libName, FetchPtrs fetchFunc)
+{ // Load libName into libPtr and run fetchFunc on it; on any exception, reset libPtr, log, and return false.
+    if (libPtr != nullptr)
+    {
+        return true; // Already loaded: nothing is reloaded and fetchFunc is not called again.
+    }
+    try
+    {
+        libPtr.reset(new samplesCommon::DynamicLibrary{libName});
+        fetchFunc(libPtr.get()); // fetchFunc receives the raw library pointer.
+    }
+    catch (std::exception const& e)
+    {
+        libPtr.reset(); // Drop the library so a later call starts from scratch.
+        sample::gLogError << "Could not load library " << libName << ": " << e.what() << std::endl;
+        return false;
+    }
+    catch (...) // Same handling for non-standard exceptions, minus the message text.
+    {
+        libPtr.reset();
+        sample::gLogError << "Could not load library " << libName << std::endl;
+        return false;
+    }
+
+    return true;
+}
+#endif // !TRT_STATIC
+
+#if ENABLE_UNIFIED_BUILDER
+namespace safe
+{
+
+//!
+//! \brief Initialize the NVIDIA Inference Safe Runtime library
+//!
+//! This function dynamically loads the Safe TensorRT runtime library and initializes
+//! function pointers for safe TensorRT operations. It is used to set up the safe runtime
+//! environment for inference with safety-certified TensorRT engines.
+//!
+//! \return true if the safe runtime library was successfully loaded and initialized,
+//!         false otherwise (e.g., in static builds or if library loading fails)
+//!
+bool initNvinferSafe();
+
+//!
+//! \brief Create a safe TRT graph from serialized engine data
+//!
+//! This function creates a safe TRT graph from serialized engine data. It is used to create
+//! a safe TRT graph for inference with safety-certified TensorRT engines.
+//!
+//! \param graph: Pointer to the safe TRT graph to be created
+//! \param blob: Pointer to the serialized engine data
+//! \param size: Size of the serialized engine data
+//! \param recorder: Reference to the safe recorder
+//! \param useManaged: Flag indicating whether to use managed memory
+//! \param allocator: Pointer to the safe memory allocator
+//! \return Error code indicating the success or failure of the operation
+//!
+nvinfer1::ErrorCode createSafeTRTGraph(nvinfer2::safe::ITRTGraph*& graph, void const* blob, int64_t size,
+    ISafeRecorder& recorder, bool useManaged, ISafeMemAllocator* allocator);
+
+//!
+//! \brief Destroy a safe TRT graph and release resources
+//!
+//! This function destroys a safe TRT graph and releases the associated resources. It is used to clean up
+//! the safe TRT graph after inference with safety-certified TensorRT engines.
+//!
+//! \param graph: Pointer to the safe TRT graph to be destroyed
+//! \return Error code indicating the success or failure of the operation
+//!
+nvinfer1::ErrorCode destroySafeTRTGraph(nvinfer2::safe::ITRTGraph*& graph);
+
+//!
+//! \brief Get the safe plugin registry for loading plugins
+//!
+//! This function retrieves the safe plugin registry for loading plugins. It is used to get the safe plugin registry
+//! for loading plugins with safety-certified TensorRT engines.
+//!
+//! \param recorder: Reference to the safe recorder
+//! \return Pointer to the safe plugin registry
+//!
+nvinfer2::safe::ISafePluginRegistry* getSafePluginRegistry(ISafeRecorder& recorder);
+} // namespace safe
+#endif
+
+struct InferenceEnvironmentBase
+{
+    InferenceEnvironmentBase() = delete;
+    virtual ~InferenceEnvironmentBase() = default;
+    InferenceEnvironmentBase(InferenceEnvironmentBase const& other) = delete;
+    InferenceEnvironmentBase(InferenceEnvironmentBase&& other) = delete;
+    InferenceEnvironmentBase(BuildEnvironment& bEnv) // Takes over bEnv.engine, which is moved-from afterwards.
+        : engine(std::move(bEnv.engine))
+        , safe(engine.isSafe()) // Ask the member, not the moved-from bEnv.engine; `engine` is built first.
+        , cmdline(bEnv.cmdline)
+    {
+    }
+    // Declaration order matters: `engine` must stay ahead of `safe`, whose initializer reads it.
+    LazilyDeserializedEngine engine;
+    std::unique_ptr<Profiler> profiler;
+    std::vector<TrtDeviceBuffer>
+        deviceMemory; //!< Device memory used for inference when the allocation strategy is not static.
+    std::unique_ptr<DebugTensorWriter> listener;
+    bool error{false};
+
+    bool safe{false}; //!< Value of engine.isSafe() captured at construction.
+    std::string cmdline; //!< Copy of bEnv.cmdline.
+};
+
+struct InferenceEnvironmentStd : public InferenceEnvironmentBase
+{
+    InferenceEnvironmentStd() = delete;
+    InferenceEnvironmentStd(InferenceEnvironmentStd const& other) = delete;
+    InferenceEnvironmentStd(InferenceEnvironmentStd&& other) = delete;
+    InferenceEnvironmentStd(BuildEnvironment& bEnv) // Forwards to the base, which takes over bEnv.engine.
+        : InferenceEnvironmentBase(bEnv)
+    {
+    }
+    std::vector<std::unique_ptr<nvinfer1::IExecutionContext>> contexts; //!< Indexed by stream index (getContext).
+    std::vector<std::unique_ptr<BindingsStd>> bindings; //!< Presumably one per context — TODO confirm.
+    //! Returns contexts[streamIdx] as a raw, non-owning pointer.
+    inline nvinfer1::IExecutionContext* getContext(int32_t streamIdx);
+
+    //! Storage for input shape tensors.
+    //!
+    //! It's important that the addresses of the data do not change between the calls to
+    //! setTensorAddress/setInputShape (which tells TensorRT where the input shape tensor is)
+    //! and enqueueV3 (when TensorRT might use the input shape tensor).
+    //!
+    //! The input shape tensors could alternatively be handled via member bindings,
+    //! but it simplifies control-flow to store the data here since it's shared across
+    //! the bindings.
+    std::list<std::vector<int64_t>> inputShapeTensorValues;
+};
+
+#if ENABLE_UNIFIED_BUILDER
+// Forward declaration of BindingsSafe
+class BindingsSafe;
+
+struct InferenceEnvironmentSafe : public InferenceEnvironmentBase
+{
+    InferenceEnvironmentSafe() = delete;
+    InferenceEnvironmentSafe(InferenceEnvironmentSafe const& other) = delete;
+    InferenceEnvironmentSafe(InferenceEnvironmentSafe&& other) = delete;
+    InferenceEnvironmentSafe(BuildEnvironment& bEnv) // Forwards to the base, which takes over bEnv.engine.
+        : InferenceEnvironmentBase(bEnv)
+    {
+    }
+
+    std::vector<std::unique_ptr<BindingsSafe>> bindings; //!< Presumably one per cloned graph — TODO confirm.
+    inline void* getClonedGraph(int32_t streamIdx); // NOTE(review): declared inline; definition not in this header.
+    // NOTE(review): the `m` prefix differs from the other public members of these structs.
+    std::vector<std::unique_ptr<nvinfer2::safe::ITRTGraph>> mClonedGraphs;
+};
+#endif
+
+inline nvinfer1::IExecutionContext* InferenceEnvironmentStd::getContext(int32_t streamIdx)
+{
+    return contexts[streamIdx].get(); // No bounds check: streamIdx must be a valid index into contexts.
+}
+
+//!
+//! \brief Set up contexts/graphs and bindings for inference
+//!
+bool setUpInference(InferenceEnvironmentBase& iEnv, InferenceOptions const& inference, SystemOptions const& system);
+
+#if ENABLE_UNIFIED_BUILDER
+//!
+//! \brief Set up graphs and bindings for safe inference
+//!
+bool setUpSafeInference(InferenceEnvironmentSafe& iEnv, InferenceOptions const& inference, SystemOptions const& system);
+#endif
+
+//!
+//! \brief Set up contexts and bindings for standard inference
+//!
+bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& inference, SystemOptions const& system);
+
+//!
+//! \brief Deserialize the engine and time how long it takes.
+//!
+bool timeDeserialize(InferenceEnvironmentBase& iEnv, SystemOptions const& sys);
+
+//!
+//! \brief Run inference and collect timing, return false if any error hit during inference
+//!
+bool runInference(InferenceOptions const& inference, InferenceEnvironmentBase& iEnv, int32_t device,
+    std::vector<InferenceTrace>& trace, ReportingOptions const& reporting);
+
+//!
+//! \brief Get layer information of the engine.
+//!
+std::string getLayerInformation(
+    nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context, nvinfer1::LayerInformationFormat format);
+
+struct Binding
+{
+    bool isInput{false};
+    std::unique_ptr<IMirroredBuffer> buffer; //!< Storage used by dump() when outputAllocator is null.
+    std::unique_ptr<OutputAllocator> outputAllocator; //!< When set, dump() reads data and final dims from it.
+    int64_t volume{0}; //!< Count handed to fillBuffer and to getNbBytes when sizing raw dumps.
+    nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
+    //! Load the host buffer from a file.
+    void fill(std::string const& fileName);
+    //! Fill the host buffer with values whose type and range depend on dataType.
+    void fill();
+    //! Write the host-side values to os using the given dims, strides and vectorization info.
+    void dump(std::ostream& os, nvinfer1::Dims dims, nvinfer1::Dims strides, int32_t vectorDim, int32_t spv,
+        std::string const separator = " ") const;
+};
+
+struct TensorInfo
+{
+    int32_t bindingIndex{-1}; //!< -1 until assigned; BindingsBase::addBinding uses it as a vector index.
+    char const* name{nullptr}; //!< Non-owning; used as the key of the name -> index map in addBinding.
+    nvinfer1::Dims dims{};
+    bool isDynamic{}; //!< When true, addBinding creates an output allocator instead of a fixed buffer.
+    int32_t comps{-1};
+    nvinfer1::Dims strides{};
+    int32_t vectorDimIndex{-1};
+    bool isInput{};
+    nvinfer1::DataType dataType{};
+    int64_t vol{-1}; //!< -1 until updateVolume() runs.
+    //! Recompute vol from dims, strides, vectorDimIndex and comps for the given batch.
+    void updateVolume(int32_t batch)
+    {
+        vol = volume(dims, strides, vectorDimIndex, comps, batch);
+    }
+};
+
+class BindingsBase
+{
+public:
+    BindingsBase() = delete;
+    explicit BindingsBase(bool useManaged) // useManaged selects the mirrored-buffer type created by addBinding.
+        : mUseManaged(useManaged)
+    {
+    }
+    //! Register a tensor; inputs are filled from fileName, or by Binding::fill() when fileName is empty.
+    void addBinding(TensorInfo const& tensorInfo, std::string const& fileName = "");
+    //! Pointer to the array of device pointers, indexed by binding index.
+    void** getDeviceBuffers();
+    //! Call hostToDevice(stream) on every input binding's buffer.
+    void transferInputToDevice(TrtCudaStream& stream);
+    //! Call deviceToHost(stream) on every output binding's buffer (or its output allocator's buffer).
+    void transferOutputToHost(TrtCudaStream& stream);
+    //! Fill binding number `binding` from a file. No bounds check on `binding`.
+    void fill(int binding, std::string const& fileName)
+    {
+        mBindings[binding].fill(fileName);
+    }
+    //! Fill binding number `binding` via Binding::fill(). No bounds check on `binding`.
+    void fill(int binding)
+    {
+        mBindings[binding].fill();
+    }
+    //! Name -> index map restricted to input bindings.
+    std::unordered_map<std::string, int> getInputBindings() const
+    {
+        auto isInput = [](Binding const& b) { return b.isInput; };
+        return getBindings(isInput);
+    }
+    //! Name -> index map restricted to output bindings.
+    std::unordered_map<std::string, int> getOutputBindings() const
+    {
+        auto isOutput = [](Binding const& b) { return !b.isInput; };
+        return getBindings(isOutput);
+    }
+    //! Name -> index map of all bindings.
+    std::unordered_map<std::string, int> getBindings() const
+    {
+        auto all = [](Binding const& b) { return true; };
+        return getBindings(all);
+    }
+    //! Name -> index map of the bindings for which predicate returns true.
+    std::unordered_map<std::string, int> getBindings(std::function<bool(Binding const&)> predicate) const;
+
+protected:
+    std::unordered_map<std::string, int32_t> mNames; //!< Tensor name -> index into mBindings/mDevicePointers.
+    std::vector<Binding> mBindings;
+    std::vector<void*> mDevicePointers; //!< Kept the same length as mBindings by addBinding.
+    bool mUseManaged{false};
+};
+
+class BindingsStd : public BindingsBase
+{
+public:
+    BindingsStd() = delete;
+    explicit BindingsStd(bool useManaged)
+        : BindingsBase(useManaged)
+    {
+    }
+    //! Dump only the input bindings.
+    void dumpInputs(nvinfer1::IExecutionContext const& context, std::ostream& os) const
+    {
+        auto isInput = [](Binding const& b) { return b.isInput; };
+        dumpBindings(context, isInput, os);
+    }
+    //! Dump only the output bindings.
+    void dumpOutputs(nvinfer1::IExecutionContext const& context, std::ostream& os) const
+    {
+        auto isOutput = [](Binding const& b) { return !b.isInput; };
+        dumpBindings(context, isOutput, os);
+    }
+    //! Dump every binding.
+    void dumpBindings(nvinfer1::IExecutionContext const& context, std::ostream& os) const
+    {
+        auto all = [](Binding const& b) { return true; };
+        dumpBindings(context, all, os);
+    }
+    //! For each binding accepted by predicate, print "<name>: (<dims>)" followed by its values.
+    void dumpBindings(nvinfer1::IExecutionContext const& context, std::function<bool(Binding const&)> predicate,
+        std::ostream& os) const
+    {
+        for (auto const& n : mNames)
+        {
+            auto const name = n.first;
+            auto const binding = n.second;
+            if (predicate(mBindings[binding]))
+            {
+                os << n.first << ": (";
+                dumpBindingDimensions(name, context, os);
+                os << ")" << std::endl;
+
+                dumpBindingValues(context, binding, os);
+                os << std::endl;
+            }
+        }
+    }
+    //! Print the shape of tensor `name` without a trailing newline.
+    void dumpBindingDimensions(
+        std::string const& name, nvinfer1::IExecutionContext const& context, std::ostream& os) const;
+    //! Print the values of binding number `binding`, separated by `separator`.
+    void dumpBindingValues(nvinfer1::IExecutionContext const& context, int32_t binding, std::ostream& os,
+        std::string const& separator = " ", int32_t batch = 1) const;
+    //! Write each binding's host bytes to its own .raw file, logging the file names to os.
+    void dumpRawBindingToFiles(nvinfer1::IExecutionContext const& context, std::ostream& os) const;
+    //! Register device-located tensors with context; returns false as soon as one registration is rejected.
+    bool setTensorAddresses(nvinfer1::IExecutionContext& context) const;
+};
+#if ENABLE_UNIFIED_BUILDER
+class BindingsSafe : public BindingsBase
+{
+public:
+    BindingsSafe() = delete;
+    explicit BindingsSafe(bool useManaged)
+        : BindingsBase(useManaged)
+    {
+    }
+    //! Dump only the input bindings.
+    void dumpInputs(ITRTGraph const& graph, std::ostream& os) const
+    {
+        auto isInput = [](Binding const& b) { return b.isInput; };
+        dumpBindings(graph, isInput, os);
+    }
+    //! Dump only the output bindings.
+    void dumpOutputs(ITRTGraph const& graph, std::ostream& os) const
+    {
+        auto isOutput = [](Binding const& b) { return !b.isInput; };
+        dumpBindings(graph, isOutput, os);
+    }
+    //! Dump every binding.
+    void dumpBindings(ITRTGraph const& graph, std::ostream& os) const
+    {
+        auto all = [](Binding const& b) { return true; };
+        dumpBindings(graph, all, os);
+    }
+    //! For each binding accepted by predicate, print "<name>: (<dims>)" followed by its values.
+    void dumpBindings(ITRTGraph const& graph, std::function<bool(Binding const&)> predicate, std::ostream& os) const
+    {
+        for (auto const& n : mNames)
+        {
+            auto const name = n.first;
+            auto const binding = n.second;
+            if (predicate(mBindings[binding]))
+            {
+                os << n.first << ": (";
+                dumpBindingDimensions(name, graph, os);
+                os << ")" << std::endl;
+
+                dumpBindingValues(graph, binding, os);
+                os << std::endl;
+            }
+        }
+    }
+    //! Print the shape of tensor `name` without a trailing newline.
+    void dumpBindingDimensions(std::string const& name, ITRTGraph const& graph, std::ostream& os) const;
+    //! Print the values of binding number `binding`, separated by `separator`.
+    void dumpBindingValues(ITRTGraph const& graph, int32_t binding, std::ostream& os,
+        std::string const& separator = " ", int32_t batch = 1) const;
+    //! Write each binding's host bytes to its own .raw file, logging the file names to os.
+    void dumpRawBindingToFiles(ITRTGraph& graph, std::ostream& os) const;
+    //! Give the graph an address for each tensor placed on the GPU (or with no placement).
+    bool setTensorAddresses(ITRTGraph& graph) const;
+};
+#endif
+
+struct TaskInferenceEnvironment
+{
+    // Constructor is defined elsewhere; presumably it loads engineFile and stores the options — TODO confirm.
+    TaskInferenceEnvironment(std::string engineFile, InferenceOptions const& inference,
+        ReportingOptions const& reporting, int32_t deviceId = 0,
+        int32_t DLACore = -1, int32_t bs = batchNotProvided);
+    InferenceOptions iOptions{};
+    ReportingOptions rOptions{};
+    int32_t device{defaultDevice};
+    int32_t batch{batchNotProvided};
+    std::unique_ptr<InferenceEnvironmentStd> iEnv; //!< Owned standard (non-safe) inference environment.
+    std::vector<InferenceTrace> trace;
+};
+
+bool runMultiTasksInference(std::vector<std::unique_ptr<TaskInferenceEnvironment>>& tEnvList);
+
+} // namespace sample
+
+#endif // TRT_SAMPLE_INFERENCE_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleOptions.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleOptions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f84d90ec0cc715481603734a36dee7bd3adbdc21
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleOptions.cpp
@@ -0,0 +1,2931 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "NvInfer.h"
+#include "logger.h"
+#include "sampleOptions.h"
+#include "sampleUtils.h"
+
+using namespace nvinfer1;
+namespace sample
+{
+
+namespace
+{
+
+static const std::map<char, std::pair<int64_t, std::string>> kUNIT_MULTIPLIERS{
+    {'B', {1, "Bytes"}},
+    {'K', {1 << 10, "Kibibytes"}},
+    {'M', {1 << 20, "Mebibytes"}},
+    {'G', {1 << 30, "Gibibytes"}},
+};
+
+std::string addDefaultUnitSuffixIfNotSpecified(std::string const& option, char defaultUnit)
+{
+    // Append defaultUnit when option ends in a digit. std::isdigit needs an unsigned char value (negative char is UB).
+    return std::isdigit(static_cast<unsigned char>(option.at(option.size() - 1))) ? option + defaultUnit : option;
+}
+
+// Returns "B (Bytes), G (Gibibytes), K (Kibibytes), M (Mebibytes)" (entries appear in std::map key order).
+std::string getAvailableUnitSuffixes()
+{
+    std::ostringstream ss;
+    for (auto it = kUNIT_MULTIPLIERS.begin(); it != kUNIT_MULTIPLIERS.end(); ++it)
+    {
+        if (it != kUNIT_MULTIPLIERS.begin())
+        {
+            ss << ", ";
+        }
+        ss << it->first << " (" << it->second.second << ")";
+    }
+    return ss.str();
+}
+
+// Numeric trtexec arguments can carry a unit suffix, similar to polygraphy, e.g. --weightStreamingBudget=20M is 20 Mebibytes (base 2).
+// Returns the multiplier for the last character of option (case-insensitive; 1 when it is a digit); throws std::invalid_argument for an unknown suffix.
+int64_t getUnitMultiplier(std::string const& option)
+{
+    char lastChar = option.at(option.size() - 1); // throws std::out_of_range when option is empty
+    if (!std::isdigit(static_cast<unsigned char>(lastChar))) // <cctype> functions are undefined for negative char
+    {
+        char unit = static_cast<char>(std::toupper(static_cast<unsigned char>(lastChar)));
+        auto found = kUNIT_MULTIPLIERS.find(unit);
+        if (found == kUNIT_MULTIPLIERS.end())
+        {
+            std::ostringstream ss;
+            ss << "Error parsing \"" << option << "\": invalid unit specifier '" << unit
+               << "'. Valid base-2 unit suffixes include: ";
+            ss << getAvailableUnitSuffixes() << ".";
+            throw std::invalid_argument(ss.str());
+        }
+        return found->second.first;
+    }
+
+    // Return bytes by default
+    return kUNIT_MULTIPLIERS.at('B').first;
+}
+
+template <typename T>
+T stringToValue(const std::string& option)
+{
+    return T{option};
+}
+
+template <>
+int32_t stringToValue<int32_t>(const std::string& option)
+{
+    return std::stoi(option);
+}
+
+template <>
+int64_t stringToValue<int64_t>(const std::string& option)
+{
+    return std::stoll(option); // 64-bit parse: std::stoi would throw std::out_of_range beyond the int range
+}
+
+template <>
+size_t stringToValue<size_t>(const std::string& option)
+{
+    return std::stoll(option) * getUnitMultiplier(option); // 64-bit parse: plain byte counts may exceed the int range
+}
+
+template <>
+float stringToValue<float>(const std::string& option)
+{
+    return std::stof(option);
+}
+
+template <>
+double stringToValue<double>(const std::string& option)
+{
+    return std::stod(option) * getUnitMultiplier(option);
+}
+
+template <>
+bool stringToValue<bool>(const std::string& option)
+{
+    return true;
+}
+
+template <>
+std::vector<int64_t> stringToValue<std::vector<int64_t>>(const std::string& option)
+{
+    std::vector<int64_t> shape;
+    if (option == "scalar")
+    {
+        return shape;
+    }
+    std::vector<std::string> dimsStrings = splitToStringVec(option, 'x');
+    for (const auto& d : dimsStrings)
+    {
+        shape.push_back(stringToValue<int64_t>(d));
+    }
+    return shape;
+}
+
+template <>
+nvinfer1::DataType stringToValue<nvinfer1::DataType>(const std::string& option)
+{
+    const std::unordered_map<std::string, nvinfer1::DataType> strToDT{{"fp32", nvinfer1::DataType::kFLOAT},
+        {"fp16", nvinfer1::DataType::kHALF}, {"bf16", nvinfer1::DataType::kBF16}, {"int8", nvinfer1::DataType::kINT8},
+        {"fp8", nvinfer1::DataType::kFP8}, {"int32", nvinfer1::DataType::kINT32}, {"int64", nvinfer1::DataType::kINT64},
+        {"bool", nvinfer1::DataType::kBOOL}, {"uint8", nvinfer1::DataType::kUINT8},
+        {"int4", nvinfer1::DataType::kINT4}};
+    const auto& dt = strToDT.find(option);
+    if (dt == strToDT.end())
+    {
+        throw std::invalid_argument("Invalid DataType " + option);
+    }
+    return dt->second;
+}
+
+template <>
+nvinfer1::DeviceType stringToValue<nvinfer1::DeviceType>(std::string const& option)
+{
+    std::unordered_map<std::string, nvinfer1::DeviceType> const strToDevice = {
+        {"GPU", nvinfer1::DeviceType::kGPU},
+        {"DLA", nvinfer1::DeviceType::kDLA},
+    };
+    auto const& device = strToDevice.find(option);
+    if (device == strToDevice.end())
+    {
+        throw std::invalid_argument("Invalid Device Type " + option);
+    }
+    return device->second;
+}
+
+template <>
+nvinfer1::TensorFormats stringToValue<nvinfer1::TensorFormats>(const std::string& option)
+{
+    std::vector<std::string> optionStrings = splitToStringVec(option, '+');
+    const std::unordered_map<std::string, nvinfer1::TensorFormat> strToFmt{{"chw", nvinfer1::TensorFormat::kLINEAR},
+        {"chw2", nvinfer1::TensorFormat::kCHW2}, {"chw4", nvinfer1::TensorFormat::kCHW4},
+        {"hwc8", nvinfer1::TensorFormat::kHWC8}, {"chw16", nvinfer1::TensorFormat::kCHW16},
+        {"chw32", nvinfer1::TensorFormat::kCHW32}, {"dhwc8", nvinfer1::TensorFormat::kDHWC8},
+        {"cdhw32", nvinfer1::TensorFormat::kCDHW32}, {"hwc", nvinfer1::TensorFormat::kHWC},
+        {"dhwc", nvinfer1::TensorFormat::kDHWC}, {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR},
+        {"hwc16", nvinfer1::TensorFormat::kHWC16}, {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}};
+    nvinfer1::TensorFormats formats{};
+    for (auto f : optionStrings)
+    {
+        const auto& tf = strToFmt.find(f);
+        if (tf == strToFmt.end())
+        {
+            throw std::invalid_argument(std::string("Invalid TensorFormat ") + f);
+        }
+        formats |= 1U << static_cast<int32_t>(tf->second);
+    }
+
+    return formats;
+}
+
+template <>
+IOFormat stringToValue<IOFormat>(const std::string& option)
+{
+    IOFormat ioFormat{};
+    const size_t colon = option.find(':');
+
+    if (colon == std::string::npos)
+    {
+        throw std::invalid_argument(std::string("Invalid IOFormat ") + option);
+    }
+
+    ioFormat.first = stringToValue<nvinfer1::DataType>(option.substr(0, colon));
+    ioFormat.second = stringToValue<nvinfer1::TensorFormats>(option.substr(colon + 1));
+
+    return ioFormat;
+}
+
+template <>
+SparsityFlag stringToValue<SparsityFlag>(std::string const& option)
+{
+    std::unordered_map<std::string, SparsityFlag> const table{
+        {"disable", SparsityFlag::kDISABLE}, {"enable", SparsityFlag::kENABLE},
+        {
+            "force", SparsityFlag::kFORCE
+        }
+    };
+    auto search = table.find(option);
+    if (search == table.end())
+    {
+        throw std::invalid_argument(std::string("Unknown sparsity mode: ") + option);
+    }
+    if (search->second == SparsityFlag::kFORCE)
+    {
+        sample::gLogWarning << "--sparsity=force has been deprecated. "
+                            << "Please use <polygraphy surgeon prune> to rewrite the weights to a sparsity pattern "
+                            << "and then run with --sparsity=enable" << std::endl;
+    }
+    return search->second;
+}
+
+template <>
+WeightStreamingBudget stringToValue<WeightStreamingBudget>(std::string const& option)
+{
+    WeightStreamingBudget budget;
+    if (option.find('%') != std::string::npos)
+    {
+        double percent = std::stod(option);
+        if (!(percent >= 0 && percent <= 100.0))
+        {
+            std::ostringstream err;
+            err << "The weight streaming percent must be between 0 and 100.";
+            throw std::invalid_argument(err.str());
+        }
+        budget.percent = percent;
+    }
+    else
+    {
+        double bytes = stringToValue<double>(option);
+        if (!(bytes == WeightStreamingBudget::kAUTOMATIC || bytes == WeightStreamingBudget::kDISABLE || bytes >= 0))
+        {
+            std::ostringstream err;
+            err << "The weight streaming budget must be " << WeightStreamingBudget::kDISABLE << ", "
+                << WeightStreamingBudget::kAUTOMATIC << ", or at least 0.";
+            throw std::invalid_argument(err.str());
+        }
+        budget.bytes = static_cast<int64_t>(bytes);
+    }
+    return budget;
+}
+
+#if ENABLE_UNIFIED_BUILDER
+template <>
+samplesSafeCommon::SafetyPluginLibraryArgument stringToValue<samplesSafeCommon::SafetyPluginLibraryArgument>(
+    std::string const& option)
+{
+    samplesSafeCommon::SafetyPluginLibraryArgument argument;
+    auto status = parseSafetyPluginArgument(option, argument);
+    if (!status)
+    {
+        throw std::invalid_argument(std::string("Invalid Safety plugin library option: " + option));
+    }
+    return argument;
+}
+#endif
+
+
+template <typename T>
+std::pair<std::string, T> splitNameAndValue(const std::string& s)
+{
+    std::string tensorName;
+    std::string valueString;
+
+    // Splits "name:value" into (name, stringToValue<T>(value)). The 'inputName':Path form is also supported so that
+    // Windows paths work with the --loadInputs flag, i.e. 'inputName':c:\inputData
+    std::vector<std::string> quoteNameRange{splitToStringVec(s, '\'')};
+    // splitToStringVec returns the entire string when the delimiter is not found, so its size is expected to be at least 1
+    if (quoteNameRange.size() != 1)
+    {
+        if (quoteNameRange.size() != 3)
+        {
+            std::string errorMsg = std::string("Found invalid number of \'s when parsing ") + s +
+                std::string(". Expected: 2, received: ") + std::to_string(quoteNameRange.size() -1) +
+                ". Please ensure that a singular comma is used within each comma-separated key-value pair for options like --inputIOFormats, --optShapes, --optShapesCalib, --layerPrecisions, etc.";
+            throw std::invalid_argument(errorMsg);
+        }
+        // The pieces before and between the two quotes form the name (the quotes themselves are dropped).
+        tensorName = quoteNameRange[0] + quoteNameRange[1];
+        // The value is the remainder after the closing quote; substr(1) skips its leading ":".
+        valueString = quoteNameRange[2].substr(1);
+        return std::pair<std::string, T>(tensorName, stringToValue<T>(valueString));
+    }
+
+    // No quotes: split on ':' and treat the last piece as the value.
+    std::vector<std::string> nameRange{splitToStringVec(s, ':')};
+    // Everything before the last : is the name, so the earlier pieces are re-joined with ':'
+    tensorName = nameRange[0];
+    for (size_t i = 1; i < nameRange.size() - 1; i++)
+    {
+        tensorName += ":" + nameRange[i];
+    }
+    // Value is the string element after the last :
+    valueString = nameRange[nameRange.size() - 1];
+    return std::pair<std::string, T>(tensorName, stringToValue<T>(valueString));
+}
+
+template <typename T>
+void splitInsertKeyValue(const std::vector<std::string>& kvList, T& map)
+{
+    for (const auto& kv : kvList)
+    {
+        map.insert(splitNameAndValue<typename T::mapped_type>(kv));
+    }
+}
+
+const char* boolToEnabled(bool enable)
+{
+    return enable ? "Enabled" : "Disabled";
+}
+
+//! Streams every element of list into one string, inserting sep between neighbours (like sep.join(list) in Python).
+template <typename T>
+std::string joinValuesToString(std::vector<T> const& list, std::string const& sep)
+{
+    std::ostringstream joined;
+    for (auto it = list.begin(); it != list.end(); ++it)
+    {
+        if (it != list.begin())
+        {
+            joined << sep;
+        }
+        joined << *it;
+    }
+    return joined.str();
+}
+
+template <typename T, size_t N>
+std::string joinValuesToString(std::array<T, N> const& list, std::string const& sep)
+{
+    return joinValuesToString(std::vector<T>(list.begin(), list.end()), sep);
+}
+
+//! Check if input option exists in input arguments.
+//! If it does: set its value, and return true
+//! If it does not: return false.
+template <typename T>
+bool getOption(Arguments& arguments, const std::string& option, T& value)
+{
+    auto const match = arguments.find(option);
+    if (match != arguments.end())
+    {
+        value = stringToValue<T>(match->second.first);
+        return true;
+    }
+
+    return false;
+}
+
+//! Check if input option exists in input arguments.
+//! If it does: set its value, erase the argument and return true.
+//! If it does not: return false.
+template <typename T>
+bool getAndDelOption(Arguments& arguments, const std::string& option, T& value)
+{
+    bool found = getOption(arguments, option, value);
+    if (found)
+    {
+        const auto match = arguments.find(option);
+        arguments.erase(match);
+    }
+
+    return found;
+}
+
+//! Check if input option exists in input arguments.
+//! If it does: set its value and position, erase the argument and return true.
+//! If it does not: return false.
+template <typename T>
+bool getAndDelOptionWithPosition(Arguments& arguments, std::string const& option, T& value, int32_t& pos)
+{
+    auto const match = arguments.find(option);
+    if (match != arguments.end())
+    {
+        value = stringToValue<T>(match->second.first);
+        pos = match->second.second;
+        arguments.erase(match);
+        return true;
+    }
+
+    return false;
+}
+
+//! Check if input option exists in input arguments at the position immediately after the one specified by pos.
+//! If it does: set its value, erase the argument and return true.
+//! If it does not: return false.
+template <typename T>
+bool getAndDelOptionBehind(Arguments& arguments, std::string const& option, int32_t pos, T& value)
+{
+    auto const match = arguments.equal_range(option);
+    if (match.first == match.second)
+    {
+        return false;
+    }
+    for (auto i = match.first; i != match.second; ++i)
+    {
+        if (i->second.second - pos == 1)
+        {
+            value = stringToValue<T>(i->second.first);
+            arguments.erase(i);
+            return true;
+        }
+    }
+    return false;
+}
+
+//! Check if input option exists in input arguments.
+//! If it does: set false in value, erase the argument and return true.
+//! If it does not: return false.
+bool getAndDelNegOption(Arguments& arguments, const std::string& option, bool& value)
+{
+    bool dummy;
+    if (getAndDelOption(arguments, option, dummy))
+    {
+        value = false;
+        return true;
+    }
+    return false;
+}
+
+//! Check if input option exists in input arguments.
+//! If it does: add all the matched arg values to values vector, erase the argument and return true.
+//! If it does not: return false.
+template <typename T>
+bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option, std::vector<T>& values)
+{
+    const auto match = arguments.equal_range(option);
+    if (match.first == match.second)
+    {
+        return false;
+    }
+
+    auto addToValues
+        = [&values](Arguments::value_type& argValue) { values.emplace_back(stringToValue<T>(argValue.second.first)); };
+    std::for_each(match.first, match.second, addToValues);
+    arguments.erase(match.first, match.second);
+
+    return true;
+}
+
+void insertShapesBuild(BuildOptions::ShapeProfile& shapes, nvinfer1::OptProfileSelector selector,
+    const std::string& name, const std::vector<int64_t>& dims)
+{
+    shapes[name][static_cast<size_t>(selector)] = dims;
+}
+
+void insertShapesInference(
+    InferenceOptions::ShapeProfile& shapes, std::string const& name, std::vector<int64_t> const& dims)
+{
+    shapes[name] = dims;
+}
+
+std::string removeSingleQuotationMarks(std::string& str)
+{
+    // Splitting on the quote character leaves pieces that no longer contain any single quotation mark.
+    std::vector<std::string> const pieces{splitToStringVec(str, '\'')};
+    std::string unquoted;
+    // Unterminated quote sequences are not diagnosed; the pieces are simply glued back together.
+    for (auto const& piece : pieces)
+    {
+        unquoted += piece;
+    }
+    return unquoted;
+}
+
+void getLayerPrecisions(Arguments& arguments, char const* argument, LayerPrecisions& layerPrecisions)
+{
+    std::string list;
+    if (!getAndDelOption(arguments, argument, list))
+    {
+        return;
+    }
+
+    // The layerPrecisions flag contains comma-separated layerName:precision pairs.
+    std::vector<std::string> precisionList{splitToStringVec(list, ',')};
+    for (auto const& s : precisionList)
+    {
+        auto namePrecisionPair = splitNameAndValue<nvinfer1::DataType>(s);
+        auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first);
+        layerPrecisions[layerName] = namePrecisionPair.second;
+    }
+}
+
+void getLayerOutputTypes(Arguments& arguments, char const* argument, LayerOutputTypes& layerOutputTypes)
+{
+    std::string list;
+    if (!getAndDelOption(arguments, argument, list))
+    {
+        return;
+    }
+
+    // The layerOutputTypes flag contains comma-separated layerName:types pairs.
+    std::vector<std::string> precisionList{splitToStringVec(list, ',')};
+    for (auto const& s : precisionList)
+    {
+        auto namePrecisionPair = splitNameAndValue<std::string>(s);
+        auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first);
+        auto const typeStrings = splitToStringVec(namePrecisionPair.second, '+');
+        std::vector<nvinfer1::DataType> typeVec(typeStrings.size(), nvinfer1::DataType::kFLOAT);
+        std::transform(typeStrings.begin(), typeStrings.end(), typeVec.begin(), stringToValue<nvinfer1::DataType>);
+        layerOutputTypes[layerName] = typeVec;
+    }
+}
+
+void getLayerDeviceTypes(Arguments& arguments, char const* argument, LayerDeviceTypes& layerDeviceTypes)
+{
+    std::string list;
+    if (!getAndDelOption(arguments, argument, list))
+    {
+        return;
+    }
+
+    // The layerDeviceTypes flag contains comma-separated layerName:deviceType pairs.
+    std::vector<std::string> deviceList{splitToStringVec(list, ',')};
+    for (auto const& s : deviceList)
+    {
+        auto nameDevicePair = splitNameAndValue<std::string>(s);
+        auto const layerName = removeSingleQuotationMarks(nameDevicePair.first);
+        layerDeviceTypes[layerName] = stringToValue<nvinfer1::DeviceType>(nameDevicePair.second);
+    }
+}
+
+void getAndDelStringsSet(Arguments& arguments, char const* argument, StringSet& stringSet)
+{
+    std::string list;
+    if (!getAndDelOption(arguments, argument, list))
+    {
+        return;
+    }
+
+    // The flag value is a comma-separated list of strings; each element is inserted into stringSet.
+    std::vector<std::string> strings{splitToStringVec(list, ',')};
+    for (auto const& s : strings)
+    {
+        stringSet.insert(s);
+    }
+}
+bool getShapesBuild(Arguments& arguments, BuildOptions::ShapeProfile& shapes, char const* argument,
+    nvinfer1::OptProfileSelector selector)
+{
+    std::string list;
+    bool retVal = getAndDelOption(arguments, argument, list);
+    std::vector<std::string> shapeList{splitToStringVec(list, ',')};
+    for (const auto& s : shapeList)
+    {
+        auto nameDimsPair = splitNameAndValue<std::vector<int64_t>>(s);
+        auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
+        auto dims = nameDimsPair.second;
+        insertShapesBuild(shapes, selector, tensorName, dims);
+    }
+    return retVal;
+}
+
+bool getShapesInference(Arguments& arguments, InferenceOptions::ShapeProfile& shapes, const char* argument)
+{
+    std::string list;
+    bool retVal = getAndDelOption(arguments, argument, list);
+    std::vector<std::string> shapeList{splitToStringVec(list, ',')};
+    for (const auto& s : shapeList)
+    {
+        auto nameDimsPair = splitNameAndValue<std::vector<int64_t>>(s);
+        auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
+        auto dims = nameDimsPair.second;
+        insertShapesInference(shapes, tensorName, dims);
+    }
+    return retVal;
+}
+
+void fillShapes(BuildOptions::ShapeProfile& shapes, std::string const& name, ShapeRange const& sourceShapeRange,
+    nvinfer1::OptProfileSelector minDimsSource, nvinfer1::OptProfileSelector optDimsSource,
+    nvinfer1::OptProfileSelector maxDimsSource)
+{
+    insertShapesBuild(
+        shapes, nvinfer1::OptProfileSelector::kMIN, name, sourceShapeRange[static_cast<size_t>(minDimsSource)]);
+    insertShapesBuild(
+        shapes, nvinfer1::OptProfileSelector::kOPT, name, sourceShapeRange[static_cast<size_t>(optDimsSource)]);
+    insertShapesBuild(
+        shapes, nvinfer1::OptProfileSelector::kMAX, name, sourceShapeRange[static_cast<size_t>(maxDimsSource)]);
+}
+
+void processShapes(BuildOptions::ShapeProfile& shapes, bool minShapes, bool optShapes, bool maxShapes, bool calib)
+{
+    // Only accept optShapes only or all three of minShapes, optShapes, maxShapes when calib is set
+    if (((minShapes || maxShapes) && !optShapes)   // minShapes only, maxShapes only, both minShapes and maxShapes
+        || (minShapes && !maxShapes && optShapes)  // both minShapes and optShapes
+        || (!minShapes && maxShapes && optShapes)) // both maxShapes and optShapes
+    {
+        if (calib)
+        {
+            throw std::invalid_argument(
+                "Must specify only --optShapesCalib or all of --minShapesCalib, --optShapesCalib, --maxShapesCalib");
+        }
+    }
+
+    if (!minShapes && !optShapes && !maxShapes)
+    {
+        return;
+    }
+
+    BuildOptions::ShapeProfile newShapes;
+    for (auto& s : shapes)
+    {
+        nvinfer1::OptProfileSelector minDimsSource, optDimsSource, maxDimsSource;
+        minDimsSource = nvinfer1::OptProfileSelector::kMIN;
+        optDimsSource = nvinfer1::OptProfileSelector::kOPT;
+        maxDimsSource = nvinfer1::OptProfileSelector::kMAX;
+
+        // Populate missing minShapes
+        if (!minShapes)
+        {
+            if (optShapes)
+            {
+                minDimsSource = optDimsSource;
+                sample::gLogWarning << "optShapes is being broadcasted to minShapes for tensor " << s.first
+                                    << std::endl;
+            }
+            else
+            {
+                minDimsSource = maxDimsSource;
+                sample::gLogWarning << "maxShapes is being broadcasted to minShapes for tensor " << s.first
+                                    << std::endl;
+            }
+        }
+
+        // Populate missing optShapes
+        if (!optShapes)
+        {
+            if (maxShapes)
+            {
+                optDimsSource = maxDimsSource;
+                sample::gLogWarning << "maxShapes is being broadcasted to optShapes for tensor " << s.first
+                                    << std::endl;
+            }
+            else
+            {
+                optDimsSource = minDimsSource;
+                sample::gLogWarning << "minShapes is being broadcasted to optShapes for tensor " << s.first
+                                    << std::endl;
+            }
+        }
+
+        // Populate missing maxShapes
+        if (!maxShapes)
+        {
+            if (optShapes)
+            {
+                maxDimsSource = optDimsSource;
+                sample::gLogWarning << "optShapes is being broadcasted to maxShapes for tensor " << s.first
+                                    << std::endl;
+            }
+            else
+            {
+                maxDimsSource = minDimsSource;
+                sample::gLogWarning << "minShapes is being broadcasted to maxShapes for tensor " << s.first
+                                    << std::endl;
+            }
+        }
+
+        fillShapes(newShapes, s.first, s.second, minDimsSource, optDimsSource, maxDimsSource);
+    }
+    shapes = newShapes;
+}
+
+bool getOptimizationProfiles(
+    Arguments& arguments, std::vector<BuildOptions::ShapeProfile>& optProfiles, char const* argument)
+{
+    bool retValue{false};
+    int32_t pos{};
+    size_t profileIndex{};
+
+    auto getShapes
+        = [](BuildOptions::ShapeProfile& shapes, std::string const& list, nvinfer1::OptProfileSelector selector) {
+              std::vector<std::string> shapeList{splitToStringVec(list, ',')};
+              for (auto const& s : shapeList)
+              {
+                  auto nameDimsPair = splitNameAndValue<std::vector<int64_t>>(s);
+                  auto tensorName = removeSingleQuotationMarks(nameDimsPair.first);
+                  auto dims = nameDimsPair.second;
+                  insertShapesBuild(shapes, selector, tensorName, dims);
+              }
+          };
+
+    while (getAndDelOptionWithPosition(arguments, argument, profileIndex, pos))
+    {
+        BuildOptions::ShapeProfile optProfile{};
+        bool minShapes{false}, maxShapes{false}, optShapes{false};
+        for (int32_t i = 0; i < nvinfer1::EnumMax<nvinfer1::OptProfileSelector>(); i++, pos++)
+        {
+            std::string value;
+
+            if (!minShapes && getAndDelOptionBehind(arguments, "--minShapes", pos, value))
+            {
+                minShapes = true;
+                getShapes(optProfile, value, nvinfer1::OptProfileSelector::kMIN);
+            }
+            else if (!maxShapes && getAndDelOptionBehind(arguments, "--maxShapes", pos, value))
+            {
+                maxShapes = true;
+                getShapes(optProfile, value, nvinfer1::OptProfileSelector::kMAX);
+            }
+            else if (!optShapes && getAndDelOptionBehind(arguments, "--optShapes", pos, value))
+            {
+                optShapes = true;
+                getShapes(optProfile, value, nvinfer1::OptProfileSelector::kOPT);
+            }
+            else
+            {
+                break;
+            }
+        }
+        processShapes(optProfile, minShapes, optShapes, maxShapes, false);
+        if (profileIndex >= optProfiles.size())
+        {
+            optProfiles.resize(profileIndex + 1);
+        }
+        if (!optProfiles[profileIndex].empty())
+        {
+            throw std::invalid_argument("Optimization profile index cannot be the same.");
+        }
+        optProfiles[profileIndex] = optProfile;
+        retValue = true;
+    }
+
+    profileIndex = 0;
+    for (auto const& optProfile : optProfiles)
+    {
+        if (optProfile.empty())
+        {
+            throw std::invalid_argument(std::string("Found invalid or missing shape spec at profile index ")
+                + std::to_string(profileIndex) + std::string(". "));
+        }
+        ++profileIndex;
+    }
+    return retValue;
+}
+
+template <typename T>
+void printShapes(std::ostream& os, char const* phase, T const& shapes, int32_t profileIndex)
+{
+    if (shapes.empty())
+    {
+        os << "Input " << phase << " shapes: model" << std::endl;
+    }
+    else
+    {
+        std::string profileString = (profileIndex != -1 && strcmp(phase, "build") == 0)
+            ? "(profile " + std::to_string(profileIndex) + ")"
+            : "";
+        for (auto const& s : shapes)
+        {
+            os << "Input " << phase << " shape " << profileString << ": " << s.first << "=" << s.second << std::endl;
+        }
+    }
+}
+
+std::ostream& printTacticSources(
+    std::ostream& os, nvinfer1::TacticSources enabledSources, nvinfer1::TacticSources disabledSources)
+{
+    if (!enabledSources && !disabledSources)
+    {
+        os << "Using default tactic sources";
+    }
+    else
+    {
+        auto const addSource = [&](uint32_t source, std::string const& name)
+        {
+            if (enabledSources & source)
+            {
+                os << name << " [ON], ";
+            }
+            else if (disabledSources & source)
+            {
+                os << name << " [OFF], ";
+            }
+        };
+
+        addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS), "cublas");
+        addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS_LT), "cublasLt");
+        addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUDNN), "cudnn");
+        addSource(
+            1U << static_cast<uint32_t>(nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS), "edge mask convolutions");
+        addSource(1U << static_cast<uint32_t>(nvinfer1::TacticSource::kJIT_CONVOLUTIONS), "JIT convolutions");
+    }
+    return os;
+}
+
+std::ostream& printPrecision(std::ostream& os, BuildOptions const& options)
+{
+    if (options.stronglyTyped)
+    {
+        os << "Strongly Typed";
+        return os;
+    }
+    os << "FP32";
+    if (options.fp16)
+    {
+        os << "+FP16";
+    }
+    if (options.bf16)
+    {
+        os << "+BF16";
+    }
+    if (options.int8)
+    {
+        os << "+INT8";
+    }
+    if (options.fp8)
+    {
+        os << "+FP8";
+    }
+    if (options.int4)
+    {
+        os << "+INT4";
+    }
+    if (options.precisionConstraints == PrecisionConstraints::kOBEY)
+    {
+        os << " (obey precision constraints)";
+    }
+    if (options.precisionConstraints == PrecisionConstraints::kPREFER)
+    {
+        os << " (prefer precision constraints)";
+    }
+    return os;
+}
+
+std::ostream& printTempfileControls(std::ostream& os, TempfileControlFlags const tempfileControls)
+{
+    auto getFlag = [&](TempfileControlFlag f) -> char const*
+    {
+        bool allowed = !!(tempfileControls & (1U << static_cast<int64_t>(f)));
+        return allowed ? "allow" : "deny";
+    };
+    auto const inMemory = getFlag(TempfileControlFlag::kALLOW_IN_MEMORY_FILES);
+    auto const temporary = getFlag(TempfileControlFlag::kALLOW_TEMPORARY_FILES);
+
+    os << "{ in_memory: " << inMemory << ", temporary: " << temporary << " }";
+
+    return os;
+}
+std::ostream& printTimingCache(std::ostream& os, TimingCacheMode const& timingCacheMode)
+{
+    switch (timingCacheMode)
+    {
+    case TimingCacheMode::kGLOBAL: os << "global"; break;
+    case TimingCacheMode::kLOCAL: os << "local"; break;
+    case TimingCacheMode::kDISABLE: os << "disable"; break;
+    }
+    return os;
+}
+
+std::ostream& printSparsity(std::ostream& os, BuildOptions const& options)
+{
+    switch (options.sparsity)
+    {
+    case SparsityFlag::kDISABLE: os << "Disabled"; break;
+    case SparsityFlag::kENABLE: os << "Enabled"; break;
+    case SparsityFlag::kFORCE: os << "Forced"; break;
+    }
+
+    return os;
+}
+
+std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options)
+{
+    auto const printValueOrDefault = [&os](double const val, char const* unit = "MiB")
+    {
+        if (val >= 0)
+        {
+            os << val << " " << unit;
+        }
+        else
+        {
+            os << "default";
+        }
+    };
+    os << "workspace: ";
+    printValueOrDefault(options.workspace);
+    os << ", ";
+    os << "dlaSRAM: ";
+    printValueOrDefault(options.dlaSRAM);
+    os << ", ";
+    os << "dlaLocalDRAM: ";
+    printValueOrDefault(options.dlaLocalDRAM);
+    os << ", ";
+    os << "dlaGlobalDRAM: ";
+    printValueOrDefault(options.dlaGlobalDRAM);
+    os << ", ";
+    os << "tacticSharedMem: ";
+    printValueOrDefault(options.tacticSharedMem, "KiB");
+    return os;
+}
+
+std::string previewFeatureToString(PreviewFeature feature)
+{
+    // clang-format off
+    switch (feature)
+    {
+    case PreviewFeature::kPROFILE_SHARING_0806:
+    {
+        gLogWarning << "profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect." << std::endl;
+        break;
+    }
+    case PreviewFeature::kALIASED_PLUGIN_IO_10_03: return "kALIASED_PLUGIN_IO_10_03";
+    case PreviewFeature::kRUNTIME_ACTIVATION_RESIZE_10_10: return "kRUNTIME_ACTIVATION_RESIZE_10_10";
+    }
+    return "Invalid Preview Feature";
+    // clang-format on
+}
+
+std::ostream& printPreviewFlags(std::ostream& os, BuildOptions const& options)
+{
+    auto const& requested = options.previewFeatures;
+    if (requested.empty())
+    {
+        return os << "Use default preview flags.";
+    }
+
+    // Only these two features are reported, in this order, and only when an entry exists for them.
+    PreviewFeature const reportable[]
+        = {PreviewFeature::kALIASED_PLUGIN_IO_10_03, PreviewFeature::kRUNTIME_ACTIVATION_RESIZE_10_10};
+    for (PreviewFeature const feat : reportable)
+    {
+        auto const entry = requested.find(static_cast<int32_t>(feat));
+        if (entry != requested.end())
+        {
+            os << previewFeatureToString(feat) << (entry->second ? " [ON], " : " [OFF], ");
+        }
+    }
+    return os;
+}
+
+} // namespace
+
+Arguments argsToArgumentsMap(int32_t argc, char* argv[])
+{
+    // Turn argv[1..argc) into a map keyed by option name. Each entry stores the text after the
+    // first '=' (empty when there is none) together with the option's position in argv.
+    Arguments parsed;
+    int32_t position = 1; // argv[0] is skipped
+    while (position < argc)
+    {
+        std::string const token{argv[position]};
+        auto const eq = token.find('=');
+        bool const hasValue = (eq != std::string::npos);
+        std::string key = hasValue ? token.substr(0, eq) : token;
+        std::string value = hasValue ? token.substr(eq + 1) : std::string{};
+        parsed.emplace(std::move(key), std::make_pair(std::move(value), position));
+        ++position;
+    }
+    return parsed;
+}
+
+namespace
+{
+std::string resolveHomeDirectoryOnLinux(std::string const& model) // expands a leading "~" via $HOME
+{
+    std::string filePath{model}; // returned unchanged unless the "~" prefix is expanded below
+#ifndef _WIN32
+    if (filePath == "~" || filePath.compare(0, 2, "~/") == 0) // "~name..." is not under $HOME: skip it
+    {
+        char const* home = std::getenv("HOME");
+        if (home) // HOME may be unset; keep the literal path in that case
+        {
+            filePath.replace(0, 1, home);
+        }
+    }
+#endif
+    return filePath;
+}
+} // namespace
+
+void BaseModelOptions::parse(Arguments& arguments)
+{
+    // When --onnx is reported as present, mark the format as ONNX and expand a leading "~" in the path.
+    bool const hasOnnx = getAndDelOption(arguments, "--onnx", model);
+    if (!hasOnnx) { return; }
+    format = ModelFormat::kONNX;
+    model = resolveHomeDirectoryOnLinux(model);
+}
+
+void ModelOptions::parse(Arguments& arguments)
+{
+    // The base options decide the model format; everything below only validates against it.
+    baseModel.parse(arguments);
+
+    // Neither kONNX nor kANY requires any format-specific option handling at this point,
+    // so the only remaining work is the ONNX consistency check.
+    bool const isOnnx = (baseModel.format == ModelFormat::kONNX);
+    if (!isOnnx)
+    {
+        return;
+    }
+
+    // NOTE(review): `outputs` is not filled in by this function; presumably it is populated
+    // elsewhere before parse() runs - confirm against the callers.
+    bool const hasExplicitOutputs = !outputs.empty();
+    if (hasExplicitOutputs)
+    {
+        throw std::invalid_argument("The --output flag should not be used with ONNX models.");
+    }
+}
+
+void getTempfileControls(Arguments& arguments, char const* argument, TempfileControlFlags& tempfileControls)
+{
+    // Reads the option named by `argument` as a comma-separated list of name/value entries; each
+    // entry sets (value "allow") or clears (value "deny") the matching bit of tempfileControls.
+    std::string rawList;
+    if (!getAndDelOption(arguments, argument, rawList))
+    {
+        return;
+    }
+
+    for (auto const& entry : splitToStringVec(rawList, ','))
+    {
+        auto const keyAndAction = splitNameAndValue<std::string>(entry);
+        std::string const& key = keyAndAction.first;
+        std::string const& action = keyAndAction.second;
+        // The action is validated before the key, so a bad action is reported first.
+        bool const allow = (action == "allow");
+        if (!allow && action != "deny")
+        {
+            throw std::invalid_argument("--tempfileControls value should be `deny` or `allow`");
+        }
+
+        TempfileControlFlag flag{};
+        if (key == "in_memory")
+        {
+            flag = TempfileControlFlag::kALLOW_IN_MEMORY_FILES;
+        }
+        else if (key == "temporary")
+        {
+            flag = TempfileControlFlag::kALLOW_TEMPORARY_FILES;
+        }
+        else
+        {
+            throw std::invalid_argument(std::string{"Unknown --tempfileControls key "} + key);
+        }
+
+        auto const mask = 1U << static_cast<int32_t>(flag);
+        if (allow)
+        {
+            tempfileControls |= mask;
+        }
+        else
+        {
+            tempfileControls &= ~mask;
+        }
+    }
+}
+
+void BuildOptions::parse(Arguments& arguments) // Reads build options from `arguments`; throws std::invalid_argument on bad values.
+{
+    auto getFormats = [&arguments](std::vector<IOFormat>& formatsVector, const char* argument)
+    {
+        std::string list;
+        getAndDelOption(arguments, argument, list);
+        std::vector<std::string> formats{splitToStringVec(list, ',')};
+        for (const auto& f : formats)
+        {
+            formatsVector.push_back(stringToValue<IOFormat>(f));
+        }
+    };
+
+    getFormats(inputFormats, "--inputIOFormats");
+    getFormats(outputFormats, "--outputIOFormats");
+
+    bool getCalibProfile = getAndDelOption(arguments, "--calibProfile", calibProfile); // remembered for the warning near --calib
+    if (!getOptimizationProfiles(arguments, optProfiles, "--profile")) // fallback: build one profile from --min/opt/maxShapes
+    {
+        ShapeProfile shapes;
+        bool minShapes{false}, optShapes{false}, maxShapes{false};
+        try
+        {
+            minShapes = getShapesBuild(arguments, shapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN);
+            optShapes = getShapesBuild(arguments, shapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT);
+            maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX);
+        }
+        catch (std::invalid_argument const& arg)
+        {
+            throw std::invalid_argument(arg.what()
+                + std::string(" conversion failure: failed to parse minShapes/optShapes/maxShapes. Please double check "
+                              "your input string."));
+        }
+
+        processShapes(shapes, minShapes, optShapes, maxShapes, false);
+        optProfiles.emplace_back(shapes);
+    }
+    if (calibProfile >= optProfiles.size()) // calibProfile is later used as an index into optProfiles
+    {
+        throw std::invalid_argument(
+            std::string("--calibProfile shouldn't greater than the size of optimization profile."));
+    }
+    BuildOptions::ShapeProfile dummyShapes;
+    // A second lookup that still finds one of these flags means it was supplied more than once.
+    bool remainingMinShapes = getShapesBuild(arguments, dummyShapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN);
+    bool remainingOptShapes = getShapesBuild(arguments, dummyShapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT);
+    bool remainingMaxShapes = getShapesBuild(arguments, dummyShapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX);
+    if (remainingMinShapes || remainingOptShapes || remainingMaxShapes)
+    {
+        throw std::invalid_argument("Multiple --minShapes/--optShapes/--maxShapes without --profile are not allowed. ");
+    }
+
+    bool minShapesCalib{false}, optShapesCalib{false}, maxShapesCalib{false};
+    try
+    {
+        minShapesCalib = getShapesBuild(arguments, shapesCalib, "--minShapesCalib", nvinfer1::OptProfileSelector::kMIN);
+        optShapesCalib = getShapesBuild(arguments, shapesCalib, "--optShapesCalib", nvinfer1::OptProfileSelector::kOPT);
+        maxShapesCalib = getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", nvinfer1::OptProfileSelector::kMAX);
+    }
+    catch (std::invalid_argument const& arg)
+    {
+        throw std::invalid_argument(arg.what()
+            + std::string(" conversion failure: failed to parse minShapesCalib/optShapesCalib/maxShapesCalib. Please "
+                          "double check your input string."));
+    }
+    processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, true);
+
+    std::string memPoolSizes;
+    getAndDelOption(arguments, "--memPoolSize", memPoolSizes);
+    std::vector<std::string> memPoolSpecs{splitToStringVec(memPoolSizes, ',')};
+    for (auto const& memPoolSpec : memPoolSpecs)
+    {
+        std::string memPoolName;
+        double memPoolSize;
+        try
+        {
+            std::string strPoolSize;
+            std::tie(memPoolName, strPoolSize) = splitNameAndValue<std::string>(memPoolSpec);
+            memPoolSize = stringToValue<double>(addDefaultUnitSuffixIfNotSpecified(strPoolSize, 'M')); // 'M' is the default unit
+        }
+        catch (std::invalid_argument const& arg)
+        {
+            throw std::invalid_argument(arg.what()
+                + std::string(
+                    " conversion failure: failed to parse --memPoolSize. Please double check your input string."));
+        }
+
+        if (memPoolSize < 0)
+        {
+            throw std::invalid_argument(std::string("Negative memory pool size: ") + std::to_string(memPoolSize));
+        }
+        if (memPoolName == "workspace")
+        {
+            // stored in MiB.
+            workspace = memPoolSize / 1.0_MiB;
+        }
+        else if (memPoolName == "dlaSRAM")
+        {
+            // stored in MiB.
+            dlaSRAM = memPoolSize / 1.0_MiB;
+        }
+        else if (memPoolName == "dlaLocalDRAM")
+        {
+            // stored in MiB.
+            dlaLocalDRAM = memPoolSize / 1.0_MiB;
+        }
+        else if (memPoolName == "dlaGlobalDRAM")
+        {
+            // stored in MiB.
+            dlaGlobalDRAM = memPoolSize / 1.0_MiB;
+        }
+        else if (memPoolName == "tacticSharedMem")
+        {
+            // stored in KiB.
+            tacticSharedMem = memPoolSize / 1.0_KiB;
+        }
+        else if (!memPoolName.empty())
+        {
+            throw std::invalid_argument(std::string("Unknown memory pool: ") + memPoolName);
+        }
+    }
+
+    getAndDelOption(arguments, "--avgTiming", avgTiming);
+
+    bool best{false};
+    getAndDelOption(arguments, "--best", best);
+    if (best)
+    {
+        int8 = (samplesCommon::getSMVersion() != 0x0a03); // NOTE(review): int8 is skipped for SM version 0x0a03 - reason not visible here
+        fp16 = true;
+
+        // BF16 only supported on Ampere+
+        if (samplesCommon::getSMVersion() >= 0x0800)
+        {
+            bf16 = true;
+        }
+    }
+
+    getAndDelOption(arguments, "--refit", refittable);
+
+    getAndDelOption(arguments, "--weightless", stripWeights);
+    getAndDelOption(arguments, "--stripWeights", stripWeights);
+
+    bool stripAllWeights{};
+    getAndDelOption(arguments, "--stripAllWeights", stripAllWeights);
+    if (stripAllWeights)
+    {
+        refittable = true;
+        stripWeights = true;
+    }
+
+    // --vc and --versionCompatible are synonyms
+    getAndDelOption(arguments, "--vc", versionCompatible);
+    if (!versionCompatible)
+    {
+        getAndDelOption(arguments, "--versionCompatible", versionCompatible);
+    }
+
+    // --pi and --pluginInstanceNorm are synonyms
+    getAndDelOption(arguments, "--pi", pluginInstanceNorm);
+    if (!pluginInstanceNorm)
+    {
+        getAndDelOption(arguments, "--pluginInstanceNorm", pluginInstanceNorm);
+    }
+
+    getAndDelOption(arguments, "--uint8AsymmetricQuantizationDLA", enableUInt8AsymmetricQuantizationDLA);
+    getAndDelOption(arguments, "--excludeLeanRuntime", excludeLeanRuntime);
+    getAndDelOption(arguments, "--noCompilationCache", disableCompilationCache);
+    getAndDelOption(arguments, "--monitorMemory", enableMonitorMemory);
+    getAndDelNegOption(arguments, "--noTF32", tf32);
+    getAndDelOption(arguments, "--fp16", fp16);
+    getAndDelOption(arguments, "--bf16", bf16);
+    getAndDelOption(arguments, "--int8", int8);
+    getAndDelOption(arguments, "--fp8", fp8);
+    getAndDelOption(arguments, "--int4", int4);
+    getAndDelOption(arguments, "--stronglyTyped", stronglyTyped);
+    getAndDelOption(arguments, "--distributiveIndependence", distributiveIndependence);
+
+    if (best && stronglyTyped)
+    {
+        throw std::invalid_argument(
+            "--best and --stronglyTyped cannot be both set. --best enables implicit precisions, while "
+            "--stronglyTyped enforces explicit precisions.");
+    }
+
+    if (stronglyTyped)
+    {
+        auto disableAndLog = [](bool& flag, std::string mode, std::string type)
+        {
+            if (flag)
+            {
+                flag = false;
+                sample::gLogWarning << "Invalid usage, setting " << mode
+                                    << " mode is not allowed if graph is strongly typed. Disabling BuilderFlag::"
+                                    << type << "." << std::endl;
+            }
+        };
+        disableAndLog(fp16, "fp16", "kFP16");
+        disableAndLog(int8, "int8", "kINT8");
+        disableAndLog(bf16, "bf16", "kBF16");
+        disableAndLog(fp8, "fp8", "kFP8");
+        disableAndLog(int4, "int4", "kINT4");
+    }
+
+    // Print a message to tell users that --noTF32 can be added to improve accuracy with performance cost.
+    if (samplesCommon::getSMVersion() >= 0x0800)
+    {
+        if (!(stronglyTyped || fp16 || bf16 || int8 || fp8 || int4))
+        {
+            sample::gLogInfo << "TF32 is enabled by default. Add --noTF32 flag to further improve accuracy with some "
+                             << "performance cost." << std::endl;
+        }
+    }
+
+    if (fp8 && int8)
+    {
+        throw std::invalid_argument("Invalid usage, fp8 and int8 aren't allowed to be enabled together.");
+    }
+    getAndDelOption(arguments, "--safe", safe);
+    getAndDelOption(arguments, "--buildDLAStandalone", buildDLAStandalone);
+    getAndDelOption(arguments, "--allowGPUFallback", allowGPUFallback);
+    getAndDelOption(arguments, "--consistency", consistency);
+    getAndDelOption(arguments, "--restricted", restricted);
+    getAndDelOption(arguments, "--skipInference", skipInference);
+    if (getAndDelOption(arguments, "--directIO", directIO))
+    {
+        sample::gLogWarning << "--directIO flag has been deprecated" << std::endl;
+    }
+
+    std::string precisionConstraintsString;
+    getAndDelOption(arguments, "--precisionConstraints", precisionConstraintsString);
+    if (!precisionConstraintsString.empty())
+    {
+        const std::unordered_map<std::string, PrecisionConstraints> precisionConstraintsMap
+            = {{"obey", PrecisionConstraints::kOBEY}, {"prefer", PrecisionConstraints::kPREFER},
+                {"none", PrecisionConstraints::kNONE}};
+        auto it = precisionConstraintsMap.find(precisionConstraintsString);
+        if (it == precisionConstraintsMap.end())
+        {
+            throw std::invalid_argument(std::string("Unknown precision constraints: ") + precisionConstraintsString);
+        }
+        precisionConstraints = it->second;
+    }
+    else
+    {
+        precisionConstraints = PrecisionConstraints::kNONE;
+    }
+
+    getLayerPrecisions(arguments, "--layerPrecisions", layerPrecisions);
+    getLayerOutputTypes(arguments, "--layerOutputTypes", layerOutputTypes);
+    getLayerDeviceTypes(arguments, "--layerDeviceTypes", layerDeviceTypes);
+
+    if (layerPrecisions.empty() && layerOutputTypes.empty() && precisionConstraints != PrecisionConstraints::kNONE)
+    {
+        sample::gLogWarning << R"(When --precisionConstraints flag is set to "obey" or "prefer", please add )"
+                            << "--layerPrecision/--layerOutputTypes flags to set layer-wise precisions and output "
+                            << "types." << std::endl;
+    }
+    else if ((!layerPrecisions.empty() || !layerOutputTypes.empty())
+        && precisionConstraints == PrecisionConstraints::kNONE)
+    {
+        sample::gLogWarning << "--layerPrecision/--layerOutputTypes flags have no effect when --precisionConstraints "
+                            << R"(flag is set to "none".)" << std::endl;
+    }
+
+    getAndDelStringsSet(arguments, "--markDebug", debugTensors);
+    getAndDelOption(arguments, "--markUnfusedTensorsAsDebugTensors", markUnfusedTensorsAsDebugTensors);
+
+    getAndDelOption(arguments, "--sparsity", sparsity);
+    bool calibCheck = getAndDelOption(arguments, "--calib", calibration);
+    if (int8 && calibCheck && !optProfiles[calibProfile].empty() && shapesCalib.empty()) // no explicit calib shapes: reuse the chosen profile
+    {
+        shapesCalib = optProfiles[calibProfile];
+    }
+    else if (!shapesCalib.empty() && getCalibProfile)
+    {
+        sample::gLogWarning
+            << "--calibProfile have no effect when --minShapesCalib/--optShapesCalib/--maxShapesCalib is set."
+            << std::endl;
+    }
+    std::string profilingVerbosityString;
+
+    getAndDelOption(arguments, "--profilingVerbosity", profilingVerbosityString);
+    if (profilingVerbosityString == "layer_names_only")
+    {
+        profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
+    }
+    else if (profilingVerbosityString == "none")
+    {
+        profilingVerbosity = nvinfer1::ProfilingVerbosity::kNONE;
+    }
+    else if (profilingVerbosityString == "detailed")
+    {
+        profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED;
+    }
+    else if (profilingVerbosityString == "default")
+    {
+        sample::gLogWarning << "--profilingVerbosity=default has been deprecated by "
+                               "--profilingVerbosity=layer_names_only."
+                            << std::endl;
+        profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY;
+    }
+    else if (profilingVerbosityString == "verbose")
+    {
+        sample::gLogWarning << "--profilingVerbosity=verbose has been deprecated by --profilingVerbosity=detailed."
+                            << std::endl;
+        profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED;
+    }
+    else if (!profilingVerbosityString.empty())
+    {
+        throw std::invalid_argument(std::string("Unknown profilingVerbosity: ") + profilingVerbosityString);
+    }
+
+    if (getAndDelOption(arguments, "--loadEngine", engine))
+    {
+        load = true;
+    }
+    getAndDelOption(arguments, "--asyncFileReader", asyncFileReader);
+    getAndDelOption(arguments, "--getPlanVersionOnly", getPlanVersionOnly);
+
+    if (getAndDelOption(arguments, "--saveEngine", engine)) // shares `engine` with --loadEngine; both together are rejected below
+    {
+        save = true;
+    }
+    if (load && save)
+    {
+        throw std::invalid_argument("Incompatible load and save engine options selected");
+    }
+
+    std::string tacticSourceArgs;
+    if (getAndDelOption(arguments, "--tacticSources", tacticSourceArgs))
+    {
+        std::vector<std::string> tacticList = splitToStringVec(tacticSourceArgs, ',');
+        for (auto& t : tacticList)
+        {
+            bool enable{false};
+            if (!t.empty() && t.front() == '+') // front() on an empty token would be undefined behaviour
+            {
+                enable = true;
+            }
+            else if (t.empty() || t.front() != '-') // an empty token is rejected like any unprefixed one
+            {
+                throw std::invalid_argument(
+                    "Tactic source must be prefixed with + or -, indicating whether it should be enabled or disabled "
+                    "respectively.");
+            }
+            t.erase(0, 1);
+
+            const auto toUpper = [](std::string& sourceName)
+            {
+                std::transform( // std::toupper requires a value representable as unsigned char, hence the parameter type
+                    sourceName.begin(), sourceName.end(), sourceName.begin(), [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
+                return sourceName;
+            };
+
+            nvinfer1::TacticSource source{};
+            t = toUpper(t);
+            if (t == "CUBLAS")
+            {
+                source = nvinfer1::TacticSource::kCUBLAS;
+            }
+            else if (t == "CUBLASLT" || t == "CUBLAS_LT")
+            {
+                source = nvinfer1::TacticSource::kCUBLAS_LT;
+            }
+            else
+            if (t == "CUDNN")
+            {
+                source = nvinfer1::TacticSource::kCUDNN;
+            }
+            else if (t == "EDGE_MASK_CONVOLUTIONS")
+            {
+                source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS;
+            }
+            else if (t == "JIT_CONVOLUTIONS")
+            {
+                source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS;
+            }
+            else
+            {
+                throw std::invalid_argument(std::string("Unknown tactic source: ") + t);
+            }
+
+            uint32_t sourceBit = 1U << static_cast<uint32_t>(source);
+
+            if (enable)
+            {
+                enabledTactics |= sourceBit;
+            }
+            else
+            {
+                disabledTactics |= sourceBit;
+            }
+
+            if (enabledTactics & disabledTactics) // the same source was requested with both + and -
+            {
+                throw std::invalid_argument(std::string("Cannot enable and disable ") + t);
+            }
+        }
+    }
+
+    bool noBuilderCache{false};
+    getAndDelOption(arguments, "--noBuilderCache", noBuilderCache);
+    getAndDelOption(arguments, "--timingCacheFile", timingCacheFile);
+    if (noBuilderCache) // --noBuilderCache takes precedence over --timingCacheFile
+    {
+        timingCacheMode = TimingCacheMode::kDISABLE;
+    }
+    else if (!timingCacheFile.empty())
+    {
+        timingCacheMode = TimingCacheMode::kGLOBAL;
+    }
+    else
+    {
+        timingCacheMode = TimingCacheMode::kLOCAL;
+    }
+    getAndDelOption(arguments, "--errorOnTimingCacheMiss", errorOnTimingCacheMiss);
+    getAndDelOption(arguments, "--builderOptimizationLevel", builderOptimizationLevel);
+    getAndDelOption(arguments, "--maxTactics", maxTactics);
+
+    std::string runtimePlatformArgs;
+    getAndDelOption(arguments, "--runtimePlatform", runtimePlatformArgs);
+    if (runtimePlatformArgs == "SameAsBuild" || runtimePlatformArgs.empty())
+    {
+        runtimePlatform = RuntimePlatform::kSAME_AS_BUILD;
+    }
+    else if (runtimePlatformArgs == "WindowsAMD64")
+    {
+        runtimePlatform = RuntimePlatform::kWINDOWS_AMD64;
+    }
+    else
+    {
+        throw std::invalid_argument(std::string("Unknown runtime platform: ") + runtimePlatformArgs
+            + ". Valid options: SameAsBuild, WindowsAMD64.");
+    }
+
+    std::string hardwareCompatibleArgs;
+    getAndDelOption(arguments, "--hardwareCompatibilityLevel", hardwareCompatibleArgs);
+    if (hardwareCompatibleArgs == "none" || hardwareCompatibleArgs.empty()) // note: "none" is matched case-sensitively, unlike the values below
+    {
+        hardwareCompatibilityLevel = HardwareCompatibilityLevel::kNONE;
+    }
+    else if (samplesCommon::toLower(hardwareCompatibleArgs) == "ampere+")
+    {
+        hardwareCompatibilityLevel = HardwareCompatibilityLevel::kAMPERE_PLUS;
+    }
+    else if (samplesCommon::toLower(hardwareCompatibleArgs) == "samecomputecapability")
+    {
+        hardwareCompatibilityLevel = HardwareCompatibilityLevel::kSAME_COMPUTE_CAPABILITY;
+    }
+    else
+    {
+        throw std::invalid_argument(std::string("Unknown hardwareCompatibilityLevel: ") + hardwareCompatibleArgs
+            + ". Valid options: none, ampere+, sameComputeCapability.");
+    }
+    if (pluginInstanceNorm
+        && (versionCompatible || hardwareCompatibilityLevel == HardwareCompatibilityLevel::kAMPERE_PLUS))
+    {
+        throw std::invalid_argument(
+            "Plugin InstanceNorm cannot be used with version compatible or hardware compatible engines!");
+    }
+    getAndDelOption(arguments, "--maxAuxStreams", maxAuxStreams);
+    std::string previewFeaturesBuf;
+    getAndDelOption(arguments, "--preview", previewFeaturesBuf);
+    std::vector<std::string> previewFeaturesVec{splitToStringVec(previewFeaturesBuf, ',')};
+    for (auto featureName : previewFeaturesVec)
+    {
+        bool enable{false};
+        if (!featureName.empty() && featureName.front() == '+') // front() on an empty token would be undefined behaviour
+        {
+            enable = true;
+        }
+        else if (featureName.empty() || featureName.front() != '-') // an empty token is rejected like any unprefixed one
+        {
+            throw std::invalid_argument(
+                "Preview features must be prefixed with + or -, indicating whether it should be enabled or disabled "
+                "respectively.");
+        }
+        featureName.erase(0, 1);
+
+        PreviewFeature feat{};
+        if (featureName == "profileSharing0806")
+        {
+            sample::gLogWarning
+                << "profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect."
+                << std::endl;
+        }
+        else if (featureName == "aliasedPluginIO1003")
+        {
+            feat = PreviewFeature::kALIASED_PLUGIN_IO_10_03;
+        }
+        else if (featureName == "runtimeActivationResize")
+        {
+            feat = PreviewFeature::kRUNTIME_ACTIVATION_RESIZE_10_10;
+        }
+        else
+        {
+            throw std::invalid_argument(std::string("Unknown preview feature: ") + featureName);
+        }
+        previewFeatures[static_cast<int32_t>(feat)] = enable; // NOTE(review): the deprecated flag is still recorded under the value-initialised feat
+    }
+
+    getAndDelOption(arguments, "--tempdir", tempdir);
+    getTempfileControls(arguments, "--tempfileControls", tempfileControls);
+
+    std::string runtimeMode;
+    getAndDelOption(arguments, "--useRuntime", runtimeMode);
+    if (runtimeMode == "full")
+    {
+        useRuntime = RuntimeMode::kFULL;
+    }
+    else if (runtimeMode == "dispatch")
+    {
+        useRuntime = RuntimeMode::kDISPATCH;
+    }
+    else if (runtimeMode == "lean")
+    {
+        useRuntime = RuntimeMode::kLEAN;
+    }
+    else if (!runtimeMode.empty())
+    {
+        throw std::invalid_argument(std::string("Unknown useRuntime: ") + runtimeMode);
+    }
+
+    if ((useRuntime == RuntimeMode::kDISPATCH || useRuntime == RuntimeMode::kLEAN) && !versionCompatible)
+    {
+        versionCompatible = true;
+        sample::gLogWarning << "Implicitly enabling --versionCompatible since --useRuntime=" << runtimeMode
+                            << " is set." << std::endl;
+    }
+
+    if (useRuntime != RuntimeMode::kFULL && !load) // non-full runtimes are only accepted together with --loadEngine
+    {
+        throw std::invalid_argument(std::string("Building a TensorRT engine requires --useRuntime=full."));
+    }
+
+    getAndDelOption(arguments, "--leanDLLPath", leanDLLPath);
+    // Don't delete the option because the inference option parser requires it
+    getOption(arguments, "--allowWeightStreaming", allowWeightStreaming);
+
+    getAndDelOption(arguments, "--tilingOptimizationLevel", tilingOptimizationLevel);
+    getAndDelOption(arguments, "--l2LimitForTiling", l2LimitForTiling);
+    getAndDelOption(arguments, "--remoteAutoTuningConfig", remoteAutoTuningConfig);
+    if (!remoteAutoTuningConfig.empty() && !safe)
+    {
+        throw std::invalid_argument(
+            "Remote auto tuning is not supported in standard build. Use --safe flag to enable it.");
+    }
+}
+
+void SystemOptions::parse(Arguments& arguments) // Reads device and plugin-library options into this object.
+{
+    getAndDelOption(arguments, "--device", device);
+    getAndDelOption(arguments, "--useDLACore", DLACore);
+    std::string pluginName; // scratch value shared by all the loops below
+    while (getAndDelOption(arguments, "--plugins", pluginName)) // repeat for as long as the helper keeps reporting the flag
+    {
+        sample::gLogWarning << "--plugins flag has been deprecated, use --staticPlugins flag instead." << std::endl;
+        plugins.emplace_back(pluginName);
+    }
+    while (getAndDelOption(arguments, "--staticPlugins", pluginName)) // appended to the same list as the deprecated --plugins
+    {
+        plugins.emplace_back(pluginName);
+    }
+    while (getAndDelOption(arguments, "--setPluginsToSerialize", pluginName))
+    {
+        setPluginsToSerialize.emplace_back(pluginName);
+    }
+    while (getAndDelOption(arguments, "--dynamicPlugins", pluginName))
+    {
+        dynamicPlugins.emplace_back(pluginName);
+    }
+#if ENABLE_UNIFIED_BUILDER
+    samplesSafeCommon::SafetyPluginLibraryArgument safetyPluginOption; // only compiled when ENABLE_UNIFIED_BUILDER is non-zero
+    while (getAndDelOption(arguments, "--safetyPlugins", safetyPluginOption))
+    {
+        safetyPlugins.emplace_back(std::move(safetyPluginOption));
+    }
+#endif // ENABLE_UNIFIED_BUILDER
+    getAndDelOption(arguments, "--ignoreParsedPluginLibs", ignoreParsedPluginLibs);
+}
+
+constexpr int64_t WeightStreamingBudget::kDISABLE; // namespace-scope definition of the static constexpr member (needed for ODR-use before C++17)
+constexpr int64_t WeightStreamingBudget::kAUTOMATIC; // same for kAUTOMATIC
+
+void InferenceOptions::parse(Arguments& arguments)
+{
+    // Reads inference-time options from `arguments` into this object; throws std::invalid_argument on bad values.
+    if (getAndDelOption(arguments, "--streams", infStreams))
+    {
+        sample::gLogWarning << "--streams flag has been deprecated, use --infStreams flag instead." << std::endl;
+    }
+    getAndDelOption(arguments, "--infStreams", infStreams); // read after --streams, into the same member
+
+    getAndDelOption(arguments, "--iterations", iterations);
+    getAndDelOption(arguments, "--duration", duration);
+    getAndDelOption(arguments, "--warmUp", warmup);
+    getAndDelOption(arguments, "--sleepTime", sleep);
+    getAndDelOption(arguments, "--idleTime", idle);
+    bool exposeDMA{false};
+    if (getAndDelOption(arguments, "--exposeDMA", exposeDMA))
+    {
+        overlap = !exposeDMA; // `overlap` stores the inverse of the flag
+    }
+    getAndDelOption(arguments, "--noDataTransfers", skipTransfers);
+    getAndDelOption(arguments, "--useManagedMemory", useManaged);
+    getAndDelOption(arguments, "--useSpinWait", spin);
+    getAndDelOption(arguments, "--threads", threads);
+    getAndDelOption(arguments, "--useCudaGraph", graph);
+    getAndDelOption(arguments, "--separateProfileRun", rerun);
+    getAndDelOption(arguments, "--timeDeserialize", timeDeserialize);
+    getAndDelOption(arguments, "--timeRefit", timeRefit);
+    getAndDelOption(arguments, "--persistentCacheRatio", persistentCacheRatio);
+
+    std::string list;
+    getAndDelOption(arguments, "--loadInputs", list);
+    std::vector<std::string> inputsList{splitToStringVec(list, ',')}; // comma-separated entries, handed to splitInsertKeyValue
+    splitInsertKeyValue(inputsList, inputs);
+
+    getShapesInference(arguments, shapes, "--shapes");
+    setOptProfile = getAndDelOption(arguments, "--useProfile", optProfileIndex); // records whether the flag was reported as present
+
+    std::string allocationStrategyString;
+    getAndDelOption(arguments, "--allocationStrategy", allocationStrategyString);
+    if (allocationStrategyString == "static")
+    {
+        memoryAllocationStrategy = MemoryAllocationStrategy::kSTATIC;
+    }
+    else if (allocationStrategyString == "profile")
+    {
+        memoryAllocationStrategy = MemoryAllocationStrategy::kPROFILE;
+    }
+    else if (allocationStrategyString == "runtime")
+    {
+        memoryAllocationStrategy = MemoryAllocationStrategy::kRUNTIME;
+    }
+    else if (!allocationStrategyString.empty()) // an empty string leaves the current strategy untouched
+    {
+        throw std::invalid_argument(std::string("Unknown allocationStrategy: ") + allocationStrategyString);
+    }
+
+    bool allowWs{false};
+    getAndDelOption(arguments, "--allowWeightStreaming", allowWs);
+    bool wsBudgetFound = getAndDelOption(arguments, "--weightStreamingBudget", weightStreamingBudget);
+    if (wsBudgetFound && !allowWs) // a budget without --allowWeightStreaming is rejected
+    {
+        throw std::invalid_argument(
+            "The weight streaming budget can only be set with --allowWeightStreaming specified.");
+    }
+    if (allowWs && weightStreamingBudget.isDisabled())
+    {
+        sample::gLogWarning << "The engine can stream its weights but it will not at runtime because "
+                               "--weightStreamingBudget unset or set to "
+                            << WeightStreamingBudget::kDISABLE << "." << std::endl;
+    }
+    std::string debugTensorList;
+    getAndDelOption(arguments, "--saveDebugTensors", debugTensorList);
+    std::vector<std::string> fileNames{splitToStringVec(debugTensorList, ',')};
+    splitInsertKeyValue(fileNames, debugTensorFileNames);
+
+    std::string debugFormats;
+    getAndDelOption(arguments, "--saveAllDebugTensors", debugFormats);
+    dumpAlldebugTensorFormats = splitToStringVec(debugFormats, ','); // stored as the list of comma-separated entries
+}
+
+void ReportingOptions::parse(Arguments& arguments) // Reads reporting options; throws std::invalid_argument on a bad percentile.
+{
+    getAndDelOption(arguments, "--avgRuns", avgs);
+    getAndDelOption(arguments, "--verbose", verbose);
+    getAndDelOption(arguments, "--dumpRefit", refit);
+    getAndDelOption(arguments, "--dumpOutput", output);
+    getAndDelOption(arguments, "--dumpRawBindingsToFile", dumpRawBindings);
+    getAndDelOption(arguments, "--dumpProfile", profile);
+    getAndDelOption(arguments, "--dumpLayerInfo", layerInfo);
+    getAndDelOption(arguments, "--dumpOptimizationProfile", optProfileInfo);
+    getAndDelOption(arguments, "--exportTimes", exportTimes);
+    getAndDelOption(arguments, "--exportOutput", exportOutput);
+    getAndDelOption(arguments, "--exportProfile", exportProfile);
+    getAndDelOption(arguments, "--exportLayerInfo", exportLayerInfo);
+    // --percentile is a comma-separated list; when given, it replaces the existing percentiles.
+    std::string percentileString;
+    getAndDelOption(arguments, "--percentile", percentileString);
+    std::vector<std::string> percentileStrings = splitToStringVec(percentileString, ',');
+    if (!percentileStrings.empty())
+    {
+        percentiles.clear();
+    }
+    for (const auto& p : percentileStrings)
+    {
+        percentiles.push_back(stringToValue<float>(p));
+    }
+    // Validate every percentile, including ones that were already present before parsing.
+    for (auto percentile : percentiles)
+    {
+        if (!(percentile >= 0.F && percentile <= 100.F)) // written this way so that NaN is rejected too
+        {
+            throw std::invalid_argument(std::string("Percentile ") + std::to_string(percentile) + " is not in [0,100]");
+        }
+    }
+}
+
+bool parseHelp(Arguments& arguments)
+{
+    // Both spellings are looked up unconditionally (no short-circuit); either one requests help.
+    bool requested[2] = {false, false};
+    getAndDelOption(arguments, "--help", requested[0]);
+    getAndDelOption(arguments, "-h", requested[1]);
+    return requested[0] || requested[1];
+}
+
+void AllOptions::parse(Arguments& arguments) // Parses every option group, cross-checks them, throws std::invalid_argument on conflicts.
+{
+    model.parse(arguments);
+    build.parse(arguments);
+    system.parse(arguments);
+    inference.parse(arguments);
+
+    if (build.useRuntime != RuntimeMode::kFULL && inference.timeRefit)
+    {
+        throw std::invalid_argument("--timeRefit requires --useRuntime=full.");
+    }
+
+    if (inference.optProfileIndex < static_cast<int32_t>(build.optProfiles.size())) // NOTE(review): a negative index also passes this test - confirm it is rejected earlier
+    {
+        // Propagate shape profile between builder and inference
+        for (auto const& s : build.optProfiles[inference.optProfileIndex])
+        {
+            if (inference.shapes.find(s.first) == inference.shapes.end()) // only inputs with no inference shape yet get the kOPT shape
+            {
+                insertShapesInference(
+                    inference.shapes, s.first, s.second[static_cast<size_t>(nvinfer1::OptProfileSelector::kOPT)]);
+            }
+        }
+        for (auto const& s : inference.shapes)
+        {
+            if (build.optProfiles[inference.optProfileIndex].find(s.first)
+                == build.optProfiles[inference.optProfileIndex].end()) // inference shape has no entry in the selected build profile
+            {
+                // assume min/opt/max all the same
+                insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kMIN,
+                    s.first, s.second);
+                insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kOPT,
+                    s.first, s.second);
+                insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kMAX,
+                    s.first, s.second);
+            }
+        }
+    }
+
+    // Set nvtxVerbosity to be the same as build-time profilingVerbosity.
+    inference.nvtxVerbosity = build.profilingVerbosity;
+
+    reporting.parse(arguments);
+    helps = parseHelp(arguments);
+
+    if (!helps) // every check below is skipped when a help flag was given
+    {
+        if (!build.load && model.baseModel.format == ModelFormat::kANY)
+        {
+            throw std::invalid_argument("Model missing or format not recognized");
+        }
+        if (system.DLACore >= 0 && inference.graph) // conflict is resolved by warning and turning the graph option off, not by throwing
+        {
+            sample::gLogWarning << "CUDA graphs and DLA offloading are not simultaneously supported. "
+                                << "The CUDA graph option has been disabled (alternatively, you may run without the "
+                                   "`--useDLACore` option)."
+                                << std::endl;
+            inference.graph = false;
+        }
+        if (build.safe && system.DLACore >= 0) // safe mode together with a DLA core switches on the standalone DLA build
+        {
+            build.buildDLAStandalone = true;
+        }
+        if (build.runtimePlatform != nvinfer1::RuntimePlatform::kSAME_AS_BUILD) // inference is skipped when the runtime platform differs from the build platform
+        {
+            build.skipInference = true;
+        }
+        if (build.buildDLAStandalone)
+        {
+            build.skipInference = true;
+            auto checkSafeDLAFormats = [](std::vector<IOFormat> const& fmt, bool isInput) // true only if fmt is non-empty and every entry is an accepted type/format pair
+            {
+                return fmt.empty()
+                    ? false
+                    : std::all_of(fmt.begin(), fmt.end(),
+                        [&](IOFormat const& pair)
+                        {
+                            bool supported{false};
+                            bool const isDLA_LINEAR{
+                                pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kDLA_LINEAR)}; // equality against a single-bit mask: a combined format mask never matches
+                            bool const isHWC4{pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kCHW4)
+                                || pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kDLA_HWC4)};
+                            bool const isCHW32{
+                                pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kCHW32)};
+                            bool const isCHW16{
+                                pair.second == 1U << static_cast<int32_t>(nvinfer1::TensorFormat::kCHW16)};
+                            supported |= pair.first == nvinfer1::DataType::kINT8
+                                && (isDLA_LINEAR || (isInput ? isHWC4 : false) || isCHW32); // the HWC4 variants count for inputs only
+                            supported |= pair.first == nvinfer1::DataType::kHALF
+                                && (isDLA_LINEAR || (isInput ? isHWC4 : false) || isCHW16);
+                            return supported;
+                        });
+            };
+            if (!checkSafeDLAFormats(build.inputFormats, true) || !checkSafeDLAFormats(build.outputFormats, false))
+            {
+                throw std::invalid_argument(
+                    "I/O formats for safe DLA capability are restricted to fp16/int8:dla_linear, fp16/int8:hwc4, "
+                    "fp16:chw16 or "
+                    "int8:chw32");
+            }
+            if (build.allowGPUFallback)
+            {
+                throw std::invalid_argument("GPU fallback (--allowGPUFallback) not allowed for DLA standalone mode");
+            }
+        }
+        if (system.DLACore < 0 && build.enableUInt8AsymmetricQuantizationDLA)
+        {
+            throw std::invalid_argument("--uint8AsymmetricQuantizationDLA is not supported without DLA cores.");
+        }
+    }
+}
+
+void TaskInferenceOptions::parse(Arguments& arguments) // Reads the per-task inference settings from `arguments` into this object.
+{
+    getAndDelOption(arguments, "engine", engine); // keys in this function carry no leading "--"
+    getAndDelOption(arguments, "device", device);
+    getAndDelOption(arguments, "batch", batch);
+    getAndDelOption(arguments, "DLACore", DLACore);
+    getAndDelOption(arguments, "graph", graph);
+    getAndDelOption(arguments, "persistentCacheRatio", persistentCacheRatio);
+}
+
+void SafeBuilderOptions::parse(Arguments& arguments) // Reads the safe-builder flags and derives timingCacheMode from them.
+{
+    auto collectFormats = [&arguments](std::vector<IOFormat>& destination, const char* flagName) {
+        std::string rawList;
+        getAndDelOption(arguments, flagName, rawList);
+        std::vector<std::string> const tokens = splitToStringVec(rawList, ',');
+        for (auto it = tokens.begin(); it != tokens.end(); ++it)
+        {
+            destination.push_back(stringToValue<IOFormat>(*it));
+        }
+    };
+    getAndDelOption(arguments, "--serialized", serialized);
+    getAndDelOption(arguments, "--onnx", onnxModelFile);
+    getAndDelOption(arguments, "--help", help);
+    getAndDelOption(arguments, "-h", help);
+    getAndDelOption(arguments, "--verbose", verbose);
+    getAndDelOption(arguments, "-v", verbose);
+    collectFormats(inputFormats, "--inputIOFormats");
+    collectFormats(outputFormats, "--outputIOFormats");
+    getAndDelOption(arguments, "--int8", int8);
+    getAndDelOption(arguments, "--calib", calibFile);
+    getAndDelOption(arguments, "--consistency", consistency);
+    getAndDelOption(arguments, "--std", standard);
+    std::string pluginEntry;
+    while (getAndDelOption(arguments, "--plugins", pluginEntry)) // deprecated spelling: still honoured, with a warning per occurrence
+    {
+        sample::gLogWarning << "--plugins flag has been deprecated, use --staticPlugins flag instead." << std::endl;
+        plugins.emplace_back(pluginEntry);
+    }
+    while (getAndDelOption(arguments, "--staticPlugins", pluginEntry))
+    {
+        plugins.emplace_back(pluginEntry);
+    }
+    bool builderCacheDisabled{false};
+    getAndDelOption(arguments, "--noBuilderCache", builderCacheDisabled);
+    getAndDelOption(arguments, "--timingCacheFile", timingCacheFile);
+    getAndDelOption(arguments, "--avgTiming", avgTiming);
+    if (builderCacheDisabled) // disabling the cache wins over any cache file
+    {
+        timingCacheMode = TimingCacheMode::kDISABLE;
+    }
+    else if (timingCacheFile.empty())
+    {
+        timingCacheMode = TimingCacheMode::kLOCAL;
+    }
+    else
+    {
+        timingCacheMode = TimingCacheMode::kGLOBAL;
+    }
+    getAndDelOption(arguments, "--sparsity", sparsity);
+}
+
+std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) // Prints the banner, model format and model path.
+{
+    os << "=== Model Options ===" << std::endl << "Format: ";
+
+    // A format value matching neither enumerator leaves the label empty.
+    char const* formatLabel = "";
+    switch (options.format)
+    {
+    case ModelFormat::kONNX: formatLabel = "ONNX"; break;
+    case ModelFormat::kANY: formatLabel = "*"; break;
+    }
+    os << formatLabel;
+    os << std::endl;
+    os << "Model: " << options.model;
+    os << std::endl;
+
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ModelOptions& options) // Prints the base model block, then the output names.
+{
+    os << options.baseModel;
+    switch (options.baseModel.format)
+    {
+    case ModelFormat::kONNX: // Fallthrough: neither ONNX nor the generic case has extra options to report
+    case ModelFormat::kANY: break;
+    }
+    // Output names follow on one line, each preceded by a single space.
+    os << "Output:";
+    for (auto const& outputName : options.outputs)
+    {
+        os << " " << outputName;
+    }
+    os << std::endl;
+
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) // Writes the short lowercase name of `dtype` to `os` and returns `os`.
+{
+    switch (dtype) // no default branch: a value matching none of the cases prints nothing
+    {
+    case nvinfer1::DataType::kFLOAT:
+    {
+        os << "fp32";
+        break;
+    }
+    case nvinfer1::DataType::kHALF:
+    {
+        os << "fp16";
+        break;
+    }
+    case nvinfer1::DataType::kBF16:
+    {
+        os << "bf16";
+        break;
+    }
+    case nvinfer1::DataType::kINT8:
+    {
+        os << "int8";
+        break;
+    }
+    case nvinfer1::DataType::kINT32:
+    {
+        os << "int32";
+        break;
+    }
+    case nvinfer1::DataType::kBOOL:
+    {
+        os << "bool";
+        break;
+    }
+    case nvinfer1::DataType::kUINT8:
+    {
+        os << "uint8";
+        break;
+    }
+    case nvinfer1::DataType::kFP8:
+    {
+        os << "fp8";
+        break;
+    }
+    case nvinfer1::DataType::kINT64:
+    {
+        os << "int64";
+        break;
+    }
+    case nvinfer1::DataType::kINT4:
+    {
+        os << "int4";
+        break;
+    }
+    case nvinfer1::DataType::kFP4:
+    {
+        os << "fp4";
+        break;
+    }
+    case nvinfer1::DataType::kE8M0:
+    {
+        os << "e8m0";
+        break;
+    }
+    }
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, IOFormat const& format) // Prints "<type>:<fmt>[+<fmt>...]" for the type and format bitmask in `format`.
+{
+    os << format.first << ":";
+
+    for (int32_t f = 0; f < nvinfer1::EnumMax<nvinfer1::TensorFormat>(); ++f) // walk the mask from bit 0 upwards
+    {
+        if ((1U << f) & format.second)
+        {
+            if (format.second & ((1U << f) - 1U)) // '+' only when a lower bit was already printed; testing `f` alone put '+' before the first name
+            {
+                os << "+";
+            }
+            switch (nvinfer1::TensorFormat(f))
+            {
+            case nvinfer1::TensorFormat::kLINEAR:
+            {
+                os << "chw";
+                break;
+            }
+            case nvinfer1::TensorFormat::kCHW2:
+            {
+                os << "chw2";
+                break;
+            }
+            case nvinfer1::TensorFormat::kHWC8:
+            {
+                os << "hwc8";
+                break;
+            }
+            case nvinfer1::TensorFormat::kHWC16:
+            {
+                os << "hwc16";
+                break;
+            }
+            case nvinfer1::TensorFormat::kCHW4:
+            {
+                os << "chw4";
+                break;
+            }
+            case nvinfer1::TensorFormat::kCHW16:
+            {
+                os << "chw16";
+                break;
+            }
+            case nvinfer1::TensorFormat::kCHW32:
+            {
+                os << "chw32";
+                break;
+            }
+            case nvinfer1::TensorFormat::kDHWC8:
+            {
+                os << "dhwc8";
+                break;
+            }
+            case nvinfer1::TensorFormat::kCDHW32:
+            {
+                os << "cdhw32";
+                break;
+            }
+            case nvinfer1::TensorFormat::kHWC:
+            {
+                os << "hwc";
+                break;
+            }
+            case nvinfer1::TensorFormat::kDHWC:
+            {
+                os << "dhwc";
+                break;
+            }
+            case nvinfer1::TensorFormat::kDLA_LINEAR:
+            {
+                os << "dla_linear";
+                break;
+            }
+            case nvinfer1::TensorFormat::kDLA_HWC4:
+            {
+                os << "dla_hwc4";
+                break;
+            }
+            }
+        }
+    }
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType)
+{
+    // Resolve the label first, then emit it with a single insertion.
+    // A value matching neither enumerator keeps the label null and prints nothing.
+    char const* label = nullptr;
+    switch (devType)
+    {
+    case nvinfer1::DeviceType::kGPU: label = "GPU"; break;
+    case nvinfer1::DeviceType::kDLA: label = "DLA"; break;
+    }
+
+    if (label != nullptr)
+    {
+        os << label;
+    }
+    return os;
+}
+
+
+std::ostream& operator<<(std::ostream& os, nvinfer1::RuntimePlatform platform)
+{
+    // Pick the display name inside the switch and write it afterwards.
+    // A value matching neither enumerator keeps the name null and prints nothing.
+    char const* displayName = nullptr;
+    switch (platform)
+    {
+    case nvinfer1::RuntimePlatform::kSAME_AS_BUILD: displayName = "Same As Build"; break;
+    case nvinfer1::RuntimePlatform::kWINDOWS_AMD64: displayName = "Windows AMD64"; break;
+    }
+
+    if (displayName != nullptr)
+    {
+        os << displayName;
+    }
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ShapeRange& dims) // Joins entries with '+', stopping at the first entry of size zero.
+{
+    bool wroteAny = false;
+    for (const auto& shape : dims)
+    {
+        if (shape.size() == 0)
+        {
+            break;
+        }
+        os << (wroteAny ? "+" : "") << shape;
+        wroteAny = true;
+    }
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, LayerPrecisions const& layerPrecisions) // Emits "name:precision" pairs joined by ','.
+{
+    char const* separator = "";
+    for (auto const& entry : layerPrecisions)
+    {
+        os << separator << entry.first << ":" << entry.second;
+        separator = ",";
+    }
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, LayerDeviceTypes const& layerDeviceTypes) // Emits "name:device" pairs joined by ", ".
+{
+    size_t emitted = 0;
+    for (auto const& entry : layerDeviceTypes)
+    {
+        os << (emitted++ == 0 ? "" : ", ") << entry.first << ":" << entry.second;
+    }
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, StringSet const& stringSet) // Writes the elements joined by ',' with no spaces.
+{
+    bool needComma = false;
+    for (auto const& element : stringSet)
+    {
+        os << (needComma ? "," : "") << element;
+        needComma = true;
+    }
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const BuildOptions& options) // Prints the build configuration, one "label: value" line per setting.
+{
+    // if loadEngine is specified, BuildOptions are N/A
+    if (options.load)
+    {
+        os << std::endl;
+        return os;
+    }
+    // clang-format off
+    os << "=== Build Options ==="                                                                                       << std::endl <<
+          "Memory Pools: ";     printMemoryPools(os, options)                                                           << std::endl <<
+          "avgTiming: "      << options.avgTiming                                                                       << std::endl <<
+          "Precision: ";        printPrecision(os, options)                                                             << std::endl <<
+          "LayerPrecisions: " << options.layerPrecisions                                                                << std::endl <<
+          "Layer Device Types: " << options.layerDeviceTypes                                                            << std::endl <<
+          "Calibration: "    << (options.int8 && options.calibration.empty() ? "Dynamic" : options.calibration.c_str()) << std::endl <<
+          "Refit: "          << boolToEnabled(options.refittable)                                                       << std::endl <<
+          "Strip weights: "     << boolToEnabled(options.stripWeights)                                                  << std::endl <<
+          "Version Compatible: " << boolToEnabled(options.versionCompatible)                                            << std::endl <<
+          "ONNX Plugin InstanceNorm: " << boolToEnabled(options.pluginInstanceNorm)                                     << std::endl <<
+          "ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: " << boolToEnabled(options.enableUInt8AsymmetricQuantizationDLA) << std::endl <<
+          "TensorRT runtime: " << options.useRuntime                                                                    << std::endl <<
+          "Lean DLL Path: " << options.leanDLLPath                                                                      << std::endl <<
+          "Tempfile Controls: "; printTempfileControls(os, options.tempfileControls)                                    << std::endl <<
+          "Exclude Lean Runtime: " << boolToEnabled(options.excludeLeanRuntime)                                         << std::endl <<
+          "Sparsity: ";         printSparsity(os, options)                                                              << std::endl <<
+          "Safe mode: "      << boolToEnabled(options.safe)                                                             << std::endl <<
+          "Build DLA standalone loadable: " << boolToEnabled(options.buildDLAStandalone)                                << std::endl <<
+          "Allow GPU fallback for DLA: " << boolToEnabled(options.allowGPUFallback)                                     << std::endl <<
+          "DirectIO mode: "  << boolToEnabled(options.directIO)                                                         << std::endl <<
+          "Restricted mode: " << boolToEnabled(options.restricted)                                                      << std::endl <<
+          "Skip inference: "     << boolToEnabled(options.skipInference)                                                << std::endl <<
+          "Save engine: "    << (options.save ? options.engine : "")                                                    << std::endl <<
+          "Load engine: "    << (options.load ? options.engine : "")                                                    << std::endl <<
+          "Profiling verbosity: " << static_cast<int32_t>(options.profilingVerbosity)                                   << std::endl <<
+          "Tactic sources: ";   printTacticSources(os, options.enabledTactics, options.disabledTactics)                 << std::endl <<
+          "timingCacheMode: ";  printTimingCache(os, options.timingCacheMode)                                           << std::endl <<
+          "timingCacheFile: " << options.timingCacheFile                                                                << std::endl <<
+          "Enable Compilation Cache: "<< boolToEnabled(!options.disableCompilationCache) << std::endl <<
+          "Enable Monitor Memory: "<< boolToEnabled(options.enableMonitorMemory) << std::endl <<
+          "errorOnTimingCacheMiss: "  << boolToEnabled(options.errorOnTimingCacheMiss)                                  << std::endl <<
+          "Preview Features: "; printPreviewFlags(os, options)                                                          << std::endl <<
+          "MaxAuxStreams: "   << options.maxAuxStreams                                                                  << std::endl <<
+          "BuilderOptimizationLevel: " << options.builderOptimizationLevel                                              << std::endl <<
+          "MaxTactics: " << options.maxTactics                                                                          << std::endl <<
+          "Calibration Profile Index: " << options.calibProfile                                                         << std::endl <<
+          "Weight Streaming: " << boolToEnabled(options.allowWeightStreaming)                                           << std::endl <<
+          "Runtime Platform: " << options.runtimePlatform                                                               << std::endl <<
+          "Debug Tensors: " << options.debugTensors                                                                     << std::endl <<
+          "Distributive Independence: " << boolToEnabled(options.distributiveIndependence)                      << std::endl <<
+          "Mark Unfused Tensors As Debug Tensors: " << boolToEnabled(options.markUnfusedTensorsAsDebugTensors)   << std::endl;
+    // clang-format on
+
+    auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector<IOFormat>& formats) { // by const reference: no vector copy per call
+        if (formats.empty()) // an empty list is reported as the fp32:CHW default line
+        {
+            os << direction << "s format: fp32:CHW" << std::endl;
+        }
+        else
+        {
+            for (const auto& f : formats)
+            {
+                os << direction << ": " << f << std::endl;
+            }
+        }
+    };
+
+    printIOFormats(os, "Input(s)", options.inputFormats);
+    printIOFormats(os, "Output(s)", options.outputFormats);
+    for (size_t i = 0; i < options.optProfiles.size(); i++)
+    {
+        printShapes(os, "build", options.optProfiles[i], i);
+    }
+    printShapes(os, "calibration", options.shapesCalib, -1);
+
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const SystemOptions& options) // Prints device, DLA core and the three plugin lists.
+{
+    // clang-format off
+    std::string const dlaCoreText = options.DLACore == -1 ? std::string{} : std::to_string(options.DLACore);
+
+    os << "=== System Options ===" << std::endl;
+    os << "Device: " << options.device << std::endl << "DLACore: " << dlaCoreText << std::endl;
+    os << "Plugins:";
+
+    for (auto const& item : options.plugins)
+    {
+        os << " " << item;
+    }
+    os << std::endl;
+
+    os << "setPluginsToSerialize:";
+
+    for (auto const& item : options.setPluginsToSerialize)
+    {
+        os << " " << item;
+    }
+    os << std::endl;
+
+    os << "dynamicPlugins:";
+
+    for (auto const& item : options.dynamicPlugins)
+    {
+        os << " " << item;
+    }
+    os << std::endl;
+
+    os << "ignoreParsedPluginLibs: " << options.ignoreParsedPluginLibs;
+    os << std::endl << std::endl;
+    return os;
+    // clang-format on
+}
+
+std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) // Prints the inference configuration to `os` and returns `os`.
+{
+    // clang-format off
+    os << "=== Inference Options ==="                                     << std::endl <<
+
+          "Batch: ";
+    if (options.batch && options.shapes.empty()) // the numeric batch is shown only when it is non-zero and no shapes were given
+    {
+                          os << options.batch                             << std::endl;
+    }
+    else
+    {
+                          os << "Explicit"                                << std::endl;
+    }
+    printShapes(os, "inference", options.shapes, options.optProfileIndex);
+
+    std::string wsBudget{"Disabled"}; // text used when neither the bytes nor the percent branch below applies
+    if (options.weightStreamingBudget.bytes == WeightStreamingBudget::kAUTOMATIC)
+    {
+        wsBudget = "Automatic";
+    }
+    else if (options.weightStreamingBudget.bytes != WeightStreamingBudget::kDISABLE) // a byte budget is checked before the percentage
+    {
+        wsBudget = std::to_string(options.weightStreamingBudget.bytes) + " bytes";
+    }
+    else if (options.weightStreamingBudget.percent != WeightStreamingBudget::kDISABLE)
+    {
+        wsBudget = std::to_string(options.weightStreamingBudget.percent) + "%";
+    }
+
+    os << "Iterations: "                << options.iterations                                   << std::endl <<
+          "Duration: "                  << options.duration   << "s (+ "
+                                        << options.warmup     << "ms warm up)"                  << std::endl <<
+          "Sleep time: "                << options.sleep      << "ms"                           << std::endl <<
+          "Idle time: "                 << options.idle       << "ms"                           << std::endl <<
+          "Inference Streams: "         << options.infStreams                                   << std::endl <<
+          "ExposeDMA: "                 << boolToEnabled(!options.overlap)                      << std::endl <<
+          "Data transfers: "            << boolToEnabled(!options.skipTransfers)                << std::endl <<
+          "Spin-wait: "                 << boolToEnabled(options.spin)                          << std::endl <<
+          "Multithreading: "            << boolToEnabled(options.threads)                       << std::endl <<
+          "CUDA Graph: "                << boolToEnabled(options.graph)                         << std::endl <<
+          "Separate profiling: "        << boolToEnabled(options.rerun)                         << std::endl <<
+          "Time Deserialize: "          << boolToEnabled(options.timeDeserialize)               << std::endl <<
+          "Time Refit: "                << boolToEnabled(options.timeRefit)                     << std::endl <<
+          "NVTX verbosity: "            << static_cast<int32_t>(options.nvtxVerbosity)          << std::endl <<
+          "Persistent Cache Ratio: "    << static_cast<float>(options.persistentCacheRatio)     << std::endl <<
+          "Optimization Profile Index: "<< options.optProfileIndex                              << std::endl <<
+          "Weight Streaming Budget: "   << wsBudget                                             << std::endl;
+    // clang-format on
+
+    os << "Inputs:" << std::endl;
+    for (const auto& input : options.inputs) // one "<first><-<second>" line per entry
+    {
+        os << input.first << "<-" << input.second << std::endl;
+    }
+
+    os << "Debug Tensor Save Destinations:" << std::endl;
+    for (auto const& fileName : options.debugTensorFileNames)
+    {
+        os << fileName.first << ": " << fileName.second << std::endl;
+    }
+    os << "Dump All Debug Tensor in Formats: " << std::endl;
+    for (auto const& format : options.dumpAlldebugTensorFormats)
+    {
+        os << format << std::endl;
+    }
+
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const ReportingOptions& options) // Prints the reporting configuration, one "label: value" line each.
+{
+    // clang-format off
+    os << "=== Reporting Options ==="                                                     << std::endl <<
+          "Verbose: "                     << boolToEnabled(options.verbose)               << std::endl <<
+          "Averages: "                    << options.avgs << " inferences"                << std::endl <<
+          "Percentiles: "                 << joinValuesToString(options.percentiles, ",") << std::endl <<
+          "Dump refittable layers: "      << boolToEnabled(options.refit)                 << std::endl <<
+          "Dump output: "                 << boolToEnabled(options.output)                << std::endl <<
+          "Profile: "                     << boolToEnabled(options.profile)               << std::endl <<
+          "Export timing to JSON file: "  << options.exportTimes                          << std::endl <<
+          "Export output to JSON file: "  << options.exportOutput                         << std::endl <<
+          "Export profile to JSON file: " << options.exportProfile                        << std::endl;
+    // clang-format on
+
+    return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const AllOptions& options) // Prints model, build, system, inference, reporting, then a newline.
+{
+    os << options.model << options.build << options.system;
+    return os << options.inference << options.reporting << std::endl;
+}
+
+std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) // Prints the safe-builder configuration to `os` and returns `os`.
+{
+    auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector<IOFormat>& formats) // by const reference: no copy per call
+    {
+        if (formats.empty()) // an empty list is reported as the fp32:CHW default line
+        {
+            os << direction << "s format: fp32:CHW" << std::endl;
+        }
+        else
+        {
+            for (const auto& f : formats)
+            {
+                os << direction << ": " << f << std::endl;
+            }
+        }
+    };
+
+    os << "=== Build Options ===" << std::endl;
+    os << "Model ONNX: " << options.onnxModelFile << std::endl;
+
+    os << "Precision: FP16"; // FP16 is always printed; the other precisions are appended when their flags are set
+    if (options.int8)
+    {
+        os << " + INT8";
+    }
+    if (options.fp8)
+    {
+        os << " + FP8";
+    }
+    if (options.int4)
+    {
+        os << " + INT4";
+    }
+    os << std::endl;
+    os << "Calibration file: " << options.calibFile << std::endl;
+    os << "Serialized Network: " << options.serialized << std::endl;
+
+    printIOFormats(os, "Input(s)", options.inputFormats);
+    printIOFormats(os, "Output(s)", options.outputFormats);
+    os << "Plugins:";
+    for (const auto& p : options.plugins)
+    {
+        os << " " << p;
+    }
+    os << std::endl << "timingCacheMode: "; // end the plugin line first; it used to run straight into this label
+    printTimingCache(os, options.timingCacheMode) << std::endl;
+    os << "timingCacheFile: " << options.timingCacheFile << std::endl;
+    os << std::endl;
+    return os;
+}
+
+void BaseModelOptions::help(std::ostream& os) // Writes the usage line for the --onnx option to `os`.
+{
+    // clang-format off
+    os << "  --onnx=<file>               ONNX model"                                            << std::endl;
+    // clang-format on
+}
+
+void ModelOptions::help(std::ostream& os) // Writes the "Model Options" banner, then the base model usage text.
+{
+    // clang-format off
+    os << "=== Model Options ==="                                                                                 << std::endl;
+    BaseModelOptions::help(os); // the option line itself comes from the base class helper
+    // clang-format on
+}
+
+void BuildOptions::help(std::ostream& os)
+{
+    // clang-format off
+    os << "=== Build Options ==="                                                                                                                   "\n"
+          "  --minShapes=spec                   Build with dynamic shapes using a profile with the min shapes provided"                             "\n"
+          "  --optShapes=spec                   Build with dynamic shapes using a profile with the opt shapes provided"                             "\n"
+          "  --maxShapes=spec                   Build with dynamic shapes using a profile with the max shapes provided"                             "\n"
+          "  --minShapesCalib=spec              Calibrate with dynamic shapes using a profile with the min shapes provided"                         "\n"
+          "  --optShapesCalib=spec              Calibrate with dynamic shapes using a profile with the opt shapes provided"                         "\n"
+          "  --maxShapesCalib=spec              Calibrate with dynamic shapes using a profile with the max shapes provided"                         "\n"
+          "                                     Note: All three of min, opt and max shapes must be supplied."                                       "\n"
+          "                                           However, if only opt shapes is supplied then it will be expanded so"                          "\n"
+          "                                           that min shapes and max shapes are set to the same values as opt shapes."                     "\n"
+          "                                           Input names can be wrapped with escaped single quotes (ex: 'Input:0')."                       "\n"
+          "                                     Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128"                                   "\n"
+          "                                     For scalars (0-D shapes), use input0:scalar or simply input0: with nothing after the colon."        "\n"
+          "                                     Each input shape is supplied as a key-value pair where key is the input name and"                   "\n"
+          "                                     value is the dimensions (including the batch dimension) to be used for that input."                 "\n"
+          "                                     Each key-value pair has the key and value separated using a colon (:)."                             "\n"
+          "                                     Multiple input shapes can be provided via comma-separated key-value pairs, and each input name can" "\n"
+          "                                     contain at most one wildcard ('*') character."                                                      "\n"
+          "  --inputIOFormats=spec              Type and format of each of the input tensors (default = all inputs in fp32:chw)"                    "\n"
+          "                                     See --outputIOFormats help for the grammar of type and format list."                                "\n"
+          "                                     Note: If this option is specified, please set comma-separated types and formats for all"            "\n"
+          "                                           inputs following the same order as network inputs ID (even if only one input"                 "\n"
+          "                                           needs specifying IO format) or set the type and format once for broadcasting."                "\n"
+          "  --outputIOFormats=spec             Type and format of each of the output tensors (default = all outputs in fp32:chw)"                  "\n"
+          "                                     Note: If this option is specified, please set comma-separated types and formats for all"            "\n"
+          "                                           outputs following the same order as network outputs ID (even if only one output"              "\n"
+          "                                           needs specifying IO format) or set the type and format once for broadcasting."                "\n"
+        R"(                                     IO Formats: spec  ::= IOfmt[","spec])"                                                              "\n"
+          "                                                 IOfmt ::= type:fmt"                                                                     "\n"
+        R"(                                                 type  ::= "fp32"|"fp16"|"bf16"|"int32"|"int64"|"int8"|"uint8"|"bool")"                  "\n"
+        R"(                                                 fmt   ::= ("chw"|"chw2"|"hwc8"|"chw4"|"chw16"|"chw32"|"dhwc8"|)"                        "\n"
+        R"(                                                            "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4"|"hwc16"|"dhwc")["+"fmt])"             "\n"
+          "  --memPoolSize=poolspec             Specify the size constraints of the designated memory pool(s)"                                      "\n"
+          "                                     Supports the following base-2 suffixes: " << getAvailableUnitSuffixes() << "."                      "\n"
+          "                                     If none of suffixes is appended, the defualt unit is in MiB."                                       "\n"
+          "                                     Note: Also accepts decimal sizes, e.g. 0.25M. Will be rounded down to the nearest integer bytes."   "\n"
+          "                                     In particular, for dlaSRAM the bytes will be rounded down to the nearest power of 2."               "\n"
+        R"(                                     Pool constraint: poolspec ::= poolfmt[","poolspec])"                                                "\n"
+          "                                                      poolfmt ::= pool:size\n"
+        R"(                                                      pool ::= "workspace"|"dlaSRAM"|"dlaLocalDRAM"|"dlaGlobalDRAM"|"tacticSharedMem")"  "\n"
+          "  --profilingVerbosity=mode          Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)." "\n"
+          "                                     Please only assign once."                                                                           "\n"
+          "  --avgTiming=M                      Set the number of times averaged in each iteration for kernel selection (default = "
+                                                                                                                  << defaultAvgTiming << ")"        "\n"
+          "  --refit                            Mark the engine as refittable. This will allow the inspection of refittable layers "                "\n"
+          "                                     and weights within the engine."                                                                     "\n"
+          "  --stripWeights                     Strip weights from plan. This flag works with either refit or refit with identical weights. Default""\n"
+          "                                     to latter, but you can switch to the former by enabling both --stripWeights and --refit at the same""\n"
+          "                                     time."                                                                                              "\n"
+          "  --stripAllWeights                  Alias for combining the --refit and --stripWeights options. It marks all weights as refittable,"    "\n"
+          "                                     disregarding any performance impact. Additionally, it strips all refittable weights after the "     "\n"
+          "                                     engine is built."                                                                                   "\n"
+          "  --weightless                       [Deprecated] this knob has been deprecated. Please use --stripWeights"                              "\n"
+          "  --versionCompatible, --vc          Mark the engine as version compatible. This allows the engine to be used with newer versions"       "\n"
+          "                                     of TensorRT on the same host OS, as well as TensorRT's dispatch and lean runtimes."                 "\n"
+          "  --pluginInstanceNorm, --pi         Set `kNATIVE_INSTANCENORM` to false in the ONNX parser. This will cause the ONNX parser to use"     "\n"
+          "                                     a plugin InstanceNorm implementation over the native implementation when parsing."                  "\n"
+          "  --uint8AsymmetricQuantizationDLA   Set `kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA` to true in the ONNX parser. This directs the"   "\n"
+          "                                     onnx parser to allow UINT8 as a quantization data type and import zero point values directly"       "\n"
+          "                                     without converting to float type or all-zero values. Should only be set with DLA software version"  "\n"
+          "                                     >= 3.16."                                                                                           "\n"
+        R"(  --useRuntime=runtime               TensorRT runtime to execute engine. "lean" and "dispatch" require loading VC engine and do)"        "\n"
+          "                                     not support building an engine."                                                                    "\n"
+        R"(                                         runtime::= "full"|"lean"|"dispatch")"                                                           "\n"
+          "  --leanDLLPath=<file>               External lean runtime DLL to use in version compatiable mode."                                      "\n"
+          "  --excludeLeanRuntime               When --versionCompatible is enabled, this flag indicates that the generated engine should"          "\n"
+          "                                     not include an embedded lean runtime. If this is set, the user must explicitly specify a"           "\n"
+          "                                     valid lean runtime to use when loading the engine."     "\n"
+          "  --monitorMemory                    Enable memory monitor report for debugging usage. (default = disabled)"                             "\n"
+          "  --sparsity=spec                    Control sparsity (default = disabled). "                                                            "\n"
+        R"(                                     Sparsity: spec ::= "disable", "enable", "force")"                                                   "\n"
+          "                                     Note: Description about each of these options is as below"                                          "\n"
+          "                                           disable = do not enable sparse tactics in the builder (this is the default)"                  "\n"
+          "                                           enable  = enable sparse tactics in the builder (but these tactics will only be"               "\n"
+          "                                                     considered if the weights have the right sparsity pattern)"                         "\n"
+          "                                           force   = enable sparse tactics in the builder and force-overwrite the weights to have"       "\n"
+          "                                                     a sparsity pattern (even if you loaded a model yourself)"                           "\n"
+          "                                                     [Deprecated] this knob has been deprecated."                                        "\n"
+          "                                                     Please use <polygraphy surgeon prune> to rewrite the weights."                      "\n"
+          "  --noTF32                           Disable tf32 precision (default is to enable tf32, in addition to fp32)"                            "\n"
+          "  --fp16                             Enable fp16 precision, in addition to fp32 (default = disabled)"                                    "\n"
+          "  --bf16                             Enable bf16 precision, in addition to fp32 (default = disabled)"                                    "\n"
+          "  --int8                             Enable int8 precision, in addition to fp32 (default = disabled)"                                    "\n"
+          "  --fp8                              Enable fp8 precision, in addition to fp32 (default = disabled)"                                     "\n"
+          "  --int4                             Enable int4 precision, in addition to fp32 (default = disabled)"                                    "\n"
+          "  --best                             Enable all precisions to achieve the best performance (default = disabled)"                         "\n"
+          "  --stronglyTyped                    Create a strongly typed network. (default = disabled)"                                              "\n"
+          "  --directIO                         [Deprecated] Avoid reformatting at network boundaries. (default = disabled)"                        "\n"
+          "  --precisionConstraints=spec        Control precision constraint setting. (default = none)"                                             "\n"
+        R"(                                     Precision Constraints: spec ::= "none" | "obey" | "prefer")"                                        "\n"
+          "                                         none = no constraints"                                                                          "\n"
+          "                                         prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible"    "\n"
+          "                                         obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail"          "\n"
+          "                                                otherwise"                                                                               "\n"
+          "  --layerPrecisions=spec             Control per-layer precision constraints. Effective only when precisionConstraints is set to"        "\n"
+        R"(                                     "obey" or "prefer". (default = none))"                                                              "\n"
+        R"(                                     The specs are read left-to-right, and later ones override earlier ones. Each layer name can)"       "\n"
+          "                                     contain at most one wildcard ('*') character."                                                      "\n"
+        R"(                                     Per-layer precision spec ::= layerPrecision[","spec])"                                              "\n"
+        R"(                                                         layerPrecision ::= layerName":"precision)"                                      "\n"
+        R"(                                                         precision ::= "fp32"|"fp16"|"bf16"|"int32"|"int8")"                             "\n"
+          "  --layerOutputTypes=spec            Control per-layer output type constraints. Effective only when precisionConstraints is set to"      "\n"
+        R"(                                     "obey" or "prefer". (default = none)"                                                               "\n"
+        R"(                                     The specs are read left-to-right, and later ones override earlier ones. Each layer name can)"       "\n"
+          "                                     contain at most one wildcard ('*') character. If a layer has more than"                             "\n"
+        R"(                                     one output, then multiple types separated by "+" can be provided for this layer.)"                  "\n"
+        R"(                                     Per-layer output type spec ::= layerOutputTypes[","spec])"                                          "\n"
+        R"(                                                           layerOutputTypes ::= layerName":"type)"                                       "\n"
+        R"(                                                           type ::= "fp32"|"fp16"|"bf16"|"int32"|"int8"["+"type])"                       "\n"
+          "  --layerDeviceTypes=spec            Specify layer-specific device type."                                                                "\n"
+          "                                     The specs are read left-to-right, and later ones override earlier ones. If a layer does not have"   "\n"
+          "                                     a device type specified, the layer will opt for the default device type."                           "\n"
+        R"(                                     Per-layer device type spec ::= layerDeviceTypePair[","spec])"                                       "\n"
+        R"(                                                           layerDeviceTypePair ::= layerName":"deviceType)"                              "\n"
+        R"(                                                           deviceType ::= "GPU"|"DLA")"                                                  "\n"
+          "  --calib=<file>                     Read INT8 calibration cache file"                                                                   "\n"
+          "  --safe                             Enable build safety certified engine, if DLA is enable, --buildDLAStandalone will be specified"     "\n"
+          "                                     automatically (default = disabled)"                                                                 "\n"
+          "  --buildDLAStandalone               Enable build DLA standalone loadable which can be loaded by cuDLA, when this option is enabled, "   "\n"
+          "                                     --allowGPUFallback is disallowed and --skipInference is enabled by default. Additionally, "         "\n"
+          "                                     specifying --inputIOFormats and --outputIOFormats restricts I/O data type and memory layout"        "\n"
+          "                                     (default = disabled)"                                                                               "\n"
+          "  --allowGPUFallback                 When DLA is enabled, allow GPU fallback for unsupported layers (default = disabled)"                "\n"
+          "  --consistency                      Perform consistency checking on safety certified engine"                                            "\n"
+          "  --restricted                       Enable safety scope checking with kSAFETY_SCOPE build flag"                                         "\n"
+          "  --saveEngine=<file>                Save the serialized engine"                                                                         "\n"
+          "  --loadEngine=<file>                Load a serialized engine"                                                                           "\n"
+          "  --asyncFileReader                  Load a serialized engine using async stream reader. Should be combined with --loadEngine."          "\n"
+          "  --getPlanVersionOnly               Print TensorRT version when loaded plan was created. Works without deserialization of the plan."    "\n"
+          "                                     Use together with --loadEngine. Supported only for engines created with 8.6 and forward."           "\n"
+          "  --tacticSources=tactics            Specify the tactics to be used by adding (+) or removing (-) tactics from the default "             "\n"
+          "                                     tactic sources (default = all available tactics)."                                                  "\n"
+          "                                     Note: Currently only cuDNN, cuBLAS, cuBLAS-LT, and edge mask convolutions are listed as optional"   "\n"
+          "                                           tactics."                                                                                     "\n"
+        R"(                                     Tactic Sources: tactics ::= tactic[","tactics])"                                                    "\n"
+          "                                                     tactic  ::= (+|-)lib"                                                               "\n"
+        R"(                                                     lib     ::= "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS")"                 "\n"
+        R"(                                                                 |"JIT_CONVOLUTIONS")"                                                   "\n"
+          "                                     For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS"                    "\n"
+          "  --noBuilderCache                   Disable timing cache in builder (default is to enable timing cache)"                                "\n"
+          "  --noCompilationCache               Disable Compilation cache in builder, and the cache is part of timing cache (default is to enable compilation cache)" "\n"
+          "  --errorOnTimingCacheMiss           Emit error when a tactic being timed is not present in the timing cache (default = false)"          "\n"
+          "  --timingCacheFile=<file>           Save/load the serialized global timing cache"                                                       "\n"
+          "  --preview=features                 Specify preview feature to be used by adding (+) or removing (-) preview features from the default" "\n"
+        R"(                                     Preview Features: features ::= feature[","features])"                                               "\n"
+          "                                                       feature  ::= (+|-)flag"                                                           "\n"
+        R"(                                                       flag     ::= "aliasedPluginIO1003")"                                              "\n"
+        R"(                                                                    |"runtimeActivationResize")"                                         "\n"
+        R"(                                                                    |"profileSharing0806")"                                              "\n"
+          "  --builderOptimizationLevel         Set the builder optimization level. (default is 3)"                                                 "\n"
+          "                                     A Higher level allows TensorRT to spend more time searching for better optimization strategy."      "\n"
+          "                                     Valid values include integers from 0 to the maximum optimization level, which is currently 5."      "\n"
+          "  --maxTactics                       Set the maximum number of tactics to time when there is a choice of tactics. (default is -1)"       "\n"
+          "                                     Larger number of tactics allow TensorRT to spend more building time on evaluating tactics."         "\n"
+          "                                     Default value -1 means TensorRT can decide the number of tactics based on its own heuristic."       "\n"
+          "  --hardwareCompatibilityLevel=mode  Make the engine file compatible with other GPU architectures. (default = none)"                     "\n"
+        R"(                                     Hardware Compatibility Level: mode ::= "none" | "ampere+" | "sameComputeCapability")"               "\n"
+          "                                         none = no compatibility"                                                                        "\n"
+          "                                         ampere+ = compatible with Ampere and newer GPUs"                                                "\n"
+          "                                         sameComputeCapability = compatible with GPUs that have the same Compute Capability version"     "\n"
+          "  --runtimePlatform=platform         Set the target platform for runtime execution. (default = SameAsBuild)"                             "\n"
+          "                                     When this option is enabled, --skipInference is enabled by default."                                "\n"
+        R"(                                     RuntimePlatfrom: platform ::= "SameAsBuild" | "WindowsAMD64")"                                      "\n"
+          "                                         SameAsBuild = no requirement for cross-platform compatibility."                                 "\n"
+          "                                         WindowsAMD64 = set the target platform for engine execution as Windows AMD64 system"            "\n"
+          "  --tempdir=<dir>                    Overrides the default temporary directory TensorRT will use when creating temporary files."         "\n"
+          "                                     See IRuntime::setTemporaryDirectory API documentation for more information."                        "\n"
+          "  --tempfileControls=controls        Controls what TensorRT is allowed to use when creating temporary executable files."                 "\n"
+          "                                     Should be a comma-separated list with entries in the format (in_memory|temporary):(allow|deny)."    "\n"
+          "                                     in_memory: Controls whether TensorRT is allowed to create temporary in-memory executable files."    "\n"
+          "                                     temporary: Controls whether TensorRT is allowed to create temporary executable files in the"        "\n"
+          "                                                filesystem (in the directory given by --tempdir)."                                       "\n"
+          "                                     For example, to allow in-memory files and disallow temporary files:"                                "\n"
+          "                                         --tempfileControls=in_memory:allow,temporary:deny"                                              "\n"
+        R"(                                     If a flag is unspecified, the default behavior is "allow".)"                                        "\n"
+          "  --maxAuxStreams=N                  Set maximum number of auxiliary streams per inference stream that TRT is allowed to use to run "    "\n"
+          "                                     kernels in parallel if the network contains ops that can run in parallel, with the cost of more "   "\n"
+          "                                     memory usage. Set this to 0 for optimal memory usage. (default = using heuristics)"                 "\n"
+          "  --profile                          Build with dynamic shapes using a profile with the min/max/opt shapes provided. Can be specified"   "\n"
+          "                                         multiple times to create multiple profiles with contiguous index."                              "\n"
+          "                                     (ex: --profile=0 --minShapes=<spec> --optShapes=<spec> --maxShapes=<spec> --profile=1 ...)"         "\n"
+          "  --calibProfile                     Select the optimization profile to calibrate by index. (default = "
+                                                                                                                << defaultOptProfileIndex << ")"    "\n"
+          "  --allowWeightStreaming             Enable a weight streaming engine. Must be specified with --stronglyTyped. TensorRT will disable"    "\n"
+          "                                     weight streaming at runtime unless --weightStreamingBudget is specified."                           "\n"
+          "  --markDebug                        Specify list of names of tensors to be marked as debug tensors. Separate names with a comma"        "\n"
+          "  --markUnfusedTensorsAsDebugTensors Mark unfused tensors as debug tensors"                                                              "\n"
+          "  --tilingOptimizationLevel          Set the tiling optimization level. (default is " << defaultTilingOptimizationLevel << ")"           "\n"
+          "                                     A Higher level allows TensorRT to spend more time searching for better optimization strategy."      "\n"
+          "                                     Valid values include integers from "
+                                                << static_cast<int32_t>(nvinfer1::TilingOptimizationLevel::kNONE)
+                                                << " to the maximum tiling optimization level("
+                                                << static_cast<int32_t>(nvinfer1::TilingOptimizationLevel::kFULL) << ")."                           "\n"
+          "  --l2LimitForTiling                 Set the L2 cache usage limit for tiling optimization(default is -1)"                                "\n"
+          "  --remoteAutoTuningConfig           Set the remote auto tuning config. Must be specified with --safe."                                  "\n"
+          "                                     Format: protocol://username[:password]@hostname[:port]?param1=value1&param2=value2"                 "\n"
+          "                                     Example: ssh://root:root@192.168.1.100:2213?remote_exec_path=/workspace/LWEServer&remote_lib_path=/workspace" "\n"
+          ;
+    // clang-format on
+    os << std::flush;
+}
+
+void SystemOptions::help(std::ostream& os) // Writes the "=== System Options ===" usage table to os.
+{
+    // clang-format off
+    os << "=== System Options ===" "\n"
+          "  --device=N                  Select cuda device N (default = " << defaultDevice << ")" "\n"
+          "  --useDLACore=N              Select DLA core N for layers that support DLA (default = none)" "\n"
+          "  --staticPlugins             Plugin library (.so) to load statically (can be specified multiple times)" "\n"
+          "  --dynamicPlugins            Plugin library (.so) to load dynamically and may be serialized with the engine if they are included in --setPluginsToSerialize (can be specified multiple times)" "\n"
+          "  --setPluginsToSerialize     Plugin library (.so) to be serialized with the engine (can be specified multiple times)" "\n"
+          "  --ignoreParsedPluginLibs    By default, when building a version-compatible engine, plugin libraries specified by the ONNX parser " "\n"
+          "                              are implicitly serialized with the engine (unless --excludeLeanRuntime is specified) and loaded dynamically. " "\n"
+          "                              Enable this flag to ignore these plugin libraries instead." "\n"
+          "  --safetyPlugins             Plugin library (.so) for TensorRT auto safety to manually load safety plugins specified by the command line arguments." "\n"
+          "                              Example: --safetyPlugins=/path/to/plugin_lib.so[pluginNamespace1::plugin1,pluginNamespace2::plugin2]." "\n"
+          "                              The option can be specified multiple times with different plugin libraries." << std::endl; // rows above end in "\n"; this std::endl is the only flush
+    // clang-format on
+}
+
+void InferenceOptions::help(std::ostream& os)
+{
+    // clang-format off
+    os << "=== Inference Options ==="                                                                                                << std::endl <<
+          "  --shapes=spec               Set input shapes for dynamic shapes inference inputs."                                      << std::endl <<
+        R"(                              Note: Input names can be wrapped with escaped single quotes (ex: 'Input:0').)"              << std::endl <<
+          "                              Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128"                          << std::endl <<
+          "                              For scalars (0-D shapes), use input0:scalar or simply input0: with nothing after the colon."<< std::endl <<
+          "                              Each input shape is supplied as a key-value pair where key is the input name and"           << std::endl <<
+          "                              value is the dimensions (including the batch dimension) to be used for that input."         << std::endl <<
+          "                              Each key-value pair has the key and value separated using a colon (:)."                     << std::endl <<
+          "                              Multiple input shapes can be provided via comma-separated key-value pairs, and each input " << std::endl <<
+          "                              name can contain at most one wildcard ('*') character."                                     << std::endl <<
+          "  --loadInputs=spec           Load input values from files (default = generate random inputs). Input names can be "
+                                                                                       "wrapped with single quotes (ex: 'Input:0')"  << std::endl <<
+        R"(                              Input values spec ::= Ival[","spec])"                                                       << std::endl <<
+        R"(                                           Ival ::= name":"file)"                                                         << std::endl <<
+          "                              Consult the README for more information on generating files for custom inputs."             << std::endl <<
+          "  --iterations=N              Run at least N inference iterations (default = "               << defaultIterations << ")"  << std::endl <<
+          "  --warmUp=N                  Run for N milliseconds to warmup before measuring performance (default = "
+                                                                                                            << defaultWarmUp << ")"  << std::endl <<
+          "  --duration=N                Run performance measurements for at least N seconds wallclock time (default = "
+                                                                                                          << defaultDuration << ")"  << std::endl <<
+          "                              If -1 is specified, inference will keep running unless stopped manually"                    << std::endl <<
+          "  --sleepTime=N               Delay inference start with a gap of N milliseconds between launch and compute "
+                                                                                               "(default = " << defaultSleep << ")"  << std::endl <<
+          "  --idleTime=N                Sleep N milliseconds between two continuous iterations"
+                                                                                               "(default = " << defaultIdle << ")"   << std::endl <<
+          "  --infStreams=N              Instantiate N execution contexts to run inference concurrently "
+                                                                                             "(default = " << defaultStreams << ")"  << std::endl <<
+          "  --exposeDMA                 Serialize DMA transfers to and from device (default = disabled)."                           << std::endl <<
+          "  --noDataTransfers           Disable DMA transfers to and from device (default = enabled). Note some device-to-host"     << std::endl <<
+          "                              data transfers will remain if output dumping is enabled via the --dumpOutput or"           << std::endl <<
+          "                              --exportOutput flags."                                                                     << std::endl <<
+          "  --useManagedMemory          Use managed memory instead of separate host and device allocations (default = disabled)."   << std::endl <<
+          "  --useSpinWait               Actively synchronize on GPU events. This option may decrease synchronization time but "
+                                                                             "increase CPU usage and power (default = disabled)"     << std::endl <<
+          "  --threads                   Enable multithreading to drive engines with independent threads"
+                                                                                " or speed up refitting (default = disabled) "       << std::endl <<
+          "  --useCudaGraph              Use CUDA graph to capture engine execution and then launch inference (default = disabled)." << std::endl <<
+          "                              This flag may be ignored if the graph capture fails."                                       << std::endl <<
+          "  --timeDeserialize           Time the amount of time it takes to deserialize the network and exit."                      << std::endl <<
+          "  --timeRefit                 Time the amount of time it takes to refit the engine before inference."                     << std::endl <<
+          "  --separateProfileRun        Do not attach the profiler in the benchmark run; if profiling is enabled, a second "
+                                                                                "profile run will be executed (default = disabled)"  << std::endl <<
+          "  --skipInference             Exit after the engine has been built and skip inference perf measurement "
+                                                                                                             "(default = disabled)"  << std::endl <<
+          "  --persistentCacheRatio      Set the persistentCacheLimit in ratio, 0.5 represent half of max persistent L2 size "
+                                                                                                                    "(default = 0)"  << std::endl <<
+          "  --useProfile                Set the optimization profile for the inference context "
+                                                                                   "(default = " << defaultOptProfileIndex << " )."  << std::endl <<
+          "  --allocationStrategy=spec   Specify how the internal device memory for inference is allocated."                         << std::endl <<
+        R"(                              Strategy: spec ::= "static"|"profile"|"runtime")"                                           << std::endl <<
+          "                                  static = Allocate device memory based on max size across all profiles."                 << std::endl <<
+          "                                  profile = Allocate device memory based on max size of the current profile."             << std::endl <<
+          "                                  runtime = Allocate device memory based on the actual input shapes."                     << std::endl <<
+          "  --saveDebugTensors          Specify list of names of tensors to turn on the debug state"                                << std::endl <<
+          "                              and filename to save raw outputs to."                                                       << std::endl <<
+          "                              These tensors must be specified as debug tensors during build time."                        << std::endl <<
+        R"(                              Input values spec ::= Ival[","spec])"                                                       << std::endl <<
+        R"(                                           Ival ::= name":"file)"                                                         << std::endl <<
+          "  --saveAllDebugTensors       Save all debug tensors to files. "                                                          << std::endl <<
+          "                              Including debug tensors marked by --markDebug and --markUnfusedTensorsAsDebugTensors"       << std::endl <<
+          "                              Multiple file formats can be saved simultaneously."                                         << std::endl <<
+        R"(                              Input values spec   ::= format[","format])"                                                 << std::endl <<
+        R"(                                           format ::= "summary"|"numpy"|"string"|"raw")"                                  << std::endl <<
+          "  --weightStreamingBudget     Set the maximum amount of GPU memory TensorRT is allowed to use for weights."               << std::endl <<
+          "                              It can take on the following values:"                                                       << std::endl <<
+          "                                  -2: (default) Disable weight streaming at runtime."                                     << std::endl <<
+          "                                  -1: TensorRT will automatically decide the budget."                                     << std::endl <<
+          "                                   0-100%: Percentage of streamable weights that reside on the GPU."                      << std::endl <<
+          "                                           0% saves the most memory but will have the worst performance."                 << std::endl <<
+          "                                           Requires the '%' character."                                                   << std::endl <<
+          "                                  >=0B: The exact amount of streamable weights that reside on the GPU. Supports the "     << std::endl <<
+          "                                       following base-2 suffixes: " << getAvailableUnitSuffixes() << "."                  << std::endl;
+
+    // clang-format on
+}
+
+//! \brief Write the "Reporting Options" help section (each flag with its default) to \p os.
+void ReportingOptions::help(std::ostream& os)
+{
+    // clang-format off
+    os << "=== Reporting Options ==="                                                                    << std::endl <<
+          "  --verbose                   Use verbose logging (default = false)"                          << std::endl <<
+          "  --avgRuns=N                 Report performance measurements averaged over N consecutive "
+                                                       "iterations (default = " << defaultAvgRuns << ")" << std::endl <<
+          "  --percentile=P1,P2,P3,...   Report performance for the P1,P2,P3,... percentages (0<=P_i<=100, 0 "
+                                        "representing max perf, and 100 representing min perf; default"
+                                            " = " << joinValuesToString(defaultPercentiles, ",") << "%)" << std::endl <<
+          "  --dumpRefit                 Print the refittable layers and weights from a refittable engine" << std::endl <<
+          "  --dumpOutput                Print the output tensor(s) of the last inference iteration "
+                                                                                  "(default = disabled)" << std::endl <<
+          "  --dumpRawBindingsToFile     Print the input/output tensor(s) of the last inference iteration to file "
+                                                                                  "(default = disabled)" << std::endl <<
+          "  --dumpProfile               Print profile information per layer (default = disabled)"       << std::endl <<
+          "  --dumpLayerInfo             Print layer information of the engine to console "
+                                                                                "(default = disabled)"   << std::endl <<
+          "  --dumpOptimizationProfile   Print the optimization profile(s) information "
+                                                                                "(default = disabled)"   << std::endl <<
+          "  --exportTimes=<file>        Write the timing results in a json file (default = disabled)"   << std::endl <<
+          "  --exportOutput=<file>       Write the output tensors to a json file (default = disabled)"   << std::endl <<
+          "  --exportProfile=<file>      Write the profile information per layer in a json file "
+                                                                              "(default = disabled)"     << std::endl <<
+          "  --exportLayerInfo=<file>    Write the layer information of the engine in a json file "
+                                                                              "(default = disabled)"     << std::endl;
+    // clang-format on
+}
+
+//! \brief Write the "Task Inference Options" help section (key=value entries for one task) to \p os.
+void TaskInferenceOptions::help(std::ostream& os)
+{
+    os << "=== Task Inference Options ===" << std::endl;
+    os << "  engine=<file>               Specify a serialized engine for this task" << std::endl;
+    os << "  device=N                    Specify a GPU device for this task" << std::endl;
+    os << "  DLACore=N                   Specify a DLACore for this task" << std::endl;
+    os << "  batch=N                     Set batch size for implicit batch engines (default = " << defaultBatch << ")"
+       << std::endl;
+    os << "                              This option should not be used for explicit batch engines" << std::endl;
+    os << "  graph=1                     Use cuda graph for this task" << std::endl;
+    os << "  persistentCacheRatio=[0-1]  Set the persistentCacheLimit ratio for this task                            (default = 0)" << std::endl;
+}
+
+//! \brief Write the "Help" section to \p os: a banner line followed by the single
+//!        --help/-h entry, each line terminated (and flushed) with std::endl.
+void helpHelp(std::ostream& os)
+{
+    os << "=== Help ===" << std::endl;
+    os << "  --help, -h                  Print this message" << std::endl;
+}
+
+void AllOptions::help(std::ostream& os)
+{
+    // Section printers in display order; consecutive sections are separated by one std::endl.
+    using SectionPrinter = void (*)(std::ostream&);
+    SectionPrinter const sections[] = {&ModelOptions::help, &BuildOptions::help, &InferenceOptions::help,
+        &ReportingOptions::help, &SystemOptions::help, &helpHelp};
+    bool needsSeparator = false;
+    for (SectionPrinter const printSection : sections)
+    {
+        if (needsSeparator) { os << std::endl; }
+        printSection(os);
+        needsSeparator = true;
+    }
+}
+
+//! \brief Write the SafeBuilderOptions help text ("Mandatory" and "Optional" sections) to \p os.
+void SafeBuilderOptions::printHelp(std::ostream& os)
+{
+    // clang-format off
+    os << "=== Mandatory ==="                                                                                                                << std::endl <<
+          "  --onnx=<file>               ONNX model"                                                                                         << std::endl <<
+          " "                                                                                                                                << std::endl <<
+          "=== Optional ==="                                                                                                                 << std::endl <<
+          "  --inputIOFormats=spec       Type and format of each of the input tensors (default = all inputs in fp32:chw)"                    << std::endl <<
+          "                              See --outputIOFormats help for the grammar of type and format list."                                << std::endl <<
+          "                              Note: If this option is specified, please set comma-separated types and formats for all"            << std::endl <<
+          "                                    inputs following the same order as network inputs ID (even if only one input"                 << std::endl <<
+          "                                    needs specifying IO format) or set the type and format once for broadcasting."                << std::endl <<
+          "  --outputIOFormats=spec      Type and format of each of the output tensors (default = all outputs in fp32:chw)"                  << std::endl <<
+          "                              Note: If this option is specified, please set comma-separated types and formats for all"            << std::endl <<
+          "                                    outputs following the same order as network outputs ID (even if only one output"              << std::endl <<
+          "                                    needs specifying IO format) or set the type and format once for broadcasting."                << std::endl <<
+        R"(                              IO Formats: spec  ::= IOfmt[","spec])"                                                              << std::endl <<
+          "                                          IOfmt ::= type:fmt"                                                                     << std::endl <<
+        R"(                                          type  ::= "fp32"|"fp16"|"int32"|"int8")"                                                << std::endl <<
+        R"(                                          fmt   ::= ("chw"|"chw2"|"hwc8"|"chw4"|"chw16"|"chw32"|"dhwc8"|)"                        << std::endl <<
+        R"(                                                   "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4"|"hwc16"|"dhwc")["+"fmt])"               << std::endl <<
+          "  --int8                      Enable int8 precision, in addition to fp16 (default = disabled)"                                    << std::endl <<
+          "  --consistency               Perform consistency checking on safety certified engine"                                            << std::endl <<
+          "  --std                       Build standard serialized engine, (default = disabled)"                                             << std::endl <<
+          "  --calib=<file>              Read INT8 calibration cache file"                                                                   << std::endl <<
+          "  --serialized=<file>         Save the serialized network"                                                                        << std::endl <<
+          "  --staticPlugins             Plugin library (.so) to load statically (can be specified multiple times)"                          << std::endl <<
+          "  --verbose or -v             Use verbose logging (default = false)"                                                              << std::endl <<
+          "  --help or -h                Print this message"                                                                                 << std::endl <<
+          "  --noBuilderCache            Disable timing cache in builder (default is to enable timing cache)"                                << std::endl <<
+          "  --timingCacheFile=<file>    Save/load the serialized global timing cache"                                                       << std::endl <<
+          "  --sparsity=spec             Control sparsity (default = disabled). "                                                            << std::endl <<
+        R"(                              Sparsity: spec ::= "disable", "enable", "force")"                                                   << std::endl <<
+          "                              Note: Description about each of these options is as below"                                          << std::endl <<
+          "                                    disable = do not enable sparse tactics in the builder (this is the default)"                  << std::endl <<
+          "                                    enable  = enable sparse tactics in the builder (but these tactics will only be"               << std::endl <<
+          "                                              considered if the weights have the right sparsity pattern)"                         << std::endl <<
+          "                                    force   = enable sparse tactics in the builder and force-overwrite the weights to have"       << std::endl <<
+          "                                              a sparsity pattern"                                                                 << std::endl <<
+          "  --avgTiming=M               Set the number of times averaged in each iteration for kernel selection (default = " << defaultAvgTiming << ")" << std::endl <<
+          ""                                                                                                                                 << std::endl;
+    // clang-format on
+}
+
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleOptions.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleOptions.h
new file mode 100644
index 0000000000000000000000000000000000000000..df17d2c2d2506d31c0cd0f59ab44d9c4704bb446
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleOptions.h
@@ -0,0 +1,519 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_OPTIONS_H
+#define TRT_SAMPLE_OPTIONS_H
+
+
+#include <array>
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "NvInfer.h"
+
+#if ENABLE_UNIFIED_BUILDER
+#include "safeCommon.h"
+#endif
+
+namespace sample
+{
+
+// Build default params
+constexpr int32_t defaultAvgTiming{8}; // printed as the --avgTiming default in the safe-builder help text
+constexpr int32_t defaultMaxAuxStreams{-1};
+constexpr int32_t defaultBuilderOptimizationLevel{-1};
+constexpr int32_t defaultTilingOptimizationLevel{static_cast<int32_t>(nvinfer1::TilingOptimizationLevel::kNONE)};
+constexpr int32_t defaultMaxTactics{-1};
+
+// System default params
+constexpr int32_t defaultDevice{0};
+
+// Inference default params
+constexpr int32_t defaultBatch{1}; // printed as the default of the task-level batch=N option
+constexpr int32_t batchNotProvided{0}; // initial value of InferenceOptions::batch and TaskInferenceOptions::batch
+constexpr int32_t defaultStreams{1}; // printed as the --infStreams default
+constexpr int32_t defaultIterations{10}; // printed as the --iterations default
+constexpr int32_t defaultOptProfileIndex{0}; // printed as the --useProfile default
+constexpr float defaultWarmUp{200.F}; // printed as the --warmUp default; the help text gives the unit as milliseconds
+constexpr float defaultDuration{3.F}; // printed as the --duration default; the help text gives the unit as seconds
+constexpr float defaultSleep{}; // value-initialized (0); printed as the --sleepTime default, in milliseconds
+constexpr float defaultIdle{}; // value-initialized (0); printed as the --idleTime default, in milliseconds
+constexpr float defaultPersistentCacheRatio{0};
+
+// Reporting default params
+constexpr int32_t defaultAvgRuns{10}; // printed as the --avgRuns default
+constexpr std::array<float, 3> defaultPercentiles{90, 95, 99}; // printed (comma-joined) as the --percentile default
+
+enum class PrecisionConstraints // NOTE(review): presumably backs a precision-constraints build flag; its parser is not shown here
+{
+    kNONE, // initial value of BuildOptions::precisionConstraints
+    kOBEY,
+    kPREFER
+};
+
+enum class ModelFormat
+{
+    kANY, // initial value of BaseModelOptions::format
+    kONNX // NOTE(review): presumably chosen when an ONNX model is supplied; confirm in BaseModelOptions::parse()
+};
+
+enum class SparsityFlag // NOTE(review): enumerators presumably map to the --sparsity values "disable", "enable", "force"
+{
+    kDISABLE, // help text for "disable": do not enable sparse tactics in the builder; initial value in this header
+    kENABLE, // help text for "enable": sparse tactics are considered only if the weights have the right sparsity pattern
+    kFORCE // help text for "force": enable sparse tactics and force-overwrite the weights to have a sparsity pattern
+};
+
+enum class TimingCacheMode
+{
+    kDISABLE, // NOTE(review): presumably selected by --noBuilderCache ("Disable timing cache in builder") — confirm
+    kLOCAL, // initial value in both BuildOptions and SafeBuilderOptions
+    kGLOBAL // NOTE(review): presumably tied to --timingCacheFile ("serialized global timing cache") — confirm
+};
+
+enum class MemoryAllocationStrategy // initial value of InferenceOptions::memoryAllocationStrategy is kSTATIC
+{
+    kSTATIC,  //!< Allocate device memory based on max size across all profiles.
+    kPROFILE, //!< Allocate device memory based on max size of the current profile.
+    kRUNTIME, //!< Allocate device memory based on the current input shapes.
+};
+
+//!
+//! \enum RuntimeMode
+//!
+//! \brief Used to dictate which TensorRT runtime library to dynamically load.
+//! \note The operator<< overload below prints these as "full", "dispatch", "lean" and "safe".
+enum class RuntimeMode
+{
+    //! Maps to libnvinfer.so or nvinfer.dll (initial value of BuildOptions::useRuntime)
+    kFULL,
+
+    //! Maps to libnvinfer_dispatch.so or nvinfer_dispatch.dll
+    kDISPATCH,
+
+    //! Maps to libnvinfer_lean.so or nvinfer_lean.dll
+    kLEAN,
+
+    //! Maps to libnvinfer_safe.so or nvinfer_safe.dll
+    kSAFE,
+};
+
+//! Stream the lower-case name of \p mode ("full", "dispatch", "lean" or "safe").
+//! A value matching none of the enumerators writes nothing.
+inline std::ostream& operator<<(std::ostream& os, RuntimeMode const mode)
+{
+    char const* label = nullptr;
+    switch (mode)
+    {
+    case RuntimeMode::kFULL:
+        label = "full";
+        break;
+    case RuntimeMode::kDISPATCH:
+        label = "dispatch";
+        break;
+    case RuntimeMode::kLEAN:
+        label = "lean";
+        break;
+    case RuntimeMode::kSAFE:
+        label = "safe";
+        break;
+    }
+
+    // Insert once, after the lookup, instead of inside every case.
+    if (label != nullptr)
+    {
+        os << label;
+    }
+    return os;
+}
+
+using Arguments = std::unordered_multimap<std::string, std::pair<std::string, int32_t>>; // one key may hold several (string, int32_t) entries; NOTE(review): pair meaning is set by argsToArgumentsMap, not shown here
+// (nvinfer1::DataType, nvinfer1::TensorFormats) pair; element type of the inputFormats/outputFormats vectors.
+using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
+// One dimension vector per nvinfer1::OptProfileSelector value.
+using ShapeRange = std::array<std::vector<int64_t>, nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
+// String-keyed per-layer settings. NOTE(review): the key is presumably the layer name — confirm in the parser.
+using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
+using LayerOutputTypes = std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
+using LayerDeviceTypes = std::unordered_map<std::string, nvinfer1::DeviceType>;
+
+using StringSet = std::unordered_set<std::string>;
+
+class WeightStreamingBudget
+{
+public:
+    static constexpr int64_t kDISABLE{-2}; // sentinel; the --weightStreamingBudget help lists -2 as "Disable weight streaming at runtime"
+    static constexpr int64_t kAUTOMATIC{-1}; // sentinel; the help lists -1 as "TensorRT will automatically decide the budget"
+    int64_t bytes{kDISABLE};
+    double percent{static_cast<double>(100.0)}; // NOTE(review): starts at 100, so a default-constructed budget is NOT isDisabled() — confirm intent
+    //! \return true only when both bytes and percent equal kDISABLE. Const so it can be queried through const options.
+    bool isDisabled() const
+    {
+        return bytes == kDISABLE && percent == kDISABLE;
+    }
+};
+
+class Options // abstract base of every option group declared in this header
+{
+public:
+    virtual ~Options() = default; // virtual so derived groups can be destroyed through an Options pointer
+    virtual void parse(Arguments& arguments) = 0; // takes the map by non-const reference; NOTE(review): presumably removes the entries it consumes — confirm in the .cpp
+};
+
+class BaseModelOptions : public Options
+{
+public:
+    ModelFormat format{ModelFormat::kANY}; // stays kANY until parse() assigns something else (parse() is not shown here)
+    std::string model; // NOTE(review): presumably the model file path — confirm in parse()
+
+    void parse(Arguments& arguments) override;
+
+    static void help(std::ostream& out); // static: callable without an instance
+};
+
+class ModelOptions : public Options
+{
+public:
+    BaseModelOptions baseModel; // held by composition, not inheritance
+    std::string prototxt;
+    std::vector<std::string> outputs; // NOTE(review): presumably output tensor names — confirm in parse()
+
+    void parse(Arguments& arguments) override;
+
+    static void help(std::ostream& out); // invoked first by AllOptions::help
+};
+
+constexpr nvinfer1::TempfileControlFlags getTempfileControlDefaults()
+{
+    // Default mask: one bit for kALLOW_IN_MEMORY_FILES OR-ed with one bit for kALLOW_TEMPORARY_FILES.
+    return (1U << static_cast<uint32_t>(nvinfer1::TempfileControlFlag::kALLOW_IN_MEMORY_FILES))
+        | (1U << static_cast<uint32_t>(nvinfer1::TempfileControlFlag::kALLOW_TEMPORARY_FILES));
+}
+
+class BuildOptions : public Options
+{
+public:
+    // Unit in MB.
+    double workspace{-1.0}; // NOTE(review): the negative initial values here presumably mean "not specified" — confirm in parse()
+    // Unit in MB.
+    double dlaSRAM{-1.0};
+    // Unit in MB.
+    double dlaLocalDRAM{-1.0};
+    // Unit in MB.
+    double dlaGlobalDRAM{-1.0};
+    // Unit in KB.
+    double tacticSharedMem{-1.0};
+    int32_t avgTiming{defaultAvgTiming};
+    size_t calibProfile{defaultOptProfileIndex};
+    bool tf32{true}; // the only precision flag in this group that starts enabled
+    bool fp16{false};
+    bool bf16{false};
+    bool int8{false};
+    bool fp8{false};
+    bool int4{false};
+    bool stronglyTyped{false};
+    bool directIO{false};
+    PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
+    LayerPrecisions layerPrecisions;
+    LayerOutputTypes layerOutputTypes;
+    LayerDeviceTypes layerDeviceTypes;
+    StringSet debugTensors;
+    bool markUnfusedTensorsAsDebugTensors{false};
+    StringSet debugTensorStates;
+    bool safe{false};
+    bool consistency{false};
+    bool buildDLAStandalone{false};
+    bool allowGPUFallback{false};
+    bool restricted{false};
+    bool skipInference{false};
+    bool save{false};
+    bool load{false};
+    bool asyncFileReader{false};
+    bool refittable{false};
+    bool stripWeights{false};
+    bool versionCompatible{false};
+    bool pluginInstanceNorm{false};
+    bool enableUInt8AsymmetricQuantizationDLA{false};
+    bool excludeLeanRuntime{false};
+    bool disableCompilationCache{false};
+    bool enableMonitorMemory{false};
+    int32_t builderOptimizationLevel{defaultBuilderOptimizationLevel};
+    int32_t maxTactics{defaultMaxTactics};
+    SparsityFlag sparsity{SparsityFlag::kDISABLE};
+    nvinfer1::ProfilingVerbosity profilingVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
+    std::string engine;
+    std::string calibration;
+    using ShapeProfile = std::unordered_map<std::string, ShapeRange>; // string key -> per-selector dimension vectors
+    std::vector<ShapeProfile> optProfiles;
+    ShapeProfile shapesCalib;
+    std::vector<IOFormat> inputFormats;
+    std::vector<IOFormat> outputFormats;
+    nvinfer1::TacticSources enabledTactics{0};
+    nvinfer1::TacticSources disabledTactics{0};
+    TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
+    std::string timingCacheFile{};
+    bool errorOnTimingCacheMiss{false};
+    // C++11 does not automatically generate hash function for enum class.
+    // Use int32_t to support C++11 compilers.
+    std::unordered_map<int32_t, bool> previewFeatures;
+    nvinfer1::HardwareCompatibilityLevel hardwareCompatibilityLevel{nvinfer1::HardwareCompatibilityLevel::kNONE};
+    nvinfer1::RuntimePlatform runtimePlatform{nvinfer1::RuntimePlatform::kSAME_AS_BUILD};
+    std::string tempdir{};
+    nvinfer1::TempfileControlFlags tempfileControls{getTempfileControlDefaults()}; // starts with both allow-bits from getTempfileControlDefaults() set
+    RuntimeMode useRuntime{RuntimeMode::kFULL};
+    std::string leanDLLPath{};
+    int32_t maxAuxStreams{defaultMaxAuxStreams};
+    bool getPlanVersionOnly{false};
+
+    bool allowWeightStreaming{false};
+
+    int32_t tilingOptimizationLevel{defaultTilingOptimizationLevel};
+    int64_t l2LimitForTiling{-1};
+    bool distributiveIndependence{false};
+    std::string remoteAutoTuningConfig{};
+
+    void parse(Arguments& arguments) override;
+
+    static void help(std::ostream& out); // invoked second by AllOptions::help
+};
+
+class SystemOptions : public Options
+{
+public:
+    int32_t device{defaultDevice};
+    int32_t DLACore{-1}; // NOTE(review): -1 presumably means "no DLA core selected" — confirm in parse()
+    bool ignoreParsedPluginLibs{false};
+    std::vector<std::string> plugins;
+    std::vector<std::string> setPluginsToSerialize;
+    std::vector<std::string> dynamicPlugins;
+#if ENABLE_UNIFIED_BUILDER
+    std::vector<samplesSafeCommon::SafetyPluginLibraryArgument> safetyPlugins; // type comes from safeCommon.h, included under the same macro
+#endif
+
+    void parse(Arguments& arguments) override;
+
+    static void help(std::ostream& out); // invoked by AllOptions::help after the reporting section
+};
+
+class InferenceOptions : public Options
+{
+public:
+    int32_t batch{batchNotProvided};
+    int32_t iterations{defaultIterations}; // presumably --iterations ("Run at least N inference iterations")
+    int32_t infStreams{defaultStreams}; // presumably --infStreams (execution contexts run concurrently)
+    int32_t optProfileIndex{defaultOptProfileIndex}; // presumably --useProfile
+    float warmup{defaultWarmUp}; // presumably --warmUp; its help text gives milliseconds
+    float duration{defaultDuration}; // presumably --duration; its help text gives seconds
+    float sleep{defaultSleep}; // presumably --sleepTime; its help text gives milliseconds
+    float idle{defaultIdle}; // presumably --idleTime; its help text gives milliseconds
+    float persistentCacheRatio{defaultPersistentCacheRatio}; // presumably --persistentCacheRatio
+    bool overlap{true}; // the only bool in this group that starts true
+    bool skipTransfers{false}; // presumably --noDataTransfers
+    bool useManaged{false}; // presumably --useManagedMemory
+    bool spin{false}; // presumably --useSpinWait
+    bool threads{false}; // presumably --threads
+    bool graph{false}; // presumably --useCudaGraph
+    bool rerun{false};
+    bool timeDeserialize{false}; // presumably --timeDeserialize
+    bool timeRefit{false}; // presumably --timeRefit
+    bool setOptProfile{false};
+    std::unordered_map<std::string, std::string> inputs; // presumably --loadInputs name:file pairs
+    using ShapeProfile = std::unordered_map<std::string, std::vector<int64_t>>; // unlike BuildOptions::ShapeProfile, one dimension vector per key
+    ShapeProfile shapes;
+    nvinfer1::ProfilingVerbosity nvtxVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
+    MemoryAllocationStrategy memoryAllocationStrategy{MemoryAllocationStrategy::kSTATIC}; // presumably --allocationStrategy
+    std::unordered_map<std::string, std::string> debugTensorFileNames; // presumably --saveDebugTensors name:file pairs
+    std::vector<std::string> dumpAlldebugTensorFormats; // presumably --saveAllDebugTensors formats
+    WeightStreamingBudget weightStreamingBudget; // presumably --weightStreamingBudget
+
+    void parse(Arguments& arguments) override;
+
+    static void help(std::ostream& out); // invoked third by AllOptions::help
+};
+
+class ReportingOptions : public Options
+{
+public:
+    bool verbose{false}; // presumably --verbose
+    int32_t avgs{defaultAvgRuns}; // presumably --avgRuns
+    std::vector<float> percentiles{defaultPercentiles.begin(), defaultPercentiles.end()}; // starts as a copy of defaultPercentiles (90, 95, 99)
+    bool refit{false}; // presumably --dumpRefit
+    bool output{false}; // presumably --dumpOutput
+    bool dumpRawBindings{false}; // presumably --dumpRawBindingsToFile
+    bool profile{false}; // presumably --dumpProfile
+    bool layerInfo{false}; // presumably --dumpLayerInfo
+    bool optProfileInfo{false}; // presumably --dumpOptimizationProfile
+    std::string exportTimes; // presumably the --exportTimes=<file> path
+    std::string exportOutput; // presumably the --exportOutput=<file> path
+    std::string exportProfile; // presumably the --exportProfile=<file> path
+    std::string exportLayerInfo; // presumably the --exportLayerInfo=<file> path
+
+    void parse(Arguments& arguments) override;
+
+    static void help(std::ostream& out); // writes the "=== Reporting Options ===" help section
+};
+
+class SafeBuilderOptions : public Options
+{
+public:
+    std::string serialized{}; // presumably the --serialized=<file> path
+    std::string onnxModelFile{}; // presumably the --onnx=<file> path
+    bool help{false}; // data member named help, which is why the help printer below is called printHelp
+    bool verbose{false};
+    std::vector<IOFormat> inputFormats;
+    std::vector<IOFormat> outputFormats;
+    bool int8{false};
+    bool fp8{false};
+    bool int4{false};
+    std::string calibFile{}; // presumably the --calib=<file> path
+    std::vector<std::string> plugins;
+    bool consistency{false};
+    bool standard{false}; // presumably --std
+    TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
+    std::string timingCacheFile{};
+    SparsityFlag sparsity{SparsityFlag::kDISABLE};
+    int32_t avgTiming{defaultAvgTiming};
+
+    void parse(Arguments& arguments) override;
+
+    static void printHelp(std::ostream& out); // writes the "=== Mandatory ===" and "=== Optional ===" help sections
+};
+
+class AllOptions : public Options
+{
+public:
+    ModelOptions model;
+    BuildOptions build;
+    SystemOptions system;
+    InferenceOptions inference;
+    ReportingOptions reporting;
+    bool helps{false};
+
+    void parse(Arguments& arguments) override;
+
+    static void help(std::ostream& out); // prints the Model, Build, Inference, Reporting, System and Help sections, in that order
+};
+
+class TaskInferenceOptions : public Options
+{
+public:
+    std::string engine; // help text: engine=<file>, a serialized engine for this task
+    int32_t device{defaultDevice}; // help text: device=N, a GPU device for this task
+    int32_t DLACore{-1}; // help text: DLACore=N; NOTE(review): -1 presumably means "none" — confirm
+    int32_t batch{batchNotProvided}; // help text: batch=N, for implicit batch engines only
+    bool graph{false}; // help text: graph=1 uses cuda graph for this task
+    float persistentCacheRatio{defaultPersistentCacheRatio}; // help text: persistentCacheRatio=[0-1]
+    void parse(Arguments& arguments) override;
+    static void help(std::ostream& out); // writes the "=== Task Inference Options ===" section
+};
+
+Arguments argsToArgumentsMap(int32_t argc, char* argv[]); // NOTE(review): presumably builds the Arguments multimap from the command line — definition not in this header
+
+bool parseHelp(Arguments& arguments); // NOTE(review): presumably reports whether a help flag was given — confirm in the .cpp
+
+void helpHelp(std::ostream& out); // writes the "=== Help ===" section containing the --help/-h entry
+
+// Stream-insertion overloads for the option groups and for TensorRT enums (declarations only)
+
+std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
+
+std::ostream& operator<<(std::ostream& os, const IOFormat& format);
+
+std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
+
+std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
+
+std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
+
+std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
+
+std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
+
+std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
+
+std::ostream& operator<<(std::ostream& os, const AllOptions& options);
+
+std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
+
+std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype);
+
+std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType);
+
+
+inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims)
+{
+    // Write the first nbDims entries of d joined by "x"; nothing is written when nbDims <= 0.
+    char const* separator = "";
+    for (int32_t axis = 0; axis < dims.nbDims; ++axis, separator = "x")
+        os << separator << dims.d[axis];
+    return os;
+}
+//! Stream a capitalised name for \p role: "Kernel", "Bias", "Shift", "Scale",
+//! "Constant" or "Any". Each handled enumerator returns straight from its case;
+//! a value matching none of them leaves \p os untouched.
+inline std::ostream& operator<<(std::ostream& os, const nvinfer1::WeightsRole role)
+{
+    using Role = nvinfer1::WeightsRole;
+    switch (role)
+    {
+    case Role::kKERNEL:
+    {
+        return os << "Kernel";
+    }
+    case Role::kBIAS:
+    {
+        return os << "Bias";
+    }
+    case Role::kSHIFT:
+    {
+        return os << "Shift";
+    }
+    case Role::kSCALE:
+    {
+        return os << "Scale";
+    }
+    case Role::kCONSTANT:
+    {
+        return os << "Constant";
+    }
+    case Role::kANY:
+    {
+        return os << "Any";
+    }
+    }
+
+    // Reached only for a value outside the enumerators handled above:
+    // nothing is written and the stream is handed back unchanged.
+    return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const std::vector<int64_t>& vec)
+{
+    // Write the elements in order joined by "x"; an empty vector writes nothing.
+    int32_t const count = static_cast<int32_t>(vec.size());
+    for (int32_t pos = 0; pos < count; ++pos)
+        os << (pos == 0 ? "" : "x") << vec[pos];
+    return os;
+}
+
+} // namespace sample
+
+#endif // TRT_SAMPLE_OPTIONS_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleReporting.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleReporting.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a85c049d0bd083c5ab4276ced356d819a8daf403
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleReporting.cpp
@@ -0,0 +1,690 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cmath>
+#include <exception>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+#include "sampleInference.h"
+#include "sampleOptions.h"
+#include "sampleReporting.h"
+
+#if ENABLE_UNIFIED_BUILDER
+#include "NvInferSafeRuntime.h"
+#include "bfloat16.h"
+#if CUDA_VERSION >= 11060
+#include <cuda_fp8.h>
+#endif
+#endif
+
+using namespace nvinfer1;
+
+namespace sample
+{
+
+namespace
+{
+
+//!
+//! \brief Find percentile in an ascending sequence of timings
+//!
+//! \param percentile Requested percentile; must be in [0, 100].
+//! \param timings    Timings sorted in ascending order of the metric extracted by \p toFloat.
+//! \param toFloat    Callable extracting the metric of interest from an InferenceTime.
+//!
+//! \return The metric of the selected entry, or infinity when \p timings is empty.
+//!
+//! \throw std::runtime_error when \p timings is not empty and \p percentile is NaN or outside [0, 100].
+//!
+template <typename T>
+float findPercentile(float percentile, std::vector<InferenceTime> const& timings, T const& toFloat)
+{
+    if (timings.empty())
+    {
+        return std::numeric_limits<float>::infinity();
+    }
+    // Written as a negated conjunction so that NaN, which fails every comparison, is rejected too.
+    if (!(percentile >= 0.F && percentile <= 100.F))
+    {
+        throw std::runtime_error("percentile is not in [0, 100]!");
+    }
+
+    // Convert to an integer only after validation: converting an out-of-range or NaN float to int32_t is
+    // undefined behavior, so it must not happen before the check above has had a chance to throw.
+    int32_t const all = static_cast<int32_t>(timings.size());
+    int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
+    // exclude == all for percentile 0, hence the clamp to the first element.
+    return toFloat(timings[std::max(all - 1 - exclude, 0)]);
+}
+
+//!
+//! \brief Return the median of a sorted sequence of timings.
+//!
+//! The metric is extracted with \p toFloat. For an even number of entries the two middle metrics are averaged.
+//! An empty sequence yields infinity.
+//!
+template <typename T>
+float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat)
+{
+    auto const nbTimings = timings.size();
+    if (nbTimings == 0)
+    {
+        return std::numeric_limits<float>::infinity();
+    }
+
+    int32_t const upperMiddle = nbTimings / 2;
+    bool const hasSingleMiddle = (nbTimings % 2) != 0;
+    if (!hasSingleMiddle)
+    {
+        return (toFloat(timings[upperMiddle - 1]) + toFloat(timings[upperMiddle])) / 2;
+    }
+
+    return toFloat(timings[upperMiddle]);
+}
+
+//!
+//! \brief Compute the coefficient of variance (std / mean) of a sequence of timings, given its mean.
+//!
+//! \return The ratio expressed in percent; 0 for an empty sequence and infinity when \p mean is exactly 0.
+//!
+template <typename T>
+float findCoeffOfVariance(std::vector<InferenceTime> const& timings, T const& toFloat, float mean)
+{
+    if (timings.empty())
+    {
+        return 0;
+    }
+
+    if (mean == 0.F)
+    {
+        return std::numeric_limits<float>::infinity();
+    }
+
+    // Sum of squared deviations from the mean, accumulated front to back.
+    float squaredDeviationSum = 0.F;
+    for (InferenceTime const& entry : timings)
+    {
+        float const diff = toFloat(entry) - mean;
+        squaredDeviationSum = squaredDeviationSum + diff * diff;
+    }
+    float const variance = squaredDeviationSum / timings.size();
+
+    return std::sqrt(variance) / mean * 100.F;
+}
+
+//!
+//! \brief Convert the start/end measurement points of a trace entry into per-stage durations.
+//!
+inline InferenceTime traceToTiming(const InferenceTrace& a)
+{
+    auto const enqueueDuration = a.enqEnd - a.enqStart;
+    auto const h2dDuration = a.h2dEnd - a.h2dStart;
+    auto const computeDuration = a.computeEnd - a.computeStart;
+    auto const d2hDuration = a.d2hEnd - a.d2hStart;
+    return InferenceTime(enqueueDuration, h2dDuration, computeDuration, d2hDuration);
+}
+
+//!
+//! \brief Format \p shape as its extents joined by 'x', or as "scalar" when it has zero dimensions.
+//!
+inline std::string dimsToString(Dims const& shape)
+{
+    if (shape.nbDims == 0)
+    {
+        return "scalar";
+    }
+
+    std::stringstream text;
+    for (int32_t axis = 0; axis < shape.nbDims; ++axis)
+    {
+        if (axis > 0)
+        {
+            text << "x";
+        }
+        text << shape.d[axis];
+    }
+    return text.str();
+}
+
+} // namespace
+
+//!
+//! \brief Print how many queries ran during warmup and during the timed section.
+//!
+//! \p warmupMs is printed as given (milliseconds); \p benchTimeMs is converted to seconds before printing.
+//!
+void printProlog(int32_t warmups, int32_t timings, float warmupMs, float benchTimeMs, std::ostream& os)
+{
+    float const benchTimeSec = benchTimeMs / 1000;
+
+    os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" << std::endl;
+    os << "Timing trace has " << timings << " queries over " << benchTimeSec << " s" << std::endl;
+}
+
+//!
+//! \brief Print averages of the timing trace over consecutive windows of \p runsPerAvg entries.
+//!
+//! One line is printed per complete window; a trailing incomplete window is not printed. For long traces
+//! only the first and the last kTIMING_PRINT_THRESHOLD * runsPerAvg timings are reported and the entries in
+//! between are skipped after announcing how many were left out.
+//!
+//! \param timings    Per-query timings in trace order.
+//! \param runsPerAvg Window size. When it is not positive only the header is printed, because no window
+//!                   can ever complete.
+//! \param os         Destination stream.
+//!
+void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os)
+{
+    os << std::endl;
+    os << "=== Trace details ===" << std::endl;
+    os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
+
+    if (runsPerAvg <= 0)
+    {
+        return;
+    }
+
+    // Show only the first N lines and the last N lines, where N = kTIMING_PRINT_THRESHOLD.
+    constexpr int64_t kTIMING_PRINT_THRESHOLD{200};
+    int64_t const maxNbTimings{kTIMING_PRINT_THRESHOLD * runsPerAvg};
+
+    int64_t count = 0;
+    InferenceTime sum;
+    for (int64_t idx = 0, size = timings.size(); idx < size; ++idx)
+    {
+        // Omit some latency printing to avoid very long logs.
+        if (size > 2 * maxNbTimings && idx == maxNbTimings)
+        {
+            os << "... Omitting " << (size - 2 * maxNbTimings) << " lines" << std::endl;
+            // Jump to the first of the trailing maxNbTimings entries. The new index is consumed by this very
+            // iteration (see the accumulation below), so the loop increment must not be compensated for:
+            // subtracting one here would process one entry more than announced and shift every window.
+            idx = size - maxNbTimings;
+        }
+
+        sum += timings[idx];
+
+        if (++count == runsPerAvg)
+        {
+            // clang-format off
+            os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
+               << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (enqueue " << sum.enq / runsPerAvg
+               << " ms)" << std::endl;
+            // clang-format on
+            // Start the next window from scratch.
+            count = 0;
+            sum = InferenceTime();
+        }
+    }
+}
+
+//!
+//! \brief Print a blank line, a section header and one explanatory line per reported performance metric.
+//!
+void printMetricExplanations(std::ostream& os)
+{
+    // One entry per output line, in print order. Each entry is followed by std::endl when written.
+    static constexpr char const* kLINES[] = {
+        "=== Explanations of the performance metrics ===",
+        "Total Host Walltime: the host walltime from when the first query (after warmups) is enqueued to when the "
+        "last query is completed.",
+        "GPU Compute Time: the GPU latency to execute the kernels for a query.",
+        "Total GPU Compute Time: the summation of the GPU Compute Time of all the queries. If this is significantly "
+        "shorter than Total Host Walltime, the GPU may be under-utilized because of host-side overheads or data "
+        "transfers.",
+        "Throughput: the observed throughput computed by dividing the number of queries by the Total Host Walltime. "
+        "If this is significantly lower than the reciprocal of GPU Compute Time, the GPU may be under-utilized "
+        "because of host-side overheads or data transfers.",
+        "Enqueue Time: the host latency to enqueue a query. If this is longer than GPU Compute Time, the GPU may be "
+        "under-utilized.",
+        "H2D Latency: the latency for host-to-device data transfers for input tensors of a single query.",
+        "D2H Latency: the latency for device-to-host data transfers for output tensors of a single query.",
+        "Latency: the summation of H2D Latency, GPU Compute Time, and D2H Latency. This is the latency to infer a "
+        "single query.",
+    };
+
+    os << std::endl;
+    for (char const* line : kLINES)
+    {
+        os << line << std::endl;
+    }
+}
+
+//!
+//! \brief Compute min, max, mean, median, percentiles and coefficient of variance of one metric.
+//!
+//! \param timings      Timings to summarize; they are copied and sorted by the metric, the input is untouched.
+//! \param metricGetter Extracts the metric of interest from an InferenceTime.
+//! \param percentiles  Percentiles to evaluate; result.percentiles receives one entry per element, in order.
+//!
+//! \return The summary. When \p timings is empty, min, max and mean are reported as infinity, matching what
+//!         findMedian() and findPercentile() return for an empty sequence.
+//!
+PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
+    std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles)
+{
+    auto const metricComparator
+        = [metricGetter](InferenceTime const& a, InferenceTime const& b) { return metricGetter(a) < metricGetter(b); };
+    auto const metricAccumulator = [metricGetter](float acc, InferenceTime const& a) { return acc + metricGetter(a); };
+    std::vector<InferenceTime> newTimings = timings;
+    std::sort(newTimings.begin(), newTimings.end(), metricComparator);
+    PerformanceResult result;
+    if (newTimings.empty())
+    {
+        // front()/back() on an empty vector are undefined and the mean would be 0 / 0, so report "no data"
+        // explicitly instead of touching the container.
+        float const noData = std::numeric_limits<float>::infinity();
+        result.min = noData;
+        result.max = noData;
+        result.mean = noData;
+    }
+    else
+    {
+        result.min = metricGetter(newTimings.front());
+        result.max = metricGetter(newTimings.back());
+        result.mean
+            = std::accumulate(newTimings.begin(), newTimings.end(), 0.0F, metricAccumulator) / newTimings.size();
+    }
+    // The helpers below handle an empty sequence themselves.
+    result.median = findMedian(newTimings, metricGetter);
+    for (auto percentile : percentiles)
+    {
+        result.percentiles.emplace_back(findPercentile(percentile, newTimings, metricGetter));
+    }
+    result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
+    return result;
+}
+
+//!
+//! \brief Print the performance summary of a timing trace, followed by bottleneck and stability warnings.
+//!
+//! \param timings     Per-query timings to summarize.
+//! \param walltimeMs  Wall time in milliseconds; used as the divisor of the throughput.
+//! \param percentiles Percentiles reported for every metric.
+//! \param batchSize   Multiplier applied to the number of timings when computing the throughput.
+//! \param infStreams  Number of inference streams; a value above 1 triggers a warning about latencies.
+//! \param osInfo      Receives the summary.
+//! \param osWarning   Receives the warnings.
+//! \param osVerbose   Receives the explanations of the metrics.
+//!
+void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs, std::vector<float> const& percentiles,
+    int32_t batchSize, int32_t infStreams, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose)
+{
+    // Queries per second: walltimeMs is in milliseconds, hence the factor 1000.
+    float const throughput = batchSize * timings.size() / walltimeMs * 1000;
+
+    // Each metric is summarized independently from the same timings.
+    auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
+    auto const latencyResult = getPerformanceResult(timings, getLatency, percentiles);
+
+    auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
+    auto const enqueueResult = getPerformanceResult(timings, getEnqueue, percentiles);
+
+    auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
+    auto const h2dResult = getPerformanceResult(timings, getH2d, percentiles);
+
+    auto const getCompute = [](InferenceTime const& t) { return t.compute; };
+    auto const gpuComputeResult = getPerformanceResult(timings, getCompute, percentiles);
+
+    auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
+    auto const d2hResult = getPerformanceResult(timings, getD2h, percentiles);
+
+    // Formats one summary line. r.percentiles is indexed in step with percentiles, so it must hold at least
+    // as many entries.
+    auto const toPerfString = [&](const PerformanceResult& r) {
+        std::stringstream s;
+        s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean << " ms, "
+          << "median = " << r.median << " ms";
+        for (int32_t i = 0, n = percentiles.size(); i < n; ++i)
+        {
+            s << ", percentile(" << percentiles[i] << "%) = " << r.percentiles[i] << " ms";
+        }
+        return s.str();
+    };
+
+    osInfo << std::endl;
+    osInfo << "=== Performance summary ===" << std::endl;
+    osInfo << "Throughput: " << throughput << " qps" << std::endl;
+    osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
+    osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
+    osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
+    osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
+    osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
+    osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
+    // The total is reconstructed as mean * count and converted from milliseconds to seconds.
+    osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;
+
+    // Report warnings if the throughput is bound by other factors than GPU Compute Time.
+    constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
+    if (enqueueResult.median > kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median)
+    {
+        osWarning
+            << "* Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized."
+            << std::endl;
+        osWarning << "  If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the "
+                     "throughput."
+                  << std::endl;
+    }
+    if (h2dResult.median >= gpuComputeResult.median)
+    {
+        osWarning << "* Throughput may be bound by host-to-device transfers for the inputs rather than GPU Compute and "
+                     "the GPU may be under-utilized."
+                  << std::endl;
+        osWarning << "  Add --noDataTransfers flag to disable data transfers." << std::endl;
+    }
+    if (d2hResult.median >= gpuComputeResult.median)
+    {
+        osWarning << "* Throughput may be bound by device-to-host transfers for the outputs rather than GPU Compute "
+                     "and the GPU may be under-utilized."
+                  << std::endl;
+        osWarning << "  Add --noDataTransfers flag to disable data transfers." << std::endl;
+    }
+
+    // Report warnings if the GPU Compute Time is unstable.
+    // The threshold is compared against coeffVar, which this warning prints as a percentage.
+    constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
+    if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD)
+    {
+        osWarning << "* GPU compute time is unstable, with coefficient of variance = " << gpuComputeResult.coeffVar
+                  << "%." << std::endl;
+        osWarning << "  If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the "
+                  << "stability." << std::endl;
+    }
+
+    // Report warnings if multiple inference streams are used.
+    // NOTE(review): no line break is emitted between the two literals below, so the message reads
+    // "... may run in   parallel." with three consecutive spaces - confirm whether that is intended.
+    if (infStreams > 1)
+    {
+        osWarning << "* Multiple inference streams are used. Latencies may not be accurate since inferences may run in "
+                  << "  parallel. Please use \"Throughput\" as the performance metric instead." << std::endl;
+    }
+
+    // Explain what the metrics mean.
+    osInfo << "Explanations of the performance metrics are printed in the verbose logs." << std::endl;
+    printMetricExplanations(osVerbose);
+
+    osInfo << std::endl;
+}
+
+//!
+//! \brief Print the prolog, per-window timings and summary for an inference trace, and optionally export it.
+//!
+//! Entries whose computeStart lies before infOpts.warmup are treated as warmup and excluded from the
+//! statistics. When the trace is empty or consists of warmup entries only, a warning is written to
+//! \p osWarning and nothing else is reported or exported.
+//!
+//! \param trace         Measurement points of every query, in trace order.
+//! \param reportingOpts Supplies the window size (avgs), the percentiles and the export file name.
+//! \param infOpts       Supplies the batch size, the warmup duration and the number of inference streams.
+//! \param osInfo        Receives the regular report.
+//! \param osWarning     Receives warnings.
+//! \param osVerbose     Receives the explanations of the metrics.
+//!
+void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
+    InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose)
+{
+    int32_t batchSize = infOpts.batch;
+    float const warmupMs = infOpts.warmup;
+    auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { return a.computeStart >= warmupMs; };
+    auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
+    if (noWarmup == trace.end())
+    {
+        // Covers the empty trace as well. Without this guard trace.back() and *noWarmup below would access
+        // a non-existent element, which is undefined behavior.
+        osWarning << "No inference was recorded after the warmup period. Skip printing the performance report."
+                  << std::endl;
+        return;
+    }
+    int32_t const warmups = noWarmup - trace.begin();
+    // Span from the first timed host-to-device copy to the last device-to-host copy.
+    float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
+    // treat inference with explicit batch as a single query and report the throughput
+    batchSize = batchSize ? batchSize : 1;
+    printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo);
+
+    std::vector<InferenceTime> timings(trace.size() - warmups);
+    std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
+    printTiming(timings, reportingOpts.avgs, osInfo);
+    printEpilog(
+        timings, benchTime, reportingOpts.percentiles, batchSize, infOpts.infStreams, osInfo, osWarning, osVerbose);
+
+    if (!reportingOpts.exportTimes.empty())
+    {
+        exportJSONTrace(trace, reportingOpts.exportTimes, warmups);
+    }
+}
+
+//!
+//! \brief Write the non-warmup part of an inference trace to \p fileName as a JSON array.
+//!
+//! The first \p nbWarmups entries of \p trace are skipped. The file is opened with truncation.
+//!
+//! Printed format:
+//! [ value, ...]
+//! value ::= { "startEnqMs" : time, "endEnqMs" : time, "startH2dMs" : time, "endH2dMs" : time,
+//!             "startComputeMs" : time, "endComputeMs" : time, "startD2hMs" : time, "endD2hMs" : time,
+//!             "h2dMs" : time, "computeMs" : time, "d2hMs" : time, "latencyMs" : time }
+//!
+void exportJSONTrace(std::vector<InferenceTrace> const& trace, std::string const& fileName, int32_t const nbWarmups)
+{
+    std::ofstream os(fileName, std::ofstream::trunc);
+    os << "[" << std::endl;
+    // Two spaces in front of the first object; ", " in front of every later object and between all fields.
+    char const* sep = "  ";
+    for (auto iter = trace.begin() + nbWarmups; iter < trace.end(); ++iter)
+    {
+        auto const& t = *iter;
+        // Durations derived from the measurement points of this entry.
+        InferenceTime const it(traceToTiming(t));
+        os << sep << "{ ";
+        sep = ", ";
+        // clang-format off
+        os << "\"startEnqMs\" : "     << t.enqStart     << sep << "\"endEnqMs\" : "     << t.enqEnd     << sep
+           << "\"startH2dMs\" : "     << t.h2dStart     << sep << "\"endH2dMs\" : "     << t.h2dEnd     << sep
+           << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
+           << "\"startD2hMs\" : "     << t.d2hStart     << sep << "\"endD2hMs\" : "     << t.d2hEnd     << sep
+           << "\"h2dMs\" : "          << it.h2d         << sep << "\"computeMs\" : "    << it.compute   << sep
+           << "\"d2hMs\" : "          << it.d2h         << sep << "\"latencyMs\" : "    << it.latency() << " }"
+           << std::endl;
+        // clang-format on
+    }
+    os << "]" << std::endl;
+}
+
+//!
+//! \brief Record one time measurement for a layer.
+//!
+//! mIterator walks through mLayers as measurements arrive. When it has reached the end, a report for the
+//! name of the first recorded layer restarts the walk at the beginning; a report for any other name appends
+//! a new layer record.
+//!
+//! \param layerName Name of the reported layer.
+//! \param timeMs    Time to append to that layer's measurements.
+//!
+void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept
+{
+    if (mIterator == mLayers.end())
+    {
+        // "first" means the walk wraps around to the first recorded layer.
+        bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
+        // Counted once when the very first layer is recorded and once per wrap-around.
+        mUpdatesCount += mLayers.empty() || first;
+        if (first)
+        {
+            mIterator = mLayers.begin();
+        }
+        else
+        {
+            // Unknown position: append a record and point the iterator at it.
+            mLayers.emplace_back();
+            mLayers.back().name = layerName;
+            mIterator = mLayers.end() - 1;
+        }
+    }
+
+    mIterator->timeMs.push_back(timeMs);
+    // Advance so that the next report lands on the following layer record.
+    ++mIterator;
+}
+
+//!
+//! \brief Print a per-layer table (total, average, median, share of total time) followed by a "Total" row.
+//!
+//! Layers without measurements or with a total time of zero are left out of the table.
+//!
+void Profiler::print(std::ostream& os) const noexcept
+{
+    std::string const nameHdr("   Layer");
+    std::string const timeHdr("   Time(ms)");
+    std::string const avgHdr("     Avg.(ms)");
+    std::string const medHdr("   Median(ms)");
+    std::string const percentageHdr("   Time(%)");
+
+    float const totalTimeMs = getTotalTime();
+
+    // Column widths follow the header widths so that the values line up under their headers.
+    auto const timeLength = timeHdr.size();
+    auto const avgLength = avgHdr.size();
+    auto const medLength = medHdr.size();
+    auto const percentageLength = percentageHdr.size();
+
+    os << std::endl
+       << "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
+       << timeHdr << avgHdr << medHdr << percentageHdr << nameHdr << std::endl;
+
+    for (auto const& p : mLayers)
+    {
+        if (p.timeMs.empty() || getTotalTime(p) == 0.F)
+        {
+            // there is no point to print profiling for layer that didn't run at all
+            continue;
+        }
+        // The formatter stays disabled from here until the end of the "Total" row below.
+        // clang-format off
+        os << std::setw(timeLength) << std::fixed << std::setprecision(2) << getTotalTime(p)
+           << std::setw(avgLength) << std::fixed << std::setprecision(4) << getAvgTime(p)
+           << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime(p)
+           << std::setw(percentageLength) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100
+           << "   " << p.name << std::endl;
+    }
+    {
+        // "Total" row. NOTE(review): mUpdatesCount is used as a divisor here; confirm it cannot be zero when
+        // print() is called.
+        os << std::setw(timeLength) << std::fixed << std::setprecision(2)
+           << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
+           << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime()
+           << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0
+           << "   Total" << std::endl;
+        // clang-format on
+    }
+    os << std::endl;
+}
+
+//!
+//! \brief Write the layer profile to \p fileName as a JSON array.
+//!
+//! The first element is { "count" : mUpdatesCount }; it is followed by one object per layer holding its
+//! name, total time, average, median and percentage of the total time. The file is opened with truncation.
+//!
+void Profiler::exportJSONProfile(std::string const& fileName) const noexcept
+{
+    std::ofstream os(fileName, std::ofstream::trunc);
+    os << "[" << std::endl << "  { \"count\" : " << mUpdatesCount << " }" << std::endl;
+
+    auto const totalTimeMs = getTotalTime();
+
+    for (auto const& l : mLayers)
+    {
+        // The layer name is written between quotes as is, without JSON escaping.
+        // NOTE(review): unlike print(), layers with a total time of zero are not skipped here, so the
+        // percentage divides by totalTimeMs even when it may be zero - confirm.
+        // clang-format off
+        os << ", {" << R"( "name" : ")"      << l.name << R"(")"
+                       R"(, "timeMs" : )"     << getTotalTime(l)
+           <<          R"(, "averageMs" : )"  << getAvgTime(l)
+           <<          R"(, "medianMs" : )"  << getMedianTime(l)
+           <<          R"(, "percentage" : )" << getTotalTime(l) / totalTimeMs * 100
+           << " }"  << std::endl;
+        // clang-format on
+    }
+    os << "]" << std::endl;
+}
+
+//!
+//! \brief Print the "Input Tensors:" heading to \p os, then forward to BindingsStd::dumpInputs.
+//!
+void dumpInputs(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os)
+{
+    os << "Input Tensors:" << std::endl;
+    bindings.dumpInputs(context, os);
+}
+
+//!
+//! \brief Forward to BindingsStd::dumpOutputs with \p context and \p os. No heading is printed here.
+//!
+void dumpOutputs(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os)
+{
+    bindings.dumpOutputs(context, os);
+}
+
+//!
+//! \brief Forward to BindingsStd::dumpRawBindingToFiles with \p context and \p os.
+//!
+void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os)
+{
+    bindings.dumpRawBindingToFiles(context, os);
+}
+
+//!
+//! \brief Write the output bindings to \p fileName as a JSON array.
+//!
+//! One object is written per entry of bindings.getOutputBindings(), holding its name, its dimensions (as a
+//! string) and its values. The file is opened with truncation.
+//!
+void exportJSONOutput(
+    nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::string const& fileName, int32_t batch)
+{
+    std::ofstream os(fileName, std::ofstream::trunc);
+    // Two spaces in front of the first object; ", " afterwards, both between objects and between fields.
+    std::string sep = "  ";
+    auto const output = bindings.getOutputBindings();
+    os << "[" << std::endl;
+    for (auto const& binding : output)
+    {
+        // binding.first is the name written to the file; binding.second is handed to dumpBindingValues.
+        // clang-format off
+        os << sep << R"({ "name" : ")" << binding.first << "\"" << std::endl;
+        sep = ", ";
+        os << "  " << sep << R"("dimensions" : ")";
+        bindings.dumpBindingDimensions(binding.first, context, os);
+        os << "\"" << std::endl;
+        os << "  " << sep << "\"values\" : [ ";
+        bindings.dumpBindingValues(context, binding.second, os, sep, batch);
+        os << " ]" << std::endl << "  }" << std::endl;
+        // clang-format on
+    }
+    os << "]" << std::endl;
+}
+
+// NOTE(review): this repeats the declaration of the function defined directly above; it looks redundant.
+void exportJSONOutput(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings,
+    std::string const& fileName, int32_t batch);
+
+#if ENABLE_UNIFIED_BUILDER
+//!
+//! \brief Forward to BindingsSafe::dumpOutputs with \p graph and \p os.
+//!
+void dumpSafeOutputs(nvinfer2::safe::ITRTGraph const& graph, BindingsSafe const& bindings, std::ostream& os)
+{
+    bindings.dumpOutputs(graph, os);
+}
+
+//!
+//! \brief Forward to BindingsSafe::dumpRawBindingToFiles with \p graph and \p os.
+//!
+void dumpSafeRawBindingsToFiles(nvinfer2::safe::ITRTGraph const& graph, BindingsSafe const& bindings, std::ostream& os)
+{
+    // The const qualifier of graph is cast away for this call.
+    // NOTE(review): presumably the callee takes a non-const reference - confirm that it does not modify graph.
+    bindings.dumpRawBindingToFiles(const_cast<nvinfer2::safe::ITRTGraph&>(graph), os);
+}
+
+//!
+//! \brief Write the output bindings of a safe graph to \p fileName as a JSON array.
+//!
+//! One object is written per entry of bindings.getOutputBindings(), holding its name, its dimensions (as a
+//! string) and its values. The file is opened with truncation.
+//!
+void exportSafeJSONOutput(
+    nvinfer2::safe::ITRTGraph const& graph, BindingsSafe const& bindings, std::string const& fileName, int32_t batch)
+{
+    std::ofstream os(fileName, std::ofstream::trunc);
+    // Two spaces in front of the first object; ", " afterwards, both between objects and between fields.
+    std::string sep = "  ";
+    auto const output = bindings.getOutputBindings();
+    os << "[" << std::endl;
+    for (auto const& binding : output)
+    {
+        // binding.first is the name written to the file; binding.second is handed to dumpBindingValues.
+        // clang-format off
+        os << sep << R"({ "name" : ")" << binding.first << "\"" << std::endl;
+        sep = ", ";
+        os << "  " << sep << R"("dimensions" : ")";
+        bindings.dumpBindingDimensions(binding.first, graph, os);
+        os << "\"" << std::endl;
+        os << "  " << sep << "\"values\" : [ ";
+        bindings.dumpBindingValues(graph, binding.second, os, sep, batch);
+        os << " ]" << std::endl << "  }" << std::endl;
+        // clang-format on
+    }
+    os << "]" << std::endl;
+}
+
+// NOTE(review): this repeats the declaration of the function defined directly above; it looks redundant.
+void exportSafeJSONOutput(
+    nvinfer2::safe::ITRTGraph const& graph, BindingsSafe const& bindings, std::string const& fileName, int32_t batch);
+#endif
+
+//!
+//! \brief Log and/or export the layer information of an engine, as requested by \p reporting.
+//!
+//! With reporting.layerInfo set, the one-line format is written to sample::gLogInfo. With a non-empty
+//! reporting.exportLayerInfo, the JSON format is written to that file, which is opened with truncation.
+//! The two outputs are independent of each other.
+//!
+void printLayerInfo(
+    ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context)
+{
+    if (reporting.layerInfo)
+    {
+        sample::gLogInfo << "Layer Information:" << std::endl;
+        sample::gLogInfo << getLayerInformation(engine, context, nvinfer1::LayerInformationFormat::kONELINE)
+                         << std::flush;
+    }
+    if (!reporting.exportLayerInfo.empty())
+    {
+        std::ofstream os(reporting.exportLayerInfo, std::ofstream::trunc);
+        os << getLayerInformation(engine, context, nvinfer1::LayerInformationFormat::kJSON) << std::flush;
+    }
+}
+
+//!
+//! \brief Log the min/opt/max shapes of every input tensor for each optimization profile of \p engine.
+//!
+//! Nothing is logged unless reporting.optProfileInfo is set. Tensors whose IO mode is not kINPUT are skipped.
+//!
+void printOptimizationProfileInfo(ReportingOptions const& reporting, nvinfer1::ICudaEngine const* engine)
+{
+    if (!reporting.optProfileInfo)
+    {
+        return;
+    }
+
+    sample::gLogInfo << "Optimization Profile Information:" << std::endl;
+    for (int32_t profile = 0; profile < engine->getNbOptimizationProfiles(); ++profile)
+    {
+        int32_t const nbIOTensors = engine->getNbIOTensors();
+        for (int32_t tensor = 0; tensor < nbIOTensors; ++tensor)
+        {
+            auto const name = engine->getIOTensorName(tensor);
+            if (engine->getTensorIOMode(name) != nvinfer1::TensorIOMode::kINPUT)
+            {
+                continue;
+            }
+
+            auto const minShape = engine->getProfileShape(name, profile, nvinfer1::OptProfileSelector::kMIN);
+            auto const optShape = engine->getProfileShape(name, profile, nvinfer1::OptProfileSelector::kOPT);
+            auto const maxShape = engine->getProfileShape(name, profile, nvinfer1::OptProfileSelector::kMAX);
+
+            sample::gLogInfo << "Model input " << name << " (profile " << profile << "): "
+                             << "min=" << dimsToString(minShape) << ", opt=" << dimsToString(optShape)
+                             << ", max=" << dimsToString(maxShape) << std::endl;
+        }
+    }
+}
+
+//!
+//! \brief Print and/or export the layer profile as requested by \p reporting.
+//!
+//! For a non-safe environment in which a profile was printed or exported, a warning is logged when the
+//! engine uses auxiliary streams.
+//!
+void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironmentBase& iEnv)
+{
+    bool const exportRequested = !reporting.exportProfile.empty();
+
+    if (reporting.profile)
+    {
+        iEnv.profiler->print(sample::gLogInfo);
+    }
+    if (exportRequested)
+    {
+        iEnv.profiler->exportJSONProfile(reporting.exportProfile);
+    }
+
+    // The warning about the total per-layer latency only applies to non-safe environments for which a
+    // profile was actually produced.
+    bool const profileProduced = reporting.profile || exportRequested;
+    if (iEnv.safe || !profileProduced)
+    {
+        return;
+    }
+
+    int32_t const nbAuxStreams = iEnv.engine->getNbAuxStreams();
+    if (nbAuxStreams <= 0)
+    {
+        return;
+    }
+    sample::gLogWarning << "The engine uses " << nbAuxStreams << " auxiliary streams, so the \"Total\" latency "
+                        << "may not be accurate because some layers may have run in parallel!" << std::endl;
+}
+
+namespace details
+{
+//!
+//! \brief Print, dump and/or export the outputs of a standard execution context as requested by \p reporting.
+//!
+//! An empty \p context is reported as an error and nothing is printed. \p binding is not checked here and
+//! is dereferenced only inside the branches that use it.
+//!
+void dump(std::unique_ptr<nvinfer1::IExecutionContext> const& context, std::unique_ptr<BindingsStd> const& binding,
+    ReportingOptions const& reporting, int32_t batch)
+{
+    if (!context)
+    {
+        sample::gLogError << "Empty context! Skip printing outputs." << std::endl;
+        return;
+    }
+    // The three requests below are independent of each other.
+    if (reporting.output)
+    {
+        dumpOutputs(*context, *binding, sample::gLogInfo);
+    }
+    if (reporting.dumpRawBindings)
+    {
+        dumpRawBindingsToFiles(*context, *binding, sample::gLogInfo);
+    }
+    if (!reporting.exportOutput.empty())
+    {
+        exportJSONOutput(*context, *binding, reporting.exportOutput, batch);
+    }
+}
+
+#if ENABLE_UNIFIED_BUILDER
+//!
+//! \brief Print, dump and/or export the outputs of a safe graph as requested by \p reporting.
+//!
+//! An empty \p graph is reported as an error and nothing is printed. \p binding is not checked here and is
+//! dereferenced only inside the branches that use it.
+//!
+void safeDump(std::unique_ptr<nvinfer2::safe::ITRTGraph> const& graph, std::unique_ptr<BindingsSafe> const& binding,
+    ReportingOptions const& reporting, int32_t batch)
+{
+    if (!graph)
+    {
+        sample::gLogError << "Empty safe graph! Skip printing outputs." << std::endl;
+        return;
+    }
+    // The three requests below are independent of each other.
+    if (reporting.output)
+    {
+        dumpSafeOutputs(*graph, *binding, sample::gLogInfo);
+    }
+    if (reporting.dumpRawBindings)
+    {
+        dumpSafeRawBindingsToFiles(*graph, *binding, sample::gLogInfo);
+    }
+    if (!reporting.exportOutput.empty())
+    {
+        exportSafeJSONOutput(*graph, *binding, reporting.exportOutput, batch);
+    }
+}
+#endif
+
+} // namespace details
+
+//!
+//! \brief Print the outputs of the first binding set of an inference environment.
+//!
+//! iEnv.safe selects the safe path, which is compiled in only when ENABLE_UNIFIED_BUILDER is set; without
+//! it a warning is logged instead. Otherwise \p iEnv is treated as an InferenceEnvironmentStd. On both
+//! paths only element 0 of the bindings is used, and an empty binding is reported as an error.
+//!
+void printOutput(ReportingOptions const& reporting, InferenceEnvironmentBase const& iEnv, int32_t batch)
+{
+    if (iEnv.safe)
+    {
+#if ENABLE_UNIFIED_BUILDER
+        // at(0) throws if the container is empty.
+        auto const& binding = static_cast<const InferenceEnvironmentSafe&>(iEnv).bindings.at(0);
+        if (!binding)
+        {
+            sample::gLogError << "Empty bindings! Skip printing outputs." << std::endl;
+            return;
+        }
+        auto const& graph = static_cast<const InferenceEnvironmentSafe&>(iEnv).mClonedGraphs.at(0);
+        details::safeDump(graph, binding, reporting, batch);
+#else
+        sample::gLogWarning << "Safe mode is not supported! Skip printing outputs." << std::endl;
+#endif
+        return;
+    }
+    // at(0) throws if the container is empty.
+    auto const& binding = static_cast<const InferenceEnvironmentStd&>(iEnv).bindings.at(0);
+    if (!binding)
+    {
+        sample::gLogError << "Empty bindings! Skip printing outputs." << std::endl;
+        return;
+    }
+    auto const& context = static_cast<const InferenceEnvironmentStd&>(iEnv).contexts.at(0);
+    details::dump(context, binding, reporting, batch);
+}
+
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleReporting.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleReporting.h
new file mode 100644
index 0000000000000000000000000000000000000000..013273a8d58986fd04f489455949bc8ae3d47008
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleReporting.h
@@ -0,0 +1,298 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_REPORTING_H
+#define TRT_SAMPLE_REPORTING_H
+
+#include <functional>
+#include <iostream>
+#include <numeric>
+
+#include "sampleOptions.h"
+
+namespace sample
+{
+
+class BindingsStd;
+
+//!
+//! \struct InferenceTime
+//! \brief Measurement times in milliseconds
+//!
+struct InferenceTime
+{
+    float enq{0.F};     //!< Enqueue time
+    float h2d{0.F};     //!< Host to Device time
+    float compute{0.F}; //!< Compute time
+    float d2h{0.F};     //!< Device to Host time
+
+    InferenceTime() = default;
+    InferenceTime(InferenceTime const&) = default;
+    InferenceTime(InferenceTime&&) = default;
+    InferenceTime& operator=(InferenceTime const&) = default;
+    InferenceTime& operator=(InferenceTime&&) = default;
+    ~InferenceTime() = default;
+
+    InferenceTime(float q, float i, float c, float o)
+        : enq{q}
+        , h2d{i}
+        , compute{c}
+        , d2h{o}
+    {
+    }
+
+    //! Ideal latency: the sum of the host-to-device, compute and device-to-host times.
+    float latency() const
+    {
+        return h2d + compute + d2h;
+    }
+};
+
+//!
+//! \struct InferenceTrace
+//! \brief Measurement points in milliseconds
+//!
+struct InferenceTrace
+{
+    int32_t stream{0};
+    float enqStart{0.F};     //!< Enqueue start
+    float enqEnd{0.F};       //!< Enqueue end
+    float h2dStart{0.F};     //!< Host to Device start
+    float h2dEnd{0.F};       //!< Host to Device end
+    float computeStart{0.F}; //!< Compute start
+    float computeEnd{0.F};   //!< Compute end
+    float d2hStart{0.F};     //!< Device to Host start
+    float d2hEnd{0.F};       //!< Device to Host end
+
+    InferenceTrace() = default;
+    InferenceTrace(InferenceTrace const&) = default;
+    InferenceTrace(InferenceTrace&&) = default;
+    InferenceTrace& operator=(InferenceTrace const&) = default;
+    InferenceTrace& operator=(InferenceTrace&&) = default;
+    ~InferenceTrace() = default;
+
+    InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, float ce, float os, float oe)
+        : stream{s}
+        , enqStart{es}
+        , enqEnd{ee}
+        , h2dStart{is}
+        , h2dEnd{ie}
+        , computeStart{cs}
+        , computeEnd{ce}
+        , d2hStart{os}
+        , d2hEnd{oe}
+    {
+    }
+};
+
+inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) // member-wise sum
+{
+    return InferenceTime{a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h};
+}
+
+inline InferenceTime& operator+=(InferenceTime& a, InferenceTime const& b) // adds b into a, returns a itself
+{
+    return a = a + b; // the defaulted copy assignment yields a reference to a, so no extra copy is made
+}
+
+//!
+//! \struct PerformanceResult
+//! \brief Performance result of a performance metric
+//!
+struct PerformanceResult
+{
+    float min{0.F};
+    float max{0.F};
+    float mean{0.F};
+    float median{0.F};
+    std::vector<float> percentiles; // presumably one value per requested percentile - confirm in getPerformanceResult()
+    float coeffVar{0.F}; // coefficient of variation
+};
+
+//!
+//! \brief Print benchmarking time and number of traces collected
+//!
+void printProlog(int32_t warmups, int32_t timings, float warmupMs, float walltime, std::ostream& os);
+
+//!
+//! \brief Print a timing trace
+//!
+void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os);
+
+//!
+//! \brief Print the performance summary of a trace
+//!
+void printEpilog(std::vector<InferenceTime> const& timings, std::vector<float> const& percentiles, int32_t batchSize,
+    std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);
+
+//!
+//! \brief Get the result of a specific performance metric from a trace
+//!
+PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
+    std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles);
+
+//!
+//! \brief Print the explanations of the performance metrics printed in printEpilog() function.
+//!
+void printMetricExplanations(std::ostream& os);
+
+//!
+//! \brief Print and summarize a timing trace
+//!
+void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
+    InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);
+
+//!
+//! \brief Export a timing trace to JSON file
+//!
+void exportJSONTrace(
+    std::vector<InferenceTrace> const& InferenceTime, std::string const& fileName, int32_t const nbWarmups);
+
+//!
+//! \brief Print input tensors to stream
+//!
+void dumpInputs(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);
+
+//!
+//! \brief Print output tensors to stream
+//!
+void dumpOutputs(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);
+
+void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings, std::ostream& os);
+
+//!
+//! \brief Export output tensors to JSON file
+//!
+void exportJSONOutput(nvinfer1::IExecutionContext const& context, BindingsStd const& bindings,
+    std::string const& fileName, int32_t batch);
+
+//!
+//! \struct LayerProfile
+//! \brief Layer profile information
+//!
+struct LayerProfile
+{
+    std::string name;
+    std::vector<float> timeMs; // recorded times; Profiler::getMedianTime() indexes this by run number
+};
+
+//!
+//! \class Profiler
+//! \brief Collect per-layer profile information, assuming times are reported in the same order
+//!
+class Profiler : public nvinfer1::IProfiler
+{
+
+public:
+    void reportLayerTime(char const* layerName, float timeMs) noexcept override;
+
+    void print(std::ostream& os) const noexcept;
+
+    //!
+    //! \brief Export a profile to JSON file
+    //!
+    void exportJSONProfile(std::string const& fileName) const noexcept;
+
+private:
+    //! return the sum of every recorded time over all layers
+    float getTotalTime() const noexcept
+    {
+        auto const plusLayerTime
+            = [this](float accumulator, LayerProfile const& lp) { return accumulator + getTotalTime(lp); };
+        return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime);
+    }
+
+    float getMedianTime() const noexcept // median over runs of the per-run sum across all layers
+    {
+        if (mLayers.empty())
+        {
+            return 0.F;
+        }
+        std::vector<float> totalTime;
+        for (size_t run = 0; run < mLayers[0].timeMs.size(); ++run)
+        {
+            // The run count comes from the first layer; a layer with fewer samples adds nothing for this run.
+            auto const layerTime = [run](float accumulator, LayerProfile const& lp) {
+                return run < lp.timeMs.size() ? accumulator + lp.timeMs[run] : accumulator;
+            };
+            totalTime.push_back(std::accumulate(mLayers.begin(), mLayers.end(), 0.F, layerTime));
+        }
+        return median(totalTime);
+    }
+
+    float getMedianTime(LayerProfile const& p) const noexcept // median of one layer's recorded times
+    {
+        return median(p.timeMs);
+    }
+
+    static float median(std::vector<float> vals) // taken by value: the copy is sorted in place
+    {
+        if (vals.empty())
+        {
+            return 0.F;
+        }
+        std::sort(vals.begin(), vals.end());
+        if (vals.size() % 2U == 1U)
+        {
+            return vals[vals.size() / 2U];
+        }
+        return (vals[vals.size() / 2U - 1U] + vals[vals.size() / 2U]) * 0.5F; // even count: mean of the middle pair
+    }
+
+    //! return the total runtime of given layer profile
+    float getTotalTime(LayerProfile const& p) const noexcept
+    {
+        return std::accumulate(p.timeMs.begin(), p.timeMs.end(), 0.F);
+    }
+
+    float getAvgTime(LayerProfile const& p) const noexcept // mean recorded time; 0 when nothing was recorded
+    {
+        return p.timeMs.empty() ? 0.F : getTotalTime(p) / p.timeMs.size();
+    }
+
+    std::vector<LayerProfile> mLayers;
+    std::vector<LayerProfile>::iterator mIterator{mLayers.begin()}; // NOTE(review): a copied Profiler keeps the source's iterator
+    int32_t mUpdatesCount{0};
+};
+
+//!
+//! \brief Print layer info to logger or export it to output JSON file.
+//!
+void printLayerInfo(
+    ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context);
+
+//!
+//! \brief Print optimization profile info to logger.
+//!
+void printOptimizationProfileInfo(ReportingOptions const& reporting, nvinfer1::ICudaEngine const* engine);
+
+//! Forward declaration.
+struct InferenceEnvironmentBase;
+
+//!
+//! \brief Print per-layer perf profile data to logger or export it to output JSON file.
+//!
+void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironmentBase& iEnv);
+
+//!
+//! \brief Print binding output values to logger or export them to output JSON file.
+//!
+void printOutput(ReportingOptions const& reporting, InferenceEnvironmentBase const& iEnv, int32_t batch);
+
+} // namespace sample
+
+#endif // TRT_SAMPLE_REPORTING_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleUtils.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleUtils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..93be2845b43a7c39de36c9d20a4719ee2fd0a64e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleUtils.cpp
@@ -0,0 +1,622 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sampleUtils.h"
+#include "bfloat16.h"
+#include "common.h"
+#include "half.h"
+#include <cuda.h>
+#include <type_traits>
+
+#if CUDA_VERSION >= 11060
+#include <cuda_fp8.h>
+#endif
+
+using namespace nvinfer1;
+
+namespace sample
+{
+
+int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch)
+{
+    // Start from one element and keep the largest extent * stride product seen over all axes.
+    int64_t span = 1;
+    for (int32_t axis = 0; axis < dims.nbDims; ++axis)
+    {
+        int64_t extent = dims.d[axis];
+        if (extent == 0)
+        {
+            // A zero-length axis makes the whole tensor empty.
+            return 0;
+        }
+        // Along vecDim the extent is counted in groups of comps, rounded up.
+        extent = (axis == vecDim) ? samplesCommon::divUp(extent, comps) : extent;
+        span = std::max(span, extent * strides.d[axis]);
+    }
+    // A negative vecDim scales by one; otherwise the result is scaled by comps.
+    int64_t const lanes = (vecDim < 0) ? 1 : comps;
+    return span * batch * lanes;
+}
+
+nvinfer1::Dims toDims(std::vector<int64_t> const& vec)
+{
+    int32_t const limit = static_cast<int32_t>(nvinfer1::Dims::MAX_DIMS); // capacity of Dims::d
+    if (vec.size() > static_cast<size_t>(limit)) // compared as size_t so a huge vector cannot wrap around
+    {
+        sample::gLogWarning << "Vector too long, only first " << limit << " elements are used in dimension."
+                            << std::endl;
+    }
+    nvinfer1::Dims dims{static_cast<int32_t>(std::min(vec.size(), static_cast<size_t>(limit))), {}};
+    std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d)); // copies only the elements that fit
+    return dims;
+}
+
+void loadFromFile(std::string const& fileName, char* dst, size_t size)
+{
+    ASSERT(dst);
+
+    std::ifstream file(fileName, std::ios::in | std::ios::binary);
+    if (!file.is_open())
+    {
+        std::ostringstream msg;
+        msg << "Cannot open file " << fileName << "!";
+        throw std::invalid_argument(msg.str());
+    }
+
+    file.seekg(0, std::ios::end);
+    auto const fileSize = static_cast<int64_t>(file.tellg());
+    // A file of exactly twice the requested size is accepted as well: after the int32_t -> int64_t
+    // change, VC engines created with earlier versions may expect input of half the size.
+    bool const exactSize = fileSize == static_cast<int64_t>(size);
+    bool const doubleSize = fileSize == static_cast<int64_t>(size * 2);
+    if (!exactSize && !doubleSize)
+    {
+        std::ostringstream msg;
+        msg << "Unexpected file size for input file: " << fileName << ". Note: Input binding size is: " << size
+            << " bytes but the file size is " << fileSize
+            << " bytes. Double check the size and datatype of the provided data.";
+        throw std::invalid_argument(msg.str());
+    }
+    // Rewind after measuring, then read exactly size bytes.
+    file.seekg(0, std::ios::beg);
+    file.read(dst, size);
+    size_t const nbBytesRead = file.gcount();
+    file.close();
+    if (nbBytesRead != size)
+    {
+        std::ostringstream msg;
+        msg << "Unexpected file size for input file: " << fileName << ". Note: Expected: " << size
+            << " bytes but only read: " << nbBytesRead << " bytes";
+        throw std::invalid_argument(msg.str());
+    }
+}
+
+std::vector<std::string> splitToStringVec(std::string const& s, char separator, int64_t maxSplit)
+{
+    std::vector<std::string> pieces;
+    size_t const len = s.length();
+    size_t cursor = 0;
+
+    while (cursor < len)
+    {
+        // With a non-negative maxSplit, once that many pieces exist the remainder becomes the last piece.
+        bool const limitReached = maxSplit >= 0 && static_cast<int64_t>(pieces.size()) == maxSplit;
+        if (limitReached)
+        {
+            pieces.emplace_back(s.substr(cursor));
+            break;
+        }
+
+        // Without a further separator the piece runs to the end of the string.
+        size_t const found = s.find(separator, cursor);
+        size_t const pieceEnd = (found == std::string::npos) ? len : found;
+        pieces.emplace_back(s.substr(cursor, pieceEnd - cursor));
+
+        // A separator in the very last position yields a trailing empty piece.
+        if (pieceEnd == len - 1)
+        {
+            pieces.emplace_back("");
+        }
+
+        cursor = pieceEnd + 1;
+    }
+
+    return pieces;
+}
+
+bool broadcastIOFormats(std::vector<IOFormat> const& formats, size_t nbBindings, bool isInput /*= true*/)
+{
+    // Exactly one format means it is broadcast.
+    bool const broadcast = (formats.size() == 1);
+    // An empty list is always accepted; otherwise the count must be one or equal nbBindings.
+    bool const countMismatch = !formats.empty() && !broadcast && (formats.size() != nbBindings);
+
+    if (countMismatch)
+    {
+        // The message names the side (inputs or outputs) that was misconfigured.
+        char const* const message = isInput
+            ? "The number of inputIOFormats must match network's inputs or be one for broadcasting."
+            : "The number of outputIOFormats must match network's outputs or be one for broadcasting.";
+        throw std::invalid_argument(message);
+    }
+    return broadcast;
+}
+
+void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights)
+{
+    using TensorToLayer = std::unordered_map<nvinfer1::ITensor*, nvinfer1::ILayer*>;
+    using LayerToTensor = std::unordered_map<nvinfer1::ILayer*, nvinfer1::ITensor*>;
+
+    // 1. Index the MatrixMultiply, Constant and Shuffle layers of the network by the tensors that link them.
+    TensorToLayer matmulI2L;  // second input tensor of a MatrixMultiply -> that layer
+    TensorToLayer constO2L;   // output tensor of a float/half Constant -> that layer
+    TensorToLayer shuffleI2L; // input tensor of a Shuffle -> that layer
+    LayerToTensor shuffleL2O; // Shuffle layer -> its output tensor
+    auto collectMappingInfo = [&](int32_t const idx) {
+        ILayer* l = network.getLayer(idx);
+        switch (l->getType())
+        {
+        case nvinfer1::LayerType::kMATRIX_MULTIPLY:
+        {
+            // Assume the weights are on the second input.
+            matmulI2L.insert({l->getInput(1), l});
+            break;
+        }
+        case nvinfer1::LayerType::kCONSTANT:
+        {
+            DataType const dtype = static_cast<nvinfer1::IConstantLayer*>(l)->getWeights().type;
+            if (dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF)
+            {
+                // Only kFLOAT and kHALF constants are candidates for sparsification.
+                constO2L.insert({l->getOutput(0), l});
+            }
+            break;
+        }
+        case nvinfer1::LayerType::kSHUFFLE:
+        {
+            shuffleI2L.insert({l->getInput(0), l});
+            shuffleL2O.insert({l, l->getOutput(0)});
+            break;
+        }
+        default: break;
+        }
+    };
+    int32_t const nbLayers = network.getNbLayers();
+    for (int32_t i = 0; i < nbLayers; ++i)
+    {
+        collectMappingInfo(i);
+    }
+    if (matmulI2L.size() == 0 || constO2L.size() == 0)
+    {
+        // No MatrixMultiply or Constant layer found, no weights to sparsify.
+        return;
+    }
+
+    // Helpers for the analysis below.
+    auto isTranspose
+        = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); };
+    auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; };
+    auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool {
+        for (int32_t i = 0; i < dims.nbDims; ++i)
+        {
+            if (dims.d[i] != i || dims.d[i] != -1) // NOTE(review): true for every value, so any non-empty dims yields false
+            {
+                return false;
+            }
+        }
+        return true;
+    };
+    auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor* {
+        while (shuffleI2L.find(t) != shuffleI2L.end()) // follow Shuffle layers that consume t
+        {
+            nvinfer1::IShuffleLayer* s = static_cast<nvinfer1::IShuffleLayer*>(shuffleI2L.at(t));
+            if (!is2D(s->getInput(0)->getDimensions()) || !is2D(s->getReshapeDimensions())
+                || !isIdenticalReshape(s->getReshapeDimensions())) // NOTE(review): with the lambda above this always breaks
+            {
+                break;
+            }
+
+            if (isTranspose(s->getFirstTranspose())) // each transposing permutation flips the flag
+            {
+                needTranspose = !needTranspose;
+            }
+            if (isTranspose(s->getSecondTranspose()))
+            {
+                needTranspose = !needTranspose;
+            }
+
+            t = shuffleL2O.at(s);
+        }
+        return t;
+    };
+
+    // 2. Forward analysis to collect the Constant layers connected to MatMul via Transpose
+    std::unordered_map<nvinfer1::IConstantLayer*, bool> constantLayerToSparse; // Constant layer -> needTranspose
+    for (auto& o2l : constO2L)
+    {
+        // Whether the weights of the Constant layer must be transposed before pruning.
+        // Need to transpose by default due to semantic difference.
+        bool needTranspose{true};
+        ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose);
+        if (matmulI2L.find(t) == matmulI2L.end()) // the constant does not feed a MatMul's second input
+        {
+            continue;
+        }
+
+        // Only two-input, all-2D MatMuls with a plain first operand and a non-vector second operand qualify.
+        IMatrixMultiplyLayer* mm = static_cast<nvinfer1::IMatrixMultiplyLayer*>(matmulI2L.at(t));
+        bool const twoInputs = mm->getNbInputs() == 2;
+        bool const all2D = is2D(mm->getInput(0)->getDimensions()) && is2D(mm->getInput(1)->getDimensions());
+        bool const isSimple = mm->getOperation(0) == nvinfer1::MatrixOperation::kNONE
+            && mm->getOperation(1) != nvinfer1::MatrixOperation::kVECTOR;
+        if (!(twoInputs && all2D && isSimple))
+        {
+            continue;
+        }
+        if (mm->getOperation(1) == nvinfer1::MatrixOperation::kTRANSPOSE) // a transposed operand flips the flag again
+        {
+            needTranspose = !needTranspose;
+        }
+
+        constantLayerToSparse.insert({static_cast<IConstantLayer*>(o2l.second), needTranspose});
+    }
+
+    // 3. Finally, sparsify the weights
+    auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) {
+        Dims dims = layer->getOutput(0)->getDimensions();
+        ASSERT(dims.nbDims == 2);
+        int32_t const idxN = needTranspose ? 1 : 0;
+        int32_t const n = dims.d[idxN];
+        int32_t const k = dims.d[1 - idxN];
+        sparseWeights.emplace_back(); // one new buffer per rewritten layer
+        std::vector<int8_t>& spw = sparseWeights.back();
+        Weights w = layer->getWeights();
+        DataType const dtype = w.type;
+        ASSERT(dtype == nvinfer1::DataType::kFLOAT
+            || dtype == nvinfer1::DataType::kHALF); // non-float weights should have been ignored.
+
+        if (needTranspose)
+        {
+            if (dtype == nvinfer1::DataType::kFLOAT) // transpose the original weights into spw
+            {
+                spw.resize(w.count * sizeof(float));
+                transpose2DWeights<float>(spw.data(), w.values, k, n);
+            }
+            else if (dtype == nvinfer1::DataType::kHALF)
+            {
+                spw.resize(w.count * sizeof(half_float::half));
+                transpose2DWeights<half_float::half>(spw.data(), w.values, k, n);
+            }
+
+            w.values = spw.data();
+            std::vector<int8_t> tmpW; // receives the pruned copy of the transposed weights
+            sparsify(w, n, 1, tmpW);
+
+            if (dtype == nvinfer1::DataType::kFLOAT) // transpose the pruned data back into spw
+            {
+                transpose2DWeights<float>(spw.data(), tmpW.data(), n, k);
+            }
+            else if (dtype == nvinfer1::DataType::kHALF)
+            {
+                transpose2DWeights<half_float::half>(spw.data(), tmpW.data(), n, k);
+            }
+        }
+        else
+        {
+            sparsify(w, n, 1, spw);
+        }
+
+        w.values = spw.data(); // presumably sparseWeights must outlive the network's use of these weights - confirm
+        layer->setWeights(w);
+    };
+    for (auto& l : constantLayerToSparse)
+    {
+        sparsifyConstantWeights(l.first, l.second);
+    }
+}
+
+template <typename L>
+void setSparseWeights(L& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
+{
+    auto kernel = l.getKernelWeights();
+    sparsify(kernel, k, trs, sparseWeights); // writes the pruned copy into sparseWeights
+    kernel.values = sparseWeights.data();     // point the descriptor at the pruned copy
+    l.setKernelWeights(kernel);
+}
+
+// Explicit instantiation
+template void setSparseWeights<IConvolutionLayer>(
+    IConvolutionLayer& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
+
+void sparsify(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights)
+{
+    for (int32_t index = 0; index < network.getNbLayers(); ++index) // convolution kernels first
+    {
+        auto* layer = network.getLayer(index);
+        if (layer->getType() != nvinfer1::LayerType::kCONVOLUTION)
+        {
+            continue;
+        }
+        auto& conv = *static_cast<IConvolutionLayer*>(layer);
+        auto const& dims = conv.getKernelSizeNd();
+        ASSERT(dims.nbDims == 2 || dims.nbDims == 3);
+        auto const k = conv.getNbOutputMaps();
+        auto const trs = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies<int32_t>());
+        sparseWeights.emplace_back(); // one buffer per rewritten convolution
+        setSparseWeights(conv, k, trs, sparseWeights.back());
+    }
+
+    sparsifyMatMulKernelWeights(network, sparseWeights); // then MatMul weight constants
+    sample::gLogVerbose << "--sparsity=force pruned " << sparseWeights.size() << " weights to be sparsity pattern."
+                        << std::endl;
+    sample::gLogVerbose << "--sparsity=force has been deprecated. Please use <polygraphy surgeon prune> to rewrite the "
+                           "weights to a sparsity pattern and then run with --sparsity=enable"
+                        << std::endl;
+}
+
+void sparsify(Weights const& weights, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
+{
+    switch (weights.type) // dispatch to the typed implementation; only floating-point weights are supported
+    {
+    case DataType::kFLOAT:
+        sparsify(static_cast<float const*>(weights.values), weights.count, k, trs, sparseWeights);
+        break;
+    case DataType::kHALF:
+        sparsify(static_cast<half_float::half const*>(weights.values), weights.count, k, trs, sparseWeights);
+        break;
+    case DataType::kBF16:
+        sparsify(static_cast<BFloat16 const*>(weights.values), weights.count, k, trs, sparseWeights);
+        break;
+    case DataType::kINT8:
+    case DataType::kINT32:
+    case DataType::kUINT8:
+    case DataType::kBOOL:
+    case DataType::kINT4:
+    case DataType::kFP8:
+    case DataType::kINT64:
+    case DataType::kFP4: ASSERT(false && "Unsupported data type"); break; // explicit break: no fall-through
+    case DataType::kE8M0: ASSERT(false && "E8M0 is not supported"); break;
+    }
+}
+
+template <typename T>
+void print(std::ostream& os, T v) // uint8_t would stream as a character, so it is widened like the int8_t overload
+{
+    if constexpr (std::is_same_v<T, uint8_t>) { os << static_cast<int32_t>(v); } else { os << v; }
+}
+
+void print(std::ostream& os, int8_t v)
+{
+    os << int32_t{v}; // widened so the value streams as a number rather than a character
+}
+
+void print(std::ostream& os, __half v)
+{
+    os << static_cast<float>(v); // the half value is converted to float and streamed as a float
+}
+
+#if CUDA_VERSION >= 11060
+void print(std::ostream& os, __nv_fp8_e4m3 v)
+{
+    os << static_cast<float>(v); // the FP8 value is converted to float and streamed as a float
+}
+#endif
+
+int32_t dataOffsetFromDims(int64_t v, Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv)
+{
+    // Peel one coordinate per axis off the linear index v, last axis first, and add its contribution.
+    int32_t offset = 0;
+    for (int32_t axis = dims.nbDims - 1; axis >= 0; --axis)
+    {
+        int32_t const coord = v % dims.d[axis];
+        if (axis != vectorDim)
+        {
+            offset += coord * strides.d[axis] * (vectorDim == -1 ? 1 : spv);
+        }
+        else
+        {
+            offset += (coord / spv) * strides.d[axis] * spv + coord % spv; // group of spv, then slot within it
+        }
+        v /= dims.d[axis];
+        ASSERT(v >= 0);
+    }
+    return offset;
+}
+
+template <typename T>
+void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv)
+{
+    auto const* elements = static_cast<T const*>(buffer);
+    auto const count = volume(dims);
+    for (int64_t linear = 0; linear < count; ++linear) // every linear index is mapped to a buffer offset
+    {
+        int32_t const offset = dataOffsetFromDims(linear, dims, strides, vectorDim, spv);
+        if (linear != 0)
+        {
+            os << separator; // the separator goes between elements only
+        }
+        print(os, elements[offset]);
+    }
+}
+
+void dumpInt4Buffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv)
+{
+    // Each byte of the buffer holds two signed 4-bit values: even offsets select the low nibble of
+    // byte offset / 2, odd offsets select the high nibble.
+    auto const count = volume(dims);
+    auto const* bytes = static_cast<uint8_t const*>(buffer);
+    for (int64_t linear = 0; linear < count; ++linear)
+    {
+        int32_t const offset = dataOffsetFromDims(linear, dims, strides, vectorDim, spv);
+        if (linear != 0)
+        {
+            os << separator;
+        }
+
+        uint8_t const packed = bytes[offset / 2];
+        bool const lowNibble = (offset % 2 == 0);
+
+        // Bring the wanted nibble into the top four bits. The left shift is applied to the unsigned byte,
+        // because a left shift on int8_t can be undefined behaviour.
+        auto const topAligned = static_cast<int8_t>(lowNibble ? (packed << 4) : packed);
+
+        // Right-shifting the signed value sign-extends the nibble; the result streams as an integer.
+        os << (topAligned >> 4);
+    }
+}
+
+// Explicit instantiation
+template void dumpBuffer<bool>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+template void dumpBuffer<int32_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+template void dumpBuffer<int8_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+template void dumpBuffer<float>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+template void dumpBuffer<__half>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+template void dumpBuffer<BFloat16>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+#if CUDA_VERSION >= 11060
+template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os,
+    Dims const& dims, Dims const& strides, int32_t vectorDim, int32_t spv);
+#endif
+template void dumpBuffer<uint8_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+template void dumpBuffer<int64_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+
+template <typename T>
+void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
+{
+    // The weights are addressed as [k][c][trs], where c is derived from count. Along c, every window of
+    // four consecutive entries keeps its first two values and has the other two set to zero.
+    //
+    // All extent and index arithmetic is done in 64 bits so large tensors cannot overflow, and a
+    // non-positive k * trs is treated as "nothing to prune" instead of dividing by zero.
+
+    int64_t const kTrs = static_cast<int64_t>(k) * trs;
+    int64_t const c = kTrs > 0 ? count / kTrs : 0;
+    sparseWeights.resize(count * sizeof(T));
+    auto* sparseValues = reinterpret_cast<T*>(sparseWeights.data());
+
+    constexpr int32_t window = 4;
+    constexpr int32_t nonzeros = 2;
+
+    int64_t const crs = c * trs;
+    auto const getIndex = [=](int64_t ki, int64_t ci, int64_t rsi) { return ki * crs + ci * trs + rsi; };
+
+    for (int64_t ki = 0; ki < k; ++ki)
+    {
+        for (int64_t rsi = 0; rsi < trs; ++rsi)
+        {
+            for (int64_t ci = 0; ci < c; ++ci)
+            {
+                auto const index = getIndex(ki, ci, rsi);
+                // The position inside the current window decides whether the value survives.
+                if (ci % window < nonzeros)
+                {
+                    sparseValues[index] = values[index];
+                }
+                else
+                {
+                    sparseValues[index] = 0;
+                }
+            }
+        }
+    }
+}
+
+// Explicit instantiation
+template void sparsify<float>(
+    float const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
+template void sparsify<half_float::half>(
+    half_float::half const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
+
+template <typename T>
+void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n)
+{
+    // Copies the row-major m x n matrix at src into dst as its n x m transpose; dst and src must differ.
+    ASSERT(dst != src);
+    T* tdst = reinterpret_cast<T*>(dst);
+    T const* tsrc = reinterpret_cast<T const*>(src);
+    for (int64_t mi = 0; mi < m; ++mi)
+    {
+        for (int64_t ni = 0; ni < n; ++ni)
+        {
+            // 64-bit indices: m * n can exceed the int32_t range for large weight matrices.
+            tdst[ni * m + mi] = tsrc[mi * n + ni];
+        }
+    }
+}
+
+// Explicit instantiation
+template void transpose2DWeights<float>(void* dst, void const* src, int32_t const m, int32_t const n);
+template void transpose2DWeights<half_float::half>(void* dst, void const* src, int32_t const m, int32_t const n);
+
+template <typename T, typename std::enable_if_t<std::is_integral_v<T>, bool>>
+void fillBuffer(void* buffer, int64_t volume, int32_t min, int32_t max)
+{
+    // The engine is default-constructed on every call, so each call produces the same sequence.
+    std::default_random_engine rng;
+    std::uniform_int_distribution<int32_t> pick(min, max);
+    auto const nextValue = [&rng, &pick]() { return static_cast<T>(pick(rng)); };
+    std::generate_n(static_cast<T*>(buffer), volume, nextValue);
+}
+
+template <typename T, typename std::enable_if_t<!std::is_integral_v<T>, bool>>
+void fillBuffer(void* buffer, int64_t volume, float min, float max)
+{
+    // The engine is default-constructed on every call, so each call produces the same sequence.
+    std::default_random_engine rng;
+    std::uniform_real_distribution<float> pick(min, max);
+    auto const nextValue = [&rng, &pick]() { return static_cast<T>(pick(rng)); };
+    std::generate_n(static_cast<T*>(buffer), volume, nextValue);
+}
+
+// Explicit instantiation
+template void fillBuffer<bool>(void* buffer, int64_t volume, int32_t min, int32_t max);
+template void fillBuffer<int32_t>(void* buffer, int64_t volume, int32_t min, int32_t max);
+template void fillBuffer<int8_t>(void* buffer, int64_t volume, int32_t min, int32_t max);
+template void fillBuffer<float>(void* buffer, int64_t volume, float min, float max);
+template void fillBuffer<__half>(void* buffer, int64_t volume, float min, float max);
+template void fillBuffer<BFloat16>(void* buffer, int64_t volume, float min, float max);
+#if CUDA_VERSION >= 11060
+template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, float min, float max);
+#endif
+template void fillBuffer<uint8_t>(void* buffer, int64_t volume, int32_t min, int32_t max);
+template void fillBuffer<int64_t>(void* buffer, int64_t volume, int32_t min, int32_t max);
+
+bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target)
+{
+    // Only the first '*' acts as a wildcard. Without one (an empty pattern included) the strings must be equal.
+    size_t const star = pattern.find('*');
+    if (star == std::string::npos)
+    {
+        return pattern == target;
+    }
+    // Otherwise, target must follow prefix+anything+postfix pattern; the two parts are compared in place.
+    size_t const prefixLen = star;
+    size_t const postfixLen = pattern.size() - star - 1;
+    return target.size() >= prefixLen + postfixLen && target.compare(0, prefixLen, pattern, 0, prefixLen) == 0
+        && target.compare(target.size() - postfixLen, postfixLen, pattern, star + 1, postfixLen) == 0;
+}
+
+} // namespace sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleUtils.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..118a336ba95f0ddf645ca362479361e22d8bdc0d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/sampleUtils.h
@@ -0,0 +1,128 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SAMPLE_UTILS_H
+#define TRT_SAMPLE_UTILS_H
+
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <random>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <cuda.h>
+#include <cuda_fp16.h>
+
+#include "NvInfer.h"
+
+#include "common.h"
+#include "logger.h"
+
+#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err)                                                               \
+    {                                                                                                                  \
+        if ((condition) == false)                                                                                      \
+        {                                                                                                              \
+            (err) << (msg) << std::endl;                                                                               \
+            return retval;                                                                                             \
+        }                                                                                                              \
+    }
+
+namespace sample
+{
+
+template <typename T>
+inline T roundUp(T m, T n)
+{
+    return ((m + n - 1) / n) * n;
+}
+
+//! comps is the number of components in a vector. Ignored if vecDim < 0.
+int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch);
+
+using samplesCommon::volume;
+
+nvinfer1::Dims toDims(std::vector<int64_t> const& vec);
+
+template <typename T, typename std::enable_if_t<std::is_integral_v<T>, bool> = true>
+void fillBuffer(void* buffer, int64_t volume, int32_t min, int32_t max);
+
+template <typename T, typename std::enable_if_t<!std::is_integral_v<T>, bool> = true>
+void fillBuffer(void* buffer, int64_t volume, float min, float max);
+
+template <typename T>
+void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, nvinfer1::Dims const& dims,
+    nvinfer1::Dims const& strides, int32_t vectorDim, int32_t spv);
+
+void dumpInt4Buffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
+    Dims const& strides, int32_t vectorDim, int32_t spv);
+
+void loadFromFile(std::string const& fileName, char* dst, size_t size);
+
+std::vector<std::string> splitToStringVec(std::string const& option, char separator, int64_t maxSplit = -1);
+
+bool broadcastIOFormats(std::vector<IOFormat> const& formats, size_t nbBindings, bool isInput = true);
+
+int32_t getCudaDriverVersion();
+
+int32_t getCudaRuntimeVersion();
+
+void sparsify(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights);
+void sparsify(nvinfer1::Weights const& weights, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
+
+// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0.
+template <typename T>
+void sparsify(T const* values, int64_t count, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
+
+template <typename L>
+void setSparseWeights(L& l, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
+
+// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle layers.
+// Forward analysis on the API graph to determine which weights to sparsify.
+void sparsifyMatMulKernelWeights(
+    nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights);
+
+template <typename T>
+void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n);
+
+//! A helper function to match a target string with a pattern where the pattern can contain up to one wildcard ('*')
+//! character that matches to any strings.
+bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target);
+
+//! A helper method to find an item from an unordered_map. If the exact match exists, this is identical to
+//! map.find(target). If the exact match does not exist, it returns the first plausible match, taking up to one wildcard
+//! into account. If there is no plausible match, then it returns map.end().
+template <typename T>
+typename std::unordered_map<std::string, T>::const_iterator findPlausible(
+    std::unordered_map<std::string, T> const& map, std::string const& target)
+{
+    auto res = map.find(target);
+    if (res == map.end())
+    {
+        res = std::find_if(
+            map.begin(), map.end(), [&](typename std::unordered_map<std::string, T>::value_type const& item) {
+                return matchStringWithOneWildcard(item.first, target);
+            });
+    }
+    return res;
+}
+
+} // namespace sample
+
+#endif // TRT_SAMPLE_UTILS_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/streamReader.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/streamReader.h
new file mode 100644
index 0000000000000000000000000000000000000000..cd17a2d73864dcce3ea1af1f7887f7e8cd818dfe
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/common/streamReader.h
@@ -0,0 +1,162 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef STREAM_READER_H
+#define STREAM_READER_H
+
+
+#include "NvInferRuntime.h"
+#include <fstream>
+#include "sampleUtils.h"
+
+namespace samplesCommon
+{
+
+//! Implements the TensorRT IStreamReader to allow deserializing an engine directly from the plan file.
+class FileStreamReader final : public nvinfer1::IStreamReader
+{
+public:
+    bool open(std::string filepath)
+    {
+        mFile.open(filepath, std::ios::binary);
+        return mFile.is_open();
+    }
+
+    void close()
+    {
+        if (mFile.is_open())
+        {
+            mFile.close();
+        }
+    }
+
+    ~FileStreamReader() final
+    {
+        close();
+    }
+
+    int64_t read(void* dest, int64_t bytes) final
+    {
+        if (!mFile.good())
+        {
+            return -1;
+        }
+        mFile.read(static_cast<char*>(dest), bytes);
+        return mFile.gcount();
+    }
+
+    void reset()
+    {
+        ASSERT(mFile.good());
+        mFile.seekg(0);
+    }
+
+    bool isOpen() const
+    {
+        return mFile.is_open();
+    }
+
+private:
+    std::ifstream mFile;
+};
+
+//! Implements the TensorRT IStreamReaderV2 interface to allow deserializing an engine directly from the plan file.
+//! Supports seeking to a position within the file, and reading directly to device pointers.
+//! This implementation is not optimized, and will not provide performance improvements over the existing reader.
+class AsyncStreamReader final : public nvinfer1::IStreamReaderV2
+{
+public:
+    bool open(std::string const& filepath)
+    {
+        mFile.open(filepath, std::ios::binary);
+        return mFile.is_open();
+    }
+
+    void close()
+    {
+        if (mFile.is_open())
+        {
+            mFile.close();
+        }
+    }
+
+    ~AsyncStreamReader() final
+    {
+        close();
+    }
+
+    bool seek(int64_t offset, nvinfer1::SeekPosition where) noexcept final
+    {
+        switch (where)
+        {
+        case (nvinfer1::SeekPosition::kSET): mFile.seekg(offset, std::ios_base::beg); break;
+        case (nvinfer1::SeekPosition::kCUR): mFile.seekg(offset, std::ios_base::cur); break;
+        case (nvinfer1::SeekPosition::kEND): mFile.seekg(offset, std::ios_base::end); break;
+        }
+        return mFile.good();
+    }
+
+    int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept final
+    {
+        if (!mFile.good())
+        {
+            return -1;
+        }
+
+        cudaPointerAttributes attributes;
+        ASSERT(cudaPointerGetAttributes(&attributes, destination) == cudaSuccess);
+
+        // From CUDA 11 onward, plain (unregistered) host pointers are reported as cudaMemoryTypeUnregistered.
+        if (attributes.type == cudaMemoryTypeHost || attributes.type == cudaMemoryTypeUnregistered)
+        {
+            mFile.read(static_cast<char*>(destination), nbBytes);
+            return mFile.gcount();
+        }
+        else if (attributes.type == cudaMemoryTypeDevice)
+        {
+            // Set up a temp buffer to read into if reading into device memory.
+            std::unique_ptr<char[]> tmpBuf{new char[nbBytes]};
+            mFile.read(tmpBuf.get(), nbBytes);
+            // cudaMemcpyAsync into device storage.
+            ASSERT(cudaMemcpyAsync(destination, tmpBuf.get(), nbBytes, cudaMemcpyHostToDevice, stream) == cudaSuccess);
+            // No race between the copying and freeing of tmpBuf, because cudaMemcpyAsync will
+            // return once the pageable buffer has been copied to the staging memory for DMA transfer
+            // to device memory.
+            return mFile.gcount();
+        }
+        return -1;
+    }
+
+    void reset()
+    {
+        ASSERT(mFile.good());
+        mFile.seekg(0);
+    }
+
+    bool isOpen() const
+    {
+        return mFile.is_open();
+    }
+
+private:
+    std::ifstream mFile;
+};
+
+
+} // namespace samplesCommon
+
+#endif // STREAM_READER_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1de718436d4a026b4074514b8b2cf3b564be149e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/README.md
@@ -0,0 +1,59 @@
+General Setup Guide for Samples
+==============================
+
+
+## Download Sample Data
+
+Install the tool dependencies via `python3 -m pip install -r requirements.txt`.
+
+Invoke [downloader.py](downloader.py) to download the data with
+a command like the one below if `download.yml` is present in the
+sample directory ([example](onnx_packnet/download.yml)).
+
+```sh
+downloader.py -d /path/to/data/dir -f /path/to/download.yml
+```
+
+The data directory, i.e. `/path/to/data/dir`, is a centralized directory that
+stores the data of all samples, so you can use the same one for all samples.
+It can be provided by either `-d /path/to/data/dir` or the environment variable
+`$TRT_DATA_DIR`; if both are given, `-d` takes priority.
+
+Remember to use `-d` or `$TRT_DATA_DIR` when running sample scripts
+that rely on downloaded data. Scripts will abort if no downloaded data
+is found in the data directory. (Setting `$TRT_DATA_DIR` is much simpler.)
+An error will be thrown if the data is not properly set up.
+
+Each sample owns its `download.yml` file, which describes the sample
+name and the path, URL and checksum of the data files that are required by the sample.
+
+
+**Notes for sample developers**
+
+To use the downloaded data files, integrate a code segment like the one below into
+the sample code, and obtain the path to a data file by passing its `path`
+as specified in the associated `download.yml` file of the sample.
+
+```py
+TRT_DATA_DIR = None
+
+def getFilePath(path):
+    global TRT_DATA_DIR
+    if not TRT_DATA_DIR:
+        parser = argparse.ArgumentParser(description="Convert PackNet to ONNX")
+        parser.add_argument('-d', '--data', help="Specify the data directory where it is saved in. $TRT_DATA_DIR will be overwritten by this argument.")
+        args, _ = parser.parse_known_args()
+        TRT_DATA_DIR = os.environ.get('TRT_DATA_DIR', None) if args.data is None else args.data
+    if TRT_DATA_DIR is None:
+        raise ValueError("Data directory must be specified by either `-d $DATA` or environment variable $TRT_DATA_DIR.")
+
+    fullpath = os.path.join(TRT_DATA_DIR, path)
+    if not os.path.exists(fullpath):
+        raise ValueError("Data file %s doesn't exist!" % fullpath)
+
+    return fullpath
+```
+
+**Python Version Support**
+
+All Python samples are expected to be run with Python>=3.8. It is not recommended to use any lower version as there may be compatibility issues.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..889f3975bf1b95106566dfb01950b8f06e303b5f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/README.md
@@ -0,0 +1,85 @@
+# Utilizing a plugin with aliased I/O to realize in-place updates
+
+## Description
+
+This sample, `aliased_io_plugin`, implements a Python-based plugin for an in-place scatter-add operation.
+
+Scatter-add "scatters" a set of source values into memory locations based on a given set of indices and adds together those values mapped to the same location. 
+
+## How does this sample work?
+
+This sample creates and runs a TensorRT engine demonstrating an example commonly encountered with Graph Neural Networks (GNNs). In GNNs, the features associated with the neighbors of each node are aggregated with an order-independent operation (e.g. sum, product), averaged by the size of the neighborhood, and then run through a classifier to determine a property of interest; example applications of GNNs include the modeling of social networks and building recommendation systems.
+
+Here, we use an addition as the aggregation function; therefore, we build a network containing a Scatter-add plugin node. It receives a "source" tensor containing the features of the neighbors of each node, and an "index" tensor denoting the index of each such node. For example, consider the following graph:
+
+![alt text](aliased_io_gnn.png "GNN example")
+
+For simplicity, in this example, and in the sample in general, we utilize scalar features at each node. The "source" could be represented as a flattened tensor `[1.0, 3.0, 5.0, 7.0, 1.0, 3.0]` while the corresponding "index" tensor (the target node that each source value is added to) is `[1, 2, 3, 0, 2, 3]`. The Scatter-add should therefore yield `[7.0, 1.0, 4.0, 8.0]` (for example, node 2 receives `3.0 + 1.0 = 4.0`). This result is then normalized by the number of neighbors of each node and then fed into a simple dense layer followed by ReLU activation.
+
+### Implementing an in-place Scatter-add plugin using `IPluginV3OneBuildV2` interface
+
+Before the introduction of `IPluginV3OneBuildV2` interface, TensorRT plugin inputs were to be treated as read-only. In-place optimizations (output written to an input) and operations that inherently required an input to be modified, were kept out-of-reach due to this limitation.
+
+In the Scatter-add operation, an in-place operation is useful because a node of interest may have some pre-conditions that require the neighborhood aggregation to be combined with a bias. Another use case is in hierarchical aggregation where higher-layer features may have to be integrated as well. 
+
+To allow writes to the input, `IPluginV3OneBuildV2` interface provides an API to declare certain input-output pairs as being aliased. In this case, the first output of the plugin and the first input are aliased, so we may declare:
+```py
+def get_aliased_input(self, output_index: int):
+	if output_index == 0:
+		return 0
+	
+	return -1
+```
+A return value of `-1` indicates that that `output_index` is not aliased to any input.
+
+This new method `get_aliased_input` is the only difference between `IPluginV3OneBuildV2` and `IPluginV3OneBuild`. As part of the `V3_ONE` set of capability interfaces, `IPluginV3OneBuildV2` may be used in conjunction with `IPluginV3OneCore` and `IPluginV3OneRuntime`. 
+
+### Creating network and building the engine
+
+To add the plugin to the network, the `INetworkDefinition::add_plugin_v3()` method is used. 
+
+For subsequent averaging and classification steps, TensorRT ElementWise, MatrixMultiply, Activation and SoftMax layers are used.
+
+## Running the sample
+
+1.  Run the sample to create a TensorRT inference engine and run inference:
+    `python3 aliased_io_plugin.py [-h] [--precision {fp32,fp16}] [--node_features NODE_FEATURES] [--edges EDGES] [--num_classes NUM_CLASSES] [--validate] [--seed SEED]`
+
+2.  If the `--validate` flag was passed, verify that the sample ran successfully. If the sample runs successfully, you should see the following message:
+     ```
+    Validation against reference successful!
+    ```
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+# Additional resources
+
+The following resources provide a deeper understanding about the V3 TensorRT plugins and the Scatter-Add operation:
+
+**ScatterElements**
+- [ONNX: ScatterElements](https://onnx.ai/onnx/operators/onnx__ScatterElements.html)
+
+**TensorRT plugins**
+- [Extending TensorRT with Custom Layers](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#extending)
+- [TensorRT Python-based Plugins](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/#add_custom_layer_python)
+
+**Other documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/#python_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+August 2024
+This is the first version of this `README.md` file.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/aliased_io_gnn.png b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/aliased_io_gnn.png
new file mode 100644
index 0000000000000000000000000000000000000000..3ce0e0d64dd72d1200a3e5246eec2bf7c02c8fc8
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/aliased_io_gnn.png differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/aliased_io_plugin.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/aliased_io_plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c789da29dfde03c13c8ac2ec42c09ba99830a11
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/aliased_io_plugin.py
@@ -0,0 +1,454 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+import torch
+import triton
+import triton.language as tl
+
+import tensorrt as trt
+import cupy as cp
+import numpy as np
+import ast
+
+from polygraphy.backend.trt import (
+    CreateConfig,
+    TrtRunner,
+    create_network,
+    engine_from_network,
+)
+
+import argparse
+
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("AliasedIOPlugin").setLevel(logging.INFO)
+log = logging.getLogger("AliasedIOPlugin")
+
+import sys
+
+# An OpenAI Triton kernel that performs the scatter-add and also counts the occurrences of each index
+@triton.jit
+def scatter_add_kernel(
+    self_ptr,
+    src_ptr,  # Source array
+    index_ptr,  # Indices
+    n_elements,  # Number of elements in the source/indices array
+    n_labels,  # Number of labels (distinct indices)
+    counts,  # Output counts of each distinct index
+    BLOCK_SIZE: tl.constexpr,
+    BLOCK_SIZE_C: tl.constexpr,
+):
+    pid = tl.program_id(axis=0)
+    block_start = pid * BLOCK_SIZE
+    offsets = block_start + tl.arange(0, BLOCK_SIZE)
+
+    mask = offsets < n_elements
+
+    # Load the source values and indices
+    src = tl.load(src_ptr + offsets, mask=mask)
+    indices = tl.load(index_ptr + offsets, mask=mask)
+
+    # Iterate over n_labels
+    for i in range(0, BLOCK_SIZE_C):
+        idx = i + tl.program_id(1) * BLOCK_SIZE_C + 1
+        if idx <= n_labels:
+            l_mask = indices == idx
+            # Perform the scatter-add operation
+            tl.atomic_add(self_ptr + idx - 1, tl.sum(tl.where(l_mask, src, 0)))
+            # Update count for idx
+            tl.atomic_add(counts + idx - 1, tl.sum(tl.where(l_mask, 1, 0)))
+
+
+def volume(d):
+    return np.prod(d)
+
+
+class UnownedMemory:
+    def __init__(self, ptr, shape, dtype):
+        mem = cp.cuda.UnownedMemory(ptr, volume(shape) * cp.dtype(dtype).itemsize, self)
+        cupy_ptr = cp.cuda.MemoryPointer(mem, 0)
+        self.d = cp.ndarray(shape, dtype=dtype, memptr=cupy_ptr)
+
+
+class ScatterAddPlugin(
+    trt.IPluginV3,
+    trt.IPluginV3OneCore,
+    trt.IPluginV3OneBuildV2,
+    trt.IPluginV3OneRuntime,
+):
+    def __init__(self, fc=None):
+        trt.IPluginV3.__init__(self)
+        trt.IPluginV3OneCore.__init__(self)
+        trt.IPluginV3OneBuildV2.__init__(self)
+        trt.IPluginV3OneRuntime.__init__(self)
+
+        self.plugin_namespace = ""
+        self.plugin_name = "ScatterAddPlugin"
+        self.plugin_version = "1"
+        self.num_outputs = 2
+
+    def get_capability_interface(self, type):
+        return self
+
+    def get_output_data_types(self, input_types):
+        self.type = input_types[0]
+        return [input_types[0], trt.int64]
+
+    def get_fields_to_serialize(self):
+        return trt.PluginFieldCollection([])
+
+    def get_output_shapes(self, inputs, shape_inputs, exprBuilder):
+        output_dims = [
+            inputs[0],
+            trt.DimsExprs([inputs[0][0], exprBuilder.constant(1)]),
+        ]
+
+        return output_dims
+
+    def configure_plugin(self, inp, out):
+        pass
+
+    def on_shape_change(self, inp, out):
+        pass
+
+    def supports_format_combination(
+        self, pos: int, in_out: "list[trt.PluginTensorDesc]", num_inputs: int
+    ):
+        assert num_inputs == 3
+        assert pos < len(in_out)
+
+        desc = in_out[pos].desc
+        if desc.format != trt.TensorFormat.LINEAR:
+            return False
+
+        # self, src and output have the same type
+        if pos in [0, 1, 3]:
+            return desc.type == self.type
+
+        # indices and the counts output are int64
+        return desc.type == trt.int64
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):
+
+        # No-copy operations to setup torch tensors over the I/O buffers
+        inp_mem = UnownedMemory(
+            inputs[0], input_desc[0].dims, trt.nptype(input_desc[0].type)
+        )
+        src_mem = UnownedMemory(
+            inputs[1], input_desc[1].dims, trt.nptype(input_desc[1].type)
+        )
+        idx_mem = UnownedMemory(
+            inputs[2], input_desc[2].dims, trt.nptype(input_desc[2].type)
+        )
+        counts_mem = UnownedMemory(
+            outputs[1], output_desc[1].dims, trt.nptype(output_desc[1].type)
+        )
+
+        inp = torch.as_tensor(inp_mem.d, device="cuda")
+        src = torch.as_tensor(src_mem.d, device="cuda")
+        idx = torch.as_tensor(idx_mem.d, device="cuda")
+        counts = torch.as_tensor(counts_mem.d, device="cuda")
+
+        # Zero out the counts before passing to kernel
+        counts.zero_()
+
+        n_classes = inp.shape[0]
+        n_elements = src.numel()
+
+        # Block size definitions
+        BLOCK_SIZE = 1024
+        BLOCK_SIZE_C = 32
+
+        # Calculate grid size
+        grid_x = (n_elements + BLOCK_SIZE - 1) // BLOCK_SIZE
+        grid_y = (n_classes + BLOCK_SIZE_C - 1) // BLOCK_SIZE_C
+
+        scatter_add_kernel[(grid_x, grid_y)](
+            inp, src, idx, n_elements, n_classes, counts, BLOCK_SIZE, BLOCK_SIZE_C
+        )
+
+    def attach_to_context(self, context):
+        return self.clone()
+
+    def set_tactic(self, tactic):
+        pass
+
+    def get_aliased_input(self, output_index: int):
+        if output_index == 0:
+            return 0
+
+        return -1
+
+    def clone(self):
+        cloned_plugin = ScatterAddPlugin()
+        cloned_plugin.__dict__.update(self.__dict__)
+        return cloned_plugin
+
+
+class ScatterAddPluginCreator(trt.IPluginCreatorV3One):
+    def __init__(self):
+        trt.IPluginCreatorV3One.__init__(self)
+        self.name = "ScatterAddPlugin"
+        self.plugin_namespace = ""
+        self.plugin_version = "1"
+        self.field_names = trt.PluginFieldCollection([])
+
+    def create_plugin(self, name, fc, phase):
+        return ScatterAddPlugin()
+
+
+def torch_ref(node_features, edges, W, precision):
+    # Initialize an output tensor for aggregation
+    aggregated = torch.zeros_like(node_features, dtype=precision, device="cuda")
+
+    # Perform aggregation using scatter_add_
+    aggregated.scatter_add_(0, edges[:, 1].unsqueeze(1), node_features[edges[:, 0]])
+
+    # Get the counts of each distinct index
+    bincounts = torch.bincount(edges[:, 1].contiguous())
+
+    # Normalize and classify
+    Y = W * (aggregated / bincounts.unsqueeze(1)).transpose(1, 0)
+    return torch.softmax(torch.relu(Y), dim=0)
+
+
+numpy_to_torch_dtype = {
+    np.int32: torch.int32,
+    np.int64: torch.int64,
+    np.float16: torch.float16,
+    np.float32: torch.float32,
+}
+
+
+def parse_edges_string(input_string):
+    try:
+        # Parse the string into a list of integer pairs
+        raw_edges = ast.literal_eval(input_string)
+
+        # Check if the parsed object is a list
+        if not isinstance(raw_edges, list):
+            return None, "The input string does not represent a list."
+
+        edges = []
+        for edge in raw_edges:
+            if (
+                not isinstance(edge, list)
+                or len(edge) != 2
+                or not all(isinstance(x, int) for x in edge)
+            ):
+                return (
+                    None,
+                    f"Each edge must be a list of two integers. Invalid edge: {edge}",
+                )
+            edges.append(edge)
+
+        return edges, None
+    except (SyntaxError, ValueError) as e:
+        return None, f"Error parsing string: {e}"
+
+
+def validate_edges(edges, n_nodes):
+    for edge in edges:
+        src, target = edge
+        if not (0 <= src < n_nodes) or not (0 <= target < n_nodes):
+            return f"Edge ({src}, {target}) is out of bounds. Must be in range [0, {n_nodes - 1}]."
+
+    # check incoming edges
+    incoming_edges_count = [0] * n_nodes
+    for _, target in edges:
+        incoming_edges_count[target] += 1
+
+    for idx in range(n_nodes):
+        if incoming_edges_count[idx] == 0:
+            return f"Index {idx} has no incoming edges."
+    return None
+
+
+def parse_edges(input_string, n_nodes):
+    parsed_edges, parse_error = parse_edges_string(input_string)
+    if parse_error:
+        return None, parse_error
+    else:
+        # Validate the edges
+        validation_error = validate_edges(parsed_edges, n_nodes)
+        if validation_error is not None:
+            return None, validation_error
+        else:
+            return parsed_edges, None
+
+
+# Print adjacency matrix
+def print_graph(edges, n_nodes):
+    adjacency_matrix = [[0] * n_nodes for _ in range(n_nodes)]
+
+    for src, tgt in edges:
+        adjacency_matrix[src][tgt] = 1
+
+    for row in adjacency_matrix:
+        print(row)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--precision",
+        type=str,
+        default="fp32",
+        choices=["fp32", "fp16"],
+        help="Precision for node features",
+    )
+    parser.add_argument(
+        "--node_features",
+        type=str,
+        default="[1.0,3.0,5.0,7.0]",
+        help="List of node features as a comma-separated list. e.g. [1.0,2.0,3.0].",
+    )
+    parser.add_argument(
+        "--edges",
+        type=str,
+        default="[[0,1],[1,2],[2,3],[3,0],[0,2],[1,3]]",
+        help="Pairs of source->target directed edges. Every node must have at least one incoming edge. e.g. [[0,1],[1,0]].",
+    )
+    parser.add_argument(
+        "--num_classes", type=int, default=3, help="Number of classes in the classifier"
+    )
+    parser.add_argument(
+        "--validate", action="store_true", help="Validate result with reference"
+    )
+    parser.add_argument("--seed", type=int, help="Seed to use for weights generation")
+
+    args = parser.parse_args()
+
+    if args.seed is not None:
+        print("Setting seed to:", args.seed)
+        torch.manual_seed(args.seed)
+    else:
+        print("Setting seed to:", torch.seed())
+
+    precision = trt.float32 if args.precision == "fp32" else trt.float16
+    n_classes = args.num_classes
+
+    numpy_precision = trt.nptype(precision)
+    torch_precision = numpy_to_torch_dtype[numpy_precision]
+
+    if args.num_classes < 1:
+        parser.print_help()
+        log.error("num_classes must be a positive integer")
+        sys.exit(1)
+
+    try:
+        float_list = ast.literal_eval(args.node_features)
+        if not isinstance(float_list, list):
+            parser.print_help()
+            log.error("The node_features string does not represent a list")
+            sys.exit(1)
+
+        # Check if all elements in the list are floats/ints
+        if not all(isinstance(x, (float, int)) for x in float_list):
+            parser.print_help()
+            log.error("The node_features list must contain only numbers")
+            sys.exit(1)
+    except (SyntaxError, ValueError) as e:
+        parser.print_help()
+        log.error(f"The node_features string could not be parsed as a list: {e}")
+        sys.exit(1)
+
+    node_features = torch.tensor(float_list, dtype=torch_precision, device="cuda").view(
+        -1, 1
+    )
+
+    n_nodes = node_features.shape[0]
+
+    parsed_edges, parse_error = parse_edges(args.edges, n_nodes)
+    if parse_error:
+        parser.print_help()
+        log.error(parse_error)
+        sys.exit(1)
+
+    edges = torch.tensor(parsed_edges, device="cuda", dtype=torch.int64)
+
+    print()
+    print("Adjacency matrix for graph:")
+    print_graph(edges, n_nodes)
+    print()
+
+    target = torch.zeros_like(node_features, device="cuda")
+
+    input_x = target.clone()  # zero-initialised buffer that the plugin updates in place (aliased to its output 0)
+    input_src = node_features[edges[:, 0]].flatten()  # feature of the source node of every edge
+    input_idx = edges[:, 1].contiguous() + 1  # shifted by one: scatter_add_kernel compares against 1-based labels
+
+    W = torch.randn((n_classes, 1), dtype=torch_precision, device="cuda")
+
+    plg_registry = trt.get_plugin_registry()
+    my_plugin_creator = ScatterAddPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    builder, network = create_network()
+    input_x_T = network.add_input(name="X", dtype=precision, shape=input_x.shape)
+    input_src_T = network.add_input(name="src", dtype=precision, shape=input_src.shape)
+    input_idx_T = network.add_input(name="idx", dtype=trt.int64, shape=input_idx.shape)
+    w_T = network.add_input(name="W", dtype=precision, shape=W.shape)
+    out = network.add_plugin_v3(
+        [input_x_T, input_src_T, input_idx_T], [], ScatterAddPlugin()
+    )
+    cast_layer = network.add_cast(out.get_output(1), precision)
+    div_layer = network.add_elementwise(
+        out.get_output(0),
+        cast_layer.get_output(0),
+        op=trt.ElementWiseOperation.DIV,  # true division like torch_ref; FLOOR_DIV floored non-integral averages
+    )
+    matmul_layer = network.add_matrix_multiply(
+        w_T,
+        trt.MatrixOperation.NONE,
+        div_layer.get_output(0),
+        trt.MatrixOperation.TRANSPOSE,
+    )
+    relu_layer = network.add_activation(
+        matmul_layer.get_output(0), type=trt.ActivationType.RELU
+    )
+    softmax_layer = network.add_softmax(relu_layer.get_output(0))
+    softmax_layer.get_output(0).dtype = precision
+    softmax_layer.get_output(0).name = "softmax"
+    network.mark_output(tensor=softmax_layer.get_output(0))
+    build_engine = engine_from_network(
+        (builder, network),
+        CreateConfig(
+            fp16=precision == trt.float16,
+            preview_features=[trt.PreviewFeature.ALIASED_PLUGIN_IO_10_03],
+        ),
+    )
+
+    with TrtRunner(build_engine, "trt_runner") as runner:
+        outputs = runner.infer(
+            {"X": input_x, "src": input_src, "idx": input_idx, "W": W},
+            copy_outputs_to_host=False,
+        )
+
+        print()
+        print("Classifier output:")
+        print(outputs["softmax"])
+        print()
+
+        if args.validate:
+            tref = torch_ref(node_features, edges, W, torch_precision)
+            if torch.allclose(outputs["softmax"], tref, 1e-2):
+                print("Validation against reference successful!")
+            else:
+                print("Validation against reference failed!")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..be79359d0d342c278946fef085f7d7d6383278e1
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/aliased_io_plugin/requirements.txt
@@ -0,0 +1,12 @@
+cupy-cuda12x
+triton==3.1.0; (platform_system != "Windows" and python_version <= "3.8")
+triton==3.2.0; (platform_system != "Windows" and python_version >= "3.9")
+torch
+--extra-index-url https://pypi.ngc.nvidia.com
+polygraphy
+colored
+numpy==1.23.5; (platform_system != "Windows" and python_version <= "3.10")
+numpy==1.26.4; (platform_system != "Windows" and python_version >= "3.11")
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/common.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..10b2c323999ac28308d989799b140224fa1b0306
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/common.py
@@ -0,0 +1,146 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import os
+
+import tensorrt as trt
+from common_runtime import *
+
+try:
+    # Interpreters without a built-in FileNotFoundError (e.g. Python 2) raise NameError on this bare lookup.
+    FileNotFoundError
+except NameError:
+    FileNotFoundError = IOError
+
+
+def GiB(val):  # Convert `val` gibibytes (int or float) into a whole number of bytes.
+    return int(val * (1 << 30))  # parenthesised: `val * 1 << 30` parsed as `(val * 1) << 30` and rejected floats
+
+
+def add_help(description):
+    """Parse the command line with an option-less parser so that only argparse's built-in -h/--help has an effect."""
+    argparse.ArgumentParser(
+        description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    ).parse_known_args()
+
+
+def find_sample_data(
+    description="Runs a TensorRT Python sample", subfolder="", find_files=[], err_msg=""
+):
+    """
+    Parses the -d/--datadir command-line option and resolves the sample's data directories and files.
+
+    Args:
+        description (str): Description of the sample.
+        subfolder (str): The subfolder containing data relevant to this sample
+        find_files (List[str]): A list of filenames to find; forwarded to locate_files() together with err_msg.
+
+    Returns:
+        Tuple[List[str], List[str]]: The resolved data directories, and the absolute paths of find_files.
+    """
+
+    # Standard command-line arguments for all samples.
+    kDEFAULT_DATA_ROOT = os.path.join(os.sep, "usr", "src", "tensorrt", "data")
+    parser = argparse.ArgumentParser(
+        description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "-d",
+        "--datadir",
+        help="Location of the TensorRT sample data directory, and any additional data directories.",
+        action="append",
+        default=[kDEFAULT_DATA_ROOT],
+    )
+    args, _ = parser.parse_known_args()
+
+    def get_data_path(data_dir):
+        # If the subfolder exists, append it to the path, otherwise use the provided path as-is.
+        data_path = os.path.join(data_dir, subfolder)
+        if not os.path.exists(data_path):
+            if data_dir != kDEFAULT_DATA_ROOT:
+                print(
+                    "WARNING: "
+                    + data_path
+                    + " does not exist. Trying "
+                    + data_dir
+                    + " instead."
+                )
+            data_path = data_dir
+        # Warn (without failing) when a non-default data directory is still missing after the fallback.
+        if not (os.path.exists(data_path)) and data_dir != kDEFAULT_DATA_ROOT:
+            print(
+                "WARNING: {:} does not exist. Please provide the correct data path with the -d option.".format(
+                    data_path
+                )
+            )
+        return data_path
+
+    data_paths = [get_data_path(data_dir) for data_dir in args.datadir]
+    return data_paths, locate_files(data_paths, find_files, err_msg)
+
+
+def locate_files(data_paths, filenames, err_msg=""):
+    """
+    Locates the specified files in the specified data directories.
+    If a file exists in multiple data directories, the first directory is used.
+
+    Args:
+        data_paths (List[str]): The data directories, searched in order.
+        filenames (List[str]): The names of the files to find.
+        err_msg (str): Extra text appended to the error message when a file is missing.
+    Returns:
+        List[str]: The absolute paths of the files, in the same order as filenames.
+
+    Raises:
+        FileNotFoundError if a file could not be located.
+    """
+    found_files = [None] * len(filenames)
+    for data_path in data_paths:
+        # Find all requested files.
+        for index, (found, filename) in enumerate(zip(found_files, filenames)):
+            if not found:
+                file_path = os.path.abspath(os.path.join(data_path, filename))
+                if os.path.exists(file_path):
+                    found_files[index] = file_path
+
+    # Check that all files were found
+    for f, filename in zip(found_files, filenames):
+        if not f or not os.path.exists(f):
+            raise FileNotFoundError(
+                "Could not find {:}. Searched in data paths: {:}\n{:}".format(
+                    filename, data_paths, err_msg
+                )
+            )
+    return found_files
+
+
+# Attaches a timing cache to the config, seeded from timing_cache_path if that file exists (empty otherwise); writes no file.
+def setup_timing_cache(config: trt.IBuilderConfig, timing_cache_path: os.PathLike):
+    buffer = b""
+    if os.path.exists(timing_cache_path):
+        with open(timing_cache_path, mode="rb") as timing_cache_file:
+            buffer = timing_cache_file.read()
+    timing_cache: trt.ITimingCache = config.create_timing_cache(buffer)
+    config.set_timing_cache(timing_cache, True)
+
+
+# Writes the serialized form of the timing cache currently held by `config` to timing_cache_path (binary, overwriting).
+def save_timing_cache(config: trt.IBuilderConfig, timing_cache_path: os.PathLike):
+    cache: trt.ITimingCache = config.get_timing_cache()
+    with open(timing_cache_path, "wb") as cache_file:
+        cache_file.write(memoryview(cache.serialize()))
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/common_runtime.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/common_runtime.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c682927cd224e4cf6a455f6f756570c0fe5cefd
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/common_runtime.py
@@ -0,0 +1,170 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import ctypes
+from typing import Optional, List, Union
+
+import numpy as np
+import tensorrt as trt
+from cuda import cuda, cudart
+
+def check_cuda_err(err):  # Raises RuntimeError for a failed CUDA driver/runtime status or an unrecognised status type.
+    if isinstance(err, cuda.CUresult):
+        if err != cuda.CUresult.CUDA_SUCCESS:
+            raise RuntimeError("Cuda Error: {}".format(err))
+    elif isinstance(err, cudart.cudaError_t):  # elif: a CUresult must not fall through to the "unknown type" branch
+        if err != cudart.cudaError_t.cudaSuccess:
+            raise RuntimeError("Cuda Runtime Error: {}".format(err))
+    else:
+        raise RuntimeError("Unknown error type: {}".format(err))
+
+def cuda_call(call):
+    # `call` is a (status, *results) sequence: validate the status, then hand back only the results.
+    status, results = call[0], call[1:]
+    check_cuda_err(status)
+    # A lone result is returned bare; zero or several results are returned as the sliced sequence.
+    return results[0] if len(results) == 1 else results
+
+
+class HostDeviceMem:
+    """Pair of host and device memory, where the host memory is wrapped in a numpy array"""
+    def __init__(self, size: int, dtype: Optional[np.dtype] = None):
+        dtype = np.dtype(np.uint8) if dtype is None else np.dtype(dtype)  # explicit None test, not dtype truthiness
+        nbytes = size * dtype.itemsize
+        host_mem = cuda_call(cudart.cudaMallocHost(nbytes))
+        pointer_type = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))
+
+        self._host = np.ctypeslib.as_array(ctypes.cast(host_mem, pointer_type), (size,))
+        self._device = cuda_call(cudart.cudaMalloc(nbytes))
+        self._nbytes = nbytes
+
+    @property
+    def host(self) -> np.ndarray:
+        return self._host
+
+    @host.setter
+    def host(self, data: Union[np.ndarray, bytes]):
+        # Raw bytes are viewed as uint8 so that both input kinds share one bounds check and one prefix copy.
+        if not isinstance(data, np.ndarray):
+            assert self.host.dtype == np.uint8
+            data = np.frombuffer(data, dtype=np.uint8)
+        if data.size > self.host.size:
+            raise ValueError(
+                f"Tried to fit an array of size {data.size} into host memory of size {self.host.size}"
+            )
+        np.copyto(self.host[:data.size], data.flat, casting='safe')
+
+    @property
+    def device(self) -> int:
+        return self._device
+
+    @property
+    def nbytes(self) -> int:
+        return self._nbytes
+
+    def __str__(self):
+        return f"Host:\n{self.host}\nDevice:\n{self.device}\nSize:\n{self.nbytes}\n"
+
+    def __repr__(self):
+        return self.__str__()
+
+    def free(self):  # Releases the device allocation first, then the host allocation backing `host`.
+        cuda_call(cudart.cudaFree(self.device))
+        cuda_call(cudart.cudaFreeHost(self.host.ctypes.data))
+
+
+# Allocates one HostDeviceMem per engine I/O tensor plus a CUDA stream; returns (inputs, outputs, bindings, stream).
+# bindings lists the device addresses in engine tensor order. With dynamic shapes, pass profile_idx to size by the max shape.
+def allocate_buffers(engine: trt.ICudaEngine, profile_idx: Optional[int] = None):
+    inputs = []
+    outputs = []
+    bindings = []
+    stream = cuda_call(cudart.cudaStreamCreate())
+    tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
+    for binding in tensor_names:
+        # get_tensor_profile_shape returns (min_shape, optimal_shape, max_shape)
+        # Pick out the max shape to allocate enough memory for the binding.
+        shape = engine.get_tensor_shape(binding) if profile_idx is None else engine.get_tensor_profile_shape(binding, profile_idx)[-1]
+        shape_valid = np.all([s >= 0 for s in shape])
+        if not shape_valid and profile_idx is None:
+            raise ValueError(f"Binding {binding} has dynamic shape, " +\
+                "but no profile was specified.")
+        size = trt.volume(shape)
+        trt_type = engine.get_tensor_dtype(binding)
+
+        # Allocate host and device buffers, typed with the matching numpy dtype when one exists.
+        try:
+            dtype = np.dtype(trt.nptype(trt_type))
+            bindingMemory = HostDeviceMem(size, dtype)
+        except TypeError: # no numpy support: create a byte array instead (BF16, FP8, INT4)
+            size = int(size * trt_type.itemsize)
+            bindingMemory = HostDeviceMem(size)
+
+        # Append the device buffer to device bindings.
+        bindings.append(int(bindingMemory.device))
+
+        # Append to the appropriate list: INPUT tensors go to inputs, every other mode to outputs.
+        if engine.get_tensor_mode(binding) == trt.TensorIOMode.INPUT:
+            inputs.append(bindingMemory)
+        else:
+            outputs.append(bindingMemory)
+    return inputs, outputs, bindings, stream
+
+
+# Releases every buffer pair produced by allocate_buffers (inputs first, then outputs) and then destroys the stream.
+def free_buffers(inputs: List[HostDeviceMem], outputs: List[HostDeviceMem], stream: cudart.cudaStream_t):
+    for buffer_pair in inputs + outputs:
+        buffer_pair.free()
+    cuda_call(cudart.cudaStreamDestroy(stream))
+
+
+# Host-to-device cudaMemcpy of the whole array; the byte count comes from the array and the returned status is checked.
+def memcpy_host_to_device(device_ptr: int, host_arr: np.ndarray):
+    h2d = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
+    cuda_call(cudart.cudaMemcpy(device_ptr, host_arr, host_arr.nbytes, h2d))
+
+# Device-to-host cudaMemcpy filling the whole array; the byte count comes from the array and the returned status is checked.
+def memcpy_device_to_host(host_arr: np.ndarray, device_ptr: int):
+    d2h = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
+    cuda_call(cudart.cudaMemcpy(host_arr, device_ptr, host_arr.nbytes, d2h))
+
+
+def _do_inference_base(inputs, outputs, stream, execute_async_func):
+    # Enqueue host-to-device copies of every input, the inference itself, then device-to-host copies of every output.
+    h2d = cudart.cudaMemcpyKind.cudaMemcpyHostToDevice
+    for src in inputs:
+        cuda_call(cudart.cudaMemcpyAsync(src.device, src.host, src.nbytes, h2d, stream))
+    execute_async_func()
+    d2h = cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost
+    for dst in outputs:
+        cuda_call(cudart.cudaMemcpyAsync(dst.host, dst.device, dst.nbytes, d2h, stream))
+    # Wait for everything queued on the stream before exposing the host arrays.
+    cuda_call(cudart.cudaStreamSynchronize(stream))
+    # Only the host-side arrays are returned, in the order of `outputs`.
+    return [dst.host for dst in outputs]
+
+
+# Runs one inference for an engine with any number of inputs/outputs.
+# inputs and outputs are expected to be lists of HostDeviceMem objects; bindings[i] is the address for I/O tensor i.
+def do_inference(context, engine, bindings, inputs, outputs, stream):
+    # Point each I/O tensor of the context at its device buffer before anything is enqueued.
+    for tensor_idx in range(engine.num_io_tensors):
+        tensor_name = engine.get_tensor_name(tensor_idx)
+        context.set_tensor_address(tensor_name, bindings[tensor_idx])
+    def enqueue():
+        context.execute_async_v3(stream_handle=stream)
+    return _do_inference_base(inputs, outputs, stream, enqueue)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7703d56feb9d480b3fd2c7f4a06ad277082dd93a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/README.md
@@ -0,0 +1,98 @@
+# DDS Faster R-CNN Object Detection in TensorRT
+## Introduction
+The `dds_faster_rcnn` sample demonstrates the usage of [tensorrt.IOutputAllocator](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/ExecutionContext.html#tensorrt.IOutputAllocator) in TensorRT to execute networks with data-dependent shape (DDS) outputs. In this sample, we showcase an end-to-end workflow for building and running an object detection model [Faster-RCNN](https://arxiv.org/abs/1506.01497).
+
+### What are Data-Dependent Shapes (DDS)?
+Data-Dependent Shapes (DDS) refer to shapes of layer outputs in a neural network which depend on the input data to the layer; in other words, it cannot be inferred solely by inspecting the shapes of the layer's input tensors.  An example of this is the output shape of the `INonZeroLayer`, which is determined by the number of non-zero elements in the input tensor.
+
+DDS outputs are common in models that involve dynamic processing, such as object detection, segmentation, and natural language processing.
+
+### What is an `IOutputAllocator`?
+An `IOutputAllocator` is an interface in TensorRT that defines a class responsible for dynamically allocating and managing the device memory for output tensors of a TensorRT engine. The class implementing this interface must provide a way to allocate and deallocate memory for output tensors, which can vary in size depending on the input data.
+
+### Why do we need to implement `IOutputAllocator`?
+In traditional models, the output shapes are typically fixed and known at build time. However, in the case of data-dependent shaped (DDS) outputs, the output size is only known at inference time. This means that the memory allocation for output tensors cannot be determined until the model is actually run with a specific input. To handle this situation, TensorRT provides the `IOutputAllocator` interface, which allows developers to implement a custom memory allocation strategy for DDS outputs. By implementing this interface, developers can ensure that the output tensors are properly allocated and deallocated during inference, avoiding potential memory issues and improving the overall performance of the model.
+
+### How does `IOutputAllocator` work?
+To implement the `IOutputAllocator` interface, you need to provide implementations for the following two key methods:
+
+- `reallocate_output_async(self, tensor_name, memory, size, alignment, stream)`: This method is responsible for allocating or reallocating memory for an output tensor. It is called during the inference phase when the output tensor size is known. The method takes in parameters such as the tensor name, current memory address, new size, alignment, and CUDA stream, and returns the new memory address.
+- `notify_shape(self, tensor_name, shape)`: This method is used to notify the allocator of a change in the shape of an output tensor. It is typically called after reallocate_output_async() to update the allocator's internal state with the new shape information.
+During inference, the TensorRT engine will call these methods to manage the memory allocation for DDS output tensors. The `IOutputAllocator` implementation is responsible for ensuring that the memory allocation is properly handled, taking into account factors such as memory fragmentation, alignment, and performance optimization.
+
+Here is a high-level overview of the workflow:
+
+1. Instantiate the output allocator and attach to TensorRT with [IExecutionContext.set_output_allocator()](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Core/ExecutionContext.html#tensorrt.IExecutionContext.set_output_allocator)
+1. The TensorRT engine determines that an output tensor needs to be allocated or reallocated.
+1. `reallocate_output_async` is called to allocate or reallocate memory for the output tensor.
+1. The allocator updates its internal state and returns the new memory address.
+1. The TensorRT engine uses the new memory address to store the output tensor data.
+1. `notify_shape()` method is called to update the allocator's internal state with the new shape information.
+
+By implementing the `IOutputAllocator` interface, developers can create custom memory allocation strategies that optimize performance, reduce memory fragmentation, and improve the overall efficiency of their model inference.
+
+## Setup
+We recommend running these scripts on an environment with TensorRT >= 10.8.0.
+
+Install TensorRT as per the [TensorRT Install Guide](https://docs.nvidia.com/deeplearning/tensorrt/latest/installing-tensorrt/installing.html). You will need to make sure the Python bindings for TensorRT are also installed correctly; these are available by installing the `python3-libnvinfer` and `python3-libnvinfer-dev` packages from your TensorRT download.
+
+To simplify TensorRT installation, use an NGC Docker Image, such as:
+
+```bash
+docker pull nvcr.io/nvidia/tensorrt:25.01-py3
+```
+
+Install all dependencies listed in requirements.txt:
+
+```bash
+pip3 install -r requirements.txt
+```
+
+## Model Conversion
+To start, download the pre-trained Faster R-CNN model in ONNX format using the following command:
+
+```bash
+wget https://github.com/onnx/models/raw/refs/heads/main/validated/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12.onnx
+```
+
+With the ONNX model downloaded, run the following command to prepare it for TensorRT engine conversion:
+
+```bash
+python3 modify_onnx.py \
+    --input ./FasterRCNN-12.onnx \
+    --output ./fasterrcnn12_trt.onnx
+```
+
+This will create a modified ONNX graph file that is ready for conversion to a TensorRT engine.
+
+## Build TensorRT Engine
+
+To build the TensorRT engine, run the following command:
+
+```bash
+python3 build_engine.py \
+    --onnx ./fasterrcnn12_trt.onnx \
+    --engine ./fasterrcnn12_trt.engine
+```
+
+## Inference
+To test the built TensorRT engine, download a test image using the following command:
+
+```bash
+wget https://onnxruntime.ai/images/demo.jpg
+```
+
+Then, run the inference script using the following command:
+
+```bash
+python3 infer.py \
+    --engine ./fasterrcnn12_trt.engine \
+    --input ./demo.jpg \
+    --output ./output_dir \
+    --labels labels_coco_80.txt
+```
+This will perform object detection on the test image and save the output to the specified directory (`output_dir` in this case).
+
+## Changelog
+- Feb 2025
+    - Initial release
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/build_engine.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..6ab451447a1c7fb2b6600f3322f7701a7c66c8d9
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/build_engine.py
@@ -0,0 +1,142 @@
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import logging
+import argparse
+
+import tensorrt as trt
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("EngineBuilder")
+log.setLevel(logging.INFO)
+
+
+class EngineBuilder:
+    """
+    Parses an ONNX graph and builds a TensorRT engine from it.
+    """
+
+    def __init__(self, verbose=False, workspace=8):
+        """
+        :param verbose: If enabled, a higher verbosity level will be set on the TensorRT logger.
+        :param workspace: Max memory workspace to allow, in GiB.
+        """
+        self.trt_logger = trt.Logger(trt.Logger.INFO)
+        if verbose:
+            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE
+
+        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")
+
+        self.builder = trt.Builder(self.trt_logger)
+        self.config = self.builder.create_builder_config()
+        one_GiB = 2**30
+        self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace * one_GiB)
+        self.network = None
+        self.parser = None
+
+    def create_network(self, onnx_path):
+        """
+        Parse the ONNX graph and create the corresponding TensorRT network definition.
+        :param onnx_path: The path to the ONNX graph to load.
+        """
+
+        self.network = self.builder.create_network(0)
+        self.parser = trt.OnnxParser(self.network, self.trt_logger)
+
+        onnx_path = os.path.realpath(onnx_path)
+        with open(onnx_path, "rb") as f:
+            if not self.parser.parse(f.read()):
+                for error in range(self.parser.num_errors):
+                    log.error(self.parser.get_error(error))
+                raise RuntimeError(
+                    f"Failed to load ONNX file: {onnx_path}. Check the logs for more details or run with --verbose."
+                )
+
+        log.info("Network Description")
+
+        profile = self.builder.create_optimization_profile()
+        profile.set_shape("image", min=(3, 1, 1), opt=(3, 800, 800), max=(3, 800, 1312))
+        self.config.add_optimization_profile(profile)
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+        for input in inputs:
+            log.info(f"Input '{input.name}' with shape {input.shape} and dtype {input.dtype}")
+        outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]
+        for output in outputs:
+            log.info(f"Output '{output.name}' with shape {output.shape} and dtype {output.dtype}")
+
+    def create_engine(self, engine_path, timing_cache=None):
+        """
+        Build the TensorRT engine and serialize it to disk.
+        :param engine_path: The path where to serialize the engine to.
+        :param timing_cache: Optional timing cache file, loaded before and saved after the build; None disables it.
+        """
+        engine_path = os.path.realpath(engine_path)
+        engine_dir = os.path.dirname(engine_path)
+        os.makedirs(engine_dir, exist_ok=True)
+        log.info(f"Building Engine in {engine_path}")
+
+        # The cache path is a parameter (not the script-level `args` global) so the class also works when imported.
+        if timing_cache is not None:
+            log.info(f"Reading timing cache from file: {timing_cache}")
+            common.setup_timing_cache(self.config, timing_cache)
+
+        engine_bytes = self.builder.build_serialized_network(self.network, self.config)
+        if engine_bytes is None:
+            raise RuntimeError("Failed to create engine. Check the logs for more details or run with --verbose.")
+
+        if timing_cache is not None:
+            log.info(f"Serializing timing cache to file: {timing_cache}")
+            common.save_timing_cache(self.config, timing_cache)
+
+        with open(engine_path, "wb") as f:
+            log.info(f"Serializing engine to file: {engine_path}")
+            f.write(engine_bytes)
+
+
+def main(args):
+    builder = EngineBuilder(args.verbose, args.workspace)
+    builder.create_network(args.onnx)
+    builder.create_engine(
+        args.engine,
+        getattr(args, "timing_cache", None),
+    )
+
+
+if __name__ == "__main__":  # Command-line entry point: parse the build options below and hand them to main().
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-o", "--onnx", required=True, help="The input ONNX model file to load")
+    parser.add_argument("-e", "--engine", required=True, help="The output path for the TRT engine")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable more verbose log output")
+    parser.add_argument(
+        "-w",
+        "--workspace",
+        default=8,
+        type=int,
+        help="The max memory workspace size to allow in Gb, default: 8",
+    )
+    parser.add_argument(
+        "--timing_cache",
+        default="./timing.cache",
+        help="The file path for timing cache, default: ./timing.cache",
+    )
+    args = parser.parse_args()
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/infer.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..154a0e3c82c883a3af109aea9f403ff0bd65aed0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/infer.py
@@ -0,0 +1,477 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import time
+import argparse
+import numpy as np
+import tensorrt as trt
+from cuda import cuda, cudart
+from PIL import Image
+from pathlib import Path
+import threading
+from visualize import visualize_detections
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+from common import cuda_call
+
+
+class AllocatorState:
+    """
+    Represents the state of an allocator for a tensor: device pointer, allocation size and dims.
+    """
+
+    def __init__(self, ptr, size, dim=None):
+        """
+        :param ptr: The pointer to the allocated device memory.
+        :param size: The size of the allocated device memory.
+        :param dim: The initial dimensions of the tensor (stored as ``self.dims``).
+        """
+        self.ptr = ptr
+        self.size = size
+        self.dims = self.dim = dim  # `dims` is what update() maintains; `dim` stays as a legacy alias
+        self.lock = threading.Lock()
+
+    def update(self, ptr=None, size=None, dims=None):
+        """
+        Updates the state of the allocator while holding ``self.lock``.
+
+        :param ptr: The new pointer to the allocated device memory. If None, the current pointer is not changed.
+        :param size: The new size of the allocated device memory. If None, the current size is not changed.
+        :param dims: The new dimensions of the tensor. If None, the current dimensions are not changed.
+        """
+        with self.lock:
+            if ptr is not None:
+                self.ptr = ptr
+            if size is not None:
+                self.size = size
+            if dims is not None:
+                self.dims = dims
+
+
+class MyOutputAllocator(trt.IOutputAllocator):
+    """
+    Output allocator that keeps one growable device buffer (an AllocatorState) per output tensor.
+    """
+
+    def __init__(self, verbose=False):
+        """
+        :param verbose: If True, prints a message on every reallocation, shape update and free.
+        """
+        trt.IOutputAllocator.__init__(self)
+
+        self.lock = threading.Lock()
+        self.states = {}
+        self.verbose = verbose
+
+    def reallocate_output_async(self, tensor_name, current_memory, size, alignment, stream):
+        """
+        Returns a buffer of at least ``size`` bytes for the tensor, growing the tracked one if needed.
+
+        :param tensor_name: The name of the tensor.
+        :param current_memory: The current device memory pointer (not used; the tracked buffer wins).
+        :param size: The new size of the device memory block; values below 1 are raised to 1.
+        :param alignment: The alignment of the device memory block (not used here).
+        :param stream: The CUDA stream (not used here; allocation goes through cudaMalloc).
+        :return: The pointer recorded in ``self.states``, i.e. the buffer readers of ``states`` will see.
+        """
+        size = max(size, 1)
+        with self.lock:
+            state = self.states.get(tensor_name)
+            if state is None or size > state.size:
+                ptr = cuda_call(cudart.cudaMalloc(size))
+                if state is not None:
+                    cuda_call(cudart.cudaFree(state.ptr))
+                    state.update(ptr=ptr, size=size)
+                else:
+                    state = self.states[tensor_name] = AllocatorState(ptr=ptr, size=size)
+                if self.verbose:
+                    print(f"Reallocated {size} bytes for tensor '{tensor_name}' to {ptr}")
+            return state.ptr
+
+    def notify_shape(self, tensor_name, dims):
+        """
+        Records the latest dimensions reported for a tensor in its AllocatorState.
+
+        :param tensor_name: The name of the tensor; it must already have an entry in ``self.states``.
+        :param dims: The new dimensions of the tensor.
+        """
+        with self.lock:
+            assert tensor_name in self.states, f'Tensor "{tensor_name}" is not in states.'
+            self.states[tensor_name].update(dims=dims)
+            if self.verbose:
+                print(f"Updated shape for tensor '{tensor_name}': {dims}")
+
+    def __del__(self):
+        with self.lock:
+            for tensor_name, item in self.states.items():
+                if item.ptr is not None:
+                    cuda_call(cudart.cudaFree(item.ptr))
+                    if self.verbose:
+                        print(f"Freed memory for tensor '{tensor_name}'")
+            self.states.clear()
+
+
+class PoolAllocator(trt.IGpuAsyncAllocator):
+    """
+    A custom GPU async allocator class that manages memory allocation and deallocation.
+
+    It utilizes the CUDA memory pool API to optimize memory allocation and minimize fragmentation.
+    """
+
+    def __init__(self):
+        """
+        Initializes the PoolAllocator instance.
+
+        Creates a CUDA memory pool on device 0 and sets its release threshold to the maximum uint64 value.
+        """
+        trt.IGpuAsyncAllocator.__init__(self)
+
+        pool_props = cudart.cudaMemPoolProps()
+        pool_props.allocType = cudart.cudaMemAllocationType.cudaMemAllocationTypePinned
+        pool_props.handleTypes = cudart.cudaMemAllocationHandleType.cudaMemHandleTypeNone
+        pool_props.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
+        pool_props.location.id = 0
+
+        self.pool = cuda_call(cudart.cudaMemPoolCreate(pool_props))
+
+        max_threshold = np.uint64(np.iinfo(np.uint64).max)
+        cuda_call(
+            cudart.cudaMemPoolSetAttribute(
+                self.pool, cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold, cuda.cuuint64_t(max_threshold)
+            )
+        )
+
+    def allocate_async(self, size: int, alignment: int, flags: int, stream: cudart.cudaStream_t):
+        """
+        Allocates memory asynchronously from the CUDA memory pool.
+
+        :param size: The size of the memory block to allocate.
+        :param alignment: The alignment of the memory block (not used by this implementation).
+        :param flags: The flags for the allocation (not used by this implementation).
+        :param stream: The CUDA stream for the allocation.
+        :return: The pointer to the allocated device memory.
+        """
+        return cuda_call(cudart.cudaMallocFromPoolAsync(size, self.pool, stream))
+
+    def deallocate_async(self, memory, stream: cudart.cudaStream_t):
+        """
+        Deallocates memory asynchronously.
+
+        :param memory: The pointer to the memory to deallocate.
+        :param stream: The CUDA stream for the deallocation.
+        :return: True if the deallocation was successful.
+        """
+        cuda_call(cudart.cudaFreeAsync(memory, stream))
+        return True
+
+    def __del__(self):
+        pool = getattr(self, "pool", None)  # attribute is absent if __init__ raised before pool creation
+        if pool:
+            cuda_call(cudart.cudaMemPoolDestroy(pool))
+
+
+class TensorRTInfer:
+    """
+    Implements inference for the FasterRCNN TensorRT engine.
+    """
+
+    def __init__(self, engine_path, use_custom_gpu_allocator=False, verbose=False):
+        """
+        Initializes the TensorRTInfer instance.
+
+        :param engine_path: The path to the serialized engine to load from disk.
+        :param use_custom_gpu_allocator: If True, uses a custom GPU allocator.
+        :param verbose: If True, enables verbose logging (TensorRT logger and output allocator).
+        """
+        # Load TRT engine
+        self.logger = trt.Logger(trt.Logger.ERROR)
+        if verbose:
+            self.logger.min_severity = trt.Logger.VERBOSE
+        trt.init_libnvinfer_plugins(self.logger, namespace="")
+        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
+            assert runtime
+            if use_custom_gpu_allocator:
+                self.my_pool_allocator = PoolAllocator()
+                runtime.gpu_allocator = self.my_pool_allocator
+            self.engine = runtime.deserialize_cuda_engine(f.read())
+        assert self.engine
+        self.context = self.engine.create_execution_context()
+        assert self.context
+
+        self.my_output_allocator = MyOutputAllocator(verbose=verbose)  # honor the caller's verbosity
+        # Setup I/O bindings
+        self.inputs = []
+        self.outputs = []
+        self.allocations = []
+        for i in range(self.engine.num_io_tensors):
+            name = self.engine.get_tensor_name(i)
+            # Anything that is not an engine input is handled as an output below.
+            mode = self.engine.get_tensor_mode(name)
+            is_input = mode == trt.TensorIOMode.INPUT
+            dtype = trt.nptype(self.engine.get_tensor_dtype(name))
+
+            # trt.nptype returns a python 'type'. For here we want a numpy 'dtype' object
+            # instead to get more info about the dtype (dtype.itemsize in this case)
+            dtype = np.dtype(dtype)
+            shape = self.engine.get_tensor_shape(name)
+
+            # Use the max shape in the profile for dynamic shaped inputs
+            if is_input and any(value < 0 for value in shape):
+                assert self.engine.num_optimization_profiles > 0
+                profile_shape = self.engine.get_tensor_profile_shape(name, 0)
+                assert len(profile_shape) == 3  # min,opt,max
+                # Set the *max* profile as binding shape
+                shape = profile_shape[2]
+
+            if is_input:
+                nbytes = np.prod(shape) * dtype.itemsize
+                allocation = cuda_call(cudart.cudaMalloc(nbytes))
+            else:
+                self.context.set_output_allocator(name, self.my_output_allocator)
+                allocation = cuda_call(
+                    cudart.cudaMalloc(128 * dtype.itemsize)
+                )  # Random number. More will be allocated using our custom allocator
+
+            binding = {
+                "index": i,
+                "name": name,
+                "dtype": dtype,
+                "shape": list(shape),
+                "allocation": allocation,
+            }
+            self.allocations.append(allocation)
+            if is_input:
+                self.inputs.append(binding)
+            else:
+                self.outputs.append(binding)
+            print(
+                f"{'Input' if is_input else 'Output'} '{binding['name']}' with shape {binding['shape']} and dtype {binding['dtype']}"
+            )
+
+        assert len(self.inputs) > 0
+        assert len(self.outputs) > 0
+        assert len(self.allocations) > 0
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        return self.inputs[0]["shape"], self.inputs[0]["dtype"]
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+        """
+        specs = []
+        for o in self.outputs:
+            specs.append((o["shape"], o["dtype"]))
+        return specs
+
+    def preprocess_image(self, image):
+        """
+        Preprocesses an image for inference. See also
+        https://github.com/onnx/models/tree/refs/heads/main/validated/vision/object_detection_segmentation/faster-rcnn#preprocessing-steps
+
+        :param image: The PIL image to preprocess; it is converted to RGB first.
+        :return: The preprocessed image as a float32 CHW numpy array, zero-padded to multiples of 32.
+        """
+        # Force 3-channel RGB so grayscale/palette files do not break the channel indexing below
+        image = image.convert("RGB")
+        ratio = 800.0 / min(image.size[0], image.size[1])
+        image = image.resize((int(ratio * image.size[0]), int(ratio * image.size[1])), Image.BILINEAR)
+
+        # RGB -> BGR
+        image = np.array(image)[:, :, [2, 1, 0]].astype("float32")
+
+        # HWC -> CHW
+        image = np.transpose(image, [2, 0, 1])
+
+        # Normalize: subtract the per-channel mean (channels are in BGR order here), in float32
+        mean_vec = np.array([102.9801, 115.9465, 122.7717], dtype=np.float32)
+        image -= mean_vec[:, None, None]
+
+        # Pad to be divisible of 32
+        padded_h = int(np.ceil(image.shape[1] / 32) * 32)
+        padded_w = int(np.ceil(image.shape[2] / 32) * 32)
+
+        padded_image = np.zeros((3, padded_h, padded_w), dtype=np.float32)
+        padded_image[:, : image.shape[1], : image.shape[2]] = image
+
+        return padded_image
+
+    def infer(self, arr):
+        """
+        Execute inference on an image.
+
+        :param arr: A numpy array for the input image values.
+        :return: A list of outputs as numpy arrays.
+        """
+        # Copy I/O and Execute
+        common.memcpy_host_to_device(self.inputs[0]["allocation"], arr)
+        self.context.execute_v2(self.allocations)
+
+        # Copy outputs to host. The final dims and device buffer of each output are the ones
+        # recorded by the output allocator, so they are read from its per-tensor state.
+        return_outputs = []
+        for output in self.outputs:
+            state = self.my_output_allocator.states[output["name"]]
+            # Uninitialized host buffer; the device-to-host copy below is expected to fill it.
+            host_arr = np.empty(tuple(state.dims), dtype=output["dtype"])
+
+            common.memcpy_device_to_host(host_arr, state.ptr)
+
+            return_outputs.append(host_arr)
+
+        return return_outputs
+
+    def process(self, arr):
+        """
+        Preprocess an image, run inference on it and turn the raw outputs into detections. Memory
+        copying to and from the GPU device will be performed here.
+
+        :param arr: The PIL image to run detection on; it is preprocessed here, not by the caller.
+        :return: A list of detected object with box, score, class included (score > 0.9 only).
+        """
+        preprocess_arr = self.preprocess_image(arr.copy())
+        self.context.set_input_shape("image", preprocess_arr.shape)
+
+        # Run inference
+        outputs = self.infer(preprocess_arr)
+
+        # Post-process the results
+        scale = 800.0 / min(arr.size[0], arr.size[1])
+
+        boxes = outputs[0]
+        labels = outputs[1]
+        scores = outputs[2]
+        num = len(labels)
+
+        detections = []
+        for i in range(num):
+            if scores[i] > 0.9:
+                detections.append(
+                    {
+                        "xmin": boxes[i][0] / scale,
+                        "ymin": boxes[i][1] / scale,
+                        "xmax": boxes[i][2] / scale,
+                        "ymax": boxes[i][3] / scale,
+                        "score": scores[i],
+                        "class": labels[i] - 1,
+                    }
+                )
+        return detections
+
+
+def main(args):
+    """
+    Run detection on an image or a directory of images, or benchmark the engine when no input is given.
+
+    :param args: Parsed command-line arguments (engine, input, output, labels, allocator and verbose flags).
+    """
+    if args.output:
+        args.output.resolve().mkdir(exist_ok=True, parents=True)
+
+    labels = []
+    if args.labels:
+        with open(args.labels) as f:
+            labels = [label.strip() for label in f]
+
+    trt_infer = TensorRTInfer(args.engine, args.use_custom_gpu_allocator, args.verbose)
+    if args.input:
+        print(f"\nInferring data in {args.input}")
+        if args.input.is_dir():
+            # Sorted for a reproducible processing order; sub-directories are skipped.
+            image_paths = sorted(p for p in args.input.iterdir() if p.is_file())
+        else:
+            image_paths = [args.input]
+
+        for image_path in image_paths:
+            # Release the image file handle as soon as inference on it is done.
+            with Image.open(image_path) as image:
+                detections = trt_infer.process(image)
+            if args.output:
+                # Image Visualizations
+                output_path = args.output / f"{image_path.stem}.png"
+                visualize_detections(image_path, output_path, detections, labels)
+
+                # Text Results
+                rows = (
+                    (d["xmin"], d["ymin"], d["xmax"], d["ymax"], d["score"])
+                    for d in detections
+                )
+                output_results = "".join("\t".join(str(v) for v in row) + "\n" for row in rows)
+                with open(args.output / f"{image_path.stem}.txt", "w") as f:
+                    f.write(output_results)
+    else:
+        print("No input provided, running in benchmark mode")
+        shape, dtype = trt_infer.input_spec()
+        batch = 255 * np.random.rand(*shape).astype(dtype)
+        trt_infer.context.set_input_shape("image", (batch.shape))
+        iterations = 200
+        times = []
+        for i in range(20):  # GPU warmup iterations
+            trt_infer.infer(batch)
+        for i in range(iterations):
+            # perf_counter() is the clock intended for measuring short durations.
+            start = time.perf_counter()
+            trt_infer.infer(batch)
+            times.append(time.perf_counter() - start)
+            print(f"Iteration {i+1} / {iterations}", end="\r")
+        print("Benchmark results include time for H2D and D2H memory copies")
+        print(f"Average Latency: {1000 * np.average(times):.3f} ms")
+
+    print("\nFinished Processing")
+
+
+if __name__ == "__main__":
+    # Command-line interface of the inference / benchmark script.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("-e", "--engine", default=None, required=True, help="The serialized TensorRT engine")
+    cli.add_argument(
+        "-i",
+        "--input",
+        type=Path,
+        default=None,
+        help="Path to the image or directory to process",
+    )
+    cli.add_argument(
+        "-o",
+        "--output",
+        type=Path,
+        default=None,
+        help="Directory where to save the visualization results",
+    )
+    cli.add_argument(
+        "-l", "--labels",
+        default="./labels_coco_80.txt",
+        help="File to use for reading the class labels from, default: ./labels_coco_80.txt",
+    )
+    cli.add_argument(
+        "-c",
+        "--use_custom_gpu_allocator",
+        default=False,
+        action="store_true",
+        help="Use a custom gpu allocator with CUDA memory pools for better performance",
+    )
+    cli.add_argument("-v", "--verbose", default=False, action="store_true", help="Set to verbose logging")
+    args = cli.parse_args()
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/labels_coco_80.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/labels_coco_80.txt
new file mode 100644
index 0000000000000000000000000000000000000000..941cb4e1392266f6a6c09b1fdc5f79503b2e5df6
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/labels_coco_80.txt
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/modify_onnx.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/modify_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..6844d8148547bcaa443c9a9c76cfcedbbedc3c2b
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/modify_onnx.py
@@ -0,0 +1,56 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import onnx
+import numpy as np
+import argparse
+
+
+def modify_maskrcnn_opset12(path_to_model, output_path):
+    """
+    Rewrite every Cast-to-UINT8 node of the ONNX model as a Cast-to-BOOL and save the result.
+
+    Targets the Cast[BOOL->UINT8] -> Cast[UINT8->BOOL] pattern (node 2838) so all ops can stay in bool.
+    """
+    graph = gs.import_onnx(onnx.load(path_to_model))
+    for node in graph.nodes:
+        if node.op == "Cast" and node.attrs.get("to") == onnx.TensorProto.UINT8:
+            node.attrs["to"] = onnx.TensorProto.BOOL
+            node.outputs[0].dtype = np.bool_
+            # Need to modify output_node output to be bool as well.
+            for output_node in node.outputs[0].outputs:
+                output_node.outputs[0].dtype = np.bool_
+            print(f"Removed UINT8 casts in node {node.name}")
+
+    onnx.save(gs.export_onnx(graph.cleanup()), output_path)
+
+
+if __name__ == "__main__":
+    # CLI entry point: read the source/destination model paths and run the rewrite.
+    cli = argparse.ArgumentParser()
+    input_help = "Path to the onnx model obtained from https://github.com/onnx/models/raw/refs/heads/main/validated/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12.onnx"
+    cli.add_argument("-i", "--input", default="FasterRCNN-12.onnx", help=input_help)
+    cli.add_argument(
+        "-o",
+        "--output",
+        default="fasterrcnn12_trt.onnx",
+        help="Desired path for the output onnx model",
+    )
+    args = cli.parse_args()
+
+    modify_maskrcnn_opset12(args.input, args.output)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ef6e63395b4e7c71be06c35480654a60e66a00a0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/requirements.txt
@@ -0,0 +1,6 @@
+pillow
+cuda-python
+onnx
+onnx-graphsurgeon --index-url https://pypi.ngc.nvidia.com
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/visualize.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..39385c2713740487ed579ca8964c11f943678712
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/dds_faster_rcnn/visualize.py
@@ -0,0 +1,199 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+
+import PIL.Image as Image
+import PIL.ImageDraw as ImageDraw
+import PIL.ImageFont as ImageFont
+
+COLORS = [
+    "GoldenRod",
+    "MediumTurquoise",
+    "GreenYellow",
+    "SteelBlue",
+    "DarkSeaGreen",
+    "SeaShell",
+    "LightGrey",
+    "IndianRed",
+    "DarkKhaki",
+    "LawnGreen",
+    "WhiteSmoke",
+    "Peru",
+    "LightCoral",
+    "FireBrick",
+    "OldLace",
+    "LightBlue",
+    "SlateGray",
+    "OliveDrab",
+    "NavajoWhite",
+    "PaleVioletRed",
+    "SpringGreen",
+    "AliceBlue",
+    "Violet",
+    "DeepSkyBlue",
+    "Red",
+    "MediumVioletRed",
+    "PaleTurquoise",
+    "Tomato",
+    "Azure",
+    "Yellow",
+    "Cornsilk",
+    "Aquamarine",
+    "CadetBlue",
+    "CornflowerBlue",
+    "DodgerBlue",
+    "Olive",
+    "Orchid",
+    "LemonChiffon",
+    "Sienna",
+    "OrangeRed",
+    "Orange",
+    "DarkSalmon",
+    "Magenta",
+    "Wheat",
+    "Lime",
+    "GhostWhite",
+    "SlateBlue",
+    "Aqua",
+    "MediumAquaMarine",
+    "LightSlateGrey",
+    "MediumSeaGreen",
+    "SandyBrown",
+    "YellowGreen",
+    "Plum",
+    "FloralWhite",
+    "LightPink",
+    "Thistle",
+    "DarkViolet",
+    "Pink",
+    "Crimson",
+    "Chocolate",
+    "DarkGrey",
+    "Ivory",
+    "PaleGreen",
+    "DarkGoldenRod",
+    "LavenderBlush",
+    "SlateGrey",
+    "DeepPink",
+    "Gold",
+    "Cyan",
+    "LightSteelBlue",
+    "MediumPurple",
+    "ForestGreen",
+    "DarkOrange",
+    "Tan",
+    "Salmon",
+    "PaleGoldenRod",
+    "LightGreen",
+    "LightSlateGray",
+    "HoneyDew",
+    "Fuchsia",
+    "LightSeaGreen",
+    "DarkOrchid",
+    "Green",
+    "Chartreuse",
+    "LimeGreen",
+    "AntiqueWhite",
+    "Beige",
+    "Gainsboro",
+    "Bisque",
+    "SaddleBrown",
+    "Silver",
+    "Lavender",
+    "Teal",
+    "LightCyan",
+    "PapayaWhip",
+    "Purple",
+    "Coral",
+    "BurlyWood",
+    "LightGray",
+    "Snow",
+    "MistyRose",
+    "PowderBlue",
+    "DarkCyan",
+    "White",
+    "Turquoise",
+    "MediumSlateBlue",
+    "PeachPuff",
+    "Moccasin",
+    "LightSalmon",
+    "SkyBlue",
+    "Khaki",
+    "MediumSpringGreen",
+    "BlueViolet",
+    "MintCream",
+    "Linen",
+    "SeaGreen",
+    "HotPink",
+    "LightYellow",
+    "BlanchedAlmond",
+    "RoyalBlue",
+    "RosyBrown",
+    "MediumOrchid",
+    "DarkTurquoise",
+    "LightGoldenRodYellow",
+    "LightSkyBlue",
+]
+
+
+def visualize_detections(image_path, output_path, detections, labels=[]):
+    """
+    Draw each detection's box and a "label: score%" tag on the image.
+
+    :param image_path: Image file to draw on (opened and converted to RGB).
+    :param output_path: Where to save the annotated image; if None the image is returned instead.
+    :param detections: Dicts with "xmin", "ymin", "xmax", "ymax", "score" and "class" keys.
+    :param labels: Class names indexed by class id; ids outside the list are shown as "Class <id>".
+    :return: The annotated PIL image when output_path is None, otherwise None.
+    """
+    image = Image.open(image_path).convert(mode="RGB")
+    draw = ImageDraw.Draw(image)
+    line_width = 2
+    font = ImageFont.load_default()
+    for d in detections:
+        color = COLORS[d["class"] % len(COLORS)]
+        x0, y0, x1, y1 = d["xmin"], d["ymin"], d["xmax"], d["ymax"]
+        # Closed 5-point polyline tracing the box outline.
+        corners = [(x0, y0), (x0, y1), (x1, y1), (x1, y0), (x0, y0)]
+        draw.line(corners, width=line_width, fill=color)
+        label = f"Class {d['class']}"
+        # Lower bound matters: a negative id would otherwise index labels from the end.
+        if 0 <= d["class"] < len(labels):
+            label = f"{labels[d['class']]}"
+        score = d["score"]
+        text = f"{label}: {int(100*score)}%"
+        if score < 0:
+            text = label
+        left, top, right, bottom = font.getbbox(text)
+        text_width, text_height = right - left, bottom - top
+        # Clamp so the tag is not pushed above the top of the image.
+        text_bottom = max(text_height, d["ymin"])
+        text_left = d["xmin"]
+        margin = np.ceil(0.05 * text_height)
+        # Filled background behind the text, in the box color.
+        draw.rectangle(
+            [
+                (text_left, text_bottom - text_height - 2 * margin),
+                (text_left + text_width, text_bottom),
+            ],
+            fill=color,
+        )
+        draw.text((text_left + margin, text_bottom - text_height - margin), text, fill="black", font=font)
+    if output_path is None:
+        return image
+    image.save(output_path)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1d83e74dd8c6450951b1373b287b01430f85fbb0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/README.md
@@ -0,0 +1,222 @@
+# Detectron 2 Mask R-CNN R50-FPN 3x in TensorRT
+
+Support for Detectron 2 Mask R-CNN R50-FPN 3x model in TensorRT. This script helps with converting, running and validating this model with TensorRT.
+
+## Changelog
+
+- Aug 2023
+  - Update ONNX version support to 1.14.0
+  - Update ONNX Runtime version support to 1.15.1 for Python>=3.8
+  - Removed support for Python versions < 3.8.
+- July 2023:
+  - Update benchmarks and include hardware used.
+- October 2022:
+  - Updated converter to support `tracing` export instead of deprecated `caffe2_tracing`
+
+## Setup
+
+In order for scripts to work we suggest an environment with TensorRT >= 8.4.1.
+
+Install TensorRT as per the [TensorRT Install Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You will need to make sure the Python bindings for TensorRT are also installed correctly; these are available by installing the `python3-libnvinfer` and `python3-libnvinfer-dev` packages from your TensorRT download.
+
+Install all dependencies listed in `requirements.txt`:
+
+```
+pip install -r requirements.txt
+```
+Note: this sample cannot be run on Jetson platforms as `torch.distributed` is not available. To check whether your platform supports `torch.distributed`, open a Python shell and confirm that `torch.distributed.is_available()` returns `True`.
+
+## Model Conversion
+
+The workflow to convert the Detectron 2 Mask R-CNN R50-FPN 3x model is basically Detectron 2 → ONNX → TensorRT, and so parts of this process require Detectron 2 to be installed. Official export to ONNX is documented [here](https://detectron2.readthedocs.io/en/latest/tutorials/deployment.html).
+
+### Detectron 2 Deployment
+Deployment is done through export model script located in `detectron2/tools/deploy/export_model.py` of Detectron 2 [github](https://github.com/facebookresearch/detectron2). Detectron 2 Mask R-CNN R50-FPN 3x model is dynamic with minimum testing dimension size of 800 and maximum of 1333. TensorRT plug-ins used for conversion of this model do not support dynamic shapes, as a result we have to set both height and width of the input tensor to 1344. 1344 instead of 1333 because model requires both height and width of the input tensor to be divisible by 32. In order to export this model with correct 1344x1344 resolution, we have to make a change to `export_model.py`. Currently lines 160-162:
+
+```
+aug = T.ResizeShortestEdge(
+    [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
+)
+```
+have to be changed to:
+
+```
+aug = T.ResizeShortestEdge(
+    [1344, 1344], 1344
+)
+```
+
+The export script takes `--sample-image` as one of its arguments. This image is used to set the input dimensions and the dimensions of the tensors in the rest of the network. The sample image has to be a 1344x1344 image that contains at least one object the model can detect. We recommend upsampling one of the COCO dataset images to 1344x1344. Sample command:
+
+```
+python detectron2/tools/deploy/export_model.py \
+    --sample-image 1344x1344.jpg \
+    --config-file detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
+    --export-method tracing \
+    --format onnx \
+    --output ./ \
+    MODEL.WEIGHTS path/to/model_final_f10217.pkl \
+    MODEL.DEVICE cuda
+
+```
+
+Where `--sample-image` is a 1344x1344 image; `--config-file` is the path to the Mask R-CNN R50-FPN 3x config, included with detectron2; and `MODEL.WEIGHTS` are the weights of Mask R-CNN R50-FPN 3x, which can be downloaded [here](https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md). The resulting `model.onnx` will be the input to the conversion script.
+
+### Create ONNX Graph
+This is the supported Detectron 2 model:
+
+| **Model**                                     | **Resolution** |
+| ----------------------------------------------|----------------|
+| Mask R-CNN R50-FPN 3x                         | 1344x1344      |
+
+If Detectron 2 Mask R-CNN is ready to be converted (i.e. you ran `detectron2/tools/deploy/export_model.py`), run:
+
+```
+python create_onnx.py \
+    --exported_onnx /path/to/model.onnx \
+    --onnx /path/to/converted.onnx \
+    --det2_config /detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
+    --det2_weights /model_final_f10217.pkl \
+    --sample_image any_image.jpg
+```
+
+This will create the file `converted.onnx` which is ready to convert to TensorRT.
+
+It is important to mention that `--sample_image` in this case is used for anchor generation. Detectron 2 ONNX models do not have anchor data inside the graph, so anchors have to be generated "offline". If a custom model is used, make sure the preprocessing of your model matches what is coded in the `get_anchors(self, sample_image)` function.
+
+The script has a few optional arguments, including:
+
+* `--first_nms_threshold [...]` allows overriding the default 1st NMS score threshold parameter, as the runtime latency of the NMS plugin is sensitive to this value. It's a good practice to set this value as high as possible, while still fulfilling your application requirements, to reduce inference latency. In Mask R-CNN this will be a score threshold for Region Proposal Network.
+* `--second_nms_threshold [...]` allows overriding the default 2nd NMS score threshold parameter, further improves the runtime latency of the NMS plugin. It's a good practice to set this value as high as possible, while still fulfilling your application requirements, to reduce inference latency. It will be the second and last NMS.
+* `--batch_size` allows selection of various batch sizes, default is 1.
+
+
+Optionally, you may wish to visualize the resulting ONNX graph with a tool such as [Netron](https://netron.app/).
+
+The input to the graph is a `float32` tensor with the selected input shape, containing RGB pixel data in the range of 0 to 255. All preprocessing will be performed inside the Model graph, so it is not required to further pre-process the input data.
+
+
+The outputs of the graph are the same as the outputs of the [EfficientNMS_TRT](https://github.com/NVIDIA/TensorRT/tree/master/plugin/efficientNMSPlugin) plugin and segmentation head output, name of the last node is `detection_masks`, shape is `[batch_size, max_proposals, mask_height, mask_width]`, dtype is float32.
+
+### Build TensorRT Engine
+
+TensorRT engine can be built directly with `trtexec` using the ONNX graph generated in the previous step. If it's not already in your `$PATH`, the `trtexec` binary is usually found in `/usr/src/tensorrt/bin/trtexec`, depending on your TensorRT installation method. Run:
+
+```
+trtexec --onnx=/path/to/converted.onnx --saveEngine=/path/to/engine.trt --useCudaGraph
+```
+
+However, the script `build_engine.py` is also provided in this repository for convenience, as it has been tailored to Detectron 2 Mask R-CNN R50-FPN 3x engine building and INT8 calibration. Run `python3 build_engine.py --help` for details on available settings.
+
+#### FP16 Precision
+
+To build the TensorRT engine file with FP16 precision, run:
+
+```
+python3 build_engine.py \
+    --onnx /path/to/converted.onnx \
+    --engine /path/to/engine.trt \
+    --precision fp16
+```
+
+The file `engine.trt` will be created, which can now be used to infer with TensorRT.
+
+For best results, make sure no other processes are using the GPU during engine build, as it may affect the optimal tactic selection process.
+
+#### INT8 Precision
+
+To build and calibrate an engine for INT8 precision, run:
+
+```
+python3 build_engine.py \
+    --onnx /path/to/converted.onnx \
+    --engine /path/to/engine.trt \
+    --precision int8 \
+    --calib_input /path/to/calibration/images \
+    --calib_cache /path/to/calibration.cache
+```
+
+Where `--calib_input` points to a directory with several thousands of images. For example, this could be a subset of the training or validation datasets that were used for the model. It's important that this data represents the runtime data distribution relatively well, therefore, the more images that are used for calibration, the better accuracy that will be achieved in INT8 precision. For models trained for the [COCO dataset](https://cocodataset.org/#home), we have found that 5,000 images gives a good result.
+
+The `--calib_cache` is optional, and it controls where the calibration cache file will be written to. This is useful to keep a cached copy of the calibration results. Next time you need to build an int8 engine for the same network, if this file exists, the builder will skip the calibration step and use the cached values instead.
+
+#### Benchmark Engine
+
+Optionally, you can obtain execution timing information for the built engine by using the `trtexec` utility, as:
+
+```
+trtexec \
+    --loadEngine=/path/to/engine.trt \
+    --useCudaGraph --noDataTransfers \
+    --iterations=100 --avgRuns=100
+```
+
+An inference benchmark will run, with GPU Compute latency times printed out to the console. Depending on your environment, you should see something similar to:
+
+```
+GPU Compute Time: min = 30.1864 ms, max = 37.0945 ms, mean = 34.481 ms, median = 34.4187 ms, percentile(99%) = 37.0945 ms
+```
+
+Some sample results comparing different data precisions are shown below. The following results were obtained using an RTX A5000 and TensorRT 8.6.1. mAP was evaluated for the COCO val2017 dataset using the instructions in [Evaluate mAP Metric](#evaluate-map-metric).
+
+| **Precision**   | **Latency** | **bbox COCO mAP** | **segm COCO mAP** |
+| ----------------|-------------|-------------------|-------------------|
+| fp32            | 25.89 ms    | 0.402             | 0.368             |
+| fp16            | 13.00 ms    | 0.402             | 0.368             |
+| int8            | 7.29 ms     | 0.399             | 0.366             |
+
+## Inference
+
+For optimal performance, inference should be done in a C++ application that takes advantage of CUDA Graphs to launch the inference request. Alternatively, the TensorRT engine built with this process can also be executed through either [Triton Inference Server](https://developer.nvidia.com/nvidia-triton-inference-server) or [DeepStream SDK](https://developer.nvidia.com/deepstream-sdk).
+
+However, for convenience, a python inference script is provided here for quick testing of the built TensorRT engine.
+
+### Inference in Python
+
+To perform object detection on a set of images with TensorRT, run:
+
+```
+python infer.py \
+    --engine /path/to/engine.trt \
+    --input /path/to/images \
+    --det2_config /detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
+    --output /path/to/output
+```
+
+Where the input path can be either a single image file, or a directory of jpg/png/bmp images.
+
+The script has a few optional arguments, including:
+* `--nms_threshold` allows overriding the default second NMS score threshold parameter.
+* `--iou_threshold` allows setting the IoU threshold for the mask segmentation, default is 0.5.
+
+The detection results will be written out to the specified output directory, consisting of a visualization image, and a tab-separated results file for each input image processed.
+
+#### Sample Images
+
+![infer_1](https://drive.google.com/uc?export=view&id=1AOW9IXqjrU7eVYmaue-pqijNucXmx_s0)
+
+![infer_2](https://drive.google.com/uc?export=view&id=1m1fp2v41DOqKfj423G0-eyKVurrPNYGx)
+
+### Evaluate mAP Metric
+
+Given a validation dataset (such as [COCO val2017 data](http://images.cocodataset.org/zips/val2017.zip)), you can get the mAP metrics for the built TensorRT engine. This will use the mAP metrics tools functions from the [Detectron 2 evaluation](https://github.com/facebookresearch/detectron2/tree/main/detectron2/evaluation) repository. Make sure you follow [Use Builtin Datasets guide](https://detectron2.readthedocs.io/en/latest/tutorials/builtin_datasets.html) to correctly setup COCO or custom dataset. Additionally, run `eval_coco.py` in the same folder where `/datasets` is present, otherwise this error will appear:
+
+```
+FileNotFoundError: [Errno 2] No such file or directory: 'datasets/coco/annotations/instances_val2017.json'
+```
+
+To run the evaluation, run:
+
+```
+python eval_coco.py \
+    --engine /path/to/engine.trt \
+    --input /path/to/coco/val2017 \
+    --det2_config /detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \
+    --det2_weights /model_final_f10217.pkl
+```
+
+The script has a few optional arguments, including:
+* `--nms_threshold` allows overriding the default second NMS score threshold parameter.
+* `--iou_threshold` allows setting the IoU threshold for the mask segmentation, default is 0.5.
+
+The mAP metric is sensitive to the NMS score threshold used, as using a high threshold will reduce the model recall, resulting in a lower mAP value. It may be a good idea to build separate TensorRT engines for different purposes. That is, one engine with a default threshold (like 0.5) dedicated for mAP validation, and another engine with your application specific threshold (like 0.8) for deployment. This is why we keep the NMS threshold as a configurable parameter in the `create_onnx.py` script.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/build_engine.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..c62b941c1a334b660a9c6e716c6ff347cb037146
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/build_engine.py
@@ -0,0 +1,320 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import logging
+import argparse
+
+import numpy as np
+import tensorrt as trt
+from cuda import cudart
+
+from image_batcher import ImageBatcher
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("EngineBuilder").setLevel(logging.INFO)
+log = logging.getLogger("EngineBuilder")
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+
+class EngineCalibrator(trt.IInt8MinMaxCalibrator):
+    """
+    Implements the INT8 MinMax Calibrator.
+    """
+
+    def __init__(self, cache_file):
+        """
+        :param cache_file: The location of the cache file, or None to skip reading/writing a cache.
+        """
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher = None
+        self.batch_allocation = None
+        self.batch_generator = None
+
+    def set_image_batcher(self, image_batcher: ImageBatcher):
+        """
+        Define the image batcher to use, if any. If using only the cache file, an image batcher doesn't need
+        to be defined. NOTE(review): the device buffer allocated here is not released by this class.
+        :param image_batcher: The ImageBatcher object
+        """
+        self.image_batcher = image_batcher
+        batch_bytes = np.dtype(image_batcher.dtype).itemsize * np.prod(image_batcher.shape)
+        self.size = int(batch_bytes)
+        self.batch_allocation = common.cuda_call(cudart.cudaMalloc(self.size))
+        self.batch_generator = self.image_batcher.get_batch()
+
+    def get_batch_size(self):
+        """
+        Overrides from trt.IInt8MinMaxCalibrator.
+        Get the batch size to use for calibration.
+        :return: Batch size of the image batcher, or 1 when no image batcher is set.
+        """
+        if self.image_batcher:
+            return self.image_batcher.batch_size
+        return 1
+
+    def get_batch(self, names):
+        """
+        Overrides from trt.IInt8MinMaxCalibrator.
+        Get the next batch to use for calibration, as a list of device memory pointers.
+        :param names: The names of the inputs, if useful to define the order of inputs.
+        :return: A list of int-casted memory pointers, or None when there are no more batches.
+        """
+        if not self.image_batcher:
+            return None
+        try:
+            batch, _, _ = next(self.batch_generator)
+            log.info(
+                "Calibrating image {} / {}".format(
+                    self.image_batcher.image_index, self.image_batcher.num_images
+                )
+            )
+            common.memcpy_host_to_device(
+                self.batch_allocation, np.ascontiguousarray(batch)
+            )
+
+            return [int(self.batch_allocation)]
+        except StopIteration:
+            log.info("Finished calibration batches")
+            return None
+
+    def read_calibration_cache(self):
+        """
+        Overrides from trt.IInt8MinMaxCalibrator.
+        Read the calibration cache file stored on disk, if one is configured and it exists.
+        :return: The contents of the cache file, or None when no cache file is set or found.
+        """
+        # cache_file can be None, and os.path.exists(None) raises TypeError, so test for None first.
+        if self.cache_file is None or not os.path.exists(self.cache_file):
+            return None
+        with open(self.cache_file, "rb") as f:
+            log.info("Using calibration cache file: {}".format(self.cache_file))
+            return f.read()
+
+    def write_calibration_cache(self, cache):
+        """
+        Overrides from trt.IInt8MinMaxCalibrator.
+        Store the calibration cache to a file on disk.
+        :param cache: The contents of the calibration cache to store.
+        """
+        if self.cache_file is None:
+            return
+        with open(self.cache_file, "wb") as f:
+            log.info("Writing calibration cache data to: {}".format(self.cache_file))
+            f.write(cache)
+
+
+class EngineBuilder:
+    """
+    Parses an ONNX graph and builds a TensorRT engine from it.
+    """
+
+    def __init__(self, verbose=False, workspace=8):
+        """
+        :param verbose: If enabled, a higher verbosity level will be set on the TensorRT logger.
+        :param workspace: Max memory workspace to allow, in Gb.
+        """
+        self.trt_logger = trt.Logger(trt.Logger.INFO)
+        if verbose:
+            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE
+
+        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")
+
+        self.builder = trt.Builder(self.trt_logger)
+        self.config = self.builder.create_builder_config()
+        self.config.set_memory_pool_limit(
+            trt.MemoryPoolType.WORKSPACE, workspace * (2**30)
+        )
+
+        self.batch_size = None
+        self.network = None
+        self.parser = None
+
+    def create_network(self, onnx_path):
+        """
+        Parse the ONNX graph and create the corresponding TensorRT network definition.
+        :param onnx_path: The path to the ONNX graph to load.
+        """
+        self.network = self.builder.create_network(0)
+        self.parser = trt.OnnxParser(self.network, self.trt_logger)
+
+        onnx_path = os.path.realpath(onnx_path)
+        with open(onnx_path, "rb") as f:
+            if not self.parser.parse(f.read()):
+                log.error("Failed to load ONNX file: {}".format(onnx_path))
+                for error in range(self.parser.num_errors):
+                    log.error(self.parser.get_error(error))
+                sys.exit(1)
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+        outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]
+
+        log.info("Network Description")
+        for tensor in inputs:  # named 'tensor' so the builtin input() is not shadowed
+            self.batch_size = tensor.shape[0]
+            log.info(
+                "Input '{}' with shape {} and dtype {}".format(tensor.name, tensor.shape, tensor.dtype)
+            )
+        for tensor in outputs:
+            log.info(
+                "Output '{}' with shape {} and dtype {}".format(tensor.name, tensor.shape, tensor.dtype)
+            )
+        # batch_size stays None when the network has no inputs; check that before comparing it.
+        batch_ok = self.batch_size is not None and self.batch_size > 0
+        assert batch_ok, "Expected at least one network input with a positive batch size"
+
+    def create_engine(
+        self,
+        engine_path,
+        precision,
+        config_file,
+        calib_input=None,
+        calib_cache=None,
+        calib_num_images=5000,
+        calib_batch_size=8,
+    ):
+        """
+        Build the TensorRT engine and serialize it to disk.
+        :param engine_path: The path where to serialize the engine to.
+        :param precision: The datatype to use for the engine, either 'fp32', 'fp16', 'int8'.
+        :param config_file: The Detectron 2 config file (.yaml), forwarded to the calibration ImageBatcher.
+        :param calib_input: The path to a directory holding the calibration images.
+        :param calib_cache: The path where to write the calibration cache to, or if it already exists, load it from.
+        :param calib_num_images: The maximum number of images to use for calibration.
+        :param calib_batch_size: The batch size to use for the calibration process.
+        """
+        engine_path = os.path.realpath(engine_path)
+        engine_dir = os.path.dirname(engine_path)
+        os.makedirs(engine_dir, exist_ok=True)
+        log.info("Building {} Engine in {}".format(precision, engine_path))
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+
+        if precision in ["fp16", "int8"]:
+            if not self.builder.platform_has_fast_fp16:
+                log.warning("FP16 is not supported natively on this platform/device")
+            self.config.set_flag(trt.BuilderFlag.FP16)
+        if precision in ["int8"]:
+            if not self.builder.platform_has_fast_int8:
+                log.warning("INT8 is not supported natively on this platform/device")
+            self.config.set_flag(trt.BuilderFlag.INT8)
+            self.config.int8_calibrator = EngineCalibrator(calib_cache)
+            # Calibration images are only loaded when there is no existing cache file to reuse.
+            if calib_cache is None or not os.path.exists(calib_cache):
+                calib_shape = [calib_batch_size] + list(inputs[0].shape[1:])
+                calib_dtype = trt.nptype(inputs[0].dtype)
+                self.config.int8_calibrator.set_image_batcher(
+                    ImageBatcher(
+                        calib_input,
+                        calib_shape,
+                        calib_dtype,
+                        max_num_images=calib_num_images,
+                        exact_batches=True,
+                        config_file=config_file,
+                    )
+                )
+
+        engine_bytes = self.builder.build_serialized_network(self.network, self.config)
+        if engine_bytes is None:
+            log.error("Failed to create engine")
+            sys.exit(1)
+
+        with open(engine_path, "wb") as f:
+            log.info("Serializing engine to file: {:}".format(engine_path))
+            f.write(engine_bytes)
+
+
+def main(args):
+    """Build a TensorRT engine from the parsed command-line arguments and write it to disk."""
+    engine_builder = EngineBuilder(verbose=args.verbose, workspace=args.workspace)
+    engine_builder.create_network(args.onnx)
+    # Calibration settings; create_engine only reads them for int8 builds.
+    calibration = {
+        "calib_input": args.calib_input,
+        "calib_cache": args.calib_cache,
+        "calib_num_images": args.calib_num_images,
+        "calib_batch_size": args.calib_batch_size,
+    }
+    engine_builder.create_engine(args.engine, args.precision, args.det2_config, **calibration)
+
+
+if __name__ == "__main__":
+    # Command-line interface: model paths, build precision and INT8 calibration settings.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-o", "--onnx", help="The input ONNX model file to load")
+    parser.add_argument("-e", "--engine", help="The output path for the TRT engine")
+    parser.add_argument(
+        "-c",
+        "--det2_config",
+        type=str,
+        default=None,
+        help="The Detectron 2 config file (.yaml) for the model",
+    )
+    parser.add_argument(
+        "-p",
+        "--precision",
+        choices=["fp32", "fp16", "int8"],
+        default="fp16",
+        help="The precision mode to build in, either fp32/fp16/int8, default: 'fp16'",
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable more verbose log output")
+    parser.add_argument(
+        "-w",
+        "--workspace",
+        type=int,
+        default=1,
+        help="The max memory workspace size to allow in Gb, default: 1",
+    )
+    parser.add_argument(
+        "--calib_input", help="The directory holding images to use for calibration"
+    )
+    parser.add_argument(
+        "--calib_cache",
+        default="./calibration.cache",
+        help="The file path for INT8 calibration cache to use, default: ./calibration.cache",
+    )
+    parser.add_argument(
+        "--calib_num_images",
+        type=int,
+        default=5000,
+        help="The maximum number of images to use for calibration, default: 5000",
+    )
+    parser.add_argument(
+        "--calib_batch_size",
+        type=int,
+        default=8,
+        help="The batch size for the calibration process, default: 8",
+    )
+    args = parser.parse_args()
+    # Check the argument combinations that the parser itself does not enforce.
+    if not (args.onnx and args.engine):
+        parser.print_help()
+        log.error("These arguments are required: --onnx and --engine")
+        sys.exit(1)
+    if args.precision == "int8":
+        calibration_available = args.calib_input or os.path.exists(args.calib_cache)
+        if not calibration_available:
+            parser.print_help()
+            log.error(
+                "When building in int8 precision, --calib_input or an existing --calib_cache file is required"
+            )
+            sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/create_onnx.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/create_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..478ead7500def219ca2891d97e2c254442750192
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/create_onnx.py
@@ -0,0 +1,874 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import re
+import sys
+import argparse
+import logging
+import cv2
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+from onnx import shape_inference
+import torch
+
+try:
+    from detectron2.engine.defaults import DefaultPredictor
+    from detectron2.modeling import build_model
+    from detectron2.config import get_cfg
+    from detectron2.structures import ImageList
+except ImportError:
+    print("Could not import Detectron 2 modules. Maybe you did not install Detectron 2")
+    print(
+        "Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md"
+    )
+    sys.exit(1)
+
+import onnx_utils
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("ModelHelper").setLevel(logging.INFO)
+log = logging.getLogger("ModelHelper")
+
+
+class DET2GraphSurgeon:
+    def __init__(self, saved_model_path, config_file, weights):
+        """
+        Constructor of the Model Graph Surgeon object, to do the conversion of a Detectron 2 Mask R-CNN exported model
+        to an ONNX-TensorRT parsable model.
+        :param saved_model_path: The path pointing to the exported Detectron 2 Mask R-CNN ONNX model.
+        :param config_file: The path pointing to the Detectron 2 yaml file which describes the model.
+        :param weights: Weights to load for the Detectron 2 model.
+        """
+
+        def det2_setup(config_file, weights):
+            """
+            Create configs and perform basic setups.
+            """
+            cfg = get_cfg()
+            cfg.merge_from_file(config_file)
+            cfg.merge_from_list(["MODEL.WEIGHTS", weights])
+            cfg.freeze()
+            return cfg
+
+        # Import exported Detectron 2 Mask R-CNN ONNX model as GraphSurgeon object.
+        self.graph = gs.import_onnx(onnx.load(saved_model_path))
+        assert self.graph
+        log.info("ONNX graph loaded successfully")
+
+        # Fold constants via ONNX-GS that exported script might've missed.
+        self.graph.fold_constants()
+
+        # Set up Detectron 2 model configuration.
+        self.det2_cfg = det2_setup(config_file, weights)
+
+        # Getting model characteristics.
+        self.fpn_out_channels = self.det2_cfg.MODEL.FPN.OUT_CHANNELS
+        self.num_classes = self.det2_cfg.MODEL.ROI_HEADS.NUM_CLASSES
+        self.first_NMS_max_proposals = self.det2_cfg.MODEL.RPN.POST_NMS_TOPK_TEST
+        self.first_NMS_iou_threshold = self.det2_cfg.MODEL.RPN.NMS_THRESH
+        self.first_NMS_score_threshold = 0.01  # Hard-coded literal, not read from the Detectron 2 config.
+        self.first_ROIAlign_pooled_size = (
+            self.det2_cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
+        )
+        self.first_ROIAlign_sampling_ratio = (
+            self.det2_cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
+        )
+        self.first_ROIAlign_type = self.det2_cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
+        self.second_NMS_max_proposals = self.det2_cfg.TEST.DETECTIONS_PER_IMAGE
+        self.second_NMS_iou_threshold = self.det2_cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST
+        self.second_NMS_score_threshold = (
+            self.det2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST
+        )
+        self.second_ROIAlign_pooled_size = (
+            self.det2_cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
+        )
+        self.second_ROIAlign_sampling_ratio = (
+            self.det2_cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
+        )
+        self.second_ROIAlign_type = self.det2_cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE
+        self.mask_out_res = 28  # Hard-coded literal, not read from the Detectron 2 config.
+
+        # Model characteristics.
+        log.info("Number of FPN output channels is {}".format(self.fpn_out_channels))
+        log.info("Number of classes is {}".format(self.num_classes))
+        log.info("First NMS max proposals is {}".format(self.first_NMS_max_proposals))
+        log.info("First NMS iou threshold is {}".format(self.first_NMS_iou_threshold))
+        log.info(
+            "First NMS score threshold is {}".format(self.first_NMS_score_threshold)
+        )
+        log.info("First ROIAlign type is {}".format(self.first_ROIAlign_type))
+        log.info(
+            "First ROIAlign pooled size is {}".format(self.first_ROIAlign_pooled_size)
+        )
+        log.info(
+            "First ROIAlign sampling ratio is {}".format(
+                self.first_ROIAlign_sampling_ratio
+            )
+        )
+        log.info("Second NMS max proposals is {}".format(self.second_NMS_max_proposals))
+        log.info("Second NMS iou threshold is {}".format(self.second_NMS_iou_threshold))
+        log.info(
+            "Second NMS score threshold is {}".format(self.second_NMS_score_threshold)
+        )
+        log.info("Second ROIAlign type is {}".format(self.second_ROIAlign_type))
+        log.info(
+            "Second ROIAlign pooled size is {}".format(self.second_ROIAlign_pooled_size)
+        )
+        log.info(
+            "Second ROIAlign sampling ratio is {}".format(
+                self.second_ROIAlign_sampling_ratio
+            )
+        )
+        log.info(
+            "Individual mask output resolution is {}x{}".format(
+                self.mask_out_res, self.mask_out_res
+            )
+        )
+
+        self.batch_size = None  # Unknown at construction time; assigned by update_preprocessor().
+
+    def sanitize(self):
+        """
+        Clean up unconnected nodes, topologically re-sort, re-run ONNX shape inference and fold constants (with shapes).
+        Runs at most three passes and stops early once a pass leaves the number of graph nodes unchanged.
+        """
+
+        for i in range(3):  # Upper bound on the number of cleanup/inference/folding passes.
+            count_before = len(self.graph.nodes)
+            self.graph.cleanup().toposort()
+            try:
+                for node in self.graph.nodes:
+                    for o in node.outputs:
+                        o.shape = None  # Clear recorded output shapes before re-running shape inference.
+                model = gs.export_onnx(self.graph)
+                model = shape_inference.infer_shapes(model)
+                self.graph = gs.import_onnx(model)
+            except Exception as e:  # Best effort: a shape inference failure is only logged.
+                log.info(
+                    "Shape inference could not be performed at this time:\n{}".format(e)
+                )
+            try:
+                self.graph.fold_constants(fold_shapes=True)
+            except TypeError as e:  # Logged with an upgrade hint, then re-raised to the caller.
+                log.error(
+                    "This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your "
+                    "onnx_graphsurgeon module. Error:\n{}".format(e)
+                )
+                raise
+
+            count_after = len(self.graph.nodes)
+            if count_before == count_after:
+                # No new folding occurred in this iteration, so we can stop for now.
+                break
+
+    def get_anchors(self, sample_image):
+        """
+        Detectron 2 exported ONNX does not contain anchors required for efficientNMS plug-in, so they must be generated
+        "offline" by calling actual Detectron 2 model and getting anchors from it.
+        :param sample_image: Path to a sample image that is run through the model to obtain anchors.
+        Can be any image from a dataset. Make sure listed here Detectron 2 preprocessing steps
+        actually match your preprocessing steps. Otherwise, behavior can be unpredictable.
+        Additionally, anchors have to be generated for a fixed input dimensions,
+        meaning as soon as image leaves a preprocessor and enters predictor.model.backbone() it must have
+        a fixed dimension (1344x1344 in my case) that every single image in dataset must follow, since currently
+        TensorRT plug-ins do not support dynamic shapes.
+        :return: NumPy array holding the anchors of the five pyramid levels (P2->P6), concatenated in that order.
+        :raises ValueError: If OpenCV cannot read the sample image.
+        """
+        # Build the predictor once and reuse its model for preprocessing; a second build_model() call is not needed.
+        predictor = DefaultPredictor(self.det2_cfg)
+        model = predictor.model
+
+        # Image preprocessing. cv2.imread() returns None (it does not raise) when the file cannot be read.
+        input_im = cv2.imread(sample_image)
+        if input_im is None:
+            raise ValueError("Could not read sample image: {}".format(sample_image))
+        raw_height, raw_width = input_im.shape[:2]
+        image = predictor.aug.get_transform(input_im).apply_image(input_im)
+        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
+
+        # Model preprocessing.
+        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
+        images = [x["image"].to(model.device) for x in inputs]
+        images = [(x - model.pixel_mean) / model.pixel_std for x in images]
+        imagelist_images = ImageList.from_tensors(images, 1344)
+
+        # Only the anchors are needed, so run the forward pieces without recording an autograd graph.
+        with torch.no_grad():
+            # Get feature maps from backbone.
+            features = model.backbone(imagelist_images.tensor)
+            # Get proposals from Region Proposal Network and obtain anchors from anchor generator.
+            features = [features[f] for f in model.proposal_generator.in_features]
+            det2_anchors = model.proposal_generator.anchor_generator(features)
+
+        # Extract anchors based on feature maps in ascending order (P2->P6).
+        final_anchors = np.concatenate(
+            [det2_anchors[level].tensor.detach().cpu().numpy() for level in range(5)]
+        )
+
+        return final_anchors
+
+    def save(self, output_path):
+        """
+        Export the current graph to ONNX and write it to disk after a final cleanup and topological sort.
+        :param output_path: Destination file for the updated ONNX model; missing parent directories are created.
+        """
+        self.graph.cleanup().toposort()
+        exported_model = gs.export_onnx(self.graph)
+        resolved_path = os.path.realpath(output_path)
+        os.makedirs(os.path.dirname(resolved_path), exist_ok=True)
+        onnx.save(exported_model, resolved_path)
+        log.info("Saved ONNX model to {}".format(resolved_path))
+
+    def update_preprocessor(self, batch_size):
+        """
+        Remove the exported pre-processing nodes and rewire the graph as input -> Sub (mean) -> first Div -> first Conv.
+        :param batch_size: The batch size to use for the ONNX graph; it is also stored as self.batch_size.
+        """
+        # Set graph inputs; height/width are taken from dims 1 and 2 of the input shape before it is overwritten.
+        self.batch_size = batch_size
+        self.height = self.graph.inputs[0].shape[1]  # NOTE(review): assumes the exported input is (C, H, W) - confirm.
+        self.width = self.graph.inputs[0].shape[2]
+
+        input_shape = [self.batch_size, 3, self.height, self.width]
+        self.graph.inputs[0].shape = input_shape
+        self.graph.inputs[0].dtype = np.float32
+        self.graph.inputs[0].name = "input_tensor"
+
+        self.sanitize()
+        log.info(
+            "ONNX graph input shape: {} [NCHW format set]".format(
+                self.graph.inputs[0].shape
+            )
+        )
+
+        # Find the initial nodes of the graph, whatever the input is first connected to, and disconnect them.
+        for node in [
+            node for node in self.graph.nodes if self.graph.inputs[0] in node.inputs
+        ]:
+            node.inputs.clear()
+
+        # Get input tensor.
+        input_tensor = self.graph.inputs[0]
+
+        # Create preprocessing Sub node (three per-channel constants shaped (3, 1, 1)) and connect input tensor to it.
+        sub_const = np.expand_dims(
+            np.asarray([255 * 0.406, 255 * 0.456, 255 * 0.485], dtype=np.float32),
+            axis=(1, 2),
+        )
+        sub_out = self.graph.op_with_const(
+            "Sub", "preprocessor/mean", input_tensor, sub_const
+        )
+
+        # Find first Div node and connect to output of Sub node.
+        div_node = self.graph.find_node_by_op("Div")
+        log.info("Found {} node".format(div_node.op))
+        div_node.inputs[0] = sub_out[0]
+
+        # Find first Conv and connect preprocessor directly to it.
+        conv_node = self.graph.find_node_by_op("Conv")
+        log.info("Found {} node".format(conv_node.op))
+        conv_node.inputs[0] = div_node.outputs[0]
+
+        # Reshape nodes tend to update the batch dimension to a fixed value of 1, they should use the batch size instead.
+        for node in [node for node in self.graph.nodes if node.op == "Reshape"]:
+            if type(node.inputs[1]) == gs.Constant and node.inputs[1].values[0] == 1:
+                node.inputs[1].values[0] = self.batch_size
+
+    def NMS(
+        self,
+        boxes,
+        scores,
+        anchors,
+        background_class,
+        score_activation,
+        max_proposals,
+        iou_threshold,
+        nms_score_threshold,
+        user_threshold,
+        nms_name=None,
+    ):
+        # Helper that adds an EfficientNMS_TRT TensorRT plugin node to the graph and returns its output tensors.
+        # :param boxes: The box predictions from the Box Net.
+        # :param scores: The class predictions from the Class Net.
+        # :param anchors: The default anchor coordinates.
+        # :param background_class: The label ID for the background class.
+        # :param score_activation: If set to True - apply sigmoid activation to the confidence scores during NMS
+        # operation, if false - no activation.
+        # :param max_proposals: Number of proposals made by NMS (passed to the plugin as max_output_boxes).
+        # :param iou_threshold: NMS intersection over union threshold, given by self.det2_cfg.
+        # :param nms_score_threshold: NMS score threshold, given by self.det2_cfg.
+        # :param user_threshold: User's given threshold to overwrite default NMS score threshold.
+        # :param nms_name: Name of NMS node in a graph, renames NMS elements accordingly in order to eliminate cycles.
+        # :return: List of gs.Variable: [num_detections, detection_boxes, detection_scores, detection_classes].
+
+        # Suffix appended to the node name and to every output tensor name.
+        suffix = "" if nms_name is None else "_" + nms_name
+
+        # Set score threshold: the user supplied value wins over the default one.
+        if user_threshold is None:
+            score_threshold = nms_score_threshold
+        else:
+            score_threshold = user_threshold
+
+        # NMS Outputs.
+        nms_outputs = [
+            gs.Variable(
+                name="num_detections" + suffix,
+                dtype=np.int32,
+                shape=[self.batch_size, 1],
+            ),
+            gs.Variable(
+                name="detection_boxes" + suffix,
+                dtype=np.float32,
+                shape=[self.batch_size, max_proposals, 4],
+            ),
+            gs.Variable(
+                name="detection_scores" + suffix,
+                dtype=np.float32,
+                shape=[self.batch_size, max_proposals],
+            ),
+            gs.Variable(
+                name="detection_classes" + suffix,
+                dtype=np.int32,
+                shape=[self.batch_size, max_proposals],
+            ),
+        ]
+
+        # Plugin attributes; the score threshold handed to the plugin is never lower than 0.01.
+        plugin_attrs = {
+            "plugin_version": "1",
+            "background_class": background_class,
+            "max_output_boxes": max_proposals,
+            "score_threshold": max(0.01, score_threshold),
+            "iou_threshold": iou_threshold,
+            "score_activation": score_activation,
+            "class_agnostic": False,
+            "box_coding": 1,
+        }
+
+        # Plugin.
+        node_name = "nms" + suffix
+        self.graph.plugin(
+            op="EfficientNMS_TRT",
+            name=node_name,
+            inputs=[boxes, scores, anchors],
+            outputs=nms_outputs,
+            attrs=plugin_attrs,
+        )
+        log.info("Created nms{} with EfficientNMS_TRT plugin".format(suffix))
+
+        return nms_outputs
+
+    def ROIAlign(
+        self,
+        rois,
+        p2,
+        p3,
+        p4,
+        p5,
+        pooled_size,
+        sampling_ratio,
+        roi_align_type,
+        num_rois,
+        ra_name,
+    ):
+        # Helper function to create the ROIAlign Plugin node with the selected inputs.
+        # PyramidROIAlign_TRT TensorRT Plugin is suitable for our use case.
+        # Reads self.height and self.width, which are assigned by update_preprocessor().
+        # :param rois: Regions of interest/detection boxes outputs from preceding NMS node.
+        # :param p2, p3, p4, p5: Outputs of the p2, p3, p4 and p5 feature maps.
+        # :param pooled_size: Pooled output dimensions.
+        # :param sampling_ratio: Number of sampling points in the interpolation grid used to compute the output value of each pooled output bin.
+        # :param roi_align_type: Type of Detectron 2 ROIAlign op, either ROIAlign (vanilla) or ROIAlignV2 (0.5 coordinate offset).
+        # :param num_rois: Number of ROIs resulting from ROIAlign operation.
+        # :param ra_name: Name of ROIAlign node in a graph, renames ROIAlign elements accordingly in order to eliminate cycles.
+        # :return: gs.Variable of shape [batch_size, num_rois, fpn_out_channels, pooled_size, pooled_size].
+        # :raises ValueError: If roi_align_type is neither "ROIAlign" nor "ROIAlignV2".
+
+        # Each Detectron 2 ROIAlign variant maps to a PyramidROIAlign_TRT coordinate transform; reject unknown types early.
+        coords_transforms = {"ROIAlignV2": 2, "ROIAlign": 0}
+        if roi_align_type not in coords_transforms:
+            raise ValueError("Unsupported ROIAlign type: {}".format(roi_align_type))
+        roi_coords_transform = coords_transforms[roi_align_type]
+
+        # ROIAlign outputs.
+        roi_align_output = gs.Variable(
+            name="roi_align/output_" + ra_name,
+            dtype=np.float32,
+            shape=[
+                self.batch_size,
+                num_rois,
+                self.fpn_out_channels,
+                pooled_size,
+                pooled_size,
+            ],
+        )
+
+        # Plugin.
+        self.graph.plugin(
+            op="PyramidROIAlign_TRT",
+            name="roi_align_" + ra_name,
+            inputs=[rois, p2, p3, p4, p5],
+            outputs=[roi_align_output],
+            attrs={
+                "plugin_version": "1",
+                "fpn_scale": 224,
+                "pooled_size": pooled_size,
+                "image_size": [self.height, self.width],
+                "roi_coords_absolute": 0,
+                "roi_coords_swap": 0,
+                "roi_coords_transform": roi_coords_transform,
+                "sampling_ratio": sampling_ratio,
+            },
+        )
+        log.info("Created {} with PyramidROIAlign_TRT plugin".format(ra_name))
+
+        return roi_align_output
+
+    def process_graph(
+        self, anchors, first_nms_threshold=None, second_nms_threshold=None
+    ):
+        """
+        Processes the graph to replace the GenerateProposals and BoxWithNMSLimit operations with EfficientNMS_TRT
+        TensorRT plugin nodes and ROIAlign operations with PyramidROIAlign_TRT plugin nodes.
+        :param anchors: Anchors generated from sample image "offline" by Detectron 2, since anchors are not provided
+        inside the graph.
+        :param first_nms_threshold: Override the 1st NMS score threshold value. If set to None, use the value in the graph.
+        :param second_nms_threshold: Override the 2nd NMS score threshold value. If set to None, use the value in the graph.
+        """
+
+        def backbone():
+            """
+            Updates the graph to replace all ResizeNearest ops with ResizeNearest plugins in backbone.
+            """
+            # Get final backbone outputs.
+            p2 = self.graph.find_node_by_op_name("Conv", "/backbone/fpn_output2/Conv")
+            p3 = self.graph.find_node_by_op_name("Conv", "/backbone/fpn_output3/Conv")
+            p4 = self.graph.find_node_by_op_name("Conv", "/backbone/fpn_output4/Conv")
+            p5 = self.graph.find_node_by_op_name("Conv", "/backbone/fpn_output5/Conv")
+
+            return p2.outputs[0], p3.outputs[0], p4.outputs[0], p5.outputs[0]
+
+        def proposal_generator(anchors, first_nms_threshold):
+            """
+            Updates the graph to replace all GenerateProposals Caffe ops with one single NMS for proposals generation.
+            :param anchors: Anchors generated from sample image "offline" by Detectron 2, since anchors are not provided
+            inside the graph
+            :param first_nms_threshold: Override the 1st NMS score threshold value. If set to None, use the value in the graph.
+            """
+            # Get nodes containing final objectness logits.
+            p2_logits = self.graph.find_node_by_op_name(
+                "Flatten", "/proposal_generator/Flatten"
+            )
+            p3_logits = self.graph.find_node_by_op_name(
+                "Flatten", "/proposal_generator/Flatten_1"
+            )
+            p4_logits = self.graph.find_node_by_op_name(
+                "Flatten", "/proposal_generator/Flatten_2"
+            )
+            p5_logits = self.graph.find_node_by_op_name(
+                "Flatten", "/proposal_generator/Flatten_3"
+            )
+            p6_logits = self.graph.find_node_by_op_name(
+                "Flatten", "/proposal_generator/Flatten_4"
+            )
+
+            # Get nodes containing final anchor_deltas.
+            p2_anchors = self.graph.find_node_by_op_name(
+                "Reshape", "/proposal_generator/Reshape_1"
+            )
+            p3_anchors = self.graph.find_node_by_op_name(
+                "Reshape", "/proposal_generator/Reshape_3"
+            )
+            p4_anchors = self.graph.find_node_by_op_name(
+                "Reshape", "/proposal_generator/Reshape_5"
+            )
+            p5_anchors = self.graph.find_node_by_op_name(
+                "Reshape", "/proposal_generator/Reshape_7"
+            )
+            p6_anchors = self.graph.find_node_by_op_name(
+                "Reshape", "/proposal_generator/Reshape_9"
+            )
+
+            # Concatenate all objectness logits/scores data.
+            scores_inputs = [
+                p2_logits.outputs[0],
+                p3_logits.outputs[0],
+                p4_logits.outputs[0],
+                p5_logits.outputs[0],
+                p6_logits.outputs[0],
+            ]
+            scores_tensor = self.graph.layer(
+                name="scores",
+                op="Concat",
+                inputs=scores_inputs,
+                outputs=["scores"],
+                attrs={"axis": 1},
+            )[0]
+            # Unsqueeze to add 3rd dimension of 1 to match tensor dimensions of boxes tensor.
+            scores = self.graph.unsqueeze("scores_unsqueeze", scores_tensor, [2])[0]
+
+            # Concatenate all boxes/anchor_delta data.
+            boxes_inputs = [
+                p2_anchors.outputs[0],
+                p3_anchors.outputs[0],
+                p4_anchors.outputs[0],
+                p5_anchors.outputs[0],
+                p6_anchors.outputs[0],
+            ]
+            boxes = self.graph.layer(
+                name="boxes",
+                op="Concat",
+                inputs=boxes_inputs,
+                outputs=["anchors"],
+                attrs={"axis": 1},
+            )[0]
+
+            # Convert the anchors from Corners to CenterSize encoding.
+            anchors = np.matmul(
+                anchors,
+                [[0.5, 0, -1, 0], [0, 0.5, 0, -1], [0.5, 0, 1, 0], [0, 0.5, 0, 1]],
+            )
+            anchors = anchors / [
+                self.width,
+                self.height,
+                self.width,
+                self.height,
+            ]  # Normalize anchors to [0-1] range
+            anchors = np.expand_dims(anchors, axis=0)
+            anchors = anchors.astype(np.float32)
+            anchors = gs.Constant(name="default_anchors", values=anchors)
+
+            # Create NMS node.
+            nms_outputs = self.NMS(
+                boxes,
+                scores,
+                anchors,
+                -1,
+                False,
+                self.first_NMS_max_proposals,
+                self.first_NMS_iou_threshold,
+                self.first_NMS_score_threshold,
+                first_nms_threshold,
+                "rpn",
+            )
+
+            return nms_outputs
+
+        def roi_heads(rpn_outputs, p2, p3, p4, p5, second_nms_threshold):
+            """
+            Updates the graph to replace all ROIAlign Caffe ops with one single pyramid ROIAlign. Eliminates CollectRpnProposals
+            DistributeFpnProposals and BatchPermutation nodes that are not supported by TensorRT. Connects pyramid ROIAlign to box_head
+            and connects box_head to final box head outputs in a form of second NMS. In order to implement mask head outputs,
+            similar steps as in box_pooler are performed to replace mask_pooler. Finally, reimplemented mask_pooler is connected to
+            mask_head and mask head outputs are produced.
+            :param rpn_outputs: Outputs of the first NMS/proposal generator.
+            :param p2: Output of p2 feature map, required for ROIAlign operation.
+            :param p3: Output of p3 feature map, required for ROIAlign operation.
+            :param p4: Output of p4 feature map, required for ROIAlign operation.
+            :param p5: Output of p5 feature map, required for ROIAlign operation.
+            :param second_nms_threshold: Override the 2nd NMS score threshold value. If set to None, use the value in the graph.
+            """
+            # Create ROIAlign node.
+            box_pooler_output = self.ROIAlign(
+                rpn_outputs[1],
+                p2,
+                p3,
+                p4,
+                p5,
+                self.first_ROIAlign_pooled_size,
+                self.first_ROIAlign_sampling_ratio,
+                self.first_ROIAlign_type,
+                self.first_NMS_max_proposals,
+                "box_pooler",
+            )
+
+            # Reshape node that prepares ROIAlign/box pooler output for Gemm node that comes next.
+            box_pooler_shape = np.asarray(
+                [
+                    -1,
+                    self.fpn_out_channels
+                    * self.first_ROIAlign_pooled_size
+                    * self.first_ROIAlign_pooled_size,
+                ],
+                dtype=np.int64,
+            )
+            box_pooler_reshape = self.graph.op_with_const(
+                "Reshape", "box_pooler/reshape", box_pooler_output, box_pooler_shape
+            )
+
+            # Get first Gemm op of box head and connect box pooler to it.
+            first_box_head_gemm = self.graph.find_node_by_op_name(
+                "Gemm", "/roi_heads/box_head/fc1/Gemm"
+            )
+            first_box_head_gemm.inputs[0] = box_pooler_reshape[0]
+
+            # Get final two nodes of box predictor. Softmax op for cls_score, Gemm op for bbox_pred.
+            cls_score = self.graph.find_node_by_op_name("Softmax", "/roi_heads/Softmax")
+            bbox_pred = self.graph.find_node_by_op_name(
+                "Gemm", "/roi_heads/box_predictor/bbox_pred/Gemm"
+            )
+
+            # Linear transformation to convert box coordinates from (TopLeft, BottomRight) Corner encoding
+            # to CenterSize encoding. 1st NMS boxes are multiplied by transformation matrix in order to
+            # encode it into CenterSize format.
+            matmul_const = np.matrix(
+                "0.5 0 -1 0; 0 0.5 0 -1; 0.5 0 1 0; 0 0.5 0 1", dtype=np.float32
+            )
+            matmul_out = self.graph.matmul(
+                "RPN_NMS/detection_boxes_conversion", rpn_outputs[1], matmul_const
+            )
+
+            # Reshape node that prepares bbox_pred for scaling and second NMS.
+            bbox_pred_shape = np.asarray(
+                [self.batch_size, self.first_NMS_max_proposals, self.num_classes, 4],
+                dtype=np.int64,
+            )
+            bbox_pred_reshape = self.graph.op_with_const(
+                "Reshape", "bbox_pred/reshape", bbox_pred.outputs[0], bbox_pred_shape
+            )
+
+            # 0.1, 0.1, 0.2, 0.2 are localization head variance numbers, they scale bbox_pred_reshape, in order to get accurate coordinates.
+            scale_adj = np.expand_dims(
+                np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1)
+            )
+            final_bbox_pred = self.graph.op_with_const(
+                "Mul", "bbox_pred/scale", bbox_pred_reshape[0], scale_adj
+            )
+
+            # Reshape node that prepares cls_score for slicing and second NMS.
+            cls_score_shape = np.array(
+                [self.batch_size, self.first_NMS_max_proposals, self.num_classes + 1],
+                dtype=np.int64,
+            )
+            cls_score_reshape = self.graph.op_with_const(
+                "Reshape", "cls_score/reshape", cls_score.outputs[0], cls_score_shape
+            )
+
+            # Slice operation to adjust third dimension of cls_score tensor, deletion of background class (81 in Detectron 2).
+            final_cls_score = self.graph.slice(
+                "cls_score/slicer", cls_score_reshape[0], 0, self.num_classes, 2
+            )
+
+            # Create NMS node.
+            nms_outputs = self.NMS(
+                final_bbox_pred[0],
+                final_cls_score[0],
+                matmul_out[0],
+                -1,
+                False,
+                self.second_NMS_max_proposals,
+                self.second_NMS_iou_threshold,
+                self.second_NMS_score_threshold,
+                second_nms_threshold,
+                "box_outputs",
+            )
+
+            # Create ROIAlign node.
+            mask_pooler_output = self.ROIAlign(
+                nms_outputs[1],
+                p2,
+                p3,
+                p4,
+                p5,
+                self.second_ROIAlign_pooled_size,
+                self.second_ROIAlign_sampling_ratio,
+                self.second_ROIAlign_type,
+                self.second_NMS_max_proposals,
+                "mask_pooler",
+            )
+
+            # Reshape mask pooler output.
+            mask_pooler_shape = np.asarray(
+                [
+                    self.second_NMS_max_proposals * self.batch_size,
+                    self.fpn_out_channels,
+                    self.second_ROIAlign_pooled_size,
+                    self.second_ROIAlign_pooled_size,
+                ],
+                dtype=np.int64,
+            )
+            mask_pooler_reshape_node = self.graph.op_with_const(
+                "Reshape", "mask_pooler/reshape", mask_pooler_output, mask_pooler_shape
+            )
+
+            # Get first Conv op in mask head and connect ROIAlign's squeezed output to it.
+            mask_head_conv = self.graph.find_node_by_op_name(
+                "Conv", "/roi_heads/mask_head/mask_fcn1/Conv"
+            )
+            mask_head_conv.inputs[0] = mask_pooler_reshape_node[0]
+
+            # Reshape node that is preparing 2nd NMS class outputs for Add node that comes next.
+            classes_reshape_shape = np.asarray(
+                [self.second_NMS_max_proposals * self.batch_size], dtype=np.int64
+            )
+            classes_reshape_node = self.graph.op_with_const(
+                "Reshape",
+                "box_outputs/reshape_classes",
+                nms_outputs[3],
+                classes_reshape_shape,
+            )
+
+            # This loop will generate an array used in Add node, which eventually will help Gather node to pick the single
+            # class of interest per bounding box, instead of creating 80 masks for every single bounding box.
+            add_array = []
+            for i in range(self.second_NMS_max_proposals * self.batch_size):
+                if i == 0:
+                    start_pos = 0
+                else:
+                    start_pos = i * self.num_classes
+                add_array.append(start_pos)
+
+            # This Add node is one of the Gather node inputs, Gather node performs gather on 0th axis of data tensor
+            # and requires indices that set tensors to be within bounds, this Add node provides the bounds for Gather.
+            add_array = np.asarray(add_array, dtype=np.int32)
+            classes_add_node = self.graph.op_with_const(
+                "Add", "box_outputs/add", classes_reshape_node[0], add_array
+            )
+
+            # Get the last Conv op in mask head and reshape it to correctly gather class of interest's masks.
+            last_conv = self.graph.find_node_by_op_name(
+                "Conv", "/roi_heads/mask_head/predictor/Conv"
+            )
+            last_conv_reshape_shape = np.asarray(
+                [
+                    self.second_NMS_max_proposals * self.num_classes * self.batch_size,
+                    self.mask_out_res,
+                    self.mask_out_res,
+                ],
+                dtype=np.int64,
+            )
+            last_conv_reshape_node = self.graph.op_with_const(
+                "Reshape",
+                "mask_head/reshape_all_masks",
+                last_conv.outputs[0],
+                last_conv_reshape_shape,
+            )
+
+            # Gather node that selects only masks belonging to detected class, 79 other masks are discarded.
+            final_gather = self.graph.gather(
+                "mask_head/final_gather",
+                last_conv_reshape_node[0],
+                classes_add_node[0],
+                0,
+            )
+
+            # Get last Sigmoid node and connect Gather node to it.
+            mask_head_sigmoid = self.graph.find_node_by_op_name(
+                "Sigmoid", "/roi_heads/mask_head/Sigmoid"
+            )
+            mask_head_sigmoid.inputs[0] = final_gather[0]
+
+            # Final Reshape node, reshapes output of Sigmoid, important for various batch_size support (not tested yet).
+            final_graph_reshape_shape = np.asarray(
+                [
+                    self.batch_size,
+                    self.second_NMS_max_proposals,
+                    self.mask_out_res,
+                    self.mask_out_res,
+                ],
+                dtype=np.int64,
+            )
+            final_graph_reshape_node = self.graph.op_with_const(
+                "Reshape",
+                "mask_head/final_reshape",
+                mask_head_sigmoid.outputs[0],
+                final_graph_reshape_shape,
+            )
+            final_graph_reshape_node[0].dtype = np.float32
+            final_graph_reshape_node[0].name = "detection_masks"
+
+            return nms_outputs, final_graph_reshape_node[0]
+
+        # Only Detectron 2's Mask-RCNN R50-FPN 3x is supported currently.
+        p2, p3, p4, p5 = backbone()
+        rpn_outputs = proposal_generator(anchors, first_nms_threshold)
+        box_head_outputs, mask_head_output = roi_heads(
+            rpn_outputs, p2, p3, p4, p5, second_nms_threshold
+        )
+        # Append segmentation head output.
+        box_head_outputs.append(mask_head_output)
+        # Set graph outputs, both bbox and segmentation heads.
+        self.graph.outputs = box_head_outputs
+        self.sanitize()
+
+
+def main(args):
+    """
+    Drive the graph surgeon from start to finish: load the exported model, update its preprocessor,
+    generate anchors from the sample image, process the graph and save the result.
+    :param args: Parsed command line arguments. The attributes read here are exported_onnx, det2_config,
+    det2_weights, batch_size, sample_image, first_nms_threshold, second_nms_threshold and onnx.
+    """
+    surgeon = DET2GraphSurgeon(
+        args.exported_onnx, args.det2_config, args.det2_weights
+    )
+    # The preprocessor is updated for the requested batch size before the anchors are generated.
+    surgeon.update_preprocessor(args.batch_size)
+    sample_anchors = surgeon.get_anchors(args.sample_image)
+    surgeon.process_graph(
+        sample_anchors,
+        args.first_nms_threshold,
+        args.second_nms_threshold,
+    )
+    surgeon.save(args.onnx)
+
+
+if __name__ == "__main__":
+    # Command line entry point. Options are declared as (flags, keyword arguments) pairs and registered
+    # in the listed order, which is also the order in which they show up in the help output.
+    parser = argparse.ArgumentParser()
+    cli_options = (
+        (
+            ("-i", "--exported_onnx"),
+            {"help": "The exported to ONNX Detectron 2 Mask R-CNN", "type": str},
+        ),
+        (
+            ("-o", "--onnx"),
+            {"help": "The output ONNX model file to write", "type": str},
+        ),
+        (
+            ("-c", "--det2_config"),
+            {"help": "The Detectron 2 config file (.yaml) for the model", "type": str},
+        ),
+        (
+            ("-w", "--det2_weights"),
+            {"help": "The Detectron 2 model weights (.pkl)", "type": str},
+        ),
+        (
+            ("-s", "--sample_image"),
+            {"help": "Sample image for anchors generation", "type": str},
+        ),
+        (
+            ("-b", "--batch_size"),
+            {"help": "Batch size for the model", "type": int, "default": 1},
+        ),
+        (
+            ("-t1", "--first_nms_threshold"),
+            {
+                "help": "Override the score threshold for the 1st NMS operation",
+                "type": float,
+            },
+        ),
+        (
+            ("-t2", "--second_nms_threshold"),
+            {
+                "help": "Override the score threshold for the 2nd NMS operation",
+                "type": float,
+            },
+        ),
+    )
+    for option_flags, option_kwargs in cli_options:
+        parser.add_argument(*option_flags, **option_kwargs)
+    args = parser.parse_args()
+
+    # These five options have no default, so an omitted (or empty) value is falsy here. They are checked
+    # by hand so that the full help text is printed before exiting with status 1.
+    mandatory_values = (
+        args.exported_onnx,
+        args.onnx,
+        args.det2_config,
+        args.det2_weights,
+        args.sample_image,
+    )
+    if not all(mandatory_values):
+        parser.print_help()
+        print(
+            "\nThese arguments are required: --exported_onnx --onnx --det2_config --det2_weights and --sample_image"
+        )
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/eval_coco.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/eval_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..7afb611681fd07e3aba47b737e166f490869a0ad
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/eval_coco.py
@@ -0,0 +1,161 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+import numpy as np
+import torch
+from PIL import Image
+from infer import TensorRTInfer
+from image_batcher import ImageBatcher
+
+try:
+    from detectron2.config import get_cfg
+    from detectron2.data import MetadataCatalog
+    from detectron2.evaluation import COCOEvaluator
+    from detectron2.structures import Instances, Boxes, ROIMasks
+except ImportError:
+    print("Could not import Detectron 2 modules. Maybe you did not install Detectron 2")
+    print(
+        "Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md"
+    )
+    sys.exit(1)
+
+
+def build_evaluator(dataset_name):
+    """
+    Build the Detectron 2 evaluator for the given dataset.
+    Only datasets whose metadata declares the "coco" evaluator type are handled.
+    :param dataset_name: The dataset name to look up in Detectron 2's MetadataCatalog.
+    :return: A COCOEvaluator constructed for dataset_name.
+    :raises NotImplementedError: If the dataset's evaluator type is anything other than "coco".
+    """
+    dataset_evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
+    # Guard clause: reject every evaluator type this sample does not implement.
+    if dataset_evaluator_type != "coco":
+        raise NotImplementedError("Evaluator type is not supported")
+    return COCOEvaluator(dataset_name)
+
+
+def setup(config_file, weights):
+    """
+    Build a frozen Detectron 2 config.
+    :param config_file: The config file merged on top of the defaults returned by get_cfg().
+    :param weights: The value assigned to the MODEL.WEIGHTS key.
+    :return: The resulting config object, after freeze() has been called on it.
+    """
+    config = get_cfg()
+    config.merge_from_file(config_file)
+    # Key/value override list, applied after the file so it takes precedence.
+    weight_override = ["MODEL.WEIGHTS", weights]
+    config.merge_from_list(weight_override)
+    config.freeze()
+    return config
+
+
+def main(args):
+    """
+    Run the TensorRT engine over the input images and hand every image's detections to the Detectron 2
+    evaluator built for the first dataset listed in cfg.DATASETS.TEST.
+    :param args: Parsed command line arguments. The attributes read here are det2_config, det2_weights,
+    engine, input, nms_threshold and iou_threshold.
+    :raises ValueError: If an image file name (without extension) is not an integer, since it is used as
+    the image ID.
+    """
+    # Set up Detectron 2 config and build evaluator.
+    cfg = setup(args.det2_config, args.det2_weights)
+    dataset_name = cfg.DATASETS.TEST[0]
+    evaluator = build_evaluator(dataset_name)
+    evaluator.reset()
+
+    trt_infer = TensorRTInfer(args.engine)
+    batcher = ImageBatcher(
+        args.input, *trt_infer.input_spec(), config_file=args.det2_config
+    )
+
+    for batch, images, scales in batcher.get_batch():
+        print(
+            "Processing Image {} / {}".format(batcher.image_index, batcher.num_images),
+            end="\r",
+        )
+        detections = trt_infer.infer(batch, scales, args.nms_threshold)
+        # Only the first len(images) entries of detections belong to real images.
+        for image_path, image_detections in zip(images, detections):
+            # Get inference image resolution. Only the size is needed, so the file is closed right away
+            # instead of leaving one open handle per evaluated image.
+            with Image.open(image_path) as infer_im:
+                im_width, im_height = infer_im.size
+            # Number of detections.
+            num_instances = len(image_detections)
+            # Reserve numpy array to hold all mask predictions per image.
+            # NOTE(review): 28x28 is hard-coded; presumably it matches the engine's mask output
+            # resolution - confirm before using a model with a different mask size.
+            pred_masks = np.empty((num_instances, 28, 28), dtype=np.float32)
+            # Image ID, required for Detectron 2 evaluations.
+            source_id = int(os.path.splitext(os.path.basename(image_path))[0])
+            pred_boxes = []
+            scores = []
+            pred_classes = []
+            # Loop over every single detection.
+            for n, det in enumerate(image_detections):
+                # The coordinate order is kept exactly as produced by TensorRTInfer.infer().
+                pred_boxes.append([det["ymin"], det["xmin"], det["ymax"], det["xmax"]])
+                scores.append(det["score"])
+                pred_classes.append(det["class"])
+                pred_masks[n] = det["mask"]
+            # Create new Instances object required for Detectron 2 evaluations and add:
+            # boxes, scores, pred_classes, pred_masks.
+            image_shape = (im_height, im_width)
+            instances = Instances(image_shape)
+            instances.pred_boxes = Boxes(pred_boxes)
+            instances.scores = torch.tensor(scores)
+            instances.pred_classes = torch.tensor(pred_classes)
+            roi_masks = ROIMasks(torch.tensor(pred_masks))
+            instances.pred_masks = roi_masks.to_bitmasks(
+                instances.pred_boxes, im_height, im_width, args.iou_threshold
+            ).tensor
+            # Process evaluations per image.
+            image_dict = [{"instances": instances}]
+            input_dict = [{"image_id": source_id}]
+            evaluator.process(input_dict, image_dict)
+
+    # Final evaluations, generation of mAP accuracy performance.
+    evaluator.evaluate()
+
+
+if __name__ == "__main__":
+    # Command line entry point for the COCO evaluation script.
+    parser = argparse.ArgumentParser()
+    add_option = parser.add_argument
+    add_option("-e", "--engine", help="The TensorRT engine to infer with.")
+    add_option(
+        "-i",
+        "--input",
+        help="The input to infer, either a single image path, or a directory of images.",
+    )
+    add_option(
+        "-c",
+        "--det2_config",
+        type=str,
+        help="The Detectron 2 config file (.yaml) for the model",
+    )
+    add_option(
+        "-w", "--det2_weights", type=str, help="The Detectron 2 model weights (.pkl)"
+    )
+    add_option(
+        "-t",
+        "--nms_threshold",
+        type=float,
+        help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.",
+    )
+    add_option(
+        "--iou_threshold",
+        type=float,
+        default=0.5,
+        help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. Pixel values more than threshold will become 1, less 0.",
+    )
+    args = parser.parse_args()
+
+    # None of these four options has a default, so a missing one is falsy. They are checked by hand so
+    # that the full help text is printed before exiting with status 1.
+    have_all_required = (
+        args.engine and args.input and args.det2_config and args.det2_weights
+    )
+    if not have_all_required:
+        parser.print_help()
+        print(
+            "\nThese arguments are required: --engine --input --det2_config and --det2_weights"
+        )
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/image_batcher.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/image_batcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fb1d90a31dfec417d0c38ffa096d5feaf9396c8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/image_batcher.py
@@ -0,0 +1,222 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import numpy as np
+from PIL import Image
+
+try:
+    from detectron2.config import get_cfg
+except ImportError:
+    print("Could not import Detectron 2 modules. Maybe you did not install Detectron 2")
+    print(
+        "Please install Detectron 2, check https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md"
+    )
+    sys.exit(1)
+
+
+class ImageBatcher:
+    """
+    Creates batches of pre-processed images.
+    """
+
+    def __init__(
+        self,
+        input,
+        shape,
+        dtype,
+        max_num_images=None,
+        exact_batches=False,
+        config_file=None,
+    ):
+        """
+        :param input: The input directory to read images from, or the path of a single image file.
+        :param shape: The tensor shape of the batch to prepare, either in NCHW or NHWC format.
+        :param dtype: The (numpy) datatype to cast the batched data to.
+        :param max_num_images: The maximum number of images to read from the directory.
+        :param exact_batches: This defines how to handle a number of images that is not an exact multiple of the batch
+        size. If false, it will pad the final batch with zeros to reach the batch size. If true, it will *remove* the
+        last few images in excess of a batch size multiple, to guarantee batches are exact (useful for calibration).
+        :param config_file: The path pointing to the Detectron 2 yaml file which describes the model.
+        The process exits with status 1 when no usable image is found or no batch can be formed.
+        """
+
+        def det2_setup(config_file):
+            """
+            Create the Detectron 2 config, merge the optional yaml file into it and freeze it.
+            """
+            cfg = get_cfg()
+            if config_file is not None:
+                cfg.merge_from_file(config_file)
+            cfg.freeze()
+            return cfg
+
+        # Set up Detectron 2 model configuration.
+        self.det2_cfg = det2_setup(config_file)
+
+        # Extract min and max dimensions for testing.
+        self.min_size_test = self.det2_cfg.INPUT.MIN_SIZE_TEST
+        self.max_size_test = self.det2_cfg.INPUT.MAX_SIZE_TEST
+
+        # Find images in the given input path.
+        input = os.path.realpath(input)
+        extensions = [".jpg", ".jpeg", ".png", ".bmp", ".ppm"]
+
+        def is_image(path):
+            return (
+                os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions
+            )
+
+        self.images = []
+        if os.path.isdir(input):
+            candidates = (os.path.join(input, f) for f in os.listdir(input))
+            # Sorted so that the batch order is deterministic across runs.
+            self.images = sorted(path for path in candidates if is_image(path))
+        elif os.path.isfile(input) and is_image(input):
+            self.images.append(input)
+        self.num_images = len(self.images)
+        if self.num_images < 1:
+            print("No valid {} images found in {}".format("/".join(extensions), input))
+            sys.exit(1)
+
+        # Handle Tensor Shape.
+        # NOTE(review): preprocess_image() always returns CHW data, so an NHWC batch shape is detected
+        # here but does not look usable by get_batch() - confirm before relying on it.
+        self.dtype = dtype
+        self.shape = shape
+        assert len(self.shape) == 4
+        self.batch_size = shape[0]
+        assert self.batch_size > 0
+        self.format = None
+        self.width = -1
+        self.height = -1
+        if self.shape[1] == 3:
+            self.format = "NCHW"
+            self.height = self.shape[2]
+            self.width = self.shape[3]
+        elif self.shape[3] == 3:
+            self.format = "NHWC"
+            self.height = self.shape[1]
+            self.width = self.shape[2]
+        assert all([self.format, self.width > 0, self.height > 0])
+
+        # Adapt the number of images as needed.
+        if max_num_images and 0 < max_num_images < len(self.images):
+            self.num_images = max_num_images
+        if exact_batches:
+            self.num_images = self.batch_size * (self.num_images // self.batch_size)
+        if self.num_images < 1:
+            print("Not enough images to create batches")
+            sys.exit(1)
+        self.images = self.images[0 : self.num_images]
+
+        # Subdivide the list of images into batches. Integer ceiling division; the last batch may hold
+        # fewer than batch_size images.
+        self.num_batches = 1 + (self.num_images - 1) // self.batch_size
+        self.batches = [
+            self.images[start : start + self.batch_size]
+            for start in range(0, self.num_images, self.batch_size)
+        ]
+
+        # Indices.
+        self.image_index = 0
+        self.batch_index = 0
+
+    def preprocess_image(self, image_path):
+        """
+        The image preprocessor loads an image from disk and prepares it as needed for batching. This includes
+        conversion to RGB, resizing, padding, casting to float32, transposing to CHW and reordering the channels
+        to BGR. No mean/std normalization is applied here.
+        This Image Batcher implements one algorithm for now:
+        * Resizes and pads the image to fit the input size.
+        :param image_path: The path to the image on disk to load.
+        :return: Two values: A numpy array holding the image sample, ready to be concatenated into the rest of the
+        batch, and the resize scale used.
+        """
+
+        def resize_pad(image, pad_color=(0, 0, 0)):
+            """
+            A subroutine to implement padding and resizing. The image is resized following the min/max test sizes
+            of the Detectron 2 config and pasted at the top-left corner of a canvas of the input size, so the
+            remaining bottom-right portions keep the value provided.
+            :param image: The PIL image object
+            :pad_color: The RGB values to use for the padded area. Default: Black/Zeros.
+            :return: Two values: The PIL image object already padded and cropped, and the resize scale used.
+            """
+
+            # Get characteristics.
+            width, height = image.size
+
+            # Replicates behavior of ResizeShortestEdge augmentation.
+            size = self.min_size_test * 1.0
+            pre_scale = size / min(height, width)
+            if height < width:
+                newh, neww = size, pre_scale * width
+            else:
+                newh, neww = pre_scale * height, size
+
+            # If delta between min and max dimensions is so that max sized dimension reaches self.max_size_test
+            # before min dimension reaches self.min_size_test, keeping the same aspect ratio. We still need to
+            # maintain the same aspect ratio and keep max dimension at self.max_size_test.
+            if max(newh, neww) > self.max_size_test:
+                pre_scale = self.max_size_test * 1.0 / max(newh, neww)
+                newh = newh * pre_scale
+                neww = neww * pre_scale
+            neww = int(neww + 0.5)
+            newh = int(newh + 0.5)
+
+            # Scaling factor for normalized box coordinates scaling in post-processing.
+            scaling = max(newh / height, neww / width)
+
+            # Padding: create the canvas already filled with the pad color, then paste the resized image
+            # at the top-left corner.
+            image = image.resize((neww, newh), resample=Image.BILINEAR)
+            pad = Image.new("RGB", (self.width, self.height), pad_color)
+            pad.paste(image)
+            return pad, scaling
+
+        # convert() returns a new image, so the source file can be closed as soon as it is done.
+        with Image.open(image_path) as source:
+            image = source.convert(mode="RGB")
+        # Pad with mean values of COCO dataset, since padding is applied before actual model's
+        # preprocessor steps (Sub, Div ops), we need to pad with mean values in order to reverse
+        # the effects of Sub and Div, so that padding after model's preprocessor will be with actual 0s.
+        image, scale = resize_pad(image, (124, 116, 104))
+        image = np.asarray(image, dtype=np.float32)
+        # Change HWC -> CHW.
+        image = np.transpose(image, (2, 0, 1))
+        # Change RGB -> BGR.
+        return image[[2, 1, 0]], scale
+
+    def get_batch(self):
+        """
+        Retrieve the batches. This is a generator object, so you can use it within a loop as:
+        for batch, images, scales in batcher.get_batch():
+           ...
+        Or outside of a batch with the next() function.
+        :return: A generator yielding three items per iteration: a numpy array holding a batch of images, the list of
+        paths to the images loaded within this batch, and the list of resize scales for each image in the batch.
+        """
+        for batch_images in self.batches:
+            # Slots beyond len(batch_images) stay zero, which pads an incomplete final batch.
+            batch_data = np.zeros(self.shape, dtype=self.dtype)
+            batch_scales = [None] * len(batch_images)
+            for slot, image in enumerate(batch_images):
+                self.image_index += 1
+                batch_data[slot], batch_scales[slot] = self.preprocess_image(image)
+            self.batch_index += 1
+            yield batch_data, batch_images, batch_scales
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/infer.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..d086fb764ac8c8cc40105bc98010f9ddd1e3e164
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/infer.py
@@ -0,0 +1,325 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+import numpy as np
+import tensorrt as trt
+from cuda import cudart
+from image_batcher import ImageBatcher
+from visualize import visualize_detections
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+
+class TensorRTInfer:
+    """
+    Implements inference for the Model TensorRT engine.
+    """
+
+    def __init__(self, engine_path):
+        """
+        Deserialize the engine, create an execution context and allocate one device buffer per I/O tensor.
+        :param engine_path: The path to the serialized engine to load from disk.
+        """
+
+        # Load TRT engine
+        self.logger = trt.Logger(trt.Logger.ERROR)
+        trt.init_libnvinfer_plugins(self.logger, namespace="")
+        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
+            assert runtime
+            self.engine = runtime.deserialize_cuda_engine(f.read())
+        assert self.engine
+        self.context = self.engine.create_execution_context()
+        assert self.context
+
+        # Setup I/O bindings
+        self.inputs = []
+        self.outputs = []
+        self.allocations = []
+        for i in range(self.engine.num_io_tensors):
+            name = self.engine.get_tensor_name(i)
+            is_input = self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+            np_dtype = np.dtype(trt.nptype(self.engine.get_tensor_dtype(name)))
+            shape = self.engine.get_tensor_shape(name)
+            if is_input:
+                # The first dimension of an input tensor is taken as the batch size.
+                self.batch_size = shape[0]
+            # Buffer size in bytes: item size multiplied by every dimension.
+            size = np_dtype.itemsize
+            for s in shape:
+                size *= s
+            allocation = common.cuda_call(cudart.cudaMalloc(size))
+            binding = {
+                "index": i,
+                "name": name,
+                "dtype": np_dtype,
+                "shape": list(shape),
+                "allocation": allocation,
+                "size": size,
+            }
+            self.allocations.append(allocation)
+            if is_input:
+                self.inputs.append(binding)
+            else:
+                self.outputs.append(binding)
+
+        assert self.batch_size > 0
+        assert len(self.inputs) > 0
+        assert len(self.outputs) > 0
+        assert len(self.allocations) > 0
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        return self.inputs[0]["shape"], self.inputs[0]["dtype"]
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+        """
+        return [(o["shape"], o["dtype"]) for o in self.outputs]
+
+    def infer(self, batch, scales=None, nms_threshold=None):
+        """
+        Execute inference on a batch of images. The images should already be batched and preprocessed, as prepared by
+        the ImageBatcher class. Memory copying to and from the GPU device will be performed here.
+        :param batch: A numpy array holding the image batch.
+        :param scales: The image resize scales for each image in this batch. Default: No scale postprocessing applied.
+        It may hold fewer entries than the batch size (padded final batch); images without an entry are not
+        rescaled.
+        :param nms_threshold: Optional score threshold; detections scoring below it are dropped. A value of None
+        (or 0) keeps every detection reported by the engine.
+        :return: A nested list for each image in the batch and each detection in the list. Every detection is a
+        dict with the keys ymin, xmin, ymax, xmax, score, class and mask.
+        """
+
+        # Prepare the output data.
+        outputs = [np.zeros(shape, dtype) for shape, dtype in self.output_spec()]
+
+        # Process I/O and execute the network.
+        common.memcpy_host_to_device(
+            self.inputs[0]["allocation"], np.ascontiguousarray(batch)
+        )
+
+        self.context.execute_v2(self.allocations)
+        for o in range(len(outputs)):
+            common.memcpy_device_to_host(outputs[o], self.outputs[o]["allocation"])
+
+        # Process the results. The engine outputs are read positionally.
+        nums = outputs[0]
+        boxes = outputs[1]
+        scores = outputs[2]
+        pred_classes = outputs[3]
+        masks = outputs[4]
+
+        detections = []
+        for i in range(self.batch_size):
+            detections.append([])
+
+            # Calculate scaling values for bboxes. This only depends on the image, so it is computed once
+            # per image. The resize scale is applied only when one was supplied for this image; without
+            # this guard a missing scales argument or a padded final batch raised an exception.
+            scale = self.inputs[0]["shape"][2]
+            if scales is not None and i < len(scales):
+                scale /= scales[i]
+            scale_y = scale
+            scale_x = scale
+
+            for n in range(int(nums[i])):
+                if nms_threshold and scores[i][n] < nms_threshold:
+                    continue
+                # Append to detections
+                detections[i].append(
+                    {
+                        "ymin": boxes[i][n][0] * scale_y,
+                        "xmin": boxes[i][n][1] * scale_x,
+                        "ymax": boxes[i][n][2] * scale_y,
+                        "xmax": boxes[i][n][3] * scale_x,
+                        "score": scores[i][n],
+                        "class": int(pred_classes[i][n]),
+                        "mask": masks[i][n],
+                    }
+                )
+        return detections
+
+
+def main(args):
+    """
+    Run the engine over the input images and write, for every image, a visualization (<basename>.png) and a
+    tab-separated text file (<basename>.txt) with one line per detection: xmin, ymin, xmax, ymax, score, class.
+    :param args: Parsed command line arguments. The attributes read here are output, engine, input,
+    det2_config, nms_threshold and iou_threshold.
+    """
+    # Both kinds of results are written below this resolved directory.
+    output_dir = os.path.realpath(args.output)
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Class names handed to the visualizer.
+    labels = [
+        "person", "bicycle", "car", "motorcycle",
+        "airplane", "bus", "train", "truck",
+        "boat", "traffic light", "fire hydrant", "stop sign",
+        "parking meter", "bench", "bird", "cat",
+        "dog", "horse", "sheep", "cow",
+        "elephant", "bear", "zebra", "giraffe",
+        "backpack", "umbrella", "handbag", "tie",
+        "suitcase", "frisbee", "skis", "snowboard",
+        "sports ball", "kite", "baseball bat", "baseball glove",
+        "skateboard", "surfboard", "tennis racket", "bottle",
+        "wine glass", "cup", "fork", "knife",
+        "spoon", "bowl", "banana", "apple",
+        "sandwich", "orange", "broccoli", "carrot",
+        "hot dog", "pizza", "donut", "cake",
+        "chair", "couch", "potted plant", "bed",
+        "dining table", "toilet", "tv", "laptop",
+        "mouse", "remote", "keyboard", "cell phone",
+        "microwave", "oven", "toaster", "sink",
+        "refrigerator", "book", "clock", "vase",
+        "scissors", "teddy bear", "hair drier", "toothbrush",
+    ]
+
+    trt_infer = TensorRTInfer(args.engine)
+    batcher = ImageBatcher(
+        args.input, *trt_infer.input_spec(), config_file=args.det2_config
+    )
+    for batch, images, scales in batcher.get_batch():
+        print(
+            "Processing Image {} / {}".format(batcher.image_index, batcher.num_images),
+            end="\r",
+        )
+        detections = trt_infer.infer(batch, scales, args.nms_threshold)
+        for i in range(len(images)):
+            basename = os.path.splitext(os.path.basename(images[i]))[0]
+            # Image Visualizations
+            output_path = os.path.join(output_dir, "{}.png".format(basename))
+            visualize_detections(
+                images[i], output_path, detections[i], labels, args.iou_threshold
+            )
+            # Text Results: one tab-separated line per detection.
+            result_lines = []
+            for d in detections[i]:
+                fields = (
+                    d["xmin"],
+                    d["ymin"],
+                    d["xmax"],
+                    d["ymax"],
+                    d["score"],
+                    d["class"],
+                )
+                result_lines.append("\t".join(str(f) for f in fields) + "\n")
+            # Written next to the visualization, using the same resolved directory.
+            with open(os.path.join(output_dir, "{}.txt".format(basename)), "w") as f:
+                f.write("".join(result_lines))
+    print()
+    print("Finished Processing")
+
+
+if __name__ == "__main__":
+    # Command line entry point. Options are declared as (flags, keyword arguments) pairs and registered
+    # in the listed order, which is also the order in which they show up in the help output.
+    parser = argparse.ArgumentParser()
+    cli_options = [
+        (
+            ("-e", "--engine"),
+            {"default": None, "help": "The serialized TensorRT engine"},
+        ),
+        (
+            ("-i", "--input"),
+            {"default": None, "help": "Path to the image or directory to process"},
+        ),
+        (
+            ("-c", "--det2_config"),
+            {"help": "The Detectron 2 config file (.yaml) for the model", "type": str},
+        ),
+        (
+            ("-o", "--output"),
+            {
+                "default": None,
+                "help": "Directory where to save the visualization results",
+            },
+        ),
+        (
+            ("-t", "--nms_threshold"),
+            {
+                "type": float,
+                "help": "Override the score threshold for the NMS operation, if higher than the threshold in the engine.",
+            },
+        ),
+        (
+            ("--iou_threshold",),
+            {
+                "default": 0.5,
+                "type": float,
+                "help": "Select the IoU threshold for the mask segmentation. Range is 0 to 1. Pixel values more than threshold will become 1, less 0",
+            },
+        ),
+    ]
+    for option_flags, option_kwargs in cli_options:
+        parser.add_argument(*option_flags, **option_kwargs)
+    args = parser.parse_args()
+
+    # The four options below default to None, so a missing one is falsy. They are checked by hand so
+    # that the full help text is printed before exiting with status 1.
+    mandatory_values = (args.engine, args.input, args.output, args.det2_config)
+    if not all(mandatory_values):
+        parser.print_help()
+        print(
+            "\nThese arguments are required: --engine --input --output and --det2_config"
+        )
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/onnx_utils.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/onnx_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2144fea086085c778e1c793d545ab1eafe0e7c11
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/onnx_utils.py
@@ -0,0 +1,332 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+import numpy as np
+import onnx_graphsurgeon as gs
+
+# Root logging is configured at INFO, and so is the "ModelHelper" logger that the
+# graph helpers in this module write their messages to.
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("ModelHelper")
+log.setLevel(logging.INFO)
+
+
+@gs.Graph.register()
+def op_with_const(self, op, name, input, value):
+    """
+    Append a two-input node (for example "Add" or "Mul") that combines a tensor with a constant.
+    :param self: The gs.Graph object being extended.
+    :param op: The ONNX operation to perform, i.e. "Add" or "Mul".
+    :param name: The name to use for the node; the constant and output tensor names are derived from it.
+    :param input: The tensor to operate on, either a gs.Variable or an indexable whose first item is used.
+    :param value: The value array to operate with; its squeeze() result is included in the debug message.
+    :return: The result of self.layer() for the new node.
+    """
+    if type(input) is gs.Variable:
+        lhs = input
+    else:
+        # Anything that is not exactly a gs.Variable is indexed and its first entry is used.
+        lhs = input[0]
+    log.debug("Created {} node '{}': {}".format(op, name, value.squeeze()))
+    rhs = gs.Constant(name="{}_value:0".format(name), values=value)
+    return self.layer(name=name, op=op, inputs=[lhs, rhs], outputs=[name + ":0"])
+
+
+@gs.Graph.register()
+def matmul(self, name, input, value):
+    """
+    Append a MatMul node that multiplies a tensor by a constant matrix.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node; the constant and output tensor names are derived from it.
+    :param input: The tensor to operate on, either a gs.Variable or an indexable whose first item is used.
+    :param value: The linear transformation matrix to operate with; its squeeze() result is logged.
+    :return: The result of self.layer() for the new node.
+    """
+    if type(input) is gs.Variable:
+        lhs = input
+    else:
+        # Anything that is not exactly a gs.Variable is indexed and its first entry is used.
+        lhs = input[0]
+    log.debug("Created {} node '{}': {}".format("MatMul", name, value.squeeze()))
+    matrix = gs.Constant(name="{}_value:0".format(name), values=value)
+    return self.layer(op="MatMul", name=name, inputs=[lhs, matrix], outputs=[name + ":0"])
+
+
+@gs.Graph.register()
+def clip(self, name, input, clip_min, clip_max):
+    """
+    Add Clip operation to the graph which will limit the values of the input tensor to a range.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node; the constant and output tensor names are derived from it.
+    :param input: The tensor to operate on, either a gs.Variable or an indexable whose first item is used.
+    :param clip_min: Minimum value to include, less is clipped.
+    :param clip_max: Maximum value to include, more is clipped.
+    :return: The result of self.layer() for the new node.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created {} node '{}'".format("Clip", name))
+    # Both bounds are supplied to the node as one-element float32 constants.
+    const_min = gs.Constant(
+        name="{}_value:0".format(name), values=np.asarray([clip_min], dtype=np.float32)
+    )
+    const_max = gs.Constant(
+        name="{}_value:1".format(name), values=np.asarray([clip_max], dtype=np.float32)
+    )
+    return self.layer(
+        name=name,
+        op="Clip",
+        inputs=[input_tensor, const_min, const_max],
+        outputs=[name + ":0"],
+    )
+
+
+@gs.Graph.register()
+def slice(self, name, input, starts, ends, axes):
+    """
+    Add Slice operation to the graph which will operate on the input tensor with the value(s) given.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node; the constant and output tensor names are derived from it.
+    :param input: The tensor to operate on, either a gs.Variable or an indexable whose first item is used.
+    :param starts: Index (or sequence of indices, one per axis) at which Slice starts.
+    :param ends: Index (or sequence of indices, one per axis) at which Slice ends.
+    :param axes: Axis (or sequence of axes) on which Slice operation should be performed.
+    :return: The result of self.layer() for the new node.
+    """
+
+    def _index_constant(suffix, indices):
+        # ONNX Slice takes 1-D int64 tensors: a scalar becomes a one-element vector
+        # and a sequence is flattened rather than wrapped in an extra dimension.
+        return gs.Constant(
+            name="{}_value:{}".format(name, suffix),
+            values=np.asarray(indices, dtype=np.int64).reshape(-1),
+        )
+
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created {} node '{}'".format("Slice", name))
+    const_start = _index_constant(0, starts)
+    const_end = _index_constant(1, ends)
+    const_axes = _index_constant(2, axes)
+    return self.layer(
+        name=name,
+        op="Slice",
+        inputs=[input_tensor, const_start, const_end, const_axes],
+        outputs=[name + ":0"],
+    )
+
+
+@gs.Graph.register()
+def unsqueeze(self, name, input, axes=[3]):
+    """
+    Adds to the graph an Unsqueeze node for the given axes and to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node; the output tensor name is derived from it.
+    :param input: The tensor to be "unsqueezed", either a gs.Variable or an indexable whose first item is used.
+    :param axes: A list of axes on which to add the new dimension(s). A list is copied before it is stored on
+        the node, so nodes never share the default list object (or the caller's list) with each other.
+    :return: The result of self.layer() for the new node.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created Unsqueeze node '{}': {}".format(name, axes))
+    # Only lists are copied; any other value is handed over unchanged, as before.
+    axes_attr = list(axes) if isinstance(axes, list) else axes
+    return self.layer(
+        name=name,
+        op="Unsqueeze",
+        inputs=[input_tensor],
+        outputs=[name + ":0"],
+        attrs={"axes": axes_attr},
+    )
+
+
+@gs.Graph.register()
+def squeeze(self, name, input, axes=[2]):
+    """
+    Adds to the graph a Squeeze node for the given axes and to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node; the output tensor name is derived from it.
+    :param input: The tensor to be "squeezed", either a gs.Variable or an indexable whose first item is used.
+    :param axes: A list of axes on which to remove a dimension(s). A list is copied before it is stored on
+        the node, so nodes never share the default list object (or the caller's list) with each other.
+    :return: The result of self.layer() for the new node.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created Squeeze node '{}': {}".format(name, axes))
+    # Only lists are copied; any other value is handed over unchanged, as before.
+    axes_attr = list(axes) if isinstance(axes, list) else axes
+    return self.layer(
+        name=name,
+        op="Squeeze",
+        inputs=[input_tensor],
+        outputs=[name + ":0"],
+        attrs={"axes": axes_attr},
+    )
+
+
+@gs.Graph.register()
+def gather(self, name, data, indices, axes=0):
+    """
+    Adds to the graph a Gather node for the given axis and to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node; the output tensor name is derived from it.
+    :param data: Data from which to gather specific tensors, either a gs.Variable or an indexable whose
+        first item is used.
+    :param indices: Indices by which to gather data tensors, either a gs.Variable or an indexable whose
+        first item is used.
+    :param axes: The axis on which to perform the gather operation. The parameter keeps its historical
+        name, but the value is written to the node as the ONNX Gather "axis" attribute.
+    :return: The result of self.layer() for the new node.
+    """
+    data_tensor = data if type(data) is gs.Variable else data[0]
+    indices_tensor = indices if type(indices) is gs.Variable else indices[0]
+    log.debug("Created Gather node '{}': {}".format(name, axes))
+    return self.layer(
+        name=name,
+        op="Gather",
+        inputs=[data_tensor, indices_tensor],
+        outputs=[name + ":0"],
+        # ONNX Gather defines "axis"; an "axes" attribute is not part of the operator.
+        attrs={"axis": axes},
+    )
+
+
+@gs.Graph.register()
+def transpose(self, name, input, perm):
+    """
+    Append a Transpose node that reorders the axes of the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node; the output tensor name is derived from it.
+    :param input: The tensor to be transposed, either a gs.Variable or an indexable whose first item is used.
+    :param perm: A list of axes defining their order after transposing occurs.
+    :return: The result of self.layer() for the new node.
+    """
+    if type(input) is gs.Variable:
+        source = input
+    else:
+        # Anything that is not exactly a gs.Variable is indexed and its first entry is used.
+        source = input[0]
+    log.debug("Created Transpose node '{}': {}".format(name, perm))
+    node_attrs = {"perm": perm}
+    return self.layer(op="Transpose", name=name, inputs=[source], outputs=[name + ":0"], attrs=node_attrs)
+
+
+@gs.Graph.register()
+def sigmoid(self, name, input):
+    """
+    Append a Sigmoid node that is applied to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node; the output tensor name is derived from it.
+    :param input: The tensor to be applied to, either a gs.Variable or an indexable whose first item is used.
+    :return: The result of self.layer() for the new node.
+    """
+    if type(input) is gs.Variable:
+        source = input
+    else:
+        # Anything that is not exactly a gs.Variable is indexed and its first entry is used.
+        source = input[0]
+    log.debug("Created Sigmoid node '{}'".format(name))
+    return self.layer(op="Sigmoid", name=name, inputs=[source], outputs=[name + ":0"])
+
+
+@gs.Graph.register()
+def plugin(self, op, name, inputs: list, outputs: list, attrs):
+    """
+    Append a TensorRT plugin node; every argument is forwarded unchanged to self.layer().
+    :param self: The gs.Graph object being extended.
+    :param op: The registered name for the TensorRT plugin.
+    :param name: The name to use for the node.
+    :param inputs: The list of tensors to use as inputs.
+    :param outputs: The list of tensors to use as outputs.
+    :param attrs: The dictionary to use as attributes; it is also written to the debug log.
+    :return: The result of self.layer() for the new node.
+    """
+    log.debug("Created TRT Plugin node '{}': {}".format(name, attrs))
+    node_spec = {
+        "op": op,
+        "name": name,
+        "inputs": inputs,
+        "outputs": outputs,
+        "attrs": attrs,
+    }
+    return self.layer(**node_spec)
+
+
+@gs.Graph.register()
+def find_node_by_op(self, op):
+    """
+    Scan self.nodes in order and return the first node whose op equals the given operation name.
+    :param self: The gs.Graph object being extended.
+    :param op: The operation name to search for.
+    :return: The first node that performs that op, or None when no node does.
+    """
+    matches = (candidate for candidate in self.nodes if candidate.op == op)
+    return next(matches, None)
+
+
+@gs.Graph.register()
+def find_node_by_op_name(self, op, name):
+    """
+    Scan self.nodes in order and return the first node matching both the operation name and the node name.
+    :param self: The gs.Graph object being extended.
+    :param op: The operation name to search for.
+    :param name: Selected node name.
+    :return: The first node with that op and that name, or None when no node matches.
+    """
+    matches = (
+        candidate
+        for candidate in self.nodes
+        if candidate.op == op and candidate.name == name
+    )
+    return next(matches, None)
+
+
+@gs.Graph.register()
+def find_node_by_op_input_output_name(
+    self, op, input_name, output_name, input_pos=0, output_pos=0
+):
+    """
+    Finds the first node in the graph with the given operation name whose selected input and output
+    tensors carry the given names.
+    :param self: The gs.Graph object being extended.
+    :param op: The operation name to search for.
+    :param input_name: Selected input's name.
+    :param output_name: Selected output's name.
+    :param input_pos: Which input to consider, default is 0.
+    :param output_pos: Which output to consider, default is 0.
+    :return: The first node matching all three criteria, or None when no node matches. Nodes that do not
+        have an input at input_pos or an output at output_pos are skipped.
+    """
+    for node in self.nodes:
+        if node.op != op:
+            continue
+        try:
+            input_tensor = node.inputs[input_pos]
+            output_tensor = node.outputs[output_pos]
+        except IndexError:
+            # This node has too few inputs or outputs to be the one asked for.
+            continue
+        if input_tensor.name == input_name and output_tensor.name == output_name:
+            return node
+    return None
+
+
+@gs.Graph.register()
+def find_descendant_by_op(self, node, op, depth=10):
+    """
+    Starting from the given node, finds a node lower in the graph matching the given operation name.
+    The search is breadth-first. It begins at the first consumer of the node's first output and, from every
+    visited node, continues through all consumers of that node's first output. Other outputs are not
+    followed, so this is not an exhaustive graph search.
+    :param self: The gs.Graph object being extended.
+    :param node: The node to start searching from.
+    :param op: The operation name to search for.
+    :param depth: A value of zero or less disables the search. Any positive value performs the same full
+        traversal; the value is not used as a cut-off, so that searches which succeed today keep returning
+        the same node. NOTE(review): confirm whether a real limit is wanted before enforcing one.
+    :return: The first descendant node (in breadth-first order) that performs that op, or None when the
+        start node has no consumer or no reachable node matches.
+    """
+    if depth <= 0 or not node.outputs or not node.outputs[0].outputs:
+        return None
+    pending = [node.o()]
+    # Nodes are tracked by identity so that one reachable through several paths is examined only once.
+    seen = {id(pending[0])}
+    cursor = 0
+    while cursor < len(pending):
+        current = pending[cursor]
+        cursor += 1
+        if current.op == op:
+            return current
+        if not current.outputs:
+            # A node without outputs has nothing to follow.
+            continue
+        for consumer in current.outputs[0].outputs:
+            if id(consumer) not in seen:
+                seen.add(id(consumer))
+                pending.append(consumer)
+    return None
+
+
+@gs.Graph.register()
+def find_ancestor_by_op(self, node, op, depth=10):
+    """
+    Starting from the given node, finds a node higher in the graph matching the given operation name.
+    The search is breadth-first. It begins at the first producer of the node's first input and, from every
+    visited node, continues through all producers of that node's last input. Other inputs are not followed,
+    so this is not an exhaustive graph search.
+    :param self: The gs.Graph object being extended.
+    :param node: The node to start searching from.
+    :param op: The operation name to search for.
+    :param depth: A value of zero or less disables the search. Any positive value performs the same full
+        traversal; the value is not used as a cut-off, so that searches which succeed today keep returning
+        the same node. NOTE(review): confirm whether a real limit is wanted before enforcing one.
+    :return: The first ancestor node (in breadth-first order) that performs that op, or None when the
+        start node has no producer or no reachable node matches.
+    """
+    if depth <= 0 or not node.inputs or not node.inputs[0].inputs:
+        return None
+    pending = [node.i()]
+    # Nodes are tracked by identity so that one reachable through several paths is examined only once.
+    seen = {id(pending[0])}
+    cursor = 0
+    while cursor < len(pending):
+        current = pending[cursor]
+        cursor += 1
+        if current.op == op:
+            return current
+        if not current.inputs:
+            # A node without inputs has nothing to follow.
+            continue
+        for producer in current.inputs[-1].inputs:
+            if id(producer) not in seen:
+                seen.add(id(producer))
+                pending.append(producer)
+    return None
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d9dcdc9992b00232765b2e488f0efdaae49ba5f0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/requirements.txt
@@ -0,0 +1,14 @@
+onnx==1.16.0
+onnxruntime==1.15.1; python_version <= "3.10"
+onnxruntime==1.18.1; python_version >= "3.11"
+Pillow>=10.0.0
+git+https://github.com/facebookresearch/detectron2.git
+git+https://github.com/NVIDIA/TensorRT#subdirectory=tools/onnx-graphsurgeon
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/visualize.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..00e930f1f6e165f01ad90cbbb2f1c0bcbb6f7db1
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/detectron2/visualize.py
@@ -0,0 +1,255 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import PIL.Image as Image
+import PIL.ImageDraw as ImageDraw
+import PIL.ImageFont as ImageFont
+import PIL.ImageFilter as ImageFilter
+
+
+# Color names used when drawing detections. visualize_detections() picks the entry at
+# (class id modulo the list length) and hands the name to PIL as a color.
+COLORS = [
+    "GoldenRod",
+    "MediumTurquoise",
+    "GreenYellow",
+    "SteelBlue",
+    "DarkSeaGreen",
+    "SeaShell",
+    "LightGrey",
+    "IndianRed",
+    "DarkKhaki",
+    "LawnGreen",
+    "WhiteSmoke",
+    "Peru",
+    "LightCoral",
+    "FireBrick",
+    "OldLace",
+    "LightBlue",
+    "SlateGray",
+    "OliveDrab",
+    "NavajoWhite",
+    "PaleVioletRed",
+    "SpringGreen",
+    "AliceBlue",
+    "Violet",
+    "DeepSkyBlue",
+    "Red",
+    "MediumVioletRed",
+    "PaleTurquoise",
+    "Tomato",
+    "Azure",
+    "Yellow",
+    "Cornsilk",
+    "Aquamarine",
+    "CadetBlue",
+    "CornflowerBlue",
+    "DodgerBlue",
+    "Olive",
+    "Orchid",
+    "LemonChiffon",
+    "Sienna",
+    "OrangeRed",
+    "Orange",
+    "DarkSalmon",
+    "Magenta",
+    "Wheat",
+    "Lime",
+    "GhostWhite",
+    "SlateBlue",
+    "Aqua",
+    "MediumAquaMarine",
+    "LightSlateGrey",
+    "MediumSeaGreen",
+    "SandyBrown",
+    "YellowGreen",
+    "Plum",
+    "FloralWhite",
+    "LightPink",
+    "Thistle",
+    "DarkViolet",
+    "Pink",
+    "Crimson",
+    "Chocolate",
+    "DarkGrey",
+    "Ivory",
+    "PaleGreen",
+    "DarkGoldenRod",
+    "LavenderBlush",
+    "SlateGrey",
+    "DeepPink",
+    "Gold",
+    "Cyan",
+    "LightSteelBlue",
+    "MediumPurple",
+    "ForestGreen",
+    "DarkOrange",
+    "Tan",
+    "Salmon",
+    "PaleGoldenRod",
+    "LightGreen",
+    "LightSlateGray",
+    "HoneyDew",
+    "Fuchsia",
+    "LightSeaGreen",
+    "DarkOrchid",
+    "Green",
+    "Chartreuse",
+    "LimeGreen",
+    "AntiqueWhite",
+    "Beige",
+    "Gainsboro",
+    "Bisque",
+    "SaddleBrown",
+    "Silver",
+    "Lavender",
+    "Teal",
+    "LightCyan",
+    "PapayaWhip",
+    "Purple",
+    "Coral",
+    "BurlyWood",
+    "LightGray",
+    "Snow",
+    "MistyRose",
+    "PowderBlue",
+    "DarkCyan",
+    "White",
+    "Turquoise",
+    "MediumSlateBlue",
+    "PeachPuff",
+    "Moccasin",
+    "LightSalmon",
+    "SkyBlue",
+    "Khaki",
+    "MediumSpringGreen",
+    "BlueViolet",
+    "MintCream",
+    "Linen",
+    "SeaGreen",
+    "HotPink",
+    "LightYellow",
+    "BlanchedAlmond",
+    "RoyalBlue",
+    "RosyBrown",
+    "MediumOrchid",
+    "DarkTurquoise",
+    "LightGoldenRodYellow",
+    "LightSkyBlue",
+]
+
+
+def overlay(image, mask, color, alpha_transparency=0.5):
+    """
+    Blend a solid color into the pixels of an image selected by a mask, modifying the image in place.
+    :param image: Array indexed as [row, column, channel]; channels 0, 1 and 2 are updated.
+    :param mask: Array compared against 1; entries equal to 1 mark the pixels to tint.
+    :param color: Indexable with three components, each multiplied by 255 before blending.
+    :param alpha_transparency: Weight of the color in the blend; the existing pixel keeps (1 - weight).
+    :return: The same image object that was passed in.
+    """
+    selected = mask == 1
+    keep = 1 - alpha_transparency
+    for channel in range(3):
+        plane = image[:, :, channel]
+        tinted = plane * keep + alpha_transparency * color[channel] * 255
+        # Pixels outside the mask are written back with their current value.
+        image[:, :, channel] = np.where(selected, tinted, plane)
+    return image
+
+
+def visualize_detections(
+    image_path, output_path, detections, labels=[], iou_threshold=0.5
+):
+    """
+    Draw every detection (optional mask overlay, box outline and caption) onto an image.
+    :param image_path: Path of the image to open; it is converted to RGB before drawing.
+    :param output_path: Where to save the annotated image, or None to get the image back instead.
+    :param detections: Iterable of dicts with "class", "score", "mask", "xmin", "ymin", "xmax" and "ymax"
+        entries. Entries carrying a 28x28 numpy mask have their x and y coordinates swapped in place.
+    :param labels: Optional class names indexed by class id; "Class <id>" is used when an id has no entry.
+    :param iou_threshold: Mask values above this threshold are treated as part of the instance.
+    :return: The annotated PIL image when output_path is None, otherwise None.
+    """
+    canvas = Image.open(image_path).convert(mode="RGB")
+    canvas_width, canvas_height = canvas.size
+    outline_width = 2
+    caption_font = ImageFont.load_default()
+    for d in detections:
+        class_id = d["class"]
+        color = COLORS[class_id % len(COLORS)]
+        # A 1x1 PIL image turns the color name into RGB values, scaled here to the 0..1 range.
+        swatch = Image.new("RGB", (1, 1), color)
+        unit_rgb = (np.asarray(swatch)[0][0]) / 255
+
+        det_mask = d["mask"]
+        if isinstance(det_mask, np.ndarray) and det_mask.shape == (28, 28):
+            # Swap x and y on the detection itself (the original note here says PyTorch uses
+            # [x1,y1,x2,y2] ordering instead of [y1,x1,y2,x2]). All four values are read first.
+            old_xmin, old_ymin, old_xmax, old_ymax = d["xmin"], d["ymin"], d["xmax"], d["ymax"]
+            d["ymin"], d["xmin"], d["ymax"], d["xmax"] = old_xmin, old_ymin, old_xmax, old_ymax
+            # Size of the detection box in whole pixels.
+            box_width = round(d["xmax"] - d["xmin"])
+            box_height = round(d["ymax"] - d["ymin"])
+            # Threshold the mask and store the outcome as 0/1 uint8 values.
+            binary = (det_mask > iou_threshold).astype(np.uint8)
+            # Stretch the small mask to the size of the detection box.
+            stretched = Image.fromarray(binary).resize(
+                (box_width, box_height), resample=Image.BILINEAR
+            )
+            # Paste it at the box position onto an empty single-channel image of full size.
+            full_frame = Image.new("L", (canvas_width, canvas_height))
+            full_frame.paste(stretched, (round(d["xmin"]), (round(d["ymin"]))))
+            frame_mask = np.array(full_frame)
+            # Work on a copy of the pixels, tint the masked area and turn it back into a PIL image.
+            pixels = np.asarray(canvas).copy()
+            canvas = Image.fromarray(overlay(pixels, frame_mask, unit_rgb))
+
+        # Box outline, drawn as a closed polyline.
+        draw = ImageDraw.Draw(canvas)
+        x_min, y_min = d["xmin"], d["ymin"]
+        y_max, x_max = d["ymax"], d["xmax"]
+        corners = [
+            (x_min, y_min),
+            (x_min, y_max),
+            (x_max, y_max),
+            (x_max, y_min),
+            (x_min, y_min),
+        ]
+        draw.line(corners, width=outline_width, fill=color)
+
+        # Caption: the class label, followed by the score as a percentage unless the score is negative.
+        label = "Class {}".format(class_id)
+        if class_id < len(labels):
+            label = "{}".format(labels[class_id])
+        score = d["score"]
+        scored_caption = "{}: {}%".format(label, int(100 * score))
+        text = label if score < 0 else scored_caption
+
+        left, top, right, bottom = caption_font.getbbox(text)
+        text_width = right - left
+        text_height = bottom - top
+        # The caption sits above the box unless that would leave the top of the image.
+        text_bottom = max(text_height, y_min)
+        text_left = x_min
+        margin = np.ceil(0.05 * text_height)
+        backdrop = [
+            (text_left, text_bottom - text_height - 2 * margin),
+            (text_left + text_width, text_bottom),
+        ]
+        draw.rectangle(backdrop, fill=color)
+        draw.text(
+            (text_left + margin, text_bottom - text_height - margin),
+            text,
+            fill="black",
+            font=caption_font,
+        )
+    if output_path is not None:
+        canvas.save(output_path)
+        return None
+    return canvas
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/downloader.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/downloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..c167d14a8b8af9cfc3bfd4291c472cc8f301f3d7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/downloader.py
@@ -0,0 +1,259 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import errno
+import hashlib
+import logging
+import os
+import sys
+
+
+logger = logging.getLogger("downloader")
+
+
+class DataFile:
+    """One downloadable file: its relative path, its source URL and an optional checksum."""
+
+    def __init__(self, attr):
+        # The raw mapping is kept as well; __str__ prints it verbatim.
+        self.attr = attr
+        self.path = attr["path"]
+        self.url = attr["url"]
+        has_checksum = "checksum" in attr
+        if not has_checksum:
+            logger.warning("Checksum of %s not provided!", self.path)
+        # None when the entry carries no checksum.
+        self.checksum = attr.get("checksum")
+
+    def __str__(self):
+        return str(self.attr)
+
+
+class SampleData:
+    """The data files that belong to one sample, as described by a parsed YAML mapping."""
+
+    def __init__(self, attr):
+        # The raw mapping is kept as well; __str__ prints it verbatim.
+        self.attr = attr
+        self.sample = attr["sample"]
+        # A missing "files" entry is treated as an empty list.
+        self.files = [DataFile(entry) for entry in attr.get("files", [])]
+
+    def __str__(self):
+        return str(self.attr)
+
+
+def _loadYAML(yaml_path):
+    """Parse the YAML file at yaml_path and wrap the parsed content in a SampleData."""
+    with open(yaml_path, "rb") as stream:
+        # yaml is imported only once the file has been opened, not at module import time.
+        import yaml
+
+        # NOTE(review): FullLoader is not meant for untrusted input; consider yaml.safe_load
+        # if download.yml can ever come from an untrusted source.
+        parsed = yaml.load(stream, Loader=yaml.FullLoader)
+    return SampleData(parsed)
+
+
+def _checkMD5(path, refMD5):
+    """Return True when the MD5 hex digest of the file at path equals refMD5.
+
+    The file is read in fixed-size blocks and closed before returning, so large files are
+    not loaded into memory at once. A refMD5 of None never matches.
+    """
+    digest = hashlib.md5()
+    with open(path, "rb") as stream:
+        for block in iter(lambda: stream.read(1 << 20), b""):
+            digest.update(block)
+    return digest.hexdigest() == refMD5
+
+
+def _createDirIfNeeded(path):
+    """Create the directory that will contain path, including missing parents.
+
+    Nothing happens when the directory already exists or when path has no directory
+    component (a bare file name in the working directory).
+    """
+    the_dir = os.path.dirname(path)
+    if the_dir:
+        os.makedirs(the_dir, exist_ok=True)
+
+
+def download(data_dir, yaml_path, retries, overwrite=False):
+    """Download the data files specified in YAML file to a directory.
+
+    A file whose local copy already matches the expected MD5 is skipped. A local copy with a
+    different checksum is removed and fetched again when overwrite is set, and counted as a
+    failure otherwise. A failed download is logged, any partial file is removed, and the
+    remaining files are still processed.
+
+    Args:
+        data_dir: Directory that each file's relative path is joined to.
+        yaml_path: Path of the YAML file listing the sample's files.
+        retries: Number of retries given to the HTTP retry policy.
+        overwrite: Replace local copies whose checksum does not match.
+
+    Return false if a download fails, or if the downloaded file or the local copy (if not
+    overwrite) has a different checksum.
+    """
+    sample_data = _loadYAML(yaml_path)
+    logger.info("Downloading data for %s", sample_data.sample)
+
+    def _downloadFile(path, url, retries):
+        """Stream url into path; True only when the server answered 200 and the body was written."""
+        logger.info("Downloading %s from %s", path, url)
+        import requests
+        from requests.adapters import HTTPAdapter, Retry
+
+        retry_policy = Retry(total=retries, backoff_factor=0.5)
+        try:
+            # The session and the response are context-managed so their connections are
+            # released on every path, including errors in the middle of the transfer.
+            with requests.Session() as session:
+                session.mount("http://", HTTPAdapter(max_retries=retry_policy))
+                session.mount("https://", HTTPAdapter(max_retries=retry_policy))
+                with session.get(url, stream=True, timeout=60) as r:
+                    if r.status_code != 200:
+                        logger.info("Failed to connect to %s with status code: %s.", url, r.status_code)
+                        return False
+                    logger.info("Connecting to %s is successful.", url)
+
+                    size = int(r.headers.get("content-length", 0))
+                    from tqdm import tqdm
+
+                    with tqdm(total=size, unit="iB", unit_scale=True) as progress_bar:
+                        with open(path, "wb") as fd:
+                            for chunk in r.iter_content(chunk_size=1024):
+                                progress_bar.update(len(chunk))
+                                fd.write(chunk)
+                    return True
+        except requests.exceptions.ConnectionError as e:
+            logger.debug("Connection failed after retries: %s", e)
+        except requests.exceptions.Timeout as e:
+            logger.debug("A timeout occurred: %s", e)
+        except requests.exceptions.RequestException as e:
+            logger.debug("Error occurred while requesting connection to %s: %s.", url, e)
+        return False
+
+    allGood = True
+    for f in sample_data.files:
+        fpath = os.path.join(data_dir, f.path)
+        if os.path.exists(fpath):
+            if _checkMD5(fpath, f.checksum):
+                logger.info("Found local copy %s, skip downloading.", fpath)
+                continue
+            logger.warning("Local copy %s has a different checksum!", fpath)
+            if not overwrite:
+                allGood = False
+                continue
+            logger.warning("Removing local copy %s", fpath)
+            os.remove(fpath)
+        _createDirIfNeeded(fpath)
+        # An explicit check is used because an assert statement (and the call inside it)
+        # is dropped entirely when Python runs with -O.
+        if not _downloadFile(fpath, f.url, retries=retries):
+            logger.error("Failed to download %s from %s", fpath, f.url)
+            # Anything at fpath now is an incomplete transfer; remove it so that a later
+            # run does not mistake it for a local copy with a wrong checksum.
+            if os.path.exists(fpath):
+                os.remove(fpath)
+            allGood = False
+            continue
+        if not _checkMD5(fpath, f.checksum):
+            logger.error("The downloaded file %s has a different checksum!", fpath)
+            allGood = False
+
+    return allGood
+
+
+def _parseArgs():
+    """Parse the downloader's command-line options and resolve the data directory.
+
+    Unknown options are ignored. The data directory comes from -d/--data when given and
+    from the TRT_DATA_DIR environment variable otherwise.
+
+    Returns a (data_directory, parsed_arguments) pair; raises ValueError when no data
+    directory can be determined.
+    """
+    parser = argparse.ArgumentParser(description="Downloader of TensorRT sample data files.")
+    parser.add_argument(
+        "-d",
+        "--data",
+        help="Specify the data directory, data will be downloaded to there. $TRT_DATA_DIR will be overwritten by this argument.",
+    )
+    parser.add_argument(
+        "-f",
+        "--file",
+        default="download.yml",
+        help="Specify the path to the download.yml, default to `download.yml` in the working directory",
+    )
+    parser.add_argument(
+        "-o",
+        "--overwrite",
+        action="store_true",
+        default=False,
+        help="Force to overwrite if MD5 check failed",
+    )
+    parser.add_argument(
+        "-v",
+        "--verify",
+        action="store_true",
+        default=False,
+        help="Verify if the data has been downloaded. Will not download if specified.",
+    )
+    parser.add_argument(
+        "-r",
+        "--retries",
+        type=int,
+        default=10,
+        help="Number of retries for download",
+    )
+    args, _ = parser.parse_known_args()
+
+    if args.data is not None:
+        data = args.data
+    else:
+        data = os.environ.get("TRT_DATA_DIR")
+    if data is None:
+        raise ValueError("Data directory must be specified by either `-d $DATA` or environment variable $TRT_DATA_DIR.")
+
+    return data, args
+
+
+def verifyChecksum(data_dir, yaml_path):
+    """Check every file listed in the YAML against its MD5 without downloading anything.
+
+    Returns False when any listed file is missing from data_dir or has a checksum that
+    differs from the YAML entry, True otherwise.
+    """
+    sample_data = _loadYAML(yaml_path)
+    logger.info("Verifying data files and their MD5 for %s", sample_data.sample)
+
+    allGood = True
+    for f in sample_data.files:
+        fpath = os.path.join(data_dir, f.path)
+        if not os.path.exists(fpath):
+            allGood = False
+            logger.error("Data file %s doesn't have a local copy", f.path)
+            continue
+        if _checkMD5(fpath, f.checksum):
+            logger.info("MD5 match for local copy %s", fpath)
+            continue
+        logger.error("Local file %s has a different checksum!", fpath)
+        allGood = False
+
+    return allGood
+
+
+def main():
+    """Command-line entry point: verify or download the sample data and exit with 1 on failure."""
+    data, args = _parseArgs()
+    logging.basicConfig()
+    logger.setLevel(logging.INFO)
+
+    if args.verify:
+        succeeded = verifyChecksum(data, args.file)
+    else:
+        succeeded = download(data, args.file, args.retries, args.overwrite)
+
+    if not succeeded:
+        # Error of downloading or checksum
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
+
+
+# Data directory remembered by getFilePath() after it has been resolved once.
+TRT_DATA_DIR = None
+
+
+def getFilePath(path):
+    """Return the full path of a downloaded data file inside the data directory.
+
+    The data directory is taken from -d/--data on the command line when given and from the
+    TRT_DATA_DIR environment variable otherwise, and is remembered in the module-level
+    TRT_DATA_DIR. Raises ValueError when no directory is configured or the file is missing.
+
+    It only works when the sample doesn't have any other command line argument.
+    """
+    global TRT_DATA_DIR
+    if not TRT_DATA_DIR:
+        parser = argparse.ArgumentParser(description="Helper of data file download tool")
+        parser.add_argument(
+            "-d",
+            "--data",
+            help="Specify the data directory where it is saved in. $TRT_DATA_DIR will be overwritten by this argument.",
+        )
+        args, _ = parser.parse_known_args()
+        if args.data is None:
+            TRT_DATA_DIR = os.environ.get("TRT_DATA_DIR")
+        else:
+            TRT_DATA_DIR = args.data
+    if TRT_DATA_DIR is None:
+        raise ValueError("Data directory must be specified by either `-d $DATA` or environment variable $TRT_DATA_DIR.")
+
+    fullpath = os.path.join(TRT_DATA_DIR, path)
+    if os.path.exists(fullpath):
+        return fullpath
+    raise ValueError("Data file %s doesn't exist!" % fullpath)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d3d6ebc0ef7d5c99295b5166ca7d4444456ae4a8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/README.md
@@ -0,0 +1,367 @@
+# EfficientDet Object Detection in TensorRT
+
+![efficientdet](https://drive.google.com/uc?export=view&id=1Le98wETvmKKj0fUKoCFLsld7o8QPJq9C)
+
+These scripts help with conversion and execution of [Google EfficientDet](https://arxiv.org/abs/1911.09070) models with [NVIDIA TensorRT](https://developer.nvidia.com/tensorrt). This process is compatible with models trained through either Google AutoML or the TensorFlow Object Detection API.
+
+## Contents
+- [Changelog](#changelog)
+- [Setup](#setup)
+- [Model Conversion](#model-conversion)
+  * [TensorFlow Saved Model](#tensorflow-saved-model)
+  * [Create ONNX Graph](#create-onnx-graph)
+  * [Build TensorRT Engine](#build-tensorrt-engine)
+- [Inference](#inference)
+  * [Inference in Python](#inference-in-python)
+  * [Evaluate mAP Metric](#evaluate-map-metric)
+  * [TF vs TRT Comparison](#tf-vs-trt-comparison)
+
+## Changelog
+
+- August 2023:
+  - Removed support for Python versions < 3.8.
+  - Added support for TensorFlow 2.12.0
+  - Update ONNX version support to 1.14.0
+  - Update ONNX Runtime version support to 1.15.1 for Python>=3.8
+- January 2022:
+  - Added support for EfficientDet Lite and AdvProp models.
+  - Added dynamic batch support.
+  - Added mixed precision engine builder.
+- July 2021:
+  - Initial release.
+
+## Setup
+
+We recommend running these scripts on an environment with TensorRT >= 8.0.1 and TensorFlow 2.12.0.
+
+Install TensorRT as per the [TensorRT Install Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You will need to make sure the Python bindings for TensorRT are also installed correctly; these are available by installing the `python3-libnvinfer` and `python3-libnvinfer-dev` packages from your TensorRT download.
+
+To simplify TensorRT and TensorFlow installation, use an [NGC TensorFlow Docker Image](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow), such as:
+
+```bash
+docker pull nvcr.io/nvidia/tensorflow:23.07-tf2-py3
+```
+
+Install all dependencies listed in `requirements.txt`:
+
+```bash
+pip3 install -r requirements.txt
+```
+
+You will also need the latest `onnx_graphsurgeon` python module. If not already installed by TensorRT, you can install it manually by running:
+
+```bash
+pip3 install onnx-graphsurgeon --index-url https://pypi.ngc.nvidia.com
+```
+
+**NOTE:** Please make sure that the `onnx-graphsurgeon` module installed by pip is version >= 0.3.9.
+
+Finally, you may want to clone the EfficientDet code from the [AutoML Repository](https://github.com/google/automl) to use some helper utilities from it. This exporter has been tested with commit [0b0ba5e](https://github.com/google/automl/tree/0b0ba5ebd0860edd939465fc4152da4ff9f79b44/efficientdet) from December 2021, so it may be a good idea to checkout the repository at that specific commit to avoid possible future incompatibilities:
+
+```bash
+git clone https://github.com/google/automl
+cd automl
+git checkout 0b0ba5e
+```
+
+## Model Conversion
+
+The workflow to convert an EfficientDet model is basically TensorFlow → ONNX → TensorRT, and so parts of this process require TensorFlow to be installed. If you are performing this conversion to run inference on the edge, such as for NVIDIA Jetson devices, it might be easier to do the ONNX conversion on a PC first.
+
+### TensorFlow Saved Model
+
+The starting point of conversion is a TensorFlow saved model. This can be exported from your own trained models, or you can download a pre-trained model. This conversion script is compatible with three types of models:
+
+1. EfficientDet models trained with the [AutoML](https://github.com/google/automl/tree/master/efficientdet) framework. Compatible with all "d0-7", "lite0-4" and "AdvProp" variations.
+2. EfficientDet models trained with the [TensorFlow Object Detection](https://github.com/tensorflow/models/tree/master/research/object_detection) API (TFOD).
+3. EfficientDet models pre-trained on COCO and downloaded from [TFHub](https://tfhub.dev/s?network-architecture=efficientdet).
+
+#### 1. AutoML Models
+
+If you are training your own model, you will need the training checkpoint. You can also download a pre-trained checkpoint from the "ckpt" links on the [AutoML Repository](https://github.com/google/automl/tree/master/efficientdet) README file, such as [this](https://storage.googleapis.com/cloud-tpu-checkpoints/efficientdet/coco2/efficientdet-d0.tar.gz).
+
+This converter is compatible with all *efficientdet-d0* through *efficientdet-d7x*, and *efficientdet-lite0* through *efficientdet-lite4* model variations. This converter also works with the [AdvProp](https://github.com/google/automl/blob/master/efficientdet/Det-AdvProp.md) models. However, AdvProp models are trained with the `scale_range` hparam, which changes the expected input image value range, so you will need to adjust the preprocessor argument when creating the ONNX graph. More details on the corresponding section below.
+
+The checkpoint directory should have a file structure such as this:
+
+```
+efficientdet-d0
+├── model.data-00000-of-00001
+├── model.index
+└── model.meta
+```
+
+To export a saved model from here, clone and install the [AutoML](https://github.com/google/automl) repository, and run:
+
+```bash
+cd /path/to/automl/efficientdet
+python3 model_inspect.py \
+    --runmode saved_model \
+    --model_name efficientdet-d0 \
+    --ckpt_path /path/to/efficientdet-d0 \
+    --saved_model_dir /path/to/saved_model
+```
+
+Where the `--model_name` argument is the network name corresponding to this checkpoint, usually between `efficientdet-d0` and `efficientdet-d7x`. The `--ckpt_path` points to the directory holding the checkpoint as described above. The TF saved model will be exported to the path given by `--saved_model_dir`.
+
+> **Custom Image Size:** If your application requires inference at a different image resolution than the training input size, you can re-export the model for the exact size you require. To do so, export a saved model from checkpoint as shown above, but add an extra argument as: `--hparams 'image_size=1920x1280'`
+
+#### 2. TFOD Models
+
+You can download one of the pre-trained TFOD models from the [TF2 Detection Model Zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md), such as:
+
+```bash
+wget http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d0_coco17_tpu-32.tar.gz
+```
+
+When extracted, this package holds a directory named `saved_model` which holds the saved model ready for conversion.
+
+However, if you are working with your own trained EfficientDet model from the TensorFlow Object Detection API, or if you need to re-export the saved model, you can do so from the training checkpoint. The downloaded package above also contains a pre-trained checkpoint. The structure is similar to this:
+
+```
+efficientdet_d0_coco17_tpu-32
+├── checkpoint
+│   ├── ckpt-0.data-00000-of-00001
+│   └── ckpt-0.index
+├── pipeline.config
+└── saved_model
+    └── saved_model.pb
+```
+
+To (re-)export a saved model from here, clone the TFOD API from the [TF Models Repository](https://github.com/tensorflow/models), and install it following the [instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2.md#installation). Then run:
+
+```bash
+cd /path/to/models/research/object_detection
+python3 exporter_main_v2.py \
+    --input_type image_tensor \
+    --trained_checkpoint_dir /path/to/efficientdet_d0_coco17_tpu-32/checkpoint \
+    --pipeline_config_path /path/to/efficientdet_d0_coco17_tpu-32/pipeline.config \
+    --output_directory /path/to/export
+```
+
+Where `--trained_checkpoint_dir` and `--pipeline_config_path` point to the corresponding paths in the training checkpoint. On the path pointed by `--output_directory` you will then find the newly created saved model in a directory aptly named `saved_model`.
+
+**NOTE:** TFOD EfficientDet models will have slightly lower throughput than their AutoML model counterparts. This is due to differences in the graph construction that TFOD makes use of.
+
+#### 3. TFHub Models
+
+You can download one of the pre-trained AutoML saved models from the [EfficientDet TFHub](https://tfhub.dev/s?network-architecture=efficientdet). Currently, only the efficientdet/d0 - d7 models are compatible with this converter. If you need to work with a pre-trained lite model, please follow the AutoML checkpoint route above.
+
+Download a model from TFHub, such as:
+
+```bash
+wget https://storage.googleapis.com/tfhub-modules/tensorflow/efficientdet/d0/1.tar.gz
+```
+
+The contents of this package, when extracted, will hold a saved model ready for conversion.
+
+### Create ONNX Graph
+
+To generate an ONNX model file, first find the input size that corresponds to the model you're converting:
+
+| **Model**           | **Input Size** |
+| --------------------|----------------|
+| efficientdet-d0     | 512,512        |
+| efficientdet-d1     | 640,640        |
+| efficientdet-d2     | 768,768        |
+| efficientdet-d3     | 896,896        |
+| efficientdet-d4     | 1024,1024      |
+| efficientdet-d5     | 1280,1280      |
+| efficientdet-d6     | 1280,1280      |
+| efficientdet-d7     | 1536,1536      |
+| efficientdet-d7x    | 1536,1536      |
+| efficientdet-lite0  | 320,320        |
+| efficientdet-lite1  | 384,384        |
+| efficientdet-lite2  | 448,448        |
+| efficientdet-lite3  | 512,512        |
+| efficientdet-lite3x | 640,640        |
+| efficientdet-lite4  | 640,640        |
+
+If you've re-exported the model with a custom image size, then of course use that. With the correct input size and the TF saved model ready to be converted, run:
+
+```bash
+python3 create_onnx.py \
+    --input_size 512,512 \
+    --saved_model /path/to/saved_model \
+    --onnx /path/to/model.onnx
+```
+
+This will create the file `model.onnx` which is ready to convert to TensorRT.
+
+The script has a few optional arguments, including:
+
+* `--input_format [NHWC,NCHW]` allows switching between NHWC (default) and NCHW data format modes. If your data source is in NCHW format, you may want to select this mode to avoid extra transposes.
+* `--nms_threshold [...]` allows overriding the default NMS score threshold parameter, as the runtime latency of the NMS plugin is sensitive to this value. It's a good practice to set this value as high as possible, while still fulfilling your application requirements, to reduce inference latency.
+* `--preprocessor [imagenet,scale_range]` allows switching between two possible image preprocessing methods. Most EfficientDet models use the `imagenet` method, which this argument defaults to, and corresponds to standard ImageNet mean subtraction and standard deviation normalization. The `scale_range` method instead normalizes the image to a range of [-1,+1]. Please use this method only when converting the **AdvProp** pre-trained checkpoints, as they were created with this preprocessor operation.
+
+Optionally, you may wish to visualize the resulting ONNX graph with a tool such as [Netron](https://netron.app/).
+
+![netron](https://drive.google.com/uc?export=view&id=1m9zRbvNtlbftN7P46dtOLPbcwEbz4XwS)
+
+The input to the graph is a `float32` tensor with the selected input shape, containing RGB pixel data in the range of 0 to 255. Normalization, mean subtraction and scaling will be performed inside the EfficientDet graph, so it is not required to further pre-process the input data.
+
+The outputs of the graph are the same as the outputs of the [EfficientNMS](https://github.com/NVIDIA/TensorRT/tree/main/plugin/efficientNMSPlugin) plugin.
+
+### Build TensorRT Engine
+
+It is possible to build the TensorRT engine directly with `trtexec` using the ONNX graph generated in the previous step. You can do so by running:
+
+```bash
+trtexec \
+    --onnx=/path/to/model.onnx \
+    --saveEngine=/path/to/engine.trt \
+    --optShapes=input:$INPUT_SHAPE \
+    --memPoolSize=workspace:1024
+```
+
+Where `$INPUT_SHAPE` defines the input spec to build the engine with, e.g. `--optShapes=input:8x512x512x3`. Other common `trtexec` functionality for lower precision modes or other options will also work as expected.
+
+However, the script `build_engine.py` is also provided in this repository for convenience, as it has been tailored to EfficientDet engine building and INT8 calibration. Run `python3 build_engine.py --help` for details on available settings.
+
+#### FP16 Precision
+
+To build the TensorRT engine file with FP16 precision, run:
+
+```bash
+python3 build_engine.py \
+    --onnx /path/to/model.onnx \
+    --engine /path/to/engine.trt \
+    --precision fp16
+```
+
+The file `engine.trt` will be created, which can now be used to infer with TensorRT.
+
+For best results, make sure no other processes are using the GPU during engine build, as it may affect the optimal tactic selection process.
+
+#### INT8 Precision
+
+To build and calibrate an engine for INT8 precision, run:
+
+```bash
+python3 build_engine.py \
+    --onnx /path/to/model.onnx \
+    --engine /path/to/engine.trt \
+    --precision int8 \
+    --calib_input /path/to/calibration/images \
+    --calib_cache /path/to/calibration.cache
+```
+
+Where `--calib_input` points to a directory with several thousands of images. For example, this could be a subset of the training or validation datasets that were used for the model. It's important that this data represents the runtime data distribution relatively well; therefore, the more images that are used for calibration, the better the accuracy that will be achieved in INT8 precision. For models trained for the [COCO dataset](https://cocodataset.org/#home), we have found that 5,000 images give a good result.
+
+The `--calib_cache` is optional, and it controls where the calibration cache file will be written to. This is useful to keep a cached copy of the calibration results. Next time you need to build an int8 engine for the same network, if this file exists, the builder will skip the calibration step and use the cached values instead.
+
+#### Mixed Precision (Experimental)
+
+Mixed precision is a custom mode that pins some key layers to FP16, while the rest of the network is converted at INT8 precision. The purpose of this mode is to balance accuracy and throughput. It's experimental and is given here to show one possible way of balancing achieved accuracy according to an application's latency budget. This mode has been tuned for COCO pre-trained models. For other datasets, you may need to adjust the layers to pin.
+
+Some sample results of using this mode:
+
+| **Model / Precision**   | **Latency** | **COCO mAP** |
+| ------------------------|-------------|--------------|
+| efficientdet-d0 / fp32  | 3.25 ms     | 0.341        |
+| efficientdet-d0 / fp16  | 2.27 ms     | 0.341        |
+| efficientdet-d0 / mixed | **1.75 ms** | **0.320**    |
+| efficientdet-d0 / int8  | 1.63 ms     | 0.299        |
+
+To use mixed precision mode, follow the same instructions as for building and calibrating an INT8 engine as given above, but using the argument `--precision mixed` instead.
+
+#### Static and Dynamic Batch Size
+
+By default, `build_engine.py` creates a static batch size 1 engine. To build with a different static batch size, set the `--batch_size` argument accordingly:
+
+```bash
+python3 build_engine.py \
+    --onnx /path/to/model.onnx \
+    --engine /path/to/engine.trt \
+    --batch_size 8
+```
+
+You can also build an engine with a dynamic batch size. To do so, select a minimum and maximum batch size, as well as an optimal batch size for which TensorRT will tune the engine performance. These batch sizes should be given via the argument `--dynamic_batch_size MIN,OPT,MAX`, such as:
+
+```bash
+python3 build_engine.py \
+    --onnx /path/to/model.onnx \
+    --engine /path/to/engine.trt \
+    --dynamic_batch_size 1,16,32
+```
+
+#### Benchmark Engine
+
+Optionally, you can obtain execution timing information for the built engine by using the `trtexec` utility, as:
+
+```bash
+trtexec \
+    --loadEngine=/path/to/engine.trt \
+    --useCudaGraph --noDataTransfers \
+    --iterations=100 --avgRuns=100
+```
+
+If it's not already in your `$PATH`, the `trtexec` binary is usually found in `/usr/src/tensorrt/bin/trtexec`, depending on your TensorRT installation method.
+
+An inference benchmark will run, with GPU Compute latency times printed out to the console. Depending on your environment, you should see something similar to:
+
+```
+GPU Compute Time: min = 1.55835 ms, max = 1.91591 ms, mean = 1.58719 ms, median = 1.578 ms, percentile(99%) = 1.90668 ms
+```
+
+## Inference
+
+For optimal performance, inference should be done in a C++ application that takes advantage of CUDA Graphs to launch the inference request. Alternatively, the TensorRT engine built with this process can also be executed through either [Triton Inference Server](https://developer.nvidia.com/nvidia-triton-inference-server) or [DeepStream SDK](https://developer.nvidia.com/deepstream-sdk).
+
+However, for convenience, a python inference script is provided here for quick testing of the built TensorRT engine.
+
+### Inference in Python
+
+To perform object detection on a set of images with TensorRT, run:
+
+```bash
+python3 infer.py \
+    --engine /path/to/engine.trt \
+    --input /path/to/images \
+    --output /path/to/output
+```
+
+Where the input path can be either a single image file, or a directory of jpg/png/bmp images.
+
+The detection results will be written out to the specified output directory, consisting of a visualization image, and a tab-separated results file for each input image processed.
+
+![infer](https://drive.google.com/uc?export=view&id=1ZzTHizLx65t_cJcIIflnzXA5yxCYsQz6)
+
+> *This example is generated with a TensorRT engine for the pre-trained AutoML EfficientDet-D0 model re-exported with a custom image size of 1920x1280 as described above. The engine uses an NMS score threshold of 0.4. This is the same [sample image](https://user-images.githubusercontent.com/11736571/77320690-099af300-6d37-11ea-9d86-24f14dc2d540.png) and model parameters as used in the AutoML [inference tutorial](https://github.com/google/automl/blob/master/efficientdet/tutorial.ipynb) to produce this [sample TensorFlow inference image](https://github.com/google/automl/blob/master/efficientdet/g3doc/street.jpg).*
+
+### Evaluate mAP Metric
+
+Given a validation dataset (such as [COCO val2017 data](http://images.cocodataset.org/zips/val2017.zip)) and ground truth annotations (such as [COCO instances_val2017.json](http://images.cocodataset.org/annotations/annotations_trainval2017.zip)), you can get the mAP metrics for the built TensorRT engine. This will use the mAP metrics calculation script from the [AutoML](https://github.com/google/automl) repository.
+
+```bash
+python3 eval_coco.py \
+    --engine /path/to/engine.trt \
+    --input /path/to/coco/val2017 \
+    --annotations /path/to/coco/annotations/instances_val2017.json \
+    --automl_path /path/to/automl
+```
+
+Where the `--automl_path` argument points to the root of the AutoML repository.
+
+The mAP metric is sensitive to the NMS score threshold used, as using a high threshold will reduce the model recall, resulting in a lower mAP value. Ideally, mAP should be measured with a threshold of 0, but such a low value will impact the runtime latency of the EfficientNMS plugin. It may be a good idea to build separate TensorRT engines for different purposes. That is, one engine with a low threshold (like 0) dedicated for mAP validation, and another engine with your application specific threshold (like 0.4) for deployment. This is why we keep the NMS threshold as a configurable parameter in the `create_onnx.py` script.
+
+### TF vs TRT Comparison
+
+To compare how the TensorRT detections match the original TensorFlow model results, you can run:
+
+```bash
+python3 compare_tf.py \
+    --engine /path/to/engine.trt \
+    --saved_model /path/to/saved_model \
+    --input /path/to/images \
+    --nms_threshold 0.4 \
+    --output /path/to/output
+```
+
+This script will process the images found in the given input path through both TensorFlow and TensorRT using the corresponding saved model and engine. It will then write to the output path a set of visualization images showing the inference results of both frameworks for visual qualitative comparison.
+
+`--nms_threshold` overrides the score threshold for the NMS operation if it is higher than the threshold in the model/engine. For better visualization, `--nms_threshold 0.4` is used here for filtering out the noisy detections.
+
+If you run this on COCO val2017 images, you may also add the parameter `--annotations /path/to/coco/annotations/instances_val2017.json` to further compare against COCO ground truth annotations.
+
+![compare_tf](https://drive.google.com/uc?export=view&id=1zgh_RbYX6RWzu7nKLCcSzy60VPiQROZJ)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/build_engine.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..58dd6d5cc2242cc16a0022c79bdd22efa5949da4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/build_engine.py
@@ -0,0 +1,407 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import logging
+import argparse
+
+import numpy as np
+import tensorrt as trt
+from cuda import cudart
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+from image_batcher import ImageBatcher
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("EngineBuilder").setLevel(logging.INFO)
+log = logging.getLogger("EngineBuilder")
+
+
+class EngineCalibrator(trt.IInt8EntropyCalibrator2):
+    """
+    Implements the INT8 Entropy Calibrator 2.
+    """
+
+    def __init__(self, cache_file):
+        """
+        :param cache_file: The location of the cache file.
+        """
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher = None
+        self.batch_allocation = None
+        self.batch_generator = None
+
+    def set_image_batcher(self, image_batcher: ImageBatcher):
+        """
+        Define the image batcher to use, if any. If using only the cache file, an image batcher doesn't need
+        to be defined.
+        :param image_batcher: The ImageBatcher object
+        """
+        self.image_batcher = image_batcher
+        size = int(
+            np.dtype(self.image_batcher.dtype).itemsize
+            * np.prod(self.image_batcher.shape)
+        )
+        self.batch_allocation = common.cuda_call(cudart.cudaMalloc(size))
+        self.batch_generator = self.image_batcher.get_batch()
+
+    def get_batch_size(self):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Get the batch size to use for calibration.
+        :return: Batch size.
+        """
+        if self.image_batcher:
+            return self.image_batcher.batch_size
+        return 1
+
+    def get_batch(self, names):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Get the next batch to use for calibration, as a list of device memory pointers.
+        :param names: The names of the inputs, if useful to define the order of inputs.
+        :return: A list of int-casted memory pointers.
+        """
+        if not self.image_batcher:
+            return None
+        try:
+            batch, _, _ = next(self.batch_generator)
+            log.info(
+                "Calibrating image {} / {}".format(
+                    self.image_batcher.image_index, self.image_batcher.num_images
+                )
+            )
+            common.memcpy_host_to_device(
+                self.batch_allocation, np.ascontiguousarray(batch)
+            )
+            return [int(self.batch_allocation)]
+        except StopIteration:
+            log.info("Finished calibration batches")
+            return None
+
+    def read_calibration_cache(self):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Read the calibration cache file stored on disk, if it exists.
+        :return: The contents of the cache file, if any.
+        """
+        if self.cache_file is not None and os.path.exists(self.cache_file):
+            with open(self.cache_file, "rb") as f:
+                log.info("Using calibration cache file: {}".format(self.cache_file))
+                return f.read()
+
+    def write_calibration_cache(self, cache):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Store the calibration cache to a file on disk.
+        :param cache: The contents of the calibration cache to store.
+        """
+        if self.cache_file is None:
+            return
+        with open(self.cache_file, "wb") as f:
+            log.info("Writing calibration cache data to: {}".format(self.cache_file))
+            f.write(cache)
+
+
+class EngineBuilder:
+    """
+    Parses an ONNX graph and builds a TensorRT engine from it.
+    """
+
+    def __init__(self, verbose=False, workspace=8):
+        """
+        :param verbose: If enabled, a higher verbosity level will be set on the TensorRT logger.
+        :param workspace: Max memory workspace to allow, in Gb.
+        """
+        self.trt_logger = trt.Logger(trt.Logger.INFO)
+        if verbose:
+            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE
+
+        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")
+
+        self.builder = trt.Builder(self.trt_logger)
+        self.config = self.builder.create_builder_config()
+        self.config.set_memory_pool_limit(
+            trt.MemoryPoolType.WORKSPACE, workspace * (2**30)
+        )
+
+        self.network = None
+        self.parser = None
+
+    def create_network(self, onnx_path, batch_size, dynamic_batch_size=None):
+        """
+        Parse the ONNX graph and create the corresponding TensorRT network definition.
+        :param onnx_path: The path to the ONNX graph to load.
+        :param batch_size: Static batch size to build the engine with.
+        :param dynamic_batch_size: Dynamic batch size to build the engine with, if given,
+        batch_size is ignored, pass as a comma-separated string or int list as MIN,OPT,MAX
+        """
+
+        self.network = self.builder.create_network(0)
+        self.parser = trt.OnnxParser(self.network, self.trt_logger)
+
+        onnx_path = os.path.realpath(onnx_path)
+        with open(onnx_path, "rb") as f:
+            if not self.parser.parse(f.read()):
+                log.error("Failed to load ONNX file: {}".format(onnx_path))
+                for error in range(self.parser.num_errors):
+                    log.error(self.parser.get_error(error))
+                sys.exit(1)
+
+        log.info("Network Description")
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+        profile = self.builder.create_optimization_profile()
+        dynamic_inputs = False
+        for input in inputs:
+            log.info(
+                "Input '{}' with shape {} and dtype {}".format(
+                    input.name, input.shape, input.dtype
+                )
+            )
+            if input.shape[0] == -1:
+                dynamic_inputs = True
+                if dynamic_batch_size:
+                    if type(dynamic_batch_size) is str:
+                        dynamic_batch_size = [
+                            int(v) for v in dynamic_batch_size.split(",")
+                        ]
+                    assert len(dynamic_batch_size) == 3
+                    min_shape = [dynamic_batch_size[0]] + list(input.shape[1:])
+                    opt_shape = [dynamic_batch_size[1]] + list(input.shape[1:])
+                    max_shape = [dynamic_batch_size[2]] + list(input.shape[1:])
+                    profile.set_shape(input.name, min_shape, opt_shape, max_shape)
+                    log.info(
+                        "Input '{}' Optimization Profile with shape MIN {} / OPT {} / MAX {}".format(
+                            input.name, min_shape, opt_shape, max_shape
+                        )
+                    )
+                else:
+                    shape = [batch_size] + list(input.shape[1:])
+                    profile.set_shape(input.name, shape, shape, shape)
+                    log.info(
+                        "Input '{}' Optimization Profile with shape {}".format(
+                            input.name, shape
+                        )
+                    )
+        if dynamic_inputs:
+            self.config.add_optimization_profile(profile)
+
+        outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]
+        for output in outputs:
+            log.info(
+                "Output '{}' with shape {} and dtype {}".format(
+                    output.name, output.shape, output.dtype
+                )
+            )
+
+    def set_mixed_precision(self):
+        """
+        Experimental precision mode.
+        Enable mixed-precision mode. When set, the layers defined here will be forced to FP16 to maximize
+        INT8 inference accuracy, while having minimal impact on latency.
+        """
+        self.config.set_flag(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
+        self.config.set_flag(trt.BuilderFlag.DIRECT_IO)
+        self.config.set_flag(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+
+        # All convolution operations in the first four blocks of the graph are pinned to FP16.
+        # These layers have been manually chosen as they give a good middle-point between int8 and fp16
+        # accuracy in COCO, while maintining almost the same latency as a normal int8 engine.
+        # To experiment with other datasets, or a different balance between accuracy/latency, you may
+        # add or remove blocks.
+        for i in range(self.network.num_layers):
+            layer = self.network.get_layer(i)
+            if layer.type == trt.LayerType.CONVOLUTION and any(
+                [
+                    # AutoML Layer Names:
+                    "/stem/" in layer.name,
+                    "/blocks_0/" in layer.name,
+                    "/blocks_1/" in layer.name,
+                    "/blocks_2/" in layer.name,
+                    # TFOD Layer Names:
+                    "/stem_conv2d/" in layer.name,
+                    "/stack_0/block_0/" in layer.name,
+                    "/stack_1/block_0/" in layer.name,
+                    "/stack_1/block_1/" in layer.name,
+                ]
+            ):
+                self.network.get_layer(i).precision = trt.DataType.HALF
+                log.info(
+                    "Mixed-Precision Layer {} set to HALF STRICT data type".format(
+                        layer.name
+                    )
+                )
+
+    def create_engine(
+        self,
+        engine_path,
+        precision,
+        calib_input=None,
+        calib_cache=None,
+        calib_num_images=5000,
+        calib_batch_size=8,
+    ):
+        """
+        Build the TensorRT engine and serialize it to disk.
+        The timing cache location is read from the module-level ``args.timing_cache``, not from a parameter.
+        :param engine_path: Output file for the serialized engine; its parent directory is created if needed.
+        :param precision: The datatype to use for the engine, either 'fp32', 'fp16', 'int8', or 'mixed'.
+        :param calib_input: The path to a directory holding the calibration images.
+        :param calib_cache: The calibration cache file; images are only batched when it is None or missing.
+        :param calib_num_images: The maximum number of images to use for calibration.
+        :param calib_batch_size: The batch size to use for the calibration process.
+        """
+        engine_path = os.path.realpath(engine_path)
+        os.makedirs(os.path.dirname(engine_path), exist_ok=True)
+        log.info("Building {} Engine in {}".format(precision, engine_path))
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+
+        log.info("Reading timing cache from file: {:}".format(args.timing_cache))
+        common.setup_timing_cache(self.config, args.timing_cache)
+
+        use_fp16 = precision in ("fp16", "int8", "mixed")
+        use_int8 = precision in ("int8", "mixed")
+        if use_fp16:
+            if not self.builder.platform_has_fast_fp16:
+                log.warning("FP16 is not supported natively on this platform/device")
+            self.config.set_flag(trt.BuilderFlag.FP16)
+        if use_int8:
+            if not self.builder.platform_has_fast_int8:
+                log.warning("INT8 is not supported natively on this platform/device")
+            self.config.set_flag(trt.BuilderFlag.INT8)
+            self.config.int8_calibrator = EngineCalibrator(calib_cache)
+            if calib_cache is None or not os.path.exists(calib_cache):
+                # No cache file to load: batch calibration images shaped like the first network input
+                batcher = ImageBatcher(
+                    calib_input,
+                    [calib_batch_size] + list(inputs[0].shape[1:]),
+                    trt.nptype(inputs[0].dtype),
+                    max_num_images=calib_num_images,
+                    exact_batches=True,
+                    shuffle_files=True,
+                )
+                self.config.int8_calibrator.set_image_batcher(batcher)
+
+        engine_bytes = self.builder.build_serialized_network(self.network, self.config)
+        if engine_bytes is None:
+            log.error("Failed to create engine")
+            sys.exit(1)
+
+        log.info("Serializing timing cache to file: {:}".format(args.timing_cache))
+        common.save_timing_cache(self.config, args.timing_cache)
+
+        with open(engine_path, "wb") as f:
+            log.info("Serializing engine to file: {:}".format(engine_path))
+            f.write(engine_bytes)
+
+
+def main(args):
+    """
+    Create the network from the ONNX file and build a TensorRT engine, as directed by the arguments.
+    :param args: The argparse namespace produced by the parser at the bottom of this file.
+    """
+    engine_builder = EngineBuilder(args.verbose, args.workspace)
+    engine_builder.create_network(args.onnx, args.batch_size, args.dynamic_batch_size)
+    if args.precision == "mixed":
+        engine_builder.set_mixed_precision()
+    engine_builder.create_engine(
+        args.engine, args.precision, args.calib_input, args.calib_cache,
+        args.calib_num_images, args.calib_batch_size,
+    )
+
+
+if __name__ == "__main__":
+    # Command line entry point: declare the options, check that int8/mixed builds have a calibration
+    # source (images or an existing cache file), then hand the parsed arguments to main().
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-o", "--onnx", required=True, help="The input ONNX model file to load"
+    )
+    parser.add_argument(
+        "-e", "--engine", required=True, help="The output path for the TRT engine"
+    )
+    parser.add_argument(
+        "-b", "--batch_size",
+        default=1, type=int,
+        help="The static batch size to build the engine with, default: 1",
+    )
+    parser.add_argument(
+        "-d",
+        "--dynamic_batch_size",
+        default=None,
+        help="Enable dynamic batch size by providing a comma-separated MIN,OPT,MAX batch size, "
+        "if this option is set, --batch_size is ignored, example: -d 1,16,32, "
+        "default: None, build static engine",
+    )
+    parser.add_argument(
+        "-p",
+        "--precision",
+        default="fp16",
+        choices=["fp32", "fp16", "int8", "mixed"],
+        help="The precision mode to build in, either fp32/fp16/int8/mixed, default: fp16",
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable more verbose log output"
+    )
+    parser.add_argument(
+        "-w",
+        "--workspace",
+        default=8,
+        type=int,
+        help="The max memory workspace size to allow in Gb, default: 8",
+    )
+    parser.add_argument(
+        "--calib_input", help="The directory holding images to use for calibration"
+    )
+    parser.add_argument(
+        "--calib_cache",
+        default=None,
+        help="The file path for INT8 calibration cache to use, default: None",
+    )
+    parser.add_argument(
+        "--calib_num_images",
+        default=5000,
+        type=int,
+        help="The maximum number of images to use for calibration, default: 5000",
+    )
+    parser.add_argument(
+        "--calib_batch_size",
+        default=8,
+        type=int,
+        help="The batch size for the calibration process, default: 8",
+    )
+    parser.add_argument(
+        "--timing_cache",
+        default="./timing.cache",
+        help="The file path for timing cache, default: ./timing.cache",
+    )
+    args = parser.parse_args()
+    # os.path.exists(None) raises TypeError, so the cache path is only probed when one was given.
+    has_calib_cache = bool(args.calib_cache) and os.path.exists(args.calib_cache)
+    if args.precision in ["int8", "mixed"] and not (args.calib_input or has_calib_cache):
+        parser.print_help()
+        log.error(
+            "When building in int8 or mixed precision, --calib_input or an existing --calib_cache file is required"
+        )
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/compare_tf.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/compare_tf.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e4b91fc350bc8d7ed3a0a2621146e2e44f254af
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/compare_tf.py
@@ -0,0 +1,215 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import json
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from infer import TensorRTInfer
+from infer_tf import TensorFlowInfer
+from image_batcher import ImageBatcher
+from visualize import visualize_detections, concat_visualizations
+
+
+def run(batcher, inferer, framework, nms_threshold=None):
+    """
+    Feed every batch from the batcher through the inferer, printing progress on a single console line.
+    :return: Two lists, the image entries yielded by the batcher and the detections from the inferer.
+    """
+    all_images, all_detections = [], []
+    progress = "Processing {} / {} images ({})"
+    for batch, images, scales in batcher.get_batch():
+        all_detections.extend(inferer.process(batch, scales, nms_threshold))
+        all_images.extend(images)
+        status = progress.format(batcher.image_index, batcher.num_images, framework)
+        print(status, end="\r")
+    print()
+    return all_images, all_detections
+
+
+def parse_annotations(annotations_path):
+    """
+    Read a COCO-style annotations JSON file and group its bounding boxes by image id.
+    Every [x, y, width, height] bbox becomes a dict with ymin/xmin/ymax/xmax corners, a score of -1 and
+    a class equal to the category id minus one.
+    :return: A dict from image id to its list of box dicts; empty when the path is unset or missing.
+    """
+    annotations = {}
+    if not annotations_path or not os.path.exists(annotations_path):
+        return annotations
+    with open(annotations_path) as f:
+        ann_json = json.load(f)
+    for entry in ann_json["annotations"]:
+        boxes = annotations.setdefault(entry["image_id"], [])
+        bbox = entry["bbox"]
+        box = {"ymin": bbox[1], "xmin": bbox[0], "ymax": bbox[1] + bbox[3], "xmax": bbox[0] + bbox[2]}
+        box["score"] = -1
+        box["class"] = entry["category_id"] - 1
+        boxes.append(box)
+    return annotations
+
+
+def compare_images(
+    tf_images,
+    tf_detections,
+    trt_images,
+    trt_detections,
+    output_dir,
+    annotations_path,
+    labels_path,
+):
+    """
+    Render one side-by-side comparison image per input: the TensorFlow detections, the TensorRT
+    detections and, when the image has an entry in the parsed annotations, the ground truth boxes.
+    Each result is saved as "<image basename>.compare.png" inside output_dir.
+    :param tf_images: Paths of the images processed by TensorFlow; their basenames name the output files.
+    :param tf_detections: The detections for each TensorFlow image, in the same order.
+    :param trt_images: Paths of the images processed by TensorRT; their basenames are matched to image ids.
+    :param trt_detections: The detections for each TensorRT image, in the same order.
+    :param output_dir: Directory receiving the comparison images; created when missing.
+    :param annotations_path: Optional annotations JSON file, read through parse_annotations().
+    :param labels_path: Optional text file holding one class label per line.
+    """
+    labels = []
+    if labels_path and os.path.exists(labels_path):
+        with open(labels_path) as f:
+            labels = [line.strip() for line in f]
+
+    annotations = parse_annotations(annotations_path)
+
+    pairs = zip(tf_images, tf_detections, trt_images, trt_detections)
+    for count, (tf_img, tf_det, trt_img, trt_det) in enumerate(pairs, start=1):
+        # Each panel is an (image, title, title color) triple
+        panels = [
+            (visualize_detections(tf_img, None, tf_det, labels), "TensorFlow", "DarkOrange"),
+            (visualize_detections(trt_img, None, trt_det, labels), "TensorRT", "YellowGreen"),
+        ]
+
+        if annotations:
+            img_id = os.path.splitext(os.path.basename(trt_img))[0]
+            if img_id.isnumeric():
+                img_id = int(img_id)
+            if img_id in annotations:
+                ground_truth = visualize_detections(trt_img, None, annotations[img_id], labels)
+                panels.append((ground_truth, "Ground Truth", "RoyalBlue"))
+            else:
+                print(
+                    "Image {} does not have a COCO annotation, skipping ground truth visualization".format(
+                        trt_img
+                    )
+                )
+
+        basename = os.path.splitext(os.path.basename(tf_img))[0]
+        output_path = os.path.join(output_dir, "{}.compare.png".format(basename))
+        os.makedirs(output_dir, exist_ok=True)
+        vis, names, colors = (list(column) for column in zip(*panels))
+        concat_visualizations(vis, names, colors, output_path)
+
+        print(
+            "Processing {} / {} images (Visualization)".format(count, len(tf_images)),
+            end="\r",
+        )
+    print()
+
+
+def main(args):
+    """
+    Run the same images through the TensorFlow saved model and the TensorRT engine, then write the
+    side-by-side comparison visualizations.
+    :param args: The parsed command line arguments namespace.
+    """
+    tf_infer = TensorFlowInfer(args.saved_model)
+    trt_infer = TensorRTInfer(args.engine)
+
+    # The TensorRT batcher provides the height and width; TensorFlow is overridden to the same size
+    trt_spec = trt_infer.input_spec()
+    trt_batcher = ImageBatcher(args.input, *trt_spec, max_num_images=args.num_images)
+    tf_shape = [1, trt_batcher.height, trt_batcher.width, 3]
+    tf_infer.override_input_shape(0, tf_shape)
+    tf_spec = tf_infer.input_spec()
+    tf_batcher = ImageBatcher(args.input, *tf_spec, max_num_images=args.num_images)
+
+    threshold = args.nms_threshold
+    tf_images, tf_detections = run(tf_batcher, tf_infer, "TensorFlow", threshold)
+    trt_images, trt_detections = run(trt_batcher, trt_infer, "TensorRT", threshold)
+
+    compare_images(
+        tf_images,
+        tf_detections,
+        trt_images,
+        trt_detections,
+        args.output,
+        args.annotations,
+        args.labels,
+    )
+
+
+if __name__ == "__main__":
+    # Command line entry point. None of the options is marked as required for argparse itself;
+    # instead, the engine, saved model, input and output values are checked after parsing and the
+    # usage text is printed when any of them is missing or empty.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with")
+    parser.add_argument(
+        "-m", "--saved_model", help="The TensorFlow saved model path to validate against"
+    )
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input to infer, either a single image path, or a directory of images",
+    )
+    parser.add_argument(
+        "-o", "--output", default=None, help="Directory where to save the visualization results"
+    )
+    parser.add_argument(
+        "-l",
+        "--labels",
+        default="./labels_coco.txt",
+        help="File to use for reading the class labels from, default: ./labels_coco.txt",
+    )
+    parser.add_argument(
+        "-a",
+        "--annotations",
+        default=None,
+        help="Set the path to the 'instances_val2017.json' file to use for COCO annotations, in which "
+        "case --input should point to the COCO val2017 dataset, default: not used",
+    )
+    parser.add_argument(
+        "-n",
+        "--num_images",
+        default=100,
+        type=int,
+        help="The maximum number of images to visualize, default: 100",
+    )
+    parser.add_argument(
+        "-t",
+        "--nms_threshold",
+        type=float,
+        help="Override the score threshold for the NMS operation, "
+        "if higher than the threshold in the model/engine.",
+    )
+    args = parser.parse_args()
+    required = (args.engine, args.saved_model, args.input, args.output)
+    if not all(required):
+        # A mandatory value is missing: show the usage text and exit with an error status
+        parser.print_help()
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/create_onnx.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/create_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffe83094fd640a3dbaf42fac1a71479e5021e91d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/create_onnx.py
@@ -0,0 +1,602 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+import logging
+
+import tensorflow as tf
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+from onnx import shape_inference
+from tf2onnx import tfonnx, optimizer, tf_loader
+
+import onnx_utils
+
+logging.basicConfig(level=logging.INFO)  # Root logging configuration at INFO level
+log = logging.getLogger("EfficientDetGraphSurgeon")  # Logger used throughout this module
+log.setLevel(logging.INFO)
+
+
+class EfficientDetGraphSurgeon:
+    def __init__(self, saved_model_path):
+        """
+        Constructor of the EfficientDet Graph Surgeon object, to do the conversion of an EfficientDet TF saved model
+        to an ONNX-TensorRT parsable model. The saved model is converted with tf2onnx, imported into ONNX
+        GraphSurgeon, and classified as either an "AutoML" or a "TFOD" graph from its node names.
+        :param saved_model_path: The path pointing to the TensorFlow saved model to load.
+        """
+        saved_model_path = os.path.realpath(saved_model_path)
+        assert os.path.exists(saved_model_path)
+
+        # Use tf2onnx to convert saved model to an initial ONNX graph.
+        loaded = tf_loader.from_saved_model(
+            saved_model_path, None, None, "serve", ["serving_default"]
+        )
+        graph_def, inputs, outputs = loaded
+        log.info("Loaded saved model from {}".format(saved_model_path))
+
+        # Import the graph definition into a fresh TF graph, then convert it to ONNX with opset 11
+        with tf.Graph().as_default() as tf_graph:
+            tf.import_graph_def(graph_def, name="")
+        with tf_loader.tf_session(graph=tf_graph):
+            onnx_graph = tfonnx.process_tf_graph(
+                tf_graph, input_names=inputs, output_names=outputs, opset=11
+            )
+        optimized_graph = optimizer.optimize_graph(onnx_graph)
+        model_doc = "Converted from {}".format(saved_model_path)
+        onnx_model = optimized_graph.make_model(model_doc)
+
+        # Hand the converted model over to ONNX GraphSurgeon
+        self.graph = gs.import_onnx(onnx_model)
+        assert self.graph
+        log.info("TF2ONNX graph created successfully")
+
+        # Fold constants via ONNX-GS that TF2ONNX may have missed
+        self.graph.fold_constants()
+
+        # Try to auto-detect by finding if nodes match a specific name pattern expected for either of the APIs.
+        self.api = None
+        node_names = [node.name for node in self.graph.nodes]
+        if any("class_net/" in name for name in node_names):
+            self.api = "AutoML"
+        elif any(
+            "/WeightSharedConvolutionalClassHead/" in name for name in node_names
+        ):
+            self.api = "TFOD"
+        assert self.api
+        log.info("Graph was detected as {}".format(self.api))
+
+    def sanitize(self):
+        """
+        Sanitize the graph by cleaning any unconnected nodes, do a topological resort, and fold constant inputs values.
+        Up to three passes are made; each one also attempts ONNX shape inference (a failure there is only
+        logged). Iteration stops early as soon as a pass leaves the number of nodes unchanged.
+        """
+        for _ in range(3):
+            nodes_before = len(self.graph.nodes)
+
+            self.graph.cleanup().toposort()
+            try:
+                # Clear the recorded output shapes before running shape inference on the exported model
+                for node in self.graph.nodes:
+                    for out_tensor in node.outputs:
+                        out_tensor.shape = None
+                model = gs.export_onnx(self.graph)
+                inferred = shape_inference.infer_shapes(model)
+                self.graph = gs.import_onnx(inferred)
+            except Exception as e:
+                message = "Shape inference could not be performed at this time:\n{}"
+                log.info(message.format(e))
+            try:
+                self.graph.fold_constants(fold_shapes=True)
+            except TypeError as e:
+                log.error(
+                    "This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your "
+                    "onnx_graphsurgeon module. Error:\n{}".format(e)
+                )
+                raise
+
+            if len(self.graph.nodes) == nodes_before:
+                # No new folding occurred in this iteration, so we can stop for now.
+                break
+
+    def save(self, output_path):
+        """
+        Clean up and re-sort the graph, export it to an ONNX model and write that model to disk.
+        :param output_path: File path for the updated ONNX model; missing parent directories are created.
+        """
+        self.graph.cleanup().toposort()
+        onnx_model = gs.export_onnx(self.graph)
+        target = os.path.realpath(output_path)
+        os.makedirs(os.path.dirname(target), exist_ok=True)
+        onnx.save(onnx_model, target)
+        log.info("Saved ONNX model to {}".format(target))
+
+    def update_preprocessor(self, input_format, input_size, preprocessor="imagenet"):
+        """
+        Remove all the pre-processing nodes in the ONNX graph and leave only the image normalization essentials.
+        The graph input is renamed to "input", typed as float32 and given a symbolic batch dimension "N";
+        the normalizer output is then wired directly into the first stem convolution.
+        :param input_format: The input data format, either "NCHW" or "NHWC".
+        :param input_size: The input size as a comma-separated string in H,W format, e.g. "512,512".
+        :param preprocessor: The preprocessor to use, either "imagenet" for imagenet mean and stdev normalization,
+        or "scale_range" for uniform [-1,+1] range normalization.
+        """
+        # Parse and validate the requested size and layout before touching the graph
+        dims = input_size.split(",")
+        assert len(dims) == 2
+        for idx, dim in enumerate(dims):
+            dims[idx] = int(dim)
+            assert dims[idx] >= 1
+        height, width = dims[0], dims[1]
+        assert input_format in ["NCHW", "NHWC"]
+
+        # Update the input tensor shape, type and name
+        graph_input = self.graph.inputs[0]
+        if input_format == "NCHW":
+            graph_input.shape = ["N", 3, height, width]
+        if input_format == "NHWC":
+            graph_input.shape = ["N", height, width, 3]
+        graph_input.dtype = np.float32
+        graph_input.name = "input"
+        log.info(
+            "ONNX graph input shape: {} [{} format]".format(graph_input.shape, input_format)
+        )
+        self.sanitize()
+
+        # Find the initial nodes of the graph, whatever the input is first connected to, and disconnect them.
+        # The input is looked up again because sanitize() may have re-imported the graph.
+        graph_input = self.graph.inputs[0]
+        consumers = [n for n in self.graph.nodes if graph_input in n.inputs]
+        for consumer in consumers:
+            consumer.inputs.clear()
+
+        # Convert to NCHW format if needed
+        input_tensor = graph_input
+        if input_format == "NHWC":
+            input_tensor = self.graph.transpose(
+                "preprocessor/transpose", input_tensor, [0, 3, 1, 2]
+            )
+
+        def per_channel(values):
+            # RGB values reshaped to [1, 3, 1, 1] for proper NCHW shape broadcasting
+            return np.expand_dims(np.asarray(values, dtype=np.float32), axis=(0, 2, 3))
+
+        # Build the normalizer nodes for the selected preprocessor
+        assert preprocessor in ["imagenet", "scale_range"]
+        preprocessed_tensor = None
+        if preprocessor == "imagenet":
+            # Scale by 1/255, then apply the imagenet per-channel mean and stdev
+            scale_val = 1 / np.asarray([255], dtype=np.float32)
+            mean_val = -1 * per_channel([0.485, 0.456, 0.406])
+            stddev_val = 1 / per_channel([0.229, 0.224, 0.225])
+            # y = (x * scale + mean) * stddev   -->   y = x * scale * stddev + mean * stddev
+            scale_out = self.graph.elt_const(
+                "Mul", "preprocessor/scale", input_tensor, scale_val * stddev_val
+            )
+            mean_out = self.graph.elt_const(
+                "Add", "preprocessor/mean", scale_out, mean_val * stddev_val
+            )
+            preprocessed_tensor = mean_out[0]
+        elif preprocessor == "scale_range":
+            # Scale by 2/255, then offset every channel by -1
+            scale_val = 2 / np.asarray([255], dtype=np.float32)
+            offset_val = per_channel([-1, -1, -1])
+            # y = x * scale + offset
+            scale_out = self.graph.elt_const(
+                "Mul", "preprocessor/scale", input_tensor, scale_val
+            )
+            range_out = self.graph.elt_const(
+                "Add", "preprocessor/range", scale_out, offset_val
+            )
+            preprocessed_tensor = range_out[0]
+
+        # Find the first stem conv node of the graph, and connect the normalizer directly to it
+        stem_name = {"AutoML": "/stem/", "TFOD": "/stem_conv2d/"}.get(self.api)
+        stem_convs = [
+            n for n in self.graph.nodes if n.op == "Conv" and stem_name in n.name
+        ]
+        stem = stem_convs[0]
+        log.info("Found {} node '{}' as stem entry".format(stem.op, stem.name))
+        stem.inputs[0] = preprocessed_tensor
+
+        # Sanitize again now that the old pre-processing nodes are disconnected
+        self.sanitize()
+
+    def update_shapes(self):
+        """
+        Rewrite hardcoded shapes so the graph works with a dynamic batch size.
+        - Reshape nodes have the batch dimension as a fixed value of 1; it is replaced by -1.
+        - For AutoML graphs, Resize nodes get a pre-computed constant scales input instead of sizes.
+        The graph is sanitized once all updates are done.
+        """
+        # Output-Head reshapes use [1, -1, C], corrected reshape value should be [-1, V, C]
+        for node in [n for n in self.graph.nodes if n.op == "Reshape"]:
+            shape_in = node.inputs[0].shape
+            # TFOD graphs have 5-dim inputs on this Reshape
+            if shape_in is None or len(shape_in) not in [4, 5]:
+                continue
+            target = node.inputs[1]
+            if type(target) != gs.Constant:
+                continue
+            shape_out = target.values
+            if len(shape_out) != 3 or shape_out[0] != 1 or shape_out[1] != -1:
+                continue
+            channels = shape_out[2]
+            volume = shape_in[1] * shape_in[2] * shape_in[3] / channels
+            if len(shape_in) == 5:
+                volume *= shape_in[4]
+            corrected = np.asarray([-1, volume, channels], dtype=np.int64)
+            node.inputs[1] = gs.Constant(
+                "{}_shape".format(node.name), values=corrected
+            )
+            log.info(
+                "Updating Output-Head Reshape node {} to {}".format(node.name, node.inputs[1].values)
+            )
+
+        # Other Reshapes only need to change the first dim to -1, as long as there are no -1's already
+        for node in [n for n in self.graph.nodes if n.op == "Reshape"]:
+            target = node.inputs[1]
+            if type(target) != gs.Constant:
+                continue
+            if target.values[0] != 1 or -1 in target.values:
+                continue
+            target.values[0] = -1
+            log.info(
+                "Updating Reshape node {} to {}".format(node.name, target.values)
+            )
+
+        # Resize nodes try to calculate the output shape dynamically, it's more optimal to pre-compute the shape
+        if self.api == "AutoML":
+            # Resize on a BiFPN will always be 2x, but grab it from the graph just in case
+            for node in [n for n in self.graph.nodes if n.op == "Resize"]:
+                if len(node.inputs) < 4 or node.inputs[0].shape is None:
+                    continue
+                in_shape = node.inputs[0].shape
+                sizes = node.inputs[3]
+                scale_h, scale_w = None, None
+                if type(sizes) == gs.Constant:
+                    # The sizes input is already folded
+                    if len(sizes.values) != 4:
+                        continue
+                    scale_h = sizes.values[2] / in_shape[2]
+                    scale_w = sizes.values[3] / in_shape[3]
+                if type(sizes) == gs.Variable:
+                    # The sizes input comes from Shape+Slice+Concat
+                    concat = node.i(3)
+                    if concat.op != "Concat":
+                        continue
+                    hw = concat.inputs[1]
+                    if type(hw) != gs.Constant or len(hw.values) != 2:
+                        continue
+                    scale_h = hw.values[0] / in_shape[2]
+                    scale_w = hw.values[1] / in_shape[3]
+                scales = np.asarray([1, 1, scale_h, scale_w], dtype=np.float32)
+                del node.inputs[3]
+                node.inputs[2] = gs.Constant(
+                    name="{}_scales".format(node.name), values=scales
+                )
+                log.info("Updating Resize node {} to {}".format(node.name, scales))
+
+        self.sanitize()
+
+    def update_network(self):
+        """
+        Updates the graph to replace certain nodes in the main EfficientDet network:
+        - the global average pooling nodes are optimized when running for TFOD models.
+        For every matching ReduceMean, the preceding Transpose and the two following Reshape nodes are
+        bypassed, and the ReduceMean is switched to axes (2,3) with keepdims=1.
+        Graphs detected as any other API are left untouched.
+        """
+        if self.api != "TFOD":
+            return
+
+        # TFOD models have their ReduceMean nodes applied with some redundant transposes that can be
+        # optimized away for better performance.
+        # Make sure the correct subgraph is being replaced, basically search for this:
+        # X > Transpose (0,2,3,1) > ReduceMean (1,2) > Reshape (?,1,1,?) > Reshape (?,?,1,1) > Conv > Y
+        # And change to this:
+        # X > ReduceMean (2,3) > Conv > Y
+        for reduce in [n for n in self.graph.nodes if n.op == "ReduceMean"]:
+            # Producer: a Transpose with perm (0,2,3,1)
+            transpose = reduce.i()
+            if transpose.op != "Transpose" or transpose.attrs["perm"] != [0, 2, 3, 1]:
+                continue
+
+            # The ReduceMean itself must operate on exactly the axes (1,2)
+            axes = reduce.attrs["axes"]
+            if len(axes) != 2 or axes != [1, 2]:
+                continue
+
+            # First consumer: a Reshape to 4 dims whose dims 1 and 2 are both 1
+            reshape1 = reduce.o()
+            if reshape1.op != "Reshape" or len(reshape1.inputs[1].values) != 4:
+                continue
+            dims1 = reshape1.inputs[1].values
+            if dims1[1] != 1 or dims1[2] != 1:
+                continue
+
+            # Second consumer: a Reshape to 4 dims whose dims 2 and 3 are both 1
+            reshape2 = reshape1.o()
+            if reshape2.op != "Reshape" or len(reshape2.inputs[1].values) != 4:
+                continue
+            dims2 = reshape2.inputs[1].values
+            if dims2[2] != 1 or dims2[3] != 1:
+                continue
+
+            # Final consumer: the Conv that receives the pooled tensor
+            conv = reshape2.o()
+            if conv.op != "Conv":
+                continue
+
+            # If all the checks above pass, then this node sequence can be optimized by just the ReduceMean
+            # itself operating on a different set of axes.
+            # Forward the Transpose input to the ReduceMean node
+            reduce.inputs[0] = transpose.inputs[0]
+            # Forward the ReduceMean output to the Conv node
+            conv.inputs[0] = reduce.outputs[0]
+            # Update the axes that ReduceMean operates on
+            reduce.attrs["axes"] = [2, 3]
+            # Keep the reduced dimensions
+            reduce.attrs["keepdims"] = 1
+            log.info(
+                "Optimized subgraph around ReduceMean node '{}'".format(reduce.name)
+            )
+
+    def update_nms(self, threshold=None, detections=None):
+        """
+        Updates the graph to replace the NMS op by BatchedNMS_TRT TensorRT plugin node.
+        :param threshold: Override the score threshold attribute. If set to None, use the value in the graph.
+        :param detections: Override the max detections attribute. If set to None, use the value in the graph.
+        """
+
+        def find_head_concat(name_scope):
+            # This will find the concatenation node at the end of either Class Net or Box Net. These concatenation nodes
+            # bring together prediction data for each of 5 scales.
+            # The concatenated Class Net node will have shape [batch_size, num_anchors, num_classes],
+            # and the concatenated Box Net node has the shape [batch_size, num_anchors, 4].
+            # These concatenation nodes can be found by searching for all Concat's and checking if the node two
+            # steps above in the graph has a name that begins with either "box_net/..." or "class_net/...".
+            for node in [
+                node
+                for node in self.graph.nodes
+                if node.op == "Transpose" and name_scope in node.name
+            ]:
+                concat = self.graph.find_descendant_by_op(node, "Concat")
+                assert concat and len(concat.inputs) == 5
+                log.info(
+                    "Found {} node '{}' as the tip of {}".format(
+                        concat.op, concat.name, name_scope
+                    )
+                )
+                return concat
+
+        def extract_anchors_tensor(split):
+            # This will find the anchors that have been hardcoded somewhere within the ONNX graph.
+            # The function will return a gs.Constant that can be directly used as an input to the NMS plugin.
+            # The anchor tensor shape will be [1, num_anchors, 4]. Note that '1' is kept as first dim, regardless of
+            # batch size, as it's not necessary to replicate the anchors for all images in the batch.
+
+            # The anchors are available (one per coordinate) hardcoded as constants within certain box decoder nodes.
+            # Each of these four constants have shape [1, num_anchors], so some numpy operations are used to expand the
+            # dims and concatenate them as needed.
+
+            # These constants can be found by starting from the Box Net's split operation , and for each coordinate,
+            # walking down in the graph until either an Add or Mul node is found. The second input on this nodes will
+            # be the anchor data required.
+            def get_anchor_np(output_idx, op):
+                node = self.graph.find_descendant_by_op(split.o(0, output_idx), op)
+                assert node
+                val = np.squeeze(node.inputs[1].values)
+                return np.expand_dims(val.flatten(), axis=(0, 2))
+
+            anchors_y = get_anchor_np(0, "Add")
+            anchors_x = get_anchor_np(1, "Add")
+            anchors_h = get_anchor_np(2, "Mul")
+            anchors_w = get_anchor_np(3, "Mul")
+            anchors = np.concatenate(
+                [anchors_y, anchors_x, anchors_h, anchors_w], axis=2
+            )
+            return gs.Constant(name="nms/anchors:0", values=anchors)
+
+        self.sanitize()
+
+        head_names = []
+        if self.api == "AutoML":
+            head_names = ["class_net/", "box_net/"]
+        if self.api == "TFOD":
+            head_names = [
+                "/WeightSharedConvolutionalClassHead/",
+                "/WeightSharedConvolutionalBoxHead/",
+            ]
+
+        # There are five nodes at the bottom of the graph that provide important connection points:
+
+        # 1. Find the concat node at the end of the class net (multi-scale class predictor)
+        class_net = find_head_concat(head_names[0])
+        class_net_tensor = class_net.outputs[0]
+
+        # 2. Find the concat node at the end of the box net (multi-scale localization predictor)
+        box_net = find_head_concat(head_names[1])
+        box_net_tensor = box_net.outputs[0]
+
+        # 3. Find the split node that separates the box net coordinates and feeds them into the box decoder.
+        box_net_split = self.graph.find_descendant_by_op(box_net, "Split")
+        assert box_net_split and len(box_net_split.outputs) == 4
+
+        # 4. Find the concat node at the end of the box decoder.
+        box_decoder = self.graph.find_descendant_by_op(box_net_split, "Concat")
+        assert box_decoder and len(box_decoder.inputs) == 4
+        box_decoder_tensor = box_decoder.outputs[0]
+
+        # 5. Find the NMS node.
+        nms_node = self.graph.find_node_by_op("NonMaxSuppression")
+
+        # Extract NMS Configuration
+        num_detections = (
+            int(nms_node.inputs[2].values) if detections is None else detections
+        )
+        iou_threshold = float(nms_node.inputs[3].values)
+        score_threshold = (
+            float(nms_node.inputs[4].values) if threshold is None else threshold
+        )
+        num_classes = class_net.i().inputs[1].values[-1]
+        normalized = True if self.api == "TFOD" else False
+
+        # NMS Inputs and Attributes
+        # NMS expects these shapes for its input tensors:
+        # box_net: [batch_size, number_boxes, 4]
+        # class_net: [batch_size, number_boxes, number_classes]
+        # anchors: [1, number_boxes, 4] (if used)
+        nms_op = None
+        nms_attrs = None
+        nms_inputs = None
+
+        # EfficientNMS TensorRT Plugin
+        # Fusing the decoder will always be faster, so this is the default NMS method supported. In this case,
+        # three inputs are given to the NMS TensorRT node:
+        # - The box predictions (from the Box Net node found above)
+        # - The class predictions (from the Class Net node found above)
+        # - The default anchor coordinates (from the extracted anchor constants)
+        # As the original tensors from EfficientDet will be used, the NMS code type is set to 1 (Center+Size),
+        # because this is the internal box coding format used by the network.
+        anchors_tensor = extract_anchors_tensor(box_net_split)
+        nms_inputs = [box_net_tensor, class_net_tensor, anchors_tensor]
+        nms_op = "EfficientNMS_TRT"
+        nms_attrs = {
+            "plugin_version": "1",
+            "background_class": -1,
+            "max_output_boxes": num_detections,
+            "score_threshold": max(
+                0.01, score_threshold
+            ),  # Keep threshold to at least 0.01 for better efficiency
+            "iou_threshold": iou_threshold,
+            "score_activation": True,
+            "class_agnostic": False,
+            "box_coding": 1,
+        }
+        nms_output_classes_dtype = np.int32
+
+        # NMS Outputs
+        nms_output_num_detections = gs.Variable(
+            name="num_detections", dtype=np.int32, shape=["N", 1]
+        )
+        nms_output_boxes = gs.Variable(
+            name="detection_boxes", dtype=np.float32, shape=["N", num_detections, 4]
+        )
+        nms_output_scores = gs.Variable(
+            name="detection_scores", dtype=np.float32, shape=["N", num_detections]
+        )
+        nms_output_classes = gs.Variable(
+            name="detection_classes",
+            dtype=nms_output_classes_dtype,
+            shape=["N", num_detections],
+        )
+
+        nms_outputs = [
+            nms_output_num_detections,
+            nms_output_boxes,
+            nms_output_scores,
+            nms_output_classes,
+        ]
+
+        # Create the NMS Plugin node with the selected inputs. The outputs of the node will also become the final
+        # outputs of the graph.
+        self.graph.plugin(
+            op=nms_op,
+            name="nms/non_maximum_suppression",
+            inputs=nms_inputs,
+            outputs=nms_outputs,
+            attrs=nms_attrs,
+        )
+        log.info(
+            "Created NMS plugin '{}' with attributes: {}".format(nms_op, nms_attrs)
+        )
+
+        self.graph.outputs = nms_outputs
+
+        self.sanitize()
+
+
+def main(args):  # Build the graph surgeon from args.saved_model, run its update passes in order, save to args.onnx
+    effdet_gs = EfficientDetGraphSurgeon(args.saved_model)
+    if args.tf2onnx:  # Optionally save the graph before any of the update passes below is applied
+        effdet_gs.save(args.tf2onnx)
+    effdet_gs.update_preprocessor(args.input_format, args.input_size, args.preprocessor)
+    effdet_gs.update_shapes()
+    effdet_gs.update_network()
+    effdet_gs.update_nms(args.nms_threshold, args.nms_detections)  # None values keep the settings found in the graph
+    effdet_gs.save(args.onnx)
+
+
+if __name__ == "__main__":  # Command-line entry point: parse the options below and hand them to main()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-m",
+        "--saved_model",
+        required=True,
+        help="The TensorFlow saved model directory to load",
+    )
+    parser.add_argument(
+        "-o", "--onnx", required=True, help="The output ONNX model file to write"
+    )
+    parser.add_argument(
+        "-f",
+        "--input_format",
+        default="NHWC",
+        choices=["NHWC", "NCHW"],
+        help="Set the input data format of the graph, either NCHW or NHWC, default: NHWC",
+    )
+    parser.add_argument(
+        "-i",
+        "--input_size",
+        default="512,512",
+        help="Set the input shape of the graph, as a comma-separated dimensions in H,W format, "
+        "default: 512,512",
+    )
+    parser.add_argument(
+        "-p",
+        "--preprocessor",
+        default="imagenet",
+        choices=["imagenet", "scale_range"],
+        help="Set the preprocessor to apply on the graph, either 'imagenet' for standard mean "
+        "subtraction and stdev normalization, or 'scale_range' for uniform [-1,+1] "
+        "normalization as is used in the AdvProp models, default: imagenet",
+    )
+    parser.add_argument(
+        "-t",
+        "--nms_threshold",
+        type=float,
+        help="Override the NMS score threshold, default: use the original value in the model",
+    )
+    parser.add_argument(
+        "-d",
+        "--nms_detections",
+        type=int,
+        help="Override the NMS max detections, default: use the original value in the model",
+    )
+    parser.add_argument(
+        "--tf2onnx",
+        help="The path where to save the intermediate ONNX graph generated by tf2onnx, useful "
+        "for graph debugging purposes, default: not saved",
+    )
+    args = parser.parse_args()
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/eval_coco.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/eval_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6796ac0b4d487e97922b4c7e5450b0eda64ab20
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/eval_coco.py
@@ -0,0 +1,106 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+
+import numpy as np
+
+from infer import TensorRTInfer
+from image_batcher import ImageBatcher
+
+
+def main(args):  # Run the engine over the input images and score the detections with AutoML's coco_metric
+    automl_path = os.path.realpath(args.automl_path)
+    sys.path.insert(1, os.path.join(automl_path, "efficientdet"))  # Make the AutoML efficientdet modules importable
+    try:
+        import coco_metric
+    except ImportError:
+        print(
+            "Could not import the 'coco_metric' module from AutoML. Searching in: {}".format(
+                automl_path
+            )
+        )
+        print(
+            "Please clone the repository https://github.com/google/automl and provide its path with --automl_path."
+        )
+        sys.exit(1)
+
+    trt_infer = TensorRTInfer(args.engine)
+    batcher = ImageBatcher(args.input, *trt_infer.input_spec())
+    evaluator = coco_metric.EvaluationMetric(filename=args.annotations)
+    for batch, images, scales in batcher.get_batch():
+        print(
+            "Processing Image {} / {}".format(batcher.image_index, batcher.num_images),
+            end="\r",
+        )
+        detections = trt_infer.process(batch, scales, args.nms_threshold)
+        coco_det = np.zeros((len(images), max([len(d) for d in detections]), 7))  # One 7-value row per detection
+        coco_det[:, :, -1] = -1  # Rows that are not filled in below (padding) keep class -1
+        for i in range(len(images)):
+            for n in range(len(detections[i])):
+                source_id = int(os.path.splitext(os.path.basename(images[i]))[0])  # File name stem must be an integer
+                det = detections[i][n]
+                coco_det[i][n] = [
+                    source_id,
+                    det["xmin"],
+                    det["ymin"],
+                    det["xmax"] - det["xmin"],  # Box width
+                    det["ymax"] - det["ymin"],  # Box height
+                    det["score"],
+                    det["class"]
+                    + 1,  # The COCO evaluator expects class 0 to be background, so offset by 1
+                ]
+        evaluator.update_state(None, coco_det)
+    print()
+    evaluator.result(100)
+
+
+if __name__ == "__main__":  # Command-line entry point: parse the options below and hand them to main()
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with")
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input to infer, either a single image path, or a directory of images",
+    )
+    parser.add_argument(
+        "-a",
+        "--annotations",
+        help="Set the path to the COCO 'instances_val2017.json' file",
+    )
+    parser.add_argument(
+        "-p",
+        "--automl_path",
+        default="./automl",
+        help="Set the path where to find the AutoML repository, from "
+        "https://github.com/google/automl. Default: ./automl",
+    )
+    parser.add_argument(
+        "-t",
+        "--nms_threshold",
+        type=float,
+        help="Override the score threshold for the NMS operation, "
+        "if higher than the threshold in the engine.",
+    )
+    args = parser.parse_args()
+    if not all([args.engine, args.input, args.annotations]):  # Not declared required=True, so checked manually
+        parser.print_help()
+        print("\nThese arguments are required: --engine  --input and --annotations")
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/image_batcher.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/image_batcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..11b94c24aa05e437cdfc8c198ee60ae2b7816b15
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/image_batcher.py
@@ -0,0 +1,187 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import random
+
+import numpy as np
+from PIL import Image
+
+
+class ImageBatcher:
+    """
+    Creates batches of pre-processed images.
+    """
+
+    def __init__(
+        self,
+        input,
+        shape,
+        dtype,
+        max_num_images=None,
+        exact_batches=False,
+        preprocessor="EfficientDet",
+        shuffle_files=False,
+    ):
+        """
+        :param input: The path to a single image file, or to a directory to read images from.
+        :param shape: The tensor shape of the batch to prepare, either in NCHW or NHWC format.
+        :param dtype: The (numpy) datatype to cast the batched data to.
+        :param max_num_images: The maximum number of images to read from the directory.
+        :param exact_batches: This defines how to handle a number of images that is not an exact multiple of the batch
+        size. If false, it will pad the final batch with zeros to reach the batch size. If true, it will *remove* the
+        last few images in excess of a batch size multiple, to guarantee batches are exact (useful for calibration).
+        :param preprocessor: Set the preprocessor to use, depending on which network is being used.
+        :param shuffle_files: Shuffle the list of files before batching.
+        """
+        # Find images in the given input path
+        input = os.path.realpath(input)
+        self.images = []
+
+        extensions = [".jpg", ".jpeg", ".png", ".bmp"]
+
+        def is_image(path):
+            return (
+                os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions
+            )
+
+        if os.path.isdir(input):
+            self.images = [
+                os.path.join(input, f)
+                for f in os.listdir(input)
+                if is_image(os.path.join(input, f))
+            ]
+            self.images.sort()
+            if shuffle_files:
+                random.seed(47)  # Fixed seed, so the shuffled order is the same on every run
+                random.shuffle(self.images)
+        elif os.path.isfile(input):
+            if is_image(input):
+                self.images.append(input)
+        self.num_images = len(self.images)
+        if self.num_images < 1:
+            print("No valid {} images found in {}".format("/".join(extensions), input))
+            sys.exit(1)
+
+        # Handle Tensor Shape
+        self.dtype = dtype
+        self.shape = shape
+        assert len(self.shape) == 4
+        self.batch_size = shape[0]
+        assert self.batch_size > 0
+        self.format = None
+        self.width = -1
+        self.height = -1
+        if self.shape[1] == 3:
+            self.format = "NCHW"
+            self.height = self.shape[2]
+            self.width = self.shape[3]
+        elif self.shape[3] == 3:
+            self.format = "NHWC"
+            self.height = self.shape[1]
+            self.width = self.shape[2]
+        assert all([self.format, self.width > 0, self.height > 0])
+
+        # Adapt the number of images as needed
+        if max_num_images and 0 < max_num_images < len(self.images):
+            self.num_images = max_num_images
+        if exact_batches:
+            self.num_images = self.batch_size * (self.num_images // self.batch_size)
+        if self.num_images < 1:
+            print("Not enough images to create batches")
+            sys.exit(1)
+        self.images = self.images[0 : self.num_images]
+
+        # Subdivide the list of images into batches
+        self.num_batches = 1 + int((self.num_images - 1) / self.batch_size)  # Number of batches, rounded up
+        self.batches = []
+        for i in range(self.num_batches):
+            start = i * self.batch_size
+            end = min(start + self.batch_size, self.num_images)
+            self.batches.append(self.images[start:end])
+
+        # Indices
+        self.image_index = 0
+        self.batch_index = 0
+
+        self.preprocessor = preprocessor
+
+    def preprocess_image(self, image_path):
+        """
+        The image preprocessor loads an image from disk and prepares it as needed for batching. This includes padding,
+        resizing, normalization, data type casting, and transposing.
+        This Image Batcher implements one algorithm for now:
+        * EfficientDet: Resizes and pads the image to fit the input size.
+        :param image_path: The path to the image on disk to load.
+        :return: Two values: A numpy array holding the image sample, ready to be concatenated into the rest of the
+        batch, and the resize scale used, if any.
+        """
+
+        def resize_pad(image, pad_color=(0, 0, 0)):
+            """
+            A subroutine to implement padding and resizing. This will resize the image to fit fully within the input
+            size, and pads the remaining bottom-right portions with the value provided.
+            :param image: The PIL image object
+            :param pad_color: The RGB values to use for the padded area. Default: Black/Zeros.
+            :return: Two values: The PIL image object already resized and padded, and the resize scale used.
+            """
+            width, height = image.size
+            width_scale = width / self.width
+            height_scale = height / self.height
+            scale = 1.0 / max(width_scale, height_scale)
+            image = image.resize(
+                (round(width * scale), round(height * scale)), resample=Image.BILINEAR
+            )
+            pad = Image.new("RGB", (self.width, self.height))
+            pad.paste(pad_color, [0, 0, self.width, self.height])
+            pad.paste(image)
+            return pad, scale
+
+        scale = None
+        image = Image.open(image_path)
+        image = image.convert(mode="RGB")
+        if self.preprocessor == "EfficientDet":
+            # For EfficientDet: Resize & Pad with ImageNet mean values and keep as [0,255] Normalization
+            image, scale = resize_pad(image, (124, 116, 104))
+            image = np.asarray(image, dtype=self.dtype)
+            # [0-1] Normalization, Mean subtraction and Std Dev scaling are part of the EfficientDet graph, so
+            # no need to do it during preprocessing here
+        else:
+            print("Preprocessing method {} not supported".format(self.preprocessor))
+            sys.exit(1)
+        if self.format == "NCHW":
+            image = np.transpose(image, (2, 0, 1))
+        return image, scale
+
+    def get_batch(self):
+        """
+        Retrieve the batches. This is a generator object, so you can use it within a loop as:
+        for batch, images, scales in batcher.get_batch():
+           ...
+        Or outside of a loop with the next() function.
+        :return: A generator yielding three items per iteration: a numpy array holding a batch of images, the list of
+        paths to the images loaded within this batch, and the list of resize scales for each image in the batch.
+        """
+        for i, batch_images in enumerate(self.batches):
+            batch_data = np.zeros(self.shape, dtype=self.dtype)
+            batch_scales = [None] * len(batch_images)
+            for i, image in enumerate(batch_images):  # NOTE: reuses the name of the (otherwise unused) outer index
+                self.image_index += 1
+                batch_data[i], batch_scales[i] = self.preprocess_image(image)
+            self.batch_index += 1
+            yield batch_data, batch_images, batch_scales
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/infer.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5308cf470699b85e858d6779ceb0e7f0465f2f66
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/infer.py
@@ -0,0 +1,276 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import time
+import argparse
+import numpy as np
+import tensorrt as trt
+from cuda import cudart
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+from image_batcher import ImageBatcher
+from visualize import visualize_detections
+
+
+class TensorRTInfer:
+    """
+    Implements inference for the EfficientDet TensorRT engine.
+    """
+
+    def __init__(self, engine_path):
+        """
+        :param engine_path: The path to the serialized engine to load from disk.
+        """
+        # Load TRT engine
+        self.logger = trt.Logger(trt.Logger.ERROR)
+        trt.init_libnvinfer_plugins(self.logger, namespace="")
+        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
+            assert runtime
+            self.engine = runtime.deserialize_cuda_engine(f.read())
+        assert self.engine
+        self.context = self.engine.create_execution_context()
+        assert self.context
+
+        # Setup I/O bindings
+        self.inputs = []
+        self.outputs = []
+        self.allocations = []
+        for i in range(self.engine.num_io_tensors):
+            name = self.engine.get_tensor_name(i)
+            is_input = False
+            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
+                is_input = True
+            dtype = np.dtype(trt.nptype(self.engine.get_tensor_dtype(name)))
+            shape = self.context.get_tensor_shape(name)
+            if is_input and shape[0] < 0:
+                assert self.engine.num_optimization_profiles > 0
+                profile_shape = self.engine.get_tensor_profile_shape(name, 0)
+                assert len(profile_shape) == 3  # min,opt,max
+                # Set the *max* profile as binding shape
+                self.context.set_input_shape(name, profile_shape[2])
+                shape = self.context.get_tensor_shape(name)
+            if is_input:
+                self.batch_size = shape[0]
+            size = dtype.itemsize  # Size in bytes: item size times the product of all dimensions
+            for s in shape:
+                size *= s
+            allocation = common.cuda_call(cudart.cudaMalloc(size))  # NOTE(review): no matching free in this class
+            host_allocation = None if is_input else np.zeros(shape, dtype)
+            binding = {
+                "index": i,
+                "name": name,
+                "dtype": dtype,
+                "shape": list(shape),
+                "allocation": allocation,
+                "host_allocation": host_allocation,
+            }
+            self.allocations.append(allocation)
+            if is_input:
+                self.inputs.append(binding)
+            else:
+                self.outputs.append(binding)
+            print(
+                "{} '{}' with shape {} and dtype {}".format(
+                    "Input" if is_input else "Output",
+                    binding["name"],
+                    binding["shape"],
+                    binding["dtype"],
+                )
+            )
+
+        assert self.batch_size > 0
+        assert len(self.inputs) > 0
+        assert len(self.outputs) > 0
+        assert len(self.allocations) > 0
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        return self.inputs[0]["shape"], self.inputs[0]["dtype"]
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+        """
+        specs = []
+        for o in self.outputs:
+            specs.append((o["shape"], o["dtype"]))
+        return specs
+
+    def infer(self, batch):
+        """
+        Execute inference on a batch of images.
+        :param batch: A numpy array holding the image batch.
+        :return: A list of outputs as numpy arrays.
+        """
+        # Copy I/O and Execute
+        common.memcpy_host_to_device(self.inputs[0]["allocation"], batch)
+        self.context.execute_v2(self.allocations)
+        for o in range(len(self.outputs)):
+            common.memcpy_device_to_host(
+                self.outputs[o]["host_allocation"], self.outputs[o]["allocation"]
+            )
+        return [o["host_allocation"] for o in self.outputs]  # Same host arrays are reused on every call
+
+    def process(self, batch, scales=None, nms_threshold=None):
+        """
+        Run inference on a batch prepared by the ImageBatcher class. Host/device memory copies are performed here.
+        :param batch: A numpy array holding the image batch.
+        :param scales: The image resize scales for each image in this batch. Default: No scale postprocessing applied.
+        :param nms_threshold: If set (and non-zero), detections scoring below it are dropped. Default: keep all.
+        :return: A nested list for each image in the batch and each detection in the list.
+        """
+        # Run inference
+        outputs = self.infer(batch)
+
+        # Process the results
+        nums = outputs[0]
+        boxes = outputs[1]
+        scores = outputs[2]
+        classes = outputs[3]
+        detections = []
+        normalized = np.max(boxes) < 2.0  # Heuristic: boxes count as normalized when every value is below 2.0
+        for i in range(self.batch_size):
+            detections.append([])
+            for n in range(int(nums[i])):
+                scale = self.inputs[0]["shape"][2] if normalized else 1.0  # presumably a square input; TODO confirm
+                if scales and i < len(scales):
+                    scale /= scales[i]
+                if nms_threshold and scores[i][n] < nms_threshold:
+                    continue
+                detections[i].append(
+                    {
+                        "ymin": boxes[i][n][0] * scale,
+                        "xmin": boxes[i][n][1] * scale,
+                        "ymax": boxes[i][n][2] * scale,
+                        "xmax": boxes[i][n][3] * scale,
+                        "score": scores[i][n],
+                        "class": int(classes[i][n]),
+                    }
+                )
+        return detections
+
+
+def main(args):  # Run inference on args.input (optionally writing results), or benchmark when no input is given
+    if args.output:
+        output_dir = os.path.realpath(args.output)
+        os.makedirs(output_dir, exist_ok=True)
+
+    labels = []
+    if args.labels:
+        with open(args.labels) as f:
+            for i, label in enumerate(f):
+                labels.append(label.strip())
+
+    trt_infer = TensorRTInfer(args.engine)
+    if args.input:
+        print("Inferring data in {}".format(args.input))
+        batcher = ImageBatcher(args.input, *trt_infer.input_spec())
+        for batch, images, scales in batcher.get_batch():
+            print(
+                "Processing Image {} / {}".format(
+                    batcher.image_index, batcher.num_images
+                ),
+                end="\r",
+            )
+            detections = trt_infer.process(batch, scales, args.nms_threshold)
+            if args.output:
+                for i in range(len(images)):
+                    basename = os.path.splitext(os.path.basename(images[i]))[0]
+                    # Image Visualizations
+                    output_path = os.path.join(output_dir, "{}.png".format(basename))
+                    visualize_detections(images[i], output_path, detections[i], labels)
+                    # Text Results
+                    output_results = ""
+                    for d in detections[i]:
+                        line = [
+                            d["xmin"],
+                            d["ymin"],
+                            d["xmax"],
+                            d["ymax"],
+                            d["score"],
+                            d["class"],
+                        ]
+                        output_results += "\t".join([str(f) for f in line]) + "\n"
+                    with open(
+                        os.path.join(output_dir, "{}.txt".format(basename)), "w"
+                    ) as f:
+                        f.write(output_results)
+    else:
+        print("No input provided, running in benchmark mode")
+        spec = trt_infer.input_spec()
+        batch = (255 * np.random.rand(*spec[0])).astype(spec[1])  # Scale before casting: integer dtypes stay non-zero
+        iterations = 200
+        times = []
+        for i in range(20):  # GPU warmup iterations
+            trt_infer.infer(batch)
+        for i in range(iterations):
+            start = time.perf_counter()  # Monotonic, high-resolution clock for interval timing
+            trt_infer.infer(batch)
+            times.append(time.perf_counter() - start)
+            print("Iteration {} / {}".format(i + 1, iterations), end="\r")
+        print("Benchmark results include time for H2D and D2H memory copies")
+        print("Average Latency: {:.3f} ms".format(1000 * np.average(times)))
+        print(
+            "Average Throughput: {:.1f} ips".format(
+                trt_infer.batch_size / np.average(times)
+            )
+        )
+
+    print()
+    print("Finished Processing")
+
+
+if __name__ == "__main__":  # Command-line entry point: parse the options below and hand them to main()
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-e",
+        "--engine",
+        default=None,
+        required=True,
+        help="The serialized TensorRT engine",
+    )
+    parser.add_argument(
+        "-i", "--input", default=None, help="Path to the image or directory to process"
+    )  # When omitted, main() runs in benchmark mode
+    parser.add_argument(
+        "-o",
+        "--output",
+        default=None,
+        help="Directory where to save the visualization results",
+    )
+    parser.add_argument(
+        "-l",
+        "--labels",
+        default="./labels_coco.txt",
+        help="File to use for reading the class labels from, default: ./labels_coco.txt",
+    )
+    parser.add_argument(
+        "-t",
+        "--nms_threshold",
+        type=float,
+        help="Override the score threshold for the NMS operation, if higher than the built-in threshold",
+    )
+    args = parser.parse_args()
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/infer_tf.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/infer_tf.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2ecbd931f2f4081a48e095fa17530570bb23f90
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/infer_tf.py
@@ -0,0 +1,179 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import time
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+
+class TensorFlowInfer:
+    """
+    Implements TensorFlow inference of a saved model, following the same API as the TensorRTInfer class.
+    """
+
+    def __init__(self, saved_model_path):
+        gpus = tf.config.experimental.list_physical_devices("GPU")
+        for gpu in gpus:
+            tf.config.experimental.set_memory_growth(gpu, True)
+
+        self.model = tf.saved_model.load(saved_model_path)
+        self.pred_fn = self.model.signatures["serving_default"]
+
+        # Setup I/O bindings
+        self.batch_size = 1
+        self.inputs = []
+        fn_inputs = self.pred_fn.structured_input_signature[1]
+        for i, input in enumerate(list(fn_inputs.values())):
+            self.inputs.append(
+                {
+                    "index": i,
+                    "name": input.name,
+                    "dtype": np.dtype(input.dtype.as_numpy_dtype()),
+                    "shape": [1, 512, 512, 3],  # This can be overridden later
+                }
+            )
+        self.outputs = []
+        fn_outputs = self.pred_fn.structured_outputs
+        for i, output in enumerate(list(fn_outputs.values())):
+            self.outputs.append(
+                {
+                    "index": i,
+                    "name": output.name,
+                    "dtype": np.dtype(output.dtype.as_numpy_dtype()),
+                    "shape": output.shape.as_list(),
+                }
+            )
+
+    def override_input_shape(self, input, shape):
+        self.inputs[input]["shape"] = shape
+        self.batch_size = shape[0]
+
+    def input_spec(self):
+        return self.inputs[0]["shape"], self.inputs[0]["dtype"]
+
+    def output_spec(self):
+        return self.outputs[0]["shape"], self.outputs[0]["dtype"]
+
+    def infer(self, batch):
+        # Process I/O and execute the network
+        input = {self.inputs[0]["name"]: tf.convert_to_tensor(batch)}
+        output = self.pred_fn(**input)
+        return output
+
+    def process(self, batch, scales=None, nms_threshold=None):
+        # Infer network
+        output = self.infer(batch)
+
+        # Extract the results depending on what kind of saved model this is
+        boxes = None
+        scores = None
+        classes = None
+        if len(self.outputs) == 1:
+            # Detected as AutoML Saved Model
+            assert (
+                len(self.outputs[0]["shape"]) == 3 and self.outputs[0]["shape"][2] == 7
+            )
+            results = output[self.outputs[0]["name"]].numpy()
+            boxes = results[:, :, 1:5]
+            scores = results[:, :, 5]
+            classes = results[:, :, 6].astype(np.int32)
+        elif len(self.outputs) >= 4:
+            # Detected as TFOD Saved Model
+            assert output["num_detections"]
+            num = int(output["num_detections"].numpy().flatten()[0])
+            boxes = output["detection_boxes"].numpy()[:, 0:num, :]
+            scores = output["detection_scores"].numpy()[:, 0:num]
+            classes = output["detection_classes"].numpy()[:, 0:num]
+
+        # Process the results
+        detections = [[]]
+        normalized = np.max(boxes) < 2.0
+        for n in range(scores.shape[1]):
+            if scores[0][n] == 0.0:
+                break
+            scale = self.inputs[0]["shape"][2] if normalized else 1.0
+            if scales:
+                scale /= scales[0]
+            if nms_threshold and scores[0][n] < nms_threshold:
+                continue
+            detections[0].append(
+                {
+                    "ymin": boxes[0][n][0] * scale,
+                    "xmin": boxes[0][n][1] * scale,
+                    "ymax": boxes[0][n][2] * scale,
+                    "xmax": boxes[0][n][3] * scale,
+                    "score": scores[0][n],
+                    "class": int(classes[0][n]) - 1,
+                }
+            )
+        return detections
+
+
+def main(args):
+    print("Running in benchmark mode")
+    tf_infer = TensorFlowInfer(args.saved_model)
+    input_size = [int(v) for v in args.input_size.split(",")]
+    assert len(input_size) == 2
+    tf_infer.override_input_shape(0, [args.batch_size, input_size[0], input_size[1], 3])
+    spec = tf_infer.input_spec()
+    batch = 255 * np.random.rand(*spec[0]).astype(spec[1])
+    iterations = 200
+    times = []
+    for i in range(20):  # Warmup iterations
+        tf_infer.infer(batch)
+    for i in range(iterations):
+        start = time.time()
+        tf_infer.infer(batch)
+        times.append(time.time() - start)
+        print("Iteration {} / {}".format(i + 1, iterations), end="\r")
+    print("Benchmark results include TensorFlow host overhead")
+    print("Average Latency: {:.3f} ms".format(1000 * np.average(times)))
+    print(
+        "Average Throughput: {:.1f} ips".format(tf_infer.batch_size / np.average(times))
+    )
+
+    print()
+    print("Finished Processing")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-m",
+        "--saved_model",
+        required=True,
+        help="The TensorFlow saved model path to validate against",
+    )
+    parser.add_argument(
+        "-i",
+        "--input_size",
+        default="512,512",
+        help="The input size to run the model with, in HEIGHT,WIDTH format",
+    )
+    parser.add_argument(
+        "-b",
+        "--batch_size",
+        default=1,
+        type=int,
+        help="The batch size to run the model with",
+    )
+    args = parser.parse_args()
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/labels_coco.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/labels_coco.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5378c6cdad713fc3fc6fdd90f33a4d3333fdf169
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/labels_coco.txt
@@ -0,0 +1,91 @@
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+street sign
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+hat
+backpack
+umbrella
+shoe
+eye glasses
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+plate
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+mirror
+dining table
+window
+desk
+toilet
+door
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+blender
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
+hair brush
\ No newline at end of file
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/onnx_utils.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/onnx_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a98c3a7c73139b42e8e7e90d7b36c5daf5488a76
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/onnx_utils.py
@@ -0,0 +1,169 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+
+import onnx_graphsurgeon as gs
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("EfficientDetHelper").setLevel(logging.INFO)
+log = logging.getLogger("EfficientDetHelper")
+
+
+@gs.Graph.register()
+def elt_const(self, op, name, input, value):
+    """
+    Add an element-wise operation to the graph which will operate on the input tensor with the value(s) given.
+    :param op: The ONNX operation to perform, i.e. "Add" or "Mul".
+    :param input: The tensor to operate on.
+    :param value: The value array to operate with.
+    :param name: The name to use for the node.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created {} node '{}': {}".format(op, name, value.squeeze()))
+    const = gs.Constant(name="{}_value:0".format(name), values=value)
+    return self.layer(
+        name=name, op=op, inputs=[input_tensor, const], outputs=[name + ":0"]
+    )
+
+
+@gs.Graph.register()
+def unsqueeze(self, name, input, axes=[-1]):
+    """
+    Adds to the graph an Unsqueeze node for the given axes and to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node.
+    :param input: The tensor to be "unsqueezed".
+    :param axes: A list of axes on which to add the new dimension(s).
+    :return: The first output tensor, to allow chained graph construction.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created Unsqueeze node '{}': {}".format(name, axes))
+    return self.layer(
+        name=name,
+        op="Unsqueeze",
+        inputs=[input_tensor],
+        outputs=[name + ":0"],
+        attrs={"axes": axes},
+    )
+
+
+@gs.Graph.register()
+def transpose(self, name, input, perm):
+    """
+    Adds to the graph a Transpose node for the given axes permutation and to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node.
+    :param input: The tensor to be transposed.
+    :param perm: A list of axes defining their order after transposing occurs.
+    :return: The first output tensor, to allow chained graph construction.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created Transpose node '{}': {}".format(name, perm))
+    return self.layer(
+        name=name,
+        op="Transpose",
+        inputs=[input_tensor],
+        outputs=[name + ":0"],
+        attrs={"perm": perm},
+    )
+
+
+@gs.Graph.register()
+def sigmoid(self, name, input):
+    """
+    Adds to the graph a Sigmoid node for the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node.
+    :param input: The tensor to be applied to.
+    :return: The first output tensor, to allow chained graph construction.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created Sigmoid node '{}'".format(name))
+    return self.layer(
+        name=name, op="Sigmoid", inputs=[input_tensor], outputs=[name + ":0"]
+    )
+
+
+@gs.Graph.register()
+def plugin(self, op, name, inputs, outputs, attrs):
+    """
+    Adds to the graph a TensorRT plugin node with the given name, inputs and outputs. The attrs dictionary holds
+    attributes to be added to the plugin node.
+    :param self: The gs.Graph object being extended.
+    :param op: The registered name for the TensorRT plugin.
+    :param name: The name to use for the node.
+    :param inputs: The list of tensors to use an inputs.
+    :param outputs: The list of tensors to use as outputs.
+    :param attrs: The dictionary to use as attributes.
+    :return: The first output tensor, to allow chained graph construction.
+    """
+    input_tensors = inputs if type(inputs) is list else [inputs]
+    log.debug("Created TRT Plugin node '{}': {}".format(name, attrs))
+    return self.layer(
+        op=op, name=name, inputs=input_tensors, outputs=outputs, attrs=attrs
+    )
+
+
+@gs.Graph.register()
+def find_node_by_op(self, op):
+    """
+    Finds the first node in the graph with the given operation name.
+    :param self: The gs.Graph object being extended.
+    :param op: The operation name to search for.
+    :return: The first node matching that performs that op.
+    """
+    for node in self.nodes:
+        if node.op == op:
+            return node
+    return None
+
+
+@gs.Graph.register()
+def find_descendant_by_op(self, node, op, depth=10):
+    """
+    Starting from the given node, finds a node lower in the graph matching the given operation name. This is not an
+    exhaustive graph search, it will take only the first output of each node traversed while searching depth-first.
+    :param self: The gs.Graph object being extended.
+    :param node: The node to start searching from.
+    :param op: The operation name to search for.
+    :param depth: Stop searching after traversing these many nodes.
+    :return: The first descendant node matching that performs that op.
+    """
+    for i in range(depth):
+        node = node.o()
+        if node.op == op:
+            return node
+    return None
+
+
+@gs.Graph.register()
+def find_ancestor_by_op(self, node, op, depth=10):
+    """
+    Starting from the given node, finds a node higher in the graph matching the given operation name. This is not an
+    exhaustive graph search, it will take only the first input of each node traversed while searching depth-first.
+    :param self: The gs.Graph object being extended.
+    :param node: The node to start searching from.
+    :param op: The operation name to search for.
+    :param depth: Stop searching after traversing these many nodes.
+    :return: The first ancestor node matching that performs that op.
+    """
+    for i in range(depth):
+        node = node.i()
+        if node.op == op:
+            return node
+    return None
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dfed86b8616104016ff65b8e2c92c7e010c2989e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/requirements.txt
@@ -0,0 +1,15 @@
+Pillow>=10.0.0
+onnx==1.14.0; python_version <= "3.10"
+onnx==1.16.1; python_version >= "3.11"
+onnxruntime==1.15.1; python_version <= "3.10"
+onnxruntime==1.18.1; python_version >= "3.11"
+tf2onnx==1.8.1; python_version <= "3.10"
+tf2onnx==1.16.0; python_version >= "3.11"
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/visualize.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fb982efe6d3b6e85b9efe4890b1921656f3af6f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientdet/visualize.py
@@ -0,0 +1,233 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+
+import PIL.Image as Image
+import PIL.ImageDraw as ImageDraw
+import PIL.ImageFont as ImageFont
+
+# Color names used when drawing detections; visualize_detections picks the entry
+# at index (class id modulo the list length) for each detection.
+COLORS = [
+    "GoldenRod",
+    "MediumTurquoise",
+    "GreenYellow",
+    "SteelBlue",
+    "DarkSeaGreen",
+    "SeaShell",
+    "LightGrey",
+    "IndianRed",
+    "DarkKhaki",
+    "LawnGreen",
+    "WhiteSmoke",
+    "Peru",
+    "LightCoral",
+    "FireBrick",
+    "OldLace",
+    "LightBlue",
+    "SlateGray",
+    "OliveDrab",
+    "NavajoWhite",
+    "PaleVioletRed",
+    "SpringGreen",
+    "AliceBlue",
+    "Violet",
+    "DeepSkyBlue",
+    "Red",
+    "MediumVioletRed",
+    "PaleTurquoise",
+    "Tomato",
+    "Azure",
+    "Yellow",
+    "Cornsilk",
+    "Aquamarine",
+    "CadetBlue",
+    "CornflowerBlue",
+    "DodgerBlue",
+    "Olive",
+    "Orchid",
+    "LemonChiffon",
+    "Sienna",
+    "OrangeRed",
+    "Orange",
+    "DarkSalmon",
+    "Magenta",
+    "Wheat",
+    "Lime",
+    "GhostWhite",
+    "SlateBlue",
+    "Aqua",
+    "MediumAquaMarine",
+    "LightSlateGrey",
+    "MediumSeaGreen",
+    "SandyBrown",
+    "YellowGreen",
+    "Plum",
+    "FloralWhite",
+    "LightPink",
+    "Thistle",
+    "DarkViolet",
+    "Pink",
+    "Crimson",
+    "Chocolate",
+    "DarkGrey",
+    "Ivory",
+    "PaleGreen",
+    "DarkGoldenRod",
+    "LavenderBlush",
+    "SlateGrey",
+    "DeepPink",
+    "Gold",
+    "Cyan",
+    "LightSteelBlue",
+    "MediumPurple",
+    "ForestGreen",
+    "DarkOrange",
+    "Tan",
+    "Salmon",
+    "PaleGoldenRod",
+    "LightGreen",
+    "LightSlateGray",
+    "HoneyDew",
+    "Fuchsia",
+    "LightSeaGreen",
+    "DarkOrchid",
+    "Green",
+    "Chartreuse",
+    "LimeGreen",
+    "AntiqueWhite",
+    "Beige",
+    "Gainsboro",
+    "Bisque",
+    "SaddleBrown",
+    "Silver",
+    "Lavender",
+    "Teal",
+    "LightCyan",
+    "PapayaWhip",
+    "Purple",
+    "Coral",
+    "BurlyWood",
+    "LightGray",
+    "Snow",
+    "MistyRose",
+    "PowderBlue",
+    "DarkCyan",
+    "White",
+    "Turquoise",
+    "MediumSlateBlue",
+    "PeachPuff",
+    "Moccasin",
+    "LightSalmon",
+    "SkyBlue",
+    "Khaki",
+    "MediumSpringGreen",
+    "BlueViolet",
+    "MintCream",
+    "Linen",
+    "SeaGreen",
+    "HotPink",
+    "LightYellow",
+    "BlanchedAlmond",
+    "RoyalBlue",
+    "RosyBrown",
+    "MediumOrchid",
+    "DarkTurquoise",
+    "LightGoldenRodYellow",
+    "LightSkyBlue",
+]
+
+
+def visualize_detections(image_path, output_path, detections, labels=[]):
+    image = Image.open(image_path).convert(mode="RGB")
+    draw = ImageDraw.Draw(image)
+    line_width = 2
+    font = ImageFont.load_default()
+    for d in detections:
+        color = COLORS[d["class"] % len(COLORS)]
+        draw.line(
+            [
+                (d["xmin"], d["ymin"]),
+                (d["xmin"], d["ymax"]),
+                (d["xmax"], d["ymax"]),
+                (d["xmax"], d["ymin"]),
+                (d["xmin"], d["ymin"]),
+            ],
+            width=line_width,
+            fill=color,
+        )
+        label = "Class {}".format(d["class"])
+        if d["class"] < len(labels):
+            label = "{}".format(labels[d["class"]])
+        score = d["score"]
+        text = "{}: {}%".format(label, int(100 * score))
+        if score < 0:
+            text = label
+        left, top, right, bottom = font.getbbox(text)
+        text_width, text_height = right - left, bottom - top
+        text_bottom = max(text_height, d["ymin"])
+        text_left = d["xmin"]
+        margin = np.ceil(0.05 * text_height)
+        draw.rectangle(
+            [
+                (text_left, text_bottom - text_height - 2 * margin),
+                (text_left + text_width, text_bottom),
+            ],
+            fill=color,
+        )
+        draw.text(
+            (text_left + margin, text_bottom - text_height - margin),
+            text,
+            fill="black",
+            font=font,
+        )
+    if output_path is None:
+        return image
+    image.save(output_path)
+
+
+def concat_visualizations(images, names, colors, output_path):
+    def draw_text(draw, font, text, width, bar_height, offset, color):
+        left, top, right, bottom = font.getbbox(text)
+        text_width, text_height = right - left, bottom - top
+        draw.rectangle([(offset, 0), (offset + width, bar_height)], fill=color)
+        draw.text(
+            (offset + (width - text_width) / 2, text_height - text_height / 2),
+            text,
+            fill="black",
+            font=font,
+        )
+
+    bar_height = 18
+    width = 0
+    height = 0
+    for im in images:
+        width += im.width
+        height = max(height, im.height)
+
+    concat = Image.new("RGB", (width, height + bar_height))
+    draw = ImageDraw.Draw(concat)
+    font = ImageFont.load_default()
+
+    offset = 0
+    for i, im in enumerate(images):
+        concat.paste(im, (offset, bar_height))
+        draw_text(draw, font, names[i], im.width, bar_height, offset, colors[i])
+        offset += im.width
+
+    if output_path is None:
+        return concat
+    concat.save(output_path)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..15989257f6eab57f2c512b3bd3994ab4b1e15722
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/README.md
@@ -0,0 +1,291 @@
+# EfficientNet V1 and V2 in TensorRT
+
+These scripts help with conversion and execution of Google [EfficientNet V1](https://arxiv.org/abs/1905.11946) and [EfficientNet V2](https://arxiv.org/abs/2104.00298) models with [NVIDIA TensorRT](https://developer.nvidia.com/tensorrt).
+
+## Contents
+- [Changelog](#changelog)
+- [Setup](#setup)
+- [Model Conversion](#model-conversion)
+  * [TensorFlow Saved Model](#tensorflow-saved-model)
+  * [Create ONNX Graph](#create-onnx-graph)
+  * [Build TensorRT Engine](#build-tensorrt-engine)
+  * [Benchmark TensorRT Engine](#benchmark-tensorrt-engine)
+- [Inference](#inference)
+  * [Input Preprocessing](#input-preprocessing)
+  * [Inference in Python](#inference-in-python)
+  * [Validate against Ground Truth](#validate-against-ground-truth)
+  * [Compare against TensorFlow](#compare-against-tensorflow)
+
+## Changelog
+
+August 2023: 
+  - Update ONNX version support to 1.14.0
+  - Removed support for Python versions < 3.8.
+
+## Setup
+
+Note: The sample is not compatible with Python-3.12 because tensorflow-addons does not support Python-3.12.
+
+For best results, we recommend running these scripts on an environment with TensorRT >= 8.0.1 and TensorFlow 2.12.0.
+
+Install TensorRT as per the [TensorRT Install Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You will need to make sure the Python bindings for TensorRT are also installed correctly. These are available by installing the `python3-libnvinfer` and `python3-libnvinfer-dev` packages from your TensorRT download.
+
+Install all other packages listed in `requirements.txt`:
+
+```bash
+pip3 install -r requirements.txt
+```
+
+You will also need the latest `onnx_graphsurgeon` python module. If not already installed by TensorRT, you can install it manually by running:
+
+```bash
+pip3 install onnx-graphsurgeon --index-url https://pypi.ngc.nvidia.com
+```
+
+## Model Conversion
+
+The workflow to convert an EfficientNet model is basically TensorFlow → ONNX → TensorRT, and so parts of this process require TensorFlow to be installed. If you are performing this conversion to run inference on the edge, such as for NVIDIA Jetson devices, it might be easier to do the ONNX conversion on a PC first.
+
+### TensorFlow Saved Model
+
+The starting point of conversion is a TensorFlow saved model. This can be exported from your own trained models, or you can download a pre-trained model. This conversion script is compatible with two types of models:
+
+1. EfficientNet V1 models trained with the [TensorFlow TPU Models](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) framework.
+2. EfficientNet V2 models trained with the [AutoML](https://github.com/google/automl/tree/master/efficientnetv2) framework.
+
+#### 1. EfficientNet V1
+
+You can download one of the pre-trained saved models from the [EfficientNet TFHub](https://tfhub.dev/google/collections/efficientnet), such as:
+
+```bash
+wget https://storage.googleapis.com/tfhub-modules/tensorflow/efficientnet/b0/classification/1.tar.gz
+```
+
+The contents of this package, when extracted, will hold a saved model ready for conversion.
+
+Alternatively, if you are training your own model, or if you need to re-export the saved model manually, you will need the training checkpoint (or a pre-trained "ckpt" from the [EfficientNet Repository](https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet) such as [this](https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/ckpts/efficientnet-b0.tar.gz)).
+
+To export a saved model from the checkpoint, clone and install the [TensorFlow TPU Models](https://github.com/tensorflow/tpu) repository, and run:
+
+```bash
+cd /path/to/tpu/models/official/efficientnet
+python3 export_model.py \
+    --ckpt_dir /path/to/efficientnet-b0 \
+    --image_size 224 \
+    --model_name efficientnet-b0 \
+    --output_tflite /dev/null \
+    --noquantize \
+    --output_saved_model_dir /path/to/saved_model
+```
+
+Adapt `--image_size` and `--model_name` according to the checkpoint model being used. The `--ckpt_dir` argument points to the directory holding the checkpoint as described above. The TF saved model will be exported to the path given by `--output_saved_model_dir`.
+
+#### 2. EfficientNet V2
+
+At the time of this writing, there are no EfficientNet V2 saved models on TFHub yet, so you will need to download a pre-trained checkpoint or use your own trained model.
+
+To do so, you will need your training checkpoint (or a pre-trained "ckpt" from the [EfficientNet V2 Repository](https://github.com/google/automl/tree/master/efficientnetv2) such as [this](https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-s.tgz)):
+
+```bash
+wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-s.tgz
+```
+
+To export a saved model from here, clone and install the [AutoML](https://github.com/google/automl) repository, and run:
+
+```bash
+cd /path/to/automl/efficientnetv2
+python3 infer.py \
+    --mode tf2bm \
+    --model_name efficientnetv2-s \
+    --model_dir ../../efficientnetv2-s/ \
+    --export_dir ../../efficientnetv2-s/saved_model
+```
+
+Where you should adapt `--model_name` to the corresponding model for the checkpoint used. The `--model_dir` argument should point to the downloaded or trained checkpoint as described above. The exported saved model will then be available in the directory pointed by the `--export_dir` argument.
+
+### Create ONNX Graph
+
+To generate an ONNX model file, find the saved model as described above, select a batch size and input size, and run:
+
+```bash
+python3 create_onnx.py \
+    --saved_model /path/to/saved_model \
+    --onnx /path/to/model.onnx \
+    --batch_size 1 \
+    --input_size 384
+```
+
+You may need to adapt the argument `--input_size` to explicitly define the exact input image dimensions to use in the graph. Consult the model definitions in the corresponding training system, to find the expected input size for the model you are working with.
+
+This will create the file `model.onnx` which is ready to convert to TensorRT.
+
+Optionally, you may wish to visualize the resulting ONNX graph with a tool such as [Netron](https://netron.app/).
+
+### Build TensorRT Engine
+
+It is possible to build the TensorRT engine directly with `trtexec` using the ONNX graph generated in the previous step. However, the script `build_engine.py` is provided for convenience, as it has been tailored to EfficientNet engine building and calibration. Run `python3 build_engine.py --help` for details on available settings.
+
+#### FP16 Precision
+
+To build the TensorRT engine file with FP16 precision, run:
+
+```bash
+python3 build_engine.py \
+    --onnx /path/to/model.onnx \
+    --engine /path/to/engine.trt \
+    --precision fp16
+```
+
+The file `engine.trt` will be created, which can now be used to infer with TensorRT.
+
+For best results, make sure no other processes are using the GPU during engine build, as it may affect the optimal tactic selection process.
+
+#### INT8 Precision
+
+To build and calibrate an engine for INT8 precision, run:
+
+```bash
+python3 build_engine.py \
+    --onnx /path/to/model.onnx \
+    --engine /path/to/engine.trt \
+    --precision int8 \
+    --calib_input /path/to/calibration/images \
+    --calib_cache /path/to/calibration.cache \
+    --calib_preprocessor V2
+```
+
+Where `--calib_input` points to a directory with several thousand images. For example, this could be a subset of the training or validation datasets that were used for the model. It's important that this data represents the runtime data distribution relatively well; therefore, the more images that are used for calibration, the better the accuracy that will be achieved in INT8 precision. For ImageNet networks, we have found that 25,000 images give a good result.
+
+The `--calib_cache` argument controls where the calibration cache file will be written to. This is useful to keep a cached copy of the calibration results. Next time you need to build the engine for the same network, if this file exists, it will skip the calibration step and use the cached values instead.
+
+Finally, the `--calib_preprocessor` option sets the preprocessing algorithm to apply on calibration images. Please refer to the [Input Preprocessing](#input-preprocessing) section below for more details.
+
+Run `python3 build_engine.py --help` for additional build options.
+
+### Benchmark TensorRT Engine
+
+Optionally, you can obtain execution timing information for the built engine by using the `trtexec` utility, as:
+
+```bash
+trtexec \
+    --loadEngine=/path/to/engine.trt \
+    --useCudaGraph --noDataTransfers \
+    --iterations=100 --avgRuns=100
+```
+
+If it's not already in your `$PATH`, the `trtexec` binary is usually found in `/usr/src/tensorrt/bin/trtexec`, depending on your TensorRT installation method.
+
+An inference benchmark will run, with GPU Compute latency times printed out to the console. Depending on the version of TensorRT, you should see something similar to:
+
+```
+GPU Compute Time: min = 1.79895 ms, max = 1.9209 ms, mean = 1.80589 ms, median = 1.80493 ms, percentile(99%) = 1.81396 ms
+```
+
+## Inference
+
+For optimal performance, inference should be done in a C++ application that takes advantage of CUDA Graphs to launch the inference request. Alternatively, the TensorRT engine built with this process can also be executed through either [Triton Inference Server](https://developer.nvidia.com/nvidia-triton-inference-server) or [DeepStream SDK](https://developer.nvidia.com/deepstream-sdk).
+
+However, for convenience, a python inference script is provided here for quick testing of the built TensorRT engine.
+
+### Input Preprocessing
+
+An important concept for computer vision models is the preprocessing applied to an image before feeding it to the classifier network. The various EfficientNet models supported by this converter use different preprocessing algorithms.
+
+We have implemented three different preprocessor algorithms, as defined in `image_batcher.py`. They are:
+
+| **Preprocessing** | **Resizing**             | **Normalization** | **Mean Subtract** |
+| ----------------- | ------------------------ | ----------------- | ----------------- |
+| **V2**            | Bilinear Resize          | [-1 to +1] Range  | No                |
+| **V1**            | Bicubic Resize + PadCrop | [0 to +1] Range   | No                |
+| **V1MS**          | Bicubic Resize + PadCrop | [0 to +1] Range   | Yes               |
+
+**V2:** This is the preprocessor to be used with all EfficientNet V2 models. EfficientNet V2 does not require mean subtraction, so it is never performed for these models.
+
+**V1:** This is the default preprocessor to be used with most EfficientNet V1 models. EfficientNet V1 normally expects mean subtraction to be applied. However, some TensorFlow saved models, such as those downloaded from TFHub, already perform this operation within the graph itself, so it is not required to do it during preprocessing.
+
+**V1MS:** Depending on the saved model exporter, some EfficientNet V1 models may not have the integrated mean subtraction. This is often the case with models exported from the pre-trained *checkpoints*. For those cases, this preprocessor will apply mean subtraction during preprocessing.
+
+These are the supported values for `--preprocessor` and `--calib_preprocessor` arguments used throughout these scripts. Note that choosing an incorrect preprocessor for a model will considerably impact its accuracy. Please take a moment to choose the correct preprocessor to use before performing inference or validation of a model.
+
+### Inference in Python
+
+To classify a set of images with TensorRT, run:
+
+```bash
+python3 infer.py \
+    --engine /path/to/engine.trt \
+    --input /path/to/images \
+    --preprocessor V2
+```
+
+Where the input path can be either a single image file, or a directory of jpg/png/bmp images. The classification results will be printed out to the console, one image per line, as:
+
+```
+<image path>  <predicted class id>  <confidence score>
+```
+
+You can also redirect these results to a file, and optionally set a separator character (such as for CSV file creation):
+
+```bash
+python3 infer.py \
+    --engine /path/to/engine.trt \
+    --input /path/to/ILSVRC2012_img_val \
+    --preprocessor V2 \
+    --separator ',' > results.csv
+```
+
+### Validate against Ground Truth
+
+To validate the TensorRT inference results accuracy against ground truth labels, run:
+
+```bash
+python3 eval_gt.py \
+    --engine /path/to/engine.trt \
+    --annotations /path/to/val.txt \
+    --input /path/to/images \
+    --preprocessor V2
+```
+
+The annotations file is expected to have one line per image, where the first column is the image filename, and the second column is the ground truth class label. For example:
+
+```
+ILSVRC2012_val_00000001.JPEG 65
+ILSVRC2012_val_00000002.JPEG 970
+ILSVRC2012_val_00000003.JPEG 230
+ILSVRC2012_val_00000004.JPEG 809
+[...]
+```
+
+> **NOTE:** The ImageNet pre-trained models follow the label mapping introduced by [Caffe](https://github.com/BVLC/caffe/blob/master/data/ilsvrc12/get_ilsvrc_aux.sh), which indexes labels according to their synset number. The validation file for this format can be downloaded from Caffe's ILSVRC2012 auxiliary package at:
+>
+> http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
+>
+> You can use the `val.txt` file bundled in this package for ImageNet evaluation purposes.
+
+Upon a successful run of `EfficientNet V2-S` on the `ILSVRC2012_img_val` [ImageNet](https://www.image-net.org/download.php) dataset, for example, you should see something like:
+
+```
+Top-1 Accuracy: 83.710%
+Top-5 Accuracy: 96.615%
+```
+
+### Compare against TensorFlow
+
+Another method to validate the engine is to compare the TensorRT inference results with what TensorFlow produces, to make sure both frameworks give similar results. For this, run:
+
+```bash
+python3 compare_tf.py \
+    --engine /path/to/engine.trt \
+    --saved_model /path/to/saved_model \
+    --input /path/to/images \
+    --preprocessor V2
+```
+
+This can be performed on any set of images, no ground truth is required. The script runs both the TensorFlow saved model and the TensorRT engine on the same image batches, one batch at a time. It then computes the class prediction similarity (the percentage of matching top-1 predictions) and the RMSE in confidence scores between both outputs.
+
+Upon a successful run, you should see something like:
+
+```
+Matching Top-1 class predictions for 4999 out of 5000 images: 99.98%
+RMSE between TensorFlow and TensorRT confidence scores: 0.006
+```
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/build_engine.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..683c567c62f2c04cb5e33e40213035fd17a8093c
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/build_engine.py
@@ -0,0 +1,321 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import logging
+import argparse
+
+import numpy as np
+import tensorrt as trt
+from cuda import cudart
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+from image_batcher import ImageBatcher
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("EngineBuilder").setLevel(logging.INFO)
+log = logging.getLogger("EngineBuilder")
+
+
+class EngineCalibrator(trt.IInt8EntropyCalibrator2):
+    """
+    INT8 Entropy Calibrator 2 implementation fed by an ImageBatcher and backed by an on-disk cache file.
+    """
+
+    def __init__(self, cache_file):
+        """
+        :param cache_file: The location of the cache file.
+        """
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher = None  # Assigned by set_image_batcher(); None means cache-only operation
+        self.batch_allocation = None  # Value returned by cudaMalloc in set_image_batcher()
+        self.batch_generator = None  # Generator obtained from image_batcher.get_batch()
+
+    def set_image_batcher(self, image_batcher: ImageBatcher):
+        """
+        Define the image batcher to use, if any. If using only the cache file, an image batcher doesn't need
+        to be defined. Also allocates the buffer that get_batch() copies every batch into.
+        :param image_batcher: The ImageBatcher object
+        """
+        self.image_batcher = image_batcher
+        size = int(  # Bytes in one batch: element size times the number of elements in the batcher shape
+            np.dtype(self.image_batcher.dtype).itemsize
+            * np.prod(self.image_batcher.shape)
+        )
+        self.batch_allocation = common.cuda_call(cudart.cudaMalloc(size))  # NOTE(review): never freed here
+        self.batch_generator = self.image_batcher.get_batch()
+
+    def get_batch_size(self):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Get the batch size to use for calibration.
+        :return: The image batcher's batch size, or 1 when no image batcher has been set.
+        """
+        if self.image_batcher:
+            return self.image_batcher.batch_size
+        return 1
+
+    def get_batch(self, names):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Get the next batch to use for calibration, as a list of device memory pointers.
+        :param names: The names of the inputs, if useful to define the order of inputs.
+        :return: A one-element list with the int-casted batch pointer, or None when there is no (more) data.
+        """
+        if not self.image_batcher:
+            return None
+        try:
+            batch, _ = next(self.batch_generator)  # Second tuple element is not needed for calibration
+            log.info(
+                "Calibrating image {} / {}".format(
+                    self.image_batcher.image_index, self.image_batcher.num_images
+                )
+            )
+            common.memcpy_host_to_device(
+                self.batch_allocation, np.ascontiguousarray(batch)
+            )
+            return [int(self.batch_allocation)]  # The same allocation is reused for every batch
+        except StopIteration:
+            log.info("Finished calibration batches")
+            return None
+
+    def read_calibration_cache(self):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Read the calibration cache file stored on disk, if it exists.
+        :return: The contents of the cache file as bytes, or None (implicitly) when the file does not exist.
+        """
+        if os.path.exists(self.cache_file):
+            with open(self.cache_file, "rb") as f:
+                log.info("Using calibration cache file: {}".format(self.cache_file))
+                return f.read()
+
+    def write_calibration_cache(self, cache):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Store the calibration cache to a file on disk.
+        :param cache: The contents of the calibration cache to store.
+        """
+        with open(self.cache_file, "wb") as f:
+            log.info("Writing calibration cache data to: {}".format(self.cache_file))
+            f.write(cache)
+
+
+class EngineBuilder:
+    """
+    Parses an ONNX graph and builds a TensorRT engine from it.
+    """
+
+    def __init__(self, verbose=False):
+        """
+        :param verbose: If enabled, a higher verbosity level will be set on the TensorRT logger.
+        """
+        self.trt_logger = trt.Logger(trt.Logger.INFO)
+        if verbose:
+            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE
+
+        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")
+
+        self.builder = trt.Builder(self.trt_logger)
+        self.config = self.builder.create_builder_config()
+        self.config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 8 * (2**30))  # 8 GB
+
+        self.batch_size = None  # Set by create_network() from the network inputs
+        self.network = None
+        self.parser = None
+
+    def create_network(self, onnx_path):
+        """
+        Parse the ONNX graph and create the corresponding TensorRT network definition.
+        :param onnx_path: The path to the ONNX graph to load. The process exits with status 1 if parsing fails.
+        """
+        self.network = self.builder.create_network(0)
+        self.parser = trt.OnnxParser(self.network, self.trt_logger)
+
+        onnx_path = os.path.realpath(onnx_path)
+        with open(onnx_path, "rb") as f:
+            if not self.parser.parse(f.read()):
+                log.error("Failed to load ONNX file: {}".format(onnx_path))
+                for error in range(self.parser.num_errors):
+                    log.error(self.parser.get_error(error))
+                sys.exit(1)
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+        outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]
+
+        log.info("Network Description")
+        for tensor in inputs:
+            self.batch_size = tensor.shape[0]  # With several inputs, the last one's leading dimension wins
+            log.info(
+                "Input '{}' with shape {} and dtype {}".format(
+                    tensor.name, tensor.shape, tensor.dtype
+                )
+            )
+        for tensor in outputs:
+            log.info(
+                "Output '{}' with shape {} and dtype {}".format(
+                    tensor.name, tensor.shape, tensor.dtype
+                )
+            )
+        assert self.batch_size > 0
+
+    def create_engine(
+        self,
+        engine_path,
+        precision,
+        calib_input=None,
+        calib_cache=None,
+        calib_num_images=25000,
+        calib_batch_size=8,
+        calib_preprocessor=None,
+        timing_cache="./timing.cache",
+    ):
+        """
+        Build the TensorRT engine and serialize it to disk. Exits with status 1 if the build fails.
+        :param engine_path: The path where to serialize the engine to.
+        :param precision: The datatype to use for the engine, either 'fp32', 'fp16' or 'int8'.
+        :param calib_input: The path to a directory holding the calibration images.
+        :param calib_cache: The calibration cache path to write, or load if it already exists (needed for int8).
+        :param calib_num_images: The maximum number of images to use for calibration.
+        :param calib_batch_size: The batch size to use for the calibration process.
+        :param calib_preprocessor: The ImageBatcher preprocessor algorithm to use.
+        :param timing_cache: The timing cache file to load before and save after the build.
+        """
+        engine_path = os.path.realpath(engine_path)
+        engine_dir = os.path.dirname(engine_path)
+        os.makedirs(engine_dir, exist_ok=True)
+        log.info("Building {} Engine in {}".format(precision, engine_path))
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+
+        log.info("Reading timing cache from file: {:}".format(timing_cache))
+        common.setup_timing_cache(self.config, timing_cache)  # Explicit argument, not the script-global args
+
+        if precision == "fp16":
+            if not self.builder.platform_has_fast_fp16:
+                log.warning("FP16 is not supported natively on this platform/device")
+            else:
+                self.config.set_flag(trt.BuilderFlag.FP16)
+        elif precision == "int8":
+            if not self.builder.platform_has_fast_int8:
+                log.warning("INT8 is not supported natively on this platform/device")
+            else:
+                self.config.set_flag(trt.BuilderFlag.INT8)
+                self.config.int8_calibrator = EngineCalibrator(calib_cache)
+                if not os.path.exists(calib_cache):  # Images are only needed when no cache file exists yet
+                    calib_shape = [calib_batch_size] + list(inputs[0].shape[1:])
+                    calib_dtype = trt.nptype(inputs[0].dtype)
+                    self.config.int8_calibrator.set_image_batcher(
+                        ImageBatcher(
+                            calib_input,
+                            calib_shape,
+                            calib_dtype,
+                            max_num_images=calib_num_images,
+                            exact_batches=True,
+                            preprocessor=calib_preprocessor,
+                        )
+                    )
+
+        engine_bytes = self.builder.build_serialized_network(self.network, self.config)
+        if engine_bytes is None:
+            log.error("Failed to create engine")
+            sys.exit(1)
+
+        log.info("Serializing timing cache to file: {:}".format(timing_cache))
+        common.save_timing_cache(self.config, timing_cache)
+
+        with open(engine_path, "wb") as f:
+            log.info("Serializing engine to file: {:}".format(engine_path))
+            f.write(engine_bytes)
+
+
+def main(args):
+    builder = EngineBuilder(args.verbose)
+    builder.create_network(args.onnx)
+    builder.create_engine(
+        args.engine,
+        args.precision,
+        args.calib_input,
+        args.calib_cache,
+        args.calib_num_images,
+        args.calib_batch_size,
+        args.calib_preprocessor,
+        timing_cache=args.timing_cache,
+    )
+
+
+if __name__ == "__main__":  # Script entry point: parse the command line, validate it, then build
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-o", "--onnx", help="The input ONNX model file to load")
+    parser.add_argument("-e", "--engine", help="The output path for the TRT engine")
+    parser.add_argument(
+        "-p",
+        "--precision",
+        default="fp16",
+        choices=["fp32", "fp16", "int8"],
+        help="The precision mode to build in, either 'fp32', 'fp16' or 'int8', default: 'fp16'",
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable more verbose log output"
+    )
+    parser.add_argument(
+        "--calib_input", help="The directory holding images to use for calibration"
+    )
+    parser.add_argument(
+        "--calib_cache",
+        default="./calibration.cache",
+        help="The file path for INT8 calibration cache to use, default: ./calibration.cache",
+    )
+    parser.add_argument(
+        "--calib_num_images",
+        default=25000,
+        type=int,
+        help="The maximum number of images to use for calibration, default: 25000",
+    )
+    parser.add_argument(
+        "--calib_batch_size",
+        default=8,
+        type=int,
+        help="The batch size for the calibration process, default: 8",
+    )
+    parser.add_argument(
+        "--calib_preprocessor",
+        default="V2",
+        choices=["V1", "V1MS", "V2"],
+        help="Set the calibration image preprocessor to use, either 'V2', 'V1' or 'V1MS', default: V2",
+    )
+    parser.add_argument(
+        "--timing_cache",
+        default="./timing.cache",
+        help="The file path for timing cache, default: ./timing.cache",
+    )
+    args = parser.parse_args()
+    if not all([args.onnx, args.engine]):
+        parser.print_help()
+        log.error("These arguments are required: --onnx and --engine")
+        sys.exit(1)
+    if args.precision == "int8" and not (args.calib_input or os.path.exists(args.calib_cache)):
+        parser.print_help()
+        log.error(  # --calib_cache always has a default value, so test for the file rather than the flag
+            "When building in int8 precision, either --calib_input or an existing --calib_cache file is required"
+        )
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/compare_tf.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/compare_tf.py
new file mode 100644
index 0000000000000000000000000000000000000000..2671572e4f342e8acac984ba836b9a4c1ef7d472
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/compare_tf.py
@@ -0,0 +1,196 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+
+import numpy as np
+import tensorflow as tf
+
+from infer import TensorRTInfer
+from image_batcher import ImageBatcher
+
+
+class TensorFlowInfer:
+    """
+    Implements TensorFlow inference of a saved model, following the same API as the TensorRTInfer class.
+    """
+
+    def __init__(self, saved_model_path):  # saved_model_path is handed to tf.saved_model.load()
+        gpus = tf.config.experimental.list_physical_devices("GPU")
+        for gpu in gpus:
+            tf.config.experimental.set_memory_growth(gpu, True)
+
+        self.model = tf.saved_model.load(saved_model_path)
+        self.pred_fn = self.model.signatures["serving_default"]
+
+        # Setup I/O bindings
+        self.inputs = []
+        fn_inputs = self.pred_fn.structured_input_signature[1]
+        for i, spec in enumerate(list(fn_inputs.values())):
+            self.inputs.append(
+                {
+                    "index": i,
+                    "name": spec.name,
+                    "dtype": np.dtype(spec.dtype.as_numpy_dtype()),
+                    "shape": spec.shape.as_list(),
+                }
+            )
+        self.outputs = []
+        fn_outputs = self.pred_fn.structured_outputs
+        for i, spec in enumerate(list(fn_outputs.values())):
+            self.outputs.append(
+                {
+                    "index": i,
+                    "name": spec.name,
+                    "dtype": np.dtype(spec.dtype.as_numpy_dtype()),
+                    "shape": spec.shape.as_list(),
+                }
+            )
+
+    def input_spec(self):  # Returns (shape, dtype) of the first signature input
+        return self.inputs[0]["shape"], self.inputs[0]["dtype"]
+
+    def output_spec(self):  # Returns (shape, dtype) of the first signature output
+        return self.outputs[0]["shape"], self.outputs[0]["dtype"]
+
+    def infer(self, batch, top=1):  # Returns (classes, scores, [top_classes, top_scores]) for one batch
+        # Process I/O and execute the network
+        feed = {self.inputs[0]["name"]: tf.convert_to_tensor(batch)}
+        output = self.pred_fn(**feed)
+        output = output[self.outputs[0]["name"]].numpy()
+
+        # Read and process the results
+        classes = np.argmax(output, axis=1)
+        scores = np.max(output, axis=1)
+        top = min(top, output.shape[1])  # Clamp to the class count; max() here returned every class
+        top_classes = np.flip(np.argsort(output, axis=1), axis=1)[:, 0:top]
+        top_scores = np.flip(np.sort(output, axis=1), axis=1)[:, 0:top]
+
+        return classes, scores, [top_classes, top_scores]
+
+
+def main(args):  # Runs the same image batches through TensorFlow and TensorRT and prints agreement stats
+    # Initialize TRT and TF infer objects.
+    tf_infer = TensorFlowInfer(args.saved_model)
+    trt_infer = TensorRTInfer(args.engine)
+
+    batcher = ImageBatcher(
+        args.input,
+        *trt_infer.input_spec(),
+        max_num_images=args.num_images,
+        preprocessor=args.preprocessor
+    )
+
+    # Make sure both systems use the same input spec, so we can use the exact same image batches with both
+    tf_shape, tf_dtype = tf_infer.input_spec()
+    trt_shape, trt_dtype = trt_infer.input_spec()
+    if trt_dtype != tf_dtype:
+        print("Input datatype does not match")
+        print("TRT Engine Input Dtype: {}".format(trt_dtype))  # One placeholder per argument
+        print("TF Saved Model Input Dtype: {}".format(tf_dtype))
+        print(
+            "Please use the same TensorFlow saved model that the TensorRT engine was built with"
+        )
+        sys.exit(1)
+
+    if (tf_shape[1] and trt_shape[1] != tf_shape[1]) or (  # Falsy TF dimensions are not compared
+        tf_shape[2] and trt_shape[2] != tf_shape[2]
+    ):
+        print("Input shapes do not match")
+        print("TRT Engine Input Shape: {}".format(trt_shape[1:]))
+        print("TF Saved Model Input Shape: {}".format(tf_shape[1:]))
+        print(
+            "Please use the same TensorFlow saved model that the TensorRT engine was built with"
+        )
+        sys.exit(1)
+
+    match = 0
+    error = 0
+    for batch, images in batcher.get_batch():
+        # Run inference on the same batch with both inference systems
+        tf_classes, tf_scores, _ = tf_infer.infer(batch)
+        trt_classes, trt_scores, _ = trt_infer.infer(batch)
+
+        # The last batch may not have all image slots filled, so limit the results to only the amount of actual images
+        tf_classes = tf_classes[0 : len(images)]
+        tf_scores = tf_scores[0 : len(images)]
+        trt_classes = trt_classes[0 : len(images)]
+        trt_scores = trt_scores[0 : len(images)]
+
+        # Track how many images match on top-1 class id predictions
+        match += np.sum(trt_classes == tf_classes)
+        # Accumulate the sum of squared differences in confidence score (averaged after the loop)
+        error += np.sum((trt_scores - tf_scores) * (trt_scores - tf_scores))
+
+        print(
+            "Processing {} / {} images: {:.2f}% match     ".format(
+                batcher.image_index,
+                batcher.num_images,
+                (100 * (match / batcher.image_index)),
+            ),
+            end="\r",
+        )
+
+    print()
+    pc = 100 * (match / batcher.num_images)
+    print(
+        "Matching Top-1 class predictions for {} out of {} images: {:.2f}%".format(
+            match, batcher.num_images, pc
+        )
+    )
+    avgerror = np.sqrt(error / batcher.num_images)  # Root of the mean squared difference (RMSE)
+    print(
+        "RMSE between TensorFlow and TensorRT confidence scores: {:.3f}".format(
+            avgerror
+        )
+    )
+
+
+if __name__ == "__main__":  # Script entry point: parse the command line, then run the comparison
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with")
+    parser.add_argument(
+        "-m",
+        "--saved_model",
+        help="The TensorFlow saved model path to validate against",
+    )
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input to infer, either a single image path, or a directory of images",
+    )
+    parser.add_argument(
+        "-n",
+        "--num_images",
+        default=5000,
+        type=int,
+        help="The maximum number of images to use for validation, default: 5000",
+    )
+    parser.add_argument(
+        "-p",
+        "--preprocessor",
+        default="V2",
+        choices=["V1", "V1MS", "V2"],
+        help="Select the image preprocessor to use, either 'V2', 'V1' or 'V1MS', default: V2",
+    )
+    args = parser.parse_args()
+    if not all([args.engine, args.saved_model, args.input]):  # All three paths are mandatory
+        parser.print_help()
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/create_onnx.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/create_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0e7d1096d530de5ab881019233079ed6807d708
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/create_onnx.py
@@ -0,0 +1,122 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+
+import onnx
+import onnx_graphsurgeon as gs
+from onnx import shape_inference
+
+import numpy as np
+import tensorflow as tf
+from tf2onnx import tfonnx, optimizer, tf_loader
+
+
+def main(args):  # Converts a TensorFlow saved model to ONNX, pins its I/O shapes and saves the result
+    # Load saved model
+    saved_model_path = os.path.realpath(args.saved_model)
+    assert os.path.isdir(saved_model_path)
+    graph_def, inputs, outputs = tf_loader.from_saved_model(
+        saved_model_path, None, None, "serve", ["serving_default"]
+    )
+    with tf.Graph().as_default() as tf_graph:
+        tf.import_graph_def(graph_def, name="")
+    with tf_loader.tf_session(graph=tf_graph):
+        onnx_graph = tfonnx.process_tf_graph(
+            tf_graph, input_names=inputs, output_names=outputs, opset=11
+        )
+    onnx_model = optimizer.optimize_graph(onnx_graph).make_model(
+        "Converted from {}".format(saved_model_path)
+    )
+    graph = gs.import_onnx(onnx_model)
+    assert graph
+    print()
+    print("ONNX graph created successfully")
+
+    # Set the I/O tensor shapes
+    graph.inputs[0].shape[0] = args.batch_size
+    graph.outputs[0].shape[0] = args.batch_size
+    if args.input_size and args.input_size > 0:
+        if graph.inputs[0].shape[3] == 3:
+            # Format NHWC
+            graph.inputs[0].shape[1] = args.input_size
+            graph.inputs[0].shape[2] = args.input_size
+        elif graph.inputs[0].shape[1] == 3:
+            # Format NCHW
+            graph.inputs[0].shape[2] = args.input_size
+            graph.inputs[0].shape[3] = args.input_size
+    print(
+        "ONNX input named '{}' with shape {}".format(
+            graph.inputs[0].name, graph.inputs[0].shape
+        )
+    )
+    print(
+        "ONNX output named '{}' with shape {}".format(
+            graph.outputs[0].name, graph.outputs[0].shape
+        )
+    )
+    # Reject symbolic (non-int) or non-positive dimensions in the first four input axes
+    for i in range(4):
+        if not isinstance(graph.inputs[0].shape[i], int) or graph.inputs[0].shape[i] <= 0:
+            print(
+                "The input shape of the graph is invalid, try overriding it by giving a fixed size with --input_size"
+            )
+            sys.exit(1)
+
+    # Fix Clip Nodes (ReLU6)
+    for node in [n for n in graph.nodes if n.op == "Clip"]:
+        for bound in node.inputs[1:]:
+            # In TensorRT, the min/max inputs on a Clip op *must* have fp32 datatype
+            bound.values = np.float32(bound.values)
+
+    # Run tensor shape inference
+    graph.cleanup().toposort()
+    model = shape_inference.infer_shapes(gs.export_onnx(graph))
+    graph = gs.import_onnx(model)
+
+    # Save updated model
+    graph.cleanup().toposort()
+    model = gs.export_onnx(graph)
+    onnx_path = os.path.realpath(args.onnx)
+    os.makedirs(os.path.dirname(onnx_path), exist_ok=True)
+    onnx.save(model, onnx_path)
+    print("ONNX model saved to {}".format(onnx_path))
+
+
+if __name__ == "__main__":  # Script entry point: parse the command line, then run the conversion
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-m", "--saved_model", help="The TensorFlow saved model directory to load"
+    )
+    parser.add_argument("-o", "--onnx", help="The output ONNX model file to write")
+    parser.add_argument(
+        "-b", "--batch_size", type=int, default=1, help="Set the batch size, default: 1"
+    )
+    parser.add_argument(
+        "-i",
+        "--input_size",
+        type=int,
+        help="Override the input height and width, e.g. '380', default: keep original size",
+    )
+    args = parser.parse_args()
+    if not all([args.saved_model, args.onnx]):  # Both paths are mandatory; the other flags are optional
+        parser.print_help()
+        print("\nThese arguments are required: --saved_model and --onnx")
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/eval_gt.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/eval_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f57aaa5e7eddb13efc99952ad543ae5546662dc
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/eval_gt.py
@@ -0,0 +1,116 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+
+import numpy as np
+
+from infer import TensorRTInfer
+from image_batcher import ImageBatcher
+
+
+def main(args):
+    """Measure Top-1 and Top-5 accuracy of a TensorRT engine against a ground truth annotations file."""
+    # Map each image file name to its expected class id
+    annotations = {}
+    # A context manager closes the annotations file even when parsing aborts via sys.exit()
+    with open(args.annotations, "r") as annotations_file:
+        for line in annotations_file:
+            line = line.strip().split(args.separator)
+            if len(line) < 2 or not line[1].isnumeric():
+                print(
+                    "Could not parse the annotations file correctly, make sure the correct separator is used"
+                )
+                sys.exit(1)
+            annotations[os.path.basename(line[0])] = int(line[1])
+
+    trt_infer = TensorRTInfer(args.engine)
+    batcher = ImageBatcher(
+        args.input,
+        *trt_infer.input_spec(),
+        max_num_images=args.num_images,
+        preprocessor=args.preprocessor
+    )
+    top1 = top5 = total = 0
+    for batch, images in batcher.get_batch():
+        classes, scores, top = trt_infer.infer(batch, top=5)
+        # Only walk the real images: a padded final batch holds fewer images than rows
+        for i in range(len(images)):
+            image = os.path.basename(images[i])
+            if image not in annotations:
+                print(
+                    "Image '{}' does not appear in the annotations file, please make sure all evaluated "
+                    "images have a corresponding ground truth label".format(image)
+                )
+                sys.exit(1)
+            if annotations[image] == classes[i]:
+                top1 += 1
+            if annotations[image] in top[0][i]:
+                top5 += 1
+            total += 1
+            top1_acc = 100 * (top1 / total)
+            top5_acc = 100 * (top5 / total)
+            # The trailing "\r" rewrites the same console line to show the running accuracy
+            progress = "Processing {} / {} : Top-1 {:0.1f}% , Top-5: {:0.1f}%     "
+            print(progress.format(total, batcher.num_images, top1_acc, top5_acc), end="\r")
+    print()
+    print("Top-1 Accuracy: {:0.3f}%".format(top1_acc))
+    print("Top-5 Accuracy: {:0.3f}%".format(top5_acc))
+
+
+if __name__ == "__main__":  # command-line entry point
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with")
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input to infer, either a single image path, or a directory of images",
+    )
+    parser.add_argument(
+        "-a",
+        "--annotations",
+        help="Set the file to use for classification ground truth annotations",
+    )
+    parser.add_argument(
+        "-s",
+        "--separator",
+        default=" ",
+        help="Separator to use between columns when parsing the annotations file, default: ' ' (space)",
+    )
+    parser.add_argument(
+        "-p",
+        "--preprocessor",
+        default="V2",
+        choices=["V1", "V1MS", "V2"],
+        help="Select the image preprocessor to use, either 'V2', 'V1' or 'V1MS', default: V2",
+    )
+    parser.add_argument(
+        "-n",
+        "--num_images",
+        default=5000,
+        type=int,
+        help="The maximum number of images to use for validation, default: 5000",
+    )
+    args = parser.parse_args()
+    if not all([args.engine, args.input, args.annotations]):  # none of these declares a default; check by hand
+        parser.print_help()
+        print("\nThese arguments are required: --engine  --input and --annotations")
+        sys.exit(1)  # non-zero exit status reports the usage error
+
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/image_batcher.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/image_batcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..63d37784fce903befa726814191531f66031ce72
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/image_batcher.py
@@ -0,0 +1,191 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+
+import numpy as np
+from PIL import Image
+
+
+class ImageBatcher:
+    """
+    Creates batches of pre-processed images.
+    """
+
+    def __init__(
+        self,
+        input,
+        shape,
+        dtype,
+        max_num_images=None,
+        exact_batches=False,
+        preprocessor="V2",
+    ):
+        """
+        :param input: The input directory to read images from, or the path to a single image file.
+        :param shape: The tensor shape of the batch to prepare, either in NCHW or NHWC format.
+        :param dtype: The (numpy) datatype to cast the batched data to.
+        :param max_num_images: The maximum number of images to read from the directory.
+        :param exact_batches: This defines how to handle a number of images that is not an exact multiple of the batch
+        size. If false, it will pad the final batch with zeros to reach the batch size. If true, it will *remove* the
+        last few images in excess of a batch size multiple, to guarantee batches are exact (useful for calibration).
+        :param preprocessor: Set the preprocessor to use, V1, V1MS or V2, depending on which network is being used.
+        """
+        # Find images in the given input path
+        input = os.path.realpath(input)
+        self.images = []
+
+        extensions = [".jpg", ".jpeg", ".png", ".bmp"]
+
+        def is_image(path):
+            return (
+                os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions
+            )
+
+        if os.path.isdir(input):
+            self.images = [
+                os.path.join(input, f)
+                for f in os.listdir(input)
+                if is_image(os.path.join(input, f))
+            ]
+            self.images.sort()
+        elif os.path.isfile(input):
+            if is_image(input):
+                self.images.append(input)
+        self.num_images = len(self.images)
+        if self.num_images < 1:
+            print("No valid {} images found in {}".format("/".join(extensions), input))
+            sys.exit(1)
+
+        # Handle Tensor Shape
+        self.dtype = dtype
+        self.shape = shape
+        assert len(self.shape) == 4
+        self.batch_size = shape[0]
+        assert self.batch_size > 0
+        self.format = None
+        self.width = -1
+        self.height = -1
+        if self.shape[1] == 3:
+            self.format = "NCHW"
+            self.height = self.shape[2]
+            self.width = self.shape[3]
+        elif self.shape[3] == 3:
+            self.format = "NHWC"
+            self.height = self.shape[1]
+            self.width = self.shape[2]
+        assert all([self.format, self.width > 0, self.height > 0])
+
+        # Adapt the number of images as needed
+        if max_num_images and 0 < max_num_images < len(self.images):
+            self.num_images = max_num_images
+        if exact_batches:
+            self.num_images = self.batch_size * (self.num_images // self.batch_size)
+        if self.num_images < 1:
+            print("Not enough images to create batches")
+            sys.exit(1)
+        self.images = self.images[0 : self.num_images]
+
+        # Subdivide the list of images into batches (integer ceil-division, num_images >= 1 here)
+        self.num_batches = 1 + (self.num_images - 1) // self.batch_size
+        self.batches = []
+        for i in range(self.num_batches):
+            start = i * self.batch_size
+            end = min(start + self.batch_size, self.num_images)
+            self.batches.append(self.images[start:end])
+
+        # Indices
+        self.image_index = 0
+        self.batch_index = 0
+
+        self.preprocessor = preprocessor
+
+    def preprocess_image(self, image_path):
+        """
+        Loads an image from disk and prepares it for batching: cropping, resizing, normalization, data type
+        casting, and transposing. Three algorithms are implemented, selected by self.preprocessor:
+        * V2: The algorithm for EfficientNet V2, as defined in automl/efficientnetv2/preprocessing.py.
+        * V1: The algorithm for EfficientNet V1, aka "Legacy", as defined in automl/efficientnetv2/preprocess_legacy.py.
+        * V1MS: Crop and resize as in V1, but mean subtraction and std dev scaling are applied here, outside the graph.
+        :param image_path: The path to the image on disk to load.
+        :return: A numpy array holding the image sample, ready to be concatenated into the rest of the batch.
+        """
+
+        def pad_crop(image):
+            """
+            A subroutine to implement padded cropping. This will create a center crop of the image, padded by 32 pixels.
+            :param image: The PIL image object
+            :return: The PIL image object already padded and cropped.
+            """
+            # Assume square images
+            assert self.height == self.width
+            width, height = image.size
+            ratio = self.height / (self.height + 32)
+            crop_size = int(ratio * min(height, width))
+            y = (height - crop_size) // 2
+            x = (width - crop_size) // 2
+            return image.crop((x, y, x + crop_size, y + crop_size))
+
+        with Image.open(image_path) as source:  # close the file handle once the pixels are converted
+            image = source.convert(mode="RGB")
+        if self.preprocessor == "V2":
+            # For EfficientNet V2: Bilinear Resize and [-1,+1] Normalization
+            if self.height < 320:
+                # Padded crop only on smaller sizes
+                image = pad_crop(image)
+            image = image.resize((self.width, self.height), resample=Image.BILINEAR)
+            image = np.asarray(image, dtype=self.dtype)
+            image = (image - 128.0) / 128.0
+        elif self.preprocessor == "V1":
+            # For EfficientNet V1: Padded Crop, Bicubic Resize, and [0,1] Normalization
+            # (Mean subtraction and Std Dev scaling will be part of the graph, so not done here)
+            image = pad_crop(image)
+            image = image.resize((self.width, self.height), resample=Image.BICUBIC)
+            image = np.asarray(image, dtype=self.dtype)
+            image = image / 255.0
+        elif self.preprocessor == "V1MS":
+            # For EfficientNet V1: Padded Crop, Bicubic Resize, and [0,1] Normalization
+            # Mean subtraction and Std dev scaling are applied as a pre-processing step outside the graph.
+            image = pad_crop(image)
+            image = image.resize((self.width, self.height), resample=Image.BICUBIC)
+            image = np.asarray(image, dtype=self.dtype)
+            image = image - np.asarray([123.68, 116.28, 103.53])
+            image = image / np.asarray([58.395, 57.120, 57.375])
+        else:
+            print("Preprocessing method {} not supported".format(self.preprocessor))
+            sys.exit(1)
+        if self.format == "NCHW":
+            image = np.transpose(image, (2, 0, 1))
+        return image
+
+    def get_batch(self):
+        """
+        Retrieve the batches. This is a generator object, so you can use it within a loop as:
+        for batch, images in batcher.get_batch():
+           ...
+        Or outside of a batch with the next() function.
+        :return: A generator yielding two items per iteration: a numpy array holding a batch of images, and the list of
+        paths to the images loaded within this batch.
+        """
+        for batch_images in self.batches:
+            batch_data = np.zeros(self.shape, dtype=self.dtype)
+            for slot, image_path in enumerate(batch_images):
+                self.image_index += 1
+                batch_data[slot] = self.preprocess_image(image_path)
+            self.batch_index += 1
+            yield batch_data, batch_images
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/infer.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc18e1c88071dac7dff422be9e5cd7d2b6edbe0e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/infer.py
@@ -0,0 +1,182 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+
+import numpy as np
+import tensorrt as trt
+from cuda import cudart
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+from image_batcher import ImageBatcher
+
+
+class TensorRTInfer:
+    """
+    Implements inference for the EfficientNet TensorRT engine.
+    """
+
+    def __init__(self, engine_path):
+        """
+        :param engine_path: The path to the serialized engine to load from disk.
+        """
+        # Load TRT engine
+        self.logger = trt.Logger(trt.Logger.ERROR)
+        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
+            assert runtime
+            self.engine = runtime.deserialize_cuda_engine(f.read())
+        assert self.engine
+        self.context = self.engine.create_execution_context()
+        assert self.context
+
+        # Setup I/O bindings
+        self.inputs = []
+        self.outputs = []
+        self.allocations = []
+        for i in range(self.engine.num_io_tensors):
+            name = self.engine.get_tensor_name(i)
+            is_input = False
+            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
+                is_input = True
+            dtype = self.engine.get_tensor_dtype(name)
+            shape = self.engine.get_tensor_shape(name)
+            if is_input:
+                self.batch_size = shape[0]  # NOTE(review): reassigned for every input tensor, so the last input wins
+            size = np.dtype(trt.nptype(dtype)).itemsize  # bytes per element, multiplied by each dimension below
+            for s in shape:
+                size *= s
+            allocation = common.cuda_call(cudart.cudaMalloc(size))
+            binding = {
+                "index": i,
+                "name": name,
+                "dtype": np.dtype(trt.nptype(dtype)),
+                "shape": list(shape),
+                "allocation": allocation,
+            }
+            self.allocations.append(allocation)
+            if is_input:
+                self.inputs.append(binding)
+            else:
+                self.outputs.append(binding)
+
+        assert self.batch_size > 0
+        assert len(self.inputs) > 0
+        assert len(self.outputs) > 0
+        assert len(self.allocations) > 0
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        return self.inputs[0]["shape"], self.inputs[0]["dtype"]
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the output tensor and its (numpy) datatype.
+        """
+        return self.outputs[0]["shape"], self.outputs[0]["dtype"]
+
+    def infer(self, batch, top=1):
+        """
+        Execute inference on a batch of images. The images should already be batched and preprocessed, as prepared by
+        the ImageBatcher class. Memory copying to and from the GPU device will be performed here.
+        :param batch: A numpy array holding the image batch.
+        :param top: The number of classes to return as top predictions, in descending order by their score. By default,
+        setting to one will return the same as the maximum score class. Useful for Top-5 accuracy metrics in validation.
+        :return: Three items, as numpy arrays for each batch image: The maximum score class, the corresponding maximum
+        score, and a list of the top N classes and scores.
+        """
+        # Prepare the output data
+        output = np.zeros(*self.output_spec())  # host buffer with the first output's shape and dtype
+
+        # Process I/O and execute the network
+        common.memcpy_host_to_device(
+            self.inputs[0]["allocation"], np.ascontiguousarray(batch)
+        )
+        self.context.execute_v2(self.allocations)  # allocations were appended in I/O tensor index order
+        common.memcpy_device_to_host(output, self.outputs[0]["allocation"])
+
+        # Process the results
+        classes = np.argmax(output, axis=1)  # reduction along axis 1; presumably one row per batch image - confirm
+        scores = np.max(output, axis=1)
+        top = min(top, output.shape[1])  # cannot return more entries than axis 1 of the output holds
+        top_classes = np.flip(np.argsort(output, axis=1), axis=1)[:, 0:top]
+        top_scores = np.flip(np.sort(output, axis=1), axis=1)[:, 0:top]
+
+        return classes, scores, [top_classes, top_scores]
+
+
+def main(args):
+    """Run inference on every input image and print the top class(es) and score(s) for each one."""
+    trt_infer = TensorRTInfer(args.engine)
+    batcher = ImageBatcher(
+        args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor
+    )
+    for batch, images in batcher.get_batch():
+        # Ask for args.top results: with the default top=1 the assert below failed for any --top > 1
+        classes, scores, top = trt_infer.infer(batch, top=args.top)
+        for i in range(len(images)):
+            if args.top == 1:
+                print(images[i], classes[i], scores[i], sep=args.separator)
+            else:
+                line = [images[i]]
+                assert args.top <= top[0].shape[1]
+                line.extend(str(top[0][i][t]) for t in range(args.top))
+                line.extend(str(top[1][i][t]) for t in range(args.top))
+                print(args.separator.join(line))
+
+
+if __name__ == "__main__":  # command-line entry point
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with")
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input to infer, either a single image path, or a directory of images",
+    )
+    parser.add_argument(
+        "-t",
+        "--top",
+        default=1,
+        type=int,
+        help="The amount of top classes and scores to output per image, default: 1",
+    )
+    parser.add_argument(
+        "-s",
+        "--separator",
+        default="\t",
+        help="Separator to use between columns when printing the results, default: \\t",
+    )
+    parser.add_argument(
+        "-p",
+        "--preprocessor",
+        default="V2",
+        choices=["V1", "V1MS", "V2"],
+        help="Select the image preprocessor to use, either 'V2', 'V1' or 'V1MS', default: V2",
+    )
+    args = parser.parse_args()
+    if not all([args.engine, args.input]):  # neither option declares a default, so check both by hand
+        parser.print_help()
+        print("\nThese arguments are required: --engine and --input")
+        sys.exit(1)  # non-zero exit status reports the usage error
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..83b9ea7c3f7ecb25c2a58ccfd61d1ded4f099cf7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/efficientnet/requirements.txt
@@ -0,0 +1,14 @@
+Pillow>=10.0.0
+onnx==1.14.0; python_version <= "3.10"
+onnx==1.16.1; python_version >= "3.11"
+tensorrt>=7.1.0.0
+tf2onnx==1.8.1; python_version <= "3.10"
+tf2onnx==1.16.0; python_version >= "3.11"
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a328f8e933b3509165a3f6ae1987d540fbad142f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/README.md
@@ -0,0 +1,157 @@
+# TensorRT Engine Refitting of ONNX models.
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+- [Prerequisites](#prerequisites)
+- [Running the sample](#running-the-sample)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample shows how to refit an engine built from an ONNX model via parsers. A modified version of the [ONNX BiDAF model](https://github.com/onnx/models/tree/master/text/machine_comprehension/bidirectional_attention_flow) is used as the sample model, which implements the Bi-Directional Attention Flow (BiDAF) network described in the paper [Bidirectional Attention Flow for Machine Comprehension](https://arxiv.org/abs/1611.01603).
+
+## How does this sample work?
+
+This sample replaces unsupported nodes (HardMax / Compress) in the original ONNX model via ONNX-graphsurgeon (in `prepare_model.py`) and builds a refittable TensorRT engine.
+The engine is then refitted with fake weights and correct weights, each followed by inference on sample context and query sentences in `build_and_refit_engine.py`.
+
+## Prerequisites
+
+Dependencies required for this sample
+
+1. Install the dependencies for Python:
+```bash
+pip3 install -r requirements.txt
+```
+
+2. TensorRT
+
+3. [ONNX-GraphSurgeon](https://github.com/NVIDIA/TensorRT/tree/main/tools/onnx-graphsurgeon)
+
+4. Download sample data. See the "Download Sample Data" section of [the general setup guide](../README.md).
+
+## Running the sample
+
+The data directory needs to be specified (either via `-d /path/to/data` or the environment variable `TRT_DATA_DIR`)
+when running these scripts; an error will be thrown otherwise. The following examples use the `TRT_DATA_DIR` approach.
+
+* Prepare the ONNX model. (The data directory needs to be specified.)
+  ```bash
+  python3 prepare_model.py
+  ```
+
+The output should look similar to the following:
+```
+Modifying the ONNX model ...
+Modified ONNX model saved as bidaf-modified.onnx
+Done.
+```
+
+The script will modify the original model from [onnx/models](https://github.com/onnx/models/raw/c02f8c8699fc12273649e658b8d2a1a8e32a35d0/text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.onnx) and save an ONNX model that can be parsed and run by TensorRT.
+
+The original ONNX model contains four CategoryMapper nodes to map the four input string arrays to int arrays.
+Since TensorRT does not support string data type and CategoryMapper nodes, we dump out the four maps for the four nodes as json files (`model/CategoryMapper_{4-7}.json`) and use them to preprocess input data.
+Now the four inputs become four outputs of the original CategoryMapper nodes.
+
+In addition, the unsupported HardMax and Compress nodes are replaced by ArgMax and Gather nodes, respectively.
+
+
+* Build a TensorRT engine, refit the engine and run inference.
+`python3 build_and_refit_engine.py --weights-location GPU`
+
+The script will build a TensorRT engine from the modified ONNX model, and then refit the engine from GPU weights and run inference on sample context and query sentences.
+
+When running the above command for the first time, the output should look similar to the following:
+```
+Loading ONNX file from path bidaf-modified.onnx...
+Beginning ONNX file parsing
+[09/25/2023-08:48:16] [TRT] [W] ModelImporter.cpp:407: Make sure input CategoryMapper_4 has Int64 binding.
+[09/25/2023-08:48:16] [TRT] [W] ModelImporter.cpp:407: Make sure input CategoryMapper_5 has Int64 binding.
+[09/25/2023-08:48:16] [TRT] [W] ModelImporter.cpp:407: Make sure input CategoryMapper_6 has Int64 binding.
+[09/25/2023-08:48:16] [TRT] [W] ModelImporter.cpp:407: Make sure input CategoryMapper_7 has Int64 binding.
+Completed parsing of ONNX file
+Network inputs:
+CategoryMapper_4 <class 'numpy.int64'> (-1, 1)
+CategoryMapper_5 <class 'numpy.int64'> (-1, 1, 1, 16)
+CategoryMapper_6 <class 'numpy.int64'> (-1, 1)
+CategoryMapper_7 <class 'numpy.int64'> (-1, 1, 1, 16)
+Building an engine from file bidaf-modified.onnx; this may take a while...
+Completed creating Engine
+Refitting engine from GPU weights...
+Engine refitted in 39.88 ms.
+Doing inference...
+Doing inference...
+Refitting engine from GPU weights...
+Engine refitted in 0.27 ms.
+Doing inference...
+Doing inference...
+Passed
+```
+
+Note that refitting for the second time will be much faster than the first time.
+When running the above command again, the engine will be deserialized from the plan file, and the output should look similar to the following:
+```
+Reading engine from file bidaf.trt...
+Refitting engine from GPU weights...
+Engine refitted in 32.64 ms.
+Doing inference...
+Doing inference...
+Refitting engine from GPU weights...
+Engine refitted in 0.41 ms.
+Doing inference...
+Doing inference...
+Passed
+```
+
+To refit the engine from CPU weights, change the command to `python3 build_and_refit_engine.py --weights-location CPU`. The output should look similar to the following:
+```
+Reading engine from file bidaf.trt...
+Refitting engine from CPU weights...
+Engine refitted in 45.18 ms.
+Doing inference...
+Doing inference...
+Refitting engine from CPU weights...
+Engine refitted in 1.20 ms.
+Doing inference...
+Doing inference...
+Passed
+```
+
+There is also an option `--version-compatible` to enable engine version compatibility. If installed, the `tensorrt_dispatch` package will be used for refitting and running version compatible engines instead of the `tensorrt` package.
+To build and refit a version compatible engine, run the command `python3 build_and_refit_engine.py --version-compatible` and the output should look similar to the above cases.
+
+# Additional resources
+
+The following resources provide a deeper understanding about the model used in this sample:
+
+**Model**
+- [Bidirectional Attention Flow for Machine Comprehension](https://arxiv.org/abs/1611.01603)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [Importing A Model Using A Parser In Python](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#import_model_python)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+October 2020: This sample was recreated, updated and reviewed.
+
+August 2023: 
+  - Add support for refitting engines from GPU weights.
+  - Removed support for Python versions < 3.8.
+
+January 2024:
+  - Add support for refitting version compatible engines.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/build_and_refit_engine.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/build_and_refit_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..240f12955d7fc52f9d70fc35a34e37702a5cb708
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/build_and_refit_engine.py
@@ -0,0 +1,296 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+
+import numpy as np
+import argparse
+import tensorrt as trt
+
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+from cuda import cudart
+
+TRT_LOGGER = trt.Logger()  # module-level logger passed to the builder, runtime and refitter; main() may rebind it
+
+
+def get_plan(onnx_file_path, engine_file_path, version_compatible):
+    """Return a serialized refittable engine, read from disk when cached and built from ONNX otherwise.
+
+    A fresh build always sets BuilderFlag.REFIT and registers two optimization profiles
+    (optimum leading dimension 6 and 10) that accept leading dimensions 1 through 15.
+
+    Args:
+        onnx_file_path: Path of the ONNX model parsed when no cached engine file exists.
+        engine_file_path: Serialized engine path; read if it exists, otherwise written after a build.
+        version_compatible: When true, the build also sets BuilderFlag.VERSION_COMPATIBLE.
+
+    Returns:
+        The serialized plan (file bytes when cached, the builder's output otherwise), or None
+        when ONNX parsing or the engine build fails.
+    """
+
+    def build_plan():
+        """Parse the ONNX file, build a refittable serialized engine, and cache it on disk.
+
+        Returns the serialized plan, or None when parsing or building fails.
+        """
+        # Bind `trt` locally so the build does not depend on the module-level `trt` global,
+        # which main() may rebind to tensorrt_dispatch.
+        import tensorrt as trt
+
+        builder = trt.Builder(TRT_LOGGER)
+        network = builder.create_network(0)
+        parser = trt.OnnxParser(network, TRT_LOGGER)
+
+        # Parse model file
+        print("Loading ONNX file from path {}...".format(onnx_file_path))
+        with open(onnx_file_path, "rb") as model:
+            print("Beginning ONNX file parsing")
+            if not parser.parse(model.read()):
+                print("ERROR: Failed to parse the ONNX file.")
+                for error in range(parser.num_errors):
+                    print(parser.get_error(error))
+                return None
+        print("Completed parsing of ONNX file")
+
+        # Print input info
+        print("Network inputs:")
+        for i in range(network.num_inputs):
+            tensor = network.get_input(i)
+            print(tensor.name, trt.nptype(tensor.dtype), tensor.shape)
+
+        config = builder.create_builder_config()
+        config.set_flag(trt.BuilderFlag.REFIT)
+        if version_compatible:
+            config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)
+
+        # Trailing dimensions of network inputs 0-3; only the leading dimension varies,
+        # from 1 to 15, in every optimization profile.
+        trailing_dims = [(1,), (1, 1, 16), (1,), (1, 1, 16)]
+
+        # One optimization profile per optimum leading-dimension value.
+        for opt in [6, 10]:
+            profile = builder.create_optimization_profile()
+            for index, tail in enumerate(trailing_dims):
+                # Shapes are (leading dimension,) + the input's fixed trailing dimensions.
+                min_shape = (1,) + tail
+                opt_shape = (opt,) + tail
+                max_shape = (15,) + tail
+                profile.set_shape(
+                    network.get_input(index).name,
+                    min=min_shape,
+                    opt=opt_shape,
+                    max=max_shape,
+                )
+            config.add_optimization_profile(profile)
+
+        print(
+            "Building an engine from file {}; this may take a while...".format(
+                onnx_file_path
+            )
+        )
+        plan = builder.build_serialized_network(network, config)
+        if plan is None:
+            # The builder returns None when the build fails; report it instead of printing
+            # a success message and then failing inside f.write(None).
+            print("ERROR: Failed to build the engine.")
+            return None
+        print("Completed creating Engine")
+
+        # Cache the plan so later runs can skip the build.
+        with open(engine_file_path, "wb") as f:
+            f.write(plan)
+        return plan
+
+    if os.path.exists(engine_file_path):
+        # If a serialized engine exists, use it instead of building an engine.
+        print("Reading engine from file {}...".format(engine_file_path))
+        # The context manager closes the handle, which the previous bare open() never did.
+        with open(engine_file_path, "rb") as f:
+            return f.read()
+    return build_plan()
+
+
+def main():
+    """Build or load the engine, refit it with fake and then real weights, and verify the answers.
+
+    After each refit, inference runs once per optimization profile; the answer must be
+    [b"brown"] exactly when the real weights are in place.
+    """
+    global trt
+    global TRT_LOGGER
+    import time
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-l",
+        "--weights-location",
+        dest="weights_location",
+        default="GPU",
+        choices=["GPU", "CPU"],
+        help="The location for weights passed to refitter, either GPU/CPU, default: GPU",
+    )
+    parser.add_argument(
+        "--version-compatible",
+        dest="version_compatible",
+        action="store_true",
+        help="Build a version compatible engine for refitting",
+    )
+    args = parser.parse_args()
+
+    onnx_file_path = "bidaf-modified.onnx"
+    engine_file_path = "bidaf{}.trt".format("-vc" if args.version_compatible else "")
+
+    plan = get_plan(onnx_file_path, engine_file_path, args.version_compatible)
+    if plan is None:
+        raise SystemExit("ERROR: get_plan() returned no serialized engine; cannot continue.")
+
+    if args.version_compatible:
+        # Try using dispatch runtime for refitting and inference. If failed, fallback to full runtime.
+        try:
+            del sys.modules["tensorrt"]
+            sys.modules["tensorrt"] = __import__("tensorrt_dispatch")
+            sys.modules["trt"] = sys.modules["tensorrt"]
+            import tensorrt_dispatch as trt
+
+            print(
+                "Importing tensorrt_dispatch instead of full tensorrt for refitting and running vc engines."
+            )
+        except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must still propagate
+            print(
+                "Failed to import tensorrt_dispatch for refitting and running vc engines. Please install the package first!"
+            )
+            sys.modules["tensorrt"] = __import__("tensorrt")
+        TRT_LOGGER = trt.Logger()
+
+    runtime = trt.Runtime(TRT_LOGGER)
+    if args.version_compatible:
+        runtime.engine_host_code_allowed = True
+    engine = runtime.deserialize_cuda_engine(plan)
+
+    # Imported only after the tensorrt/tensorrt_dispatch selection above (data_processing imports tensorrt).
+    from data_processing import get_inputs, preprocess
+    import common_runtime as common
+
+    # input
+    context = "A quick brown fox jumps over the lazy dog."
+    query = "What color is the fox?"
+    cw_str, _ = preprocess(context)
+    # get ravelled data
+    cw, cc, qw, qc = get_inputs(context, query)
+
+    # Do inference with TensorRT
+    weights_names = ["Parameter576_B_0", "W_0"]
+    refit_weights_dict = {
+        name: np.load("{}.npy".format(name)) for name in weights_names
+    }
+    fake_weights_dict = {
+        name: np.ones_like(weights) for name, weights in refit_weights_dict.items()
+    }
+    device_mem_dict = {}
+    if args.weights_location == "GPU":
+        for name, weights in refit_weights_dict.items():
+            nbytes = weights.size * weights.itemsize
+            device_mem_dict[name] = common.cuda_call(cudart.cudaMalloc(nbytes))
+
+    execution_context = engine.create_execution_context()
+    refitter = trt.Refitter(engine, TRT_LOGGER)
+    # Skip weights validation since we are confident that the new weights are similar to the weights used to build engine.
+    refitter.weights_validation = False
+    # To get a list of all refittable weights' names in the network, use refitter.get_all_weights().
+
+    if args.weights_location == "GPU":
+        for name, device_mem in device_mem_dict.items():
+            device_weights = trt.Weights(
+                trt.DataType.FLOAT, device_mem, refit_weights_dict[name].size
+            )
+            weights_prototype = refitter.get_weights_prototype(name)
+            assert device_weights.dtype == weights_prototype.dtype
+            assert device_weights.size == weights_prototype.size
+            refitter.set_named_weights(name, device_weights, trt.TensorLocation.DEVICE)
+
+    for weights_dict, answer_correct in [
+        (fake_weights_dict, False),
+        (refit_weights_dict, True),
+    ]:
+        T1 = time.perf_counter()
+        # Refit named weights via set_named_weights
+        for name in weights_names:
+            host_weights = weights_dict[name]
+            if args.weights_location == "CPU":
+                weights = host_weights
+                location = trt.TensorLocation.HOST
+                refitter.set_named_weights(name, weights, location)
+            else:
+                common.memcpy_host_to_device(device_mem_dict[name], host_weights)
+
+        # Get missing weights names. This should return empty lists in this case.
+        missing_weights = refitter.get_missing_weights()
+        assert (
+            len(missing_weights) == 0
+        ), "Refitter found missing weights. Call set_named_weights() or set_weights() for all missing weights"
+
+        print(f"Refitting engine from {args.weights_location} weights...")
+        # Call the refit outside the assert so that `python -O` cannot strip the refit itself.
+        refit_succeeded = refitter.refit_cuda_engine()
+        assert refit_succeeded, "refit_cuda_engine() reported failure"
+
+        T2 = time.perf_counter()
+        print("Engine refitted in {:.2f} ms.".format((T2 - T1) * 1000))
+
+        for profile_idx in range(engine.num_optimization_profiles):
+            print("Doing inference...")
+            inputs, outputs, bindings, stream = common.allocate_buffers(
+                engine, profile_idx
+            )
+
+            # Set host input. The common.do_inference function will copy the input to the GPU before executing.
+            inputs[0].host = cw
+            inputs[1].host = cc
+            inputs[2].host = qw
+            inputs[3].host = qc
+            execution_context.set_optimization_profile_async(profile_idx, stream)
+            execution_context.set_input_shape("CategoryMapper_4", (10, 1))
+            execution_context.set_input_shape("CategoryMapper_5", (10, 1, 1, 16))
+            execution_context.set_input_shape("CategoryMapper_6", (6, 1))
+            execution_context.set_input_shape("CategoryMapper_7", (6, 1, 1, 16))
+
+            trt_outputs = common.do_inference(
+                execution_context,
+                engine=engine,
+                bindings=bindings,
+                inputs=inputs,
+                outputs=outputs,
+                stream=stream,
+            )
+
+            start = trt_outputs[0].item()
+            end = trt_outputs[1].item()
+            answer = [w.encode() for w in cw_str[start : end + 1].reshape(-1)]
+            assert answer_correct == (answer == [b"brown"]), answer
+            common.free_buffers(inputs, outputs, stream)
+
+    for _, device_mem in device_mem_dict.items():
+        common.cuda_call(cudart.cudaFree(device_mem))
+
+    print("Passed")
+
+
+if __name__ == "__main__":  # run the refit sample only when executed as a script
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/data_processing.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/data_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..7eb052adc29456e5c55f1bf152a7a5c7db2cf29c
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/data_processing.py
@@ -0,0 +1,64 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import nltk
+from nltk import word_tokenize
+import json
+import tensorrt as trt
+
+
+def preprocess(text):
+    """Tokenize `text` into a (seq, 1) lower-cased word array and a (seq, 1, 1, 16) character array."""
+    try:  # download the punkt_tab tokenizer data only when it is not already available
+        nltk.data.find("tokenizers/punkt_tab")
+    except LookupError:
+        nltk.download("punkt_tab")
+    tokens = word_tokenize(text)
+    lowered = [token.lower() for token in tokens]
+    # Each token keeps at most its first 16 characters (original case), right-padded with "".
+    char_rows = [list(token[:16]) for token in tokens]
+    char_rows = [row + [""] * (16 - len(row)) for row in char_rows]
+    words, chars = np.asarray(lowered).reshape(-1, 1), np.asarray(char_rows).reshape(-1, 1, 1, 16)
+    return words, chars
+
+
+def get_map_func(filepath):
+    """Return a numpy-vectorized string -> int lookup built from the CategoryMapper JSON at `filepath`."""
+    with open(filepath) as file:  # closes the handle; the previous bare open() leaked it
+        category_map = json.load(file)
+    category_mapper = dict(zip(category_map["cats_strings"], category_map["cats_int64s"]))
+    # Strings missing from the table map to the file's declared default id.
+    default_int64 = category_map["default_int64"]
+    func = lambda s: category_mapper.get(s, default_int64)
+    return np.vectorize(func)
+
+
+def get_inputs(context, query):
+    """Convert the context and query text into the four raveled int32 id arrays (cw, cc, qw, qc)."""
+    # Tokenize first (context, then query); the tuple order matches the mapper files below.
+    arrays = preprocess(context) + preprocess(query)
+    mapper_files = (
+        "CategoryMapper_4.json", "CategoryMapper_5.json",
+        "CategoryMapper_6.json", "CategoryMapper_7.json",
+    )
+    mappers = [get_map_func(path) for path in mapper_files]
+    int32 = trt.nptype(trt.int32)
+    cw_input, cc_input, qw_input, qc_input = (
+        mapper(array).astype(int32).ravel() for mapper, array in zip(mappers, arrays)
+    )
+    return cw_input, cc_input, qw_input, qc_input
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/download.yml b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/download.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f44bdaef24730d28d2fb3ffac5628e9e654d3297
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/download.yml
@@ -0,0 +1,21 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+sample: engine_refit_onnx_bidaf
+files:
+  - path: samples/python/engine_refit_onnx_bidaf/bidaf-original.onnx
+    url: https://github.com/onnx/models/raw/c02f8c8699fc12273649e658b8d2a1a8e32a35d0/text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.onnx
+    checksum: cf11f1eceb4731f8dd39345467fe94a1
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/prepare_model.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/prepare_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb45226efc89db43a09c005041f4801f4b850c54
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/prepare_model.py
@@ -0,0 +1,107 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import onnx
+import numpy as np
+import json
+
+import sys, os
+
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+from downloader import getFilePath
+
+
+def drop_category_mapper_nodes(graph):
+    """Detach the CategoryMapper node behind every graph input and promote its output to a graph input.
+
+    Each detached node's attributes are dumped to "<node name>.json" in the working directory.
+    """
+    promoted = []
+    for source in graph.inputs:
+        assert len(source.outputs) == 1  # each input feeds exactly one node
+        mapper = source.outputs[0]
+        assert mapper.op == "CategoryMapper"
+        assert len(mapper.outputs) == 1
+        promoted.append(mapper.outputs[0])
+        mapper.inputs.clear()  # disconnect the node on both sides
+        mapper.outputs.clear()
+        with open(mapper.name + ".json", "w") as fp:  # mapping info used to preprocess inputs
+            json.dump(mapper.attrs, fp)
+    graph.inputs = promoted
+
+
+def replace_unsupported_ops(graph):
+    """Rewrite the graph's single Hardmax node as ArgMax, bypass the two nodes after it, and turn the next node into Gather."""
+    hardmaxes = [node for node in graph.nodes if node.op == "Hardmax"]
+    assert len(hardmaxes) == 1  # exactly one Hardmax node is expected
+    hardmax = hardmaxes[0]
+    hardmax.op = "ArgMax"
+    hardmax.name = "ArgMax(org:" + hardmax.name + ")"
+    hardmax.attrs["axis"] = 1
+    hardmax.attrs["keepdims"] = 0
+    # The two nodes downstream of the rewritten node; presumably a Cast and a Reshape, going by the names - TODO confirm.
+    cast = hardmax.o()
+    reshape = cast.o()
+    # Make the rewritten node produce reshape's output tensors directly, bypassing cast and reshape.
+    hardmax.outputs = reshape.outputs
+    assert len(hardmax.outputs) == 1
+    hardmax.outputs[0].dtype = np.int64
+    hardmax.outputs[0].shape = [1]
+    # The node that followed reshape is rewritten as a Gather along axis 1.
+    compress = reshape.o()
+    compress.op = "Gather"
+    compress.name = "Gather(org:" + compress.name + ")"
+    compress.attrs["axis"] = 1
+    # Clear the bypassed nodes' outputs before the cleanup below.
+    cast.outputs.clear()
+    reshape.outputs.clear()
+    # Remove the node from the graph completely
+    graph.cleanup().toposort()
+
+
+def save_weights_for_refitting(graph):
+    """Save the values of the two tensors used for refitting to "<tensor name>.npy" in the working directory."""
+    tensors = graph.tensors()
+    for tensor_name in ("Parameter576_B_0", "W_0"):
+        np.save(tensor_name + ".npy", tensors[tensor_name].values)
+
+
+def main():
+    """Load the original ONNX model, apply the graph rewrites, and save it as bidaf-modified.onnx."""
+    source_path = getFilePath(
+        "samples/python/engine_refit_onnx_bidaf/bidaf-original.onnx"
+    )
+
+    print("Modifying the ONNX model ...")
+    graph = gs.import_onnx(onnx.load(source_path))
+
+    # Apply the rewrites in their original order: inputs, unsupported ops, then weight export.
+    for rewrite in (drop_category_mapper_nodes, replace_unsupported_ops, save_weights_for_refitting):
+        rewrite(graph)
+
+    modified_model_name = "bidaf-modified.onnx"
+    new_model = gs.export_onnx(graph)
+    # Validate the exported model before writing it to disk.
+    onnx.checker.check_model(new_model)
+    onnx.save(new_model, modified_model_name)
+    print("Modified ONNX model saved as {}".format(modified_model_name))
+    print("Done.")
+
+
+if __name__ == "__main__":  # run the model preparation only when executed as a script
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c1c9c715af218b0ad64f6fe1496c19635016e63a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/engine_refit_onnx_bidaf/requirements.txt
@@ -0,0 +1,11 @@
+onnx==1.16.0
+nltk==3.9.1
+wget==3.2
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a240511f0d853e8a1191910e89e53fe978b61700
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/README.md
@@ -0,0 +1,91 @@
+# Introduction To Importing ONNX Models Into TensorRT Using Python
+
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+	* [onnx_resnet50](#onnx_resnet50)
+- [Prerequisites](#prerequisites)
+- [Running the sample](#running-the-sample)
+	* [Sample `--help` options](#sample-help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, introductory_parser_samples, is a Python sample which uses TensorRT and its included ONNX parser, to perform inference with ResNet-50 models saved in ONNX format.
+
+## How does this sample work?
+
+### onnx_resnet50
+
+This sample demonstrates how to build an engine from an ONNX model file using the open-source ONNX parser and then run inference. The ONNX parser can be used with any framework that supports the ONNX format (typically `.onnx` files).
+
+## Prerequisites
+
+1. Install the dependencies for Python.
+
+```bash
+pip3 install -r requirements.txt
+```
+
+## Running the sample
+
+1.  Run the sample to create a TensorRT inference engine and run inference:
+	`python3 onnx_resnet50.py`
+
+	**Note:** If the TensorRT sample data is not installed in the default location, for example `/usr/src/tensorrt/data/`, the `data` directory must be specified. For example: `python3 onnx_resnet50.py -d /path/to/my/data/`
+
+2.  Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+	`Correctly recognized data/samples/resnet50/reflex_camera.jpeg as reflex camera`
+
+### Sample --help options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option. For example:
+```
+usage: onnx_resnet50.py [-h] [-d DATADIR]
+
+Runs a ResNet50 network with a TensorRT inference engine.
+
+optional arguments:
+ -h, --help            show this help message and exit
+ -d DATADIR, --datadir DATADIR
+                       Location of the TensorRT sample data directory.
+                       (default: /usr/src/tensorrt/data)
+```
+
+# Additional resources
+
+The following resources provide a deeper understanding about importing a model into TensorRT using Python:
+
+**ResNet-50**
+- [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf)
+
+**Parsers**
+- [ONNX Parser](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/python_api/parsers/Onnx/pyOnnx.html)
+
+**Documentation**
+- [Introduction To NVIDIA’s TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [Importing A Model Using A Parser In Python](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#import_model_python)
+- [NVIDIA’s TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+August 2023
+Removed support for Python versions < 3.8.
+
+August 2022
+Removed options for Caffe and UFF parsers.
+
+February 2019
+This `README.md` file was recreated, updated and reviewed.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/onnx_resnet50.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/onnx_resnet50.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd69cc48616e89478867ef5be9bcb06963e0b196
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/onnx_resnet50.py
@@ -0,0 +1,133 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+
+# This sample uses an ONNX ResNet50 Model to create a TensorRT Inference Engine
+import random
+import sys
+
+import numpy as np
+
+import tensorrt as trt
+from PIL import Image
+
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+import common
+
+
+class ModelData:
+    """Constants describing the ResNet50 ONNX model used by this sample."""
+    MODEL_PATH = "ResNet50.onnx"
+    INPUT_SHAPE = (3, 224, 224)  # unpacked as (c, h, w) when the input image is resized
+    DTYPE = trt.float32  # TensorRT data type; trt.nptype() converts it to a numpy type
+
+
+# You can set the logger severity higher to suppress messages (or lower to display more messages).
+TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+
+
+# The Onnx path is used for Onnx models.
+def build_engine_onnx(model_file):
+    """Parse the ONNX file `model_file` and return a deserialized engine, or None if parsing or building fails."""
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network(0)
+    config = builder.create_builder_config()
+    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, common.GiB(1))
+    parser = trt.OnnxParser(network, TRT_LOGGER)
+    with open(model_file, "rb") as model:  # parsing populates `network` from the ONNX bytes
+        if not parser.parse(model.read()):
+            print("ERROR: Failed to parse the ONNX file.")
+            for error in range(parser.num_errors):
+                print(parser.get_error(error))
+            return None
+    engine_bytes = builder.build_serialized_network(network, config)
+    if engine_bytes is None:  # build failed; do not hand None to deserialize_cuda_engine
+        return None
+    runtime = trt.Runtime(TRT_LOGGER)
+    return runtime.deserialize_cuda_engine(engine_bytes)
+
+
+def load_normalized_test_case(test_image, pagelocked_buffer):
+    """Preprocess the image at `test_image` into `pagelocked_buffer` and return `test_image` unchanged."""
+
+    def normalize_image(image):
+        """Return `image` as a flattened, normalized CHW array."""
+        # Resize, antialias and transpose the image to CHW.
+        _, height, width = ModelData.INPUT_SHAPE
+        resized = image.resize((width, height), Image.LANCZOS)
+        chw = np.asarray(resized).transpose([2, 0, 1])
+        flat = chw.astype(trt.nptype(ModelData.DTYPE)).ravel()
+        # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
+        return (flat / 255.0 - 0.45) / 0.225
+
+    # Normalize the image and copy to pagelocked memory.
+    normalized = normalize_image(Image.open(test_image))
+    np.copyto(pagelocked_buffer, normalized)
+    return test_image
+
+
+def main():
+    """Run ResNet50 inference on one randomly chosen sample image and report whether the label matches its file name."""
+    # Set the data path to the directory that contains the trained models and test images for inference.
+    _, data_files = common.find_sample_data(
+        description="Runs a ResNet50 network with a TensorRT inference engine.",
+        subfolder="resnet50",
+        find_files=[
+            "binoculars.jpeg",
+            "reflex_camera.jpeg",
+            "tabby_tiger_cat.jpg",
+            ModelData.MODEL_PATH,
+            "class_labels.txt",
+        ],
+    )
+    # Get test images, models and labels.
+    test_images = data_files[0:3]
+    onnx_model_file, labels_file = data_files[3:]
+    with open(labels_file, "r") as labels_fp:  # close the handle instead of leaking it
+        labels = labels_fp.read().split("\n")
+    # Build a TensorRT engine; build_engine_onnx returns None when the ONNX file cannot be parsed.
+    engine = build_engine_onnx(onnx_model_file)
+    if engine is None:
+        raise SystemExit("ERROR: Failed to build the TensorRT engine.")
+    # Allocate buffers and create a CUDA stream.
+    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
+    # Contexts are used to perform inference.
+    context = engine.create_execution_context()
+    # Load a normalized test case into the host input page-locked buffer.
+    test_image = random.choice(test_images)
+    test_case = load_normalized_test_case(test_image, inputs[0].host)
+    # Run the engine. The output is a 1D tensor of length 1000; each value is the probability of that label.
+    trt_outputs = common.do_inference(
+        context,
+        engine=engine,
+        bindings=bindings,
+        inputs=inputs,
+        outputs=outputs,
+        stream=stream,
+    )
+    # We use the highest probability as our prediction. Its index corresponds to the predicted label.
+    pred = labels[np.argmax(trt_outputs[0])]
+    common.free_buffers(inputs, outputs, stream)
+    if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
+        print("Correctly recognized " + test_case + " as " + pred)
+    else:
+        print("Incorrectly recognized " + test_case + " as " + pred)
+
+
+if __name__ == "__main__":  # run the ResNet50 sample only when executed as a script
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fc537473f41bd84b92e0739dd7c2dd0aa434218e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/introductory_parser_samples/requirements.txt
@@ -0,0 +1,9 @@
+Pillow>=10.0.0
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c5fdfb0ca6d9c9348a92ca18a16c0173e1bc43fa
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/README.md
@@ -0,0 +1,108 @@
+# "Hello World" For TensorRT Using PyTorch And Python
+
+**Table Of Contents**
+
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+    * [TensorRT API layers and ops](#tensorrt-api-layers-and-ops)
+- [Prerequisites](#prerequisites)
+- [Running the sample](#running-the-sample)
+    * [Sample `--help` options](#sample-help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, `network_api_pytorch_mnist`, trains a convolutional model on the [MNIST](https://ossci-datasets.s3.amazonaws.com/mnist/) dataset and runs inference with a TensorRT engine.
+
+## How does this sample work?
+
+This sample is an end-to-end sample that trains a model in PyTorch, recreates the network in TensorRT, imports weights from the trained model, and finally runs inference with a TensorRT engine. For more information, see [Creating A Network Definition In Python](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#network_python).
+
+The `sample.py` script imports the functions from the `mnist.py` script for training the PyTorch model, as well as retrieving test cases from the PyTorch Data Loader.
+
+### TensorRT API layers and ops
+
+In this sample, the following layers are used. For more information about these layers, see the [TensorRT Developer Guide: Layers](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#layers) documentation.
+
+[Activation layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#activation-layer)
+The Activation layer implements element-wise activation functions. Specifically, this sample uses the Activation layer with the type `RELU`.
+
+[Convolution layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#convolution-layer)
+The Convolution layer computes a 2D (channel, height, and width) convolution, with or without bias.
+
+[MatrixMultiplyLayer](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#matrixmultiply-layer)
+The MatrixMultiply layer implements a matrix multiplication.
+(The [FullyConnected layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#fullyconnected-layer) is deprecated since 8.4.
+The bias of FullyConnected semantic can be added with an
+[ElementwiseLayer](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#elementwise-layer) of `SUM` operation.)
+
+[Pooling layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#pooling-layer)
+The Pooling layer implements pooling within a channel. Supported pooling types are `maximum`, `average` and `maximum-average blend`.
+
+## Prerequisites
+
+1. Upgrade pip version and install the sample dependencies.
+    ```bash
+    pip3 install --upgrade pip
+    pip3 install -r requirements.txt
+    ```
+
+To run this sample you must be using Python 3.8 or newer.
+
+On PowerPC systems, you will need to manually install PyTorch using IBM's [PowerAI](https://www.ibm.com/support/knowledgecenter/SS5SF7_1.6.0/navigation/pai_install.htm).
+
+2. The MNIST dataset can be found under the data directory (usually `/usr/src/tensorrt/data/mnist`) if using the TensorRT containers. It is also bundled along with the [TensorRT tarball](https://developer.nvidia.com/nvidia-tensorrt-download).
+
+## Running the sample
+
+1.  Run the sample to create a TensorRT inference engine and run inference:
+    `python3 sample.py`
+
+2.  Verify that the sample ran successfully. If the sample runs successfully you should see a match between the test case and the prediction.
+     ```
+    Test Case: 0
+    Prediction: 0
+    ```
+
+### Sample --help options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+# Additional resources
+
+The following resources provide a deeper understanding about getting started with TensorRT using Python:
+
+**Model**
+- [MNIST model](https://github.com/pytorch/examples/tree/master/mnist)
+
+**Dataset**
+- [MNIST database](https://ossci-datasets.s3.amazonaws.com/mnist/)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+August 2023
+Removed support for Python versions < 3.8.
+
+September 2021
+Updated the sample to use explicit batch network definition.
+
+March 2021
+Documented the Python version limitations.
+
+February 2019
+This `README.md` file was recreated, updated and reviewed.
+
+# Known issues
+
+This sample only supports Python 3.8+ due to `torch` and `torchvision` version requirements.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/model.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..53371989adc01b3e56274ea046847ee742540d59
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/model.py
@@ -0,0 +1,156 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file contains functions for training a PyTorch MNIST Model
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torchvision import datasets, transforms
+from torch.autograd import Variable
+
+import numpy as np
+
+from random import randint
+
+
+# Network
+class Net(nn.Module):
+    def __init__(self):
+        super(Net, self).__init__()
+        self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
+        self.conv2 = nn.Conv2d(20, 50, kernel_size=5)
+        self.fc1 = nn.Linear(800, 500)
+        self.fc2 = nn.Linear(500, 10)
+
+    def forward(self, x):
+        x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)
+        x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)
+        x = x.view(-1, 800)
+        x = F.relu(self.fc1(x))
+        x = self.fc2(x)
+        return F.log_softmax(x, dim=1)
+
+
+class MnistModel(object):
+    def __init__(self):
+        self.batch_size = 64
+        self.test_batch_size = 100
+        self.learning_rate = 0.0025
+        self.sgd_momentum = 0.9
+        self.log_interval = 100
+        # Fetch MNIST data set.
+        self.train_loader = torch.utils.data.DataLoader(
+            datasets.MNIST(
+                "/tmp/mnist/data",
+                train=True,
+                download=True,
+                transform=transforms.Compose(
+                    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
+                ),
+            ),
+            batch_size=self.batch_size,
+            shuffle=True,
+            num_workers=1,
+            timeout=600,
+        )
+        self.test_loader = torch.utils.data.DataLoader(
+            datasets.MNIST(
+                "/tmp/mnist/data",
+                train=False,
+                transform=transforms.Compose(
+                    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
+                ),
+            ),
+            batch_size=self.test_batch_size,
+            shuffle=True,
+            num_workers=1,
+            timeout=600,
+        )
+        self.network = Net()
+        if torch.cuda.is_available():
+            self.network = self.network.to("cuda")
+
+    # Train the network for one or more epochs, validating after each epoch.
+    def learn(self, num_epochs=2):
+        # Train the network for a single epoch
+        def train(epoch):
+            self.network.train()
+            optimizer = optim.SGD(
+                self.network.parameters(),
+                lr=self.learning_rate,
+                momentum=self.sgd_momentum,
+            )
+            for batch, (data, target) in enumerate(self.train_loader):
+                if torch.cuda.is_available():
+                    data = data.to("cuda")
+                    target = target.to("cuda")
+                data, target = Variable(data), Variable(target)
+                optimizer.zero_grad()
+                output = self.network(data)
+                loss = F.nll_loss(output, target)
+                loss.backward()
+                optimizer.step()
+                if batch % self.log_interval == 0:
+                    print(
+                        "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
+                            epoch,
+                            batch * len(data),
+                            len(self.train_loader.dataset),
+                            100.0 * batch / len(self.train_loader),
+                            loss.data.item(),
+                        )
+                    )
+
+        # Test the network
+        def test(epoch):
+            self.network.eval()
+            test_loss = 0
+            correct = 0
+            for data, target in self.test_loader:
+                with torch.no_grad():
+                    if torch.cuda.is_available():
+                        data = data.to("cuda")
+                        target = target.to("cuda")
+                    data, target = Variable(data), Variable(target)
+                output = self.network(data)
+                test_loss += F.nll_loss(output, target).data.item()
+                pred = output.data.max(1)[1]
+                correct += pred.eq(target.data).cpu().sum()
+            test_loss /= len(self.test_loader)
+            print(
+                "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
+                    test_loss,
+                    correct,
+                    len(self.test_loader.dataset),
+                    100.0 * correct / len(self.test_loader.dataset),
+                )
+            )
+
+        for e in range(num_epochs):
+            train(e + 1)
+            test(e + 1)
+
+    def get_weights(self):
+        return self.network.state_dict()
+
+    def get_random_testcase(self):
+        data, target = next(iter(self.test_loader))
+        case_num = randint(0, len(data) - 1)
+        test_case = data.cpu().numpy()[case_num].ravel().astype(np.float32)
+        test_name = target.cpu().numpy()[case_num]
+        return test_case, test_name
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..71ef1a17f9199f7ca04b942af9ca7182ef02699f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/requirements.txt
@@ -0,0 +1,11 @@
+Pillow>=10.0.0
+torch
+torchvision
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/sample.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/sample.py
new file mode 100644
index 0000000000000000000000000000000000000000..a695ee9a71bc7e95d37692908b22caaa6512891d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/network_api_pytorch_mnist/sample.py
@@ -0,0 +1,177 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+
+# This sample uses an MNIST PyTorch model to create a TensorRT Inference Engine
+import model
+import numpy as np
+
+import tensorrt as trt
+
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+import common
+
+# You can set the logger severity higher to suppress messages (or lower to display more messages).
+TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+
+
+class ModelData(object):  # Constants describing the TensorRT network's input and output tensors.
+    INPUT_NAME = "data"
+    INPUT_SHAPE = (1, 1, 28, 28)  # passed as `shape` to network.add_input()
+    OUTPUT_NAME = "prob"  # assigned as the name of the final layer's output tensor
+    OUTPUT_SIZE = 10  # forwarded to add_matmul_as_fc() for the last layer
+    DTYPE = trt.float32
+
+
+def populate_network(network, weights):
+    # Recreate the PyTorch model layer by layer in `network`, loading each layer's parameters from `weights`.
+    input_tensor = network.add_input(
+        name=ModelData.INPUT_NAME, dtype=ModelData.DTYPE, shape=ModelData.INPUT_SHAPE
+    )
+
+    def add_matmul_as_fc(net, input, outputs, w, b):  # NOTE(review): `outputs` is never read; n is derived from w.size
+        assert len(input.shape) >= 3
+        m = 1 if len(input.shape) == 3 else input.shape[0]  # row count: 1 for 3-D input, else the leading dimension
+        k = int(np.prod(input.shape) / m)  # features per row
+        assert np.prod(input.shape) == m * k
+        n = int(w.size / k)  # output features implied by the weight size
+        assert w.size == n * k
+        assert b.size == n
+        # Flatten the input to an (m, k) matrix.
+        input_reshape = net.add_shuffle(input)
+        input_reshape.reshape_dims = trt.Dims2(m, k)
+        # The weights are supplied as an (n, k) constant, hence the TRANSPOSE on the second operand.
+        filter_const = net.add_constant(trt.Dims2(n, k), w)
+        mm = net.add_matrix_multiply(
+            input_reshape.get_output(0),
+            trt.MatrixOperation.NONE,
+            filter_const.get_output(0),
+            trt.MatrixOperation.TRANSPOSE,
+        )
+        # Add the bias, supplied as a (1, n) constant.
+        bias_const = net.add_constant(trt.Dims2(1, n), b)
+        bias_add = net.add_elementwise(
+            mm.get_output(0), bias_const.get_output(0), trt.ElementWiseOperation.SUM
+        )
+        # Reshape the result to 4-D (m, n, 1, 1) before returning the layer.
+        output_reshape = net.add_shuffle(bias_add.get_output(0))
+        output_reshape.reshape_dims = trt.Dims4(m, n, 1, 1)
+        return output_reshape
+
+    conv1_w = weights["conv1.weight"].cpu().numpy()  # parameters are moved to host NumPy arrays before use
+    conv1_b = weights["conv1.bias"].cpu().numpy()
+    conv1 = network.add_convolution_nd(
+        input=input_tensor,
+        num_output_maps=20,
+        kernel_shape=(5, 5),
+        kernel=conv1_w,
+        bias=conv1_b,
+    )
+    conv1.stride_nd = (1, 1)
+
+    pool1 = network.add_pooling_nd(
+        input=conv1.get_output(0), type=trt.PoolingType.MAX, window_size=(2, 2)
+    )
+    pool1.stride_nd = trt.Dims2(2, 2)
+
+    conv2_w = weights["conv2.weight"].cpu().numpy()
+    conv2_b = weights["conv2.bias"].cpu().numpy()
+    conv2 = network.add_convolution_nd(
+        pool1.get_output(0), 50, (5, 5), conv2_w, conv2_b
+    )
+    conv2.stride_nd = (1, 1)
+
+    pool2 = network.add_pooling_nd(conv2.get_output(0), trt.PoolingType.MAX, (2, 2))
+    pool2.stride_nd = trt.Dims2(2, 2)
+
+    fc1_w = weights["fc1.weight"].cpu().numpy()
+    fc1_b = weights["fc1.bias"].cpu().numpy()
+    fc1 = add_matmul_as_fc(network, pool2.get_output(0), 500, fc1_w, fc1_b)
+
+    relu1 = network.add_activation(
+        input=fc1.get_output(0), type=trt.ActivationType.RELU
+    )
+
+    fc2_w = weights["fc2.weight"].cpu().numpy()
+    fc2_b = weights["fc2.bias"].cpu().numpy()
+    fc2 = add_matmul_as_fc(
+        network, relu1.get_output(0), ModelData.OUTPUT_SIZE, fc2_w, fc2_b
+    )
+    # No softmax layer is added: the fc2 output is marked as the network output directly.
+    fc2.get_output(0).name = ModelData.OUTPUT_NAME
+    network.mark_output(tensor=fc2.get_output(0))
+
+
+def build_engine(weights):
+    # For more information on TRT basics, refer to the introductory samples.
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network(0)
+    config = builder.create_builder_config()
+    runtime = trt.Runtime(TRT_LOGGER)
+
+    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, common.GiB(1))
+    # Populate the network using weights from the PyTorch model.
+    populate_network(network, weights)
+    # Build and return an engine.
+    plan = builder.build_serialized_network(network, config)
+    return runtime.deserialize_cuda_engine(plan)
+
+
+# Loads a random test case from pytorch's DataLoader
+def load_random_test_case(model, pagelocked_buffer):
+    # Select an image at random to be the test case.
+    img, expected_output = model.get_random_testcase()
+    # Copy to the pagelocked input buffer
+    np.copyto(pagelocked_buffer, img)
+    return expected_output
+
+
+def main():
+    common.add_help(description="Runs an MNIST network using a PyTorch model")
+    # Train the PyTorch model
+    mnist_model = model.MnistModel()
+    mnist_model.learn()
+    weights = mnist_model.get_weights()
+    # Do inference with TensorRT.
+    engine = build_engine(weights)
+
+    # Build an engine, allocate buffers and create a stream.
+    # For more information on buffer allocation, refer to the introductory samples.
+    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
+    context = engine.create_execution_context()
+
+    case_num = load_random_test_case(mnist_model, pagelocked_buffer=inputs[0].host)
+    # For more information on performing inference, refer to the introductory samples.
+    # The common.do_inference function will return a list of outputs - we only have one in this case.
+    [output] = common.do_inference(
+        context,
+        engine=engine,
+        bindings=bindings,
+        inputs=inputs,
+        outputs=outputs,
+        stream=stream,
+    )
+    pred = np.argmax(output)
+    common.free_buffers(inputs, outputs, stream)
+    print("Test Case: " + str(case_num))
+    print("Prediction: " + str(pred))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8d6afcc54666e98d291689570de7814d9016fea2
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/README.md
@@ -0,0 +1,104 @@
+# Python-based NonZero Plugin for TensorRT using IPluginV3
+
+## Description
+
+This sample, `non_zero_plugin`, implements a Python-based plugin for the NonZero operation, configurable to use a `CUDA Python` or `PyTorch` backend.
+
+NonZero is an operation that finds the indices of the non-zero elements of the input tensor.
+
+## How does this sample work?
+
+This sample creates and runs a TensorRT engine built from a network containing a single NonZeroPlugin node. It demonstrates how
+custom layers with data-dependent output shapes can be implemented and added to a TensorRT network using Python.
+
+### Implementing a NonZero plugin using IPluginV3 interface
+
+Until `IPluginV3` (and associated interfaces), TensorRT plugins could not have outputs whose shapes depended on the input values (they could only depend
+on input shapes). `IPluginV3OneBuild` which exposes a build capability for `IPluginV3`, provides support for such data-dependent output shapes.
+
+`NonZeroPlugin` in this sample is written to handle 2-D input tensors of shape $R \times C$. Assume that the tensor contains $K$ non-zero elements and that the
+non-zero indices are required in a row ordering (each set of indices in its own row). Then the output shape would be $K \times 2$.
+
+The output shapes are expressed to the TensorRT builder through the `IPluginV3OneBuild.get_output_shapes()` API. Expressing the second dimension of the output is
+straightforward:
+```
+# output_dims[0] = trt.DimsExprs(2)
+output_dims[0][1] = exprBuilder.constant(2)
+```
+
+The extent of each data-dependent dimension in the plugin must be expressed in terms of a *_size tensor_*. A size tensor is a scalar output of type
+`trt.int32` or `trt.int64` that must be added as one of the plugin outputs. In this case, it is sufficient to declare one size tensor to denote the extent of the
+first dimension of the non-zero indices output. To declare a size tensor, one must provide an upper-bound and optimum value for its extent as `IDimensionExpr`s. These can be formed through the `IExprBuilder` argument passed to the `IPluginV3OneBuild.get_output_shapes()` method.
+ - For unknown inputs, the upper-bound is the total number of elements in the input
+	```
+	upper_bound = exprBuilder.operation(trt.DimensionOperation.PROD, inputs[0][0], inputs[0][1])
+	```
+ - A good estimate for the optimum is that half of the elements are non-zero
+	```
+	opt_value = exprBuilder.operation(trt.DimensionOperation.FLOOR_DIV, upper_bound, exprBuilder.constant(2))
+	```
+
+Now we can declare the size tensor using the `IExprBuilder.declare_size_tensor()` method, which also requires the specification of the output index at which the size tensor would reside. Let us place it after the non-zero indices output:
+```
+num_non_zero_size_tensor = exprBuilder.declare_size_tensor(1, opt_value, upper_bound)
+```
+
+Now we are ready to specify the extent of the first dimension of the non-zero indices output:
+```
+# output_dims[0] = trt.DimsExprs(2)
+output_dims[0][0] = num_non_zero_size_tensor
+```
+Note that the size tensor itself is declared to be a scalar (0-D), i.e. `output_dims[1] = trt.DimsExprs(0)`.
+
+### Creating network and building the engine
+
+To add the plugin to the network, the `INetworkDefinition::add_plugin_v3()` method must be used. 
+
+Similar to `IPluginCreator` used for V2 plugins, V3 plugins must be accompanied by the registration of a plugin creator implementing the `IPluginCreatorV3One` interface.
+
+## Running the sample
+
+1.  Run the sample to create a TensorRT inference engine and run inference:
+    `python3 non_zero_plugin.py [-h] [--precision {fp32,fp16}] [--backend {cuda_python,torch}] [--net_type {onnx,inetdef}]`
+
+2.  Verify that the sample ran successfully. If the sample runs successfully you should see the following message:
+     ```
+    Inference result correct!
+    ```
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+# Additional resources
+
+The following resources provide a deeper understanding about the V3 TensorRT plugins and the NonZero operation:
+
+**NonZero**
+- [ONNX: NonZero](https://onnx.ai/onnx/operators/onnx__NonZero.html)
+
+**C++-based NonZero Plugin sample**
+- [NonZero C++ Plugin](../../sampleNonZeroPlugin/)
+
+**TensorRT plugins**
+- [Extending TensorRT with Custom Layers](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#extending)
+- [TensorRT Python-based Plugins](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/#add_custom_layer_python)
+
+**Other documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/#python_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+April 2024
+This is the first version of this `README.md` file.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/non_zero_plugin.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/non_zero_plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..89ef3826a530d5bbd8cd8c71871eacfbdb61e123
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/non_zero_plugin.py
@@ -0,0 +1,352 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import os
+import sys
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    EngineFromNetwork,
+    NetworkFromOnnxPath,
+    TrtRunner,
+    create_network,
+    engine_from_network,
+)
+
+import argparse
+
+from polygraphy import mod
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+from plugin_utils import checkCudaErrors, KernelHelper, UnownedMemory, volume
+
+cuda = mod.lazy_import("cuda.cuda")
+cudart = mod.lazy_import("cuda.cudart")
+nvrtc = mod.lazy_import("cuda.nvrtc")
+
+torch = mod.lazy_import("torch")
+cp = mod.lazy_import("cupy")
+
+non_zero_half_kernel = r'''
+#include <cuda_fp16.h>
+extern "C" __global__
+void find_non_zero_indices_half(
+    half const* X, int* indices, int* count, int R, int C)
+{
+    int row = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Check if the row index is within bounds
+    if (row < R)
+    {
+
+        for (int col = 0; col < C; ++col)
+        {
+            half const z = static_cast<half>(0.F);
+            if (X[col + C * row] != z)
+            {
+                int index = atomicAdd(count, 1); // Increment count atomically and get the previous value
+                indices[2 * index] = row;
+                indices[2 * index + 1] = col;
+            }
+        }
+    }
+}
+'''
+
+non_zero_float_kernel = r'''
+extern "C" __global__
+void find_non_zero_indices_float(
+    float const* X, int* indices, int* count, int R, int C)
+{
+    int row = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Check if the row index is within bounds
+    if (row < R)
+    {
+
+        for (int col = 0; col < C; ++col)
+        {
+            if (X[col + C * row] != 0.F)
+            {
+                int index = atomicAdd(count, 1); // Increment count atomically and get the previous value
+                indices[2 * index] = row;
+                indices[2 * index + 1] = col;
+            }
+        }
+    }
+}
+'''
+
+class NonZeroPlugin(trt.IPluginV3, trt.IPluginV3OneCore, trt.IPluginV3OneBuild, trt.IPluginV3OneRuntime):
+    def __init__(self, backend = None):
+        trt.IPluginV3.__init__(self)
+        trt.IPluginV3OneCore.__init__(self)
+        trt.IPluginV3OneBuild.__init__(self)
+        trt.IPluginV3OneRuntime.__init__(self)
+
+        self.num_outputs = 2
+        self.plugin_namespace = ""
+        self.plugin_name = "NonZeroPlugin"
+        self.plugin_version = "1"
+
+        if backend is not None:
+            self.backend = backend.tobytes().decode("utf-8")
+        else:
+            self.backend = "cuda_python"
+
+        self.cuDevice = None
+
+    def get_capability_interface(self, type):
+        return self
+
+    def get_output_data_types(self, input_types):
+        return [trt.DataType.INT32, trt.DataType.INT32]
+
+    def get_output_shapes(self, inputs, shape_inputs, exprBuilder):
+        # First output is 2-D
+        # Second output is a size tensor, which must be declared a scalar (0-D)
+        output_dims = [trt.DimsExprs(2), trt.DimsExprs(0)]
+
+        upper_bound = exprBuilder.operation(trt.DimensionOperation.PROD, inputs[0][0], inputs[0][1])
+        opt_value = exprBuilder.operation(trt.DimensionOperation.FLOOR_DIV, upper_bound, exprBuilder.constant(2))
+        num_non_zero_size_tensor = exprBuilder.declare_size_tensor(1, opt_value, upper_bound)
+
+        output_dims[0][0] = num_non_zero_size_tensor
+        output_dims[0][1] = exprBuilder.constant(2)
+
+        return output_dims
+
+    def get_fields_to_serialize(self):
+        return trt.PluginFieldCollection(
+            [
+                trt.PluginField(
+                    "backend", self.backend.encode(), trt.PluginFieldType.CHAR
+                )
+            ]
+        )
+
+    def configure_plugin(self, inp, out):
+        if self.backend == "cuda_python":
+            err, self.cuDevice = cuda.cuDeviceGet(0)
+
+    def on_shape_change(self, inp, out):
+        if self.backend == "cuda_python":
+            err, self.cuDevice = cuda.cuDeviceGet(0)
+
+    def supports_format_combination(self, pos, in_out, num_inputs):
+        assert num_inputs == 1
+        assert pos < len(in_out)
+
+        type_ok = False
+
+        # first input should be float16 or float32
+        if pos == 0:
+            type_ok = in_out[0].desc.type == trt.DataType.FLOAT or in_out[0].desc.type == trt.DataType.HALF
+        elif pos == 1:
+            type_ok = in_out[1].desc.type == trt.DataType.INT32
+        else: # pos == 2
+            # size tensor outputs must be NCHW INT32
+            type_ok = in_out[2].desc.type == trt.DataType.INT32
+
+        return in_out[pos].desc.format == trt.TensorFormat.LINEAR and type_ok
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):  # output_desc and workspace are not used below
+        inp_dtype = trt.nptype(input_desc[0].type)  # NumPy dtype matching the first input's TensorRT type
+        # Dispatch on self.backend: "cuda_python" launches a kernel, "torch" calls torch.nonzero.
+        if self.backend == "cuda_python":
+            R = input_desc[0].dims[0]  # first dimension of the input
+            C = input_desc[0].dims[1]  # second dimension of the input
+
+            blockSize = 256
+            numBlocks = int((C + blockSize - 1) // blockSize)  # ceil(C / blockSize); presumably one thread per column -- kernel source not visible here
+            # Each raw address is wrapped in a one-element uint64 array so it has a host location to point at.
+            d_in = np.array([inputs[0]], dtype=np.uint64)
+            d_out_0 = np.array([outputs[0]], dtype=np.uint64)
+            d_out_1 = np.array([outputs[1]], dtype=np.uint64)
+
+            args = [d_in, d_out_0, d_out_1, np.array(R, dtype=np.uint32), np.array(C, dtype=np.uint32)]  # order: input, output 0, output 1, R, C
+            kernelArgs = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)  # host addresses of the argument values
+
+            stream_ptr = np.array([stream], dtype=np.uint64)
+            # Pick the kernel source and entry point that match the input dtype.
+            if inp_dtype == np.float32:
+                kernelHelper = KernelHelper(non_zero_float_kernel, int(self.cuDevice))  # NOTE(review): built on every enqueue call -- confirm whether this recompiles each time
+                _non_zero_float_kernel = kernelHelper.getFunction(b'find_non_zero_indices_float')
+                checkCudaErrors(cuda.cuLaunchKernel(_non_zero_float_kernel,
+                                            numBlocks, 1, 1,  # grid dimensions
+                                            blockSize, 1, 1,  # block dimensions
+                                            0,  # dynamic shared memory bytes
+                                            stream_ptr,
+                                            kernelArgs, 0))
+            elif inp_dtype == np.float16:
+                kernelHelper = KernelHelper(non_zero_half_kernel, int(self.cuDevice))  # NOTE(review): same per-call construction as the float branch
+                _non_zero_half_kernel = kernelHelper.getFunction(b'find_non_zero_indices_half')
+                checkCudaErrors(cuda.cuLaunchKernel(_non_zero_half_kernel,
+                                            numBlocks, 1, 1,  # grid dimensions
+                                            blockSize, 1, 1,  # block dimensions
+                                            0,  # dynamic shared memory bytes
+                                            stream_ptr,
+                                            kernelArgs, 0))
+            else:
+                raise ValueError("inp_dtype not valid")
+
+        elif self.backend == "torch":
+            inp_mem = UnownedMemory(inputs[0], input_desc[0].dims, inp_dtype)  # presumably a non-owning view over the input pointer -- UnownedMemory is defined elsewhere
+
+            out_mem = UnownedMemory(
+                outputs[0], 2 * volume(input_desc[0].dims), np.int32  # room for two int32 values per input element
+            )
+
+            out_1_mem = UnownedMemory(outputs[1], 1, np.int32)  # a single int32 slot
+
+            a_t = torch.as_tensor(inp_mem.d, device="cuda")
+            out = torch.nonzero(a_t)  # one row of indices per non-zero element
+
+            out_mem.d[: volume(out.shape)] = cp.reshape(cp.asarray(out), (-1,))  # flattened indices fill the front of output 0
+            cp.copyto(out_1_mem.d, cp.reshape(cp.asarray([out.shape[0]]), (-1,)))  # output 1 receives the number of non-zero rows
+
+        else:
+            raise ValueError(f"backend not valid: {self.backend}")
+
+    def attach_to_context(self, context):  # context is not used
+        return self.clone()  # hand back a separate copy of this plugin
+
+    def set_tactic(self, tactic):  # no-op: the tactic argument is ignored
+        pass
+
+    def clone(self):
+        duplicate = NonZeroPlugin()  # start from a default-constructed plugin
+        duplicate.__dict__.update(self.__dict__)  # then copy every instance attribute across (shallow)
+        return duplicate
+
+    #
+    # The following defaults take effect since the respective methods are not overridden
+    #
+
+    # def get_valid_tactics(self):
+    #     return []
+
+    # def get_workspace_size(self, input_desc, output_desc):
+    #     return 0
+
+    # def destroy(self):
+    #     pass
+
+
+class NonZeroPluginCreator(trt.IPluginCreatorV3One):
+    def __init__(self):  # advertises a single CHAR field named "backend"
+        trt.IPluginCreatorV3One.__init__(self)
+        self.name, self.plugin_namespace, self.plugin_version = "NonZeroPlugin", "", "1"
+        backend_field = trt.PluginField("backend", np.array([]), trt.PluginFieldType.CHAR)
+        self.field_names = trt.PluginFieldCollection([backend_field])
+
+    def create_plugin(self, name, fc, phase):
+        # The last "backend" field in fc wins; one trailing 0 byte is stripped from its data.
+        selected = None
+        for field in fc:
+            if field.name != "backend":
+                continue
+            raw = field.data
+            selected = raw if raw[-1] != 0 else raw[:-1]
+        return NonZeroPlugin(selected)
+
+if __name__ == "__main__":  # demo driver: build an engine around NonZeroPlugin and compare its output with NumPy
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--precision', type=str, default="fp32", choices=["fp32", "fp16"])
+    parser.add_argument("--backend", type=str, default="torch", choices=["cuda_python", "torch"])
+    parser.add_argument('--net_type', type=str, default="onnx", choices=["onnx", "inetdef"])
+
+    args = parser.parse_args()
+
+    if args.backend == "cuda_python":
+        # Initialize CUDA Driver API (result is validated instead of being discarded)
+        checkCudaErrors(cuda.cuInit(0))
+        # Retrieve handle for device 0
+        err, cuDevice = cuda.cuDeviceGet(0)
+        # Create context
+        _, cudaCtx = cuda.cuCtxCreate(0, cuDevice)
+
+    precision = np.float32 if args.precision == "fp32" else np.float16  # NumPy dtype used for the input data
+
+    inp_shape = (128, 128)
+    X = np.random.normal(size=inp_shape).astype(precision)
+    # Zero out a random set of indices
+    indices = np.random.choice(np.prod(inp_shape), replace=False, size=np.random.randint(0, np.prod(inp_shape) + 1))
+    X[np.unravel_index(indices, inp_shape)] = 0
+
+    # Register plugin creator
+    plg_registry = trt.get_plugin_registry()
+    my_plugin_creator = NonZeroPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    if args.net_type == "onnx":
+        # create ONNX model
+        onnx_path = "test_NonZeroPlugin.onnx"
+        inputX = gs.Variable(name="X", shape=inp_shape, dtype=precision)
+        Y = gs.Variable(name="Y", dtype=np.int32)
+        Y_num = gs.Variable(name="Y_num", dtype=np.int32)
+        nonZeroPluginNode = gs.Node(
+            name="NonZeroPlugin",
+            op="NonZeroPlugin",
+            inputs=[inputX],
+            outputs=[Y, Y_num],
+            attrs={"backend": args.backend.encode()},
+        )
+        graph = gs.Graph(nodes=[nonZeroPluginNode], inputs=[inputX], outputs=[Y], opset=16)  # only Y is exposed as a graph output
+        onnx.save(gs.export_onnx(graph), onnx_path)
+
+        # build engine
+        build_engine = EngineFromNetwork(
+            NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision==np.float16)
+        )
+    else:
+        # Create plugin object
+        builder, network = create_network()
+        plg_creator = plg_registry.get_creator("NonZeroPlugin", "1", "")
+        plugin_fields_list = [
+            trt.PluginField("backend", args.backend.encode(), trt.PluginFieldType.CHAR)
+        ]
+        pfc = trt.PluginFieldCollection(plugin_fields_list)
+        plugin = plg_creator.create_plugin("NonZeroPlugin", pfc, trt.TensorRTPhase.BUILD)
+
+        # Populate network
+        inputX = network.add_input(name="X", dtype=trt.float32 if precision==np.float32 else trt.float16, shape=inp_shape)
+        out = network.add_plugin_v3([inputX], [], plugin)
+        out.get_output(0).name = "Y"
+        network.mark_output(tensor=out.get_output(0))
+        build_engine = engine_from_network((builder, network), CreateConfig(fp16=precision==np.float16))  # precision is a NumPy dtype, so compare with np.float16 (as the ONNX branch does)
+
+    # Compare against Numpy's nonzero
+    Y_ref = np.transpose(np.nonzero(X))
+
+    # Run
+    with TrtRunner(build_engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+        Y = Y[np.lexsort(np.fliplr(Y).T)]  # sort rows with column 0 as the primary key, matching np.nonzero order
+
+        if np.array_equal(Y, Y_ref):  # exact index match; reports a mismatch (rather than raising) when row counts differ
+            print("Inference result correct!")
+        else:
+            print("Inference result incorrect!")
+
+    if args.backend == "cuda_python":
+        checkCudaErrors(cuda.cuCtxDestroy(cudaCtx))
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..595c3d8d3767f6587e9dbf9f85d411f984baaa4a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/non_zero_plugin/requirements.txt
@@ -0,0 +1,15 @@
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+cupy-cuda12x
+torch
+--extra-index-url https://pypi.ngc.nvidia.com
+polygraphy
+colored
+numpy==1.23.5; (platform_system != "Windows" and python_version <= "3.10")
+numpy==1.26.4; (platform_system != "Windows" and python_version >= "3.11")
+--extra-index-url https://pypi.ngc.nvidia.com
+onnx-graphsurgeon
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/CMakeLists.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..58c7dce58725f0c8e554259399a0831c288be6c4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/CMakeLists.txt
@@ -0,0 +1,97 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# We need cmake >= 3.8, since 3.8 introduced CUDA as a first class language
+cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
+project(CustomHardMax LANGUAGES CXX CUDA)
+
+if(NOT MSVC)
+    # Enable compile warnings, but silence the long-long and deprecated-declarations diagnostics
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-long-long -pedantic -Wno-deprecated-declarations")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wno-deprecated-declarations")  # -Xcompiler forwards the flag to the host compiler
+endif()
+
+# Sets variable `var` to `val` when `var` is undefined or evaluates to false, then logs the resulting value.
+macro(set_ifndef var val)
+    if(NOT ${var})  # ${var} expands to the variable's name, so this tests the named variable itself
+        set(${var} ${val})
+    endif()
+    message(STATUS "Configurable variable ${var} set to ${${var}}")  # ${${var}} is the named variable's value
+endmacro()
+
+# -------- CONFIGURATION --------
+if(NOT MSVC)  # defaults are only provided for non-MSVC builds
+    set_ifndef(TRT_LIB /usr/lib/x86_64-linux-gnu)
+    set_ifndef(TRT_INCLUDE /usr/include/x86_64-linux-gnu)
+    set_ifndef(CUDA_INC_DIR /usr/local/cuda/include)
+
+    set_ifndef(CUDA_LIB_DIR /usr/local/cuda)  # toolkit root; library sub-directories are added via PATH_SUFFIXES below
+    set_ifndef(CUBLAS_LIB_SUFFIXES "lib;lib64")
+endif()
+
+# Find dependencies:
+message("\nThe following variables are derived from the values of the previous variables unless provided explicitly:\n")
+
+# TensorRT's nvinfer lib
+find_library(
+    _NVINFER_LIB nvinfer
+    HINTS ${TRT_LIB}
+    PATH_SUFFIXES lib lib64)
+set_ifndef(NVINFER_LIB ${_NVINFER_LIB})
+# CUDA runtime library
+find_library(
+    _CUDART_LIB cudart
+    HINTS ${CUDA_LIB_DIR}
+    PATH_SUFFIXES lib lib64)
+set_ifndef(CUDART_LIB ${_CUDART_LIB})
+# cuBLAS library
+find_library(
+    _CUBLAS_LIB cublas
+    HINTS ${CUDA_LIB_DIR}
+    PATH_SUFFIXES ${CUBLAS_LIB_SUFFIXES})
+set_ifndef(CUBLAS_LIB ${_CUBLAS_LIB})
+# CUDA driver library, looked up in the stubs sub-directories
+find_library(
+    _CUDA_LIB cuda
+    HINTS ${CUDA_LIB_DIR}
+    PATH_SUFFIXES lib/stubs lib64/stubs)
+set_ifndef(CUDA_LIB ${_CUDA_LIB})
+
+# -------- BUILDING --------
+
+add_definitions(-DTENSORRT_BUILD_LIB)
+
+# Add include directories
+get_filename_component(SAMPLES_COMMON_DIR ${CMAKE_SOURCE_DIR}/../../common/ ABSOLUTE)  # <source>/../../common
+get_filename_component(SAMPLES_DIR ${CMAKE_SOURCE_DIR}/../../ ABSOLUTE)  # <source>/../..
+include_directories(${CUDA_INC_DIR} ${TRT_INCLUDE} ${CMAKE_SOURCE_DIR}/plugin/
+                    ${SAMPLES_COMMON_DIR} ${SAMPLES_DIR})
+
+# Define Hardmax plugin library target (MODULE: a library built to be loaded at run time)
+add_library(
+    customHardmaxPlugin MODULE
+    ${SAMPLES_COMMON_DIR}/logger.cpp ${SAMPLES_DIR}/utils/fileLock.cpp
+    ${CMAKE_SOURCE_DIR}/plugin/customHardmaxPlugin.cpp ${CMAKE_SOURCE_DIR}/plugin/customHardmaxPlugin.h)
+
+# Use C++17
+target_compile_features(customHardmaxPlugin PUBLIC cxx_std_17)
+
+# Link TensorRT's nvinfer lib, then the CUDA runtime, cuBLAS and CUDA driver libraries
+target_link_libraries(customHardmaxPlugin PRIVATE ${NVINFER_LIB})
+target_link_libraries(customHardmaxPlugin PRIVATE ${CUDART_LIB})
+target_link_libraries(customHardmaxPlugin PRIVATE ${CUBLAS_LIB})
+target_link_libraries(customHardmaxPlugin PRIVATE ${CUDA_LIB})
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..82a6583e7449695d108dda95b93084948ff06cb3
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/README.md
@@ -0,0 +1,168 @@
+# Adding A Custom Layer Implementation to Your ONNX Network
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+- [Prerequisites](#prerequisites)
+- [Download and preprocess the ONNX model](#download-and-preprocess-the-onnx-model)
+- [Running the sample](#running-the-sample)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, `onnx_custom_plugin`, demonstrates how to use plugins written in C++ with the TensorRT Python bindings and ONNX Parser. This sample uses the [BiDAF Model](https://github.com/onnx/models/tree/main/text/machine_comprehension/bidirectional_attention_flow) from ONNX Model Zoo.
+
+## How does this sample work?
+
+This sample implements a Hardmax layer using cuBLAS, wraps the implementation in a TensorRT plugin (with a corresponding plugin creator) and then generates a shared library module containing its code. The user then dynamically loads this library in Python, which causes the plugin to be registered in TensorRT's PluginRegistry and makes it available to the ONNX parser.
+
+This sample includes:
+
+`plugin/`
+This directory contains files for the Hardmax layer plugin.
+
+`customHardmaxPlugin.cpp`
+A custom TensorRT plugin implementation.
+
+`customHardmaxPlugin.h`
+The Hardmax Plugin headers.
+
+`model.py`
+This script downloads the BiDAF onnx model and uses Onnx Graphsurgeon to replace layers unsupported by TensorRT.
+
+`sample.py`
+This script loads the ONNX model and performs inference using TensorRT.
+
+`load_plugin_lib.py`
+This script contains a helper function to load the customHardmaxPlugin library in Python.
+
+`test_custom_hardmax_plugin.py`
+This script tests the Hardmax Plugin against a reference numpy implementation.
+
+`requirements.txt`
+This file specifies all the Python packages required to run this Python sample.
+
+## Prerequisites
+
+For specific software versions, see the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html).
+
+1. Install the dependencies for Python.
+
+```bash
+pip3 install -r requirements.txt
+```
+
+2. [Install CMake](https://cmake.org/download/).
+
+3. [Install Cublas](https://developer.nvidia.com/cublas).
+
+4. (For Windows builds) [Visual Studio](https://visualstudio.microsoft.com/vs/older-downloads/) 2017 Community or Enterprise edition
+
+## Download and preprocess the ONNX model
+
+Run the model script to download the BiDAF model from the Onnx Model Zoo. The script will replace the `Hardmax` layer with an op called `CustomHardmax` to match the custom Plugin name. It will also replace the unsupported `Compress` node with an equivalent operation, and remove the `CategoryMapper` nodes which do a String-to-Int conversion of the model inputs.
+
+```bash
+python3 model.py
+```
+
+## Running the sample
+
+1.  Build the plugin and its corresponding Python bindings.
+
+   - On Linux, run:
+      ```bash
+      mkdir build && pushd build
+      cmake .. && make -j
+      popd
+      ```
+
+      **NOTE:** If any of the dependencies are not installed in their default locations, you can manually specify them. For example:
+      ```bash
+      cmake .. -DCMAKE_CUDA_COMPILER=/usr/local/cuda-x.x/bin/nvcc # (Or adding /path/to/nvcc into $PATH)
+               -DCUDA_INC_DIR=/usr/local/cuda-x.x/include/  # (Or adding /path/to/cuda/include into $CPLUS_INCLUDE_PATH)
+               -DTRT_LIB=/path/to/tensorrt/lib/
+               -DTRT_INCLUDE=/path/to/tensorrt/include/
+      ```
+
+   - On Windows, run the following in Powershell, replacing paths appropriately:
+      ```ps1
+      mkdir build; pushd build
+      cmake .. -G "Visual Studio 15 Win64" /
+         -DTRT_LIB=C:\path\to\tensorrt\lib /
+         -DTRT_INCLUDE=C:\path\to\tensorrt\include /
+         -DCUDA_INC_DIR="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v<CUDA_VERSION>\include" /
+         -DCUDA_LIB_DIR="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v<CUDA_VERSION>\lib\x64"
+      # NOTE: msbuild is usually located under C:\Program Files (x86)\Microsoft Visual Studio\2017\<EDITION>\MSBuild\<VERSION>\Bin
+      #   You should add this path to your PATH environment variable.
+      msbuild ALL_BUILD.vcxproj
+      popd
+      ```
+
+   The command `cmake ..` displays a complete list of configurable variables. If a variable is set to `VARIABLE_NAME-NOTFOUND`, then you'll need to specify it manually or set the variable it is derived from correctly.
+
+2.  Run inference using TensorRT with the custom Hardmax plugin implementation:
+   ```bash
+   python3 sample.py
+   ```
+
+3.  Verify that the sample ran successfully.
+   ```
+   === Testing ===
+
+   Input context: Garry the lion is 5 years old. He lives in the savanna.
+   Input query: Where does the lion live?
+   Model prediction:  savanna
+
+   Input context: A quick brown fox jumps over the lazy dog.
+   Input query: What color is the fox?
+   Model prediction:  brown   
+   ```
+
+   The model can also be run interactively:
+   ```bash
+   python3 sample.py --interactive
+   ```
+
+   The context and query can then be entered from the command line:
+
+   ```
+   === Testing ===
+   Enter context: Waldo wears a striped shirt. He also wears glasses.
+   Enter query: Who wears glasses?
+   Model prediction:  waldo
+   ```
+
+# Additional resources
+
+The following resources provide a deeper understanding about getting started with TensorRT using Python:
+
+**Model**
+- [BiDAF model](https://allenai.github.io/bi-att-flow/)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+January 2024:
+  - Create cublas handle with cublasCreate instead of using the cublasContext argument from attachToContext. The cublasContext is still valid if TacticSource::kCUBLAS is enabled. TacticSource::kCUBLAS is deprecated.
+  - Added the Cublas library as a prerequisite.
+
+August 2023: 
+  - Update ONNX version support to 1.14.0
+  - Removed support for Python versions < 3.8.
+
+September 2022: This `README.md` file was created and reviewed.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/load_plugin_lib.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/load_plugin_lib.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3feaa3731403e0d1812edf3e9c737883a903f83
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/load_plugin_lib.py
@@ -0,0 +1,58 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import ctypes
+
+WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname(  # env override, else this file's directory
+    os.path.realpath(__file__)
+)
+IS_WINDOWS = os.name == "nt"
+if IS_WINDOWS:
+    HARDMAX_PLUGIN_LIBRARY_NAME = "customHardmaxPlugin.dll"
+    HARDMAX_PLUGIN_LIBRARY = [  # candidate paths: Debug build first, then Release
+        os.path.join(WORKING_DIR, "build", "Debug", HARDMAX_PLUGIN_LIBRARY_NAME),
+        os.path.join(WORKING_DIR, "build", "Release", HARDMAX_PLUGIN_LIBRARY_NAME),
+    ]
+else:
+    HARDMAX_PLUGIN_LIBRARY_NAME = "libcustomHardmaxPlugin.so"
+    HARDMAX_PLUGIN_LIBRARY = [  # single candidate path directly under build/
+        os.path.join(WORKING_DIR, "build", HARDMAX_PLUGIN_LIBRARY_NAME)
+    ]
+
+
+def load_plugin_lib():
+    for candidate in HARDMAX_PLUGIN_LIBRARY:
+        if not os.path.isfile(candidate):
+            continue
+        try:
+            # winmode=0 is passed explicitly: Python documents 0 as the default, but
+            # some implementations default to None instead. See:
+            # https://docs.python.org/3.8/library/ctypes.html
+            # https://github.com/python/cpython/blob/3.10/Lib/ctypes/__init__.py#L343
+            ctypes.CDLL(candidate, winmode=0)
+        except TypeError:
+            # the winmode keyword only exists from python 3.8 on; retry without it
+            ctypes.CDLL(candidate)
+        return
+
+    failure_lines = (
+        "Failed to load library ({}).".format(HARDMAX_PLUGIN_LIBRARY_NAME),
+        "Please build the Hardmax sample plugin.",
+        "For more information, see the included README.md",
+    )
+    raise IOError("\n{}\n{}\n{}\n".format(*failure_lines))
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/model.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..53b2a96e9e57ceae1881167b77b405a4b0178117
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/model.py
@@ -0,0 +1,127 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import json
+
+import wget
+import onnx
+import onnx_graphsurgeon as gs
+
+MODEL_URL = "https://github.com/onnx/models/raw/e77240a62df68ed13e3138a5812553a552b857bb/text/machine_comprehension/bidirectional_attention_flow/model/bidaf-9.onnx"  # URL is pinned to one commit of onnx/models
+
+WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname(  # env override, else this file's directory
+    os.path.realpath(__file__)
+)
+MODEL_DIR = os.path.join(WORKING_DIR, "models")
+RAW_MODEL_PATH = os.path.join(MODEL_DIR, "bidaf-9.onnx")  # where the downloaded model is stored
+TRT_MODEL_PATH = os.path.join(MODEL_DIR, "bidaf-9-trt.onnx")  # where the model is saved after graph surgery
+
+
+def _do_graph_surgery(raw_model_path, trt_model_path):  # load the ONNX file at raw_model_path, rewrite it, save to trt_model_path
+    graph = gs.import_onnx(onnx.load(raw_model_path))
+
+    # Replace unsupported Hardmax with our CustomHardmax op
+    for node in graph.nodes:
+        if node.op == "Hardmax":
+            node.op = "CustomHardmax"
+            hardmax_node = node  # NOTE(review): never assigned if the graph has no Hardmax node; the Einsum construction below then fails
+
+    # The original onnx model also uses another unsupported op called "Compress".
+    # "Compress" returns values from the first tensor for all indices which evaluate to
+    # True in the second tensor. In our case the second Tensor is the output of Hardmax,
+    # so exactly one index will evaluate to true because the value at it will be 1, and
+    # all other values will be 0. We can achieve the same result as "Compress" by taking the
+    # dot product of our value tensor and the Hardmax output.
+    #
+    # So, we will replace the subgraph Compress(Transpose_29, Cast(Reshape(Hardmax)))
+    # with the subgraph Einsum(Transpose_29, Hardmax) where the equation in Einsum takes the dot product.
+    node_by_name = {node.name: node for node in graph.nodes}
+    transpose_node = node_by_name["Transpose_29"]  # these node names are specific to this model file
+    compress_node = node_by_name["Compress_31"]
+
+    einsum_node = gs.Node(
+        "Einsum",
+        "Dot_of_Hardmax_and_Transpose",
+        attrs={"equation": "ij,ij->i"},  # "Dot product" of 2d tensors
+        inputs=[hardmax_node.outputs[0], transpose_node.outputs[0]],
+        outputs=[compress_node.outputs[0]],  # reuse Compress's output tensor so the Einsum now produces it
+    )
+    graph.nodes.append(einsum_node)
+
+    # Separate the old subgraph which will be deleted with graph.cleanup()
+    hardmax_node.o().inputs.clear()
+    transpose_node.o().inputs.clear()
+    compress_node.outputs.clear()
+
+    # Also remove the CategoryMapper nodes which convert strings to integers as the first step in the model.
+    # We need to convert the following structure:
+    #
+    #      Input as                        Converted to
+    #   String tokens                     Integer tokens
+    #  ---------------->[CategoryMapper]------------------>[Rest of Model]
+    #
+    # into the following:
+    #
+    #      Input as
+    #   Integer tokens
+    #  ------------------>[Rest of Model]
+    #
+    # Later we will feed the model the integer tokens directly.
+    # Note: list conversion is necessary because we modify graph.nodes in the for loop.
+    category_mapper_nodes = [
+        node for node in graph.nodes if node.op == "CategoryMapper"
+    ]
+    for node in category_mapper_nodes:
+        # Remove CategoryMapper node from onnx graph
+        graph.nodes.remove(node)
+
+        # Also remove references to its inputs from the graph's inputs
+        for input_tensor in node.inputs:
+            graph.inputs.remove(input_tensor)
+
+        # The graph's new inputs are the Integer tokens output by CategoryMapper
+        graph.inputs += node.outputs
+
+        # Save String->Int map
+        with open(node.name + ".json", "w") as fp:  # relative path: the file lands in the current working directory
+            json.dump(node.attrs, fp)
+
+    graph.cleanup().toposort()
+    onnx.save(gs.export_onnx(graph), trt_model_path)
+
+
+def make_trt_compatible_onnx_model():  # download the raw model if needed, then write the rewritten model to TRT_MODEL_PATH
+    os.makedirs(MODEL_DIR, exist_ok=True)
+    if not os.path.exists(RAW_MODEL_PATH):  # an already-downloaded file is reused as-is
+        wget.download(MODEL_URL, out=RAW_MODEL_PATH)
+        print("\nDownloaded BiDAF model from Onnx Model Zoo")
+    print("Performing graph surgery on Onnx Model Zoo BiDAF model")
+    _do_graph_surgery(RAW_MODEL_PATH, TRT_MODEL_PATH)
+    print("Graph Surgery complete!")
+
+
+def main():
+    if not os.path.exists(TRT_MODEL_PATH):  # generate only when the converted model is missing
+        print("TRT-compatible onnx model not found, generating...")
+        make_trt_compatible_onnx_model()
+        return
+    print("TRT-compatible onnx model already exists!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/plugin/customHardmaxPlugin.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/plugin/customHardmaxPlugin.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9ba419024893a2ebc44a3bbd69b801cf9d75f7a4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/plugin/customHardmaxPlugin.cpp
@@ -0,0 +1,409 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "customHardmaxPlugin.h"
+#include "NvInferPlugin.h"
+#include "common.h" // volume(), ASSERT
+#include "logger.h" // sample::gLogError
+#include <cstring> // std::memcpy, strcmp
+#include <cuda.h>
+using namespace nvinfer1;
+
+#define CUDRIVER_CALL(call)                                                                                            \
+    {                                                                                                                  \
+        cudaError_enum s_ = call;                                                                                      \
+        if (s_ != CUDA_SUCCESS)                                                                                        \
+        {                                                                                                              \
+            char const *errName_, *errDesc_;                                                                           \
+            cuGetErrorName(s_, &errName_);                                                                             \
+            cuGetErrorString(s_, &errDesc_);                                                                           \
+            sample::gLogError << "CUDA Error: " << errName_ << " " << errDesc_ << std::endl;                           \
+            return s_;                                                                                                 \
+        }                                                                                                              \
+    }
+
+#define CUDA_CALL(call)                                                                                                \
+    {                                                                                                                  \
+        cudaError_t s_ = call;                                                                                         \
+        if (s_ != cudaSuccess)                                                                                         \
+        {                                                                                                              \
+            sample::gLogError << "CUDA Error: " << cudaGetErrorName(s_) << " " << cudaGetErrorString(s_) << std::endl; \
+            return s_;                                                                                                 \
+        }                                                                                                              \
+    }
+
+#define CUBLAS_CALL(call)                                                                                              \
+    {                                                                                                                  \
+        cublasStatus_t s_ = call;                                                                                      \
+        if (s_ != CUBLAS_STATUS_SUCCESS)                                                                               \
+        {                                                                                                              \
+            sample::gLogError << "cuBLAS Error: " << s_ << std::endl;                                                  \
+            return s_;                                                                                                 \
+        }                                                                                                              \
+    }
+
+// Helpers for (de)serializing the plugin. Values are copied byte-wise: the buffer need not be aligned for T.
+template <typename T>
+void writeToBuffer(uint8_t*& buffer, T const& val)
+{
+    std::memcpy(buffer, &val, sizeof(T)); // store val at the cursor
+    buffer += sizeof(T);                  // advance the caller's cursor past it
+}
+
+template <typename T>
+T readFromBuffer(uint8_t const*& buffer)
+{
+    T val;
+    std::memcpy(&val, buffer, sizeof(T)); // load a T from the cursor
+    buffer += sizeof(T);                  // advance the caller's cursor past it
+    return val;
+}
+
+REGISTER_TENSORRT_PLUGIN(HardmaxPluginCreator); // registers the creator with the TensorRT plugin registry
+
+namespace
+{
+constexpr char const* kHARDMAX_NAME{"CustomHardmax"}; // returned by getPluginType() / getPluginName()
+constexpr char const* kHARDMAX_VERSION{"1"};          // returned by both getPluginVersion() overrides
+} // namespace
+
+// Creates a plugin for `axis`; the size members keep their defaults until configurePlugin() sets them.
+HardmaxPlugin::HardmaxPlugin(int32_t axis)
+    : mAxis(axis)
+{}
+
+HardmaxPlugin::HardmaxPlugin(void const* serialData, size_t serialLength)
+{
+    uint8_t const* d = static_cast<uint8_t const*>(serialData); // read cursor, advanced by readFromBuffer
+    uint8_t const* a = d;                                       // start of the blob, kept for the size check
+    // Fields are read in the same order in which serialize() writes them.
+    mAxis = readFromBuffer<int32_t>(d);
+    mAxisSize = readFromBuffer<int32_t>(d);
+    mDimProductOuter = readFromBuffer<int32_t>(d);
+    mDimProductInner = readFromBuffer<int32_t>(d);
+    // The whole blob must have been consumed, i.e. serialLength equals the number of bytes read.
+    ASSERT(d == (a + serialLength));
+}
+
+HardmaxPlugin::~HardmaxPlugin()
+{
+    terminate(); // terminate() has an empty body in this file, so nothing is released here
+}
+
+int32_t HardmaxPlugin::getNbOutputs() const noexcept
+{
+    return 1; // the plugin has a single output tensor
+}
+
+int32_t HardmaxPlugin::initialize() noexcept
+{
+    return 0; // nothing to set up; always reports 0
+}
+
+char const* HardmaxPlugin::getPluginType() const noexcept
+{
+    return kHARDMAX_NAME; // "CustomHardmax"
+}
+
+char const* HardmaxPlugin::getPluginVersion() const noexcept
+{
+    return kHARDMAX_VERSION; // "1"
+}
+
+nvinfer1::DimsExprs HardmaxPlugin::getOutputDimensions(
+    int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept
+{
+    ASSERT(nbInputs == 1); // exactly one input tensor is supported
+    ASSERT(index == 0);    // only output 0 exists
+
+    // Dimensions are unchanged: the output shape is the input shape, so exprBuilder is not needed.
+    return inputs[0];
+}
+
+void HardmaxPlugin::attachToContext(
+    cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept
+{
+    cublasStatus_t ret = cublasCreate(&mCublas); // creates a new handle; none of the arguments is used
+    ASSERT(ret == CUBLAS_STATUS_SUCCESS && mCublas != nullptr && "Failed to create cublasHandle_t.");
+} // NOTE(review): no cublasDestroy() for this handle appears anywhere in this file -- confirm ownership
+
+// Detach the plugin object from its execution context. The body is empty: nothing is released here.
+void HardmaxPlugin::detachFromContext() noexcept {}
+
+// Writes a one-hot tensor to outputs[0]: 1.0 at the arg-max along mAxis of inputs[0], 0.0 everywhere else.
+// Returns 0 on success, -1 for a non-float input, otherwise the status code of the failing CUDA/cuBLAS call.
+int32_t HardmaxPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc,
+    nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace,
+    cudaStream_t stream) noexcept
+{
+    if (inputDesc[0].type != nvinfer1::DataType::kFLOAT)
+    {
+        return -1;
+    }
+    // An empty axis has no maximum: skip the work instead of indexing with an invalid arg-max result.
+    if (mAxisSize <= 0)
+    {
+        return 0;
+    }
+
+    CUBLAS_CALL(cublasSetStream(mCublas, stream));
+
+    auto const* data = static_cast<float const*>(inputs[0]);
+    auto* result = static_cast<float*>(outputs[0]);
+
+    // Make sure output is initialized to all 0's; later we set the correct outputs to 1's and leave the rest.
+    // The byte count is computed in size_t so that large tensors cannot overflow 32-bit arithmetic.
+    size_t const numElements = static_cast<size_t>(mDimProductOuter) * mDimProductInner * mAxisSize;
+    CUDA_CALL(cudaMemsetAsync(result, 0, numElements * sizeof(float), stream));
+
+    // We use the workspace in the case that the first call to 'cublasIsamax' is insufficient.
+    // The first half of the workspace we use to copy the values of the axis into, so that we can
+    // subtract out the minimum value and call 'cublasIsamax' again. See the comment below.
+    // The second half of the workspace will be a constant array of 1's, necessary for our cublasSaxpy call.
+    auto* const axisFlat = static_cast<float* const>(workspace);
+    float* const ones = axisFlat + mAxisSize;
+    float const one = 1.0F;
+    CUDRIVER_CALL(cuMemsetD32Async(CUdeviceptr(ones), *reinterpret_cast<int const*>(&one), mAxisSize, stream));
+
+    // This plugin works by parallelizing the argmax operation along a single axis, which is efficient
+    // when the axis is very large compared to the other dimensions: for an input shape (1, 512, 3) with
+    // axis = 1, the parallel work covers the 512-element axis and only the 1- and 3-element axes are
+    // walked serially. With axis = 2 the situation is reversed (parallel over 3 elements, serial over
+    // 512) and the plugin is very inefficient; a smarter plugin would try to recognize this and
+    // parallelize the work which would take longest.
+    for (int32_t outer = 0; outer < mDimProductOuter; outer++)
+    {
+        for (int32_t inner = 0; inner < mDimProductInner; inner++)
+        {
+            int64_t const stride = mDimProductInner; // 64-bit so that index products cannot overflow
+            int64_t const axesOffset = outer * stride * mAxisSize + inner;
+            float const* arr = &data[axesOffset];
+            int32_t argmaxResult;
+            CUBLAS_CALL(cublasIsamax(mCublas, mAxisSize, arr, mDimProductInner, &argmaxResult));
+
+            // cublasIsamax returns 1-indexed so convert to 0-indexed
+            argmaxResult--;
+
+            // cublasIsamax returns the index of the element with the highest absolute value.
+            // If this element is positive, then we know it is also the max.
+            // However, if it is negative, we need to
+            //      1) Copy the axis into our workspace
+            //      2) Subtract the minimum value we found from our array, so that none of the values
+            //         are negative and the largest element remains the largest element.
+            //      3) Use cublasIsamax to find the largest element again.
+            // NOTE: We are using cudaMemcpy instead of cudaMemcpyAsync because we need to know
+            //       maxAbsValue before proceeding. However, using synchronous rather than asynchronous
+            //       calls inside of enqueue() hurts performance. This could be fixed by implementing the
+            //       functionality of this plugin with a kernel instead of relying only on cuBLAS.
+            float maxAbsValue;
+            CUDA_CALL(cudaMemcpy(&maxAbsValue, &arr[argmaxResult * stride], sizeof(float), cudaMemcpyDeviceToHost));
+            if (maxAbsValue < 0)
+            {
+                float negMinValue = -maxAbsValue;
+                CUBLAS_CALL(cublasScopy(mCublas, mAxisSize, arr, mDimProductInner, axisFlat, 1));
+                CUBLAS_CALL(cublasSaxpy(mCublas, mAxisSize, &negMinValue, ones, 1, axisFlat, 1));
+                CUBLAS_CALL(cublasIsamax(mCublas, mAxisSize, axisFlat, 1, &argmaxResult));
+                argmaxResult--;
+            }
+
+            CUDA_CALL(cudaMemcpyAsync(
+                &result[axesOffset + argmaxResult * stride], &one, sizeof(float), cudaMemcpyHostToDevice, stream));
+        }
+    }
+    return cudaPeekAtLastError();
+}
+
+size_t HardmaxPlugin::getSerializationSize() const noexcept
+{
+    return 4 * sizeof(int32_t); // the four int32_t members written by serialize()
+}
+
+void HardmaxPlugin::serialize(void* buffer) const noexcept
+{
+    // Same order as in the deserializing constructor HardmaxPlugin(void const*, size_t)
+    uint8_t* d = static_cast<uint8_t*>(buffer); // write cursor, advanced by writeToBuffer
+    uint8_t* const a = d;                       // start of the buffer, kept for the size check
+
+    writeToBuffer(d, mAxis);
+    writeToBuffer(d, mAxisSize);
+    writeToBuffer(d, mDimProductOuter);
+    writeToBuffer(d, mDimProductInner);
+
+    ASSERT(d == a + getSerializationSize()); // bytes written must match the advertised size
+}
+
+// Accepts a tensor only if it is linear-format FP32 and has the same data type as tensor 0.
+bool HardmaxPlugin::supportsFormatCombination(
+    int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
+{
+    ASSERT(inOut && pos < (nbInputs + nbOutputs));
+
+    nvinfer1::PluginTensorDesc const& candidate = inOut[pos];
+    // No change of type allowed: every tensor must match the type of the first one.
+    bool const matchesFirst = inOut[0].type == candidate.type;
+    bool const isLinearFloat
+        = candidate.type == nvinfer1::DataType::kFLOAT && candidate.format == nvinfer1::PluginFormat::kLINEAR;
+    return matchesFirst && isLinearFloat;
+}
+
+void HardmaxPlugin::terminate() noexcept {} // empty body: this function releases nothing
+
+void HardmaxPlugin::destroy() noexcept
+{
+    // Called when the network containing the plugin is destroyed; the object deletes itself.
+    delete this;
+}
+
+IPluginV2DynamicExt* HardmaxPlugin::clone() const noexcept
+{
+    auto* const copy = new HardmaxPlugin(mAxis);
+    copy->mAxisSize = mAxisSize;
+    copy->mDimProductOuter = mDimProductOuter;
+    copy->mDimProductInner = mDimProductInner;
+    copy->mCublas = mCublas; // the handle value is copied as-is; no new cuBLAS handle is created here
+    copy->setPluginNamespace(mNamespace.c_str());
+    return copy;
+}
+
+void HardmaxPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs,
+    nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept
+{
+    ASSERT(nbInputs == 1);
+    ASSERT(nbOutputs == 1);
+    // Validate the shapes, normalize mAxis, then cache the sizes that enqueue() relies on.
+    nvinfer1::Dims const& inDims = in[0].desc.dims;
+    nvinfer1::Dims const& outDims = out[0].desc.dims;
+
+    // Check that inputs and outputs have the same dimensions
+    ASSERT(inDims.nbDims == outDims.nbDims);
+    for (int32_t dim = 0; dim < inDims.nbDims; dim++)
+    {
+        ASSERT(inDims.d[dim] == outDims.d[dim]);
+    }
+
+    // Check that axis is valid; a negative axis counts from the back and is made non-negative here
+    if (mAxis < 0)
+    {
+        mAxis += inDims.nbDims;
+        ASSERT(mAxis >= 0);
+    }
+    ASSERT(inDims.nbDims > mAxis);
+
+    // samplesCommon::volume() requires that all dimensions are non-negative.
+    // Even in the case of dynamic shapes, the plugin will be configured with
+    // resolved shapes before enqueue() is called, so the below member variables
+    // will be set correctly.
+    if (std::all_of(inDims.d, inDims.d + inDims.nbDims, [](int32_t x) { return x >= 0; }))
+    {
+        mDimProductOuter = samplesCommon::volume(inDims, 0, mAxis); // dimensions before the axis
+        mAxisSize = inDims.d[mAxis];
+        mDimProductInner = samplesCommon::volume(inDims, mAxis + 1, inDims.nbDims); // dimensions after the axis
+    }
+}
+
+nvinfer1::DataType HardmaxPlugin::getOutputDataType(
+    int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept
+{
+    ASSERT(inputTypes && nbInputs == 1 && index == 0);
+    return inputTypes[0]; // the output has the same data type as the single input
+}
+
+size_t HardmaxPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs,
+    nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
+{
+    // 1st array to store the contents of the working axis
+    // 2nd array to store an array of 1's
+    return 2 * inputs[0].dims.d[mAxis] * sizeof(float); // two float arrays, each as long as the hardmax axis
+}
+
+void HardmaxPlugin::setPluginNamespace(char const* libNamespace) noexcept
+{
+    ASSERT(libNamespace != nullptr);
+    mNamespace = libNamespace; // stored as a copy in the std::string member
+}
+
+char const* HardmaxPlugin::getPluginNamespace() const noexcept
+{
+    return mNamespace.c_str(); // points into mNamespace; a later setPluginNamespace() may invalidate it
+}
+
+HardmaxPluginCreator::HardmaxPluginCreator()
+{
+    mPluginAttributes.clear();
+
+    // Consistent with the ONNX model attr fields
+    static auto const axisField = PluginField("axis", nullptr, PluginFieldType::kINT32, 1); // no data attached
+    mPluginAttributes.emplace_back(axisField);
+
+    mFC.nbFields = mPluginAttributes.size();
+    mFC.fields = mPluginAttributes.data(); // points into mPluginAttributes
+}
+
+char const* HardmaxPluginCreator::getPluginName() const noexcept
+{
+    return kHARDMAX_NAME; // "CustomHardmax", the same string HardmaxPlugin::getPluginType() returns
+}
+
+char const* HardmaxPluginCreator::getPluginVersion() const noexcept
+{
+    return kHARDMAX_VERSION; // "1"
+}
+
+PluginFieldCollection const* HardmaxPluginCreator::getFieldNames() noexcept
+{
+    return &mFC; // the collection filled in by the constructor (the single "axis" field)
+}
+
+char const* HardmaxPluginCreator::getPluginNamespace() const noexcept
+{
+    return mNamespace.c_str(); // points into mNamespace; a later setPluginNamespace() may invalidate it
+}
+
+void HardmaxPluginCreator::setPluginNamespace(char const* libNamespace) noexcept
+{
+    ASSERT(libNamespace != nullptr);
+    mNamespace = libNamespace; // also applied to every plugin this creator hands out afterwards
+}
+
+// Creates a HardmaxPlugin from the "axis" field of `fc` (axis defaults to -1 when the field is absent).
+// A null `fc`, null field array or null "axis" data pointer trips an ASSERT instead of being dereferenced.
+IPluginV2DynamicExt* HardmaxPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept
+{
+    ASSERT(fc != nullptr && (fc->nbFields == 0 || fc->fields != nullptr));
+    int32_t axis = -1; // default value
+    for (int32_t i = 0; i < fc->nbFields; i++)
+    {
+        if (!strcmp(fc->fields[i].name, "axis"))
+        {
+            ASSERT(fc->fields[i].type == PluginFieldType::kINT32);
+            ASSERT(fc->fields[i].data != nullptr); // getFieldNames() advertises this field without data
+            axis = *static_cast<int32_t const*>(fc->fields[i].data);
+        }
+    }
+    HardmaxPlugin* plugin = new HardmaxPlugin(axis);
+    plugin->setPluginNamespace(mNamespace.c_str());
+    return plugin;
+}
+
+// Recreates a plugin from serialized bytes and tags it with this creator's namespace.
+IPluginV2DynamicExt* HardmaxPluginCreator::deserializePlugin(
+    char const* name, void const* serialData, size_t serialLength) noexcept
+{
+    auto* const restored = new HardmaxPlugin(serialData, serialLength);
+    restored->setPluginNamespace(mNamespace.c_str());
+    return restored;
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/plugin/customHardmaxPlugin.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/plugin/customHardmaxPlugin.h
new file mode 100644
index 0000000000000000000000000000000000000000..179a0a482ea246a7e265587a689863e603269631
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/plugin/customHardmaxPlugin.h
@@ -0,0 +1,140 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_HARDMAX_PLUGIN_H
+#define TRT_HARDMAX_PLUGIN_H
+
+#include "NvInferPlugin.h"
+#include <cublas_v2.h>
+#include <string>
+#include <vector>
+
+// One of the preferred ways of making TensorRT aware of our custom layer
+// is to extend the IPluginV2DynamicExt and IPluginCreator classes.
+// For the requirements on overridden functions, check the TensorRT API docs.
+
+// TensorRT plugin that writes a one-hot arg-max ("hardmax") along one axis of a single FP32 input.
+class HardmaxPlugin final : public nvinfer1::IPluginV2DynamicExt
+{
+public:
+    HardmaxPlugin() = delete;
+    HardmaxPlugin(int32_t axis);
+    HardmaxPlugin(void const* serialData, size_t serialLength);
+    ~HardmaxPlugin() override;
+
+    // NOTE(review): these two helpers are only declared; customHardmaxPlugin.cpp does not define them.
+    template <typename TDataType>
+    TDataType const* pointer_const_cast(void const* const p);
+    template <typename TDataType>
+    TDataType* pointer_cast(void* p);
+
+    int32_t getNbOutputs() const noexcept override;
+
+    // DynamicExt plugins returns DimsExprs class instead of Dims
+    nvinfer1::DimsExprs getOutputDimensions(int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputDims,
+        nvinfer1::IExprBuilder& exprBuilder) noexcept override; // determine output dims based on input info
+
+    int32_t initialize() noexcept override;
+
+    void terminate() noexcept override;
+
+    size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs,
+        nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override;
+
+    int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc,
+        void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override;
+
+    size_t getSerializationSize() const noexcept override;
+
+    void serialize(void* buffer) const noexcept override;
+
+    bool supportsFormatCombination(
+        int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override;
+
+    char const* getPluginType() const noexcept override;
+
+    char const* getPluginVersion() const noexcept override;
+
+    nvinfer1::IPluginV2DynamicExt* clone() const noexcept override;
+
+    void destroy() noexcept override;
+
+    nvinfer1::DataType getOutputDataType(
+        int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override;
+
+    void attachToContext(
+        cudnnContext* cudnn, cublasContext* cublas, nvinfer1::IGpuAllocator* allocator) noexcept override;
+
+    void detachFromContext() noexcept override;
+
+    void setPluginNamespace(char const* pluginNamespace) noexcept override;
+
+    char const* getPluginNamespace() const noexcept override;
+
+    void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs,
+        nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override;
+
+private:
+    std::string mNamespace;
+
+    // Number of elements in the axis along which hardmax is performed.
+    int32_t mAxisSize{0};
+
+    // Product of dimensions before and after mAxis.
+    // For example, input dimensions [3, 4, 5, 6, 7] with mAxis = 2 give mDimProductOuter = 12, mDimProductInner = 42.
+    int32_t mDimProductOuter{1};
+    int32_t mDimProductInner{1};
+
+    cublasHandle_t mCublas{nullptr}; // set by attachToContext(); null until then so clone() copies a defined value
+
+    // Attributes
+    // Axis along which to perform hardmax.
+    // Can be negative initially, but once configurePlugin() is called it will
+    // be converted to a positive axis.
+    int32_t mAxis{-1};
+};
+
+class HardmaxPluginCreator : public nvinfer1::IPluginCreator // factory for HardmaxPlugin objects
+{
+public:
+    HardmaxPluginCreator(); // fills mPluginAttributes / mFC with the "axis" field description
+
+    ~HardmaxPluginCreator() override = default;
+
+    char const* getPluginName() const noexcept override;
+
+    char const* getPluginVersion() const noexcept override;
+
+    nvinfer1::PluginFieldCollection const* getFieldNames() noexcept override;
+
+    nvinfer1::IPluginV2DynamicExt* createPlugin(
+        char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept override;
+
+    nvinfer1::IPluginV2DynamicExt* deserializePlugin(
+        char const* name, void const* serialData, size_t serialLength) noexcept override;
+
+    void setPluginNamespace(char const* pluginNamespace) noexcept override;
+
+    char const* getPluginNamespace() const noexcept override;
+
+private:
+    nvinfer1::PluginFieldCollection mFC;                  // count + pointer view over mPluginAttributes
+    std::vector<nvinfer1::PluginField> mPluginAttributes; // field descriptions exposed by getFieldNames()
+    std::string mNamespace;                               // set via setPluginNamespace()
+};
+
+#endif // TRT_HARDMAX_PLUGIN_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..34c96c8576489dbc994781155ba85353e16d9bd8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/requirements.txt
@@ -0,0 +1,13 @@
+nltk==3.9.1
+onnx==1.16.0
+--extra-index-url https://pypi.ngc.nvidia.com
+onnx-graphsurgeon>=0.3.20
+wget>=3.2
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/sample.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/sample.py
new file mode 100644
index 0000000000000000000000000000000000000000..25d4ca36a37f1c6e5fec93e37083daec123e86d0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/sample.py
@@ -0,0 +1,191 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+
+import tensorrt as trt
+
+from model import TRT_MODEL_PATH
+from load_plugin_lib import load_plugin_lib
+
+# ../common.py
+parent_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)
+sys.path.insert(1, parent_dir)
+import common
+
+# Reuse some BiDAF-specific methods
+# ../engine_refit_onnx_bidaf/data_processing.py
+sys.path.insert(1, os.path.join(parent_dir, "engine_refit_onnx_bidaf"))
+from engine_refit_onnx_bidaf.data_processing import preprocess, get_inputs
+
+# Maximum number of words in context or query text.
+# Used in optimization profile when building engine.
+# Adjustable.
+MAX_TEXT_LENGTH = 64
+
+WORKING_DIR = os.environ.get("TRT_WORKING_DIR") or os.path.dirname(
+    os.path.realpath(__file__)
+)
+
+# Path to which the serialized TensorRT engine will be saved (check README.md)
+ENGINE_FILE_PATH = os.path.join(WORKING_DIR, "bidaf.trt")
+
+# Define global logger object (it should be a singleton,
+# available for TensorRT from anywhere in code).
+# You can set the logger severity higher to suppress messages
+# (or lower to display more messages)
+TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+
+
+# Builds TensorRT Engine
+def build_engine(model_path):
+
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network(0)
+    config = builder.create_builder_config()
+    config.set_tactic_sources(
+        config.get_tactic_sources() | 1 << int(trt.TacticSource.CUBLAS)
+    )
+    parser = trt.OnnxParser(network, TRT_LOGGER)
+    runtime = trt.Runtime(TRT_LOGGER)
+
+    # Parse model file
+    print("Loading ONNX file from path {}...".format(model_path))
+    with open(model_path, "rb") as model:
+        print("Beginning ONNX file parsing")
+        if not parser.parse(model.read()):
+            print("ERROR: Failed to parse the ONNX file.")
+            for error in range(parser.num_errors):
+                print(parser.get_error(error))
+            return None
+    print("Completed parsing of ONNX file")
+
+    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, common.GiB(1))
+
+    # The input text length is variable, so we need to specify an optimization profile.
+    profile = builder.create_optimization_profile()
+    for i in range(network.num_inputs):
+        input = network.get_input(i)
+        assert input.shape[0] == -1
+        min_shape = [1] + list(input.shape[1:])
+        opt_shape = [8] + list(input.shape[1:])
+        max_shape = [MAX_TEXT_LENGTH] + list(input.shape[1:])
+        profile.set_shape(input.name, min_shape, opt_shape, max_shape)
+    config.add_optimization_profile(profile)
+
+    print("Building TensorRT engine. This may take a few minutes.")
+    plan = builder.build_serialized_network(network, config)
+    engine = runtime.deserialize_cuda_engine(plan)
+    with open(ENGINE_FILE_PATH, "wb") as f:
+        f.write(plan)
+    return engine
+
+
+def load_test_case(inputs, context_text, query_text, trt_context):
+    # Part 1: Specify Input shapes
+    cw, cc = preprocess(context_text)
+    qw, qc = preprocess(query_text)
+    for arr in (cw, cc, qw, qc):
+        assert arr.shape[0] <= MAX_TEXT_LENGTH, (
+            "Input context or query is too long! "
+            + "Either decrease the input length or increase MAX_TEXT_LENGTH"
+        )
+    trt_context.set_input_shape("CategoryMapper_4", cw.shape)
+    trt_context.set_input_shape("CategoryMapper_5", cc.shape)
+    trt_context.set_input_shape("CategoryMapper_6", qw.shape)
+    trt_context.set_input_shape("CategoryMapper_7", qc.shape)
+
+    # Part 2: load input data
+    cw_flat, cc_flat, qw_flat, qc_flat = get_inputs(context_text, query_text)
+    for i, arr in enumerate([cw_flat, cc_flat, qw_flat, qc_flat]):
+        inputs[i].host = arr
+
+
+def main():
+    # Load the shared object file containing the Hardmax plugin implementation.
+    # By doing this, you will also register the Hardmax plugin with the TensorRT
+    # PluginRegistry through use of the macro REGISTER_TENSORRT_PLUGIN present
+    # in the plugin implementation. Refer to plugin/customHardmaxPlugin.cpp for more details.
+    load_plugin_lib()
+
+    # Load pretrained model
+    if not os.path.isfile(TRT_MODEL_PATH):
+        raise IOError(
+            "\n{}\n{}\n{}\n".format(
+                "Failed to load model file ({}).".format(TRT_MODEL_PATH),
+                "Please use 'python3 model.py' to generate the ONNX model.",
+                "For more information, see README.md",
+            )
+        )
+
+    if os.path.exists(ENGINE_FILE_PATH):
+        print(f"Loading saved TRT engine from {ENGINE_FILE_PATH}")
+        with open(ENGINE_FILE_PATH, "rb") as f:
+            runtime = trt.Runtime(TRT_LOGGER)
+            runtime.max_threads = 10
+            engine = runtime.deserialize_cuda_engine(f.read())
+    else:
+        print("Engine plan not saved. Building new engine...")
+        engine = build_engine(TRT_MODEL_PATH)
+
+    inputs, outputs, bindings, stream = common.allocate_buffers(engine, profile_idx=0)
+
+    testcases = [
+        (
+            "Garry the lion is 5 years old. He lives in the savanna.",
+            "Where does the lion live?",
+        ),
+        ("A quick brown fox jumps over the lazy dog.", "What color is the fox?"),
+    ]
+
+    print("\n=== Testing ===")
+
+    interactive = "--interactive" in sys.argv
+    if interactive:
+        context_text = input("Enter context: ")
+        query_text = input("Enter query: ")
+        testcases = [(context_text, query_text)]
+
+    trt_context = engine.create_execution_context()
+    for context_text, query_text in testcases:
+
+        context_words, _ = preprocess(context_text)
+
+        load_test_case(inputs, context_text, query_text, trt_context)
+        if not interactive:
+            print(f"Input context: {context_text}")
+            print(f"Input query: {query_text}")
+        trt_outputs = common.do_inference(
+            trt_context,
+            engine=engine,
+            bindings=bindings,
+            inputs=inputs,
+            outputs=outputs,
+            stream=stream,
+        )
+        start = trt_outputs[1].item()
+        end = trt_outputs[0].item()
+        answer = context_words[start : end + 1].flatten()
+        print(f"Model prediction: ", " ".join(answer))
+        print()
+    common.free_buffers(inputs, outputs, stream)
+    print("Passed")
+
+
+if __name__ == "__main__":  # run main() only when this file is executed as a script
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/test_custom_hardmax_plugin.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/test_custom_hardmax_plugin.py
new file mode 100644
index 0000000000000000000000000000000000000000..59b08b067162b5fa944ff19b89052f786210daa8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_custom_plugin/test_custom_hardmax_plugin.py
@@ -0,0 +1,102 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import os
+import sys
+import tensorrt as trt
+
+# ../common.py
+parent_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)
+sys.path.insert(1, parent_dir)
+import common
+
+from load_plugin_lib import load_plugin_lib
+
+TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
+
+
+def hardmax_reference_impl(arr, axis):
+    one_hot = np.zeros(arr.shape, dtype=arr.dtype)
+    argmax = np.expand_dims(np.argmax(arr, axis), axis)
+    np.put_along_axis(one_hot, argmax, 1, axis=axis)
+    return one_hot
+
+
+def make_trt_network_and_engine(input_shape, axis):
+    registry = trt.get_plugin_registry()
+    plugin_creator = registry.get_plugin_creator("CustomHardmax", "1")
+    axis_buffer = np.array([axis])
+    axis_attr = trt.PluginField("axis", axis_buffer, type=trt.PluginFieldType.INT32)
+    field_collection = trt.PluginFieldCollection([axis_attr])
+    plugin = plugin_creator.create_plugin(
+        name="CustomHardmax", field_collection=field_collection
+    )
+
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network(0)
+    config = builder.create_builder_config()
+    config.set_tactic_sources(
+        config.get_tactic_sources() | 1 << int(trt.TacticSource.CUBLAS)
+    )
+    runtime = trt.Runtime(TRT_LOGGER)
+
+    input_layer = network.add_input(
+        name="input_layer", dtype=trt.float32, shape=input_shape
+    )
+    hardmax = network.add_plugin_v2(inputs=[input_layer], plugin=plugin)
+    network.mark_output(hardmax.get_output(0))
+
+    plan = builder.build_serialized_network(network, config)
+    engine = runtime.deserialize_cuda_engine(plan)
+
+    return engine
+
+
+def custom_plugin_impl(input_arr, engine):
+    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
+    context = engine.create_execution_context()
+    inputs[0].host = input_arr.astype(trt.nptype(trt.float32))
+    trt_outputs = common.do_inference(
+        context,
+        engine=engine,
+        bindings=bindings,
+        inputs=inputs,
+        outputs=outputs,
+        stream=stream,
+    )
+    output = trt_outputs[0].copy()
+    common.free_buffers(inputs, outputs, stream)
+    return output
+
+
+def main():
+    load_plugin_lib()
+    for num_dims in range(1, 8):
+        for axis in range(-num_dims, num_dims):
+            shape = np.random.randint(1, 4, size=num_dims)
+            arr = np.random.rand(*shape)
+            arr = (arr - 0.5) * 200
+            engine = make_trt_network_and_engine(shape, axis)
+            res1 = hardmax_reference_impl(arr, axis)
+            res2 = custom_plugin_impl(arr, engine).reshape(res1.shape)
+            assert np.all(res1 == res2), f"Test failed for shape={shape}, axis={axis}"
+    print("Passed")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..013456648c6187bbc4db5146073ac3db0e4a763e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/README.md
@@ -0,0 +1,109 @@
+# TensorRT Inference of ONNX models with custom layers.
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+- [Prerequisites](#prerequisites)
+- [Running the sample](#running-the-sample)
+    * [Preparing packnet](#preparing-packnet)
+    * [Conversion to ONNX](#conversion-to-onnx)
+    * [Inference with TensorRT](#inference-with-tensorrt)
+    * [Sample `--help` options](#sample-help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, samplePackNet, is a Python sample which uses TensorRT to perform inference with the PackNet network. PackNet is a self-supervised monocular depth estimation network used in autonomous driving.
+
+
+## How does this sample work?
+
+This sample converts the PyTorch graph into ONNX and uses the ONNX parser included in TensorRT to parse the ONNX graph. The sample also demonstrates the following:
+
+* Use of custom layers (plugins) in ONNX graph. These plugins would be automatically registered in TensorRT by using `REGISTER_TENSORRT_PLUGIN` API.
+* Use of ONNX-graphsurgeon (ONNX-GS) API to modify layers or subgraphs in the ONNX graph. For this network, we transform Group Normalization, upsample and pad layers to remove unnecessary
+  nodes for inference with TensorRT.
+
+
+## Prerequisites
+
+1. Upgrade pip version and install the sample dependencies.
+    ```bash
+    pip3 install --upgrade pip
+    pip3 install -r requirements.txt
+    ```
+
+On PowerPC systems, you will need to manually install PyTorch using IBM's [PowerAI](https://www.ibm.com/support/knowledgecenter/SS5SF7_1.6.0/navigation/pai_install.htm).
+
+
+## Running the sample
+
+### Preparing packnet
+
+Clone the [packnet](https://github.com/TRI-ML/packnet-sfm) repository and update `PYTHONPATH`.
+
+```
+git clone https://github.com/TRI-ML/packnet-sfm.git packnet-sfm
+pushd packnet-sfm && git checkout tags/v0.1.2 && popd
+export PYTHONPATH=$PWD/packnet-sfm # Note on Windows, the export command is: set PYTHONPATH=%cd%\packnet-sfm
+```
+
+### Conversion to ONNX
+Run the following command to convert the PackNet PyTorch network to an ONNX graph. This step also includes handling custom layers (Group Normalization) and using ONNX-GS to modify upsample and pad layers.
+
+```
+python3 convert_to_onnx.py --output model.onnx
+```
+
+### Inference with TensorRT
+
+Once the ONNX graph is generated, use `trtexec` tool (located in `bin` directory of TensorRT package) to perform inference on a random input image.
+
+```
+trtexec --onnx=model.onnx
+```
+
+Please refer to `trtexec` tool for more commandline options.
+
+### Sample --help options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option. For example:
+```
+convert_to_onnx.py -h
+```
+
+# Additional resources
+
+The following resources provide a deeper understanding about PackNet network and importing a model into TensorRT using Python:
+
+**PackNet**
+- [3D Packing for Self-Supervised Monocular Depth Estimation](https://arxiv.org/pdf/1905.02693.pdf)
+- [TRI-ML Monocular Depth Estimation Repository](https://github.com/TRI-ML/packnet-sfm)
+
+**Parsers**
+- [ONNX Parser](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/python_api/parsers/Onnx/pyOnnx.html)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [Importing A Model Using A Parser In Python](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#import_model_python)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+August 2023: 
+  - Update ONNX version support to 1.14.0
+  - Removed support for Python versions < 3.8.
+August 2021: Update sample to work with latest torch version
+June 2020: Initial release of this sample
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/convert_to_onnx.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/convert_to_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..72c31b726eaa89c2128a547189f10760772df69d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/convert_to_onnx.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx
+import torch
+import numpy as np
+import argparse
+import onnx_graphsurgeon as gs
+from post_processing import *
+from packnet_sfm.networks.depth.PackNet01 import PackNet01
+
+
+def post_process_packnet(model_file, opset=11):
+    """
+    Use ONNX graph surgeon to replace upsample and instance normalization nodes. Refer to post_processing.py for details.
+    Args:
+        model_file : Path to ONNX file
+    """
+    # Load the packnet graph
+    graph = gs.import_onnx(onnx.load(model_file))
+
+    if opset >= 11:
+        graph = process_pad_nodes(graph)
+
+    # Replace the subgraph of upsample with a single node with input and scale factor.
+    if torch.__version__ < "1.5.0":
+        graph = process_upsample_nodes(graph, opset)
+
+    # Convert the group normalization subgraph into a single plugin node.
+    graph = process_groupnorm_nodes(graph)
+
+    # Remove unused nodes, and topologically sort the graph.
+    graph.cleanup().toposort()
+
+    # Export the onnx graph from graphsurgeon
+    onnx.save_model(gs.export_onnx(graph), model_file)
+
+    print("Saving the ONNX model to {}".format(model_file))
+
+
+def build_packnet(model_file, args):
+    """
+    Construct the packnet network and export it to ONNX
+    """
+    input_pyt = torch.randn((1, 3, 192, 640), requires_grad=False)
+
+    # Build the model
+    model_pyt = PackNet01(version="1A")
+
+    # Convert the model into ONNX
+    torch.onnx.export(
+        model_pyt, input_pyt, model_file, verbose=args.verbose, opset_version=args.opset
+    )
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Exports PackNet01 to ONNX, and post-processes it to insert TensorRT plugins"
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        help="Path to save the generated ONNX model",
+        default="model.onnx",
+    )
+    parser.add_argument(
+        "-op", "--opset", type=int, help="ONNX opset to use", default=11
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Flag to enable verbose logging for torch.onnx.export",
+    )
+    args = parser.parse_args()
+
+    # Construct the packnet graph and generate the onnx graph
+    build_packnet(args.output, args)
+
+    # Perform post processing on Instance Normalization and upsampling nodes and create a new ONNX graph
+    post_process_packnet(args.output, args.opset)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/download.yml b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/download.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cc0f3725fdc18aac57d09d51ac2923e64f45f0e3
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/download.yml
@@ -0,0 +1,21 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+sample: onnx_packnet
+files:
+  - path: samples/python/onnx_packnet/packnet-sfm-0.1.2.zip
+    url: https://github.com/TRI-ML/packnet-sfm/archive/v0.1.2.zip
+    checksum: 7a73db591d3955ccf407910cd928d9c0
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/post_processing.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/post_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..33adcf1d72100f3d1fe067dbe150454d5f3d7ceb
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/post_processing.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import argparse
+import onnx
+import numpy as np
+import torch
+
+
+# Pad layer subgraph structure in ONNX (specific to opset 11):
+#               Constant
+#                  |
+#                Shape
+#                  |
+#         Mul   Gather
+#          \     /
+#            Sub
+#             |
+#       ConstantOfShape
+#             |
+#          Concat
+#             |
+#          Reshape
+#             |
+#           Slice
+#             |
+#          Transpose
+#             |
+#          Reshape
+#             |
+#    Input  Cast  Constant
+#       \     |    /
+#            Pad
+def process_pad_nodes(graph):
+    """
+    Fold the pad subgraph into a single layer with pad values as input
+      Input
+       |
+      Pad
+       |
+      Conv
+    """
+    pad_nodes = [node for node in graph.nodes if node.op == "Pad"]
+    for node in pad_nodes:
+        fold_pad_inputs(node, graph)
+
+    return graph
+
+
+def fold_pad_inputs(node, graph):
+    # Gather the amount of padding in each dimension from pytorch graph.
+    if torch.__version__ < "1.5.0":
+        pad_values_pyt = (
+            node.i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(0).attrs["value"].values
+        )
+    elif torch.__version__ < "2.0.0":
+        pad_values_pyt = node.i(1).i(0).i(0).i(0).i(0).i(0).inputs[0].values
+    else:
+        pad_values_pyt = node.i(1).i(0).i(0).i(0).i(0).i(0).i(0).attrs["value"].values
+
+    # Assumption a 4d input tensor
+    onnx_pad_values = [0] * 4 * 2  # 4d tensor and 2 sides padding for each dimension
+    j = 3
+    for i in range(0, len(pad_values_pyt), 2):
+        onnx_pad_values[j] = pad_values_pyt[i]
+        onnx_pad_values[j + 4] = pad_values_pyt[i + 1]
+        j -= 1
+
+    # Change the existing pad tensor to the new onnx_pad values tensor
+    pads_folded_tensor = gs.Constant(
+        name=node.inputs[1].name, values=np.array(onnx_pad_values)
+    )
+    node.inputs[1] = pads_folded_tensor
+
+
+# Pytorch-exported Upsample structure in ONNX:
+#        Mul        Mul
+#         |          |
+#        Cast       Cast
+#         |          |
+#        Floor      Floor
+#         |          |
+#      Unsqueeze  Unsqueeze
+#         \         /
+#           Concat
+#             |
+#            Cast    Cast
+#              \      /
+#                Div
+#                 |
+#     Input     Concat
+#       \         /
+#         Upsample
+def process_upsample_nodes(graph, opset=11):
+    """
+    Replace the upsample structure with structure below
+      Conv   scale_factor
+       |      /
+      Upsample
+       |
+      ReLU
+    """
+    if opset >= 11:
+        upsample_layer_name = "Resize"
+    else:
+        upsample_layer_name = "Upsample"
+
+    upsample_nodes = [node for node in graph.nodes if node.op == upsample_layer_name]
+    for node in upsample_nodes:
+        fold_upsample_inputs(node, graph, opset)
+
+    return graph
+
+
+def fold_upsample_inputs(upsample, graph, opset=11):
+    """
+    Inplace transformation of the graph. The upsample subgraph is collapsed
+    to single upsample node with input and scale factor (constant tensor).
+    Args:
+        upsample: upsample node in the original graph.
+        graph: graph object (not used by this function).
+        opset: ONNX opset of the graph. 9 takes the Upsample path; every other value
+            takes the Resize path.
+    """
+
+    if opset == 9:
+        # Gather the scale factor from mul op in the upsample input subgraph
+        scale_factor = (
+            upsample.i(1).i(1).i(0).i(0).i(0).i(0).i(0).i(0).i(1).attrs["value"].values
+        )
+
+        # Create the new scales tensor
+        # The first two scales are 1.0 and the last two are scale_factor
+        # (presumably N, C, H, W order - TODO confirm)
+        scales = np.array([1.0, 1.0, scale_factor, scale_factor], dtype=np.float32)
+        scale_tensor = gs.Constant(name=upsample.inputs[-1].name, values=scales)
+
+        # Change the last input to the node to the new constant scales tensor.
+        upsample.inputs[-1] = scale_tensor
+    else:
+        # In opset 11, upsample layer is exported as Resize. We will transform this Resize layer into an Upsample layer
+        # and collapse the input
+        # The new constant reuses the name of the node's fourth input
+        sizes_tensor_name = upsample.inputs[3].name
+
+        # Create the new scales tensor
+        scale_factor = (
+            upsample.i(3).i(1).i().i().i().i().i(0).i(1).attrs["value"].values
+        )
+        scales = np.array([1.0, 1.0, scale_factor, scale_factor], dtype=np.float32)
+        scale_tensor = gs.Constant(name=sizes_tensor_name, values=scales)
+
+        # Rename the Resize op to upsample and add the data and scales as inputs to the upsample layer.
+        # All other original inputs of the node are dropped here.
+        input_tensor = upsample.inputs[0]
+        upsample.inputs = [input_tensor, scale_tensor]
+        upsample.op = "Upsample"
+
+
+# Pytorch-exported GroupNorm subgraph in ONNX:
+# Conv
+#   |
+# Reshape    Scale    Bias
+#     \       |       /
+# InstanceNormalization
+#         |
+#      Reshape    Unsqueeze
+#          \      /
+#        Mul (scale)   Unsqueeze
+#           \         /
+#           Add (bias)
+#              |
+#            ReLU
+def process_groupnorm_nodes(graph):
+    """
+    Gather the instance normalization nodes and the rest of the subgraph
+    and convert into a single group normalization node.
+    """
+    instancenorms = [node for node in graph.nodes if node.op == "InstanceNormalization"]
+    for node in instancenorms:
+        convert_to_groupnorm(node, graph)
+
+    return graph
+
+
+def retrieve_attrs(instancenorm):
+    """
+    Gather the required attributes for the GroupNorm plugin from the subgraph.
+    Args:
+        instancenorm: Instance Normalization node in the graph.
+    """
+    attrs = {}
+    # The 2nd dimension of the Reshape shape is the number of groups
+    attrs["num_groups"] = instancenorm.i().i(1).attrs["value"].values[1]
+    attrs["eps"] = instancenorm.attrs["epsilon"]
+
+    # 1 is the default plugin version the parser will search for, and therefore can be omitted,
+    # but we include it here for illustrative purposes.
+    attrs["plugin_version"] = "1"
+
+    # "" is the default plugin namespace the parser will use, included here for illustrative purposes
+    attrs["plugin_namespace"] = ""
+
+    return attrs
+
+
+def convert_to_groupnorm(instancenorm, graph):
+    """
+    Convert the Pytorch-exported GroupNorm subgraph to the subgraph below
+    Conv
+      |
+    GroupNorm
+      |
+    ReLU
+    Attributes:
+        instancenorm: Instance Normalization node in the graph.
+        graph: Input graph object
+    """
+    # Retrieve the instancenorm attributes and create the replacement node
+    attrs = retrieve_attrs(instancenorm)
+    groupnorm = gs.Node(op="GroupNormalizationPlugin", attrs=attrs)
+    graph.nodes.append(groupnorm)
+
+    # The plugin needs to receive an input from the Conv node, and output to the ReLU node
+    conv_output_tensor = instancenorm.i().inputs[0]  # Output of Conv
+    relu_input_tensor = instancenorm.o().o().o().outputs[0]  # Output of Add
+
+    # Reconnect inputs/outputs to the groupnorm plugin
+    conv_output_tensor.outputs[0] = groupnorm
+    relu_input_tensor.inputs[0] = groupnorm
+
+    # Add scale and bias constant tensors to group norm plugin
+    if torch.__version__ < "1.5.0":
+        groupnorm.inputs.append(instancenorm.o().o().i(1).inputs[0])
+        groupnorm.inputs.append(instancenorm.o().o().o().i(1).inputs[0])
+    else:
+        groupnorm.inputs.append(instancenorm.o().o().inputs[1])
+        groupnorm.inputs.append(instancenorm.o().o().o().inputs[1])
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d672ad99d5f770c7c727798784a54bb7968e7462
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/onnx_packnet/requirements.txt
@@ -0,0 +1,10 @@
+onnx==1.16.0
+--extra-index-url https://pypi.ngc.nvidia.com
+onnx-graphsurgeon>=0.3.20
+torch
+torchvision
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/plugin_utils.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/plugin_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e286bb99abfcecae1d5f6fd60e3184baef6b4863
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/plugin_utils.py
@@ -0,0 +1,154 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from cuda import cuda, cudart, nvrtc
+import numpy as np
+import os
+import argparse
+import threading
+
+import tensorrt as trt
+import cupy as cp
+
+
+def parseArgs():
+    parser = argparse.ArgumentParser(
+        description="Options for Circular Padding plugin C++ example"
+    )
+    parser.add_argument(
+        "--precision",
+        type=str,
+        default="fp32",
+        choices=["fp32", "fp16"],
+        help="Precision to use for plugin",
+    )
+
+    return parser.parse_args()
+
+
+def volume(d):
+    return np.prod(d)
+
+
+# Taken from https://github.com/NVIDIA/cuda-python/blob/main/examples/common/helper_cuda.py
+def checkCudaErrors(result):
+    def _cudaGetErrorEnum(error):
+        if isinstance(error, cuda.CUresult):
+            err, name = cuda.cuGetErrorName(error)
+            return name if err == cuda.CUresult.CUDA_SUCCESS else "<unknown>"
+        elif isinstance(error, cudart.cudaError_t):
+            return cudart.cudaGetErrorName(error)[1]
+        elif isinstance(error, nvrtc.nvrtcResult):
+            return nvrtc.nvrtcGetErrorString(error)[1]
+        else:
+            raise RuntimeError("Unknown error type: {}".format(error))
+
+    if result[0].value:
+        raise RuntimeError(
+            "CUDA error code={}({})".format(
+                result[0].value, _cudaGetErrorEnum(result[0])
+            )
+        )
+    if len(result) == 1:
+        return None
+    elif len(result) == 2:
+        return result[1]
+    else:
+        return result[1:]
+
+def getComputeCapacity(devID):
+    major = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, devID))
+    minor = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, devID))
+    return (major, minor)
+
+
+# Taken from https://github.com/NVIDIA/cuda-python/blob/main/examples/common/common.py
+class KernelHelper:
+    # Compiles CUDA source with NVRTC and loads the result as a driver module;
+    # kernels are then looked up by name through getFunction().
+    def __init__(self, code, devID):
+        """
+        Compile `code` for the device `devID` and load it into `self.module`.
+        Args:
+            code: CUDA source as a str.
+            devID: Device id whose compute capability selects the target architecture.
+        Raises:
+            RuntimeError: If neither CUDA_HOME nor CUDA_PATH is set, or a CUDA call outside
+                the compile step fails. A compile failure prints the log and exits the process.
+        """
+        prog = checkCudaErrors(
+            nvrtc.nvrtcCreateProgram(str.encode(code), b"sourceCode.cu", 0, [], [])
+        )
+        # Locate the toolkit include directory: CUDA_HOME first, CUDA_PATH as a fallback
+        CUDA_HOME = os.getenv("CUDA_HOME")
+        if CUDA_HOME == None:
+            CUDA_HOME = os.getenv("CUDA_PATH")
+        if CUDA_HOME == None:
+            raise RuntimeError("Environment variable CUDA_HOME or CUDA_PATH is not set")
+        include_dirs = os.path.join(CUDA_HOME, "include")
+
+        # Initialize CUDA
+        checkCudaErrors(cudart.cudaFree(0))
+
+        major, minor = getComputeCapacity(devID)
+        _, nvrtc_minor = checkCudaErrors(nvrtc.nvrtcVersion())
+        # With an NVRTC minor version >= 1 target a real architecture (sm_XY) and fetch a CUBIN;
+        # otherwise target a virtual architecture (compute_XY) and fetch PTX.
+        use_cubin = nvrtc_minor >= 1
+        prefix = "sm" if use_cubin else "compute"
+        arch_arg = bytes(f"--gpu-architecture={prefix}_{major}{minor}", "ascii")
+
+        try:
+            opts = [
+                b"--fmad=true",
+                arch_arg,
+                "--include-path={}".format(include_dirs).encode("UTF-8"),
+                b"--std=c++11",
+                b"-default-device",
+            ]
+            checkCudaErrors(nvrtc.nvrtcCompileProgram(prog, len(opts), opts))
+        except RuntimeError as err:
+            # Compilation failed: print the NVRTC log and the error, then terminate the process
+            logSize = checkCudaErrors(nvrtc.nvrtcGetProgramLogSize(prog))
+            log = b" " * logSize
+            checkCudaErrors(nvrtc.nvrtcGetProgramLog(prog, log))
+            print(log.decode())
+            print(err)
+            exit(-1)
+
+        # Pre-size a bytes buffer and pass it to NVRTC to receive the compiled image
+        if use_cubin:
+            dataSize = checkCudaErrors(nvrtc.nvrtcGetCUBINSize(prog))
+            data = b" " * dataSize
+            checkCudaErrors(nvrtc.nvrtcGetCUBIN(prog, data))
+        else:
+            dataSize = checkCudaErrors(nvrtc.nvrtcGetPTXSize(prog))
+            data = b" " * dataSize
+            checkCudaErrors(nvrtc.nvrtcGetPTX(prog, data))
+
+        self.module = checkCudaErrors(cuda.cuModuleLoadData(np.char.array(data)))
+
+    def getFunction(self, name):
+        """Return the function called `name` from the loaded module."""
+        return checkCudaErrors(cuda.cuModuleGetFunction(self.module, name))
+
+
+class CudaCtxManager(trt.IPluginResource):
+    def __init__(self, device=None):
+        trt.IPluginResource.__init__(self)
+        self.device = device
+        self.cuda_ctx = None
+
+    def clone(self):
+        cloned = CudaCtxManager()
+        cloned.__dict__.update(self.__dict__)
+        # Delay the CUDA ctx creation until clone()
+        # since only a cloned resource is registered by TRT
+        _, cloned.cuda_ctx = cuda.cuCtxCreate(0, self.device)
+        return cloned
+
+    def release(self):
+        checkCudaErrors(cuda.cuCtxDestroy(self.cuda_ctx))
+
+class UnownedMemory:
+    def __init__(self, ptr, shape, dtype):
+        mem = cp.cuda.UnownedMemory(ptr, volume(shape) * cp.dtype(dtype).itemsize, self)
+        cupy_ptr = cp.cuda.MemoryPointer(mem, 0)
+        self.d = cp.ndarray(shape, dtype=dtype, memptr=cupy_ptr)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/CMakeLists.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5d20f8dd743e90a1d6c38dc53c204d3059380e19
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/CMakeLists.txt
@@ -0,0 +1,169 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# We need cmake >= 3.8, since 3.8 introduced CUDA as a first class language
+cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
+project(CircPadPlugin LANGUAGES CXX CUDA)
+
+if(NOT MSVC)
+    # Enable all compile warnings
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-long-long -pedantic -Wno-deprecated-declarations")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wno-deprecated-declarations")
+endif()
+
+# Sets variable to a value if it is unset or evaluates to false (e.g. empty, 0, OFF, *-NOTFOUND), then logs it.
+macro(set_ifndef var val)
+    if(NOT ${var})
+        set(${var} ${val})
+    endif()
+    message(STATUS "Configurable variable ${var} set to ${${var}}")
+endmacro()
+
+# -------- CONFIGURATION --------
+if(NOT MSVC)
+    set_ifndef(TRT_LIB /usr/lib/x86_64-linux-gnu)
+    set_ifndef(TRT_INCLUDE /usr/include/x86_64-linux-gnu)
+    set_ifndef(CUDA_INC_DIR /usr/local/cuda/include)
+    set_ifndef(CUDA_LIB_DIR /usr/local/cuda)
+
+    find_program(NVCC_EXECUTABLE nvcc HINTS "${CUDA_LIB_DIR}/bin")
+
+    # Extract the CUDA version (CUDA_VER = <major>.<minor>) from `nvcc --version`
+    if(NVCC_EXECUTABLE)
+        execute_process(
+            COMMAND "${NVCC_EXECUTABLE}" --version
+            OUTPUT_VARIABLE NVCC_VERSION_OUTPUT
+            ERROR_VARIABLE NVCC_VERSION_ERROR
+            OUTPUT_STRIP_TRAILING_WHITESPACE)
+        # Parse the "release <major>.<minor>" token from the output
+        string(REGEX MATCH "release ([0-9]+)\\.([0-9]+)" CUDA_VERSION_MATCH "${NVCC_VERSION_OUTPUT}")
+        if(CUDA_VERSION_MATCH)
+            set(CUDA_VERSION_MAJOR "${CMAKE_MATCH_1}")
+            set(CUDA_VERSION_MINOR "${CMAKE_MATCH_2}")
+            set(CUDA_VER "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}")
+        else()
+            message(FATAL_ERROR "Could not parse CUDA version from nvcc output.")
+        endif()
+    else()
+        message(FATAL_ERROR "nvcc not found in ${CUDA_LIB_DIR}/bin")  # report the directory actually hinted to find_program
+    endif()
+
+    # cuda_ge(<major> <minor> <result_var>): sets <result_var> in the caller's scope to 1 when the
+    # detected CUDA_VER is greater than or equal to <major>.<minor>, and to 0 otherwise.
+    function(cuda_ge major minor result_var)
+        if(CUDA_VER VERSION_LESS "${major}.${minor}")
+            set(is_at_least 0)
+        else()
+            set(is_at_least 1)
+        endif()
+        # PARENT_SCOPE publishes the flag to the caller; is_at_least stays local to this function
+        set(${result_var}
+            ${is_at_least}
+            PARENT_SCOPE)
+    endfunction()
+
+    # Loop through minor versions from 0 to 9
+    foreach(minor RANGE 0 9)
+        set(result_var "CUDA_GE_11_${minor}")
+        cuda_ge(11 ${minor} ${result_var})
+    endforeach()
+
+    # Add checks for CUDA 12.x versions
+    foreach(minor RANGE 0 9)
+        set(result_var "CUDA_GE_12_${minor}")
+        cuda_ge(12 ${minor} ${result_var})
+    endforeach()
+
+    set(SAMPLE_SMS "75")
+
+    if(CUDA_GE_11_0)
+        list(APPEND SAMPLE_SMS "80")
+    endif()
+
+    if(CUDA_GE_11_1)
+        list(APPEND SAMPLE_SMS "86")
+    endif()
+
+    if(CUDA_GE_11_4)
+        list(APPEND SAMPLE_SMS "87")
+    endif()
+
+    if(CUDA_GE_11_8)
+        list(APPEND SAMPLE_SMS "89" "90")
+    endif()
+
+    # Blackwell support
+    if(CUDA_GE_12_8)
+        list(APPEND SAMPLE_SMS "100" "101" "120")
+    endif()
+
+    set(NON_HFC_SMS "89" "90" "100" "101" "120")
+
+    if(NOT DEFINED GENCODES)  # a GENCODES value defined by the user skips the derivation below
+        set(GENCODES "")
+
+        # Add -gencode flags for each SM in SAMPLE_SMS
+        foreach(sm ${SAMPLE_SMS})
+            list(APPEND GENCODES "-gencode=arch=compute_${sm},code=sm_${sm}")
+        endforeach()
+
+        # Filter out NON_HFC_SMS from SAMPLE_SMS to get HFC_SMS
+        set(HFC_SMS ${SAMPLE_SMS})
+        foreach(sm ${NON_HFC_SMS})
+            list(REMOVE_ITEM HFC_SMS "${sm}")
+        endforeach()
+
+        # Get the highest supported forward compatible SM
+        if(HFC_SMS)
+            list(SORT HFC_SMS)  # default SORT compares as strings; correct here because every SM left after filtering has two digits
+            list(GET HFC_SMS -1 GEN_PTX_SM)
+            # Add PTX generation flag
+            list(APPEND GENCODES "-gencode=arch=compute_${GEN_PTX_SM},code=compute_${GEN_PTX_SM}")
+        else()
+            message(WARNING "No hardware forward compatible SMs found. PTX generation skipped.")
+        endif()
+    endif()
+endif()
+
+message("\nThe following variables are derived from the values of the previous variables unless provided explicitly:\n")
+
+find_library(
+    _NVINFER_LIB nvinfer
+    HINTS ${TRT_LIB}
+    PATH_SUFFIXES lib lib64)
+set_ifndef(NVINFER_LIB ${_NVINFER_LIB})
+
+find_library(
+    _CUDA_LIB cuda
+    HINTS ${CUDA_LIB_DIR}
+    PATH_SUFFIXES lib/stubs lib64/stubs)
+set_ifndef(CUDA_LIB ${_CUDA_LIB})
+
+# -------- BUILDING --------
+
+add_library(circ_pad_plugin SHARED ${CMAKE_CURRENT_SOURCE_DIR}/circ_plugin_cpp/circ_pad_plugin.cu)  # relative to this CMakeLists.txt, so add_subdirectory() builds also work
+target_compile_options(circ_pad_plugin PRIVATE ${GENCODES})
+
+target_include_directories(
+    circ_pad_plugin
+    PUBLIC ${CUDA_INC_DIR}
+    PUBLIC ${TRT_INCLUDE})
+
+set_property(TARGET circ_pad_plugin PROPERTY CUDA_STANDARD 14)
+
+target_link_libraries(circ_pad_plugin PRIVATE ${NVINFER_LIB})
+target_link_libraries(circ_pad_plugin PRIVATE ${CUDA_LIB})
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fc625ddaca8f530ca7ad91598c23ba4a2f6fcdda
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/README.md
@@ -0,0 +1,167 @@
+# Python-based TRT Plugins
+
+This is a sample to showcase Python-based plugin definitions in TRT. No changes to existing TRT APIs have been made
+to deliver this feature, so using the updated bindings should not break any existing code.
+
+## Introduction
+
+Until TRT 9.1, plugin implementations could only be done through the TRT C++ API. To use a plugin in a Python app, one had to
+ - Implement plugin in C++ and build into a shared library
+ - Load plugin lib and register plugin creator (statically or dynamically)
+ - Retrieve plugin creator and create plugin instance through the respective Python API
+
+The following design considerations were followed in creating bindings to allow Python-based plugin definitions:
+ - Zero additional C++ code shall be required to implement, integrate and run a plugin within TensorRT
+ - Offer the flexibility to implement the kernel(s) for the plugin through any method of choice
+   - Many libraries have sprung up to provide CUDA kernel support with AOT/JIT compilation
+     - Numba, OpenAI Triton, CuPy etc.
+   - Could even do without explicit kernels (e.g. leverage PyTorch functional op)
+
+ - Will only support `IPluginV2DynamicExt` and `IPluginV3`-based plugins
+   - Other plugin interfaces (except `IPluginV2IOExt`) are deprecated since TRT 8.5
+
+With these bindings, plugins can be implemented and integrated to TRT purely with Python.
+
+## Setting Up The Build Environment
+
+To build and install the bindings, follow the instructions in `$TRT_OSSPATH/python/README.md`.
+
+Then install the requisite packages
+```bash
+cd $TRT_OSSPATH/samples/python/python_plugin
+pip3 install -r requirements.txt
+```
+Install `cupy-cuda11x` instead if testing on a CUDA 11.x environment.
+
+# TensorRT Plugin API for Python
+
+Implementing a TRT plugin in Python is similar to C++ in that implementation of `IPluginV2DynamicExt`+`IPluginCreator` or `IPluginV3`+`IPluginCreatorV3One` is necessary. Refer to the TensorRT Python API reference for a concise description.
+
+## Differences in C++ and Python APIs for `IPluginV2DynamicExt`
+The interface methods in Python have mostly similar APIs to their C++ counterparts, except for `serialize()` and `enqueue()`.
+ - While the C++ API for `serialize()` is `void serialize (void *buffer)` where the plugin writes to the passed-in `buffer`, the Python API is `serialize(self) -> bytes`, where the implementation of the method is expected to return a bytes object containing a serialized representation of the plugin object. 
+ - In `enqueue()`, the device pointers for input and output tensors are passed as their `intptr_t` casts. Since these buffers are created and owned by TRT, care must be taken when writing to them from the Python side.
+ - No bindings exist yet for `attachToContext()` and `detachFromContext()`, which are not pure virtual.
+
+# Running the sample: Circular padding plugin
+
+This sample contains a circular padding plugin, where the `enqueue` has been implemented with various frameworks for writing kernels or executing GPU ops (torch). 
+
+Each script accepts a command-line argument to choose precision from either FP32 or FP16. e.g.
+```bash
+python3 circ_pad_plugin_cuda_python.py --precision fp32 # fp32 or fp16
+```
+
+## Circular padding
+
+Circular padding is useful for ops like circular convolution in deep learning. The following image denotes how the original image (red) is circular padded once (green) and twice (blue):
+
+![alt text](circ_pad_example.png "Circular padding example")
+
+The plugin shall have the following characteristics:
+ - Input: 4-dimensional input (e.g. NxCxHxW)
+ - Attribute(s): m-dimensional parameter `pads` where $m$ is even and $m/2 \le 4$. `pads` denotes the amount of padding to apply before and after each of the $m/2$ last dimensions of the input tensor.
+ - Output: Padded tensor. Shape depends on `pads`.
+
+## Baseline: Using a C++ plugin
+
+To establish a baseline, we first demonstrate a C++ plugin implementing circular padding. The relevant files can be found in the `circ_plugin_cpp` folder: the included `CMakeLists.txt` can be used to build the shared library `libcirc_pad_plugin.so` / `circ_pad_plugin.dll`.
+
+```bash
+cd $TRT_OSSPATH/samples/python/python_plugin
+mkdir build && pushd build
+cmake .. && make -j
+popd
+python3 circ_pad_plugin_cpp.py --plugin-lib build/libcirc_pad_plugin.so
+```
+
+## Python plugin: cuda-python
+
+The cuda-python based implementation can be found in `circ_pad_plugin_cuda_python.py`. `cuda.nvrtc` is used to JIT compile a C/C++-based kernel, which is provided as a string. The compiled kernel is then launched through cuda-python's `cuda.cuLaunchKernel`.
+
+`circ_pad_plugin_cuda_python.py` demonstrates an ONNX-based workflow; `circ_pad_plugin_inetdef_cuda_python.py` demonstrates a workflow where the model is constructed through `INetworkDefinition`.
+
+## Python plugin: CuPy
+
+The CuPy-based implementation can be found in `circ_pad_plugin_cupy.py`. CuPy's `RawKernel` class has been used to provide the C/C++-based kernel implementation as a string. CuPy will JIT compile the kernel.
+
+## Python plugin: Triton (valid only on Linux)
+
+The same plugin can be implemented with a Triton-based kernel as well. The only other change would be to `enqueue`. The entire implementation can be found in `circ_pad_plugin_triton.py`.
+
+Some remarks:
+ - Triton also allows for JIT-able kernels.
+ - CuPy device arrays cannot be passed into Triton kernels directly -- only Torch arrays are accepted. However, we can use `torch.as_tensor()` to get around this constraint.
+ - Triton does not seem to allow the specification of a CUDA stream.
+
+## Python plugin: Numba
+
+The Numba implementation can be found in `circ_pad_plugin_numba.py`. Some remarks:
+ - Numba also allows for JIT-able kernels.
+ - CuPy device arrays can be passed into Numba kernels without issue since CuPy arrays implement `__cuda_array_interface__`.
+
+## Python plugin: Torch
+
+The flexibility of the `enqueue()` interface means that it is not always necessary to implement a custom kernel. In this case, PyTorch's [torch.nn.functional.pad](https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html) offers the exact same capability we want, so we can use that inside `enqueue()`, as in `circ_pad_plugin_torch.py`.
+
+## Python plugin: Multi-tactic, Multi-plugin (based on IPluginV3)
+
+The entire implementation can be found in `circ_pad_plugin_multi_tactic.py`.
+
+### Custom tactics
+
+When multiple options are available to compute the same op, and it's not possible to reliably predict which one will be faster for the expected input shapes/types or the target platform,
+it is useful to ask TensorRT to time all available options during the build stage. In V2 plugins, TensorRT would only time different type/format combinations supported by the plugin, but
+V3 plugins allow users to specify any number of custom tactics to time also (in addition to type/format combinations).
+
+In this example, we specify two custom tactics: PyTorch's [torch.nn.functional.pad](https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html) and a custom kernel written using OpenAI Triton.
+
+It is possible to advertise tactics specific to a format combination. e.g. In this sample, we can support both tactics for FP32 I/O, and only support the OpenAI Triton tactic for FP16 I/O. To achieve this, return in `get_valid_tactics()` the set of tactics `T(f)` supported by the plugin for the format combination `f` indicated by the immediately preceding call to `configure_plugin()`. To enable this behavior in this sample, pass the flag `--per-format-tactics`. 
+
+### Multiple plugin instances
+
+Imagine that you expect to have multiple instances of the same plugin in your network, which would operate on separate inputs, but where the input and output shapes/formats, as well
+as other determining plugin attributes would be the same. With V2 plugins, TensorRT would time all such plugin instances during the engine build -- however, this would be inefficient because the only salient difference between those instances is the values of the input tensors.
+
+To communicate to TensorRT that you would like the timing for similar plugin instances to be cached, V3 plugins allow for the specification of a timing cache ID. The timing cache ID
+should only capture timing determinants extraneous to plugin I/O, like their shapes and formats. Typically, this would be the values of any plugin attributes that might be different
+between the plugin instances. 
+
+In this example,
+ - The shape of the `pads` parameter affects timing, but only as far as it affects the output shape. Therefore, the timing cache ID could be an empty string.
+ - We consider a scenario where there are two circular padding plugin instances with identical configurations. Therefore, only a single instance should be timed by TensorRT.
+   This can be verified by inspecting the log.
+
+# Limitations
+
+ - Plugins cannot be serialized into the engine (in contrast to `IBuilderConfig::setPluginsToSerialize()`)
+   - Plugin class and Plugin Creator class must exist in the module where the engine is deserialized
+ - The engine / ONNX model cannot be run from outside Python (e.g. with `trtexec`)
+   - This functionality is possible to implement but comes at the cost of embedding the Python interpreter to the TRT runtime / the binary loading the engine
+ - (For `IPluginV2DynamicExt` only) No bindings yet for `attachToContext()` and `detachFromContext()` which are not pure virtual.
+ - `circ_pad_plugin_torch.py` may work on aarch64 platforms but is unsupported.
+
+# FAQ
+
+1. What are the performance impacts of a Python-based plugin versus a C++ one?
+
+   In preliminary testing, the Python overhead was found to be minimal to negligible. In fact, if the kernels were compiled AOT (instead of JIT), the CuPy and Triton
+   versions of the plugin were as performant as the C++ one. However, with Numba, there seems to be a significant kernel launch overhead.
+
+2. Can I deploy a TRT engine including a Python plugin in a runtime environment without Python?
+
+   No. There is no way to fully embed a Python plugin into the engine that allows for it to be executed without the need for Python during inference time.
+
+   This design principle is what allows for the `enqueue()` to be implemented in any framework of choice.
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+July 2023: Initial release of this sample
+
+# Known issues
+
+There are no known issues in this sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_example.png b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_example.png
new file mode 100644
index 0000000000000000000000000000000000000000..76a8e9fd16cb91b46d41d08827b667fc526e4000
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_example.png differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cpp.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cpp.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f8f87f7a930bb285413c5028f02de633b6f8001
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cpp.py
@@ -0,0 +1,97 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import ctypes
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    EngineFromNetwork,
+    NetworkFromOnnxPath,
+    TrtRunner,
+)
+
+
+def parseArgs():
+    """Parse the command-line options of the C++ plugin example (--precision, --plugin-lib)."""
+    cli = argparse.ArgumentParser(
+        description="Options for Circular Padding plugin C++ example"
+    )
+    cli.add_argument(
+        "--precision",
+        type=str,
+        default="fp32",
+        choices=["fp32", "fp16"],
+        help="Precision to use for plugin",
+    )
+    cli.add_argument(
+        "--plugin-lib",
+        type=str,
+        help="Path to the Circular Padding plugin lib",
+        required=True,
+    )
+    # --plugin-lib is mandatory; --precision falls back to "fp32"
+    return cli.parse_args()
+
+
+if __name__ == "__main__":
+
+    args = parseArgs()
+
+    handle = ctypes.CDLL(args.plugin_lib)
+    if not handle:
+        raise RuntimeError("Could not load Circular Padding plugin library")
+
+    precision = np.float32 if args.precision == "fp32" else np.float16
+    inp_shape = (10, 3, 32, 32)
+    X = np.random.normal(size=inp_shape).astype(precision)
+
+    pads = (1, 1, 1, 1)
+
+    # create ONNX model
+    onnx_path = f"test_CircPadPlugin_cpp_{args.precision}.onnx"
+    inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision)
+    Y = gs.Variable(name="Y", dtype=precision)
+    myPluginNode = gs.Node(
+        name="CircPadPlugin",
+        op="CircPadPlugin",
+        inputs=[inputA],
+        outputs=[Y],
+        attrs={"pads": pads},
+    )
+    graph = gs.Graph(nodes=[myPluginNode], inputs=[inputA], outputs=[Y], opset=16)
+    onnx.save(gs.export_onnx(graph), onnx_path)
+
+    # build engine
+    build_engine = EngineFromNetwork(
+        NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16)
+    )
+
+    Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+    # Run
+    with TrtRunner(build_engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+
+        if np.allclose(Y, Y_ref):
+            print("Inference result correct!")
+        else:
+            print("Inference result incorrect!")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cuda_python.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cuda_python.py
new file mode 100644
index 0000000000000000000000000000000000000000..b8e6a1e289a1eb2a4430bdd71a1f77d05c177e61
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cuda_python.py
@@ -0,0 +1,375 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import sys
+import os
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    EngineFromNetwork,
+    NetworkFromOnnxPath,
+    TrtRunner,
+)
+from polygraphy.json import to_json, from_json
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+from plugin_utils import checkCudaErrors, KernelHelper, parseArgs, CudaCtxManager
+from cuda import cuda
+
+circ_pad_half_kernel = r"""
+#include <cuda_fp16.h>
+extern "C" __global__
+void circ_pad_half(half const* X, int const* all_pads, int const* orig_dims, half* Y, int const* Y_shape, int Y_len) {
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+
+    for(int i = index; i < Y_len; i += stride)
+    {
+        int i3 = i % Y_shape[3];
+        int i2 = (i / Y_shape[3]) % Y_shape[2];
+        int i1 = (i / Y_shape[3] / Y_shape[2]) % Y_shape[1];
+        int i0 = i / Y_shape[3] / Y_shape[2] / Y_shape[1];
+
+        int j0 = (i0 - all_pads[0] + orig_dims[0]) % orig_dims[0];
+        int j1 = (i1 - all_pads[2] + orig_dims[1]) % orig_dims[1];
+        int j2 = (i2 - all_pads[4] + orig_dims[2]) % orig_dims[2];
+        int j3 = (i3 - all_pads[6] + orig_dims[3]) % orig_dims[3];
+
+        Y[i] = X[
+            orig_dims[3] * orig_dims[2] * orig_dims[1] * j0
+            + orig_dims[3] * orig_dims[2] * j1
+            + orig_dims[3] * j2
+            + j3
+        ];
+    }
+}
+"""
+
+circ_pad_float_kernel = r"""
+extern "C" __global__
+void circ_pad_float(float const* X, int const* all_pads, int const* orig_dims, float* Y, int const* Y_shape, int Y_len) {
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+
+    for(int i = index; i < Y_len; i += stride)
+    {
+        int i3 = i % Y_shape[3];
+        int i2 = (i / Y_shape[3]) % Y_shape[2];
+        int i1 = (i / Y_shape[3] / Y_shape[2]) % Y_shape[1];
+        int i0 = i / Y_shape[3] / Y_shape[2] / Y_shape[1];
+
+        int j0 = (i0 - all_pads[0] + orig_dims[0]) % orig_dims[0];
+        int j1 = (i1 - all_pads[2] + orig_dims[1]) % orig_dims[1];
+        int j2 = (i2 - all_pads[4] + orig_dims[2]) % orig_dims[2];
+        int j3 = (i3 - all_pads[6] + orig_dims[3]) % orig_dims[3];
+
+        Y[i] = X[
+            orig_dims[3] * orig_dims[2] * orig_dims[1] * j0
+            + orig_dims[3] * orig_dims[2] * j1
+            + orig_dims[3] * j2
+            + j3
+        ];
+    }
+}
+"""
+
+
+class CircPadPlugin(trt.IPluginV2DynamicExt):
+    def __init__(self, fc=None):
+        trt.IPluginV2DynamicExt.__init__(self)
+        self.pads = []
+        self.X_shape = []
+        self.N = 0
+
+        self.all_pads_d = None
+        self.orig_dims_d = None
+        self.Y_shape_d = None
+
+        self.num_outputs = 1
+        self.plugin_namespace = ""
+        self.plugin_type = "CircPadPlugin"
+        self.plugin_version = "1"
+
+        self.cuDevice = None
+
+        if fc is not None:
+            assert set([f.name for f in fc]) == set(
+                ["pads", "N"]
+            ), "Field collection invalid"
+            for f in fc:
+                if f.name == "pads":
+                    self.pads = f.data
+                elif f.name == "N":
+                    self.N = int(f.data)
+
+    def initialize(self):  # resolves device 0, acquires the "cuda_ctx" plugin resource, allocates three int32 device buffers
+        self.cuDevice = checkCudaErrors(cuda.cuDeviceGet(0))  # status is checked like the allocations below, not discarded
+        trt.get_plugin_registry().acquire_plugin_resource(
+            "cuda_ctx", CudaCtxManager(self.cuDevice)
+        )
+        self.all_pads_d = checkCudaErrors(
+            cuda.cuMemAlloc(np.int32().itemsize * self.N * 2)
+        )
+        self.orig_dims_d = checkCudaErrors(
+            cuda.cuMemAlloc(np.int32().itemsize * self.N)
+        )
+        self.Y_shape_d = checkCudaErrors(cuda.cuMemAlloc(np.int32().itemsize * self.N))
+
+    def get_output_datatype(self, index, input_types):
+        return input_types[0]
+
+    def get_output_dimensions(self, output_index, inputs, exprBuilder):
+
+        output_dims = trt.DimsExprs(inputs[0])
+
+        for i in range(np.size(self.pads) // 2):
+            output_dims[len(output_dims) - i - 1] = exprBuilder.operation(
+                trt.DimensionOperation.SUM,
+                inputs[0][len(output_dims) - i - 1],
+                exprBuilder.constant(self.pads[i * 2] + self.pads[i * 2 + 1]),
+            )
+
+        return output_dims
+
+    def serialize(self):
+        return to_json({"pads": self.pads, "N": self.N})
+
+    def configure_plugin(self, inp, out):
+        X_dims = inp[0].desc.dims
+        self.X_shape = np.zeros((len(X_dims),))
+        for i in range(len(X_dims)):
+            self.X_shape[i] = X_dims[i]
+
+        all_pads = np.zeros((self.N * 2,), dtype=np.int32)
+        orig_dims = np.array(self.X_shape, dtype=np.int32)
+        out_dims = np.array(self.X_shape, dtype=np.int32)
+
+        for i in range(np.size(self.pads) // 2):
+            out_dims[self.N - i - 1] += self.pads[i * 2] + self.pads[i * 2 + 1]
+            all_pads[self.N * 2 - 2 * i - 2] = self.pads[i * 2]
+            all_pads[self.N * 2 - 2 * i - 1] = self.pads[i * 2 + 1]
+
+        # Copy vectors from host memory to device memory
+        if self.all_pads_d:
+            checkCudaErrors(
+                cuda.cuMemcpyHtoD(self.all_pads_d, all_pads, all_pads.nbytes)
+            )
+        if self.orig_dims_d:
+            checkCudaErrors(
+                cuda.cuMemcpyHtoD(self.orig_dims_d, orig_dims, orig_dims.nbytes)
+            )
+        if self.Y_shape_d:
+            checkCudaErrors(
+                cuda.cuMemcpyHtoD(self.Y_shape_d, out_dims, out_dims.nbytes)
+            )
+
+        self.Y_len_d = np.prod(out_dims)
+
+    def supports_format_combination(self, pos, in_out, num_inputs):
+        assert num_inputs == 1
+        assert pos < len(in_out)
+
+        desc = in_out[pos]
+        if desc.format != trt.TensorFormat.LINEAR:
+            return False
+
+        # first input should be float16 or float32
+        if pos == 0:
+            return desc.type == trt.DataType.FLOAT or desc.type == trt.DataType.HALF
+
+        # output should have the same type as the input
+        if pos == 1:
+            return in_out[0].type == desc.type
+
+        assert False
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):
+        # Launch the circular-padding kernel that matches the input dtype on `stream`.
+        inp_dtype = trt.nptype(input_desc[0].type)
+
+        blockSize = 256
+        numBlocks = int((np.prod(np.array(self.X_shape)) + blockSize - 1) // blockSize)  # grid sized from the input volume; the kernels stride over Y_len
+
+        da = np.array([inputs[0]], dtype=np.uint64)  # inputs[0] / outputs[0] are stored as 64-bit values (device addresses per the sample README)
+        dc = np.array([outputs[0]], dtype=np.uint64)
+
+        d_all_pads = np.array([int(self.all_pads_d)], dtype=np.uint64)
+        d_orig_dims = np.array([int(self.orig_dims_d)], dtype=np.uint64)
+        d_Y_shape = np.array([int(self.Y_shape_d)], dtype=np.uint64)
+        Y_len = np.array(self.Y_len_d, dtype=np.uint32)  # Y_len_d is the host-side element count computed in configure_plugin
+
+        args = [da, d_all_pads, d_orig_dims, dc, d_Y_shape, Y_len]  # same order as the kernel parameter list
+        kernelArgs = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)  # host address of each boxed argument
+
+        stream_ptr = np.array([stream], dtype=np.uint64)
+
+        if inp_dtype == np.float32:
+            kernelHelper = KernelHelper(circ_pad_float_kernel, int(self.cuDevice))  # NOTE(review): a new KernelHelper is built on every enqueue call; consider caching
+            _circ_pad_float_kernel = kernelHelper.getFunction(b"circ_pad_float")
+            checkCudaErrors(
+                cuda.cuLaunchKernel(
+                    _circ_pad_float_kernel,
+                    numBlocks,
+                    1,
+                    1,
+                    blockSize,
+                    1,
+                    1,
+                    0,
+                    stream_ptr,
+                    kernelArgs,
+                    0,
+                )
+            )
+        elif inp_dtype == np.float16:
+            kernelHelper = KernelHelper(circ_pad_half_kernel, int(self.cuDevice))  # same launch as above with the half-precision kernel
+            _circ_pad_half_kernel = kernelHelper.getFunction(b"circ_pad_half")
+            checkCudaErrors(
+                cuda.cuLaunchKernel(
+                    _circ_pad_half_kernel,
+                    numBlocks,
+                    1,
+                    1,
+                    blockSize,
+                    1,
+                    1,
+                    0,
+                    stream_ptr,
+                    kernelArgs,
+                    0,
+                )
+            )
+        else:
+            raise ValueError("inp_dtype not valid")
+
+    def clone(self):
+        cloned_plugin = CircPadPlugin()
+        cloned_plugin.__dict__.update(self.__dict__)
+        return cloned_plugin
+
+    def terminate(self):
+        if self.all_pads_d:
+            checkCudaErrors(cuda.cuMemFree(self.all_pads_d))
+        if self.orig_dims_d:
+            checkCudaErrors(cuda.cuMemFree(self.orig_dims_d))
+        if self.Y_shape_d:
+            checkCudaErrors(cuda.cuMemFree(self.Y_shape_d))
+
+        trt.get_plugin_registry().release_plugin_resource("cuda_ctx")
+
+    #
+    # The following defaults take effect since the respective methods are not overridden
+    #
+
+    # def get_serialization_size(self):
+    #     return len(to_json({"pads": self.pads, "N": self.N}))
+
+    # def get_workspace_size(self, input_desc, output_desc):
+    #     return 0
+
+    # def destroy(self):
+    #     pass
+
+
+class CircPadPluginCreator(trt.IPluginCreator):
+    def __init__(self):
+        trt.IPluginCreator.__init__(self)
+        self.name = "CircPadPlugin"
+        self.plugin_namespace = ""
+        self.plugin_version = "1"
+        self.field_names = trt.PluginFieldCollection(
+            [
+                trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32),
+                trt.PluginField("N", np.array([]), trt.PluginFieldType.INT32),
+            ]
+        )
+
+    def create_plugin(self, name, fc):
+        return CircPadPlugin(fc)
+
+    def deserialize_plugin(self, name, data):
+        deserialized = CircPadPlugin()
+        j = dict(from_json(data))
+        deserialized.__dict__.update(j)
+        return deserialized
+
+
+if __name__ == "__main__":
+
+    args = parseArgs()
+
+    # Initialize CUDA Driver API (check the status instead of discarding it)
+    checkCudaErrors(cuda.cuInit(0))
+
+    # Retrieve handle for device 0
+    cuDevice = checkCudaErrors(cuda.cuDeviceGet(0))
+
+    plg_registry = trt.get_plugin_registry()
+
+    # Create context
+    plg_registry.acquire_plugin_resource("cuda_ctx", CudaCtxManager(cuDevice))
+
+    precision = np.float32 if args.precision == "fp32" else np.float16
+
+    inp_shape = (100, 2, 32, 32)
+    X = np.random.normal(size=inp_shape).astype(precision)
+
+    pads = (1, 1, 1, 1)
+
+    # Load standard plugins
+    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+    trt.init_libnvinfer_plugins(TRT_LOGGER, namespace="")
+
+    # Register plugin creator
+    my_plugin_creator = CircPadPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    # create ONNX model
+    onnx_path = f"test_CircPadPlugin_cuda_python_{args.precision}.onnx"
+    inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision)
+    Y = gs.Variable(name="Y", dtype=precision)
+    myPluginNode = gs.Node(
+        name="CircPadPlugin",
+        op="CircPadPlugin",
+        inputs=[inputA],
+        outputs=[Y],
+        attrs={"pads": pads, "N": 4},
+    )
+    graph = gs.Graph(nodes=[myPluginNode], inputs=[inputA], outputs=[Y], opset=16)
+    onnx.save(gs.export_onnx(graph), onnx_path)
+
+    # build engine
+    build_engine = EngineFromNetwork(
+        NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16)
+    )
+
+    Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+    # Run
+    with TrtRunner(build_engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+
+        if np.allclose(Y, Y_ref):
+            print("Inference result correct!")
+        else:
+            print("Inference result incorrect!")
+
+    plg_registry.release_plugin_resource("cuda_ctx")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cupy.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cupy.py
new file mode 100644
index 0000000000000000000000000000000000000000..1309bfdf7004c7f1d8d330c2903ee97ab1ef2044
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_cupy.py
@@ -0,0 +1,324 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import cupy as cp
+import time
+import pickle
+import sys
+import os
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    EngineFromNetwork,
+    NetworkFromOnnxPath,
+    TrtRunner,
+)
+
+from polygraphy.json import to_json, from_json
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+from plugin_utils import volume, parseArgs
+
+circ_pad_half_kernel = cp.RawKernel(
+    r"""
+#include <cuda_fp16.h>
+extern "C" __global__
+void circ_pad_half(half const* X, int const* all_pads, int const* orig_dims, half* Y, int const* Y_shape, int const* Y_len) {
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+
+    for(int i = index; i < *Y_len; i += stride)
+    {
+        int i3 = i % Y_shape[3];
+        int i2 = (i / Y_shape[3]) % Y_shape[2];
+        int i1 = (i / Y_shape[3] / Y_shape[2]) % Y_shape[1];
+        int i0 = i / Y_shape[3] / Y_shape[2] / Y_shape[1];
+
+        int j0 = (i0 - all_pads[0] + orig_dims[0]) % orig_dims[0];
+        int j1 = (i1 - all_pads[2] + orig_dims[1]) % orig_dims[1];
+        int j2 = (i2 - all_pads[4] + orig_dims[2]) % orig_dims[2];
+        int j3 = (i3 - all_pads[6] + orig_dims[3]) % orig_dims[3];
+
+        Y[i] = X[
+            orig_dims[3] * orig_dims[2] * orig_dims[1] * j0
+            + orig_dims[3] * orig_dims[2] * j1
+            + orig_dims[3] * j2
+            + j3
+        ];
+    }
+}
+""",
+    "circ_pad_half",
+)
+
+circ_pad_float_kernel = cp.RawKernel(
+    r"""
+extern "C" __global__
+void circ_pad_float(float const* X, int const* all_pads, int const* orig_dims, float* Y, int const* Y_shape, int const* Y_len) {
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+
+    for(int i = index; i < *Y_len; i += stride)
+    {
+        int i3 = i % Y_shape[3];
+        int i2 = (i / Y_shape[3]) % Y_shape[2];
+        int i1 = (i / Y_shape[3] / Y_shape[2]) % Y_shape[1];
+        int i0 = i / Y_shape[3] / Y_shape[2] / Y_shape[1];
+
+        int j0 = (i0 - all_pads[0] + orig_dims[0]) % orig_dims[0];
+        int j1 = (i1 - all_pads[2] + orig_dims[1]) % orig_dims[1];
+        int j2 = (i2 - all_pads[4] + orig_dims[2]) % orig_dims[2];
+        int j3 = (i3 - all_pads[6] + orig_dims[3]) % orig_dims[3];
+
+        Y[i] = X[
+            orig_dims[3] * orig_dims[2] * orig_dims[1] * j0
+            + orig_dims[3] * orig_dims[2] * j1
+            + orig_dims[3] * j2
+            + j3
+        ];
+    }
+}
+""",
+    "circ_pad_float",
+)
+
+
+class CircPadPlugin(trt.IPluginV2DynamicExt):
+    def __init__(self, fc=None):
+        trt.IPluginV2DynamicExt.__init__(self)
+        self.pads = []
+        self.X_shape = []
+
+        self.num_outputs = 1
+        self.plugin_namespace = ""
+        self.plugin_type = "CircPadPlugin"
+        self.plugin_version = "1"
+
+        if fc is not None:
+            assert fc[0].name == "pads"
+            self.pads = fc[0].data
+
+    def get_output_datatype(self, index, input_types):
+        return input_types[0]
+
+    def get_output_dimensions(self, output_index, inputs, exprBuilder):
+        """Return input 0's dims, growing each padded trailing dim by its before+after pads."""
+        output_dims = trt.DimsExprs(inputs[0])
+
+        for i in range(np.size(self.pads) // 2):
+            output_dims[len(output_dims) - i - 1] = exprBuilder.operation(
+                trt.DimensionOperation.SUM,
+                inputs[0][len(output_dims) - i - 1],
+                exprBuilder.constant(self.pads[i * 2] + self.pads[i * 2 + 1]),
+            )
+
+        return output_dims
+
+    def serialize(self):
+        return to_json({"pads": self.pads})
+
+    def configure_plugin(self, inp, out):
+        """Record the input shape and upload the pad/shape tables that enqueue() passes on."""
+        X_dims = inp[0].desc.dims
+        self.X_shape = np.zeros((len(X_dims),))
+        for i in range(len(X_dims)):
+            self.X_shape[i] = X_dims[i]
+
+        N = len(self.X_shape)
+        all_pads = np.zeros((N * 2,))
+        orig_dims = np.array(self.X_shape)
+        out_dims = np.array(self.X_shape)
+        # Index self.pads, not the bare `pads`, which exists only as a __main__ global.
+        for i in range(np.size(self.pads) // 2):
+            out_dims[N - i - 1] += self.pads[i * 2] + self.pads[i * 2 + 1]
+            all_pads[N * 2 - 2 * i - 2] = self.pads[i * 2]
+            all_pads[N * 2 - 2 * i - 1] = self.pads[i * 2 + 1]
+        self.all_pads_d = cp.asarray(all_pads, dtype=cp.int32)
+        self.orig_dims_d = cp.asarray(orig_dims, dtype=cp.int32)
+        self.Y_shape_d = cp.asarray(out_dims, dtype=cp.int32)
+        self.Y_len_d = cp.array([np.prod(out_dims)], dtype=cp.int32)
+
+    def supports_format_combination(self, pos, in_out, num_inputs):
+        assert num_inputs == 1
+        assert pos < len(in_out)
+
+        desc = in_out[pos]
+        if desc.format != trt.TensorFormat.LINEAR:
+            return False
+
+        # first input should be float16 or float32
+        if pos == 0:
+            return desc.type == trt.DataType.FLOAT or desc.type == trt.DataType.HALF
+
+        # output should have the same type as the input
+        if pos == 1:
+            return in_out[0].type == desc.type
+
+        assert False
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):
+        """Wrap the raw input/output pointers as CuPy arrays and launch the float or half kernel."""
+        inp_dtype = trt.nptype(input_desc[0].type)
+
+        a_mem = cp.cuda.UnownedMemory(
+            inputs[0], volume(input_desc[0].dims) * cp.dtype(inp_dtype).itemsize, self
+        )
+        c_mem = cp.cuda.UnownedMemory(
+            outputs[0],
+            volume(output_desc[0].dims) * cp.dtype(inp_dtype).itemsize,
+            self,
+        )
+
+        a_ptr = cp.cuda.MemoryPointer(a_mem, 0)
+        c_ptr = cp.cuda.MemoryPointer(c_mem, 0)
+
+        a = cp.ndarray((volume(input_desc[0].dims)), dtype=inp_dtype, memptr=a_ptr)
+        c = cp.ndarray((volume(output_desc[0].dims)), dtype=inp_dtype, memptr=c_ptr)
+
+        cuda_stream = cp.cuda.ExternalStream(stream)
+        # Grid is sized from the input volume; the kernels' stride loop covers the rest of Y.
+        blockSize = 256
+        numBlocks = int((np.prod(np.array(self.X_shape)) + blockSize - 1) // blockSize)
+
+        with cuda_stream:
+            if inp_dtype == np.float32:
+                circ_pad_float_kernel(
+                    (numBlocks,),
+                    (blockSize,),
+                    (
+                        a,
+                        self.all_pads_d,
+                        self.orig_dims_d,
+                        c,
+                        self.Y_shape_d,
+                        self.Y_len_d,
+                    ),
+                )
+            elif inp_dtype == np.float16:
+                circ_pad_half_kernel(
+                    (numBlocks,),
+                    (blockSize,),
+                    (
+                        a,
+                        self.all_pads_d,
+                        self.orig_dims_d,
+                        c,
+                        self.Y_shape_d,
+                        self.Y_len_d,
+                    ),
+                )
+            else:
+                raise ValueError("inp_dtype not valid")
+
+    def clone(self):
+        cloned_plugin = CircPadPlugin()
+        cloned_plugin.__dict__.update(self.__dict__)
+        return cloned_plugin
+
+    #
+    # The following defaults take effect since the respective methods are not overridden
+    #
+
+    # def initialize(self):
+    #     pass
+
+    # def get_serialization_size(self):
+    #     return len(to_json({"pads": self.pads}))
+
+    # def get_workspace_size(self, input_desc, output_desc):
+    #     return 0
+
+    # def destroy(self):
+    #     pass
+
+    # def terminate(self):
+    #     pass
+
+
+class CircPadPluginCreator(trt.IPluginCreator):
+    def __init__(self):
+        trt.IPluginCreator.__init__(self)
+
+        self.name = "CircPadPlugin"
+        self.plugin_namespace = ""
+        self.plugin_version = "1"
+        self.field_names = trt.PluginFieldCollection(
+            [trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32)]
+        )
+
+    def create_plugin(self, name, fc):
+        return CircPadPlugin(fc)
+
+    def deserialize_plugin(self, name, data):
+        j = dict(from_json(data.decode("utf-8")))
+        deserialized = CircPadPlugin()
+        deserialized.__dict__.update(j)
+        return deserialized
+
+
+if __name__ == "__main__":
+
+    args = parseArgs()
+    precision = np.float32 if args.precision == "fp32" else np.float16
+
+    inp_shape = (100, 2, 32, 32)
+    X = np.random.normal(size=inp_shape).astype(precision)
+
+    pads = (1, 1, 1, 1)
+
+    # Load standard plugins
+    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+    trt.init_libnvinfer_plugins(TRT_LOGGER, namespace="")
+
+    # Register plugin creator
+    plg_registry = trt.get_plugin_registry()
+    my_plugin_creator = CircPadPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    # create ONNX model
+    onnx_path = f"test_CircPadPlugin_cupy_{args.precision}.onnx"
+    inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision)
+    Y = gs.Variable(name="Y", dtype=precision)
+    myPluginNode = gs.Node(
+        name="CircPadPlugin",
+        op="CircPadPlugin",
+        inputs=[inputA],
+        outputs=[Y],
+        attrs={"pads": pads},
+    )
+    graph = gs.Graph(nodes=[myPluginNode], inputs=[inputA], outputs=[Y], opset=16)
+    onnx.save(gs.export_onnx(graph), onnx_path)
+
+    # build engine
+    build_engine = EngineFromNetwork(
+        NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16)
+    )
+
+    Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+    # Run
+    with TrtRunner(build_engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+
+        if np.allclose(Y, Y_ref):
+            print("Inference result correct!")
+        else:
+            print("Inference result incorrect!")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d145b512104a9782f5479099be2639a3512cbdf
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_inetdef_cuda_python.py
@@ -0,0 +1,383 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import sys
+import os
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    TrtRunner,
+    create_network,
+    engine_from_network,
+)
+
+from polygraphy.json import to_json, from_json
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+from plugin_utils import checkCudaErrors, KernelHelper, parseArgs, CudaCtxManager
+from cuda import cuda
+
+circ_pad_half_kernel = r"""
+#include <cuda_fp16.h>
+extern "C" __global__
+void circ_pad_half(half const* X, int const* all_pads, int const* orig_dims, half* Y, int const* Y_shape, int Y_len) {
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+
+    for(int i = index; i < Y_len; i += stride)
+    {
+        int i3 = i % Y_shape[3];
+        int i2 = (i / Y_shape[3]) % Y_shape[2];
+        int i1 = (i / Y_shape[3] / Y_shape[2]) % Y_shape[1];
+        int i0 = i / Y_shape[3] / Y_shape[2] / Y_shape[1];
+
+        int j0 = (i0 - all_pads[0] + orig_dims[0]) % orig_dims[0];
+        int j1 = (i1 - all_pads[2] + orig_dims[1]) % orig_dims[1];
+        int j2 = (i2 - all_pads[4] + orig_dims[2]) % orig_dims[2];
+        int j3 = (i3 - all_pads[6] + orig_dims[3]) % orig_dims[3];
+
+        Y[i] = X[
+            orig_dims[3] * orig_dims[2] * orig_dims[1] * j0
+            + orig_dims[3] * orig_dims[2] * j1
+            + orig_dims[3] * j2
+            + j3
+        ];
+    }
+}
+"""
+
+circ_pad_float_kernel = r"""
+extern "C" __global__
+void circ_pad_float(float const* X, int const* all_pads, int const* orig_dims, float* Y, int const* Y_shape, int Y_len) {
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+
+    for(int i = index; i < Y_len; i += stride)
+    {
+        int i3 = i % Y_shape[3];
+        int i2 = (i / Y_shape[3]) % Y_shape[2];
+        int i1 = (i / Y_shape[3] / Y_shape[2]) % Y_shape[1];
+        int i0 = i / Y_shape[3] / Y_shape[2] / Y_shape[1];
+
+        int j0 = (i0 - all_pads[0] + orig_dims[0]) % orig_dims[0];
+        int j1 = (i1 - all_pads[2] + orig_dims[1]) % orig_dims[1];
+        int j2 = (i2 - all_pads[4] + orig_dims[2]) % orig_dims[2];
+        int j3 = (i3 - all_pads[6] + orig_dims[3]) % orig_dims[3];
+
+        Y[i] = X[
+            orig_dims[3] * orig_dims[2] * orig_dims[1] * j0
+            + orig_dims[3] * orig_dims[2] * j1
+            + orig_dims[3] * j2
+            + j3
+        ];
+    }
+}
+"""
+
+
+class CircPadPlugin(trt.IPluginV2DynamicExt):
+    def __init__(self, fc=None):
+        trt.IPluginV2DynamicExt.__init__(self)
+        self.pads = []
+        self.X_shape = []
+        self.N = 0
+
+        self.all_pads_d = None
+        self.orig_dims_d = None
+        self.Y_shape_d = None
+
+        self.num_outputs = 1
+        self.plugin_namespace = ""
+        self.plugin_type = "CircPadPlugin"
+        self.plugin_version = "1"
+
+        self.cuDevice = None
+
+        if fc is not None:
+            assert set([f.name for f in fc]) == set(
+                ["pads", "N"]
+            ), "Field collection invalid"
+            for f in fc:
+                if f.name == "pads":
+                    self.pads = f.data
+                elif f.name == "N":
+                    self.N = int(f.data)
+
+    def initialize(self):
+        """Acquire the shared "cuda_ctx" resource and allocate the int32 metadata buffers."""
+        # Route cuDeviceGet through checkCudaErrors like the other driver calls
+        # in this file, instead of discarding its error code.
+        self.cuDevice = checkCudaErrors(cuda.cuDeviceGet(0))
+        registry = trt.get_plugin_registry()
+        registry.acquire_plugin_resource("cuda_ctx", CudaCtxManager(self.cuDevice))
+        int32_size = np.int32().itemsize
+        # all_pads stores two entries per dimension; the other two buffers store one.
+        self.all_pads_d = checkCudaErrors(cuda.cuMemAlloc(int32_size * self.N * 2))
+        self.orig_dims_d = checkCudaErrors(cuda.cuMemAlloc(int32_size * self.N))
+        self.Y_shape_d = checkCudaErrors(cuda.cuMemAlloc(int32_size * self.N))
+
+    def get_output_datatype(self, index, input_types):
+        return input_types[0]
+
+    def get_output_dimensions(self, output_index, inputs, exprBuilder):
+
+        output_dims = trt.DimsExprs(inputs[0])
+
+        for i in range(np.size(self.pads) // 2):
+            output_dims[len(output_dims) - i - 1] = exprBuilder.operation(
+                trt.DimensionOperation.SUM,
+                inputs[0][len(output_dims) - i - 1],
+                exprBuilder.constant(self.pads[i * 2] + self.pads[i * 2 + 1]),
+            )
+
+        return output_dims
+
+    def serialize(self):
+        return to_json({"pads": self.pads, "N": self.N})
+
+    def configure_plugin(self, inp, out):
+        X_dims = inp[0].desc.dims
+        self.X_shape = np.zeros((len(X_dims),))
+        for i in range(len(X_dims)):
+            self.X_shape[i] = X_dims[i]
+
+        all_pads = np.zeros((self.N * 2,), dtype=np.int32)
+        orig_dims = np.array(self.X_shape, dtype=np.int32)
+        out_dims = np.array(self.X_shape, dtype=np.int32)
+
+        for i in range(np.size(self.pads) // 2):
+            out_dims[self.N - i - 1] += self.pads[i * 2] + self.pads[i * 2 + 1]
+            all_pads[self.N * 2 - 2 * i - 2] = self.pads[i * 2]
+            all_pads[self.N * 2 - 2 * i - 1] = self.pads[i * 2 + 1]
+
+        # Copy vectors from host memory to device memory
+        if self.all_pads_d:
+            checkCudaErrors(
+                cuda.cuMemcpyHtoD(self.all_pads_d, all_pads, all_pads.nbytes)
+            )
+        if self.orig_dims_d:
+            checkCudaErrors(
+                cuda.cuMemcpyHtoD(self.orig_dims_d, orig_dims, orig_dims.nbytes)
+            )
+        if self.Y_shape_d:
+            checkCudaErrors(
+                cuda.cuMemcpyHtoD(self.Y_shape_d, out_dims, out_dims.nbytes)
+            )
+
+        self.Y_len_d = np.prod(out_dims)
+
+    def supports_format_combination(self, pos, in_out, num_inputs):
+        assert num_inputs == 1
+        assert pos < len(in_out)
+
+        desc = in_out[pos]
+        if desc.format != trt.TensorFormat.LINEAR:
+            return False
+
+        # first input should be float16 or float32
+        if pos == 0:
+            return desc.type == trt.DataType.FLOAT or desc.type == trt.DataType.HALF
+
+        # output should have the same type as the input
+        if pos == 1:
+            return in_out[0].type == desc.type
+
+        assert False
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):
+
+        inp_dtype = trt.nptype(input_desc[0].type)
+
+        blockSize = 256
+        numBlocks = int((np.prod(np.array(self.X_shape)) + blockSize - 1) // blockSize)
+
+        da = np.array([inputs[0]], dtype=np.uint64)
+        dc = np.array([outputs[0]], dtype=np.uint64)
+
+        d_all_pads = np.array([int(self.all_pads_d)], dtype=np.uint64)
+        d_orig_dims = np.array([int(self.orig_dims_d)], dtype=np.uint64)
+        d_Y_shape = np.array([int(self.Y_shape_d)], dtype=np.uint64)
+        Y_len = np.array(self.Y_len_d, dtype=np.uint32)
+
+        args = [da, d_all_pads, d_orig_dims, dc, d_Y_shape, Y_len]
+        kernelArgs = np.array([arg.ctypes.data for arg in args], dtype=np.uint64)
+
+        stream_ptr = np.array([stream], dtype=np.uint64)
+
+        if inp_dtype == np.float32:
+            kernelHelper = KernelHelper(circ_pad_float_kernel, int(self.cuDevice))
+            _circ_pad_float_kernel = kernelHelper.getFunction(b"circ_pad_float")
+            checkCudaErrors(
+                cuda.cuLaunchKernel(
+                    _circ_pad_float_kernel,
+                    numBlocks,
+                    1,
+                    1,
+                    blockSize,
+                    1,
+                    1,
+                    0,
+                    stream_ptr,
+                    kernelArgs,
+                    0,
+                )
+            )
+        elif inp_dtype == np.float16:
+            kernelHelper = KernelHelper(circ_pad_half_kernel, int(self.cuDevice))
+            _circ_pad_half_kernel = kernelHelper.getFunction(b"circ_pad_half")
+            checkCudaErrors(
+                cuda.cuLaunchKernel(
+                    _circ_pad_half_kernel,
+                    numBlocks,
+                    1,
+                    1,
+                    blockSize,
+                    1,
+                    1,
+                    0,
+                    stream_ptr,
+                    kernelArgs,
+                    0,
+                )
+            )
+        else:
+            raise ValueError("inp_dtype not valid")
+
+    def clone(self):
+        cloned_plugin = CircPadPlugin()
+        cloned_plugin.__dict__.update(self.__dict__)
+        return cloned_plugin
+
+    def terminate(self):
+        """Free the device buffers held by this plugin and release the "cuda_ctx" resource."""
+        for d_ptr in (self.all_pads_d, self.orig_dims_d, self.Y_shape_d):
+            if d_ptr:
+                checkCudaErrors(cuda.cuMemFree(d_ptr))
+
+        # Fetch the registry here: the name `plg_registry` is bound only when this
+        # file runs as __main__, so the old reference failed for imported plugins.
+        trt.get_plugin_registry().release_plugin_resource("cuda_ctx")
+
+    #
+    # The following defaults take effect since the respective methods are not overridden
+    #
+
+    # def get_serialization_size(self):
+    #     return len(to_json({"pads": self.pads, "N": self.N}))
+
+    # def get_workspace_size(self, input_desc, output_desc):
+    #     return 0
+
+    # def destroy(self):
+    #     pass
+
+
+class CircPadPluginCreator(trt.IPluginCreator):
+    def __init__(self):
+        trt.IPluginCreator.__init__(self)
+        self.name = "CircPadPlugin"
+        self.plugin_namespace = ""
+        self.plugin_version = "1"
+        self.field_names = trt.PluginFieldCollection(
+            [
+                trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32),
+                trt.PluginField("N", np.array([]), trt.PluginFieldType.INT32),
+            ]
+        )
+
+    def create_plugin(self, name, fc):
+        return CircPadPlugin(fc)
+
+    def deserialize_plugin(self, name, data):
+        deserialized = CircPadPlugin()
+        j = dict(from_json(data))
+        deserialized.__dict__.update(j)
+        return deserialized
+
+
+if __name__ == "__main__":
+
+    args = parseArgs()
+    precision = np.float32 if args.precision == "fp32" else np.float16
+
+    # Initialize CUDA Driver API (check the status instead of discarding it)
+    checkCudaErrors(cuda.cuInit(0))
+
+    # Retrieve handle for device 0
+    cuDevice = checkCudaErrors(cuda.cuDeviceGet(0))
+
+    plg_registry = trt.get_plugin_registry()
+
+    # Create context
+    plg_registry.acquire_plugin_resource("cuda_ctx", CudaCtxManager(cuDevice))
+
+    inp_shape = (100, 2, 32, 32)
+    X = np.random.normal(size=inp_shape).astype(precision)
+
+    pads = (1, 1, 1, 1)
+
+    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+    # Load standard plugins (if needed)
+    trt.init_libnvinfer_plugins(TRT_LOGGER, namespace="")
+
+    # Register plugin creator
+    my_plugin_creator = CircPadPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    # Create plugin object
+    builder, network = create_network()
+    plg_creator = plg_registry.get_plugin_creator("CircPadPlugin", "1", "")
+    plugin_fields_list = [
+        trt.PluginField(
+            "pads", np.array(pads, dtype=np.int32), trt.PluginFieldType.INT32
+        ),
+        trt.PluginField("N", np.array([4], dtype=np.int32), trt.PluginFieldType.INT32),
+    ]
+    pfc = trt.PluginFieldCollection(plugin_fields_list)
+    plugin = plg_creator.create_plugin("CircPadPlugin", pfc)
+
+    # Populate network
+    input_X = network.add_input(
+        name="X",
+        dtype=trt.float32 if precision == np.float32 else trt.float16,
+        shape=X.shape,
+    )
+    out = network.add_plugin_v2([input_X], plugin)
+    out.get_output(0).name = "Y"
+    network.mark_output(tensor=out.get_output(0))
+
+    # Build engine
+    # `precision` is a NumPy type, so test it against np.float16; `== trt.float16` never matched.
+    engine = engine_from_network(
+        (builder, network), CreateConfig(fp16=precision == np.float16)
+    )
+
+    Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+    # Run
+    with TrtRunner(engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+
+        if np.allclose(Y, Y_ref):
+            print("Inference result correct!")
+        else:
+            print("Inference result incorrect!")
+
+    plg_registry.release_plugin_resource("cuda_ctx")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_multi_tactic.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_multi_tactic.py
new file mode 100644
index 0000000000000000000000000000000000000000..431ccc928d69eab8526a957654a29eb910d82fcc
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_multi_tactic.py
@@ -0,0 +1,376 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import cupy as cp
+import logging
+import sys
+import os
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    EngineFromNetwork,
+    NetworkFromOnnxPath,
+    TrtRunner,
+)
+
+import triton
+import triton.language as tl
+
+from enum import IntEnum
+
+from polygraphy.json import to_json, from_json
+import torch
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+from plugin_utils import volume, parseArgs
+
+import argparse
+
+logger = logging.getLogger("CircPadMultiTactic")
+
+class Tactic(IntEnum):
+    TORCH = 1
+    TRITON = 2
+
+@triton.jit
+def circ_pad(X,
+            all_pads_0, all_pads_2, all_pads_4, all_pads_6,
+            orig_dims_0, orig_dims_1, orig_dims_2, orig_dims_3,
+            Y,
+            Y_shape_1, Y_shape_2, Y_shape_3,
+            X_len, Y_len, BLOCK_SIZE: tl.constexpr,):
+    pid = tl.program_id(0)
+    i = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+
+    mask_y = i < Y_len
+
+    i3 = i % Y_shape_3
+    i2 = (i // Y_shape_3) % Y_shape_2
+    i1 = (i // Y_shape_3 // Y_shape_2) % Y_shape_1
+    i0 = i // Y_shape_3 // Y_shape_2 // Y_shape_1
+
+    j0 = (i0 - all_pads_0 + orig_dims_0) % orig_dims_0
+    j1 = (i1 - all_pads_2 + orig_dims_1) % orig_dims_1
+    j2 = (i2 - all_pads_4 + orig_dims_2) % orig_dims_2
+    j3 = (i3 - all_pads_6 + orig_dims_3) % orig_dims_3
+
+    load_idx = orig_dims_3 * orig_dims_2 * orig_dims_1 * j0 + orig_dims_3 * orig_dims_2 * j1 + orig_dims_3 * j2 + j3
+    mask_x = load_idx < X_len
+
+    x = tl.load(X + load_idx, mask=mask_x)
+
+    tl.store(Y + i, x, mask=mask_y)
+
+class CircPadPlugin(trt.IPluginV3, trt.IPluginV3OneCore, trt.IPluginV3OneBuild, trt.IPluginV3OneRuntime):
+    def __init__(self, fc=None, phase=None):
+        """Set plugin metadata, then apply any creator fields and the given phase."""
+        trt.IPluginV3.__init__(self)
+        trt.IPluginV3OneCore.__init__(self)
+        trt.IPluginV3OneBuild.__init__(self)
+        trt.IPluginV3OneRuntime.__init__(self)
+        self.pads = []
+        self.X_shape = []
+
+        self.per_format_tactics = (
+            False  # whether per-format tactics or global tactics should be used
+        )
+        self.curr_type = None  # format being timed currently by TRT auto-tuner
+
+        self.num_outputs = 1
+        self.plugin_namespace = ""
+        self.plugin_name = "CircPadPlugin"
+        self.plugin_version = "1"
+
+        # Set the timing cache ID to prevent unnecessary timing of second plugin instance
+        self.timing_cache_id = ""
+        self.tactic = None
+
+        if fc is not None:
+            for f in fc:
+                if f.name == "pads":
+                    self.pads = f.data
+                elif f.name == "per_format_tactics":
+                    self.per_format_tactics = int(f.data)
+
+        # Always bind phase (possibly None) so later self.phase reads cannot raise AttributeError.
+        self.phase = phase
+
+    def get_capability_interface(self, type):
+        return self
+
+    def get_output_data_types(self, input_types):
+        return [input_types[0]]
+
+    def get_output_shapes(self, inputs, shape_inputs, exprBuilder):
+        output_dims = trt.DimsExprs(inputs[0])
+
+        for i in range(np.size(self.pads) // 2):
+            output_dims[len(output_dims) - i - 1] = exprBuilder.operation(
+                trt.DimensionOperation.SUM,
+                inputs[0][len(output_dims) - i - 1],
+                exprBuilder.constant(self.pads[i * 2] + self.pads[i * 2 + 1]),
+            )
+
+        return [output_dims]
+
+    def get_fields_to_serialize(self):
+        return trt.PluginFieldCollection([
+            trt.PluginField("pads", self.pads, trt.PluginFieldType.INT32),
+            trt.PluginField(
+                "per_format_tactics",
+                np.array([self.per_format_tactics], dtype=np.int32),
+                trt.PluginFieldType.INT32,
+            ),
+        ])
+
+    def configure_plugin(self, inp, out):
+        assert inp[0].desc.type == trt.float32 or inp[0].desc.type == trt.float16
+        self.curr_type = inp[0].desc.type
+
+    def on_shape_change(self, inp, out):
+        if (
+            self.phase == trt.TensorRTPhase.RUNTIME
+            and self.per_format_tactics
+            and inp[0].type == trt.float16
+        ):
+            assert self.tactic == Tactic.TRITON
+
+        X_dims = inp[0].dims
+        self.X_shape = np.zeros((len(X_dims),))
+        for i in range(len(X_dims)):
+            self.X_shape[i] = X_dims[i]
+
+    def supports_format_combination(self, pos, in_out, num_inputs):
+        assert num_inputs == 1
+        assert pos < len(in_out)
+
+        desc = in_out[pos].desc
+        if desc.format != trt.TensorFormat.LINEAR:
+            return False
+
+        # first input should be float16 or float32
+        if pos == 0:
+            return desc.type == trt.DataType.FLOAT or desc.type == trt.DataType.HALF
+
+        # output should have the same type as the input
+        if pos == 1:
+            return in_out[0].desc.type == desc.type
+
+        assert False
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):
+        """Run the selected tactic (PyTorch pad or the Triton kernel) to fill output 0."""
+        inp_dtype = trt.nptype(input_desc[0].type)
+
+        a_mem = cp.cuda.UnownedMemory(
+            inputs[0], volume(input_desc[0].dims) * cp.dtype(inp_dtype).itemsize, self
+        )
+        c_mem = cp.cuda.UnownedMemory(
+            outputs[0],
+            volume(output_desc[0].dims) * cp.dtype(inp_dtype).itemsize,
+            self,
+        )
+
+        a_ptr = cp.cuda.MemoryPointer(a_mem, 0)
+        c_ptr = cp.cuda.MemoryPointer(c_mem, 0)
+
+        c_d = cp.ndarray((volume(output_desc[0].dims)), dtype=inp_dtype, memptr=c_ptr)
+
+        if self.phase == trt.TensorRTPhase.BUILD:
+            logger.info(f"Timing tactic: {self.tactic}")
+
+        if self.tactic == Tactic.TORCH:
+            # Use PyTorch functional op - no need to write kernel
+            a_d = cp.ndarray(tuple(input_desc[0].dims), dtype=inp_dtype, memptr=a_ptr)
+            a_t = torch.as_tensor(a_d, device='cuda')
+            out = torch.nn.functional.pad(a_t, self.pads.tolist(), mode='circular')
+            cp.copyto(c_d, cp.reshape(cp.asarray(out), (-1,)))
+        elif self.tactic == Tactic.TRITON:
+            a_d = cp.ndarray((volume(input_desc[0].dims)), dtype=inp_dtype, memptr=a_ptr)
+            a_t = torch.as_tensor(a_d, device='cuda')
+            c_t = torch.as_tensor(c_d, device='cuda')
+
+            N = len(self.X_shape)
+            all_pads = np.zeros((N * 2,), dtype=np.int32)
+            orig_dims = np.array(self.X_shape, dtype=np.int32)
+            out_dims = np.array(self.X_shape, dtype=np.int32)
+            # Read self.pads: the bare name `pads` is only a global of the __main__ demo.
+            for i in range(np.size(self.pads) // 2):
+                out_dims[N - i - 1] += self.pads[i * 2] + self.pads[i * 2 + 1]
+                all_pads[N * 2 - 2 * i - 2] = self.pads[i * 2]
+                all_pads[N * 2 - 2 * i - 1] = self.pads[i * 2 + 1]
+
+            all_pads = all_pads.tolist()
+            orig_dims = orig_dims.tolist()
+            out_dims = out_dims.tolist()
+
+            blockSize = 256
+            numBlocks = tuple([int((np.prod(out_dims) + blockSize - 1) // blockSize)])
+
+            circ_pad[numBlocks](a_t,
+                all_pads[0], all_pads[2], all_pads[4], all_pads[6],
+                orig_dims[0], orig_dims[1], orig_dims[2], orig_dims[3],
+                c_t,
+                out_dims[1], out_dims[2], out_dims[3],
+                int(np.prod(orig_dims)), int(np.prod(out_dims)), BLOCK_SIZE=blockSize
+            )
+        else:
+            raise RuntimeError("Invalid tactic")
+    
+    def attach_to_context(self, context):
+        return self.clone()
+    
+    def get_valid_tactics(self):
+        assert self.curr_type is not None
+        if self.per_format_tactics and self.curr_type == trt.float16:
+            return [int(Tactic.TRITON)]
+
+        return [int(Tactic.TORCH), int(Tactic.TRITON)]
+
+    def set_tactic(self, tactic):
+        self.tactic = Tactic(tactic)
+
+        if self.phase == trt.TensorRTPhase.RUNTIME:
+            logger.info(f"Best tactic chosen: {self.tactic}")
+
+    def clone(self):
+        cloned_plugin = CircPadPlugin()
+        cloned_plugin.__dict__.update(self.__dict__)
+        return cloned_plugin
+
+    # 
+    # The following defaults take effect since the respective methods are not overridden
+    #
+
+    # def get_workspace_size(self, input_desc, output_desc):
+    #     return 0
+    
+    # def destroy(self):
+    #     pass
+
+
+class CircPadPluginCreator(trt.IPluginCreatorV3One):
+    def __init__(self):
+        trt.IPluginCreatorV3One.__init__(self)
+        self.name = "CircPadPlugin"
+        self.plugin_namespace = ""
+        self.plugin_version = "1"
+        self.field_names = trt.PluginFieldCollection([
+            trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32),
+            trt.PluginField(
+                "per_format_tactics", np.array([]), trt.PluginFieldType.INT32
+            ),
+        ])
+
+    def create_plugin(self, name, fc, phase):
+        return CircPadPlugin(fc, phase)
+
+
+if __name__ == "__main__":
+    logging.basicConfig()
+    logger.setLevel(logging.INFO)
+
+    parser = argparse.ArgumentParser(
+        description="Options for Circular Padding plugin multi-tactic sample"
+    )
+
+    parser.add_argument(
+        "--precision",
+        type=str,
+        default="fp32",
+        choices=["fp32", "fp16"],
+        help="Precision to use for plugin",
+    )
+    parser.add_argument(
+        "--per-format-tactics",
+        action="store_true",
+        help="Whether per-format tactics or global tactics should be used",
+    )
+
+    args = parser.parse_args()
+
+    precision = np.float32 if args.precision == "fp32" else np.float16
+    is_tactics_per_format = 1 if args.per_format_tactics else 0
+
+    inp_shape = (10, 3, 32, 32)
+    X_A = np.random.normal(size=inp_shape).astype(precision)
+    X_B = np.random.normal(size=inp_shape).astype(precision)
+
+    pads = (1, 1, 1, 1)
+
+    # Register plugin creator
+    plg_registry = trt.get_plugin_registry()
+    my_plugin_creator = CircPadPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    # create ONNX model
+    onnx_path = f"test_CircPadPlugin_multi_tactic_{args.precision}.onnx"
+    inputA = gs.Variable(name="X_A", shape=inp_shape, dtype=precision)
+    inputB = gs.Variable(name="X_B", shape=inp_shape, dtype=precision)
+    Y_A = gs.Variable(name="Y_A", dtype=precision)
+    Y_B = gs.Variable(name="Y_B", dtype=precision)
+    myPluginNode_A = gs.Node(
+        name="CircPadPlugin_A",
+        op="CircPadPlugin",
+        inputs=[inputA],
+        outputs=[Y_A],
+        attrs={
+            "pads": pads,
+            "per_format_tactics": np.array([is_tactics_per_format], dtype=np.int32),
+        },
+    )
+    myPluginNode_B = gs.Node(
+        name="CircPadPlugin_B",
+        op="CircPadPlugin",
+        inputs=[inputB],
+        outputs=[Y_B],
+        attrs={
+            "pads": pads,
+            "per_format_tactics": np.array([is_tactics_per_format], dtype=np.int32),
+        },
+    )
+
+    graph = gs.Graph(nodes=[myPluginNode_A, myPluginNode_B], inputs=[inputA, inputB], outputs=[Y_A, Y_B], opset=16)
+    onnx.save(gs.export_onnx(graph), onnx_path)
+
+    # build engine
+    build_engine = EngineFromNetwork(
+        NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision==np.float16)
+    )
+
+    Y_A_ref = np.pad(X_A, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+    Y_B_ref = np.pad(X_B, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+
+    # Run
+    with TrtRunner(build_engine, "trt_runner")as runner:
+        outputs = runner.infer({"X_A": X_A, "X_B": X_B})
+        Y_A_out = outputs["Y_A"]
+        Y_B_out = outputs["Y_B"]
+
+        if np.allclose(Y_A_out, Y_A_ref):
+            print("Inference result A correct!")
+        else:
+            print("Inference result A incorrect!")
+
+        if np.allclose(Y_B_out, Y_B_ref):
+            print("Inference result B correct!")
+        else:
+            print("Inference result B incorrect!")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_numba.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_numba.py
new file mode 100644
index 0000000000000000000000000000000000000000..faaa13140ab6f45736b95011999a4d710cc2e4be
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_numba.py
@@ -0,0 +1,257 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import cupy as cp
+from numba import cuda
+import sys
+import os
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    EngineFromNetwork,
+    NetworkFromOnnxPath,
+    TrtRunner,
+)
+
+from polygraphy.json import to_json, from_json
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+from plugin_utils import volume, parseArgs
+
+
+
+@cuda.jit
+def circ_pad(X, all_pads, orig_dims, Y, Y_shape, Y_len):
+    index = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
+    stride = cuda.blockDim.x * cuda.gridDim.x
+
+    for i in range(index, Y_len, stride):
+        i3 = int(i % Y_shape[3])
+        i2 = int((i // Y_shape[3]) % Y_shape[2])
+        i1 = int((i // Y_shape[3] // Y_shape[2]) % Y_shape[1])
+        i0 = int(i // Y_shape[3] // Y_shape[2] // Y_shape[1])
+
+        j0 = int((i0 - all_pads[0]) % orig_dims[0])
+        j1 = int((i1 - all_pads[2]) % orig_dims[1])
+        j2 = int((i2 - all_pads[4]) % orig_dims[2])
+        j3 = int((i3 - all_pads[6]) % orig_dims[3])
+
+        Y[i] = X[
+            int(
+                orig_dims[3] * orig_dims[2] * orig_dims[1] * j0
+                + orig_dims[3] * orig_dims[2] * j1
+                + orig_dims[3] * j2
+                + j3
+            )
+        ]
+
+
+class CircPadPlugin(trt.IPluginV2DynamicExt):
+    def __init__(self, fc=None):
+        trt.IPluginV2DynamicExt.__init__(self)
+        self.pads = []
+        self.X_shape = []
+
+        self.num_outputs = 1
+        self.plugin_namespace = ""
+        self.plugin_type = "CircPadPlugin"
+        self.plugin_version = "1"
+
+        if fc is not None:
+            assert fc[0].name == "pads"
+            self.pads = fc[0].data
+
+    def get_output_datatype(self, index, input_types):
+        return input_types[0]
+
+    def get_output_dimensions(self, output_index, inputs, exprBuilder):
+
+        output_dims = trt.DimsExprs(inputs[0])
+
+        for i in range(np.size(self.pads) // 2):
+            output_dims[len(output_dims) - i - 1] = exprBuilder.operation(
+                trt.DimensionOperation.SUM,
+                inputs[0][len(output_dims) - i - 1],
+                exprBuilder.constant(self.pads[i * 2] + self.pads[i * 2 + 1]),
+            )
+
+        return output_dims
+
+    def serialize(self):
+        return to_json({"pads": self.pads})
+
+    def configure_plugin(self, inp, out):
+        X_dims = inp[0].desc.dims
+        self.X_shape = np.zeros((len(X_dims),))
+        for i in range(len(X_dims)):
+            self.X_shape[i] = X_dims[i]
+
+    def supports_format_combination(self, pos, in_out, num_inputs):
+        assert num_inputs == 1
+        assert pos < len(in_out)
+
+        desc = in_out[pos]
+        if desc.format != trt.TensorFormat.LINEAR:
+            return False
+
+        # first input should be float16 or float32
+        if pos == 0:
+            return desc.type == trt.DataType.FLOAT or desc.type == trt.DataType.HALF
+
+        # output should have the same type as the input
+        if pos == 1:
+            return in_out[0].type == desc.type
+
+        assert False
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):
+        """Launch the Numba `circ_pad` kernel on the given stream to fill outputs[0]; returns 0."""
+        inp_dtype = trt.nptype(input_desc[0].type)
+
+        a_mem = cp.cuda.UnownedMemory(
+            inputs[0], volume(input_desc[0].dims) * cp.dtype(inp_dtype).itemsize, self
+        )
+        c_mem = cp.cuda.UnownedMemory(
+            outputs[0],
+            volume(output_desc[0].dims) * cp.dtype(inp_dtype).itemsize,
+            self,
+        )
+
+        a_ptr = cp.cuda.MemoryPointer(a_mem, 0)
+        c_ptr = cp.cuda.MemoryPointer(c_mem, 0)
+
+        a = cp.ndarray((volume(input_desc[0].dims)), dtype=inp_dtype, memptr=a_ptr)
+        c = cp.ndarray((volume(output_desc[0].dims)), dtype=inp_dtype, memptr=c_ptr)
+
+        numba_stream = cuda.external_stream(stream)
+
+        N = len(self.X_shape)
+        all_pads = np.zeros((N * 2,))
+        orig_dims = np.array(self.X_shape)
+        out_dims = np.array(self.X_shape)
+
+        for i in range(np.size(self.pads) // 2):  # plugin's own pads, not the __main__ global
+            out_dims[N - i - 1] += self.pads[i * 2] + self.pads[i * 2 + 1]
+            all_pads[N * 2 - 2 * i - 2] = self.pads[i * 2]
+            all_pads[N * 2 - 2 * i - 1] = self.pads[i * 2 + 1]
+
+        all_pads_d = cp.asarray(all_pads)
+        orig_dims_d = cp.asarray(orig_dims)
+        Y_shape_d = cp.asarray(out_dims)
+
+        blockSize = 256
+        numBlocks = int((np.prod(out_dims) + blockSize - 1) // blockSize)
+
+        circ_pad[numBlocks, blockSize, numba_stream](
+            a, all_pads_d, orig_dims_d, c, Y_shape_d, np.prod(out_dims)
+        )
+
+        return 0
+
+    def clone(self):
+        cloned_plugin = CircPadPlugin()
+        cloned_plugin.__dict__.update(self.__dict__)
+        return cloned_plugin
+
+    #
+    # The following defaults take effect since the respective methods are not overridden
+    #
+
+    # def initialize(self):
+    #     pass
+
+    # def get_serialization_size(self):
+    #     return len(to_json({"pads": self.pads}))
+
+    # def get_workspace_size(self, input_desc, output_desc):
+    #     return 0
+
+    # def destroy(self):
+    #     pass
+
+    # def terminate(self):
+    #     pass
+
+
+class CircPadPluginCreator(trt.IPluginCreator):
+    def __init__(self):
+        trt.IPluginCreator.__init__(self)
+        self.name = "CircPadPlugin"
+        self.plugin_namespace = ""
+        self.plugin_version = "1"
+        self.field_names = trt.PluginFieldCollection(
+            [trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32)]
+        )
+
+    def create_plugin(self, name, fc):
+        return CircPadPlugin(fc)
+
+    def deserialize_plugin(self, name, data):
+        j = dict(from_json(data.decode("utf-8")))
+        deserialized = CircPadPlugin()
+        deserialized.__dict__.update(j)
+        return deserialized
+
+
+if __name__ == "__main__":
+
+    args = parseArgs()
+    precision = np.float32 if args.precision == "fp32" else np.float16
+
+    inp_shape = (10, 3, 32, 32)
+    X = np.random.normal(size=inp_shape).astype(precision)
+
+    pads = (1, 1, 1, 1)
+
+    # Register plugin creator
+    plg_registry = trt.get_plugin_registry()
+    my_plugin_creator = CircPadPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    # create ONNX model
+    onnx_path = f"test_CircPadPlugin_numba_{args.precision}.onnx"
+    inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision)
+    Y = gs.Variable(name="Y", dtype=precision)
+    myPluginNode = gs.Node(
+        name="CircPadPlugin",
+        op="CircPadPlugin",
+        inputs=[inputA],
+        outputs=[Y],
+        attrs={"pads": pads},
+    )
+    graph = gs.Graph(nodes=[myPluginNode], inputs=[inputA], outputs=[Y], opset=16)
+    onnx.save(gs.export_onnx(graph), onnx_path)
+
+    # build engine
+    build_engine = EngineFromNetwork(
+        NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16)
+    )
+
+    Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+    # Run
+    with TrtRunner(build_engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+
+        if np.allclose(Y, Y_ref):
+            print("Inference result correct!")
+        else:
+            print("Inference result incorrect!")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_torch.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_torch.py
new file mode 100644
index 0000000000000000000000000000000000000000..95861bee521c5b36c729a4e638e4eda2e12e6cd9
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_torch.py
@@ -0,0 +1,214 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import cupy as cp
+import sys
+import os
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    EngineFromNetwork,
+    NetworkFromOnnxPath,
+    TrtRunner,
+)
+
+from polygraphy.json import to_json, from_json
+import torch
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+from plugin_utils import volume, parseArgs
+
+
+
+class CircPadPlugin(trt.IPluginV2DynamicExt):
+    def __init__(self, fc=None):
+        trt.IPluginV2DynamicExt.__init__(self)
+        self.pads = []
+        self.X_shape = []
+
+        self.num_outputs = 1
+        self.plugin_namespace = ""
+        self.plugin_type = "CircPadPlugin"
+        self.plugin_version = "1"
+
+        if fc is not None:
+            assert fc[0].name == "pads"
+            self.pads = fc[0].data
+
+    def get_output_datatype(self, index, input_types):
+        return input_types[0]
+
+    def get_output_dimensions(self, output_index, inputs, exprBuilder):
+
+        output_dims = trt.DimsExprs(inputs[0])
+
+        for i in range(np.size(self.pads) // 2):
+            output_dims[len(output_dims) - i - 1] = exprBuilder.operation(
+                trt.DimensionOperation.SUM,
+                inputs[0][len(output_dims) - i - 1],
+                exprBuilder.constant(self.pads[i * 2] + self.pads[i * 2 + 1]),
+            )
+
+        return output_dims
+
+    def serialize(self):
+        return to_json({"pads": self.pads})
+
+    def configure_plugin(self, inp, out):
+        X_dims = inp[0].desc.dims
+        self.X_shape = np.zeros((len(X_dims),))
+        for i in range(len(X_dims)):
+            self.X_shape[i] = X_dims[i]
+
+    def supports_format_combination(self, pos, in_out, num_inputs):
+        assert num_inputs == 1
+        assert pos < len(in_out)
+
+        desc = in_out[pos]
+        if desc.format != trt.TensorFormat.LINEAR:
+            return False
+
+        # first input should be float16 or float32
+        if pos == 0:
+            return desc.type == trt.DataType.FLOAT or desc.type == trt.DataType.HALF
+
+        # output should have the same type as the input
+        if pos == 1:
+            return in_out[0].type == desc.type
+
+        assert False
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):
+
+        inp_dtype = trt.nptype(input_desc[0].type)
+
+        a_mem = cp.cuda.UnownedMemory(
+            inputs[0], volume(input_desc[0].dims) * cp.dtype(inp_dtype).itemsize, self
+        )
+        c_mem = cp.cuda.UnownedMemory(
+            outputs[0],
+            volume(output_desc[0].dims) * cp.dtype(inp_dtype).itemsize,
+            self,
+        )
+
+        a_ptr = cp.cuda.MemoryPointer(a_mem, 0)
+        c_ptr = cp.cuda.MemoryPointer(c_mem, 0)
+
+        a_d = cp.ndarray(tuple(input_desc[0].dims), dtype=inp_dtype, memptr=a_ptr)
+        c_d = cp.ndarray((volume(output_desc[0].dims)), dtype=inp_dtype, memptr=c_ptr)
+
+        a_t = torch.as_tensor(a_d, device="cuda")
+
+        # Use PyTorch functional op - no need to write kernel
+        out = torch.nn.functional.pad(a_t, self.pads.tolist(), mode="circular")
+        cp.copyto(c_d, cp.reshape(cp.asarray(out), (-1,)))
+
+        return 0
+
+    def clone(self):
+        cloned_plugin = CircPadPlugin()
+        cloned_plugin.__dict__.update(self.__dict__)
+        return cloned_plugin
+
+    #
+    # The following defaults take effect since the respective methods are not overridden
+    #
+
+    # def initialize(self):
+    #     pass
+
+    # def get_serialization_size(self):
+    #     return len(to_json({"pads": self.pads}))
+
+    # def get_workspace_size(self, input_desc, output_desc):
+    #     return 0
+
+    # def destroy(self):
+    #     pass
+
+    # def terminate(self):
+    #     pass
+
+
+class CircPadPluginCreator(trt.IPluginCreator):
+    def __init__(self):
+        trt.IPluginCreator.__init__(self)
+        self.name = "CircPadPlugin"
+        self.plugin_namespace = ""
+        self.plugin_version = "1"
+        self.field_names = trt.PluginFieldCollection(
+            [trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32)]
+        )
+
+    def create_plugin(self, name, fc):
+        return CircPadPlugin(fc)
+
+    def deserialize_plugin(self, name, data):
+        j = dict(from_json(data.decode("utf-8")))
+        deserialized = CircPadPlugin()
+        deserialized.__dict__.update(j)
+        return deserialized
+
+
+if __name__ == "__main__":
+
+    args = parseArgs()
+    precision = np.float32 if args.precision == "fp32" else np.float16
+
+    inp_shape = (10, 3, 32, 32)
+    X = np.random.normal(size=inp_shape).astype(precision)
+
+    pads = (1, 1, 1, 1)
+
+    # Register plugin creator
+    plg_registry = trt.get_plugin_registry()
+    my_plugin_creator = CircPadPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    # create ONNX model
+    onnx_path = f"test_CircPadPlugin_torch_{args.precision}.onnx"
+    inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision)
+    Y = gs.Variable(name="Y", dtype=precision)
+    myPluginNode = gs.Node(
+        name="CircPadPlugin",
+        op="CircPadPlugin",
+        inputs=[inputA],
+        outputs=[Y],
+        attrs={"pads": pads},
+    )
+    graph = gs.Graph(nodes=[myPluginNode], inputs=[inputA], outputs=[Y], opset=16)
+    onnx.save(gs.export_onnx(graph), onnx_path)
+
+    # build engine
+    build_engine = EngineFromNetwork(
+        NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16)
+    )
+
+    Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+    # Run
+    with TrtRunner(build_engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+
+        if np.allclose(Y, Y_ref):
+            print("Inference result correct!")
+        else:
+            print("Inference result incorrect!")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_triton.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_triton.py
new file mode 100644
index 0000000000000000000000000000000000000000..15990a6a1dea8baa1c0e411c858177911110223f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_pad_plugin_triton.py
@@ -0,0 +1,297 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import cupy as cp
+import sys
+import os
+
+import triton
+import triton.language as tl
+
+import tensorrt as trt
+from polygraphy.backend.trt import (
+    CreateConfig,
+    EngineFromNetwork,
+    NetworkFromOnnxPath,
+    TrtRunner,
+)
+
+from polygraphy.json import to_json, from_json
+import torch
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+from plugin_utils import volume, parseArgs
+
+
+
+@triton.jit
+def circ_pad(
+    X,
+    all_pads_0,
+    all_pads_2,
+    all_pads_4,
+    all_pads_6,
+    orig_dims_0,
+    orig_dims_1,
+    orig_dims_2,
+    orig_dims_3,
+    Y,
+    Y_shape_1,
+    Y_shape_2,
+    Y_shape_3,
+    X_len,
+    Y_len,
+    BLOCK_SIZE: tl.constexpr,
+):
+    pid = tl.program_id(0)
+    i = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+
+    mask_y = i < Y_len
+
+    i3 = i % Y_shape_3
+    i2 = (i // Y_shape_3) % Y_shape_2
+    i1 = (i // Y_shape_3 // Y_shape_2) % Y_shape_1
+    i0 = i // Y_shape_3 // Y_shape_2 // Y_shape_1
+
+    j0 = (i0 - all_pads_0 + orig_dims_0) % orig_dims_0
+    j1 = (i1 - all_pads_2 + orig_dims_1) % orig_dims_1
+    j2 = (i2 - all_pads_4 + orig_dims_2) % orig_dims_2
+    j3 = (i3 - all_pads_6 + orig_dims_3) % orig_dims_3
+
+    load_idx = (
+        orig_dims_3 * orig_dims_2 * orig_dims_1 * j0
+        + orig_dims_3 * orig_dims_2 * j1
+        + orig_dims_3 * j2
+        + j3
+    )
+    mask_x = load_idx < X_len
+
+    x = tl.load(X + load_idx, mask=mask_x)
+
+    tl.store(Y + i, x, mask=mask_y)
+
+
+class CircPadPlugin(trt.IPluginV2DynamicExt):
+    def __init__(self, fc=None):
+        trt.IPluginV2DynamicExt.__init__(self)
+        self.pads = []
+        self.X_shape = []
+
+        self.num_outputs = 1
+        self.plugin_namespace = ""
+        self.plugin_type = "CircPadPlugin"
+        self.plugin_version = "1"
+
+        if fc is not None:
+            assert fc[0].name == "pads"
+            self.pads = fc[0].data
+
+    def get_output_datatype(self, index, input_types):
+        return input_types[0]
+
+    def get_output_dimensions(self, output_index, inputs, exprBuilder):
+
+        output_dims = trt.DimsExprs(inputs[0])
+
+        for i in range(np.size(self.pads) // 2):
+            output_dims[len(output_dims) - i - 1] = exprBuilder.operation(
+                trt.DimensionOperation.SUM,
+                inputs[0][len(output_dims) - i - 1],
+                exprBuilder.constant(self.pads[i * 2] + self.pads[i * 2 + 1]),
+            )
+
+        return output_dims
+
+    def serialize(self):
+        return to_json({"pads": self.pads})
+
+    def configure_plugin(self, inp, out):
+        X_dims = inp[0].desc.dims
+        self.X_shape = np.zeros((len(X_dims),))
+        for i in range(len(X_dims)):
+            self.X_shape[i] = X_dims[i]
+
+    def supports_format_combination(self, pos, in_out, num_inputs):
+        assert num_inputs == 1
+        assert pos < len(in_out)
+
+        desc = in_out[pos]
+        if desc.format != trt.TensorFormat.LINEAR:
+            return False
+
+        # first input should be float16 or float32
+        if pos == 0:
+            return desc.type == trt.DataType.FLOAT or desc.type == trt.DataType.HALF
+
+        # output should have the same type as the input
+        if pos == 1:
+            return in_out[0].type == desc.type
+
+        assert False
+
+    def enqueue(self, input_desc, output_desc, inputs, outputs, workspace, stream):
+        """Launch the Triton `circ_pad` kernel to fill outputs[0] from inputs[0]; returns 0."""
+        inp_dtype = trt.nptype(input_desc[0].type)
+
+        a_mem = cp.cuda.UnownedMemory(
+            inputs[0], volume(input_desc[0].dims) * cp.dtype(inp_dtype).itemsize, self
+        )
+        c_mem = cp.cuda.UnownedMemory(
+            outputs[0],
+            volume(output_desc[0].dims) * cp.dtype(inp_dtype).itemsize,
+            self,
+        )
+
+        a_ptr = cp.cuda.MemoryPointer(a_mem, 0)
+        c_ptr = cp.cuda.MemoryPointer(c_mem, 0)
+
+        a_d = cp.ndarray((volume(input_desc[0].dims)), dtype=inp_dtype, memptr=a_ptr)
+        c_d = cp.ndarray((volume(output_desc[0].dims)), dtype=inp_dtype, memptr=c_ptr)
+
+        a_t = torch.as_tensor(a_d, device="cuda")
+        c_t = torch.as_tensor(c_d, device="cuda")
+
+        N = len(self.X_shape)
+        all_pads = np.zeros((N * 2,), dtype=np.int32)
+        orig_dims = np.array(self.X_shape, dtype=np.int32)
+        out_dims = np.array(self.X_shape, dtype=np.int32)
+
+        for i in range(np.size(self.pads) // 2):  # plugin's own pads, not the __main__ global
+            out_dims[N - i - 1] += self.pads[i * 2] + self.pads[i * 2 + 1]
+            all_pads[N * 2 - 2 * i - 2] = self.pads[i * 2]
+            all_pads[N * 2 - 2 * i - 1] = self.pads[i * 2 + 1]
+
+        all_pads = all_pads.tolist()
+        orig_dims = orig_dims.tolist()
+        out_dims = out_dims.tolist()
+
+        blockSize = 256
+        numBlocks = (int((np.prod(out_dims) + blockSize - 1) // blockSize),)
+
+        circ_pad[numBlocks](
+            a_t,
+            all_pads[0],
+            all_pads[2],
+            all_pads[4],
+            all_pads[6],
+            orig_dims[0],
+            orig_dims[1],
+            orig_dims[2],
+            orig_dims[3],
+            c_t,
+            out_dims[1],
+            out_dims[2],
+            out_dims[3],
+            int(np.prod(orig_dims)),
+            int(np.prod(out_dims)),
+            BLOCK_SIZE=256,
+        )
+
+        return 0
+
+    def clone(self):
+        cloned_plugin = CircPadPlugin()
+        cloned_plugin.__dict__.update(self.__dict__)
+        return cloned_plugin
+
+    #
+    # The following defaults take effect since the respective methods are not overridden
+    #
+
+    # def initialize(self):
+    #     pass
+
+    # def get_serialization_size(self):
+    #     return len(to_json({"pads": self.pads}))
+
+    # def get_workspace_size(self, input_desc, output_desc):
+    #     return 0
+
+    # def destroy(self):
+    #     pass
+
+    # def terminate(self):
+    #     pass
+
+
+class CircPadPluginCreator(trt.IPluginCreator):
+    def __init__(self):
+        trt.IPluginCreator.__init__(self)
+        self.name = "CircPadPlugin"
+        self.plugin_namespace = ""
+        self.plugin_version = "1"
+        self.field_names = trt.PluginFieldCollection(
+            [trt.PluginField("pads", np.array([]), trt.PluginFieldType.INT32)]
+        )
+
+    def create_plugin(self, name, fc):
+        return CircPadPlugin(fc)
+
+    def deserialize_plugin(self, name, data):
+        j = dict(from_json(data.decode("utf-8")))
+        deserialized = CircPadPlugin()
+        deserialized.__dict__.update(j)
+        return deserialized
+
+
+if __name__ == "__main__":
+
+    args = parseArgs()
+    precision = np.float32 if args.precision == "fp32" else np.float16
+
+    inp_shape = (10, 3, 32, 32)
+    X = np.random.normal(size=inp_shape).astype(precision)
+
+    pads = (1, 1, 1, 1)
+
+    # Register plugin creator
+    plg_registry = trt.get_plugin_registry()
+    my_plugin_creator = CircPadPluginCreator()
+    plg_registry.register_creator(my_plugin_creator, "")
+
+    # create ONNX model
+    onnx_path = f"test_CircPadPlugin_triton_{args.precision}.onnx"
+    inputA = gs.Variable(name="X", shape=inp_shape, dtype=precision)
+    Y = gs.Variable(name="Y", dtype=precision)
+    myPluginNode = gs.Node(
+        name="CircPadPlugin",
+        op="CircPadPlugin",
+        inputs=[inputA],
+        outputs=[Y],
+        attrs={"pads": pads},
+    )
+    graph = gs.Graph(nodes=[myPluginNode], inputs=[inputA], outputs=[Y], opset=16)
+    onnx.save(gs.export_onnx(graph), onnx_path)
+
+    # build engine
+    build_engine = EngineFromNetwork(
+        NetworkFromOnnxPath(onnx_path), CreateConfig(fp16=precision == np.float16)
+    )
+
+    Y_ref = np.pad(X, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+    # Run
+    with TrtRunner(build_engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+
+        if np.allclose(Y, Y_ref):
+            print("Inference result correct!")
+        else:
+            print("Inference result incorrect!")
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_plugin_cpp/circ_pad_plugin.cu b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_plugin_cpp/circ_pad_plugin.cu
new file mode 100644
index 0000000000000000000000000000000000000000..0bcffd5606288105a620146b7c1b24c9d30c3ab9
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/circ_plugin_cpp/circ_pad_plugin.cu
@@ -0,0 +1,414 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "NvInfer.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include <cuda.h>
+#include <cuda_fp16.h>
+
+using namespace nvinfer1;
+
+// Report a caught exception by printing its what() message, followed by a
+// newline and a flush, to standard output.
+static void caughtError(std::exception const& e)
+{
+    char const* const message = e.what();
+    std::cout << message << '\n' << std::flush;
+}
+
+// Copy the object representation of `val` to the position `buffer` points at,
+// then advance `buffer` past the sizeof(T) bytes that were written.
+template <typename T>
+void write(char*& buffer, T const& val)
+{
+    constexpr std::size_t kNbBytes = sizeof(T);
+    char* const dst = buffer;
+    std::memcpy(dst, &val, kNbBytes);
+    buffer = dst + kNbBytes;
+}
+
+// Decode one value of type T from the position `buffer` points at and advance
+// `buffer` past the sizeof(T) bytes that were consumed.
+template <typename T>
+T read(char const*& buffer)
+{
+    char const* const src = buffer;
+    buffer = src + sizeof(T);
+
+    T decoded{};
+    std::memcpy(&decoded, src, sizeof(T));
+    return decoded;
+}
+
+// Abort-on-failure check. When `condition` evaluates to false the stringified
+// expression is printed to std::cout and abort() is called. The macro has no
+// compile-time switch in this file, so `condition` is always evaluated; code
+// below relies on that by placing CUDA calls directly inside ASSERT(...).
+// The do { ... } while (0) wrapper makes the expansion a single statement.
+#define ASSERT(condition)                                                                                              \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        if (!(condition))                                                                                              \
+        {                                                                                                              \
+            std::cout << "Assertion failure: " << #condition << std::endl;                                             \
+            abort();                                                                                                   \
+        }                                                                                                              \
+    } while (0)
+
+// Owning wrapper around a cudaMalloc'ed array of `size` elements of `Dtype`.
+// The constructor allocates, the destructor releases with cudaFree; a failing
+// CUDA call aborts the process through ASSERT.
+template <typename Dtype>
+struct CudaBind
+{
+    size_t mSize{0};
+    // Initialised to nullptr so the destructor never sees an indeterminate pointer.
+    Dtype* mPtr{nullptr};
+
+    // Allocate room for `size` elements of Dtype.
+    CudaBind(size_t size)
+        : mSize(size)
+    {
+        ASSERT(!cudaMalloc((void**) &mPtr, sizeof(Dtype) * mSize));
+    }
+
+    // The wrapper is the sole owner of mPtr. A memberwise copy would leave two
+    // objects freeing the same allocation, so copying is disabled; share an
+    // instance through a smart pointer instead.
+    CudaBind(CudaBind const&) = delete;
+    CudaBind& operator=(CudaBind const&) = delete;
+
+    ~CudaBind()
+    {
+        if (mPtr != nullptr)
+        {
+            ASSERT(!cudaFree(mPtr));
+            mPtr = nullptr;
+        }
+    }
+};
+
+// Product of the first dims.nbDims extents of `dims`, accumulated in int64_t.
+// Yields 1 when nbDims is 0.
+static int64_t volume(Dims const& dims)
+{
+    int64_t count = 1;
+    for (int32_t axis = 0; axis < dims.nbDims; ++axis)
+    {
+        count *= dims.d[axis];
+    }
+    return count;
+}
+
+// Map a (possibly negative) coordinate into [0, extent). Unlike a single
+// `(coord + extent) % extent`, this stays non-negative even when the pad is
+// larger than the extent, i.e. when the input has to wrap more than once.
+__device__ inline int32_t circPadWrap(int32_t coord, int32_t extent)
+{
+    int32_t const r = coord % extent;
+    return r < 0 ? r + extent : r;
+}
+
+// Circular ("wrap") padding of a 4-D, densely packed row-major tensor.
+//
+// Every output element y[i] is copied from the input element whose coordinate
+// along each axis is the output coordinate minus that axis' pad entry, wrapped
+// into the input extent.
+//
+//   x         input tensor with extents origDims[0..3]
+//   allPads   pad table; entries 0, 2, 4 and 6 are read (one per axis)
+//   origDims  extents of x (entries 0..3 are read)
+//   y         output tensor with extents yShape[0..3]
+//   yShape    extents of y (entries 1..3 are read)
+//   yLen      number of elements in y
+//
+// Threads walk the output with a grid-stride loop, so any launch
+// configuration with at least one thread covers all yLen elements.
+template <typename T>
+__global__ void circPadKernel(
+    T const* x, int32_t const* allPads, int32_t const* origDims, T* y, int32_t const* yShape, int32_t yLen)
+{
+    int32_t index = blockIdx.x * blockDim.x + threadIdx.x;
+    int32_t stride = blockDim.x * gridDim.x;
+
+    // Loop-invariant table entries: load them once per thread instead of once
+    // per output element.
+    int32_t const in0 = origDims[0];
+    int32_t const in1 = origDims[1];
+    int32_t const in2 = origDims[2];
+    int32_t const in3 = origDims[3];
+    int32_t const out1 = yShape[1];
+    int32_t const out2 = yShape[2];
+    int32_t const out3 = yShape[3];
+    int32_t const pad0 = allPads[0];
+    int32_t const pad1 = allPads[2];
+    int32_t const pad2 = allPads[4];
+    int32_t const pad3 = allPads[6];
+
+    for (int32_t i = index; i < yLen; i += stride)
+    {
+        // Decompose the flat output index into 4-D coordinates.
+        int32_t i3 = i % out3;
+        int32_t i2 = (i / out3) % out2;
+        int32_t i1 = (i / out3 / out2) % out1;
+        int32_t i0 = i / out3 / out2 / out1;
+
+        // Shift by the pad and wrap into the input extent.
+        int32_t j0 = circPadWrap(i0 - pad0, in0);
+        int32_t j1 = circPadWrap(i1 - pad1, in1);
+        int32_t j2 = circPadWrap(i2 - pad2, in2);
+        int32_t j3 = circPadWrap(i3 - pad3, in3);
+
+        // Row-major flat index into x.
+        y[i] = x[((j0 * in1 + j1) * in2 + j2) * in3 + j3];
+    }
+}
+
+// TensorRT IPluginV2DynamicExt plugin that circularly pads the trailing axes
+// of a 4-D FP32 or FP16 tensor in linear format.
+//
+// mPads holds (first, second) pad pairs; pair i is applied to axis
+// (rank - 1 - i), so the first pair pads the innermost axis. The extents and
+// pad amounts are uploaded to device memory in configurePlugin() and consumed
+// by circPadKernel in enqueue().
+class CircPadPlugin : public nvinfer1::IPluginV2DynamicExt
+{
+public:
+    CircPadPlugin() = default;
+
+    // Build a plugin from an explicit list of pad amounts.
+    CircPadPlugin(std::vector<int32_t> pads)
+        : mPads(pads)
+    {
+    }
+
+    CircPadPlugin(CircPadPlugin const& p) = default;
+
+    // Rebuild a plugin from the layout written by serialize(): one int32 pad
+    // count followed by that many int32 pad values.
+    CircPadPlugin(void const* serialData, size_t length)
+    {
+        ASSERT(serialData != nullptr);
+        // Check the header before trusting it, so that a truncated or corrupt
+        // buffer is rejected instead of being read past its end.
+        ASSERT(length >= sizeof(int32_t));
+
+        char const* d = static_cast<char const*>(serialData);
+        char const* a = d;
+
+        int32_t padsSize = read<int32_t>(d);
+        ASSERT(padsSize >= 0);
+        ASSERT(length == (static_cast<size_t>(padsSize) + 1) * sizeof(int32_t));
+
+        mPads.resize(padsSize);
+        for (auto& pad : mPads)
+        {
+            pad = read<int32_t>(d);
+        }
+
+        ASSERT(d == a + length);
+    }
+
+    int32_t getNbOutputs() const noexcept override
+    {
+        return 1;
+    }
+
+    // Accept only linear-format tensors; the input (pos 0) may be FP32 or FP16
+    // and the output (pos 1) must have the same type as the input.
+    bool supportsFormatCombination(
+        int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override
+    {
+        PluginTensorDesc const& desc = inOut[pos];
+        if (desc.format != TensorFormat::kLINEAR)
+        {
+            return false;
+        }
+
+        // first input should be float16 or float32
+        if (pos == 0)
+        {
+            return (desc.type == nvinfer1::DataType::kFLOAT || desc.type == nvinfer1::DataType::kHALF);
+        }
+
+        // output should have the same type as the input
+        if (pos == 1)
+        {
+            return (desc.type == inOut[0].type);
+        }
+
+        return false;
+    }
+
+    // Intentionally empty: all configuration happens in configurePlugin().
+    void configureWithFormat(nvinfer1::Dims const*, int32_t, nvinfer1::Dims const*, int32_t, nvinfer1::DataType type,
+        nvinfer1::PluginFormat floatFormat, int32_t) noexcept override
+    {
+    }
+
+    int32_t initialize() noexcept override
+    {
+        return 0;
+    }
+
+    // Drop this instance's references to the device-side tables.
+    void terminate() noexcept override
+    {
+        mAllPadsPtr.reset();
+        mOrigDimsPtr.reset();
+        mOutDimsPtr.reset();
+    }
+
+    // Launch circPadKernel on `stream` for the FP32 or FP16 input.
+    // Returns 0 on success and a non-zero value when the plugin has not been
+    // configured, the output is too large for the kernel's int32 indexing, or
+    // the launch itself reported an error.
+    int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs,
+        void* const* outputs, void* workspace, cudaStream_t stream) noexcept override
+    {
+        auto inpDType = inputDesc[0].type;
+        ASSERT(inpDType == DataType::kFLOAT || inpDType == DataType::kHALF);
+
+        // The device tables are created by configurePlugin(); without them
+        // there is nothing valid to hand to the kernel.
+        if (!mAllPadsPtr || !mOrigDimsPtr || !mOutDimsPtr)
+        {
+            return -1;
+        }
+
+        int64_t const outVolume = volume(outputDesc[0].dims);
+        if (outVolume == 0)
+        {
+            // Empty output: nothing to write, and a zero-block grid would be
+            // an invalid launch configuration.
+            return 0;
+        }
+        if (outVolume < 0 || outVolume > INT32_MAX)
+        {
+            // circPadKernel indexes the output with int32_t.
+            return -1;
+        }
+
+        int32_t const yLen = static_cast<int32_t>(outVolume);
+        int32_t const blockSize = 256;
+        int32_t const numBlocks = static_cast<int32_t>((outVolume + blockSize - 1) / blockSize);
+
+        if (inpDType == DataType::kFLOAT)
+        {
+            circPadKernel<float><<<numBlocks, blockSize, 0, stream>>>(static_cast<float const*>(inputs[0]),
+                mAllPadsPtr->mPtr, mOrigDimsPtr->mPtr, static_cast<float*>(outputs[0]), mOutDimsPtr->mPtr, yLen);
+        }
+        else
+        {
+            circPadKernel<half><<<numBlocks, blockSize, 0, stream>>>(static_cast<half const*>(inputs[0]),
+                mAllPadsPtr->mPtr, mOrigDimsPtr->mPtr, static_cast<half*>(outputs[0]), mOutDimsPtr->mPtr, yLen);
+        }
+
+        // A kernel launch has no return value; query the runtime so a failed
+        // launch is reported to TensorRT instead of being silently ignored.
+        return cudaGetLastError() == cudaSuccess ? 0 : -1;
+    }
+
+    // One int32 for the pad count plus one int32 per pad value.
+    size_t getSerializationSize() const noexcept override
+    {
+        return (mPads.size() + 1) * sizeof(int32_t);
+    }
+
+    // Write the pad count followed by the pad values; the inverse of the
+    // deserializing constructor.
+    void serialize(void* buffer) const noexcept override
+    {
+        ASSERT(buffer != nullptr);
+        char* d = static_cast<char*>(buffer);
+        char* a = d;
+        write(d, static_cast<int32_t>(mPads.size()));
+        for (int32_t const pad : mPads)
+        {
+            write(d, pad);
+        }
+        ASSERT(d == a + getSerializationSize());
+    }
+
+    char const* getPluginType() const noexcept override
+    {
+        return "CircPadPlugin";
+    }
+
+    char const* getPluginVersion() const noexcept override
+    {
+        return "1";
+    }
+
+    // Copy-construct a new plugin. Returns nullptr (after logging) if the
+    // allocation throws, so no exception escapes this noexcept function.
+    nvinfer1::IPluginV2DynamicExt* clone() const noexcept override
+    {
+        try
+        {
+            return new CircPadPlugin(*this);
+        }
+        catch (std::exception const& e)
+        {
+            caughtError(e);
+        }
+        return nullptr;
+    }
+
+    void destroy() noexcept override
+    {
+        delete this;
+    }
+
+    // A null namespace is stored as the empty string.
+    void setPluginNamespace(char const* libNamespace) noexcept override
+    {
+        mNamespace = (libNamespace != nullptr) ? libNamespace : "";
+    }
+
+    char const* getPluginNamespace() const noexcept override
+    {
+        return mNamespace.c_str();
+    }
+
+    // The single output has the type of the first input.
+    DataType getOutputDataType(int index, nvinfer1::DataType const* inputTypes, int nbInputs) const noexcept override
+    {
+        return inputTypes[0];
+    }
+
+    // Output shape = input shape, with the sum of pad pair i added to axis
+    // (rank - 1 - i).
+    DimsExprs getOutputDimensions(
+        int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept override
+    {
+        nvinfer1::DimsExprs outDims{inputs[0]};
+        int32_t nbOutDims = inputs[0].nbDims;
+
+        int32_t const nbPaddedAxes = static_cast<int32_t>(mPads.size() / 2);
+        // More pad pairs than axes would index in front of the dims array.
+        ASSERT(nbPaddedAxes <= nbOutDims);
+
+        for (int32_t i = 0; i < nbPaddedAxes; ++i)
+        {
+            outDims.d[nbOutDims - i - 1] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM,
+                *inputs[0].d[nbOutDims - i - 1], *exprBuilder.constant(mPads[i * 2] + mPads[i * 2 + 1]));
+        }
+
+        return outDims;
+    }
+
+    // Compute the per-axis pad table, the input extents and the output
+    // extents on the host and upload them to freshly allocated device buffers.
+    void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out,
+        int32_t nbOutputs) noexcept override
+    {
+        mN = in[0].desc.dims.nbDims;
+        // circPadKernel decodes exactly four coordinates per element and reads
+        // table entries up to index 3 (extents) and 6 (pads).
+        ASSERT(mN == 4);
+
+        int32_t const nbPaddedAxes = static_cast<int32_t>(mPads.size() / 2);
+        // More pad pairs than axes would index in front of the tables below.
+        ASSERT(nbPaddedAxes <= mN);
+
+        // allPads is zero-initialised, so axes without a pad pair get no padding.
+        std::vector<int32_t> allPads(mN * 2);
+        std::vector<int32_t> origDims(mN);
+        std::vector<int32_t> outDims(mN);
+
+        for (int32_t i = 0; i < mN; ++i)
+        {
+            origDims[i] = in[0].desc.dims.d[i];
+            outDims[i] = in[0].desc.dims.d[i];
+        }
+
+        for (int32_t i = 0; i < nbPaddedAxes; ++i)
+        {
+            outDims[mN - i - 1] += mPads[i * 2] + mPads[i * 2 + 1];
+            allPads[mN * 2 - 2 * i - 2] = mPads[i * 2];
+            allPads[mN * 2 - 2 * i - 1] = mPads[i * 2 + 1];
+        }
+
+        mAllPadsPtr = std::make_shared<CudaBind<int32_t>>(mN * 2);
+        mOrigDimsPtr = std::make_shared<CudaBind<int32_t>>(mN);
+        mOutDimsPtr = std::make_shared<CudaBind<int32_t>>(mN);
+
+        ASSERT(!cudaMemcpy(mAllPadsPtr->mPtr, allPads.data(), allPads.size() * sizeof(int32_t), cudaMemcpyHostToDevice));
+        ASSERT(
+            !cudaMemcpy(mOrigDimsPtr->mPtr, origDims.data(), origDims.size() * sizeof(int32_t), cudaMemcpyHostToDevice));
+        ASSERT(!cudaMemcpy(mOutDimsPtr->mPtr, outDims.data(), outDims.size() * sizeof(int32_t), cudaMemcpyHostToDevice));
+    }
+
+    // The kernel needs no scratch memory.
+    size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
+        int32_t nbOutputs) const noexcept override
+    {
+        return 0;
+    }
+
+private:
+    // (first, second) pad pairs, innermost axis first.
+    std::vector<int32_t> mPads{};
+    // Rank of the input tensor as seen by the last configurePlugin() call.
+    int32_t mN{};
+    // Device-side tables read by circPadKernel; copies of the plugin made by
+    // clone() share them through the shared_ptr.
+    std::shared_ptr<CudaBind<int32_t>> mAllPadsPtr{};
+    std::shared_ptr<CudaBind<int32_t>> mOrigDimsPtr{};
+    std::shared_ptr<CudaBind<int32_t>> mOutDimsPtr{};
+    std::string mNamespace;
+};
+
+// Plugin creator for CircPadPlugin (name "CircPadPlugin", version "1").
+// It advertises a single int32 attribute, "pads", and builds plugin instances
+// either from a PluginFieldCollection or from serialized bytes.
+class CircPadPluginCreator : public nvinfer1::IPluginCreator
+{
+public:
+    CircPadPluginCreator()
+    {
+        mPluginAttributes.emplace_back(PluginField("pads", nullptr, PluginFieldType::kINT32, 1));
+        mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size());
+        // mFC.fields aliases the vector's storage, so mPluginAttributes must
+        // not be resized after this point.
+        mFC.fields = mPluginAttributes.data();
+    }
+
+    char const* getPluginName() const noexcept override
+    {
+        return "CircPadPlugin";
+    }
+
+    char const* getPluginVersion() const noexcept override
+    {
+        return "1";
+    }
+
+    PluginFieldCollection const* getFieldNames() noexcept override
+    {
+        return &mFC;
+    }
+
+    // Create a plugin from the "pads" field of `fc`. Fields with any other
+    // name are ignored; if no "pads" field is present the plugin is created
+    // with an empty pad list. Returns nullptr when `fc` is malformed or an
+    // exception is thrown.
+    IPluginV2* createPlugin(char const* name, PluginFieldCollection const* fc) noexcept override
+    {
+        try
+        {
+            if (fc == nullptr || (fc->nbFields > 0 && fc->fields == nullptr))
+            {
+                std::cout << "CircPadPlugin: missing plugin field collection" << std::endl;
+                return nullptr;
+            }
+
+            std::vector<int32_t> pads;
+
+            for (int32_t i = 0; i < fc->nbFields; i++)
+            {
+                PluginField const& field = fc->fields[i];
+                if (field.name == nullptr || std::strcmp(field.name, "pads") != 0)
+                {
+                    continue;
+                }
+
+                // Never copy from a null payload or with a negative length.
+                if (field.length < 0 || (field.length > 0 && field.data == nullptr))
+                {
+                    std::cout << "CircPadPlugin: invalid \"pads\" field" << std::endl;
+                    return nullptr;
+                }
+
+                auto const* padsPtr = static_cast<int32_t const*>(field.data);
+                pads.assign(padsPtr, padsPtr + field.length);
+            }
+
+            return new CircPadPlugin(pads);
+        }
+        catch (std::exception const& e)
+        {
+            caughtError(e);
+        }
+        return nullptr;
+    }
+
+    // Recreate a plugin from bytes produced by CircPadPlugin::serialize().
+    // Returns nullptr if an exception is thrown.
+    IPluginV2* deserializePlugin(char const* name, void const* serialData, size_t serialLength) noexcept override
+    {
+        try
+        {
+            return new CircPadPlugin(serialData, serialLength);
+        }
+        catch (std::exception const& e)
+        {
+            caughtError(e);
+        }
+        return nullptr;
+    }
+
+    // A null namespace is stored as the empty string.
+    void setPluginNamespace(char const* libNamespace) noexcept override
+    {
+        mNamespace = (libNamespace != nullptr) ? libNamespace : "";
+    }
+
+    char const* getPluginNamespace() const noexcept override
+    {
+        return mNamespace.c_str();
+    }
+
+private:
+    // Field collection handed out by getFieldNames(); points into mPluginAttributes.
+    nvinfer1::PluginFieldCollection mFC{};
+    std::vector<nvinfer1::PluginField> mPluginAttributes;
+    std::string mNamespace;
+};
+
+// Hand CircPadPluginCreator to TensorRT's REGISTER_TENSORRT_PLUGIN macro.
+// NOTE(review): the macro is not defined in this file; presumably it comes
+// from the NvInfer headers and registers the creator when the library is
+// loaded -- confirm against the TensorRT headers.
+REGISTER_TENSORRT_PLUGIN(CircPadPluginCreator);
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7c11ebb8b6696c981862540756f03be11cfa043e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/python_plugin/requirements.txt
@@ -0,0 +1,18 @@
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+cupy-cuda12x
+numba
+triton; platform_system != "Windows"
+torch
+--extra-index-url https://pypi.ngc.nvidia.com
+polygraphy
+colored
+numpy==1.23.5; (platform_system != "Windows" and python_version <= "3.10")
+numpy==1.26.4; (platform_system != "Windows" and python_version >= "3.11")
+onnx==1.16.0; platform_system == "Windows"
+--extra-index-url https://pypi.ngc.nvidia.com
+onnx-graphsurgeon
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..73d18fd64fd6b2f8702e765ea8f2355370afbf66
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/README.md
@@ -0,0 +1,306 @@
+# Quickly Deployable TensorRT Python Plugins [Experimental]
+
+This is a sample to showcase quickly deployable Python-based plugin definitions (QDPs) in TensorRT (TRT). QDPs are able to support a large majority of use cases for adding custom operators to TRT, and will be the recommended option when it becomes a stable feature in 10.9.
+
+This sample contains several mini-samples that demonstrate a few common use cases.
+
+# Contents
+- [Introduction](#introduction)
+- [Setting up the environment](#setting-up-the-environment)
+- [Implementing a Quickly Deployable Python (QDP) Plugin](#implementing-a-quickly-deployable-python-qdp-plugin)
+- [A Simple Plugin: Elementwise-Add](#a-simple-plugin-elementwise-add)
+- [Implementing in-place custom ops with I/O aliasing](#implementing-in-place-custom-ops-with-io-aliasing)
+- [An Op with data-dependent output shapes: Non-zero](#an-op-with-data-dependent-output-shapes-non-zero)
+- [Using multiple tactics and ONNX: Circular padding](#using-multiple-tactics-and-onnx-cirular-padding)
+- [Providing an Ahead-of-Time (AOT) implementation for Circular padding](#providing-an-ahead-of-time-aot-implementation-for-circular-padding)
+- [Additional Resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+
+# Introduction
+
+While the regular TRT plugin interfaces are powerful in the flexibility and tunability they provide, for the vast majority of use cases, users will benefit from the simplicity offered by the QDP workflow.
+ - The `tensorrt.plugin` module provides many intuitive APIs that drastically reduce the amount of boilerplate required to implement a plugin
+ - The concept of plugin registration, plugin creators and the plugin registry is abstracted away
+ - The stateless nature of QDPs eliminates the complications of having to comply with a predefined plugin lifecycle
+
+
+# Setting Up The Environment
+
+To build and install the bindings, follow the instructions in `$TRT_OSSPATH/python/README.md`.
+
+Then install the requisite packages
+```bash
+cd $TRT_OSSPATH/samples/python/quickly_deployable_plugins
+pip3 install -r requirements.txt
+```
+
+# Implementing a Quickly Deployable Python (QDP) Plugin
+
+QDP definitions consist of a set of decorated functions that define properties and behaviors of the plugin.
+### `@tensorrt.plugin.register`
+Returns shape and type characteristics of output tensors, and any attributes the plugin needs to function.
+
+### `@tensorrt.plugin.impl`
+Performs the plugin computation. The decorated python function is executed 'just in time', as a python callback during runtime.
+
+### (Optional) `@tensorrt.plugin.aot_impl`
+The decorated function directly returns an 'ahead of time' compiled kernel, along with information required to invoke it at runtime by TRT. This is in contrast with the above `@tensorrt.plugin.impl` in that, the returned kernel is baked into the built TRT engine. This is beneficial, when we need an engine that is fully independent of the python runtime - and hence, can be executed solely in a standard TensorRT C++ runtime (through `trtexec`, for example).
+
+### (Optional) `@tensorrt.plugin.autotune`
+Defines the different data types and formats (tensor layouts) supported by the plugin's IO and any tactics supported by the plugin. Defining this function allows TensorRT to "tune" the plugin during the engine build to find the most performant type/format and tactic combination on the target system.
+
+The specifics of these functions will become clear through the following mini-samples.
+
+# A Simple Plugin: Elementwise-Add
+
+This mini-sample contains an elementwise addition plugin, where the computation is being performed with an OpenAI Triton kernel. Let's first take a look at the `tensorrt.plugin.register` function.
+
+```python
+import tensorrt.plugin as trtp
+
+@trtp.register("sample::elemwise_add_plugin")
+def add_plugin_desc(inp0: trtp.TensorDesc, block_size: int) -> trtp.TensorDesc:
+    return inp0.like()
+```
+
+The argument "sample::elemwise_add_plugin" defines the namespace ("sample") and name ("elemwise_add_plugin") of the plugin. Input arguments to the decorated function (`add_plugin_desc`) annotated with `trt.plugin.TensorDesc` denote the input tensors; all others are interpreted as plugin attributes (see the [TRT API Reference](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/tensorrt.plugin/trt_plugin_register.html) for a full list of allowed attribute types). The output signature is a `trt.plugin.TensorDesc` describing the output. `inp0.like()` returns a tensor descriptor with identical shape and type characteristics to `inp0`.
+
+The computation function, decorated with `trt.plugin.impl`, receives `trt.plugin.Tensor`s for each input and output. In contrast to `TensorDesc`s, a `Tensor` references an underlying data buffer, directly accessible through `Tensor.data_ptr`. When working with Torch and OpenAI Triton kernels, it is easier to use `torch.as_tensor()` to zero-copy construct a `torch.Tensor` corresponding to the `trt.plugin.Tensor`.
+
+This sample also showcases the effect of omitting/defining a `trt.plugin.autotune` function, which must return a list of `trt.plugin.AutoTuneCombination`s. In this case, we define a single combination `AutoTuneCombination("FP32|FP16, FP32|FP16")`; this indicates that the input and output must be either both FP32 or both FP16. See the TRT API Reference for a detailed description of the grammar underlying `AutoTuneCombination`s.
+
+## Running the sample
+
+```bash
+python3 qdp_runner.py add [--autotune] [-v]
+```
+
+`--autotune` simulates having defined a `trt.plugin.autotune` function. Enabling verbose logging (`-v`) is recommended to see the effect of autotuning. It can be observed that the `trt.plugin.impl` function is invoked several times during the engine build process when autotune is enabled. With autotuning turned off, `trt.plugin.impl` is invoked only once (when inference is run after building the engine).
+
+```bash
+$ python3 qdp_runner.py add --autotune -v
+...
+Executing for inp0.dtype=DataType.FLOAT and output[0].dtype=DataType.FLOAT
+Executing for inp0.dtype=DataType.FLOAT and output[0].dtype=DataType.FLOAT
+Executing for inp0.dtype=DataType.FLOAT and output[0].dtype=DataType.FLOAT
+Executing for inp0.dtype=DataType.FLOAT and output[0].dtype=DataType.FLOAT
+Executing for inp0.dtype=DataType.HALF and output[0].dtype=DataType.HALF
+Executing for inp0.dtype=DataType.HALF and output[0].dtype=DataType.HALF
+Executing for inp0.dtype=DataType.HALF and output[0].dtype=DataType.HALF
+Executing for inp0.dtype=DataType.HALF and output[0].dtype=DataType.HALF
+[I] Finished engine building in 1.073 seconds
+Executing for inp0.dtype=DataType.HALF and output[0].dtype=DataType.HALF
+```
+
+# Implementing in-place custom ops with I/O aliasing
+
+In-place computations can be accomplished with TRT plugins via aliased I/O, i.e., an input that needs to be modified in-place can be represented by an input-output pair, where the output is aliased to the input. For example, if in-place addition is needed (instead of the out-of-place addition of the above sample), that can be achieved as below:
+```python
+import tensorrt.plugin as trtp
+
+@trtp.register("sample::elemwise_add_plugin_")
+def add_plugin_desc_(inp0: trtp.TensorDesc) -> trtp.TensorDesc:
+    return inp0.aliased()
+```
+
+Note the use of `trt.plugin.TensorDesc.aliased()` to produce an output `TensorDesc` that is aliased to `inp0`.
+
+To appreciate the effect of aliasing better, this sample adds two in-place add plugins chained together.
+
+## Running the sample
+
+Enabling verbose logging (`-v`) is recommended to see the effect of autotuning, which is always enabled.
+
+```bash
+python3 qdp_runner.py inplace_add [--autotune] [-v]
+```
+
+# An Op with data-dependent output shapes: Non-zero
+
+Non-zero is an operation where the indices of the non-zero elements of the input tensor are found -- it has data-dependent output shapes (DDS). As such, typical shape calculations cannot be done with input shapes.
+
+To handle DDS, the extent of each data-dependent output dimension must be expressed in terms of a *_size tensor_*, which is a scalar that communicates to TRT an upper-bound and an autotune value for that dimension, in terms of the input shapes. The TRT engine build may be optimized for the autotune value, but the extent of that dimension may stretch up to the upper-bound at runtime.
+
+In this sample, we consider a 2D input tensor `inp0`; the output will be an $N \times 2$ tensor (a set of $N$ 2D indices), where $N$ is the number of non-zero indices. At maximum, all elements could be non-zero, and so the upper-bound could be expressed as `upper_bound = inp0.shape_expr[0] * inp0.shape_expr[1]`. Note that `trt.plugin.TensorDesc.shape_expr` returns symbolic shape expressions for that tensor. Arithmetic operations on shape expressions are supported through standard Python binary operators (see [TRT Python API reference](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/tensorrt.plugin/Shape/ShapeExpr.html) for full list of supported operations).
+
+On average, we can expect half of the input to be filled with zero, so a size tensor can be constructed with that as the autotune value:
+```python
+st = trtp.size_tensor(opt = upper_bound // 2, upper_bound = upper_bound)
+```
+
+Now we're ready to construct the output shape. `st.expr()` returns a shape expression for the size tensor, so a tensor descriptor for the output shape can be constructed as `trt.plugin.from_shape_expr((st.expr(), 2), dtype=trt.int32)`. TRT requires that any size tensors also be made outputs of the plugin. Putting things together, we arrive at the following:
+
+```python
+import tensorrt.plugin as trtp
+
+@trtp.register("sample::non_zero_plugin")
+def non_zero_plugin_reg(
+    inp0: trtp.TensorDesc,
+) -> Tuple[trtp.TensorDesc, trtp.TensorDesc]:
+    upper_bound = inp0.shape_expr[0] * inp0.shape_expr[1]
+    st = trtp.size_tensor(upper_bound // 2, upper_bound)
+    return trtp.from_shape_expr((st.expr(), 2), dtype=trt.int32), st
+```
+
+## Running the sample
+
+Enabling verbose logging (`-v`) is recommended to see the effect of autotuning, which is always enabled.
+
+```bash
+python3 qdp_runner.py non_zero [-v]
+```
+
+# Using multiple tactics and ONNX: Cirular padding
+
+This sample contains a circular padding plugin, which is useful for ops like circular convolution. It is equivalent to PyTorch's [torch.nn.CircularPad2d](https://pytorch.org/docs/stable/generated/torch.nn.CircularPad2d.html#torch.nn.CircularPad2d).
+
+Refer to [this section about circular padding plugin](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/pluginGuide.html#example-circular-padding-plugin) in the python plugin guide for more info.
+
+## ONNX model with a plugin
+
+It is often useful to run an ONNX node with a custom op through a TRT plugin that you have written. To allow the TRT ONNX parser to correctly recognize your plugin as being mapped to an ONNX node, ensure that
+ - The `op` property of the node is exactly the same as your plugin name.
+ - The node contains a string attribute called "plugin_namespace" with the namespace of your plugin.
+
+In this sample, we define a plugin with the ID "sample::circ_pad_plugin", so if using ONNX Graphsurgeon, the custom op node can be constructed as follows:
+
+```python
+import onnx_graphsurgeon as gs
+
+var_x = gs.Variable(name="x", shape=inp_shape, dtype=np.float32)
+var_y = gs.Variable(name="y", dtype=np.float32)
+
+circ_pad_node = gs.Node(
+    name="circ_pad_plugin",
+    op="circ_pad_plugin",
+    inputs=[var_x],
+    outputs=[var_y],
+    attrs={"pads": pads, "plugin_namespace": "sample"},
+)
+```
+
+## Multiple tactics
+
+Sometimes, you may have multiple kernels (or backends) that can be used to perform the computation of the plugin -- these are typically called *_tactics_*. If it cannot be predetermined which of these tactics may perform the fastest, it is possible to let TRT time the plugin for each tactic and determine which one is fastest.
+
+Communicating the availability of multiple tactics can simply be done through the `trt.plugin.autotune` function.
+```python
+import tensorrt.plugin as trtp
+from enum import IntEnum
+
+class Tactic(IntEnum):
+    TORCH = 1
+    TRITON = 2
+
+@trt.plugin.autotune("sample::circ_pad_plugin")
+def circ_pad_plugin_autotune(inp0: trtp.TensorDesc, pads: npt.NDArray[np.int32], outputs: Tuple[trtp.TensorDesc]) -> List[trtp.AutoTuneCombination]:
+    c = trtp.AutoTuneCombination()
+    c.pos([0, 1], "FP32|FP16")
+    c.tactics([int(Tactic.TORCH), int(Tactic.TRITON)])
+    return [c]
+```
+
+Note that we're using another way of constructing a `trt.plugin.AutoTuneCombination` here -- namely, through `pos(...)` to populate the type/format information and `tactics(...)` to specify the tactics. In this sample, we use an OpenAI Triton kernel and `torch.nn.functional.pad` as two methods to compute the circular padding.
+
+Refer to [this section](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/pluginGuide.html#example-plugins-with-multiple-backends-using-custom-tactics) in the Python plugin guide for more info.
+
+## Loading and running a TRT engine containing a plugin
+
+If you have a TRT engine built with a plugin, executing that engine only requires the plugin definitions for `trt.plugin.register` and `trt.plugin.impl` to be available in the module where the engine is being deserialized (note: the `trt.plugin.autotune` definition is not required to be present).
+
+To simulate the loading of an engine, first run this sample with the `--save_engine` flag, followed by `--artifacts_dir [dir]` with a directory in which you wish the engine to be saved. Then run the sample again with `--load_engine` and `--artifacts_dir` set to the same directory.
+
+## Running the sample
+
+```bash
+python3 qdp_runner.py circ_pad [--multi_tactic] [--save_engine] [--load_engine] --mode {onnx,inetdef} [--artifacts_dir ARTIFACTS_DIR]  [-v]
+
+options:
+  --multi_tactic        Enable multiple tactics.
+  --save_engine         Save engine to the artifacts_dir.
+  --load_engine         Load engine from the artifacts_dir. Ignores all other options.
+  --artifacts_dir ARTIFACTS_DIR
+                        Whether to store (or retrieve) artifacts.
+  --mode {onnx,inetdef} Whether to use ONNX parser or INetworkDefinition APIs to construct the network.
+  -v, --verbose         Enable verbose log output.
+```
+
+# Providing an Ahead-of-Time (AOT) implementation for Circular padding
+
+Let's extend the [above sample](#using-multiple-tactics-and-onnx-cirular-padding) by providing an AOT implementation for the same circular padding operation.
+Instead of specifying the OpenAI Triton Kernel callback to TRT through `@trt.plugin.impl`, we can directly
+compile the kernel ahead of time, and provide that to TRT under `@trt.plugin.aot_impl`.
+
+Refer to [this section](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/pluginGuide.html#providing-an-ahead-of-time-aot-implementation) in the Python plugin guide for more info.
+
+## ONNX model with an AOT plugin
+
+The same rules apply as mentioned in the above [ONNX model with a plugin](#onnx-model-with-a-plugin) section.
+In addition to that, if the plugin has an AOT implementation that we'd like to use, we can modify the ONNX node to communicate that to the TRT ONNX parser.
+This should be done by adding a bool attribute called "aot" to the ONNX node, and setting it to True.
+Note that, this is on top of making sure that the ONNX node has the appropriate `op` property and "plugin_namespace" attribute as mentioned [previously](#onnx-model-with-a-plugin).
+
+Therefore, using ONNX Graphsurgeon, the custom op node that uses the AOT implementation of "sample::circ_pad_plugin" can be constructed similarly:
+
+```python
+import onnx_graphsurgeon as gs
+
+var_x = gs.Variable(name="x", shape=inp_shape, dtype=np.float32)
+var_y = gs.Variable(name="y", dtype=np.float32)
+
+circ_pad_aot_node = gs.Node(
+    name="circ_pad_plugin_aot",
+    op="circ_pad_plugin",
+    inputs=[var_x],
+    outputs=[var_y],
+    attrs={"pads": pads, "plugin_namespace": "sample", "aot": True},
+)
+```
+
+## Loading and running a TRT engine containing an AOT plugin
+
+If you have a TRT engine built with an AOT plugin, the plugin computation is already part of the engine. Therefore, it does not require any Python modules or definitions to be present at runtime. This means that the engine can be executed on the standard TRT runtime, as part of any tool that is capable of deserializing and running the engine (like [trtexec](../../trtexec/README.md)).
+
+To simulate the loading of an engine, first run this sample with the `--save_engine` flag, followed by `--artifacts_dir [dir]` with a directory in which you wish the engine to be saved. Then run the sample again with `--load_engine` and `--artifacts_dir` set to the same directory.
+
+## Running the sample
+
+```bash
+python3 qdp_runner.py circ_pad [--aot] [--save_engine] [--load_engine] --mode {onnx,inetdef} [--artifacts_dir ARTIFACTS_DIR]  [-v]
+
+options:
+  --save_engine         Save engine to the artifacts_dir.
+  --load_engine         Load engine from the artifacts_dir. Ignores all other options.
+  --artifacts_dir ARTIFACTS_DIR
+                        Whether to store (or retrieve) artifacts.
+  --mode {onnx,inetdef} Whether to use ONNX parser or INetworkDefinition APIs to construct the network.
+  --aot                 Use the AOT implementation of the plugin.
+  -v, --verbose         Enable verbose log output.
+```
+
+# Additional Resources
+
+**Python Plugin Guide**
+- [pluginGuide.md](../../../documentation/python/pluginGuide.md)
+
+**`tensorrt.plugin` API reference**
+- [`tensorrt.plugin` module API reference](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/tensorrt.plugin/index.html)
+
+**Developer Guide**
+- [Extending TensorRT with Custom Layers](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#extending)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+- December 2024: Added section on AOT Plugins, added contents section
+- October 2024: Initial release of this sample
+
+# Known issues
+
+There are no known issues in this sample
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/oait_kernels.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/oait_kernels.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb1dc2df4730150bcdc1f57c7cda0c2d82e7b801
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/oait_kernels.py
@@ -0,0 +1,78 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import triton
+import triton.language as tl
+
+@triton.jit
+def add_kernel(x_ptr, y_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
+    # Triton kernel: stores x + 1 into y for every offset below n_elements.
+    # Each program instance handles one run of BLOCK_SIZE consecutive offsets,
+    # selected by its index along launch-grid axis 0.
+    pid = tl.program_id(0)
+    offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+    # Disable the lanes of the last block that fall at or beyond n_elements.
+    mask = offsets < n_elements
+    x = tl.load(x_ptr + offsets, mask=mask)
+    tl.store(y_ptr + offsets, x + 1, mask=mask)
+
+
+@triton.jit
+def circ_pad_kernel(
+    # input tensor
+    X,
+    # extra scalar args in between input and output tensors
+    # for kernel signature to be compatible with AOT plugin impl
+    all_pads_0,
+    all_pads_2,
+    all_pads_4,
+    all_pads_6,
+    orig_dims_0,
+    orig_dims_1,
+    orig_dims_2,
+    orig_dims_3,
+    Y_shape_1,
+    Y_shape_2,
+    Y_shape_3,
+    X_len,
+    Y_len,
+    # output tensor
+    Y,
+    BLOCK_SIZE: tl.constexpr,
+):
+    # Triton kernel that fills Y from X with wrap-around indexing over four dimensions.
+    # Each program instance handles BLOCK_SIZE consecutive flat output indices.
+    pid = tl.program_id(0)
+    i = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+
+    # Only flat output indices below Y_len are written.
+    mask_y = i < Y_len
+
+    # Split the flat output index into four per-dimension indices using the
+    # output extents Y_shape_1..Y_shape_3 (i3 varies fastest).
+    i3 = i % Y_shape_3
+    i2 = (i // Y_shape_3) % Y_shape_2
+    i1 = (i // Y_shape_3 // Y_shape_2) % Y_shape_1
+    i0 = i // Y_shape_3 // Y_shape_2 // Y_shape_1
+
+    # Shift each output index back by that dimension's pad amount and wrap it into
+    # [0, orig_dims_k). orig_dims_k is added before the modulo so the dividend is
+    # not negative (assumes the pad does not exceed the dimension size - TODO confirm).
+    j0 = (i0 - all_pads_0 + orig_dims_0) % orig_dims_0
+    j1 = (i1 - all_pads_2 + orig_dims_1) % orig_dims_1
+    j2 = (i2 - all_pads_4 + orig_dims_2) % orig_dims_2
+    j3 = (i3 - all_pads_6 + orig_dims_3) % orig_dims_3
+
+    # Re-linearize the wrapped indices against the input extents.
+    load_idx = (
+        orig_dims_3 * orig_dims_2 * orig_dims_1 * j0
+        + orig_dims_3 * orig_dims_2 * j1
+        + orig_dims_3 * j2
+        + j3
+    )
+    # Guard the load so lanes past the end of the output never read beyond X_len.
+    mask_x = load_idx < X_len
+
+    x = tl.load(X + load_idx, mask=mask_x)
+
+    tl.store(Y + i, x, mask=mask_y)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/qdp_defs.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/qdp_defs.py
new file mode 100644
index 0000000000000000000000000000000000000000..afb3c356be55b06a984f973594a786c741970046
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/qdp_defs.py
@@ -0,0 +1,322 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import tensorrt as trt
+import torch
+import numpy as np
+
+from typing import Tuple, List, Union
+
+import tensorrt.plugin as trtp
+import numpy.typing as npt
+
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("QuicklyDeployablePlugins").setLevel(logging.INFO)
+
+########## Elemwise-add plugin definition ##########
+
+
+@trtp.register("sample::elemwise_add_plugin")
+def add_plugin_desc(inp0: trtp.TensorDesc, block_size: int) -> trtp.TensorDesc:
+    """Describe the single output of ``sample::elemwise_add_plugin``.
+
+    ``block_size`` is a plugin attribute. It is unused here, but the matching
+    impl forwards it to the Triton launch as ``BLOCK_SIZE``.
+    """
+    # NOTE(review): `like()` presumably yields a descriptor mirroring inp0's shape and dtype - confirm.
+    return inp0.like()
+
+
+# Helper to simulate defining/omitting an autotune definition for the plugin
+def register_autotune():
+    """Register the optional autotune definition for ``sample::elemwise_add_plugin``.
+
+    Nothing is registered until this helper is called; qdp_runner.py calls it
+    when the ``add`` sample is run with ``--autotune``.
+    """
+    # Type annotations can be omitted for autotune and impl definitions, but will be checked for consistency if added
+    @trtp.autotune("sample::elemwise_add_plugin")
+    def add_plugin_autotune(
+        inp0: trtp.TensorDesc, outputs: Tuple[trtp.TensorDesc]
+    ) -> List[trtp.AutoTuneCombination]:
+        # NOTE(review): the string presumably lists the allowed dtypes for the input
+        # and the output, in that order - confirm against the tensorrt.plugin docs.
+        return [trtp.AutoTuneCombination("FP32|FP16, FP32|FP16")]
+
+
+@trtp.impl("sample::elemwise_add_plugin")
+def add_plugin_impl(
+    inp0: trtp.Tensor, block_size: int, outputs: Tuple[trtp.Tensor], stream: int
+) -> None:
+
+    log = logging.getLogger("QuicklyDeployablePlugins")
+    log.debug(
+        f"Executing for inp0: dtype={inp0.dtype},format={inp0.format} and output[0]: dtype={outputs[0].dtype},format={outputs[0].format}"
+    )
+
+    n = inp0.numel()
+
+    with torch.cuda.stream(torch.cuda.ExternalStream(stream)):
+        inp0_t = torch.as_tensor(inp0, device="cuda")
+        out_t = torch.as_tensor(outputs[0], device="cuda")
+
+        import triton
+        from oait_kernels import add_kernel
+
+        add_kernel[(triton.cdiv(n, block_size),)](inp0_t, out_t, n, BLOCK_SIZE=block_size)
+
+
+########## In-place elemwise-add plugin definition ##########
+
+
+@trtp.register("sample::elemwise_add_plugin_")
+def add_plugin_desc_(inp0: trtp.TensorDesc, delta: int) -> trtp.TensorDesc:
+    """Describe the output of the in-place add plugin ``sample::elemwise_add_plugin_``.
+
+    ``delta`` is a plugin attribute; the matching impl adds it to the input in place.
+    """
+    # NOTE(review): `aliased()` presumably marks the output as sharing the input's
+    # buffer (the runner enables the ALIASED_PLUGIN_IO preview feature) - confirm.
+    return inp0.aliased()
+
+
+@trtp.autotune("sample::elemwise_add_plugin_")
+def add_plugin_autotune_(inp0, outputs) -> List[trtp.AutoTuneCombination]:
+    """Return the two type/format combinations offered for the in-place add plugin."""
+    # NOTE(review): the first argument presumably lists dtypes per I/O tensor and the
+    # second the tensor formats; the exact meaning of "LINEAR*HWC" should be
+    # confirmed against the tensorrt.plugin documentation.
+    return [
+        trtp.AutoTuneCombination("FP32, FP32", "LINEAR*HWC"),
+        trtp.AutoTuneCombination("FP32|FP16, FP32|FP16", "LINEAR"),
+    ]
+
+
+@trtp.impl("sample::elemwise_add_plugin_")
+def add_plugin_impl_(inp0, delta: int, outputs, stream) -> None:
+
+    log = logging.getLogger("QuicklyDeployablePlugins")
+    log.debug(
+        f"Executing for inp0: dtype={inp0.dtype},format={inp0.format} and output[0]: dtype={outputs[0].dtype},format={outputs[0].format}"
+    )
+
+    with torch.cuda.stream(torch.cuda.ExternalStream(stream)):
+        inp0_t = torch.as_tensor(inp0, device="cuda")
+        inp0_t.add_(delta)
+
+
+########## Non-zero plugin (DDS) ##########
+
+
+@trtp.register("sample::non_zero_plugin")
+def non_zero_plugin_reg(
+    inp0: trtp.TensorDesc,
+) -> Tuple[trtp.TensorDesc, trtp.TensorDesc]:
+    """Describe the two outputs of ``sample::non_zero_plugin``.
+
+    Returns an int32 descriptor of shape ``(st.expr(), 2)`` followed by the size
+    tensor ``st`` whose expression provides that first dimension.
+    """
+    # Product of the first two input dimensions: the largest possible number of
+    # non-zero entries (only those two dimensions are read here).
+    upper_bound = inp0.shape_expr[0] * inp0.shape_expr[1]
+    # NOTE(review): the two arguments are presumably (value used during autotuning,
+    # upper bound) - confirm against the tensorrt.plugin size_tensor documentation.
+    st = trtp.size_tensor(upper_bound // 2, upper_bound)
+    return trtp.from_shape_expr((st.expr(), 2), dtype=trt.int32), st
+
+
+@trtp.autotune("sample::non_zero_plugin")
+def non_zero_plugin_autotune(inp0, outputs) -> List[trtp.AutoTuneCombination]:
+    """Return the single type combination offered for the non-zero plugin."""
+    # NOTE(review): presumably one dtype entry per tensor in order (input, output 0,
+    # output 1), i.e. an FP32/FP16 input with two INT32 outputs - confirm.
+    return [trtp.AutoTuneCombination("FP32|FP16, INT32, INT32")]
+
+
+@trtp.impl("sample::non_zero_plugin")
+def non_zero_plugin_impl(inp0, outputs, stream) -> None:
+
+    log = logging.getLogger("QuicklyDeployablePlugins")
+    log.debug(
+        f"Executing for inp0: dtype={inp0.dtype},format={inp0.format} and output[0]: dtype={outputs[0].dtype},format={outputs[0].format}"
+    )
+
+    with torch.cuda.stream(torch.cuda.ExternalStream(stream)):
+        inp0_t = torch.as_tensor(inp0, device="cuda")
+        out_1 = torch.as_tensor(outputs[1], device="cuda").reshape((-1,))
+
+        out = torch.nonzero(inp0_t)
+
+        out0 = torch.as_tensor(outputs[0].aliased(out.shape), device="cuda")
+        out0.copy_(out)
+        out_1.copy_(torch.Tensor([out.shape[0]]))
+
+
+########## Circular padding plugin ########
+
+
+@trtp.register("sample::circ_pad_plugin")
+def circ_pad_plugin_desc(
+    inp0: trtp.TensorDesc, pads: npt.NDArray[np.int32]
+) -> trtp.TensorDesc:
+    """Describe the output of ``sample::circ_pad_plugin``.
+
+    ``pads`` is read as consecutive pairs. Pair ``i`` is applied to dimension
+    ``ndim - i - 1``, so the first pair grows the last dimension.
+    """
+    ndim = inp0.ndim
+    out_desc = inp0.like()
+
+    # Grow each padded dimension by the sum of its pair of pad amounts.
+    for i in range(np.size(pads) // 2):
+        out_desc.shape_expr[ndim - i - 1] += int(pads[i * 2] + pads[i * 2 + 1])
+
+    return out_desc
+
+
+# Helper to define a multi-tactic implementation of the plugin
+def enable_multi_tactic_circ_pad():
+
+    from enum import IntEnum
+
+    class Tactic(IntEnum):
+        TORCH = 1
+        TRITON = 2
+
+    @trtp.autotune("sample::circ_pad_plugin")
+    def circ_pad_plugin_autotune(
+        inp0: trtp.TensorDesc,
+        outputs: Tuple[trtp.TensorDesc],
+    ) -> List[trtp.AutoTuneCombination]:
+        c = trtp.AutoTuneCombination()
+        c.pos([0, 1], "FP32|FP16")
+        c.tactics([int(Tactic.TORCH), int(Tactic.TRITON)])
+        return [c]
+
+    @trtp.impl("sample::circ_pad_plugin")
+    def circ_pad_plugin_impl(
+        inp0: trtp.Tensor,
+        pads: npt.NDArray[np.int32],
+        outputs: Tuple[trtp.Tensor],
+        stream: int,
+        tactic: int,
+    ) -> None:
+
+        log = logging.getLogger("QuicklyDeployablePlugins")
+        log.debug(
+            f"Executing for inp0: dtype={inp0.dtype},format={inp0.format} and output[0]: dtype={outputs[0].dtype},format={outputs[0].format}"
+        )
+
+        with torch.cuda.stream(torch.cuda.ExternalStream(stream)):
+            inp_t = torch.as_tensor(inp0, device="cuda")
+            out_t = torch.as_tensor(outputs[0], device="cuda")
+
+            if tactic == Tactic.TORCH:
+                out = torch.nn.functional.pad(inp_t, pads.tolist(), mode="circular")
+                out_t.copy_(out)
+            elif tactic == Tactic.TRITON:
+                N = inp0.ndim
+                all_pads = np.zeros((N * 2,), dtype=np.int32)
+                out_dims = trtp.Shape(tuple(inp0.shape))
+
+                for i in range(np.size(pads) // 2):
+                    out_dims[N - i - 1] += pads[i * 2] + pads[i * 2 + 1]
+                    all_pads[N * 2 - 2 * i - 2] = pads[i * 2]
+                    all_pads[N * 2 - 2 * i - 1] = pads[i * 2 + 1]
+
+                all_pads = all_pads.tolist()
+
+                block_size = 256
+                num_blocks = tuple(
+                    [int((np.prod(out_dims) + block_size - 1) // block_size)]
+                )
+
+                from oait_kernels import circ_pad
+
+                circ_pad[num_blocks](
+                    inp_t,
+                    all_pads[0],
+                    all_pads[2],
+                    all_pads[4],
+                    all_pads[6],
+                    inp0.shape[0],
+                    inp0.shape[1],
+                    inp0.shape[2],
+                    inp0.shape[3],
+                    int(out_dims[1]),
+                    int(out_dims[2]),
+                    int(out_dims[3]),
+                    inp0.numel(),
+                    out_dims.numel(),
+                    out_t,
+                    BLOCK_SIZE=block_size,
+                )
+
+
+# Helper to define a single tactic implementation of the plugin
+def enable_single_tactic_circ_pad():
+    """Register the single-tactic definitions for ``sample::circ_pad_plugin``.
+
+    Three definitions are registered: an autotune function, a PyTorch-based impl,
+    and an AOT impl that compiles ``circ_pad_kernel`` from ``oait_kernels`` with Triton.
+    """
+    @trtp.autotune("sample::circ_pad_plugin")
+    def circ_pad_plugin_autotune(
+        inp0: trtp.TensorDesc,
+        outputs: Tuple[trtp.TensorDesc],
+    ) -> List[trtp.AutoTuneCombination]:
+
+        return [trtp.AutoTuneCombination("FP32|FP16, FP32|FP16")]
+
+    @trtp.impl("sample::circ_pad_plugin")
+    def circ_pad_plugin_impl(
+        inp0: trtp.Tensor,
+        pads: npt.NDArray[np.int32],
+        outputs: Tuple[trtp.Tensor],
+        stream: int,
+    ) -> None:
+        # Circular padding via PyTorch on the externally supplied CUDA stream;
+        # the padded result is copied into the plugin's output buffer.
+        with torch.cuda.stream(torch.cuda.ExternalStream(stream)):
+            inp_t = torch.as_tensor(inp0, device="cuda")
+            out_t = torch.as_tensor(outputs[0], device="cuda")
+
+            out = torch.nn.functional.pad(inp_t, pads.tolist(), mode="circular")
+            out_t.copy_(out)
+
+    @trtp.aot_impl("sample::circ_pad_plugin")
+    def circ_pad_plugin_aot_impl(
+        inp0: trtp.TensorDesc, pads: npt.NDArray[np.int32], outputs: Tuple[trtp.TensorDesc], tactic: int
+    ) -> Tuple[Union[str, bytes], Union[str, bytes], trtp.KernelLaunchParams, trtp.SymExprs]:
+        # Returns (kernel name, PTX, launch parameters, extra scalar kernel arguments),
+        # with the name and PTX encoded to bytes.
+
+        block_size = 256
+
+        N = inp0.ndim
+        all_pads = np.zeros((N * 2,), dtype=np.int32)
+        inp_dims = inp0.shape_expr
+        out_dims = outputs[0].shape_expr
+
+        # Pair i of `pads` applies to dimension N - i - 1; record the pairs in
+        # per-dimension order.
+        for i in range(np.size(pads) // 2):
+            all_pads[N * 2 - 2 * i - 2] = pads[i * 2]
+            all_pads[N * 2 - 2 * i - 1] = pads[i * 2 + 1]
+
+        all_pads = all_pads.tolist()
+
+        # Representing all int32-scalar-kernel-inputs as symbolic expressions.
+        # These inputs are either constants or derivatives of input/output shapes (that may be dynamic).
+        # The symbolic expressions are resolved after the full shape context becomes available at runtime.
+        # The 13 entries follow the order of circ_pad_kernel's scalar parameters
+        # (all_pads_0 .. Y_len); the indexing assumes a 4-D input.
+        extra_args = trtp.SymIntExprs.from_tuple(
+            [
+                trtp.SymInt32(e)
+                for e in [
+                    all_pads[0],
+                    all_pads[2],
+                    all_pads[4],
+                    all_pads[6],
+                    inp_dims[0],
+                    inp_dims[1],
+                    inp_dims[2],
+                    inp_dims[3],
+                    out_dims[1],
+                    out_dims[2],
+                    out_dims[3],
+                    inp_dims.numel(),
+                    out_dims.numel(),
+                ]
+            ]
+        )
+
+
+        # Any dtype other than float32 is treated as fp16.
+        type_str = "fp32" if inp0.dtype == trt.float32 else "fp16"
+
+        from oait_kernels import circ_pad_kernel
+        import triton
+
+        # Signature: input pointer, 13 int32 scalars, output pointer.
+        src = triton.compiler.ASTSource(
+            fn=circ_pad_kernel,
+            signature=f"*{type_str},{','.join(['i32']*13)},*{type_str}",
+            constants={
+                "BLOCK_SIZE": block_size,
+            },
+        )
+
+        compiled_kernel = triton.compile(src)
+        launch_params = trtp.KernelLaunchParams()
+
+        # grid dims
+        launch_params.grid_x = trtp.cdiv(out_dims.numel(), block_size)
+        # block dims
+        launch_params.block_x = compiled_kernel.metadata.num_warps * 32
+        # shared memory
+        launch_params.shared_mem = compiled_kernel.metadata.shared
+
+        return compiled_kernel.metadata.name.encode(), compiled_kernel.asm["ptx"].encode(), launch_params, extra_args
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/qdp_runner.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/qdp_runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..a685a5dccef74aed07ebaa1dd28a9c9d515ebdd4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/qdp_runner.py
@@ -0,0 +1,366 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import tensorrt as trt
+import torch
+import numpy as np
+
+from polygraphy.backend.trt import (
+    CreateConfig,
+    TrtRunner,
+    create_network,
+    engine_from_network,
+    network_from_onnx_path,
+    bytes_from_engine,
+    engine_from_bytes,
+)
+
+from polygraphy.backend.common import bytes_from_path
+from polygraphy import cuda
+
+import onnx_graphsurgeon as gs
+import onnx
+import os
+import argparse
+
+import tensorrt.plugin as trtp
+
+import qdp_defs
+import logging
+
+def run_add(enable_autotune=False):
+
+    if enable_autotune:
+        qdp_defs.register_autotune()
+
+    BLOCK_SIZE = 256
+
+    builder, network = create_network()
+    x = torch.randint(10, (10, 3, 32, 32), dtype=torch.float32, device="cuda")
+
+    # Populate network
+    i_x = network.add_input(name="x", dtype=trt.DataType.FLOAT, shape=x.shape)
+
+    out = network.add_plugin(
+        trtp.op.sample.elemwise_add_plugin(i_x, block_size=BLOCK_SIZE)
+    )
+    out.get_output(0).name = "y"
+    network.mark_output(tensor=out.get_output(0))
+
+    builder.create_builder_config()
+
+    engine = engine_from_network(
+        (builder, network),
+        CreateConfig(fp16=True),
+    )
+
+    with TrtRunner(engine, "trt_runner") as runner:
+        outputs = runner.infer(
+            {
+                "x": x,
+            },
+            copy_outputs_to_host=False,
+        )
+
+    if torch.allclose(x + 1, outputs["y"]):
+        print("Inference result is correct!")
+    else:
+        print("Inference result is incorrect!")
+
+
+def run_inplace_add():
+    builder, network = create_network()
+    x = torch.ones((10, 3, 32, 32), dtype=torch.float32, device="cuda")
+
+    x_clone = x.clone()
+
+    i_x = network.add_input(name="x", dtype=trt.DataType.FLOAT, shape=x.shape)
+
+    # Amounts to elementwise-add in the first and second plugins
+    deltas = (2, 4)
+
+    out0 = network.add_plugin(trtp.op.sample.elemwise_add_plugin_(i_x, delta=deltas[0]))
+    out1 = network.add_plugin(
+        trtp.op.sample.elemwise_add_plugin_(out0.get_output(0), delta=deltas[1])
+    )
+    out1.get_output(0).name = "y"
+    network.mark_output(tensor=out1.get_output(0))
+
+    builder.create_builder_config()
+
+    # Enable preview feature for aliasing plugin I/O
+    config = CreateConfig(
+        fp16=True, preview_features=[trt.PreviewFeature.ALIASED_PLUGIN_IO_10_03]
+    )
+
+    engine = engine_from_network(
+        (builder, network),
+        config,
+    )
+
+    context = engine.create_execution_context()
+
+    stream = cuda.Stream()
+
+    context.set_tensor_address("x", x.data_ptr())
+    context.set_tensor_address("y", x.data_ptr())
+    context.execute_async_v3(stream.ptr)
+    stream.synchronize()
+
+    if torch.allclose(x, x_clone + sum(deltas), atol=1e-2):
+        print("Inference result is correct!")
+    else:
+        print("Inference result is incorrect!")
+        print(x[0][0][0][:10])
+        print(x_clone[0][0][0][:10])
+
+
+def run_non_zero():
+    builder, network = create_network()
+    inp_shape = (128, 128)
+
+    X = np.random.normal(size=inp_shape).astype(trt.nptype(trt.DataType.FLOAT))
+
+    # Zero out some random indices
+    indices = np.random.choice(
+        np.prod(inp_shape),
+        replace=False,
+        size=np.random.randint(0, np.prod(inp_shape) + 1),
+    )
+    X[np.unravel_index(indices, inp_shape)] = 0
+
+    # Populate network
+    i_x = network.add_input(name="X", dtype=trt.DataType.FLOAT, shape=inp_shape)
+
+    out = network.add_plugin(trtp.op.sample.non_zero_plugin(i_x))
+    out.get_output(0).name = "Y"
+    network.mark_output(tensor=out.get_output(0))
+
+    builder.create_builder_config()
+
+    engine = engine_from_network(
+        (builder, network),
+        config=CreateConfig(fp16=True),
+    )
+
+    Y_ref = np.transpose(np.nonzero(X))
+
+    with TrtRunner(engine, "trt_runner") as runner:
+        outputs = runner.infer({"X": X})
+        Y = outputs["Y"]
+        Y = Y[np.lexsort(np.fliplr(Y).T)]
+
+    if np.allclose(Y, Y_ref, atol=1e-3):
+        print("Inference result is correct!")
+    else:
+        print("Inference result is incorrect!")
+
+
+def check_artifacts_dir_exists(artifacts_dir):
+    if not os.path.exists(artifacts_dir):
+        raise ValueError(f"artifacts_dir '{artifacts_dir}' does not exist")
+
+
+def run_circ_pad(
+    enable_multi_tactic=False, mode="onnx", artifacts_dir=None, save_or_load_engine=None, aot=False
+):
+
+    if enable_multi_tactic:
+        qdp_defs.enable_multi_tactic_circ_pad()
+    else:
+        qdp_defs.enable_single_tactic_circ_pad()
+
+    inp_shape = (10, 3, 32, 32)
+    x = np.random.normal(size=inp_shape).astype(trt.nptype(trt.DataType.FLOAT))
+
+    pads = np.array((1, 1, 1, 1), dtype=np.int32)
+
+    if save_or_load_engine is not None and save_or_load_engine is False:
+        check_artifacts_dir_exists(artifacts_dir)
+        engine_path = os.path.join(artifacts_dir, "circ_pad.engine")
+        engine = engine_from_bytes(bytes_from_path(engine_path))
+    else:
+        if mode == "inetdef":
+            builder, network = create_network()
+            i_x = network.add_input(name="x", dtype=trt.DataType.FLOAT, shape=x.shape)
+            out = network.add_plugin(trtp.op.sample.circ_pad_plugin(i_x, pads=pads), aot = aot)
+            out.get_output(0).name = "y"
+            network.mark_output(tensor=out.get_output(0))
+
+            engine = engine_from_network(
+                (builder, network),
+                CreateConfig(fp16=True),
+            )
+        elif mode == "onnx":
+            if artifacts_dir is None:
+                raise ValueError("'artifacts_dir' must be specified in onnx mode")
+
+            check_artifacts_dir_exists(artifacts_dir)
+
+            onnx_path = os.path.join(artifacts_dir, "circ_pad.onnx")
+            var_x = gs.Variable(name="x", shape=inp_shape, dtype=np.float32)
+            var_y = gs.Variable(name="y", dtype=np.float32)
+            circ_pad_node = gs.Node(
+                name="circ_pad_plugin 0",
+                op="circ_pad_plugin",
+                inputs=[var_x],
+                outputs=[var_y],
+                attrs={"pads": pads, "plugin_namespace": "sample", "aot": aot},
+            )
+            graph = gs.Graph(
+                nodes=[circ_pad_node], inputs=[var_x], outputs=[var_y], opset=16
+            )
+            onnx.save(gs.export_onnx(graph), onnx_path)
+
+            engine = engine_from_network(
+                network_from_onnx_path(onnx_path), CreateConfig(fp16=True)
+            )
+        else:
+            raise ValueError(f"Unknown mode {mode}")
+
+        if save_or_load_engine is not None and save_or_load_engine is True:
+            check_artifacts_dir_exists(artifacts_dir)
+            engine_path = os.path.join(artifacts_dir, "circ_pad.engine")
+            with open(engine_path, "wb") as f:
+                f.write(bytes_from_engine(engine))
+
+    Y_ref = np.pad(x, [[0, 0], [0, 0], [pads[0], pads[1]], [pads[2], pads[3]]], "wrap")
+
+    with TrtRunner(engine, "trt_runner") as runner:
+        outputs = runner.infer({"x": x})
+        Y = outputs["y"]
+
+        if np.allclose(Y, Y_ref, atol=1e-2):
+            print("Inference result is correct!")
+        else:
+            print("Inference result is incorrect!")
+
+
+def setup_add_sample(subparsers):
+    subparser = subparsers.add_parser("add", help="'add' sample help")
+    subparser.add_argument("--autotune", action="store_true", help="Enable autotuning")
+    subparser.add_argument("--aot", action="store_true", help="Use the AOT implementation of the plugin")
+    subparser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable more verbose log output"
+    )
+
+
+def setup_inplace_add_sample(subparsers):
+    subparser = subparsers.add_parser("inplace_add", help="inplace_add sample help")
+    subparser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable more verbose log output"
+    )
+
+
+def setup_non_zero_sample(subparsers):
+    subparser = subparsers.add_parser("non_zero", help="non_zero sample help")
+    subparser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable more verbose log output"
+    )
+
+
+def setup_circ_pad_sample(subparsers):
+    subparser = subparsers.add_parser("circ_pad", help="circ_pad sample help.")
+    subparser.add_argument(
+        "--multi_tactic", action="store_true", help="Enable multiple tactics."
+    )
+    subparser.add_argument(
+        "--save_engine", action="store_true", help="Save engine to the artifacts_dir."
+    )
+    subparser.add_argument(
+        "--load_engine",
+        action="store_true",
+        help="Load engine from the artifacts_dir. Ignores all other options.",
+    )
+    subparser.add_argument(
+        "--artifacts_dir",
+        type=str,
+        help="Whether to store (or retrieve) artifacts.",
+    )
+    subparser.add_argument(
+        "--mode",
+        type=str,
+        choices=["onnx", "inetdef"],
+        help="Whether to use ONNX parser or INetworkDefinition APIs to construct the network.",
+    )
+    subparser.add_argument("--aot", action="store_true", help="Use the AOT implementation of the plugin.")
+    subparser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable verbose log output."
+    )
+
+    return subparser
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser = argparse.ArgumentParser(description="Main script help")
+    subparsers = parser.add_subparsers(dest="sample", help="Mode help", required=True)
+
+    setup_add_sample(subparsers)
+    setup_inplace_add_sample(subparsers)
+    circ_pad_subparser = setup_circ_pad_sample(subparsers)
+    setup_non_zero_sample(subparsers)
+
+    args = parser.parse_args()
+
+    if args.verbose:
+        logging.getLogger("QuicklyDeployablePlugins").setLevel(logging.DEBUG)
+
+    if args.sample == "add":
+        run_add(args.autotune)
+    if args.sample == "inplace_add":
+        run_inplace_add()
+    if args.sample == "non_zero":
+        run_non_zero()
+    if args.sample == "circ_pad":
+        if args.mode == "onnx":
+            if args.artifacts_dir is None:
+                parser.error(
+                    "circ_pad: argument --mode: When mode is 'onnx', artifacts_dir is required"
+                )
+
+        save_or_load_engine = None
+
+        if args.load_engine is True:
+            if args.save_engine is True:
+                parser.error(
+                    "circ_pad: save_engine and load_engine cannot be specified at the same time. First save_engine and load_engine separately."
+                )
+            else:
+                if args.multi_tactic is True or args.mode is not None:
+                    print(
+                        "warning circ_pad: when load_engine is specified, all other options except 'artifacts_dir' is ignored."
+                    )
+
+            save_or_load_engine = False
+        else:
+            if args.mode is None:
+                circ_pad_subparser.print_help()
+                parser.error(
+                    "circ_pad: '--mode' option is required."
+                )
+
+        if args.save_engine is True:
+            save_or_load_engine = True
+
+        if args.multi_tactic and args.aot:
+            parser.error(
+                "circ_pad: '--aot' is not supported when '--multi_tactic' is specified."
+            )
+
+        run_circ_pad(args.multi_tactic, args.mode, args.artifacts_dir, save_or_load_engine, args.aot)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f6f815b7fd6db5585fbf5479576917ae61f3ac64
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/quickly_deployable_plugins/requirements.txt
@@ -0,0 +1,14 @@
+triton==3.1.0; (platform_system != "Windows" and python_version <= "3.8")
+triton==3.2.0; (platform_system != "Windows" and python_version >= "3.9")
+torch
+--extra-index-url https://pypi.ngc.nvidia.com
+polygraphy
+colored
+numpy==1.23.5; (platform_system != "Windows" and python_version <= "3.10")
+numpy==1.26.4; (platform_system != "Windows" and python_version >= "3.11")
+onnx==1.16.0; platform_system == "Windows"
+--extra-index-url https://pypi.ngc.nvidia.com
+onnx-graphsurgeon
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/1_run_onnx_with_tensorrt/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/1_run_onnx_with_tensorrt/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9279be1b03d176cbb61f8f5724d103c1a57c3b21
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/1_run_onnx_with_tensorrt/README.md
@@ -0,0 +1,61 @@
+# Run ONNX with TensorRT
+This sample demonstrates:
+
+- Converting a pre-trained [EfficientNet](https://arxiv.org/abs/1905.11946)-B0 ONNX model to a `TensorRT` engine
+- Performing inference with `TensorRT` using Python APIs
+- Comparing inference performance between `ONNX Runtime` and `TensorRT`
+- Proper memory management and resource cleanup in both Python implementations
+
+## Key features demonstrated:
+
+- `TensorRT`'s ONNX parser + ONNX model -> `TensorRT` engine
+- Engine building and serialization
+- Input/output tensor handling
+- Performance profiling
+- Editable timing cache for deterministic engine builds
+- Memory pool optimization with workspace configuration
+
+## Implementation Details
+
+### Memory Management
+- Configures workspace memory pool for running under limited hardware
+
+### Engine Building
+- Supports editable timing cache for deterministic builds
+- Serialization and deserialization of TensorRT engines
+
+### Inference Pipeline
+- Efficient image preprocessing with `PIL` and `NumPy`
+- Supports batch inference
+- Implements proper error handling and resource cleanup
+- Provides performance comparison between `ONNX Runtime` and `TensorRT`
+- Performs inference on a real-world image
+
+## CLI Tools 
+Users can run their ONNX model and generate the engine with similar functionality using `trtexec`:
+
+```bash
+# Basic conversion with performance profiling
+trtexec --onnx=efficientnet-b0.onnx \
+        --saveEngine=efficientnet-b0_trtexec.plan \
+        --dumpProfile \
+        --iterations=100 \
+        --avgRuns=100 \
+        --workspace=1024 \
+        --batch=1
+```
+
+Key options explained:
+- `--onnx`: Input ONNX model
+- `--saveEngine`: Output TensorRT engine
+- `--dumpProfile`: Performance profiling
+- `--iterations`: Number of inference iterations
+- `--avgRuns`: Number of runs to average for timing
+- `--workspace`: Workspace size in MB (1024MB = 1GB)
+- `--batch`: Batch size for inference
+
+## Additional Resources
+
+- [TensorRT Documentation](https://docs.nvidia.com/deeplearning/tensorrt/latest/index.html)
+- [ONNX Documentation](https://onnx.ai/)
+- [EfficientNet Paper](https://arxiv.org/abs/1905.11946)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/1_run_onnx_with_tensorrt/main.ipynb b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/1_run_onnx_with_tensorrt/main.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..97685b938903b5125654fd4d759999ac0ad912e0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/1_run_onnx_with_tensorrt/main.ipynb
@@ -0,0 +1,589 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0\n",
+    "\n",
+    "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use\n",
+    "\n",
+    "this file except in compliance with the License. You may obtain a copy of the License at\n",
+    "\n",
+    "\n",
+    "\n",
+    "http://www.apache.org/licenses/LICENSE-2.0\n",
+    "\n",
+    "\n",
+    "\n",
+    "Unless required by applicable law or agreed to in writing, software\n",
+    "\n",
+    "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+    "\n",
+    "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+    "\n",
+    "See the License for the specific language governing permissions and\n",
+    "\n",
+    "limitations under the License.\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Getting Started with TensorRT: Accelerate Your Deep Learning Inference\n",
+    "\n",
+    "Welcome to your first TensorRT tutorial! In this notebook, you'll learn how to:\n",
+    "1. Load a pre-trained EfficientNet model in ONNX format\n",
+    "2. Convert it to a TensorRT engine for faster inference\n",
+    "3. Run inference and see the speedup firsthand\n",
+    "4. Make predictions on real images\n",
+    "\n",
+    "## Understanding ONNX: The Universal Model Format\n",
+    "\n",
+    "ONNX (Open Neural Network Exchange) is a standard format for representing deep learning models. Think of it as a universal language that different deep learning frameworks can understand. Here's why it's important:\n",
+    "\n",
+    "- **Framework Independence**: Models trained in PyTorch, TensorFlow, or other frameworks can be exported to ONNX\n",
+    "- **Interoperability**: ONNX models can be imported into various inference engines and frameworks\n",
+    "- **Production Ready**: ONNX is widely used in production environments for model deployment\n",
+    "\n",
+    "### The ONNX to TensorRT Workflow\n",
+    "\n",
+    "TensorRT is NVIDIA's deep learning inference optimizer that can import models from ONNX. This makes it a powerful tool in your deployment pipeline:\n",
+    "\n",
+    "```\n",
+    "Your Framework (PyTorch/TF/etc.) → ONNX → TensorRT ===> Optimized Inference\n",
+    "```\n",
+    "\n",
+    "This workflow is particularly powerful because:\n",
+    "1. You can train your model in any framework you prefer\n",
+    "2. Export it to ONNX (a one-time conversion)\n",
+    "3. Use TensorRT to optimize it for NVIDIA GPUs\n",
+    "4. Get significant speedup in production\n",
+    "\n",
+    "## Prerequisites\n",
+    "\n",
+    "Before we start, make sure you have:\n",
+    "- NVIDIA GPU with CUDA support\n",
+    "- Python 3.8+ installed\n",
+    "- Basic understanding of deep learning and inference\n",
+    "\n",
+    "Let's begin by installing and importing the required packages:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install tensorrt cuda-python pillow onnxruntime-gpu==1.16.1\n",
+    "import tensorrt as trt\n",
+    "from cuda import cudart\n",
+    "from PIL import Image\n",
+    "import numpy as np\n",
+    "from pathlib import Path\n",
+    "import time\n",
+    "from typing import Optional, Union, Tuple"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "root = Path.cwd()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# define a function to download files\n",
+    "\n",
+    "import requests\n",
+    "from requests.adapters import HTTPAdapter\n",
+    "from urllib3.util.retry import Retry\n",
+    "\n",
+    "def download_file(url: str, output_path: Union[str, Path]):\n",
+    "    \"\"\"Download a file with retry mechanism.\"\"\"\n",
+    "    session = requests.Session()\n",
+    "    retry = Retry(total=10, backoff_factor=1)\n",
+    "    adapter = HTTPAdapter(max_retries=retry)\n",
+    "    session.mount('http://', adapter)\n",
+    "    session.mount('https://', adapter)\n",
+    "    \n",
+    "    response = session.get(url, verify=False, timeout=30)\n",
+    "    output_path.parent.mkdir(parents=True, exist_ok=True)\n",
+    "    with open(output_path, 'wb') as f:\n",
+    "        f.write(response.content)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 1: Download a Pre-trained Model\n",
+    "\n",
+    "We'll use EfficientNet-B0, a popular and efficient image classification model, as an example for this sample. \n",
+    "\n",
+    "### Understanding ONNX Model Structure\n",
+    "\n",
+    "An ONNX model, just like any other model representation, contains:\n",
+    "- Model architecture (layers, connections)\n",
+    "- Weights and biases\n",
+    "- Input/output specifications\n",
+    "- Metadata about the model\n",
+    "\n",
+    "\n",
+    "This standardized format makes it easy to move models between different frameworks and inference engines."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "download_file(\"https://github.com/onnx/models/raw/refs/heads/main/Computer_Vision/efficientnet_b0_Opset17_timm/efficientnet_b0_Opset17.onnx\", root / \"efficientnet-b0.onnx\")\n",
+    "assert (root / \"efficientnet-b0.onnx\").exists(), \"Model file not found. Please check if the download was successful.\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 2: Convert ONNX to TensorRT Engine\n",
+    "\n",
+    "This is where the magic happens! We'll convert our ONNX model into a TensorRT engine. The engine is optimized for your specific GPU and will run much faster than the original model.\n",
+    "\n",
+    "### The Conversion Process\n",
+    "\n",
+    "1. **Load ONNX Model**: TensorRT reads the ONNX file and understands the model structure\n",
+    "2. **Optimize**: TensorRT performs several optimizations:\n",
+    "   - Layer fusion\n",
+    "   - Memory optimization\n",
+    "   - Precision calibration\n",
+    "3. **Generate Engine**: Creates a highly optimized inference engine\n",
+    "\n",
+    "The resulting engine is specific to your GPU and will run much faster than the original ONNX model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "logger = trt.Logger(trt.Logger.WARNING)\n",
+    "builder = trt.Builder(logger)\n",
+    "network = builder.create_network()\n",
+    "\n",
+    "# Bind the TensorRT network to the parser so that the parser can update the network later accordingly\n",
+    "parser = trt.OnnxParser(network, logger)\n",
+    "\n",
+    "onnx_path = root / \"efficientnet-b0.onnx\"\n",
+    "print(f'Parsing ONNX model at {onnx_path}...')\n",
+    "with open(onnx_path, \"rb\") as model:\n",
+    "    parser.parse(model.read())\n",
+    "print('Parsing ONNX model... done')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now that we have the TensorRT `INetworkDefinition`, we can start building the engine."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = builder.create_builder_config()\n",
+    "\n",
+    "# TensorRT needs memory for layer operations and intermediate activations during inference\n",
+    "# Setting a memory limit helps control resource usage and prevents out-of-memory errors\n",
+    "config.set_memory_pool_limit(\n",
+    "        trt.MemoryPoolType.WORKSPACE, 1 << 30\n",
+    ") # 1GB\n",
+    "\n",
+    "print('Starting to build engine. This might take several minutes depending on the hardware...')\n",
+    "engine = builder.build_serialized_network(network, config)\n",
+    "assert engine is not None, 'Engine build failed'\n",
+    "\n",
+    "engine_path = root / \"efficientnet-b0.plan\"\n",
+    "with open(engine_path, 'wb') as f:\n",
+    "    f.write(engine)\n",
+    "\n",
+    "print(\"TensorRT engine created successfully!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Optional: Using Editable Timing Cache\n",
+    "\n",
+    "TensorRT engines may vary between builds because kernel selection is based on runtime performance measurements. The hardware state (GPU utilization, temperature, system load) affects which kernels are chosen, since different kernels can come out fastest under different conditions. \n",
+    "\n",
+    "To ensure consistent builds, TensorRT provides an editable timing cache that:\n",
+    "- Stores intermediate optimization results\n",
+    "- Enables deterministic engine builds\n",
+    "- Speeds up subsequent builds since they don't need to measure kernel execution time for each op again"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def build_engine_with_cache(onnx_path: Union[str, Path], timing_cache: Optional[trt.ITimingCache]):\n",
+    "    builder = trt.Builder(logger)\n",
+    "    network = builder.create_network()\n",
+    "    parser = trt.OnnxParser(network, logger)\n",
+    "    with open(onnx_path, 'rb') as model:\n",
+    "        parser.parse(model.read())\n",
+    "    config = builder.create_builder_config()\n",
+    "    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)\n",
+    "    \n",
+    "    # Enable editable timing cache\n",
+    "    config.set_flag(trt.BuilderFlag.EDITABLE_TIMING_CACHE)\n",
+    "\n",
+    "    # Create timing cache if not provided\n",
+    "    if not timing_cache:\n",
+    "        timing_cache = config.create_timing_cache(bytes())\n",
+    "    config.set_timing_cache(timing_cache, True)\n",
+    "    \n",
+    "    # Build engine\n",
+    "    print('Start building engine...')\n",
+    "    tik = time.time()\n",
+    "    engine = builder.build_serialized_network(network, config)\n",
+    "    tok = time.time()\n",
+    "    \n",
+    "    print(f'Engine build cost {tok - tik}ms')\n",
+    "    return engine, timing_cache\n",
+    "\n",
+    "# First build (creates cache)\n",
+    "engine1, timing_cache = build_engine_with_cache(onnx_path, None)\n",
+    "print(\"First build completed with cache creation\")\n",
+    "\n",
+    "# Second build (uses cache)\n",
+    "engine2, timing_cache = build_engine_with_cache(onnx_path, timing_cache)\n",
+    "print(\"Second build completed with cache creation\")\n",
+    "\n",
+    "is_identical = np.array_equal(\n",
+    "    np.frombuffer(engine1, dtype=np.uint8),\n",
+    "    np.frombuffer(engine2, dtype=np.uint8))\n",
+    "print(f'Is engine identical: {is_identical}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 3: Run Inference and Compare Performance\n",
+    "\n",
+    "Now let's see the real power of TensorRT! We'll:\n",
+    "1. Run inference with both ONNX and TensorRT\n",
+    "2. Compare their performance\n",
+    "3. See the speedup TensorRT provides\n",
+    "\n",
+    "### Understanding the Performance Difference\n",
+    "\n",
+    "The speedup comes from several optimizations:\n",
+    "- Layer fusion: Combining multiple operations into one\n",
+    "- Memory optimization: Better memory access patterns\n",
+    "- Precision optimization: Using optimal precision for each layer\n",
+    "- CUDA optimization: Direct GPU execution without framework overhead"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_and_preprocess_image(image_path: Union[str, Path], input_size: Tuple[int, int] = (224, 224)):\n",
+    "    img = Image.open(image_path)\n",
+    "    img = img.resize(input_size)\n",
+    "    img = np.array(img).astype(np.float32)\n",
+    "    img = img / 255.0  # Normalize from [0, 255] to [0, 1]\n",
+    "    img = np.transpose(img, (2, 0, 1))  # HWC to CHW\n",
+    "    img = np.expand_dims(img, axis=0)  # Add batch dimension\n",
+    "    return img\n",
+    "    \n",
+    "def check_cuda_error(error):\n",
+    "    if isinstance(error, tuple):\n",
+    "        error = error[0]\n",
+    "    if error != cudart.cudaError_t.cudaSuccess:\n",
+    "        error_name = cudart.cudaGetErrorName(error)[1]\n",
+    "        error_string = cudart.cudaGetErrorString(error)[1]\n",
+    "        raise RuntimeError(f\"CUDA Error: {error_name} ({error_string})\")\n",
+    "\n",
+    "def run_inference_trt(engine: trt.ICudaEngine, input_data: np.ndarray):\n",
+    "    # Create execution context - this stores the device memory allocations\n",
+    "    # and bindings needed for inference\n",
+    "    context = engine.create_execution_context()\n",
+    "\n",
+    "    # Initialize lists to store input/output information and GPU memory allocations\n",
+    "    inputs = []\n",
+    "    outputs = []\n",
+    "    allocations = []\n",
+    "    \n",
+    "    # Iterate through all input/output tensors to set up memory and bindings\n",
+    "    for i in range(engine.num_io_tensors):\n",
+    "        name = engine.get_tensor_name(i)\n",
+    "        # Check if this tensor is an input or output\n",
+    "        is_input = engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT\n",
+    "        # Get tensor datatype and shape information\n",
+    "        dtype = engine.get_tensor_dtype(name)\n",
+    "        shape = engine.get_tensor_shape(name)\n",
+    "        \n",
+    "        # Calculate required memory size for this tensor\n",
+    "        size = np.dtype(trt.nptype(dtype)).itemsize\n",
+    "        for s in shape:\n",
+    "            size *= s\n",
+    "            \n",
+    "        # Allocate GPU memory for this tensor\n",
+    "        err, allocation = cudart.cudaMalloc(size)\n",
+    "        check_cuda_error(err)\n",
+    "        \n",
+    "        # Store tensor information in a dictionary for easy access\n",
+    "        binding = {\n",
+    "            \"index\": i,\n",
+    "            \"name\": name,\n",
+    "            \"dtype\": np.dtype(trt.nptype(dtype)),\n",
+    "            \"shape\": list(shape),\n",
+    "            \"allocation\": allocation,\n",
+    "            \"size\": size,\n",
+    "        }\n",
+    "        \n",
+    "        # Keep track of all allocations and sort tensors into inputs/outputs\n",
+    "        allocations.append(allocation)\n",
+    "        if is_input:\n",
+    "            inputs.append(binding)\n",
+    "        else:\n",
+    "            outputs.append(binding)\n",
+    "\n",
+    "    # Ensure input data is contiguous in memory for efficient GPU transfer\n",
+    "    input_data = np.ascontiguousarray(input_data)\n",
+    "    \n",
+    "    # Copy input data from host (CPU) to device (GPU)\n",
+    "    err = cudart.cudaMemcpy(\n",
+    "        inputs[0][\"allocation\"],\n",
+    "        input_data.ctypes.data,\n",
+    "        inputs[0][\"size\"],\n",
+    "        cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,\n",
+    "    )\n",
+    "    check_cuda_error(err)\n",
+    "\n",
+    "    # Set tensor addresses for all tensors\n",
+    "    for i in range(engine.num_io_tensors):\n",
+    "        context.set_tensor_address(engine.get_tensor_name(i), allocations[i])\n",
+    "\n",
+    "    # Create a CUDA stream for asynchronous execution\n",
+    "    err, stream = cudart.cudaStreamCreate()\n",
+    "    check_cuda_error(err)\n",
+    "\n",
+    "    # Run inference using the TensorRT engine\n",
+    "    context.execute_async_v3(stream_handle=stream)\n",
+    "    err = cudart.cudaStreamSynchronize(stream)\n",
+    "    check_cuda_error(err)\n",
+    "\n",
+    "    # Prepare numpy array for output and copy results from GPU to CPU\n",
+    "    output_shape = outputs[0][\"shape\"]\n",
+    "    output = np.empty(output_shape, dtype=outputs[0][\"dtype\"])\n",
+    "\n",
+    "    err = cudart.cudaMemcpy(\n",
+    "        output.ctypes.data,\n",
+    "        outputs[0][\"allocation\"],\n",
+    "        outputs[0][\"size\"],\n",
+    "        cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,\n",
+    "    )\n",
+    "    check_cuda_error(err)\n",
+    "\n",
+    "    # Free all GPU memory allocations\n",
+    "    for allocation in allocations:\n",
+    "        err = cudart.cudaFree(allocation)\n",
+    "        check_cuda_error(err)\n",
+    "\n",
+    "    # Destroy the CUDA stream\n",
+    "    err = cudart.cudaStreamDestroy(stream)\n",
+    "    check_cuda_error(err)\n",
+    "\n",
+    "    return output\n",
+    "\n",
+    "import onnxruntime as ort\n",
+    "def run_inference_onnx(session, input_data: np.ndarray):\n",
+    "    output = session.run(None, {'x': input_data})[0]\n",
+    "    return output"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Let's Compare Performance!\n",
+    "\n",
+    "We'll run both models multiple times to get an accurate comparison of their performance. This will show you the baseline speedup that TensorRT provides. \n",
+    "\n",
+    "Refer to the [TensorRT documentation](https://docs.nvidia.com/deeplearning/tensorrt/latest/index.html) for more information about how to further optimize your engine."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create a sample input\n",
+    "sample_input = np.random.randn(1, 3, 224, 224).astype(np.float32)\n",
+    "\n",
+    "# Benchmark ONNX Runtime\n",
+    "session = ort.InferenceSession(onnx_path)\n",
+    "onnx_times = []\n",
+    "for _ in range(100):\n",
+    "    start_time = time.time()\n",
+    "    _ = run_inference_onnx(session, sample_input)\n",
+    "    onnx_times.append(time.time() - start_time)\n",
+    "\n",
+    "# Benchmark TensorRT\n",
+    "with open(engine_path, \"rb\") as f, trt.Runtime(logger) as runtime:\n",
+    "    engine = runtime.deserialize_cuda_engine(f.read())\n",
+    "trt_times = []\n",
+    "for _ in range(100):\n",
+    "    start_time = time.time()\n",
+    "    _ = run_inference_trt(engine, sample_input)\n",
+    "    trt_times.append(time.time() - start_time)\n",
+    "\n",
+    "print(f\"ONNX Runtime Average Time: {np.mean(onnx_times)*1000:.2f} ms\")\n",
+    "print(f\"TensorRT Average Time: {np.mean(trt_times)*1000:.2f} ms\")\n",
+    "print(f\"Speedup: {np.mean(onnx_times)/np.mean(trt_times):.2f}x\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 4: Run Inference on a Real Image\n",
+    "\n",
+    "Now let's try our optimized model on a real image! We'll:\n",
+    "1. Download a sample image\n",
+    "2. Load the ImageNet class labels\n",
+    "3. Make predictions and show the results\n",
+    "\n",
+    "This will demonstrate how the optimized TensorRT engine performs in a real-world scenario."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download a sample image\n",
+    "download_file(\"https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg\", root / \"test_image.jpg\")\n",
+    "\n",
+    "from PIL import Image\n",
+    "from IPython.display import display\n",
+    "\n",
+    "# Open and display the image\n",
+    "img = Image.open(root/\"test_image.jpg\")\n",
+    "display(img)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_imagenet_labels():\n",
+    "    # Download ImageNet labels if not exists\n",
+    "    if not (root / \"imagenet_classes.txt\").is_file():\n",
+    "        download_file(\"https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt\", root / \"imagenet_classes.txt\")\n",
+    "    # Read the labels\n",
+    "    with open(root / \"imagenet_classes.txt\") as f:\n",
+    "        categories = [s.strip() for s in f.readlines()]\n",
+    "    return categories\n",
+    "\n",
+    "# Load ImageNet labels\n",
+    "categories = load_imagenet_labels()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load and preprocess a test image\n",
+    "test_image_path = root / \"test_image.jpg\"\n",
+    "input_data = load_and_preprocess_image(test_image_path)\n",
+    "\n",
+    "# Run inference\n",
+    "output = run_inference_trt(engine, input_data)\n",
+    "\n",
+    "# Get top 5 predictions\n",
+    "top5_idx = np.argsort(output[0])[-5:][::-1]\n",
+    "print(\"Top 5 predictions:\")\n",
+    "for idx in top5_idx:\n",
+    "    print(f\"{categories[idx]}: {output[0][idx]:.2f}%\")\n",
+    "assert categories[top5_idx[0]] == \"Samoyed\", 'Incorrect prediction'\n",
+    "print('Correctly recognized!')\n",
+    "print('Notebook executed successfully')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Congratulations! 🎉\n",
+    "\n",
+    "### You've successfully:\n",
+    "1. Loaded a pre-trained EfficientNet model in ONNX format\n",
+    "2. Converted it to a TensorRT engine\n",
+    "3. Achieved significant speedup in inference\n",
+    "4. Made predictions on real images\n",
+    "5. Learned how to use a timing cache to speed up engine building and make engine builds deterministic.\n",
+    "\n",
+    "### What's Next?\n",
+    "\n",
+    "Now that you understand the ONNX to TensorRT workflow, you can:\n",
+    "- Export your own models from PyTorch/TensorFlow to ONNX\n",
+    "- Try different optimization settings in TensorRT\n",
+    "- Apply this workflow to your production models and get an instant performance boost with NVIDIA GPUs!\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/2_construct_network_with_layer_apis/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/2_construct_network_with_layer_apis/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..97107ac6f40ddfa882a413118c67610d9c28d42d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/2_construct_network_with_layer_apis/README.md
@@ -0,0 +1,16 @@
+# Sample 2: Constructing a Network with TensorRT Layer APIs
+
+This sample demonstrates how to build a TensorRT network definition from scratch using the TensorRT Layer APIs, focusing on constructing a recurrent neural network (LSTM) and utilizing advanced builder features.
+
+## Description
+
+This sample constructs a simple, single-layer Long Short-Term Memory (LSTM) network using the TensorRT Layer APIs. The primary goal is to illustrate how to:
+
+1.  Define individual network layers and their connections programmatically using Python (**TensorRT Layer API**). This includes layers like constants, matrix multiply, element-wise operations, activations, and slicing.
+2.  Implement recurrent logic by building an LSTM cell and using TensorRT's `add_loop` construct to create a recurrent LSTM layer (**Recurrent Network Construction**).
+3.  Monitor the potentially lengthy engine build process by implementing `IProgressMonitor` for real-time feedback (**Build Progress Monitoring**).
+4.  Configure the builder for engine portability using `BuilderFlag.VERSION_COMPATIBLE` to create more portable engines (**Version-Compatible Engines**).
+5.  Run inference and verify the custom network's correctness by utilizing Polygraphy's `TrtRunner` for simplified engine loading/execution (**Inference with Polygraphy**) and comparing the TensorRT engine's output against a reference NumPy implementation (**NumPy Verification**).
+
+## Additional Resources
+*   [tensorrtx repo](https://github.com/wang-xinyu/tensorrtx): Offers real-world examples of constructing complex networks using the TensorRT Layer APIs.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/2_construct_network_with_layer_apis/main.ipynb b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/2_construct_network_with_layer_apis/main.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..75b69030960f45cb57acf830639b33fbf47a3dd9
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/2_construct_network_with_layer_apis/main.ipynb
@@ -0,0 +1,694 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0\n",
+    "\n",
+    "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use\n",
+    "\n",
+    "this file except in compliance with the License. You may obtain a copy of the License at\n",
+    "\n",
+    "\n",
+    "\n",
+    "http://www.apache.org/licenses/LICENSE-2.0\n",
+    "\n",
+    "\n",
+    "\n",
+    "Unless required by applicable law or agreed to in writing, software\n",
+    "\n",
+    "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+    "\n",
+    "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+    "\n",
+    "See the License for the specific language governing permissions and\n",
+    "\n",
+    "limitations under the License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 2. Constructing a Network with TensorRT Layer APIs\n",
+    "\n",
+    "In this notebook, you'll learn how to move beyond pre-built model formats and directly construct a neural network using TensorRT's versatile Layer APIs. This approach offers fine-grained control over your network architecture and optimizations.\n",
+    "\n",
+    "Specifically, we will cover:\n",
+    "\n",
+    "1.  **Building a Recurrent Network (LSTM) from Scratch:** Understand how to define each layer of a Long Short-Term Memory (LSTM) cell and then use these components to construct an entire recurrent LSTM layer. This involves using various Layer API functionalities like `add_constant`, `add_matrix_multiply`, `add_elementwise`, `add_slice`, and `add_activation`.\n",
+    "2.  **Implementing Loops for Recurrence:** Utilize TensorRT's `add_loop` functionality to efficiently handle the recurrent nature of the LSTM, processing an input sequence step-by-step.\n",
+    "3.  **Monitoring Build Progress:** Implement an `IProgressMonitor` to track the engine creation process in real-time, providing visibility into potentially long build times.\n",
+    "4.  **Creating Version-Compatible Engines:** Learn to save TensorRT engines with the `BuilderFlag.VERSION_COMPATIBLE` flag, enhancing their portability across different TensorRT patch versions and compatible hardware.\n",
+    "\n",
+    "This sample uses a small, single-layer LSTM to keep the focus on these core TensorRT API features. We'll also verify its output against an equivalent NumPy implementation to ensure correctness."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Introduction\n",
+    "\n",
+    "While importing models via ONNX offers convenience, constructing networks directly with TensorRT APIs provides fine-grained control over the network definition. The **[TensorRT Layer API](https://docs.nvidia.com/deeplearning/tensorrt/latest/python_api/infer/Graph/Layers.html)** enables users to define each layer explicitly, offering flexibility and optimization opportunities.\n",
+    "\n",
+    "To facilitate understanding and verification, this demonstration employs small tensors, allowing for direct comparison with an equivalent NumPy implementation.\n",
+    "\n",
+    "> **Note: This sample assumes familiarity with the basic concepts of Long Short-Term Memory (LSTM) networks. If you're new to LSTMs, you might find it helpful to review their structure and operation before proceeding.**"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 0: Prerequisites"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install numpy tensorrt polygraphy --extra-index-url https://pypi.ngc.nvidia.com\n",
+    "import tensorrt as trt\n",
+    "import numpy as np\n",
+    "from typing import Tuple"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To simplify, network parameters and weight initializations use small, illustrative values and dimensions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# === Network Parameters & Weights Initialization ===\n",
+    "batch_size = 1\n",
+    "seq_len = 5      # Length of the sequence\n",
+    "input_size = 1   # Dimension of input vector at each time step\n",
+    "hidden_size = 2  # Dimension of hidden state and cell state\n",
+    "num_units = 1\n",
+    "\n",
+    "# --- Create Fixed Dummy Weights and Biases (NumPy arrays with dummy values) ---\n",
+    "# These will be used by both the TensorRT build and the NumPy verification\n",
+    "w_val, u_val, b_val = 0.01, 0.05, 0.3\n",
+    "initial_h_val = 0.1\n",
+    "initial_c_val = 0.2\n",
+    "\n",
+    "# Define shapes\n",
+    "w_shape = (input_size, 4 * hidden_size) # e.g., [1, 8] for layer 0\n",
+    "u_shape = (hidden_size, 4 * hidden_size)       # e.g., [2, 8]\n",
+    "b_shape = (4 * hidden_size,)                   # e.g., [8]\n",
+    "initial_h_shape = (batch_size, hidden_size)\n",
+    "initial_c_shape = (batch_size, hidden_size)\n",
+    "\n",
+    "# Create NumPy arrays\n",
+    "np_weight_W = np.full(w_shape, w_val, dtype=np.float32)\n",
+    "np_weight_U = np.full(u_shape, u_val, dtype=np.float32)\n",
+    "np_bias = np.full(b_shape, b_val, dtype=np.float32)\n",
+    "np_initial_h = np.full(initial_h_shape, initial_h_val, dtype=np.float32)\n",
+    "np_initial_c = np.full(initial_c_shape, initial_c_val, dtype=np.float32)\n",
+    "\n",
+    "# Create inputs for the network\n",
+    "np_inputs = np.ones((seq_len, batch_size, input_size), dtype=np.float32)\n",
+    "\n",
+    "print(\"NumPy Weights Initialized:\")\n",
+    "print(f\"  W shape : {np_weight_W.shape}\")\n",
+    "print(f\"  U shape : {np_weight_U.shape}\")\n",
+    "print(f\"  Bias shape : {np_bias.shape}\")\n",
+    "print(f\"  Initial H shape : {np_initial_h.shape}\")\n",
+    "print(f\"  Initial C shape : {np_initial_c.shape}\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 1: Defining LSTM Operations with the Layer API\n",
+    "\n",
+    "This step involves defining the LSTM operations by adding layers to the TensorRT `INetworkDefinition`.\n",
+    "\n",
+    "### Typical Usage Pattern for TensorRT Layer APIs\n",
+    "\n",
+    "When adding layers to a TensorRT network using the Layer API, the common pattern is:\n",
+    "\n",
+    "1.  **Add the layer:** Use a `network.add_*` method (e.g., `network.add_matrix_multiply`) to add the desired layer. This method takes input tensors and layer-specific parameters, returning an `ILayer` object representing the newly added layer.\n",
+    "2.  **Configure the layer:** Access the returned `ILayer` object to configure its properties. This step is optional, but naming the layer and its output tensors makes debugging easier and logs more helpful.\n",
+    "\n",
+    "```python\n",
+    "# Example: Adding and configuring a generic layer\n",
+    "\n",
+    "# 1. Add the layer (replace with a specific layer like add_matrix_multiply)\n",
+    "layer = network.add_some_layer(input_tensor, ...)\n",
+    "\n",
+    "# 2. Configure the layer (optional)\n",
+    "output_tensor = layer.get_output(0)\n",
+    "output_tensor.name = 'my_layer_output'  # Name the output\n",
+    "# ... other configurations ...\n",
+    "```\n",
+    "\n",
+    "For a comprehensive list of available layer types and their specific methods and properties, consult the official [TensorRT Layer API documentation](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Graph/Layers.html)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "TRT_LOGGER = trt.Logger(trt.Logger.INFO)\n",
+    "\n",
+    "def add_lstm_unit(network: trt.INetworkDefinition,\n",
+    "                  input_x: trt.ITensor,      # Shape: [batch_size, input_size]\n",
+    "                  prev_h: trt.ITensor,       # Shape: [batch_size, hidden_size]\n",
+    "                  prev_c: trt.ITensor,       # Shape: [batch_size, hidden_size]\n",
+    "                  W: np.ndarray,            # Shape: [input_size, 4 * hidden_size]\n",
+    "                  U: np.ndarray,            # Shape: [hidden_size, 4 * hidden_size]\n",
+    "                  bias: np.ndarray,         # Shape: [4 * hidden_size]\n",
+    "                  hidden_size: int,\n",
+    "                  input_size: int\n",
+    "                  ) -> Tuple[trt.ITensor, trt.ITensor]:\n",
+    "    \"\"\"\n",
+    "    Adds the computations for a single LSTM time step.\n",
+    "    Assumes input tensors have a leading batch dimension.\n",
+    "    \"\"\"\n",
+    "    batch_size = input_x.shape[0] # Get batch size from input\n",
+    "\n",
+    "    # Create constant layers for weights and biases\n",
+    "    W_layer = network.add_constant(W.shape, trt.Weights(W))\n",
+    "    W_layer.get_output(0).name = \"W_const\"\n",
+    "    U_layer = network.add_constant(U.shape, U)\n",
+    "    U_layer.get_output(0).name = \"U_const\"\n",
+    "    # Reshape bias for broadcasting: [4*hidden] -> [1, 4*hidden]\n",
+    "    bias_reshaped_np = np.expand_dims(bias.copy(), axis=0)\n",
+    "    bias_layer = network.add_constant(bias_reshaped_np.shape, bias_reshaped_np)\n",
+    "    bias_layer.get_output(0).name = \"Bias_const\"\n",
+    "\n",
+    "\n",
+    "    # Linear transformations: Wx = input_x * W ; Uh = prev_h * U\n",
+    "    # Wx = [batch, input] * [input, 4*hidden] = [batch, 4*hidden]\n",
+    "    mm_wx = network.add_matrix_multiply(input_x, trt.MatrixOperation.NONE,\n",
+    "                                        W_layer.get_output(0), trt.MatrixOperation.NONE)\n",
+    "    mm_wx.get_output(0).name = \"Wx\"\n",
+    "\n",
+    "    # Uh = [batch, hidden] * [hidden, 4*hidden] = [batch, 4*hidden]\n",
+    "    mm_uh = network.add_matrix_multiply(prev_h, trt.MatrixOperation.NONE,\n",
+    "                                        U_layer.get_output(0), trt.MatrixOperation.NONE)\n",
+    "    mm_uh.get_output(0).name = \"Uh\"\n",
+    "\n",
+    "\n",
+    "    # Combined gates = Wx + Uh + Bias\n",
+    "    gates_wx_uh = network.add_elementwise(mm_wx.get_output(0), mm_uh.get_output(0),\n",
+    "                                         trt.ElementWiseOperation.SUM)\n",
+    "    gates_wx_uh.get_output(0).name = \"Wx_plus_Uh\"\n",
+    "\n",
+    "    gates = network.add_elementwise(gates_wx_uh.get_output(0), bias_layer.get_output(0),\n",
+    "                                    trt.ElementWiseOperation.SUM)\n",
+    "\n",
+    "    gates_output = gates.get_output(0) # Shape [batch, 4*hidden]\n",
+    "    gates_output.name = \"Gates_Combined\"\n",
+    "\n",
+    "    # Split the combined gates tensor [batch, 4*hidden] -> four [batch, hidden] gate tensors (Input, Forget, Candidate, Output)\n",
+    "    def add_gate_slice(index):\n",
+    "        gate_slice_layer = network.add_slice(input=gates_output,\n",
+    "                                       start=(0, index * hidden_size), # Start [batch_idx=0, col_idx]\n",
+    "                                       shape=(batch_size, hidden_size), # Slice shape\n",
+    "                                       stride=(1, 1))                   # Stride\n",
+    "        return gate_slice_layer.get_output(0)\n",
+    "\n",
+    "    slice_i = add_gate_slice(0)\n",
+    "    slice_i.name = \"Slice_I\"\n",
+    "    slice_f = add_gate_slice(1)\n",
+    "    slice_f.name = \"Slice_F\"\n",
+    "    slice_c = add_gate_slice(2)\n",
+    "    slice_c.name = \"Slice_C_candidate\" # Cell candidate\n",
+    "    slice_o = add_gate_slice(3)\n",
+    "    slice_o.name = \"Slice_O\"\n",
+    "\n",
+    "    # Apply activations\n",
+    "    act_i_layer = network.add_activation(slice_i, trt.ActivationType.SIGMOID)\n",
+    "    act_i = act_i_layer.get_output(0)\n",
+    "    act_i.name = \"Gate_I\"\n",
+    "    act_f_layer = network.add_activation(slice_f, trt.ActivationType.SIGMOID)\n",
+    "    act_f = act_f_layer.get_output(0)\n",
+    "    act_f.name = \"Gate_F\"\n",
+    "    act_c_layer = network.add_activation(slice_c, trt.ActivationType.TANH)\n",
+    "    act_c = act_c_layer.get_output(0)\n",
+    "    act_c.name = \"Gate_C_candidate\"\n",
+    "    act_o_layer = network.add_activation(slice_o, trt.ActivationType.SIGMOID)\n",
+    "    act_o = act_o_layer.get_output(0)\n",
+    "    act_o.name = \"Gate_O\"\n",
+    "\n",
+    "    # Cell state update: c_t = f_t * c_{t-1} + i_t * g_t\n",
+    "    term1_c = network.add_elementwise(act_f, prev_c, trt.ElementWiseOperation.PROD)\n",
+    "    term2_c = network.add_elementwise(act_i, act_c, trt.ElementWiseOperation.PROD)\n",
+    "    next_c_layer = network.add_elementwise(term1_c.get_output(0), term2_c.get_output(0), trt.ElementWiseOperation.SUM)\n",
+    "\n",
+    "    next_c = next_c_layer.get_output(0)\n",
+    "    next_c.name = \"next_c\" # Shape [batch, hidden]\n",
+    "\n",
+    "    # Hidden state update: h_t = o_t * tanh(c_t)\n",
+    "    tanh_c_layer = network.add_activation(next_c, trt.ActivationType.TANH)\n",
+    "    tanh_c = tanh_c_layer.get_output(0)\n",
+    "    next_h_layer = network.add_elementwise(act_o, tanh_c, trt.ElementWiseOperation.PROD)\n",
+    "\n",
+    "    next_h = next_h_layer.get_output(0)\n",
+    "    next_h.name = \"next_h\" # Shape [batch, hidden]\n",
+    "\n",
+    "    return next_h, next_c\n",
+    "\n",
+    "\n",
+    "def add_lstm_layer(network: trt.INetworkDefinition,\n",
+    "                   input_sequence: trt.ITensor, # Shape: [seq_len, batch_size, input_size]\n",
+    "                   hidden_size: int,\n",
+    "                   seq_len: int,\n",
+    "                   weight_W: np.ndarray, # [input_size, 4*hidden] or [hidden, 4*hidden]\n",
+    "                   weight_U: np.ndarray, # [hidden, 4*hidden]\n",
+    "                   bias: np.ndarray    # [4*hidden]\n",
+    "                   ) -> trt.ITensor:\n",
+    "    \"\"\"\n",
+    "    Adds a LSTM to the network by adding one lstm_unit, and run multiple times with loops.\n",
+    "    \"\"\"\n",
+    "    # Infer batch_size and input_size from the input tensor shape\n",
+    "    assert len(input_sequence.shape) == 3, f\"Input sequence tensor must have 3 dimensions [seq, batch, input]. Got shape {input_sequence.shape}\"\n",
+    "    input_size = input_sequence.shape[2]\n",
+    "\n",
+    "    # Shape: [batch_size, hidden_size]\n",
+    "    initial_h = network.add_constant(np_initial_h.shape, np_initial_h).get_output(0)\n",
+    "    initial_h.name = \"Initial_H\"\n",
+    "    initial_c = network.add_constant(np_initial_c.shape, np_initial_c).get_output(0)\n",
+    "    initial_c.name = \"Initial_C\"\n",
+    "\n",
+    "    loop = network.add_loop()\n",
+    "    loop.name = \"Time_Loop_Layer\"\n",
+    "\n",
+    "    # add_trip_limit determines when the loop should stop. For here we want the loop to run seq_len times.\n",
+    "    trip_limit = network.add_constant((), np.array([seq_len], dtype=np.int32)).get_output(0)\n",
+    "    loop.add_trip_limit(trip_limit, trt.TripLimit.COUNT)\n",
+    "\n",
+    "    # Recurrences for hidden and cell states\n",
+    "    h_recurrence = loop.add_recurrence(initial_h)\n",
+    "    c_recurrence = loop.add_recurrence(initial_c)\n",
+    "    prev_h_tensor = h_recurrence.get_output(0)\n",
+    "    prev_h_tensor.name = \"Prev_H\"\n",
+    "    prev_c_tensor = c_recurrence.get_output(0)\n",
+    "    prev_c_tensor.name = \"Prev_C\"\n",
+    "\n",
+    "    # add_iterator iterates through slices of the input sequence along the specified axis, providing one slice per iteration.\n",
+    "    x_t_iterator = loop.add_iterator(input_sequence, axis=0)\n",
+    "    x_t = x_t_iterator.get_output(0)\n",
+    "    x_t.name = \"x_t\"\n",
+    "\n",
+    "\n",
+    "    # Call the LSTM unit function\n",
+    "    next_h, next_c = add_lstm_unit(network=network,\n",
+    "                                    input_x=x_t,\n",
+    "                                    prev_h=prev_h_tensor,\n",
+    "                                    prev_c=prev_c_tensor,\n",
+    "                                    W=weight_W,\n",
+    "                                    U=weight_U,\n",
+    "                                    bias=bias,\n",
+    "                                    hidden_size=hidden_size,\n",
+    "                                    input_size=input_size)\n",
+    "\n",
+    "    # Feed the computed states back into the recurrence inputs\n",
+    "    h_recurrence.set_input(1, next_h)\n",
+    "    c_recurrence.set_input(1, next_c)\n",
+    "\n",
+    "    # add_loop_output() collects the values in the loop and outputs them. For this example, we concatenate the values along the first axis.\n",
+    "    loop_output_h = loop.add_loop_output(next_h, trt.LoopOutput.CONCATENATE, axis=0)\n",
+    "\n",
+    "    # when using CONCATENATE, the second input must be the trip limit.\n",
+    "    loop_output_h.set_input(1, trip_limit)\n",
+    "    loop_output_h.get_output(0).name = \"Hidden_Sequence\"\n",
+    "\n",
+    "    # --- End of time step loop definition ---\n",
+    "\n",
+    "    layer_output_sequence = loop_output_h.get_output(0)\n",
+    "\n",
+    "    # The final output sequence is the sequence from the last layer\n",
+    "    if layer_output_sequence is None:\n",
+    "         raise RuntimeError(\"LSTM layer output was not generated (num_layers may be 0)\")\n",
+    "    layer_output_sequence.name = \"Final_LSTM_Output_Sequence\"\n",
+    "    return layer_output_sequence"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 2: Build the Network\n",
+    "\n",
+    "Now that we have the LSTM layer implementation (`add_lstm_layer`), let's proceed to build the TensorRT `INetworkDefinition`.\n",
+    "This involves defining the network structure by:\n",
+    "1. Adding the input tensor using `network.add_input`.\n",
+    "2. Adding the LSTM layer using our custom `add_lstm_layer` function.\n",
+    "3. Marking the LSTM layer's output tensor as the network's final output."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "builder = trt.Builder(TRT_LOGGER)\n",
+    "network = builder.create_network()\n",
+    "\n",
+    "# === Network Definition ===\n",
+    "# Shape: [seq_len, batch_size, input_size] -> e.g., [5, 1, 1]\n",
+    "input_tensor = network.add_input(name='input', dtype=trt.float32, shape=(seq_len, batch_size, input_size))\n",
+    "\n",
+    "# --- Add SINGLE LSTM Layer ---\n",
+    "lstm_output = add_lstm_layer(network=network,\n",
+    "                                input_sequence=input_tensor,\n",
+    "                                hidden_size=hidden_size,\n",
+    "                                seq_len=seq_len,\n",
+    "                                weight_W=np_weight_W, \n",
+    "                                weight_U=np_weight_U, \n",
+    "                                bias=np_bias) \n",
+    "# lstm_output shape: [seq_len, batch_size, hidden_size] -> e.g., [5, 1, 2]\n",
+    "\n",
+    "# --- Mark Output ---\n",
+    "lstm_output.name = 'hidden_state_sequence'\n",
+    "network.mark_output(lstm_output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Step 3: Build the Engine\n",
+    "\n",
+    "Now that we have defined the network (`INetworkDefinition`), the next step is to build the optimized TensorRT engine. This process involves using the `trt.Builder` along with a `trt.IBuilderConfig` object to specify how the engine should be built.\n",
+    "\n",
+    "The `IBuilderConfig` allows you to control various aspects of the build process, such as:\n",
+    "*   Setting memory constraints (e.g., workspace size using `set_memory_pool_limit`).\n",
+    "*   Setting builder flags to control optimization strategies and compatibility.\n",
+    "\n",
+    "Once the network and configuration are ready, the `builder.build_serialized_network(network, config)` method is called to produce the serialized engine, which can then be saved to a file or used directly.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## (Optional) Defining a Progress Monitor\n",
+    "Building a TensorRT engine can sometimes take a while, especially for complex models. Don't worry if the build seems long! TensorRT offers a helpful tool called `IProgressMonitor`. This interface lets you track the build process step-by-step, making it easier to monitor progress and even debug if needed. \n",
+    "\n",
+    "### Implementing `IProgressMonitor`\n",
+    "\n",
+    "To use the progress monitor, inherit from `trt.IProgressMonitor` and override its key methods:\n",
+    "\n",
+    "*   `phase_start(self, phase_name, parent_phase, num_steps)`: TensorRT calls this method when it begins a significant phase of the build process (e.g., \"Parsing ONNX Model\", \"Building Engine\"). \n",
+    "    *   `phase_name`: Name of the phase starting.\n",
+    "    *   `parent_phase`: Name of the parent phase, if this is a sub-phase (can be `None`).\n",
+    "    *   `num_steps`: The total number of steps expected for this phase.\n",
+    "*   `step_complete(self, phase_name, step)`: Called after each incremental step within a phase is completed.\n",
+    "    *   `phase_name`: Name of the current phase.\n",
+    "    *   `step`: The index of the step that just finished (0-based).\n",
+    "    *   *Your implementation* usually updates the corresponding progress indicator.\n",
+    "    *   **Crucially, this method must return `True` to allow the build to continue.** Returning `False` or `None` will signal TensorRT to cancel the build.\n",
+    "*   `phase_finish(self, phase_name)`: Called when a phase (and all its steps) is completed.\n",
+    "    *   `phase_name`: Name of the phase that finished.\n",
+    "    *   *Your implementation* typically finalizes and removes the progress indicator for this phase.\n",
+    "\n",
+    "After that, attach it to the `IBuilderConfig` by setting `config.progress_monitor = MyProgressMonitor()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SimpleProgressMonitor(trt.IProgressMonitor):\n",
+    "    def __init__(self):\n",
+    "        trt.IProgressMonitor.__init__(self)\n",
+    "        self._active_phases = 0\n",
+    "\n",
+    "    def phase_start(self, phase_name, parent_phase, num_steps):\n",
+    "        print(f\"[ProgressMonitor] Phase Start: {phase_name} ({num_steps} steps)\")\n",
+    "        self._active_phases += 1\n",
+    "\n",
+    "    def phase_finish(self, phase_name):\n",
+    "        print(f\"[ProgressMonitor] Phase Finish: {phase_name}\")\n",
+    "        self._active_phases -= 1\n",
+    "\n",
+    "    def step_complete(self, phase_name, step):\n",
+    "        print(f\"[ProgressMonitor] Step Complete: {phase_name}, Step {step}\")\n",
+    "        return True\n",
+    "\n",
+    "    @property\n",
+    "    def active_phases(self):\n",
+    "        return self._active_phases"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## (Optional) Version Compatible Engine\n",
+    "TensorRT engines are typically optimized for the specific GPU and TensorRT version they are built on. This maximizes performance but can cause incompatibility if the deployment environment differs.\n",
+    "\n",
+    "The `trt.BuilderFlag.VERSION_COMPATIBLE` flag addresses this by creating a more portable engine. This engine is less sensitive to minor variations in TensorRT versions or GPU models (within a compatible family), potentially at the cost of some performance compared to a non-compatible engine optimized for the exact target. It also reduces the need to rebuild the engine for every minor TensorRT update. Version compatibility is supported from TensorRT 8.6 onwards: both the TensorRT version used to build the plan and the runtime used to load it must be 8.6 or newer.\n",
+    "\n",
+    "### Use Cases\n",
+    "*   Deploying across diverse hardware fleets with compatible GPUs/TRT versions.\n",
+    "*   Distributing applications where end-user system configurations vary.\n",
+    "*   Simplifying maintenance by avoiding frequent rebuilds for minor updates.\n",
+    "\n",
+    "### How it Works\n",
+    "Enabling `trt.BuilderFlag.VERSION_COMPATIBLE` instructs TensorRT to use more generic optimizations. By default, this flag also causes a copy of a \"lean runtime\" (a version-specific, stripped-down runtime component) to be packaged within the engine plan file. When you deserialize this engine plan on a compatible system, TensorRT recognizes the embedded lean runtime, loads it, and uses this runtime to deserialize and execute the rest of the plan. \n",
+    "\n",
+    "Because this process involves loading and executing code (the lean runtime) directly from the engine plan file, you must explicitly indicate that you trust the origin and integrity of the plan. This is done by setting `runtime.engine_host_code_allowed = True` on your `trt.Runtime` instance before attempting to deserialize the engine.\n",
+    "\n",
+    "> **Considerations for Multiple Version-Compatible Engines:**\n",
+    "If deploying many version-compatible engines, the embedded lean runtime in each plan can lead to large overall application sizes. An alternative is to exclude the runtime from the engine plan (using `trt.BuilderFlag.EXCLUDE_LEAN_RUNTIME`) and load it manually. This approach can significantly reduce the total deployment footprint. For detailed instructions, refer to the NVIDIA TensorRT documentation on [Manually Loading the Runtime](https://docs.nvidia.com/deeplearning/tensorrt/latest/inference-library/advanced.html#manually-loading-the-runtime)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ENGINE_FILE_PATH = './lstm_network.plan'\n",
+    "\n",
+    "config = builder.create_builder_config()\n",
+    "config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 28) # 256MB\n",
+    "config.progress_monitor = SimpleProgressMonitor()\n",
+    "config.set_flag(trt.BuilderFlag.VERSION_COMPATIBLE)\n",
+    "\n",
+    "print(\"Building engine...\")\n",
+    "serialized_engine = builder.build_serialized_network(network, config)\n",
+    "\n",
+    "print(\"Engine build completed.\")\n",
+    "with open(ENGINE_FILE_PATH, 'wb') as f:\n",
+    "    f.write(serialized_engine)\n",
+    "print(f\"Engine saved to {ENGINE_FILE_PATH}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Inference\n",
+    "\n",
+    "Once the TensorRT engine is built, the next step is typically to run inference to verify its functionality and performance. The standard process involves creating a runtime and execution context, managing GPU memory for inputs and outputs, transferring data between host and device, and executing the engine. While this process provides fine-grained control, it involves boilerplate code. This standard procedure was demonstrated in detail in Sample 1.\n",
+    "\n",
+    "In this sample, we'll simplify the inference process by using **[Polygraphy](https://github.com/NVIDIA/TensorRT/tree/main/tools/Polygraphy)**, a versatile toolkit included with TensorRT that automates many underlying details, such as:\n",
+    "*   Context creation\n",
+    "*   Buffer management\n",
+    "*   Data transfers\n",
+    "\n",
+    "> **Important Note:** While Polygraphy is excellent for debugging and testing due to its ease of use, it may introduce overhead.\n",
+    "> For optimal performance in deployment scenarios, consider handcrafting the inference code as demonstrated in the `1_run_onnx_with_tensorrt` sample.\n",
+    "\n",
+    "For more examples, please refer to the [Polygraphy examples](https://github.com/NVIDIA/TensorRT/tree/main/tools/Polygraphy/examples)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from polygraphy.backend.common import BytesFromPath\n",
+    "from polygraphy.backend.trt import EngineFromBytes, TrtRunner\n",
+    "\n",
+    "def run_inference_with_polygraphy(h_input: np.ndarray) -> np.ndarray:\n",
+    "    input_name = 'input'\n",
+    "    output_name = 'hidden_state_sequence'\n",
+    "\n",
+    "    # Prepare the feed dictionary required by Polygraphy\n",
+    "    # Ensure input is contiguous C-style array, which Polygraphy prefers.\n",
+    "    h_input_contiguous = np.ascontiguousarray(h_input)\n",
+    "    feed_dict = {input_name: h_input_contiguous}\n",
+    "\n",
+    "    print(f\"Loading engine from: {ENGINE_FILE_PATH}\")\n",
+    "    outputs = None\n",
+    "    load_engine = EngineFromBytes(BytesFromPath(ENGINE_FILE_PATH))\n",
+    "    with TrtRunner(load_engine) as runner:\n",
+    "        outputs = runner.infer(feed_dict=feed_dict)\n",
+    "        # Polygraphy automatically synchronizes, so no explicit stream sync needed here\n",
+    "\n",
+    "    output_sequence = outputs[output_name]\n",
+    "    print(f\"Output '{output_name}' shape: {output_sequence.shape}, dtype: {output_sequence.dtype}\")\n",
+    "    return output_sequence\n",
+    "\n",
+    "output_sequence = run_inference_with_polygraphy(np_inputs) \n",
+    "\n",
+    "if output_sequence is not None:\n",
+    "    print(f\"\\nInput Sequence (shape {np_inputs.shape}):\\n{np_inputs}\")\n",
+    "    print(f\"\\nOutput Hidden State Sequence (shape {output_sequence.shape}):\\n{output_sequence}\")\n",
+    "else:\n",
+    "    print(\"Inference failed.\")\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Verifying the Output (Comparison with Equivalent Operations in NumPy)\n",
+    "\n",
+    "To ensure our TensorRT LSTM implementation is correct, we'll compare its output with a reference implementation in NumPy. This is a common practice to validate custom layer logic.\n",
+    "\n",
+    "The NumPy version will mimic the same LSTM cell computations and unroll the loop over the time sequence."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sigmoid_np(x):\n",
+    "    x_clipped = np.clip(x, -500, 500)  # avoid overflow\n",
+    "    return 1.0 / (1.0 + np.exp(-x_clipped))\n",
+    "\n",
+    "\n",
+    "def tanh_np(x):\n",
+    "    x_clipped = np.clip(x, -100, 100)  # avoid overflow\n",
+    "    return np.tanh(x_clipped)\n",
+    "\n",
+    "\n",
+    "def lstm_step_numpy(x_t, prev_h, prev_c, W, U, bias):\n",
+    "    # W: shape [input_size, 4*hidden_size]\n",
+    "    # U: shape [hidden_size, 4*hidden_size]\n",
+    "    # bias: shape [4*hidden_size]\n",
+    "    # x_t: shape [batch_size, input_size]\n",
+    "    # prev_h, prev_c: shape [batch_size, hidden_size]\n",
+    "\n",
+    "    hidden_size_ = prev_h.shape[1]\n",
+    "\n",
+    "    Wx = x_t @ W  # Shape [batch_size, 4*hidden_size]\n",
+    "    Uh = prev_h @ U  # Shape [batch_size, 4*hidden_size]\n",
+    "    gates = Wx + Uh + bias\n",
+    "\n",
+    "    # Split gates\n",
+    "    i = gates[:, 0 * hidden_size_ : 1 * hidden_size_]\n",
+    "    f = gates[:, 1 * hidden_size_ : 2 * hidden_size_]\n",
+    "    c = gates[:, 2 * hidden_size_ : 3 * hidden_size_]  # Cell candidate\n",
+    "    o = gates[:, 3 * hidden_size_ : 4 * hidden_size_]\n",
+    "\n",
+    "    i_act = sigmoid_np(i)\n",
+    "    f_act = sigmoid_np(f)\n",
+    "    c_act = tanh_np(c)\n",
+    "    o_act = sigmoid_np(o)\n",
+    "\n",
+    "    next_c = f_act * prev_c + i_act * c_act\n",
+    "    next_h = o_act * tanh_np(next_c)\n",
+    "\n",
+    "    return next_h, next_c\n",
+    "\n",
+    "\n",
+    "def lstm_layer_numpy(input_sequence_np, np_W, np_U, np_bias):\n",
+    "    seq_len_ = input_sequence_np.shape[0]\n",
+    "    final_output_sequence_np = None\n",
+    "    h = np_initial_h.copy()\n",
+    "    c = np_initial_c.copy()\n",
+    "\n",
+    "    layer_output_sequence_list = []\n",
+    "\n",
+    "    for t in range(seq_len_):\n",
+    "        # Slice the input sequence for this time step\n",
+    "        x_t = input_sequence_np[t, :, :]\n",
+    "\n",
+    "        h, c = lstm_step_numpy(x_t, h, c, np_W, np_U, np_bias)\n",
+    "        layer_output_sequence_list.append(h)\n",
+    "        layer_output_sequence_np = np.stack(layer_output_sequence_list, axis=0)\n",
+    "        final_output_sequence_np = layer_output_sequence_np\n",
+    "\n",
+    "    return final_output_sequence_np\n",
+    "\n",
+    "\n",
+    "numpy_output_sequence = lstm_layer_numpy(np_inputs, np_weight_W, np_weight_U, np_bias)\n",
+    "print(\"\\n--- NumPy LSTM Calculation Results ---\")\n",
+    "print(f\"Input Sequence (all ones, shape {np_inputs.shape}):\\n{np_inputs}\")\n",
+    "print(f\"\\nNumPy Output Hidden State Sequence (shape {numpy_output_sequence.shape}):\\n{numpy_output_sequence}\")\n",
+    "print(\"\\n--- Comparison ---\")\n",
+    "diff = np.abs(output_sequence - numpy_output_sequence)\n",
+    "max_diff = np.max(diff) if diff.size > 0 else 0.0\n",
+    "print(f\"Max absolute difference: {max_diff}\")\n",
+    "assert np.allclose(\n",
+    "    output_sequence, numpy_output_sequence, atol=1e-5\n",
+    "), f\"Output sequence mismatch between TensorRT and NumPy, max diff: {max_diff}\"\n",
+    "print(\"Notebook executed successfully\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Conclusion and Next Steps\n",
+    "\n",
+    "Congratulations! You have successfully:\n",
+    "- Defined an LSTM cell and layer using TensorRT's Layer APIs.\n",
+    "- Implemented a recurrent loop with `add_loop`.\n",
+    "- Monitored the engine build process using `IProgressMonitor`.\n",
+    "- Built a version-compatible TensorRT engine.\n",
+    "- Performed inference using the built engine via Polygraphy.\n",
+    "- Verified the results against a NumPy implementation.\n",
+    "\n",
+    "This sample demonstrates the fundamental building blocks for creating custom network architectures in TensorRT. From here, you can explore:\n",
+    "- More complex network structures.\n",
+    "- Different types of layers available in the TensorRT API.\n",
+    "- Advanced loop constructs and conditional logic.\n",
+    "- Further optimization techniques if performance is critical (though for this sample, we focused on API usage).\n",
+    "\n",
+    "By mastering the Layer API, you gain the power to optimize virtually any deep learning model for inference on NVIDIA GPUs."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bb0d1ca91589fbb441f1b2b0e33d6794ab7cbc2f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/refactored/README.md
@@ -0,0 +1,22 @@
+# TensorRT Refactored Samples
+
+This directory contains refactored and improved versions of TensorRT samples, demonstrating best practices and modern implementations.
+
+## Available Samples
+
+| Sample Name | Description | Format |
+|-------------|-------------|---------|
+| [1_run_onnx_with_tensorrt](./1_run_onnx_with_tensorrt) | Demonstrates ONNX model conversion to TensorRT and inference comparison | `ipynb` |
+| [2_construct_network_with_layer_apis](./2_construct_network_with_layer_apis) | Constructing a Network with TensorRT Layer APIs | `ipynb` |
+
+
+
+## Launch Instructions
+
+1.  Navigate to the desired sample directory and start the Jupyter server:
+    ```bash
+    pip install notebook 
+    cd 1_run_onnx_with_tensorrt # or any other sample
+    jupyter notebook 
+    ```
+2.  Then, open the `main.ipynb` file in the Jupyter Notebook interface that opens in your web browser.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f8d227a8adf396617e26a5033d8936490f6c119b
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/requirements.txt
@@ -0,0 +1,5 @@
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..34a3960d6079584bda476b8e6a5fc85781c70e02
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/README.md
@@ -0,0 +1,83 @@
+# Introduction To Building and Refitting Weight-stripped Engines from ONNX Models
+
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+- [Prerequisites](#prerequisites)
+- [Running the sample](#running-the-sample)
+	* [Sample `--help` options](#sample-help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, sample_weight_stripping, is a Python sample which uses TensorRT to build a weight-stripped engine and later refit to a full engine for inference.
+
+## How does this sample work?
+
+This sample demonstrates how to build a weight-stripped engine from an ONNX model file using TensorRT Python API which can reduce the saved engine size. Later, the weight-stripped engine is refitted by parser refitter with the original ONNX model as input. The refitted full engine is used for inference and guarantees no performance and accuracy loss. In this sample, we use ResNet50 to showcase our features.
+
+## Prerequisites
+
+1. Install the dependencies for Python.
+
+	```bash
+	pip3 install -r requirements.txt
+	```
+
+## Running the sample
+
+1.  Build and save both normal engine and weight-stripped engine:
+
+	```
+	python3 build_engines.py --output_stripped_engine=stripped_engine.trt --output_normal_engine=normal_engine.trt
+	```
+
+	After running this step, you can see two saved TensorRT engines. `stripped_engine.trt` contains a stripped engine (~2.3MB) and `normal_engine.trt` contains a normal engine with all weights included (~51MB). By using stripped engine build, we can greatly reduce the size of the saved engine file.
+
+
+	**Note:** If the TensorRT sample data is not installed in the default location, for example `/usr/src/tensorrt/data/`, the model directory must be specified. For example: `--stripped_onnx=/path/to/my/data/` sets the model path for building weight-stripped engine and `--original_onnx=/path/to/my/data/` sets the model path for building normal engine. In most of the cases, they can use the same ONNX model.
+
+2.  Refit the weight-stripped engine and perform inference with the weight-stripped engine and the normal engine:
+	```
+	python3 refit_engine_and_infer.py --stripped_engine=stripped_engine.trt --normal_engine=normal_engine.trt
+	```
+3.  Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following. The prediction results of the refitted stripped engine are the same as those of the normal engine. There is no performance loss.
+	```
+	Normal engine inference time on 100 cases: 0.1066 seconds
+	Refitted stripped engine inference time on 100 cases: 0.0606 seconds
+	Normal engine correctly recognized data/samples/resnet50/tabby_tiger_cat.jpg as tiger cat
+	Refitted stripped engine correctly recognized data/samples/resnet50/tabby_tiger_cat.jpg as tiger cat
+	```
+### Sample --help options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+# Additional resources
+
+The following resources provide a deeper understanding about importing a model into TensorRT using Python:
+
+**ResNet-50**
+- [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf)
+
+**Documentation**
+- [Introduction To NVIDIA’s TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [NVIDIA’s TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+February 2024
+
+Initial release of this sample.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/build_engines.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/build_engines.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f1e3936ce3ba606dc88ac67c83bb59e6e3efa10
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/build_engines.py
@@ -0,0 +1,116 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+import math
+import time
+import datetime
+
+import tensorrt as trt
+
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+import common
+
+# You can set the logger severity higher to suppress messages (or lower to display more messages).
+TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+
+
+def convert_size(size_bytes):
+    """Format a byte count as a human-readable string using 1024-based units.
+
+    Args:
+        size_bytes: Size in bytes (a non-negative number).
+
+    Returns:
+        "0B" when size_bytes is 0, otherwise "<value> <unit>" with the value
+        rounded to two decimals, e.g. "1.5 KB".
+
+    Raises:
+        ValueError: If size_bytes is negative (raised by math.log).
+    """
+    if size_bytes == 0:
+        return "0B"
+    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
+    exponent = int(math.floor(math.log(size_bytes, 1024)))
+    # Clamp the unit index: values below 1 would otherwise index from the end of
+    # the tuple (reporting "YB"), and values beyond the last unit would raise IndexError.
+    exponent = min(max(exponent, 0), len(size_name) - 1)
+    scaled = round(size_bytes / math.pow(1024, exponent), 2)
+    return "%s %s" % (scaled, size_name[exponent])
+
+def main(args):
+    """Build and save a weight-stripped engine and a normal engine from one ONNX model.
+
+    The network is parsed once from ``args.original_onnx``. The weight-stripped engine
+    (FP16 + STRIP_PLAN) is built first, which also generates the timing cache attached
+    to the builder config; STRIP_PLAN is then cleared and the normal engine is built
+    with the same config and timing cache.
+
+    Args:
+        args: Parsed command-line namespace; reads ``original_onnx``,
+            ``output_stripped_engine`` and ``output_normal_engine``.
+
+    Raises:
+        RuntimeError: If the ONNX model cannot be parsed or an engine build returns None.
+    """
+
+    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(0) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
+        # NOTE(review): only args.original_onnx is parsed here; args.stripped_onnx is
+        # validated by the command-line entry point but never read by this function.
+        with open(args.original_onnx, 'rb') as onnx_model:
+            parsed_ok = parser.parse(onnx_model.read())
+        if not parsed_ok:
+            # Surface the parser diagnostics instead of failing later with an opaque build error.
+            for error_index in range(parser.num_errors):
+                print(parser.get_error(error_index))
+            raise RuntimeError("Failed to parse ONNX model {}".format(args.original_onnx))
+
+        with builder.create_builder_config() as config:
+
+            config.set_flag(trt.BuilderFlag.FP16)
+            config.set_flag(trt.BuilderFlag.STRIP_PLAN)
+
+            # Start from an empty timing cache so both builds share the same tactic timings.
+            cache = config.create_timing_cache(b"")
+            config.set_timing_cache(cache, ignore_mismatch = False)
+
+            profile = builder.create_optimization_profile()
+            profile.set_shape("gpu_0/data_0", min=[1, 3, 224, 224], opt=[1, 3, 224, 224], max=[1, 3, 224, 224])
+            config.add_optimization_profile(profile)
+
+            def build_and_save_engine(builder, network, config, output):
+                """Build a serialized engine, write it to ``output`` and report the elapsed time."""
+                start_time = time.time()
+                engine_bytes = builder.build_serialized_network(network, config)
+                # Explicit check rather than ``assert`` so it still runs under ``python -O``.
+                if engine_bytes is None:
+                    raise RuntimeError("Failed to build engine for {}".format(output))
+                with open(output, 'wb') as f:
+                    f.write(engine_bytes)
+                total_time = time.time() - start_time
+                print("built and saved {} in time {}".format(output, str(datetime.timedelta(seconds=int(total_time)))))
+
+            # build weight-stripped engine and generate timing cache.
+            build_and_save_engine(builder, network, config, args.output_stripped_engine)
+
+            # build normal engine with the same timing cache.
+            config.clear_flag(trt.BuilderFlag.STRIP_PLAN)
+            build_and_save_engine(builder, network, config, args.output_normal_engine)
+
+def get_default_model_file():
+    """Look up the default ResNet50 ONNX model via the shared sample-data helper.
+
+    Returns:
+        The first entry of the file list returned by ``common.find_sample_data``
+        when asked for "ResNet50.onnx" in the "resnet50" subfolder.
+    """
+    requested_files = ["ResNet50.onnx"]
+    # The helper returns a pair; only the list of located files is needed here.
+    _unused, located_files = common.find_sample_data(
+        description="Runs a ResNet50 network with a TensorRT inference engine.",
+        subfolder="resnet50",
+        find_files=requested_files,
+    )
+    return located_files[0]
+
+if __name__ == "__main__":
+    # Command-line entry point: resolve the ONNX model paths, validate them, then build both engines.
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--stripped_onnx",
+        type=str,
+        default=None,
+        help="The ONNX model file to load for building stripped engine.",
+    )
+    parser.add_argument(
+        "--original_onnx",
+        type=str,
+        default=None,
+        help="The ONNX model file to load for building normal engine.",
+    )
+    parser.add_argument(
+        "--output_stripped_engine",
+        type=str,
+        default='stripped_engine.trt',
+        help="The output path for the weight-stripped TRT engine.",
+    )
+    parser.add_argument(
+        "--output_normal_engine",
+        type=str,
+        default='normal_engine.trt',
+        help="The output path for the full TRT engine.",
+    )
+    args, _ = parser.parse_known_args()
+
+    # Fall back to the sample-data ResNet50 model for any path that was not given.
+    default_model = get_default_model_file()
+    if args.stripped_onnx is None:
+        args.stripped_onnx = default_model
+    if args.original_onnx is None:
+        args.original_onnx = default_model
+
+    def _exit_if_missing(path, message):
+        # Print usage plus the given message and exit with status 1 when the path is absent.
+        if os.path.exists(path):
+            return
+        parser.print_help()
+        print(message)
+        sys.exit(1)
+
+    _exit_if_missing(args.stripped_onnx, f"--stripped_onnx {args.stripped_onnx} does not exist.")
+    _exit_if_missing(args.original_onnx, f"--original_onnx {args.original_onnx} does not exist.")
+
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/refit_engine_and_infer.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/refit_engine_and_infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..19d4df6179ef4b93945fff943e5711fb2db8f5ae
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/refit_engine_and_infer.py
@@ -0,0 +1,166 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import argparse
+import random
+import sys
+import time
+import datetime
+import numpy as np
+
+import tensorrt as trt
+from PIL import Image
+
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+import common
+
+
+# You can set the logger severity higher to suppress messages (or lower to display more messages).
+TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+
+class ModelData:
+    """Constants describing the ResNet50 model used by this sample."""
+
+    # File name of the ONNX model requested from the sample data lookup.
+    MODEL_PATH = "ResNet50.onnx"
+    # Unpacked as (c, h, w) when test images are resized.
+    INPUT_SHAPE = (3, 224, 224)
+    # TensorRT data type; converted to a numpy type with trt.nptype() where needed.
+    DTYPE = trt.float32
+
+def load_stripped_engine_and_refit(input_file, onnx_model_path):
+    """Deserialize a weight-stripped engine and refit its weights from an ONNX model.
+
+    Args:
+        input_file: Path of the serialized weight-stripped engine.
+        onnx_model_path: Path of the ONNX model passed to the parser refitter.
+
+    Returns:
+        The deserialized engine after ``refit_cuda_engine`` has succeeded.
+
+    Raises:
+        RuntimeError: If deserialization fails or either refit step reports failure.
+    """
+    runtime = trt.Runtime(TRT_LOGGER)
+
+    # Read the whole plan first so the file is closed before the refit work starts.
+    with open(input_file, 'rb') as engine_file:
+        serialized_engine = engine_file.read()
+
+    engine = runtime.deserialize_cuda_engine(serialized_engine)
+    if engine is None:
+        raise RuntimeError("Failed to deserialize engine from {}".format(input_file))
+
+    refitter = trt.Refitter(engine, TRT_LOGGER)
+    parser_refitter = trt.OnnxParserRefitter(refitter, TRT_LOGGER)
+    # The refit calls must not live inside ``assert``: under ``python -O`` asserts are
+    # removed, and the engine would then be returned without any weights refitted.
+    if not parser_refitter.refit_from_file(onnx_model_path):
+        raise RuntimeError("Failed to load refit weights from {}".format(onnx_model_path))
+    if not refitter.refit_cuda_engine():
+        raise RuntimeError("Failed to refit engine loaded from {}".format(input_file))
+
+    return engine
+
+def load_normal_engine(input_file):
+    """Deserialize a serialized TensorRT engine from ``input_file`` and return it."""
+    trt_runtime = trt.Runtime(TRT_LOGGER)
+    with open(input_file, 'rb') as plan_file:
+        plan_bytes = plan_file.read()
+        return trt_runtime.deserialize_cuda_engine(plan_bytes)
+
+
+def load_normalized_test_case(test_image, pagelocked_buffer):
+    """Load an image, normalize it and copy it into the given host buffer.
+
+    Args:
+        test_image: Path of the image file to load.
+        pagelocked_buffer: Destination array; receives the flattened, normalized
+            CHW image via ``np.copyto``.
+
+    Returns:
+        ``test_image`` unchanged, so callers can keep track of which image was loaded.
+    """
+    # Converts the input image to a CHW Numpy array
+    def normalize_image(image):
+        # Resize, antialias and transpose the image to CHW.
+        _, h, w = ModelData.INPUT_SHAPE
+        image_arr = (
+            # Force three channels so grayscale/RGBA/CMYK inputs still transpose to CHW.
+            np.asarray(image.convert("RGB").resize((w, h), Image.LANCZOS))
+            .transpose([2, 0, 1])
+            .astype(trt.nptype(ModelData.DTYPE))
+            .ravel()
+        )
+        # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
+        return (image_arr / 255.0 - 0.45) / 0.225
+
+    # Normalize the image and copy to pagelocked memory; the context manager
+    # closes the underlying image file once the pixels have been consumed.
+    with Image.open(test_image) as image:
+        np.copyto(pagelocked_buffer, normalize_image(image))
+    return test_image
+
+def main(args):
+    """Run the same random test image through the normal and the refitted stripped engine.
+
+    Each engine runs inference 100 times while the total wall-clock time is printed,
+    then the top prediction of each engine is checked against the image file name.
+    The process exits with status 1 if either prediction does not match.
+
+    Args:
+        args: Parsed command-line namespace; reads ``normal_engine`` and ``stripped_engine``.
+
+    Returns:
+        A tuple ``(trt_outputs, trt_outputs_refitted)`` holding the last inference
+        outputs of the normal engine and of the refitted stripped engine.
+    """
+    # Set the data path to the directory that contains the trained models and test images for inference.
+    _, data_files = common.find_sample_data(
+        description="Runs a ResNet50 network with a TensorRT inference engine.",
+        subfolder="resnet50",
+        find_files=[
+            "binoculars.jpeg",
+            "reflex_camera.jpeg",
+            "tabby_tiger_cat.jpg",
+            ModelData.MODEL_PATH,
+            "class_labels.txt",
+        ],
+    )
+    # Get test images, models and labels.
+    test_images = data_files[0:3]
+    onnx_model_file, labels_file = data_files[3:]
+
+    # Close the labels file deterministically instead of leaving the handle open.
+    with open(labels_file, "r") as labels_handle:
+        labels = labels_handle.read().split("\n")
+
+    # Load a TensorRT engine.
+    engine = load_normal_engine(args.normal_engine)
+    refitted_engine = load_stripped_engine_and_refit(args.stripped_engine, onnx_model_file)
+
+    # Allocate buffers and create a CUDA stream.
+    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
+    inputs_1, outputs_1, bindings_1, stream_1 = common.allocate_buffers(refitted_engine)
+
+    # Contexts are used to perform inference.
+    context = engine.create_execution_context()
+    context_1 = refitted_engine.create_execution_context()
+
+    # Load a normalized test case into the host input page-locked buffer.
+    test_image = random.choice(test_images)
+    test_case = load_normalized_test_case(test_image, inputs[0].host)
+    test_case_1 = load_normalized_test_case(test_image, inputs_1[0].host)
+
+    # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
+    # probability that the image corresponds to that label
+    start_time = time.time()
+    for _ in range(100): # count time for 100 times of inference
+        trt_outputs = common.do_inference(context, engine=engine, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
+    total_time = time.time() - start_time
+    print("Normal engine inference time on 100 cases: {:.4f} seconds".format(total_time))
+
+    start_time = time.time()
+    for _ in range(100):
+        trt_outputs_refitted = common.do_inference(context_1, engine=refitted_engine, bindings=bindings_1, inputs=inputs_1, outputs=outputs_1, stream=stream_1)
+    total_time = time.time() - start_time
+    print("Refitted stripped engine inference time on 100 cases: {:.4f} seconds".format(total_time))
+
+    # We use the highest probability as our prediction. Its index corresponds to the predicted label.
+    pred = labels[np.argmax(trt_outputs[0])]
+    if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
+        print("Normal engine correctly recognized " + test_case + " as " + pred)
+    else:
+        print("Normal engine incorrectly recognized " + test_case + " as " + pred)
+        # sys.exit is always available; the bare exit() builtin is only injected by the site module.
+        sys.exit(1)
+
+    pred_refitted = labels[np.argmax(trt_outputs_refitted[0])]
+    if "_".join(pred_refitted.split()) in os.path.splitext(os.path.basename(test_case_1))[0]:
+        print("Refitted stripped engine correctly recognized " + test_case + " as " + pred_refitted)
+    else:
+        print("Refitted stripped engine incorrectly recognized " + test_case + " as " + pred_refitted)
+        sys.exit(1)
+
+    return trt_outputs, trt_outputs_refitted
+
+
+if __name__ == "__main__":
+    # Command-line entry point: validate both engine paths, run the comparison, report the MSE.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--stripped_engine",
+        type=str,
+        default='stripped_engine.trt',
+        help="The stripped engine file to load.",
+    )
+    parser.add_argument(
+        "--normal_engine",
+        type=str,
+        default='normal_engine.trt',
+        help="The normal engine file to load.",
+    )
+
+    args, _ = parser.parse_known_args()
+
+    def _exit_if_missing(path, message):
+        # Print usage plus the given message and exit with status 1 when the path is absent.
+        if os.path.exists(path):
+            return
+        parser.print_help()
+        print(message)
+        sys.exit(1)
+
+    _exit_if_missing(args.stripped_engine, f"--stripped_engine {args.stripped_engine} does not exist.")
+    _exit_if_missing(args.normal_engine, f"--normal_engine {args.normal_engine} does not exist.")
+
+    trt_outputs, trt_outputs_refitted = main(args)
+    output_mse = np.square(np.subtract(trt_outputs, trt_outputs_refitted)).mean()
+    print("The MSE of the final layer output is", output_mse)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fc537473f41bd84b92e0739dd7c2dd0aa434218e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/sample_weight_stripping/requirements.txt
@@ -0,0 +1,9 @@
+Pillow>=10.0.0
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f6b52c008a69af8b3dd88b45a1f483afdeaae60
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/README.md
@@ -0,0 +1,95 @@
+# Introduction To IProgressMonitor Callbacks Using Python
+
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+	* [simple_progress_monitor](#simple_progress_monitor)
+- [Prerequisites](#prerequisites)
+- [Running the sample](#running-the-sample)
+	* [Sample `--help` options](#sample-help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, simple_progress_monitor, is a Python sample which uses TensorRT and its included ONNX parser to perform inference with ResNet-50 models saved in ONNX format. It displays animated progress bars while TensorRT builds the engine.
+
+## How does this sample work?
+
+### simple_progress_monitor
+
+This sample demonstrates how to build an engine from an ONNX model file using the open-source ONNX parser and then run inference. The ONNX parser can be used with any framework that supports the ONNX format (typically `.onnx` files). An `IProgressMonitor` object receives updates on the progress of the build, and displays them as ASCII progress bars on stdout.
+
+## Prerequisites
+
+1. Install the dependencies for Python.
+
+```bash
+pip3 install -r requirements.txt
+```
+
+## Running the sample
+
+1.  Run the sample from a terminal to create a TensorRT inference engine and run inference:
+	`python3 simple_progress_monitor.py`
+
+	**Note:** If the TensorRT sample data is not installed in the default location, for example `/usr/src/tensorrt/data/`, the `data` directory must be specified. For example: `python3 simple_progress_monitor.py -d /path/to/my/data/`
+
+	**Note:** Do not redirect the output of this script to a file or pipe.
+
+2.  Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+	`Correctly recognized data/samples/resnet50/reflex_camera.jpeg as reflex camera`
+
+### Sample --help options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option. For example:
+```
+usage: simple_progress_monitor.py [-h] [-d DATADIR]
+
+Runs a ResNet50 network with a TensorRT inference engine. Displays intermediate build progress.
+
+optional arguments:
+ -h, --help            show this help message and exit
+ -d DATADIR, --datadir DATADIR
+                       Location of the TensorRT sample data directory.
+                       (default: /usr/src/tensorrt/data)
+```
+
+# Additional resources
+
+The following resources provide a deeper understanding about importing a model into TensorRT using Python:
+
+**ResNet-50**
+- [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf)
+
+**Parsers**
+- [ONNX Parser](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/python_api/parsers/Onnx/pyOnnx.html)
+
+**Documentation**
+- [Introduction To NVIDIA’s TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [Importing A Model Using A Parser In Python](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#import_model_python)
+- [NVIDIA’s TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+**Terminal Escape Sequences**
+- Linux: [XTerm Control Sequences](https://invisible-island.net/xterm/ctlseqs/ctlseqs.html)
+- Windows: [Console Virtual Terminal Sequences](https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+August 2023
+Removed support for Python versions < 3.8.
+
+June 2023
+This `README.md` file was created and reviewed.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fc537473f41bd84b92e0739dd7c2dd0aa434218e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/requirements.txt
@@ -0,0 +1,9 @@
+Pillow>=10.0.0
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/simple_progress_monitor.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/simple_progress_monitor.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe54f7202a5b69a4d416f3d964f4f4a2e16c9754
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/simple_progress_monitor/simple_progress_monitor.py
@@ -0,0 +1,220 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+
+# This sample demonstrates incremental progress reporting while it uses an ONNX ResNet50 Model to create a TensorRT Inference Engine.
+import random
+import sys
+
+import numpy as np
+
+import tensorrt as trt
+from PIL import Image
+
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+import common
+
+
+class ModelData(object):
+    MODEL_PATH = "ResNet50.onnx"
+    INPUT_SHAPE = (3, 224, 224)
+    # We can convert TensorRT data types to numpy types with trt.nptype().
+    DTYPE = trt.float32
+
+
+# This is a simple ASCII-art progress monitor comparable to the C++ version in sample_progress_monitor.
+class SimpleProgressMonitor(trt.IProgressMonitor):
+    def __init__(self):
+        trt.IProgressMonitor.__init__(self)
+        self._active_phases = {}
+        self._step_result = True
+
+    def phase_start(self, phase_name, parent_phase, num_steps):
+        try:
+            if parent_phase is not None:
+                nbIndents = 1 + self._active_phases[parent_phase]["nbIndents"]
+            else:
+                nbIndents = 0
+            self._active_phases[phase_name] = {
+                "title": phase_name,
+                "steps": 0,
+                "num_steps": num_steps,
+                "nbIndents": nbIndents,
+            }
+            self._redraw()
+        except KeyboardInterrupt:
+            # The phase_start callback cannot directly cancel the build, so request the cancellation from within step_complete.
+            _step_result = False
+
+    def phase_finish(self, phase_name):
+        try:
+            del self._active_phases[phase_name]
+            self._redraw(blank_lines=1)  # Clear the removed phase.
+        except KeyboardInterrupt:
+            _step_result = False
+
+    def step_complete(self, phase_name, step):
+        try:
+            self._active_phases[phase_name]["steps"] = step
+            self._redraw()
+            return self._step_result
+        except KeyboardInterrupt:
+            # There is no need to propagate this exception to TensorRT. We can simply cancel the build.
+            return False
+
+    def _redraw(self, *, blank_lines=0):
+        # The Python curses module is not widely available on Windows platforms.
+        # Instead, this function uses raw terminal escape sequences. See the sample documentation for references.
+        def clear_line():
+            print("\x1B[2K", end="")
+
+        def move_to_start_of_line():
+            print("\x1B[0G", end="")
+
+        def move_cursor_up(lines):
+            print("\x1B[{}A".format(lines), end="")
+
+        def progress_bar(steps, num_steps):
+            INNER_WIDTH = 10
+            completed_bar_chars = int(INNER_WIDTH * steps / float(num_steps))
+            return "[{}{}]".format(
+                "=" * completed_bar_chars, "-" * (INNER_WIDTH - completed_bar_chars)
+            )
+
+        # Set max_cols to a default of 200 if not run in interactive mode.
+        max_cols = os.get_terminal_size().columns if sys.stdout.isatty() else 200
+
+        move_to_start_of_line()
+        for phase in self._active_phases.values():
+            phase_prefix = "{indent}{bar} {title}".format(
+                indent=" " * phase["nbIndents"],
+                bar=progress_bar(phase["steps"], phase["num_steps"]),
+                title=phase["title"],
+            )
+            phase_suffix = "{steps}/{num_steps}".format(**phase)
+            allowable_prefix_chars = max_cols - len(phase_suffix) - 2
+            if allowable_prefix_chars < len(phase_prefix):
+                phase_prefix = phase_prefix[0 : allowable_prefix_chars - 3] + "..."
+            clear_line()
+            print(phase_prefix, phase_suffix)
+        for line in range(blank_lines):
+            clear_line()
+            print()
+        move_cursor_up(len(self._active_phases) + blank_lines)
+        sys.stdout.flush()
+
+
+# Logger passed to the TensorRT builder, ONNX parser and runtime created in build_engine_onnx.
+# You can set the logger severity higher to suppress messages (or lower to display more messages).
+TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
+
+
+# The Onnx path is used for Onnx models.
+def build_engine_onnx(model_file):
+    builder = trt.Builder(TRT_LOGGER)
+    network = builder.create_network(0)
+    config = builder.create_builder_config()
+    if not sys.stdout.isatty():
+        print(
+            "Warning: This sample should be run from an interactive terminal in order to showcase the progress monitor correctly."
+        )
+    config.progress_monitor = SimpleProgressMonitor()
+    parser = trt.OnnxParser(network, TRT_LOGGER)
+
+    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, common.GiB(1))
+    # Load the Onnx model and parse it in order to populate the TensorRT network.
+    with open(model_file, "rb") as model:
+        if not parser.parse(model.read()):
+            print("ERROR: Failed to parse the ONNX file.")
+            for error in range(parser.num_errors):
+                print(parser.get_error(error))
+            return None
+
+    engine_bytes = builder.build_serialized_network(network, config)
+    runtime = trt.Runtime(TRT_LOGGER)
+    return runtime.deserialize_cuda_engine(engine_bytes)
+
+
+def load_normalized_test_case(test_image, pagelocked_buffer):
+    # Converts the input image to a CHW Numpy array.
+    def normalize_image(image):
+        # Resize, antialias and transpose the image to CHW.
+        c, h, w = ModelData.INPUT_SHAPE
+        image_arr = (
+            np.asarray(image.resize((w, h), Image.LANCZOS))
+            .transpose([2, 0, 1])
+            .astype(trt.nptype(ModelData.DTYPE))
+            .ravel()
+        )
+        # This particular ResNet50 model requires some preprocessing, specifically, mean normalization.
+        return (image_arr / 255.0 - 0.45) / 0.225
+
+    # Normalize the image and copy to pagelocked memory.
+    np.copyto(pagelocked_buffer, normalize_image(Image.open(test_image)))
+    return test_image
+
+
+def main():
+    # Set the data path to the directory that contains the trained models and test images for inference.
+    _, data_files = common.find_sample_data(
+        description="Runs a ResNet50 network with a TensorRT inference engine. Displays intermediate build progress.",
+        subfolder="resnet50",
+        find_files=[
+            "binoculars.jpeg",
+            "reflex_camera.jpeg",
+            "tabby_tiger_cat.jpg",
+            ModelData.MODEL_PATH,
+            "class_labels.txt",
+        ],
+    )
+    # Get test images, models and labels.
+    test_images = data_files[0:3]
+    onnx_model_file, labels_file = data_files[3:]
+    labels = open(labels_file, "r").read().split("\n")
+
+    # Build a TensorRT engine.
+    engine = build_engine_onnx(onnx_model_file)
+    # Inference is the same regardless of which parser is used to build the engine, since the model architecture is the same.
+    # Allocate buffers and create a CUDA stream.
+    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
+    # Contexts are used to perform inference.
+    context = engine.create_execution_context()
+
+    # Load a normalized test case into the host input page-locked buffer.
+    test_image = random.choice(test_images)
+    test_case = load_normalized_test_case(test_image, inputs[0].host)
+    # Run the engine. The output will be a 1D tensor of length 1000, where each value represents the
+    # probability that the image corresponds to that label
+    trt_outputs = common.do_inference(
+        context,
+        engine=engine,
+        bindings=bindings,
+        inputs=inputs,
+        outputs=outputs,
+        stream=stream,
+    )
+    # We use the highest probability as our prediction. Its index corresponds to the predicted label.
+    pred = labels[np.argmax(trt_outputs[0])]
+    common.free_buffers(inputs, outputs, stream)
+    if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
+        print("Correctly recognized " + test_case + " as " + pred)
+    else:
+        print("Incorrectly recognized " + test_case + " as " + pred)
+
+
+# Run the sample only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..789eead3e6fe805001d6825de280cb53bd3d48be
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/README.md
@@ -0,0 +1,331 @@
+# TensorFlow Object Detection API Models in TensorRT
+
+
+Support for [TensorFlow Object Detection (TFOD) API](https://github.com/tensorflow/models/tree/master/research/object_detection) models in TensorRT, including Single Shot Detector, Faster R-CNN and Mask R-CNN models. This script helps with converting, running and validating these models with TensorRT.
+
+## Setup
+
+### TensorFlow and TensorRT Environment
+
+Note: The sample is not compatible with Python-3.12 because tensorflow-addons does not support Python-3.12.
+
+In order for scripts to work we suggest an environment with TensorRT >= 8.0.1 and TensorFlow 2.13.1.
+
+Install TensorRT as per the [TensorRT Install Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html). You will need to make sure the Python bindings for TensorRT are also installed correctly, these are available by installing the `python3-libnvinfer` and `python3-libnvinfer-dev` packages on your TensorRT download.
+
+If you would like to use Docker, you can use an NGC image to fulfill these requirements, such as:
+
+```
+docker pull nvcr.io/nvidia/tensorflow:23.07-tf2-py3
+```
+### TFOD API Environment
+
+To run these scripts, you will also need a recent TFOD installation. You can do so by cloning the https://github.com/tensorflow/models/tree/master/research/object_detection repository and following the [installation procedure for TF2](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2.md).
+
+Alternatively, if you're running on an NGC TF2 docker, the TFOD installation procedure can be done as follows:
+
+```
+cd /workspace
+wget https://github.com/protocolbuffers/protobuf/releases/download/v3.15.4/protoc-3.15.4-linux-x86_64.zip
+unzip protoc*.zip bin/protoc -d /usr/local
+git clone https://github.com/tensorflow/models.git
+cd /workspace/models/research
+git checkout 66e22c4
+protoc object_detection/protos/*.proto --python_out=.
+cp object_detection/packages/tf2/setup.py ./
+## Pin pyyaml==6.0.1 to avoid v5.4.1 with known CVEs
+sed -i '22i\    '"'"'pyyaml==6.0.1'"'"',' setup.py
+pip --use-deprecated=legacy-resolver install .
+```
+
+You can verify the installation by running:
+
+```
+pip show object-detection
+```
+
+### Other Dependencies
+
+Install all dependencies listed in this sample's `requirements.txt` file:
+
+```
+pip install -r requirements.txt
+```
+
+You will also need the `onnx-graphsurgeon` python module. If not already installed by TensorRT, you can install it manually by running:
+
+```
+pip install onnx-graphsurgeon==0.3.10 --index-url https://pypi.ngc.nvidia.com
+```
+
+**NOTE:** Please make sure that the `onnx-graphsurgeon` module installed by pip is version == 0.3.10.
+
+## Model Conversion
+
+The workflow to convert a TFOD model is basically TensorFlow → ONNX → TensorRT, and so parts of this process require TensorFlow to be installed. If you are performing this conversion to run inference on the edge, such as for NVIDIA Jetson devices, it might be easier to do the ONNX conversion on a PC first, and then build the TensorRT engine on the device later.
+
+### TensorFlow Saved Model
+
+The starting point of conversion is a TensorFlow saved model. This can be exported from your own models trained with the TFOD API, or you can download a pre-trained model from the [TF2 Detection Model Zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md), such as:
+
+```
+wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz
+tar -xvf ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz
+```
+
+When extracted, this package holds a directory named `saved_model` which holds the saved model ready for conversion.
+
+Structure of the checkpoint is similar to this:
+
+```
+ssd_mobilenet_v2_320x320_coco17_tpu-8
+├── checkpoint
+│   ├── ckpt-0.data-00000-of-00001
+│   └── ckpt-0.index
+├── pipeline.config
+└── saved_model
+    └── saved_model.pb
+```
+
+**NOTE:** In order to proceed, you need to re-export the saved model. If you don't re-export the model with **float_image_tensor** as the input type, the conversion process will fail.
+
+To re-export, run exporter_main_v2.py located in TFOD API:
+
+```
+cd /path/to/models/research/object_detection
+python exporter_main_v2.py \
+    --input_type float_image_tensor \
+    --trained_checkpoint_dir /path/to/ssd_mobilenet_v2_320x320_coco17_tpu-8/checkpoint \
+    --pipeline_config_path /path/to/ssd_mobilenet_v2_320x320_coco17_tpu-8/pipeline.config \
+    --output_directory /path/to/export
+```
+
+Where `--trained_checkpoint_dir` and `--pipeline_config_path` point to the corresponding paths in the training checkpoint, `--input_type` must be set to `float_image_tensor` for the ONNX conversion process to run correctly. On the path pointed by `--output_directory` you will then find the newly created export, under which there will be a re-exported saved model  in a directory aptly named `saved_model`.
+
+>**NOTE:** When you try to re-export the **Mask R-CNN** pre-trained model, you may see an error message such as:
+>
+>```
+>google.protobuf.text_format.ParseError: 153:40 : Message type "object_detection.protos.TFRecordInputReader" has no field named "s".
+>```
+>
+>This error comes from a bug inside of the `/mask_rcnn_inception_resnet_v2_1024x1024_coco17_gpu-8/pipeline.config` file, in order to fix it, all you have to do is find a line with `input_path: "PATH_TO_BE_CONFIGURED"s`, currently it is line **153** and delete the final `s` on the end of this line, save the file and try to re-export again.
+
+### Create ONNX Graph
+
+This is a list of supported models, additionally keep in mind that pre-trained TF2 model zoo names do not always reflect the actual model input resolution, see correct supported model resolution breakdown here:
+
+| **Model**                                     | **Resolution** |
+| ----------------------------------------------|----------------|
+| SSD MobileNet v2 320x320                      | **300x300**       |
+| SSD MobileNet V1 FPN 640x640                  | 640x640        |
+| SSD MobileNet V2 FPNLite 320x320              | 320x320        |
+| SSD MobileNet V2 FPNLite 640x640              | 640x640        |
+| SSD ResNet50 V1 FPN 640x640 (RetinaNet50)     | 640x640        |
+| SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)   | 1024x1024      |
+| SSD ResNet101 V1 FPN 640x640 (RetinaNet101)   | 640x640        |
+| SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101) | 1024x1024      |
+| SSD ResNet152 V1 FPN 640x640 (RetinaNet152)   | 640x640        |
+| SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152) | 1024x1024      |
+| Faster R-CNN ResNet50 V1 640x640              | 640x640        |
+| Faster R-CNN ResNet50 V1 1024x1024            | 1024x1024      |
+| Faster R-CNN ResNet50 V1 800x1333             | **1333x1333**      |
+| Faster R-CNN ResNet101 V1 640x640             | 640x640        |
+| Faster R-CNN ResNet101 V1 1024x1024           | 1024x1024      |
+| Faster R-CNN ResNet101 V1 800x1333            | **1333x1333**      |
+| Faster R-CNN ResNet152 V1 640x640             | 640x640        |
+| Faster R-CNN ResNet152 V1 1024x1024           | 1024x1024      |
+| Faster R-CNN ResNet152 V1 800x1333            | **1333x1333**      |
+| Faster R-CNN Inception ResNet V2 640x640      | 640x640        |
+| Faster R-CNN Inception ResNet V2 1024x1024    | **1333x1333**      |
+| Mask R-CNN Inception ResNet V2 1024x1024      | 1024x1024      |
+
+
+If the TF saved model is ready to be converted (i.e. you ran `exporter_main_v2.py` to re-export as float_image_tensor), run:
+
+```
+python create_onnx.py \
+    --pipeline_config /path/to/exported/pipeline.config \
+    --saved_model /path/to/exported/saved_model \
+    --onnx /path/to/save/model.onnx
+```
+
+This will create the file `model.onnx` which is ready to convert to TensorRT.
+
+The script has a few optional arguments, including:
+
+* `--first_nms_threshold [...]` allows overriding the default 1st NMS score threshold parameter, as the runtime latency of the NMS plugin is sensitive to this value. It's a good practice to set this value as high as possible, while still fulfilling your application requirements, to reduce inference latency. In case of SSD models it will be a score threshold for first and final NMS, in case of Faster R-CNN and Mask R-CNN this will be a score threshold for Region Proposal Network.
+* `--second_nms_threshold [...]` allows overriding the default 2nd NMS score threshold parameter, further improves the runtime latency of the NMS plugin. It's a good practice to set this value as high as possible, while still fulfilling your application requirements, to reduce inference latency. Only applicable in case of Faster R-CNN and Mask R-CNN since it will be the second and last NMS for both networks.
+* `--batch_size` allows selection of various batch sizes, default is 1.
+* `--debug` allows to add an extra output to debug a particular node.
+* `--input_format` allows to set an input format of the network, either `NHWC` that is set by default or `NCHW`.
+* `--tf2onnx` allows saving the intermediate ONNX graph generated by tf2onnx.
+
+Optionally, you may wish to visualize the resulting ONNX graph with a tool such as [Netron](https://netron.app/).
+
+The input to the graph is a `float32` tensor with the selected input shape, containing RGB pixel data in the range of 0 to 255. All color value preprocessing will be performed inside the graph.
+
+The outputs of the graph are the same as the outputs of the [EfficientNMS_TRT](https://github.com/NVIDIA/TensorRT/tree/master/plugin/efficientNMSPlugin) plugin, named `num_detections`, `detection_boxes`, `detection_classes` and `detection_scores`. In the case of **Mask R-CNN**, the graph will have one more additional output from a segmentation head named `detection_masks`,  with shape `[batch_size, max_proposals, mask_height, mask_width]`, and float32 data type.
+
+### Build TensorRT Engine
+
+It is possible to build the TensorRT engine directly with `trtexec` using the ONNX graph generated in the previous step. However, the script `build_engine.py` is provided for convenience, as it has been tailored to engine building for TFOD models and calibration. Run `python build_engine.py --help` for details on available settings.
+
+#### FP16 Precision
+
+To build the TensorRT engine file with FP16 precision, run:
+
+```
+python build_engine.py \
+    --onnx /path/to/saved/model.onnx \
+    --engine /path/to/save/engine.trt \
+    --precision fp16
+```
+
+The file `engine.trt` will be created, which can now be used to infer with TensorRT.
+
+For best results, make sure no other processes are using the GPU during engine build, as it may affect the optimal tactic selection process.
+
+**Note:** If you receive any error messages about insufficient workspace size, especially when converting models with a larger batch size, increase the max workspace by adding an argument such as `--workspace 8` to assign up to an 8 GB workspace size.
+
+#### INT8 Precision
+
+To build and calibrate an engine for INT8 precision, run:
+
+```
+python build_engine.py \
+    --onnx /path/to/model.onnx \
+    --engine /path/to/engine.trt \
+    --precision int8 \
+    --calib_input /path/to/calibration/images \
+    --calib_cache /path/to/calibration.cache
+```
+
+Where `--calib_input` points to a directory with several thousands of images. For example, this could be a subset of the training or validation datasets that were used for the model. It's important that this data represents the runtime data distribution relatively well, therefore, the more images that are used for calibration, the better accuracy that will be achieved in INT8 precision. For models trained for the [COCO dataset](https://cocodataset.org/#home), we have found that 5,000 images gives a good result.
+
+The `--calib_cache` controls where the calibration cache file will be written to. This is useful to keep a cached copy of the calibration results. Next time you need to build the engine for the same network, if this file exists, it will skip the calibration step and use the cached values instead.
+
+#### Benchmark Engine
+
+Optionally, you can obtain execution timing information for the built engine by using the `trtexec` utility, as:
+
+```
+trtexec \
+    --loadEngine=/path/to/engine.trt \
+    --useCudaGraph --noDataTransfers \
+    --iterations=100 --avgRuns=100
+```
+
+If it's not already in your `$PATH`, the `trtexec` binary is usually found in `/usr/src/tensorrt/bin/trtexec`, depending on your TensorRT installation method.
+
+An inference benchmark will run, with GPU Compute latency times printed out to the console. Depending on your environment, you should see something similar to:
+
+```
+GPU Compute Time: min = 1.55835 ms, max = 1.91591 ms, mean = 1.58719 ms, median = 1.578 ms, percentile(99%) = 1.90668 ms
+```
+
+## Inference
+
+For optimal performance, inference should be done in a C++ application that takes advantage of CUDA Graphs to launch the inference request. Alternatively, the TensorRT engine built with this process can also be executed through either [Triton Inference Server](https://developer.nvidia.com/nvidia-triton-inference-server) or [DeepStream SDK](https://developer.nvidia.com/deepstream-sdk).
+
+However, for convenience, a python inference script is provided here for quick testing of the built TensorRT engine.
+
+### Inference in Python
+
+To perform object detection on a set of images with TensorRT, run:
+
+```
+python infer.py \
+    --engine /path/to/saved/engine.trt \
+    --input /path/to/images \
+    --output /path/to/output \
+    --preprocessor fixed_shape_resizer \
+    --labels /path/to/labels_coco.txt
+```
+
+Where the input path can be either a single image file, or a directory of jpg/png/bmp images.
+
+The argument `--preprocessor` corresponds to the image preprocessor set in your `pipeline.config` file, usually it is under `image_resizer` section, only two are now supported, namely `fixed_shape_resizer` and `keep_aspect_ratio_resizer`.
+
+The argument `--labels` is a path to a file that contains label data and is included with this repository.
+
+The script has a few optional arguments, including:
+* `--nms_threshold` allows overriding the default NMS score threshold parameter.
+* `--detection_type` allows to select a detection type, either `bbox` that is set by default or `segmentation` that works only with **Mask R-CNN**.
+* `--iou_threshold` allows to set IoU threshold for the mask segmentation, works only with **Mask R-CNN**, default is 0.5.
+
+The detection results will be written out to the specified output directory, consisting of a visualization image, and a tab-separated results file for each input image processed.
+
+#### SSD ResNet152 V1 FPN 1024x1024
+
+![ssd_infer](https://drive.google.com/uc?export=view&id=1xSA7IkJuAScCf_NaVyiAX6dA9BPQVyJf)
+
+#### Mask R-CNN
+
+![mrcnn_infer](https://drive.google.com/uc?export=view&id=1c_8hXdFjjpEWKJSJfqoeKKgVXE3BTuyQ)
+
+
+
+### Evaluate mAP Metric
+
+Given a validation dataset (such as [COCO val2017 data](http://images.cocodataset.org/zips/val2017.zip)) and ground truth annotations (such as [COCO instances_val2017.json](http://images.cocodataset.org/annotations/annotations_trainval2017.zip)), you can get the mAP metrics for the built TensorRT engine. This will use the mAP metrics tools functions from the [TFOD's research object detection](https://github.com/tensorflow/models/tree/master/research/object_detection) repository.
+
+```
+python eval_coco.py \
+    --engine /path/to/engine.trt \
+    --input /path/to/coco/val2017 \
+    --annotations /path/to/coco/annotations/instances_val2017.json \
+    --preprocessor fixed_shape_resizer
+```
+
+The script has a few optional arguments, including:
+* `--nms_threshold` allows overriding the default NMS score threshold parameter.
+* `--detection_type` allows selecting a detection type: `bbox`, which is the default, or `segmentation`, which applies to **Mask R-CNN** models and evaluates the masks instead.
+* `--iou_threshold` allows to set IoU threshold for the mask segmentation, works only with **Mask R-CNN**, default is 0.5.
+
+The argument `--preprocessor` corresponds to the image preprocessor set in your `pipeline.config` file as described in the inference section above.
+
+The mAP metric is sensitive to the NMS score threshold used, as using a high threshold will reduce the model recall, resulting in a lower mAP value. Ideally, mAP should be measured with a threshold of 0, but such a low value will impact the runtime latency of the EfficientNMS_TRT plugin. It may be a good idea to build separate TensorRT engines for different purposes. That is, one engine with a low threshold (like 0) dedicated for mAP validation, and another engine with your application specific threshold (like 0.4) for deployment. This is why we keep the NMS threshold as a configurable parameter in the `create_onnx.py` script.
+
+### TF vs TRT Comparison
+
+To compare how the TensorRT detections match the original TensorFlow model results, you can run:
+
+```
+python compare_tf.py \
+    --engine /path/to/saved/engine.trt \
+    --saved_model /path/to/exported/saved_model \
+    --input /path/to/images \
+    --annotations /path/to/coco/annotations/instances_val2017.json \
+    --output /path/to/output \
+    --preprocessor fixed_shape_resizer \
+    --labels /path/to/labels_coco.txt
+```
+
+The script has a few optional arguments, including:
+* `--nms_threshold` allows overriding the default NMS score threshold parameter.
+* `--detection_type` allows to select a detection type, either `bbox` that is set by default or `segmentation` that works only with **Mask R-CNN**.
+* `--iou_threshold` allows to set IoU threshold for the mask segmentation, works only with **Mask R-CNN**, default is 0.5.
+* `--num_images` the maximum number of images to visualize if you are passing a directory to `--input`.
+
+This script will process the images found in the given input path through both TensorFlow and TensorRT using the corresponding saved model and engine. It will then write to the output path a set of visualization images showing the inference results of both frameworks for visual qualitative comparison. Argument `--preprocessor` corresponds to the image preprocessor set in your `pipeline.config` file as described in the inference section above.
+
+If you run this on COCO val2017 images, you may also add the parameter `--annotations /path/to/coco/annotations/instances_val2017.json` to further compare against COCO ground truth annotations.
+
+#### SSD ResNet152 V1 FPN 1024x1024
+
+![ssd_compare_tf](https://drive.google.com/uc?export=view&id=1eX6LucHjDpmwG5tK-MGebRaW0zmdeee-)
+
+#### Mask R-CNN
+
+![mrcnn_compare_tf](https://drive.google.com/uc?export=view&id=1kNnfJ2H5OY85Z2e6KNxZgiYk3Lo-sB9r)
+
+# Changelog
+
+May 2024:
+  - Update TensorFlow version support to 2.13.1.
+
+August 2023:
+  - Removed support for Python versions < 3.8.
+  - Update ONNX version support to 1.14.0
+  - Update ONNX Runtime version support to 1.15.1 for Python>=3.8
+
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/build_engine.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/build_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bbf5f7c40595c3403e2e3e9ca1b62b2fce3d7d9
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/build_engine.py
@@ -0,0 +1,334 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import logging
+import argparse
+
+import numpy as np
+import tensorrt as trt
+from cuda import cudart
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+from image_batcher import ImageBatcher
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("EngineBuilder").setLevel(logging.INFO)
+log = logging.getLogger("EngineBuilder")
+
+
+class EngineCalibrator(trt.IInt8EntropyCalibrator2):
+    """
+    Implements the INT8 Entropy Calibrator 2.
+    """
+
+    def __init__(self, cache_file):
+        """
+        :param cache_file: The location of the cache file.
+        """
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher = None
+        self.batch_allocation = None
+        self.batch_generator = None
+
+    def set_image_batcher(self, image_batcher: ImageBatcher):
+        """
+        Define the image batcher to use, if any. If using only the cache file, an image batcher doesn't need
+        to be defined.
+        :param image_batcher: The ImageBatcher object
+        """
+        self.image_batcher = image_batcher
+        size = int(
+            np.dtype(self.image_batcher.dtype).itemsize
+            * np.prod(self.image_batcher.shape)
+        )
+        self.batch_allocation = common.cuda_call(cudart.cudaMalloc(size))
+        self.batch_generator = self.image_batcher.get_batch()
+
+    def get_batch_size(self):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Get the batch size to use for calibration.
+        :return: Batch size.
+        """
+        if self.image_batcher:
+            return self.image_batcher.batch_size
+        return 1
+
+    def get_batch(self, names):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Get the next batch to use for calibration, as a list of device memory pointers.
+        :param names: The names of the inputs, if useful to define the order of inputs.
+        :return: A list of int-casted memory pointers.
+        """
+        if not self.image_batcher:
+            return None
+        try:
+            batch, _, _ = next(self.batch_generator)
+            log.info(
+                "Calibrating image {} / {}".format(
+                    self.image_batcher.image_index, self.image_batcher.num_images
+                )
+            )
+            common.memcpy_host_to_device(
+                self.batch_allocation, np.ascontiguousarray(batch)
+            )
+            return [int(self.batch_allocation)]
+        except StopIteration:
+            log.info("Finished calibration batches")
+            return None
+
+    def read_calibration_cache(self):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Read the calibration cache file stored on disk, if it exists.
+        :return: The contents of the cache file, if any.
+        """
+        if os.path.exists(self.cache_file):
+            with open(self.cache_file, "rb") as f:
+                log.info("Using calibration cache file: {}".format(self.cache_file))
+                return f.read()
+
+    def write_calibration_cache(self, cache):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Store the calibration cache to a file on disk.
+        :param cache: The contents of the calibration cache to store.
+        """
+        with open(self.cache_file, "wb") as f:
+            log.info("Writing calibration cache data to: {}".format(self.cache_file))
+            f.write(cache)
+
+
+class EngineBuilder:
+    """
+    Parses an ONNX graph and builds a TensorRT engine from it.
+    """
+
+    def __init__(self, verbose=False, workspace=8):
+        """
+        :param verbose: If enabled, a higher verbosity level will be set on the TensorRT logger.
+        :param workspace: Max memory workspace to allow, in Gb.
+        """
+        self.trt_logger = trt.Logger(trt.Logger.INFO)
+        if verbose:
+            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE
+
+        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")
+
+        self.builder = trt.Builder(self.trt_logger)
+        self.config = self.builder.create_builder_config()
+        self.config.set_memory_pool_limit(
+            trt.MemoryPoolType.WORKSPACE, workspace * (2**30)
+        )
+
+        self.batch_size = None
+        self.network = None
+        self.parser = None
+
+    def create_network(self, onnx_path):
+        """
+        Parse the ONNX graph and create the corresponding TensorRT network definition.
+        :param onnx_path: The path to the ONNX graph to load.
+        """
+
+        self.network = self.builder.create_network(0)
+        self.parser = trt.OnnxParser(self.network, self.trt_logger)
+
+        onnx_path = os.path.realpath(onnx_path)
+        with open(onnx_path, "rb") as f:
+            if not self.parser.parse(f.read()):
+                log.error("Failed to load ONNX file: {}".format(onnx_path))
+                for error in range(self.parser.num_errors):
+                    log.error(self.parser.get_error(error))
+                sys.exit(1)
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+        outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]
+
+        log.info("Network Description")
+        for input in inputs:
+            self.batch_size = input.shape[0]
+            log.info(
+                "Input '{}' with shape {} and dtype {}".format(
+                    input.name, input.shape, input.dtype
+                )
+            )
+        for output in outputs:
+            log.info(
+                "Output '{}' with shape {} and dtype {}".format(
+                    output.name, output.shape, output.dtype
+                )
+            )
+        assert self.batch_size > 0
+
+        # TODO: These overrides are to improve fp16/int8 performance on FRCNN models
+        # it might be possible to avoid doing this by using different box encoding
+        # type on the two NMS plugins. To be determined.
+        for i in range(self.network.num_layers):
+            if self.network.get_layer(i).name in [
+                "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/squeeze",
+                "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/scale_value:0",
+                "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor/scale",
+                "nms/anchors:0",
+            ]:
+                self.network.get_layer(i).precision = trt.DataType.FLOAT
+                self.network.get_layer(i - 1).precision = trt.DataType.FLOAT
+            if self.network.get_layer(i).name == "FirstNMS/detection_boxes_conversion":
+                self.network.get_layer(i).precision = trt.DataType.FLOAT
+
+    def create_engine(
+        self,
+        engine_path,
+        precision,
+        calib_input=None,
+        calib_cache=None,
+        calib_num_images=5000,
+        calib_batch_size=8,
+    ):
+        """
+        Build the TensorRT engine and serialize it to disk.
+        :param engine_path: The path where to serialize the engine to.
+        :param precision: The datatype to use for the engine, either 'fp32', 'fp16' or 'int8'.
+        :param calib_input: The path to a directory holding the calibration images.
+        :param calib_cache: The path where to write the calibration cache to, or if it already exists, load it from.
+        :param calib_num_images: The maximum number of images to use for calibration.
+        :param calib_batch_size: The batch size to use for the calibration process.
+        """
+        engine_path = os.path.realpath(engine_path)
+        engine_dir = os.path.dirname(engine_path)
+        os.makedirs(engine_dir, exist_ok=True)
+        log.info("Building {} Engine in {}".format(precision, engine_path))
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+
+        # TODO: Strict type is only needed If the per-layer precision overrides are used
+        # If a better method is found to deal with that issue, this flag can be removed.
+        self.config.set_flag(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
+        self.config.set_flag(trt.BuilderFlag.DIRECT_IO)
+        self.config.set_flag(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+
+        if precision == "fp16":
+            if not self.builder.platform_has_fast_fp16:
+                log.warning("FP16 is not supported natively on this platform/device")
+            else:
+                self.config.set_flag(trt.BuilderFlag.FP16)
+        elif precision == "int8":
+            if not self.builder.platform_has_fast_int8:
+                log.warning("INT8 is not supported natively on this platform/device")
+            else:
+                if self.builder.platform_has_fast_fp16:
+                    # Also enable fp16, as some layers may be even more efficient in fp16 than int8
+                    self.config.set_flag(trt.BuilderFlag.FP16)
+                self.config.set_flag(trt.BuilderFlag.INT8)
+                self.config.int8_calibrator = EngineCalibrator(calib_cache)
+                if not os.path.exists(calib_cache):
+                    calib_shape = [calib_batch_size] + list(inputs[0].shape[1:])
+                    calib_dtype = trt.nptype(inputs[0].dtype)
+                    self.config.int8_calibrator.set_image_batcher(
+                        ImageBatcher(
+                            calib_input,
+                            calib_shape,
+                            calib_dtype,
+                            max_num_images=calib_num_images,
+                            exact_batches=True,
+                        )
+                    )
+
+        engine_bytes = self.builder.build_serialized_network(self.network, self.config)
+        if engine_bytes is None:
+            log.error("Failed to create engine")
+            sys.exit(1)
+
+        with open(engine_path, "wb") as f:
+            log.info("Serializing engine to file: {:}".format(engine_path))
+            f.write(engine_bytes)
+
+
+def main(args):
+    builder = EngineBuilder(args.verbose, args.workspace)
+    builder.create_network(args.onnx)
+    builder.create_engine(
+        args.engine,
+        args.precision,
+        args.calib_input,
+        args.calib_cache,
+        args.calib_num_images,
+        args.calib_batch_size,
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-o", "--onnx", help="The input ONNX model file to load")
+    parser.add_argument("-e", "--engine", help="The output path for the TRT engine")
+    parser.add_argument(
+        "-p",
+        "--precision",
+        default="fp16",
+        choices=["fp32", "fp16", "int8"],
+        help="The precision mode to build in, either 'fp32', 'fp16' or 'int8', default: 'fp16'",
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable more verbose log output"
+    )
+    parser.add_argument(
+        "-w",
+        "--workspace",
+        default=1,
+        type=int,
+        help="The max memory workspace size to allow in Gb, " "default: 1",
+    )
+    parser.add_argument(
+        "--calib_input", help="The directory holding images to use for calibration"
+    )
+    parser.add_argument(
+        "--calib_cache",
+        default="./calibration.cache",
+        help="The file path for INT8 calibration cache to use, default: ./calibration.cache",
+    )
+    parser.add_argument(
+        "--calib_num_images",
+        default=5000,
+        type=int,
+        help="The maximum number of images to use for calibration, default: 5000",
+    )
+    parser.add_argument(
+        "--calib_batch_size",
+        default=8,
+        type=int,
+        help="The batch size for the calibration process, default: 8",
+    )
+    args = parser.parse_args()
+    if not all([args.onnx, args.engine]):
+        parser.print_help()
+        log.error("These arguments are required: --onnx and --engine")
+        sys.exit(1)
+    if args.precision == "int8" and not (
+        args.calib_input or os.path.exists(args.calib_cache)
+    ):
+        parser.print_help()
+        log.error(
+            "When building in int8 precision, --calib_input or an existing --calib_cache file is required"
+        )
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/compare_tf.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/compare_tf.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae5168ebde55412c075fa9163523246979892276
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/compare_tf.py
@@ -0,0 +1,380 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import json
+import argparse
+
+import numpy as np
+import tensorflow as tf
+from pycocotools.coco import COCO
+from infer import TensorRTInfer
+from image_batcher import ImageBatcher
+from visualize import visualize_detections, concat_visualizations
+
+
+class TensorFlowInfer:
+    """
+    Implements TensorFlow inference of a saved model, following the same API as the TensorRTInfer class.
+    """
+
+    def __init__(self, saved_model_path, preprocessor, detection_type, iou_threshold):
+        self.preprocessor = preprocessor
+        self.detection_type = detection_type
+        self.iou_threshold = iou_threshold
+        gpus = tf.config.experimental.list_physical_devices("GPU")
+        for gpu in gpus:
+            tf.config.experimental.set_memory_growth(gpu, True)
+
+        self.model = tf.saved_model.load(saved_model_path)
+        self.pred_fn = self.model.signatures["serving_default"]
+
+        # Setup I/O bindings
+        self.inputs = []
+        fn_inputs = self.pred_fn.structured_input_signature[1]
+        for i, input in enumerate(list(fn_inputs.values())):
+            self.inputs.append(
+                {
+                    "index": i,
+                    "name": input.name,
+                    "dtype": np.dtype(input.dtype.as_numpy_dtype()),
+                    "shape": [1, 512, 512, 3],  # This can be overridden later
+                }
+            )
+        self.outputs = []
+        fn_outputs = self.pred_fn.structured_outputs
+        for i, output in enumerate(list(fn_outputs.values())):
+            self.outputs.append(
+                {
+                    "index": i,
+                    "name": output.name,
+                    "dtype": np.dtype(output.dtype.as_numpy_dtype()),
+                    "shape": output.shape.as_list(),
+                }
+            )
+
+    def override_input_shape(self, input, shape):
+        self.inputs[input]["shape"] = shape
+
+    def input_spec(self):
+        return self.inputs[0]["shape"], self.inputs[0]["dtype"]
+
+    def output_spec(self):
+        return self.outputs[0]["shape"], self.outputs[0]["dtype"]
+
+    def infer(self, batch, scales=None, nms_threshold=None):
+        """Run the saved model on one batch and decode batch element 0 into detection dicts.
+        :param batch: The preprocessed image batch, fed to the model's first input.
+        :param scales: Optional resize scales; scales[0] is a two-element pair or a single factor, per preprocessor.
+        :param nms_threshold: Optional score cutoff; detections scoring below it are skipped.
+        :return: A list holding one list of dicts (ymin, xmin, ymax, xmax, score, class, mask).
+        """
+        # Process I/O and execute the network
+        input = {self.inputs[0]["name"]: tf.convert_to_tensor(batch)}
+        output = self.pred_fn(**input)
+        assert "num_detections" in output  # key check; truth-testing the tensor fails on 0 detections
+        num = int(output["num_detections"].numpy().flatten()[0])
+        boxes = output["detection_boxes"].numpy()[:, 0:num, :]
+        scores = output["detection_scores"].numpy()[:, 0:num]
+        classes = output["detection_classes"].numpy()[:, 0:num]
+        # One additional output for segmentation masks
+        if "detection_masks" in output:
+            masks = output["detection_masks"].numpy()[:, 0:num]
+        # Process the results
+        detections = [[]]
+        normalized = boxes.size > 0 and np.max(boxes) < 2.0
+        for n in range(scores.shape[1]):
+            # Depending on preprocessor, box scaling will be slightly different.
+            if self.preprocessor == "fixed_shape_resizer":
+                if scores[0][n] == 0.0:
+                    break
+                # NOTE(review): for NHWC, shape[1]/shape[2] are height/width, so x/y look swapped — confirm
+                scale_x = self.inputs[0]["shape"][1] if normalized else 1.0
+                scale_y = self.inputs[0]["shape"][2] if normalized else 1.0
+                if scales:
+                    scale_x /= scales[0][0]
+                    scale_y /= scales[0][1]
+                if nms_threshold and scores[0][n] < nms_threshold:
+                    continue
+                # Depending on detection type you need slightly different data.
+                if self.detection_type == "bbox":
+                    mask = None
+                # Segmentation is only supported with Mask R-CNN, which has
+                # fixed_shape_resizer as image_resizer (lookup pipeline.config)
+                elif self.detection_type == "segmentation":
+                    # Select a mask
+                    mask = masks[0][n]
+                    # Slight scaling, to get binary masks after float32 -> uint8
+                    # conversion, if not scaled all pixels are zero.
+                    mask = mask > self.iou_threshold
+                    # Convert float32 -> uint8.
+                    mask = mask.astype(np.uint8)
+            elif self.preprocessor == "keep_aspect_ratio_resizer":
+                # No segmentation models with keep_aspect_ratio_resizer
+                mask = None
+                if scores[0][n] == 0.0:
+                    break
+                scale = self.inputs[0]["shape"][2] if normalized else 1.0
+                if scales:
+                    scale /= scales[0]
+                scale_y = scale
+                scale_x = scale
+                if nms_threshold and scores[0][n] < nms_threshold:
+                    continue
+            # Append to detections
+            detections[0].append(
+                {
+                    "ymin": boxes[0][n][0] * scale_y,
+                    "xmin": boxes[0][n][1] * scale_x,
+                    "ymax": boxes[0][n][2] * scale_y,
+                    "xmax": boxes[0][n][3] * scale_x,
+                    "score": scores[0][n],
+                    "class": int(classes[0][n]) - 1,
+                    "mask": mask,
+                }
+            )
+        return detections
+
+
+def run(batcher, inferer, framework, nms_threshold=None):
+    res_images = []
+    res_detections = []
+    for batch, images, scales in batcher.get_batch():
+        res_detections += inferer.infer(batch, scales, nms_threshold)
+        res_images += images
+        print(
+            "Processing {} / {} images ({})".format(
+                batcher.image_index, batcher.num_images, framework
+            ),
+            end="\r",
+        )
+    print()
+    return res_images, res_detections
+
+
+def parse_annotations(annotations_path, detection_type):
+    annotations = {}
+    if annotations_path and os.path.exists(annotations_path):
+        # Load annotations as coco, to extract segmentation masks
+        coco = COCO(annotations_path)
+        with open(annotations_path) as f:
+            ann_json = json.load(f)
+            for ann in ann_json["annotations"]:
+                img_id = ann["image_id"]
+                if img_id not in annotations.keys():
+                    annotations[img_id] = []
+                # Depending on detection type you need slightly different data.
+                if detection_type == "bbox":
+                    mask = None
+                # Segmentation is only supported with Mask R-CNN, which has
+                # fixed_shape_resizer as image_resizer (lookup pipeline.config)
+                elif detection_type == "segmentation":
+                    # Get np.array segmentation mask from annotation
+                    mask = coco.annToMask(ann)
+                annotations[img_id].append(
+                    {
+                        "ymin": ann["bbox"][1],
+                        "xmin": ann["bbox"][0],
+                        "ymax": ann["bbox"][1] + ann["bbox"][3],
+                        "xmax": ann["bbox"][0] + ann["bbox"][2],
+                        "score": -1,
+                        "class": ann["category_id"] - 1,
+                        "mask": mask,
+                    }
+                )
+    return annotations
+
+
+def compare_images(
+    tf_images,
+    tf_detections,
+    trt_images,
+    trt_detections,
+    output_dir,
+    annotations_path,
+    labels_path,
+    detection_type,
+):
+    labels = []
+    if labels_path and os.path.exists(labels_path):
+        with open(labels_path) as f:
+            for i, label in enumerate(f):
+                labels.append(label.strip())
+
+    annotations = parse_annotations(annotations_path, detection_type)
+
+    count = 1
+    for tf_img, tf_det, trt_img, trt_det in zip(
+        tf_images, tf_detections, trt_images, trt_detections
+    ):
+        vis = []
+        names = []
+        colors = []
+
+        vis.append(visualize_detections(tf_img, None, tf_det, labels))
+        names.append("TensorFlow")
+        colors.append("DarkOrange")
+
+        vis.append(visualize_detections(trt_img, None, trt_det, labels))
+        names.append("TensorRT")
+        colors.append("YellowGreen")
+
+        if annotations:
+            img_id = os.path.splitext(os.path.basename(trt_img))[0]
+            if img_id.isnumeric():
+                img_id = int(img_id)
+            if img_id in annotations.keys():
+                vis.append(
+                    visualize_detections(trt_img, None, annotations[img_id], labels)
+                )
+                names.append("Ground Truth")
+                colors.append("RoyalBlue")
+            else:
+                print(
+                    "Image {} does not have a COCO annotation, skipping ground truth visualization".format(
+                        trt_img
+                    )
+                )
+
+        basename = os.path.splitext(os.path.basename(tf_img))[0]
+        output_path = os.path.join(output_dir, "{}.compare.png".format(basename))
+        os.makedirs(output_dir, exist_ok=True)
+        concat_visualizations(vis, names, colors, output_path)
+
+        print(
+            "Processing {} / {} images (Visualization)".format(count, len(tf_images)),
+            end="\r",
+        )
+        count += 1
+    print()
+
+
+def main(args):
+    tf_infer = TensorFlowInfer(
+        args.saved_model, args.preprocessor, args.detection_type, args.iou_threshold
+    )
+    trt_infer = TensorRTInfer(
+        args.engine, args.preprocessor, args.detection_type, args.iou_threshold
+    )
+
+    trt_batcher = ImageBatcher(
+        args.input,
+        *trt_infer.input_spec(),
+        max_num_images=args.num_images,
+        preprocessor=args.preprocessor
+    )
+    tf_infer.override_input_shape(
+        0, [1, trt_batcher.height, trt_batcher.width, 3]
+    )  # Same size input in TF as TRT
+    tf_batcher = ImageBatcher(
+        args.input,
+        *tf_infer.input_spec(),
+        max_num_images=args.num_images,
+        preprocessor=args.preprocessor
+    )
+
+    tf_images, tf_detections = run(
+        tf_batcher, tf_infer, "TensorFlow", args.nms_threshold
+    )
+    trt_images, trt_detections = run(
+        trt_batcher, trt_infer, "TensorRT", args.nms_threshold
+    )
+
+    compare_images(
+        tf_images,
+        tf_detections,
+        trt_images,
+        trt_detections,
+        args.output,
+        args.annotations,
+        args.labels,
+        args.detection_type,
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with")
+    parser.add_argument(
+        "-m",
+        "--saved_model",
+        help="The TensorFlow saved model path to validate against",
+    )
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input to infer, either a single image path, or a directory of images",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        default=None,
+        help="Directory where to save the visualization results",
+    )
+    parser.add_argument(
+        "-l",
+        "--labels",
+        default="./labels_coco.txt",
+        help="File to use for reading the class labels from, default: ./labels_coco.txt",
+    )
+    parser.add_argument(
+        "-a",
+        "--annotations",
+        default=None,
+        help="Set the path to the 'instances_val2017.json' file to use for COCO annotations, in which "
+        "case --input should point to the COCO val2017 dataset, default: not used",
+    )
+    parser.add_argument(
+        "-n",
+        "--num_images",
+        default=100,
+        type=int,
+        help="The maximum number of images to visualize, default: 100",
+    )
+    parser.add_argument(
+        "-t",
+        "--nms_threshold",
+        type=float,
+        help="Override the score threshold for the NMS operation, if higher than the threshold in the model/engine.",
+    )
+    parser.add_argument(
+        "--iou_threshold",
+        default=0.5,
+        type=float,
+        help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. Pixel values more than threshold will become 1, less 0",
+    )
+    parser.add_argument(
+        "-d",
+        "--detection_type",
+        default="bbox",
+        choices=["bbox", "segmentation"],
+        help="Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation",
+    )
+    parser.add_argument(
+        "--preprocessor",
+        default="fixed_shape_resizer",
+        choices=["fixed_shape_resizer", "keep_aspect_ratio_resizer"],
+        help="Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer",
+    )
+    args = parser.parse_args()
+    if not all(
+        [args.engine, args.saved_model, args.input, args.output, args.preprocessor]
+    ):
+        parser.print_help()
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/create_onnx.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/create_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..7776d50ad386cea8b60f172aa95d99404710624c
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/create_onnx.py
@@ -0,0 +1,1056 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import re
+import sys
+import argparse
+import logging
+
+import tensorflow as tf
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+from onnx import shape_inference
+from tf2onnx import tfonnx, optimizer, tf_loader
+
+try:
+    from object_detection.utils import config_util
+except ImportError:
+    print("Could not import TFOD modules. Maybe you did not install TFOD API")
+    print(
+        "Please install TensorFlow 2 Object Detection API, check https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2.md"
+    )
+    sys.exit(1)
+
+import onnx_utils
+
+logging.basicConfig(level=logging.INFO)  # configure the root logger at INFO level
+logging.getLogger("ModelHelper").setLevel(logging.INFO)
+log = logging.getLogger("ModelHelper")  # module-wide logger used by the TFODGraphSurgeon methods below
+
+
+class TFODGraphSurgeon:
+    def __init__(self, saved_model_path, pipeline_config_path):
+        """
+        Load a TFOD saved model, convert it to an ONNX GraphSurgeon graph through tf2onnx (opset 11), and
+        read the model characteristics from pipeline.config. Exits the process for unsupported model types.
+        :param saved_model_path: The path pointing to the TensorFlow saved model to load.
+        :param pipeline_config_path: The path pointing to the TensorFlow Object Detection API pipeline.config to load.
+        """
+
+        saved_model_path = os.path.realpath(saved_model_path)
+        assert os.path.exists(saved_model_path)  # NOTE(review): asserts are skipped under `python -O`
+
+        # Use tf2onnx to convert saved model to an initial ONNX graph.
+        graph_def, inputs, outputs = tf_loader.from_saved_model(
+            saved_model_path, None, None, "serve", ["serving_default"]
+        )
+        log.info("Loaded saved model from {}".format(saved_model_path))
+        with tf.Graph().as_default() as tf_graph:
+            tf.import_graph_def(graph_def, name="")
+        with tf_loader.tf_session(graph=tf_graph):
+            onnx_graph = tfonnx.process_tf_graph(
+                tf_graph, input_names=inputs, output_names=outputs, opset=11
+            )
+        onnx_model = optimizer.optimize_graph(onnx_graph).make_model(
+            "Converted from {}".format(saved_model_path)
+        )
+        self.graph = gs.import_onnx(onnx_model)
+        assert self.graph
+        log.info("TF2ONNX graph created successfully")
+
+        # Fold constants via ONNX-GS that TF2ONNX may have missed.
+        self.graph.fold_constants()
+
+        # Pipeline config parsing.
+        pipeline_config = config_util.get_configs_from_pipeline_file(
+            pipeline_config_path
+        )
+        # Get input resolution (height, width) from the image resizer section of the model config.
+        self.height, self.width = config_util.get_spatial_image_size(
+            config_util.get_image_resizer_config(pipeline_config["model"])
+        )
+
+        # If the model is SSD, read its feature extractor type and NMS settings from pipeline.config.
+        if pipeline_config["model"].HasField("ssd"):
+            # Getting model characteristics.
+            self.model = str(pipeline_config["model"].ssd.feature_extractor.type)
+            self.first_stage_nms_score_threshold = float(
+                pipeline_config[
+                    "model"
+                ].ssd.post_processing.batch_non_max_suppression.score_threshold
+            )
+            self.first_stage_nms_iou_threshold = float(
+                pipeline_config[
+                    "model"
+                ].ssd.post_processing.batch_non_max_suppression.iou_threshold
+            )
+            self.first_stage_max_proposals = int(
+                pipeline_config[
+                    "model"
+                ].ssd.post_processing.batch_non_max_suppression.max_detections_per_class
+            )
+        # If the model is Faster R-CNN, read its characteristics from pipeline.config (both NMS stages).
+        elif pipeline_config["model"].HasField("faster_rcnn"):
+            # Getting model characteristics.
+            self.model = str(
+                pipeline_config["model"].faster_rcnn.feature_extractor.type
+            )
+            self.num_classes = pipeline_config["model"].faster_rcnn.num_classes
+            self.first_stage_nms_score_threshold = float(
+                pipeline_config["model"].faster_rcnn.first_stage_nms_score_threshold
+            )
+            self.first_stage_nms_iou_threshold = float(
+                pipeline_config["model"].faster_rcnn.first_stage_nms_iou_threshold
+            )
+            self.first_stage_max_proposals = int(
+                pipeline_config["model"].faster_rcnn.first_stage_max_proposals
+            )
+            self.first_stage_crop_size = int(
+                pipeline_config["model"].faster_rcnn.initial_crop_size
+            )
+            self.second_stage_nms_score_threshold = float(
+                pipeline_config[
+                    "model"
+                ].faster_rcnn.second_stage_post_processing.batch_non_max_suppression.score_threshold
+            )
+            self.second_stage_iou_threshold = float(
+                pipeline_config[
+                    "model"
+                ].faster_rcnn.second_stage_post_processing.batch_non_max_suppression.iou_threshold
+            )
+            self.mask_height = None  # stays None unless the config sets both mask_height and mask_width
+            self.mask_width = None
+            self.matmul_crop_and_resize = False  # default when use_matmul_crop_and_resize is not set
+            # Check what kind of Crop and Resize operation is used
+            if pipeline_config["model"].faster_rcnn.HasField(
+                "use_matmul_crop_and_resize"
+            ):
+                self.matmul_crop_and_resize = pipeline_config[
+                    "model"
+                ].faster_rcnn.use_matmul_crop_and_resize
+            # If model is Mask R-CNN, get final instance segmentation masks resolution.
+            if pipeline_config[
+                "model"
+            ].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.HasField(
+                "mask_height"
+            ) and pipeline_config[
+                "model"
+            ].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.HasField(
+                "mask_width"
+            ):
+                self.mask_height = int(
+                    pipeline_config[
+                        "model"
+                    ].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.mask_height
+                )
+                self.mask_width = int(
+                    pipeline_config[
+                        "model"
+                    ].faster_rcnn.second_stage_box_predictor.mask_rcnn_box_predictor.mask_width
+                )
+        else:
+            log.info("Given Model type is not supported")
+            sys.exit(1)
+
+        # List of supported feature extractor types; self.model must be one of them.
+        supported_models = [
+            "ssd_mobilenet_v2_keras",
+            "ssd_mobilenet_v1_fpn_keras",
+            "ssd_mobilenet_v2_fpn_keras",
+            "ssd_resnet50_v1_fpn_keras",
+            "ssd_resnet101_v1_fpn_keras",
+            "ssd_resnet152_v1_fpn_keras",
+            "faster_rcnn_resnet50_keras",
+            "faster_rcnn_resnet101_keras",
+            "faster_rcnn_resnet152_keras",
+            "faster_rcnn_inception_resnet_v2_keras",
+        ]
+        assert self.model in supported_models
+
+        # Log the model characteristics that were read above.
+        log.info("Model is {}".format(self.model))
+        log.info("Height is {}".format(self.height))
+        log.info("Width is {}".format(self.width))
+        log.info(
+            "First NMS score threshold is {}".format(
+                self.first_stage_nms_score_threshold
+            )
+        )
+        log.info(
+            "First NMS iou threshold is {}".format(self.first_stage_nms_iou_threshold)
+        )
+        log.info("First NMS max proposals is {}".format(self.first_stage_max_proposals))
+        if "faster_rcnn" in self.model:  # the attributes below are only set in the faster_rcnn branch
+            log.info("Number of classes is {}".format(self.num_classes))
+            log.info(
+                "Crop and Resize output size is {}".format(self.first_stage_crop_size)
+            )
+            log.info(
+                "Second NMS score threshold is {}".format(
+                    self.second_stage_nms_score_threshold
+                )
+            )
+            log.info(
+                "Second NMS iou threshold is {}".format(self.second_stage_iou_threshold)
+            )
+            log.info(
+                "Using MatMul Crop and Resize: {}".format(self.matmul_crop_and_resize)
+            )
+            if not (self.mask_height is None and self.mask_width is None):
+                log.info("Mask height is {}".format(self.mask_height))
+                log.info("Mask width is {}".format(self.mask_width))
+
+        self.batch_size = None  # assigned later by update_preprocessor()
+
+    def sanitize(self):
+        """
+        Sanitize the graph by cleaning any unconnected nodes, do a topological resort, and fold constant inputs values.
+        When possible, run shape inference on the ONNX graph to determine tensor shapes.
+        """
+
+        # Type of model requires different amount of sanitize iterations
+        if "ssd" in self.model:
+            sanitize_num = 6
+        elif "faster_rcnn" in self.model:
+            sanitize_num = 3
+
+        for i in range(sanitize_num):
+            count_before = len(self.graph.nodes)
+            self.graph.cleanup().toposort()
+            try:
+                for node in self.graph.nodes:
+                    for o in node.outputs:
+                        o.shape = None
+                model = gs.export_onnx(self.graph)
+                model = shape_inference.infer_shapes(model)
+                self.graph = gs.import_onnx(model)
+            except Exception as e:
+                log.info(
+                    "Shape inference could not be performed at this time:\n{}".format(e)
+                )
+            try:
+                self.graph.fold_constants(fold_shapes=True)
+            except TypeError as e:
+                log.error(
+                    "This version of ONNX GraphSurgeon does not support folding shapes, please upgrade your "
+                    "onnx_graphsurgeon module. Error:\n{}".format(e)
+                )
+                raise
+
+            count_after = len(self.graph.nodes)
+            if count_before == count_after:
+                # No new folding occurred in this iteration, so we can stop for now.
+                break
+
+    def save(self, output_path):
+        """
+        Save the ONNX model to the given location.
+        :param output_path: Path pointing to the location where to write out the updated ONNX model.
+        """
+        self.graph.cleanup().toposort()
+        model = gs.export_onnx(self.graph)
+        output_path = os.path.realpath(output_path)
+        os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        onnx.save(model, output_path)
+        log.info("Saved ONNX model to {}".format(output_path))
+
+    def add_debug_output(self, debug):
+        """
+        Add a debug output to a given node.
+        :param debug: Name of the output you would like to debug.
+        """
+        tensors = self.graph.tensors()
+        for n, name in enumerate(debug):
+            if name not in tensors:
+                log.warning("Could not find tensor '{}'".format(name))
+            debug_tensor = gs.Variable(
+                name="debug:{}".format(n), dtype=tensors[name].dtype
+            )
+            debug_node = gs.Node(
+                op="Identity",
+                name="debug_{}".format(n),
+                inputs=[tensors[name]],
+                outputs=[debug_tensor],
+            )
+            self.graph.nodes.append(debug_node)
+            self.graph.outputs.append(debug_tensor)
+            log.info(
+                "Adding debug output '{}' for graph tensor '{}'".format(
+                    debug_tensor.name, name
+                )
+            )
+
+    def update_preprocessor(self, batch_size, input_format):
+        """
+        Remove all the pre-processing nodes in the ONNX graph and leave only the image normalization essentials.
+        :param batch_size: The batch size to use for the ONNX graph.
+        :param input_format: Input tensor format, either NCHW or NHWC.
+        """
+        # Update batch size.
+        self.batch_size = batch_size
+
+        # Set input tensor shape.
+        assert input_format in ["NCHW", "NHWC"]
+        input_shape = [None] * 4
+        if input_format == "NHWC":
+            input_shape = [self.batch_size, self.height, self.width, 3]
+        if input_format == "NCHW":
+            input_shape = [self.batch_size, 3, self.height, self.width]
+        self.graph.inputs[0].shape = input_shape
+        self.graph.inputs[0].dtype = np.float32
+        self.graph.inputs[0].name = "input_tensor"
+
+        self.sanitize()
+        log.info(
+            "ONNX graph input shape: {} [NCHW format set]".format(
+                self.graph.inputs[0].shape
+            )
+        )
+
+        # Find the initial nodes of the graph, whatever the input is first connected to, and disconnect them.
+        for node in [
+            node for node in self.graph.nodes if self.graph.inputs[0] in node.inputs
+        ]:
+            node.inputs.clear()
+
+        # Get input tensor.
+        # Convert to NCHW format if needed.
+        input_tensor = self.graph.inputs[0]
+        if input_format == "NHWC":
+            input_tensor = self.graph.transpose(
+                "preprocessor/transpose", input_tensor, [0, 3, 1, 2]
+            )
+
+        # Mobilenets' and inception's backbones preprocessor.
+        if "mobilenet" in self.model or "inception_resnet" in self.model:
+            mul_const = np.expand_dims(
+                np.asarray([2 / 255], dtype=np.float32), axis=(0, 2, 3)
+            )
+            sub_const = np.expand_dims(
+                np.asarray([1], dtype=np.float32), axis=(0, 2, 3)
+            )
+            mul_out = self.graph.op_with_const(
+                "Mul", "preprocessor/scale", input_tensor, mul_const
+            )
+            sub_out = self.graph.op_with_const(
+                "Sub", "preprocessor/mean", mul_out, sub_const
+            )
+
+        # Resnet backbones' preprocessor.
+        elif "resnet" in self.model:
+            sub_const = np.expand_dims(
+                np.asarray([255 * 0.485, 255 * 0.456, 255 * 0.406], dtype=np.float32),
+                axis=(0, 2, 3),
+            )
+            sub_out = self.graph.op_with_const(
+                "Sub", "preprocessor/mean", input_tensor, sub_const
+            )
+
+        # Backbone is not supported.
+        else:
+            log.info(
+                "Given model's backbone is not supported, pre-processor algorithm can't be generated"
+            )
+            sys.exit(1)
+
+        # Find first Conv node and connect preprocessor directly to it.
+        conv_node = self.graph.find_node_by_op("Conv")
+        log.info(
+            "Found {} node '{}' as stem entry".format(conv_node.op, conv_node.name)
+        )
+        conv_node.inputs[0] = sub_out[0]
+
+        # Disconnect the last node in one of the preprocessing branches with first TensorListStack parent node.
+        concat_node = self.graph.find_node_by_op("Concat")
+        concat_node.outputs = []
+
+        # Disconnect the last node in second preprocessing branch with parent second TensorListStack node.
+        tile_node = self.graph.find_node_by_op("Tile")
+        tile_node.outputs = []
+
+        # Reshape nodes tend to update the batch dimension to a fixed value of 1, they should use the batch size instead.
+        for node in [node for node in self.graph.nodes if node.op == "Reshape"]:
+            if type(node.inputs[1]) == gs.Constant and node.inputs[1].values[0] == 1:
+                node.inputs[1].values[0] = self.batch_size
+
+        self.sanitize()
+
+    def find_head_end(self, head_name, descendant, end_op):
+        # This helper function finds ends of Class Net and Box Net, based on a model type.
+        # :param head_name: This is a common name that nodes in either Class or Box Nets start with.
+        # :param descendant: Descendant of head_name, identified by operation (Transpose, MatMul, etc.).
+        # :param end_op: Operation of a node you would like to get in the end of each Net.
+        # These end_op nodes bring together prediction data based on type of model.
+        # The Class Net end node will have shape [batch_size, num_anchors, num_classes],
+        # and the Box Net end node has the shape [batch_size, num_anchors, 4].
+        # These end nodes can be be found by searching for all end_op's operation nodes and checking if the node two
+        # steps above in the graph has a name that begins with one of head_names for Class Net and Box Net respectively.
+        for node in [
+            node
+            for node in self.graph.nodes
+            if node.op == descendant and head_name in node.name
+        ]:
+            target_node = self.graph.find_descendant_by_op(node, end_op)
+            log.info(
+                "Found {} node '{}' as the tip of {}".format(
+                    target_node.op, target_node.name, head_name
+                )
+            )
+            return target_node
+
+    def extract_anchors_tensor(self, split):
+        # This will find the anchors that have been hardcoded somewhere within the ONNX graph.
+        # The function will return a gs.Constant that can be directly used as an input to the NMS plugin.
+        # The anchor tensor shape will be [1, num_anchors, 4]. Note that '1' is kept as first dim, regardless of
+        # batch size, as it's not necessary to replicate the anchors for all images in the batch.
+
+        # The anchors are available (one per coordinate) hardcoded as constants within certain box decoder nodes.
+        # Each of these four constants is squeezed, flattened and expanded to [1, N, 1] by get_anchor(), and the
+        # four results are then concatenated along the last axis.
+
+        # These constants can be found by starting from the Box Net's split operation, and for each coordinate,
+        # walking down in the graph until either an Add or specific Mul node is found. The second input of these
+        # nodes will be the anchor data required.
+        def get_anchor(output_idx, op, depth=5):
+            node = self.graph.find_descendant_by_op(split.o(0, output_idx), op)
+            for i in range(depth):
+                if node.op == op:
+                    # An input of size 1 is not anchor data, keep walking down the graph.
+                    if (node.inputs[1].values).size == 1:
+                        node = node.o()
+                    # A multi-element second input is the anchor data: return it shaped as [1, N, 1].
+                    elif (node.inputs[1].values).size > 1:
+                        assert node  # NOTE(review): node was already dereferenced above, so this cannot fail
+                        val = np.squeeze(node.inputs[1].values)
+                        return np.expand_dims(val.flatten(), axis=(0, 2))
+                else:
+                    node = node.o()
+            return None  # no anchor constant was found within `depth` iterations
+
+        anchors_y = get_anchor(0, "Add")
+        anchors_x = get_anchor(1, "Add")
+        anchors_h = get_anchor(2, "Mul")
+        anchors_w = get_anchor(3, "Mul")
+
+        batched_anchors = np.concatenate(
+            [anchors_y, anchors_x, anchors_h, anchors_w], axis=2
+        )
+        # Identify num of anchors without repetitions (requires self.batch_size to be set beforehand).
+        num_anchors = int(batched_anchors.shape[1] / self.batch_size)
+        # Trim total number of anchors in order to not have copies introduced by growing number of batch_size.
+        anchors = batched_anchors[0:num_anchors, 0:num_anchors]  # NOTE(review): axis 0 has length 1 here
+        return gs.Constant(name="nms/anchors:0", values=anchors)
+
+    def NMS(
+        self,
+        box_net_tensor,
+        class_net_tensor,
+        anchors_tensor,
+        background_class,
+        score_activation,
+        iou_threshold,
+        nms_score_threshold,
+        user_threshold,
+        nms_name=None,
+    ):
+        # Helper function to create the NMS Plugin node with the selected inputs.
+        # EfficientNMS_TRT TensorRT Plugin is suitable for our use case.
+        # :param box_net_tensor: The box predictions from the Box Net.
+        # :param class_net_tensor: The class predictions from the Class Net.
+        # :param anchors_tensor: The default anchor coordinates (from the extracted anchor constants)
+        # :param background_class: The label ID for the background class.
+        # :param score_activation: If set to True - apply sigmoid activation to the confidence scores during NMS operation,
+        # if false - no activation, pass one from the graph.
+        # :param iou_threshold: NMS intersection over union threshold, given by pipeline.config.
+        # :param nms_score_threshold: NMS score threshold, given by pipeline.config.
+        # :param user_threshold: User's given threshold to overwrite default NMS score threshold.
+        # :param nms_name: Name of NMS node in a graph, renames NMS elements accordingly in order to eliminate cycles.
+
+        if nms_name is None:
+            nms_name = ""
+        else:
+            nms_name = "_" + nms_name
+
+        # Set score threshold.
+        score_threshold = (
+            nms_score_threshold if user_threshold is None else user_threshold
+        )
+
+        # NMS Outputs.
+        nms_output_num_detections = gs.Variable(
+            name="num_detections" + nms_name, dtype=np.int32, shape=[self.batch_size, 1]
+        )
+        nms_output_boxes = gs.Variable(
+            name="detection_boxes" + nms_name,
+            dtype=np.float32,
+            shape=[self.batch_size, self.first_stage_max_proposals, 4],
+        )
+        nms_output_scores = gs.Variable(
+            name="detection_scores" + nms_name,
+            dtype=np.float32,
+            shape=[self.batch_size, self.first_stage_max_proposals],
+        )
+        nms_output_classes = gs.Variable(
+            name="detection_classes" + nms_name,
+            dtype=np.int32,
+            shape=[self.batch_size, self.first_stage_max_proposals],
+        )
+
+        nms_outputs = [
+            nms_output_num_detections,
+            nms_output_boxes,
+            nms_output_scores,
+            nms_output_classes,
+        ]
+
+        # Plugin.
+        self.graph.plugin(
+            op="EfficientNMS_TRT",
+            name="nms/non_maximum_suppression" + nms_name,
+            inputs=[box_net_tensor, class_net_tensor, anchors_tensor],
+            outputs=nms_outputs,
+            attrs={
+                "plugin_version": "1",
+                "background_class": background_class,
+                "max_output_boxes": self.first_stage_max_proposals,
+                "score_threshold": max(0.01, score_threshold),
+                "iou_threshold": iou_threshold,
+                "score_activation": score_activation,
+                "class_agnostic": False,
+                "box_coding": 1,
+            },
+        )
+        log.info("Created 'nms/non_maximum_suppression{}' NMS plugin".format(nms_name))
+
+        return nms_outputs
+
+    def CropAndResize(self, unsqeeze_input, relu_node_outputs, cnr_num):
+        # Helper function to create the NMS Plugin node with the selected inputs.
+        # CropAndResize TensorRT Plugin is suitable for our use case.
+        # :param unsqeeze_input: NMS's bonding boxes output, clipped and normalized if this is first CropAndResize, this is a souce of rois for CropAndResize.
+        # :param relu_node_outputs: 1st backbone's last Relu node, this is a souce of feature_maps for CropAndResize
+        # :param cnr_num: Positional number of CropAndResize node in a graph, renames CropAndResize elements accordingly in order to eliminate cycles.
+
+        # CropAndResizePlugin requires 4th dimension of 1: [N, B, 4, 1], so
+        # we need to add unsqeeze node to make tensor 4 dimensional.
+        unsqueeze_node = self.graph.unsqueeze(
+            "CNR/detection_boxes_unsqueeze_" + cnr_num, unsqeeze_input
+        )
+
+        # CropAndResizePlugin's inputs
+        feature_maps = relu_node_outputs
+        rois = unsqueeze_node[0]
+
+        # CropAndResize Outputs.
+        cnr_pfmap = gs.Variable(
+            name="cnr/pfmap_" + cnr_num,
+            dtype=np.float32,
+            shape=[
+                self.batch_size,
+                self.first_stage_max_proposals,
+                feature_maps.shape[1],
+                self.first_stage_crop_size,
+                self.first_stage_crop_size,
+            ],
+        )
+
+        # Create the CropandResizeDynamic Plugin node with the selected inputs.
+        # Two inputs are given to the CropAndResizeDynamic TensorRT node:
+        # - The feature_maps (from the relu_node_outputs): [batch_size, channel_num, height, width]
+        # - The rois (clipped and normalized detection boxes resulting from NMS): [batch_size, featuremap, 4, 1]
+        plg_version = "2" # ver 1 uses IPluginV2DynamicExt (deprecated), ver 2 uses IPluginV3
+        plg_name = "CropAndResizeDynamic" # CropAndResize is deprecated
+        self.graph.plugin(
+            op=plg_name,
+            name="cnr/crop_and_resize_" + cnr_num,
+            inputs=[feature_maps, rois],
+            outputs=[cnr_pfmap],
+            attrs={
+                "plugin_version": plg_version,
+                "crop_width": self.first_stage_crop_size,
+                "crop_height": self.first_stage_crop_size,
+            },
+        )
+        log.info("Created {} {} plugin (version: {})".format(cnr_num, plg_name, plg_version))
+
+        # Reshape node that is preparing CropAndResize's pfmap output shape for MaxPool node that comes next.
+        reshape_shape = np.asarray(
+            [
+                self.first_stage_max_proposals * self.batch_size,
+                feature_maps.shape[1],
+                self.first_stage_crop_size,
+                self.first_stage_crop_size,
+            ],
+            dtype=np.int64,
+        )
+        reshape_node = self.graph.op_with_const(
+            "Reshape", "cnr/reshape_" + cnr_num, cnr_pfmap, reshape_shape
+        )
+
+        return reshape_node[0]
+
+    def process_graph(self, first_nms_threshold=None, second_nms_threshold=None):
+        """
+        Processes the graph to replace the NMS operations by EfficientNMS_TRT TensorRT plugin nodes and
+        cropAndResize operations by CropAndResize plugin node.
+        :param first_nms_threshold: Override the 1st NMS score threshold value. If set to None, use the value in the graph.
+        :param second_nms_threshold: Override the 2nd NMS score threshold value. If set to None, use the value in the graph.
+        """
+
+        def first_nms(
+            background_class, score_activation, first_nms_threshold, nms_name=None
+        ):
+            """
+            Updates the graph to replace the 1st NMS op by EfficientNMS_TRT TensorRT plugin node.
+            :param background_class: Set EfficientNMS_TRT's background_class atribute.
+            :param score_activation: Set EfficientNMS_TRT's score_activation atribute.
+            :param first_nms_threshold: Override the NMS score threshold.
+            :param nms_name: Set the NMS node name.
+            """
+            # Supported models
+            ssd_models = [
+                "ssd_mobilenet_v1_fpn_keras",
+                "ssd_mobilenet_v2_fpn_keras",
+                "ssd_resnet50_v1_fpn_keras",
+                "ssd_resnet101_v1_fpn_keras",
+                "ssd_resnet152_v1_fpn_keras",
+            ]
+            frcnn_models = [
+                "faster_rcnn_resnet50_keras",
+                "faster_rcnn_resnet101_keras",
+                "faster_rcnn_resnet152_keras",
+                "faster_rcnn_inception_resnet_v2_keras",
+            ]
+
+            # Getting SSD's Class and Box Nets final tensors.
+            if "ssd" in self.model:
+                # Find the concat node at the end of the class net (multi-scale class predictor).
+                class_net_head_name = (
+                    "BoxPredictor/ConvolutionalClassHead_"
+                    if self.model == "ssd_mobilenet_v2_keras"
+                    else "WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead"
+                )
+                class_net = self.find_head_end(
+                    class_net_head_name, "Transpose", "Concat"
+                )
+                # Final Class Net tensor
+                class_net_tensor = self.graph.slice(
+                    class_net_head_name + "/slicer", class_net.outputs[0], 1, 91, 2
+                )[
+                    0
+                ]  # Remove background class
+
+                # Find the concat or squeeze node at the end of the box net (multi-scale localization predictor).
+                if self.model == "ssd_mobilenet_v2_keras":
+                    box_net_head_name = "BoxPredictor/ConvolutionalBoxHead_"
+                    box_net = self.find_head_end(
+                        box_net_head_name, "Transpose", "Squeeze"
+                    )
+                else:
+                    box_net_head_name = "WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead"
+                    box_net = self.find_head_end(
+                        box_net_head_name, "Transpose", "Concat"
+                    )
+
+                box_net_output = box_net.outputs[0]
+                # 0.1, 0.1, 0.2, 0.2 are localization head variance numbers, they scale box_net_output in order to get accurate coordinates.
+                variance_adj = np.expand_dims(
+                    np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1)
+                )
+                # Final Box Net tensor.
+                box_net_tensor = self.graph.op_with_const(
+                    "Mul", box_net_head_name + "/scale", box_net_output, variance_adj
+                )[0]
+
+            # Getting Faster R-CNN's 1st Class and Box Nets tensors.
+            elif "faster_rcnn" in self.model:
+                # Identify Class Net and Box Net head names
+                head_names = [
+                    "FirstStageBoxPredictor/ConvolutionalClassHead_0/ClassPredictor",
+                    "FirstStageBoxPredictor/ConvolutionalBoxHead_0/BoxEncodingPredictor",
+                ]
+
+                # Find the softmax node at the end of the class net (multi-scale class predictor).
+                class_net = self.find_head_end(head_names[0], "Transpose", "Softmax")
+                # Final Class Net tensor
+                class_net_tensor = class_net.outputs[0]
+
+                # Find the reshape node at the end of the box net (multi-scale localization predictor).
+                box_net = self.find_head_end(head_names[1], "Transpose", "Reshape")
+                # Final Box Net tensor.
+                box_net_output = box_net.outputs[0]
+
+                # Insert a squeeze node
+                squeeze_node = self.graph.squeeze(
+                    head_names[1] + "/squeeze", box_net_output
+                )
+                # 0.1, 0.1, 0.2, 0.2 are localization head variance numbers, they scale box_net_output, in order to get accurate coordinates.
+                variance_adj = np.expand_dims(
+                    np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1)
+                )
+                # Final Box Net tensor.
+                box_net_tensor = self.graph.op_with_const(
+                    "Mul", head_names[1] + "/scale", squeeze_node, variance_adj
+                )[0]
+
+            # Find the split node that separates the box net coordinates and feeds them into the box decoder.
+            box_net_split = self.graph.find_descendant_by_op(box_net, "Split")
+            assert box_net_split and len(box_net_split.outputs) == 4
+
+            # Get anchors tensor.
+            anchors_tensor = self.extract_anchors_tensor(box_net_split)
+
+            # Create NMS node.
+            nms_outputs = self.NMS(
+                box_net_tensor,
+                class_net_tensor,
+                anchors_tensor,
+                background_class,
+                score_activation,
+                self.first_stage_nms_iou_threshold,
+                self.first_stage_nms_score_threshold,
+                first_nms_threshold,
+                nms_name,
+            )
+
+            # Return NMS's outputs.
+            return nms_outputs
+
+        def first_cnr(input):
+            """
+            Updates the graph to replace the 1st cropAndResize op by CropAndResize TensorRT plugin node.
+            :param input: Input tensor is the output from previous first_nms() step.
+            """
+
+            # Locate the last Relu node of the first backbone (pre 1st NMS). Relu node contains feature maps
+            # necessary for CropAndResize plugin.
+            relu_name = "StatefulPartitionedCall/model/"
+            relu_node = [
+                node
+                for node in self.graph.nodes
+                if node.op == "Relu" and relu_name in node.name
+            ][-1]
+
+            # Before passing 1st NMS's detection boxes (rois) to CropAndResize, we need to clip and normalize them.
+            # Clipping happens for coordinates that are less than 0 and more than self.height.
+            # Normalization is just division of every coordinate by the matching dimension (self.height or self.width).
+            clip_out = self.graph.clip(
+                "FirstNMS/detection_boxes_clipper", input, 0, self.height
+            )
+            div_const = np.expand_dims(
+                np.asarray(
+                    [self.height, self.width, self.height, self.width], dtype=np.float32
+                ),
+                axis=(0, 1),
+            )
+            div_out = self.graph.op_with_const(
+                "Div", "FirstNMS/detection_boxes_normalizer", clip_out[0], div_const
+            )
+
+            # Linear transformation to convert box coordinates from (TopLeft, BottomRight) Corner encoding
+            # to CenterSize encoding. 1st NMS boxes are multiplied by transformation matrix in order to
+            # encode it into CenterSize format.
+            matmul_const = np.matrix(
+                "0.5 0 -1 0; 0 0.5 0 -1; 0.5 0 1 0; 0 0.5 0 1", dtype=np.float32
+            )
+            matmul_out = self.graph.matmul(
+                "FirstNMS/detection_boxes_conversion", div_out[0], matmul_const
+            )
+
+            # Create Crop and Resize node.
+            cnr_output = self.CropAndResize(div_out, relu_node.outputs[0], "first")
+
+            # Find MaxPool node that summarizes CropAndResize structure.
+            maxpool_node = [
+                node
+                for node in self.graph.nodes
+                if node.op == "MaxPool" and "MaxPool2D/MaxPool" in node.name
+            ][0]
+            maxpool_node.inputs[0] = cnr_output
+
+            # Return linear transformation node, it will be located between 1st and 2nd NMS,
+            # so we need to pass and connect it to 2nd NMS.
+            # In case you are converting Mask R-CNN, feature maps are required for 2nd CropAndResize.
+            return matmul_out[0], relu_node.outputs[0]
+
+        def second_nms(
+            background_class,
+            score_activation,
+            encoded_boxes,
+            second_nms_threshold,
+            nms_name=None,
+        ):
+            """
+            Updates the graph to replace the 2nd (or final) NMS op by EfficientNMS_TRT TensorRT plugin node.
+            :param background_class: Set EfficientNMS_TRT's background_class attribute.
+            :param score_activation: Set EfficientNMS_TRT's score_activation attribute.
+            :param encoded_boxes: The boxes to use as input.
+            :param second_nms_threshold: Override the NMS score threshold.
+            :param nms_name: Set the NMS node name.
+            """
+
+            # Identify Class Net and Box Net head names.
+            second_head_names = [
+                "StatefulPartitionedCall/mask_rcnn_keras_box_predictor/mask_rcnn_class_head/ClassPredictor_dense",
+                "StatefulPartitionedCall/mask_rcnn_keras_box_predictor/mask_rcnn_box_head/BoxEncodingPredictor_dense",
+            ]
+
+            # Find the softmax node at the end of the 2nd class net (multi-scale class predictor).
+            second_class_net = self.find_head_end(
+                second_head_names[0], "MatMul", "Softmax"
+            )
+
+            # Faster R-CNN's slice operation to adjust third dimension of Class Net's last node tensor (adjusting class values).
+            slice_out = self.graph.slice(
+                second_head_names[0] + "/slicer", second_class_net.outputs[0], 1, 91, 2
+            )
+
+            # Final Class Net tensor.
+            second_class_net_tensor = slice_out[0]
+
+            # Find the add node at the end of the box net (multi-scale localization predictor).
+            second_box_net = self.find_head_end(second_head_names[1], "MatMul", "Add")
+            # Final Box Net tensor.
+            second_box_net_output = second_box_net.outputs[0]
+
+            # Reshape node that is preparing second_box_net_output's output shape for Mul scaling node that comes next.
+            # Based on type of Crop and Resize operation, second_box_net_output can be of two types, example:
+            # If use_matmul_crop_and_resize in pipeline.config is set to True, expect: [batch_size, first_stage_max_proposals, 4].
+            # Else use_matmul_crop_and_resize is either False or absent, expect: [batch_size, first_stage_max_proposals, num_classes, 4]
+            if self.matmul_crop_and_resize:
+                reshape_shape_second = np.asarray(
+                    [
+                        self.batch_size,
+                        self.first_stage_max_proposals,
+                        second_box_net.outputs[0].shape[1],
+                    ],
+                    dtype=np.int64,
+                )
+            else:
+                reshape_shape_second = np.asarray(
+                    [
+                        self.batch_size,
+                        self.first_stage_max_proposals,
+                        self.num_classes,
+                        second_box_net.outputs[0].shape[1] / self.num_classes,
+                    ],
+                    dtype=np.int64,
+                )
+            reshape_node_second = self.graph.op_with_const(
+                "Reshape",
+                second_head_names[1] + "/reshape",
+                second_box_net_output,
+                reshape_shape_second,
+            )
+            # 0.1, 0.1, 0.2, 0.2 are localization head variance numbers, they scale second_box_net_output, in order to get accurate coordinates.
+            second_scale_adj = np.expand_dims(
+                np.asarray([0.1, 0.1, 0.2, 0.2], dtype=np.float32), axis=(0, 1)
+            )
+            second_scale_out = self.graph.op_with_const(
+                "Mul",
+                second_head_names[1] + "/scale_second",
+                reshape_node_second[0],
+                second_scale_adj,
+            )
+
+            # Final Box Net tensor.
+            second_box_net_tensor = second_scale_out[0]
+
+            # Create NMS node.
+            nms_outputs = self.NMS(
+                second_box_net_tensor,
+                second_class_net_tensor,
+                encoded_boxes,
+                background_class,
+                score_activation,
+                self.second_stage_iou_threshold,
+                self.second_stage_nms_score_threshold,
+                second_nms_threshold,
+                nms_name,
+            )
+
+            return nms_outputs
+
+        def second_cnr(feature_maps, second_nms_outputs):
+            """
+            Updates the graph to replace the 2nd cropAndResize op by CropAndResize TensorRT plugin node.
+            :param feature_maps: Feature maps tensor returned by the previous first_cnr() step.
+            :param second_nms_outputs: Outputs of the previous second_nms() step.
+            """
+            # Before passing 2nd NMS's detection boxes (rois) to second CropAndResize, we need to clip them.
+            # Clipping happens for coordinates that are less than 0 and more than 1 (binary).
+            clip_out = self.graph.clip(
+                "SecondNMS/detection_boxes_clipper", second_nms_outputs[1], 0, 1
+            )
+
+            # Create Crop and Resize node.
+            cnr_output = self.CropAndResize(clip_out, feature_maps, "second")
+
+            # Find MaxPool node that summarizes CropAndResize structure
+            maxpool_node = [
+                node
+                for node in self.graph.nodes
+                if node.op == "MaxPool" and "MaxPool2D/MaxPool_1" in node.name
+            ][0]
+            maxpool_node.inputs[0] = cnr_output
+
+            # Reshape node that is preparing 2nd NMS class outputs for Add node that comes next.
+            # [self.batch_size, self.first_stage_max_proposals] -> [self.first_stage_max_proposals*self.batch_size]
+            class_reshape_shape = np.asarray(
+                [self.first_stage_max_proposals * self.batch_size], dtype=np.int64
+            )
+            class_reshape_node = self.graph.op_with_const(
+                "Reshape", "Reshape_Class", second_nms_outputs[3], class_reshape_shape
+            )
+
+            # Find sigmoid node in the end of the network, applies sigmoid to get instance segmentation masks
+            last_sigmoid_node = self.graph.find_descendant_by_op(
+                maxpool_node, "Sigmoid", 40
+            )
+
+            if self.num_classes > 1:
+                # Find first ancestor of Sigmoid of operation type Add. This Add node is one of the Gather node inputs,
+                # Gather node performs gather on 0th axis of data tensor and requires indices that set tensors to be within bounds,
+                # this Add node provides the bounds for Gather.
+                add_node = self.graph.find_ancestor_by_op(last_sigmoid_node, "Add")
+                add_node.inputs[1] = class_reshape_node[0]
+
+            # Final Reshape node, reshapes output of Sigmoid, important for various batch_size support.
+            final_reshape_shape = np.asarray(
+                [
+                    self.batch_size,
+                    self.first_stage_max_proposals,
+                    self.mask_height,
+                    self.mask_width,
+                ],
+                dtype=np.int64,
+            )
+            final_reshape_node = self.graph.op_with_const(
+                "Reshape",
+                "Reshape_Final_Masks",
+                last_sigmoid_node.outputs[0],
+                final_reshape_shape,
+            )
+            final_reshape_node[0].dtype = np.float32
+            final_reshape_node[0].name = "detection_masks"
+
+            return final_reshape_node[0]
+
+        # If your model is SSD, you need only one NMS and nothing else.
+        if "ssd" in self.model:
+            # Set graph outputs.
+            self.graph.outputs = first_nms(-1, True, first_nms_threshold)
+            self.sanitize()
+        # If your model is Faster R-CNN, you will need 2 NMS nodes with CropAndResize in between.
+        elif (
+            "faster_rcnn" in self.model
+            and self.mask_height is None
+            and self.mask_width is None
+        ):
+            first_nms_outputs = first_nms(0, False, first_nms_threshold, "rpn")
+            first_cnr_output, feature_maps = first_cnr(first_nms_outputs[1])
+            # Set graph outputs.
+            self.graph.outputs = second_nms(
+                -1, False, first_cnr_output, second_nms_threshold
+            )
+            self.sanitize()
+        # Mask R-CNN
+        elif "faster_rcnn" in self.model and not (
+            self.mask_height is None and self.mask_width is None
+        ):
+            first_nms_outputs = first_nms(0, False, first_nms_threshold, "rpn")
+            first_cnr_output, feature_maps = first_cnr(first_nms_outputs[1])
+            second_nms_outputs = second_nms(
+                -1, False, first_cnr_output, second_nms_threshold
+            )
+            second_cnr_output = second_cnr(feature_maps, second_nms_outputs)
+            # Append segmentation head output.
+            second_nms_outputs.append(second_cnr_output)
+            # Set graph outputs, both bbox and segmentation heads.
+            self.graph.outputs = second_nms_outputs
+            self.sanitize()
+
+
+def main(args):  # Convert a TFOD saved model into an ONNX graph using the parsed CLI args.
+    surgeon = TFODGraphSurgeon(args.saved_model, args.pipeline_config)
+    if args.tf2onnx:  # Optionally keep the intermediate tf2onnx graph.
+        surgeon.save(args.tf2onnx)
+    surgeon.update_preprocessor(args.batch_size, args.input_format)
+    surgeon.process_graph(args.first_nms_threshold, args.second_nms_threshold)
+    if args.debug:  # Expose the requested nodes as extra graph outputs.
+        surgeon.add_debug_output(args.debug)
+    surgeon.save(args.onnx)
+
+
+if __name__ == "__main__":  # Command-line entry point of the ONNX conversion script.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-p", "--pipeline_config", help="Pipeline configuration file to load", type=str
+    )
+    parser.add_argument(
+        "-m",
+        "--saved_model",
+        help="The TensorFlow saved model directory to load",
+        type=str,
+    )
+    parser.add_argument(
+        "-o", "--onnx", help="The output ONNX model file to write", type=str
+    )
+    parser.add_argument(
+        "-b", "--batch_size", help="Batch size for the model", type=int, default=1
+    )
+    parser.add_argument(
+        "-t1",
+        "--first_nms_threshold",
+        help="Override the score threshold for the 1st NMS operation",
+        type=float,
+    )
+    parser.add_argument(
+        "-t2",
+        "--second_nms_threshold",
+        help="Override the score threshold for the 2nd NMS operation",
+        type=float,
+    )
+    parser.add_argument(
+        "-d",
+        "--debug",
+        action="append",
+        help="Add an extra output to debug a particular node",
+    )
+    parser.add_argument(
+        "-f",
+        "--input_format",
+        default="NHWC",
+        choices=["NHWC", "NCHW"],
+        help="Set the input data format (layout) of the graph, either NHWC or NCHW, default: NHWC",
+    )
+    parser.add_argument(
+        "--tf2onnx",
+        help="The path where to save the intermediate ONNX graph generated by tf2onnx, "
+        "useful for debugging purposes, default: not saved",
+        type=str,
+    )
+    args = parser.parse_args()
+    if not all([args.pipeline_config, args.saved_model, args.onnx]):  # Checked by hand so the full help is printed.
+        parser.print_help()
+        print(
+            "\nThese arguments are required: --pipeline_config, --saved_model and --onnx"
+        )
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/eval_coco.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/eval_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..f04c17f33dbc3fb0dce1b30b0c38fb98db0e5902
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/eval_coco.py
@@ -0,0 +1,159 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+import json
+import numpy as np
+from PIL import Image
+from infer import TensorRTInfer
+from image_batcher import ImageBatcher
+
+
+def main(args):
+    """Run TensorRT inference over the input images and score the detections against COCO annotations."""
+    try:
+        import object_detection.metrics.coco_tools as coco_tools
+    except ImportError:
+        print(
+            "Could not import the 'object_detection.metrics.coco_tools' module from TFOD. Maybe you did not install TFOD API"
+        )
+        print(
+            "Please install TensorFlow 2 Object Detection API, check https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html"
+        )
+        sys.exit(1)
+
+    trt_infer = TensorRTInfer(
+        args.engine, args.preprocessor, args.detection_type, args.iou_threshold
+    )
+    batcher = ImageBatcher(
+        args.input, *trt_infer.input_spec(), preprocessor=args.preprocessor
+    )
+    # Read annotations json as dictionary.
+    with open(args.annotations) as f:
+        data = json.load(f)
+    groundtruth = coco_tools.COCOWrapper(data, detection_type=args.detection_type)
+    detections_list = []
+    for batch, images, scales in batcher.get_batch():
+        print(
+            "Processing Image {} / {}".format(batcher.image_index, batcher.num_images),
+            end="\r",
+        )
+        detections = trt_infer.infer(batch, scales, args.nms_threshold)
+        for i in range(len(images)):
+            # Get inference image resolution; the file handle is closed right after reading the size.
+            with Image.open(images[i]) as infer_im:
+                im_width, im_height = infer_im.size
+            for n in range(len(detections[i])):
+                # COCO image ids are taken from the numeric file name.
+                source_id = int(os.path.splitext(os.path.basename(images[i]))[0])
+                det = detections[i][n]
+                if args.detection_type == "bbox":
+                    coco_det = {
+                        "image_id": source_id,
+                        "category_id": det["class"] + 1,  # adjust class num
+                        "bbox": [
+                            det["xmin"],
+                            det["ymin"],
+                            det["xmax"] - det["xmin"],
+                            det["ymax"] - det["ymin"],
+                        ],
+                        "score": det["score"],
+                    }
+                    detections_list.append(coco_det)
+                elif args.detection_type == "segmentation":
+                    # Get detection bbox resolution; clamp to 1 px because PIL cannot resize to an empty size.
+                    det_width = max(1, round(det["xmax"] - det["xmin"]))
+                    det_height = max(1, round(det["ymax"] - det["ymin"]))
+                    # Create an image out of predicted mask array.
+                    small_mask = Image.fromarray(det["mask"])
+                    # Upsample mask to detection bbox's size.
+                    mask = small_mask.resize(
+                        (det_width, det_height), resample=Image.BILINEAR
+                    )
+                    # Create an original image sized template for correct mask placement.
+                    pad = Image.new("L", (im_width, im_height))
+                    # Place your mask according to detection bbox placement.
+                    pad.paste(mask, (round(det["xmin"]), (round(det["ymin"]))))
+                    # Reconvert mask into numpy array for evaluation.
+                    padded_mask = np.array(pad)
+                    # Add one more dimension of 1, this is required by ExportSingleImageDetectionMasksToCoco.
+                    final_mask = padded_mask[np.newaxis, :, :]
+                    # Export detection mask to COCO format
+                    coco_mask = coco_tools.ExportSingleImageDetectionMasksToCoco(
+                        image_id=source_id,
+                        category_id_set=set(range(1, 91)),
+                        detection_classes=np.array([det["class"] + 1]),
+                        detection_scores=np.array([det["score"]]),
+                        detection_masks=final_mask,
+                    )
+                    detections_list.append(coco_mask[0])
+
+    # Finish evaluations: map the CLI detection type onto the COCO iou_type name.
+    detections = groundtruth.LoadAnnotations(detections_list)
+    iou_type = {"bbox": "bbox", "segmentation": "segm"}[args.detection_type]
+    evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, iou_type=iou_type)
+    evaluator.ComputeMetrics()
+
+
+if __name__ == "__main__":  # Command-line entry point of the COCO evaluation script.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--engine", help="The TensorRT engine to infer with.")
+    parser.add_argument(
+        "-i",
+        "--input",
+        help="The input to infer, either a single image path, or a directory of images.",
+    )
+    parser.add_argument(
+        "-d",
+        "--detection_type",
+        default="bbox",
+        choices=["bbox", "segmentation"],
+        help="Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation.",
+    )
+    parser.add_argument(
+        "-a",
+        "--annotations",
+        help="Set the json file to use for COCO instance annotations.",
+    )
+    parser.add_argument(
+        "-t",
+        "--nms_threshold",
+        type=float,
+        help="Override the score threshold for the NMS operation, if higher than the threshold in the engine.",
+    )
+    parser.add_argument(
+        "--iou_threshold",
+        default=0.5,
+        type=float,
+        help="Select the IoU threshold for the mask segmentation. Range is 0 to 1. Pixel values more than threshold will become 1, less 0.",
+    )
+    parser.add_argument(
+        "--preprocessor",
+        default="fixed_shape_resizer",
+        choices=["fixed_shape_resizer", "keep_aspect_ratio_resizer"],
+        help="Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer.",
+    )
+    args = parser.parse_args()
+    if not all([args.engine, args.input, args.annotations, args.preprocessor]):  # Message names the options checked here.
+        parser.print_help()
+        print(
+            "\nThese arguments are required: --engine --input --annotations and --preprocessor"
+        )
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/image_batcher.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/image_batcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..202e998dd23df7af97dedcb2bf4f674c25305446
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/image_batcher.py
@@ -0,0 +1,192 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+
+import numpy as np
+from PIL import Image
+
+
+class ImageBatcher:
+    """
+    Creates batches of pre-processed images.
+    """
+
+    def __init__(
+        self,
+        input,
+        shape,
+        dtype,
+        max_num_images=None,
+        exact_batches=False,
+        preprocessor="fixed_shape_resizer",
+    ):
+        """
+        :param input: The input directory to read images from, or the path of a single image file.
+        :param shape: The tensor shape of the batch to prepare, either in NCHW or NHWC format.
+        :param dtype: The (numpy) datatype to cast the batched data to.
+        :param max_num_images: The maximum number of images to read from the directory.
+        :param exact_batches: This defines how to handle a number of images that is not an exact multiple of the batch
+        size. If false, it will pad the final batch with zeros to reach the batch size. If true, it will *remove* the
+        last few images in excess of a batch size multiple, to guarantee batches are exact (useful for calibration).
+        :param preprocessor: Set the preprocessor to use, depending on which network is being used.
+        """
+        # Find images in the given input path
+        input = os.path.realpath(input)
+        self.images = []
+
+        extensions = [".jpg", ".jpeg", ".png", ".bmp"]
+
+        def is_image(path):
+            return (
+                os.path.isfile(path) and os.path.splitext(path)[1].lower() in extensions
+            )
+
+        if os.path.isdir(input):
+            self.images = [
+                os.path.join(input, f)
+                for f in os.listdir(input)
+                if is_image(os.path.join(input, f))
+            ]
+            self.images.sort()
+        elif os.path.isfile(input):
+            if is_image(input):
+                self.images.append(input)
+        self.num_images = len(self.images)
+        if self.num_images < 1:
+            print("No valid {} images found in {}".format("/".join(extensions), input))
+            sys.exit(1)
+
+        # Handle Tensor Shape: the layout is inferred from which axis holds the 3 color channels.
+        self.dtype = dtype
+        self.shape = shape
+        assert len(self.shape) == 4
+        self.batch_size = shape[0]
+        assert self.batch_size > 0
+        self.format = None
+        self.width = -1
+        self.height = -1
+        if self.shape[1] == 3:
+            self.format = "NCHW"
+            self.height = self.shape[2]
+            self.width = self.shape[3]
+        elif self.shape[3] == 3:
+            self.format = "NHWC"
+            self.height = self.shape[1]
+            self.width = self.shape[2]
+        assert all([self.format, self.width > 0, self.height > 0])
+
+        # Adapt the number of images as needed
+        if max_num_images and 0 < max_num_images < len(self.images):
+            self.num_images = max_num_images
+        if exact_batches:
+            self.num_images = self.batch_size * (self.num_images // self.batch_size)
+        if self.num_images < 1:
+            print("Not enough images to create batches")
+            sys.exit(1)
+        self.images = self.images[0 : self.num_images]
+
+        # Subdivide the list of images into batches (ceiling division, done in integers)
+        self.num_batches = 1 + (self.num_images - 1) // self.batch_size
+        self.batches = []
+        for i in range(self.num_batches):
+            start = i * self.batch_size
+            end = min(start + self.batch_size, self.num_images)
+            self.batches.append(self.images[start:end])
+
+        # Indices
+        self.image_index = 0
+        self.batch_index = 0
+
+        self.preprocessor = preprocessor
+
+    def preprocess_image(self, image_path):
+        """
+        The image preprocessor loads an image from disk and prepares it as needed for batching. This includes padding,
+        resizing, normalization, data type casting, and transposing.
+        This Image Batcher implements one algorithm for now:
+        * Resizes and pads the image to fit the input size.
+        :param image_path: The path to the image on disk to load.
+        :return: Two values: A numpy array holding the image sample, ready to be concatenated into the rest of the
+        batch, and the resize scale used, if any.
+        """
+
+        def resize_pad(image, pad_color=(0, 0, 0)):
+            """
+            A subroutine to implement padding and resizing. This will resize the image to fit fully within the input
+            size, and pads the remaining bottom-right portions with the value provided.
+            :param image: The PIL image object
+            :param pad_color: The RGB values to use for the padded area. Default: Black/Zeros.
+            :return: Two values: The PIL image object already padded and cropped, and the resize scale used.
+            """
+
+            # Get characteristics.
+            width, height = image.size
+            width_scale = width / self.width
+            height_scale = height / self.height
+
+            # Depending on preprocessor, box scaling will be slightly different.
+            if self.preprocessor == "fixed_shape_resizer":
+                scale = [self.width / width, self.height / height]
+                image = image.resize((self.width, self.height), resample=Image.BILINEAR)
+                return image, scale
+            elif self.preprocessor == "keep_aspect_ratio_resizer":
+                scale = 1.0 / max(width_scale, height_scale)
+                image = image.resize(
+                    (round(width * scale), round(height * scale)),
+                    resample=Image.BILINEAR,
+                )
+                pad = Image.new("RGB", (self.width, self.height))
+                pad.paste(pad_color, [0, 0, self.width, self.height])
+                pad.paste(image)
+                return pad, scale
+
+        scale = None
+        with Image.open(image_path) as source:  # close the file handle once the pixels are decoded
+            image = source.convert(mode="RGB")
+        if (
+            self.preprocessor == "fixed_shape_resizer"
+            or self.preprocessor == "keep_aspect_ratio_resizer"
+        ):
+            # Resize & Pad with ImageNet mean values and keep as [0,255] Normalization
+            image, scale = resize_pad(image, (124, 116, 104))
+            image = np.asarray(image, dtype=self.dtype)
+        else:
+            print("Preprocessing method {} not supported".format(self.preprocessor))
+            sys.exit(1)
+        if self.format == "NCHW":
+            image = np.transpose(image, (2, 0, 1))
+        return image, scale
+
+    def get_batch(self):
+        """
+        Retrieve the batches. This is a generator object, so you can use it within a loop as:
+        for batch, images, scales in batcher.get_batch():
+           ...
+        Or outside of a batch with the next() function.
+        :return: A generator yielding three items per iteration: a numpy array holding a batch of images, the list of
+        paths to the images loaded within this batch, and the list of resize scales for each image in the batch.
+        """
+        for batch_images in self.batches:
+            batch_data = np.zeros(self.shape, dtype=self.dtype)
+            batch_scales = [None] * len(batch_images)
+            for i, image in enumerate(batch_images):
+                self.image_index += 1
+                batch_data[i], batch_scales[i] = self.preprocess_image(image)
+            self.batch_index += 1
+            yield batch_data, batch_images, batch_scales
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/infer.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..298b7a0c4bf4a58d60e635a637475b92e1c71b13
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/infer.py
@@ -0,0 +1,286 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+import argparse
+import numpy as np
+import tensorrt as trt
+from cuda import cudart
+
+sys.path.insert(1, os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
+import common
+
+from image_batcher import ImageBatcher
+from visualize import visualize_detections
+
+
+class TensorRTInfer:
+    """
+    Implements inference for the Model TensorRT engine.
+    """
+
+    def __init__(self, engine_path, preprocessor, detection_type, iou_threshold):
+        """
+        :param engine_path: The path to the serialized engine to load from disk.
+        :param preprocessor: Image resizer name: "fixed_shape_resizer" or "keep_aspect_ratio_resizer".
+        :param detection_type: Kind of detections to build in infer(): "bbox" or "segmentation".
+        :param iou_threshold: Cut-off applied to mask values in infer() to obtain binary masks.
+        """
+        self.preprocessor = preprocessor
+        self.detection_type = detection_type
+        self.iou_threshold = iou_threshold
+        # Load TRT engine
+        self.logger = trt.Logger(trt.Logger.ERROR)
+        trt.init_libnvinfer_plugins(self.logger, namespace="")
+        with open(engine_path, "rb") as f, trt.Runtime(self.logger) as runtime:
+            assert runtime
+            self.engine = runtime.deserialize_cuda_engine(f.read())
+        assert self.engine
+        self.context = self.engine.create_execution_context()
+        assert self.context
+
+        # Setup I/O bindings
+        self.inputs = []
+        self.outputs = []
+        self.allocations = []
+        for i in range(self.engine.num_io_tensors):
+            name = self.engine.get_tensor_name(i)
+            is_input = self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+            dtype = self.engine.get_tensor_dtype(name)
+            shape = self.engine.get_tensor_shape(name)
+            if is_input:
+                self.batch_size = shape[0]
+            size = np.dtype(trt.nptype(dtype)).itemsize
+            for s in shape:
+                size *= s
+            allocation = common.cuda_call(cudart.cudaMalloc(size))
+            binding = {
+                "index": i,
+                "name": name,
+                "dtype": np.dtype(trt.nptype(dtype)),
+                "shape": list(shape),
+                "allocation": allocation,
+            }
+            self.allocations.append(allocation)
+            if is_input:
+                self.inputs.append(binding)
+            else:
+                self.outputs.append(binding)
+
+        assert self.batch_size > 0
+        assert len(self.inputs) > 0
+        assert len(self.outputs) > 0
+        assert len(self.allocations) > 0
+
+    def input_spec(self):
+        """
+        Get the specs for the input tensor of the network. Useful to prepare memory allocations.
+        :return: Two items, the shape of the input tensor and its (numpy) datatype.
+        """
+        return self.inputs[0]["shape"], self.inputs[0]["dtype"]
+
+    def output_spec(self):
+        """
+        Get the specs for the output tensors of the network. Useful to prepare memory allocations.
+        :return: A list with two items per element, the shape and (numpy) datatype of each output tensor.
+        """
+        return [(o["shape"], o["dtype"]) for o in self.outputs]
+
+    def infer(self, batch, scales=None, nms_threshold=None):
+        """
+        Execute inference on a batch of images. The images should already be batched and preprocessed, as prepared by
+        the ImageBatcher class. Memory copying to and from the GPU device will be performed here.
+        :param batch: A numpy array holding the image batch.
+        :param scales: The image resize scales for each image in this batch. Default: No scale postprocessing applied.
+        :param nms_threshold: Optional minimum score; detections scoring below it are skipped. Default: keep all.
+        :return: A nested list for each image in the batch and each detection in the list.
+        :raises ValueError: On an unsupported preprocessor, or segmentation without a mask output in the engine.
+        """
+
+        # Prepare the output data
+        outputs = [np.zeros(shape, dtype) for shape, dtype in self.output_spec()]
+
+        # Process I/O and execute the network
+        common.memcpy_host_to_device(
+            self.inputs[0]["allocation"], np.ascontiguousarray(batch)
+        )
+        self.context.execute_v2(self.allocations)
+        for o in range(len(outputs)):
+            common.memcpy_device_to_host(outputs[o], self.outputs[o]["allocation"])
+
+        # Process the results
+        nums = outputs[0]
+        boxes = outputs[1]
+        scores = outputs[2]
+        classes = outputs[3]
+        # One additional output for segmentation masks
+        masks = outputs[4] if len(outputs) == 5 else None
+        detections = []
+        normalized = np.max(boxes) < 2.0
+        for i in range(self.batch_size):
+            detections.append([])
+            for n in range(int(nums[i])):
+                mask = None
+                # Depending on preprocessor, box scaling will be slightly different.
+                if self.preprocessor == "fixed_shape_resizer":
+                    # NOTE(review): x uses shape[1] and y uses shape[2]; confirm for non-square inputs.
+                    scale_x = self.inputs[0]["shape"][1] if normalized else 1.0
+                    scale_y = self.inputs[0]["shape"][2] if normalized else 1.0
+
+                    if scales and i < len(scales):
+                        scale_x /= scales[i][0]
+                        scale_y /= scales[i][1]
+                    if nms_threshold and scores[i][n] < nms_threshold:
+                        continue
+                    # Depending on detection type you need slightly different data.
+                    # Segmentation is only supported with Mask R-CNN, which has
+                    # fixed_shape_resizer as image_resizer (lookup pipeline.config)
+                    if self.detection_type == "segmentation":
+                        if masks is None:
+                            raise ValueError("Engine has no mask output for segmentation")
+                        # Select a mask
+                        mask = masks[i][n]
+                        # Slight scaling, to get binary masks after float32 -> uint8
+                        # conversion, if not scaled all pixels are zero.
+                        mask = mask > self.iou_threshold
+                        # Convert float32 -> uint8.
+                        mask = mask.astype(np.uint8)
+                elif self.preprocessor == "keep_aspect_ratio_resizer":
+                    # No segmentation models with keep_aspect_ratio_resizer
+                    scale = self.inputs[0]["shape"][2] if normalized else 1.0
+                    if scales and i < len(scales):
+                        scale /= scales[i]
+                    # Bind both factors even when no scales are supplied.
+                    scale_x = scale_y = scale
+                    if nms_threshold and scores[i][n] < nms_threshold:
+                        continue
+                else:
+                    raise ValueError("Unsupported preprocessor: {}".format(self.preprocessor))
+                # Append to detections
+                detections[i].append(
+                    {
+                        "ymin": boxes[i][n][0] * scale_y,
+                        "xmin": boxes[i][n][1] * scale_x,
+                        "ymax": boxes[i][n][2] * scale_y,
+                        "xmax": boxes[i][n][3] * scale_x,
+                        "score": scores[i][n],
+                        "class": int(classes[i][n]),
+                        "mask": mask,
+                    }
+                )
+        return detections
+
+
+def main(args):
+    """
+    Run the engine over all input images, saving one annotated PNG and one text results file per image.
+    :param args: The parsed command line arguments.
+    """
+    output_dir = os.path.realpath(args.output)
+    os.makedirs(output_dir, exist_ok=True)
+
+    labels = []
+    if args.labels:
+        with open(args.labels) as labels_file:
+            labels = [entry.strip() for entry in labels_file]
+
+    trt_infer = TensorRTInfer(
+        args.engine, args.preprocessor, args.detection_type, args.iou_threshold
+    )
+    input_shape, input_dtype = trt_infer.input_spec()
+    batcher = ImageBatcher(
+        args.input, input_shape, input_dtype, preprocessor=args.preprocessor
+    )
+    # Column order of each tab-separated line in the text results.
+    columns = ("xmin", "ymin", "xmax", "ymax", "score", "class")
+    for batch, images, scales in batcher.get_batch():
+        progress = "Processing Image {} / {}".format(
+            batcher.image_index, batcher.num_images
+        )
+        print(progress, end="\r")
+        detections = trt_infer.infer(batch, scales, args.nms_threshold)
+        for idx, image_path in enumerate(images):
+            basename = os.path.splitext(os.path.basename(image_path))[0]
+            # Image Visualizations
+            output_path = os.path.join(output_dir, "{}.png".format(basename))
+            visualize_detections(image_path, output_path, detections[idx], labels)
+            # Text Results
+            rows = [
+                "\t".join(str(det[key]) for key in columns) + "\n"
+                for det in detections[idx]
+            ]
+            results_path = os.path.join(args.output, "{}.txt".format(basename))
+            with open(results_path, "w") as results_file:
+                results_file.write("".join(rows))
+    print()
+    print("Finished Processing")
+
+
+if __name__ == "__main__":
+    # One (flags, settings) pair per command line option, registered in this order.
+    option_table = [
+        (("-e", "--engine"), {"default": None, "help": "The serialized TensorRT engine"}),
+        (("-i", "--input"), {"default": None, "help": "Path to the image or directory to process"}),
+        (("-o", "--output"), {"default": None, "help": "Directory where to save the visualization results"}),
+        (
+            ("-l", "--labels"),
+            {"default": "./labels_coco.txt", "help": "File to use for reading the class labels from, default: ./labels_coco.txt"},
+        ),
+        (
+            ("-d", "--detection_type"),
+            {
+                "default": "bbox",
+                "choices": ["bbox", "segmentation"],
+                "help": "Detection type for COCO, either bbox or if you are using Mask R-CNN's instance segmentation - segmentation",
+            },
+        ),
+        (
+            ("-t", "--nms_threshold"),
+            {
+                "type": float,
+                "help": "Override the score threshold for the NMS operation, if higher than the threshold in the engine.",
+            },
+        ),
+        (
+            ("--iou_threshold",),
+            {
+                "default": 0.5,
+                "type": float,
+                "help": "Select the IoU threshold for the mask segmentation. Range is 0 to 1. Pixel values more than threshold will become 1, less 0",
+            },
+        ),
+        (
+            ("--preprocessor",),
+            {
+                "default": "fixed_shape_resizer",
+                "choices": ["fixed_shape_resizer", "keep_aspect_ratio_resizer"],
+                "help": "Select the image preprocessor to use based on your pipeline.config, either 'fixed_shape_resizer' or 'keep_aspect_ratio_resizer', default: fixed_shape_resizer",
+            },
+        ),
+    ]
+    parser = argparse.ArgumentParser()
+    for flags, settings in option_table:
+        parser.add_argument(*flags, **settings)
+    args = parser.parse_args()
+    # Every one of these values must be truthy, otherwise the help text is shown and the run aborts.
+    mandatory = (args.engine, args.input, args.output, args.preprocessor)
+    if not all(mandatory):
+        parser.print_help()
+        print("\nThese arguments are required: --engine --input --output and --preprocessor")
+        sys.exit(1)
+    main(args)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/labels_coco.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/labels_coco.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7bc1671bf4c6a0c90e5f0c312cf056a459dada11
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/labels_coco.txt
@@ -0,0 +1,92 @@
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+street sign
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+hat
+backpack
+umbrella
+shoe
+eye glasses
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+plate
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+mirror
+dining table
+window
+desk
+toilet
+door
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+blender
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
+hair brush
+
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/onnx_utils.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/onnx_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0781932897d19f733be6207e5e8e340e25f3453a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/onnx_utils.py
@@ -0,0 +1,274 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import logging
+import numpy as np
+import onnx_graphsurgeon as gs
+
+logging.basicConfig(level=logging.INFO)
+logging.getLogger("SSDHelper").setLevel(logging.INFO)
+log = logging.getLogger("SSDHelper")
+
+
+@gs.Graph.register()
+def op_with_const(self, op, name, input, value):
+    """
+    Append a node applying the given ONNX operation to the input tensor and a constant operand.
+    :param op: The ONNX operation to perform, i.e. "Add" or "Mul".
+    :param name: The name to use for the node.
+    :param input: The tensor to operate on; if not a gs.Variable, its first element is used.
+    :param value: The value array to operate with.
+    """
+    source = input[0] if type(input) is not gs.Variable else input
+    log.debug("Created {} node '{}': {}".format(op, name, value.squeeze()))
+    operand = gs.Constant(name="{}_value:0".format(name), values=value)
+    node_inputs = [source, operand]
+    node_outputs = [name + ":0"]
+    return self.layer(name=name, op=op, inputs=node_inputs, outputs=node_outputs)
+
+
+@gs.Graph.register()
+def matmul(self, name, input, value):
+    """
+    Append a MatMul node combining the input tensor with a constant matrix.
+    :param name: The name to use for the node.
+    :param input: The tensor to operate on; if not a gs.Variable, its first element is used.
+    :param value: The linear transformation matrix to operate with.
+    """
+    lhs = input[0] if type(input) is not gs.Variable else input
+    log.debug("Created {} node '{}': {}".format("MatMul", name, value.squeeze()))
+    rhs = gs.Constant(name="{}_value:0".format(name), values=value)
+    operands = [lhs, rhs]
+    result_names = [name + ":0"]
+    return self.layer(name=name, op="MatMul", inputs=operands, outputs=result_names)
+
+
+@gs.Graph.register()
+def clip(self, name, input, clip_min, clip_max):
+    """
+    Add Clip operation to the graph which will operate on the input tensor with the value(s) given.
+    :param name: The name to use for the node.
+    :param input: The tensor to operate on; if not a gs.Variable, its first element is used.
+    :param clip_min: Minimum value to include, less is clipped.
+    :param clip_max: Maximum value to include, more is clipped.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created {} node '{}'".format("Clip", name))
+    const_min = gs.Constant(
+        name="{}_value:0".format(name), values=np.asarray([clip_min], dtype=np.float32)
+    )
+    const_max = gs.Constant(
+        name="{}_value:1".format(name), values=np.asarray([clip_max], dtype=np.float32)
+    )
+    return self.layer(
+        name=name,
+        op="Clip",
+        inputs=[input_tensor, const_min, const_max],  # bounds travel as float32 constant inputs
+        outputs=[name + ":0"],
+    )
+
+
+@gs.Graph.register()
+def slice(self, name, input, starts, ends, axes):
+    """
+    Add Slice operation to the graph which will operate on the input tensor with the value(s) given.
+    :param name: The name to use for the node.
+    :param input: The tensor to operate on; if not a gs.Variable, its first element is used.
+    :param starts: Value at which Slice starts.
+    :param ends: Value at which Slice ends.
+    :param axes: Axes on which Slice operation should be performed.
+    :return: The value returned by self.layer() for the new node.
+    """
+
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created {} node '{}'".format("Slice", name))
+    const_start = gs.Constant(
+        name="{}_value:0".format(name), values=np.asarray([starts], dtype=np.int64)
+    )
+    const_end = gs.Constant(
+        name="{}_value:1".format(name), values=np.asarray([ends], dtype=np.int64)
+    )
+    const_axes = gs.Constant(
+        name="{}_value:2".format(name), values=np.asarray([axes], dtype=np.int64)
+    )
+    return self.layer(
+        name=name,
+        op="Slice",
+        inputs=[input_tensor, const_start, const_end, const_axes],  # int64 constant inputs
+        outputs=[name + ":0"],
+    )
+
+
+@gs.Graph.register()
+def unsqueeze(self, name, input, axes=[3]):
+    """
+    Adds to the graph an Unsqueeze node for the given axes and to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node.
+    :param input: The tensor to be "unsqueezed"; if not a gs.Variable, its first element is used.
+    :param axes: A list of axes on which to add the new dimension(s); a copy is stored on the node.
+    :return: The value returned by self.layer() for the new node.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created Unsqueeze node '{}': {}".format(name, axes))
+    return self.layer(
+        name=name,
+        op="Unsqueeze",
+        inputs=[input_tensor],
+        outputs=[name + ":0"],
+        attrs={"axes": list(axes)},  # copy: never share the mutable default list between nodes
+    )
+
+
+@gs.Graph.register()
+def squeeze(self, name, input, axes=[2]):
+    """
+    Adds to the graph a Squeeze node for the given axes and to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node.
+    :param input: The tensor to be "squeezed"; if not a gs.Variable, its first element is used.
+    :param axes: A list of axes on which to remove a dimension(s); a copy is stored on the node.
+    :return: The value returned by self.layer() for the new node.
+    """
+    input_tensor = input if type(input) is gs.Variable else input[0]
+    log.debug("Created Squeeze node '{}': {}".format(name, axes))
+    return self.layer(
+        name=name,
+        op="Squeeze",
+        inputs=[input_tensor],
+        outputs=[name + ":0"],
+        attrs={"axes": list(axes)},  # copy: never share the mutable default list between nodes
+    )
+
+
+@gs.Graph.register()
+def transpose(self, name, input, perm):
+    """
+    Append a Transpose node that applies the given axes permutation to the given input.
+    :param name: The name to use for the node.
+    :param input: The tensor to be transposed; if not a gs.Variable, its first element is used.
+    :param perm: A list of axes defining their order after transposing occurs.
+    :return: The value returned by self.layer() for the new node.
+    """
+    source = input[0] if type(input) is not gs.Variable else input
+    log.debug("Created Transpose node '{}': {}".format(name, perm))
+    layer_args = {
+        "name": name,
+        "op": "Transpose",
+        "inputs": [source],
+        "outputs": [name + ":0"],
+        "attrs": {"perm": perm},
+    }
+    return self.layer(**layer_args)
+
+
+@gs.Graph.register()
+def sigmoid(self, name, input):
+    """
+    Append a Sigmoid node applied to the given input.
+    :param self: The gs.Graph object being extended.
+    :param name: The name to use for the node.
+    :param input: The tensor to be applied to; if not a gs.Variable, its first element is used.
+    :return: The value returned by self.layer() for the new node.
+    """
+    source = input[0] if type(input) is not gs.Variable else input
+    log.debug("Created Sigmoid node '{}'".format(name))
+    node_inputs = [source]
+    result_names = [name + ":0"]
+    return self.layer(name=name, op="Sigmoid", inputs=node_inputs, outputs=result_names)
+
+
+@gs.Graph.register()
+def plugin(self, op, name, inputs, outputs, attrs):
+    """
+    Append a TensorRT plugin node with the given name, inputs, outputs and attributes.
+    :param self: The gs.Graph object being extended.
+    :param op: The registered name for the TensorRT plugin.
+    :param name: The name to use for the node.
+    :param inputs: A list of input tensors; any non-list value is wrapped into a one-element list.
+    :param outputs: The list of tensors to use as outputs.
+    :param attrs: The dictionary to use as attributes.
+    :return: The value returned by self.layer() for the new node.
+    """
+    if type(inputs) is list:
+        input_tensors = inputs
+    else:
+        input_tensors = [inputs]
+    log.debug("Created TRT Plugin node '{}': {}".format(name, attrs))
+    return self.layer(op=op, name=name, inputs=input_tensors, outputs=outputs, attrs=attrs)
+
+
+@gs.Graph.register()
+def find_node_by_op(self, op):
+    """
+    Look up the first node of the graph whose operation name equals the given one.
+    :param self: The gs.Graph object being extended.
+    :param op: The operation name to search for.
+    :return: The first node in self.nodes performing that op, or None when there is none.
+    """
+    matches = (candidate for candidate in self.nodes if candidate.op == op)
+    # next() with a default yields None once the generator is exhausted.
+    first_match = next(matches, None)
+    return first_match
+
+
+@gs.Graph.register()
+def find_descendant_by_op(self, node, op, depth=10):
+    """
+    Starting from the given node, finds a node lower in the graph matching the given operation name.
+    This is not an exhaustive graph search: from each visited node only node.outputs[0].outputs is followed.
+    Candidates are taken from the front of a queue, i.e. in breadth-first order.
+    :param self: The gs.Graph object being extended.
+    :param node: The node to start searching from.
+    :param op: The operation name to search for.
+    :param depth: Number of search passes; every pass after the first restarts from the last node visited.
+    :return: The first visited node that performs that op, or None if the passes end without a match.
+    """
+    queue = []
+    for i in range(depth):  # i is unused; depth only bounds the number of passes
+        queue.append(node.o())  # seed via node.o() -- presumably the first consumer; TODO confirm
+        while queue:  # each pass drains the queue completely
+            node = queue.pop(0)  # rebinds the `node` argument
+            if node.op == op:
+                return node
+            for child in node.outputs[0].outputs:  # entries reached through the first output
+                queue.append(child)
+    return None
+
+
+@gs.Graph.register()
+def find_ancestor_by_op(self, node, op, depth=10):
+    """
+    Starting from the given node, finds a node higher in the graph matching the given operation name.
+    This is not an exhaustive graph search: from each visited node only node.inputs[-1].inputs is followed.
+    Candidates are taken from the front of a queue, i.e. in breadth-first order.
+    :param self: The gs.Graph object being extended.
+    :param node: The node to start searching from.
+    :param op: The operation name to search for.
+    :param depth: Number of search passes; every pass after the first restarts from the last node visited.
+    :return: The first visited node that performs that op, or None if the passes end without a match.
+    """
+    queue = []
+    for i in range(depth):  # i is unused; depth only bounds the number of passes
+        queue.append(node.i())  # seed via node.i() -- presumably the first producer; TODO confirm
+        while queue:  # each pass drains the queue completely
+            node = queue.pop(0)  # rebinds the `node` argument
+            if node.op == op:
+                return node
+            for child in node.inputs[-1].inputs:  # entries reached through the last input
+                queue.append(child)
+    return None
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e38c8ef9fa55b3bcb88fda3d36da61e108664511
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/requirements.txt
@@ -0,0 +1,17 @@
+onnx==1.14.0; python_version <= "3.10"
+onnx==1.16.1; python_version >= "3.11"
+onnxruntime==1.15.1; python_version <= "3.10"
+onnxruntime==1.18.1; python_version >= "3.11"
+Pillow>=10.0.0
+tf2onnx==1.15.0
+pycocotools; platform_system != "Windows"
+pycocotools-windows; platform_system == "Windows"
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+Cython<3.0
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/visualize.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..f88ed6f0e4198aacdb9fa1fe1cbe2f9250e2bce5
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/tensorflow_object_detection_api/visualize.py
@@ -0,0 +1,286 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+
+np.set_printoptions(threshold=np.inf, suppress=True)
+
+import PIL.Image as Image
+import PIL.ImageDraw as ImageDraw
+import PIL.ImageFont as ImageFont
+import PIL.ImageFilter as ImageFilter
+
+
+COLORS = [
+    "GoldenRod",
+    "MediumTurquoise",
+    "GreenYellow",
+    "SteelBlue",
+    "DarkSeaGreen",
+    "SeaShell",
+    "LightGrey",
+    "IndianRed",
+    "DarkKhaki",
+    "LawnGreen",
+    "WhiteSmoke",
+    "Peru",
+    "LightCoral",
+    "FireBrick",
+    "OldLace",
+    "LightBlue",
+    "SlateGray",
+    "OliveDrab",
+    "NavajoWhite",
+    "PaleVioletRed",
+    "SpringGreen",
+    "AliceBlue",
+    "Violet",
+    "DeepSkyBlue",
+    "Red",
+    "MediumVioletRed",
+    "PaleTurquoise",
+    "Tomato",
+    "Azure",
+    "Yellow",
+    "Cornsilk",
+    "Aquamarine",
+    "CadetBlue",
+    "CornflowerBlue",
+    "DodgerBlue",
+    "Olive",
+    "Orchid",
+    "LemonChiffon",
+    "Sienna",
+    "OrangeRed",
+    "Orange",
+    "DarkSalmon",
+    "Magenta",
+    "Wheat",
+    "Lime",
+    "GhostWhite",
+    "SlateBlue",
+    "Aqua",
+    "MediumAquaMarine",
+    "LightSlateGrey",
+    "MediumSeaGreen",
+    "SandyBrown",
+    "YellowGreen",
+    "Plum",
+    "FloralWhite",
+    "LightPink",
+    "Thistle",
+    "DarkViolet",
+    "Pink",
+    "Crimson",
+    "Chocolate",
+    "DarkGrey",
+    "Ivory",
+    "PaleGreen",
+    "DarkGoldenRod",
+    "LavenderBlush",
+    "SlateGrey",
+    "DeepPink",
+    "Gold",
+    "Cyan",
+    "LightSteelBlue",
+    "MediumPurple",
+    "ForestGreen",
+    "DarkOrange",
+    "Tan",
+    "Salmon",
+    "PaleGoldenRod",
+    "LightGreen",
+    "LightSlateGray",
+    "HoneyDew",
+    "Fuchsia",
+    "LightSeaGreen",
+    "DarkOrchid",
+    "Green",
+    "Chartreuse",
+    "LimeGreen",
+    "AntiqueWhite",
+    "Beige",
+    "Gainsboro",
+    "Bisque",
+    "SaddleBrown",
+    "Silver",
+    "Lavender",
+    "Teal",
+    "LightCyan",
+    "PapayaWhip",
+    "Purple",
+    "Coral",
+    "BurlyWood",
+    "LightGray",
+    "Snow",
+    "MistyRose",
+    "PowderBlue",
+    "DarkCyan",
+    "White",
+    "Turquoise",
+    "MediumSlateBlue",
+    "PeachPuff",
+    "Moccasin",
+    "LightSalmon",
+    "SkyBlue",
+    "Khaki",
+    "MediumSpringGreen",
+    "BlueViolet",
+    "MintCream",
+    "Linen",
+    "SeaGreen",
+    "HotPink",
+    "LightYellow",
+    "BlanchedAlmond",
+    "RoyalBlue",
+    "RosyBrown",
+    "MediumOrchid",
+    "DarkTurquoise",
+    "LightGoldenRodYellow",
+    "LightSkyBlue",
+]
+
+
+# Blend a color into the image, with transparency, wherever the mask equals 1.
+# The image array is modified in place, channel by channel, and also returned.
+def overlay(image, mask, color, alpha_transparency=0.5):
+    keep_ratio = 1 - alpha_transparency
+    for channel in range(3):
+        plane = image[:, :, channel]
+        tinted = plane * keep_ratio + alpha_transparency * color[channel] * 255
+        # Pixels outside the mask keep their current value.
+        image[:, :, channel] = np.where(mask == 1, tinted, plane)
+    return image
+
+
+def visualize_detections(image_path, output_path, detections, labels=[]):  # labels is only read here
+    image = Image.open(image_path).convert(mode="RGB")
+    # Get image dimensions (unpacked as width, height).
+    im_width, im_height = image.size
+    line_width = 2
+    font = ImageFont.load_default()
+    for d in detections:
+        color = COLORS[d["class"] % len(COLORS)]  # class ids wrap around the palette
+        # Dynamically convert PIL color into RGB numpy array.
+        pixel_color = Image.new("RGB", (1, 1), color)
+        # Normalize the 0-255 channel values to the 0-1 range; overlay() multiplies by 255 again.
+        np_color = (np.asarray(pixel_color)[0][0]) / 255
+        # Process TF and TRT instance segmentation masks (33x33 arrays).
+        if isinstance(d["mask"], np.ndarray) and d["mask"].shape == (33, 33):
+            # Get detection bbox resolution.
+            det_width = round(d["xmax"] - d["xmin"])
+            det_height = round(d["ymax"] - d["ymin"])
+            # Create an image out of predicted mask array.
+            small_mask = Image.fromarray(d["mask"])
+            # Upsample mask to detection bbox's size.
+            mask = small_mask.resize((det_width, det_height), resample=Image.BILINEAR)
+            # Create an original image sized template for correct mask placement.
+            pad = Image.new("L", (im_width, im_height))
+            # Place your mask according to detection bbox placement.
+            pad.paste(mask, (round(d["xmin"]), (round(d["ymin"]))))
+            # Reconvert mask into numpy array for evaluation.
+            padded_mask = np.array(pad)
+            # Create np.array from original image, copy in order to modify.
+            image_copy = np.asarray(image).copy()
+            # Image with overlaid mask.
+            masked_image = overlay(image_copy, padded_mask, np_color)
+            # Reconvert back to PIL.
+            image = Image.fromarray(masked_image)
+        # Separate clause for ground truth instance segmentation masks.
+        elif isinstance(d["mask"], np.ndarray):
+            # Create np.array from original image, copy in order to modify.
+            image_copy = np.asarray(image).copy()
+            # Image with overlaid mask; the mask is used as-is, without resizing or padding.
+            masked_image = overlay(image_copy, d["mask"], np_color)
+            # Reconvert back to PIL.
+            image = Image.fromarray(masked_image)
+
+        # Bbox lines: a closed polyline through the four corners.
+        draw = ImageDraw.Draw(image)
+        draw.line(
+            [
+                (d["xmin"], d["ymin"]),
+                (d["xmin"], d["ymax"]),
+                (d["xmax"], d["ymax"]),
+                (d["xmax"], d["ymin"]),
+                (d["xmin"], d["ymin"]),
+            ],
+            width=line_width,
+            fill=color,
+        )
+        label = "Class {}".format(d["class"])  # fallback when no label name is available
+        if d["class"] < len(labels):
+            label = "{}".format(labels[d["class"]])
+        score = d["score"]
+        text = "{}: {}%".format(label, int(100 * score))
+        if score < 0:  # a negative score suppresses the percentage
+            text = label
+        left, top, right, bottom = font.getbbox(text)
+        text_width, text_height = right - left, bottom - top
+        text_bottom = max(text_height, d["ymin"])  # never less than text_height
+        text_left = d["xmin"]
+        margin = np.ceil(0.05 * text_height)
+        draw.rectangle(
+            [
+                (text_left, text_bottom - text_height - 2 * margin),
+                (text_left + text_width, text_bottom),
+            ],
+            fill=color,
+        )
+        draw.text(
+            (text_left + margin, text_bottom - text_height - margin),
+            text,
+            fill="black",
+            font=font,
+        )
+    if output_path is None:  # no path given: hand the annotated image back instead of saving
+        return image
+    image.save(output_path)
+
+
+def concat_visualizations(images, names, colors, output_path):
+    """
+    Paste the images side by side below a title bar and save (or return) the combined image.
+    :param images: The images to concatenate, left to right.
+    :param names: The caption drawn in the title bar above each image.
+    :param colors: The title bar fill color for each image.
+    :param output_path: Where to save the result; if None the combined image is returned instead.
+    """
+    bar_height = 18
+    total_width = sum(im.width for im in images)
+    max_height = max([0] + [im.height for im in images])
+    canvas = Image.new("RGB", (total_width, max_height + bar_height))
+    pen = ImageDraw.Draw(canvas)
+    font = ImageFont.load_default()
+
+    x_start = 0
+    for idx, im in enumerate(images):
+        canvas.paste(im, (x_start, bar_height))
+        caption, bar_color = names[idx], colors[idx]
+        # Title bar segment: a filled rectangle with the caption centered horizontally.
+        left, top, right, bottom = font.getbbox(caption)
+        text_width, text_height = right - left, bottom - top
+        pen.rectangle([(x_start, 0), (x_start + im.width, bar_height)], fill=bar_color)
+        text_x = x_start + (im.width - text_width) / 2
+        text_y = text_height - text_height / 2
+        pen.text((text_x, text_y), caption, fill="black", font=font)
+        x_start += im.width
+
+    if output_path is not None:
+        canvas.save(output_path)
+        return None
+    return canvas
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fe8199a401d8739370eaccb27b4bf9000f027157
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/README.md
@@ -0,0 +1,117 @@
+# [DEPRECATED] Object Detection With The ONNX TensorRT Backend In Python
+
+> This sample has been deprecated starting from TensorRT 10.13 due to unstable yolov3 weights. See also https://github.com/pjreddie/darknet/issues/2655
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+- [Prerequisites](#prerequisites)
+- [Running the sample](#running-the-sample)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, yolov3_onnx, implements a full ONNX-based pipeline for performing inference with the YOLOv3 network, with an input size of 608 x 608 pixels, including pre and post-processing. This sample is based on the [YOLOv3-608](https://pjreddie.com/media/files/papers/YOLOv3.pdf) paper.
+
+## How does this sample work?
+
+First, the original YOLOv3 specification from the paper is converted to the Open Neural Network Exchange (ONNX) format in `yolov3_to_onnx.py` (only has to be done once).
+
+Second, this ONNX representation of YOLOv3 is used to build a TensorRT engine, followed by inference on a sample image in `onnx_to_tensorrt.py`. The predicted bounding boxes are finally drawn to the original input image and saved to disk.
+
+After inference, post-processing including bounding-box clustering is applied. The resulting bounding boxes are eventually drawn to a new image file and stored on disk for inspection.
+
+**Note:** This sample is not supported on Ubuntu 14.04 and older.
+
+## Prerequisites
+
+For specific software versions, see the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html).
+
+1. Install the dependencies for Python.
+    ```bash
+    pip3 install -r requirements.txt
+    ```
+
+2.  Download sample data. See the "Download Sample Data" section of [the general setup guide](../README.md).
+
+
+## Running the sample
+
+The data directory needs to be specified (either via `-d /path/to/data` or the environment variable `TRT_DATA_DIR`)
+when running these scripts; otherwise an error is raised. The following example uses the `TRT_DATA_DIR` approach.
+
+1.  Create an ONNX version of YOLOv3 with the following command.
+    ```bash
+    python3 yolov3_to_onnx.py
+    ```
+    When running the above command for the first time, the output should look similar to the following:
+    ```
+    [...]
+    %106_convolutional = Conv[auto_pad = u'SAME_LOWER', dilations = [1, 1], kernel_shape = [1, 1], strides = [1, 1]]
+    (%105_convolutional_lrelu, %106_convolutional_conv_weights, %106_convolutional_conv_bias)
+    return %082_convolutional, %094_convolutional,%106_convolutional
+    }
+    ```
+
+2.  Build a TensorRT engine from the generated ONNX file and run inference on a sample image
+    ```bash
+    python3 onnx_to_tensorrt.py
+    ```
+    When running the above command for the first time, the output should look similar to the following:
+    ```
+    Building an engine from file yolov3.onnx; this may take a while...
+    Running inference on image dog.jpg...
+    Saved image with bounding boxes of detected objects to dog_bboxes.png.
+    ```
+
+3.  Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+    ```
+    Loading ONNX file from path yolov3.onnx...
+    Beginning ONNX file parsing
+    Completed parsing of ONNX file
+    Building an engine from file yolov3.onnx; this may take a while...
+    Completed creating Engine
+    Running inference on image dog.jpg...
+    [[135.14841333 219.59879284 184.30209195 324.0265199 ]
+      [ 98.30805074 135.72613533 499.71263299 299.25579652]
+      [478.00605802 81.25702449 210.57787895 86.91502688]] [0.99854713 0.99880403 0.93829258] [16 1 7]
+    Saved image with bounding boxes of detected objects to dog_bboxes.png.
+    ```
+    You should be able to visually confirm whether the detection was correct.
+
+## Additional resources
+
+The following resources provide a deeper understanding about the model used in this sample, as well as the dataset it was trained on:
+
+**Model**
+- [YOLOv3: An Incremental Improvement](https://pjreddie.com/media/files/papers/YOLOv3.pdf)
+
+**Dataset**
+- [COCO dataset](http://cocodataset.org/#home)
+
+**Documentation**
+- [YOLOv3-608 paper](https://pjreddie.com/media/files/papers/YOLOv3.pdf)
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+## License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+## Changelog
+August 2023
+- Removed support for Python versions < 3.8.
+- This sample was updated to support protobuf 3.20.3 for Python>=3.8
+- Update ONNX version support to 1.14.0
+
+March 2019
+- This `README.md` file was recreated, updated and reviewed.
+
+
+## Known issues
+
+When installing the requirements with Python 3.10, there is a known issue for building onnx. The recommendation is to use a python version < 3.10 when running the sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/coco_labels.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/coco_labels.txt
new file mode 100644
index 0000000000000000000000000000000000000000..16315f2becec9705017bfaf1b9fb81ca2a83c0b0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/coco_labels.txt
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
\ No newline at end of file
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/data_processing.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/data_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..998cbc5f9fe20b890ede0d3e08cde8a025fe1239
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/data_processing.py
@@ -0,0 +1,347 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import math
+from PIL import Image
+import numpy as np
+import os
+
+
+# YOLOv3-608 has been trained with these 80 categories from COCO:
+# Lin, Tsung-Yi, et al. "Microsoft COCO: Common Objects in Context."
+# European Conference on Computer Vision. Springer, Cham, 2014.
+
+
+def load_label_categories(label_file_path):  # returns the file's lines as a list of label strings
+    with open(label_file_path) as label_file:  # context manager closes the file handle
+        return [line.rstrip("\n") for line in label_file]
+
+
+LABEL_FILE_PATH = os.path.join(
+    os.path.dirname(os.path.realpath(__file__)), "coco_labels.txt"
+)
+ALL_CATEGORIES = load_label_categories(LABEL_FILE_PATH)
+
+# Let's make sure that there are 80 classes, as expected for the COCO data set:
+CATEGORY_NUM = len(ALL_CATEGORIES)
+assert CATEGORY_NUM == 80
+
+
+class PreprocessYOLO(object):
+    """A simple class for loading images with PIL and reshaping them to the specified
+    input resolution for YOLOv3-608.
+    """
+
+    def __init__(self, yolo_input_resolution):
+        """Initialize with the input resolution for YOLOv3, which will stay fixed in this sample.
+
+        Keyword arguments:
+        yolo_input_resolution -- two-dimensional tuple with the target network's (spatial)
+        input resolution in HW order
+        """
+        self.yolo_input_resolution = yolo_input_resolution
+
+    def process(self, input_image_path):
+        """Load an image from the specified input path,
+        and return it together with a pre-processed version required for feeding it into a
+        YOLOv3 network.
+
+        Keyword arguments:
+        input_image_path -- string path of the image to be loaded
+        """
+        image_raw, image_resized = self._load_and_resize(input_image_path)
+        image_preprocessed = self._shuffle_and_normalize(image_resized)
+        return image_raw, image_preprocessed
+
+    def _load_and_resize(self, input_image_path):
+        """Load an image from the specified path and resize it to the input resolution.
+        Return the input image before resizing as a PIL Image (required for visualization),
+        and the resized image as a three-channel (RGB) NumPy float array in HWC format.
+
+        Keyword arguments:
+        input_image_path -- string path of the image to be loaded
+        """
+
+        image_raw = Image.open(input_image_path)
+        # yolo_input_resolution is (height, width) but PIL expects (width, height). Force RGB so
+        # grayscale/palette/RGBA files still yield the 3-channel HWC array used downstream:
+        new_resolution = (self.yolo_input_resolution[1], self.yolo_input_resolution[0])
+        image_resized = image_raw.convert("RGB").resize(new_resolution, resample=Image.BICUBIC)
+        image_resized = np.array(image_resized, dtype=np.float32, order="C")
+        return image_raw, image_resized
+
+    def _shuffle_and_normalize(self, image):
+        """Normalize a NumPy array representing an image to the range [0, 1], and
+        convert it from HWC format ("channels last") to NCHW format ("channels first"
+        with leading batch dimension). Note that the input array is scaled in place.
+
+        Keyword arguments:
+        image -- image as three-dimensional NumPy float array, in HWC format
+        """
+        image /= 255.0
+        # HWC to CHW format:
+        image = np.transpose(image, [2, 0, 1])
+        # CHW to NCHW format
+        image = np.expand_dims(image, axis=0)
+        # Convert the image to row-major order, also known as "C order":
+        image = np.array(image, dtype=np.float32, order="C")
+        return image
+
+
+class PostprocessYOLO(object):
+    """Class for post-processing the three output tensors from YOLOv3-608."""
+
+    def __init__(
+        self,
+        yolo_masks,
+        yolo_anchors,
+        obj_threshold,
+        nms_threshold,
+        yolo_input_resolution,
+    ):
+        """Initialize with all values that will be kept when processing several frames.
+        Assuming 3 outputs of the network in the case of (large) YOLOv3.
+
+        Keyword arguments:
+        yolo_masks -- a list of 3 three-dimensional tuples for the YOLO masks
+        yolo_anchors -- a list of 9 two-dimensional tuples for the YOLO anchors
+        obj_threshold -- threshold for object coverage, float value between 0 and 1
+        nms_threshold -- threshold for non-max suppression algorithm,
+        float value between 0 and 1
+        yolo_input_resolution -- two-dimensional tuple with the target network's (spatial)
+        input resolution in HW order
+        """
+        self.masks = yolo_masks
+        self.anchors = yolo_anchors
+        self.object_threshold = obj_threshold
+        self.nms_threshold = nms_threshold
+        self.input_resolution_yolo = yolo_input_resolution
+
+    def process(self, outputs, resolution_raw):
+        """Take the YOLOv3 outputs generated from a TensorRT forward pass, post-process them
+        and return a list of bounding boxes for detected object together with their category
+        and their confidences in separate lists.
+
+        Keyword arguments:
+        outputs -- outputs from a TensorRT engine in NCHW format
+        resolution_raw -- the original spatial resolution from the input PIL image in WH order
+        """
+        outputs_reshaped = list()
+        for output in outputs:
+            outputs_reshaped.append(self._reshape_output(output))
+
+        boxes, categories, confidences = self._process_yolo_output(
+            outputs_reshaped, resolution_raw
+        )
+
+        return boxes, categories, confidences
+
+    def _reshape_output(self, output):
+        """Reshape a TensorRT output from NCHW to NHWC format (with expected C=255),
+        and then return it in (height,width,3,85) dimensionality after further reshaping.
+
+        Keyword argument:
+        output -- an output from a TensorRT engine after inference
+        """
+        output = np.transpose(output, [0, 2, 3, 1])
+        _, height, width, _ = output.shape
+        dim1, dim2 = height, width
+        dim3 = 3
+        # There are CATEGORY_NUM=80 object categories:
+        dim4 = 4 + 1 + CATEGORY_NUM
+        return np.reshape(output, (dim1, dim2, dim3, dim4))
+
+    def _process_yolo_output(self, outputs_reshaped, resolution_raw):
+        """Take in a list of three reshaped YOLO outputs in (height,width,3,85) shape and
+        return a list of bounding boxes for detected object together with their category and their
+        confidences in separate lists (or three Nones when no box survives filtering).
+
+        Keyword arguments:
+        outputs_reshaped -- list of three reshaped YOLO outputs as NumPy arrays
+        with shape (height,width,3,85)
+        resolution_raw -- the original spatial resolution from the input PIL image in WH order
+        """
+
+        # E.g. in YOLOv3-608, there are three output tensors, which we associate with their
+        # respective masks. Then we iterate through all output-mask pairs and generate candidates
+        # for bounding boxes, their corresponding category predictions and their confidences:
+        boxes, categories, confidences = list(), list(), list()
+        for output, mask in zip(outputs_reshaped, self.masks):
+            box, category, confidence = self._process_feats(output, mask)
+            box, category, confidence = self._filter_boxes(box, category, confidence)
+            boxes.append(box)
+            categories.append(category)
+            confidences.append(confidence)
+
+        boxes = np.concatenate(boxes)
+        categories = np.concatenate(categories)
+        confidences = np.concatenate(confidences)
+
+        # Scale boxes back to original image shape:
+        width, height = resolution_raw
+        image_dims = [width, height, width, height]
+        boxes = boxes * image_dims
+
+        # Using the candidates from the previous (loop) step, we apply the non-max suppression
+        # algorithm that clusters adjacent bounding boxes to a single bounding box:
+        nms_boxes, nms_categories, nscores = list(), list(), list()
+        for category in set(categories):
+            idxs = np.where(categories == category)
+            box = boxes[idxs]
+            category = categories[idxs]
+            confidence = confidences[idxs]
+
+            keep = self._nms_boxes(box, confidence)
+
+            nms_boxes.append(box[keep])
+            nms_categories.append(category[keep])
+            nscores.append(confidence[keep])
+
+        if not nms_categories and not nscores:
+            return None, None, None
+
+        boxes = np.concatenate(nms_boxes)
+        categories = np.concatenate(nms_categories)
+        confidences = np.concatenate(nscores)
+
+        return boxes, categories, confidences
+
+    def _process_feats(self, output_reshaped, mask):
+        """Take in a reshaped YOLO output in height,width,3,85 format together with its
+        corresponding YOLO mask and return the detected bounding boxes, the confidence,
+        and the class probability in each cell/pixel.
+
+        Keyword arguments:
+        output_reshaped -- reshaped YOLO output as NumPy arrays with shape (height,width,3,85)
+        mask -- tuple of indices into the anchor list selecting the anchors of this output
+        """
+
+        # Two in-line functions required for calculating the bounding box
+        # descriptors:
+        def sigmoid(value):
+            """Return the sigmoid of the input."""
+            return 1.0 / (1.0 + math.exp(-value))
+
+        def exponential(value):
+            """Return the exponential of the input."""
+            return math.exp(value)
+
+        # Vectorized calculation of above two functions:
+        sigmoid_v = np.vectorize(sigmoid)
+        exponential_v = np.vectorize(exponential)
+
+        grid_h, grid_w, _, _ = output_reshaped.shape
+
+        anchors = [self.anchors[i] for i in mask]
+
+        # Shape (1, 1, num_anchors, 2) so it broadcasts over the (height, width) grid:
+        anchors_tensor = np.reshape(anchors, [1, 1, len(anchors), 2])
+        box_xy = sigmoid_v(output_reshaped[..., :2])
+        box_wh = exponential_v(output_reshaped[..., 2:4]) * anchors_tensor
+        box_confidence = sigmoid_v(output_reshaped[..., 4])
+
+        box_confidence = np.expand_dims(box_confidence, axis=-1)
+        box_class_probs = sigmoid_v(output_reshaped[..., 5:])
+        # Per-cell x (col) and y (row) offsets, each (grid_h, grid_w), also for non-square grids:
+        col = np.tile(np.arange(0, grid_w), grid_h).reshape(-1, grid_w)
+        row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_w)
+
+        col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
+        row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
+        grid = np.concatenate((col, row), axis=-1)
+
+        box_xy += grid
+        box_xy /= (grid_w, grid_h)
+        box_wh /= self.input_resolution_yolo[::-1]  # resolution is (H, W); box_wh is (w, h)
+        box_xy -= box_wh / 2.0
+        boxes = np.concatenate((box_xy, box_wh), axis=-1)
+
+        # boxes: (x, y, w, h) with x,y the top-left corner, relative to the input size;
+        # box_confidence: confidence level, box_class_probs: class confidence
+        return boxes, box_confidence, box_class_probs
+
+    def _filter_boxes(self, boxes, box_confidences, box_class_probs):
+        """Take in the unfiltered bounding box descriptors and discard each cell
+        whose score is lower than the object threshold set during class initialization.
+
+        Keyword arguments:
+        boxes -- bounding box coordinates with shape (height,width,3,4); 4 for
+        x,y,width,height coordinates of the boxes
+        box_confidences -- bounding box confidences with shape (height,width,3,1); 1 for as
+        confidence scalar per element
+        box_class_probs -- class probabilities with shape (height,width,3,CATEGORY_NUM)
+
+        """
+        box_scores = box_confidences * box_class_probs
+        box_classes = np.argmax(box_scores, axis=-1)
+        box_class_scores = np.max(box_scores, axis=-1)
+        pos = np.where(box_class_scores >= self.object_threshold)
+
+        boxes = boxes[pos]
+        classes = box_classes[pos]
+        scores = box_class_scores[pos]
+
+        return boxes, classes, scores
+
+    def _nms_boxes(self, boxes, box_confidences):
+        """Apply the Non-Maximum Suppression (NMS) algorithm on the bounding boxes with their
+        confidence scores and return an array with the indexes of the bounding boxes we want to
+        keep (and display later).
+
+        Keyword arguments:
+        boxes -- a NumPy array containing N bounding-box coordinates that survived filtering,
+        with shape (N,4); 4 for x,y,width,height coordinates of the boxes
+        box_confidences -- a Numpy array containing the corresponding confidences with shape N
+        """
+        x_coord = boxes[:, 0]
+        y_coord = boxes[:, 1]
+        width = boxes[:, 2]
+        height = boxes[:, 3]
+
+        areas = width * height
+        ordered = box_confidences.argsort()[::-1]
+
+        keep = list()
+        while ordered.size > 0:
+            # Index of the current element:
+            i = ordered[0]
+            keep.append(i)
+            xx1 = np.maximum(x_coord[i], x_coord[ordered[1:]])
+            yy1 = np.maximum(y_coord[i], y_coord[ordered[1:]])
+            xx2 = np.minimum(
+                x_coord[i] + width[i], x_coord[ordered[1:]] + width[ordered[1:]]
+            )
+            yy2 = np.minimum(
+                y_coord[i] + height[i], y_coord[ordered[1:]] + height[ordered[1:]]
+            )
+
+            width1 = np.maximum(0.0, xx2 - xx1 + 1)
+            height1 = np.maximum(0.0, yy2 - yy1 + 1)
+            intersection = width1 * height1
+            union = areas[i] + areas[ordered[1:]] - intersection
+
+            # Compute the Intersection over Union (IoU) score:
+            iou = intersection / union
+
+            # The goal of the NMS algorithm is to reduce the number of adjacent bounding-box
+            # candidates to a minimum. In this step, we keep only those elements whose overlap
+            # with the current bounding box is lower than the threshold:
+            indexes = np.where(iou <= self.nms_threshold)[0]
+            ordered = ordered[indexes + 1]
+
+        keep = np.array(keep)
+        return keep
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/download.yml b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/download.yml
new file mode 100644
index 0000000000000000000000000000000000000000..4be447c3a2de8f74b5f43b62442c7a554ed96a3d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/download.yml
@@ -0,0 +1,30 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+sample: yolov3_onnx
+# files:
+  # - path: samples/python/yolov3_onnx/yolov3.cfg
+  #   url: https://raw.githubusercontent.com/pjreddie/darknet/f86901f6177dfc6116360a13cc06ab680e0c86b0/cfg/yolov3.cfg
+  #   checksum: b969a43a848bbf26901643b833cfb96c
+
+  # - path: samples/python/yolov3_onnx/yolov3.weights
+  #   url: https://pjreddie.com/media/files/yolov3.weights
+  #   mirror: https://master.dl.sourceforge.net/project/darknet-yolo.mirror/darknet_yolo_v3_optimal/yolov3.weights
+  #   checksum: c84e5b99d0e52cd466ae710cadf6d84c
+
+  # - path: samples/python/yolov3_onnx/dog.jpg
+  #   url: https://github.com/pjreddie/darknet/raw/f86901f6177dfc6116360a13cc06ab680e0c86b0/data/dog.jpg
+  #   checksum: 0efe2b8fa0609cf67d33ad9ed8112e66
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/onnx_to_tensorrt.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/onnx_to_tensorrt.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ba322bcecb301bce11d8e3cf55e692ec605e218
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/onnx_to_tensorrt.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+
+import os
+import sys
+
+import numpy as np
+import tensorrt as trt
+from data_processing import ALL_CATEGORIES, PostprocessYOLO, PreprocessYOLO
+from PIL import ImageDraw
+
+sys.path.insert(1, os.path.join(sys.path[0], ".."))
+from downloader import getFilePath
+
+import common
+
+TRT_LOGGER = trt.Logger()
+
+
+def draw_bboxes(
+    image_raw, bboxes, confidences, categories, all_categories, bbox_color="blue"
+):
+    """Draw the bounding boxes on the original input image and return it.
+
+    Keyword arguments:
+    image_raw -- a raw PIL Image
+    bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4),
+    or None when nothing was detected (the image is then returned unchanged)
+    confidences -- NumPy array containing the corresponding confidence for each object,
+    with shape (N,)
+    categories -- NumPy array containing the corresponding category for each object,
+    with shape (N,)
+    all_categories -- a list of all categories in the correct order (required for looking up
+    the category name)
+    bbox_color -- an optional string specifying the color of the bounding boxes (default: 'blue')
+    """
+    print(bboxes, confidences, categories)
+    if bboxes is None:  # PostprocessYOLO.process returns (None, None, None) without detections
+        return image_raw
+    draw = ImageDraw.Draw(image_raw)
+    for box, score, category in zip(bboxes, confidences, categories):
+        x_coord, y_coord, width, height = box
+        left = max(0, np.floor(x_coord + 0.5).astype(int))
+        top = max(0, np.floor(y_coord + 0.5).astype(int))
+        right = min(image_raw.width, np.floor(x_coord + width + 0.5).astype(int))
+        bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int))
+
+        draw.rectangle(((left, top), (right, bottom)), outline=bbox_color)
+        label = "{0} {1:.2f}".format(all_categories[category], score)
+        draw.text((left, top - 12), label, fill=bbox_color)
+
+    return image_raw
+
+
+def get_engine(onnx_file_path, engine_file_path=""):
+    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it.
+
+    Returns None if parsing or building fails; the plan is only cached when engine_file_path is non-empty.
+    """
+
+    def build_engine():
+        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
+        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(
+            0
+        ) as network, builder.create_builder_config() as config, trt.OnnxParser(
+            network, TRT_LOGGER
+        ) as parser, trt.Runtime(
+            TRT_LOGGER
+        ) as runtime:
+            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 28)  # 256MiB
+            # Parse model file
+            if not os.path.exists(onnx_file_path):
+                print(
+                    "ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.".format(
+                        onnx_file_path
+                    )
+                )
+                sys.exit(1)  # a missing input is an error, so do not report success
+            print("Loading ONNX file from path {}...".format(onnx_file_path))
+            with open(onnx_file_path, "rb") as model:
+                print("Beginning ONNX file parsing")
+                if not parser.parse(model.read()):
+                    print("ERROR: Failed to parse the ONNX file.")
+                    for error in range(parser.num_errors):
+                        print(parser.get_error(error))
+                    return None
+            # The actual yolov3.onnx is generated with batch size 64. Reshape input to batch size 1
+            network.get_input(0).shape = [1, 3, 608, 608]
+            print("Completed parsing of ONNX file")
+            print("Building an engine from file {}; this may take a while...".format(onnx_file_path))
+            plan = builder.build_serialized_network(network, config)
+            if plan is None:  # the builder signals a failed build by returning None
+                print("ERROR: Failed to build the TensorRT engine.")
+                return None
+            engine = runtime.deserialize_cuda_engine(plan)
+            print("Completed creating Engine")
+            if engine_file_path:  # the default "" means "do not cache"; open("") would raise
+                with open(engine_file_path, "wb") as f:
+                    f.write(plan)
+            return engine
+
+    if os.path.exists(engine_file_path):
+        # If a serialized engine exists, use it instead of building an engine.
+        print("Reading engine from file {}".format(engine_file_path))
+        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
+            return runtime.deserialize_cuda_engine(f.read())
+    return build_engine()
+
+
+def main():
+    """Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference."""
+
+    # Try to load a previously generated YOLOv3-608 network graph in ONNX format:
+    onnx_file_path = "yolov3.onnx"
+    engine_file_path = "yolov3.trt"
+    # Path of the sample dog image, resolved through the downloader helper:
+    input_image_path = getFilePath("samples/python/yolov3_onnx/dog.jpg")
+    # Two-dimensional tuple with the target network's (spatial) input resolution in HW order
+    input_resolution_yolov3_HW = (608, 608)
+    # Create a pre-processor object by specifying the required input resolution for YOLOv3
+    preprocessor = PreprocessYOLO(input_resolution_yolov3_HW)
+    # Load an image from the specified input path, and return it together with a pre-processed version
+    image_raw, image = preprocessor.process(input_image_path)
+    # Store the shape of the original input image in WH format, we will need it for later
+    shape_orig_WH = image_raw.size
+
+    # Output shapes expected by the post-processor
+    output_shapes = [(1, 255, 19, 19), (1, 255, 38, 38), (1, 255, 76, 76)]
+    # Do inference with TensorRT
+    trt_outputs = []
+    with get_engine(
+        onnx_file_path, engine_file_path
+    ) as engine, engine.create_execution_context() as context:
+        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
+        # Do inference
+        print("Running inference on image {}...".format(input_image_path))
+        # Set host input to the image. The common.do_inference function will copy the input to the GPU before executing.
+        inputs[0].host = image
+        trt_outputs = common.do_inference(
+            context,
+            engine=engine,
+            bindings=bindings,
+            inputs=inputs,
+            outputs=outputs,
+            stream=stream,
+        )
+
+    # Before doing post-processing, we need to reshape the outputs as the common.do_inference will give us flat arrays.
+    trt_outputs = [
+        output.reshape(shape) for output, shape in zip(trt_outputs, output_shapes)
+    ]
+
+    postprocessor_args = {
+        "yolo_masks": [
+            (6, 7, 8),
+            (3, 4, 5),
+            (0, 1, 2),
+        ],  # A list of 3 three-dimensional tuples for the YOLO masks
+        "yolo_anchors": [  # A list of 9 two-dimensional tuples for the YOLO anchors
+            (10, 13),
+            (16, 30),
+            (33, 23),
+            (30, 61),
+            (62, 45),
+            (59, 119),
+            (116, 90),
+            (156, 198),
+            (373, 326),
+        ],
+        "obj_threshold": 0.6,  # Threshold for object coverage, float value between 0 and 1
+        "nms_threshold": 0.5,  # Threshold for non-max suppression algorithm, float value between 0 and 1
+        "yolo_input_resolution": input_resolution_yolov3_HW,
+    }
+
+    postprocessor = PostprocessYOLO(**postprocessor_args)
+
+    # Run the post-processing algorithms on the TensorRT outputs and get the bounding box details of detected objects
+    boxes, classes, scores = postprocessor.process(trt_outputs, (shape_orig_WH))
+    # Draw the bounding boxes onto the original input image and save it as a PNG file
+    obj_detected_img = draw_bboxes(image_raw, boxes, scores, classes, ALL_CATEGORIES)
+    output_image_path = "dog_bboxes.png"
+    obj_detected_img.save(output_image_path, "PNG")
+    print(
+        "Saved image with bounding boxes of detected objects to {}.".format(
+            output_image_path
+        )
+    )
+
+    # Free host and device memory used for inputs and outputs
+    common.free_buffers(inputs, outputs, stream)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/requirements.txt b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..32c7e45cd46d6b7fc34b2f0a5dd76e4f0ee27445
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/requirements.txt
@@ -0,0 +1,11 @@
+cuda-python==12.2.0; python_version <= "3.10"
+cuda-python==12.6.0; python_version >= "3.11"
+pywin32; platform_system == "Windows"
+numpy==1.24.4; python_version <= "3.10"
+numpy==1.26.4; python_version >= "3.11"
+onnx==1.16.0
+Pillow>=10.0.0
+protobuf==3.20.3
+pyyaml==6.0.1
+requests==2.32.2
+tqdm==4.66.4
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/yolov3_to_onnx.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/yolov3_to_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffd9d19fde50990256c6c51f44e5600ab6f522f6
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/python/yolov3_onnx/yolov3_to_onnx.py
@@ -0,0 +1,768 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+from collections import OrderedDict
+import sys
+import os
+
+import onnx
+from onnx import helper
+from onnx import TensorProto
+import numpy as np
+
+sys.path.insert(1, os.path.join(sys.path[0], os.path.pardir))
+from downloader import getFilePath
+
+
+class DarkNetParser(object):
+    """Definition of a parser for DarkNet-based YOLOv3-608 (only tested for this topology)."""
+
+    def __init__(self, supported_layers):
+        """Initializes a DarkNetParser object.
+
+        Keyword argument:
+        supported_layers -- a string list of supported layers in DarkNet naming convention,
+        parameters are only added to the class dictionary if a parsed layer is included.
+        """
+
+        # A list of YOLOv3 layers containing dictionaries with all layer
+        # parameters:
+        self.layer_configs = OrderedDict()
+        self.supported_layers = supported_layers
+        self.layer_counter = 0
+
+    def parse_cfg_file(self, cfg_file_path):
+        """Takes the yolov3.cfg file and parses it layer by layer,
+        appending each layer's parameters as a dictionary to layer_configs.
+
+        Keyword argument:
+        cfg_file_path -- path to the yolov3.cfg file as string
+        """
+        with open(cfg_file_path) as cfg_file:
+            remainder = cfg_file.read()
+            while remainder is not None:
+                layer_dict, layer_name, remainder = self._next_layer(remainder)
+                if layer_dict is not None:
+                    self.layer_configs[layer_name] = layer_dict
+        return self.layer_configs
+
+    def _next_layer(self, remainder):
+        """Takes in a string and segments it by looking for DarkNet delimiters.
+        Returns the layer parameters and the remaining string after the last delimiter.
+        Example for the first Conv layer in yolo.cfg ...
+
+        [convolutional]
+        batch_normalize=1
+        filters=32
+        size=3
+        stride=1
+        pad=1
+        activation=leaky
+
+        ... becomes the following layer_dict return value:
+        {'activation': 'leaky', 'stride': 1, 'pad': 1, 'filters': 32,
+        'batch_normalize': 1, 'type': 'convolutional', 'size': 3}.
+
+        '001_convolutional' is returned as layer_name, and all lines that follow in yolo.cfg
+        are returned as the next remainder. (None, None, None) is returned once no further
+        '[type]' header is left.
+
+        Keyword argument:
+        remainder -- a string with all raw text after the previously parsed layer
+        """
+        # Locate the next '[type]' header; without a complete one there is no layer left.
+        _, opening, remainder = remainder.partition("[")
+        layer_type, closing, remainder = remainder.partition("]")
+        if not opening or not closing:
+            return None, None, None
+
+        # A block ends at the first blank line; the last block may instead run to the end
+        # of the file, in which case the new remainder is the empty string.
+        layer_param_block, _, remainder = remainder.partition("\n\n")
+        # Element 0 is the rest of the header line (empty, or a trailing comment as in
+        # "[net] # note"); it never holds a parameter, so it is dropped.
+        layer_param_lines = layer_param_block.split("\n")[1:]
+        layer_name = str(self.layer_counter).zfill(3) + "_" + layer_type
+        layer_dict = dict(type=layer_type)
+        if layer_type in self.supported_layers:
+            for param_line in layer_param_lines:
+                param_line = param_line.strip()
+                # Blank lines and full-line comments carry no parameter.
+                if not param_line or param_line.startswith("#"):
+                    continue
+                param_type, param_value = self._parse_params(param_line)
+                layer_dict[param_type] = param_value
+        self.layer_counter += 1
+        return layer_dict, layer_name, remainder
+
+    def _parse_params(self, param_line):
+        """Identifies the parameter contained in one line of the cfg file and returns
+        it in the required format for each parameter type, e.g. as a list, an int or a float.
+        Values that are not numeric (e.g. 'leaky', '' or '400000,450000') stay strings.
+
+        Keyword argument:
+        param_line -- one parsed line within a layer block
+        Returns the (param_type, param_value) pair.
+        """
+        param_line = param_line.replace(" ", "")
+        # Split on the first '=' only, so a value may itself contain '='.
+        param_type, param_value_raw = param_line.split("=", 1)
+        # Digits left after removing one optional leading minus sign mark an integer.
+        unsigned = param_value_raw[1:] if param_value_raw[:1] == "-" else param_value_raw
+        if param_type == "layers":
+            param_value = [int(index) for index in param_value_raw.split(",")]
+        elif unsigned.isdigit():
+            param_value = int(param_value_raw)
+        elif param_value_raw.isalpha():
+            param_value = param_value_raw
+        else:
+            try:
+                param_value = float(param_value_raw)
+            except ValueError:
+                # Not a number after all: keep the raw text instead of failing the parse.
+                param_value = param_value_raw
+        return param_type, param_value
+
+
+class MajorNodeSpecs(object):
+    """Helper class used to store the names of ONNX output names,
+    corresponding to the output of a DarkNet layer and its output channels.
+    Some DarkNet layers are not created and there is no corresponding ONNX node,
+    but we still need to track them in order to set up skip connections.
+    """
+
+    def __init__(self, name, channels):
+        """Initialize a MajorNodeSpecs object.
+
+        Keyword arguments:
+        name -- name of the ONNX node
+        channels -- number of output channels of this node
+        """
+        self.name = name
+        self.channels = channels
+        self.created_onnx_node = False
+        if name is not None and isinstance(channels, int) and channels > 0:
+            self.created_onnx_node = True
+
+
+class ConvParams(object):
+    """Helper class to store the hyper parameters of a Conv layer,
+    including its prefix name in the ONNX graph and the expected dimensions
+    of weights for convolution, bias, and batch normalization.
+
+    Additionally acts as a wrapper for generating safe names for all
+    weights, checking on feasible combinations.
+    """
+
+    def __init__(self, node_name, batch_normalize, conv_weight_dims):
+        """Constructor based on the base node name (e.g. 101_convolutional), the batch
+        normalization setting, and the convolutional weights shape.
+
+        Keyword arguments:
+        node_name -- base name of this YOLO convolutional layer
+        batch_normalize -- bool value if batch normalization is used
+        conv_weight_dims -- the dimensions of this layer's convolutional weights
+        """
+        self.node_name = node_name
+        self.batch_normalize = batch_normalize
+        assert len(conv_weight_dims) == 4
+        self.conv_weight_dims = conv_weight_dims
+
+    def generate_param_name(self, param_category, suffix):
+        """Generates a name based on two string inputs,
+        and checks if the combination is valid."""
+        assert suffix
+        assert param_category in ["bn", "conv"]
+        assert suffix in ["scale", "mean", "var", "weights", "bias"]
+        if param_category == "bn":
+            assert self.batch_normalize
+            assert suffix in ["scale", "bias", "mean", "var"]
+        elif param_category == "conv":
+            assert suffix in ["weights", "bias"]
+            if suffix == "bias":
+                assert not self.batch_normalize
+        param_name = self.node_name + "_" + param_category + "_" + suffix
+        return param_name
+
+
+class ResizeParams(object):
+    """Helper class to store the scale parameter for a Resize node."""
+
+    def __init__(self, node_name, value):
+        """Constructor based on the base node name (e.g. 086_upsample),
+        and the value of the scale input tensor.
+
+        Keyword arguments:
+        node_name -- base name of this YOLO Resize layer
+        value -- the value of the scale input to the Resize layer as numpy array
+        """
+        self.node_name = node_name
+        self.value = value
+
+    def generate_param_name(self):
+        """Generates the scale parameter name for the Resize node."""
+        param_name = self.node_name + "_" + "scale"
+        return param_name
+
+    def generate_roi_name(self):
+        """Generates the roi input name for the Resize node."""
+        param_name = self.node_name + "_" + "roi"
+        return param_name
+
+
+class WeightLoader(object):
+    """Helper class used for loading the serialized weights of a binary file stream
+    and returning the initializers and the input tensors required for populating
+    the ONNX graph with weights.
+    """
+
+    def __init__(self, weights_file_path):
+        """Initialized with a path to the YOLOv3 .weights file.
+
+        Keyword argument:
+        weights_file_path -- path to the weights file.
+        """
+        self.weights_file = self._open_weights_file(weights_file_path)
+
+    def load_resize_scales(self, resize_params):
+        """Returns the initializers with the value of the scale input
+        tensor given by resize_params.
+
+        Keyword argument:
+        resize_params -- a ResizeParams object
+        """
+        initializer = list()
+        inputs = list()
+        name = resize_params.generate_param_name()
+        shape = resize_params.value.shape
+        data = resize_params.value
+        scale_init = helper.make_tensor(name, TensorProto.FLOAT, shape, data)
+        scale_input = helper.make_tensor_value_info(name, TensorProto.FLOAT, shape)
+        initializer.append(scale_init)
+        inputs.append(scale_input)
+
+        # In opset 11 an additional input named roi is required. Create a dummy tensor to satisfy this.
+        # It is a 1D tensor of size of the rank of the input (4)
+        rank = 4
+        roi_name = resize_params.generate_roi_name()
+        roi_input = helper.make_tensor_value_info(roi_name, TensorProto.FLOAT, [rank])
+        roi_init = helper.make_tensor(roi_name, TensorProto.FLOAT, [rank], [0, 0, 0, 0])
+        initializer.append(roi_init)
+        inputs.append(roi_input)
+
+        return initializer, inputs
+
+    def load_conv_weights(self, conv_params):
+        """Returns the initializers with weights from the weights file and
+        the input tensors of a convolutional layer for all corresponding ONNX nodes.
+
+        Keyword argument:
+        conv_params -- a ConvParams object
+        """
+        initializer = list()
+        inputs = list()
+        if conv_params.batch_normalize:
+            bias_init, bias_input = self._create_param_tensors(
+                conv_params, "bn", "bias"
+            )
+            bn_scale_init, bn_scale_input = self._create_param_tensors(
+                conv_params, "bn", "scale"
+            )
+            bn_mean_init, bn_mean_input = self._create_param_tensors(
+                conv_params, "bn", "mean"
+            )
+            bn_var_init, bn_var_input = self._create_param_tensors(
+                conv_params, "bn", "var"
+            )
+            initializer.extend([bn_scale_init, bias_init, bn_mean_init, bn_var_init])
+            inputs.extend([bn_scale_input, bias_input, bn_mean_input, bn_var_input])
+        else:
+            bias_init, bias_input = self._create_param_tensors(
+                conv_params, "conv", "bias"
+            )
+            initializer.append(bias_init)
+            inputs.append(bias_input)
+        conv_init, conv_input = self._create_param_tensors(
+            conv_params, "conv", "weights"
+        )
+        initializer.append(conv_init)
+        inputs.append(conv_input)
+        return initializer, inputs
+
+    def _open_weights_file(self, weights_file_path):
+        """Opens a YOLOv3 DarkNet file stream and skips the header.
+
+        Keyword argument:
+        weights_file_path -- path to the weights file.
+        """
+        weights_file = open(weights_file_path, "rb")
+        # The header is five int32 values; its content is not used here.
+        header_bytes = 5 * 4
+        if len(weights_file.read(header_bytes)) != header_bytes:
+            # Do not leak the handle when the file is too short to be a weights file.
+            weights_file.close()
+            raise ValueError("Truncated weights header: %s" % weights_file_path)
+        return weights_file
+
+    def _create_param_tensors(self, conv_params, param_category, suffix):
+        """Creates the initializers with weights from the weights file together with
+        the input tensors.
+
+        Keyword arguments:
+        conv_params -- a ConvParams object
+        param_category -- the category of parameters to be created ('bn' or 'conv')
+        suffix -- a string determining the sub-type of above param_category (e.g.,
+        'weights' or 'bias')
+        """
+        param_name, param_data, param_data_shape = self._load_one_param_type(
+            conv_params, param_category, suffix
+        )
+
+        initializer_tensor = helper.make_tensor(
+            param_name, TensorProto.FLOAT, param_data_shape, param_data
+        )
+        input_tensor = helper.make_tensor_value_info(
+            param_name, TensorProto.FLOAT, param_data_shape
+        )
+        return initializer_tensor, input_tensor
+
+    def _load_one_param_type(self, conv_params, param_category, suffix):
+        """Deserializes the weights from a file stream in the DarkNet order.
+
+        Keyword arguments:
+        conv_params -- a ConvParams object
+        param_category -- the category of parameters to be created ('bn' or 'conv')
+        suffix -- a string determining the sub-type of above param_category (e.g.,
+        'weights' or 'bias')
+
+        Returns (param_name, flat float array, param_shape); raises ValueError when the
+        weights file ends before the tensor is complete.
+        """
+        param_name = conv_params.generate_param_name(param_category, suffix)
+        channels_out, channels_in, filter_h, filter_w = conv_params.conv_weight_dims
+        if param_category == "conv" and suffix == "weights":
+            param_shape = [channels_out, channels_in, filter_h, filter_w]
+        else:
+            # Every 'bn' tensor and the conv bias hold one value per output channel.
+            param_shape = [channels_out]
+        # np.prod instead of np.product, which NumPy 2.0 removed.
+        param_size = int(np.prod(param_shape))
+        raw_bytes = self.weights_file.read(param_size * 4)
+        if len(raw_bytes) != param_size * 4:
+            raise ValueError("Weights file ended while reading %s." % param_name)
+        param_data = np.frombuffer(raw_bytes, dtype="float32").astype(float)
+        return param_name, param_data, param_shape
+
+
+class GraphBuilderONNX(object):
+    """Class for creating an ONNX graph from a previously generated list of layer dictionaries."""
+
+    def __init__(self, output_tensors):
+        """Initialize with all DarkNet default parameters used creating YOLOv3,
+        and specify the output tensors as an OrderedDict for their output dimensions
+        with their names as keys.
+
+        Keyword argument:
+        output_tensors -- the output tensors as an OrderedDict containing the keys'
+        output dimensions
+        """
+        self.output_tensors = output_tensors
+        self._nodes = list()
+        self.graph_def = None
+        self.input_tensor = None
+        self.epsilon_bn = 1e-5
+        self.momentum_bn = 0.99
+        self.alpha_lrelu = 0.1
+        self.param_dict = OrderedDict()
+        self.major_node_specs = list()
+        self.batch_size = 1
+
+    def build_onnx_graph(self, layer_configs, weights_file_path, verbose=True):
+        """Iterate over all layer configs (parsed from the DarkNet representation
+        of YOLOv3-608), create an ONNX graph, populate it with weights from the weights
+        file and return the graph definition.
+
+        Keyword arguments:
+        layer_configs -- an OrderedDict object with all parsed layers' configurations
+        weights_file_path -- location of the weights file
+        verbose -- toggles if the graph is printed after creation (default: True)
+        """
+        for layer_name in layer_configs.keys():
+            layer_dict = layer_configs[layer_name]
+            major_node_specs = self._make_onnx_node(layer_name, layer_dict)
+            if major_node_specs.name is not None:
+                self.major_node_specs.append(major_node_specs)
+        outputs = list()
+        for tensor_name in self.output_tensors.keys():
+            output_dims = [
+                self.batch_size,
+            ] + self.output_tensors[tensor_name]
+            output_tensor = helper.make_tensor_value_info(
+                tensor_name, TensorProto.FLOAT, output_dims
+            )
+            outputs.append(output_tensor)
+        inputs = [self.input_tensor]
+        weight_loader = WeightLoader(weights_file_path)
+        initializer = list()
+        # If a layer has parameters, add them to the initializer and input lists.
+        for layer_name in self.param_dict.keys():
+            _, layer_type = layer_name.split("_", 1)
+            params = self.param_dict[layer_name]
+            if layer_type == "convolutional":
+                initializer_layer, inputs_layer = weight_loader.load_conv_weights(
+                    params
+                )
+                initializer.extend(initializer_layer)
+                inputs.extend(inputs_layer)
+            elif layer_type == "upsample":
+                initializer_layer, inputs_layer = weight_loader.load_resize_scales(
+                    params
+                )
+                initializer.extend(initializer_layer)
+                inputs.extend(inputs_layer)
+        del weight_loader
+        self.graph_def = helper.make_graph(
+            nodes=self._nodes,
+            name="YOLOv3-608",
+            inputs=inputs,
+            outputs=outputs,
+            initializer=initializer,
+        )
+        if verbose:
+            print(helper.printable_graph(self.graph_def))
+        model_def = helper.make_model(
+            self.graph_def, producer_name="NVIDIA TensorRT sample"
+        )
+        return model_def
+
+    def _make_onnx_node(self, layer_name, layer_dict):
+        """Take in a layer parameter dictionary, choose the correct function for
+        creating an ONNX node and store the information important to graph creation
+        as a MajorNodeSpec object.
+
+        Keyword arguments:
+        layer_name -- the layer's name (also the corresponding key in layer_configs)
+        layer_dict -- a layer parameter dictionary (one element of layer_configs)
+        """
+        layer_type = layer_dict["type"]
+        if self.input_tensor is None:
+            if layer_type == "net":
+                major_node_output_name, major_node_output_channels = (
+                    self._make_input_tensor(layer_name, layer_dict)
+                )
+                major_node_specs = MajorNodeSpecs(
+                    major_node_output_name, major_node_output_channels
+                )
+            else:
+                raise ValueError('The first node has to be of type "net".')
+        else:
+            node_creators = dict()
+            node_creators["convolutional"] = self._make_conv_node
+            node_creators["shortcut"] = self._make_shortcut_node
+            node_creators["route"] = self._make_route_node
+            node_creators["upsample"] = self._make_resize_node
+
+            if layer_type in node_creators.keys():
+                major_node_output_name, major_node_output_channels = node_creators[
+                    layer_type
+                ](layer_name, layer_dict)
+                major_node_specs = MajorNodeSpecs(
+                    major_node_output_name, major_node_output_channels
+                )
+            else:
+                print(
+                    "Layer of type %s not supported, skipping ONNX node generation."
+                    % layer_type
+                )
+                major_node_specs = MajorNodeSpecs(layer_name, None)
+        return major_node_specs
+
+    def _make_input_tensor(self, layer_name, layer_dict):
+        """Create an ONNX input tensor from a 'net' layer and store the batch size.
+
+        Keyword arguments:
+        layer_name -- the layer's name (also the corresponding key in layer_configs)
+        layer_dict -- a layer parameter dictionary (one element of layer_configs)
+        """
+        batch_size = layer_dict["batch"]
+        channels = layer_dict["channels"]
+        height = layer_dict["height"]
+        width = layer_dict["width"]
+        self.batch_size = batch_size
+        input_tensor = helper.make_tensor_value_info(
+            str(layer_name), TensorProto.FLOAT, [batch_size, channels, height, width]
+        )
+        self.input_tensor = input_tensor
+        return layer_name, channels
+
+    def _get_previous_node_specs(self, target_index=-1):
+        """Get a previously generated ONNX node (skip those that were not generated).
+        The search starts at target_index and walks towards the start of the list.
+
+        Keyword arguments:
+        target_index -- optional for jumping to a specific index (default: -1 for jumping
+        to previous element)
+        """
+        candidates = self.major_node_specs[target_index::-1]
+        previous_node = next(
+            (specs for specs in candidates if specs.created_onnx_node), None
+        )
+        # Fails when no entry at or before target_index was turned into an ONNX node.
+        assert previous_node is not None
+        return previous_node
+
+    def _make_conv_node(self, layer_name, layer_dict):
+        """Create an ONNX Conv node with optional batch normalization and
+        activation nodes.
+
+        Keyword arguments:
+        layer_name -- the layer's name (also the corresponding key in layer_configs)
+        layer_dict -- a layer parameter dictionary (one element of layer_configs)
+        """
+        previous_node_specs = self._get_previous_node_specs()
+        inputs = [previous_node_specs.name]
+        previous_channels = previous_node_specs.channels
+        kernel_size = layer_dict["size"]
+        stride = layer_dict["stride"]
+        filters = layer_dict["filters"]
+        batch_normalize = False
+        if (
+            "batch_normalize" in layer_dict.keys()
+            and layer_dict["batch_normalize"] == 1
+        ):
+            batch_normalize = True
+
+        kernel_shape = [kernel_size, kernel_size]
+        weights_shape = [filters, previous_channels] + kernel_shape
+        conv_params = ConvParams(layer_name, batch_normalize, weights_shape)
+
+        strides = [stride, stride]
+        dilations = [1, 1]
+        weights_name = conv_params.generate_param_name("conv", "weights")
+        inputs.append(weights_name)
+        if not batch_normalize:
+            bias_name = conv_params.generate_param_name("conv", "bias")
+            inputs.append(bias_name)
+
+        conv_node = helper.make_node(
+            "Conv",
+            inputs=inputs,
+            outputs=[layer_name],
+            kernel_shape=kernel_shape,
+            strides=strides,
+            auto_pad="SAME_LOWER",
+            dilations=dilations,
+            name=layer_name,
+        )
+        self._nodes.append(conv_node)
+        inputs = [layer_name]
+        layer_name_output = layer_name
+
+        if batch_normalize:
+            layer_name_bn = layer_name + "_bn"
+            bn_param_suffixes = ["scale", "bias", "mean", "var"]
+            for suffix in bn_param_suffixes:
+                bn_param_name = conv_params.generate_param_name("bn", suffix)
+                inputs.append(bn_param_name)
+            batchnorm_node = helper.make_node(
+                "BatchNormalization",
+                inputs=inputs,
+                outputs=[layer_name_bn],
+                epsilon=self.epsilon_bn,
+                momentum=self.momentum_bn,
+                name=layer_name_bn,
+            )
+            self._nodes.append(batchnorm_node)
+            inputs = [layer_name_bn]
+            layer_name_output = layer_name_bn
+
+        if layer_dict["activation"] == "leaky":
+            layer_name_lrelu = layer_name + "_lrelu"
+
+            lrelu_node = helper.make_node(
+                "LeakyRelu",
+                inputs=inputs,
+                outputs=[layer_name_lrelu],
+                name=layer_name_lrelu,
+                alpha=self.alpha_lrelu,
+            )
+            self._nodes.append(lrelu_node)
+            inputs = [layer_name_lrelu]
+            layer_name_output = layer_name_lrelu
+        elif layer_dict["activation"] == "linear":
+            pass
+        else:
+            print("Activation not supported.")
+
+        self.param_dict[layer_name] = conv_params
+        return layer_name_output, filters
+
+    def _make_shortcut_node(self, layer_name, layer_dict):
+        """Create an ONNX Add node with the shortcut properties from
+        the DarkNet-based graph.
+
+        Keyword arguments:
+        layer_name -- the layer's name (also the corresponding key in layer_configs)
+        layer_dict -- a layer parameter dictionary (one element of layer_configs)
+        """
+        shortcut_index = layer_dict["from"]
+        activation = layer_dict["activation"]
+        assert activation == "linear"
+
+        first_node_specs = self._get_previous_node_specs()
+        second_node_specs = self._get_previous_node_specs(target_index=shortcut_index)
+        assert first_node_specs.channels == second_node_specs.channels
+        channels = first_node_specs.channels
+        inputs = [first_node_specs.name, second_node_specs.name]
+        shortcut_node = helper.make_node(
+            "Add",
+            inputs=inputs,
+            outputs=[layer_name],
+            name=layer_name,
+        )
+        self._nodes.append(shortcut_node)
+        return layer_name, channels
+
+    def _make_route_node(self, layer_name, layer_dict):
+        """If the 'layers' parameter from the DarkNet configuration is only one index, continue
+        node creation at the indicated (negative) index. Otherwise, create an ONNX Concat node
+        with the route properties from the DarkNet-based graph.
+
+        Keyword arguments:
+        layer_name -- the layer's name (also the corresponding key in layer_configs)
+        layer_dict -- a layer parameter dictionary (one element of layer_configs)
+        """
+        route_node_indexes = layer_dict["layers"]
+        if len(route_node_indexes) == 1:
+            split_index = route_node_indexes[0]
+            assert split_index < 0
+            # Increment by one because we skipped the YOLO layer:
+            split_index += 1
+            self.major_node_specs = self.major_node_specs[:split_index]
+            layer_name = None
+            channels = None
+        else:
+            inputs = list()
+            channels = 0
+            for index in route_node_indexes:
+                if index > 0:
+                    # Increment by one because we count the input as a node (DarkNet
+                    # does not)
+                    index += 1
+                route_node_specs = self._get_previous_node_specs(target_index=index)
+                inputs.append(route_node_specs.name)
+                channels += route_node_specs.channels
+            assert inputs
+            assert channels > 0
+
+            route_node = helper.make_node(
+                "Concat",
+                axis=1,
+                inputs=inputs,
+                outputs=[layer_name],
+                name=layer_name,
+            )
+            self._nodes.append(route_node)
+        return layer_name, channels
+
+    def _make_resize_node(self, layer_name, layer_dict):
+        """Create an ONNX Resize node with the properties from
+        the DarkNet-based graph.
+
+        Keyword arguments:
+        layer_name -- the layer's name (also the corresponding key in layer_configs)
+        layer_dict -- a layer parameter dictionary (one element of layer_configs)
+        """
+        resize_scale_factors = float(layer_dict["stride"])
+        # Create the scale factor array with node parameters
+        scales = np.array(
+            [1.0, 1.0, resize_scale_factors, resize_scale_factors]
+        ).astype(np.float32)
+        previous_node_specs = self._get_previous_node_specs()
+        inputs = [previous_node_specs.name]
+
+        channels = previous_node_specs.channels
+        assert channels > 0
+        resize_params = ResizeParams(layer_name, scales)
+
+        # roi input is the second input, so append it before scales
+        roi_name = resize_params.generate_roi_name()
+        inputs.append(roi_name)
+
+        scales_name = resize_params.generate_param_name()
+        inputs.append(scales_name)
+
+        resize_node = helper.make_node(
+            "Resize",
+            coordinate_transformation_mode="asymmetric",
+            mode="nearest",
+            nearest_mode="floor",
+            inputs=inputs,
+            outputs=[layer_name],
+            name=layer_name,
+        )
+        self._nodes.append(resize_node)
+        self.param_dict[layer_name] = resize_params
+        return layer_name, channels
+
+
+def main():
+    """Run the DarkNet-to-ONNX conversion for YOLOv3-608."""
+    cfg_file_path = getFilePath("samples/python/yolov3_onnx/yolov3.cfg")
+    # These are the only layers DarkNetParser will extract parameters from. The three layers of
+    # type 'yolo' are not parsed in detail because they are included in the post-processing later:
+    supported_layers = ["net", "convolutional", "shortcut", "route", "upsample"]
+
+    # Create a DarkNetParser object, and then use it to generate an OrderedDict with all
+    # layers' configs from the cfg file:
+    parser = DarkNetParser(supported_layers)
+    layer_configs = parser.parse_cfg_file(cfg_file_path)
+    # We do not need the parser anymore after we got layer_configs:
+    del parser
+
+    # In the layer_configs above, there are three outputs that we need to know the output
+    # shape of (in CHW format):
+    output_tensor_dims = OrderedDict()
+    output_tensor_dims["082_convolutional"] = [255, 19, 19]
+    output_tensor_dims["094_convolutional"] = [255, 38, 38]
+    output_tensor_dims["106_convolutional"] = [255, 76, 76]
+
+    # Create a GraphBuilderONNX object with the known output tensor dimensions:
+    builder = GraphBuilderONNX(output_tensor_dims)
+
+    weights_file_path = getFilePath("samples/python/yolov3_onnx/yolov3.weights")
+
+    # Now generate an ONNX graph with weights from the previously parsed layer configurations
+    # and the weights file:
+    yolov3_model_def = builder.build_onnx_graph(
+        layer_configs=layer_configs, weights_file_path=weights_file_path, verbose=True
+    )
+    # Once we have the model definition, we do not need the builder anymore:
+    del builder
+
+    # Perform a sanity check on the ONNX model definition:
+    onnx.checker.check_model(yolov3_model_def)
+
+    # Serialize the generated ONNX graph to this file:
+    output_file_path = "yolov3.onnx"
+    onnx.save(yolov3_model_def, output_file_path)
+
+if __name__ == "__main__":
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..486e735b2ebb7b6df2db0a6a74563dc455db10f7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/Makefile
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+OUTNAME_RELEASE = sample_char_rnn
+OUTNAME_DEBUG   = sample_char_rnn_debug
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..40c8148935983b2a837ceae126485fd760b00c80
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/README.md
@@ -0,0 +1,135 @@
+# Building An RNN Network Layer By Layer
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+	* [TensorRT API layers and ops](#tensorrt-api-layers-and-ops)
+- [Preparing sample data](#preparing-sample-data)
+- [Converting TensorFlow weights](#converting-tensorflow-weights)
+- [Running the sample](#running-the-sample)
+	* [Sample `--help` options](#sample-help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, sampleCharRNN, uses the TensorRT API to build an RNN network layer by layer, sets up weights and inputs/outputs and then performs inference. Specifically, this sample creates a CharRNN network that has been trained on the [Tiny Shakespeare](https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt) dataset. For more information about character level modeling, see [char-rnn](https://github.com/karpathy/char-rnn).
+
+TensorFlow has a useful [RNN Tutorial](https://www.tensorflow.org/tutorials/recurrent) which can be used to train a word-level model. Word-level models learn a probability distribution over the set of all possible word sequences. Since our goal is to train a char-level model, which learns a probability distribution over the set of all possible characters, a few modifications will need to be made to get the TensorFlow sample to work. These modifications can be seen [here](http://deeplearningathome.com/2016/10/Text-generation-using-deep-recurrent-neural-networks.html).
+
+There are also many GitHub repositories that contain CharRNN implementations that will work out of the box. [Tensorflow-char-rnn](https://github.com/crazydonkey200/tensorflow-char-rnn)  is one such implementation.
+
+## How does this sample work?
+
+The CharRNN network is a fairly simple RNN network. The input into the network is a single character that is embedded into a vector of size 512. This embedded input is then supplied to an RNN layer containing two stacked LSTM cells. The output from the RNN layer is then supplied to a fully connected layer, which can be represented in TensorRT by a Matrix Multiply layer followed by an ElementWise sum layer. Constant layers are used to supply the weights and biases to the Matrix Multiply and ElementWise Layers, respectively. A TopK operation is then performed on the output of the ElementWise sum layer where `K = 1` to find the next predicted character in the sequence. For more information about these layers, see the [TensorRT API](http://docs.nvidia.com/deeplearning/sdk/tensorrt-api/index.html) documentation.
+
+This sample provides a pre-trained model called `model-20080.data-00000-of-00001` located in the `/usr/src/tensorrt/data/samples/char-rnn/model` directory; therefore, training is not required for this sample. The model used by this sample was trained using [tensorflow-char-rnn](https://github.com/crazydonkey200/tensorflow-char-rnn). This GitHub repository includes instructions on how to train and produce a checkpoint that can be used by TensorRT.
+
+**Note:** If you want to train your own model and then perform inference with TensorRT, you will simply need to do a char-to-char comparison between TensorFlow and TensorRT.
+
+
+### TensorRT API layers and ops
+
+In this sample, the following layers are used.  For more information about these layers, see the [TensorRT Developer Guide: Layers](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#layers) documentation.
+
+[ElementWise](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#elementwise-layer)
+The ElementWise layer, also known as the Eltwise layer, implements per-element operations. The ElementWise layer is used to execute the second step of the functionality provided by a FullyConnected layer.
+
+[MatrixMultiply](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#matrixmultiply-layer)
+The MatrixMultiply layer implements matrix multiplication for a collection of matrices. The Matrix Multiplication layer is used to execute the first step of the functionality provided by a FullyConnected layer.
+
+[TopK](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#topk-layer)
+The TopK layer is used to identify the character that has the maximum probability of appearing next. The TopK layer finds the top K maximum (or minimum) elements along a dimension, returning a reduced tensor and a tensor of index positions.
+
+## Preparing sample data
+
+1. Download the sample data from the [TensorRT release tarball](https://developer.nvidia.com/nvidia-tensorrt-download#) if it is not already mounted under `/usr/src/tensorrt/data` (NVIDIA NGC containers), and set `$TRT_DATADIR` to its location.
+    ```bash
+    export TRT_DATADIR=/usr/src/tensorrt/data
+    ```
+
+## Converting TensorFlow weights
+
+(Optional) If you want to train your own model and not use the pre-trained model included in this sample, you'll need to convert the TensorFlow weights into a format that TensorRT can use.
+
+1.  Locate TensorFlow weights dumping script:  
+    ```bash
+    $TRT_OSSPATH/samples/common/dumpTFWts.py
+    ```
+
+	This script has been provided to extract the weights from the model checkpoint files that are created during training. Use `dumpTFWts.py -h` for directions on the usage of the script.
+
+2.  Convert the TensorFlow weights using the following command:
+    ```bash
+    dumpTFWts.py -m /path/to/checkpoint -o /path/to/output
+    ```
+
+## Running the sample
+
+1. Compile the sample by following build instructions in [TensorRT README](https://github.com/NVIDIA/TensorRT/).
+
+2.  Run the sample to generate characters based on the trained model:
+    ```bash
+    ./sample_char_rnn --datadir=<path/to/data>
+    ```
+
+    For example:
+    ```bash
+    ./sample_char_rnn --datadir $TRT_DATADIR/char-rnn
+    ```
+
+3.  Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+	```
+	&&&& RUNNING TensorRT.sample_char_rnn # ./sample_char_rnn
+	[I] [TRT] Detected 4 input and 3 output network tensors.
+	[I] RNN Warmup: JACK
+	[I] Expect: INGHAM:
+	What shall I
+	[I] Received: INGHAM:
+	What shall I
+	&&&& PASSED TensorRT.sample_char_rnn # ./sample_char_rnn
+	```
+	This output shows that the sample ran successfully; `PASSED`.
+
+### Sample --help options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+## Additional resources
+
+The following resources provide a deeper understanding about RNN networks:
+
+**RNN networks**
+- [GNMT](https://arxiv.org/pdf/1609.08144v1.pdf)
+- [NMT](https://arxiv.org/pdf/1701.02810.pdf)
+- [Transformer](https://arxiv.org/pdf/1706.03762.pdf)
+
+**Videos**
+- [Introduction to RNNs in TensorRT](https://www.youtube.com/watch?reload=9&v=G3QA3ZzD4oc)
+
+**Documentation**
+- [TensorRT Sample Support Guide: sampleCharRNN](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#charRNN_sample)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+## License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) 
+documentation.
+
+
+## Changelog
+
+January 2024
+* Removed RNNv2Layer based addLSTMLayer implementation. addLSTMLayer is now implemented with ILoop only.
+* Default to use ILoop in parameters.
+
+February 2019
+* This is the first release of this `README.md` file.
+
+
+## Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/sampleCharRNN.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/sampleCharRNN.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..680e8943248ebaf86a5767aa5d5e1e2300575975
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleCharRNN/sampleCharRNN.cpp
@@ -0,0 +1,1037 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//!
+//! sampleCharRNN.cpp
+//! This file contains the implementation of the char_rnn sample.
+//! It uses weights from a trained TensorFlow model and creates the network
+//! using the TensorRT network definition API
+//! It can be run with the following command line:
+//! Command: ./sample_char_rnn [-h or --help] [-d or --datadir=<path to data directory>]
+//!
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <sys/stat.h>
+#include <unordered_set>
+#include <vector>
+
+#include "NvInfer.h"
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "cuda_runtime_api.h"
+#include "logger.h"
+#include "sampleEngines.h"
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+
+const std::string gSampleName = "TensorRT.sample_char_rnn";
+
+static const std::array<int, 4> INDICES{0, 1, 2, 3};
+
+// The model used by this sample was trained using github repository:
+// https://github.com/crazydonkey200/tensorflow-char-rnn
+//
+// The data set used: tensorflow-char-rnn/data/tiny_shakespeare.txt
+//
+// The command used to train:
+// python train.py --data_file=data/tiny_shakespeare.txt --num_epochs=100 --num_layer=2 --hidden_size=512
+// --embedding_size=512 --dropout=.5
+//
+// Epochs trained: 100
+// Test perplexity: 4.940
+//
+// Layer0 and Layer1 weights matrices are added as RNNW_L0_NAME and RNNW_L1_NAME, respectively.
+// Layer0 and Layer1 bias are added as RNNB_L0_NAME and RNNB_L1_NAME, respectively.
+// Embedded is added as EMBED_NAME.
+// fc_w is added as FCW_NAME.
+// fc_b is added as FCB_NAME.
+struct SampleCharRNNWeightNames
+{
+    const std::string RNNW_L0_NAME{"rnn_multi_rnn_cell_cell_0_basic_lstm_cell_kernel"};
+    const std::string RNNB_L0_NAME{"rnn_multi_rnn_cell_cell_0_basic_lstm_cell_bias"};
+    const std::string RNNW_L1_NAME{"rnn_multi_rnn_cell_cell_1_basic_lstm_cell_kernel"};
+    const std::string RNNB_L1_NAME{"rnn_multi_rnn_cell_cell_1_basic_lstm_cell_bias"};
+    const std::string FCW_NAME{"softmax_softmax_w"};
+    const std::string FCB_NAME{"softmax_softmax_b"};
+    const std::string EMBED_NAME{"embedding"};
+
+    std::unordered_set<std::string> names
+        = {{RNNW_L0_NAME, RNNB_L0_NAME, RNNW_L1_NAME, RNNB_L1_NAME, FCW_NAME, FCB_NAME, EMBED_NAME}};
+};
+
+struct SampleCharRNNBindingNames
+{
+    const char* INPUT_BLOB_NAME{"data"};
+    const char* HIDDEN_IN_BLOB_NAME{"hiddenIn"};
+    const char* CELL_IN_BLOB_NAME{"cellIn"};
+    const char* HIDDEN_OUT_BLOB_NAME{"hiddenOut"};
+    const char* CELL_OUT_BLOB_NAME{"cellOut"};
+    const char* OUTPUT_BLOB_NAME{"pred"};
+    const char* SEQ_LEN_IN_BLOB_NAME{"seqLen"};
+};
+
+struct SampleCharRNNMaps
+{
+    // A mapping from character to index used by the tensorflow model.
+    const std::map<char, int> charToID{{'\n', 0}, {'!', 1}, {' ', 2}, {'$', 3}, {'\'', 4}, {'&', 5}, {'-', 6}, {',', 7},
+        {'.', 8}, {'3', 9}, {';', 10}, {':', 11}, {'?', 12}, {'A', 13}, {'C', 14}, {'B', 15}, {'E', 16}, {'D', 17},
+        {'G', 18}, {'F', 19}, {'I', 20}, {'H', 21}, {'K', 22}, {'J', 23}, {'M', 24}, {'L', 25}, {'O', 26}, {'N', 27},
+        {'Q', 28}, {'P', 29}, {'S', 30}, {'R', 31}, {'U', 32}, {'T', 33}, {'W', 34}, {'V', 35}, {'Y', 36}, {'X', 37},
+        {'Z', 38}, {'a', 39}, {'c', 40}, {'b', 41}, {'e', 42}, {'d', 43}, {'g', 44}, {'f', 45}, {'i', 46}, {'h', 47},
+        {'k', 48}, {'j', 49}, {'m', 50}, {'l', 51}, {'o', 52}, {'n', 53}, {'q', 54}, {'p', 55}, {'s', 56}, {'r', 57},
+        {'u', 58}, {'t', 59}, {'w', 60}, {'v', 61}, {'y', 62}, {'x', 63}, {'z', 64}};
+
+    // A mapping from index to character used by the tensorflow model.
+    const std::vector<char> idToChar{{'\n', '!', ' ', '$', '\'', '&', '-', ',', '.', '3', ';', ':', '?', 'A', 'C', 'B',
+        'E', 'D', 'G', 'F', 'I', 'H', 'K', 'J', 'M', 'L', 'O', 'N', 'Q', 'P', 'S', 'R', 'U', 'T', 'W', 'V', 'Y', 'X',
+        'Z', 'a', 'c', 'b', 'e', 'd', 'g', 'f', 'i', 'h', 'k', 'j', 'm', 'l', 'o', 'n', 'q', 'p', 's', 'r', 'u', 't',
+        'w', 'v', 'y', 'x', 'z'}};
+};
+
+struct SampleCharRNNParams : samplesCommon::SampleParams
+{
+    int layerCount;
+    int hiddenSize;
+    int seqSize;
+    int dataSize;
+    int vocabSize;
+    int outputSize;
+    std::string weightFileName;
+
+    std::string saveEngine;
+    std::string loadEngine;
+
+    SampleCharRNNMaps charMaps;
+    SampleCharRNNWeightNames weightNames;
+    SampleCharRNNBindingNames bindingNames;
+
+    std::vector<std::string> inputSentences;
+    std::vector<std::string> outputSentences;
+};
+
+//!
+//! \brief  The SampleCharRNNBase class implements the char_rnn sample
+//!
+//! \details It uses weights from a trained TensorFlow model and creates
+//!          the network using the TensorRT network definition API
+//!
+class SampleCharRNNBase
+{
+public:
+    SampleCharRNNBase(const SampleCharRNNParams& params)
+        : mParams(params)
+    {
+    }
+
+    virtual ~SampleCharRNNBase() = default;
+
+    //!
+    //! \brief Builds the network engine
+    //!
+    bool build();
+
+    //!
+    //! \brief Runs the TensorRT inference engine for this sample
+    //!
+    bool infer();
+
+    //!
+    //! \brief Used to clean up any state created in the sample class
+    //!
+    bool teardown();
+
+protected:
+    //!
+    //! \brief Add inputs to the TensorRT network and configure LSTM layers using network definition API.
+    //!
+    virtual nvinfer1::ILayer* addLSTMLayers(SampleUniquePtr<nvinfer1::INetworkDefinition>& network) = 0;
+
+    //!
+    //! \brief Converts RNN weights from TensorFlow's format to TensorRT's format.
+    //!
+    nvinfer1::Weights convertRNNWeights(nvinfer1::Weights input, int dataSize);
+
+    //!
+    //! \brief Converts RNN Biases from TensorFlow's format to TensorRT's format.
+    //!
+    nvinfer1::Weights convertRNNBias(nvinfer1::Weights input);
+
+    std::map<std::string, nvinfer1::Weights> mWeightMap;
+    std::vector<std::unique_ptr<samplesCommon::HostMemory>> weightsMemory;
+    SampleCharRNNParams mParams;
+
+    nvinfer1::ITensor* addReshape(
+        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, nvinfer1::ITensor& tensor, nvinfer1::Dims dims);
+
+private:
+    //!
+    //! \brief Load requested weights from a formatted file into a map.
+    //!
+    std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);
+
+    //!
+    //! \brief Create full model using the TensorRT network definition API and build the engine.
+    //!
+    void constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config);
+
+    //!
+    //! \brief Looks up the embedding tensor for a given char and copies it to input buffer
+    //!
+    void copyEmbeddingToInput(samplesCommon::BufferManager& buffers, char const& c);
+
+    //!
+    //! \brief Perform one time step of inference with the TensorRT execution context
+    //!
+    bool stepOnce(samplesCommon::BufferManager& buffers, SampleUniquePtr<nvinfer1::IExecutionContext>& context,
+        cudaStream_t& stream);
+
+    //!
+    //! \brief Copies Ct/Ht output from the RNN to the Ct-1/Ht-1 input buffers for next time step
+    //!
+    void copyRNNOutputsToInputs(samplesCommon::BufferManager& buffers);
+
+    //!
+    //! \brief Transposes a sub-buffer of size height * width.
+    //!
+    bool transposeSubBuffers(void* data, int64_t height, int64_t width) noexcept;
+
+    std::shared_ptr<nvinfer1::IRuntime> mRuntime{nullptr};   //!< The TensorRT runtime used to run the network
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network
+};
+
+class SampleCharRNNLoop : public SampleCharRNNBase
+{
+public:
+    struct LstmIO
+    {
+        nvinfer1::ITensor* data;
+        nvinfer1::ITensor* hidden;
+        nvinfer1::ITensor* cell;
+    };
+
+    struct LstmParams
+    {
+        nvinfer1::ITensor* inputWeights;
+        nvinfer1::ITensor* recurrentWeights;
+        nvinfer1::ITensor* inputBias;
+        nvinfer1::ITensor* recurrentBias;
+        nvinfer1::ITensor* maxSequenceSize;
+    };
+
+    SampleCharRNNLoop(SampleCharRNNParams params)
+        : SampleCharRNNBase(params)
+    {
+    }
+
+protected:
+    //!
+    //! \brief Add inputs to the TensorRT network and configure LSTM layers using network definition API.
+    //!
+    nvinfer1::ILayer* addLSTMLayers(SampleUniquePtr<nvinfer1::INetworkDefinition>& network) final;
+
+private:
+    nvinfer1::ILayer* addLSTMCell(SampleUniquePtr<nvinfer1::INetworkDefinition>& network, const LstmIO& inputTensors,
+        nvinfer1::ITensor* sequenceSize, const LstmParams& params, LstmIO& outputTensors);
+};
+
+//!
+//! \brief Transpose a sub-buffer of size height * width.
+//!
+//! \param data The data to transpose. Serves as both input and output.
+//! \param height The size of the height dimension to transpose.
+//! \param width The size of the width dimension to transpose.
+//!
+//! \return True on success, false on failure.
+//!
+bool SampleCharRNNBase::transposeSubBuffers(void* data, int64_t height, int64_t width) noexcept
+{
+    try
+    {
+        ASSERT(data != nullptr);
+        ASSERT(height > 0);
+        ASSERT(width > 0);
+        int64_t const tmpSize = height * width * sizeof(float);
+        samplesCommon::HostBuffer tmpbuf(tmpSize, DataType::kFLOAT);
+        ASSERT(tmpbuf.data() != nullptr);
+        auto in = static_cast<float*>(data);
+        auto out = static_cast<float*>(tmpbuf.data());
+
+        for (int64_t i{}; i < height; ++i)
+        {
+            for (int64_t j{}; j < width; ++j)
+            {
+                out[j * height + i] = in[i * width + j];
+            }
+        }
+
+        std::copy(static_cast<uint8_t*>(tmpbuf.data()), static_cast<uint8_t*>(tmpbuf.data()) + tmpSize,
+            static_cast<uint8_t*>(data));
+    }
+    catch (...)
+    {
+        return false;
+    }
+    return true;
+}
+
+//!
+//! \brief Creates the network, configures the builder and creates
+//!        the network engine
+//!
+//! \details This function loads weights from a trained TensorFlow model,
+//!          creates the network using the TensorRT network definition API,
+//!          and builds a TensorRT engine.
+//!
+//! \return true if the engine was created successfully and false otherwise
+//!
+bool SampleCharRNNBase::build()
+{
+    mWeightMap = SampleCharRNNBase::loadWeights(mParams.weightFileName);
+
+    if (mParams.loadEngine.empty())
+    {
+        auto builder
+            = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+        if (!builder)
+        {
+            return false;
+        }
+        auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
+        if (!network)
+        {
+            return false;
+        }
+        auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+        if (!config)
+        {
+            return false;
+        }
+
+        config->setFlag(BuilderFlag::kGPU_FALLBACK);
+
+        // CUDA stream used for profiling by the builder.
+        auto profileStream = samplesCommon::makeCudaStream();
+        if (!profileStream)
+        {
+            return false;
+        }
+        config->setProfileStream(*profileStream);
+
+        constructNetwork(builder, network, config);
+    }
+    else
+    {
+        sample::gLogInfo << "Loading engine from: " << mParams.loadEngine << std::endl;
+        mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
+            sample::loadEngine(mParams.loadEngine, -1, std::cerr), samplesCommon::InferDeleter());
+    }
+
+    if (!mEngine)
+    {
+        return false;
+    }
+
+    if (!mParams.saveEngine.empty())
+    {
+        sample::gLogInfo << "Saving engine to: " << mParams.saveEngine << std::endl;
+        sample::saveEngine(*mEngine, mParams.saveEngine, std::cerr);
+    }
+
+    return true;
+}
+
+//!
+//! \brief Load requested weights from a formatted file into a map.
+//!
+//! \param file Path to weights file. File has to be the formatted dump from
+//!        the dumpTFWts.py script. Otherwise, this function will not work as
+//!        intended.
+//!
+//! \return A map containing the extracted weights.
+//!
+//! \note  Weight V2 files are in a very simple space delimited format.
+//!        <number of buffers>
+//!        for each buffer: [name] [type] [shape] <data as binary blob>\n
+//!        Note: type is the integer value of the DataType enum in NvInfer.h.
+//!
+std::map<std::string, nvinfer1::Weights> SampleCharRNNBase::loadWeights(const std::string file)
+{
+    std::map<std::string, nvinfer1::Weights> weightMap;
+
+    std::ifstream input(file, std::ios_base::binary);
+    ASSERT(input.is_open() && "Unable to load weight file.");
+
+    int32_t count;
+    input >> count;
+    ASSERT(count > 0 && "Invalid weight map file.");
+
+    while (count--)
+    {
+        if (mParams.weightNames.names.empty())
+        {
+            break;
+        }
+
+        nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0};
+
+        // parse name and DataType
+        std::string name;
+        uint32_t type;
+        input >> name >> std::dec >> type;
+        wt.type = static_cast<nvinfer1::DataType>(type);
+
+        // extract shape
+        std::string temp, shape;
+        std::getline(std::getline(input, temp, '('), shape, ')');
+
+        // calculate count based on shape
+        wt.count = 1;
+        std::istringstream shapeStream(shape);
+        while (std::getline(shapeStream, temp, ','))
+            wt.count *= std::stoul(temp);
+        size_t numOfBytes = samplesCommon::getNbBytes(wt.type, wt.count);
+
+        // skip reading of weights if name is not in the set of names requested for extraction
+        if (mParams.weightNames.names.find(name) == mParams.weightNames.names.end())
+        {
+            input.seekg(input.tellg() + static_cast<std::streamoff>(2 + numOfBytes));
+            continue;
+        }
+        else
+        {
+            mParams.weightNames.names.erase(name);
+        }
+
+        // Read weight values
+        input.seekg(input.tellg() + static_cast<std::streamoff>(1)); // skip space char
+        // We do not really care about the setup of DataType here. Use char here to avoid additional conversion
+        auto mem = new samplesCommon::TypedHostMemory<char, nvinfer1::DataType::kINT8>(numOfBytes);
+        weightsMemory.emplace_back(mem);
+        auto wtVals = mem->raw();
+        input.read(wtVals, numOfBytes);
+        input.seekg(input.tellg() + static_cast<std::streamoff>(1)); // skip new-line char
+        wt.values = wtVals;
+
+        weightMap[name] = wt;
+    }
+
+    input.close();
+    sample::gLogInfo << "Done reading weights from file..." << std::endl;
+    return weightMap;
+}
+
+//!
+//! \brief Converts RNN weights from TensorFlow's format to TensorRT's format.
+//!
+//! \param orig Weights that are stored in TensorFlow's format.
+//! \param dataSize Number of leading input-weight (W) rows; hiddenSize recurrent (R) rows follow.
+//! \return Converted weights in TensorRT's format.
+//!
+//! \note TensorFlow weight parameters for BasicLSTMCell are formatted as:
+//!       Each [WR][icfo] is hiddenSize sequential elements.
+//!       CellN  Row 0: WiT, WcT, WfT, WoT
+//!       CellN  Row 1: WiT, WcT, WfT, WoT
+//!       ...
+//!       CellN RowM-1: WiT, WcT, WfT, WoT
+//!       CellN RowM+0: RiT, RcT, RfT, RoT
+//!       CellN RowM+1: RiT, RcT, RfT, RoT
+//!       ...
+//!       CellNRow2M-1: RiT, RcT, RfT, RoT
+//!
+//!       TensorRT expects the format to be laid out in memory as:
+//!       CellN: Wi, Wc, Wf, Wo, Ri, Rc, Rf, Ro
+//!
+nvinfer1::Weights SampleCharRNNBase::convertRNNWeights(nvinfer1::Weights orig, int dataSize)
+{
+    nvinfer1::Weights input{orig.type, orig.values, (dataSize + mParams.hiddenSize) * 4 * mParams.hiddenSize};
+    auto mem = new samplesCommon::FloatMemory(input.count);
+    weightsMemory.emplace_back(mem);
+    auto ptr = mem->raw();
+    float const* data = static_cast<float const*>(input.values);
+    int64_t dimsW[2]{dataSize, 4 * mParams.hiddenSize};
+    int64_t dimsR[2]{mParams.hiddenSize, 4 * mParams.hiddenSize};
+    std::copy(data, data + input.count, ptr);
+    ASSERT(transposeSubBuffers(ptr, dimsW[0], dimsW[1]));
+    ASSERT(transposeSubBuffers(&ptr[dimsW[0] * dimsW[1]], dimsR[0], dimsR[1]));
+    return nvinfer1::Weights{input.type, ptr, input.count};
+}
+
+//!
+//! \brief Converts RNN Biases from TensorFlow's format to TensorRT's format.
+//!
+//! \param input Biases that are stored in TensorFlow's format.
+//!
+//! \return Converted bias in TensorRT's format.
+//!
+//! \note TensorFlow bias parameters for BasicLSTMCell are formatted as:
+//!       CellN: Bi, Bc, Bf, Bo
+//!
+//!       TensorRT expects the format to be:
+//!       CellN: Wi, Wc, Wf, Wo, Ri, Rc, Rf, Ro
+//!
+//!       Since TensorFlow already combines the input (W) and recurrent (R) biases,
+//!       we double the size and zero-fill the second (R) half.
+nvinfer1::Weights SampleCharRNNBase::convertRNNBias(nvinfer1::Weights input)
+{
+    auto mem = new samplesCommon::FloatMemory(input.count * 2);
+    weightsMemory.emplace_back(mem);
+    auto ptr = mem->raw();
+    const float* iptr = static_cast<const float*>(input.values);
+    int64_t count = 4 * mParams.hiddenSize;
+    ASSERT(input.count == count);
+    std::copy(iptr, iptr + count, ptr);
+    float* shiftedPtr = ptr + count;
+    std::fill(shiftedPtr, shiftedPtr + count, 0.0);
+    return nvinfer1::Weights{input.type, ptr, input.count * 2};
+}
+
+nvinfer1::ILayer* SampleCharRNNLoop::addLSTMCell(SampleUniquePtr<nvinfer1::INetworkDefinition>& network,
+    const LstmIO& inputTensors, nvinfer1::ITensor* sequenceSize, const LstmParams& params, LstmIO& outputTensors)
+{
+    nvinfer1::ILoop* sequenceLoop = network->addLoop();
+    sequenceLoop->addTripLimit(*sequenceSize, nvinfer1::TripLimit::kCOUNT);
+
+    nvinfer1::ITensor* input = sequenceLoop->addIterator(*inputTensors.data)->getOutput(0);
+    nvinfer1::IRecurrenceLayer* hidden = sequenceLoop->addRecurrence(*inputTensors.hidden);
+    nvinfer1::IRecurrenceLayer* cell = sequenceLoop->addRecurrence(*inputTensors.cell);
+
+    nvinfer1::ITensor* mmInput = network
+                                     ->addMatrixMultiply(*input, nvinfer1::MatrixOperation::kVECTOR,
+                                         *params.inputWeights, nvinfer1::MatrixOperation::kTRANSPOSE)
+                                     ->getOutput(0);
+
+    nvinfer1::ITensor* mmHidden = network
+                                      ->addMatrixMultiply(*hidden->getOutput(0), nvinfer1::MatrixOperation::kVECTOR,
+                                          *params.recurrentWeights, nvinfer1::MatrixOperation::kTRANSPOSE)
+                                      ->getOutput(0);
+
+    nvinfer1::ITensor* mm
+        = network->addElementWise(*mmInput, *mmHidden, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
+
+    nvinfer1::ITensor* bias
+        = network->addElementWise(*params.inputBias, *params.recurrentBias, nvinfer1::ElementWiseOperation::kSUM)
+              ->getOutput(0);
+
+    nvinfer1::ITensor* gatesICFO
+        = network->addElementWise(*mm, *bias, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
+
+    const auto isolateGate = [&](nvinfer1::ITensor& gates, int gateIndex) -> nvinfer1::ITensor* {
+        nvinfer1::ISliceLayer* slice = network->addSlice(gates, nvinfer1::Dims{1, {gateIndex * mParams.hiddenSize}},
+            nvinfer1::Dims{1, {mParams.hiddenSize}}, nvinfer1::Dims{1, {1}});
+        return addReshape(network, *slice->getOutput(0), nvinfer1::Dims{1, {mParams.hiddenSize}});
+    };
+
+    nvinfer1::ITensor* i
+        = network->addActivation(*isolateGate(*gatesICFO, 0), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
+    nvinfer1::ITensor* c
+        = network->addActivation(*isolateGate(*gatesICFO, 1), nvinfer1::ActivationType::kTANH)->getOutput(0);
+    nvinfer1::ITensor* f
+        = network->addActivation(*isolateGate(*gatesICFO, 2), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
+    nvinfer1::ITensor* o
+        = network->addActivation(*isolateGate(*gatesICFO, 3), nvinfer1::ActivationType::kSIGMOID)->getOutput(0);
+
+    nvinfer1::ITensor* C
+        = network
+              ->addElementWise(*network->addElementWise(*f, *cell->getOutput(0), nvinfer1::ElementWiseOperation::kPROD)
+                                    ->getOutput(0),
+                  *network->addElementWise(*i, *c, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0),
+                  nvinfer1::ElementWiseOperation::kSUM)
+              ->getOutput(0);
+    nvinfer1::ITensor* H
+        = network
+              ->addElementWise(*o, *network->addActivation(*C, nvinfer1::ActivationType::kTANH)->getOutput(0),
+                  nvinfer1::ElementWiseOperation::kPROD)
+              ->getOutput(0);
+
+    // Recurrent backedge input for hidden and cell.
+    cell->setInput(1, *C);
+    hidden->setInput(1, *H);
+
+    nvinfer1::ILoopOutputLayer* outputLayer = sequenceLoop->addLoopOutput(*H, nvinfer1::LoopOutput::kCONCATENATE);
+    outputLayer->setInput(1, *params.maxSequenceSize);
+    nvinfer1::ITensor* hiddenOut
+        = sequenceLoop->addLoopOutput(*hidden->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE)->getOutput(0);
+    nvinfer1::ITensor* cellOut
+        = sequenceLoop->addLoopOutput(*cell->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE)->getOutput(0);
+
+    outputTensors = LstmIO{outputLayer->getOutput(0), hiddenOut, cellOut};
+    return outputLayer;
+}
+
+nvinfer1::ITensor* SampleCharRNNBase::addReshape(
+    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, nvinfer1::ITensor& tensor, nvinfer1::Dims dims)
+{
+    auto* const reshapeLayer = network->addShuffle(tensor); // a shuffle layer doubles as a reshape
+    reshapeLayer->setReshapeDimensions(dims);
+    return reshapeLayer->getOutput(0);
+}
+
+nvinfer1::ILayer* SampleCharRNNLoop::addLSTMLayers(SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
+{
+    nvinfer1::ILayer* dataOut{nullptr}; // set by each addLSTMCell call below; the last one is returned
+    // Builds the LSTM stack: declares the network inputs, adds one LSTM cell per layer, marks state outputs.
+    nvinfer1::ITensor* data = network->addInput(mParams.bindingNames.INPUT_BLOB_NAME, nvinfer1::DataType::kFLOAT,
+        nvinfer1::Dims2(mParams.seqSize, mParams.dataSize));
+    ASSERT(data != nullptr);
+
+    nvinfer1::ITensor* hiddenLayers = network->addInput(mParams.bindingNames.HIDDEN_IN_BLOB_NAME,
+        nvinfer1::DataType::kFLOAT, nvinfer1::Dims2(mParams.layerCount, mParams.hiddenSize));
+    ASSERT(hiddenLayers != nullptr);
+
+    nvinfer1::ITensor* cellLayers = network->addInput(mParams.bindingNames.CELL_IN_BLOB_NAME,
+        nvinfer1::DataType::kFLOAT, nvinfer1::Dims2(mParams.layerCount, mParams.hiddenSize));
+    ASSERT(cellLayers != nullptr);
+
+    nvinfer1::ITensor* sequenceSize
+        = network->addInput(mParams.bindingNames.SEQ_LEN_IN_BLOB_NAME, nvinfer1::DataType::kINT32, nvinfer1::Dims{});
+    ASSERT(sequenceSize != nullptr);
+
+    // Convert the TensorFlow-format LSTM weights and biases of both layers to the TensorRT weight format.
+    std::array<nvinfer1::Weights, 2> rnnw{
+        SampleCharRNNBase::convertRNNWeights(mWeightMap[mParams.weightNames.RNNW_L0_NAME], mParams.dataSize),
+        SampleCharRNNBase::convertRNNWeights(mWeightMap[mParams.weightNames.RNNW_L1_NAME], mParams.hiddenSize)};
+    std::array<nvinfer1::Weights, 2> rnnb{
+        SampleCharRNNBase::convertRNNBias(mWeightMap[mParams.weightNames.RNNB_L0_NAME]),
+        SampleCharRNNBase::convertRNNBias(mWeightMap[mParams.weightNames.RNNB_L1_NAME])};
+
+    // Store the transformed weights in the weight map so the memory can be properly released later.
+    mWeightMap["rnnwL0"] = rnnw[0];
+    mWeightMap["rnnwL1"] = rnnw[1];
+    mWeightMap["rnnbL0"] = rnnb[0];
+    mWeightMap["rnnbL1"] = rnnb[1];
+    // Scalar INT32 constant backed by mParams.seqSize; every cell receives it through LstmParams.
+    nvinfer1::ITensor* maxSequenceSize
+        = network->addConstant(nvinfer1::Dims{}, Weights{DataType::kINT32, &mParams.seqSize, 1})->getOutput(0);
+    ASSERT(static_cast<size_t>(mParams.layerCount) <= INDICES.size()); // INDICES[i] is the gather index of layer i
+    LstmIO lstmNext{data, nullptr, nullptr}; // layer 0 consumes the network input; addLSTMCell overwrites this
+    std::vector<nvinfer1::ITensor*> hiddenOutputs;
+    std::vector<nvinfer1::ITensor*> cellOutputs;
+    nvinfer1::Dims2 dimWL0(4 * mParams.hiddenSize, mParams.dataSize);
+    nvinfer1::Dims2 dimR(4 * mParams.hiddenSize, mParams.hiddenSize);
+    nvinfer1::Dims dimB{1, {4 * mParams.hiddenSize}};
+    nvinfer1::Dims dim0{1, {0}};
+    auto extractWeights = [](nvinfer1::Weights weights, Dims start, Dims size) -> nvinfer1::Weights {
+        const char* data = static_cast<const char*>(weights.values);
+        int64_t shift = samplesCommon::volume(start); // element offset: volume of the block that precedes the range
+        const int bufferSize = samplesCommon::getNbBytes(weights.type, shift); // the same offset, in bytes
+        int64_t count = samplesCommon::volume(size);
+        ASSERT(shift + count <= weights.count);
+        return nvinfer1::Weights{weights.type, data + bufferSize, count}; // points into the same buffer, no copy
+    };
+    for (int i = 0; i < mParams.layerCount; ++i)
+    { // gather layer i's incoming state, carve its weights/biases out of the packed buffers, add the cell
+        nvinfer1::Dims dimW = i == 0 ? dimWL0 : dimR; // layer 0 input width is dataSize, later layers hiddenSize
+        nvinfer1::ITensor* index
+            = network->addConstant(nvinfer1::Dims{}, Weights{DataType::kINT32, &INDICES[i], 1})->getOutput(0);
+        nvinfer1::ITensor* hidden = network->addGather(*hiddenLayers, *index, 0)->getOutput(0);
+        nvinfer1::ITensor* cell = network->addGather(*cellLayers, *index, 0)->getOutput(0);
+        nvinfer1::ITensor* weightIn = network->addConstant(dimW, extractWeights(rnnw[i], dim0, dimW))->getOutput(0);
+        nvinfer1::ITensor* weightRec = network->addConstant(dimR, extractWeights(rnnw[i], dimW, dimR))->getOutput(0);
+        nvinfer1::ITensor* biasIn = network->addConstant(dimB, extractWeights(rnnb[i], dim0, dimB))->getOutput(0);
+        nvinfer1::ITensor* biasRec = network->addConstant(dimB, extractWeights(rnnb[i], dimB, dimB))->getOutput(0);
+        LstmIO lstmInput{lstmNext.data, hidden, cell};
+        LstmParams params{weightIn, weightRec, biasIn, biasRec, maxSequenceSize};
+        // Add the cell, then reshape its final hidden/cell state to 1 x hiddenSize so the layers can be stacked.
+        Dims2 dims{1, mParams.hiddenSize};
+        dataOut = addLSTMCell(network, lstmInput, sequenceSize, params, lstmNext);
+        hiddenOutputs.push_back(addReshape(network, *lstmNext.hidden, dims));
+        cellOutputs.push_back(addReshape(network, *lstmNext.cell, dims));
+    }
+
+    auto addConcatenation = [&network](std::vector<nvinfer1::ITensor*> tensors) -> nvinfer1::ITensor* {
+        nvinfer1::IConcatenationLayer* concat = network->addConcatenation(tensors.data(), tensors.size());
+        concat->setAxis(0); // stack the 1 x hiddenSize rows along axis 0
+        return concat->getOutput(0);
+    };
+    // Expose the stacked final hidden and cell states as network outputs under the configured names.
+    nvinfer1::ITensor* hiddenNext = addConcatenation(hiddenOutputs);
+    hiddenNext->setName(mParams.bindingNames.HIDDEN_OUT_BLOB_NAME);
+    network->markOutput(*hiddenNext);
+
+    nvinfer1::ITensor* cellNext = addConcatenation(cellOutputs);
+    cellNext->setName(mParams.bindingNames.CELL_OUT_BLOB_NAME);
+    network->markOutput(*cellNext);
+
+    return dataOut;
+}
+
+//!
+//! \brief Create full model using the TensorRT network definition API and build the engine.
+//! \param builder Builder that compiles the finished network into a serialized plan.
+//! \param network Network definition that all layers are added to.
+//! \param config Builder configuration used for the build and for the optional timing cache.
+//!
+void SampleCharRNNBase::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config)
+{
+    // Add the LSTM layers; output 0 of the returned layer feeds the matrix multiply below.
+    auto rnn = addLSTMLayers(network);
+
+    // Transpose FC weights since TensorFlow's weights are transposed when compared to TensorRT
+    ASSERT(transposeSubBuffers(
+        (void*) mWeightMap[mParams.weightNames.FCW_NAME].values, mParams.hiddenSize, mParams.vocabSize));
+
+    // add Constant layers for fully connected weights
+    auto fcwts = network->addConstant(
+        nvinfer1::Dims2(mParams.vocabSize, mParams.hiddenSize), mWeightMap[mParams.weightNames.FCW_NAME]);
+
+    // Add matrix multiplication layer for multiplying rnn output with FC weights
+    auto matrixMultLayer = network->addMatrixMultiply(
+        *fcwts->getOutput(0), MatrixOperation::kNONE, *rnn->getOutput(0), MatrixOperation::kTRANSPOSE);
+    ASSERT(matrixMultLayer != nullptr);
+    matrixMultLayer->getOutput(0)->setName("Matrix Multiplicaton output");
+
+    // Add elementwise layer for adding bias
+    auto fcbias = network->addConstant(nvinfer1::Dims2(mParams.vocabSize, 1), mWeightMap[mParams.weightNames.FCB_NAME]);
+    auto addBiasLayer = network->addElementWise(
+        *matrixMultLayer->getOutput(0), *fcbias->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
+    ASSERT(addBiasLayer != nullptr);
+    addBiasLayer->getOutput(0)->setName("Add Bias output");
+
+    // Add TopK layer to determine which character has highest probability.
+    int reduceAxis = 0x1; // reduce across vocab axis
+    auto pred = network->addTopK(*addBiasLayer->getOutput(0), nvinfer1::TopKOperation::kMAX, 1, reduceAxis);
+    ASSERT(pred != nullptr);
+    pred->getOutput(1)->setName(mParams.bindingNames.OUTPUT_BLOB_NAME);
+
+    // Mark TopK output 1 (read back by infer() as the predicted character id) as the network output, typed INT32.
+    network->markOutput(*pred->getOutput(1));
+    pred->getOutput(1)->setType(nvinfer1::DataType::kINT32);
+    // If a timing cache file was given, load it here; it is written back after a successful build.
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache
+            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
+    }
+
+    sample::gLogInfo << "Done constructing network..." << std::endl;
+    // Build the serialized plan and deserialize it; a failed step returns early without assigning mEngine.
+    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
+    if (!plan)
+    {
+        return;
+    }
+
+    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(
+            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+
+    mRuntime = std::shared_ptr<nvinfer1::IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
+    if (!mRuntime)
+    {
+        return;
+    }
+
+    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
+        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
+}
+
+//!
+//! \brief Runs the TensorRT inference engine for this sample
+//!
+//! \details Seeds the network with a randomly chosen input sentence, one character per step, then
+//!          generates as many characters as the expected continuation holds and compares the two.
+//! \return true when the generated text matches the expected text, false on mismatch or failure.
+//!
+bool SampleCharRNNBase::infer()
+{
+    // Create RAII buffer manager object
+    samplesCommon::BufferManager buffers(mEngine, 0);
+
+    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
+    if (!context)
+    {
+        return false;
+    }
+
+    // Select a random seed string.
+    srand(unsigned(time(nullptr)));
+    int sentenceIndex = rand() % mParams.inputSentences.size();
+    std::string inputSentence = mParams.inputSentences[sentenceIndex];
+    std::string expected = mParams.outputSentences[sentenceIndex];
+    std::string genstr;
+
+    sample::gLogInfo << "RNN warmup sentence: " << inputSentence << std::endl;
+    sample::gLogInfo << "Expected output: " << expected << std::endl;
+
+    // Create the stream for TRT execution. The guard destroys it on the early `return false` paths
+    // below, which previously leaked the stream.
+    cudaStream_t stream;
+    CHECK(cudaStreamCreate(&stream));
+    auto destroyStream = [](cudaStream_t* s) { cudaStreamDestroy(*s); };
+    std::unique_ptr<cudaStream_t, decltype(destroyStream)> streamGuard{&stream, destroyStream};
+
+    // Set sequence lengths to maximum
+    int* sequenceLengthIn
+        = reinterpret_cast<int32_t*>(buffers.getHostBuffer(mParams.bindingNames.SEQ_LEN_IN_BLOB_NAME));
+    auto sequenceLengthTensorSize = buffers.size(mParams.bindingNames.SEQ_LEN_IN_BLOB_NAME);
+    std::fill_n(sequenceLengthIn, sequenceLengthTensorSize / sizeof(mParams.seqSize), mParams.seqSize);
+
+    // Initialize hiddenIn and cellIn tensors to zero before seeding
+    void* hiddenIn = buffers.getHostBuffer(mParams.bindingNames.HIDDEN_IN_BLOB_NAME);
+    auto hiddenTensorSize = buffers.size(mParams.bindingNames.HIDDEN_IN_BLOB_NAME);
+    void* cellIn = buffers.getHostBuffer(mParams.bindingNames.CELL_IN_BLOB_NAME);
+    auto cellTensorSize = buffers.size(mParams.bindingNames.CELL_IN_BLOB_NAME);
+    std::memset(hiddenIn, 0, hiddenTensorSize);
+    std::memset(cellIn, 0, cellTensorSize);
+
+    // Seed the RNN with the input sentence.
+    for (auto& a : inputSentence)
+    {
+        SampleCharRNNBase::copyEmbeddingToInput(buffers, a);
+
+        if (!SampleCharRNNBase::stepOnce(buffers, context, stream))
+        {
+            return false;
+        }
+        SampleCharRNNBase::copyRNNOutputsToInputs(buffers);
+        genstr.push_back(a);
+    }
+
+    // Extract first predicted character
+    uint32_t predIdx = *reinterpret_cast<uint32_t*>(buffers.getHostBuffer(mParams.bindingNames.OUTPUT_BLOB_NAME));
+    genstr.push_back(mParams.charMaps.idToChar.at(predIdx));
+
+    // Generate the remaining characters; `x < expected.size()` cannot wrap when `expected` is empty.
+    for (size_t x = 1; x < expected.size(); x++)
+    {
+        SampleCharRNNBase::copyEmbeddingToInput(buffers, genstr.back());
+
+        if (!SampleCharRNNBase::stepOnce(buffers, context, stream))
+        {
+            return false;
+        }
+        SampleCharRNNBase::copyRNNOutputsToInputs(buffers);
+        predIdx = *reinterpret_cast<uint32_t*>(buffers.getHostBuffer(mParams.bindingNames.OUTPUT_BLOB_NAME));
+        genstr.push_back(mParams.charMaps.idToChar.at(predIdx));
+    }
+
+    sample::gLogInfo << "Received: " << genstr.substr(inputSentence.size()) << std::endl;
+
+    // Normal path: take the stream back from the guard and destroy it with error checking.
+    (void) streamGuard.release();
+    CHECK(cudaStreamDestroy(stream));
+
+    return genstr == (inputSentence + expected);
+}
+
+//!
+//! \brief Copies the embedding row of character \p c into the host-side network input buffer
+//!
+void SampleCharRNNBase::copyEmbeddingToInput(samplesCommon::BufferManager& buffers, char const& c)
+{
+    auto const embeddingTable = mWeightMap[mParams.weightNames.EMBED_NAME];
+    auto* const dst = static_cast<float*>(buffers.getHostBuffer(mParams.bindingNames.INPUT_BLOB_NAME));
+    auto const charId = mParams.charMaps.charToID.at(c);
+    auto const nbBytes = buffers.size(mParams.bindingNames.INPUT_BLOB_NAME);
+    // Rows are mParams.dataSize floats apart; the amount copied is the input buffer's full size.
+    std::memcpy(dst, static_cast<const float*>(embeddingTable.values) + charId * mParams.dataSize, nbBytes);
+}
+
+//!
+//! \brief Runs one inference step on \p stream: upload inputs, enqueue the engine, download outputs
+//!
+bool SampleCharRNNBase::stepOnce(
+    samplesCommon::BufferManager& buffers, SampleUniquePtr<nvinfer1::IExecutionContext>& context, cudaStream_t& stream)
+{
+    // Queue the host -> device copies of the input buffers.
+    buffers.copyInputToDeviceAsync(stream);
+
+    // Point every I/O tensor of the engine at the device buffer registered under the same name.
+    int32_t const nbIOTensors = mEngine->getNbIOTensors();
+    for (int32_t tensorIdx = 0; tensorIdx < nbIOTensors; ++tensorIdx)
+    {
+        auto const tensorName = mEngine->getIOTensorName(tensorIdx);
+        context->setTensorAddress(tensorName, buffers.getDeviceBuffer(tensorName));
+    }
+
+    // Queue the inference, then the device -> host output copies, and wait for the stream to finish.
+    ASSERT(context->enqueueV3(stream));
+    buffers.copyOutputToHostAsync(stream);
+    CHECK(cudaStreamSynchronize(stream));
+    return true;
+}
+
+//!
+//! \brief Feeds the recurrent state back: copies the hidden/cell outputs into the hidden/cell inputs
+//!
+void SampleCharRNNBase::copyRNNOutputsToInputs(samplesCommon::BufferManager& buffers)
+{
+    // Destination is the *_IN binding, source the *_OUT binding; the byte count is the input's size.
+    void* const hiddenDst = buffers.getHostBuffer(mParams.bindingNames.HIDDEN_IN_BLOB_NAME);
+    void const* const hiddenSrc = buffers.getHostBuffer(mParams.bindingNames.HIDDEN_OUT_BLOB_NAME);
+    auto const hiddenBytes = buffers.size(mParams.bindingNames.HIDDEN_IN_BLOB_NAME);
+
+    void* const cellDst = buffers.getHostBuffer(mParams.bindingNames.CELL_IN_BLOB_NAME);
+    void const* const cellSrc = buffers.getHostBuffer(mParams.bindingNames.CELL_OUT_BLOB_NAME);
+    auto const cellBytes = buffers.size(mParams.bindingNames.CELL_IN_BLOB_NAME);
+
+    std::memcpy(hiddenDst, hiddenSrc, hiddenBytes);
+    std::memcpy(cellDst, cellSrc, cellBytes);
+}
+
+//!
+//! \brief Used to clean up any state created in the sample class (nothing is released here; always returns true)
+//!
+bool SampleCharRNNBase::teardown()
+{
+    return true;
+}
+
+//!
+//! \brief Builds the parameter set for this sample: fixed model dimensions plus the
+//!        data directories and engine/timing-cache paths taken from the command line
+//!
+SampleCharRNNParams initializeSampleParams(const samplesCommon::Args& args)
+{
+    SampleCharRNNParams params;
+
+    if (!args.dataDirs.empty())
+    {
+        params.dataDirs = args.dataDirs;
+    }
+    else
+    {
+        params.dataDirs.push_back("data/char-rnn/");
+        params.dataDirs.push_back("data/samples/char-rnn/");
+    }
+
+    params.weightFileName = samplesCommon::locateFile("char-rnn.wts", params.dataDirs);
+    params.saveEngine = args.saveEngine;
+    params.loadEngine = args.loadEngine;
+    params.timingCacheFile = args.timingCacheFile;
+    params.batchSize = 1;
+    params.seqSize = 1;
+    params.outputSize = 1;
+    params.layerCount = 2;
+    params.vocabSize = 65;
+    params.hiddenSize = 512;
+    params.dataSize = params.hiddenSize;
+
+    // Seed sentences; expectedContinuations[i] is the text expected to follow seedSentences[i].
+    const std::vector<std::string> seedSentences{
+        "ROMEO",
+        "JUL",
+        "The K",
+        "That tho",
+        "KING",
+        "beauty of",
+        "birth of K",
+        "Hi",
+        "JACK",
+        "interestingly, it was J",
+    };
+    const std::vector<std::string> expectedContinuations{
+        ":\nThe sense to",
+        "IET:\nWhat shall I shall be",
+        "ing Richard shall be the strange",
+        "u shalt be the",
+        " HENRY VI:\nWhat",
+        " the son,",
+        "ing Richard's son",
+        "ng of York,\nThat thou hast so the",
+        "INGHAM:\nWhat shall I",
+        "uliet",
+    };
+
+    params.inputSentences = seedSentences;
+    params.outputSentences = expectedContinuations;
+
+    return params;
+}
+
+//!
+//! \brief Prints the usage line and a description of every supported command line option to stdout
+//!
+void printHelpInfo()
+{
+    std::cout << "Usage: ./sample_char_rnn [-h or --help] [-d or --datadir=<path to data directory>]\n";
+    std::cout << "--help             Display help information\n";
+    std::cout << "--datadir          Specify path to a data directory, overriding the default. This option can be used "
+                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
+                 "data/samples/char-rnn/ and data/char-rnn/"
+              << std::endl;
+    // The original text ended in a dangling "When this option is provided, engine " clause; it is dropped.
+    std::cout << "--loadEngine       Specify path from which to load the engine." << std::endl;
+    std::cout << "--saveEngine       Specify path at which to save the engine." << std::endl;
+    std::cout << "--timingCacheFile  Specify path to a timing cache file. If it does not already exist, it will be "
+              << "created." << std::endl;
+}
+
+//!
+//! \brief Runs the char-rnn model in TensorRT with a set of expected input and output strings.
+//!
+//! \details Parses the command line, builds the engine, runs inference and tears the sample down.
+//!          The first stage that fails ends the run with a reported failure.
+//!
+//! \return EXIT_FAILURE on invalid arguments, EXIT_SUCCESS after printing help; otherwise the value
+//!         returned by reportFail() or reportPass().
+//!
+int main(int argc, char** argv)
+{
+    sample::setReportableSeverity(sample::Logger::Severity::kVERBOSE);
+
+    // Reject malformed command lines and handle the help flag before the test is registered.
+    samplesCommon::Args args;
+    if (!samplesCommon::parseArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    // Register the run with the test logger.
+    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
+    sample::gLogger.reportTestStart(sampleTest);
+
+    // The concrete sample is the loop-based implementation, driven through the base-class pointer.
+    SampleCharRNNParams params = initializeSampleParams(args);
+    std::unique_ptr<SampleCharRNNBase> sample{new SampleCharRNNLoop(params)};
+
+    sample::gLogInfo << "Building and running a GPU inference engine for Char RNN model..." << std::endl;
+
+    // Stages run in order; short-circuit evaluation skips the remaining ones after the first failure.
+    bool const allStagesPassed = sample->build() && sample->infer() && sample->teardown();
+    if (!allStagesPassed)
+    {
+        return sample::gLogger.reportFail(sampleTest);
+    }
+
+    return sample::gLogger.reportPass(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..ab1c8689d73fe14f00cd21bf6caad0203987b594
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/Makefile
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+# --- Sample-specific settings, defined before the shared makefile (../Makefile.config by default) is included ---
+OUTNAME_RELEASE = sample_dynamic_reshape
+OUTNAME_DEBUG   = sample_dynamic_reshape_debug
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4216dbc674bdee54257139de77e2228569ea1151
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/README.md
@@ -0,0 +1,311 @@
+# Digit Recognition With Dynamic Shapes In TensorRT
+
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+    * [Creating the preprocessing network](#creating-the-preprocessing-network)
+    * [Parsing the ONNX MNIST model](#parsing-the-onnx-mnist-model)
+    * [Building engines](#building-engines)
+    * [Running inference](#running-inference)
+    * [TensorRT API layers and ops](#tensorrt-api-layers-and-ops)
+- [Preparing sample data](#preparing-sample-data)
+- [Running the sample](#running-the-sample)
+    * [Sample `--help` options](#sample-help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, sampleDynamicReshape, demonstrates how to use dynamic input dimensions in TensorRT. It creates an engine that takes a dynamically shaped input and resizes it to be consumed by an ONNX MNIST model that expects a fixed size input. For more information, see [Working With Dynamic Shapes](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#work_dynamic_shapes) in the TensorRT Developer Guide.
+
+## How does this sample work?
+
+This sample creates an engine for resizing an input with dynamic dimensions to a size that an ONNX MNIST model can consume.
+
+Specifically, this sample:
+-   Creates a network with dynamic input dimensions to act as a preprocessor for the model
+-   Parses an ONNX MNIST model to create a second network
+-   Builds engines for both networks and does calibration if running in int8
+-   Runs inference using both engines
+
+### Creating the preprocessing network
+
+First, create a network with full dims support:
+`auto preprocessorNetwork = makeUnique(builder->createNetworkV2(0));`
+
+Next, add an input layer that accepts an input with a dynamic shape, followed by a resize layer that will reshape the input to the shape the model expects:
+```
+auto input = preprocessorNetwork->addInput("input", nvinfer1::DataType::kFLOAT, Dims4{-1, 1, -1, -1});
+auto resizeLayer = preprocessorNetwork->addResize(*input);
+resizeLayer->setOutputDimensions(mPredictionInputDims);
+preprocessorNetwork->markOutput(*resizeLayer->getOutput(0));
+```
+
+The -1 dimensions denote dimensions that will be supplied at runtime.
+
+### Parsing the ONNX MNIST model
+
+First, create an empty full-dims network, and parser:
+```
+auto network = makeUnique(builder->createNetworkV2(0));
+auto parser = nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger());
+```
+
+Next, parse the model file to populate the network:
+```
+parser->parseFromFile(locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), static_cast<int>(sample::gLogger.getReportableSeverity()));
+```
+
+### Building engines
+
+When building the preprocessor engine, also provide an optimization profile so that TensorRT knows which input shapes to optimize for:
+```
+auto preprocessorConfig = makeUnique(builder->createBuilderConfig());
+auto profile = builder->createOptimizationProfile();
+```
+
+`OptProfileSelector::kOPT` specifies the dimensions that the profile will be optimized for, whereas `OptProfileSelector::kMIN` and `OptProfileSelector::kMAX` specify the minimum and maximum dimensions for which the profile will be valid:
+```
+profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{1, 1, 1, 1});
+profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{1, 1, 28, 28});
+profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{1, 1, 56, 56});
+preprocessorConfig->addOptimizationProfile(profile);
+```
+
+Create an optimization profile for calibration:
+```
+auto profileCalib = builder->createOptimizationProfile();
+const int calibBatchSize{256};
+profileCalib->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28});
+profileCalib->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28});
+profileCalib->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28});
+preprocessorConfig->setCalibrationProfile(profileCalib);
+```
+
+Prepare and set int8 calibrator if running in int8 mode:
+```
+std::unique_ptr<IInt8Calibrator> calibrator;
+if (mParams.int8)
+{
+    preprocessorConfig->setFlag(BuilderFlag::kINT8);    
+    const int nCalibBatches{10}; 
+    MNISTBatchStream calibrationStream(calibBatchSize, nCalibBatches, "train-images-idx3-ubyte",
+        "train-labels-idx1-ubyte", mParams.dataDirs);
+    calibrator.reset(new Int8EntropyCalibrator2<MNISTBatchStream>(
+        calibrationStream, 0, "MNISTPreprocessor", "input"));
+    preprocessorConfig->setInt8Calibrator(calibrator.get());
+}
+```
+
+Run engine build with config: 
+```
+SampleUniquePtr<nvinfer1::IHostMemory> preprocessorPlan = makeUnique(
+        builder->buildSerializedNetwork(*preprocessorNetwork, *preprocessorConfig));
+if (!preprocessorPlan)
+{
+    sample::gLogError << "Preprocessor serialized engine build failed." << std::endl;
+    return false;
+}
+
+mPreprocessorEngine = makeUnique(
+    runtime->deserializeCudaEngine(preprocessorPlan->data(), preprocessorPlan->size()));
+if (!mPreprocessorEngine)
+{
+    sample::gLogError << "Preprocessor engine deserialization failed." << std::endl;
+    return false;
+}
+```
+
+For the MNIST model, attach a Softmax layer to the end of the network and make it the network output in place of the existing one. The softmax axis is set to 1 because the network output has shape [1, 10] in full dims mode:
+```
+auto softmax = network->addSoftMax(*network->getOutput(0));
+softmax->setAxes(1 << 1);
+network->unmarkOutput(*network->getOutput(0));
+network->markOutput(*softmax->getOutput(0));
+```
+
+A calibrator and a calibration profile are set the same way as above for the preprocessor engine config. `calibBatchSize` is set to 1 for the prediction engine because the ONNX model uses an explicit batch.
+
+Finally, build as normal:
+```
+SampleUniquePtr<nvinfer1::IHostMemory> predictionPlan = makeUnique(builder->buildSerializedNetwork(*network, *config));
+if (!predictionPlan)
+{
+    sample::gLogError << "Prediction serialized engine build failed." << std::endl;
+    return false;
+}
+
+mPredictionEngine = makeUnique(
+    runtime->deserializeCudaEngine(predictionPlan->data(), predictionPlan->size()));
+if (!mPredictionEngine)
+{
+    sample::gLogError << "Prediction engine deserialization failed." << std::endl;
+    return false;
+}
+```
+
+### Running inference
+
+During inference, first copy the input buffer to the device:
+```
+CHECK(cudaMemcpy(mInput.deviceBuffer.data(), mInput.hostBuffer.data(), mInput.hostBuffer.nbBytes(), cudaMemcpyHostToDevice));
+```
+
+Since the preprocessor engine accepts dynamic shapes, specify the actual shape of the current input to the execution context:
+`mPreprocessorContext->setInputShape(inputTensorName, inputDims);`, where inputTensorName is the name of the input tensor on binding index 0.
+
+Next, run the preprocessor using the `executeV2` function. The example writes the output of the preprocessor engine directly to the input device buffer of the MNIST engine:
+```
+std::vector<void*> preprocessorBindings = {mInput.deviceBuffer.data(), mPredictionInput.data()};
+bool status = mPreprocessorContext->executeV2(preprocessorBindings.data());
+```
+
+Then, run the MNIST engine:
+```
+std::vector<void*> predicitonBindings = {mPredictionInput.data(), mOutput.deviceBuffer.data()};
+status = mPredictionContext->executeV2(predicitonBindings.data());
+```
+
+Finally, copy the output back to the host:
+```
+CHECK(cudaMemcpy(mOutput.hostBuffer.data(), mOutput.deviceBuffer.data(), mOutput.deviceBuffer.nbBytes(), cudaMemcpyDeviceToHost));
+```
+
+### TensorRT API layers and ops
+
+In this sample, the following layers are used. For more information about these layers, see the [TensorRT Developer Guide: Layers](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#layers) documentation.
+
+[Resize layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#resize-layer)
+The IResizeLayer implements the resize operation on an input tensor.
+
+## Preparing sample data
+
+1. Download the sample data from the [TensorRT release tarball](https://developer.nvidia.com/nvidia-tensorrt-download#) if it is not already mounted under `/usr/src/tensorrt/data` (as in NVIDIA NGC containers), and set `$TRT_DATADIR` to the data directory.
+    ```bash
+    export TRT_DATADIR=/usr/src/tensorrt/data
+    pushd $TRT_DATADIR/mnist
+    pip3 install Pillow
+    popd
+    ```
+
+## Running the sample
+
+1. Compile the sample by following build instructions in [TensorRT README](https://github.com/NVIDIA/TensorRT/).
+
+2.  Run the sample.
+    ```bash
+    ./sample_dynamic_reshape [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>] [--int8 or --fp16]
+    ```
+
+    For example:
+    ```bash
+    ./sample_dynamic_reshape --datadir $TRT_DATADIR/mnist --fp16
+    ```
+
+3. Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+    ```
+    &&&& RUNNING TensorRT.sample_dynamic_reshape # ./sample_dynamic_reshape
+    ----------------------------------------------------------------
+    Input filename:   ../../../../../data/samples/mnist/mnist.onnx
+    ONNX IR version:  0.0.3
+    Opset version:    8
+    Producer name:    CNTK
+    Producer version: 2.5.1
+    Domain:           ai.cntk
+    Model version:    1
+    Doc string:  
+    ----------------------------------------------------------------
+    [W] [TRT] onnx2trt_utils.cpp:214: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
+    [W] [TRT] onnx2trt_utils.cpp:214: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
+    [I] [TRT] Detected 1 inputs and 1 output network tensors.
+    [I] [TRT] Detected 1 inputs and 1 output network tensors.
+    [I] Profile dimensions in preprocessor engine:
+    [I]     Minimum = (1, 1, 1, 1)
+    [I]     Optimum = (1, 1, 28, 28)
+    [I]     Maximum = (1, 1, 56, 56)
+    [I] Input:
+    @@@@@@@@@@@@@@@@@@@@@@@@@@@@
+    @@@@@@@@@@@@@@@@@@@@@@@@@@@@
+    @@@@@@@@@@@@@@@@@@@@@@@@@@@@
+    @@@@@@@@@@@@@@@@@@@@@@@@@@@@
+    @@@@@@@@@@@*.  .*@@@@@@@@@@@
+    @@@@@@@@@@*.     +@@@@@@@@@@
+    @@@@@@@@@@. :#+   %@@@@@@@@@
+    @@@@@@@@@@.:@@@+  +@@@@@@@@@
+    @@@@@@@@@@.:@@@@: +@@@@@@@@@
+    @@@@@@@@@@=%@@@@: +@@@@@@@@@
+    @@@@@@@@@@@@@@@@# +@@@@@@@@@
+    @@@@@@@@@@@@@@@@* +@@@@@@@@@
+    @@@@@@@@@@@@@@@@: +@@@@@@@@@
+    @@@@@@@@@@@@@@@@: +@@@@@@@@@
+    @@@@@@@@@@@@@@@* .@@@@@@@@@@
+    @@@@@@@@@@%**%@. *@@@@@@@@@@
+    @@@@@@@@%+.  .: .@@@@@@@@@@@
+    @@@@@@@@=  ..   :@@@@@@@@@@@
+    @@@@@@@@: *@@:  :@@@@@@@@@@@
+    @@@@@@@%  %@*    *@@@@@@@@@@
+    @@@@@@@%  ++  ++ .%@@@@@@@@@
+    @@@@@@@@-    +@@- +@@@@@@@@@
+    @@@@@@@@=  :*@@@# .%@@@@@@@@
+    @@@@@@@@@+*@@@@@%.  %@@@@@@@
+    @@@@@@@@@@@@@@@@@@@@@@@@@@@@
+    @@@@@@@@@@@@@@@@@@@@@@@@@@@@
+    @@@@@@@@@@@@@@@@@@@@@@@@@@@@
+    @@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+    [I] Output:
+    [I]  Prob 0  0.0000 Class 0: 
+    [I]  Prob 1  0.0000 Class 1: 
+    [I]  Prob 2  1.0000 Class 2: **********
+    [I]  Prob 3  0.0000 Class 3: 
+    [I]  Prob 4  0.0000 Class 4: 
+    [I]  Prob 5  0.0000 Class 5: 
+    [I]  Prob 6  0.0000 Class 6: 
+    [I]  Prob 7  0.0000 Class 7: 
+    [I]  Prob 8  0.0000 Class 8: 
+    [I]  Prob 9  0.0000 Class 9: 
+    &&&& PASSED TensorRT.sample_dynamic_reshape # ./sample_dynamic_reshape
+    ```
+
+    This output shows that the sample ran successfully; `PASSED`.
+
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+## Additional resources
+
+The following resources provide a deeper understanding of dynamic shapes.
+
+**ONNX**
+- [GitHub: ONNX](https://github.com/onnx/onnx)
+- [GitHub: ONNX-TensorRT open source parser](https://github.com/onnx/onnx-tensorrt)
+
+**Models**
+- [MNIST - Handwritten Digit Recognition](https://github.com/onnx/models/tree/master/mnist)
+- [GitHub: ONNX Models](https://github.com/onnx/models)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The Python API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#python_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+## License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+
+## Changelog
+
+February 2020
+This is the second release of the `README.md` file and sample.
+
+
+## Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/sampleDynamicReshape.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/sampleDynamicReshape.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b66ca5b85506f46a16eca618d6414b96548a551f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleDynamicReshape/sampleDynamicReshape.cpp
@@ -0,0 +1,598 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//!
+//! sampleDynamicReshape.cpp
+//! This file contains the implementation of the dynamic reshape MNIST sample. It creates a network
+//! using the MNIST ONNX model, and uses a second engine to resize inputs to the shape the model
+//! expects.
+//! It can be run with the following command:
+//! Command: ./sample_dynamic_reshape [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
+//!
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0
+
+#include "BatchStream.h"
+#include "EntropyCalibrator.h"
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "logger.h"
+#include "parserOnnxConfig.h"
+
+#include "NvInfer.h"
+#include <cuda_runtime_api.h>
+#include <random>
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+
+const std::string gSampleName = "TensorRT.sample_dynamic_reshape";
+
+//! \brief The SampleDynamicReshape class implements the dynamic reshape sample.
+//!
+//! \details This class builds one engine that resizes a given input to the correct size, and a
+//! second engine based on an ONNX MNIST model that generates a prediction.
+//!
+class SampleDynamicReshape
+{
+public:
+    SampleDynamicReshape(const samplesCommon::OnnxSampleParams& params)
+        : mParams(params)
+    {
+    }
+
+    //!
+    //! \brief Builds both engines.
+    //! \return false if any build step fails.
+    bool build();
+
+    //!
+    //! \brief Prepares the model for inference by creating execution contexts and allocating buffers.
+    //! \return false if either execution context could not be created.
+    bool prepare();
+
+    //!
+    //! \brief Runs inference using TensorRT on a random image.
+    //! \return true only if inference ran and the predicted digit matches the input image.
+    bool infer();
+
+private:
+    bool buildPreprocessorEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        const SampleUniquePtr<nvinfer1::IRuntime>& runtime, cudaStream_t profileStream);
+    bool buildPredictionEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        const SampleUniquePtr<nvinfer1::IRuntime>& runtime, cudaStream_t profileStream);
+    // Helpers used by infer(): read the input image and check the prediction.
+    Dims loadPGMFile(const std::string& fileName);
+    bool validateOutput(int digit);
+
+    samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample.
+
+    nvinfer1::Dims mPredictionInputDims;  //!< The dimensions of the input of the MNIST model.
+    nvinfer1::Dims mPredictionOutputDims; //!< The dimensions of the output of the MNIST model.
+
+    SampleUniquePtr<nvinfer1::IRuntime> mRuntime{nullptr}; //!< Runtime used to deserialize both engines.
+
+    // Engines used for inference. One for resizing inputs, another for prediction.
+    SampleUniquePtr<nvinfer1::ICudaEngine> mPreprocessorEngine{nullptr}, mPredictionEngine{nullptr};
+    // Execution contexts for the two engines, created in prepare().
+    SampleUniquePtr<nvinfer1::IExecutionContext> mPreprocessorContext{nullptr}, mPredictionContext{nullptr};
+
+    samplesCommon::ManagedBuffer mInput{};          //!< Host and device buffers for the input.
+    samplesCommon::DeviceBuffer mPredictionInput{}; //!< Device buffer for the output of the preprocessor, i.e. the
+                                                    //!< input to the prediction model.
+    samplesCommon::ManagedBuffer mOutput{};         //!< Host and device buffers for the output.
+    //! \brief Wraps a raw pointer in a SampleUniquePtr of the same type.
+    template <typename T>
+    SampleUniquePtr<T> makeUnique(T* t)
+    {
+        return SampleUniquePtr<T>{t};
+    }
+};
+
+//!
+//! \brief Builds the two engines required for inference.
+//!
+//! \details This function creates one TensorRT engine for resizing inputs to the correct sizes,
+//!          then creates a TensorRT network by parsing the ONNX model and builds
+//!          an engine that will be used to run inference (mPredictionEngine).
+//!
+//! \return false if error in build preprocessor or predict engine.
+//!
+bool SampleDynamicReshape::build()
+{
+    auto& trtLogger = sample::gLogger.getTRTLogger();
+    auto builder = makeUnique(nvinfer1::createInferBuilder(trtLogger));
+    if (builder == nullptr)
+    {
+        sample::gLogError << "Create inference builder failed." << std::endl;
+        return false;
+    }
+
+    mRuntime = makeUnique(nvinfer1::createInferRuntime(trtLogger));
+    if (mRuntime == nullptr)
+    {
+        sample::gLogError << "Runtime object creation failed." << std::endl;
+        return false;
+    }
+
+    try
+    {
+        // CUDA stream handed to both engine builds for profiling.
+        auto profileStream = samplesCommon::makeCudaStream();
+        if (!profileStream)
+        {
+            return false;
+        }
+
+        // The prediction engine is built first: that step fills in mPredictionInputDims and
+        // mPredictionOutputDims, which the preprocessor build reads afterwards.
+        return buildPredictionEngine(builder, mRuntime, *profileStream)
+            && buildPreprocessorEngine(builder, mRuntime, *profileStream);
+    }
+    catch (const std::runtime_error& err)
+    {
+        sample::gLogError << err.what() << std::endl;
+        return false;
+    }
+}
+
+//!
+//! \brief Builds an engine for preprocessing (mPreprocessorEngine).
+//!
+//! \return false if error in build preprocessor engine.
+//!
+bool SampleDynamicReshape::buildPreprocessorEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    const SampleUniquePtr<nvinfer1::IRuntime>& runtime, cudaStream_t profileStream)
+{
+    // Create the preprocessor engine using a network that supports full dimensions (createNetworkV2).
+    auto preprocessorNetwork = makeUnique(builder->createNetworkV2(0));
+    if (!preprocessorNetwork)
+    {
+        sample::gLogError << "Create network failed." << std::endl;
+        return false;
+    }
+
+    // The network is one resize layer: the input (dynamic in dimensions 0, 2 and 3) is resized to mPredictionInputDims.
+    auto input = preprocessorNetwork->addInput("input", nvinfer1::DataType::kFLOAT, Dims4{-1, 1, -1, -1});
+    auto resizeLayer = preprocessorNetwork->addResize(*input);
+    resizeLayer->setOutputDimensions(mPredictionInputDims);
+    preprocessorNetwork->markOutput(*resizeLayer->getOutput(0));
+
+    // Finally, configure and build the preprocessor engine.
+    auto preprocessorConfig = makeUnique(builder->createBuilderConfig());
+    if (!preprocessorConfig)
+    {
+        sample::gLogError << "Create builder config failed." << std::endl;
+        return false;
+    }
+
+    // Create an optimization profile so that we can specify a range of input dimensions.
+    auto profile = builder->createOptimizationProfile();
+    // This profile will be valid for all images whose size falls in the range of [(1, 1, 1, 1), (1, 1, 56, 56)]
+    // but TensorRT will optimize for (1, 1, 28, 28)
+    // We do not need to check the return of setDimensions and addOptimizationProfile here as all dims are explicitly set
+    profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{1, 1, 1, 1});
+    profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{1, 1, 28, 28});
+    profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{1, 1, 56, 56});
+    preprocessorConfig->addOptimizationProfile(profile);
+
+    // Create a calibration profile: min, opt and max are all the same fixed (calibBatchSize, 1, 28, 28) shape.
+    auto profileCalib = builder->createOptimizationProfile();
+    const int calibBatchSize{256};
+    // We do not need to check the return of setDimensions and setCalibrationProfile here as all dims are explicitly set
+    profileCalib->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28});
+    profileCalib->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28});
+    profileCalib->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28});
+    preprocessorConfig->setCalibrationProfile(profileCalib);
+    preprocessorConfig->setProfileStream(profileStream);
+    // Declared at function scope so the calibrator is still alive when buildSerializedNetwork runs below.
+    std::unique_ptr<IInt8Calibrator> calibrator;
+    if (mParams.int8)
+    {
+        preprocessorConfig->setFlag(BuilderFlag::kINT8);
+        const int nCalibBatches{10};
+        MNISTBatchStream calibrationStream(
+            calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs);
+        calibrator.reset(
+            new Int8EntropyCalibrator2<MNISTBatchStream>(calibrationStream, 0, "MNISTPreprocessor", "input"));
+        preprocessorConfig->setInt8Calibrator(calibrator.get());
+    }
+
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};
+
+    // Load timing cache (only when mParams.timingCacheFile is set; otherwise timingCache stays empty).
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache = samplesCommon::buildTimingCacheFromFile(
+            sample::gLogger.getTRTLogger(), *preprocessorConfig, mParams.timingCacheFile);
+    }
+
+    SampleUniquePtr<nvinfer1::IHostMemory> preprocessorPlan
+        = makeUnique(builder->buildSerializedNetwork(*preprocessorNetwork, *preprocessorConfig));
+    if (!preprocessorPlan)
+    {
+        sample::gLogError << "Preprocessor serialized engine build failed." << std::endl;
+        return false;
+    }
+
+    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(
+            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+
+    mPreprocessorEngine
+        = makeUnique(runtime->deserializeCudaEngine(preprocessorPlan->data(), preprocessorPlan->size()));
+    if (!mPreprocessorEngine)
+    {
+        sample::gLogError << "Preprocessor engine deserialization failed." << std::endl;
+        return false;
+    }
+    // Log the min/opt/max shapes of profile 0 for the engine's first I/O tensor.
+     auto const tensorName = mPreprocessorEngine->getIOTensorName(0);
+
+    sample::gLogInfo << "Profile dimensions in preprocessor engine:" << std::endl;
+    sample::gLogInfo << "    Minimum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kMIN)
+                     << std::endl;
+    sample::gLogInfo << "    Optimum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kOPT)
+                     << std::endl;
+    sample::gLogInfo << "    Maximum = " << mPreprocessorEngine->getProfileShape(tensorName, 0, OptProfileSelector::kMAX)
+                     << std::endl;
+
+
+    return true;
+}
+
+//!
+//! \brief Builds an engine for prediction (mPredictionEngine).
+//!
+//! \details This function builds an engine for the MNIST model, and updates mPredictionInputDims and
+//! mPredictionOutputDims according to the dimensions specified by the model. The preprocessor reshapes inputs to
+//! mPredictionInputDims.
+//!
+//! \return false if error in build prediction engine.
+//!
+bool SampleDynamicReshape::buildPredictionEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    const SampleUniquePtr<nvinfer1::IRuntime>& runtime, cudaStream_t profileStream)
+{
+    // Create an empty network, then populate it from the ONNX model file using the parser.
+    auto network = makeUnique(builder->createNetworkV2(0));
+    if (!network)
+    {
+        sample::gLogError << "Create network failed." << std::endl;
+        return false;
+    }
+
+    auto parser = samplesCommon::infer_object(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
+    bool parsingSuccess
+        = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(),
+            static_cast<int>(sample::gLogger.getReportableSeverity()));
+    if (!parsingSuccess)
+    {
+        sample::gLogError << "Failed to parse model." << std::endl;
+        return false;
+    }
+
+    // Attach a softmax layer to the end of the network.
+    auto softmax = network->addSoftMax(*network->getOutput(0));
+    // Set softmax axis to 1 (passed as the bit mask 1 << 1) since network output has shape [1, 10] in full dims mode
+    softmax->setAxes(1 << 1);
+    network->unmarkOutput(*network->getOutput(0)); // the parsed model's output is replaced by the softmax output
+    network->markOutput(*softmax->getOutput(0));
+
+    // Get information about the inputs/outputs directly from the model.
+    mPredictionInputDims = network->getInput(0)->getDimensions();
+    mPredictionOutputDims = network->getOutput(0)->getDimensions();
+
+    // Create a builder config
+    auto config = makeUnique(builder->createBuilderConfig());
+    if (!config)
+    {
+        sample::gLogError << "Create builder config failed." << std::endl;
+        return false;
+    }
+    if (mParams.fp16)
+    {
+        config->setFlag(BuilderFlag::kFP16);
+    }
+    if (mParams.bf16)
+    {
+        config->setFlag(BuilderFlag::kBF16);
+    }
+    config->setProfileStream(profileStream);
+    // Calibration profile: min, opt and max are all the same fixed (calibBatchSize, 1, 28, 28) shape.
+    auto profileCalib = builder->createOptimizationProfile();
+    const auto inputName = mParams.inputTensorNames[0].c_str();
+    const int calibBatchSize{1};
+    // We do not need to check the return of setDimensions and setCalibrationProfile here as all dims are explicitly set
+    profileCalib->setDimensions(inputName, OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28});
+    profileCalib->setDimensions(inputName, OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28});
+    profileCalib->setDimensions(inputName, OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28});
+    config->setCalibrationProfile(profileCalib);
+    // Declared at function scope so the calibrator is still alive when buildSerializedNetwork runs below.
+    std::unique_ptr<IInt8Calibrator> calibrator;
+    if (mParams.int8)
+    {
+        config->setFlag(BuilderFlag::kINT8);
+        int nCalibBatches{10};
+        MNISTBatchStream calibrationStream(
+            calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs);
+        calibrator.reset(
+            new Int8EntropyCalibrator2<MNISTBatchStream>(calibrationStream, 0, "MNISTPrediction", inputName));
+        config->setInt8Calibrator(calibrator.get());
+    }
+    // Timing cache handle; it stays empty unless mParams.timingCacheFile is set.
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};
+
+    // Load timing cache
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache
+            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
+    }
+
+    // Build the prediction engine.
+    SampleUniquePtr<nvinfer1::IHostMemory> predictionPlan
+        = makeUnique(builder->buildSerializedNetwork(*network, *config));
+    if (!predictionPlan)
+    {
+        sample::gLogError << "Prediction serialized engine build failed." << std::endl;
+        return false;
+    }
+
+    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(
+            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+
+    mPredictionEngine = makeUnique(runtime->deserializeCudaEngine(predictionPlan->data(), predictionPlan->size()));
+    if (!mPredictionEngine)
+    {
+        sample::gLogError << "Prediction engine deserialization failed." << std::endl;
+        return false;
+    }
+
+    return true;
+}
+
+//!
+//! \brief Prepares the model for inference by creating an execution context and allocating buffers.
+//!
+//! \details This function sets up the sample for inference. This involves allocating buffers for the inputs and
+//! outputs, as well as creating TensorRT execution contexts for both engines. This only needs to be called a single
+//! time.
+//!
+//! \return false if error in build preprocessor or predict context.
+//!
+bool SampleDynamicReshape::prepare()
+{
+    // Creates an execution context for `engine` into `context` and logs `failureMsg` when the result is null.
+    auto const createContext = [this](nvinfer1::ICudaEngine& engine,
+        SampleUniquePtr<nvinfer1::IExecutionContext>& context, char const* failureMsg) {
+        context = makeUnique(engine.createExecutionContext());
+        if (!context)
+        {
+            sample::gLogError << failureMsg << std::endl;
+        }
+        return context != nullptr;
+    };
+    if (!createContext(*mPreprocessorEngine, mPreprocessorContext, "Preprocessor context build failed.")
+        || !createContext(*mPredictionEngine, mPredictionContext, "Prediction context build failed."))
+    {
+        return false;
+    }
+
+    // The input image size is unknown until inference, so only the fixed-size buffers are allocated here.
+    mPredictionInput.resize(mPredictionInputDims);
+    mOutput.hostBuffer.resize(mPredictionOutputDims);
+    mOutput.deviceBuffer.resize(mPredictionOutputDims);
+    return true;
+}
+
+//!
+//! \brief Runs inference for this sample
+//!
+//! \details This function is the main execution function of the sample.
+//! It runs inference using a random image from the MNIST dataset as an input.
+//!
+bool SampleDynamicReshape::infer()
+{
+    // Pick a digit in [0, 9] at random; its PGM image is loaded into the host buffer and copied to the device.
+    std::random_device seedSource{};
+    std::default_random_engine rng{seedSource()};
+    std::uniform_int_distribution<int> pickDigit{0, 9};
+    int const digit = pickDigit(rng);
+
+    Dims const inputDims = loadPGMFile(samplesCommon::locateFile(std::to_string(digit) + ".pgm", mParams.dataDirs));
+    mInput.deviceBuffer.resize(inputDims);
+    CHECK(cudaMemcpy(
+        mInput.deviceBuffer.data(), mInput.hostBuffer.data(), mInput.hostBuffer.nbBytes(), cudaMemcpyHostToDevice));
+
+    // Tell the preprocessor context the actual shape of this image.
+    CHECK_RETURN_W_MSG(mPreprocessorContext->setInputShape("input", inputDims), false, "Invalid binding dimensions.");
+
+    // Execution is only attempted once every dynamic input shape has been specified.
+    if (!mPreprocessorContext->allInputDimensionsSpecified())
+    {
+        return false;
+    }
+
+    // Step 1: the preprocessor resizes the image from mInput (device side) into mPredictionInput.
+    // executeV2 takes only the binding array; full-dims engines have no separate batch size parameter.
+    std::vector<void*> resizeBindings{mInput.deviceBuffer.data(), mPredictionInput.data()};
+    if (!mPreprocessorContext->executeV2(resizeBindings.data()))
+    {
+        return false;
+    }
+
+    // Step 2: the prediction engine reads the resized image from mPredictionInput and writes
+    // its result into the device side of mOutput.
+    std::vector<void*> predictionBindings{mPredictionInput.data(), mOutput.deviceBuffer.data()};
+    if (!mPredictionContext->executeV2(predictionBindings.data()))
+    {
+        return false;
+    }
+
+    // Bring the result back to the host buffer,
+    // then compare the prediction against the digit that was fed in.
+    CHECK(cudaMemcpy(mOutput.hostBuffer.data(), mOutput.deviceBuffer.data(), mOutput.deviceBuffer.nbBytes(),
+        cudaMemcpyDeviceToHost));
+    return validateOutput(digit);
+}
+
+//!
+//! \brief Loads a PGM file into mInput and returns the dimensions of the loaded image.
+//!
+//! \details This function loads the specified PGM file into the input host buffer.
+//!
+Dims SampleDynamicReshape::loadPGMFile(const std::string& fileName)
+{
+    std::ifstream infile(fileName, std::ifstream::binary);
+    ASSERT(infile.is_open() && "Attempting to read from a file that is not open.");
+    // PGM header fields, in file order: magic number, width, height, maximum gray value.
+    std::string magic;
+    int h{0}, w{0}, max{0};
+    infile >> magic >> w >> h >> max;
+    ASSERT(!infile.fail() && h > 0 && w > 0 && "Malformed PGM header.");
+    infile.seekg(1, infile.cur); // Skip the single whitespace character that ends the header.
+    Dims4 inputDims{1, 1, h, w};
+    size_t vol = samplesCommon::volume(inputDims);
+    std::vector<uint8_t> fileData(vol);
+    infile.read(reinterpret_cast<char*>(fileData.data()), vol);
+
+    // Print an ASCII representation: ten brightness levels, with a line break after every w pixels (one image row).
+    sample::gLogInfo << "Input:\n";
+    for (size_t i = 0; i < vol; i++)
+    {
+        sample::gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % w) ? "" : "\n");
+    }
+    sample::gLogInfo << std::endl;
+
+    // Normalize with inverted intensity (pixel 0 -> 1.0, pixel 255 -> 0.0) and copy to the host buffer.
+    mInput.hostBuffer.resize(inputDims);
+    float* hostDataBuffer = static_cast<float*>(mInput.hostBuffer.data());
+    std::transform(fileData.begin(), fileData.end(), hostDataBuffer,
+        [](uint8_t x) { return 1.0 - static_cast<float>(x / 255.0); });
+    return inputDims;
+}
+
+//!
+//! \brief Checks whether the model prediction (in mOutput) is correct.
+//!
+bool SampleDynamicReshape::validateOutput(int digit)
+{
+    // View the host output buffer as floats and take a copy of the per-class scores.
+    auto const* scores = static_cast<const float*>(mOutput.hostBuffer.data());
+    std::vector<float> const prob(scores, scores + mOutput.hostBuffer.size());
+
+    // Log every score followed by a bar of '*' whose length is the score times ten, rounded half up.
+    for (int cls = 0; cls < static_cast<int>(prob.size()); ++cls)
+    {
+        int const barLength = int(std::floor(prob[cls] * 10 + 0.5F));
+        sample::gLogInfo << " Prob " << cls << "  " << std::fixed << std::setw(5) << std::setprecision(4)
+                         << prob[cls] << " " << "Class " << cls << ": " << std::string(barLength, '*') << std::endl;
+    }
+
+    // The prediction is the index of the largest score; it must equal the digit that was fed in.
+    int const predictedDigit = std::max_element(prob.begin(), prob.end()) - prob.begin();
+    return digit == predictedDigit;
+}
+
+//!
+//! \brief Initializes members of the params struct using the command line args
+//!
+samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args& args)
+{
+    samplesCommon::OnnxSampleParams params;
+    params.onnxFileName = "mnist.onnx"; // Model file and the tensor names registered for it
+    params.inputTensorNames.push_back("Input3");
+    params.outputTensorNames.push_back("Plus214_Output_0");
+    params.int8 = args.runInInt8; // Precision flags and timing cache path are copied from the arguments as they are
+    params.fp16 = args.runInFp16;
+    params.bf16 = args.runInBf16;
+    params.timingCacheFile = args.timingCacheFile;
+    if (!args.dataDirs.empty()) // Directories provided by the user take precedence
+    {
+        params.dataDirs = args.dataDirs;
+    }
+    else // Nothing provided: fall back to the two default MNIST directories
+    {
+        params.dataDirs.push_back("data/mnist/");
+        params.dataDirs.push_back("data/samples/mnist/");
+    }
+    return params;
+}
+
+//!
+//! \brief Prints the help information for running this sample
+//!
+void printHelpInfo()
+{
+    // Synopsis line first, then one line per option; each line is terminated (and flushed) with std::endl.
+    std::ostream& out = std::cout;
+    out << "Usage: ./sample_dynamic_reshape [-h or --help] [-d or --datadir=<path to data directory>] "
+           "[--timingCacheFile=<path to timing cache file>]" << std::endl;
+    out << "--help, -h         Display help information" << std::endl;
+    out << "--datadir          Specify path to a data directory, overriding the default. This option can be used "
+           "multiple times to add multiple directories. If no data directories are given, the default is to use "
+           "(data/samples/mnist/, data/mnist/)" << std::endl;
+    out << "--timingCacheFile  Specify path to a timing cache file. If it does not already exist, it will be "
+        << "created." << std::endl;
+    out << "--int8             Run in Int8 mode." << std::endl;
+    out << "--fp16             Run in FP16 mode." << std::endl;
+    out << "--bf16             Run in BF16 mode." << std::endl;
+}
+
+int main(int argc, char** argv)
+{
+    // Flow: parse the command line, register the test with the sample logger, run the stages, report the result.
+    samplesCommon::Args args;
+    if (!samplesCommon::parseArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+
+    // A help request is answered before the test is registered, so it exits without a pass/fail report.
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    // Register the test and announce its start before the sample object is created.
+    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
+    sample::gLogger.reportTestStart(sampleTest);
+
+    SampleDynamicReshape reshapeSample{initializeSampleParams(args)};
+
+    // The stages run in order build -> prepare -> infer.
+    // && short-circuits, so the first stage that fails prevents the later ones from running.
+    bool const allStagesOk = reshapeSample.build()
+        && reshapeSample.prepare()
+        && reshapeSample.infer();
+    if (!allStagesOk)
+    {
+        return sample::gLogger.reportFail(sampleTest);
+    }
+
+    return sample::gLogger.reportPass(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..eba3d3f2870ea15d828c0a82b58d80d3988c862a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/Makefile
@@ -0,0 +1,17 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+OUTNAME_RELEASE = sample_editable_timing_cache
+OUTNAME_DEBUG   = sample_editable_timing_cache_debug
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+# Include the shared configuration; ?= above lets the caller point MAKEFILE at a different file.
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..99dcb2a934594b346926c7a6fd9999c437af92c7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/README.md
@@ -0,0 +1,106 @@
+# Create a deterministic build using editable timing cache
+**Table of Contents**
+
+- [Create a deterministic build using editable timing cache](#create-a-deterministic-build-using-editable-timing-cache)
+  - [Description](#description)
+  - [Running the sample](#running-the-sample)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, `sampleEditableTimingCache`, illustrates how to build an engine with the desired tactics by modifying the timing cache.
+
+In TensorRT some layers may have multiple implementations, which are called tactics. When building an engine, all of the tactics will be profiled and the fastest one will be chosen and will be written into the TimingCache. In some circumstances, the expected tactic is not the fastest one, and the user needs to replace the best tactic with another tactic. This requirement can be satisfied by editing the timing cache. This sample demonstrates how to achieve this using the Timing Cache editing API and the profiling log.
+
+In this sample, we construct a simple network with 3 nodes: MatMul->Softmax->MatMul. The two MatMuls are identical in all properties except for their names.
+
+First, we construct the network and build an engine from it. The `BuilderConfig` was configured to enable the editable timing cache, so TensorRT outputs the profiling information in logs. Also, it records the decisions on which tactics to use in the Timing Cache.
+
+Then we choose a tactic for the first MatMul that differs from the one previously used and add it to the cache.
+
+Finally, we build the engine again. This time, the cache is reused, so TensorRT doesn't do profiling. Rather, it uses the tactics recorded in the cache. This way, apart from the tactic used by the first MatMul, all the others are the same as before.
+
+## Running the sample
+
+1. The sample gets compiled when building the TensorRT OSS following the [instructions](https://github.com/NVIDIA/TensorRT). The binary named `sample_editable_timing_cache` will be created in the output directory.
+
+2. Run the sample and observe the logs.
+
+    ```
+    ./sample_editable_timing_cache
+    ```
+
+3.  Verify that the sample has run successfully.
+
+    This sample will output a lot of logs. You should see something similar to the following:
+
+    ```
+    Autotuning op matMul1(key: 0x1814870c44ff0f8574df6e3dda04cbd7):
+    Sorted table of all evaluated tactics:
+    tactic_id, cost(in ms), cost/fastest_cost, prediction_correlation, kernel_name, tactic_hash, tunable_parameter
+       3, 0.0112640, 1.00000, 0.50673, sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize32x32x64_stage3_warpsize2x1x2_tensor16x8x8, 0x665ded9abbf88,
+       5, 0.0118784, 1.05455, 0.51157, sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize64x32x64_stage4_warpsize2x1x2_tensor16x8x8, 0x393e4ef8ad243,
+       6, 0.0123904, 1.10000, 0.50600, sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize64x32x64_stage5_warpsize2x2x1_tensor16x8x8, 0x2ad3a182fb05c,
+    ...
+    The selected tactic is (tactic hash, cost(in ms)):0x665ded9abbf88,  0.011264
+    Writing the best tactic (0x665ded9abbf88) to cache
+    ```
+    It reports the name of the profiled operator, the key, the available tactics and the finally used one.
+
+    Also, you should see something like this:
+    ```
+    Name: matMul1_myl0_0, LayerType: gemm, Inputs: [ { Name: input, Dimensions: [128,128], Format/Datatype: Float }, { Name: weight1, Dimensions: [128,128], Format/Datatype: Float }, { Name: __mye34matMul1_alpha, Dimensions: [1], Format/Datatype: Float }, { Name: __mye35matMul1_beta, Dimensions: [1], Format/Datatype: Float }], Outputs: [ { Name: __myln_k_arg__bb1_4, Dimensions: [128,128], Format/Datatype: Float }], TacticName: sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize32x32x64_stage3_warpsize2x1x2_tensor16x8x8, StreamId: 0, Metadata:
+    ```
+    It reports the information about layer `matMul1_myl0_0` in the engine.
+
+    The above logs output by TensorRT aren't very intuitive. For better understanding, a concise version is placed at the very end.
+    ```
+    Layers of the first engine:
+    #0: matMul1_myl0_0                 =uses=> sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize32x32x64_stage3_warpsize2x1x2_tensor16x8x8
+    #1: __myl_TraMaxSubExpSum_myl0_1   =uses=> __myl_TraMaxSubExpSum_0xcbcb71f14cb4526fd18f61134658c571
+    #2: __myl_DivMul_myl0_2            =uses=> __myl_DivMul_0x80125aec9f1e9979e47ef2b407811651
+    #3: matMul2_myl0_3                 =uses=> sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize32x32x64_stage3_warpsize2x1x2_tensor16x8x8
+
+    Profiling table:
+        op: matMul1
+            key: 0x1814870c44ff0f8574df6e3dda04cbd7
+            selected: 0x665ded9abbf88
+            available tactics:
+                0x665ded9abbf88 sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize32x32x64_stage3_warpsize2x1x2_tensor16x8x8
+                0x393e4ef8ad243 sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize64x32x64_stage4_warpsize2x1x2_tensor16x8x8
+                0x2ad3a182fb05c sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize64x32x64_stage5_warpsize2x2x1_tensor16x8x8
+    ...
+
+        op: matMul2
+            key: 0xb222b0832016f1115ff61116c094875a
+            selected: 0x665ded9abbf88
+            available tactics:
+                0x665ded9abbf88 sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize32x32x64_stage3_warpsize2x1x2_tensor16x8x8
+                0x2ad3a182fb05c sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize64x32x64_stage5_warpsize2x2x1_tensor16x8x8
+                0x393e4ef8ad243 sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize64x32x64_stage4_warpsize2x1x2_tensor16x8x8
+    ...
+
+    Originally, layer `matMul1_myl0_0` used kernel `sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize32x32x64_stage3_warpsize2x1x2_tensor16x8x8`.
+    Now, it should use the new kernel `sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize64x32x64_stage4_warpsize2x1x2_tensor16x8x8.`
+
+    Layers of the second engine:
+    #0: matMul1_myl0_0                 =uses=> sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize64x32x64_stage4_warpsize2x1x2_tensor16x8x8
+    #1: __myl_TraMaxSubExpSum_myl0_1   =uses=> __myl_TraMaxSubExpSum_0xcbcb71f14cb4526fd18f61134658c571
+    #2: __myl_DivMul_myl0_2            =uses=> __myl_DivMul_0x80125aec9f1e9979e47ef2b407811651
+    #3: matMul2_myl0_3                 =uses=> sm80_xmma_gemm_f32f32_tf32f32_f32_nn_n_tilesize32x32x64_stage3_warpsize2x1x2_tensor16x8x8
+    ```
+
+    If the sample runs successfully, you should see the following text:
+    ```
+    &&&& PASSED TensorRT.sample_editable_timing_cache [TensorRT v100800] [b18] # sample_editable_timing_cache
+    ```
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+# Known issues
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/sampleEditableTimingCache.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/sampleEditableTimingCache.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ef8744d7b628c128b37572dfbaa2c78408d57572
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleEditableTimingCache/sampleEditableTimingCache.cpp
@@ -0,0 +1,629 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//! \file sampleEditableTimingCache.cpp
+//!
+//! \brief This file contains the implementation of the editable
+//! timing cache sample.
+//!
+//! It builds two engines from a simple network. The second build
+//! reuses the timing cache generated during the first build, but
+//! with one modification: a different tactic is assigned to a
+//! layer.
+//!
+//! The goal of this sample is to show how to build an engine with
+//! desired tactics by modifying the timing cache.
+//!
+//! It can be run with the following command line:
+//! Command: ./sample_editable_timing_cache
+
+#include <cinttypes>
+#include <cstdio>
+#include <cstring>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+#include <cstdlib> // for strtoull
+
+#define DEFINE_TRT_ENTRYPOINTS 1
+#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0
+#include "NvInfer.h"
+#include "common.h"
+#include "logger.h"
+
+using namespace nvinfer1;
+
+using samplesCommon::SampleUniquePtr;
+
+namespace
+{
+
+std::string const kSAMPLE_NAME = "TensorRT.sample_editable_timing_cache"; //!< Test name given to defineTest().
+
+using Name = std::string; //!< Name of an op, a layer or a kernel.
+
+//! \brief A hash string which starts with `0x` followed by some
+//! hexadecimal digits.
+using Hash = std::string;
+
+//! \brief A (hash, kernel) pair that denotes one candidate tactic of some op.
+struct Tactic
+{
+    Hash hash;   //!< Hash string which uniquely identifies the tactic.
+    Name kernel; //!< Name of the kernel used by the tactic.
+};
+
+//! \brief A structure recording the profiling result of an op.
+struct ProfilingRecord
+{
+    Name op; //!< Name of the op.
+
+    Hash key; //!< Hash string which uniquely identifies the op. It's used
+              //!< as a key in Timing Cache.
+
+    std::vector<Tactic> tactics; //!< Available tactics.
+
+    Hash selected; //!< Hash string which uniquely identifies the
+                   //!< tactic finally used by the op.
+};
+
+//! \brief A mapping from the name of an op to its profiling result.
+using ProfilingTable = std::unordered_map<Name, ProfilingRecord>;
+
+void printProfilingTable(ProfilingTable const& table)
+{
+    // Dump each op with its cache key, the selected tactic and all candidate tactics.
+    sample::gLogInfo << "Profiling table:\n";
+    for (auto const& entry : table)
+    {
+        ProfilingRecord const& rec = entry.second;
+        sample::gLogInfo << "\top: " << entry.first << "\n";
+        sample::gLogInfo << "\t\tkey: " << rec.key << "\n";
+        sample::gLogInfo << "\t\tselected: " << rec.selected << "\n";
+        sample::gLogInfo << "\t\tavailable tactics:\n";
+
+        for (Tactic const& candidate : rec.tactics)
+        {
+            sample::gLogInfo << "\t\t\t" << candidate.hash << " " << candidate.kernel << "\n";
+        }
+        sample::gLogInfo << "\n\n";
+    }
+}
+
+// The implementation of std::regex is not entirely reliable on some
+// platforms, so we use basic string interfaces for pattern matching.
+namespace patterns
+{
+
+struct OpKey
+{
+    Name op;  //!< Op name found after `Autotuning op `.
+    Hash key; //!< Text found inside `(key: ...)`.
+};
+
+//! Searches \p text for a sub string like `Autotuning op matMul1(key: 0x1814870c44ff0f8574df6e3dda04cbd7)`
+//! where in this example the field `op` of the returned `OpKey` would be assigned `matMul1`
+//! and the field `key` would be assigned `0x181487...`. Returns `std::nullopt` if there is no such sub string.
+[[nodiscard]] std::optional<OpKey> matchOpKey(char const* const text)
+{
+    char const* const kPREFIX = "Autotuning op ";
+
+    char const* const substr = std::strstr(text, kPREFIX);
+    if (!substr)
+    {
+        return std::nullopt;
+    }
+
+    char op[128 + 1]{}; //< Plus one for the null terminator.
+    char key[128 + 1]{}; //< Plus one for the null terminator.
+    // The field width of 128 keeps each capture inside its buffer; `op` ends at '(' and `key` ends at ')'.
+    int numReceived = std::sscanf(substr + std::strlen(kPREFIX), "%128[^(](key: %128[^)])", op, key);
+    if (numReceived != 2)
+    {
+        return std::nullopt;
+    }
+
+    return OpKey{Name(op), Hash(key)};
+}
+
+[[nodiscard]] bool matchTacticHeader(std::string_view text)
+{
+    return std::string_view::npos != text.find("tactic_id, cost(in ms), cost/fastest_cost"); // Table header.
+}
+
+struct TacticKernel
+{
+    Hash tactic; //!< Hash of the tactic, e.g. `0x533a71cee0d0e`.
+    Name kernel; //!< Name of the kernel, e.g. `sm86_xmma_gemm`.
+};
+
+//! Searches \p text for a sub string like `4, 0.00520, 1.00, 0.883, sm86_xmma_gemm, 0x533a71cee0d0e,`
+//! where in this example the field `tactic` of the returned `TacticKernel` would be assigned `0x533a71cee0d0e`
+//! and the field `kernel` would be assigned `sm86_xmma_gemm`. Returns `std::nullopt` if nothing matches.
+[[nodiscard]] std::optional<TacticKernel> matchTacticKernel(char const* const text)
+{
+    char const* const kDIGITS = "0123456789";
+
+    char const* const substr = std::strpbrk(text, kDIGITS); // Parsing starts at the first decimal digit.
+    if (!substr)
+    {
+        return std::nullopt;
+    }
+
+    char kernel[128 + 1]{}; //< Plus one for the null terminator.
+    char tactic[128 + 1]{}; //< Plus one for the null terminator.
+
+    int numReceived = std::sscanf(substr, "%*d, %*f, %*f, %*f, %128[^,], %128[^,]", kernel, tactic);
+    if (numReceived != 2)
+    {
+        return std::nullopt;
+    }
+    // The kernel name comes first in the text, whereas `TacticKernel` lists the tactic hash first.
+    return TacticKernel{Hash(tactic), Name(kernel)};
+}
+
+//! Searches \p text for a sub string like `The selected tactic is (tactic hash, cost(in ms)):0x533a71cee0d0e,
+//! 0.0050048` where in this example the returned `Hash` would be `0x533a71cee0d0e`; `std::nullopt` if not found.
+[[nodiscard]] std::optional<Hash> matchSelection(char const* const text)
+{
+    char const* const kPREFIX = "(tactic hash, cost(in ms)):";
+
+    char const* const substr = std::strstr(text, kPREFIX);
+    if (!substr)
+    {
+        return std::nullopt;
+    }
+
+    char tactic[128 + 1]{}; //< Plus one for the null terminator.
+    // Capture everything up to the comma that separates the hash from the cost.
+    int numReceived = std::sscanf(substr + std::strlen(kPREFIX), "%128[^,]", tactic);
+    if (numReceived != 1)
+    {
+        return std::nullopt;
+    }
+
+    return Hash(tactic);
+}
+
+struct LayerKernel
+{
+    Name layer;  //!< Text found after `Name: `.
+    Name kernel; //!< Text found after `TacticName: `.
+};
+
+//! Searches \p text for a sub string like `Name: matMul2_myl0_3,
+//! LayerType: ...., TacticName: sm80_xmma_gemm, StreamId: 0` where in
+//! this example the field `layer` of the returned `LayerKernel` would be `matMul2_myl0_3`
+//! and the field `kernel` would be `sm80_xmma_gemm`. Returns `std::nullopt` if either field is missing.
+[[nodiscard]] std::optional<LayerKernel> matchLayerKernel(char const* const text)
+{
+    char const* const kLAYER_PREFIX = "Name: ";
+
+    char const* const layerSubstr = std::strstr(text, kLAYER_PREFIX);
+    if (!layerSubstr)
+    {
+        return std::nullopt;
+    }
+
+    char layer[128 + 1]{}; //< Plus one for the null terminator.
+
+    int numReceived = std::sscanf(layerSubstr + std::strlen(kLAYER_PREFIX), "%128[^,]", layer);
+    if (numReceived != 1)
+    {
+        return std::nullopt;
+    }
+    // The search for the kernel restarts from the beginning of \p text, not from the end of the layer name.
+    char const* const kKERNEL_PREFIX = "TacticName: ";
+
+    char const* const kernelSubstr = std::strstr(text, kKERNEL_PREFIX);
+    if (!kernelSubstr)
+    {
+        return std::nullopt;
+    }
+
+    char kernel[128 + 1]{}; //< Plus one for the null terminator.
+
+    numReceived = std::sscanf(kernelSubstr + std::strlen(kKERNEL_PREFIX), "%128[^,]", kernel);
+    if (numReceived != 1)
+    {
+        return std::nullopt;
+    }
+
+    return LayerKernel{Name(layer), Name(kernel)};
+}
+
+} // namespace patterns
+
+//! \brief `ProfilingLogger` is a decorator of `ILogger`. It
+//! dispatches the message to the decorated logger and extracts
+//! profiling information from the message.
+//!
+//! \details This class overrides the method `log` of class `ILogger`
+//! to analyze each line of the logs. Since the profiling information
+//! is spread across different lines, it builds a simple state
+//! machine to recognize and capture this information.
+class ProfilingLogger : public nvinfer1::ILogger
+{
+private:
+    enum class State
+    {
+        kEXPECT_KEY,           //!< Waiting for a line matched by `patterns::matchOpKey`.
+        kEXPECT_TACTIC_HEADER, //!< Waiting for a line matched by `patterns::matchTacticHeader`.
+        kEXPECT_TACTIC,        //!< Collecting lines matched by `patterns::matchTacticKernel`.
+        kEXPECT_SELECTION,     //!< Waiting for a line matched by `patterns::matchSelection`.
+    };
+
+public:
+    ProfilingLogger(ILogger& logger)
+        : mLogger(logger)
+        , mState(State::kEXPECT_KEY)
+    {
+    }
+
+    void log(Severity severity, AsciiChar const* msg) noexcept override
+    {
+        mLogger.log(severity, msg);
+        // `resolved` is reset to false only when a state hands the same message over to the next state.
+        bool resolved = false;
+
+        while (!resolved)
+        {
+            resolved = true;
+
+            switch (mState)
+            {
+            case State::kEXPECT_KEY:
+            {
+                if (auto optOpKey = patterns::matchOpKey(msg))
+                {
+                    mRecord.op = std::move(optOpKey->op);
+                    mRecord.key = std::move(optOpKey->key);
+                    mState = State::kEXPECT_TACTIC_HEADER;
+                }
+
+                break;
+            }
+
+            case State::kEXPECT_TACTIC_HEADER:
+            {
+                if (patterns::matchTacticHeader(msg))
+                {
+                    mState = State::kEXPECT_TACTIC;
+                }
+
+                break;
+            }
+
+            case State::kEXPECT_TACTIC:
+            {
+                if (auto optTacticKernel = patterns::matchTacticKernel(msg))
+                {
+                    mRecord.tactics.push_back(
+                        Tactic{std::move(optTacticKernel->tactic), std::move(optTacticKernel->kernel)});
+                }
+                else
+                {
+                    mState = State::kEXPECT_SELECTION;
+                    resolved = false; // Re-examine this very message as a possible selection line.
+                }
+
+                break;
+            }
+
+            case State::kEXPECT_SELECTION:
+            {
+                if (auto optTactic = patterns::matchSelection(msg))
+                {
+                    mRecord.selected = std::move(*optTactic);
+                    mTable[mRecord.op] = mRecord; // A later record of the same op name replaces the earlier one.
+                    mRecord = ProfilingRecord{};
+                    mState = State::kEXPECT_KEY;
+                }
+
+                break;
+            }
+            }
+        }
+    }
+
+    //! \brief Get the profiling result and reset the state machine.
+    ProfilingTable fetchTable()
+    {
+        mState = State::kEXPECT_KEY;
+        mRecord = ProfilingRecord{};
+        return std::exchange(mTable, ProfilingTable{});
+    }
+
+private:
+    ILogger& mLogger;        //!< The decorated logger; every message is forwarded to it first.
+    State mState;            //!< Current state of the log parser.
+    ProfilingTable mTable;   //!< Completed records, indexed by op name.
+    ProfilingRecord mRecord; //!< Record under construction.
+};
+
+//! \brief Populate \p network with a chain of three layers:
+//! MatMul -> SoftMax -> MatMul.
+//!
+//! \details Apart from their names, the two MatMuls have
+//! identical attributes.
+//!
+//! \return a pointer to the first MatMul.
+ILayer const* buildGraph(INetworkDefinition* network)
+{
+    ITensor* const in = network->addInput("input", DataType::kFLOAT, Dims2{128, 128});
+    ITensor* const w1 = network->addInput("weight1", DataType::kFLOAT, Dims2{128, 128});
+    ITensor* const w2 = network->addInput("weight2", DataType::kFLOAT, Dims2{128, 128});
+
+    auto first = network->addMatrixMultiply(*in, MatrixOperation::kNONE, *w1, MatrixOperation::kNONE);
+    first->setName("matMul1");
+
+    auto activation = network->addSoftMax(*first->getOutput(0));
+    activation->setName("softmax");
+
+    auto second = network->addMatrixMultiply(
+        *activation->getOutput(0), MatrixOperation::kNONE, *w2, MatrixOperation::kNONE);
+    second->setName("matMul2");
+    network->markOutput(*second->getOutput(0));
+    return first;
+}
+
+//! \brief Return the first candidate tactic whose hash is not the
+//! selected one, or std::nullopt if there is no such candidate.
+std::optional<Tactic> findDifferentTactic(ProfilingRecord const& record)
+{
+    auto const differs = [&record](Tactic const& candidate) { return candidate.hash != record.selected; };
+    auto const found = std::find_if(record.tactics.cbegin(), record.tactics.cend(), differs);
+
+    return found != record.tactics.cend() ? std::make_optional(*found) : std::nullopt;
+}
+
+constexpr int64_t kNUM_PREFIX_CHARS = std::char_traits<char>::length("0x"); //!< Length of the `0x` prefix.
+constexpr int64_t kCHARS_PER_BYTE = 2;                                     //!< Two hexadecimal digits per byte.
+
+constexpr int64_t kBYTES_PER_KEY = 16; //!< Number of bytes parsed into a TimingCacheKey.
+constexpr int64_t kTOTAL_CHARS_PER_KEY = kNUM_PREFIX_CHARS + kBYTES_PER_KEY * kCHARS_PER_BYTE; //!< Key text length.
+
+//! \brief Parse a TimingCacheKey from its text form, i.e. `0x` followed by exactly 32 hexadecimal digits.
+//! \return false on a wrong length, a missing prefix or a non-hexadecimal character (sscanf alone is too lenient).
+bool parseKey(std::string_view text, TimingCacheKey* key)
+{
+    CHECK_RETURN_W_MSG(static_cast<int64_t>(text.size()) == kTOTAL_CHARS_PER_KEY, false, "Unexpected length of key");
+    CHECK_RETURN_W_MSG(text.substr(0, 2) == "0x" && text.find_first_not_of("0123456789abcdefABCDEF", 2) == text.npos,
+        false, "Expected `0x` followed by hexadecimal digits.");
+    for (int64_t i = 0, offset = kNUM_PREFIX_CHARS; i < kBYTES_PER_KEY; ++i, offset += kCHARS_PER_BYTE)
+    {
+        CHECK_RETURN(1 == sscanf(text.data() + offset, "%2" SCNx8, &key->data[i]), false);
+    }
+    return true;
+}
+
+constexpr int64_t kBYTES_PER_TACTIC = 8;
+constexpr int64_t kTOTAL_CHARS_PER_TACTIC = kNUM_PREFIX_CHARS + kBYTES_PER_TACTIC * kCHARS_PER_BYTE;
+
+//! \brief Parse a tactic hash written as `0x` followed by 1 to 16 hexadecimal digits.
+//! \return false if \p text is malformed, in which case \p hash is left untouched.
+bool parseTactic(std::string_view text, size_t* hash)
+{
+    CHECK_RETURN_W_MSG(
+        static_cast<int64_t>(text.size()) <= kTOTAL_CHARS_PER_TACTIC, false, "Unexpected length of tactic");
+    // Validate prefix and digits ourselves: strtoull would also accept "0x" alone, signs and whitespace.
+    bool const wellFormed = text.size() > 2 && text.substr(0, 2) == "0x"
+        && text.find_first_not_of("0123456789abcdefABCDEF", 2) == std::string_view::npos;
+    CHECK_RETURN_W_MSG(wellFormed, false, "Expected `0x` followed by hexadecimal digits.");
+    // A string_view is not necessarily null-terminated, so hand strtoull a terminated copy of the digits.
+    *hash = std::strtoull(std::string(text.substr(2)).c_str(), nullptr, 16);
+    return true;
+}
+
+//! \brief Update the entry of \p cache whose key is \p keyText so that it holds the tactic \p tacticText.
+//! \return false if either text cannot be parsed or if `cache->update` fails.
+bool setTactic(ITimingCache* cache, std::string_view keyText, std::string_view tacticText)
+{
+    TimingCacheKey key;
+    CHECK_RETURN_W_MSG(parseKey(keyText, &key), false, "Failed to parse the key.");
+
+    TimingCacheValue value;
+    CHECK_RETURN_W_MSG(parseTactic(tacticText, &value.tacticHash), false, "Failed to parse the tactic hash");
+    // NOTE(review): 1.0F looks like a placeholder cost for the hand-picked tactic -- confirm its value is irrelevant.
+    value.timingMSec = 1.0F;
+    CHECK_RETURN_W_MSG(cache->update(key, value), false, "Failed to update the timing cache.");
+    return true;
+}
+
+//! \brief A (layer, kernel) pair which denotes a layer in the engine.
+struct LayerKernel
+{
+    Name layer;  //!< Name of the layer.
+    Name kernel; //!< Name of the kernel used by the layer.
+};
+
+//! \brief Append to \p table the name of each layer in the engine, along with the kernel used by it.
+//! Layers whose description is unavailable or cannot be parsed are skipped.
+void extractLayerKernels(ICudaEngine const* engine, std::vector<LayerKernel>& table)
+{
+    SampleUniquePtr<IEngineInspector> inspector{engine->createEngineInspector()};
+    int32_t const numLayers = inspector ? engine->getNbLayers() : 0; // Nothing to read without an inspector.
+
+    for (int32_t i = 0; i < numLayers; ++i)
+    {
+        char const* line = inspector->getLayerInformation(i, LayerInformationFormat::kONELINE);
+        auto optLayerKernel = line ? patterns::matchLayerKernel(line) : std::nullopt; // strstr(nullptr) is UB.
+
+        if (optLayerKernel)
+        {
+            table.push_back({std::move(optLayerKernel->layer), std::move(optLayerKernel->kernel)});
+        }
+    }
+}
+
+void printLayerKernels(std::vector<LayerKernel> const& table)
+{
+    size_t index = 0; // Running position in \p table, printed as `#<index>`.
+    for (LayerKernel const& entry : table)
+    {
+        sample::gLogInfo << "#" << index++ << ": " << std::setw(30) << std::setfill(' ') << std::left << entry.layer
+                         << " =uses=> " << entry.kernel << "\n";
+    }
+}
+
+bool isPrefixOf(std::string_view shorter, std::string_view longer)
+{
+    return longer.substr(0, shorter.size()) == shorter; // substr clamps, so an over-long `shorter` never matches.
+}
+
+//! \brief Return the first entry of \p table that is derived from \p op.
+//!
+//! \details In this sample, a layer derived from a MatMul op carries
+//! the op's name as the prefix of its own name.
+std::optional<LayerKernel> findLayer(std::vector<LayerKernel> const& table, std::string_view op)
+{
+    auto const derivedFromOp = [op](LayerKernel const& candidate) { return isPrefixOf(op, candidate.layer); };
+    auto const found = std::find_if(table.cbegin(), table.cend(), derivedFromOp);
+
+    return found != table.cend() ? std::make_optional(*found) : std::nullopt;
+}
+
+} // namespace
+
+#define FAIL_IF_NOT(status, errMsg) /* On !status: log errMsg, then return reportFail(sampleTest) from the caller. */  \
+    do                                                                                                                 \
+    {                                                                                                                  \
+        if (!(status))                                                                                                 \
+        {                                                                                                              \
+            sample::gLogError << (errMsg) << " Error in " << __FILE__ << ", function " << FN_NAME << "(), line "       \
+                              << __LINE__ << std::endl;                                                                \
+            return sample::gLogger.reportFail(sampleTest);                                                             \
+        }                                                                                                              \
+    } while (0)
+
+int32_t main(int32_t argc, char* argv[])
+{
+    auto sampleTest = sample::gLogger.defineTest(kSAMPLE_NAME, argc, argv);
+    // Every failure path below goes through reportFail(), so the test status is always reported.
+    sample::gLogger.reportTestStart(sampleTest);
+
+    try
+    {
+        // Set the logging level to kVERBOSE to see the profiling
+        // information.
+        sample::gLogger.setReportableSeverity(ILogger::Severity::kVERBOSE);
+
+        ProfilingLogger profilingLogger(sample::gLogger.getTRTLogger());
+
+        SampleUniquePtr<IBuilder> builder{createInferBuilder(profilingLogger)};
+        FAIL_IF_NOT(builder, "Failed to create inference builder.");
+
+        SampleUniquePtr<INetworkDefinition> network{builder->createNetworkV2(0)};
+        FAIL_IF_NOT(network, "Failed to create network.");
+
+        ILayer const* matMul1 = buildGraph(network.get());
+        std::string const opName = matMul1->getName();
+
+        SampleUniquePtr<IBuilderConfig> config{builder->createBuilderConfig()};
+        FAIL_IF_NOT(config, "Failed to create builder config.");
+
+        // Tell the builder to save the name of tactic used by each layer
+        // in the engine.
+        config->setProfilingVerbosity(ProfilingVerbosity::kDETAILED);
+
+        // Enable the editable timing cache. In editable mode, the logs
+        // will contain profiling results of all layers. Besides, each
+        // layer will have its own tactics, which means that changes in
+        // one layer will not affect others.
+        config->setFlag(BuilderFlag::kEDITABLE_TIMING_CACHE);
+
+        // Provide the builder with an empty timing cache.
+        SampleUniquePtr<ITimingCache> timingCache{config->createTimingCache(nullptr, 0)};
+        FAIL_IF_NOT(timingCache, "Failed to create timing cache.");
+
+        FAIL_IF_NOT(config->setTimingCache(*timingCache, true), "Failed to set timing cache.");
+
+        // Build the first engine.
+        SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
+        FAIL_IF_NOT(plan, "Failed to build serialized engine.");
+
+        SampleUniquePtr<IRuntime> runtime{createInferRuntime(profilingLogger)};
+        FAIL_IF_NOT(runtime, "Failed to create the runtime.");
+
+        SampleUniquePtr<ICudaEngine> engine{runtime->deserializeCudaEngine(plan->data(), plan->size())};
+        FAIL_IF_NOT(engine, "Failed to deserialize the engine.");
+
+        // Extract layers' information of the first engine.
+        std::vector<LayerKernel> layerKernels;
+        extractLayerKernels(engine.get(), layerKernels);
+
+        std::optional<LayerKernel> matMulLayer = findLayer(layerKernels, opName);
+        FAIL_IF_NOT(matMulLayer.has_value(), "Cannot find the layer derived from the first MatMul node.");
+
+        // Extract profiling results from the logs.
+        ProfilingTable table = profilingLogger.fetchTable();
+
+        // Find a different tactic for the first MatMul.
+        ProfilingRecord const& opRecord = table.at(opName);
+
+        std::optional<Tactic> newTactic = findDifferentTactic(opRecord);
+        FAIL_IF_NOT(newTactic.has_value(), "No other tactics.");
+
+        // Put the new tactic in the cache. FAIL_IF_NOT (not CHECK_RETURN) so that the failure is reported.
+        FAIL_IF_NOT(setTactic(timingCache.get(), opRecord.key, newTactic->hash), "Failed to edit the timing cache.");
+
+        // Build the second engine, with the modified timing cache.
+        SampleUniquePtr<IHostMemory> newPlan{builder->buildSerializedNetwork(*network, *config)};
+        FAIL_IF_NOT(newPlan, "Failed to build the engine again.");
+
+        SampleUniquePtr<ICudaEngine> newEngine{runtime->deserializeCudaEngine(newPlan->data(), newPlan->size())};
+        FAIL_IF_NOT(newEngine, "Failed to deserialize the engine again.");
+
+        // Extract layers' information of the second engine.
+        std::vector<LayerKernel> newLayerKernels;
+        extractLayerKernels(newEngine.get(), newLayerKernels);
+
+        std::optional<LayerKernel> newMatMulLayer = findLayer(newLayerKernels, opName);
+
+        FAIL_IF_NOT(newMatMulLayer.has_value(), "Cannot find the layer derived from the first MatMul node.");
+
+        FAIL_IF_NOT(newMatMulLayer->kernel == newTactic->kernel, "The layer didn't use the assigned new kernel.");
+
+        sample::gLogInfo << "\n";
+
+        sample::gLogInfo << "Layers of the first engine:\n";
+        printLayerKernels(layerKernels);
+
+        sample::gLogInfo << "\n";
+
+        printProfilingTable(table);
+
+        sample::gLogInfo << "Originally, layer `" << matMulLayer->layer << "` used kernel `" << matMulLayer->kernel
+                         << "`.\n";
+        sample::gLogInfo << "Now, it should use the new kernel `" << newTactic->kernel << ".`\n";
+        sample::gLogInfo << "\n";
+
+        sample::gLogInfo << "Layers of the second engine:\n";
+        printLayerKernels(newLayerKernels);
+
+        sample::gLogInfo << "\n";
+
+        return sample::gLogger.reportPass(sampleTest);
+    }
+    catch (std::exception const& err)
+    {
+        sample::gLogError << "Exception: " << err.what() << "\n";
+        return sample::gLogger.reportFail(sampleTest);
+    }
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..89357cf63b5531fb86375299b6f264aa60dac632
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/Makefile
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+OUTNAME_RELEASE = sample_int8_api
+OUTNAME_DEBUG   = sample_int8_api_debug
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4c2264a86a206c416d2a0a584cd917e746fa5938
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/README.md
@@ -0,0 +1,323 @@
+# Performing Inference In INT8 Precision
+
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+	* [Configuring the builder to use INT8 without the INT8 calibrator](#configuring-the-builder-to-use-int8-without-the-int8-calibrator)
+	* [Configuring the network to use custom dynamic ranges and set per-layer precision](#configuring-the-network-to-use-custom-dynamic-ranges-and-set-per-layer-precision)
+	* [Building the engine](#building-the-engine)
+	* [Running the engine](#running-the-engine)
+	* [TensorRT API layers and ops](#tensorrt-api-layers-and-ops)
+- [Preparing sample data](#preparing-sample-data)
+- [Running the sample](#running-the-sample)
+	* [Sample `--help` options](#sample-help-options)
+- [Models other than ResNet-50 with custom configuration](#models-other-than-resnet-50-with-custom-configuration)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+
+## Description
+
+This sample, sampleINT8API, performs INT8 inference without using the INT8 calibrator, relying instead on user-provided per-activation-tensor dynamic ranges. INT8 inference is available only on GPUs with compute capability 6.1 or newer. The sample supports Image Classification ONNX models such as ResNet-50, VGG19, and MobileNet.
+
+Specifically, this sample demonstrates how to:
+-   Use `nvinfer1::ITensor::setDynamicRange` to set per-tensor dynamic range
+-   Use `nvinfer1::ILayer::setPrecision` to set computation precision of a layer
+-   Use `nvinfer1::ILayer::setOutputType` to set output tensor data type of a layer
+-   Perform INT8 inference without using INT8 calibration
+
+## How does this sample work?
+
+In order to perform INT8 inference, you need to provide TensorRT with the dynamic range for each network tensor, including the network input and output tensors. One way to choose the dynamic range is to use the TensorRT INT8 calibrator. But if you don't want to go that route (for example, let's say you used quantization-aware training or you just want to use the min and max tensor values seen during training), you can skip the INT8 calibration and set custom per-network tensor dynamic ranges. This sample implements INT8 inference for the ONNX ResNet-50 model using per-network tensor dynamic ranges specified in an input file.
+
+This sample uses the [ONNX ResNet-50 model](https://github.com/onnx/models/tree/master/vision/classification/resnet/model).
+
+Specifically, this sample performs the following steps:
+- [Configuring the builder to use INT8 without the INT8 calibrator](#configuring-the-builder-to-use-int8-without-the-int8-calibrator)
+- [Configuring the network to use custom dynamic ranges and set per-layer precision](#configuring-the-network-to-use-custom-dynamic-ranges-and-set-per-layer-precision)
+- [Building the engine](#building-the-engine)
+- [Running the engine](#running-the-engine)
+
+### Configuring the builder to use INT8 without the INT8 calibrator
+
+1.  Enable INT8 mode by setting the builder flag:
+	`builder->setFlag(BuilderFlag::kINT8);`
+
+	You can choose not to provide the INT8 calibrator.
+	`builder->setInt8Calibrator(nullptr);`
+
+	If you do provide a calibrator, manually set dynamic ranges will override the calibration-generated dynamic ranges/scales. See sampleINT8 for how to set up an INT8 calibrator.
+
+2.  Optionally and for debugging purposes, the following flag configures the builder to choose type conforming layer implementation, if one exists.
+
+	`builder->setStrictTypeConstraints(true);`
+
+	Setting `setStrictTypeConstraints(true)` together with the builder flag `setFlag(BuilderFlag::kINT8)` ensures that only the conformant layer implementation (with `kINT8` input and output types) is chosen even if a high performance non-conformant implementation is available. If no conformant layer exists, TensorRT will choose a non-conformant layer if available regardless of the setting for this flag.
+
+### Configuring the network to use custom dynamic ranges and set per-layer precision
+
+1.  Iterate through the network to set the per activation tensor dynamic range.
+	```
+	readPerTensorDynamicRangeValue() // This function populates dictionary with keys=tensor_names, values=floating point dynamic range.
+	```
+
+2.  Set the dynamic range for network inputs:
+	```
+	string input_name = network->getInput(i)->getName();
+	network->getInput(i)->setDynamicRange(-tensorMap.at(input_name), tensorMap.at(input_name));
+	```
+
+3.  Set the dynamic range for per layer tensors:
+	```
+	string tensor_name = network->getLayer(i)->getOutput(j)->getName();
+	network->getLayer(i)->getOutput(j)->setDynamicRange(-tensorMap.at(tensor_name), tensorMap.at(tensor_name));
+	```
+
+4.  Optional: This sample also showcases using layer precision APIs. Using these APIs, you can selectively choose to run the layer with user configurable precision and type constraints. It may not result in optimal inference performance, but can be helpful while debugging mixed precision inference.
+
+	Iterate through the network to set the per-layer precision:
+	```
+	auto layer = network->getLayer(i);
+	layer->setPrecision(nvinfer1::DataType::kINT8);
+	```
+
+	This gives the layer's inputs and outputs a preferred type (for example, `DataType::kINT8`). You can choose a different preferred type for an input or output of a layer using:
+	```
+	for (int j=0; j<layer->getNbOutputs(); ++j) {
+	layer->setOutputType(j, nvinfer1::DataType::kFLOAT);
+	}
+	```
+
+	Using layer precision APIs with `builder->setStrictTypeConstraints(true)` set ensures that the requested layer precisions are obeyed by the builder irrespective of the performance. If no implementation is available with the requested precision constraints, the builder will choose the fastest implementation irrespective of precision and type constraints. For more information on using mixed precision APIs, see [Setting The Layer Precision Using C++](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#set_layer_mp_c).
+
+### Building the engine
+
+After the builder has been configured for INT8 mode (with or without a calibrator), the engine is built in the same way as any FP32 engine.
+
+`ICudaEngine* engine = builder->buildCudaEngine(*network);`
+
+### Running the engine
+
+After the engine has been built, it can be used just like an FP32 engine. For example, inputs and outputs remain in 32-bit floating point.
+
+1.  Create an execution context and CUDA stream for the execution of this inference.
+	```
+	auto context = mEngine->createExecutionContext();
+	cudaStream_t stream;
+	cudaStreamCreate(&stream);
+	```
+
+2.  Copy the data from the host input buffers to the device input buffers.
+	```
+	buffers.copyInputToDeviceAsync(stream);
+	```
+
+3.  Enqueue the inference work and perform actual inference.
+	```
+	context->enqueueV3(stream);
+	```
+
+4.  Copy data from the device output buffers to the host output buffers.
+	`buffers.copyOutputToHostAsync(stream);`
+
+5.  Wait for the work in the stream to complete and release it.
+	```
+	cudaStreamSynchronize(stream);
+	cudaStreamDestroy(stream);
+	```
+
+6.  Check and print the output of the inference.
+	`outputCorrect = verifyOutput(buffers);`
+
+### TensorRT API layers and ops
+
+This sample demonstrates how you can enable INT8 inference using the following mixed precision APIs.
+
+[ITensor::SetDynamicRange](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/classnvinfer1_1_1_i_tensor.html#a956f662b1d2ebe7ba3aba3391aedddf5)
+Set dynamic range for the tensor. Currently, only symmetric ranges are supported, therefore, the larger of the absolute values of the provided bounds is used.
+
+[ILayer::SetPrecision](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/classnvinfer1_1_1_i_layer.html#ac66f1546a28a92c20a76718a6762ea14)
+Set the computational precision of this layer. Setting the precision forces TensorRT to choose the implementations which run at this precision. If precision is not set, TensorRT will select the computational precision based on performance considerations and the flags specified to the builder.
+
+[ILayer::SetOutputType](https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/classnvinfer1_1_1_i_layer.html#a85aded4e3ff0867e392602551d5b5dc7)
+Set the output type of this layer. Setting the output type forces TensorRT to choose the implementations which generate output data with the given type. If the output type is not set, TensorRT will select the implementation based on performance considerations and the flags specified to the builder.
+
+## Preparing sample data
+
+`ResNet50.onnx` is located in the `data/resnet50` directory.
+
+In addition to the model file and input image, you will need per-tensor dynamic range stored in a text file along with the ImageNet label reference file.
+
+The following required files are included in the package and are located in the `data/int8_api` directory.
+
+`reference_labels.txt`
+The ImageNet reference label file.
+
+`resnet50_per_tensor_dynamic_range.txt`
+The ResNet-50 per-tensor dynamic ranges file.
+
+`airliner.ppm`
+The image to be inferred.
+
+## Running the sample
+
+1. Compile the sample by following build instructions in [TensorRT README](https://github.com/NVIDIA/TensorRT/).
+
+2. Run the sample to perform INT8 inference on a classification network, for example, ResNet-50.
+
+    To run INT8 inference with custom dynamic ranges:
+    ```bash
+    ./sample_int8_api [--model=model_file] [--ranges=per_tensor_dynamic_range_file] [--image=image_file] [--reference=reference_file] [--data=/path/to/data/dir] [--useDLACore=<int>] [-v or --verbose]
+    ```
+
+    For example:
+    ```bash
+    ./sample_int8_api --model=$TRT_DATADIR/resnet50/ResNet50.onnx --image=$TRT_DATADIR/int8_api/airliner.ppm --reference=$TRT_DATADIR/int8_api/reference_labels.txt --ranges=$TRT_DATADIR/int8_api/resnet50_per_tensor_dynamic_range.txt
+    ```
+
+3. Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+
+	```
+	&&&& RUNNING TensorRT.sample_int8_api # ./sample_int8_api
+	[I] Please follow README.md to generate missing input files.
+	[I] Validating input parameters. Using following input files for inference.
+	[I]     Model File: ../../../../../../../../../data/samples/int8_api/resnet50.onnx
+	[I]     Image File: ../../../../../../../../../data/samples/int8_api/airliner.ppm
+	[I]     Reference File: ../../../../../../../../../data/samples/int8_api/reference_labels.txt
+	[I]     Dynamic Range File: ../../../../../../../../../data/samples/int8_api/resnet50_per_tensor_dynamic_range.txt
+	[I] Building and running a INT8 GPU inference engine for ../../../../../../../../../data/samples/int8_api/resnet50.onnx
+	[I] [TRT] ----------------------------------------------------------------
+	[I] [TRT] Input filename:   ../../../../../../../../../data/samples/int8_api/resnet50.onnx
+	[I] [TRT] ONNX IR version:  0.0.3
+	[I] [TRT] Opset version:    9
+	[I] [TRT] Producer name:    onnx-caffe2
+	[I] [TRT] Producer version:
+	[I] [TRT] Domain:
+	[I] [TRT] Model version:    0
+	[I] [TRT] Doc string:
+	[I] [TRT] ----------------------------------------------------------------
+	[I] Setting Per Layer Computation Precision
+	[I] Setting Per Tensor Dynamic Range
+	[W] [TRT] Calibrator is not being used. Users must provide dynamic range for all tensors that are not Int32 or Bool.
+	[I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
+	[I] [TRT] Detected 1 inputs and 1 output network tensors.
+	[I] [TRT] Total Host Persistent Memory: 123728
+	[I] [TRT] Total Device Persistent Memory: 0
+	[I] [TRT] Total Scratch Memory: 0
+	[I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 116 MiB, GPU 4523 MiB
+	[I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 3.49361ms to assign 3 blocks to 74 nodes requiring 2408448 bytes.
+	[I] [TRT] Total Activation Memory: 2408448
+	[I] [TRT] Loaded engine size: 25 MiB
+	[I] SampleINT8API result: Detected:
+	[I] [1] space shuttle
+	[I] [2] airliner
+	[I] [3] warplane
+	[I] [4] projectile
+	[I] [5] wing
+	&&&& PASSED TensorRT.sample_int8_api # ./sample_int8_api
+	```
+
+	This output shows that the sample ran successfully; `PASSED`.
+
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+## Models other than ResNet-50 with custom configuration
+
+In order to use this sample with other model files with a custom configuration, perform the following steps:
+
+1.  Download the [Image Classification model files](https://github.com/onnx/models/tree/master/vision/classification) from GitHub.
+
+2.  Create an input image with a PPM extension. Resize it with the dimensions of 224x224x3.
+
+3.  Create a file called `reference_labels.txt`.
+
+	**Note:** Ensure each line corresponds to a single imagenet label. You can download the imagenet 1000 class human readable labels from [here](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a). The reference label file contains only a single label name per line, for example, `0:'tench, Tinca tinca'` is represented as `tench`.
+
+4.  Create a file called `<network_name>_per_tensor_dynamic_ranges.txt`.
+
+	1.  Before you can create the dynamic range file, you need the names of the network tensors, because a dynamic range must be provided for each network tensor.
+
+		This sample provides an option to write names of the network tensors to a file, for example `network_tensors.txt`. This file can then be used to generate the `<network_name>_per_tensor_dynamic_ranges.txt` file in step 4-2 below. To generate the list of network tensors file, perform the following steps:
+
+		i.  Write network tensors to a file:
+		```bash
+		./sample_int8_api [--model=model_file] [--write_tensors] [--network_tensors_file=network_tensors.txt] [-v or --verbose]
+		```
+
+		ii.  Run INT8 inference with user provided dynamic ranges:
+		```bash
+		./sample_int8_api [--model=model_file] [--ranges=per_tensor_dynamic_range_file] [--image=image_file] [--reference=reference_file] [--data=/path/to/data/dir] [--useDLACore=<int>] [-v or --verbose]
+		```
+
+		sampleINT8API needs the following files to build the network and run inference:
+
+		`<network>.onnx`
+		The model file which contains the network and trained weights.
+
+		`Reference_labels.txt`
+		Labels reference file i.e. ground truth ImageNet 1000 class mappings.
+
+		`Per_tensor_dynamic_range.txt`
+		Custom per-tensor dynamic range file or you can simply override them by iterating through network layers.
+
+		`Image_to_infer.ppm`
+		PPM Image to run inference with.
+
+		**Note:** By default, the sample expects these files to be in either the `data/samples/int8_api/` or `data/int8_api/` directories. The list of default directories can be changed by adding one or more paths with `--data=/new/path` as a command line argument.
+
+	2.  To create the `<network_name>_per_tensor_dynamic_ranges.txt` file, ensure each line corresponds to the tensor name and floating point dynamic range, for example `<tensor_name> : <float dynamic range>`.
+
+		Tensor names generated in the `network_tensors.txt` file (step 4-1) can be used here to represent `<tensor_name>`. The dynamic range can either be obtained from training (by measuring the `min` and `max` value of activation tensors in each epoch) or from using custom post processing techniques (similar to TensorRT calibration). You can also choose to use a dummy per-tensor dynamic range to run the sample.
+
+		**Note:** INT8 inference accuracy may be reduced when dummy/random dynamic ranges are provided.
+
+## Additional resources
+
+The following resources provide a deeper understanding of how to perform inference in INT8:
+
+**INT8API:**
+- [Setting Per-Tensor Dynamic Range Using C++](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#set_tensor_mp_c)
+
+**Generate per-tensor dynamic range:**
+- [Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference](https://arxiv.org/pdf/1712.05877.pdf)
+- [Quantizing Deep Convolutional Networks for Efficient Inference: A Whitepaper](https://arxiv.org/pdf/1806.08342.pdf)
+- [8-bit Inference with TensorRT](http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf)
+
+**Models:**
+- [ONNX ResNet-50 model](https://github.com/onnx/models/tree/master/vision/classification/resnet/model)
+- [Image Classification Model Files](https://github.com/onnx/models/tree/master/vision/classification)
+
+**Blogs:**
+- [Why are Eight Bits Enough for Deep Neural Networks?](https://petewarden.com/2015/05/23/why-are-eight-bits-enough-for-deep-neural-networks/)
+- [What I've learned about Neural Network Quantization](https://petewarden.com/2017/06/22/what-ive-learned-about-neural-network-quantization/)
+
+**Videos:**
+- [Inference and Quantization](https://www.youtube.com/watch?v=VsGX9kFXjbs)
+- [8-bit Inference with TensorRT Webinar](http://on-demand.gputechconf.com/gtcdc/2017/video/DC7172/)
+
+**Documentation:**
+- [Introduction to NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working with TensorRT Using the C++ API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#c_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+## License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+
+## Changelog
+
+March 2019
+This `README.md` file was recreated, updated and reviewed.
+
+
+## Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/sampleINT8API.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/sampleINT8API.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..352d66a0b427ea870c225f0313cc953ecbb6b18e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleINT8API/sampleINT8API.cpp
@@ -0,0 +1,923 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//! sampleINT8API.cpp
+//! This file contains implementation showcasing usage of INT8 calibration and precision APIs.
+//! It creates classification networks such as mobilenet, vgg19, resnet-50 from onnx model file.
+//! This sample showcases setting per-tensor dynamic ranges, overriding calibrator-generated scales if they exist.
+//! It also showcases how to set the computation precision of a layer, which involves forcing the layer's output tensor
+//! type to a particular precision. It can be run with the following command line: Command: ./sample_int8_api [-h or --help]
+//! [-m modelfile] [-s per_tensor_dynamic_range_file] [-i image_file] [-r reference_file] [-d path/to/data/dir]
+//! [--verbose] [-useDLA <id>]
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0
+
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "logger.h"
+
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+
+#include <cstdlib>
+#include <cuda_runtime_api.h>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <unordered_map>
+#include <vector>
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+
+const std::string gSampleName = "TensorRT.sample_int8_api";
+
+struct SampleINT8APIPreprocessing // Holds the default input dimensions for this sample.
+{
+    // Preprocessing values are available here:
+    // https://github.com/onnx/models/tree/master/models/image_classification/resnet
+    std::vector<int> inputDims{1, 3, 224, 224}; // presumably NCHW (batch 1, 3 channels, 224x224) -- TODO confirm
+};
+
+//!
+//! \brief The SampleINT8APIParams structure groups the additional parameters required by
+//!         the INT8 API sample
+//!
+struct SampleINT8APIParams
+{
+    bool verbose{false}; //!< When true, the sample emits the extra log lines guarded by mParams.verbose.
+    bool writeNetworkTensors{false}; //!< presumably selects the "write tensor names" mode -- confirm in arg parsing
+    int dlaCore{-1}; //!< presumably the DLA core index, with -1 meaning "do not use DLA" -- TODO confirm
+
+    SampleINT8APIPreprocessing mPreproc; //!< Default input dimensions.
+    std::string modelFileName; //!< presumably the path of the ONNX model -- confirm against build()
+    std::vector<std::string> dataDirs; //!< presumably directories searched for the input files -- TODO confirm
+    std::string dynamicRangeFileName; //!< File opened by readPerTensorDynamicRangeValues().
+    std::string imageFileName; //!< presumably the image read by prepareInput() -- TODO confirm
+    std::string referenceFileName; //!< presumably the label file used by verifyOutput() -- TODO confirm
+    std::string networkTensorsFileName; //!< File that writeNetworkTensorNames() writes the tensor names to.
+    std::string timingCacheFile; //!< NOTE(review): not referenced in the code visible here; verify its use.
+};
+
+//!
+//! \brief The SampleINT8API class implements INT8 inference on classification networks.
+//!
+//! \details Showcases the INT8 APIs for setting a custom dynamic range on network tensors and for setting
+//!           per-layer precision, i.e. how to perform INT8 inference without a calibration table.
+//!
+class SampleINT8API
+{
+private:
+    template <typename T>
+    using SampleUniquePtr = std::unique_ptr<T>; // class-local alias; hides the file-scope SampleUniquePtr in here
+
+public:
+    SampleINT8API(const SampleINT8APIParams& params)
+        : mParams(params)
+    {
+    }
+
+    //!
+    //! \brief Builds the network engine
+    //!
+    sample::Logger::TestResult build();
+
+    //!
+    //! \brief Runs the TensorRT inference engine for this sample
+    //!
+    sample::Logger::TestResult infer();
+
+    //!
+    //! \brief Used to clean up any state created in the sample class
+    //!
+    sample::Logger::TestResult teardown();
+
+    SampleINT8APIParams mParams; //!< Copy of the sample parameters passed to the constructor.
+
+private:
+    SampleUniquePtr<IRuntime> mRuntime{}; //!< The TensorRT Runtime used to deserialize the engine.
+
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network
+
+    std::map<std::string, std::string> mInOut; //!< Maps the keys "input"/"output" to the engine's I/O tensor names
+
+    nvinfer1::Dims mInputDims; //!< The dimensions of the input to the network
+
+    nvinfer1::Dims mOutputDims; //!< The dimensions of the output to the network
+
+    std::unordered_map<std::string, float>
+        mPerTensorDynamicRangeMap; //!< Mapping from tensor name to max absolute dynamic range values
+
+    void getInputOutputNames(); //!< Fills mInOut from the I/O tensors reported by mEngine
+
+    //!
+    //! \brief Reads the ppm input image, preprocesses, and stores the result in a managed buffer
+    //!
+    bool prepareInput(const samplesCommon::BufferManager& buffers);
+
+    //!
+    //! \brief Verifies that the output is correct and prints it
+    //!
+    bool verifyOutput(const samplesCommon::BufferManager& buffers) const;
+
+    //!
+    //! \brief Reads "name:value" lines from mParams.dynamicRangeFileName into mPerTensorDynamicRangeMap
+    //! \return false if the file cannot be opened, true otherwise
+    bool readPerTensorDynamicRangeValues();
+
+    //!
+    //! \brief  Sets custom dynamic range for network tensors
+    //! \return false if the ranges cannot be read or a setDynamicRange() call on a tensor fails
+    bool setDynamicRange(SampleUniquePtr<nvinfer1::INetworkDefinition>& network);
+
+    //!
+    //! \brief  Sets computation precision for network layers
+    //!
+    void setLayerPrecision(SampleUniquePtr<nvinfer1::INetworkDefinition>& network);
+
+    //!
+    //! \brief  Writes the names of the network inputs and of all layer outputs to mParams.networkTensorsFileName
+    //!
+    void writeNetworkTensorNames(const SampleUniquePtr<nvinfer1::INetworkDefinition>& network);
+};
+
+//! \brief  Populates input and output mapping of the network
+//! \details Asserts that the engine reports exactly two I/O tensors. The name of a tensor whose IO mode is kINPUT is
+//!          stored under mInOut["input"]; the name of any other tensor is stored under mInOut["output"].
+void SampleINT8API::getInputOutputNames()
+{
+    int32_t nbindings = mEngine->getNbIOTensors();
+    ASSERT(nbindings == 2); // exactly two I/O tensors are expected
+    for (int32_t b = 0; b < nbindings; ++b)
+    {
+        auto const bindingName = mEngine->getIOTensorName(b);
+        nvinfer1::Dims dims = mEngine->getTensorShape(bindingName); // only used by the verbose log lines below
+        if (mEngine->getTensorIOMode(bindingName) == TensorIOMode::kINPUT)
+        {
+            if (mParams.verbose)
+            {
+                sample::gLogInfo << "Found input: " << bindingName << " shape=" << dims
+                                 << " dtype=" << static_cast<int32_t>(mEngine->getTensorDataType(bindingName))
+                                 << std::endl; // dtype is logged as the integer value of the DataType enum
+            }
+            mInOut["input"] = bindingName;
+        }
+        else
+        {
+            if (mParams.verbose)
+            {
+                sample::gLogInfo << "Found output: " << bindingName << " shape=" << dims
+                                 << " dtype=" << static_cast<int32_t>(mEngine->getTensorDataType(bindingName))
+                                 << std::endl; // dtype is logged as the integer value of the DataType enum
+            }
+            mInOut["output"] = bindingName;
+        }
+    }
+}
+
+//! \brief Populate per-tensor dynamic range values from the file named by mParams.dynamicRangeFileName.
+//! \details Each usable line is "<tensor_name>:<float>". Blank lines and lines lacking ':' or a value are skipped.
+//! \return false if the file cannot be opened, true otherwise (std::stof may still throw on a non-numeric value).
+bool SampleINT8API::readPerTensorDynamicRangeValues()
+{
+    std::ifstream iDynamicRangeStream(mParams.dynamicRangeFileName);
+    if (!iDynamicRangeStream)
+    {
+        sample::gLogError << "Could not find per-tensor scales file: " << mParams.dynamicRangeFileName << std::endl;
+        return false;
+    }
+
+    std::string line;
+    char delim = ':';
+    while (std::getline(iDynamicRangeStream, line))
+    {
+        std::istringstream iline(line);
+        std::string tensorName;
+        std::string token;
+        // Without this guard a blank or ':'-less line reaches std::stof with an empty/stale token and throws.
+        if (!std::getline(iline, tensorName, delim) || !std::getline(iline, token, delim)) { continue; }
+        float dynamicRange = std::stof(token);
+        mPerTensorDynamicRangeMap[tensorName] = dynamicRange;
+    }
+    return true;
+}
+
+//! \brief  Sets computation precision for network layers
+//! \details For every layer except kCONSTANT, kCONCATENATION and kSHAPE the precision is set to kINT8; in addition,
+//!          every layer output that is an execution tensor gets kINT8 as its output type.
+void SampleINT8API::setLayerPrecision(SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
+{
+    sample::gLogInfo << "Setting Per Layer Computation Precision" << std::endl;
+    for (int i = 0; i < network->getNbLayers(); ++i)
+    {
+        auto layer = network->getLayer(i);
+        if (mParams.verbose) // NOTE(review): logged before the type filter below, so skipped layers also print INT8
+        {
+            std::string layerName = layer->getName();
+            sample::gLogInfo << "Layer: " << layerName << ". Precision: INT8" << std::endl;
+        }
+
+        // Don't set the precision on non-computation layers as they don't support
+        // int8.
+        if (layer->getType() != LayerType::kCONSTANT && layer->getType() != LayerType::kCONCATENATION
+            && layer->getType() != LayerType::kSHAPE)
+        {
+            // set computation precision of the layer
+            layer->setPrecision(nvinfer1::DataType::kINT8);
+        }
+
+        for (int j = 0; j < layer->getNbOutputs(); ++j)
+        {
+            std::string tensorName = layer->getOutput(j)->getName(); // NOTE(review): shadowed below, otherwise unused
+            if (mParams.verbose) // logged for every output, including those skipped by the check below
+            {
+                std::string tensorName = layer->getOutput(j)->getName();
+                sample::gLogInfo << "Tensor: " << tensorName << ". OutputType: INT8" << std::endl;
+            }
+            // set output type of execution tensors and not shape tensors.
+            if (layer->getOutput(j)->isExecutionTensor())
+            {
+                layer->setOutputType(j, nvinfer1::DataType::kINT8);
+            }
+        }
+    }
+}
+
+//! \brief  Write network tensor names to a file.
+//! \details Writes one "TensorName: <name>" line to mParams.networkTensorsFileName for every network input and then
+//!          for every output of every layer; in verbose mode the same lines are also logged.
+void SampleINT8API::writeNetworkTensorNames(const SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
+{
+    sample::gLogInfo << "Sample requires to run with per-tensor dynamic range." << std::endl;
+    sample::gLogInfo
+        << "In order to run Int8 inference without calibration, user will need to provide dynamic range for all "
+           "the network tensors."
+        << std::endl;
+
+    std::ofstream tensorsFile{mParams.networkTensorsFileName}; // NOTE(review): a failed open is not checked
+
+    // Iterate through network inputs to write names of input tensors.
+    for (int i = 0; i < network->getNbInputs(); ++i)
+    {
+        std::string tName = network->getInput(i)->getName();
+        tensorsFile << "TensorName: " << tName << std::endl;
+        if (mParams.verbose)
+        {
+            sample::gLogInfo << "TensorName: " << tName << std::endl;
+        }
+    }
+
+    // Iterate through network layers.
+    for (int i = 0; i < network->getNbLayers(); ++i)
+    {
+        // Write output tensors of a layer to the file.
+        for (int j = 0; j < network->getLayer(i)->getNbOutputs(); ++j)
+        {
+            std::string tName = network->getLayer(i)->getOutput(j)->getName();
+            tensorsFile << "TensorName: " << tName << std::endl;
+            if (mParams.verbose)
+            {
+                sample::gLogInfo << "TensorName: " << tName << std::endl;
+            }
+        }
+    }
+    tensorsFile.close(); // NOTE(review): the success message below is logged without checking the stream state
+    sample::gLogInfo << "Successfully generated network tensor names. Writing: " << mParams.networkTensorsFileName
+                     << std::endl;
+    sample::gLogInfo
+        << "Use the generated tensor names file to create dynamic range file for Int8 inference. Follow README.md "
+           "for instructions to generate dynamic_ranges.txt file."
+        << std::endl;
+}
+
+//!
+//! \brief  Sets custom dynamic range for network tensors
+//!
+bool SampleINT8API::setDynamicRange(SampleUniquePtr<nvinfer1::INetworkDefinition>& network)
+{
+    // populate per-tensor dynamic range
+    if (!readPerTensorDynamicRangeValues())
+    {
+        return false;
+    }
+
+    sample::gLogInfo << "Setting Per Tensor Dynamic Range" << std::endl;
+    if (mParams.verbose)
+    {
+        sample::gLogInfo
+            << "If dynamic range for a tensor is missing, TensorRT will run inference assuming dynamic range for "
+               "the tensor as optional."
+            << std::endl;
+        sample::gLogInfo
+            << "If dynamic range for a tensor is required then inference will fail. Follow README.md to generate "
+               "missing per-tensor dynamic range."
+            << std::endl;
+    }
+    // set dynamic range for network input tensors
+    for (int i = 0; i < network->getNbInputs(); ++i)
+    {
+        std::string tName = network->getInput(i)->getName();
+        if (mPerTensorDynamicRangeMap.find(tName) != mPerTensorDynamicRangeMap.end())
+        {
+            if (!network->getInput(i)->setDynamicRange(
+                    -mPerTensorDynamicRangeMap.at(tName), mPerTensorDynamicRangeMap.at(tName)))
+            {
+                return false;
+            }
+        }
+        else
+        {
+            if (mParams.verbose)
+            {
+                sample::gLogWarning << "Missing dynamic range for tensor: " << tName << std::endl;
+            }
+        }
+    }
+
+    // set dynamic range for layer output tensors
+    for (int i = 0; i < network->getNbLayers(); ++i)
+    {
+        auto lyr = network->getLayer(i);
+        for (int j = 0, e = lyr->getNbOutputs(); j < e; ++j)
+        {
+            std::string tName = lyr->getOutput(j)->getName();
+            if (mPerTensorDynamicRangeMap.find(tName) != mPerTensorDynamicRangeMap.end())
+            {
+                // Calibrator generated dynamic range for network tensor can be overriden or set using below API
+                if (!lyr->getOutput(j)->setDynamicRange(
+                        -mPerTensorDynamicRangeMap.at(tName), mPerTensorDynamicRangeMap.at(tName)))
+                {
+                    return false;
+                }
+            }
+            else if (lyr->getType() == LayerType::kCONSTANT)
+            {
+                IConstantLayer* cLyr = static_cast<IConstantLayer*>(lyr);
+                if (mParams.verbose)
+                {
+                    sample::gLogWarning << "Computing missing dynamic range for tensor, " << tName << ", from weights."
+                                        << std::endl;
+                }
+                auto wts = cLyr->getWeights();
+                double max = std::numeric_limits<double>::min();
+                for (int64_t wb = 0, we = wts.count; wb < we; ++wb)
+                {
+                    double val{};
+                    switch (wts.type)
+                    {
+                    case DataType::kFLOAT: val = static_cast<const float*>(wts.values)[wb]; break;
+                    case DataType::kBOOL: val = static_cast<const bool*>(wts.values)[wb]; break;
+                    case DataType::kINT8: val = static_cast<const int8_t*>(wts.values)[wb]; break;
+                    case DataType::kHALF: val = static_cast<const half_float::half*>(wts.values)[wb]; break;
+                    case DataType::kINT32: val = static_cast<const int32_t*>(wts.values)[wb]; break;
+                    case DataType::kUINT8: val = static_cast<uint8_t const*>(wts.values)[wb]; break;
+                    case DataType::kFP8:
+                    case DataType::kBF16:
+                    case DataType::kINT4:
+                    case DataType::kINT64:
+                    case DataType::kFP4:
+                    case DataType::kE8M0: ASSERT(false && "Unsupported data type");
+                    }
+                    max = std::max(max, std::abs(val));
+                }
+
+                if (!lyr->getOutput(j)->setDynamicRange(-max, max))
+                {
+                    return false;
+                }
+            }
+            else
+            {
+                if (mParams.verbose)
+                {
+                    sample::gLogWarning << "Missing dynamic range for tensor: " << tName << std::endl;
+                }
+            }
+        }
+    }
+
+    if (mParams.verbose)
+    {
+        sample::gLogInfo << "Per Tensor Dynamic Range Values for the Network:" << std::endl;
+        for (auto iter = mPerTensorDynamicRangeMap.begin(); iter != mPerTensorDynamicRangeMap.end(); ++iter)
+            sample::gLogInfo << "Tensor: " << iter->first << ". Max Absolute Dynamic Range: " << iter->second
+                             << std::endl;
+    }
+    return true;
+}
+
+//!
+//! \brief Loads the PPM image, normalizes it and stores it in the host input buffer
+//!
+//! \details The expected channel count, height and width come from mParams.mPreproc.inputDims
+//!          (indices 1, 2 and 3). The dimensions found in the PPM header are checked against them
+//!          before any pixel data is read, so a mismatching image cannot overrun the staging
+//!          vector or the host input buffer. Pixels are converted from interleaved HWC bytes to
+//!          planar CHW floats scaled to [-1, 1].
+//!
+//! \param buffers Buffer manager providing the host buffer of the tensor mapped to "input"
+//!
+//! \return true on success, false if the file is not a readable PPM of the expected size
+//!
+bool SampleINT8API::prepareInput(const samplesCommon::BufferManager& buffers)
+{
+    if (samplesCommon::toLower(samplesCommon::getFileType(mParams.imageFileName)).compare("ppm") != 0)
+    {
+        sample::gLogError << "Wrong format: " << mParams.imageFileName << " is not a ppm file." << std::endl;
+        return false;
+    }
+
+    // Dimensions the network expects; these size every buffer touched below.
+    int const channels = mParams.mPreproc.inputDims.at(1);
+    int const height = mParams.mPreproc.inputDims.at(2);
+    int const width = mParams.mPreproc.inputDims.at(3);
+
+    std::ifstream infile(mParams.imageFileName, std::ifstream::binary);
+    ASSERT(infile.is_open() && "Attempting to read from a file that is not open.");
+
+    // Parse the header into separate variables so the expected dimensions are never overwritten
+    // by whatever the file claims.
+    std::string magic;
+    int fileWidth{0};
+    int fileHeight{0};
+    int maxVal{0};
+    infile >> magic >> fileWidth >> fileHeight >> maxVal;
+    if (!infile)
+    {
+        sample::gLogError << "Unable to parse PPM header of " << mParams.imageFileName << std::endl;
+        return false;
+    }
+    if (fileWidth != width || fileHeight != height)
+    {
+        sample::gLogError << "Image " << mParams.imageFileName << " is " << fileWidth << "x" << fileHeight
+                          << " but the network expects " << width << "x" << height << "." << std::endl;
+        return false;
+    }
+
+    // Skip the single byte that separates the header from the pixel data.
+    infile.seekg(1, infile.cur);
+
+    std::vector<uint8_t> fileData(static_cast<size_t>(channels) * height * width);
+    infile.read(reinterpret_cast<char*>(fileData.data()), static_cast<std::streamsize>(fileData.size()));
+    if (infile.gcount() != static_cast<std::streamsize>(fileData.size()))
+    {
+        sample::gLogError << "Image " << mParams.imageFileName << " contains less pixel data than expected."
+                          << std::endl;
+        return false;
+    }
+
+    float* hostInputBuffer = static_cast<float*>(buffers.getHostBuffer(mInOut["input"]));
+
+    // Convert HWC to CHW and Normalize
+    for (int c = 0; c < channels; ++c)
+    {
+        for (int h = 0; h < height; ++h)
+        {
+            for (int w = 0; w < width; ++w)
+            {
+                int dstIdx = c * height * width + h * width + w;
+                int srcIdx = h * width * channels + w * channels + c;
+                hostInputBuffer[dstIdx] = (2.0F / 255.0F) * static_cast<float>(fileData[srcIdx]) - 1.0F;
+            }
+        }
+    }
+    return true;
+}
+
+//!
+//! \brief Prints the top-5 predicted labels for the inference output
+//!
+//! \details The first mOutputDims.d[1] floats of the host buffer mapped to "output" are copied and
+//!          passed to samplesCommon::classify together with the labels read from
+//!          mParams.referenceFileName. No comparison against an expected label is performed here.
+//!
+//! \param buffers Buffer manager holding the host copy of the output tensor
+//!
+//! \return false if the output buffer or the reference file is unavailable, true otherwise
+//!
+bool SampleINT8API::verifyOutput(const samplesCommon::BufferManager& buffers) const
+{
+    // copy output host buffer data for further processing
+    const float* probPtr = static_cast<const float*>(buffers.getHostBuffer(mInOut.at("output")));
+    if (probPtr == nullptr)
+    {
+        sample::gLogError << "Unable to access host output buffer." << std::endl;
+        return false;
+    }
+    std::vector<float> output(probPtr, probPtr + mOutputDims.d[1]);
+
+    // read reference labels to generate prediction labels
+    std::vector<std::string> referenceVector;
+    if (!samplesCommon::readReferenceFile(mParams.referenceFileName, referenceVector))
+    {
+        sample::gLogError << "Unable to read reference file: " << mParams.referenceFileName << std::endl;
+        return false;
+    }
+
+    std::vector<std::string> top5Result = samplesCommon::classify(referenceVector, output, 5);
+
+    sample::gLogInfo << "SampleINT8API result: Detected:" << std::endl;
+    // Iterate over what was actually returned instead of assuming exactly five entries.
+    for (size_t i = 0; i < top5Result.size(); ++i)
+    {
+        sample::gLogInfo << "[" << (i + 1) << "]  " << top5Result[i] << std::endl;
+    }
+
+    return true;
+}
+
+//!
+//! \brief Creates the network, configures the builder and creates the network engine
+//!
+//! \details This function creates INT8 classification network by parsing the onnx model and builds
+//!          the engine that will be used to run INT8 inference (mEngine). When
+//!          mParams.writeNetworkTensors is set, only the network tensor names are written and no
+//!          engine is built.
+//!
+//! \return kRUNNING if the engine was created successfully, kWAIVED if only the tensor names were
+//!         written, and kFAILED otherwise
+//!
+sample::Logger::TestResult SampleINT8API::build()
+{
+    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+    if (!builder)
+    {
+        sample::gLogError << "Unable to create builder object." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
+    if (!network)
+    {
+        sample::gLogError << "Unable to create network object." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+    if (!config)
+    {
+        sample::gLogError << "Unable to create config object." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    auto parser
+        = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
+    if (!parser)
+    {
+        sample::gLogError << "Unable to create parser object." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // Parse ONNX model file to populate TensorRT INetwork
+    int verbosity = (int) nvinfer1::ILogger::Severity::kERROR;
+    if (!parser->parseFromFile(mParams.modelFileName.c_str(), verbosity))
+    {
+        sample::gLogError << "Unable to parse ONNX model file: " << mParams.modelFileName << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // In tensor-name dump mode the sample stops here; the caller reports the run as waived.
+    if (mParams.writeNetworkTensors)
+    {
+        writeNetworkTensorNames(network);
+        return sample::Logger::TestResult::kWAIVED;
+    }
+
+    // Configure builder
+    config->setFlag(BuilderFlag::kGPU_FALLBACK);
+
+    // Enable INT8 model. Required to set custom per-tensor dynamic range or INT8 Calibration
+    config->setFlag(BuilderFlag::kINT8);
+    // Mark calibrator as null. As user provides dynamic range for each tensor, no calibrator is required
+    config->setInt8Calibrator(nullptr);
+
+    // force layer to execute with required precision
+    setLayerPrecision(network);
+
+    // set INT8 Per Tensor Dynamic range
+    if (!setDynamicRange(network))
+    {
+        sample::gLogError << "Unable to set per-tensor dynamic range." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // CUDA stream used for profiling by the builder.
+    auto profileStream = samplesCommon::makeCudaStream();
+    if (!profileStream)
+    {
+        sample::gLogError << "Unable to create CUDA stream for profiling." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+    config->setProfileStream(*profileStream);
+
+    // Optionally seed the build with a timing cache loaded from disk.
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache;
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache
+            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
+    }
+
+    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
+    if (!plan)
+    {
+        sample::gLogError << "Unable to build serialized plan." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // Write the timing cache back to the file once the build has succeeded.
+    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(
+            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+
+    // Create the runtime once and reuse it on subsequent calls.
+    if (!mRuntime)
+    {
+        mRuntime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
+    }
+
+    if (!mRuntime)
+    {
+        sample::gLogError << "Unable to create runtime." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // build TRT engine
+    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
+        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
+    if (!mEngine)
+    {
+        sample::gLogError << "Unable to build cuda engine." << std::endl;
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // populates input output map structure
+    getInputOutputNames();
+
+    mInputDims = mEngine->getTensorShape(mInOut["input"].c_str());
+    mOutputDims = mEngine->getTensorShape(mInOut["output"].c_str());
+
+    return sample::Logger::TestResult::kRUNNING;
+}
+
+//!
+//! \brief Runs the TensorRT inference engine for this sample
+//!
+//! \details This function is the main execution function of the sample. It allocates
+//!          the buffer, sets inputs, executes the engine, and verifies the output. The CUDA
+//!          stream created for the run is destroyed on the enqueue-failure path as well as on
+//!          the normal path.
+//!
+//! \return kRUNNING if inference and output processing succeeded, kFAILED otherwise
+//!
+sample::Logger::TestResult SampleINT8API::infer()
+{
+    // Create RAII buffer manager object
+    samplesCommon::BufferManager buffers(mEngine);
+
+    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
+    if (!context)
+    {
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // Bind every engine I/O tensor to the device buffer of the same name.
+    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
+    {
+        auto const name = mEngine->getIOTensorName(i);
+        context->setTensorAddress(name, buffers.getDeviceBuffer(name));
+    }
+
+    // Read the input data into the managed buffers
+    // There should be just 1 input tensor
+
+    if (!prepareInput(buffers))
+    {
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // Create CUDA stream for the execution of this inference
+    cudaStream_t stream;
+    CHECK(cudaStreamCreate(&stream));
+
+    // Asynchronously copy data from host input buffers to device input buffers
+    buffers.copyInputToDeviceAsync(stream);
+
+    // Asynchronously enqueue the inference work
+    if (!context->enqueueV3(stream))
+    {
+        // Release the stream before bailing out so the failure path does not leak it.
+        CHECK(cudaStreamDestroy(stream));
+        return sample::Logger::TestResult::kFAILED;
+    }
+
+    // Asynchronously copy data from device output buffers to host output buffers
+    buffers.copyOutputToHostAsync(stream);
+
+    // Wait for the work in the stream to complete
+    CHECK(cudaStreamSynchronize(stream));
+
+    // Release stream
+    CHECK(cudaStreamDestroy(stream));
+
+    // Check and print the output of the inference
+    return verifyOutput(buffers) ? sample::Logger::TestResult::kRUNNING : sample::Logger::TestResult::kFAILED;
+}
+
+//!
+//! \brief Clean-up hook for state created by the sample class
+//!
+//! \details No explicit clean-up is performed here; the function only reports its status.
+//!
+//! \return always kRUNNING
+//!
+sample::Logger::TestResult SampleINT8API::teardown()
+{
+    auto const status = sample::Logger::TestResult::kRUNNING;
+    return status;
+}
+
+//!
+//! \brief Command-line arguments of the INT8 API sample, on top of the common samplesCommon::Args
+//!
+struct SampleINT8APIArgs : public samplesCommon::Args
+{
+    // Flags
+    bool verbose = false;             //!< set by -v / --verbose
+    bool writeNetworkTensors = false; //!< set by --write_tensors
+
+    // File names, each initialized to the sample's default
+    std::string modelFileName = "resnet50.onnx";
+    std::string imageFileName = "airliner.ppm";
+    std::string referenceFileName = "reference_labels.txt";
+    std::string dynamicRangeFileName = "resnet50_per_tensor_dynamic_range.txt";
+    std::string networkTensorsFileName = "network_tensors.txt";
+};
+
+//!
+//! \brief This function parses arguments specific to SampleINT8API
+//!
+//! \details Options are matched by prefix with strncmp, so "--opt=value" style options take the
+//!          text after '=' as their value. Parsing stops at the first unrecognized or malformed
+//!          argument.
+//!
+//! \param args Receives the parsed values; fields for options not present keep their defaults
+//! \param argc Number of entries in argv
+//! \param argv Command-line arguments; argv[0] is skipped
+//!
+//! \return true if every argument was recognized and well-formed, false otherwise
+//!
+bool parseSampleINT8APIArgs(SampleINT8APIArgs& args, int argc, char* argv[])
+{
+    for (int i = 1; i < argc; ++i)
+    {
+        if (!strncmp(argv[i], "--model=", 8))
+        {
+            args.modelFileName = (argv[i] + 8);
+        }
+        else if (!strncmp(argv[i], "--image=", 8))
+        {
+            args.imageFileName = (argv[i] + 8);
+        }
+        else if (!strncmp(argv[i], "--reference=", 12))
+        {
+            args.referenceFileName = (argv[i] + 12);
+        }
+        else if (!strncmp(argv[i], "--write_tensors", 15))
+        {
+            args.writeNetworkTensors = true;
+        }
+        else if (!strncmp(argv[i], "--network_tensors_file=", 23))
+        {
+            args.networkTensorsFileName = (argv[i] + 23);
+        }
+        else if (!strncmp(argv[i], "--ranges=", 9))
+        {
+            args.dynamicRangeFileName = (argv[i] + 9);
+        }
+        else if (!strncmp(argv[i], "--int8", 6))
+        {
+            args.runInInt8 = true;
+        }
+        else if (!strncmp(argv[i], "--fp16", 6))
+        {
+            args.runInFp16 = true;
+        }
+        else if (!strncmp(argv[i], "--useDLACore=", 13))
+        {
+            // std::stoi throws on non-numeric or out-of-range text; report that as an invalid
+            // argument instead of letting the exception terminate the program.
+            try
+            {
+                args.useDLACore = std::stoi(argv[i] + 13);
+            }
+            catch (...)
+            {
+                sample::gLogError << "Invalid Argument: " << argv[i] << std::endl;
+                return false;
+            }
+        }
+        else if (!strncmp(argv[i], "--data=", 7))
+        {
+            std::string dirPath = (argv[i] + 7);
+            // back() on an empty string is undefined, so reject "--data=" with no path.
+            if (dirPath.empty())
+            {
+                sample::gLogError << "Invalid Argument: " << argv[i] << std::endl;
+                return false;
+            }
+            if (dirPath.back() != '/')
+            {
+                dirPath.push_back('/');
+            }
+            args.dataDirs.push_back(dirPath);
+        }
+        else if (!strncmp(argv[i], "--timingCacheFile=", 18))
+        {
+            args.timingCacheFile = (argv[i] + 18);
+        }
+        else if (!strncmp(argv[i], "--verbose", 9) || !strncmp(argv[i], "-v", 2))
+        {
+            args.verbose = true;
+        }
+        else if (!strncmp(argv[i], "--help", 6) || !strncmp(argv[i], "-h", 2))
+        {
+            args.help = true;
+        }
+        else
+        {
+            sample::gLogError << "Invalid Argument: " << argv[i] << std::endl;
+            return false;
+        }
+    }
+    return true;
+}
+
+//!
+//! \brief Resolves the sample's input file names against the data directories and logs them
+//!
+//! \details Each file name in params is replaced by the result of samplesCommon::locateFile. When
+//!          params.writeNetworkTensors is set, only the model file is resolved.
+//!
+//! \param params Sample parameters whose file-name members are updated in place
+//!
+void validateInputParams(SampleINT8APIParams& params)
+{
+    // Replaces a file name with the path found for it in params.dataDirs.
+    auto const resolve = [&params](std::string& fileName) {
+        fileName = samplesCommon::locateFile(fileName, params.dataDirs);
+    };
+
+    sample::gLogInfo << "Please follow README.md to generate missing input files." << std::endl;
+    sample::gLogInfo << "Validating input parameters. Using following input files for inference." << std::endl;
+
+    resolve(params.modelFileName);
+    sample::gLogInfo << "    Model File: " << params.modelFileName << std::endl;
+
+    if (!params.writeNetworkTensors)
+    {
+        resolve(params.imageFileName);
+        sample::gLogInfo << "    Image File: " << params.imageFileName << std::endl;
+
+        resolve(params.referenceFileName);
+        sample::gLogInfo << "    Reference File: " << params.referenceFileName << std::endl;
+
+        resolve(params.dynamicRangeFileName);
+        sample::gLogInfo << "    Dynamic Range File: " << params.dynamicRangeFileName << std::endl;
+    }
+    else
+    {
+        // Tensor-name dump mode needs only the model.
+        sample::gLogInfo << "    Writing Network Tensors File to: " << params.networkTensorsFileName << std::endl;
+    }
+}
+
+//!
+//! \brief Builds the sample's params struct from the parsed command-line args
+//!
+//! \param args Parsed command-line arguments
+//!
+//! \return Parameters with the search directories set and the file names passed through
+//!         validateInputParams
+//!
+SampleINT8APIParams initializeSampleParams(SampleINT8APIArgs args)
+{
+    SampleINT8APIParams params;
+
+    // Search directories: the user's list when one was given, otherwise the two defaults.
+    bool const userSuppliedDirs = !args.dataDirs.empty();
+    if (userSuppliedDirs)
+    {
+        params.dataDirs = args.dataDirs;
+    }
+    else
+    {
+        params.dataDirs.emplace_back("data/samples/int8_api/");
+        params.dataDirs.emplace_back("data/int8_api/");
+    }
+    // In case of absolute path search
+    params.dataDirs.emplace_back("");
+
+    // Input and output file names
+    params.modelFileName = args.modelFileName;
+    params.imageFileName = args.imageFileName;
+    params.referenceFileName = args.referenceFileName;
+    params.dynamicRangeFileName = args.dynamicRangeFileName;
+    params.networkTensorsFileName = args.networkTensorsFileName;
+    params.timingCacheFile = args.timingCacheFile;
+
+    // Flags and device selection
+    params.verbose = args.verbose;
+    params.writeNetworkTensors = args.writeNetworkTensors;
+    params.dlaCore = args.useDLACore;
+
+    validateInputParams(params);
+    return params;
+}
+
+//!
+//! \brief This function prints the help information for running this sample
+//!
+//! \details Writes the usage line followed by one description per option to std::cout.
+//!
+void printHelpInfo()
+{
+    std::cout << "Usage: ./sample_int8_api [-h or --help] [--model=model_file] "
+                 "[--ranges=per_tensor_dynamic_range_file] [--image=image_file] [--reference=reference_file] "
+                 "[--write_tensors] [--network_tensors_file=network_tensors_file] "
+                 "[--data=/path/to/data/dir] [--useDLACore=<int>] [-v or --verbose] "
+                 "[--timingCacheFile=timing_cache_file]\n";
+    std::cout << "-h or --help. Display This help information" << std::endl;
+    std::cout << "--model=model_file.onnx or /absolute/path/to/model_file.onnx. Generate model file using README.md in "
+                 "case it does not exist. Defaults to resnet50.onnx"
+              << std::endl;
+    // The default named here must match SampleINT8APIArgs::imageFileName.
+    std::cout << "--image=image.ppm or /absolute/path/to/image.ppm. Image to infer. Defaults to airliner.ppm"
+              << std::endl;
+    std::cout << "--reference=reference.txt or /absolute/path/to/reference.txt. Reference labels file. Defaults to "
+                 "reference_labels.txt"
+              << std::endl;
+    std::cout << "--ranges=ranges.txt or /absolute/path/to/ranges.txt. Specify custom per-tensor dynamic range for the "
+                 "network. Defaults to resnet50_per_tensor_dynamic_range.txt"
+              << std::endl;
+    std::cout << "--write_tensors. Option to generate file containing network tensors name. By default writes to "
+                 "network_tensors.txt file. To provide user defined file name use additional option "
+                 "--network_tensors_file. See --network_tensors_file option usage for more detail."
+              << std::endl;
+    std::cout << "--network_tensors_file=network_tensors.txt or /absolute/path/to/network_tensors.txt. This option "
+                 "needs to be used with --write_tensors option. Specify file name (will write to current execution "
+                 "directory) or absolute path to file name to write network tensor names file. Dynamic range "
+                 "corresponding to each network tensor is required to run the sample. Defaults to network_tensors.txt"
+              << std::endl;
+    std::cout << "--data=/path/to/data/dir. Specify data directory to search for above files in case absolute paths to "
+                 "files are not provided. Defaults to data/samples/int8_api/ or data/int8_api/"
+              << std::endl;
+    std::cout << "--useDLACore=N. Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
+                 "where n is the number of DLA engines on the platform."
+              << std::endl;
+    std::cout << "--timingCacheFile=functional.cache or /absolute/path/to/functional.cache. Specify path for timing "
+                 "cache file. If it does not already exist, it will be created. Defaults to not using a timing cache."
+              << std::endl;
+    std::cout << "-v or --verbose. Outputs per-tensor dynamic range and layer precision info for the network"
+              << std::endl;
+}
+
+//!
+//! \brief Entry point: parses arguments, builds the INT8 engine, runs inference and reports the result
+//!
+int main(int argc, char** argv)
+{
+    SampleINT8APIArgs args;
+    if (!parseSampleINT8APIArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    if (args.verbose)
+    {
+        sample::gLogger.setReportableSeverity(nvinfer1::ILogger::Severity::kVERBOSE);
+    }
+
+    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
+    sample::gLogger.reportTestStart(sampleTest);
+
+    SampleINT8APIParams params = initializeSampleParams(args);
+
+    SampleINT8API sample(params);
+    sample::gLogInfo << "Building and running a INT8 GPU inference engine for " << params.modelFileName << std::endl;
+
+    // A waived build (tensor-name dump mode) and a failed build both end the run here.
+    switch (sample.build())
+    {
+    case sample::Logger::TestResult::kWAIVED: return sample::gLogger.reportWaive(sampleTest);
+    case sample::Logger::TestResult::kFAILED: return sample::gLogger.reportFail(sampleTest);
+    default: break;
+    }
+
+    bool const inferOk = sample.infer() == sample::Logger::TestResult::kRUNNING;
+    if (!inferOk)
+    {
+        return sample::gLogger.reportFail(sampleTest);
+    }
+
+    bool const teardownOk = sample.teardown() == sample::Logger::TestResult::kRUNNING;
+    if (!teardownOk)
+    {
+        return sample::gLogger.reportFail(sampleTest);
+    }
+
+    return sample::gLogger.reportPass(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..9eb13a4d677cfd5dadaf4f37989e8f81aa8cdd69
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/Makefile
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+OUTNAME_RELEASE = sample_io_formats
+OUTNAME_DEBUG   = sample_io_formats_debug
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1d5fd80ed29f3099dc5cd89291b1dd74a6623ed0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/README.md
@@ -0,0 +1,111 @@
+# Specifying I/O Formats
+
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+- [Running the sample](#running-the-sample)
+	* [Sample `--help` options](#sample-help-options)
+- [Preparing sample data](#preparing-sample-data)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, sampleIOFormats, uses an ONNX model that was trained on the [MNIST dataset](https://github.com/NVIDIA/DIGITS/blob/master/docs/GettingStarted.md) and performs engine building and inference using TensorRT. The correctness of outputs is then compared to the golden reference. Specifically, it shows how to use APIs to explicitly specify input formats to `TensorFormat::kLINEAR` for Float32, and additionally `TensorFormat::kCHW2` and `TensorFormat::kHWC8` for Float16 and INT8 precision.
+
+## How does this sample work?
+
+`ITensor::setAllowedFormats` is invoked to specify which format is expected to be supported.
+
+	```
+	bool SampleIOFormats::build(int dataWidth)
+	{
+		...
+
+		network->getInput(0)->setAllowedFormats(static_cast<TensorFormats>(1 << static_cast<int>(mTensorFormat)));
+		...
+	}
+	```
+
+## Preparing sample data
+
+1. Download the sample data from [TensorRT release tarball](https://developer.nvidia.com/nvidia-tensorrt-download#), if not already mounted under `/usr/src/tensorrt/data` (NVIDIA NGC containers) and set it to `$TRT_DATADIR`.
+    ```bash
+    export TRT_DATADIR=/usr/src/tensorrt/data
+    pushd $TRT_DATADIR/mnist
+    pip3 install Pillow
+    popd
+    ```
+
+## Running the sample
+
+1. Compile the sample by following build instructions in [TensorRT README](https://github.com/NVIDIA/TensorRT/).
+
+2.  Run inference on the digit looping from 0 to 9:
+    ```bash
+    ./sample_io_formats --datadir=<path/to/data> --useDLACore=N
+    ```
+
+    For example:
+    ```bash
+    ./sample_io_formats --datadir $TRT_DATADIR/mnist
+    ```
+
+3.  Verify that all 10 digits match correctly. If the sample runs successfully, you should see output similar to the following:
+	```
+	&&&& RUNNING TensorRT.sample_io_formats # ./sample_io_formats
+	[I] The test chooses MNIST as the network and recognizes a randomly generated digit
+	[I] Firstly it runs the FP32 as the golden data, then INT8/FP16 with different formats will be tested
+	[I]
+	[I] Building and running a FP32 GPU inference to get golden input/output
+	[I] [TRT] Detected 1 input and 1 output network tensors.
+	[I] Input:
+	... (omitted message)
+	&&&& PASSED TensorRT.sample_io_formats
+	```
+	This output shows that the sample ran successfully; `PASSED`.
+
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+## Additional resources
+
+The following resources provide a deeper understanding about this sample:
+
+**Models**
+- [MNIST](https://keras.io/datasets/#mnist-database-of-handwritten-digits)
+
+**Documentation**
+- [Introduction To NVIDIA’s TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The C++ API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#c_topics)
+- [NVIDIA’s TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+## License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+
+## Changelog
+
+**August 2022**
+- Migrated code from parsing a `caffe` model to an `onnx` model.
+
+**Oct 2021**
+- Change names and topic from "reformat-free" to "I/O formats", because `BuilderFlag::kSTRICT_TYPES`
+is deprecated. "Reformat-free I/O" (see `BuilderFlag::kDIRECT_IO`) is generally counterproductive
+and fragile, since it constrains the optimizer from choosing the fastest implementation,
+and depends upon what kernels are available on a particular target.
+
+**June 2019**
+- This is the first release of the `README.md` file and sample.
+
+
+## Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/sampleIOFormats.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/sampleIOFormats.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..88b9f6876b9fd5761825956aea080e00c553d030
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleIOFormats/sampleIOFormats.cpp
@@ -0,0 +1,679 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//! \file sampleIOFormats.cpp
+//! \brief This file contains the implementation of the I/O formats sample.
+//!
+//! It builds a TensorRT engine from an MNIST network.
+//! It uses the engine to identify input images.
+//! The goal of this sample is to show how to specify allowed I/O formats.
+//! It can be run with the following command line:
+//! Command: ./sample_io_formats
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "half.h"
+#include "logger.h"
+#include "parserOnnxConfig.h"
+#include "sampleOptions.h"
+
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cuda_runtime_api.h>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+
+#include <array>
+#include <cstdlib>
+#include <random>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+
+std::string const gSampleName = "TensorRT.sample_io_formats";
+
+inline int32_t divUp(int32_t a, int32_t b) // ceiling of a / b when both operands are positive
+{
+    return (b + a - 1) / b;
+}
+
+template <typename T>
+std::shared_ptr<T> mallocCudaMem(size_t nbElems)
+{
+    void* raw = nullptr; // handed to the shared_ptr below, whose deleter calls cudaFree
+    CHECK(cudaMalloc(&raw, sizeof(T) * nbElems));
+    return std::shared_ptr<T>(static_cast<T*>(raw), [](T* devPtr) { CHECK(cudaFree(devPtr)); });
+}
+
+class BufferDesc
+{
+public:
+    BufferDesc() = default;
+
+    //! Derives the five-extent host layout of \p dims stored in \p format, with \p dataWidth bytes per
+    //! scalar. Formats other than the five handled below keep the all-ones default layout.
+    BufferDesc(nvinfer1::Dims dims, int32_t dataWidth, TensorFormat format)
+    {
+        this->dataWidth = dataWidth;
+        // Channel-vectorized CHW<lanes>: d[1] is split into divUp(d[1], lanes) groups of `lanes` scalars.
+        auto const setVectorized = [this, &dims](int32_t lanes) {
+            this->dims[0] = dims.d[0];
+            this->dims[1] = divUp(dims.d[1], lanes);
+            this->dims[2] = dims.d[2];
+            this->dims[3] = dims.d[3];
+            this->dims[4] = lanes;
+            this->scalarPerVector = lanes;
+        };
+
+        switch (format)
+        {
+        case TensorFormat::kLINEAR:
+            // The four extents are copied unchanged; nothing is vectorized.
+            this->dims[0] = dims.d[0];
+            this->dims[1] = dims.d[1];
+            this->dims[2] = dims.d[2];
+            this->dims[3] = dims.d[3];
+            this->dims[4] = 1;
+            break;
+        case TensorFormat::kCHW2:
+            setVectorized(2);
+            break;
+        case TensorFormat::kCHW4:
+            setVectorized(4);
+            break;
+        case TensorFormat::kCHW32:
+            setVectorized(32);
+            break;
+        case TensorFormat::kHWC8:
+            // d[1] moves to the last of the four extents and is rounded up to a multiple of 8.
+            this->dims[0] = dims.d[0];
+            this->dims[1] = dims.d[2];
+            this->dims[2] = dims.d[3];
+            this->dims[3] = divUp(dims.d[1], 8) * 8;
+            this->dims[4] = 1;
+            this->scalarPerVector = 8;
+            this->channelPivot = true;
+            break;
+        default:
+            break;
+        }
+    }
+
+    // [(C+x-1)/x][H][W][x]
+    // or
+    // [H][W][(C+x-1)/x*x][1]
+    int32_t dims[5] = {1, 1, 1, 1, 1};
+    int32_t dataWidth = 1;       //!< Multiplier applied per element by getBufferSize().
+    int32_t scalarPerVector = 1; //!< 2, 4, 32 or 8 for the vectorized formats handled above, otherwise 1.
+
+    bool channelPivot = false; //!< Set only by the kHWC8 branch of the constructor.
+
+    //! Product of all five extents.
+    int32_t getElememtSize()
+    {
+        return dims[0] * dims[1] * dims[2] * dims[3] * dims[4];
+    }
+    //! Element count scaled by dataWidth.
+    int32_t getBufferSize()
+    {
+        return dataWidth * getElememtSize();
+    }
+};
+
+//! One test case for the sample: a datatype plus a tensor format.
+//! formatName is the printable name of the format, used in log and error messages.
+struct TypeSpec
+{
+    DataType dtype;         //!< datatype
+    TensorFormat format;    //!< format
+    std::string formatName; //!< name of the format
+};
+
+//! Host-side buffer for one I/O tensor plus the layout description used to size it; owns `buffer`.
+class SampleBuffer
+{
+public:
+    //! Creates a 1x1x1x1 placeholder that owns no memory (intended to be move-assigned later).
+    SampleBuffer()
+    {
+        dims.nbDims = 4; // matches the four extents filled in below instead of staying indeterminate
+        dims.d[0] = 1;
+        dims.d[1] = 1;
+        dims.d[2] = 1;
+        dims.d[3] = 1;
+    }
+
+    //! Allocates a zero-filled host buffer for \p dims in \p format with \p dataWidth bytes per scalar.
+    SampleBuffer(nvinfer1::Dims dims, int32_t dataWidth, TensorFormat format, bool isInput)
+        : dims(dims)
+        , dataWidth(dataWidth)
+        , format(format)
+        , isInput(isInput)
+    {
+        // Output is unsqueezed to 4D (on the by-value parameter only, not the member) to reuse BufferDesc
+        if (!isInput)
+        {
+            dims.d[2] = dims.d[0];
+            dims.d[3] = dims.d[1];
+            dims.d[0] = 1;
+            dims.d[1] = 1;
+        }
+
+        desc = BufferDesc(dims, dataWidth, format);
+
+        // `buffer` is always null here, so allocate unconditionally; the trailing () zero-fills it.
+        buffer = new uint8_t[getBufferSize()]();
+    }
+
+    ~SampleBuffer()
+    {
+        destroy();
+    }
+
+    //! Takes over the storage of \p sampleBuffer, releasing any storage currently held.
+    SampleBuffer& operator=(SampleBuffer&& sampleBuffer) noexcept
+    {
+        // Self-move must be a no-op: destroy() below would otherwise free this object's own storage.
+        if (this == &sampleBuffer)
+        {
+            return *this;
+        }
+        destroy();
+
+        this->dims = sampleBuffer.dims;
+        this->dataWidth = sampleBuffer.dataWidth;
+        this->desc = sampleBuffer.desc;
+        this->format = sampleBuffer.format;
+        this->isInput = sampleBuffer.isInput;
+        this->buffer = sampleBuffer.buffer;
+        sampleBuffer.buffer = nullptr; // the source no longer owns the storage
+
+        return *this;
+    }
+
+    //! Frees the host storage; calling it again afterwards does nothing.
+    void destroy()
+    {
+        delete[] buffer; // delete[] on a null pointer is a no-op
+        buffer = nullptr;
+    }
+
+    nvinfer1::Dims dims;
+    int32_t dataWidth{1};
+    TensorFormat format{TensorFormat::kLINEAR};
+    bool isInput{true};
+    BufferDesc desc;
+    uint8_t* buffer = nullptr;
+
+    int32_t getBufferSize()
+    {
+        return desc.getBufferSize();
+    }
+};
+
+//!
+//! \brief  The SampleIOFormats class implements the I/O formats sample
+//!
+//! \details It creates the network using the Onnx parser, builds an engine whose input format is taken
+//!          from mTensorFormat, and runs inference on host buffers supplied by the caller.
+class SampleIOFormats
+{
+public:
+    SampleIOFormats(samplesCommon::OnnxSampleParams const& params)
+        : mParams(params)
+    {
+    }
+
+    //!
+    //! \brief Builds the network engine
+    //! \param dataWidth Bytes per I/O scalar: 1 selects INT8 tensors, 2 selects FP16 tensors.
+    bool build(int32_t dataWidth);
+
+    //!
+    //! \brief Verify the built engine I/O types and formats.
+    //! \param spec Datatype and format expectations for the engine's I/O tensors.
+    bool verify(TypeSpec const& spec);
+
+    //!
+    //! \brief Runs the TensorRT inference engine for this sample
+    //! Copies \p inputBuf to the device, executes, and copies the result into \p outputBuf.
+    bool infer(SampleBuffer& inputBuf, SampleBuffer& outputBuf);
+
+private:
+    //!
+    //! \brief Parses an ONNX model for MNIST and creates a TensorRT network
+    //!
+    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+        SampleUniquePtr<nvonnxparser::IParser>& parser);
+
+    // mRuntime is declared before mEngine, so the engine is destroyed before the runtime.
+    SampleUniquePtr<IRuntime> mRuntime{};                    //!< The TensorRT Runtime used to deserialize the engine.
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network
+
+public:
+    samplesCommon::OnnxSampleParams mParams; //!< Copy of the parameters passed to the constructor.
+    // The two Dims members below are assigned by build() from the parsed network.
+    nvinfer1::Dims mInputDims; //!< The dimensions of the input to the network.
+
+    nvinfer1::Dims mOutputDims; //!< The dimensions of the output to the network.
+
+    TensorFormat mTensorFormat{TensorFormat::kLINEAR}; //!< The only format build() allows for the network input.
+
+    int32_t mDigit; //!< Not initialized, read or written anywhere in this file.
+};
+
+//!
+//! \brief Validates engine I/O datatypes and formats against a reference.
+//!
+//! \details Queries the datatype and format of both I/O tensors from the built engine. Validating them is
+//!          sufficient to ensure that `ITensor::setType` and `ITensor::setAllowedFormats` work as expected.
+//!          Both tensors must have spec.dtype; the input must use spec.format and the output kLINEAR.
+//!
+//! \return true if type and format validation succeeds.
+//!
+bool SampleIOFormats::verify(TypeSpec const& spec)
+{
+    // ASSERT is the macro build() already uses; plain assert would need <cassert>, which is not included.
+    ASSERT(mEngine->getNbIOTensors() == 2);
+    char const* inputName = mEngine->getIOTensorName(0);
+    char const* outputName = mEngine->getIOTensorName(1);
+
+    auto verifyType = [](DataType actual, DataType expected) {
+        if (actual != expected)
+        {
+            sample::gLogError << "Expected " << expected << " data type, got " << actual << std::endl;
+            return false;
+        }
+        return true;
+    };
+
+    if (!verifyType(mEngine->getTensorDataType(inputName), spec.dtype)
+        || !verifyType(mEngine->getTensorDataType(outputName), spec.dtype))
+    {
+        return false;
+    }
+
+    // Compare the format enums directly; the free-form description string is only used for logging.
+    auto verifyFormat = [this](char const* name, TensorFormat expected, std::string const& expectedName) {
+        if (mEngine->getTensorFormat(name) != expected)
+        {
+            sample::gLogError << "Expected " << expectedName << " format, got "
+                              << mEngine->getTensorFormatDesc(name) << std::endl;
+            return false;
+        }
+        return true;
+    };
+
+    if (!verifyFormat(inputName, spec.format, spec.formatName))
+    {
+        return false;
+    }
+
+    // The output tensor (not the input a second time) must be kLINEAR, as build() allows nothing else.
+    if (!verifyFormat(outputName, TensorFormat::kLINEAR, "kLINEAR"))
+    {
+        return false;
+    }
+
+    return true;
+}
+
+//!
+//! \brief Creates the network, configures the builder and creates the network engine
+//!
+//! \details This function populates the network from the ONNX model via constructNetwork(), restricts the
+//!          I/O formats and types, then builds, serializes and deserializes the engine
+//! \param dataWidth Bytes per I/O scalar: 1 configures INT8, 2 configures FP16; other values add no type setup
+//! \return true if the engine was created successfully and false otherwise
+//!
+bool SampleIOFormats::build(int32_t dataWidth)
+{
+    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+    if (!builder)
+    {
+        return false;
+    }
+
+    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
+    if (!network)
+    {
+        return false;
+    }
+
+    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+    if (!config)
+    {
+        return false;
+    }
+
+    auto parser
+        = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
+    if (!parser)
+    {
+        return false;
+    }
+
+    auto constructed = constructNetwork(builder, network, config, parser);
+    if (!constructed)
+    {
+        return false;
+    }
+    // Allow exactly one format per I/O tensor: mTensorFormat for the input, kLINEAR for the output.
+    network->getInput(0)->setAllowedFormats(static_cast<TensorFormats>(1 << static_cast<int32_t>(mTensorFormat)));
+    network->getOutput(0)->setAllowedFormats(1U << static_cast<int32_t>(TensorFormat::kLINEAR));
+
+    mEngine.reset(); // drop the engine held from any previous build() call
+    // INT8 path: both I/O tensors become kINT8 and dynamic ranges are set on the input and on all tensors.
+    if (dataWidth == 1)
+    {
+        config->setFlag(BuilderFlag::kINT8);
+        network->getInput(0)->setType(DataType::kINT8);
+        network->getOutput(0)->setType(DataType::kINT8);
+        network->getInput(0)->setDynamicRange(-1.0F, 1.0F);
+        constexpr float kTENSOR_DYNAMIC_RANGE = 4.0F;
+        samplesCommon::setAllDynamicRanges(network.get(), kTENSOR_DYNAMIC_RANGE, kTENSOR_DYNAMIC_RANGE);
+    }
+    if (dataWidth == 2) // FP16 path: both I/O tensors become kHALF
+    {
+        config->setFlag(BuilderFlag::kFP16);
+        network->getInput(0)->setType(DataType::kHALF);
+        network->getOutput(0)->setType(DataType::kHALF);
+    }
+
+    config->setFlag(BuilderFlag::kGPU_FALLBACK);
+
+    // CUDA stream used for profiling by the builder.
+    auto profileStream = samplesCommon::makeCudaStream();
+    if (!profileStream)
+    {
+        return false;
+    }
+    config->setProfileStream(*profileStream);
+
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};
+
+    // Load timing cache
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache
+            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
+    }
+
+    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
+    if (!plan)
+    {
+        return false;
+    }
+    // Write the timing cache back to its file, but only if one was requested and loaded above.
+    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(
+            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+    // The runtime is created on the first call only and reused by later build() calls.
+    if (!mRuntime)
+    {
+        mRuntime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
+    }
+
+    if (!mRuntime)
+    {
+        return false;
+    }
+
+    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
+        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
+    if (!mEngine)
+    {
+        return false;
+    }
+    // Record the I/O shapes for the caller; a single 4D input and a single 2D output are required.
+    ASSERT(network->getNbInputs() == 1);
+    mInputDims = network->getInput(0)->getDimensions();
+    ASSERT(mInputDims.nbDims == 4);
+
+    ASSERT(network->getNbOutputs() == 1);
+    mOutputDims = network->getOutput(0)->getDimensions();
+    ASSERT(mOutputDims.nbDims == 2);
+
+    return true;
+}
+
+//!
+//! \brief Parses the ONNX MNIST model with the given parser and applies the DLA settings
+//!
+//! \param builder Builder passed on to samplesCommon::enableDLA
+//! \param network Network definition; not accessed directly in this function
+//! \param config Builder configuration passed on to samplesCommon::enableDLA
+//! \param parser ONNX parser used to read the file named by mParams.onnxFileName
+//!
+//! \return false if parsing the model file fails, true otherwise
+//!
+bool SampleIOFormats::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+    SampleUniquePtr<nvonnxparser::IParser>& parser)
+{
+    auto const modelPath = samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs);
+    auto const verbosity = static_cast<int32_t>(sample::gLogger.getReportableSeverity());
+    if (!parser->parseFromFile(modelPath.c_str(), verbosity))
+    {
+        return false;
+    }
+    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
+    return true;
+}
+
+//!
+//! \brief Runs the TensorRT inference engine for this sample
+//!
+//! \details Copies the input to the device, binds both device buffers to the engine's I/O tensors, runs
+//!          one inference and copies the result back into \p outputBuf. The output is not checked here.
+//! \return false if the execution context cannot be created or enqueueing fails, true otherwise
+bool SampleIOFormats::infer(SampleBuffer& inputBuf, SampleBuffer& outputBuf)
+{
+    auto const devInput = mallocCudaMem<uint8_t>(inputBuf.getBufferSize());
+    auto devOutput = mallocCudaMem<uint8_t>(outputBuf.getBufferSize());
+
+    CHECK(cudaMemcpy(devInput.get(), inputBuf.buffer, inputBuf.getBufferSize(), cudaMemcpyHostToDevice));
+
+    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
+    if (!context)
+    {
+        return false;
+    }
+
+    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
+    {
+        auto const name = mEngine->getIOTensorName(i);
+        if (mEngine->getTensorIOMode(name) == TensorIOMode::kINPUT)
+        {
+            context->setTensorAddress(name, devInput.get());
+        }
+        else
+        {
+            context->setTensorAddress(name, devOutput.get());
+        }
+    }
+
+    // Create CUDA stream for the execution of this inference.
+    cudaStream_t stream;
+    CHECK(cudaStreamCreate(&stream));
+    // Asynchronously enqueue the inference work and, if that succeeded, wait for it to complete
+    bool const enqueued = context->enqueueV3(stream);
+    if (enqueued)
+    {
+        CHECK(cudaStreamSynchronize(stream));
+    }
+
+    // Release the stream on every path, including a failed enqueue
+    CHECK(cudaStreamDestroy(stream));
+    if (!enqueued)
+    {
+        return false;
+    }
+
+    CHECK(cudaMemcpy(outputBuf.buffer, devOutput.get(), outputBuf.getBufferSize(), cudaMemcpyDeviceToHost));
+    return true;
+}
+
+//!
+//! \brief Builds the ONNX sample parameters from the parsed command line arguments
+//!
+samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const& args)
+{
+    samplesCommon::OnnxSampleParams result;
+    result.onnxFileName = "mnist.onnx";
+    result.dlaCore = args.useDLACore;
+    result.timingCacheFile = args.timingCacheFile;
+    if (!args.dataDirs.empty())
+    {
+        result.dataDirs = args.dataDirs; // directories given by the user replace the defaults
+    }
+    else
+    {
+        result.dataDirs.push_back("data/mnist/");
+        result.dataDirs.push_back("data/samples/mnist/");
+    }
+
+    return result;
+}
+//!
+//! \brief Prints the usage line and the description of each command line option to std::cout
+//!
+void printHelpInfo()
+{
+    std::cout
+        << "Usage: ./sample_io_formats [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>] "
+        << "[-t or --timingCacheFile=<path to timing cache file>]" << std::endl;
+    std::cout << "--help             Display help information" << std::endl;
+    std::cout << "--datadir          Specify path to a data directory, overriding the default. This option can be used "
+                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
+                 "(data/mnist/, data/samples/mnist/)"
+              << std::endl;
+    std::cout << "--useDLACore=N     Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
+                 "where n is the number of DLA engines on the platform."
+              << std::endl;
+    std::cout << "--timingCacheFile  Specify path to a timing cache file. If it does not already exist, it will be "
+              << "created." << std::endl;
+}
+//!
+//! \brief Builds, verifies and runs one engine for element type T
+//!
+//! \details sizeof(T) selects the I/O data width. Both buffers are replaced only after the engine was
+//!          built and verified, using the dimensions that build() stored in \p sample.
+//!
+//! \return true only if build, verification and inference all succeed
+//!
+template <typename T>
+bool process(SampleIOFormats& sample, sample::Logger::TestAtom const& sampleTest, SampleBuffer& inputBuf,
+    SampleBuffer& outputBuf, TypeSpec& spec)
+{
+    sample::gLogInfo << "Building and running a GPU inference engine with specified I/O formats." << std::endl;
+
+    int32_t const elemBytes = static_cast<int32_t>(sizeof(T));
+    bool const engineReady = sample.build(elemBytes) && sample.verify(spec);
+    if (!engineReady)
+    {
+        return false;
+    }
+
+    // Host buffers: the input uses the format under test, the output is always linear.
+    inputBuf = SampleBuffer(sample.mInputDims, elemBytes, sample.mTensorFormat, true);
+    outputBuf = SampleBuffer(sample.mOutputDims, elemBytes, TensorFormat::kLINEAR, false);
+
+    return sample.infer(inputBuf, outputBuf);
+}
+
+int32_t main(int32_t argc, char** argv)
+{
+    // Handle the command line first; invalid arguments and --help both exit before any test is defined.
+    samplesCommon::Args args;
+    if (!samplesCommon::parseArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
+    sample::gLogger.reportTestStart(sampleTest);
+
+    // Input formats exercised with half-precision I/O tensors.
+    std::vector<TypeSpec> fp16TypeSpec = {
+        TypeSpec{DataType::kHALF, TensorFormat::kLINEAR, "kLINEAR"},
+        TypeSpec{DataType::kHALF, TensorFormat::kCHW2, "kCHW2"},
+        TypeSpec{DataType::kHALF, TensorFormat::kHWC8, "kHWC8"},
+    };
+
+    // Input formats exercised with 8-bit integer I/O tensors.
+    std::vector<TypeSpec> int8TypeSpec = {
+        TypeSpec{DataType::kINT8, TensorFormat::kLINEAR, "kLINEAR"},
+        TypeSpec{DataType::kINT8, TensorFormat::kCHW4, "kCHW4"},
+        TypeSpec{DataType::kINT8, TensorFormat::kCHW32, "kCHW32"},
+    };
+
+    samplesCommon::OnnxSampleParams const params = initializeSampleParams(args);
+    SampleIOFormats sample(params);
+
+    sample::gLogInfo
+        << "Build TRT engine with different IO data type and formats. Ensure that built engine abide by them"
+        << std::endl;
+
+    // Test FP16 formats
+    for (TypeSpec spec : fp16TypeSpec)
+    {
+        SampleBuffer inputBuf, outputBuf;
+        sample.mTensorFormat = spec.format;
+        sample::gLogInfo << "Testing datatype FP16 with format " << spec.formatName << std::endl;
+        bool const ok = process<half_float::half>(sample, sampleTest, inputBuf, outputBuf, spec);
+        if (!ok)
+        {
+            return sample::gLogger.reportFail(sampleTest);
+        }
+    }
+
+    // Test INT8 formats
+    for (TypeSpec spec : int8TypeSpec)
+    {
+        SampleBuffer inputBuf, outputBuf;
+        sample.mTensorFormat = spec.format;
+        sample::gLogInfo << "Testing datatype INT8 with format " << spec.formatName << std::endl;
+        bool const ok = process<int8_t>(sample, sampleTest, inputBuf, outputBuf, spec);
+        if (!ok)
+        {
+            return sample::gLogger.reportFail(sampleTest);
+        }
+    }
+
+    return sample::gLogger.reportPass(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..6ba28ca5049e5e1855fcdc6e4868f477a6690d01
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/Makefile
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+OUTNAME_RELEASE = sample_named_dimensions
+OUTNAME_DEBUG   = sample_named_dimensions_debug
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE) # ../Makefile.config unless MAKEFILE was already set by the caller
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..daf00e0cb240db985caedf80c79c1806d4ba8815
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/README.md
@@ -0,0 +1,110 @@
+# Working with ONNX models with named input dimensions
+
+
+**Table Of Contents**
+- [Description](#description)
+- [Running the sample](#running-the-sample)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, `sampleNamedDimensions`, illustrates how to work with ONNX models with named input dimensions in TensorRT.
+
+ONNX has a notion of named dimension parameters: two network inputs with the same named dimension parameter are considered equal. TensorRT supports this feature by checking that in the optimization profile these dimensions have overlapping intervals and that at runtime they have the same value.
+
+Here, we synthetically create an ONNX model consisting of a single [Concat](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Concat) layer with two 2D input tensors:
+```
+input0      input1
+    \         /
+     \       /
+      --------
+      |Concat|
+      --------
+          |
+          |
+       output
+```
+Concatenation is performed along axis 0, so the Concat operation itself only requires the remaining dimension (axis 1) of the two input tensors to be the same. However, since both inputs have shape `[n_rows, 8]`, the shared named dimension `n_rows` additionally requires the axis-0 dimensions of the two input tensors to match.
+
+
+## Running the sample
+
+1.  The sample gets compiled when building the TensorRT OSS following the [instructions](https://github.com/NVIDIA/TensorRT). The binary named `sample_named_dimensions` will be created in the output directory.
+
+2.  Generate the ONNX model file by running this command:
+	```
+	python3 create_model.py
+	```
+	This will create a file named `concat_layer.onnx`.
+
+3. Run the sample to build and run the engine from the ONNX model.
+	```
+	./sample_named_dimensions [-h or --help] [-d or --datadir=<path to data directory>]
+	```
+
+4.  Verify that the sample has run successfully. If successful you should see output similar to the following:
+	```
+	&&&& RUNNING TensorRT.sample_named_dimensions [TensorRT v8500] # build/x86_64-gnu/sample_named_dimensions
+	[I] [TRT] ----------------------------------------------------------------
+	[I] [TRT] Input filename:   ../trt/samples/sampleNamedDimensions/concat_layer.onnx
+	[I] [TRT] ONNX IR version:  0.0.7
+	[I] [TRT] Opset version:    11
+	[I] [TRT] Producer name:
+	[I] [TRT] Producer version:
+	[I] [TRT] Domain:
+	[I] [TRT] Model version:    0
+	[I] [TRT] Doc string:
+	[I] [TRT] ----------------------------------------------------------------
+	[I] Input0:
+	-4.17896 4.21201 -8.6982 9.33153 -4.90741 1.1953 9.45208 1.04329
+	-5.47509 0.150872 -4.29573 1.72331 3.69642 5.73303 -4.89766 5.00559
+	
+	[I] Input1:
+	9.01907 3.57581 -1.36986 -3.22044 -5.90874 -8.11433 2.38472 -0.0868187
+	0.842402 -1.75138 4.55962 -6.38946 -7.73614 -1.26044 -4.23012 4.33806
+	
+	[I] Output:
+	-4.17896 4.21201 -8.6982 9.33153 -4.90741 1.1953 9.45208 1.04329
+	-5.47509 0.150872 -4.29573 1.72331 3.69642 5.73303 -4.89766 5.00559
+	9.01907 3.57581 -1.36986 -3.22044 -5.90874 -8.11433 2.38472 -0.0868187
+	0.842402 -1.75138 4.55962 -6.38946 -7.73614 -1.26044 -4.23012 4.33806
+	
+	&&&& PASSED TensorRT.sample_named_dimensions [TensorRT v8500] # build/x86_64-gnu/sample_named_dimensions
+	```
+
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+# Additional resources
+
+The following resources provide a deeper understanding about the named input dimensions feature in the ONNX project:
+
+**ONNX**
+- [GitHub: ONNX](https://github.com/onnx/onnx)
+- [Github: ONNX-TensorRT Open source parser](https://github.com/onnx/onnx-tensorrt)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The C++ API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#c_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+
+# Changelog
+
+June 2022
+This `README.md` file was recreated, updated and reviewed.
+
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/create_model.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/create_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..575bd4e6abdabb3147a3da500943d2c5ebabf85f
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/create_model.py
@@ -0,0 +1,35 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import onnx
+import onnx_graphsurgeon as gs
+
+def main():
+    input0 = gs.Variable(name="input0", dtype=np.float32, shape=('n_rows', 8))
+    input1 = gs.Variable(name="input1", dtype=np.float32, shape=('n_rows', 8))
+    output = gs.Variable(name="output", dtype=np.float32, )
+
+    node = gs.Node(op="Concat", inputs=[input0, input1], outputs=[output], attrs={"axis": 0})
+
+    graph = gs.Graph(nodes=[node], inputs=[input0, input1], outputs=[output])
+
+    model = gs.export_onnx(graph)
+    onnx.save(model, "concat_layer.onnx")
+
+if __name__ == '__main__':
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/sampleNamedDimensions.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/sampleNamedDimensions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..07b83eff73ebcda9276dba2662156d5554a9aa22
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNamedDimensions/sampleNamedDimensions.cpp
@@ -0,0 +1,462 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//!
+//! sampleNamedDimensions.cpp
+//! This file contains the implementation of the named dimensions sample. It creates the network using
+//! a synthetic ONNX model with named input dimensions.
+//! It can be run with the following command line:
+//! Command: ./sample_named_dimensions [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
+//!
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0
+
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "logger.h"
+#include "parserOnnxConfig.h"
+
+#include "NvInfer.h"
+#include <cuda_runtime_api.h>
+
+#include <algorithm>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <random>
+#include <sstream>
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+
+std::string const gSampleName = "TensorRT.sample_named_dimensions";
+
+//! \brief  The SampleNamedDimensions class implements a sample with named input dimensions
+//!
+//! \details It creates the network using an ONNX model
+//!
+class SampleNamedDimensions
+{
+public:
+    //!
+    //! \brief Stores a copy of the sample parameters; the engine is created later by build()
+    //!
+    SampleNamedDimensions(samplesCommon::OnnxSampleParams const& params)
+        : mParams(params)
+        , mEngine(nullptr)
+    {
+    }
+
+    //!
+    //! \brief Sets the value of the named input dimension
+    //!
+    //! \details build() and infer() both read this value, so call it before either of them.
+    //!
+    void setNamedDimension(int32_t dim);
+
+    //!
+    //! \brief Function builds the network engine
+    //!
+    bool build();
+
+    //!
+    //! \brief Runs the TensorRT inference engine for this sample
+    //!
+    bool infer();
+
+private:
+    samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample.
+
+    std::vector<nvinfer1::Dims> mInputDims;  //!< The dimensions of the inputs to the network.
+    std::vector<nvinfer1::Dims> mOutputDims; //!< The dimensions of the outputs to the network.
+
+    //! The value of the named dimension. Initialized to 0 so that it is never read while
+    //! indeterminate if setNamedDimension() has not been called.
+    int32_t mNamedDimension{0};
+
+    //! Input Tensors.
+    std::vector<float> mInput0;
+    std::vector<float> mInput1;
+
+    SampleUniquePtr<IRuntime> mRuntime{};           //!< The TensorRT Runtime used to deserialize the engine.
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The TensorRT engine used to run the network
+
+    //!
+    //! \brief Parses a synthetic ONNX model and creates a TensorRT network
+    //!
+    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+        SampleUniquePtr<nvonnxparser::IParser>& parser);
+
+    //!
+    //! \brief Adds an optimization profile for dynamic shapes
+    //!
+    void addOptimizationProfile(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+        SampleUniquePtr<nvinfer1::IBuilder>& builder);
+
+    //!
+    //! \brief Generates random input data and stores it in the managed host buffers
+    //!
+    bool processInput(samplesCommon::BufferManager const& buffers);
+
+    //!
+    //! \brief Logs the output and checks it against the concatenation of the two inputs
+    //!
+    bool verifyOutput(samplesCommon::BufferManager const& buffers);
+};
+
+//!
+//! \brief Records the extent to use for the named (leading) input dimension
+//!
+//! \param dim the value stored in mNamedDimension
+//!
+void SampleNamedDimensions::setNamedDimension(int32_t dim)
+{
+    this->mNamedDimension = dim;
+}
+
+//!
+//! \brief Creates the network, configures the builder and creates the network engine
+//!
+//! \details This function creates the network definition by parsing the Onnx model and builds
+//!          the engine that will be used to run the model (mEngine). Along the way it records the
+//!          input/output dimensions of the parsed network, registers the optimization profile and,
+//!          when mParams.timingCacheFile is non-empty, loads and afterwards updates a timing cache file.
+//!
+//! \return true if the engine was created successfully and false otherwise
+//!
+bool SampleNamedDimensions::build()
+{
+    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+    if (!builder)
+    {
+        return false;
+    }
+
+    // The network is created with no creation flags set (flags == 0).
+    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
+    if (!network)
+    {
+        return false;
+    }
+
+    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+    if (!config)
+    {
+        return false;
+    }
+
+    auto parser
+        = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
+    if (!parser)
+    {
+        return false;
+    }
+
+    // Populate the network from the ONNX file named in mParams.
+    auto constructed = constructNetwork(builder, network, config, parser);
+    if (!constructed)
+    {
+        return false;
+    }
+
+    // The sample expects exactly two 2-D inputs and one 2-D output. Their dimensions are cached
+    // because addOptimizationProfile(), processInput() and verifyOutput() read d[1] from them;
+    // the leading extent is taken from mNamedDimension instead.
+    ASSERT(network->getNbInputs() == 2);
+    mInputDims.push_back(network->getInput(0)->getDimensions());
+    mInputDims.push_back(network->getInput(1)->getDimensions());
+    ASSERT(mInputDims[0].nbDims == 2);
+    ASSERT(mInputDims[1].nbDims == 2);
+
+    ASSERT(network->getNbOutputs() == 1);
+    mOutputDims.push_back(network->getOutput(0)->getDimensions());
+    ASSERT(mOutputDims[0].nbDims == 2);
+
+    // CUDA stream used for profiling by the builder.
+    auto profileStream = samplesCommon::makeCudaStream();
+    if (!profileStream)
+    {
+        return false;
+    }
+    config->setProfileStream(*profileStream);
+
+    // Must run after mInputDims is filled: the profile shapes are derived from it.
+    addOptimizationProfile(config, builder);
+
+    // Stays null unless a timing cache file was requested.
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};
+
+    // Load timing cache
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache
+            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
+    }
+
+    // Build the engine in serialized form.
+    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
+    if (!plan)
+    {
+        return false;
+    }
+
+    // Write the timing cache back to the file only if one was loaded or created above.
+    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(
+            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+
+    // The runtime is created on first use and kept in a member.
+    if (!mRuntime)
+    {
+        mRuntime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
+    }
+
+    if (!mRuntime)
+    {
+        return false;
+    }
+
+    // Deserialize the plan into the engine used by infer().
+    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
+        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
+    if (!mEngine)
+    {
+        return false;
+    }
+
+    return true;
+}
+
+//!
+//! \brief Uses the ONNX parser to populate the network from the sample's ONNX file
+//!
+//! \details The file mParams.onnxFileName is looked up in mParams.dataDirs and parsed with the
+//!          logger's reportable severity as verbosity. The builder, network and config arguments
+//!          are not used by this function.
+//!
+//! \return true if the file was parsed successfully and false otherwise
+//!
+bool SampleNamedDimensions::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+    SampleUniquePtr<nvonnxparser::IParser>& parser)
+{
+    auto const modelPath = samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs);
+    auto const verbosity = static_cast<int32_t>(sample::gLogger.getReportableSeverity());
+
+    if (parser->parseFromFile(modelPath.c_str(), verbosity))
+    {
+        return true;
+    }
+    return false;
+}
+
+//!
+//! \brief Registers one optimization profile that pins the named dimension
+//!
+//! \details For both "input0" and "input1" the kMIN, kMAX and kOPT shapes are all set to
+//!          (mNamedDimension, d[1] of the corresponding parsed input), so the three selectors
+//!          describe the same shape. The profile is then added to the builder config.
+//!
+void SampleNamedDimensions::addOptimizationProfile(SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+    SampleUniquePtr<nvinfer1::IBuilder>& builder)
+{
+    auto* profile = builder->createOptimizationProfile();
+
+    char const* const tensorNames[] = {"input0", "input1"};
+    // Same selector order as the shapes are registered: min, max, then opt.
+    OptProfileSelector const selectors[]
+        = {OptProfileSelector::kMIN, OptProfileSelector::kMAX, OptProfileSelector::kOPT};
+
+    for (int32_t idx = 0; idx < 2; ++idx)
+    {
+        Dims2 const profileDims(mNamedDimension, mInputDims[idx].d[1]);
+        for (auto const selector : selectors)
+        {
+            profile->setDimensions(tensorNames[idx], selector, profileDims);
+        }
+    }
+
+    config->addOptimizationProfile(profile);
+}
+
+//!
+//! \brief Runs the TensorRT inference engine for this sample
+//!
+//! \details Creates the buffer manager and an execution context for mEngine, binds the device
+//!          buffer of every I/O tensor to the context, fills the inputs, executes the engine
+//!          and finally verifies the output.
+//!
+//! \return true if every step succeeded and the output matched the reference, false otherwise
+//!
+bool SampleNamedDimensions::infer()
+{
+    // RAII buffer manager for the engine's tensors
+    samplesCommon::BufferManager buffers(mEngine);
+
+    SampleUniquePtr<nvinfer1::IExecutionContext> context{mEngine->createExecutionContext()};
+    if (!context)
+    {
+        return false;
+    }
+
+    // Bind each I/O tensor, looked up by name, to its device buffer.
+    int32_t const nbIOTensors = mEngine->getNbIOTensors();
+    for (int32_t idx = 0; idx < nbIOTensors; ++idx)
+    {
+        auto const tensorName = mEngine->getIOTensorName(idx);
+        context->setTensorAddress(tensorName, buffers.getDeviceBuffer(tensorName));
+    }
+
+    // Fill the managed host buffers with the input data
+    ASSERT(mParams.inputTensorNames.size() == 2);
+    bool const inputsReady = processInput(buffers);
+    if (!inputsReady)
+    {
+        return false;
+    }
+
+    // Host -> device copy of the inputs
+    buffers.copyInputToDevice();
+
+    if (!context->executeV2(buffers.getDeviceBindings().data()))
+    {
+        return false;
+    }
+
+    // Device -> host copy of the outputs
+    buffers.copyOutputToHost();
+
+    // The overall result is the outcome of the verification.
+    return verifyOutput(buffers);
+}
+
+//!
+//! \brief Generates random input data, logs it and copies it into the managed host buffers
+//!
+//! \details Both inputs have mNamedDimension rows; the number of columns is d[1] of the
+//!          corresponding parsed input. Values are drawn uniformly from [-10, 10) with a
+//!          time-seeded generator, input0 first and input1 second. The generated data is kept
+//!          in mInput0 / mInput1.
+//!
+//! \return always true
+//!
+bool SampleNamedDimensions::processInput(samplesCommon::BufferManager const& buffers)
+{
+    int32_t const rows0 = mNamedDimension;
+    int32_t const cols0 = mInputDims[0].d[1];
+    int32_t const rows1 = mNamedDimension;
+    int32_t const cols1 = mInputDims[1].d[1];
+    int32_t const count0 = rows0 * cols0;
+    int32_t const count1 = rows1 * cols1;
+
+    mInput0.resize(count0);
+    mInput1.resize(count1);
+
+    // Generate random input
+    std::default_random_engine rng(static_cast<uint32_t>(time(nullptr)));
+    std::uniform_real_distribution<float> valueDist(-10., 10.);
+
+    // Fills the first 'count' entries of 'values' and logs them with 'width' entries per line.
+    auto const fillAndLog = [&rng, &valueDist](std::vector<float>& values, int32_t count, int32_t width) {
+        for (int32_t i = 0; i < count; i++)
+        {
+            values[i] = valueDist(rng);
+            char const* const separator = ((i + 1) % width) ? " " : "\n";
+            sample::gLogInfo << values[i] << separator;
+        }
+        sample::gLogInfo << std::endl;
+    };
+
+    sample::gLogInfo << "Input0:\n";
+    fillAndLog(mInput0, count0, cols0);
+
+    sample::gLogInfo << "Input1:\n";
+    fillAndLog(mInput1, count1, cols1);
+
+    // Copy the generated data into the host buffers registered under the input tensor names.
+    auto* hostInput0 = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
+    std::copy(mInput0.begin(), mInput0.begin() + count0, hostInput0);
+
+    auto* hostInput1 = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[1]));
+    std::copy(mInput1.begin(), mInput1.begin() + count1, hostInput1);
+
+    return true;
+}
+
+//!
+//! \brief Verify the result of concatenation
+//!
+//! \details Logs the output tensor and compares it element by element with the expected
+//!          concatenation along axis 0: the first half of the output must match mInput0 and the
+//!          second half must match mInput1. The stored inputs are left unmodified.
+//!
+//! \return whether the concatenated tensor matches reference
+//!
+bool SampleNamedDimensions::verifyOutput(samplesCommon::BufferManager const& buffers)
+{
+    int32_t const outputH = 2 * mNamedDimension;
+    int32_t const outputW = mOutputDims[0].d[1];
+    int32_t const outputSize = outputH * outputW;
+    // Index of the first output element that originates from input1.
+    int32_t const input1Offset = outputSize / 2;
+
+    auto* output = static_cast<float*>(buffers.getHostBuffer(mParams.outputTensorNames[0]));
+
+    sample::gLogInfo << "Output:\n";
+    for (int32_t i = 0; i < outputSize; i++)
+    {
+        sample::gLogInfo << output[i] << (((i + 1) % outputW) ? " " : "\n");
+    }
+    sample::gLogInfo << std::endl;
+
+    for (int32_t i = 0; i < outputSize; i++)
+    {
+        // Pick the reference directly from the input it belongs to instead of appending
+        // mInput1 to mInput1's sibling member, which would change the stored input data.
+        float const referenceValue = i < input1Offset ? mInput0[i] : mInput1[i - input1Offset];
+        if (fabs(output[i] - referenceValue) > std::numeric_limits<float>::epsilon())
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+//!
+//! \brief Initializes members of the params struct using the command line args
+//!
+//! \param args the parsed command line arguments
+//!
+//! \return the sample parameters: data directories (the user's, or the sample default when none
+//!         were given), the ONNX file name, the tensor names and the timing cache file path
+//!
+samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const& args)
+{
+    samplesCommon::OnnxSampleParams params;
+    if (args.dataDirs.empty()) // Use default directories if user hasn't provided directory paths
+    {
+        params.dataDirs.push_back("trt/samples/sampleNamedDimensions/");
+    }
+    else // Use the data directory provided by the user
+    {
+        params.dataDirs = args.dataDirs;
+    }
+    params.onnxFileName = "concat_layer.onnx";
+    params.inputTensorNames.push_back("input0");
+    params.inputTensorNames.push_back("input1");
+    params.outputTensorNames.push_back("output");
+    // Take the path from the command line; assigning params.timingCacheFile to itself would
+    // leave it empty and silently ignore --timingCacheFile.
+    params.timingCacheFile = args.timingCacheFile;
+
+    return params;
+}
+
+//!
+//! \brief Writes the usage line and the description of each option to standard output
+//!
+void printHelpInfo()
+{
+    std::ostream& out = std::cout;
+
+    out << "Usage: ./sample_named_dimensions [-h or --help] [-d or --datadir=<path to data directory>] "
+           "[--timingCacheFile=<path to timing cache file>]"
+        << std::endl;
+
+    out << "--help             Display help information" << std::endl;
+
+    out << "--datadir          Specify path to a data directory, overriding the default. This option can be used "
+           "multiple times to add multiple directories. If no data directories are given, the default is to use "
+           "(trt/samples/sampleNamedDimensions)"
+        << std::endl;
+
+    out << "--timingCacheFile  Specify path to a timing cache file. If it does not already exist, it will be "
+           "created."
+        << std::endl;
+}
+
+//!
+//! \brief Entry point: parses the arguments, then builds and runs the sample with the named
+//!        dimension set to 2, reporting the result through the sample logger
+//!
+int32_t main(int32_t argc, char** argv)
+{
+    samplesCommon::Args args;
+    if (!samplesCommon::parseArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    auto testHandle = sample::gLogger.defineTest(gSampleName, argc, argv);
+    sample::gLogger.reportTestStart(testHandle);
+
+    SampleNamedDimensions namedDimSample(initializeSampleParams(args));
+
+    sample::gLogInfo << "Building and running a GPU inference engine for synthetic ONNX model" << std::endl;
+
+    namedDimSample.setNamedDimension(2);
+
+    // infer() is attempted only if build() succeeded; either failure fails the test.
+    if (!namedDimSample.build() || !namedDimSample.infer())
+    {
+        return sample::gLogger.reportFail(testHandle);
+    }
+
+    return sample::gLogger.reportPass(testHandle);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b0975dd74aa420d734b59611c685ba25223ba1ce
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/Makefile
@@ -0,0 +1,73 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+# Names of the release and debug outputs of this sample.
+# NOTE(review): presumably consumed, together with EXTRA_DIRECTORIES and SAMPLE_DIR_NAME,
+# by the file included at the bottom - confirm against ../Makefile.config.
+OUTNAME_RELEASE = sample_non_zero_plugin
+OUTNAME_DEBUG   = sample_non_zero_plugin_debug
+EXTRA_DIRECTORIES = ../common ../utils
+# Name of the directory that contains this Makefile.
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+# Shared configuration included at the end of this file; can be overridden by the caller.
+MAKEFILE ?= ../Makefile.config
+
+# CUDA location: CUDA_INSTALL_DIR is used when it is non-empty, otherwise /usr/local/cuda
+# (unless CUDA_INST_DIR is already set).
+ifneq ($(CUDA_INSTALL_DIR), )
+	CUDA_INST_DIR ?= $(CUDA_INSTALL_DIR)
+endif
+CUDA_INST_DIR ?= /usr/local/cuda
+# CUDA version extracted from the "release X.Y," part of the `nvcc --version` output.
+CUDA_VER := $(shell $(CUDA_INST_DIR)/bin/nvcc --version | grep "release" | sed 's/.*release //' | sed 's/,.*//')
+
+# cuda-ge(major, minor, version): sets CUDA_VER_GE_<major>_<minor> to 1 when <version> is the
+# last word after sorting it together with <major>.<minor>, and to 0 otherwise. CUDA_GE_<major>_<minor>
+# takes that value unless it is already defined. Note that $(sort) orders words lexically.
+define cuda-ge
+    CUDA_VER_GE_$(1)_$(2) := 0
+    ifeq ($$(lastword $$(sort $(3) $(1).$(2))), $(3))
+        CUDA_VER_GE_$(1)_$(2) := 1
+    endif
+
+    CUDA_GE_$(1)_$(2) ?= $$(CUDA_VER_GE_$(1)_$(2))
+
+endef
+
+# Evaluate cuda-ge for minor versions 0..9 of CUDA 11, 12 and 13.
+$(eval $(foreach minor,$(shell seq 0 9),$(call cuda-ge,11,$(minor),$(CUDA_VER))))
+$(eval $(foreach minor,$(shell seq 0 9),$(call cuda-ge,12,$(minor),$(CUDA_VER))))
+$(eval $(foreach minor,$(shell seq 0 9),$(call cuda-ge,13,$(minor),$(CUDA_VER))))
+
+# SM architectures to compile for: 75 always, further ones depending on the CUDA version flags.
+SAMPLE_SMS = 75
+ifeq ($(CUDA_GE_11_0), 1)
+    SAMPLE_SMS += 80
+endif
+
+ifeq ($(CUDA_GE_11_1), 1)
+    SAMPLE_SMS += 86
+endif
+
+ifeq ($(CUDA_GE_11_4), 1)
+    SAMPLE_SMS += 87
+endif
+
+ifeq ($(CUDA_GE_11_8), 1)
+    SAMPLE_SMS += 89 90
+endif
+
+ifeq ($(CUDA_GE_12_8), 1)
+    SAMPLE_SMS += 100 101 120
+endif
+
+# Non-hardware forward compatible SMs
+NON_HFC_SMS := 89 90
+
+ifeq ($(CUDA_GE_12_8), 1)
+    NON_HFC_SMS += 100 101 120
+endif
+
+# Derive GENCODES only when the caller has not supplied them: one SASS target per entry of
+# SAMPLE_SMS, plus PTX for the last (after sorting) SM that is not listed in NON_HFC_SMS.
+ifeq ($(GENCODES),)
+    $(foreach sm,$(SAMPLE_SMS),$(eval GENCODES += -gencode arch=compute_$(sm),code=sm_$(sm)))
+    HFC_SMS := $(filter-out $(NON_HFC_SMS),$(SAMPLE_SMS))
+    # Generate PTX with the highest supported FC SM
+    GEN_PTX_SM := $(lastword $(sort $(HFC_SMS)))
+    GENCODES += -gencode arch=compute_$(GEN_PTX_SM),code=compute_$(GEN_PTX_SM)
+endif
+
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..411dbb763c20c935a70aa6f7f636e632d92a0c8a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/README.md
@@ -0,0 +1,180 @@
+# NonZero Plugin for TensorRT using IPluginV3
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+	* [Implementing a NonZero plugin using IPluginV3 interface](#implementing-a-nonzero-plugin-using-ipluginv3-interface)
+	* [Creating network and building the engine](#creating-network-and-building-the-engine)
+	* [Running inference](#running-inference)
+- [Running the sample](#running-the-sample)
+	* [Sample `--help` options](#sample---help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, sampleNonZeroPlugin, implements a plugin for the NonZero operation, customizable to output the non-zero indices in
+either a row order (each set of indices in the same row) or column order format (each set of indices in the same column).
+
+NonZero is an operation that finds the indices of the non-zero elements of the input tensor.
+
+## How does this sample work?
+
+This sample creates and runs a TensorRT engine built from a network containing a single NonZeroPlugin node. It demonstrates how
+custom layers with data-dependent output shapes can be implemented and added to a TensorRT network.
+
+Specifically, this sample:
+- [Implements a TensorRT plugin for the NonZero operation](#implementing-a-nonzero-plugin-using-ipluginv3-interface)
+- [Creates a network and builds an engine](#creating-network-and-building-the-engine)
+- [Runs inference using the generated TensorRT network](#running-inference)
+
+### Implementing a NonZero plugin using IPluginV3 interface
+
+Before `IPluginV3` (and its associated interfaces), TensorRT plugins could not have outputs whose shapes depended on the input values (they could only depend
+on input shapes). `IPluginV3OneBuild`, which exposes a build capability for `IPluginV3`, provides support for such data-dependent output shapes.
+
+`NonZeroPlugin` in this sample is written to handle 2-D input tensors of shape $R \times C$. Assume that the tensor contains $K$ non-zero elements and that the
+non-zero indices are required in a row ordering (each set of indices in its own row). Then the output shape would be $K \times 2$.
+
+The output shapes are expressed to the TensorRT builder through the `IPluginV3OneBuild::getOutputShapes()` API. Expressing the second dimension of the output is
+straightforward:
+```
+outputs[0].d[1] = exprBuilder.constant(2);
+```
+
+The extent of each data-dependent dimension in the plugin must be expressed in terms of a *_size tensor_*. A size tensor is a scalar output of 
+`DataType::kINT32` or `DataType::kINT64` that must be added as one of the plugin outputs. In this case, it is sufficient to declare one size tensor to denote the extent of the
+first dimension of the non-zero indices output. To declare a size tensor, one must provide an upper-bound and optimum value for its extent as `IDimensionExpr`s. These can be formed through the `IExprBuilder` argument passed to the `IPluginV3OneBuild::getOutputShapes()` method.
+ - For unknown inputs, the upper-bound is the total number of elements in the input
+	```
+	auto upperBound = exprBuilder.operation(DimensionOperation::kPROD, *inputs[0].d[0], *inputs[0].d[1]);
+	```
+ - A good estimate for the optimum is that half of the elements are non-zero
+	```
+	auto optValue = exprBuilder.operation(DimensionOperation::kFLOOR_DIV, *upperBound, *exprBuilder.constant(2));
+	```
+
+Now we can declare the size tensor using the `IExprBuilder::declareSizeTensor()` method, which also requires the specification of the output index at which the size tensor would reside. Let us place it after the non-zero indices output:
+```
+auto numNonZeroSizeTensor = exprBuilder.declareSizeTensor(1, *optValue, *upperBound);
+```
+
+Now we are ready to specify the extent of the first dimension of the non-zero indices output:
+```
+outputs[0].d[0] = numNonZeroSizeTensor;
+```
+and let's not forget to declare that the size tensor is a scalar (0-D):
+```
+outputs[1].nbDims = 0;
+```
+
+The `NonZeroPlugin` can also be configured to emit the non-zero indices in a column-order fashion through the `rowOrder` plugin attribute, by setting it to `0`.
+In this case, the first output of the plugin will have shape $2 \times K$, and the output shape specification must be adjusted accordingly.
+
+### Creating network and building the engine
+
+To add the plugin to the network, the `INetworkDefinition::addPluginV3()` method must be used. 
+
+Similar to `IPluginCreator` used for V2 plugins, V3 plugins must be accompanied by the registration of a plugin creator implementing the `IPluginCreatorV3One`
+interface.
+
+### Running inference
+
+As sample inputs, random images from the MNIST dataset are selected and scaled to the range `[0,1]`. The network outputs both the non-zero indices
+and the non-zero count.
+
+## Preparing sample data
+
+Download the sample data from the [TensorRT release tarball](https://developer.nvidia.com/nvidia-tensorrt-download#).
+
+## Running the sample
+
+1. Compile the sample by following build instructions in [TensorRT README](https://github.com/NVIDIA/TensorRT/).
+
+2.  Run the sample to build the engine containing the NonZero plugin and run inference with it.
+	```
+	./sample_non_zero_plugin [-h or --help] [-d or --datadir=<path to data directory>] [--columnOrder] [--fp16]
+	```
+
+3.  Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+	```
+	&&&& RUNNING TensorRT.sample_non_zero_plugin # ./sample_non_zero_plugin
+	...
+	[I] Input:
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.854902, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.858824, 0, 0, 0.0745098, 0, 0.564706, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.317647, 0, 0, 0.47451, 0, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.0431373, 0, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.854902, 0, 0, 0.145098
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.564706, 0, 0, 0.996078
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.282353
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.854902
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.854902, 0, 0, 0.145098, 0, 0.564706
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.564706, 0, 0, 0.996078, 0, 0
+	[I] 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.282353, 0, 0
+	[I]
+	[I] Output:
+	[I] 2 14
+	[I] 3 9
+	[I] 3 12
+	[I] 3 14
+	[I] 4 9
+	[I] 4 12
+	[I] 5 12
+	[I] 8 12
+	[I] 8 15
+	[I] 9 12
+	[I] 9 15
+	[I] 10 15
+	[I] 13 15
+	[I] 14 10
+	[I] 14 13
+	[I] 14 15
+	[I] 15 10
+	[I] 15 13
+	[I] 16 13
+	&&&& PASSED TensorRT.sample_non_zero_plugin # ./sample_non_zero_plugin
+	```
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+# Additional resources
+
+The following resources provide a deeper understanding about the V3 TensorRT plugins and the NonZero operation:
+
+**NonZero**
+- [ONNX: NonZero](https://onnx.ai/onnx/operators/onnx__NonZero.html)
+
+**TensorRT plugins**
+- [Extending TensorRT with Custom Layers](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#extending)
+
+**Other documentation**
+- [Introduction To NVIDIA’s TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The C++ API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#c_topics)
+- [NVIDIA’s TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+
+# Changelog
+
+March 2024
+This is the first version of this `README.md` file.
+
+
+# Known issues
+
+Windows users building this sample with Visual Studio with a CUDA version different from the TensorRT package will need to retarget the project to build against the installed CUDA version through the `Build Dependencies -> Build Customization` menu.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/nonZeroKernel.cu b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/nonZeroKernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..cdb4c615ea23522c478945f9da9062cfd705850e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/nonZeroKernel.cu
@@ -0,0 +1,82 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nonZeroKernel.h"
+
+// Returns 1 if `a` compares equal to 0.F and 0 otherwise.
+inline __device__ int32_t isZero(float const& a)
+{
+    bool const equalsZero = (a == 0.F);
+    return equalsZero;
+}
+
+// Returns 1 if `a` compares equal to zero and 0 otherwise. When __CUDA_ARCH__ >= 530 the
+// comparison is done on half values; otherwise `a` is converted to float first.
+inline __device__ int32_t isZero(half const& a)
+{
+#if __CUDA_ARCH__ >= 530
+    half const zero = __float2half(0.F);
+    return a == zero;
+#else
+    float const widened = __half2float(a);
+    return widened == 0.F;
+#endif
+}
+
+// Records the (row, col) position of every non-zero element of X.
+//
+// Each thread handles one column (col = blockIdx.x * blockDim.x + threadIdx.x) and walks all R
+// rows of it, reading element (row, col) from X[row * C + col]. For every non-zero element,
+// *count is incremented atomically and the previous value is used as that element's slot.
+// When `indices` is non-null the position is stored as
+//   rowOrder != 0: indices[2 * slot] = row, indices[2 * slot + 1] = col
+//   rowOrder == 0: indices[slot]     = row, indices[slot + *K]    = col
+// When `indices` is null only the count is updated.
+// NOTE(review): *K is presumably the total number of non-zero elements - confirm against the caller.
+// NOTE(review): *count is not reset here; presumably the caller zeroes it before the launch - confirm.
+template <typename T>
+__global__ void findNonZeroIndicesKernel(
+    T const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C, int32_t rowOrder)
+{
+    int32_t const col = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Threads past the last column have nothing to do.
+    if (col >= C)
+    {
+        return;
+    }
+
+    for (int32_t row = 0; row < R; ++row)
+    {
+        if (isZero(X[row * C + col]))
+        {
+            continue;
+        }
+
+        // Reserve the next free slot; atomicAdd returns the value before the increment.
+        int32_t const slot = atomicAdd(count, 1);
+        if (!indices)
+        {
+            continue;
+        }
+
+        if (rowOrder != 0)
+        {
+            indices[2 * slot] = row;
+            indices[2 * slot + 1] = col;
+        }
+        else
+        {
+            indices[slot] = row;
+            indices[slot + *K] = col;
+        }
+    }
+}
+
+// Launches findNonZeroIndicesKernel on `stream` for the R x C input X, using one thread per
+// column in blocks of 256 threads. `rowOrder` is forwarded to the kernel as 0 or 1.
+// The launch is asynchronous with respect to the host; no error checking is done here.
+// An empty input (R <= 0 or C <= 0) contains no non-zero elements, so nothing is launched and
+// none of the output buffers are touched.
+template <typename T>
+void nonZeroIndicesImpl(T const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C,
+    bool rowOrder, cudaStream_t stream)
+{
+    // With C <= 0 the grid size below would be zero (or negative), which is not a valid launch
+    // configuration; with R <= 0 the kernel would not write anything. Skip the launch in both cases.
+    if (R <= 0 || C <= 0)
+    {
+        return;
+    }
+
+    constexpr int32_t kBLOCK_SIZE = 256;
+    // Round up so that every column is covered by a thread.
+    int32_t const blocksPerGrid = (C + kBLOCK_SIZE - 1) / kBLOCK_SIZE;
+
+    findNonZeroIndicesKernel<<<blocksPerGrid, kBLOCK_SIZE, 0, stream>>>(
+        X, indices, count, K, R, C, static_cast<int32_t>(rowOrder));
+}
+
+// Explicitly instantiates nonZeroIndicesImpl for element type T. The template is defined in this
+// file only, so each element type used by other translation units needs an instantiation here.
+#define NONZERO_SPECIALIZED_IMPL(T)                                                                                    \
+    template void nonZeroIndicesImpl<T>(T const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R,     \
+        int32_t C, bool rowOrder, cudaStream_t stream);
+
+// Element types instantiated: float and half.
+NONZERO_SPECIALIZED_IMPL(float)
+NONZERO_SPECIALIZED_IMPL(half)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/nonZeroKernel.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/nonZeroKernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..c2f23c8ec79f0835f0f6d62521beebb072ff19f4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/nonZeroKernel.h
@@ -0,0 +1,28 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SAMPLE_NONZERO_KERNEL_H
+#define SAMPLE_NONZERO_KERNEL_H
+
+#include <cuda_fp16.h>
+
+#include <cstdint>
+
+//!
+//! \brief Finds the indices of the non-zero elements of an R x C input on the given CUDA stream
+//!
+//! \param X        input elements of type T
+//! \param indices  output buffer for the (row, col) indices
+//! \param count    counter of the non-zero elements found
+//! \param K        NOTE(review): presumably points to the total number of non-zero elements,
+//!                 used to lay out the column-order output - confirm against the implementation
+//! \param R        number of rows of the input
+//! \param C        number of columns of the input
+//! \param rowOrder selects between the row-order and column-order layout of the indices
+//! \param stream   CUDA stream the work is issued on
+//!
+//! \note Only a declaration is provided here; the definition and its instantiations live in a
+//!       separate source file.
+//!
+template <typename T>
+void nonZeroIndicesImpl(T const* X, int32_t* indices, int32_t* count, int32_t const* K, int32_t R, int32_t C,
+    bool rowOrder, cudaStream_t stream);
+
+#endif // SAMPLE_NONZERO_KERNEL_H
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4de0b5b5df137e07d9940b9cdbf9c1973913829b
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp
@@ -0,0 +1,778 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//!
+//! sampleNonZeroPlugin.cpp
+//! This file contains a sample demonstrating a plugin for NonZero.
+//! It can be run with the following command line:
+//! Command: ./sample_non_zero_plugin [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
+//!
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0
+
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "logger.h"
+#include "nonZeroKernel.h"
+#include "parserOnnxConfig.h"
+
+#include "NvInfer.h"
+#include <cuda_runtime_api.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <random>
+#include <sstream>
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+
+//! Test name handed to the sample logger (see defineTest() in main()).
+std::string const kSAMPLE_NAME = "TensorRT.sample_non_zero_plugin";
+
+//! Shorthand for the CUDA half-precision type used to select the FP16 kernel instantiation.
+using half = __half;
+
+//!
+//! \brief Forwards type-erased buffers to the nonZeroIndicesImpl instantiation that matches \p type.
+//!
+//! \details kFLOAT selects the float instantiation and kHALF the half instantiation; any other type trips the
+//!          ASSERT. The indices, count and K buffers are always reinterpreted as INT32.
+//!
+void nonZeroIndicesHelper(nvinfer1::DataType type, void const* X, void* indices, void* count, void const* K, int32_t R,
+    int32_t C, bool rowOrder, cudaStream_t stream)
+{
+    // These three buffers have the same element type for every supported input type.
+    auto* const typedIndices = static_cast<int32_t*>(indices);
+    auto* const typedCount = static_cast<int32_t*>(count);
+    auto const* const typedK = static_cast<int32_t const*>(K);
+
+    switch (type)
+    {
+    case nvinfer1::DataType::kFLOAT:
+        nonZeroIndicesImpl<float>(
+            static_cast<float const*>(X), typedIndices, typedCount, typedK, R, C, rowOrder, stream);
+        break;
+    case nvinfer1::DataType::kHALF:
+        nonZeroIndicesImpl<half>(
+            static_cast<half const*>(X), typedIndices, typedCount, typedK, R, C, rowOrder, stream);
+        break;
+    default: ASSERT(false && "Unsupported data type"); break;
+    }
+}
+
+//!
+//! \brief IPluginV3 plugin that emits the indices of the non-zero elements of a 2-D FLOAT/HALF tensor.
+//!
+//! \details One object implements the core, build and runtime capabilities. Output 0 holds the indices
+//!          (N x 2 when rowOrder is set, 2 x N otherwise); output 1 is the 0-D INT32 size tensor carrying N.
+//!
+class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV3OneBuild, public IPluginV3OneRuntime
+{
+public:
+    //! A memberwise copy leaves the serialization table pointing into the source object; call
+    //! initFieldsToSerialize() on the copy (as clone() does) before handing it out.
+    NonZeroPlugin(NonZeroPlugin const& p) = default;
+
+    //! \param rowOrder true for an N x 2 (row-major) index tensor, false for a 2 x N (column-major) one
+    NonZeroPlugin(bool rowOrder)
+        : mRowOrder(rowOrder)
+    {
+        initFieldsToSerialize();
+    }
+
+    //! Rebuilds the field table returned by getFieldsToSerialize() so that it refers to this object's storage.
+    void initFieldsToSerialize()
+    {
+        // The field is declared as one kINT32 value, i.e. four bytes, so it must point at an int32_t rather than at
+        // the one-byte bool. The low byte still holds 0 or 1, which is what NonZeroPluginCreator::createPlugin reads.
+        mRowOrderField = mRowOrder ? 1 : 0;
+        mDataToSerialize.clear();
+        mDataToSerialize.emplace_back(PluginField("rowOrder", &mRowOrderField, PluginFieldType::kINT32, 1));
+        mFCToSerialize.nbFields = static_cast<int32_t>(mDataToSerialize.size());
+        mFCToSerialize.fields = mDataToSerialize.data();
+    }
+
+    // IPluginV3 methods
+
+    //! Returns this object viewed through the interface that matches the requested capability.
+    IPluginCapability* getCapabilityInterface(PluginCapabilityType type) noexcept override
+    {
+        try
+        {
+            if (type == PluginCapabilityType::kBUILD)
+            {
+                return static_cast<IPluginV3OneBuild*>(this);
+            }
+            if (type == PluginCapabilityType::kRUNTIME)
+            {
+                return static_cast<IPluginV3OneRuntime*>(this);
+            }
+            ASSERT(type == PluginCapabilityType::kCORE);
+            return static_cast<IPluginV3OneCore*>(this);
+        }
+        catch (std::exception const& e)
+        {
+            sample::gLogError << e.what() << std::endl;
+        }
+        return nullptr;
+    }
+
+    //! Returns a heap-allocated copy owned by the caller, or nullptr if the copy could not be created.
+    IPluginV3* clone() noexcept override
+    {
+        // Allocation and the vector copy may throw; an exception must not escape a noexcept function.
+        try
+        {
+            auto clone = std::make_unique<NonZeroPlugin>(*this);
+            clone->initFieldsToSerialize();
+            return clone.release();
+        }
+        catch (std::exception const& e)
+        {
+            sample::gLogError << e.what() << std::endl;
+        }
+        return nullptr;
+    }
+
+    // IPluginV3OneCore methods
+    char const* getPluginName() const noexcept override
+    {
+        return "NonZeroPlugin";
+    }
+
+    char const* getPluginVersion() const noexcept override
+    {
+        return "0";
+    }
+
+    char const* getPluginNamespace() const noexcept override
+    {
+        return "";
+    }
+
+    // IPluginV3OneBuild methods
+
+    //! Two outputs: the index tensor and the size tensor holding the number of non-zero elements.
+    int32_t getNbOutputs() const noexcept override
+    {
+        return 2;
+    }
+
+    //! Nothing to configure; always reports success.
+    int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out,
+        int32_t nbOutputs) noexcept override
+    {
+        return 0;
+    }
+
+    //! Accepts linear FLOAT/HALF at position 0 and linear INT32 at every other position.
+    bool supportsFormatCombination(
+        int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override
+    {
+        bool typeOk{false};
+        if (pos == 0)
+        {
+            typeOk = inOut[0].desc.type == DataType::kFLOAT || inOut[0].desc.type == DataType::kHALF;
+        }
+        else if (pos == 1)
+        {
+            typeOk = inOut[1].desc.type == DataType::kINT32;
+        }
+        else // pos == 2
+        {
+            // size tensor outputs must be NCHW INT32
+            typeOk = inOut[2].desc.type == DataType::kINT32;
+        }
+
+        return inOut[pos].desc.format == PluginFormat::kLINEAR && typeOk;
+    }
+
+    //! Both outputs are INT32 regardless of the input type.
+    int32_t getOutputDataTypes(
+        DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept override
+    {
+        outputTypes[0] = DataType::kINT32;
+        outputTypes[1] = DataType::kINT32;
+        return 0;
+    }
+
+    //! Declares the data-dependent output shape.
+    //!
+    //! \return 0 on success, -1 if the input is not 2-D
+    int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
+        int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept override
+    {
+        // The input tensor must be 2-D
+        if (inputs[0].nbDims != 2)
+        {
+            return -1;
+        }
+
+        outputs[0].nbDims = 2;
+
+        // At most every element of the input is non-zero.
+        auto upperBound = exprBuilder.operation(DimensionOperation::kPROD, *inputs[0].d[0], *inputs[0].d[1]);
+
+        // On average, we can assume that half of all elements will be non-zero
+        auto optValue = exprBuilder.operation(DimensionOperation::kFLOOR_DIV, *upperBound, *exprBuilder.constant(2));
+        auto numNonZeroSizeTensor = exprBuilder.declareSizeTensor(1, *optValue, *upperBound);
+
+        if (!mRowOrder)
+        {
+            outputs[0].d[0] = exprBuilder.constant(2);
+            outputs[0].d[1] = numNonZeroSizeTensor;
+        }
+        else
+        {
+            outputs[0].d[0] = numNonZeroSizeTensor;
+            outputs[0].d[1] = exprBuilder.constant(2);
+        }
+
+        // output at index 1 is a size tensor
+        outputs[1].nbDims = 0; // size tensors must be declared as 0-D
+
+        return 0;
+    }
+
+    // IPluginV3OneRuntime methods
+
+    //! Enqueues the NonZero computation on \p stream.
+    //!
+    //! \return 0 on success, -1 if the input type is unsupported, the workspace is missing or a counter could not
+    //!         be cleared
+    int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs,
+        void* const* outputs, void* workspace, cudaStream_t stream) noexcept override
+    {
+        auto const R = static_cast<int32_t>(inputDesc[0].dims.d[0]);
+        auto const C = static_cast<int32_t>(inputDesc[0].dims.d[1]);
+
+        auto const type = inputDesc[0].type;
+
+        // Validate everything before any work is put on the stream.
+        if (!(type == nvinfer1::DataType::kHALF || type == nvinfer1::DataType::kFLOAT))
+        {
+            sample::gLogError << "Unsupported: Sample only supports DataType::kHALF and DataType::kFLOAT" << std::endl;
+            return -1;
+        }
+
+        if (workspace == nullptr)
+        {
+            sample::gLogError << "Unsupported: workspace is null" << std::endl;
+            return -1;
+        }
+
+        // Clears one INT32 counter on the stream and reports whether the request was accepted.
+        auto const zeroCounter = [stream](void* counter) {
+            return cudaMemsetAsync(counter, 0, sizeof(int32_t), stream) == cudaSuccess;
+        };
+
+        if (!zeroCounter(outputs[1]))
+        {
+            sample::gLogError << "Failed to clear the non-zero count output" << std::endl;
+            return -1;
+        }
+
+        if (!mRowOrder)
+        {
+            // When constructing a column major output, the kernel needs to be aware of the total number of non-zero
+            // elements so as to write the non-zero indices at the correct places. Therefore, we will launch the kernel
+            // twice: first, only to calculate the total non-zero count, which will be stored in workspace; and
+            // then to actually write the non-zero indices to the outputs[0] buffer.
+            if (!zeroCounter(workspace))
+            {
+                sample::gLogError << "Failed to clear the workspace counter" << std::endl;
+                return -1;
+            }
+            nonZeroIndicesHelper(type, inputs[0], nullptr, workspace, nullptr, R, C, mRowOrder, stream);
+            nonZeroIndicesHelper(type, inputs[0], outputs[0], outputs[1], workspace, R, C, mRowOrder, stream);
+        }
+        else
+        {
+            nonZeroIndicesHelper(type, inputs[0], outputs[0], outputs[1], nullptr, R, C, mRowOrder, stream);
+        }
+
+        return 0;
+    }
+
+    //! No shape-dependent state to update; always reports success.
+    int32_t onShapeChange(
+        PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept override
+    {
+        return 0;
+    }
+
+    //! The plugin uses no context resources, so attaching simply yields a clone (nullptr if cloning failed).
+    IPluginV3* attachToContext(IPluginResourceContext* context) noexcept override
+    {
+        return clone();
+    }
+
+    //! Returns the field table built by initFieldsToSerialize(); it stays valid for the lifetime of this object.
+    PluginFieldCollection const* getFieldsToSerialize() noexcept override
+    {
+        return &mFCToSerialize;
+    }
+
+    //! One INT32 of scratch space: enqueue() stores the total non-zero count there in column-major mode.
+    size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
+        DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override
+    {
+        return sizeof(int32_t);
+    }
+
+private:
+    bool mRowOrder{true};      //!< true: N x 2 index layout, false: 2 x N index layout
+    int32_t mRowOrderField{1}; //!< INT32 mirror of mRowOrder that backs the serialized "rowOrder" field
+    std::vector<nvinfer1::PluginField> mDataToSerialize; //!< storage behind mFCToSerialize.fields
+    nvinfer1::PluginFieldCollection mFCToSerialize;      //!< table returned by getFieldsToSerialize()
+};
+
+//!
+//! \brief Creator for NonZeroPlugin; advertises a single "rowOrder" attribute.
+//!
+class NonZeroPluginCreator : public nvinfer1::IPluginCreatorV3One
+{
+public:
+    NonZeroPluginCreator()
+    {
+        mPluginAttributes.clear();
+        mPluginAttributes.emplace_back(PluginField("rowOrder", nullptr, PluginFieldType::kINT32, 1));
+        mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size());
+        mFC.fields = mPluginAttributes.data();
+    }
+
+    char const* getPluginName() const noexcept override
+    {
+        return "NonZeroPlugin";
+    }
+
+    char const* getPluginVersion() const noexcept override
+    {
+        return "0";
+    }
+
+    //! Returns the attribute table built in the constructor (names and types only, no data).
+    PluginFieldCollection const* getFieldNames() noexcept override
+    {
+        return &mFC;
+    }
+
+    //! Creates a NonZeroPlugin from the supplied fields.
+    //!
+    //! \param name  unused
+    //! \param fc    fields to read; a "rowOrder" entry overrides the default of true
+    //! \param phase unused
+    //!
+    //! \return a new plugin owned by the caller, or nullptr if \p fc is malformed or construction failed
+    IPluginV3* createPlugin(char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept override
+    {
+        try
+        {
+            // Reject a missing or inconsistent collection instead of dereferencing it.
+            if (fc == nullptr || (fc->nbFields > 0 && fc->fields == nullptr))
+            {
+                sample::gLogError << "NonZeroPlugin: invalid plugin field collection" << std::endl;
+                return nullptr;
+            }
+
+            bool rowOrder{true};
+            for (int32_t i = 0; i < fc->nbFields; ++i)
+            {
+                auto const& field = fc->fields[i];
+                if (field.name == nullptr || std::strcmp(field.name, "rowOrder") != 0)
+                {
+                    continue;
+                }
+                if (field.data == nullptr)
+                {
+                    sample::gLogError << "NonZeroPlugin: field rowOrder carries no data" << std::endl;
+                    return nullptr;
+                }
+                // Read as bool: constructNetwork() in this file stores the address of a bool behind this field
+                // even though the field is declared kINT32.
+                rowOrder = *static_cast<bool const*>(field.data);
+            }
+            return new NonZeroPlugin(rowOrder);
+        }
+        catch (std::exception const& e)
+        {
+            sample::gLogError << e.what() << std::endl;
+        }
+        return nullptr;
+    }
+
+    char const* getPluginNamespace() const noexcept override
+    {
+        return "";
+    }
+
+private:
+    nvinfer1::PluginFieldCollection mFC;                  //!< table returned by getFieldNames()
+    std::vector<nvinfer1::PluginField> mPluginAttributes; //!< storage behind mFC.fields
+};
+
+namespace
+{
+//! Sample parameters extended with the index layout requested for the plugin.
+struct NonZeroParams : public samplesCommon::SampleParams
+{
+    bool rowOrder{true}; //!< forwarded to the plugin as its "rowOrder" field; defaults to true
+};
+} // namespace
+
+//! \brief  Sample driver that builds a network around the NonZero plugin and runs it once
+//!
+//! \details The index layout (row major or column major) follows NonZeroParams::rowOrder
+//!
+class SampleNonZeroPlugin
+{
+public:
+    SampleNonZeroPlugin(NonZeroParams const& params)
+        : mParams(params)
+        , mRuntime(nullptr)
+        , mEngine(nullptr)
+        , mSeed(static_cast<uint32_t>(time(nullptr)))
+    {
+    }
+
+    //!
+    //! \brief Builds the engine that hosts the plugin
+    //!
+    bool build();
+
+    //!
+    //! \brief Executes the engine once and verifies the result
+    //!
+    bool infer();
+
+private:
+    //!
+    //! \brief Populates the network with an input and the NonZero plugin layer
+    //!
+    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config);
+
+    //!
+    //! \brief Fills the host input buffer
+    //!
+    bool processInput(samplesCommon::BufferManager const& buffers);
+
+    //!
+    //! \brief Checks the indices produced by the plugin against the input
+    //!
+    bool verifyOutput(samplesCommon::BufferManager const& buffers);
+
+    NonZeroParams mParams; //!< Parameters this sample was created with
+
+    nvinfer1::Dims mInputDims;  //!< Dimensions of the network input, filled in by build()
+    nvinfer1::Dims mOutputDims; //!< Dimensions of the first network output, filled in by build()
+
+    std::shared_ptr<nvinfer1::IRuntime> mRuntime;   //!< Runtime that deserializes the engine
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< Engine that runs the network
+
+    uint32_t mSeed{}; //!< Time-based seed shared by the random input shape and the input file choice
+};
+
+//!
+//! \brief Creates the network, configures the builder and creates the network engine
+//!
+//! \details This function registers the NonZeroPlugin creator, creates a network containing a NonZeroPlugin and
+//!          builds the engine that will be used to run the plugin (mEngine)
+//!
+//! \return true if the engine was created successfully and false otherwise
+//!
+bool SampleNonZeroPlugin::build()
+{
+    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+    if (!builder)
+    {
+        return false;
+    }
+
+    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
+    if (!network)
+    {
+        return false;
+    }
+
+    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+    if (!config)
+    {
+        return false;
+    }
+
+    // The registry is handed a reference to the creator and keeps referring to it after this function returns,
+    // so the creator needs static storage duration; a function-scoped object would leave a dangling entry behind.
+    static NonZeroPluginCreator pluginCreator;
+    auto* const registry = getPluginRegistry();
+    // Registration is refused when a creator with this name is already present (e.g. build() is called twice);
+    // that is only an error if the creator cannot be looked up afterwards.
+    if (!registry->registerCreator(pluginCreator, "") && registry->getCreator("NonZeroPlugin", "0", "") == nullptr)
+    {
+        sample::gLogError << "Failed to register the NonZeroPlugin creator" << std::endl;
+        return false;
+    }
+
+    auto constructed = constructNetwork(builder, network, config);
+    if (!constructed)
+    {
+        return false;
+    }
+
+    // CUDA stream used for profiling by the builder.
+    auto profileStream = samplesCommon::makeCudaStream();
+    if (!profileStream)
+    {
+        return false;
+    }
+    config->setProfileStream(*profileStream);
+
+    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
+    if (!plan)
+    {
+        return false;
+    }
+
+    mRuntime = std::shared_ptr<nvinfer1::IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
+    if (!mRuntime)
+    {
+        return false;
+    }
+
+    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
+        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
+    if (!mEngine)
+    {
+        return false;
+    }
+
+    // Remember the I/O shapes; infer() sizes its buffers from the input dimensions.
+    ASSERT(network->getNbInputs() == 1);
+    mInputDims = network->getInput(0)->getDimensions();
+    ASSERT(mInputDims.nbDims == 2);
+
+    ASSERT(network->getNbOutputs() == 2);
+    mOutputDims = network->getOutput(0)->getDimensions();
+    ASSERT(mOutputDims.nbDims == 2);
+
+    return true;
+}
+
+//!
+//! \brief Creates a network with a single custom layer containing the NonZero plugin and marks the
+//!        output layers
+//!
+//! \param builder Pointer to the engine builder
+//!
+//! \param network Pointer to the network that will be populated with the NonZero plugin
+//!
+//! \param config Pointer to the builder configuration; the FP16 flag is set on it when requested
+//!
+//! \return true on success, false if the plugin creator cannot be found or the plugin cannot be created
+//!
+bool SampleNonZeroPlugin::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config)
+{
+    if (mParams.fp16)
+    {
+        config->setFlag(BuilderFlag::kFP16);
+    }
+
+    // The input is a 2-D tensor whose extents are drawn from [10, 25] using the sample's seed.
+    std::default_random_engine generator(mSeed);
+    std::uniform_int_distribution<int32_t> distr(10, 25);
+
+    int32_t const R = distr(generator);
+    int32_t const C = distr(generator);
+    auto* in = network->addInput("Input", DataType::kFLOAT, {2, {R, C}});
+    ASSERT(in != nullptr);
+
+    // NOTE(review): the field is declared kINT32 but points at a bool; NonZeroPluginCreator::createPlugin reads it
+    // back as a bool, so the two sides agree.
+    std::vector<PluginField> const vecPF{{"rowOrder", &mParams.rowOrder, PluginFieldType::kINT32, 1}};
+    PluginFieldCollection pfc{static_cast<int32_t>(vecPF.size()), vecPF.data()};
+
+    // The lookup yields nullptr when no matching creator is registered; do not dereference it blindly.
+    auto pluginCreator = static_cast<IPluginCreatorV3One*>(getPluginRegistry()->getCreator("NonZeroPlugin", "0", ""));
+    if (pluginCreator == nullptr)
+    {
+        sample::gLogError << "NonZeroPlugin creator is not registered" << std::endl;
+        return false;
+    }
+
+    // createPlugin reports failure by returning nullptr.
+    auto plugin = std::unique_ptr<IPluginV3>(pluginCreator->createPlugin("NonZeroPlugin", &pfc, TensorRTPhase::kBUILD));
+    if (plugin == nullptr)
+    {
+        sample::gLogError << "Failed to create NonZeroPlugin" << std::endl;
+        return false;
+    }
+
+    std::vector<ITensor*> inputsVec{in};
+    auto pluginNonZeroLayer
+        = network->addPluginV3(inputsVec.data(), static_cast<int32_t>(inputsVec.size()), nullptr, 0, *plugin);
+    ASSERT(pluginNonZeroLayer != nullptr);
+    ASSERT(pluginNonZeroLayer->getOutput(0) != nullptr);
+    ASSERT(pluginNonZeroLayer->getOutput(1) != nullptr);
+
+    pluginNonZeroLayer->getOutput(0)->setName("Output0");
+    pluginNonZeroLayer->getOutput(1)->setName("Output1");
+
+    network->markOutput(*(pluginNonZeroLayer->getOutput(0)));
+    network->markOutput(*(pluginNonZeroLayer->getOutput(1)));
+
+    return true;
+}
+
+//!
+//! \brief Runs the TensorRT inference engine for this sample
+//!
+//! \details This function is the main execution function of the sample. It allocates the buffer,
+//!          sets inputs and executes the engine.
+//!
+//! \return true if the engine ran and the output passed verification, false otherwise
+//!
+bool SampleNonZeroPlugin::infer()
+{
+
+    // Since the data dependent output size cannot be inferred from the engine denote a sufficient size for the
+    // corresponding output buffer (along with the rest of the I/O tensors)
+    std::vector<int64_t> ioVolumes = {mInputDims.d[0] * mInputDims.d[1], mInputDims.d[0] * mInputDims.d[1] * 2, 1};
+
+    // Create RAII buffer manager object
+    samplesCommon::BufferManager buffers(mEngine, ioVolumes);
+
+    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
+    if (!context)
+    {
+        return false;
+    }
+
+    // Bind every I/O tensor of the engine to its device buffer.
+    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; ++i)
+    {
+        auto const name = mEngine->getIOTensorName(i);
+        context->setTensorAddress(name, buffers.getDeviceBuffer(name));
+    }
+
+    // Read the input data into the managed buffers
+    ASSERT(mParams.inputTensorNames.size() == 1);
+    if (!processInput(buffers))
+    {
+        return false;
+    }
+
+    // Create CUDA stream for the execution of this inference.
+    cudaStream_t stream;
+    CHECK(cudaStreamCreate(&stream));
+
+    // Memcpy from host input buffers to device input buffers
+    buffers.copyInputToDeviceAsync(stream);
+
+    bool const status = context->enqueueV3(stream);
+    if (!status)
+    {
+        // Do not leak the stream when the enqueue is rejected; the function already reports failure, so the
+        // result of the cleanup call is deliberately ignored.
+        static_cast<void>(cudaStreamDestroy(stream));
+        return false;
+    }
+
+    // Asynchronously copy data from device output buffers to host output buffers.
+    buffers.copyOutputToHostAsync(stream);
+
+    // Wait for the work in the stream to complete.
+    CHECK(cudaStreamSynchronize(stream));
+
+    // Release stream.
+    CHECK(cudaStreamDestroy(stream));
+
+    // Verify results
+    if (!verifyOutput(buffers))
+    {
+        return false;
+    }
+
+    return true;
+}
+
+//!
+//! \brief Loads a randomly chosen digit image into the host input buffer and logs it
+//!
+//! \details The file "<digit>.pgm" is looked up in the configured data directories; each pixel p is stored
+//!          as 1 - p / 255.
+//!
+bool SampleNonZeroPlugin::processInput(samplesCommon::BufferManager const& buffers)
+{
+    int32_t const rows = mInputDims.d[0];
+    int32_t const cols = mInputDims.d[1];
+
+    std::vector<uint8_t> pixels(rows * cols);
+
+    // Pick the digit file with the sample's seed.
+    std::default_random_engine rng(mSeed);
+    std::uniform_int_distribution<int32_t> digitPicker(0, 9);
+    auto const digit = digitPicker(rng);
+    auto const fileName = std::to_string(digit) + ".pgm";
+    samplesCommon::readPGMFile(samplesCommon::locateFile(fileName, mParams.dataDirs), pixels.data(), rows, cols);
+
+    // Invert and scale the pixels into the input tensor.
+    float* const host = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
+    for (int32_t p = 0; p < rows * cols; ++p)
+    {
+        host[p] = 1.0 - float(pixels[p] / 255.0);
+    }
+
+    sample::gLogInfo << "Input:" << std::endl;
+    for (int32_t r = 0; r < rows; ++r)
+    {
+        float const* const line = host + r * cols;
+        for (int32_t c = 0; c < cols; ++c)
+        {
+            // Values within a row are separated by ", ".
+            if (c != 0)
+            {
+                sample::gLogInfo << ", ";
+            }
+            sample::gLogInfo << line[c];
+        }
+        sample::gLogInfo << std::endl;
+    }
+    sample::gLogInfo << std::endl;
+
+    return true;
+}
+
+//!
+//! \brief Verify result
+//!
+//! \details The reported count and every reported index are range-checked before they are used to address host
+//!          memory, so a faulty result makes verification fail instead of reading or writing out of bounds.
+//!
+//! \return whether the output correctly identifies all (and only) non-zero elements, each exactly once
+//!
+bool SampleNonZeroPlugin::verifyOutput(samplesCommon::BufferManager const& buffers)
+{
+    auto const* input = static_cast<float const*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
+    auto const* output = static_cast<int32_t const*>(buffers.getHostBuffer(mParams.outputTensorNames[0]));
+    int32_t const count = *static_cast<int32_t const*>(buffers.getHostBuffer(mParams.outputTensorNames[1]));
+
+    int64_t const numRows = mInputDims.d[0];
+    int64_t const numCols = mInputDims.d[1];
+    int64_t const numElements = numRows * numCols;
+
+    // The index buffer was sized for at most numElements entries; a count outside that range cannot be trusted.
+    if (count < 0 || count > numElements)
+    {
+        sample::gLogError << "Reported non-zero count " << count << " is outside [0, " << numElements << "]"
+                          << std::endl;
+        return false;
+    }
+
+    sample::gLogInfo << "Output:" << std::endl;
+    if (mParams.rowOrder)
+    {
+        for (int32_t i = 0; i < count; ++i)
+        {
+            for (int32_t j = 0; j < 2; ++j)
+            {
+                sample::gLogInfo << output[j + 2 * i] << " ";
+            }
+            sample::gLogInfo << std::endl;
+        }
+    }
+    else
+    {
+        for (int32_t i = 0; i < 2; ++i)
+        {
+            for (int32_t j = 0; j < count; ++j)
+            {
+                sample::gLogInfo << output[j + count * i] << " ";
+            }
+            sample::gLogInfo << std::endl;
+        }
+    }
+
+    std::vector<bool> covered(static_cast<size_t>(numElements), false);
+
+    for (int32_t i = 0; i < count; ++i)
+    {
+        // Row-major output stores (row, col) pairs; column-major stores all rows followed by all columns.
+        int64_t const row = mParams.rowOrder ? output[2 * i] : output[i];
+        int64_t const col = mParams.rowOrder ? output[2 * i + 1] : output[i + count];
+        if (row < 0 || row >= numRows || col < 0 || col >= numCols)
+        {
+            sample::gLogError << "Reported index (" << row << ", " << col << ") is out of range" << std::endl;
+            return false;
+        }
+
+        auto const idx = static_cast<size_t>(row * numCols + col);
+        // A reported element must be non-zero and must not have been reported before.
+        if (covered[idx] || input[idx] == 0.F)
+        {
+            return false;
+        }
+        covered[idx] = true;
+    }
+
+    // Every element that was not reported must be zero.
+    for (size_t i = 0; i < covered.size(); ++i)
+    {
+        if (!covered[i] && input[i] != 0.F)
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+//!
+//! \brief Translates the parsed command line arguments into the sample's parameter struct
+//!
+NonZeroParams initializeSampleParams(samplesCommon::Args const& args)
+{
+    NonZeroParams params;
+
+    params.inputTensorNames.push_back("Input");
+    params.outputTensorNames.push_back("Output0");
+    params.outputTensorNames.push_back("Output1");
+    params.fp16 = args.runInFp16;
+    params.rowOrder = args.rowOrder;
+
+    // Directories given by the user replace the defaults entirely.
+    if (!args.dataDirs.empty())
+    {
+        params.dataDirs = args.dataDirs;
+        return params;
+    }
+
+    // Nothing was provided: fall back to the default directories.
+    params.dataDirs.push_back("data/mnist/");
+    params.dataDirs.push_back("data/samples/mnist/");
+    return params;
+}
+
+//!
+//! \brief Writes the usage text of this sample to standard output, one flushed line per entry
+//!
+void printHelpInfo()
+{
+    static char const* const kHELP_LINES[] = {
+        "Usage: ./sample_non_zero_plugin [-h or --help] [-d or --datadir=<path to data directory>]",
+        "--help          Display help information",
+        "--datadir       Specify path to a data directory, overriding the default. This option can be used "
+        "multiple times to add multiple directories. If no data directories are given, the default is to use "
+        "(data/samples/mnist/, data/mnist/)",
+        "--fp16          Run in FP16 mode.",
+        "--columnOrder   Run plugin in column major output mode.",
+    };
+
+    for (char const* line : kHELP_LINES)
+    {
+        std::cout << line << std::endl;
+    }
+}
+
+//!
+//! \brief Entry point: parses the arguments, then builds and runs the sample and reports the outcome
+//!
+int main(int argc, char** argv)
+{
+    samplesCommon::Args args;
+    if (!samplesCommon::parseArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    auto sampleTest = sample::gLogger.defineTest(kSAMPLE_NAME, argc, argv);
+    sample::gLogger.reportTestStart(sampleTest);
+
+    SampleNonZeroPlugin nonZeroSample(initializeSampleParams(args));
+
+    sample::gLogInfo << "Building and running a GPU inference engine for NonZero plugin" << std::endl;
+
+    // infer() is only attempted when build() succeeded.
+    if (nonZeroSample.build() && nonZeroSample.infer())
+    {
+        return sample::gLogger.reportPass(sampleTest);
+    }
+    return sample::gLogger.reportFail(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..77e85f29bb78e20b5fa88cf1f15f46a96da204d6
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/Makefile
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+# Output names for the release and debug builds (presumably consumed by the included build rules -- confirm
+# in ../Makefile.config).
+OUTNAME_RELEASE = sample_onnx_mnist
+OUTNAME_DEBUG   = sample_onnx_mnist_debug
+# Extra directories, relative to this sample, made available to the build.
+EXTRA_DIRECTORIES = ../common ../utils
+# Base name of the directory holding the first makefile that make read (normally this sample's directory).
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+# Shared build configuration; ?= keeps a MAKEFILE value supplied by the caller.
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..05abeafda2c5793bc830d82d084be7b61666c4ee
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/README.md
@@ -0,0 +1,203 @@
+# "Hello World" For TensorRT From ONNX
+
+
+**Table Of Contents**
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+	* [Converting the ONNX model to a TensorRT network](#converting-the-onnx-model-to-a-tensorrt-network)
+	* [Building the engine](#building-the-engine)
+	* [Running inference](#running-inference)
+	* [TensorRT API layers and ops](#tensorrt-api-layers-and-ops)
+- [Running the sample](#running-the-sample)
+	* [Sample `--help` options](#sample-help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, sampleOnnxMNIST, converts a model trained on the [MNIST dataset](https://github.com/onnx/models/tree/master/vision/classification/mnist) in Open Neural Network Exchange (ONNX) format to a TensorRT network and runs inference on the network.
+
+ONNX is a standard for representing deep learning models that enables models to be transferred between frameworks.
+
+## How does this sample work?
+
+This sample creates and runs the TensorRT engine from an ONNX model of the MNIST network. It demonstrates how TensorRT can consume an ONNX model as input to create a network.
+
+Specifically, this sample:
+- [Converts the ONNX model to a TensorRT network](#converting-the-onnx-model-to-a-tensorrt-network)
+- [Builds an engine](#building-the-engine)
+- [Runs inference using the generated TensorRT network](#running-inference)
+
+### Converting the ONNX model to a TensorRT network
+
+The model file can be converted to a TensorRT network using the ONNX parser. The parser can be initialized with the
+network definition that the parser will write to and the logger object.
+
+`auto parser = nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger());`
+
+The ONNX model file is then passed on to the parser along with the logging level:
+
+```
+if (!parser->parseFromFile(model_file, static_cast<int>(sample::gLogger.getReportableSeverity())))
+{
+	  string msg("failed to parse onnx file");
+	  sample::gLogger.log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+	  exit(EXIT_FAILURE);
+}
+```
+
+After the TensorRT network is constructed by parsing the model, the TensorRT engine can be built to run inference.
+
+### Building the engine
+
+To build the engine, create the builder and pass a logger created for TensorRT which is used for reporting errors, warnings and informational messages in the network:
+`IBuilder* builder = createInferBuilder(sample::gLogger);`
+
+To build the engine from the generated TensorRT network, issue the following call:
+`SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};`
+
+After you build the engine, verify that the engine is running properly by confirming the output is what you expected. The output format of this sample should be the same as the output of sampleMNIST.
+
+### Running inference
+
+To run inference using the created engine, see [Performing Inference In C++](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#perform_inference_c).
+
+**Note:** It's important to preprocess the data and convert it to the format accepted by the network. In this example, the sample input is in PGM (portable graymap) format. The model expects an input of image `1x28x28` scaled to between `[0,1]`.
+
+### TensorRT API layers and ops
+
+In this sample, the following layers are used. For more information about these layers, see the [TensorRT Developer Guide: Layers](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#layers) documentation.
+
+[Activation layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#activation-layer)
+The Activation layer implements element-wise activation functions. Specifically, this sample uses the Activation layer with the type `kRELU`.
+
+[Convolution layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#convolution-layer)
+The Convolution layer computes a 2D (channel, height, and width) convolution, with or without bias.
+
+[MatrixMultiplyLayer](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#matrixmultiply-layer)
+The MatrixMultiply layer implements a matrix multiplication operation.
+(The [FullyConnected layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#fullyconnected-layer) is deprecated since 8.4.
+The bias of a FullyConnected layer can be added with an
+[ElementwiseLayer](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#elementwise-layer) of `SUM` operation.)
+
+[Pooling layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#pooling-layer)
+The Pooling layer implements pooling within a channel. Supported pooling types are `maximum`, `average` and `maximum-average blend`.
+
+[Scale layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#scale-layer)
+The Scale layer implements a per-tensor, per-channel, or per-element affine transformation and/or exponentiation by constant values.
+
+[Shuffle layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#shuffle-layer)
+The Shuffle layer implements a reshape and transpose operator for tensors.
+
+## Preparing sample data
+
+Download the sample data from the [TensorRT release tarball](https://developer.nvidia.com/nvidia-tensorrt-download#).
+
+## Running the sample
+
+1. Compile the sample by following build instructions in [TensorRT README](https://github.com/NVIDIA/TensorRT/).
+
+2.  Run the sample to build and run the MNIST engine from the ONNX model.
+	```
+	./sample_onnx_mnist [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>] [--int8 or --fp16]
+	```
+
+3.  Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+	```
+	&&&& RUNNING TensorRT.sample_onnx_mnist # ./sample_onnx_mnist
+	----------------------------------------------------------------
+	Input filename: ../../../../../../data/samples/mnist/mnist.onnx
+	ONNX IR version: 0.0.3
+	Opset version: 1
+	Producer name: CNTK
+	Producer version: 2.4
+	Domain:
+	Model version: 1
+	Doc string:
+	----------------------------------------------------------------
+	[I] Input:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@*.  .*@@@@@@@@@@@
+	@@@@@@@@@@*.     +@@@@@@@@@@
+	@@@@@@@@@@. :#+   %@@@@@@@@@
+	@@@@@@@@@@.:@@@+  +@@@@@@@@@
+	@@@@@@@@@@.:@@@@:  +@@@@@@@@
+	@@@@@@@@@@=%@@@@:  +@@@@@@@@
+	@@@@@@@@@@@@@@@@#  +@@@@@@@@
+	@@@@@@@@@@@@@@@@*  +@@@@@@@@
+	@@@@@@@@@@@@@@@@:  +@@@@@@@@
+	@@@@@@@@@@@@@@@@:  +@@@@@@@@
+	@@@@@@@@@@@@@@@*  .@@@@@@@@@
+	@@@@@@@@@@%**%@.  *@@@@@@@@@
+	@@@@@@@@%+.  .:  .@@@@@@@@@@
+	@@@@@@@@=  ..    :@@@@@@@@@@
+	@@@@@@@@:  *@@:  :@@@@@@@@@@
+	@@@@@@@%   %@*    *@@@@@@@@@
+	@@@@@@@%   ++ ++  .%@@@@@@@@
+	@@@@@@@@-    +@@-  +@@@@@@@@
+	@@@@@@@@=  :*@@@#  .%@@@@@@@
+	@@@@@@@@@+*@@@@@%.   %@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+	[I] Output:
+	Prob 0 0.0000 Class 0:
+	Prob 1 0.0000 Class 1:
+	Prob 2 1.0000 Class 2: **********
+	Prob 3 0.0000 Class 3:
+	Prob 4 0.0000 Class 4:
+	Prob 5 0.0000 Class 5:
+	Prob 6 0.0000 Class 6:
+	Prob 7 0.0000 Class 7:
+	Prob 8 0.0000 Class 8:
+	Prob 9 0.0000 Class 9:
+
+	&&&& PASSED TensorRT.sample_onnx_mnist # ./sample_onnx_mnist
+	```
+
+	This output shows that the sample ran successfully; PASSED.
+
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option.
+
+
+# Additional resources
+
+The following resources provide a deeper understanding about the ONNX project and MNIST model:
+
+**ONNX**
+- [GitHub: ONNX](https://github.com/onnx/onnx)
+- [GitHub: ONNX-TensorRT Open source parser](https://github.com/onnx/onnx-tensorrt)
+
+**Models**
+- [MNIST - Handwritten Digit Recognition](https://github.com/onnx/models/tree/master/vision/classification/mnist)
+- [GitHub: ONNX Models](https://github.com/onnx/models)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The C++ API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#c_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+
+# Changelog
+
+March 2019
+This `README.md` file was recreated, updated and reviewed.
+
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8da7215e8b59bd1af53742e1cc19e4154844d8f7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp
@@ -0,0 +1,440 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//!
+//! sampleOnnxMNIST.cpp
+//! This file contains the implementation of the ONNX MNIST sample. It creates the network using
+//! the MNIST onnx model.
+//! It can be run with the following command line:
+//! Command: ./sample_onnx_mnist [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
+//! [--useDLACore=<int>]
+//!
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0
+
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "logger.h"
+#include "parserOnnxConfig.h"
+
+#include "NvInfer.h"
+#include <cuda_runtime_api.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+
+const std::string gSampleName = "TensorRT.sample_onnx_mnist";
+
+//! \brief Sample that classifies an MNIST digit with a TensorRT engine created from an ONNX model
+//!
+//! \details build() parses the ONNX file and creates the engine; infer() runs it on one digit image
+//!
+class SampleOnnxMNIST
+{
+public:
+    //! \brief Keeps a copy of \p params. The runtime and the engine stay empty until build()
+    //!        creates them.
+    SampleOnnxMNIST(samplesCommon::OnnxSampleParams const& params)
+        : mParams(params)
+    {
+    }
+
+    //!
+    //! \brief Creates the TensorRT engine from the ONNX model; returns false on failure
+    //!
+    bool build();
+
+    //!
+    //! \brief Executes the engine on one input image and checks the predicted digit
+    //!
+    bool infer();
+
+private:
+    samplesCommon::OnnxSampleParams mParams; //!< Copy of the parameters passed to the constructor.
+
+    nvinfer1::Dims mInputDims;  //!< Dimensions of the network input, assigned by build().
+    nvinfer1::Dims mOutputDims; //!< Dimensions of the network output, assigned by build().
+    int mNumber{0};             //!< Digit used as input, i.e. the expected classification result.
+
+    std::shared_ptr<nvinfer1::IRuntime> mRuntime{nullptr};   //!< Runtime that deserializes the engine.
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< Engine that runs the network.
+
+    //!
+    //! \brief Populates \p network from the ONNX MNIST model and applies the build options
+    //!
+    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+        SampleUniquePtr<nvonnxparser::IParser>& parser, SampleUniquePtr<nvinfer1::ITimingCache>& timingCache);
+
+    //!
+    //! \brief Loads the input image and writes it into the managed host input buffer
+    //!
+    bool processInput(samplesCommon::BufferManager const& buffers);
+
+    //!
+    //! \brief Turns the network output into class probabilities and verifies the prediction
+    //!
+    bool verifyOutput(samplesCommon::BufferManager const& buffers);
+};
+
+//!
+//! \brief Builds the TensorRT engine for the Onnx MNIST model and stores it in mEngine
+//!
+//! \details Creates the builder, network, builder config and ONNX parser, lets constructNetwork()
+//!          populate them, serializes the network and deserializes the plan with a new runtime
+//!
+//! \return true if the engine was created, false as soon as one of the steps fails
+//!
+bool SampleOnnxMNIST::build()
+{
+    auto& trtLogger = sample::gLogger.getTRTLogger();
+
+    SampleUniquePtr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(trtLogger)};
+    if (builder == nullptr)
+    {
+        return false;
+    }
+
+    SampleUniquePtr<nvinfer1::INetworkDefinition> network{builder->createNetworkV2(0)};
+    if (network == nullptr)
+    {
+        return false;
+    }
+
+    SampleUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};
+    if (config == nullptr)
+    {
+        return false;
+    }
+
+    SampleUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, trtLogger)};
+    if (parser == nullptr)
+    {
+        return false;
+    }
+
+    // Stays empty unless constructNetwork() creates a timing cache for mParams.timingCacheFile.
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache;
+    if (!constructNetwork(builder, network, config, parser, timingCache))
+    {
+        return false;
+    }
+
+    // CUDA stream the builder uses for profiling.
+    auto const builderStream = samplesCommon::makeCudaStream();
+    if (builderStream == nullptr)
+    {
+        return false;
+    }
+    config->setProfileStream(*builderStream);
+
+    SampleUniquePtr<nvinfer1::IHostMemory> serialized{builder->buildSerializedNetwork(*network, *config)};
+    if (serialized == nullptr)
+    {
+        return false;
+    }
+
+    // Update the timing cache file only when a cache exists and a file path was given.
+    if (timingCache && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(trtLogger, mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+
+    mRuntime.reset(nvinfer1::createInferRuntime(trtLogger));
+    if (mRuntime == nullptr)
+    {
+        return false;
+    }
+
+    mEngine.reset(
+        mRuntime->deserializeCudaEngine(serialized->data(), serialized->size()), samplesCommon::InferDeleter());
+    if (mEngine == nullptr)
+    {
+        return false;
+    }
+
+    ASSERT(network->getNbInputs() == 1);
+    mInputDims = network->getInput(0)->getDimensions();
+    ASSERT(mInputDims.nbDims == 4);
+
+    ASSERT(network->getNbOutputs() == 1);
+    mOutputDims = network->getOutput(0)->getDimensions();
+    ASSERT(mOutputDims.nbDims == 2);
+
+    return true;
+}
+
+//!
+//! \brief Uses the ONNX parser to populate the Onnx MNIST network and applies the build options
+//!
+//! \param builder     Engine builder, passed on to enableDLA()
+//! \param network     Network definition that is populated from the ONNX model file
+//! \param config      Builder config that receives the precision flags; also given to the helpers
+//! \param parser      ONNX parser that reads the model file
+//! \param timingCache Assigned from mParams.timingCacheFile when that path is not empty
+//! \return false if the model file could not be parsed, true otherwise
+bool SampleOnnxMNIST::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+    SampleUniquePtr<nvonnxparser::IParser>& parser, SampleUniquePtr<nvinfer1::ITimingCache>& timingCache)
+{
+    std::string const modelPath = samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs);
+    int const verbosity = static_cast<int>(sample::gLogger.getReportableSeverity());
+    if (!parser->parseFromFile(modelPath.c_str(), verbosity))
+    {
+        return false;
+    }
+
+    if (mParams.fp16)
+    {
+        config->setFlag(nvinfer1::BuilderFlag::kFP16);
+    }
+    if (mParams.bf16)
+    {
+        config->setFlag(nvinfer1::BuilderFlag::kBF16);
+    }
+    if (mParams.int8)
+    {
+        constexpr float kTENSOR_DYNAMIC_RANGE = 4.0F;
+        config->setFlag(nvinfer1::BuilderFlag::kINT8);
+        network->getInput(0)->setDynamicRange(-1.0F, 1.0F);
+        samplesCommon::setAllDynamicRanges(network.get(), kTENSOR_DYNAMIC_RANGE, kTENSOR_DYNAMIC_RANGE);
+    }
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache = samplesCommon::buildTimingCacheFromFile(
+            sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
+    }
+
+    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
+    return true;
+}
+
+//!
+//! \brief Runs the TensorRT inference engine for this sample
+//!
+//! \details Allocates the buffers, binds them to an execution context, loads the input, executes
+//!          the engine and verifies the result.
+//! \return true if inference ran and the output was verified, false otherwise
+//!
+bool SampleOnnxMNIST::infer()
+{
+    // Buffer manager created from the engine; it provides the host and device buffers used below.
+    samplesCommon::BufferManager buffers(mEngine);
+
+    SampleUniquePtr<nvinfer1::IExecutionContext> context{mEngine->createExecutionContext()};
+    if (context == nullptr)
+    {
+        return false;
+    }
+
+    // Point the context at the device buffer of every I/O tensor of the engine.
+    int32_t const nbIOTensors = mEngine->getNbIOTensors();
+    for (int32_t tensorIdx = 0; tensorIdx < nbIOTensors; ++tensorIdx)
+    {
+        auto const tensorName = mEngine->getIOTensorName(tensorIdx);
+        auto const deviceBuffer = buffers.getDeviceBuffer(tensorName);
+        context->setTensorAddress(tensorName, deviceBuffer);
+    }
+
+    // Fill the host input buffer; the sample expects exactly one input tensor name.
+    ASSERT(mParams.inputTensorNames.size() == 1);
+    if (!processInput(buffers))
+    {
+        return false;
+    }
+
+    // Copy the host input buffers to the device input buffers.
+    buffers.copyInputToDevice();
+
+    // Execute the engine on the device bindings.
+    bool const executed = context->executeV2(buffers.getDeviceBindings().data());
+    if (!executed)
+    {
+        return false;
+    }
+
+    // Copy the device output buffers back to the host output buffers.
+    buffers.copyOutputToHost();
+
+    // The run succeeds only if the classification result is verified.
+    return verifyOutput(buffers);
+}
+
+//!
+//! \brief Loads a randomly chosen digit image, logs it and writes it into the host input buffer
+//!
+bool SampleOnnxMNIST::processInput(const samplesCommon::BufferManager& buffers)
+{
+    int const inputH = mInputDims.d[2];
+    int const inputW = mInputDims.d[3];
+    int const pixelCount = inputH * inputW;
+    // Seed from the clock and pick the digit (0-9) whose "<digit>.pgm" file becomes the input.
+    srand(unsigned(time(nullptr)));
+    std::vector<uint8_t> pixels(pixelCount);
+    mNumber = rand() % 10;
+    std::string const pgmPath = samplesCommon::locateFile(std::to_string(mNumber) + ".pgm", mParams.dataDirs);
+    samplesCommon::readPGMFile(pgmPath, pixels.data(), inputH, inputW);
+
+    // Log the image as ASCII art: one character per pixel, one line per image row.
+    sample::gLogInfo << "Input:" << std::endl;
+    for (int px = 0; px < pixelCount; ++px)
+    {
+        sample::gLogInfo << (" .:-=+*#%@"[pixels[px] / 26]) << ((px + 1) % inputW == 0 ? "\n" : "");
+    }
+    sample::gLogInfo << std::endl;
+
+    // Store the inverted pixel values, scaled to [0, 1], in the host buffer of the input tensor.
+    auto* const hostInput = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
+    for (int px = 0; px < pixelCount; ++px)
+    {
+        hostInput[px] = 1.0 - float(pixels[px] / 255.0);
+    }
+    return true;
+}
+
+//!
+//! \brief Converts the raw output to softmax probabilities, logs them and checks the prediction
+//! \return true if the most probable class equals mNumber and its probability is above 0.9
+//!
+bool SampleOnnxMNIST::verifyOutput(const samplesCommon::BufferManager& buffers)
+{
+    const int outputSize = mOutputDims.d[1];
+    float* output = static_cast<float*>(buffers.getHostBuffer(mParams.outputTensorNames[0]));
+    float val{0.0F};
+    int idx{0};
+    // Softmax in place. Shifting by the largest logit keeps exp() from overflowing to infinity.
+    float maxLogit = outputSize > 0 ? output[0] : 0.0F;
+    for (int i = 1; i < outputSize; i++)
+    {
+        maxLogit = std::max(maxLogit, output[i]);
+    }
+    float sum{0.0F};
+    for (int i = 0; i < outputSize; i++)
+    {
+        output[i] = exp(output[i] - maxLogit);
+        sum += output[i];
+    }
+
+    sample::gLogInfo << "Output:" << std::endl;
+    for (int i = 0; i < outputSize; i++)
+    {
+        output[i] /= sum;
+        if (output[i] >= val) // ties resolve to the highest class index
+        {
+            val = output[i];
+            idx = i;
+        }
+        sample::gLogInfo << " Prob " << i << "  " << std::fixed << std::setw(5) << std::setprecision(4) << output[i]
+                         << " " << "Class " << i << ": " << std::string(int(std::floor(output[i] * 10 + 0.5F)), '*')
+                         << std::endl;
+    }
+    sample::gLogInfo << std::endl;
+    return idx == mNumber && val > 0.9F;
+}
+
+//!
+//! \brief Builds the sample parameters from the parsed command line arguments
+//!
+samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args& args)
+{
+    samplesCommon::OnnxSampleParams params;
+    // Directories given by the user take precedence; without any, the default ones are used.
+    params.dataDirs = args.dataDirs;
+    if (params.dataDirs.empty())
+    {
+        params.dataDirs = {"data/mnist/", "data/samples/mnist/"};
+    }
+
+    params.onnxFileName = "mnist.onnx";
+    params.inputTensorNames.emplace_back("Input3");
+    params.outputTensorNames.emplace_back("Plus214_Output_0");
+
+    // Options copied unchanged from the command line.
+    params.dlaCore = args.useDLACore;
+    params.int8 = args.runInInt8;
+    params.fp16 = args.runInFp16;
+    params.bf16 = args.runInBf16;
+    params.timingCacheFile = args.timingCacheFile;
+
+    return params;
+}
+
+//!
+//! \brief Prints the usage line and a description of every command line option to std::cout
+//!
+void printHelpInfo()
+{
+    std::cout
+        << "Usage: ./sample_onnx_mnist [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>]"
+        << " [--int8] [--fp16] [--bf16] [-t or --timingCacheFile=<path to timing cache file>]" << std::endl;
+    std::cout << "--help             Display help information" << std::endl;
+    std::cout << "--datadir          Specify path to a data directory, overriding the default. This option can be used "
+                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
+                 "(data/mnist/, data/samples/mnist/)"
+              << std::endl;
+    std::cout << "--useDLACore=N     Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
+                 "where n is the number of DLA engines on the platform."
+              << std::endl;
+    std::cout << "--int8             Run in Int8 mode." << std::endl;
+    std::cout << "--fp16             Run in FP16 mode." << std::endl;
+    std::cout << "--bf16             Run in BF16 mode." << std::endl;
+    std::cout << "--timingCacheFile  Specify path to a timing cache file. If it does not already exist, it will be "
+              << "created." << std::endl;
+}
+
+//!
+//! \brief Entry point: parses the command line, builds the engine and runs one inference
+//!
+int main(int argc, char** argv)
+{
+    // Reject an invalid command line and show the usage text.
+    samplesCommon::Args args;
+    if (!samplesCommon::parseArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    // Register the run with the logger so that its start and its outcome are reported.
+    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
+    sample::gLogger.reportTestStart(sampleTest);
+
+    SampleOnnxMNIST sample(initializeSampleParams(args));
+
+    sample::gLogInfo << "Building and running a GPU inference engine for Onnx MNIST" << std::endl;
+
+    // infer() is only attempted after a successful build(); either failure fails the test.
+    if (!sample.build() || !sample.infer())
+    {
+        return sample::gLogger.reportFail(sampleTest);
+    }
+
+    return sample::gLogger.reportPass(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..2133e588e4e01579e146be66aae5385512c05e6a
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/Makefile
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+OUTNAME_RELEASE = sample_onnx_mnist_coord_conv_ac
+OUTNAME_DEBUG   = sample_onnx_mnist_coord_conv_ac_debug
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d302249667e1d681d9c9dc337af59bef2421defd
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/README.md
@@ -0,0 +1,244 @@
+# Implementing CoordConv in TensorRT with a custom plugin
+
+
+**Table Of Contents**
+- [Implementing CoordConv in TensorRT with a custom plugin](#implementing-coordconv-in-tensorrt-with-a-custom-plugin)
+	- [Description](#description)
+	- [How does this sample work?](#how-does-this-sample-work)
+		- [Converting the ONNX model to a TensorRT network](#converting-the-onnx-model-to-a-tensorrt-network)
+		- [Building the engine](#building-the-engine)
+		- [Running inference](#running-inference)
+		- [TensorRT API layers and ops](#tensorrt-api-layers-and-ops)
+	- [Running the sample](#running-the-sample)
+		- [Sample --help options](#sample---help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, sampleOnnxMnistCoordConvAC, converts a model trained on the `MNIST dataset` in Open Neural Network Exchange (ONNX) format to a TensorRT network and runs inference on the network.
+This model was trained in PyTorch and it contains custom CoordConv layers instead of Conv layers.<br/>
+Training script for the model with CoordConvAC layers, including the PyTorch code of the CoordConv layers: [link](https://github.com/denti/mnist-coordconv-pytorch/blob/master/main_coord_conv.py)<br/>
+Original model with usual Conv layers: [link](https://github.com/pytorch/examples/tree/master/mnist)
+
+The [CoordConv](https://arxiv.org/abs/1807.03247) layer was proposed by Uber AI Labs in 2018.
+It improves the quality of regular Conv layers by adding additional channels with relative coordinates to the input data.
+This layer is used in classification, detection, segmentation and other NN architectures.
+The CoordConv layer maps to the `CoordConvAC_TRT` custom plugin implemented in TensorRT for fast inference.
+This plugin can be found at `TensorRT/plugin/coordConvACPlugin`. Additional information about the layer and plugin implementation can be found at `TensorRT/plugin/coordConvACPlugin/README.md`
+
+ONNX is a standard for representing deep learning models that enables models to be transferred between frameworks.
+
+## How does this sample work?
+
+This sample creates and runs a TensorRT engine on an ONNX model of MNIST trained with CoordConv layers. It demonstrates how TensorRT can parse and import ONNX models, as well as use plugins to run custom layers in neural networks.
+
+Specifically, this sample:
+- [Converts the ONNX model with custom layer to a TensorRT network](#converting-the-onnx-model-to-a-tensorrt-network)
+- [Builds an engine with custom layer](#building-the-engine)
+- [Runs inference using the generated TensorRT network](#running-inference)
+
+### Converting the ONNX model to a TensorRT network
+
+The model file can be converted to a TensorRT network using the ONNX parser. The parser can be initialized with the
+network definition that the parser will write to and the logger object.
+
+`auto parser = nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger());`
+
+The plugin library must be initialized so that custom layers implemented as plugins can be parsed:
+
+`initLibNvInferPlugins(&sample::gLogger, "ONNXTRT_NAMESPACE");`
+
+The ONNX model file is then passed onto the parser along with the logging level
+
+```
+if (!parser->parseFromFile(model_file, static_cast<int>(sample::gLogger.getReportableSeverity())))
+{
+	  string msg("failed to parse onnx file");
+	  sample::gLogger->log(nvinfer1::ILogger::Severity::kERROR, msg.c_str());
+	  exit(EXIT_FAILURE);
+}
+```
+
+After the TensorRT network is constructed by parsing the model, the TensorRT engine can be built to run inference.
+
+### Building the engine
+
+To build the engine, create the builder and pass a logger created for TensorRT which is used for reporting errors, warnings and informational messages in the network:
+`IBuilder* builder = createInferBuilder(sample::gLogger);`
+
+To build the engine from the generated TensorRT network, issue the following call:
+`nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);`
+
+After you build the engine, verify that the engine is running properly by confirming the output is what you expected. The output format of this sample should be the same as the output of sampleMNIST.
+
+### Running inference
+
+To run inference using the created engine, see [Performing Inference In C++](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#perform_inference_c).
+
+**Note:** It's important to preprocess the data and convert it to the format accepted by the network. In this example, the sample input is in PGM (portable graymap) format. The model expects an input image of shape `1x28x28` with values scaled to `[0,1]`.
+
+**Note2:** The input must also be normalized before it is fed to the network, because the same normalization, [transforms.Normalize((0.1307,), (0.3081,))](https://github.com/pytorch/examples/tree/master/mnist), was applied when the model was trained:
+
+```
+const float PYTORCH_NORMALIZE_MEAN = 0.1307;
+const float PYTORCH_NORMALIZE_STD = 0.3081;
+hostDataBuffer[i] = ((1.0 - float(fileData[i] / 255.0)) - PYTORCH_NORMALIZE_MEAN) / PYTORCH_NORMALIZE_STD;
+```
+
+### TensorRT API layers and ops
+
+In this sample, the following layers and plugins are used. For more information about these layers, see the [TensorRT Developer Guide: Layers](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#layers) documentation.
+
+[CoordConvAC layer](https://github.com/NVIDIA/TensorRT/tree/main/plugin/coordConvACPlugin)
+Custom layer implemented with CUDA API that implements operation AddChannels. This layer expands the input data by adding additional channels with relative coordinates.
+
+[Activation layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#activation-layer)
+The Activation layer implements element-wise activation functions. Specifically, this sample uses the Activation layer with the type `kRELU`.
+
+[Convolution layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#convolution-layer)
+The Convolution layer computes a 2D (channel, height, and width) convolution, with or without bias.
+
+[MatrixMultiplyLayer](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#matrixmultiply-layer)
+The MatrixMultiply layer implements a matrix multiplication.
+(The [FullyConnected layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#fullyconnected-layer) is deprecated since 8.4.
+The bias of a FullyConnected layer can be added with an
+[ElementwiseLayer](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#elementwise-layer) of `SUM` operation.)
+
+[Pooling layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#pooling-layer)
+The Pooling layer implements pooling within a channel. Supported pooling types are `maximum`, `average` and `maximum-average blend`.
+
+[Scale layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#scale-layer)
+The Scale layer implements a per-tensor, per-channel, or per-element affine transformation and/or exponentiation by constant values.
+
+[Shuffle layer](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#shuffle-layer)
+The Shuffle layer implements a reshape and transpose operator for tensors.
+
+
+
+## Running the sample
+
+1. The sample gets compiled when building the TensorRT OSS following the [instructions](https://github.com/NVIDIA/TensorRT). The binary named sample_onnx_mnist_coord_conv_ac will be created in the output directory.
+
+2. (Optional) If the ONNX model on MNIST dataset is not available, you can generate an ONNX model for running this sample using the following commands:
+    ```
+    python3 mnist_coord_conv_train.py --save-onnx
+    python3 modify_onnx_ac.py
+    ``` 
+    The first line trains a model for the MNIST dataset and saves it as an ONNX model. The second line modifies the ONNX model structure to make it work with TensorRT for building the MNIST engine. These scripts are expected to be used with `torch==2.0.1+cu118` and `torchvision==0.15.2+cu118`.
+
+3.  Run the sample to build and run the MNIST engine from the ONNX model.
+    ```
+    ./sample_onnx_mnist_coord_conv_ac [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>] [--int8 or --fp16]
+    ```
+
+4. Verify that the sample ran successfully. If the sample runs successfully you should see output similar to the following:
+    ```
+    &&&& RUNNING TensorRT.sample_onnx_mnist_coord_conv_ac # ./sample_onnx_mnist_coord_conv_ac
+    ----------------------------------------------------------------
+    Input filename:   data/mnist/mnist_with_coordconv.onnx
+    ONNX IR version:  0.0.6
+    Opset version:    11
+    Producer name:
+    Producer version:
+    Domain:
+    Model version:    0
+    Doc string:
+	----------------------------------------------------------------
+	[I] Input:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@*.  .*@@@@@@@@@@@
+	@@@@@@@@@@*.     +@@@@@@@@@@
+	@@@@@@@@@@. :#+   %@@@@@@@@@
+	@@@@@@@@@@.:@@@+  +@@@@@@@@@
+	@@@@@@@@@@.:@@@@:  +@@@@@@@@
+	@@@@@@@@@@=%@@@@:  +@@@@@@@@
+	@@@@@@@@@@@@@@@@#  +@@@@@@@@
+	@@@@@@@@@@@@@@@@*  +@@@@@@@@
+	@@@@@@@@@@@@@@@@:  +@@@@@@@@
+	@@@@@@@@@@@@@@@@:  +@@@@@@@@
+	@@@@@@@@@@@@@@@*  .@@@@@@@@@
+	@@@@@@@@@@%**%@.  *@@@@@@@@@
+	@@@@@@@@%+.  .:  .@@@@@@@@@@
+	@@@@@@@@=  ..    :@@@@@@@@@@
+	@@@@@@@@:  *@@:  :@@@@@@@@@@
+	@@@@@@@%   %@*    *@@@@@@@@@
+	@@@@@@@%   ++ ++  .%@@@@@@@@
+	@@@@@@@@-    +@@-  +@@@@@@@@
+	@@@@@@@@=  :*@@@#  .%@@@@@@@
+	@@@@@@@@@+*@@@@@%.   %@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+	[I] Output:
+    Prob 0  0.0001 Class 0:
+    Prob 1  0.0003 Class 1:
+    Prob 2  0.9975 Class 2: **********
+    Prob 3  0.0009 Class 3:
+    Prob 4  0.0000 Class 4:
+    Prob 5  0.0001 Class 5:
+    Prob 6  0.0001 Class 6:
+    Prob 7  0.0000 Class 7:
+    Prob 8  0.0009 Class 8:
+    Prob 9  0.0000 Class 9:
+
+    &&&& PASSED TensorRT.sample_onnx_mnist_coord_conv_ac # ./sample_onnx_mnist_coord_conv_ac
+	```
+
+	This output shows that the sample ran successfully; PASSED.
+
+### Sample --help options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option. For example:
+```
+Usage: ./sample_onnx_mnist_coord_conv_ac [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>]
+--help Display help information
+--datadir Specify path to a data directory, overriding the default. This option can be used multiple times to add multiple directories. If no data directories are given, the default is to use (data/samples/mnist/, data/mnist/)
+--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, where n is the number of DLA engines on the platform.
+--int8 Run in Int8 mode.
+--fp16 Run in FP16 mode.
+```
+
+# Additional resources
+
+The following resources provide a deeper understanding about the ONNX project and MNIST model:
+
+**CoordConv Layer**
+- [Arxiv paper by Uber AI Labs](https://arxiv.org/abs/1807.03247)
+- [Blog post about the CoordConv layer](https://eng.uber.com/coordconv/)
+- [Path to the layer's plugin in repository](https://github.com/NVIDIA/TensorRT/tree/main/plugin/coordConvACPlugin)
+
+**ONNX**
+- [GitHub: ONNX](https://github.com/onnx/onnx)
+- [Github: ONNX-TensorRT Open source parser](https://github.com/onnx/onnx-tensorrt)
+
+**Models**
+- [MNIST - Handwritten Digit Recognition](https://github.com/onnx/models/tree/master/mnist)
+- [GitHub: ONNX Models](https://github.com/onnx/models)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The C++ API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#c_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+
+# Changelog
+
+April 2020
+This `README.md` file was recreated, updated and reviewed.
+
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/coord_conv.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/coord_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cd47ecabe7635da71f2344985c020228f0b0989
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/coord_conv.py
@@ -0,0 +1,194 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch
+import torch.nn as nn
+import torch.nn.modules.conv as conv
+
+
+class AddCoords(nn.Module):
+    def __init__(self, rank, with_r=False):
+        super(AddCoords, self).__init__()
+        self.rank = rank
+        self.with_r = with_r
+
+    def forward(self, input_tensor):
+        """
+        :param input_tensor: (N, C_in, X), (N, C_in, H, W) or (N, C_in, D, H, W) for rank 1, 2 or 3
+        :return: input_tensor with `rank` coordinate channels (plus a radius channel if with_r) concatenated on dim 1
+        """
+        if self.rank == 1:
+            batch_size_shape, channel_in_shape, dim_x = input_tensor.shape
+            xx_range = torch.arange(dim_x, dtype=torch.int32)
+            xx_channel = xx_range[None, None, :]
+
+            xx_channel = xx_channel.float() / (dim_x - 1)
+            xx_channel = xx_channel * 2 - 1
+            xx_channel = xx_channel.repeat(batch_size_shape, 1, 1)
+
+            if torch.cuda.is_available():  # must be called: the bare function object is always truthy
+                input_tensor = input_tensor.cuda()
+                xx_channel = xx_channel.cuda()
+            out = torch.cat([input_tensor, xx_channel], dim=1)
+
+            if self.with_r:
+                rr = torch.sqrt(torch.pow(xx_channel - 0.5, 2))
+                out = torch.cat([out, rr], dim=1)
+
+        elif self.rank == 2:
+            batch_size_shape, channel_in_shape, dim_y, dim_x = input_tensor.shape
+            xx_ones = torch.ones([1, 1, 1, dim_x], dtype=torch.int32)
+            yy_ones = torch.ones([1, 1, 1, dim_y], dtype=torch.int32)
+
+            xx_range = torch.arange(dim_y, dtype=torch.int32)
+            yy_range = torch.arange(dim_x, dtype=torch.int32)
+            xx_range = xx_range[None, None, :, None]
+            yy_range = yy_range[None, None, :, None]
+
+            xx_channel = torch.matmul(xx_range, xx_ones)
+            yy_channel = torch.matmul(yy_range, yy_ones)
+
+            # transpose y
+            yy_channel = yy_channel.permute(0, 1, 3, 2)
+
+            xx_channel = xx_channel.float() / (dim_y - 1)
+            yy_channel = yy_channel.float() / (dim_x - 1)
+
+            xx_channel = xx_channel * 2 - 1
+            yy_channel = yy_channel * 2 - 1
+
+            xx_channel = xx_channel.repeat(batch_size_shape, 1, 1, 1)
+            yy_channel = yy_channel.repeat(batch_size_shape, 1, 1, 1)
+
+            if torch.cuda.is_available():  # must be called: the bare function object is always truthy
+                input_tensor = input_tensor.cuda()
+                xx_channel = xx_channel.cuda()
+                yy_channel = yy_channel.cuda()
+            out = torch.cat([input_tensor, xx_channel, yy_channel], dim=1)
+
+            if self.with_r:
+                rr = torch.sqrt(torch.pow(xx_channel - 0.5, 2) + torch.pow(yy_channel - 0.5, 2))
+                out = torch.cat([out, rr], dim=1)
+
+        elif self.rank == 3:
+            batch_size_shape, channel_in_shape, dim_z, dim_y, dim_x = input_tensor.shape
+            xx_ones = torch.ones([1, 1, 1, 1, dim_x], dtype=torch.int32)
+            yy_ones = torch.ones([1, 1, 1, 1, dim_y], dtype=torch.int32)
+            zz_ones = torch.ones([1, 1, 1, 1, dim_z], dtype=torch.int32)
+
+            xy_range = torch.arange(dim_y, dtype=torch.int32)
+            xy_range = xy_range[None, None, None, :, None]
+
+            yz_range = torch.arange(dim_z, dtype=torch.int32)
+            yz_range = yz_range[None, None, None, :, None]
+
+            zx_range = torch.arange(dim_x, dtype=torch.int32)
+            zx_range = zx_range[None, None, None, :, None]
+
+            xy_channel = torch.matmul(xy_range, xx_ones)
+            xx_channel = torch.cat([xy_channel + i for i in range(dim_z)], dim=2)
+
+            yz_channel = torch.matmul(yz_range, yy_ones)
+            yz_channel = yz_channel.permute(0, 1, 3, 4, 2)
+            yy_channel = torch.cat([yz_channel + i for i in range(dim_x)], dim=4)
+
+            zx_channel = torch.matmul(zx_range, zz_ones)
+            zx_channel = zx_channel.permute(0, 1, 4, 2, 3)
+            zz_channel = torch.cat([zx_channel + i for i in range(dim_y)], dim=3)
+
+            if torch.cuda.is_available():  # must be called: the bare function object is always truthy
+                input_tensor = input_tensor.cuda()
+                xx_channel = xx_channel.cuda()
+                yy_channel = yy_channel.cuda()
+                zz_channel = zz_channel.cuda()
+            out = torch.cat([input_tensor, xx_channel, yy_channel, zz_channel], dim=1)
+
+            if self.with_r:
+                rr = torch.sqrt(torch.pow(xx_channel - 0.5, 2) +
+                                torch.pow(yy_channel - 0.5, 2) +
+                                torch.pow(zz_channel - 0.5, 2))
+                out = torch.cat([out, rr], dim=1)
+        else:
+            raise NotImplementedError
+
+        return out
+
+
+class CoordConv1d(conv.Conv1d):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True, with_r=False):
+        super(CoordConv1d, self).__init__(in_channels, out_channels, kernel_size,
+                                          stride, padding, dilation, groups, bias)
+        self.rank = 1
+        self.addcoords = AddCoords(self.rank, with_r)
+        self.conv = nn.Conv1d(in_channels + self.rank + int(with_r), out_channels,
+                              kernel_size, stride, padding, dilation, groups, bias)
+
+    def forward(self, input_tensor):
+        """
+        input_tensor_shape: (N, C_in, L)
+        output_tensor_shape: (N, C_out, L_out)
+        :return: CoordConv1d Result
+        """
+        out = self.addcoords(input_tensor)
+        out = self.conv(out)
+
+        return out
+
+
+class CoordConv2d(conv.Conv2d):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True, with_r=False):
+        super(CoordConv2d, self).__init__(in_channels, out_channels, kernel_size,
+                                          stride, padding, dilation, groups, bias)
+        self.rank = 2
+        self.addcoords = AddCoords(self.rank, with_r)
+        self.conv = nn.Conv2d(in_channels + self.rank + int(with_r), out_channels,
+                              kernel_size, stride, padding, dilation, groups, bias)
+
+    def forward(self, input_tensor):
+        """
+        input_tensor_shape: (N, C_in, H, W)
+        output_tensor_shape: (N, C_out, H_out, W_out)
+        :return: CoordConv2d Result
+        """
+        out = self.addcoords(input_tensor)
+        out = self.conv(out)
+
+        return out
+
+
+class CoordConv3d(conv.Conv3d):
+    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
+                 padding=0, dilation=1, groups=1, bias=True, with_r=False):
+        super(CoordConv3d, self).__init__(in_channels, out_channels, kernel_size,
+                                          stride, padding, dilation, groups, bias)
+        self.rank = 3
+        self.addcoords = AddCoords(self.rank, with_r)
+        self.conv = nn.Conv3d(in_channels + self.rank + int(with_r), out_channels,
+                              kernel_size, stride, padding, dilation, groups, bias)
+
+    def forward(self, input_tensor):
+        """
+        input_tensor_shape: (N, C_in, D, H, W)
+        output_tensor_shape: (N, C_out, D_out, H_out, W_out)
+        :return: CoordConv3d Result
+        """
+        out = self.addcoords(input_tensor)
+        out = self.conv(out)
+
+        return out
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/mnist_coord_conv_train.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/mnist_coord_conv_train.py
new file mode 100644
index 0000000000000000000000000000000000000000..c7891d923868530711341bed7f53af873afaa6b6
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/mnist_coord_conv_train.py
@@ -0,0 +1,163 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from __future__ import print_function
+import argparse
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from torchvision import datasets, transforms
+from torch.optim.lr_scheduler import StepLR
+from coord_conv import CoordConv2d
+
+
+class Net(nn.Module):
+    """
+    Original implementation of Convnet from
+    PyTorch repo https://github.com/pytorch/examples/tree/master/mnist
+    but with CoordConv2d layers instead of Conv layers
+    """
+    def __init__(self):
+        super(Net, self).__init__()
+        # Regular Conv layer replaced with CoordConv2d layer
+        self.conv1 = CoordConv2d(1, 32, 3, 1)
+        # Regular Conv layer replaced with CoordConv2d layer
+        self.conv2 = CoordConv2d(32, 64, 3, 1)
+        self.dropout1 = nn.Dropout2d(0.25)
+        self.dropout2 = nn.Dropout2d(0.5)
+        self.fc1 = nn.Linear(9216, 128)
+        self.fc2 = nn.Linear(128, 10)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = F.relu(x)
+        x = self.conv2(x)
+        x = F.max_pool2d(x, 2)
+        x = self.dropout1(x)
+        x = torch.flatten(x, 1)
+        x = self.fc1(x)
+        x = F.relu(x)
+        x = self.dropout2(x)
+        x = self.fc2(x)
+        output = F.log_softmax(x, dim=1)
+        return output
+
+
+def train(args, model, device, train_loader, optimizer, epoch):
+    model.train()
+    for batch_idx, (data, target) in enumerate(train_loader):
+        data, target = data.to(device), target.to(device)
+        optimizer.zero_grad()
+        output = model(data)
+        loss = F.nll_loss(output, target)
+        loss.backward()
+        optimizer.step()
+        if batch_idx % args.log_interval == 0:
+            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+                epoch, batch_idx * len(data), len(train_loader.dataset),
+                100. * batch_idx / len(train_loader), loss.item()))
+
+
+def test(args, model, device, test_loader):
+    model.eval()
+    test_loss = 0
+    correct = 0
+    with torch.no_grad():
+        for data, target in test_loader:
+            data, target = data.to(device), target.to(device)
+            output = model(data)
+            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
+            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
+            correct += pred.eq(target.view_as(pred)).sum().item()
+
+    test_loss /= len(test_loader.dataset)
+
+    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
+        test_loss, correct, len(test_loader.dataset),
+        100. * correct / len(test_loader.dataset)))
+
+
+def main():
+    # Training settings
+    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
+    parser.add_argument('--batch-size', type=int, default=4, metavar='N',
+                        help='input batch size for training (default: 4)')
+    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
+                        help='input batch size for testing (default: 1000)')
+    parser.add_argument('--epochs', type=int, default=1, metavar='N',
+                        help='number of epochs to train (default: 1)')
+    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
+                        help='learning rate (default: 1.0)')
+    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
+                        help='Learning rate step gamma (default: 0.7)')
+    parser.add_argument('--no-cuda', action='store_true', default=False,
+                        help='disables CUDA training')
+    parser.add_argument('--seed', type=int, default=1, metavar='S',
+                        help='random seed (default: 1)')
+    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
+                        help='how many batches to wait before logging training status')
+
+    parser.add_argument('--save-model', action='store_true', default=False,
+                        help='For Saving the current Model')
+    parser.add_argument('--save-onnx', action='store_true', default=False,
+                        help='For Converting current model to onnx and Saving the current Model')
+    args = parser.parse_args()
+    use_cuda = not args.no_cuda and torch.cuda.is_available()
+
+    torch.manual_seed(args.seed)
+
+    device = torch.device("cuda" if use_cuda else "cpu")
+
+    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
+    train_loader = torch.utils.data.DataLoader(
+        datasets.MNIST('../data', train=True, download=True,
+                       transform=transforms.Compose([
+                           transforms.ToTensor(),
+                           transforms.Normalize((0.1307,), (0.3081,))
+                       ])),
+        batch_size=args.batch_size, shuffle=True, **kwargs)
+    test_loader = torch.utils.data.DataLoader(
+        datasets.MNIST('../data', train=False, transform=transforms.Compose([
+                           transforms.ToTensor(),
+                           transforms.Normalize((0.1307,), (0.3081,))
+                       ])),
+        batch_size=args.test_batch_size, shuffle=True, **kwargs)
+
+
+    model = Net().to(device)
+    print(model)
+    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
+
+    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
+    for epoch in range(1, args.epochs + 1):
+        train(args, model, device, train_loader, optimizer, epoch)
+        test(args, model, device, test_loader)
+        scheduler.step()
+
+    if args.save_model:
+        torch.save(model.state_dict(), "mnist_cnn_cc.pt")
+
+    if args.save_onnx:
+        sample = next(iter(train_loader))[0].to(device)
+        input_names = [ "conv1" ]
+        output_names = [ "fc2" ]
+        torch.onnx.export(model, sample, "mnist_cc.onnx", verbose=False, input_names=input_names, output_names=output_names, opset_version=13)
+
+if __name__ == '__main__':
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/modify_onnx_ac.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/modify_onnx_ac.py
new file mode 100644
index 0000000000000000000000000000000000000000..8eb45bf92e2c269a14e47392bda54db133831973
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/modify_onnx_ac.py
@@ -0,0 +1,74 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import onnx
+import onnx_graphsurgeon as gs
+import argparse
+
+COORD_CONV_AC_OP_TYPE = 'CoordConvAC'
+
+def replace_with_coordconvac(graph, inputs, outputs):
+    '''
+    Replace each unfolded CoordConv graph with a single CoordConv node.
+    From
+    ... -> (CoordConv subgraph) -> Conv -> Relu -> (CoordConv subgraph) -> ...
+    To
+    ... -> CoordConv -> Conv -> Relu -> CoordConv -> ...
+    '''
+
+    # Disconnect output nodes of all input tensors
+    for inp in inputs:
+        inp.outputs.clear()
+
+    # Disconnect input nodes of all output tensors
+    for out in outputs:
+        out.inputs.clear()
+
+    # Insert the new node.
+    return graph.layer(op=COORD_CONV_AC_OP_TYPE, inputs=inputs, outputs=outputs)
+
+
+def main():
+    # Configurable parameters from command line
+    parser = argparse.ArgumentParser(description='ONNX Modifying Example')
+    parser.add_argument('--onnx', default="mnist_cc.onnx",
+                        help='onnx file to modify')
+    parser.add_argument('--output', default="mnist_with_coordconv.onnx",
+                        help='path of the modified onnx file to write (default: mnist_with_coordconv.onnx)')
+    args = parser.parse_args()
+
+    # Load ONNX file
+    graph = gs.import_onnx(onnx.load(args.onnx))
+
+    tmap = graph.tensors()
+    # You can figure out the input and output tensors using Netron.
+    inputs = [tmap["conv1"]]
+    outputs = [tmap["/conv1/addcoords/Concat_output_0"]]
+    replace_with_coordconvac(graph, inputs, outputs)
+
+    inputs = [tmap["/Relu_output_0"]]
+    outputs = [tmap["/conv2/addcoords/Concat_output_0"]]
+    replace_with_coordconvac(graph, inputs, outputs)
+
+    # Remove the now-dangling subgraph.
+    graph.cleanup().toposort()
+
+    # Save the modified model to the path given by --output (previously hard-coded, ignoring the option).
+    onnx.save(gs.export_onnx(graph), args.output)
+
+if __name__ == '__main__':
+    main()
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..54b294245ea0cf2a4033259cd66b1ba982932a77
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp
@@ -0,0 +1,444 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//!
+//! sampleOnnxMnistCoordConvAC.cpp
+//! This file contains the implementation of the ONNX MNIST sample. It creates the network using
+//! the MNIST onnx model.
+//! It can be run with the following command line:
+//! Command: ./sample_onnx_mnist_coord_conv_ac [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
+//! [--useDLACore=<int>]
+//!
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0
+
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "logger.h"
+#include "parserOnnxConfig.h"
+
+#include "NvInfer.h"
+#include "NvInferPlugin.h"
+#include <cuda_runtime_api.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+
+const std::string gSampleName = "TensorRT.sample_onnx_mnist_coord_conv_ac";
+
+// Mean and standard deviation matching PyTorch's transforms.Normalize().
+// processInput() uses them to normalize each input pixel as (value - mean) / std. Background:
+// https://discuss.pytorch.org/t/understanding-transform-normalize/21730
+const float PYTORCH_NORMALIZE_MEAN = 0.1307;
+const float PYTORCH_NORMALIZE_STD = 0.3081;
+
+//! \brief  The SampleOnnxMnistCoordConvAC class implements the ONNX MNIST CoordConvAC sample
+//!
+//! \details It builds a TensorRT engine from an ONNX model (build) and classifies one digit image with it (infer)
+//!
+class SampleOnnxMnistCoordConvAC
+{
+public:
+    SampleOnnxMnistCoordConvAC(const samplesCommon::OnnxSampleParams& params)
+        : mParams(params)
+        , mEngine(nullptr)
+    {
+    }
+
+    //!
+    //! \brief Parses the ONNX model and builds the network engine; returns false if any step fails
+    //!
+    bool build();
+
+    //!
+    //! \brief Runs inference on the sample image and verifies the result; uses the engine created by build()
+    //!
+    bool infer();
+
+private:
+    samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample.
+
+    nvinfer1::Dims mInputDims;  //!< The dimensions of the input to the network.
+    nvinfer1::Dims mOutputDims; //!< The dimensions of the output to the network.
+    int mNumber{0};             //!< The digit expected in the input image; set by processInput()
+
+    SampleUniquePtr<IRuntime> mRuntime{};           //!< The TensorRT Runtime used to deserialize the engine.
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The TensorRT engine used to run the network
+
+    //!
+    //! \brief Parses the ONNX model into the network and applies the FP16 / INT8 / DLA options to the config
+    //!
+    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+        SampleUniquePtr<nvonnxparser::IParser>& parser);
+
+    //!
+    //! \brief Reads the input image and stores the normalized pixels in the host input buffer
+    //!
+    bool processInput(const samplesCommon::BufferManager& buffers);
+
+    //!
+    //! \brief Applies softmax to the network output, prints the probabilities and verifies the classification
+    //!
+    bool verifyOutput(const samplesCommon::BufferManager& buffers);
+};
+
+//!
+//! \brief Creates the network, configures the builder and creates the network engine
+//!
+//! \details Initializes the TensorRT plugin library, parses the ONNX model into a network, builds a serialized
+//!          plan, deserializes it into mEngine and records the network's input and output dimensions
+//!
+//! \return true if the engine was created successfully and false otherwise
+//!
+bool SampleOnnxMnistCoordConvAC::build()
+{
+    initLibNvInferPlugins(&sample::gLogger, ""); // The return value is not checked here
+    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+    if (!builder)
+    {
+        return false;
+    }
+
+    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
+    if (!network)
+    {
+        return false;
+    }
+
+    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+    if (!config)
+    {
+        return false;
+    }
+
+    auto parser
+        = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
+    if (!parser)
+    {
+        return false;
+    }
+
+    auto constructed = constructNetwork(builder, network, config, parser);
+    if (!constructed)
+    {
+        return false;
+    }
+
+    // CUDA stream handed to the builder configuration for profiling.
+    auto profileStream = samplesCommon::makeCudaStream();
+    if (!profileStream)
+    {
+        return false;
+    }
+    config->setProfileStream(*profileStream);
+
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};
+
+    // Load the timing cache, but only when a timing cache file was requested
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache
+            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
+    }
+
+    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
+    if (!plan)
+    {
+        return false;
+    }
+
+    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(
+            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+
+    if (!mRuntime)
+    {
+        mRuntime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
+    }
+
+    if (!mRuntime)
+    {
+        return false;
+    }
+
+    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
+        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
+    if (!mEngine)
+    {
+        return false;
+    }
+
+    assert(network->getNbInputs() == 1); // The sample expects exactly one network input
+    mInputDims = network->getInput(0)->getDimensions();
+    assert(mInputDims.nbDims == 4); // processInput() reads d[2] and d[3] as height and width
+
+    assert(network->getNbOutputs() == 1); // The sample expects exactly one network output
+    mOutputDims = network->getOutput(0)->getDimensions();
+    assert(mOutputDims.nbDims == 2); // verifyOutput() reads d[1] as the number of classes
+
+    return true;
+}
+
+//!
+//! \brief Parses the ONNX model file and applies the requested precision and DLA settings
+//!
+//! \param builder Engine builder, handed to enableDLA
+//! \param network Network definition; also used when setting the INT8 dynamic ranges
+//! \param config  Builder configuration that receives the FP16 / INT8 flags and the DLA settings
+//! \param parser  ONNX parser that reads mParams.onnxFileName, located via mParams.dataDirs
+//! \return false if the model could not be parsed, true otherwise
+//!
+bool SampleOnnxMnistCoordConvAC::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+    SampleUniquePtr<nvonnxparser::IParser>& parser)
+{
+    auto const modelPath = samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs);
+    auto const verbosity = static_cast<int>(sample::gLogger.getReportableSeverity());
+    if (!parser->parseFromFile(modelPath.c_str(), verbosity))
+    {
+        return false;
+    }
+
+    if (mParams.fp16)
+    {
+        config->setFlag(BuilderFlag::kFP16);
+    }
+    if (mParams.int8)
+    {
+        config->setFlag(BuilderFlag::kINT8);
+        samplesCommon::setAllDynamicRanges(network.get(), 127.0F, 127.0F);
+    }
+
+    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
+    return true;
+}
+
+//!
+//! \brief Runs one inference pass on the built engine and checks the classification
+//!
+//! \details Creates the buffers for the engine, binds them to an execution context,
+//!          loads the input image, executes the engine and verifies the output.
+//!
+//! \return true if every step succeeded and the output was verified, false otherwise
+//!
+bool SampleOnnxMnistCoordConvAC::infer()
+{
+    // RAII buffer manager for the engine
+    samplesCommon::BufferManager buffers(mEngine);
+
+    SampleUniquePtr<nvinfer1::IExecutionContext> context{mEngine->createExecutionContext()};
+    if (context == nullptr)
+    {
+        return false;
+    }
+
+    // Point every I/O tensor of the engine at its device buffer
+    int32_t const nbIOTensors = mEngine->getNbIOTensors();
+    for (int32_t tensorIdx = 0; tensorIdx < nbIOTensors; ++tensorIdx)
+    {
+        auto const tensorName = mEngine->getIOTensorName(tensorIdx);
+        context->setTensorAddress(tensorName, buffers.getDeviceBuffer(tensorName));
+    }
+
+    // Fill the host input buffer; the sample expects exactly one input tensor name
+    assert(mParams.inputTensorNames.size() == 1);
+    if (!processInput(buffers))
+    {
+        return false;
+    }
+
+    // Host -> device copy of the input buffers
+    buffers.copyInputToDevice();
+
+    // Execute the engine on the device bindings
+    bool const executed = context->executeV2(buffers.getDeviceBindings().data());
+    if (!executed)
+    {
+        return false;
+    }
+
+    // Device -> host copy of the output buffers
+    buffers.copyOutputToHost();
+
+    // The sample succeeds only if the classification is verified
+    return verifyOutput(buffers);
+}
+
+//!
+//! \brief Reads the input image, prints it as ASCII art and stores the normalized pixels in the host input buffer
+//!
+//! \return false if the input dimensions or the host input buffer are unusable, true otherwise
+//!
+bool SampleOnnxMnistCoordConvAC::processInput(const samplesCommon::BufferManager& buffers)
+{
+    const int inputH = mInputDims.d[2];
+    const int inputW = mInputDims.d[3];
+    float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0]));
+    if (inputH <= 0 || inputW <= 0 || hostDataBuffer == nullptr)
+    {
+        return false; // Non-positive (e.g. dynamic) input shape, or no host buffer for the input tensor
+    }
+    const int nbPixels = inputH * inputW;
+    // The sample always classifies the digit 2, so the expected result and the image file are both fixed
+    mNumber = 2;
+    std::vector<uint8_t> fileData(nbPixels);
+    samplesCommon::readPGMFile(samplesCommon::locateFile("2.pgm", mParams.dataDirs), fileData.data(), inputH, inputW);
+
+    sample::gLogInfo << "Input:" << std::endl;
+    for (int i = 0; i < nbPixels; i++)
+    {
+        // Print the pixel as one of ten glyphs, then store its inverted and normalized intensity
+        sample::gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % inputW) ? "" : "\n");
+        hostDataBuffer[i] = ((1.0 - float(fileData[i] / 255.0)) - PYTORCH_NORMALIZE_MEAN) / PYTORCH_NORMALIZE_STD;
+    }
+    sample::gLogInfo << std::endl;
+    return true;
+}
+
+//!
+//! \brief Applies softmax to the raw network output in place, prints the probabilities and verifies the result
+//!
+//! \return true if the most probable class equals mNumber and its probability exceeds 0.9
+//!
+bool SampleOnnxMnistCoordConvAC::verifyOutput(const samplesCommon::BufferManager& buffers)
+{
+    const int outputSize = mOutputDims.d[1];
+    float* output = static_cast<float*>(buffers.getHostBuffer(mParams.outputTensorNames[0]));
+    // Subtract the largest logit before exponentiating so that exp() cannot overflow to inf (inf / inf is NaN)
+    float maxLogit{outputSize > 0 ? output[0] : 0.0F};
+    for (int i = 1; i < outputSize; i++)
+    {
+        maxLogit = std::max(maxLogit, output[i]);
+    }
+    float sum{0.0F};
+    for (int i = 0; i < outputSize; i++)
+    {
+        output[i] = std::exp(output[i] - maxLogit);
+        sum += output[i];
+    }
+    float val{0.0F};
+    int idx{0};
+    sample::gLogInfo << "Output:" << std::endl;
+    for (int i = 0; i < outputSize; i++)
+    {
+        output[i] /= sum;
+        if (output[i] >= val) // On equal probabilities the later class wins, as before
+        {
+            val = output[i];
+            idx = i;
+        }
+        sample::gLogInfo << " Prob " << i << "  " << std::fixed << std::setw(5) << std::setprecision(4) << output[i]
+                         << " " << "Class " << i << ": " << std::string(int(std::floor(output[i] * 10 + 0.5F)), '*')
+                         << std::endl;
+    }
+    sample::gLogInfo << std::endl;
+    return idx == mNumber && val > 0.9F;
+}
+
+//!
+//! \brief Builds the sample parameters from the parsed command line arguments
+//!
+samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args& args)
+{
+    samplesCommon::OnnxSampleParams sampleParams;
+    // The model file and the tensor names are fixed for this sample
+    sampleParams.onnxFileName = "mnist_with_coordconv.onnx";
+    sampleParams.inputTensorNames.push_back("conv1");
+    sampleParams.outputTensorNames.push_back("fc2");
+
+    // DLA, precision and timing cache options are copied from the command line arguments
+    sampleParams.dlaCore = args.useDLACore;
+    sampleParams.int8 = args.runInInt8;
+    sampleParams.fp16 = args.runInFp16;
+    sampleParams.timingCacheFile = args.timingCacheFile;
+    if (!args.dataDirs.empty()) // Use the data directories provided by the user
+    {
+        sampleParams.dataDirs = args.dataDirs;
+        return sampleParams;
+    }
+    sampleParams.dataDirs.push_back("data/mnist/"); // Otherwise use the default directories
+    sampleParams.dataDirs.push_back("data/samples/mnist/");
+    return sampleParams;
+}
+
+//!
+//! \brief Writes the usage line and a description of every supported option to standard output
+//!
+void printHelpInfo()
+{
+    std::cout
+        << "Usage: ./sample_onnx_mnist_coord_conv_ac [-h or --help] [-d or --datadir=<path to data directory>] "
+           "[--useDLACore=<int>] [--timingCacheFile=<path to timing cache file>]"
+        << std::endl
+        << "--help             Display help information" << std::endl
+        << "--datadir          Specify path to a data directory, overriding the default. This option can be used "
+           "multiple times to add multiple directories. If no data directories are given, the default is to use "
+           "(data/samples/mnist/, data/mnist/)"
+        << std::endl
+        << "--useDLACore=N     Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
+           "where n is the number of DLA engines on the platform." << std::endl
+        << "--timingCacheFile  Specify path to a timing cache file. If it does not already exist, it will be "
+           "created." << std::endl
+        << "--int8             Run in Int8 mode." << std::endl
+        << "--fp16             Run in FP16 mode." << std::endl;
+}
+
+//!
+//! \brief Entry point of the sample: parses the arguments, builds the engine, runs inference
+//!        and reports the outcome through the sample logger
+//!
+int main(int argc, char** argv)
+{
+    samplesCommon::Args args;
+    if (!samplesCommon::parseArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    // Define the test with the logger and report its start before doing any work
+    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);
+    sample::gLogger.reportTestStart(sampleTest);
+
+    SampleOnnxMnistCoordConvAC mnistSample(initializeSampleParams(args));
+    sample::gLogInfo << "Building and running a GPU inference engine for Onnx MNIST" << std::endl;
+
+    // infer() is attempted only when build() succeeded; a failure of either one fails the test
+    bool const succeeded = mnistSample.build() && mnistSample.infer();
+    if (!succeeded)
+    {
+        return sample::gLogger.reportFail(sampleTest);
+    }
+
+    return sample::gLogger.reportPass(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..6acc482c5b3d114a8c97ac1bba8bf897362cc3e2
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/Makefile
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+OUTNAME_RELEASE = sample_progress_monitor
+OUTNAME_DEBUG = sample_progress_monitor_debug
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..354776773a0e50e2a8243da31de8a598a4528d01
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/README.md
@@ -0,0 +1,189 @@
+# Progress Monitor API usage example based off sampleMNIST in TensorRT
+
+**Table Of Contents**
+
+- [Description](#description)
+- [How does this sample work?](#how-does-this-sample-work)
+    - [Progress bar display](#progress-bar-display)
+- [Preparing sample data](#preparing-sample-data)
+- [Running the sample](#running-the-sample)
+	- [Sample `--help` options](#sample---help-options)
+- [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+This sample, sampleProgressMonitor, shows an example of how to use the progress monitor API based on sampleOnnxMNIST ([documentation](https://docs.nvidia.com/deeplearning/tensorrt/sample-support-guide/index.html#onnx_mnist_sample)).
+
+This sample demonstrates the usage of `IProgressMonitor` to report the status of TRT engine-building operations.
+
+## How does this sample work?
+
+This sample uses an ONNX model that was trained on the [MNIST dataset](https://github.com/NVIDIA/DIGITS/blob/master/docs/GettingStarted.md).
+
+Specifically, this sample performs the following steps:
+- Performs the basic setup and initialization of TensorRT using the Onnx parser
+- [Imports a trained Onnx model using Onnx parser](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#import_onnx_c)
+- Preprocesses the input and stores the result in a managed buffer
+- Builds an engine using incremental progress reporting
+- [Serializes and deserializes the engines](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#serial_model_c)
+- [Uses the engines to perform inference on an input image](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#perform_inference_c)
+
+To verify whether the engine is operating correctly, this sample picks a 28x28 image of a digit at random and runs inference on it using the engine it created. The output of the network is a probability distribution on the digit, showing which digit is likely to be that in the image.
+
+### Progress bar display
+
+This sample implements an `IProgressMonitor` to display progress while building a TensorRT engine. Each long-running step of the process can define a new progress phase, nesting them as necessary.
+1. Phase entry - The `IProgressMonitor::phaseStart` callback determines an appropriate nesting level for the new phase and updates the terminal display.
+2. Phase progress - The `IProgressMonitor::stepComplete` callback increments the progress bar for the selected phase and updates the terminal display. This sample always returns `true` from `stepComplete` in order to progress the build unconditionally. If you wish to cancel a build in progress, such as in response to user input, you can return `false` from this function to stop the build early.
+3. Phase completion - The `IProgressMonitor::phaseFinish` callback removes the line corresponding to the completed phase and updates the terminal display.
+
+The progress bars are drawn using virtual terminal escape sequences to manipulate the terminal's cursor and clear lines.
+
+## Preparing sample data
+
+1. Download the sample data from [TensorRT release tarball](https://developer.nvidia.com/nvidia-tensorrt-download#), if not already mounted under `/usr/src/tensorrt/data` (NVIDIA NGC containers) and set it to `$TRT_DATADIR`.
+    ```bash
+    export TRT_DATADIR=/usr/src/tensorrt/data
+    pushd $TRT_DATADIR/mnist
+    pip3 install Pillow
+    popd
+    ```
+
+## Running the sample
+
+1. Compile the sample by following build instructions in [TensorRT README](https://github.com/NVIDIA/TensorRT/).
+
+2. Run the sample to perform inference on the digit:
+    ```bash
+    ./sample_progress_monitor [-h] [--datadir=/path/to/data/dir/] [--useDLACore=N] [--fp16 or --int8]
+    ```
+
+    For example:
+    ```bash
+    ./sample_progress_monitor --datadir $TRT_DATADIR/mnist --fp16
+    ```
+
+	This sample reads the `mnist.onnx` file to build the network.
+
+	This sample can be run in FP16 and INT8 modes as well.
+
+	**Note:** By default, the sample expects these files to be in either the `data/samples/mnist/` or `data/mnist/` directories. The list of default directories can be changed by adding one or more paths with `--datadir=/new/path/` as a command line argument.
+
+	**Note:** The sample should be run from a terminal. It uses xterm-style escape sequences to animate its output, and is not designed to be redirected to a file.
+
+3.  Verify that the sample ran successfully. If the sample runs successfully you should see animated progress bars during the network build phase and output similar to the following:
+    ```
+	&&&& RUNNING TensorRT.sample_progress_monitor [TensorRT v8700] # ./sample_progress_monitor
+	[I] Building and running a GPU inference engine for MNIST.
+	[I] [TRT] [MemUsageChange] Init CUDA: CPU +14, GPU +0, now: CPU 19, GPU 1217 (MiB)
+	[I] [TRT] [MemUsageChange] Init builder kernel library: CPU +1450, GPU +266, now: CPU 1545, GPU 1483 (MiB)
+	[I] [TRT] ----------------------------------------------------------------
+	[I] [TRT] Input filename:   ../../../../data/samples/mnist/mnist.onnx
+	[I] [TRT] ONNX IR version:  0.0.3
+	[I] [TRT] Opset version:    8
+	[I] [TRT] Producer name:    CNTK
+	[I] [TRT] Producer version: 2.5.1
+	[I] [TRT] Domain:           ai.cntk
+	[I] [TRT] Model version:    1
+	[I] [TRT] Doc string:       
+	[I] [TRT] ----------------------------------------------------------------
+	[W] [TRT] onnx2trt_utils.cpp:374: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
+	[I] [TRT] Graph optimization time: 0.00293778 seconds.
+	[I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
+	[=======---] Building engine 3/4
+	 [----------] Building engine from subgraph 0/1
+	  [----------] Computing profile costs 0/1
+	   [=======---] Timing graph nodes 11/15
+	    [===-------] Finding fastest tactic for Times212 12/37
+	     [==========] Measuring tactic time 4/4
+    ```
+    After the TensorRT network has been constructed, you should see output similar to the following. An ASCII rendering of the input image with digit 3:
+    ```
+	&&&& RUNNING TensorRT.sample_progress_monitor # ./sample_progress_monitor
+	[I] Building and running a GPU inference engine for MNIST
+	[I] Input:
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@#-:.-=@@@@@@@@@@@@@@
+	@@@@@%=     . *@@@@@@@@@@@@@
+	@@@@% .:+%%%  *@@@@@@@@@@@@@
+	@@@@+=#@@@@@# @@@@@@@@@@@@@@
+	@@@@@@@@@@@%  @@@@@@@@@@@@@@
+	@@@@@@@@@@@: *@@@@@@@@@@@@@@
+	@@@@@@@@@@- .@@@@@@@@@@@@@@@
+	@@@@@@@@@:  #@@@@@@@@@@@@@@@
+	@@@@@@@@:   +*%#@@@@@@@@@@@@
+	@@@@@@@%         :+*@@@@@@@@
+	@@@@@@@@#*+--.::     +@@@@@@
+	@@@@@@@@@@@@@@@@#=:.  +@@@@@
+	@@@@@@@@@@@@@@@@@@@@  .@@@@@
+	@@@@@@@@@@@@@@@@@@@@#. #@@@@
+	@@@@@@@@@@@@@@@@@@@@#  @@@@@
+	@@@@@@@@@%@@@@@@@@@@- +@@@@@
+	@@@@@@@@#-@@@@@@@@*. =@@@@@@
+	@@@@@@@@ .+%%%%+=.  =@@@@@@@
+	@@@@@@@@           =@@@@@@@@
+	@@@@@@@@*=:   :--*@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+	@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+	[I] Output:
+	Prob 1  0.0000 Class 1:
+	Prob 2  0.0000 Class 2:
+	Prob 3  1.0000 Class 3: **********
+	Prob 4  0.0000 Class 4:
+	Prob 5  0.0000 Class 5:
+	Prob 6  0.0000 Class 6:
+	Prob 7  0.0000 Class 7:
+	Prob 8  0.0000 Class 8:
+	Prob 9  0.0000 Class 9:
+
+	&&&& PASSED TensorRT.sample_progress_monitor # ./sample_progress_monitor
+	```
+
+	This output shows that the sample ran successfully; `PASSED`.
+
+
+### Sample `--help` options
+
+To see the full list of available options and their descriptions, use the `-h` or `--help` command line option. For example:
+```
+Usage: ./sample_progress_monitor [-h or --help] [-d or --datadir=<path to data directory>] [--useDLACore=<int>]
+--help Display help information
+--datadir Specify path to a data directory, overriding the default. This option can be used multiple times to add multiple directories. If no data directories are given, the default is to use (data/samples/mnist/, data/mnist/)
+--useDLACore=N Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, where n is the number of DLA engines on the platform.
+--int8 Run in Int8 mode.
+--fp16 Run in FP16 mode.
+```
+
+# Additional resources
+
+The following resources provide a deeper understanding about sampleProgressMonitor:
+
+**MNIST**
+- [MNIST dataset](https://github.com/NVIDIA/DIGITS/blob/master/docs/GettingStarted.md)
+
+**Documentation**
+- [Introduction To NVIDIA's TensorRT Samples](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html#samples)
+- [Working With TensorRT Using The C++ API](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#c_topics)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation.
+
+# Changelog
+
+**May 2023**
+- This `README.md` file was created and reviewed.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/sampleProgressMonitor.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/sampleProgressMonitor.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8030626b878679cf55c91379ecede92b44578ea0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/sampleProgressMonitor/sampleProgressMonitor.cpp
@@ -0,0 +1,596 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//! \file sampleProgressMonitor.cpp
+//! \brief This file contains the implementation of the Progress Monitor sample.
+//!
+//! It demonstrates the usage of IProgressMonitor for displaying engine build progress on the user's terminal.
+//! It builds a TensorRT engine by importing a trained MNIST ONNX model and runs inference on an input image of a
+//! digit.
+//! It can be run with the following command line:
+//! Command: ./sample_progress_monitor [-h or --help] [-d=/path/to/data/dir or --datadir=/path/to/data/dir]
+
+// Define TRT entrypoints used in common code
+#define DEFINE_TRT_ENTRYPOINTS 1
+
+#include "argsParser.h"
+#include "buffers.h"
+#include "common.h"
+#include "logger.h"
+
+#include "NvInfer.h"
+#include "NvOnnxParser.h"
+#include "parserOnnxConfig.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cuda_runtime_api.h>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+using namespace nvinfer1;
+using samplesCommon::SampleUniquePtr;
+std::string const gSampleName = "TensorRT.sample_progress_monitor";
+
+//!
+//! \brief The ConsoleProgressMonitor class displays a simple progress graph for each step of the build process.
+//!
+//! \details Phases are stored in display order; a nested phase is indented one column more than its parent.
+//!          Callbacks that name a phase which is not currently active are ignored rather than dereferencing
+//!          an end iterator, and the bar stays well-formed for zero or out-of-range step counts.
+//!
+class ConsoleProgressMonitor : public IProgressMonitor
+{
+public:
+    //! \brief Adds a phase to the display, below the existing sub-phases of \p parentPhase when one is given.
+    void phaseStart(char const* phaseName, char const* parentPhase, int32_t nbSteps) noexcept final
+    {
+        PhaseEntry newPhase;
+        newPhase.title = phaseName;
+        newPhase.nbSteps = nbSteps;
+
+        // An unknown parent is treated like no parent: the phase is appended at the top level.
+        PhaseIter iParent = parentPhase ? findPhase(parentPhase) : mPhases.end();
+        if (iParent != mPhases.end())
+        {
+            newPhase.nbIndents = 1 + iParent->nbIndents;
+            do
+            {
+                ++iParent;
+            } while (iParent != mPhases.end() && iParent->nbIndents >= newPhase.nbIndents);
+        }
+        mPhases.insert(iParent, newPhase);
+        redraw();
+    }
+
+    //! \brief Records the progress of a phase. Always returns true, so the build is never cancelled from here.
+    bool stepComplete(char const* phaseName, int32_t step) noexcept final
+    {
+        PhaseIter const iPhase = findPhase(phaseName);
+        if (iPhase != mPhases.end())
+        {
+            iPhase->steps = step;
+            redraw();
+        }
+        return true;
+    }
+
+    //! \brief Blanks the line of a finished phase and removes the phase; unknown phase names are ignored.
+    void phaseFinish(char const* phaseName) noexcept final
+    {
+        PhaseIter const iPhase = findPhase(phaseName);
+        if (iPhase == mPhases.end())
+        {
+            return;
+        }
+        iPhase->active = false;
+        redraw();
+        mPhases.erase(iPhase);
+    }
+
+private:
+    struct PhaseEntry
+    {
+        std::string title;
+        int32_t steps{0};
+        int32_t nbSteps{0};
+        int32_t nbIndents{0};
+        bool active{true};
+    };
+    using PhaseIter = std::vector<PhaseEntry>::iterator;
+
+    std::vector<PhaseEntry> mPhases;
+
+    static int32_t constexpr kPROGRESS_INNER_WIDTH = 10;
+
+    //! \brief Draws one line per phase, then moves the cursor back up so that logger output overwrites the bars.
+    void redraw()
+    {
+        auto const moveToStartOfLine = []() { std::cout << "\x1b[0G"; };
+        auto const clearCurrentLine = []() { std::cout << "\x1b[2K"; };
+
+        moveToStartOfLine();
+
+        int32_t inactivePhases = 0;
+        for (PhaseEntry const& phase : mPhases)
+        {
+            clearCurrentLine();
+            if (!phase.active)
+            {
+                // Don't draw anything (not even the indentation) at this time, but prepare to emit blank
+                // lines later. This ensures that stale phases are removed from display rather than lingering.
+                ++inactivePhases;
+                continue;
+            }
+            std::cout << std::string(phase.nbIndents, ' ') << progressBar(phase.steps, phase.nbSteps) << ' '
+                      << phase.title << ' ' << phase.steps << '/' << phase.nbSteps << std::endl;
+        }
+
+        for (int32_t phase = 0; phase < inactivePhases; ++phase)
+        {
+            clearCurrentLine();
+            std::cout << std::endl;
+        }
+
+        // Move (mPhases.size()) lines up so that logger output can overwrite the progress bars.
+        std::cout << "\x1b[" << mPhases.size() << "A";
+    }
+
+    //! \brief Renders a bar such as "[====------]"; the filled part is clamped to the inner width of the bar.
+    std::string progressBar(int32_t steps, int32_t nbSteps) const
+    {
+        int32_t completedChars = 0;
+        if (nbSteps > 0)
+        {
+            // Clamp in floating point first so that the conversion to int32_t is always in range.
+            float const filled = kPROGRESS_INNER_WIDTH * steps / static_cast<float>(nbSteps);
+            completedChars = static_cast<int32_t>(std::min(static_cast<float>(kPROGRESS_INNER_WIDTH), std::max(0.0F, filled)));
+        }
+        return '[' + std::string(completedChars, '=') + std::string(kPROGRESS_INNER_WIDTH - completedChars, '-') + ']';
+    }
+
+    //! \brief Returns the active phase with the given title, or mPhases.end() if there is none.
+    PhaseIter findPhase(std::string const& title)
+    {
+        return std::find_if(mPhases.begin(), mPhases.end(),
+            [&title](PhaseEntry const& phase) { return phase.title == title && phase.active; });
+    }
+};
+
+//!
+//! \brief The SampleProgressMonitor class implements the Progress Monitor sample.
+//!
+//! \details It creates the network using a trained ONNX MNIST classification model.
+//!
+class SampleProgressMonitor
+{
+public:
+    explicit SampleProgressMonitor(samplesCommon::OnnxSampleParams const& params)
+        : mParams(params)
+    {
+    }
+
+    //!
+    //! \brief Builds the network engine; \p monitor is presumably used to report build progress (see definition).
+    //!
+    bool build(IProgressMonitor* monitor);
+
+    //!
+    //! \brief Runs the TensorRT inference engine for this sample.
+    //!
+    bool infer();
+
+private:
+    //!
+    //! \brief Uses an ONNX parser to create the MNIST network and marks the output layers.
+    //!
+    bool constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+        SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+        SampleUniquePtr<nvonnxparser::IParser>& parser);
+    //!
+    //! \brief Reads the input and mean data, preprocesses, and stores the result in a managed buffer.
+    //!
+    bool processInput(
+        samplesCommon::BufferManager const& buffers, std::string const& inputTensorName, int32_t inputFileIdx) const;
+
+    //!
+    //! \brief Verifies that the output is correct and prints it.
+    //!
+    bool verifyOutput(samplesCommon::BufferManager const& buffers, std::string const& outputTensorName,
+        int32_t groundTruthDigit) const;
+
+    SampleUniquePtr<IRuntime> mRuntime{}; //!< The TensorRT runtime owned by the sample.
+    std::shared_ptr<nvinfer1::ICudaEngine> mEngine{nullptr}; //!< The TensorRT engine used to run the network.
+
+    samplesCommon::OnnxSampleParams mParams; //!< The parameters for the sample.
+
+    nvinfer1::Dims mInputDims; //!< The dimensions of the input to the network.
+};
+
+//!
+//! \brief Creates the network, configures the builder and creates the network engine.
+//!
+//! \details Parses the ONNX model via constructNetwork(), registers \p monitor on the builder configuration,
+//!          builds a serialized plan and deserializes it into mEngine. Also stores the single input's dims.
+//!
+//! \return true if the engine was created successfully and false otherwise.
+//!
+bool SampleProgressMonitor::build(IProgressMonitor* monitor)
+{
+    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
+    if (!builder)
+    {
+        return false;
+    }
+
+    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
+    if (!network)
+    {
+        return false;
+    }
+
+    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
+    if (!config)
+    {
+        return false;
+    }
+
+    auto parser
+        = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
+    if (!parser)
+    {
+        return false;
+    }
+
+    auto constructed = constructNetwork(builder, network, config, parser);
+    if (!constructed)
+    {
+        return false;
+    }
+
+    config->setProgressMonitor(monitor); // Register the caller-supplied monitor on the builder configuration.
+
+    if (mParams.fp16)
+    {
+        config->setFlag(BuilderFlag::kFP16);
+    }
+    if (mParams.int8)
+    {
+        config->setFlag(BuilderFlag::kINT8);
+    }
+
+    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore, true /*GPUFallback*/);
+
+    if (mParams.int8)
+    {
+        // The sample fails for Int8 with kREJECT_EMPTY_ALGORITHMS flag set.
+        config->clearFlag(BuilderFlag::kREJECT_EMPTY_ALGORITHMS);
+    }
+
+    if (!mRuntime)
+    {
+        mRuntime = SampleUniquePtr<IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
+    }
+    if (!mRuntime)
+    {
+        return false;
+    }
+
+    // CUDA stream used for profiling by the builder.
+    auto profileStream = samplesCommon::makeCudaStream();
+    if (!profileStream)
+    {
+        return false;
+    }
+    config->setProfileStream(*profileStream);
+
+    SampleUniquePtr<nvinfer1::ITimingCache> timingCache{};
+
+    // Load timing cache (only when a cache file path was supplied).
+    if (!mParams.timingCacheFile.empty())
+    {
+        timingCache
+            = samplesCommon::buildTimingCacheFromFile(sample::gLogger.getTRTLogger(), *config, mParams.timingCacheFile);
+    }
+
+    SampleUniquePtr<IHostMemory> plan{builder->buildSerializedNetwork(*network, *config)};
+    if (!plan)
+    {
+        return false;
+    }
+
+    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
+    {
+        samplesCommon::updateTimingCacheFile(
+            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
+    }
+
+    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
+        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
+    if (!mEngine)
+    {
+        return false;
+    }
+
+    ASSERT(network->getNbInputs() == 1);
+    mInputDims = network->getInput(0)->getDimensions();
+    ASSERT(mInputDims.nbDims == 4); // processInput() reads d[2] and d[3] as the image height and width.
+
+    return true;
+}
+
+//!
+//! \brief Loads the PGM image of digit \p inputFileIdx, logs it as ASCII art, and fills the input host buffer.
+//!
+bool SampleProgressMonitor::processInput(
+    samplesCommon::BufferManager const& buffers, std::string const& inputTensorName, int32_t inputFileIdx) const
+{
+    int32_t const inputH = mInputDims.d[2];
+    int32_t const inputW = mInputDims.d[3];
+    int32_t const nbPixels = inputH * inputW;
+
+    // The caller chooses the digit, so no random-number seeding is needed here.
+    std::vector<uint8_t> fileData(nbPixels);
+    samplesCommon::readPGMFile(samplesCommon::locateFile(std::to_string(inputFileIdx) + ".pgm", mParams.dataDirs),
+        fileData.data(), inputH, inputW);
+
+    // Print ASCII representation of digit.
+    sample::gLogInfo << "Input:\n";
+    for (int32_t i = 0; i < nbPixels; i++)
+    {
+        sample::gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % inputW) ? "" : "\n");
+    }
+    sample::gLogInfo << std::endl;
+
+    // Invert each 8-bit pixel and scale it to [0, 1] as it is written to the input buffer.
+    float* hostInputBuffer = static_cast<float*>(buffers.getHostBuffer(inputTensorName));
+    for (int32_t i = 0; i < nbPixels; i++)
+    {
+        hostInputBuffer[i] = 1.0F - static_cast<float>(fileData[i]) / 255.0F;
+    }
+
+    return true;
+}
+
+//!
+//! \brief Converts the raw outputs to probabilities with a softmax, prints them, and checks the prediction.
+//! \return true when the most probable class equals \p groundTruthDigit with a probability above 0.9.
+//!
+bool SampleProgressMonitor::verifyOutput(
+    samplesCommon::BufferManager const& buffers, std::string const& outputTensorName, int32_t groundTruthDigit) const
+{
+    float* prob = static_cast<float*>(buffers.getHostBuffer(outputTensorName));
+    int32_t constexpr kDIGITS = 10;
+
+    // Softmax in place. Subtracting the largest value first keeps exp() from overflowing to inf
+    // (which would turn the normalized values into NaN) without changing the resulting probabilities.
+    float const maxLogit = *std::max_element(prob, prob + kDIGITS);
+    std::for_each(prob, prob + kDIGITS, [maxLogit](float& n) { n = exp(n - maxLogit); });
+    float const sum = std::accumulate(prob, prob + kDIGITS, 0.F);
+    std::for_each(prob, prob + kDIGITS, [sum](float& n) { n = n / sum; });
+
+    auto max_ele = std::max_element(prob, prob + kDIGITS);
+    float const val = *max_ele;
+    int32_t const idx = static_cast<int32_t>(max_ele - prob);
+
+    // Print histogram of the output probability distribution.
+    sample::gLogInfo << "Output:\n";
+    for (int32_t i = 0; i < kDIGITS; i++)
+    {
+        sample::gLogInfo << " Prob " << i << "  " << std::fixed << std::setw(5) << std::setprecision(4) << prob[i]
+                         << " "
+                         << "Class " << i << ": " << std::string(int32_t(std::floor(prob[i] * 10 + 0.5F)), '*')
+                         << std::endl;
+    }
+    sample::gLogInfo << std::endl;
+
+    return (idx == groundTruthDigit && val > 0.9F);
+}
+
+//!
+//! \brief Populates the network from the sample's ONNX file and applies the precision and DLA settings.
+//!
+//! \param builder The engine builder, forwarded to enableDLA().
+//! \param network The network definition that the parser fills in.
+//! \param config The builder configuration that receives the FP16/INT8 flags.
+//! \param parser The ONNX parser used to read mParams.onnxFileName.
+//! \return false if the ONNX file could not be parsed, true otherwise.
+bool SampleProgressMonitor::constructNetwork(SampleUniquePtr<nvinfer1::IBuilder>& builder,
+    SampleUniquePtr<nvinfer1::INetworkDefinition>& network, SampleUniquePtr<nvinfer1::IBuilderConfig>& config,
+    SampleUniquePtr<nvonnxparser::IParser>& parser)
+{
+    auto const modelPath = samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs);
+    auto const verbosity = static_cast<int32_t>(sample::gLogger.getReportableSeverity());
+    if (!parser->parseFromFile(modelPath.c_str(), verbosity))
+    {
+        return false;
+    }
+
+    if (mParams.fp16)
+    {
+        config->setFlag(BuilderFlag::kFP16);
+    }
+    if (mParams.int8)
+    {
+        // The input range is set explicitly; setAllDynamicRanges() then gets the same constant for both arguments.
+        constexpr float kTENSOR_DYNAMIC_RANGE = 4.0F;
+        config->setFlag(BuilderFlag::kINT8);
+        network->getInput(0)->setDynamicRange(-1.0F, 1.0F);
+        samplesCommon::setAllDynamicRanges(network.get(), kTENSOR_DYNAMIC_RANGE, kTENSOR_DYNAMIC_RANGE);
+    }
+
+    samplesCommon::enableDLA(builder.get(), config.get(), mParams.dlaCore);
+    return true;
+}
+
+//!
+//! \brief Runs the TensorRT inference engine for this sample.
+//!
+//! \details Allocates the buffers, feeds a randomly chosen digit image, executes the engine on its own CUDA
+//!          stream (released on the enqueue-failure path as well), and verifies the output.
+//!
+bool SampleProgressMonitor::infer()
+{
+    // Create RAII buffer manager object.
+    samplesCommon::BufferManager buffers(mEngine);
+
+    auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
+    if (!context)
+    {
+        return false;
+    }
+
+    // Pick a random digit to try to infer.
+    srand(time(NULL));
+    int32_t const digit = rand() % 10;
+
+    // Read the input data into the managed buffers.
+    // There should be just 1 input tensor.
+    ASSERT(mParams.inputTensorNames.size() == 1);
+    if (!processInput(buffers, mParams.inputTensorNames[0], digit))
+    {
+        return false;
+    }
+
+    // Create CUDA stream for the execution of this inference.
+    cudaStream_t stream;
+    CHECK(cudaStreamCreate(&stream));
+
+    // Asynchronously copy data from host input buffers to device input buffers
+    buffers.copyInputToDeviceAsync(stream);
+
+    for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
+    {
+        auto const& name = mEngine->getIOTensorName(i);
+        context->setTensorAddress(name, buffers.getDeviceBuffer(name));
+    }
+
+    // Asynchronously enqueue the inference work
+    if (!context->enqueueV3(stream))
+    {
+        // Release the stream before bailing out so a failed enqueue does not leak it.
+        CHECK(cudaStreamDestroy(stream));
+        return false;
+    }
+    // Asynchronously copy data from device output buffers to host output buffers.
+    buffers.copyOutputToHostAsync(stream);
+
+    // Wait for the work in the stream to complete.
+    CHECK(cudaStreamSynchronize(stream));
+
+    // Release stream.
+    CHECK(cudaStreamDestroy(stream));
+
+    // Check and print the output of the inference. There should be just one output tensor.
+    ASSERT(mParams.outputTensorNames.size() == 1);
+    return verifyOutput(buffers, mParams.outputTensorNames[0], digit);
+}
+
+//!
+//! \brief Builds the sample parameters from the parsed command-line arguments.
+//!
+samplesCommon::OnnxSampleParams initializeSampleParams(samplesCommon::Args const& args)
+{
+    samplesCommon::OnnxSampleParams params;
+
+    // Model file and tensor names are fixed by this sample.
+    params.onnxFileName = "mnist.onnx";
+    params.inputTensorNames.push_back("Input3");
+    params.outputTensorNames.push_back("Plus214_Output_0");
+
+    // Settings copied unchanged from the command line.
+    params.dlaCore = args.useDLACore;
+    params.int8 = args.runInInt8;
+    params.fp16 = args.runInFp16;
+    params.timingCacheFile = args.timingCacheFile;
+
+    // User-provided data directories win; otherwise fall back to the default locations.
+    params.dataDirs = args.dataDirs;
+    if (params.dataDirs.empty())
+    {
+        params.dataDirs.push_back("data/mnist/");
+        params.dataDirs.push_back("data/samples/mnist/");
+    }
+    return params;
+}
+
+//!
+//! \brief Prints the help information for running this sample.
+//!
+void printHelpInfo()
+{
+    std::cout << "Usage: ./sample_progress_monitor [-h or --help] [-d or --datadir=<path to data directory>] "
+                 "[--useDLACore=<int>] [--timingCacheFile=<path to timing cache file>] [--int8] [--fp16]\n";
+    std::cout << "--help          Display help information\n";
+    // The default directories are quoted in the order initializeSampleParams() adds them.
+    std::cout << "--datadir       Specify path to a data directory, overriding the default. This option can be used "
+                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
+                 "(data/mnist/, data/samples/mnist/)\n";
+    std::cout << "--useDLACore=N  Specify a DLA engine for layers that support DLA. Value can range from 0 to n-1, "
+                 "where n is the number of DLA engines on the platform.\n";
+    std::cout << "--timingCacheFile  Specify path to a timing cache file. If it does not already exist, it will be "
+                 "created.\n";
+    std::cout << "--int8          Run in Int8 mode.\n";
+    std::cout << "--fp16          Run in FP16 mode.\n";
+    std::cout.flush();
+}
+
+//!
+//! \brief Entry point: parses the arguments, builds the engine with a console progress monitor, runs inference.
+//!
+int32_t main(int32_t argc, char** argv)
+{
+    // Reject malformed command lines before any test bookkeeping starts.
+    samplesCommon::Args args;
+    if (!samplesCommon::parseArgs(args, argc, argv))
+    {
+        sample::gLogError << "Invalid arguments" << std::endl;
+        printHelpInfo();
+        return EXIT_FAILURE;
+    }
+
+    // An explicit help request counts as a successful run.
+    if (args.help)
+    {
+        printHelpInfo();
+        return EXIT_SUCCESS;
+    }
+
+    auto sampleTest = sample::Logger::defineTest(gSampleName, argc, argv);
+    sample::Logger::reportTestStart(sampleTest);
+
+    SampleProgressMonitor sampleProgressMonitor(initializeSampleParams(args));
+    {
+        // The monitor lives in its own scope so it is destroyed before the passing result is reported.
+        sample::gLogInfo << "Building and running a GPU inference engine for MNIST." << std::endl;
+        ConsoleProgressMonitor progressMonitor;
+
+        // infer() is only attempted after a successful build().
+        bool const succeeded = sampleProgressMonitor.build(&progressMonitor) && sampleProgressMonitor.infer();
+        if (!succeeded)
+        {
+            return sample::Logger::reportFail(sampleTest);
+        }
+    }
+
+    return sample::Logger::reportPass(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/Makefile b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..03ed53d3ccc8d2bc56023644f5070e92ccf0fbe7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/Makefile
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: Copyright (c) 2019-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
+#
+# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+# property and proprietary rights in and to this material, related
+# documentation and any modifications thereto. Any use, reproduction,
+# disclosure or distribution of this material and related documentation
+# without an express license agreement from NVIDIA CORPORATION or
+# its affiliates is strictly prohibited.
+
+ifneq ($(TRT_WINML),1)
+OUTNAME_RELEASE = trtexec
+OUTNAME_DEBUG   = trtexec_debug
+else # TRT_WINML is 1: use the tensorrt_rtx output names instead
+OUTNAME_RELEASE = tensorrt_rtx
+OUTNAME_DEBUG = tensorrt_rtx_debug
+endif # TRT_WINML
+EXTRA_DIRECTORIES = ../common ../utils
+SAMPLE_DIR_NAME = $(shell basename $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
+MAKEFILE ?= ../Makefile.config
+include $(MAKEFILE)
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/README.md b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3d7331609de728b87678a789b977662a4c26baa4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/README.md
@@ -0,0 +1,190 @@
+# TensorRT Command-Line Wrapper: trtexec
+
+**Table Of Contents**
+- [TensorRT Command-Line Wrapper: trtexec](#tensorrt-command-line-wrapper-trtexec)
+  - [Description](#description)
+  - [Building `trtexec`](#building-trtexec)
+  - [Using `trtexec`](#using-trtexec)
+    - [Example 1: Profiling a custom layer](#example-1-profiling-a-custom-layer)
+    - [Example 2: Running a network on DLA](#example-2-running-a-network-on-dla)
+    - [Example 3: Running an ONNX model with full dimensions and dynamic shapes](#example-3-running-an-onnx-model-with-full-dimensions-and-dynamic-shapes)
+    - [Example 4: Collecting and printing a timing trace](#example-4-collecting-and-printing-a-timing-trace)
+    - [Example 5: Tune throughput with multi-streaming](#example-5-tune-throughput-with-multi-streaming)
+    - [Example 6: Create a strongly typed plan file](#example-6-create-a-strongly-typed-plan-file)
+  - [Tool command line arguments](#tool-command-line-arguments)
+  - [Additional resources](#additional-resources)
+- [License](#license)
+- [Changelog](#changelog)
+- [Known issues](#known-issues)
+
+## Description
+
+Included in the `samples` directory is a command line wrapper tool, called `trtexec`. `trtexec` is a tool to quickly utilize TensorRT without having to develop your own application. The `trtexec` tool has two main purposes:
+-   It's useful for benchmarking networks on random or user-provided input data.
+-   It's useful for generating serialized engines from models.
+
+**Benchmarking network** - If you have a model saved as an ONNX file, you can use the `trtexec` tool to test the performance of running inference on your network using TensorRT. The `trtexec` tool has many options for specifying inputs and outputs, iterations for performance timing, precision allowed, and other options.
+
+**Serialized engine generation** - If you generate a saved serialized engine file, you can pull it into another application that runs inference. For example, you can use the [TensorRT Laboratory](https://github.com/NVIDIA/tensorrt-laboratory) to run the engine with multiple execution contexts from multiple threads in a fully pipelined asynchronous way to test parallel inference performance. Also, in INT8 mode, random weights are used, meaning trtexec does not provide calibration capability.
+
+**Using custom input data** - By default trtexec will run inference with randomly generated inputs. To provide custom inputs for an inference run, trtexec expects a binary file containing the data for each input tensor. It is recommended that this binary file be generated through `numpy`. For example, to create custom data of all ones to an ONNX model with one input named `data` with shape `(1,3,244,244)` and type `FLOAT`:
+
+```
+import numpy as np
+data = np.ones((1,3,244,244), dtype=np.float32)
+data.tofile("data.bin")
+```
+
+This binary file can be loaded by trtexec during inference by using the `--loadInputs` flag:
+
+```
+./trtexec --onnx=model.onnx --loadInputs="data":data.bin
+```
+
+## Building `trtexec`
+
+`trtexec` can be used to build engines, using different TensorRT features (see command line arguments), and run inference. `trtexec` also measures and reports execution time and can be used to understand performance and possibly locate bottlenecks.
+
+Compile the sample by following build instructions in [TensorRT README](https://github.com/NVIDIA/TensorRT/).
+
+## Using `trtexec`
+
+`trtexec` can build engines from models in ONNX format.
+
+### Example 1: Profiling a custom layer
+
+You can profile a custom layer, implemented as a [TensorRT plugin](https://github.com/NVIDIA/TensorRT/tree/main/plugin#tensorrt-plugins), by leveraging `trtexec`. Plugins need to be registered in the plugin registry (instance of `IPluginRegistry`) to be visible to TensorRT. `trtexec` will load the TensorRT standard plugin library (`libnvinfer_plugin.so` / `nvinfer_plugin.dll`) that provides plugin support to TensorRT. Check out the [Non-Zero Plugins Sample](../sampleNonZeroPlugin/) for a quick sample, or the [Plugins section](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#extending) of the TensorRT Developer Guide for a more detailed walkthrough.
+
+Plugins can be used with `trtexec` in the following 2 ways:
+
+<details>
+<summary> Using TensorRT-shipped Plugins </summary>
+
+
+- If you are using TensorRT-shipped plugins (included in `libnvinfer_plugin.so` / `nvinfer_plugin.dll`), no extra steps are required from the user as these plugins are pre-registered with the plugin registry.
+</details>
+
+<details>
+<summary> Using your own Plugin  </summary>
+
+  - If you want to define your own plugin and have `trtexec` use it as part of the network, you should define your own _Plugin Shared library_ with specific entry-points recognized by TensorRT. Then, provide the shared plugin library path to `trtexec` using the `--dynamicPlugins` flag.
+  - More information on Plugin Shared Libraries and how to define them can be seen in the [Plugin Shared Libraries](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#plugin-serialization) section of the [TensorRT Developer Guide](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html).
+
+    In summary, there are two methods:
+    1. The `REGISTER_TENSORRT_PLUGIN` macro can be applied to the plugin creator for each plugin that needs to be statically registered. i.e. Registered at load-time of the plugin library.
+    2. For dynamic registration, the plugin shared library must expose the below symbols which will be the entry points for TensorRT:
+
+        ```cpp
+        extern "C" void setLoggerFinder(ILoggerFinder* finder);
+        extern "C" IPluginCreatorInterface* const* getCreators(int32_t& nbCreators);
+        ```
+    In the above, `setLoggerFinder()` should accept a pointer to an `ILoggerFinder`, through which an `ILogger` instance can be retrieved for the purpose of logging inside the library code. `getCreators()` should return an array of plugin creators the library contains. Example implementations of these entry points can be found in [plugin/vc/vfcCommon.cpp](../../plugin/vc/vfcCommon.cpp) and [plugin/vc/vfcCommon.h](../../plugin/vc/vfcCommon.h).
+
+      **Note**: Usage of `getPluginCreators` instead of `getCreators` is also valid, but deprecated.
+  - If the user wants to build a TensorRT engine first and run later, the user has the option to serialize the shared plugin library as part of the engine itself by specifying `--setPluginsToSerialize`. By doing so, the user does not have to specify `--dynamicPlugins` to `trtexec` when running the built engine.
+  - For more information on these flags, run `./trtexec --help`.
+</details>
+
+### Example 2: Running a network on DLA
+
+To run the MNIST network on NVIDIA DLA (Deep Learning Accelerator) using `trtexec` in FP16 mode, issue:
+```
+./trtexec --onnx=data/mnist/mnist.onnx --useDLACore=1 --fp16 --allowGPUFallback
+```
+To run the MNIST network on DLA using `trtexec` in INT8 mode, issue:
+```
+./trtexec --onnx=data/mnist/mnist.onnx --useDLACore=1 --int8 --allowGPUFallback
+```
+To run the MNIST network on DLA core 0 in FP16 mode using `trtexec`, issue:
+```
+./trtexec --onnx=data/mnist/mnist.onnx --useDLACore=0 --fp16 --allowGPUFallback
+```
+
+For more information about DLA, see [Working With DLA](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#dla_topic).
+
+### Example 3: Running an ONNX model with full dimensions and dynamic shapes
+
+To run an ONNX model in full-dimensions mode with static input shapes:
+
+```
+./trtexec --onnx=model.onnx
+```
+
+The following examples assume an ONNX model with one dynamic input with name `input` and dimensions `[-1, 3, 244, 244]`.
+
+To run an ONNX model in full-dimensions mode with a given input shape:
+
+```
+./trtexec --onnx=model.onnx --shapes=input:32x3x244x244
+```
+
+To benchmark your ONNX model with a range of possible input shapes:
+
+```
+./trtexec --onnx=model.onnx --minShapes=input:1x3x244x244 --optShapes=input:16x3x244x244 --maxShapes=input:32x3x244x244 --shapes=input:5x3x244x244
+```
+
+### Example 4: Collecting and printing a timing trace
+
+When running, `trtexec` prints the measured performance, but can also export the measurement trace to a json file:
+```
+./trtexec --onnx=data/mnist/mnist.onnx --exportTimes=trace.json
+```
+Once the trace is stored in a file, it can be printed using the `tracer.py` utility. This tool prints timestamps and duration of input, compute, and output, in different forms:
+```
+./tracer.py trace.json
+```
+Similarly, profiles can also be printed and stored in a json file. The utility `profiler.py` can be used to read and print the profile from a json file.
+
+### Example 5: Tune throughput with multi-streaming
+
+Tuning throughput may require running multiple concurrent streams of execution. This is the case, for example, when the latency achieved is well within the desired
+threshold, and we can increase the throughput, even at the expense of some latency. For example, save engines with different precisions and assume that both
+execute within 2 ms, the latency threshold:
+```
+trtexec --onnx=resnet50.onnx --saveEngine=g1.trt --int8 --skipInference
+trtexec --onnx=resnet50.onnx --saveEngine=g2.trt --best --skipInference
+```
+Now, the saved engines can be tried to find the combination precision/streams below 2 ms that maximizes the throughput:
+```
+trtexec --loadEngine=g1.trt --streams=2
+trtexec --loadEngine=g1.trt --streams=3
+trtexec --loadEngine=g1.trt --streams=4
+trtexec --loadEngine=g2.trt --streams=2
+```
+
+### Example 6: Create a strongly typed plan file
+The `--stronglyTyped` flag creates a network with the `NetworkDefinitionCreationFlag::kSTRONGLY_TYPED` flag, where tensor data types are inferred from network input types
+and operator type specification.  Use of specific builder precision flags such as `--int8` or `--best` with this option is not allowed.
+```
+./trtexec --onnx=model.onnx --stronglyTyped
+```
+
+## Tool command line arguments
+
+To see the full list of available options and their descriptions, issue the `./trtexec --help` command.
+
+**Note:** Specifying the `--safe` parameter turns the safety mode switch `ON`. By default, the `--safe` parameter is not specified; the safety mode switch is `OFF`. The layers and parameters that are contained within the `--safe` subset are restricted if the switch is set to `ON`. The switch is used for prototyping the safety restricted flows until the TensorRT safety runtime is made available. This parameter is required when loading or saving safe engines with the standard TensorRT package. For more information, see the [Working With Automotive Safety section in the TensorRT Developer Guide](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#working_auto_safety).
+
+## Additional resources
+
+The following resources provide more details about `trtexec`:
+
+**Documentation**
+- [NVIDIA trtexec](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#trtexec)
+- [TensorRT Sample Support Guide](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sample-support-guide/index.html)
+- [NVIDIA's TensorRT Documentation Library](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)
+
+# License
+
+For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html)
+documentation.
+
+# Changelog
+
+April 2019
+This is the first release of this `README.md` file.
+
+# Known issues
+
+There are no known issues in this sample.
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/prn_utils.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/prn_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b0abf9fb3cfa9b4b759e7dde4d9b85887c31b13
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/prn_utils.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Simple printing utils
+
+Utils to print traces and profiles in CSV format
+"""
+
+
+from __future__ import print_function
+
+
+def combineDescriptions(prolog, features, descriptions):
+    """Combine features with their descriptions"""
+
+    fullDescription = prolog
+    sep = " "
+    for feature, description in zip(features, descriptions):
+        fullDescription += sep + feature + " (" + description + ")"
+        sep = ", "
+
+    return fullDescription
+
+
+def printHeader(allFeatures, selection, gp=False, count=False):
+    """Print table header"""
+
+    if gp:
+        sep = "#"
+        if count:
+            sep += "count, "
+    else:
+        sep = ""
+
+    for feature in allFeatures:
+        if feature in selection:
+            print(sep + feature, end="")
+            sep = ", "
+
+    print("")
+
+
+def printCsv(data, count=False):
+    """Print trace in CSV format"""
+
+    c = 0
+    for row in data:
+        if count:
+            print(c, end="")
+            c += 1
+            sep = ", "
+        else:
+            sep = ""
+        for r in row:
+            if isinstance(r, str):
+                print(sep + r, end="")
+            else:
+                print("{}{:.6}".format(sep, float(r)), end="")
+            sep = ", "
+        print("")
+
+
+def filterData(data, allFeatures, selection):
+    """Drop features not in the given set"""
+
+    filteredData = []
+    for d in data:
+        row = []
+        for f in allFeatures:
+            if f in selection:
+                if f in d:
+                    row.append(d[f])
+                else:
+                    row.append("")
+        filteredData.append(row)
+
+    return filteredData
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/profiler.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a34e69fdad3b47070f87dd3e85bb7e43f14e290
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/profiler.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Print a trtexec profile from a JSON file
+
+Given a JSON file containing a trtexec profile,
+this program prints the profile in CSV table format.
+Each row represents a layer in the profile.
+
+The output format can be optionally converted to a
+format suitable for GNUPlot.
+"""
+
+import sys
+import json
+import argparse
+import prn_utils as pu
+
+
+# Column names available for printing, in output order. main() rebinds this
+# (via `global`) to the merged header list when --reference is given.
+allFeatures = ["name", "timeMs", "averageMs", "percentage"]
+
+# Default value of --features: every column, comma separated.
+defaultFeatures = ",".join(allFeatures)
+
+# Human-readable descriptions, paired positionally with allFeatures.
+descriptions = ["layer name", "total layer time", "average layer time", "percentage of total time"]
+
+# Legend appended to the --features help text.
+featuresDescription = pu.combineDescriptions("Features are (times in ms):", allFeatures, descriptions)
+
+
+def hasNames(features):
+    """Return True when the "name" column is among the given features.
+
+    main() uses the result to decide whether GNUPlot output needs a
+    synthetic row counter and whether the first merged header is kept
+    un-prefixed.
+    """
+
+    return "name" in features
+
+
+def totalData(features, profile):
+    """Add row at the bottom with the total"""
+
+    accumulator = {}
+    for f in features:
+        accumulator[f] = 0
+    accumulator["name"] = "total"
+
+    for row in profile:
+        for f in features:
+            if f in row and not f == "name":
+                accumulator[f] += row[f]
+
+    return accumulator
+
+
+def findAndRemove(profile, name):
+    """Find named row in profile and remove"""
+
+    for r in range(len(profile)):
+        if profile[r]["name"] == name:
+            row = profile[r]
+            del profile[r]
+            return row
+
+    return None
+
+
+def refName(name):
+    """Add prefix ref to name"""
+
+    return "ref" + name[0].capitalize() + name[1:]
+
+
+def refFeatures(names):
+    """Add prefix ref to features names"""
+
+    refNames = []
+    for name in names:
+        refNames.append(refName(name))
+    return refNames
+
+
+def mergeHeaders(features, skipFirst=True):
+    """Duplicate feature names for reference and target profile"""
+
+    if skipFirst:
+        return [features[0]] + refFeatures(features[1:]) + features[1:] + ["% difference"]
+    return refFeatures(features) + features + ["% difference"]
+
+
+def addReference(row, reference):
+    """Add reference results to results dictionary"""
+
+    for k, v in reference.items():
+        if k == "name":
+            if k in row:
+                continue
+        else:
+            k = refName(k)
+        row[k] = v
+
+
+def mergeRow(reference, profile, diff):
+    """Merge reference and target profile results into a single row"""
+
+    row = {}
+    if profile:
+        row = profile
+    if reference:
+        addReference(row, reference)
+    if diff:
+        row["% difference"] = diff
+
+    return row
+
+
+def alignData(reference, profile, threshold):
+    """Align and merge reference and target profiles"""
+
+    alignedData = []
+    for ref in reference:
+        prof = findAndRemove(profile, ref["name"])
+
+        if prof:
+            diff = (prof["averageMs"] / ref["averageMs"] - 1) * 100
+            if abs(diff) >= threshold:
+                alignedData.append(mergeRow(ref, prof, diff))
+        else:
+            alignedData.append(mergeRow(ref, None, None))
+
+    for prof in profile:
+        alignedData.append(mergeRow(None, prof, None))
+
+    return alignedData
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--features",
+        metavar="F[,F]*",
+        default=defaultFeatures,
+        help="Comma separated list of features to print. " + featuresDescription,
+    )
+    parser.add_argument("--total", action="store_true", help="Add total time row.")
+    parser.add_argument("--gp", action="store_true", help="Print GNUPlot format.")
+    parser.add_argument("--no-header", action="store_true", help="Omit the header row.")
+    parser.add_argument("--threshold", metavar="T", default=0.0, type=float, help="Threshold of percentage difference.")
+    parser.add_argument("--reference", metavar="R", help="Reference profile file name.")
+    parser.add_argument("name", metavar="filename", help="Profile file.")
+    args = parser.parse_args()
+
+    global allFeatures
+    features = args.features.split(",")
+    for f in features:
+        if not f in allFeatures:
+            print("Feature {} not recognized".format(f))
+            return
+
+    count = args.gp and not hasNames(features)
+
+    profile = None
+    reference = None
+
+    with open(args.name) as f:
+        profile = json.load(f)
+        profileCount = profile[0]["count"]
+        profile = profile[1:]
+
+    if args.reference:
+        with open(args.reference) as f:
+            reference = json.load(f)
+            referenceCount = reference[0]["count"]
+            reference = reference[1:]
+        allFeatures = mergeHeaders(allFeatures)
+        features = mergeHeaders(features, hasNames(features))
+
+    if not args.no_header:
+        if reference:
+            comment = "#" if args.gp else ""
+            print(comment + "reference count: {} - profile count: {}".format(referenceCount, profileCount))
+        pu.printHeader(allFeatures, features, args.gp, count)
+
+    if reference:
+        profile = alignData(reference, profile, args.threshold)
+
+    if args.total:
+        profile.append(totalData(allFeatures, profile))
+        if reference:
+            total = profile[len(profile) - 1]
+            total["% difference"] = (total["averageMs"] / total["refAverageMs"] - 1) * 100
+
+    profile = pu.filterData(profile, allFeatures, features)
+
+    pu.printCsv(profile, count)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/tracer.py b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/tracer.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b093d76d4d22167d630a26e33b99d148e018081
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/tracer.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Print a trtexec timing trace from a JSON file
+
+Given a JSON file containing a trtexec timing trace,
+this program prints the trace in CSV table format.
+Each row represents an entry point in the trace.
+
+The columns, as indicated by the header, represent
+the recorded metrics. The output format can
+be optionally converted to a format suitable for
+GNUPlot.
+"""
+
+import sys
+import json
+import argparse
+import prn_utils as pu
+
+
+# Metrics that are points in time; hasTimestamp() checks a selection against these.
+timestamps = ["startInMs", "endInMs", "startComputeMs", "endComputeMs", "startOutMs", "endOutMs"]
+
+# Metrics that are durations.
+intervals = ["inMs", "computeMs", "outMs", "latencyMs", "endToEndMs"]
+
+# Every metric name, in output column order (timestamps first).
+allMetrics = timestamps + intervals
+
+# Default value of --metrics: every metric, comma separated.
+defaultMetrics = ",".join(allMetrics)
+
+# Human-readable descriptions, paired positionally with allMetrics.
+descriptions = [
+    "start input",
+    "end input",
+    "start compute",
+    "end compute",
+    "start output",
+    "end output",
+    "input",
+    "compute",
+    "output",
+    "latency",
+    "end to end latency",
+]
+
+# Legend appended to the --metrics help text.
+metricsDescription = pu.combineDescriptions("Possible metrics (all in ms) are:", allMetrics, descriptions)
+
+
+def skipTrace(trace, start):
+    """Skip trace entries until start time"""
+
+    for t in range(len(trace)):
+        if trace[t]["startComputeMs"] >= start:
+            return trace[t:]
+
+    return []
+
+
+def hasTimestamp(metrics):
+    """Check if features have at least one timestamp"""
+
+    for timestamp in timestamps:
+        if timestamp in metrics:
+            return True
+    return False
+
+
+def avgData(data, avg, times):
+    """Average trace entries (every avg entries)"""
+
+    averaged = []
+    accumulator = []
+    r = 0
+
+    for row in data:
+        if r == 0:
+            for m in row:
+                accumulator.append(m)
+        else:
+            for m in row[times:]:
+                accumulator[t] += m
+
+        r += 1
+        if r == avg:
+            for t in range(times, len(row)):
+                accumulator[t] /= avg
+            averaged.append(accumulator)
+            accumulator = []
+            r = 0
+
+    return averaged
+
+
+def main():
+    """Command-line entry point: print a trtexec timing trace as a CSV table.
+
+    Parses the options, prints the header (unless --no-header), loads the
+    JSON trace, optionally drops early entries (--start) and averages
+    groups of rows (--avg), then prints the rows.
+
+    Returns None; the value is handed to sys.exit().
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--metrics",
+        metavar="M[,M]*",
+        default=defaultMetrics,
+        help="Comma separated list of metrics to print. " + metricsDescription,
+    )
+    parser.add_argument("--avg", metavar="N", type=int, default=1, help="Print average every N records.")
+    parser.add_argument(
+        "--start",
+        metavar="T",
+        type=float,
+        default=0,
+        help="Start trace at time T (drop records with compute start before T ms).",
+    )
+    parser.add_argument("--gp", action="store_true", help="Print GNUPlot format.")
+    parser.add_argument("--no-header", action="store_true", help="Omit the header row.")
+    parser.add_argument("name", metavar="filename", help="Trace file.")
+    args = parser.parse_args()
+
+    # Names are not validated here: a metric that is not in allMetrics is
+    # simply never matched by printHeader/filterData and so is left out.
+    metrics = args.metrics.split(",")
+    # GNUPlot output gets a synthetic row counter when there is no timestamp
+    # column, or only a single column, in the selection.
+    count = args.gp and (not hasTimestamp(metrics) or len(metrics) == 1)
+
+    # The header is written before the trace file is opened.
+    if not args.no_header:
+        pu.printHeader(allMetrics, metrics, args.gp, count)
+
+    with open(args.name) as f:
+        trace = json.load(f)
+
+    if args.start > 0:
+        trace = skipTrace(trace, args.start)
+
+    # From here on each entry is a list of values in allMetrics order.
+    trace = pu.filterData(trace, allMetrics, metrics)
+
+    if args.avg > 1:
+        # hasTimestamp() returns a bool, which avgData uses as a column
+        # offset: True excludes the first column from averaging.
+        trace = avgData(trace, args.avg, hasTimestamp(metrics))
+
+    pu.printCsv(trace, count)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/trtexec.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/trtexec.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9456562b87b9aafc45dcef3be71aa042ad7f0aa1
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/trtexec/trtexec.cpp
@@ -0,0 +1,524 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <cctype>
+#include <chrono>
+#include <cmath>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <sys/stat.h>
+#include <vector>
+
+#include "NvInfer.h"
+#include "NvInferPlugin.h"
+
+#include "buffers.h"
+#include "common.h"
+#include "logger.h"
+#include "sampleDevice.h"
+#include "sampleEngines.h"
+#include "sampleInference.h"
+#include "sampleOptions.h"
+#include "sampleReporting.h"
+
+using namespace nvinfer1;
+using namespace sample;
+using namespace samplesCommon;
+
+#if ENABLE_UNIFIED_BUILDER
+using namespace nvinfer2::safe;
+//! Recorder shared by the safety code paths in this file, created with severity kINFO.
+//! Declared with the weak attribute; presumably so another object file can supply its own
+//! definition -- TODO confirm.
+__attribute__((weak)) std::shared_ptr<sample::SampleSafeRecorder> gSafeRecorder
+    = std::make_shared<sample::SampleSafeRecorder>(nvinfer2::safe::Severity::kINFO);
+#endif
+
+namespace
+{
+//! Owning handle for a loaded dynamic library.
+using LibraryPtr = std::unique_ptr<DynamicLibrary>;
+
+//! Factory entry points, filled in by initNvinfer() / initNvonnxparser().
+//! They start out empty; the create*() wrappers below ASSERT on them before calling.
+std::function<void*(void*, int32_t)> pCreateInferRuntimeInternal{};
+std::function<void*(void*, void*, int32_t)> pCreateInferRefitterInternal{};
+std::function<void*(void*, int32_t)> pCreateInferBuilderInternal{};
+std::function<void*(void*, void*, int)> pCreateNvOnnxParserInternal{};
+
+//! Track runtime used for the execution of trtexec.
+//! Must be tracked as a global variable due to how library init functions APIs are organized.
+RuntimeMode gUseRuntime = RuntimeMode::kFULL;
+
+//! Make the nvinfer factory entry points available.
+//!
+//! Dynamic build (!TRT_STATIC): hands the library name selected by gUseRuntime and a symbol-fetching
+//! callback to initLibrary() and returns its result. Static build: binds the entry points directly and
+//! returns true.
+bool initNvinfer()
+{
+#if !TRT_STATIC
+    // Function-local static: the library handle outlives this call.
+    static LibraryPtr libnvinferPtr{};
+    auto fetchPtrs = [](DynamicLibrary* l) {
+        pCreateInferRuntimeInternal = l->symbolAddress<void*(void*, int32_t)>("createInferRuntime_INTERNAL");
+        // The refitter entry point is optional: a failed lookup only logs a warning and leaves the
+        // slot empty.
+        try
+        {
+            pCreateInferRefitterInternal
+                = l->symbolAddress<void*(void*, void*, int32_t)>("createInferRefitter_INTERNAL");
+        }
+        catch (const std::exception& e)
+        {
+            sample::gLogWarning << "Could not load function createInferRefitter_INTERNAL : " << e.what() << std::endl;
+        }
+
+        // The builder entry point is only looked up for the full runtime.
+        if (gUseRuntime == RuntimeMode::kFULL)
+        {
+            pCreateInferBuilderInternal = l->symbolAddress<void*(void*, int32_t)>("createInferBuilder_INTERNAL");
+        }
+    };
+    return initLibrary(libnvinferPtr, getRuntimeLibraryName(gUseRuntime), fetchPtrs);
+#else
+    pCreateInferRuntimeInternal = createInferRuntime_INTERNAL;
+    pCreateInferRefitterInternal = createInferRefitter_INTERNAL;
+    pCreateInferBuilderInternal = createInferBuilder_INTERNAL;
+    return true;
+#endif // !TRT_STATIC
+}
+
+//! Make the ONNX parser factory entry point available.
+//!
+//! Dynamic build: resolves createNvOnnxParser_INTERNAL from kNVONNXPARSER_LIBNAME through initLibrary()
+//! and returns its result. Static build: binds the entry point directly and returns true.
+bool initNvonnxparser()
+{
+#if !TRT_STATIC
+    // Function-local static: the library handle outlives this call.
+    static LibraryPtr libnvonnxparserPtr{};
+    auto fetchPtrs = [](DynamicLibrary* l) {
+        pCreateNvOnnxParserInternal = l->symbolAddress<void*(void*, void*, int)>("createNvOnnxParser_INTERNAL");
+    };
+    return initLibrary(libnvonnxparserPtr, kNVONNXPARSER_LIBNAME, fetchPtrs);
+#else
+    pCreateNvOnnxParserInternal = createNvOnnxParser_INTERNAL;
+    return true;
+#endif // !TRT_STATIC
+}
+
+} // namespace
+
+//! Create a TensorRT runtime through the entry point set up by initNvinfer().
+//! \return The new runtime, or a null pointer when initNvinfer() fails.
+IRuntime* createRuntime()
+{
+    bool const libraryReady = initNvinfer();
+    if (!libraryReady)
+    {
+        return nullptr;
+    }
+    ASSERT(pCreateInferRuntimeInternal != nullptr);
+    void* const rawRuntime = pCreateInferRuntimeInternal(&gLogger.getTRTLogger(), NV_TENSORRT_VERSION);
+    return static_cast<IRuntime*>(rawRuntime);
+}
+
+//! Create a TensorRT builder through the entry point set up by initNvinfer().
+//! In the dynamic build that entry point is only resolved when gUseRuntime is RuntimeMode::kFULL;
+//! otherwise the ASSERT below fails.
+//! \return The new builder, or a null pointer when initNvinfer() fails.
+IBuilder* createBuilder()
+{
+    bool const libraryReady = initNvinfer();
+    if (!libraryReady)
+    {
+        return nullptr;
+    }
+    ASSERT(pCreateInferBuilderInternal != nullptr);
+    void* const rawBuilder = pCreateInferBuilderInternal(&gLogger.getTRTLogger(), NV_TENSORRT_VERSION);
+    return static_cast<IBuilder*>(rawBuilder);
+}
+
+//! Create a refitter for \p engine through the entry point set up by initNvinfer().
+//! In the dynamic build that entry point stays empty when its symbol lookup failed (initNvinfer() only
+//! logs a warning); the ASSERT below then fails.
+//! \return The new refitter, or a null pointer when initNvinfer() fails.
+IRefitter* createRefitter(ICudaEngine& engine)
+{
+    bool const libraryReady = initNvinfer();
+    if (!libraryReady)
+    {
+        return nullptr;
+    }
+    ASSERT(pCreateInferRefitterInternal != nullptr);
+    void* const rawRefitter
+        = pCreateInferRefitterInternal(&engine, &gLogger.getTRTLogger(), NV_TENSORRT_VERSION);
+    return static_cast<IRefitter*>(rawRefitter);
+}
+
+//! Create an ONNX parser bound to \p network through the entry point set up by initNvonnxparser().
+//! \return The new parser, or a null pointer when initNvonnxparser() fails.
+nvonnxparser::IParser* createONNXParser(INetworkDefinition& network)
+{
+    bool const libraryReady = initNvonnxparser();
+    if (!libraryReady)
+    {
+        return nullptr;
+    }
+    ASSERT(pCreateNvOnnxParserInternal != nullptr);
+    void* const rawParser
+        = pCreateNvOnnxParserInternal(&network, &gLogger.getTRTLogger(), NV_ONNX_PARSER_VERSION);
+    return static_cast<nvonnxparser::IParser*>(rawParser);
+}
+
+#if ENABLE_UNIFIED_BUILDER
+
+//! Register the plugin creators listed in \p pluginArgs from an already loaded safety plugin library.
+//!
+//! \param safetyPluginRegistry Registry that receives the creators; dereferenced without a null check.
+//! \param libPtr Loaded plugin library, or nullptr if loading failed.
+//! \param pluginArgs Library name plus the (namespace, name) pairs to register.
+//! \return false when \p libPtr is null or the library does not provide "getSafetyPluginCreator";
+//!         true otherwise, even if some or all of the requested plugins were not found.
+bool processSafetyPluginLibrary(nvinfer2::safe::ISafePluginRegistry* safetyPluginRegistry, DynamicLibrary* libPtr,
+    samplesSafeCommon::SafetyPluginLibraryArgument const& pluginArgs)
+{
+    if (libPtr == nullptr)
+    {
+        sample::gLogError << "Cannot open safety plugin library " << pluginArgs.libraryName << std::endl;
+        return false;
+    }
+    // Every safety plugin library is expected to export this getter.
+    std::string const pluginGetterSymbolName{"getSafetyPluginCreator"};
+    auto pGetSafetyPluginCreator
+        = libPtr->symbolAddress<void*(char const*, char const*)>(pluginGetterSymbolName.c_str());
+    if (pGetSafetyPluginCreator == nullptr)
+    {
+        sample::gLogError << "Cannot find plugin creator getter symbol from plugin library: " << pluginArgs.libraryName
+                          << std::endl;
+        sample::gLogError << "Please ensure interface function is correctly implemented and exported." << std::endl;
+        return false;
+    }
+
+    for (auto const& pluginAttr : pluginArgs.pluginAttrs)
+    {
+        auto pluginCreator = static_cast<IPluginCreatorInterface*>(
+            pGetSafetyPluginCreator(pluginAttr.pluginNamespace.c_str(), pluginAttr.pluginName.c_str()));
+        // A plugin missing from the library is logged at info level and skipped, not treated as an error.
+        if (pluginCreator == nullptr)
+        {
+            sample::gLogInfo << "Cannot find plugin " << pluginAttr.pluginNamespace << "::" << pluginAttr.pluginName
+                             << " in the safety plugin library: " << pluginArgs.libraryName << std::endl;
+            continue;
+        }
+        sample::gLogInfo << "Registering " << pluginAttr.pluginNamespace << "::" << pluginAttr.pluginName
+                         << " for TensorRT safety." << std::endl;
+        // NOTE(review): any status reported by registerCreator() is not inspected here.
+        safetyPluginRegistry->registerCreator(*pluginCreator, pluginAttr.pluginNamespace.c_str(), *gSafeRecorder);
+    }
+    return true;
+}
+#endif
+
+//! Timing aliases: a high_resolution_clock time point and a float-valued duration
+//! (std::chrono::duration<float> counts seconds). Neither is referenced elsewhere in this file.
+using time_point = std::chrono::time_point<std::chrono::high_resolution_clock>;
+using duration = std::chrono::duration<float>;
+
+//! trtexec entry point.
+//!
+//! Phases, in order: parse the command line, select the CUDA device, load plugins, set up the engine
+//! build environment, optionally time refitting, then set up and run inference and print the reports.
+//! Most exits go through sample::gLogger.reportPass() / reportFail(); the early help request returns
+//! EXIT_SUCCESS directly. Any std::exception escaping the body is logged and reported as a failure.
+int main(int argc, char** argv)
+{
+    std::string const sampleName = "TensorRT.trtexec";
+
+    auto sampleTest = sample::gLogger.defineTest(sampleName, argc, argv);
+
+    try
+    {
+        sample::gLogger.reportTestStart(sampleTest);
+
+        Arguments args = argsToArgumentsMap(argc, argv);
+        AllOptions options;
+
+        if (parseHelp(args))
+        {
+            AllOptions::help(std::cout);
+            return EXIT_SUCCESS;
+        }
+
+        if (!args.empty())
+        {
+            bool failed{false};
+            try
+            {
+                options.parse(args);
+
+                // Anything still left in args after parse() is reported as an unknown option
+                // (presumably parse() removes the entries it recognizes -- TODO confirm).
+                if (!args.empty())
+                {
+                    AllOptions::help(std::cout);
+                    for (auto const& arg : args)
+                    {
+                        sample::gLogError << "Unknown option: " << arg.first << " " << arg.second.first << std::endl;
+                    }
+                    failed = true;
+                }
+            }
+            catch (std::invalid_argument const& arg)
+            {
+                AllOptions::help(std::cout);
+                sample::gLogError << arg.what() << std::endl;
+                failed = true;
+            }
+
+            if (failed)
+            {
+                return sample::gLogger.reportFail(sampleTest);
+            }
+        }
+        else
+        {
+            // No arguments at all: behave as if help had been requested.
+            options.helps = true;
+        }
+
+        if (options.helps)
+        {
+            AllOptions::help(std::cout);
+            return sample::gLogger.reportPass(sampleTest);
+        }
+
+        sample::gLogInfo << options;
+        if (options.reporting.verbose)
+        {
+            sample::setReportableSeverity(ILogger::Severity::kVERBOSE);
+        }
+        // Version suffix; always empty here.
+        std::string const jitInVersion;
+        setCudaDevice(options.system.device, sample::gLogInfo);
+        sample::gLogInfo << std::endl;
+        sample::gLogInfo << "TensorRT version: " << NV_TENSORRT_MAJOR << "." << NV_TENSORRT_MINOR << "."
+                         << NV_TENSORRT_PATCH << jitInVersion << std::endl;
+
+        // Record specified runtime
+        gUseRuntime = options.build.useRuntime;
+#if !TRT_STATIC
+        LibraryPtr nvinferPluginLib{};
+#endif /* TRT_STATIC */
+        // Handles of user-supplied plugin libraries, kept for the rest of main().
+        std::vector<LibraryPtr> pluginLibs;
+        if (gUseRuntime == RuntimeMode::kFULL)
+        {
+            sample::gLogInfo << "Loading standard plugins" << std::endl;
+#if !TRT_STATIC
+            nvinferPluginLib = loadLibrary(kNVINFER_PLUGIN_LIBNAME);
+            auto pInitLibNvinferPlugins
+                = nvinferPluginLib->symbolAddress<bool(void*, char const*)>("initLibNvInferPlugins");
+#else /* TRT_STATIC */
+            auto pInitLibNvinferPlugins = initLibNvInferPlugins;
+#endif /* TRT_STATIC */
+            ASSERT(pInitLibNvinferPlugins != nullptr);
+            pInitLibNvinferPlugins(&sample::gLogger.getTRTLogger(), "");
+            for (auto const& pluginPath : options.system.plugins)
+            {
+                sample::gLogInfo << "Loading supplied plugin library: " << pluginPath << std::endl;
+                pluginLibs.emplace_back(loadLibrary(pluginPath));
+            }
+        }
+        else if (!options.system.plugins.empty())
+        {
+            throw std::runtime_error("TRT-18412: Plugins require --useRuntime=full.");
+        }
+#if ENABLE_UNIFIED_BUILDER
+        auto safetyPluginRegistry = sample::safe::getSafePluginRegistry(*gSafeRecorder);
+        ASSERT(safetyPluginRegistry != nullptr);
+
+        if (!options.system.safetyPlugins.empty())
+        {
+            for (auto const& safetyPluginArg : options.system.safetyPlugins)
+            {
+                sample::gLogInfo << "Loading supplied safety plugin library with manual registration: "
+                                 << safetyPluginArg.libraryName << std::endl;
+                auto pluginLib = loadLibrary(safetyPluginArg.libraryName);
+                // NOTE(review): the bool result is ignored, so a failed registration does not stop the run.
+                processSafetyPluginLibrary(safetyPluginRegistry, pluginLib.get(), safetyPluginArg);
+                pluginLibs.emplace_back(std::move(pluginLib));
+            }
+        }
+#endif // ENABLE_UNIFIED_BUILDER
+        if (options.build.safe && !sample::hasSafeRuntime())
+        {
+            sample::gLogError << "Safety is not supported because safety runtime library is unavailable." << std::endl;
+            return sample::gLogger.reportFail(sampleTest);
+        }
+
+        if (!options.build.safe && options.build.consistency)
+        {
+            sample::gLogInfo << "Skipping consistency checker on non-safety mode." << std::endl;
+            options.build.consistency = false;
+        }
+
+        // Start engine building phase.
+        std::unique_ptr<BuildEnvironment> bEnv(new BuildEnvironment(options.build.safe, options.build.versionCompatible,
+            options.system.DLACore, options.build.tempdir, options.build.tempfileControls, options.build.leanDLLPath,
+            sampleTest.getCmdline()));
+
+        bool buildPass = getEngineBuildEnv(options.model, options.build, options.system, *bEnv, sample::gLogError);
+
+        if (!buildPass)
+        {
+            sample::gLogError << "Engine set up failed" << std::endl;
+            return sample::gLogger.reportFail(sampleTest);
+        }
+
+#if ENABLE_UNIFIED_BUILDER
+        safetyPluginRegistry->setSafeRecorder(*gSafeRecorder);
+#endif // ENABLE_UNIFIED_BUILDER
+
+        // Exit as version is already printed during getEngineBuildEnv
+        if (options.build.getPlanVersionOnly)
+        {
+            return sample::gLogger.reportPass(sampleTest);
+        }
+
+
+        // dynamicPlugins may have been updated by getEngineBuildEnv above
+        bEnv->engine.setDynamicPlugins(options.system.dynamicPlugins);
+        // When some options are enabled, engine deserialization is not supported on the platform that the engine was
+        // built.
+        bool const supportDeserialization = !options.build.safe && !options.build.buildDLAStandalone
+            && options.build.runtimePlatform == nvinfer1::RuntimePlatform::kSAME_AS_BUILD;
+
+        if (supportDeserialization && options.build.refittable)
+        {
+            auto* engine = bEnv->engine.get();
+            if (options.reporting.refit)
+            {
+                dumpRefittable(*engine);
+            }
+            if (options.inference.timeRefit)
+            {
+                // Timing a refit needs the network definition as well as the engine.
+                if (bEnv->network.operator bool())
+                {
+                    bool const success = timeRefit(*bEnv->network, *engine, options.inference.threads);
+                    if (!success)
+                    {
+                        sample::gLogError << "Engine refit failed." << std::endl;
+                        return sample::gLogger.reportFail(sampleTest);
+                    }
+                }
+                else
+                {
+                    sample::gLogWarning << "Network not available, skipped timing refit." << std::endl;
+                }
+            }
+        }
+
+        if (options.build.skipInference)
+        {
+            if (supportDeserialization)
+            {
+                printLayerInfo(options.reporting, bEnv->engine.get(), nullptr);
+                printOptimizationProfileInfo(options.reporting, bEnv->engine.get());
+            }
+            sample::gLogInfo << "Skipped inference phase since --skipInference is added." << std::endl;
+            return sample::gLogger.reportPass(sampleTest);
+        }
+
+        // Start inference phase: pick the environment type matching the --safe setting.
+        std::unique_ptr<InferenceEnvironmentBase> iEnv;
+
+        if (!options.build.safe)
+        {
+            iEnv = std::make_unique<InferenceEnvironmentStd>(*bEnv);
+        }
+        else
+        {
+#if ENABLE_UNIFIED_BUILDER
+            iEnv = std::make_unique<InferenceEnvironmentSafe>(*bEnv);
+#else
+            sample::gLogInfo << "--safe flag is enabled but application is not compatible with safety." << std::endl;
+            return sample::gLogger.reportFail(sampleTest);
+#endif
+        }
+
+        // We avoid re-loading some dynamic plugins while deserializing
+        // if they were already serialized with `setPluginsToSerialize`.
+        std::vector<std::string> dynamicPluginsNotSerialized;
+        for (auto& pluginName : options.system.dynamicPlugins)
+        {
+            if (std::find(options.system.setPluginsToSerialize.begin(), options.system.setPluginsToSerialize.end(),
+                    pluginName)
+                == options.system.setPluginsToSerialize.end())
+            {
+                dynamicPluginsNotSerialized.emplace_back(pluginName);
+            }
+        }
+
+        iEnv->engine.setDynamicPlugins(dynamicPluginsNotSerialized);
+        // Delete build environment.
+        bEnv.reset();
+
+        if (options.inference.timeDeserialize)
+        {
+            // NOTE(review): a true return from timeDeserialize() is treated as failure here -- confirm
+            // against its contract.
+            if (timeDeserialize(*iEnv, options.system))
+            {
+                return sample::gLogger.reportFail(sampleTest);
+            }
+            return sample::gLogger.reportPass(sampleTest);
+        }
+        if (options.build.safe && options.system.DLACore >= 0)
+        {
+            sample::gLogInfo << "Safe DLA capability is detected. Please save DLA loadable with --saveEngine option, "
+                                "then use dla_safety_runtime to run inference with saved DLA loadable, "
+                                "or alternatively run with your own application"
+                             << std::endl;
+            return sample::gLogger.reportFail(sampleTest);
+        }
+        bool const profilerEnabled = options.reporting.profile || !options.reporting.exportProfile.empty();
+
+        bool const layerInfoEnabled = options.reporting.layerInfo || !options.reporting.exportLayerInfo.empty();
+        if (iEnv->safe && (profilerEnabled || layerInfoEnabled))
+        {
+            sample::gLogError << "Safe runtime does not support --dumpProfile or --exportProfile=<file> or "
+                                 "--dumpLayerInfo or --exportLayerInfo=<file>, please use "
+                                 "--verbose to print profiling info."
+                              << std::endl;
+            return sample::gLogger.reportFail(sampleTest);
+        }
+        // Profiling in the same run: create the profiler before inference is set up.
+        if (profilerEnabled && !options.inference.rerun)
+        {
+            iEnv->profiler.reset(new Profiler);
+            if (options.inference.graph && (getCudaDriverVersion() < 11010 || getCudaRuntimeVersion() < 11000))
+            {
+                options.inference.graph = false;
+                sample::gLogWarning
+                    << "Graph profiling only works with CUDA 11.1 and beyond. Ignored --useCudaGraph flag "
+                       "and disabled CUDA graph."
+                    << std::endl;
+            }
+        }
+
+        if (!setUpInference(*iEnv, options.inference, options.system))
+        {
+            sample::gLogError << "Inference set up failed" << std::endl;
+            return sample::gLogger.reportFail(sampleTest);
+        }
+
+        if (!options.build.safe)
+        {
+            // Non-safe mode created an InferenceEnvironmentStd above, so the downcast matches.
+            printLayerInfo(options.reporting, iEnv->engine.get(),
+                static_cast<InferenceEnvironmentStd*>(iEnv.get())->contexts.front().get());
+            printOptimizationProfileInfo(options.reporting, iEnv->engine.get());
+        }
+        std::vector<InferenceTrace> trace;
+        sample::gLogInfo << "Starting inference" << std::endl;
+
+        if (!runInference(options.inference, *iEnv, options.system.device, trace, options.reporting))
+        {
+            sample::gLogError << "Error occurred during inference" << std::endl;
+            return sample::gLogger.reportFail(sampleTest);
+        }
+
+        if (profilerEnabled && !options.inference.rerun)
+        {
+            sample::gLogInfo << "The e2e network timing is not reported since it is inaccurate due to the extra "
+                             << "synchronizations when the profiler is enabled." << std::endl;
+            sample::gLogInfo
+                << "To show e2e network timing report, add --separateProfileRun to profile layer timing in a "
+                << "separate run or remove --dumpProfile to disable the profiler." << std::endl;
+        }
+        else
+        {
+            printPerformanceReport(trace, options.reporting, options.inference, sample::gLogInfo, sample::gLogWarning,
+                sample::gLogVerbose);
+        }
+
+        printOutput(options.reporting, *iEnv, options.inference.batch);
+
+        // Separate profiling run: attach a fresh profiler to the first context and run inference again.
+        if (profilerEnabled && options.inference.rerun)
+        {
+            // Ownership of the profiler passes to iEnv; the raw pointer is only shared with the context.
+            auto* profiler = new Profiler;
+            iEnv->profiler.reset(profiler);
+            static_cast<InferenceEnvironmentStd*>(iEnv.get())->contexts.front()->setProfiler(profiler);
+            static_cast<InferenceEnvironmentStd*>(iEnv.get())->contexts.front()->setEnqueueEmitsProfile(false);
+            if (options.inference.graph && (getCudaDriverVersion() < 11010 || getCudaRuntimeVersion() < 11000))
+            {
+                options.inference.graph = false;
+                sample::gLogWarning
+                    << "Graph profiling only works with CUDA 11.1 and beyond. Ignored --useCudaGraph flag "
+                       "and disabled CUDA graph."
+                    << std::endl;
+            }
+            if (!runInference(options.inference, *iEnv, options.system.device, trace, options.reporting))
+            {
+                sample::gLogError << "Error occurred during inference" << std::endl;
+                return sample::gLogger.reportFail(sampleTest);
+            }
+        }
+        printPerformanceProfile(options.reporting, *iEnv);
+
+        return sample::gLogger.reportPass(sampleTest);
+    }
+    catch (std::exception const& e)
+    {
+        sample::gLogError << "Uncaught exception detected: " << e.what() << std::endl;
+    }
+    // Only reached after an exception was caught above.
+    return sample::gLogger.reportFail(sampleTest);
+}
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/cacheUtils.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/cacheUtils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..be163b1493c4a983fd4fc8fd06e666b8a05b183d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/cacheUtils.cpp
@@ -0,0 +1,147 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cacheUtils.h"
+#include "NvInfer.h"
+#include "fileLock.h"
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace nvinfer1::utils
+{
+//! Reads the entire file \p inFileName while holding the FileLock for it.
+//! Every failure (open, size query, short read, lock error) is reported and yields an empty vector.
+std::vector<char> loadCacheFile(ILogger& logger, std::string const& inFileName)
+{
+    try
+    {
+        FileLock fileLock{logger, inFileName};
+        std::ifstream iFile(inFileName, std::ios::in | std::ios::binary);
+        // tellg() yields -1 if the open or the seek failed; keep the size signed so that stays detectable
+        // instead of wrapping around to a huge size_t allocation request.
+        std::streamoff const fsize = iFile.seekg(0, std::ifstream::end).tellg();
+        iFile.seekg(0, std::ifstream::beg);
+        std::vector<char> content(static_cast<size_t>(fsize > 0 ? fsize : 0));
+        std::stringstream ss;
+        if (fsize < 0 || (fsize > 0 && !iFile.read(content.data(), fsize)))
+        {
+            ss << "Could not read cache from: " << inFileName << ". A new cache will be generated and written.";
+            logger.log(ILogger::Severity::kWARNING, ss.str().c_str());
+            return {};
+        }
+        ss << "Loaded " << fsize << " bytes of cache from file: " << inFileName;
+        logger.log(ILogger::Severity::kINFO, ss.str().c_str());
+        return content;
+    }
+    catch (std::exception const& e)
+    {
+        std::cerr << "Exception while loading cache file " << inFileName << ": " << e.what() << std::endl;
+    }
+    return {};
+}
+
+//! Seeds a new ITimingCache with the bytes of \p timingCacheFile and attaches it to \p config; nullptr on failure.
+std::unique_ptr<ITimingCache> buildTimingCacheFromFile(
+    ILogger& logger, IBuilderConfig& config, std::string const& timingCacheFile)
+{
+    auto const logError = [&logger, &timingCacheFile](std::string const& what) {
+        logger.log(ILogger::Severity::kERROR, (what + timingCacheFile).c_str());
+    };
+    std::vector<char> const serialized = loadCacheFile(logger, timingCacheFile);
+    std::unique_ptr<nvinfer1::ITimingCache> cache{config.createTimingCache(serialized.data(), serialized.size())};
+    if (!cache)
+    {
+        logError("Failed to create ITimingCache from file ");
+        return nullptr;
+    }
+    config.clearFlag(BuilderFlag::kDISABLE_TIMING_CACHE);
+    if (config.setTimingCache(*cache, true))
+    {
+        return cache;
+    }
+    logError("IBuilderConfig#setTimingCache failed with timing cache from file ");
+    return nullptr;
+}
+
+//! Writes \p blob to \p outFileName under the FileLock for that file; a null blob or failed write logs a warning.
+void saveCacheFile(ILogger& logger, std::string const& outFileName, IHostMemory const* blob)
+{
+    try
+    {
+        bool saved = blob != nullptr; // never open (and thereby truncate) the file for a null blob
+        if (saved)
+        {
+            FileLock fileLock{logger, outFileName};
+            std::ofstream oFile(outFileName, std::ios::out | std::ios::binary);
+            oFile.write(reinterpret_cast<char const*>(blob->data()), blob->size());
+            oFile.close();
+            saved = !oFile.fail(); // false when the open, the write, or the final flush failed
+        }
+        std::string const msg = saved
+            ? "Saved " + std::to_string(blob->size()) + " bytes of cache to file: " + outFileName
+            : "Could not write cache to file: " + outFileName;
+        logger.log(saved ? ILogger::Severity::kINFO : ILogger::Severity::kWARNING, msg.c_str());
+    }
+    catch (std::exception const& e)
+    {
+        std::cerr << "Exception while saving cache file " << outFileName << ": " << e.what() << std::endl;
+    }
+}
+
+//! Merges \p timingCache into the cache stored in \p fileName and rewrites the file; errors are logged, not thrown.
+void updateTimingCacheFile(nvinfer1::ILogger& logger, std::string const& fileName,
+    nvinfer1::ITimingCache const* timingCache, nvinfer1::IBuilder& builder)
+{
+    try
+    {
+        std::unique_ptr<IBuilderConfig> config{builder.createBuilderConfig()};
+        if (!config || timingCache == nullptr)
+        {
+            throw std::runtime_error("Missing IBuilderConfig or ITimingCache, cannot update the timing cache!");
+        }
+        std::vector<char> timingCacheContents = loadCacheFile(logger, fileName);
+        std::unique_ptr<ITimingCache> fileTimingCache{
+            config->createTimingCache(timingCacheContents.data(), timingCacheContents.size())};
+        if (!fileTimingCache || !fileTimingCache->combine(*timingCache, false)) // both report failure by return value
+        {
+            throw std::runtime_error("Failed to combine ITimingCache with the contents of " + fileName + "!");
+        }
+        std::unique_ptr<IHostMemory> blob{fileTimingCache->serialize()};
+        if (!blob)
+        {
+            throw std::runtime_error("Failed to serialize combined ITimingCache!");
+        }
+        FileLock fileLock{logger, fileName};
+        std::ofstream oFile(fileName, std::ios::out | std::ios::binary);
+        oFile.write(reinterpret_cast<char const*>(blob->data()), blob->size());
+        oFile.close();
+        bool const saved = !oFile.fail(); // false when the open, the write, or the final flush failed
+        std::string const msg = saved
+            ? "Saved " + std::to_string(blob->size()) + " bytes of timing cache to " + fileName
+            : "Could not write timing cache to: " + fileName;
+        logger.log(saved ? ILogger::Severity::kINFO : ILogger::Severity::kWARNING, msg.c_str());
+    }
+    catch (std::exception const& e)
+    {
+        std::cerr << "Exception while updating timing cache file " << fileName << ": " << e.what() << std::endl;
+    }
+}
+} // namespace nvinfer1::utils
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/cacheUtils.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/cacheUtils.h
new file mode 100644
index 0000000000000000000000000000000000000000..41851da43d41cb072791ed3efde837fb27721426
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/cacheUtils.h
@@ -0,0 +1,60 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TRT_SHARED_TIMINGCACHE_H_
+#define TRT_SHARED_TIMINGCACHE_H_
+
+#include "NvInfer.h"
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace nvinfer1::utils
+{
+
+//! \brief Loads the binary contents of a cache file into a char vector. Used for both timing cache and runtime cache.
+//!
+//! \note This is a blocking operation: the method holds an exclusive FileLock for the cache file while reading.
+//! \returns The binary data from the file, or an empty vector if an error occurred.
+std::vector<char> loadCacheFile(nvinfer1::ILogger& logger, std::string const& inFileName);
+
+//! \brief Helper method to load a timing cache from a file, build an ITimingCache with the data, and then set the new
+//! timing cache on the builder config. If the file is blank, or cannot be read, a new timing cache will be created
+//! from scratch.
+//!
+//! \returns The newly created timing cache, or nullptr if creating it or setting it on the config failed.
+std::unique_ptr<ITimingCache> buildTimingCacheFromFile(
+    ILogger& logger, IBuilderConfig& config, std::string const& timingCacheFile);
+
+//! \brief Saves the contents of a cache object to a binary file.
+//!
+//! \note This is a blocking operation: the method holds an exclusive FileLock for the cache file for
+//! the duration of the write.
+void saveCacheFile(nvinfer1::ILogger& logger, std::string const& outFileName, nvinfer1::IHostMemory const* blob);
+
+//! \brief Updates the contents of a timing cache binary file.
+//! This operation loads the timing cache file, combines it with the passed timingCache, and writes the serialized
+//! result back to the same file.
+//!
+//! \note This is a blocking operation. The file lock is taken once for the load and again for the write, so the
+//! load-combine-write sequence as a whole is not performed under a single lock.
+void updateTimingCacheFile(nvinfer1::ILogger& logger, std::string const& fileName,
+    nvinfer1::ITimingCache const* timingCache, nvinfer1::IBuilder& builder);
+
+} // namespace nvinfer1::utils
+
+#endif // TRT_SHARED_TIMINGCACHE_H_
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/fileLock.cpp b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/fileLock.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..177aaa5397cb5a5c61535500bb5f737a360766a4
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/fileLock.cpp
@@ -0,0 +1,96 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "fileLock.h"
+#include "NvInfer.h"
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace nvinfer1::utils
+{
+
+//! Opens (creating it if necessary) the lock file "<fileName>.lock" and takes an exclusive lock on it.
+//! Throws std::runtime_error when the lock file cannot be opened or locked. On QNX builds the locking
+//! code is compiled out, so no lock file is opened and no lock is taken.
+FileLock::FileLock(ILogger& logger, std::string const& fileName)
+    : mLogger(logger)
+    , mFileName(fileName)
+{
+    std::string lockFileName = mFileName + ".lock";
+#ifdef _MSC_VER
+    std::string const attemptMsg = "Trying to set exclusive file lock " + lockFileName + "\n";
+    mLogger.log(ILogger::Severity::kVERBOSE, attemptMsg.c_str());
+    // A share mode of 0 requests exclusive access. NOTE(review): the original comment stated that this call
+    // blocks unless FILE_FLAG_OVERLAPPED is passed; confirm that a contended open waits instead of failing.
+    mHandle = CreateFileA(lockFileName.c_str(), GENERIC_WRITE, 0, NULL, OPEN_ALWAYS, 0, NULL);
+    if (mHandle == INVALID_HANDLE_VALUE)
+    {
+        throw std::runtime_error("Failed to lock " + lockFileName + "!");
+    }
+#elif defined(__QNX__)
+    // Calling lockf(F_TLOCK) on QNX returns -1; the reported error is 89 (function not implemented).
+#else
+    // "wb+" creates the lock file when it does not exist yet.
+    mHandle = fopen(lockFileName.c_str(), "wb+");
+    if (mHandle == nullptr)
+    {
+        throw std::runtime_error("Cannot open " + lockFileName + "!");
+    }
+    std::string const attemptMsg = "Trying to set exclusive file lock " + lockFileName + "\n";
+    mLogger.log(ILogger::Severity::kVERBOSE, attemptMsg.c_str());
+    // lockf() operates on descriptors, so fetch the one behind the stdio stream.
+    mDescriptor = fileno(mHandle);
+    if (lockf(mDescriptor, F_LOCK, 0) != 0)
+    {
+        // Leave the object in the "no lock held" state before reporting the failure.
+        mDescriptor = -1;
+        fclose(mHandle);
+        throw std::runtime_error("Failed to lock " + lockFileName + "!");
+    }
+#endif
+}
+
+//! Releases the lock: closes the Windows handle, or unlocks and closes the lock file elsewhere; no-op on QNX.
+FileLock::~FileLock()
+{
+#ifdef _MSC_VER
+    if (mHandle != INVALID_HANDLE_VALUE)
+    {
+        CloseHandle(mHandle);
+    }
+#elif defined(__QNX__)
+    // Calling lockf(F_TLOCK) on QNX returns -1; the reported error is 89 (function not implemented).
+#else
+    if (mDescriptor != -1)
+    {
+        auto const ret = lockf(mDescriptor, F_ULOCK, 0);
+        if (mHandle != nullptr)
+        {
+            fclose(mHandle);
+        }
+        if (ret != 0)
+        {
+            // lockFileName already carries the ".lock" suffix; warn, since the user is asked to act.
+            std::string const lockFileName = mFileName + ".lock";
+            std::string const msg = "Failed to unlock " + lockFileName + ", please remove " + lockFileName + " manually!";
+            mLogger.log(ILogger::Severity::kWARNING, msg.c_str());
+        }
+    }
+#endif
+}
+
+} // namespace nvinfer1::utils
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/fileLock.h b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/fileLock.h
new file mode 100644
index 0000000000000000000000000000000000000000..6a59547380ffcf6e875f28bc09beb1ff30da53bd
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/samples/utils/fileLock.h
@@ -0,0 +1,83 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TRT_SHARED_FILELOCK_H_
+#define TRT_SHARED_FILELOCK_H_
+#include "NvInfer.h"
+#ifdef _MSC_VER
+// Needed so that the max/min definitions in windows.h do not conflict with std::max/min.
+#define NOMINMAX
+#include <windows.h>
+#undef NOMINMAX
+#else
+#include <stdio.h>  // fileno
+#include <unistd.h> // lockf
+#endif
+#include <string>
+
+namespace nvinfer1::utils
+{
+
+//! \brief RAII object that locks the specified file.
+//!
+//! The constructor acquires a lock on the companion file "<fileName>.lock" (not on
+//! fileName itself) and throws std::runtime_error on failure; the destructor releases
+//! it. TensorRT tools and samples use this so that files such as the timing cache can
+//! be updated from several processes without conflicts.
+class FileLock
+{
+public:
+    FileLock(nvinfer1::ILogger& logger, std::string const& fileName);
+    ~FileLock();
+    FileLock() = delete;                           // no default ctor
+    FileLock(FileLock const&) = delete;            // no copy ctor
+    FileLock& operator=(FileLock const&) = delete; // no copy assignment
+    FileLock(FileLock&&) = delete;                 // no move ctor
+    FileLock& operator=(FileLock&&) = delete;      // no move assignment
+
+private:
+    //!
+    //! Logger that receives the diagnostic messages emitted while locking and unlocking.
+    //!
+    nvinfer1::ILogger& mLogger;
+
+    //!
+    //! Name of the file being protected from concurrent writes by multiple
+    //! TensorRT processes; the lock file name is derived from it.
+    //!
+    std::string const mFileName;
+
+#ifdef _MSC_VER
+    //!
+    //! The Windows handle of the open lock file.
+    //!
+    HANDLE mHandle{};
+#else
+    //!
+    //! The stdio stream of the open lock file (non-Windows builds).
+    //!
+    FILE* mHandle{};
+    //!
+    //! The file descriptor of the lock file (non-Windows builds); -1 while no lock is held.
+    //!
+    int32_t mDescriptor{-1};
+#endif
+}; // class FileLock
+
+} // namespace nvinfer1::utils
+
+#endif // TRT_SHARED_FILELOCK_H_
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/bin/trtexec b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/bin/trtexec
new file mode 100644
index 0000000000000000000000000000000000000000..071fdc44684924071c7d2b2e2d9a40377c32a6f0
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/bin/trtexec
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d87306981a600d006498bbb781a2234410aa89b2ae034b97f1dcbd28aedcd6b3
+size 1848288
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer.so.10.13.0 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer.so.10.13.0
new file mode 100644
index 0000000000000000000000000000000000000000..dc49b4cf78de612abd26d6dfaa277d8b31957f2e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer.so.10.13.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d17cef1cecfcbe0163badca8dab178bdadd061612cc96ec43a4f122f21b5c37d
+size 673605816
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource.so.10.13.0 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource.so.10.13.0
new file mode 100644
index 0000000000000000000000000000000000000000..716708c060788a4a2e8f87d9b8f332b6d865ce24
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource.so.10.13.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57e582ff5348e63765da257d307cdc8b4dcb9dc13b907d5fc184eb509705ba92
+size 2122218488
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource_win.so.10.13.0 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource_win.so.10.13.0
new file mode 100644
index 0000000000000000000000000000000000000000..e6af43b876878960704c2f20a3e856caaee77644
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource_win.so.10.13.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c1ea8b969a5206a4d355d4602303c983a4baeb2041f8c7319eb74a6daf09db3
+size 1823325912
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch.so.10.13.0 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch.so.10.13.0
new file mode 100644
index 0000000000000000000000000000000000000000..729429e6df30acdbb51be29173af57aa03c190b8
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch.so.10.13.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b5335466707c233b056bd2900a9438f5c2b531ba75f58a0f6e621e1f2f329d4
+size 978600
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch_static.a b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch_static.a
new file mode 100644
index 0000000000000000000000000000000000000000..eead16b29e51ad42858eb08ada7f9f74700c8559
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_dispatch_static.a
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:372a6ac873a6b27fc3da053e3eab3c418d61b3d9b7b053d4e34784c44e8821b5
+size 786164
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean.so.10.13.0 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean.so.10.13.0
new file mode 100644
index 0000000000000000000000000000000000000000..a189012c732148e396ca4f76a0ad3a7beb9223c5
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean.so.10.13.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9359c40938acf62fb5ee44f932ca89146888c9baa4dc975d18696c22cea75b54
+size 104114728
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean_static.a b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean_static.a
new file mode 100644
index 0000000000000000000000000000000000000000..2126610ec0b54739f3a5bc20d926b7cd79495f0d
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_lean_static.a
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be498f25968afa2f6677f83c0eb22443f93ea4d5093d9ee040a4ff9c39c93dd7
+size 1116874458
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin.so.10.13.0 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin.so.10.13.0
new file mode 100644
index 0000000000000000000000000000000000000000..c9771554b0b09c3870f61760783cc275794999cd
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin.so.10.13.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afbad21d0f0c5543e580f268f01e03407e2d300ae1d137c9c42e9b9efa53812d
+size 59818792
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin_static.a b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin_static.a
new file mode 100644
index 0000000000000000000000000000000000000000..e57c80d6bbb8bfa4398ad64a4113d79110d88109
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_plugin_static.a
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c7468e1f5bd959e6deacd84fc7e6d77b168fd47dc84018d5525264890692e6b
+size 66965756
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_static.a b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_static.a
new file mode 100644
index 0000000000000000000000000000000000000000..fa18437fbabaa803f4ca82f8f4d6c0923dbc1c51
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_static.a
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:063fa65861157a78c06dca291b4b0e2ffd6ff17232dea45ad62fd7b28d8ee18b
+size 5329509576
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin.so.10.13.0 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin.so.10.13.0
new file mode 100644
index 0000000000000000000000000000000000000000..26c30ae92fb021a9fe3858bb9ac8ce33f46807dd
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin.so.10.13.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a38f487ebcac3e6421754af1273eb8f5e609caa58651273da10abfa692cd94f
+size 933183
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin_static.a b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin_static.a
new file mode 100644
index 0000000000000000000000000000000000000000..3b920b3e974e69be17db82ee510d3b87536fb557
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin_static.a
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:120988043a5372282d15b307110429d880d31c4fe2f8207794fbf6c963fe94b5
+size 360828
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvonnxparser.so.10.13.0 b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvonnxparser.so.10.13.0
new file mode 100644
index 0000000000000000000000000000000000000000..4b513a268c8a304c1641ae860824c8ceae14e574
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvonnxparser.so.10.13.0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92fa56ecde11c24f5e6f39a8a2f731fd36118ccfd6f0fe63622e8ebb70cdaff8
+size 4607120
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvonnxparser_static.a b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvonnxparser_static.a
new file mode 100644
index 0000000000000000000000000000000000000000..0d171ddc6a59db7621b0b62ade47a0a756dc486e
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/libnvonnxparser_static.a
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea6adb0690e788919b2a27ee5eeec8b3ce74bb5cd2e6d3da43128518c63cf092
+size 14287632
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer.so b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer.so
new file mode 100644
index 0000000000000000000000000000000000000000..3421081e15df00db7a749c89b59142293ba7fae8
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer.so differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_dispatch.so b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_dispatch.so
new file mode 100644
index 0000000000000000000000000000000000000000..efdb4c6f75f65e18bced0ce939c8ec48e58c8e1a
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_dispatch.so differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_lean.so b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_lean.so
new file mode 100644
index 0000000000000000000000000000000000000000..bdd16366d9147c7f15642aad399e45d3920b575b
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_lean.so differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_plugin.so b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_plugin.so
new file mode 100644
index 0000000000000000000000000000000000000000..d5ee14e9e06dacbf94146788feb358a0a6f2d7d9
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_plugin.so differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_vc_plugin.so b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_vc_plugin.so
new file mode 100644
index 0000000000000000000000000000000000000000..5079932babb9fddf17e1c514d156381f50645e1a
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_vc_plugin.so differ
diff --git a/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvonnxparser.so b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvonnxparser.so
new file mode 100644
index 0000000000000000000000000000000000000000..0ac2707a85cf53efde0bdd47279380e869c73422
Binary files /dev/null and b/g0plus_dockerfile/docker-assets/data/TensorRT-10.13.0.35/targets/x86_64-linux-gnu/lib/stubs/libnvonnxparser.so differ
diff --git a/g0plus_dockerfile/docker-assets/super_client_configuration_file.xml.tpl b/g0plus_dockerfile/docker-assets/super_client_configuration_file.xml.tpl
new file mode 100644
index 0000000000000000000000000000000000000000..69d4ee247265c4a989d83e12db8dd1a5129411a7
--- /dev/null
+++ b/g0plus_dockerfile/docker-assets/super_client_configuration_file.xml.tpl
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+ <dds>
+     <profiles xmlns="http://www.eprosima.com/XMLSchemas/fastRTPS_Profiles">
+         <participant profile_name="super_client_profile" is_default_profile="true">
+             <rtps>
+                 <builtin>
+                     <discovery_config>
+                         <discoveryProtocol>SUPER_CLIENT</discoveryProtocol>
+                         <discoveryServersList>
+                             <RemoteServer prefix="44.53.00.5f.45.50.52.4f.53.49.4d.41"> <!-- prefix bytes read "DS\0_EPROSIMA" in ASCII -->
+                                 <metatrafficUnicastLocatorList>
+                                     <locator>
+                                         <udpv4>
+                                             <address>10.42.0.RLROBOT_PORT</address> <!-- NOTE(review): the last octet looks like a token replaced when this .tpl is rendered; confirm against the rendering script -->
+                                             <port>11811</port>
+                                         </udpv4>
+                                     </locator>
+                                 </metatrafficUnicastLocatorList>
+                             </RemoteServer>
+                         </discoveryServersList>
+                     </discovery_config>
+                 </builtin>
+             </rtps>
+         </participant>
+     </profiles>
+ </dds>
+
diff --git a/g0plus_dockerfile/github_token b/g0plus_dockerfile/github_token
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391