Spaces:

fansunqi
/

tasker-keyframe-extractor

Running on Zero

App Files Files Community

multimodalart HF Staff commited on 1 day ago

Commit

2f54371

verified ·

1 Parent(s): bda7caf

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

README.md +39 -7
app.py +574 -0
requirements.txt +5 -0

README.md CHANGED Viewed

@@ -1,13 +1,45 @@
 ---
-title: Tasker Keyframe Extractor
-emoji: 🚀
 colorFrom: blue
-colorTo: pink
 sdk: gradio
-sdk_version: 6.19.0
-python_version: '3.12'
 app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: TASKER Keyframe Extractor
+emoji: 🔍
 colorFrom: blue
+colorTo: indigo
 sdk: gradio
+sdk_version: 6.15.1
 app_file: app.py
+short_description: VLM-guided tree-search keyframe extraction from videos
+python_version: "3.12"
+startup_duration_timeout: 30m
 ---
+## TASKER Keyframe Extractor
+This Space demonstrates **TASKER** (**Ta**sk-driven **a**nd **S**cene-aware **Ke**yframe sea**r**cher), a keyframe extraction algorithm from the ECCV 2026 paper [Bridging VideoQA and Video-Guided Agentic Tasks via Generalized Keyframe Extraction](https://arxiv.org/abs/2606.29445).
+### How it works
+TASKER reformulates keyframe extraction as a **generalized graph-search problem**:
+1. The input video is segmented into a tree of segments.
+2. A Vision-Language Model (Qwen2.5-VL-7B) evaluates which segments likely contain crucial missing actions.
+3. The selected segments are expanded (split at visual change points).
+4. Visual deduplication filters near-identical frames.
+5. The search terminates when the VLM is confident enough (confidence ≥ 3) or a frame limit is reached.
+Four search strategies are available:
+- **A\*** (default): balances goal-relevance and visual state changes
+- **BFS**: broad exploration, can select multiple segments per step
+- **GBFS**: greedy best-first, focuses on goal-critical actions
+- **Dijkstra**: focuses on maximum visual state transitions
+### Usage
+1. Upload a video file
+2. Enter a task query (e.g., "How to send an email with an attachment?")
+3. Select a search strategy
+4. Click "Extract Keyframes"
+The model returns a gallery of keyframes with timestamps and frame indices.
+### Model
+Uses [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) as the VLM for segment evaluation, running on ZeroGPU.

app.py ADDED Viewed

	@@ -0,0 +1,574 @@

+import os
+# Expandable segments to avoid allocator fragmentation under memory spikes
+os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
+import spaces  # MUST be before any torch/CUDA import
+import cv2
+import re
+import json
+import torch
+import numpy as np
+from PIL import Image
+from typing import List, Optional, Tuple
+import tempfile
+import gradio as gr
+from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
+MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"
+# ── Load model at module scope (ZeroGPU rule 2) ──────────────────────────────
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="sdpa",
+).to("cuda")
+# ── VLM call helper ──────────────────────────────────────────────────────────
+def vlm_call(images: List[Image.Image], question: str, system_prompt: str = "You are a highly strict UI navigation assistant designed to output JSON.") -> str:
+    """Call the local VLM with images and a question, return text response."""
+    content = []
+    for img in images:
+        content.append({"type": "image", "image": img})
+    content.append({"type": "text", "text": question})
+    messages = [
+        {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
+        {"role": "user", "content": content},
+    ]
+    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = processor(
+        text=[text],
+        images=[images] if images else None,
+        padding=True,
+        return_tensors="pt",
+    ).to("cuda")
+    with torch.no_grad():
+        output_ids = model.generate(**inputs, max_new_tokens=8192, do_sample=False, temperature=1.0)
+    # Trim the input tokens from output
+    input_len = inputs["input_ids"].shape[1]
+    output_text = processor.batch_decode(
+        output_ids[:, input_len:], skip_special_tokens=True
+    )[0]
+    return output_text
+def parse_json_response(text: str):
+    """Extract a JSON object from a text response."""
+    try:
+        match = re.search(r'\{.*\}', text, re.DOTALL)
+        if match:
+            return json.loads(match.group(0))
+    except Exception:
+        pass
+    return None
+# ── Video utilities ──────────────────────────────────────────────────────────
+def extract_frame(video_path: str, frame_idx: int) -> Optional[Image.Image]:
+    """Extract a single frame from the video as PIL Image."""
+    cap = cv2.VideoCapture(video_path)
+    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
+    ret, frame = cap.read()
+    cap.release()
+    if not ret:
+        return None
+    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+def compute_color_histogram(img: Image.Image) -> np.ndarray:
+    """Compute a normalized 3-channel color histogram."""
+    arr = np.array(img)
+    hist = cv2.calcHist([arr], [0, 1, 2], None, [50, 50, 50], [0, 256, 0, 256, 0, 256])
+    cv2.normalize(hist, hist)
+    return hist
+def frame_similarity(hist1: np.ndarray, hist2: np.ndarray) -> float:
+    """Compare two color histograms using correlation."""
+    return float(cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL))
+def is_frame_redundant(new_hist: np.ndarray, existing_hists: List[np.ndarray], threshold: float = 0.985) -> bool:
+    """Check if a new frame is too similar to existing ones."""
+    for h in existing_hists:
+        if frame_similarity(new_hist, h) >= threshold:
+            return True
+    return False
+# ── TASKER core: A* tree search keyframe extraction ─────────────────────────
+class VideoSeg:
+    """A video segment (tree node)."""
+    def __init__(self, start: int, end: int):
+        self.start = start
+        self.end = end
+def find_visual_change_split_point(video_path: str, seg_start: int, seg_end: int) -> int:
+    """Find the frame with the largest visual change in a segment."""
+    midpoint = (seg_start + seg_end) // 2
+    try:
+        seg_length = seg_end - seg_start
+        if seg_length <= 2:
+            return midpoint
+        cap = cv2.VideoCapture(video_path)
+        num_samples = min(seg_length, 10)
+        step = max(1, seg_length // num_samples)
+        sample_indices = list(range(seg_start, seg_end, step))
+        if sample_indices[-1] != seg_end:
+            sample_indices.append(seg_end)
+        frames = {}
+        hists = {}
+        for idx in sample_indices:
+            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
+            ret, frame = cap.read()
+            if ret:
+                frames[idx] = frame
+                hist = cv2.calcHist([frame], [0, 1, 2], None, [50, 50, 50], [0, 256, 0, 256, 0, 256])
+                cv2.normalize(hist, hist)
+                hists[idx] = hist
+        if len(frames) < 2:
+            cap.release()
+            return midpoint
+        sorted_indices = sorted(frames.keys())
+        max_diff = -1
+        best_a, best_b = sorted_indices[0], sorted_indices[-1]
+        for i in range(len(sorted_indices) - 1):
+            idx_a, idx_b = sorted_indices[i], sorted_indices[i + 1]
+            if idx_a in hists and idx_b in hists:
+                diff = 1.0 - cv2.compareHist(hists[idx_a], hists[idx_b], cv2.HISTCMP_CORREL)
+                if diff > max_diff:
+                    max_diff = diff
+                    best_a, best_b = idx_a, idx_b
+        candidate = best_b
+        cap.release()
+        # Clamp to valid range
+        min_pos = seg_start + int(seg_length * 0.15)
+        max_pos = seg_start + int(seg_length * 0.85)
+        if candidate < min_pos or candidate > max_pos:
+            return midpoint
+        return candidate
+    except Exception:
+        return midpoint
+def a_star_select_segment(images: List[Image.Image], goal: str, segment_des: str) -> str:
+    """A* strategy: balance goal-relevance and UI state changes."""
+    prompt = f"""You are provided with sequential images sampled from a video.
+    Each image is labeled with its frame index. The images are shown in chronological order.
+    Goal: {goal}
+    Candidate segments (gaps between current frames):
+    {segment_des}
+    (A* Strategy - Balance missing goal-relevant info and visual state changes)
+    Identify ONE single candidate segment that BEST satisfies BOTH conditions simultaneously:
+    1. GOAL PROXIMITY: The segment likely contains crucial missing actions that are necessary steps toward achieving the Goal.
+    2. STATE CHANGE MAGNITUDE: The segment whose boundary frames show the MOST different visual states is more likely to contain important operations.
+    Return JSON format: {{"frame_descriptions": [{{"segment_id": "1", "description": "Best A* candidate: missing goal step + visual state change"}}]}}
+    """
+    return vlm_call(images, prompt)
+def qa_and_reflect(images: List[Image.Image], goal: str) -> Tuple[str, int]:
+    """Evaluate whether current frames are sufficient."""
+    prompt_qa = f"Task Goal: {goal}\nLook at these sequential frames. Describe the EXACT step-by-step actions that happen transitioning from one frame to the next."
+    answer = vlm_call(images, prompt_qa, system_prompt="You are a helpful video analysis assistant.")
+    prompt_eval = f"""Task Goal: {goal}
+Your sequential analysis: {answer}
+Evaluate your confidence level strictly:
+1: Severe Jumps (There are completely missing screens or sudden state changes. MUST expand.)
+2: Minor Disconnects (The flow makes sense, but some intermediate actions are missing. Should expand.)
+3: Strong Continuity (The frames capture all important actions and transitions. No key step is skipped.)
+Output JSON exactly like this: {{"confidence": 3}}
+"""
+    conf_str = vlm_call(images, prompt_eval)
+    conf_json = parse_json_response(conf_str)
+    confidence = conf_json.get("confidence", 1) if conf_json else 1
+    return answer, int(confidence)
+@spaces.GPU(duration=240)
+def extract_keyframes(video_path: str, goal: str, search_strategy: str = "a_star", max_frames: int = 10, min_frames: int = 6, min_steps: int = 3, conf_lower: int = 3, progress=gr.Progress()):
+    """
+    TASKER keyframe extraction: tree-search with VLM-guided segment selection.
+    Args:
+        video_path: Path to the input video.
+        goal: Task query describing what the user wants to see.
+        search_strategy: One of "a_star", "bfs", "gbfs", "dijkstra".
+        max_frames: Maximum number of keyframes to extract.
+        min_frames: Minimum number of frames before confidence check can stop.
+        min_steps: Minimum expansion steps before confidence check can stop.
+        conf_lower: Confidence threshold (1-3) to stop searching.
+    Returns:
+        List of (PIL Image, caption) tuples for gallery display, plus a summary string.
+    """
+    cap = cv2.VideoCapture(video_path)
+    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    cap.release()
+    if num_frames <= 0 or fps <= 0:
+        return [], "Error: Could not read video file. Please upload a valid video."
+    # ── Initial uniform sampling ─────────────────────────────────────────────
+    init_frames = 4
+    content_start = 0
+    content_end = num_frames - 1
+    if content_end - content_start + 1 <= init_frames:
+        sample_idx = list(range(content_start, content_end + 1))
+    else:
+        interval = max(1, (content_end - content_start + 1) // (init_frames - 1))
+        sample_idx = list(range(content_start, content_end + 1, interval))
+        if sample_idx[-1] != content_end:
+            sample_idx.append(content_end)
+    progress(0.1, desc=f"Initial sampling: {len(sample_idx)} frames from {num_frames} total")
+    video_segments = [VideoSeg(sample_idx[i-1], sample_idx[i]) for i in range(1, len(sample_idx))]
+    # Histogram cache for dedup
+    hist_cache = {}
+    frozen_segments = set()
+    effective_step = 0
+    last_confidence = 0
+    max_total_attempts = max_frames + 10
+    for attempt in range(1, max_total_attempts + 1):
+        current_frames = len(sample_idx)
+        if current_frames >= max_frames:
+            break
+        # Extract current frames as images
+        images = []
+        for idx in sample_idx:
+            img = extract_frame(video_path, idx)
+            if img is not None:
+                images.append(img)
+        if not images:
+            break
+        progress(
+            0.1 + 0.6 * (attempt / max_total_attempts),
+            desc=f"Step {attempt}: {current_frames} frames, evaluating..."
+        )
+        # Confidence check
+        if current_frames >= min_frames and effective_step > min_steps:
+            _, confidence = qa_and_reflect(images, goal)
+            last_confidence = confidence
+            if confidence >= conf_lower:
+                break
+        else:
+            if current_frames < min_frames:
+                pass  # forced expansion
+        # Build segment descriptions
+        frame_to_img_idx = {frame: i + 1 for i, frame in enumerate(sample_idx)}
+        segment_des_lines = []
+        for i, seg in enumerate(video_segments):
+            seg_id = i + 1
+            if (seg.start, seg.end) in frozen_segments:
+                continue
+            start_img = frame_to_img_idx.get(seg.start, "?")
+            end_img = frame_to_img_idx.get(seg.end, "?")
+            segment_des_lines.append(
+                f"  Segment {seg_id}: frames {seg.start}-{seg.end} (Image #{start_img} -> Image #{end_img})"
+            )
+        segment_des_str = "\n".join(segment_des_lines)
+        if not segment_des_str:
+            break
+        # VLM segment selection
+        try:
+            if search_strategy == "bfs":
+                response = vlm_call(images, f"""You are provided with sequential images sampled from a video.
+Goal: {goal}
+Candidate segments:
+{segment_des_str}
+Select MULTIPLE segments that likely contain crucial missing actions.
+Return JSON: {{"frame_descriptions": [{{"segment_id": "1", "description": "..."}}]}}""")
+            elif search_strategy == "gbfs":
+                response = vlm_call(images, f"""You are provided with sequential images sampled from a video.
+Goal: {goal}
+Candidate segments:
+{segment_des_str}
+Select the SINGLE segment MOST LIKELY to contain crucial missing actions.
+Return JSON: {{"frame_descriptions": [{{"segment_id": "1", "description": "..."}}]}}""")
+            elif search_strategy == "dijkstra":
+                response = vlm_call(images, f"""You are provided with sequential images sampled from a video.
+Candidate segments:
+{segment_des_str}
+Select the SINGLE segment with the MOST significant visual state transition.
+Return JSON: {{"frame_descriptions": [{{"segment_id": "1", "description": "..."}}]}}""")
+            else:  # a_star
+                response = a_star_select_segment(images, goal, segment_des_str)
+            parsed = parse_json_response(response)
+        except Exception as e:
+            print(f"VLM call error at step {attempt}: {e}")
+            parsed = None
+        # Determine selected segment IDs
+        selected_seg_ids = set()
+        if parsed and "frame_descriptions" in parsed:
+            for desc in parsed["frame_descriptions"]:
+                for key in desc:
+                    if key.lower() == "segment_id":
+                        val = str(desc[key]).strip()
+                        nums = re.findall(r'\d+', val)
+                        if nums:
+                            seg_id = int(nums[0])
+                            if 1 <= seg_id <= len(video_segments):
+                                selected_seg_ids.add(seg_id)
+                        break
+        # Fallback: pick longest segment
+        if not selected_seg_ids:
+            longest_seg_id = None
+            longest_len = 0
+            for i, seg in enumerate(video_segments):
+                seg_len = seg.end - seg.start
+                if seg_len > longest_len and seg_len > 1 and (seg.start, seg.end) not in frozen_segments:
+                    longest_len = seg_len
+                    longest_seg_id = i + 1
+            if longest_seg_id is not None:
+                selected_seg_ids.add(longest_seg_id)
+        if not selected_seg_ids:
+            break
+        # BFS quota limit
+        if search_strategy == "bfs" and len(selected_seg_ids) > 1:
+            remaining_quota = max_frames - len(sample_idx)
+            if remaining_quota <= 0:
+                break
+            if len(selected_seg_ids) > remaining_quota:
+                sorted_seg_ids = sorted(selected_seg_ids,
+                                        key=lambda sid: video_segments[sid-1].end - video_segments[sid-1].start,
+                                        reverse=True)
+                selected_seg_ids = set(sorted_seg_ids[:remaining_quota])
+        # Split selected segments
+        split_origin = {}
+        new_segments = []
+        seg_counter = 0
+        for i, seg in enumerate(video_segments):
+            seg_id = i + 1
+            if seg_id in selected_seg_ids:
+                if seg.end - seg.start <= 1:
+                    seg_counter += 1
+                    new_segments.append(VideoSeg(seg.start, seg.end))
+                else:
+                    sp = find_visual_change_split_point(video_path, seg.start, seg.end)
+                    split_origin[sp] = (seg.start, seg.end)
+                    seg_counter += 1
+                    new_segments.append(VideoSeg(seg.start, sp))
+                    seg_counter += 1
+                    new_segments.append(VideoSeg(sp, seg.end))
+            else:
+                seg_counter += 1
+                new_segments.append(VideoSeg(seg.start, seg.end))
+        video_segments = new_segments
+        # Rebuild sample_idx
+        sample_idx_set = set()
+        for seg in video_segments:
+            sample_idx_set.add(seg.start)
+            sample_idx_set.add(seg.end)
+        new_sample_idx = sorted(list(sample_idx_set))
+        # Visual deduplication
+        new_frames = [idx for idx in new_sample_idx if idx not in set(sample_idx)]
+        old_sample_set = set(sample_idx)
+        # Compute histograms for old frames
+        old_hists = []
+        for idx in sample_idx:
+            img = extract_frame(video_path, idx)
+            if img is not None:
+                old_hists.append(compute_color_histogram(img))
+        frames_to_remove = []
+        accepted_new_hists = []
+        for new_idx in new_frames:
+            new_img = extract_frame(video_path, new_idx)
+            if new_img is None:
+                continue
+            new_hist = compute_color_histogram(new_img)
+            all_compare_hists = old_hists + accepted_new_hists
+            if is_frame_redundant(new_hist, all_compare_hists, threshold=0.985):
+                frames_to_remove.append(new_idx)
+                if new_idx in split_origin:
+                    frozen_segments.add(split_origin[new_idx])
+            else:
+                accepted_new_hists.append(new_hist)
+        if frames_to_remove:
+            new_sample_idx = [idx for idx in new_sample_idx if idx not in frames_to_remove]
+            new_sample_idx = sorted(new_sample_idx)
+            video_segments = [VideoSeg(new_sample_idx[i-1], new_sample_idx[i])
+                              for i in range(1, len(new_sample_idx))]
+        actually_added = len(new_sample_idx) > len(sample_idx)
+        sample_idx = new_sample_idx
+        if actually_added:
+            effective_step += 1
+    progress(0.85, desc="Finalizing keyframes...")
+    # Force-fill if too few frames
+    if len(sample_idx) < min_frames and last_confidence < conf_lower:
+        max_force = min_frames + 5
+        for _ in range(max_force):
+            if len(sample_idx) >= min_frames:
+                break
+            max_gap = 0
+            max_gap_idx = 0
+            for i in range(len(sample_idx) - 1):
+                if (sample_idx[i], sample_idx[i+1]) in frozen_segments:
+                    continue
+                gap = sample_idx[i+1] - sample_idx[i]
+                if gap > max_gap:
+                    max_gap = gap
+                    max_gap_idx = i
+            if max_gap <= 1:
+                break
+            sp = find_visual_change_split_point(video_path, sample_idx[max_gap_idx], sample_idx[max_gap_idx + 1])
+            sp_img = extract_frame(video_path, sp)
+            if sp_img is None:
+                break
+            sp_hist = compute_color_histogram(sp_img)
+            existing_hists = []
+            for idx in sample_idx:
+                img = extract_frame(video_path, idx)
+                if img is not None:
+                    existing_hists.append(compute_color_histogram(img))
+            if is_frame_redundant(sp_hist, existing_hists, threshold=0.985):
+                frozen_segments.add((sample_idx[max_gap_idx], sample_idx[max_gap_idx + 1]))
+                continue
+            sample_idx.insert(max_gap_idx + 1, sp)
+    # Extract final keyframes
+    progress(0.95, desc="Extracting final keyframes...")
+    gallery = []
+    for i, idx in enumerate(sample_idx):
+        img = extract_frame(video_path, idx)
+        if img is not None:
+            timestamp = idx / fps if fps > 0 else 0
+            mins = int(timestamp // 60)
+            secs = int(timestamp % 60)
+            percent = (idx / max(1, num_frames - 1)) * 100
+            caption = f"Frame {i+1}/{len(sample_idx)} | idx={idx} | {mins:02d}:{secs:02d} | {percent:.1f}%"
+            gallery.append((img, caption))
+    summary = (
+        f"**TASKER {search_strategy.upper()}** extracted **{len(gallery)}** keyframes "
+        f"from {num_frames} total frames ({num_frames/fps:.1f}s video).\n\n"
+        f"Search stats: {effective_step} effective expansion steps, "
+        f"confidence={last_confidence}/3, "
+        f"target range {min_frames}-{max_frames} frames."
+    )
+    progress(1.0, desc="Done!")
+    return gallery, summary
+# ── Gradio UI ───────────────────────────────────────────────────────────────
+CUSTOM_CSS = """
+#header { text-align: center; margin-bottom: 20px; }
+#header h1 { font-size: 2em; margin-bottom: 5px; }
+#header p { color: #666; font-size: 1.1em; }
+"""
+with gr.Blocks(css=CUSTOM_CSS, title="TASKER Keyframe Extractor") as demo:
+    gr.HTML("""
+    <div id="header">
+        <h1>TASKER: Task-driven and Scene-aware Keyframe Search</h1>
+        <p>Extract task-relevant keyframes from a video using VLM-guided tree search (A* / BFS / GBFS / Dijkstra)</p>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            video_input = gr.Video(label="Upload Video", sources=["upload"])
+            goal_input = gr.Textbox(
+                label="Task Query / Goal",
+                placeholder="e.g., How to send an email with an attachment?",
+                lines=2,
+            )
+            strategy_input = gr.Dropdown(
+                choices=["a_star", "bfs", "gbfs", "dijkstra"],
+                value="a_star",
+                label="Search Strategy",
+                info="A* balances goal-relevance and visual changes. BFS explores broadly. GBFS focuses on goal. Dijkstra focuses on visual changes.",
+            )
+            with gr.Accordion("Advanced Settings", open=False):
+                max_frames_slider = gr.Slider(4, 16, value=10, step=1, label="Max Keyframes")
+                min_frames_slider = gr.Slider(2, 8, value=6, step=1, label="Min Keyframes (before confidence check)")
+                min_steps_slider = gr.Slider(1, 8, value=3, step=1, label="Min Search Steps")
+                conf_slider = gr.Slider(1, 3, value=3, step=1, label="Confidence Threshold (3=strictest)")
+            extract_btn = gr.Button("Extract Keyframes", variant="primary")
+        with gr.Column(scale=2):
+            summary_output = gr.Markdown(label="Summary")
+            gallery_output = gr.Gallery(
+                label="Extracted Keyframes",
+                columns=3,
+                height=600,
+                object_fit="contain",
+            )
+    gr.Examples(
+        examples=[
+            ["https://huggingface.co/datasets/hugging-apps/tasker-demo-videos/resolve/main/cooking_demo.mp4",
+             "Show the steps to cook pasta"],
+        ],
+        inputs=[video_input, goal_input],
+    )
+    extract_btn.click(
+        fn=extract_keyframes,
+        inputs=[
+            video_input,
+            goal_input,
+            strategy_input,
+            max_frames_slider,
+            min_frames_slider,
+            min_steps_slider,
+            conf_slider,
+        ],
+        outputs=[gallery_output, summary_output],
+    )
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+torchvision
+transformers
+opencv-python-headless
+pillow
+numpy