|
|
import { useState, useRef, useEffect, useCallback } from "react"; |
|
|
import WebcamCapture from "./WebcamCapture"; |
|
|
import PromptInput from "./PromptInput"; |
|
|
import LiveCaption, { type HistoryEntry } from "./LiveCaption"; |
|
|
import { useVLMContext } from "../context/useVLMContext"; |
|
|
import { PROMPTS, TIMING } from "../constants"; |
|
|
|
|
|
/** Props accepted by the `CaptioningView` component. */
interface CaptioningViewProps {
  /**
   * Ref to the `<video>` element that is handed to the captioning loop.
   * The ref may be empty (`null`) while no element is attached.
   */
  videoRef: React.RefObject<HTMLVideoElement | null>;
}
|
|
|
|
|
/**
 * Repeatedly runs VLM inference on the current video frame while `isRunning`
 * is true and the model reports `isLoaded`.
 *
 * Each iteration checks that the video element is attached, has frame data,
 * is playing and has a non-zero width; if so it awaits `runInference` with
 * the prompt currently stored in `promptRef`. Iterations are separated by
 * `TIMING.FRAME_CAPTURE_DELAY` milliseconds.
 *
 * The loop is tied to an `AbortController`: when the effect is torn down
 * (dependencies change or the component unmounts) the controller is aborted,
 * the loop exits at its next check, and no callback is invoked any more on
 * behalf of that loop — including streaming updates of an inference that was
 * still in flight.
 *
 * @param videoRef Ref to the video element to capture from; re-read on every
 *   iteration so an element attached after the loop started is picked up.
 * @param isRunning Whether the loop should be active.
 * @param promptRef Ref holding the prompt to use; read right before each
 *   inference so prompt edits apply without restarting the loop.
 * @param onCaptionUpdate Receives streamed caption text and the final caption.
 * @param onError Receives the message of any error thrown by an inference.
 * @param onGenerationComplete Receives the final caption of each inference.
 * @param onStatsUpdate Receives performance stats reported by `runInference`.
 */
function useCaptioningLoop(
  videoRef: React.RefObject<HTMLVideoElement | null>,
  isRunning: boolean,
  promptRef: React.RefObject<string>,
  onCaptionUpdate: (caption: string) => void,
  onError: (error: string) => void,
  onGenerationComplete: (caption: string) => void,
  onStatsUpdate: (stats: { tps?: number; ttft?: number }) => void,
) {
  const { isLoaded, runInference } = useVLMContext();
  const abortControllerRef = useRef<AbortController | null>(null);

  // The callbacks are mirrored into refs so the loop always calls the latest
  // version without having to restart whenever a callback identity changes.
  const onCaptionUpdateRef = useRef(onCaptionUpdate);
  const onErrorRef = useRef(onError);
  const onGenerationCompleteRef = useRef(onGenerationComplete);
  const onStatsUpdateRef = useRef(onStatsUpdate);

  useEffect(() => {
    onCaptionUpdateRef.current = onCaptionUpdate;
    onErrorRef.current = onError;
    onGenerationCompleteRef.current = onGenerationComplete;
    onStatsUpdateRef.current = onStatsUpdate;
  }, [onCaptionUpdate, onError, onGenerationComplete, onStatsUpdate]);

  useEffect(() => {
    // Make sure a loop left over from a previous run is stopped before
    // deciding whether a new one should start.
    abortControllerRef.current?.abort();
    if (!isRunning || !isLoaded) return;

    const controller = new AbortController();
    abortControllerRef.current = controller;
    const { signal } = controller;

    const captureLoop = async () => {
      while (!signal.aborted) {
        // Read the ref on every pass: the element may be attached (or
        // replaced) after this effect started.
        const video = videoRef.current;
        const frameAvailable =
          video !== null &&
          video.readyState >= 2 && // 2 === HTMLMediaElement.HAVE_CURRENT_DATA
          !video.paused &&
          video.videoWidth > 0;

        if (video && frameAvailable) {
          try {
            const currentPrompt = promptRef.current || "";
            const result = await runInference(
              video,
              currentPrompt,
              // Streamed updates go through the ref (latest callback) and are
              // dropped once this loop has been aborted.
              (partial: string) => {
                if (!signal.aborted) onCaptionUpdateRef.current(partial);
              },
              (stats) => {
                if (!signal.aborted) onStatsUpdateRef.current(stats);
              },
            );
            if (result && !signal.aborted) {
              onCaptionUpdateRef.current(result);
              onGenerationCompleteRef.current(result);
            }
          } catch (error) {
            // Errors surfacing after an abort belong to a loop nobody is
            // listening to any more, so they are not reported.
            if (!signal.aborted) {
              const message =
                error instanceof Error ? error.message : String(error);
              onErrorRef.current(message);
              console.error("Error processing frame:", error);
            }
          }
        }

        if (signal.aborted) break;
        await new Promise((resolve) =>
          setTimeout(resolve, TIMING.FRAME_CAPTURE_DELAY),
        );
      }
    };

    // Start on the next macrotask so that an effect which is cleaned up
    // immediately cancels the timer and never kicks off an inference.
    const startTimer = setTimeout(() => {
      void captureLoop();
    }, 0);

    return () => {
      clearTimeout(startTimer);
      // Abort this effect's own controller rather than whatever the shared
      // ref happens to point at when the cleanup runs.
      controller.abort();
    };
  }, [isRunning, isLoaded, runInference, promptRef, videoRef]);
}
|
|
|
|
|
/** Maximum number of completed captions kept in the history list. */
const MAX_HISTORY_ENTRIES = 50;

/**
 * Full-screen captioning view: renders the webcam controls, the prompt input
 * and the live caption panel, and drives the captioning loop for the given
 * video element.
 *
 * State owned here:
 * - `caption`: latest caption text (or `Error: …` after a failure),
 * - `isLoopRunning`: whether the captioning loop is active (starts as true),
 * - `currentPrompt`: prompt used for inference (starts as `PROMPTS.default`),
 * - `error`: last error message, cleared on the next caption update,
 *   prompt change or toggle,
 * - `history`: most recent completed captions, newest first, capped at
 *   `MAX_HISTORY_ENTRIES`,
 * - `stats`: merged performance stats reported by the loop.
 *
 * @param props.videoRef Ref to the video element whose frames are captioned.
 */
export default function CaptioningView({ videoRef }: CaptioningViewProps) {
  const { imageSize, setImageSize } = useVLMContext();
  const [caption, setCaption] = useState<string>("");
  const [isLoopRunning, setIsLoopRunning] = useState<boolean>(true);
  const [currentPrompt, setCurrentPrompt] = useState<string>(PROMPTS.default);
  const [error, setError] = useState<string | null>(null);
  const [history, setHistory] = useState<HistoryEntry[]>([]);
  const [stats, setStats] = useState<{ tps?: number; ttft?: number }>({});

  // The prompt is mirrored into a ref so the captioning loop can read the
  // latest value without being restarted on every prompt edit.
  const promptRef = useRef<string>(currentPrompt);

  useEffect(() => {
    promptRef.current = currentPrompt;
  }, [currentPrompt]);

  // A fresh caption means inference is working again, so drop any old error.
  const handleCaptionUpdate = useCallback((newCaption: string) => {
    setCaption(newCaption);
    setError(null);
  }, []);

  // Surface the failure both as error state and in the caption area.
  const handleError = useCallback((errorMessage: string) => {
    setError(errorMessage);
    setCaption(`Error: ${errorMessage}`);
  }, []);

  // Prepend the finished caption, stamped with the local wall-clock time.
  const handleGenerationComplete = useCallback((text: string) => {
    // `hourCycle: "h23"` pins the hour to 00–23. The previous
    // `hour12: false` can render the midnight hour as "24" on engines that
    // resolve it to the h24 cycle for en-US.
    const timestamp = new Date().toLocaleTimeString("en-US", {
      hourCycle: "h23",
      hour: "2-digit",
      minute: "2-digit",
      second: "2-digit",
    });

    setHistory((prev) =>
      [{ timestamp, text }, ...prev].slice(0, MAX_HISTORY_ENTRIES),
    );
  }, []);

  // Stats arrive piecemeal, so merge them into what is already known.
  const handleStatsUpdate = useCallback(
    (newStats: { tps?: number; ttft?: number }) => {
      setStats((prev) => ({ ...prev, ...newStats }));
    },
    [],
  );

  useCaptioningLoop(
    videoRef,
    isLoopRunning,
    promptRef,
    handleCaptionUpdate,
    handleError,
    handleGenerationComplete,
    handleStatsUpdate,
  );

  const handlePromptChange = useCallback((prompt: string) => {
    setCurrentPrompt(prompt);
    setError(null);
  }, []);

  // Clearing the error unconditionally is a no-op when it is already null and
  // keeps this callback's identity stable across error changes.
  const handleToggleLoop = useCallback(() => {
    setIsLoopRunning((prev) => !prev);
    setError(null);
  }, []);

  return (
    <div className="absolute inset-0 text-white">
      <div className="relative w-full h-full">
        <WebcamCapture
          isRunning={isLoopRunning}
          onToggleRunning={handleToggleLoop}
          error={error}
          imageSize={imageSize}
          onImageSizeChange={setImageSize}
        />
        {/* Prompt Input - Bottom Left */}
        <div className="absolute bottom-5 left-5 z-30 w-[540px]">
          <PromptInput onPromptChange={handlePromptChange} />
        </div>
        {/* Live Caption - Bottom Right */}
        <div className="absolute bottom-5 right-5 z-30 w-[720px]">
          <LiveCaption
            caption={caption}
            isRunning={isLoopRunning}
            error={error}
            history={history}
            stats={stats}
          />
        </div>
      </div>
    </div>
  );
}