添加语音识别和朗读功能
This commit is contained in:
@@ -32,11 +32,40 @@ import StopRounded from "@mui/icons-material/StopRounded";
|
|||||||
import AutoAwesome from "@mui/icons-material/AutoAwesome"; // Sparkle icon for AI
|
import AutoAwesome from "@mui/icons-material/AutoAwesome"; // Sparkle icon for AI
|
||||||
import ErrorOutlineRounded from "@mui/icons-material/ErrorOutlineRounded";
|
import ErrorOutlineRounded from "@mui/icons-material/ErrorOutlineRounded";
|
||||||
import AddCommentRounded from "@mui/icons-material/AddCommentRounded";
|
import AddCommentRounded from "@mui/icons-material/AddCommentRounded";
|
||||||
|
import VolumeUpRounded from "@mui/icons-material/VolumeUpRounded";
|
||||||
|
import PauseRounded from "@mui/icons-material/PauseRounded";
|
||||||
|
import PlayArrowRounded from "@mui/icons-material/PlayArrowRounded";
|
||||||
|
import MicRounded from "@mui/icons-material/MicRounded";
|
||||||
|
|
||||||
// Logic
|
// Logic
|
||||||
import { streamCopilotChat } from "@/lib/chatStream";
|
import { streamCopilotChat } from "@/lib/chatStream";
|
||||||
import { parseAssistantMessageSections } from "./chatMessageSections";
|
import { parseAssistantMessageSections } from "./chatMessageSections";
|
||||||
|
|
||||||
|
// WebKit Speech Recognition compatibility
|
||||||
|
/**
 * Event payload handed to `SpeechRecognition.onresult`.
 *
 * Declared locally as part of the WebKit speech-recognition compatibility
 * typings in this file.
 */
interface SpeechRecognitionEvent extends Event {
  /** All recognition results accumulated so far in this session. */
  readonly results: SpeechRecognitionResultList;
  /** Index into `results` of the first entry that changed in this event. */
  readonly resultIndex: number;
}
|
||||||
|
|
||||||
|
/**
 * Minimal typing of the speech recognizer object used by this file.
 *
 * Only the members the voice-input hook touches are declared.
 */
interface SpeechRecognition extends EventTarget {
  // --- Configuration ---

  /** Language tag the recognizer should listen for (this file sets "zh-CN"). */
  lang: string;
  /** When true, recognition keeps running across multiple phrases. */
  continuous: boolean;
  /** When true, non-final (interim) hypotheses are reported as well. */
  interimResults: boolean;

  // --- Event handlers ---

  /** Invoked whenever new recognition results are available. */
  onresult: ((event: SpeechRecognitionEvent) => void) | null;
  /** Invoked when recognition fails. */
  onerror: ((event: Event) => void) | null;
  /** Invoked once the recognition session has finished. */
  onend: (() => void) | null;

  // --- Session control ---

  /** Begins listening. */
  start(): void;
  /** Stops listening. */
  stop(): void;
  /** Aborts the session. */
  abort(): void;
}
|
||||||
|
|
||||||
|
declare global {
  /**
   * Augments `Window` with the speech-recognition constructors. Both are
   * optional because a given browser may expose only one of them, or neither.
   */
  interface Window {
    /** Unprefixed constructor. */
    SpeechRecognition?: {
      new (): SpeechRecognition;
      prototype: SpeechRecognition;
    };
    /** `webkit`-prefixed constructor. */
    webkitSpeechRecognition?: {
      new (): SpeechRecognition;
      prototype: SpeechRecognition;
    };
  }
}
|
||||||
|
|
||||||
// Types
|
// Types
|
||||||
type Message = {
|
type Message = {
|
||||||
id: string;
|
id: string;
|
||||||
@@ -59,6 +88,26 @@ const normalizeThoughtTagToken = (token: string): string =>
|
|||||||
closingSlash ? "</think>" : "<think>",
|
closingSlash ? "</think>" : "<think>",
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/** Playback state of the text-to-speech reader for a message. */
type SpeechState =
  | "idle"
  | "playing"
  | "paused";
|
||||||
|
|
||||||
|
/**
 * Reduces Markdown to plain text suitable for being read aloud.
 *
 * Fenced code blocks and images are dropped, links keep only their label,
 * HTML tags and emphasis markers are removed, and block-level prefixes
 * (headings, blockquotes, list bullets, ordered-list numbers, horizontal
 * rules) are stripped. Block-level prefixes are only recognised at the start
 * of a line, so ordinary prose such as "in 2024. Then", "a - b", "x > y" or
 * "C# is" is left intact.
 *
 * @param md - Markdown source text.
 * @returns The plain-text rendering, with runs of blank lines collapsed to a
 *   single newline and surrounding whitespace trimmed.
 */
const stripMarkdown = (md: string): string =>
  md
    // Fenced code blocks are removed entirely; inline code keeps its content.
    .replace(/```[\s\S]*?```/g, "")
    .replace(/`([^`]+)`/g, "$1")
    // Images are removed; links are replaced by their label.
    .replace(/!\[.*?\]\(.*?\)/g, "")
    .replace(/\[([^\]]+)\]\(.*?\)/g, "$1")
    // HTML tags / comments. A tag name (or "!") must follow "<" so that
    // comparisons like "1 < 2 and 3 > 2" are not mistaken for a tag. This runs
    // before blockquote stripping so a tag's closing ">" is never touched.
    .replace(/<(?:\/?[a-zA-Z]|!)[^>]*>/g, "")
    // Block-level markers: anchored to line start (`m` flag) and limited to
    // spaces/tabs so they never reach across a newline.
    .replace(/^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$/gm, "") // horizontal rules
    .replace(/^[ \t]*(?:>[ \t]*)+/gm, "") // blockquote prefixes (possibly nested)
    .replace(/^[ \t]*#{1,6}[ \t]+/gm, "") // ATX headings
    .replace(/^[ \t]*[-*+][ \t]+/gm, "") // bullet list markers
    .replace(/^[ \t]*\d+\.[ \t]+/gm, "") // ordered list markers
    // Inline emphasis, longest delimiter first. This runs after list markers
    // are gone so a leading "* " bullet cannot pair up with an emphasis star.
    .replace(/\*\*\*(.+?)\*\*\*/g, "$1")
    .replace(/\*\*(.+?)\*\*/g, "$1")
    .replace(/\*(.+?)\*/g, "$1")
    .replace(/~~(.+?)~~/g, "$1")
    .replace(/\n{2,}/g, "\n")
    .trim();
|
||||||
|
|
||||||
type PersistedChatState = {
|
type PersistedChatState = {
|
||||||
messages: Message[];
|
messages: Message[];
|
||||||
conversationId?: string;
|
conversationId?: string;
|
||||||
@@ -150,10 +199,16 @@ const Blob = ({ color, size, top, left, delay }: { color: string; size: number;
|
|||||||
/** Props accepted by `ChatMessageItem`. */
type ChatMessageItemProps = {
  /** The chat message to render. */
  message: Message;
  /** Active MUI theme. */
  theme: Theme;

  // --- Read-aloud (text-to-speech) controls ---

  /** Speech state of this particular message; selects which buttons are shown. */
  messageSpeechState: SpeechState;
  /** Starts reading the given text aloud on behalf of the message with `messageId`. */
  onSpeak: (messageId: string, text: string) => void;
  /** Pauses the current read-aloud. */
  onPause: () => void;
  /** Resumes a paused read-aloud. */
  onResume: () => void;
  /** Stops the current read-aloud. */
  onStopSpeech: () => void;
  /** Whether speech synthesis is available; the controls are hidden when false. */
  isTtsSupported: boolean;
};
|
||||||
|
|
||||||
const ChatMessageItem = React.memo(
|
const ChatMessageItem = React.memo(
|
||||||
({ message, theme }: ChatMessageItemProps) => {
|
({ message, theme, messageSpeechState, onSpeak, onPause, onResume, onStopSpeech, isTtsSupported }: ChatMessageItemProps) => {
|
||||||
const isUser = message.role === "user";
|
const isUser = message.role === "user";
|
||||||
const isErrorMessage = Boolean(message.isError);
|
const isErrorMessage = Boolean(message.isError);
|
||||||
const parsedAssistantSections =
|
const parsedAssistantSections =
|
||||||
@@ -187,6 +242,7 @@ const ChatMessageItem = React.memo(
|
|||||||
</Avatar>
|
</Avatar>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
<Box>
|
||||||
<Paper
|
<Paper
|
||||||
elevation={isUser ? 8 : isErrorMessage ? 1 : 2}
|
elevation={isUser ? 8 : isErrorMessage ? 1 : 2}
|
||||||
sx={{
|
sx={{
|
||||||
@@ -278,12 +334,194 @@ const ChatMessageItem = React.memo(
|
|||||||
<ReactMarkdown remarkPlugins={[remarkGfm]}>{answerContent || "..."}</ReactMarkdown>
|
<ReactMarkdown remarkPlugins={[remarkGfm]}>{answerContent || "..."}</ReactMarkdown>
|
||||||
</div>
|
</div>
|
||||||
</Paper>
|
</Paper>
|
||||||
|
{!isUser && !isErrorMessage && isTtsSupported && (
|
||||||
|
<Stack direction="row" spacing={0.5} sx={{ mt: 0.5, ml: 0.5 }}>
|
||||||
|
{messageSpeechState === "idle" && (
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={() => onSpeak(message.id, stripMarkdown(answerContent))}
|
||||||
|
aria-label="朗读消息"
|
||||||
|
sx={{ color: "text.secondary", opacity: 0.6, "&:hover": { opacity: 1 }, p: 0.5 }}
|
||||||
|
>
|
||||||
|
<VolumeUpRounded sx={{ fontSize: 16 }} />
|
||||||
|
</IconButton>
|
||||||
|
)}
|
||||||
|
{messageSpeechState === "playing" && (
|
||||||
|
<>
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={onPause}
|
||||||
|
aria-label="暂停朗读"
|
||||||
|
sx={{ color: "primary.main", p: 0.5 }}
|
||||||
|
>
|
||||||
|
<PauseRounded sx={{ fontSize: 16 }} />
|
||||||
|
</IconButton>
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={onStopSpeech}
|
||||||
|
aria-label="停止朗读"
|
||||||
|
sx={{ color: "error.main", p: 0.5 }}
|
||||||
|
>
|
||||||
|
<StopRounded sx={{ fontSize: 16 }} />
|
||||||
|
</IconButton>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
{messageSpeechState === "paused" && (
|
||||||
|
<>
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={onResume}
|
||||||
|
aria-label="继续朗读"
|
||||||
|
sx={{ color: "primary.main", p: 0.5 }}
|
||||||
|
>
|
||||||
|
<PlayArrowRounded sx={{ fontSize: 16 }} />
|
||||||
|
</IconButton>
|
||||||
|
<IconButton
|
||||||
|
size="small"
|
||||||
|
onClick={onStopSpeech}
|
||||||
|
aria-label="停止朗读"
|
||||||
|
sx={{ color: "error.main", p: 0.5 }}
|
||||||
|
>
|
||||||
|
<StopRounded sx={{ fontSize: 16 }} />
|
||||||
|
</IconButton>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</Stack>
|
||||||
|
)}
|
||||||
|
</Box>
|
||||||
</motion.div>
|
</motion.div>
|
||||||
);
|
);
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
ChatMessageItem.displayName = "ChatMessageItem";
|
ChatMessageItem.displayName = "ChatMessageItem";
|
||||||
|
|
||||||
|
// --- Voice Hooks ---
|
||||||
|
|
||||||
|
/**
 * Text-to-speech hook built on `window.speechSynthesis`.
 *
 * At most one utterance is tracked at a time. `utteranceRef` holds the
 * utterance this hook currently considers active. Event handlers of any other
 * utterance (one that was cancelled by `stop()` or replaced by a newer
 * `speak()`) are ignored, so a late `end`/`error` event from an old utterance
 * cannot reset the state of the one that is playing now.
 *
 * @returns
 *  - `speechState`: "idle" | "playing" | "paused" for the active utterance.
 *  - `speakingMessageId`: id passed to the last `speak()` that is still active, else `null`.
 *  - `speak(messageId, text)`: cancels any current speech and reads `text` aloud.
 *  - `pause()` / `resume()` / `stop()`: playback controls.
 *  - `isSupported`: whether `speechSynthesis` exists on `window`.
 */
function useSpeechSynthesis() {
  const [speechState, setSpeechState] = useState<SpeechState>("idle");
  const [speakingMessageId, setSpeakingMessageId] = useState<string | null>(null);
  const utteranceRef = useRef<SpeechSynthesisUtterance | null>(null);

  const isSupported = typeof window !== "undefined" && "speechSynthesis" in window;

  const stop = useCallback(() => {
    if (!isSupported) return;
    // Detach before cancelling so the cancelled utterance's handlers are no-ops.
    utteranceRef.current = null;
    window.speechSynthesis.cancel();
    setSpeechState("idle");
    setSpeakingMessageId(null);
  }, [isSupported]);

  const speak = useCallback(
    (messageId: string, text: string) => {
      if (!isSupported || text.trim().length === 0) return;

      // Drop whatever is playing; its late events are ignored via the ref check below.
      utteranceRef.current = null;
      window.speechSynthesis.cancel();

      const utterance = new SpeechSynthesisUtterance(text);
      utterance.lang = "zh-CN";
      utterance.rate = 1;

      // True only while this utterance is still the one the hook is tracking.
      const isCurrent = () => utteranceRef.current === utterance;

      // Shared end/error handling: return to idle, but only for the active utterance.
      const finish = () => {
        if (!isCurrent()) return;
        utteranceRef.current = null;
        setSpeechState("idle");
        setSpeakingMessageId(null);
      };

      utterance.onend = finish;
      utterance.onerror = finish;
      utterance.onpause = () => {
        if (isCurrent()) setSpeechState("paused");
      };
      utterance.onresume = () => {
        if (isCurrent()) setSpeechState("playing");
      };

      utteranceRef.current = utterance;
      setSpeakingMessageId(messageId);
      setSpeechState("playing");
      window.speechSynthesis.speak(utterance);
    },
    [isSupported],
  );

  const pause = useCallback(() => {
    if (!isSupported) return;
    window.speechSynthesis.pause();
  }, [isSupported]);

  const resume = useCallback(() => {
    if (!isSupported) return;
    window.speechSynthesis.resume();
  }, [isSupported]);

  // Silence any ongoing speech when the owning component unmounts.
  useEffect(() => {
    return () => {
      // Detach first so late utterance events do not touch state after unmount.
      utteranceRef.current = null;
      if (typeof window !== "undefined" && "speechSynthesis" in window) {
        window.speechSynthesis.cancel();
      }
    };
  }, []);

  return { speechState, speakingMessageId, speak, pause, resume, stop, isSupported };
}
|
||||||
|
|
||||||
|
/**
 * Speech-to-text hook built on `window.SpeechRecognition` /
 * `window.webkitSpeechRecognition`.
 *
 * `recognitionRef` holds the recognizer of the session the hook currently
 * considers active. `end`/`error` events from any other recognizer (one that
 * was already stopped) are ignored so they cannot clear a newer session.
 *
 * @param onResult - Called with the transcript of every final result. The
 *   latest callback is always used, even if it changes between renders.
 * @returns
 *  - `isListening`: whether a session is active.
 *  - `start()`: begins a session; a no-op if unsupported or already listening.
 *  - `stop()`: ends the active session.
 *  - `isSupported`: whether either constructor exists on `window`.
 */
function useSpeechRecognition(onResult: (text: string) => void) {
  const [isListening, setIsListening] = useState(false);
  const recognitionRef = useRef<SpeechRecognition | null>(null);

  // Keep the newest callback in a ref so `start` does not need to be recreated.
  const onResultRef = useRef(onResult);
  useEffect(() => {
    onResultRef.current = onResult;
  }, [onResult]);

  const isSupported =
    typeof window !== "undefined" &&
    ("SpeechRecognition" in window || "webkitSpeechRecognition" in window);

  const start = useCallback(() => {
    if (!isSupported || recognitionRef.current) return;
    const Ctor = window.SpeechRecognition ?? window.webkitSpeechRecognition;
    if (!Ctor) return;

    const recognition = new Ctor();
    recognition.lang = "zh-CN";
    recognition.continuous = true;
    recognition.interimResults = false;

    recognition.onresult = (event: SpeechRecognitionEvent) => {
      // Only entries from `resultIndex` onward are new in this event.
      for (let i = event.resultIndex; i < event.results.length; i++) {
        if (event.results[i].isFinal) {
          onResultRef.current(event.results[i][0].transcript);
        }
      }
    };

    // Shared end/error handling: release the session, but only if this
    // recognizer is still the active one.
    const release = () => {
      if (recognitionRef.current !== recognition) return;
      recognitionRef.current = null;
      setIsListening(false);
    };
    recognition.onerror = release;
    recognition.onend = release;

    recognitionRef.current = recognition;
    try {
      recognition.start();
    } catch (error: unknown) {
      // If start() throws, no `end` event will follow; release the ref here,
      // otherwise every later start() would return early forever.
      recognitionRef.current = null;
      setIsListening(false);
      console.warn("Speech recognition failed to start", error);
      return;
    }
    setIsListening(true);
  }, [isSupported]);

  const stop = useCallback(() => {
    const active = recognitionRef.current;
    // Detach first so the stopped recognizer's late `end` event is a no-op.
    recognitionRef.current = null;
    active?.stop();
    setIsListening(false);
  }, []);

  // Tear down any active session when the owning component unmounts.
  useEffect(() => {
    return () => {
      const active = recognitionRef.current;
      recognitionRef.current = null;
      // abort() rather than stop(): nothing should be delivered after unmount.
      active?.abort();
    };
  }, []);

  return { isListening, start, stop, isSupported };
}
|
||||||
|
|
||||||
export const GlobalChatbox: React.FC<Props> = ({ open, onClose }) => {
|
export const GlobalChatbox: React.FC<Props> = ({ open, onClose }) => {
|
||||||
const initialChatStateRef = useRef<PersistedChatState | null>(null);
|
const initialChatStateRef = useRef<PersistedChatState | null>(null);
|
||||||
if (initialChatStateRef.current === null) {
|
if (initialChatStateRef.current === null) {
|
||||||
@@ -304,6 +542,28 @@ export const GlobalChatbox: React.FC<Props> = ({ open, onClose }) => {
|
|||||||
const inputRef = useRef<HTMLInputElement | null>(null);
|
const inputRef = useRef<HTMLInputElement | null>(null);
|
||||||
const theme = useTheme();
|
const theme = useTheme();
|
||||||
|
|
||||||
|
// --- Voice Features ---
|
||||||
|
const {
|
||||||
|
speechState,
|
||||||
|
speakingMessageId,
|
||||||
|
speak: handleSpeak,
|
||||||
|
pause: handlePauseSpeech,
|
||||||
|
resume: handleResumeSpeech,
|
||||||
|
stop: handleStopSpeech,
|
||||||
|
isSupported: isTtsSupported,
|
||||||
|
} = useSpeechSynthesis();
|
||||||
|
|
||||||
|
const handleSpeechResult = useCallback((text: string) => {
|
||||||
|
setInput((prev) => prev + text);
|
||||||
|
}, []);
|
||||||
|
|
||||||
|
const {
|
||||||
|
isListening,
|
||||||
|
start: startListening,
|
||||||
|
stop: stopListening,
|
||||||
|
isSupported: isSttSupported,
|
||||||
|
} = useSpeechRecognition(handleSpeechResult);
|
||||||
|
|
||||||
const canSend = useMemo(() => input.trim().length > 0 && !isStreaming, [input, isStreaming]);
|
const canSend = useMemo(() => input.trim().length > 0 && !isStreaming, [input, isStreaming]);
|
||||||
const isHeaderMenuOpen = Boolean(headerMenuAnchorEl);
|
const isHeaderMenuOpen = Boolean(headerMenuAnchorEl);
|
||||||
|
|
||||||
@@ -333,6 +593,7 @@ export const GlobalChatbox: React.FC<Props> = ({ open, onClose }) => {
|
|||||||
const handleSend = async () => {
|
const handleSend = async () => {
|
||||||
const prompt = input.trim();
|
const prompt = input.trim();
|
||||||
if (!prompt || isStreaming) return;
|
if (!prompt || isStreaming) return;
|
||||||
|
stopListening();
|
||||||
|
|
||||||
const userId = createId();
|
const userId = createId();
|
||||||
const assistantId = createId();
|
const assistantId = createId();
|
||||||
@@ -422,6 +683,8 @@ export const GlobalChatbox: React.FC<Props> = ({ open, onClose }) => {
|
|||||||
|
|
||||||
const handleNewConversation = useCallback(() => {
|
const handleNewConversation = useCallback(() => {
|
||||||
abortRef.current?.abort();
|
abortRef.current?.abort();
|
||||||
|
handleStopSpeech();
|
||||||
|
stopListening();
|
||||||
setMessages([]);
|
setMessages([]);
|
||||||
setConversationId(undefined);
|
setConversationId(undefined);
|
||||||
setInput("");
|
setInput("");
|
||||||
@@ -431,7 +694,7 @@ export const GlobalChatbox: React.FC<Props> = ({ open, onClose }) => {
|
|||||||
window.setTimeout(() => {
|
window.setTimeout(() => {
|
||||||
inputRef.current?.focus();
|
inputRef.current?.focus();
|
||||||
}, 0);
|
}, 0);
|
||||||
}, [handleHeaderMenuClose]);
|
}, [handleHeaderMenuClose, handleStopSpeech, stopListening]);
|
||||||
|
|
||||||
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
const handleMouseDown = useCallback((e: React.MouseEvent) => {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
@@ -469,9 +732,15 @@ export const GlobalChatbox: React.FC<Props> = ({ open, onClose }) => {
|
|||||||
key={message.id}
|
key={message.id}
|
||||||
message={message}
|
message={message}
|
||||||
theme={theme}
|
theme={theme}
|
||||||
|
messageSpeechState={speakingMessageId === message.id ? speechState : "idle"}
|
||||||
|
onSpeak={handleSpeak}
|
||||||
|
onPause={handlePauseSpeech}
|
||||||
|
onResume={handleResumeSpeech}
|
||||||
|
onStopSpeech={handleStopSpeech}
|
||||||
|
isTtsSupported={isTtsSupported}
|
||||||
/>
|
/>
|
||||||
)),
|
)),
|
||||||
[messages, theme],
|
[messages, theme, speechState, speakingMessageId, handleSpeak, handlePauseSpeech, handleResumeSpeech, handleStopSpeech, isTtsSupported],
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
@@ -756,6 +1025,45 @@ export const GlobalChatbox: React.FC<Props> = ({ open, onClose }) => {
|
|||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
{isSttSupported && (
|
||||||
|
<Box sx={{ display: "flex", alignItems: "center", mr: 1 }}>
|
||||||
|
{isListening ? (
|
||||||
|
<motion.div
|
||||||
|
animate={{ scale: [1, 1.15, 1] }}
|
||||||
|
transition={{ duration: 1.5, repeat: Infinity, ease: "easeInOut" }}
|
||||||
|
>
|
||||||
|
<IconButton
|
||||||
|
onClick={stopListening}
|
||||||
|
aria-label="停止语音输入"
|
||||||
|
sx={{
|
||||||
|
color: "error.main",
|
||||||
|
bgcolor: alpha(theme.palette.error.main, 0.1),
|
||||||
|
width: 44,
|
||||||
|
height: 44,
|
||||||
|
"&:hover": { bgcolor: alpha(theme.palette.error.main, 0.2) },
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<MicRounded />
|
||||||
|
</IconButton>
|
||||||
|
</motion.div>
|
||||||
|
) : (
|
||||||
|
<IconButton
|
||||||
|
onClick={startListening}
|
||||||
|
disabled={isStreaming}
|
||||||
|
aria-label="语音输入"
|
||||||
|
sx={{
|
||||||
|
color: "text.secondary",
|
||||||
|
width: 44,
|
||||||
|
height: 44,
|
||||||
|
"&:hover": { color: "primary.main" },
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<MicRounded />
|
||||||
|
</IconButton>
|
||||||
|
)}
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
|
||||||
<Box sx={{ pr: 0.5 }}>
|
<Box sx={{ pr: 0.5 }}>
|
||||||
<AnimatePresence mode="wait">
|
<AnimatePresence mode="wait">
|
||||||
{isStreaming ? (
|
{isStreaming ? (
|
||||||
|
|||||||
Reference in New Issue
Block a user