| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007 |
- <!doctype html>
- <html lang="en" class="no-js">
- <head>
-
- <meta charset="utf-8">
- <meta name="viewport" content="width=device-width,initial-scale=1">
-
- <meta name="description" content="Targeting SOTA TTS solutions.">
-
-
-
- <link rel="canonical" href="https://speech.fish.audio/inference/">
-
-
- <link rel="prev" href="../finetune/">
-
-
- <link rel="next" href="../server/">
-
-
-
- <link rel="alternate" href="./" hreflang="en">
-
- <link rel="alternate" href="../zh/inference/" hreflang="zh">
-
- <link rel="alternate" href="../ja/inference/" hreflang="ja">
-
- <link rel="alternate" href="../pt/inference/" hreflang="pt">
-
- <link rel="alternate" href="../ko/inference/" hreflang="ko">
-
- <link rel="alternate" href="../ar/inference/" hreflang="ar">
-
-
-
- <link rel="icon" href="../assets/logo.svg">
- <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.1">
-
-
-
- <title>Inference - Fish Audio</title>
-
-
-
- <link rel="stylesheet" href="../assets/stylesheets/main.484c7ddc.min.css">
-
-
- <link rel="stylesheet" href="../assets/stylesheets/palette.ab4e12ef.min.css">
-
-
-
-
-
-
-
-
-
-
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
- <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
- <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
-
-
-
- <link rel="stylesheet" href="../stylesheets/extra.css">
-
- <script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
-
-
-
-
- </head>
-
-
-
-
-
-
-
-
-
- <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
-
-
- <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
- <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
- <label class="md-overlay" for="__drawer"></label>
- <div data-md-component="skip">
-
-
- <a href="#inference" class="md-skip">
- Skip to content
- </a>
-
- </div>
- <div data-md-component="announce">
-
- </div>
-
-
-
-
- <header class="md-header md-header--shadow" data-md-component="header">
- <nav class="md-header__inner md-grid" aria-label="Header">
- <a href="https://speech.fish.audio" title="Fish Audio" class="md-header__button md-logo" aria-label="Fish Audio" data-md-component="logo">
-
- <img src="../assets/logo.svg" alt="logo">
- </a>
- <label class="md-header__button md-icon" for="__drawer">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
- </label>
- <div class="md-header__title" data-md-component="header-title">
- <div class="md-header__ellipsis">
- <div class="md-header__topic">
- <span class="md-ellipsis">
- Fish Audio
- </span>
- </div>
- <div class="md-header__topic" data-md-component="header-topic">
- <span class="md-ellipsis">
-
- Inference
-
- </span>
- </div>
- </div>
- </div>
-
-
- <form class="md-header__option" data-md-component="palette">
-
-
-
-
- <input class="md-option" data-md-color-media="(prefers-color-scheme)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_0">
-
- <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9zM20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12zm-9.15 3.96h2.3L12 9z"/></svg>
- </label>
-
-
-
-
-
- <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
-
- <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
- </label>
-
-
-
-
-
- <input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
-
- <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
- </label>
-
-
- </form>
-
-
-
- <script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
-
-
- <div class="md-header__option">
- <div class="md-select">
-
- <button class="md-header__button md-icon" aria-label="Select language">
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
- </button>
- <div class="md-select__inner">
- <ul class="md-select__list">
-
- <li class="md-select__item">
- <a href="./" hreflang="en" class="md-select__link">
- English
- </a>
- </li>
-
- <li class="md-select__item">
- <a href="../zh/inference/" hreflang="zh" class="md-select__link">
- 简体中文
- </a>
- </li>
-
- <li class="md-select__item">
- <a href="../ja/inference/" hreflang="ja" class="md-select__link">
- 日本語
- </a>
- </li>
-
- <li class="md-select__item">
- <a href="../pt/inference/" hreflang="pt" class="md-select__link">
- Português (Brasil)
- </a>
- </li>
-
- <li class="md-select__item">
- <a href="../ko/inference/" hreflang="ko" class="md-select__link">
- 한국어
- </a>
- </li>
-
- <li class="md-select__item">
- <a href="../ar/inference/" hreflang="ar" class="md-select__link">
- العربية
- </a>
- </li>
-
- </ul>
- </div>
- </div>
- </div>
-
-
-
-
- <label class="md-header__button md-icon" for="__search">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
- </label>
- <div class="md-search" data-md-component="search" role="dialog">
- <label class="md-search__overlay" for="__search"></label>
- <div class="md-search__inner" role="search">
- <form class="md-search__form" name="search">
- <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
- <label class="md-search__icon md-icon" for="__search">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
- </label>
- <nav class="md-search__options" aria-label="Search">
-
- <a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
- </a>
-
- <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
- </button>
- </nav>
-
- <div class="md-search__suggest" data-md-component="search-suggest"></div>
-
- </form>
- <div class="md-search__output">
- <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
- <div class="md-search-result" data-md-component="search-result">
- <div class="md-search-result__meta">
- Initializing search
- </div>
- <ol class="md-search-result__list" role="presentation"></ol>
- </div>
- </div>
- </div>
- </div>
- </div>
-
-
-
- <div class="md-header__source">
- <a href="https://github.com/fishaudio/fish-speech" title="Go to repository" class="md-source" data-md-component="source">
- <div class="md-source__icon md-icon">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
- </div>
- <div class="md-source__repository">
- fishaudio/fish-speech
- </div>
- </a>
- </div>
-
- </nav>
-
- </header>
-
- <div class="md-container" data-md-component="container">
-
-
-
-
-
-
- <main class="md-main" data-md-component="main">
- <div class="md-main__inner md-grid">
-
-
-
- <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
- <div class="md-sidebar__scrollwrap">
- <div class="md-sidebar__inner">
-
- <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
- <label class="md-nav__title" for="__drawer">
- <a href="https://speech.fish.audio" title="Fish Audio" class="md-nav__button md-logo" aria-label="Fish Audio" data-md-component="logo">
-
- <img src="../assets/logo.svg" alt="logo">
- </a>
- Fish Audio
- </label>
-
- <div class="md-nav__source">
- <a href="https://github.com/fishaudio/fish-speech" title="Go to repository" class="md-source" data-md-component="source">
- <div class="md-source__icon md-icon">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
- </div>
- <div class="md-source__repository">
- fishaudio/fish-speech
- </div>
- </a>
- </div>
-
- <ul class="md-nav__list" data-md-scrollfix>
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href=".." class="md-nav__link">
-
-
-
- <span class="md-ellipsis">
-
-
- Introduction
-
-
- </span>
-
-
- </a>
- </li>
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../install/" class="md-nav__link">
-
-
-
- <span class="md-ellipsis">
-
-
- Installation
-
-
- </span>
-
-
- </a>
- </li>
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../finetune/" class="md-nav__link">
-
-
-
- <span class="md-ellipsis">
-
-
- Finetune
-
-
- </span>
-
-
- </a>
- </li>
-
-
-
-
-
-
-
-
-
-
- <li class="md-nav__item md-nav__item--active">
-
- <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
-
-
-
-
-
- <label class="md-nav__link md-nav__link--active" for="__toc">
-
-
-
- <span class="md-ellipsis">
-
-
- Inference
-
-
- </span>
-
-
- <span class="md-nav__icon md-icon"></span>
- </label>
-
- <a href="./" class="md-nav__link md-nav__link--active">
-
-
-
- <span class="md-ellipsis">
-
-
- Inference
-
-
- </span>
-
-
- </a>
-
-
- <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-
-
-
-
-
-
- <label class="md-nav__title" for="__toc">
- <span class="md-nav__icon md-icon"></span>
- Table of contents
- </label>
- <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-
- <li class="md-nav__item">
- <a href="#download-weights" class="md-nav__link">
- <span class="md-ellipsis">
-
- Download Weights
-
- </span>
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#command-line-inference" class="md-nav__link">
- <span class="md-ellipsis">
-
- Command Line Inference
-
- </span>
- </a>
-
- <nav class="md-nav" aria-label="Command Line Inference">
- <ul class="md-nav__list">
-
- <li class="md-nav__item">
- <a href="#1-get-vq-tokens-from-reference-audio" class="md-nav__link">
- <span class="md-ellipsis">
-
- 1. Get VQ tokens from reference audio
-
- </span>
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#2-generate-semantic-tokens-from-text" class="md-nav__link">
- <span class="md-ellipsis">
-
- 2. Generate Semantic tokens from text:
-
- </span>
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#3-generate-vocals-from-semantic-tokens" class="md-nav__link">
- <span class="md-ellipsis">
-
- 3. Generate vocals from semantic tokens:
-
- </span>
- </a>
-
- </li>
-
- </ul>
- </nav>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#webui-inference" class="md-nav__link">
- <span class="md-ellipsis">
-
- WebUI Inference
-
- </span>
- </a>
-
- <nav class="md-nav" aria-label="WebUI Inference">
- <ul class="md-nav__list">
-
- <li class="md-nav__item">
- <a href="#1-gradio-webui" class="md-nav__link">
- <span class="md-ellipsis">
-
- 1. Gradio WebUI
-
- </span>
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#2-awesome-webui" class="md-nav__link">
- <span class="md-ellipsis">
-
- 2. Awesome WebUI
-
- </span>
- </a>
-
- </li>
-
- </ul>
- </nav>
-
- </li>
-
- </ul>
-
- </nav>
-
- </li>
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../server/" class="md-nav__link">
-
-
-
- <span class="md-ellipsis">
-
-
- Server
-
-
- </span>
-
-
- </a>
- </li>
-
-
-
-
-
-
-
-
- <li class="md-nav__item">
- <a href="../en/samples.md" class="md-nav__link">
-
-
-
- <span class="md-ellipsis">
-
-
- Samples
-
-
- </span>
-
-
- </a>
- </li>
-
-
- </ul>
- </nav>
- </div>
- </div>
- </div>
-
-
-
- <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
- <div class="md-sidebar__scrollwrap">
- <div class="md-sidebar__inner">
-
- <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-
-
-
-
-
-
- <label class="md-nav__title" for="__toc">
- <span class="md-nav__icon md-icon"></span>
- Table of contents
- </label>
- <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
-
- <li class="md-nav__item">
- <a href="#download-weights" class="md-nav__link">
- <span class="md-ellipsis">
-
- Download Weights
-
- </span>
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#command-line-inference" class="md-nav__link">
- <span class="md-ellipsis">
-
- Command Line Inference
-
- </span>
- </a>
-
- <nav class="md-nav" aria-label="Command Line Inference">
- <ul class="md-nav__list">
-
- <li class="md-nav__item">
- <a href="#1-get-vq-tokens-from-reference-audio" class="md-nav__link">
- <span class="md-ellipsis">
-
- 1. Get VQ tokens from reference audio
-
- </span>
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#2-generate-semantic-tokens-from-text" class="md-nav__link">
- <span class="md-ellipsis">
-
- 2. Generate Semantic tokens from text:
-
- </span>
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#3-generate-vocals-from-semantic-tokens" class="md-nav__link">
- <span class="md-ellipsis">
-
- 3. Generate vocals from semantic tokens:
-
- </span>
- </a>
-
- </li>
-
- </ul>
- </nav>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#webui-inference" class="md-nav__link">
- <span class="md-ellipsis">
-
- WebUI Inference
-
- </span>
- </a>
-
- <nav class="md-nav" aria-label="WebUI Inference">
- <ul class="md-nav__list">
-
- <li class="md-nav__item">
- <a href="#1-gradio-webui" class="md-nav__link">
- <span class="md-ellipsis">
-
- 1. Gradio WebUI
-
- </span>
- </a>
-
- </li>
-
- <li class="md-nav__item">
- <a href="#2-awesome-webui" class="md-nav__link">
- <span class="md-ellipsis">
-
- 2. Awesome WebUI
-
- </span>
- </a>
-
- </li>
-
- </ul>
- </nav>
-
- </li>
-
- </ul>
-
- </nav>
- </div>
- </div>
- </div>
-
-
-
- <div class="md-content" data-md-component="content">
-
- <article class="md-content__inner md-typeset">
-
-
-
- <a href="https://github.com/fishaudio/fish-speech/blob/main/docs/en/inference.md" title="Edit this page" class="md-content__button md-icon" rel="edit">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 20H6V4h7v5h5v3.1l2-2V8l-6-6H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h4zm10.2-7c.1 0 .3.1.4.2l1.3 1.3c.2.2.2.6 0 .8l-1 1-2.1-2.1 1-1c.1-.1.2-.2.4-.2m0 3.9L14.1 23H12v-2.1l6.1-6.1z"/></svg>
- </a>
-
-
-
-
-
- <a href="https://github.com/fishaudio/fish-speech/raw/main/docs/en/inference.md" title="View source of this page" class="md-content__button md-icon">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 18c.56 0 1 .44 1 1s-.44 1-1 1-1-.44-1-1 .44-1 1-1m0-3c-2.73 0-5.06 1.66-6 4 .94 2.34 3.27 4 6 4s5.06-1.66 6-4c-.94-2.34-3.27-4-6-4m0 6.5a2.5 2.5 0 0 1-2.5-2.5 2.5 2.5 0 0 1 2.5-2.5 2.5 2.5 0 0 1 2.5 2.5 2.5 2.5 0 0 1-2.5 2.5M9.27 20H6V4h7v5h5v4.07c.7.08 1.36.25 2 .49V8l-6-6H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h4.5a8.2 8.2 0 0 1-1.23-2"/></svg>
- </a>
-
- <h1 id="inference">Inference</h1>
- <p>The Fish Audio S2 model requires a large amount of VRAM. We recommend using a GPU with at least 24GB for inference.</p>
- <h2 id="download-weights">Download Weights</h2>
- <p>First, you need to download the model weights:</p>
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>hf<span class="w"> </span>download<span class="w"> </span>fishaudio/s2-pro<span class="w"> </span>--local-dir<span class="w"> </span>checkpoints/s2-pro
- </span></code></pre></div>
- <h2 id="command-line-inference">Command Line Inference</h2>
- <div class="admonition note">
- <p class="admonition-title">Note</p>
- <p>If you plan to let the model randomly choose a voice timbre, you can skip this step.</p>
- </div>
- <h3 id="1-get-vq-tokens-from-reference-audio">1. Get VQ tokens from reference audio</h3>
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>python<span class="w"> </span>fish_speech/models/dac/inference.py<span class="w"> </span><span class="se">\</span>
- </span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="w"> </span>-i<span class="w"> </span><span class="s2">"test.wav"</span><span class="w"> </span><span class="se">\</span>
- </span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="w"> </span>--checkpoint-path<span class="w"> </span><span class="s2">"checkpoints/s2-pro/codec.pth"</span>
- </span></code></pre></div>
- <p>You should get a <code>fake.npy</code> and a <code>fake.wav</code>.</p>
- <h3 id="2-generate-semantic-tokens-from-text">2. Generate Semantic tokens from text:</h3>
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a>python<span class="w"> </span>fish_speech/models/text2semantic/inference.py<span class="w"> </span><span class="se">\</span>
- </span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="w"> </span>--text<span class="w"> </span><span class="s2">"The text you want to convert"</span><span class="w"> </span><span class="se">\</span>
- </span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="w"> </span>--prompt-text<span class="w"> </span><span class="s2">"Your reference text"</span><span class="w"> </span><span class="se">\</span>
- </span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a><span class="w"> </span>--prompt-tokens<span class="w"> </span><span class="s2">"fake.npy"</span><span class="w"> </span><span class="se">\</span>
- </span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="w"> </span><span class="c1"># --compile</span>
- </span></code></pre></div>
- <p>This command will create a <code>codes_N</code> file in the working directory, where N is an integer starting from 0.</p>
- <div class="admonition note">
- <p class="admonition-title">Note</p>
- <p>You may want to use <code>--compile</code> to fuse CUDA kernels for faster inference. However, we recommend using our sglang inference acceleration optimization.
- Correspondingly, if you do not plan to use acceleration, you can comment out the <code>--compile</code> parameter.</p>
- </div>
- <div class="admonition info">
- <p class="admonition-title">Info</p>
- <p>For GPUs that do not support bf16, you may need to use the <code>--half</code> parameter.</p>
- </div>
- <h3 id="3-generate-vocals-from-semantic-tokens">3. Generate vocals from semantic tokens:</h3>
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a>python<span class="w"> </span>fish_speech/models/dac/inference.py<span class="w"> </span><span class="se">\</span>
- </span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="w"> </span>-i<span class="w"> </span><span class="s2">"codes_0.npy"</span><span class="w"> </span><span class="se">\</span>
- </span></code></pre></div>
- <p>After that, you will get a <code>fake.wav</code> file.</p>
- <h2 id="webui-inference">WebUI Inference</h2>
- <h3 id="1-gradio-webui">1. Gradio WebUI</h3>
- <p>For compatibility, we still maintain the Gradio WebUI.</p>
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a>python<span class="w"> </span>tools/run_webui.py<span class="w"> </span><span class="c1"># --compile if you need acceleration</span>
- </span></code></pre></div>
- <h3 id="2-awesome-webui">2. Awesome WebUI</h3>
- <p>Awesome WebUI is a modernized Web interface built with TypeScript, offering richer features and a better user experience.</p>
- <p><strong>Build WebUI:</strong></p>
- <p>You need to have Node.js and npm installed on your local machine or server.</p>
- <ol>
- <li>Enter the <code>awesome_webui</code> directory:
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="nb">cd</span><span class="w"> </span>awesome_webui
- </span></code></pre></div></li>
- <li>Install dependencies:
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a>npm<span class="w"> </span>install
- </span></code></pre></div></li>
- <li>Build the WebUI:
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-7-1"><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>npm<span class="w"> </span>run<span class="w"> </span>build
- </span></code></pre></div></li>
- </ol>
- <p><strong>Start Backend Server:</strong></p>
- <p>After building the WebUI, return to the project root and start the API server:</p>
- <div class="language-bash highlight"><pre><span></span><code><span id="__span-8-1"><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a>python<span class="w"> </span>tools/api_server.py<span class="w"> </span>--listen<span class="w"> </span><span class="m">0</span>.0.0.0:8888<span class="w"> </span>--compile
- </span></code></pre></div>
- <p><strong>Access:</strong></p>
- <p>Once the server is running, you can access it via your browser:
- <code>http://localhost:8888/ui</code></p>
-
- </article>
- </div>
-
-
- <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
- </div>
-
- </main>
-
- <footer class="md-footer">
-
-
-
- <nav class="md-footer__inner md-grid" aria-label="Footer" >
-
-
- <a href="../finetune/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Finetune">
- <div class="md-footer__button md-icon">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
- </div>
- <div class="md-footer__title">
- <span class="md-footer__direction">
- Previous
- </span>
- <div class="md-ellipsis">
- Finetune
- </div>
- </div>
- </a>
-
-
-
- <a href="../server/" class="md-footer__link md-footer__link--next" aria-label="Next: Server">
- <div class="md-footer__title">
- <span class="md-footer__direction">
- Next
- </span>
- <div class="md-ellipsis">
- Server
- </div>
- </div>
- <div class="md-footer__button md-icon">
-
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
- </div>
- </a>
-
- </nav>
-
-
- <div class="md-footer-meta md-typeset">
- <div class="md-footer-meta__inner md-grid">
- <div class="md-copyright">
-
- <div class="md-copyright__highlight">
- Copyright © 2023-2025 by Fish Audio
- </div>
-
-
- Made with
- <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
- Material for MkDocs
- </a>
-
- </div>
-
-
- <div class="md-social">
-
-
-
-
-
-
-
-
- <a href="https://discord.gg/Es5qTB9BcN" target="_blank" rel="noopener" title="discord.gg" class="md-social__link">
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M492.5 69.8c-.2-.3-.4-.6-.8-.7-38.1-17.5-78.4-30-119.7-37.1-.4-.1-.8 0-1.1.1s-.6.4-.8.8c-5.5 9.9-10.5 20.2-14.9 30.6-44.6-6.8-89.9-6.8-134.4 0-4.5-10.5-9.5-20.7-15.1-30.6-.2-.3-.5-.6-.8-.8s-.7-.2-1.1-.2C162.5 39 122.2 51.5 84.1 69c-.3.1-.6.4-.8.7C7.1 183.5-13.8 294.6-3.6 404.2c0 .3.1.5.2.8s.3.4.5.6c44.4 32.9 94 58 146.8 74.2.4.1.8.1 1.1 0s.7-.4.9-.7c11.3-15.4 21.4-31.8 30-48.8.1-.2.2-.5.2-.8s0-.5-.1-.8-.2-.5-.4-.6-.4-.3-.7-.4c-15.8-6.1-31.2-13.4-45.9-21.9-.3-.2-.5-.4-.7-.6s-.3-.6-.3-.9 0-.6.2-.9.3-.5.6-.7c3.1-2.3 6.2-4.7 9.1-7.1.3-.2.6-.4.9-.4s.7 0 1 .1c96.2 43.9 200.4 43.9 295.5 0 .3-.1.7-.2 1-.2s.7.2.9.4c2.9 2.4 6 4.9 9.1 7.2.2.2.4.4.6.7s.2.6.2.9-.1.6-.3.9-.4.5-.6.6c-14.7 8.6-30 15.9-45.9 21.8-.2.1-.5.2-.7.4s-.3.4-.4.7-.1.5-.1.8.1.5.2.8c8.8 17 18.8 33.3 30 48.8.2.3.6.6.9.7s.8.1 1.1 0c52.9-16.2 102.6-41.3 147.1-74.2.2-.2.4-.4.5-.6s.2-.5.2-.8c12.3-126.8-20.5-236.9-86.9-334.5zm-302 267.7c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.4 59.2-52.8 59.2m195.4 0c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.2 59.2-52.8 59.2"/></svg>
- </a>
-
-
-
-
-
-
-
-
- <a href="https://hub.docker.com/r/fishaudio/fish-speech" target="_blank" rel="noopener" title="hub.docker.com" class="md-social__link">
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M349.9 236.3h-66.1v-59.4h66.1zm0-204.3h-66.1v60.7h66.1zm78.2 144.8H362v59.4h66.1zm-156.3-72.1h-66.1v60.1h66.1zm78.1 0h-66.1v60.1h66.1zm276.8 100c-14.4-9.7-47.6-13.2-73.1-8.4-3.3-24-16.7-44.9-41.1-63.7l-14-9.3-9.3 14c-18.4 27.8-23.4 73.6-3.7 103.8-8.7 4.7-25.8 11.1-48.4 10.7H2.4c-8.7 50.8 5.8 116.8 44 162.1 37.1 43.9 92.7 66.2 165.4 66.2 157.4 0 273.9-72.5 328.4-204.2 21.4.4 67.6.1 91.3-45.2 1.5-2.5 6.6-13.2 8.5-17.1zm-511.1-27.9h-66v59.4h66.1v-59.4zm78.1 0h-66.1v59.4h66.1zm78.1 0h-66.1v59.4h66.1zm-78.1-72.1h-66.1v60.1h66.1z"/></svg>
- </a>
-
-
-
-
-
-
-
-
- <a href="http://qm.qq.com/cgi-bin/qm/qr?_wv=1027&k=jCKlUP7QgSm9kh95UlBoYv6s1I-Apl1M&authKey=xI5ttVAp3do68IpEYEalwXSYZFdfxZSkah%2BctF5FIMyN2NqAa003vFtLqJyAVRfF&noverify=0&group_code=593946093" target="_blank" rel="noopener" title="qm.qq.com" class="md-social__link">
- <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M434.1 420.4c-11.5 1.4-44.9-52.7-44.9-52.7 0 31.3-16.1 72.2-51 101.8 16.8 5.2 54.8 19.2 45.8 34.4-7.3 12.3-125.5 7.9-159.6 4-34.1 3.8-152.3 8.3-159.6-4-9-15.2 28.9-29.2 45.8-34.4-34.9-29.5-51.1-70.4-51.1-101.8 0 0-33.3 54.1-44.9 52.7-5.4-.6-12.4-29.6 9.3-99.7 10.3-33 22-60.5 40.1-105.8C60.9 98 109.2-.1 224.3-.1 338-.1 387.5 96 384.6 214.9c18.1 45.2 29.9 72.9 40.1 105.8 21.8 70.1 14.7 99.1 9.3 99.7z"/></svg>
- </a>
-
- </div>
-
- </div>
- </div>
- </footer>
-
- </div>
- <div class="md-dialog" data-md-component="dialog">
- <div class="md-dialog__inner md-typeset"></div>
- </div>
-
-
-
-
-
- <script id="__config" type="application/json">{"annotate": null, "base": "..", "features": ["content.action.edit", "content.action.view", "navigation.tracking", "navigation.footer", "search", "search.suggest", "search.highlight", "search.share", "content.code.copy"], "search": "../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
-
-
- <script src="../assets/javascripts/bundle.79ae519e.min.js"></script>
-
-
- </body>
- </html>
|