index.html 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007
  1. <!doctype html>
  2. <html lang="en" class="no-js">
  3. <head>
  4. <meta charset="utf-8">
  5. <meta name="viewport" content="width=device-width,initial-scale=1">
  6. <meta name="description" content="Targeting SOTA TTS solutions.">
  7. <link rel="canonical" href="https://speech.fish.audio/inference/">
  8. <link rel="prev" href="../finetune/">
  9. <link rel="next" href="../server/">
  10. <link rel="alternate" href="./" hreflang="en">
  11. <link rel="alternate" href="../zh/inference/" hreflang="zh">
  12. <link rel="alternate" href="../ja/inference/" hreflang="ja">
  13. <link rel="alternate" href="../pt/inference/" hreflang="pt">
  14. <link rel="alternate" href="../ko/inference/" hreflang="ko">
  15. <link rel="alternate" href="../ar/inference/" hreflang="ar">
  16. <link rel="icon" href="../assets/logo.svg">
  17. <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.1">
  18. <title>Inference - Fish Audio</title>
  19. <link rel="stylesheet" href="../assets/stylesheets/main.484c7ddc.min.css">
  20. <link rel="stylesheet" href="../assets/stylesheets/palette.ab4e12ef.min.css">
  21. <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  22. <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
  23. <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
  24. <link rel="stylesheet" href="../stylesheets/extra.css">
  25. <script>
/*
 * Bootstrap helpers shared with the rest of the page's scripts.
 * All four names are assigned without a declaration, so they become globals.
 */

// URL one level above the current page; its pathname namespaces storage keys.
__md_scope = new URL("..", location);

// Folds the characters of a string into a number: h = (h << 5) - h + charCode.
__md_hash = text => {
  let hash = 0;
  for (const ch of text) {
    hash = (hash << 5) - hash + ch.charCodeAt(0);
  }
  return hash;
};

// Reads and JSON-parses the entry "<scope pathname>.<key>".
// Storage defaults to localStorage, scope defaults to __md_scope.
__md_get = (key, storage = localStorage, scope = __md_scope) => {
  const raw = storage.getItem(scope.pathname + "." + key);
  return JSON.parse(raw);
};

// JSON-serializes a value into the entry "<scope pathname>.<key>".
// Any exception raised while writing is deliberately swallowed.
__md_set = (key, value, storage = localStorage, scope = __md_scope) => {
  try {
    storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
  } catch (err) {
    /* ignored on purpose */
  }
};
</script>
  26. </head>
  27. <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
  28. <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
  29. <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
  30. <label class="md-overlay" for="__drawer"></label>
  31. <div data-md-component="skip">
  32. <a href="#inference" class="md-skip">
  33. Skip to content
  34. </a>
  35. </div>
  36. <div data-md-component="announce">
  37. </div>
  38. <header class="md-header md-header--shadow" data-md-component="header">
  39. <nav class="md-header__inner md-grid" aria-label="Header">
  40. <a href="https://speech.fish.audio" title="Fish Audio" class="md-header__button md-logo" aria-label="Fish Audio" data-md-component="logo">
  41. <img src="../assets/logo.svg" alt="logo">
  42. </a>
  43. <label class="md-header__button md-icon" for="__drawer">
  44. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
  45. </label>
  46. <div class="md-header__title" data-md-component="header-title">
  47. <div class="md-header__ellipsis">
  48. <div class="md-header__topic">
  49. <span class="md-ellipsis">
  50. Fish Audio
  51. </span>
  52. </div>
  53. <div class="md-header__topic" data-md-component="header-topic">
  54. <span class="md-ellipsis">
  55. Inference
  56. </span>
  57. </div>
  58. </div>
  59. </div>
  60. <form class="md-header__option" data-md-component="palette">
  61. <input class="md-option" data-md-color-media="(prefers-color-scheme)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_0">
  62. <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
  63. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9zM20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12zm-9.15 3.96h2.3L12 9z"/></svg>
  64. </label>
  65. <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
  66. <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
  67. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
  68. </label>
  69. <input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
  70. <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
  71. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
  72. </label>
  73. </form>
  74. <script>
/*
 * Re-applies the color palette stored under "__palette" to the
 * data-md-color-* attributes of <body>.
 */
var palette = __md_get("__palette");

if (palette && palette.color) {
  // A stored media value of "(prefers-color-scheme)" is resolved against the
  // current matchMedia result: the matching light or dark palette input is
  // looked up and its data-md-color-* values are copied into the stored color.
  if (palette.color.media === "(prefers-color-scheme)") {
    var media = matchMedia("(prefers-color-scheme: light)"),
        input;

    if (media.matches) {
      input = document.querySelector("[data-md-color-media='(prefers-color-scheme: light)']");
    } else {
      input = document.querySelector("[data-md-color-media='(prefers-color-scheme: dark)']");
    }

    palette.color.media = input.getAttribute("data-md-color-media");
    palette.color.scheme = input.getAttribute("data-md-color-scheme");
    palette.color.primary = input.getAttribute("data-md-color-primary");
    palette.color.accent = input.getAttribute("data-md-color-accent");
  }

  // Mirror every entry of the color object onto <body>.
  for (var [key, value] of Object.entries(palette.color)) {
    document.body.setAttribute("data-md-color-" + key, value);
  }
}
</script>
  75. <div class="md-header__option">
  76. <div class="md-select">
  77. <button class="md-header__button md-icon" aria-label="Select language">
  78. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
  79. </button>
  80. <div class="md-select__inner">
  81. <ul class="md-select__list">
  82. <li class="md-select__item">
  83. <a href="./" hreflang="en" class="md-select__link">
  84. English
  85. </a>
  86. </li>
  87. <li class="md-select__item">
  88. <a href="../zh/inference/" hreflang="zh" class="md-select__link">
  89. 简体中文
  90. </a>
  91. </li>
  92. <li class="md-select__item">
  93. <a href="../ja/inference/" hreflang="ja" class="md-select__link">
  94. 日本語
  95. </a>
  96. </li>
  97. <li class="md-select__item">
  98. <a href="../pt/inference/" hreflang="pt" class="md-select__link">
  99. Português (Brasil)
  100. </a>
  101. </li>
  102. <li class="md-select__item">
  103. <a href="../ko/inference/" hreflang="ko" class="md-select__link">
  104. 한국어
  105. </a>
  106. </li>
  107. <li class="md-select__item">
  108. <a href="../ar/inference/" hreflang="ar" class="md-select__link">
  109. العربية
  110. </a>
  111. </li>
  112. </ul>
  113. </div>
  114. </div>
  115. </div>
  116. <label class="md-header__button md-icon" for="__search">
  117. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
  118. </label>
  119. <div class="md-search" data-md-component="search" role="dialog">
  120. <label class="md-search__overlay" for="__search"></label>
  121. <div class="md-search__inner" role="search">
  122. <form class="md-search__form" name="search">
  123. <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
  124. <label class="md-search__icon md-icon" for="__search">
  125. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
  126. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
  127. </label>
  128. <nav class="md-search__options" aria-label="Search">
  129. <a href="javascript:void(0)" class="md-search__icon md-icon" title="Share" aria-label="Share" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
  130. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
  131. </a>
  132. <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
  133. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
  134. </button>
  135. </nav>
  136. <div class="md-search__suggest" data-md-component="search-suggest"></div>
  137. </form>
  138. <div class="md-search__output">
  139. <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
  140. <div class="md-search-result" data-md-component="search-result">
  141. <div class="md-search-result__meta">
  142. Initializing search
  143. </div>
  144. <ol class="md-search-result__list" role="presentation"></ol>
  145. </div>
  146. </div>
  147. </div>
  148. </div>
  149. </div>
  150. <div class="md-header__source">
  151. <a href="https://github.com/fishaudio/fish-speech" title="Go to repository" class="md-source" data-md-component="source">
  152. <div class="md-source__icon md-icon">
  153. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
  154. </div>
  155. <div class="md-source__repository">
  156. fishaudio/fish-speech
  157. </div>
  158. </a>
  159. </div>
  160. </nav>
  161. </header>
  162. <div class="md-container" data-md-component="container">
  163. <main class="md-main" data-md-component="main">
  164. <div class="md-main__inner md-grid">
  165. <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
  166. <div class="md-sidebar__scrollwrap">
  167. <div class="md-sidebar__inner">
  168. <nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
  169. <label class="md-nav__title" for="__drawer">
  170. <a href="https://speech.fish.audio" title="Fish Audio" class="md-nav__button md-logo" aria-label="Fish Audio" data-md-component="logo">
  171. <img src="../assets/logo.svg" alt="logo">
  172. </a>
  173. Fish Audio
  174. </label>
  175. <div class="md-nav__source">
  176. <a href="https://github.com/fishaudio/fish-speech" title="Go to repository" class="md-source" data-md-component="source">
  177. <div class="md-source__icon md-icon">
  178. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
  179. </div>
  180. <div class="md-source__repository">
  181. fishaudio/fish-speech
  182. </div>
  183. </a>
  184. </div>
  185. <ul class="md-nav__list" data-md-scrollfix>
  186. <li class="md-nav__item">
  187. <a href=".." class="md-nav__link">
  188. <span class="md-ellipsis">
  189. Introduction
  190. </span>
  191. </a>
  192. </li>
  193. <li class="md-nav__item">
  194. <a href="../install/" class="md-nav__link">
  195. <span class="md-ellipsis">
  196. Installation
  197. </span>
  198. </a>
  199. </li>
  200. <li class="md-nav__item">
  201. <a href="../finetune/" class="md-nav__link">
  202. <span class="md-ellipsis">
  203. Finetune
  204. </span>
  205. </a>
  206. </li>
  207. <li class="md-nav__item md-nav__item--active">
  208. <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
  209. <label class="md-nav__link md-nav__link--active" for="__toc">
  210. <span class="md-ellipsis">
  211. Inference
  212. </span>
  213. <span class="md-nav__icon md-icon"></span>
  214. </label>
  215. <a href="./" class="md-nav__link md-nav__link--active">
  216. <span class="md-ellipsis">
  217. Inference
  218. </span>
  219. </a>
  220. <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  221. <label class="md-nav__title" for="__toc">
  222. <span class="md-nav__icon md-icon"></span>
  223. Table of contents
  224. </label>
  225. <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
  226. <li class="md-nav__item">
  227. <a href="#download-weights" class="md-nav__link">
  228. <span class="md-ellipsis">
  229. Download Weights
  230. </span>
  231. </a>
  232. </li>
  233. <li class="md-nav__item">
  234. <a href="#command-line-inference" class="md-nav__link">
  235. <span class="md-ellipsis">
  236. Command Line Inference
  237. </span>
  238. </a>
  239. <nav class="md-nav" aria-label="Command Line Inference">
  240. <ul class="md-nav__list">
  241. <li class="md-nav__item">
  242. <a href="#1-get-vq-tokens-from-reference-audio" class="md-nav__link">
  243. <span class="md-ellipsis">
  244. 1. Get VQ tokens from reference audio
  245. </span>
  246. </a>
  247. </li>
  248. <li class="md-nav__item">
  249. <a href="#2-generate-semantic-tokens-from-text" class="md-nav__link">
  250. <span class="md-ellipsis">
  251. 2. Generate Semantic tokens from text:
  252. </span>
  253. </a>
  254. </li>
  255. <li class="md-nav__item">
  256. <a href="#3-generate-vocals-from-semantic-tokens" class="md-nav__link">
  257. <span class="md-ellipsis">
  258. 3. Generate vocals from semantic tokens:
  259. </span>
  260. </a>
  261. </li>
  262. </ul>
  263. </nav>
  264. </li>
  265. <li class="md-nav__item">
  266. <a href="#webui-inference" class="md-nav__link">
  267. <span class="md-ellipsis">
  268. WebUI Inference
  269. </span>
  270. </a>
  271. <nav class="md-nav" aria-label="WebUI Inference">
  272. <ul class="md-nav__list">
  273. <li class="md-nav__item">
  274. <a href="#1-gradio-webui" class="md-nav__link">
  275. <span class="md-ellipsis">
  276. 1. Gradio WebUI
  277. </span>
  278. </a>
  279. </li>
  280. <li class="md-nav__item">
  281. <a href="#2-awesome-webui" class="md-nav__link">
  282. <span class="md-ellipsis">
  283. 2. Awesome WebUI
  284. </span>
  285. </a>
  286. </li>
  287. </ul>
  288. </nav>
  289. </li>
  290. </ul>
  291. </nav>
  292. </li>
  293. <li class="md-nav__item">
  294. <a href="../server/" class="md-nav__link">
  295. <span class="md-ellipsis">
  296. Server
  297. </span>
  298. </a>
  299. </li>
  300. <li class="md-nav__item">
  301. <a href="../en/samples.md" class="md-nav__link">
  302. <span class="md-ellipsis">
  303. Samples
  304. </span>
  305. </a>
  306. </li>
  307. </ul>
  308. </nav>
  309. </div>
  310. </div>
  311. </div>
  312. <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
  313. <div class="md-sidebar__scrollwrap">
  314. <div class="md-sidebar__inner">
  315. <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
  316. <label class="md-nav__title" for="__toc">
  317. <span class="md-nav__icon md-icon"></span>
  318. Table of contents
  319. </label>
  320. <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
  321. <li class="md-nav__item">
  322. <a href="#download-weights" class="md-nav__link">
  323. <span class="md-ellipsis">
  324. Download Weights
  325. </span>
  326. </a>
  327. </li>
  328. <li class="md-nav__item">
  329. <a href="#command-line-inference" class="md-nav__link">
  330. <span class="md-ellipsis">
  331. Command Line Inference
  332. </span>
  333. </a>
  334. <nav class="md-nav" aria-label="Command Line Inference">
  335. <ul class="md-nav__list">
  336. <li class="md-nav__item">
  337. <a href="#1-get-vq-tokens-from-reference-audio" class="md-nav__link">
  338. <span class="md-ellipsis">
  339. 1. Get VQ tokens from reference audio
  340. </span>
  341. </a>
  342. </li>
  343. <li class="md-nav__item">
  344. <a href="#2-generate-semantic-tokens-from-text" class="md-nav__link">
  345. <span class="md-ellipsis">
  346. 2. Generate Semantic tokens from text:
  347. </span>
  348. </a>
  349. </li>
  350. <li class="md-nav__item">
  351. <a href="#3-generate-vocals-from-semantic-tokens" class="md-nav__link">
  352. <span class="md-ellipsis">
  353. 3. Generate vocals from semantic tokens:
  354. </span>
  355. </a>
  356. </li>
  357. </ul>
  358. </nav>
  359. </li>
  360. <li class="md-nav__item">
  361. <a href="#webui-inference" class="md-nav__link">
  362. <span class="md-ellipsis">
  363. WebUI Inference
  364. </span>
  365. </a>
  366. <nav class="md-nav" aria-label="WebUI Inference">
  367. <ul class="md-nav__list">
  368. <li class="md-nav__item">
  369. <a href="#1-gradio-webui" class="md-nav__link">
  370. <span class="md-ellipsis">
  371. 1. Gradio WebUI
  372. </span>
  373. </a>
  374. </li>
  375. <li class="md-nav__item">
  376. <a href="#2-awesome-webui" class="md-nav__link">
  377. <span class="md-ellipsis">
  378. 2. Awesome WebUI
  379. </span>
  380. </a>
  381. </li>
  382. </ul>
  383. </nav>
  384. </li>
  385. </ul>
  386. </nav>
  387. </div>
  388. </div>
  389. </div>
  390. <div class="md-content" data-md-component="content">
  391. <article class="md-content__inner md-typeset">
  392. <a href="https://github.com/fishaudio/fish-speech/blob/main/docs/en/inference.md" title="Edit this page" class="md-content__button md-icon" rel="edit">
  393. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 20H6V4h7v5h5v3.1l2-2V8l-6-6H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h4zm10.2-7c.1 0 .3.1.4.2l1.3 1.3c.2.2.2.6 0 .8l-1 1-2.1-2.1 1-1c.1-.1.2-.2.4-.2m0 3.9L14.1 23H12v-2.1l6.1-6.1z"/></svg>
  394. </a>
  395. <a href="https://github.com/fishaudio/fish-speech/raw/main/docs/en/inference.md" title="View source of this page" class="md-content__button md-icon">
  396. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 18c.56 0 1 .44 1 1s-.44 1-1 1-1-.44-1-1 .44-1 1-1m0-3c-2.73 0-5.06 1.66-6 4 .94 2.34 3.27 4 6 4s5.06-1.66 6-4c-.94-2.34-3.27-4-6-4m0 6.5a2.5 2.5 0 0 1-2.5-2.5 2.5 2.5 0 0 1 2.5-2.5 2.5 2.5 0 0 1 2.5 2.5 2.5 2.5 0 0 1-2.5 2.5M9.27 20H6V4h7v5h5v4.07c.7.08 1.36.25 2 .49V8l-6-6H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h4.5a8.2 8.2 0 0 1-1.23-2"/></svg>
  397. </a>
  398. <h1 id="inference">Inference</h1>
  399. <p>The Fish Audio S2 model requires a large amount of VRAM. We recommend using a GPU with at least 24GB for inference.</p>
  400. <h2 id="download-weights">Download Weights</h2>
  401. <p>First, you need to download the model weights:</p>
  402. <div class="language-bash highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>hf<span class="w"> </span>download<span class="w"> </span>fishaudio/s2-pro<span class="w"> </span>--local-dir<span class="w"> </span>checkpoints/s2-pro
  403. </span></code></pre></div>
  404. <h2 id="command-line-inference">Command Line Inference</h2>
  405. <div class="admonition note">
  406. <p class="admonition-title">Note</p>
  407. <p>If you plan to let the model randomly choose a voice timbre, you can skip step 1 (getting VQ tokens from reference audio).</p>
  408. </div>
  409. <h3 id="1-get-vq-tokens-from-reference-audio">1. Get VQ tokens from reference audio</h3>
  410. <div class="language-bash highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>python<span class="w"> </span>fish_speech/models/dac/inference.py<span class="w"> </span><span class="se">\</span>
  411. </span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="w"> </span>-i<span class="w"> </span><span class="s2">&quot;test.wav&quot;</span><span class="w"> </span><span class="se">\</span>
  412. </span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="w"> </span>--checkpoint-path<span class="w"> </span><span class="s2">&quot;checkpoints/s2-pro/codec.pth&quot;</span>
  413. </span></code></pre></div>
  414. <p>You should get a <code>fake.npy</code> and a <code>fake.wav</code>.</p>
  415. <h3 id="2-generate-semantic-tokens-from-text">2. Generate Semantic tokens from text:</h3>
  416. <div class="language-bash highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a>python<span class="w"> </span>fish_speech/models/text2semantic/inference.py<span class="w"> </span><span class="se">\</span>
  417. </span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="w"> </span>--text<span class="w"> </span><span class="s2">&quot;The text you want to convert&quot;</span><span class="w"> </span><span class="se">\</span>
  418. </span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="w"> </span>--prompt-text<span class="w"> </span><span class="s2">&quot;Your reference text&quot;</span><span class="w"> </span><span class="se">\</span>
  419. </span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a><span class="w"> </span>--prompt-tokens<span class="w"> </span><span class="s2">&quot;fake.npy&quot;</span><span class="w"> </span><span class="se">\</span>
  420. </span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="w"> </span><span class="c1"># --compile</span>
  421. </span></code></pre></div>
  422. <p>This command will create a <code>codes_N.npy</code> file in the working directory, where N is an integer starting from 0.</p>
  423. <div class="admonition note">
  424. <p class="admonition-title">Note</p>
  425. <p>You may want to use <code>--compile</code> to fuse CUDA kernels for faster inference. However, we recommend using our SGLang inference acceleration instead.
  426. The example above leaves <code>--compile</code> commented out; uncomment it to enable compilation, or keep it commented out if you do not plan to use acceleration.</p>
  427. </div>
  428. <div class="admonition info">
  429. <p class="admonition-title">Info</p>
  430. <p>For GPUs that do not support bf16, you may need to use the <code>--half</code> parameter.</p>
  431. </div>
  432. <h3 id="3-generate-vocals-from-semantic-tokens">3. Generate vocals from semantic tokens:</h3>
  433. <div class="language-bash highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a>python<span class="w"> </span>fish_speech/models/dac/inference.py<span class="w"> </span><span class="se">\</span>
  434. </span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="w"> </span>-i<span class="w"> </span><span class="s2">&quot;codes_0.npy&quot;</span>
  435. </span></code></pre></div>
  436. <p>After that, you will get a <code>fake.wav</code> file.</p>
  437. <h2 id="webui-inference">WebUI Inference</h2>
  438. <h3 id="1-gradio-webui">1. Gradio WebUI</h3>
  439. <p>For compatibility, we still maintain the Gradio WebUI.</p>
  440. <div class="language-bash highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a>python<span class="w"> </span>tools/run_webui.py<span class="w"> </span><span class="c1"># --compile if you need acceleration</span>
  441. </span></code></pre></div>
  442. <h3 id="2-awesome-webui">2. Awesome WebUI</h3>
  443. <p>Awesome WebUI is a modernized Web interface built with TypeScript, offering richer features and a better user experience.</p>
  444. <p><strong>Build WebUI:</strong></p>
  445. <p>You need to have Node.js and npm installed on your local machine or server.</p>
  446. <ol>
  447. <li>Enter the <code>awesome_webui</code> directory:
  448. <div class="language-bash highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="nb">cd</span><span class="w"> </span>awesome_webui
  449. </span></code></pre></div></li>
  450. <li>Install dependencies:
  451. <div class="language-bash highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a>npm<span class="w"> </span>install
  452. </span></code></pre></div></li>
  453. <li>Build the WebUI:
  454. <div class="language-bash highlight"><pre><span></span><code><span id="__span-7-1"><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>npm<span class="w"> </span>run<span class="w"> </span>build
  455. </span></code></pre></div></li>
  456. </ol>
  457. <p><strong>Start Backend Server:</strong></p>
  458. <p>After building the WebUI, return to the project root and start the API server:</p>
  459. <div class="language-bash highlight"><pre><span></span><code><span id="__span-8-1"><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a>python<span class="w"> </span>tools/api_server.py<span class="w"> </span>--listen<span class="w"> </span><span class="m">0</span>.0.0.0:8888<span class="w"> </span>--compile
  460. </span></code></pre></div>
  461. <p><strong>Access:</strong></p>
  462. <p>Once the server is running, you can access it via your browser:
  463. <code>http://localhost:8888/ui</code></p>
  464. </article>
  465. </div>
  466. <script>
// Looks up the element whose id matches the URL fragment. If it exists and has
// a non-empty `name`, its `checked` property is set to whether that name
// starts with "__tabbed_".
var target = document.getElementById(location.hash.slice(1));
if (target && target.name) {
  target.checked = target.name.startsWith("__tabbed_");
}
</script>
  467. </div>
  468. </main>
  469. <footer class="md-footer"> <!-- Page footer: previous/next page links, copyright notice, and social links -->
  470. <nav class="md-footer__inner md-grid" aria-label="Footer"> <!-- The links below carry their own aria-label, so the arrow icons are decorative and marked aria-hidden -->
  471. <a href="../finetune/" class="md-footer__link md-footer__link--prev" aria-label="Previous: Finetune">
  472. <div class="md-footer__button md-icon">
  473. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
  474. </div>
  475. <div class="md-footer__title">
  476. <span class="md-footer__direction">
  477. Previous
  478. </span>
  479. <div class="md-ellipsis">
  480. Finetune
  481. </div>
  482. </div>
  483. </a>
  484. <a href="../server/" class="md-footer__link md-footer__link--next" aria-label="Next: Server">
  485. <div class="md-footer__title">
  486. <span class="md-footer__direction">
  487. Next
  488. </span>
  489. <div class="md-ellipsis">
  490. Server
  491. </div>
  492. </div>
  493. <div class="md-footer__button md-icon">
  494. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
  495. </div>
  496. </a>
  497. </nav>
  498. <div class="md-footer-meta md-typeset"> <!-- Copyright notice and social links -->
  499. <div class="md-footer-meta__inner md-grid">
  500. <div class="md-copyright">
  501. <div class="md-copyright__highlight">
  502. Copyright &copy; 2023-2025 by Fish Audio
  503. </div>
  504. Made with
  505. <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
  506. Material for MkDocs
  507. </a>
  508. </div>
  509. <div class="md-social"> <!-- Icon-only links: aria-label supplies the accessible name; the SVG icons are decorative and hidden from assistive technology -->
  510. <a href="https://discord.gg/Es5qTB9BcN" target="_blank" rel="noopener" title="discord.gg" class="md-social__link" aria-label="Discord">
  511. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" aria-hidden="true"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M492.5 69.8c-.2-.3-.4-.6-.8-.7-38.1-17.5-78.4-30-119.7-37.1-.4-.1-.8 0-1.1.1s-.6.4-.8.8c-5.5 9.9-10.5 20.2-14.9 30.6-44.6-6.8-89.9-6.8-134.4 0-4.5-10.5-9.5-20.7-15.1-30.6-.2-.3-.5-.6-.8-.8s-.7-.2-1.1-.2C162.5 39 122.2 51.5 84.1 69c-.3.1-.6.4-.8.7C7.1 183.5-13.8 294.6-3.6 404.2c0 .3.1.5.2.8s.3.4.5.6c44.4 32.9 94 58 146.8 74.2.4.1.8.1 1.1 0s.7-.4.9-.7c11.3-15.4 21.4-31.8 30-48.8.1-.2.2-.5.2-.8s0-.5-.1-.8-.2-.5-.4-.6-.4-.3-.7-.4c-15.8-6.1-31.2-13.4-45.9-21.9-.3-.2-.5-.4-.7-.6s-.3-.6-.3-.9 0-.6.2-.9.3-.5.6-.7c3.1-2.3 6.2-4.7 9.1-7.1.3-.2.6-.4.9-.4s.7 0 1 .1c96.2 43.9 200.4 43.9 295.5 0 .3-.1.7-.2 1-.2s.7.2.9.4c2.9 2.4 6 4.9 9.1 7.2.2.2.4.4.6.7s.2.6.2.9-.1.6-.3.9-.4.5-.6.6c-14.7 8.6-30 15.9-45.9 21.8-.2.1-.5.2-.7.4s-.3.4-.4.7-.1.5-.1.8.1.5.2.8c8.8 17 18.8 33.3 30 48.8.2.3.6.6.9.7s.8.1 1.1 0c52.9-16.2 102.6-41.3 147.1-74.2.2-.2.4-.4.5-.6s.2-.5.2-.8c12.3-126.8-20.5-236.9-86.9-334.5zm-302 267.7c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.4 59.2-52.8 59.2m195.4 0c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.2 59.2-52.8 59.2"/></svg>
  512. </a>
  513. <a href="https://hub.docker.com/r/fishaudio/fish-speech" target="_blank" rel="noopener" title="hub.docker.com" class="md-social__link" aria-label="Docker Hub">
  514. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512" aria-hidden="true"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M349.9 236.3h-66.1v-59.4h66.1zm0-204.3h-66.1v60.7h66.1zm78.2 144.8H362v59.4h66.1zm-156.3-72.1h-66.1v60.1h66.1zm78.1 0h-66.1v60.1h66.1zm276.8 100c-14.4-9.7-47.6-13.2-73.1-8.4-3.3-24-16.7-44.9-41.1-63.7l-14-9.3-9.3 14c-18.4 27.8-23.4 73.6-3.7 103.8-8.7 4.7-25.8 11.1-48.4 10.7H2.4c-8.7 50.8 5.8 116.8 44 162.1 37.1 43.9 92.7 66.2 165.4 66.2 157.4 0 273.9-72.5 328.4-204.2 21.4.4 67.6.1 91.3-45.2 1.5-2.5 6.6-13.2 8.5-17.1zm-511.1-27.9h-66v59.4h66.1v-59.4zm78.1 0h-66.1v59.4h66.1zm78.1 0h-66.1v59.4h66.1zm-78.1-72.1h-66.1v60.1h66.1z"/></svg>
  515. </a>
  516. <a href="https://qm.qq.com/cgi-bin/qm/qr?_wv=1027&amp;k=jCKlUP7QgSm9kh95UlBoYv6s1I-Apl1M&amp;authKey=xI5ttVAp3do68IpEYEalwXSYZFdfxZSkah%2BctF5FIMyN2NqAa003vFtLqJyAVRfF&amp;noverify=0&amp;group_code=593946093" target="_blank" rel="noopener" title="qm.qq.com" class="md-social__link" aria-label="QQ group">
  517. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" aria-hidden="true"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M434.1 420.4c-11.5 1.4-44.9-52.7-44.9-52.7 0 31.3-16.1 72.2-51 101.8 16.8 5.2 54.8 19.2 45.8 34.4-7.3 12.3-125.5 7.9-159.6 4-34.1 3.8-152.3 8.3-159.6-4-9-15.2 28.9-29.2 45.8-34.4-34.9-29.5-51.1-70.4-51.1-101.8 0 0-33.3 54.1-44.9 52.7-5.4-.6-12.4-29.6 9.3-99.7 10.3-33 22-60.5 40.1-105.8C60.9 98 109.2-.1 224.3-.1 338-.1 387.5 96 384.6 214.9c18.1 45.2 29.9 72.9 40.1 105.8 21.8 70.1 14.7 99.1 9.3 99.7z"/></svg>
  518. </a>
  519. </div>
  520. </div>
  521. </div>
  522. </footer>
  523. </div>
  524. <div class="md-dialog" data-md-component="dialog"> <!-- Dialog container; its inner element is empty in the static markup (presumably filled at runtime by the bundle script below - confirm) -->
  525. <div class="md-dialog__inner md-typeset"></div>
  526. </div>
  527. <script id="__config" type="application/json">{"annotate": null, "base": "..", "features": ["content.action.edit", "content.action.view", "navigation.tracking", "navigation.footer", "search", "search.suggest", "search.highlight", "search.share", "content.code.copy"], "search": "../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script> <!-- Site configuration as a JSON data block (type application/json, so the browser does not execute it): relative base path, enabled feature flags, search worker path, and UI translation strings. Strict JSON, so no comments may be placed inside it. Presumably read by the bundle loaded next - confirm. -->
  528. <script src="../assets/javascripts/bundle.79ae519e.min.js"></script> <!-- Loads the site's JavaScript bundle at the end of the body, after the config block above -->
  529. </body>
  530. </html>