index.html 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962
  1. <!doctype html>
  2. <html lang="pt" class="no-js">
  3. <head>
  4. <meta charset="utf-8">
  5. <meta name="viewport" content="width=device-width,initial-scale=1">
  6. <meta name="description" content="Targeting SOTA TTS solutions.">
  7. <link rel="canonical" href="https://speech.fish.audio/pt/inference/">
  8. <link rel="prev" href="../finetune/">
  9. <link rel="alternate" href="../../inference/" hreflang="en">
  10. <link rel="alternate" href="../../zh/inference/" hreflang="zh">
  11. <link rel="alternate" href="../../ja/inference/" hreflang="ja">
  12. <link rel="alternate" href="./" hreflang="pt">
  13. <link rel="alternate" href="../../ko/inference/" hreflang="ko">
  14. <link rel="alternate" href="../../ar/inference/" hreflang="ar">
  15. <link rel="icon" href="../../assets/logo.svg">
  16. <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.1">
  17. <title>Inferência - Fish Audio</title>
  18. <link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
  19. <link rel="stylesheet" href="../../assets/stylesheets/palette.ab4e12ef.min.css">
  20. <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  21. <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
  22. <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
  23. <link rel="stylesheet" href="../../stylesheets/extra.css">
  24. <script>
      // Bootstrap helpers, assigned as implicit globals (no declaration keyword)
      // so scripts further down the page can call them, e.g. to read the stored
      // colour palette.

      // Site root: the URL two directory levels above the current page.
      __md_scope = new URL("../..", location);

      // Fold a string into a number by applying (h << 5) - h + charCode to each
      // character in turn, starting from 0.
      __md_hash = (text) => {
        let acc = 0;
        for (const ch of text) {
          acc = (acc << 5) - acc + ch.charCodeAt(0);
        }
        return acc;
      };

      // Read and JSON-parse the value stored under "<scope pathname>.<key>".
      __md_get = (key, storage = localStorage, scope = __md_scope) =>
        JSON.parse(storage.getItem(scope.pathname + "." + key));

      // JSON-serialise a value and store it under the same scoped key. Any error
      // thrown while writing is deliberately ignored.
      __md_set = (key, value, storage = localStorage, scope = __md_scope) => {
        try {
          storage.setItem(scope.pathname + "." + key, JSON.stringify(value));
        } catch (err) {}
      };
      </script>
  25. </head>
  26. <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
  27. <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
  28. <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
  29. <label class="md-overlay" for="__drawer"></label>
  30. <div data-md-component="skip">
  31. <a href="#inferencia" class="md-skip">
  32. Ir para o conteúdo
  33. </a>
  34. </div>
  35. <div data-md-component="announce">
  36. </div>
  37. <header class="md-header md-header--shadow" data-md-component="header">
  38. <nav class="md-header__inner md-grid" aria-label="Cabeçalho">
  39. <a href="https://speech.fish.audio" title="Fish Audio" class="md-header__button md-logo" aria-label="Fish Audio" data-md-component="logo">
  40. <img src="../../assets/logo.svg" alt="logo">
  41. </a>
  42. <label class="md-header__button md-icon" for="__drawer">
  43. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
  44. </label>
  45. <div class="md-header__title" data-md-component="header-title">
  46. <div class="md-header__ellipsis">
  47. <div class="md-header__topic">
  48. <span class="md-ellipsis">
  49. Fish Audio
  50. </span>
  51. </div>
  52. <div class="md-header__topic" data-md-component="header-topic">
  53. <span class="md-ellipsis">
  54. Inferência
  55. </span>
  56. </div>
  57. </div>
  58. </div>
  59. <form class="md-header__option" data-md-component="palette">
  60. <input class="md-option" data-md-color-media="(prefers-color-scheme)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_0">
  61. <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
  62. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9zM20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12zm-9.15 3.96h2.3L12 9z"/></svg>
  63. </label>
  64. <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
  65. <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
  66. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
  67. </label>
  68. <input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
  69. <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
  70. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
  71. </label>
  72. </form>
  73. <script>
      // Apply the stored colour palette (if any) to <body>. This must stay after
      // the palette <input> elements above, because it queries them.
      var palette = __md_get("__palette");
      if (palette && palette.color) {
        // A stored media value of "(prefers-color-scheme)" means the concrete
        // palette is copied from whichever light/dark <input> matches the
        // current system colour scheme.
        if (palette.color.media === "(prefers-color-scheme)") {
          var media = matchMedia("(prefers-color-scheme: light)");
          var input = document.querySelector(
            media.matches
              ? "[data-md-color-media='(prefers-color-scheme: light)']"
              : "[data-md-color-media='(prefers-color-scheme: dark)']"
          );
          palette.color.media = input.getAttribute("data-md-color-media");
          palette.color.scheme = input.getAttribute("data-md-color-scheme");
          palette.color.primary = input.getAttribute("data-md-color-primary");
          palette.color.accent = input.getAttribute("data-md-color-accent");
        }
        // Mirror every palette entry onto <body> as a data-md-color-* attribute.
        for (var [key, value] of Object.entries(palette.color)) {
          document.body.setAttribute("data-md-color-" + key, value);
        }
      }
      </script>
  74. <div class="md-header__option">
  75. <div class="md-select">
  76. <button class="md-header__button md-icon" aria-label="Selecione o idioma">
  77. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
  78. </button>
  79. <div class="md-select__inner">
  80. <ul class="md-select__list">
  81. <li class="md-select__item">
  82. <a href="../../inference/" hreflang="en" class="md-select__link">
  83. English
  84. </a>
  85. </li>
  86. <li class="md-select__item">
  87. <a href="../../zh/inference/" hreflang="zh" class="md-select__link">
  88. 简体中文
  89. </a>
  90. </li>
  91. <li class="md-select__item">
  92. <a href="../../ja/inference/" hreflang="ja" class="md-select__link">
  93. 日本語
  94. </a>
  95. </li>
  96. <li class="md-select__item">
  97. <a href="./" hreflang="pt" class="md-select__link">
  98. Português (Brasil)
  99. </a>
  100. </li>
  101. <li class="md-select__item">
  102. <a href="../../ko/inference/" hreflang="ko" class="md-select__link">
  103. 한국어
  104. </a>
  105. </li>
  106. <li class="md-select__item">
  107. <a href="../../ar/inference/" hreflang="ar" class="md-select__link">
  108. العربية
  109. </a>
  110. </li>
  111. </ul>
  112. </div>
  113. </div>
  114. </div>
  115. <label class="md-header__button md-icon" for="__search">
  116. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
  117. </label>
  118. <div class="md-search" data-md-component="search" role="dialog">
  119. <label class="md-search__overlay" for="__search"></label>
  120. <div class="md-search__inner" role="search">
  121. <form class="md-search__form" name="search">
  122. <input type="text" class="md-search__input" name="query" aria-label="Buscar" placeholder="Buscar" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
  123. <label class="md-search__icon md-icon" for="__search">
  124. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
  125. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
  126. </label>
  127. <nav class="md-search__options" aria-label="Pesquisar">
  128. <a href="javascript:void(0)" class="md-search__icon md-icon" title="Compartilhar" aria-label="Compartilhar" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
  129. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
  130. </a>
  131. <button type="reset" class="md-search__icon md-icon" title="Limpar" aria-label="Limpar" tabindex="-1">
  132. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
  133. </button>
  134. </nav>
  135. <div class="md-search__suggest" data-md-component="search-suggest"></div>
  136. </form>
  137. <div class="md-search__output">
  138. <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
  139. <div class="md-search-result" data-md-component="search-result">
  140. <div class="md-search-result__meta">
  141. Inicializando a pesquisa
  142. </div>
  143. <ol class="md-search-result__list" role="presentation"></ol>
  144. </div>
  145. </div>
  146. </div>
  147. </div>
  148. </div>
  149. <div class="md-header__source">
  150. <a href="https://github.com/fishaudio/fish-speech" title="Ir ao repositório" class="md-source" data-md-component="source">
  151. <div class="md-source__icon md-icon">
  152. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
  153. </div>
  154. <div class="md-source__repository">
  155. fishaudio/fish-speech
  156. </div>
  157. </a>
  158. </div>
  159. </nav>
  160. </header>
  161. <div class="md-container" data-md-component="container">
  162. <main class="md-main" data-md-component="main">
  163. <div class="md-main__inner md-grid">
  164. <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
  165. <div class="md-sidebar__scrollwrap">
  166. <div class="md-sidebar__inner">
  167. <nav class="md-nav md-nav--primary" aria-label="Navegação" data-md-level="0">
  168. <label class="md-nav__title" for="__drawer">
  169. <a href="https://speech.fish.audio" title="Fish Audio" class="md-nav__button md-logo" aria-label="Fish Audio" data-md-component="logo">
  170. <img src="../../assets/logo.svg" alt="logo">
  171. </a>
  172. Fish Audio
  173. </label>
  174. <div class="md-nav__source">
  175. <a href="https://github.com/fishaudio/fish-speech" title="Ir ao repositório" class="md-source" data-md-component="source">
  176. <div class="md-source__icon md-icon">
  177. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
  178. </div>
  179. <div class="md-source__repository">
  180. fishaudio/fish-speech
  181. </div>
  182. </a>
  183. </div>
  184. <ul class="md-nav__list" data-md-scrollfix>
  185. <li class="md-nav__item">
  186. <a href="../" class="md-nav__link">
  187. <span class="md-ellipsis">
  188. Introdução
  189. </span>
  190. </a>
  191. </li>
  192. <li class="md-nav__item">
  193. <a href="../install/" class="md-nav__link">
  194. <span class="md-ellipsis">
  195. Instalação
  196. </span>
  197. </a>
  198. </li>
  199. <li class="md-nav__item">
  200. <a href="../finetune/" class="md-nav__link">
  201. <span class="md-ellipsis">
  202. Ajuste Fino
  203. </span>
  204. </a>
  205. </li>
  206. <li class="md-nav__item md-nav__item--active">
  207. <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
  208. <label class="md-nav__link md-nav__link--active" for="__toc">
  209. <span class="md-ellipsis">
  210. Inferência
  211. </span>
  212. <span class="md-nav__icon md-icon"></span>
  213. </label>
  214. <a href="./" class="md-nav__link md-nav__link--active">
  215. <span class="md-ellipsis">
  216. Inferência
  217. </span>
  218. </a>
  219. <nav class="md-nav md-nav--secondary" aria-label="Índice">
  220. <label class="md-nav__title" for="__toc">
  221. <span class="md-nav__icon md-icon"></span>
  222. Índice
  223. </label>
  224. <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
  225. <li class="md-nav__item">
  226. <a href="#baixar-pesos" class="md-nav__link">
  227. <span class="md-ellipsis">
  228. Baixar Pesos
  229. </span>
  230. </a>
  231. </li>
  232. <li class="md-nav__item">
  233. <a href="#inferencia-por-linha-de-comando" class="md-nav__link">
  234. <span class="md-ellipsis">
  235. Inferência por Linha de Comando
  236. </span>
  237. </a>
  238. <nav class="md-nav" aria-label="Inferência por Linha de Comando">
  239. <ul class="md-nav__list">
  240. <li class="md-nav__item">
  241. <a href="#1-obter-tokens-vq-do-audio-de-referencia" class="md-nav__link">
  242. <span class="md-ellipsis">
  243. 1. Obter tokens VQ do áudio de referência
  244. </span>
  245. </a>
  246. </li>
  247. <li class="md-nav__item">
  248. <a href="#2-gerar-tokens-semanticos-a-partir-do-texto" class="md-nav__link">
  249. <span class="md-ellipsis">
  250. 2. Gerar tokens Semânticos a partir do texto:
  251. </span>
  252. </a>
  253. </li>
  254. <li class="md-nav__item">
  255. <a href="#3-gerar-vocais-a-partir-de-tokens-semanticos" class="md-nav__link">
  256. <span class="md-ellipsis">
  257. 3. Gerar vocais a partir de tokens semânticos:
  258. </span>
  259. </a>
  260. </li>
  261. </ul>
  262. </nav>
  263. </li>
  264. <li class="md-nav__item">
  265. <a href="#inferencia-webui" class="md-nav__link">
  266. <span class="md-ellipsis">
  267. Inferência WebUI
  268. </span>
  269. </a>
  270. <nav class="md-nav" aria-label="Inferência WebUI">
  271. <ul class="md-nav__list">
  272. <li class="md-nav__item">
  273. <a href="#1-gradio-webui" class="md-nav__link">
  274. <span class="md-ellipsis">
  275. 1. Gradio WebUI
  276. </span>
  277. </a>
  278. </li>
  279. <li class="md-nav__item">
  280. <a href="#2-awesome-webui" class="md-nav__link">
  281. <span class="md-ellipsis">
  282. 2. Awesome WebUI
  283. </span>
  284. </a>
  285. </li>
  286. </ul>
  287. </nav>
  288. </li>
  289. </ul>
  290. </nav>
  291. </li>
  292. <li class="md-nav__item">
  293. <a href="../samples.md" class="md-nav__link">
  294. <span class="md-ellipsis">
  295. Amostras
  296. </span>
  297. </a>
  298. </li>
  299. </ul>
  300. </nav>
  301. </div>
  302. </div>
  303. </div>
  304. <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
  305. <div class="md-sidebar__scrollwrap">
  306. <div class="md-sidebar__inner">
  307. <nav class="md-nav md-nav--secondary" aria-label="Índice">
  308. <label class="md-nav__title" for="__toc">
  309. <span class="md-nav__icon md-icon"></span>
  310. Índice
  311. </label>
  312. <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
  313. <li class="md-nav__item">
  314. <a href="#baixar-pesos" class="md-nav__link">
  315. <span class="md-ellipsis">
  316. Baixar Pesos
  317. </span>
  318. </a>
  319. </li>
  320. <li class="md-nav__item">
  321. <a href="#inferencia-por-linha-de-comando" class="md-nav__link">
  322. <span class="md-ellipsis">
  323. Inferência por Linha de Comando
  324. </span>
  325. </a>
  326. <nav class="md-nav" aria-label="Inferência por Linha de Comando">
  327. <ul class="md-nav__list">
  328. <li class="md-nav__item">
  329. <a href="#1-obter-tokens-vq-do-audio-de-referencia" class="md-nav__link">
  330. <span class="md-ellipsis">
  331. 1. Obter tokens VQ do áudio de referência
  332. </span>
  333. </a>
  334. </li>
  335. <li class="md-nav__item">
  336. <a href="#2-gerar-tokens-semanticos-a-partir-do-texto" class="md-nav__link">
  337. <span class="md-ellipsis">
  338. 2. Gerar tokens Semânticos a partir do texto:
  339. </span>
  340. </a>
  341. </li>
  342. <li class="md-nav__item">
  343. <a href="#3-gerar-vocais-a-partir-de-tokens-semanticos" class="md-nav__link">
  344. <span class="md-ellipsis">
  345. 3. Gerar vocais a partir de tokens semânticos:
  346. </span>
  347. </a>
  348. </li>
  349. </ul>
  350. </nav>
  351. </li>
  352. <li class="md-nav__item">
  353. <a href="#inferencia-webui" class="md-nav__link">
  354. <span class="md-ellipsis">
  355. Inferência WebUI
  356. </span>
  357. </a>
  358. <nav class="md-nav" aria-label="Inferência WebUI">
  359. <ul class="md-nav__list">
  360. <li class="md-nav__item">
  361. <a href="#1-gradio-webui" class="md-nav__link">
  362. <span class="md-ellipsis">
  363. 1. Gradio WebUI
  364. </span>
  365. </a>
  366. </li>
  367. <li class="md-nav__item">
  368. <a href="#2-awesome-webui" class="md-nav__link">
  369. <span class="md-ellipsis">
  370. 2. Awesome WebUI
  371. </span>
  372. </a>
  373. </li>
  374. </ul>
  375. </nav>
  376. </li>
  377. </ul>
  378. </nav>
  379. </div>
  380. </div>
  381. </div>
  382. <div class="md-content" data-md-component="content">
  383. <article class="md-content__inner md-typeset">
  384. <a href="https://github.com/fishaudio/fish-speech/blob/main/docs/pt/inference.md" title="Editar esta página" class="md-content__button md-icon" rel="edit">
  385. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 20H6V4h7v5h5v3.1l2-2V8l-6-6H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h4zm10.2-7c.1 0 .3.1.4.2l1.3 1.3c.2.2.2.6 0 .8l-1 1-2.1-2.1 1-1c.1-.1.2-.2.4-.2m0 3.9L14.1 23H12v-2.1l6.1-6.1z"/></svg>
  386. </a>
  387. <a href="https://github.com/fishaudio/fish-speech/raw/main/docs/pt/inference.md" title="Ver fonte desta página" class="md-content__button md-icon">
  388. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 18c.56 0 1 .44 1 1s-.44 1-1 1-1-.44-1-1 .44-1 1-1m0-3c-2.73 0-5.06 1.66-6 4 .94 2.34 3.27 4 6 4s5.06-1.66 6-4c-.94-2.34-3.27-4-6-4m0 6.5a2.5 2.5 0 0 1-2.5-2.5 2.5 2.5 0 0 1 2.5-2.5 2.5 2.5 0 0 1 2.5 2.5 2.5 2.5 0 0 1-2.5 2.5M9.27 20H6V4h7v5h5v4.07c.7.08 1.36.25 2 .49V8l-6-6H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h4.5a8.2 8.2 0 0 1-1.23-2"/></svg>
  389. </a>
  390. <h1 id="inferencia">Inferência</h1>
  391. <p>O modelo Fish Audio S2 requer uma grande quantidade de VRAM. Recomendamos o uso de uma GPU com pelo menos 24GB para inferência.</p>
  392. <h2 id="baixar-pesos">Baixar Pesos</h2>
  393. <p>Primeiro, você precisa baixar os pesos do modelo:</p>
  394. <div class="language-bash highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>hf<span class="w"> </span>download<span class="w"> </span>fishaudio/s2-pro<span class="w"> </span>--local-dir<span class="w"> </span>checkpoints/s2-pro
  395. </span></code></pre></div>
  396. <h2 id="inferencia-por-linha-de-comando">Inferência por Linha de Comando</h2>
  397. <div class="admonition note">
  398. <p class="admonition-title">Nota</p>
  399. <p>Se você planeja deixar o modelo escolher aleatoriamente um timbre de voz, pode pular esta etapa.</p>
  400. </div>
  401. <h3 id="1-obter-tokens-vq-do-audio-de-referencia">1. Obter tokens VQ do áudio de referência</h3>
  402. <div class="language-bash highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>python<span class="w"> </span>fish_speech/models/dac/inference.py<span class="w"> </span><span class="se">\</span>
  403. </span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="w"> </span>-i<span class="w"> </span><span class="s2">&quot;test.wav&quot;</span><span class="w"> </span><span class="se">\</span>
  404. </span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="w"> </span>--checkpoint-path<span class="w"> </span><span class="s2">&quot;checkpoints/s2-pro/codec.pth&quot;</span>
  405. </span></code></pre></div>
  406. <p>Você deve obter um <code>fake.npy</code> e um <code>fake.wav</code>.</p>
  407. <h3 id="2-gerar-tokens-semanticos-a-partir-do-texto">2. Gerar tokens Semânticos a partir do texto:</h3>
  408. <div class="language-bash highlight"><pre><span></span><code><span id="__span-2-1"><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a>python<span class="w"> </span>fish_speech/models/text2semantic/inference.py<span class="w"> </span><span class="se">\</span>
  409. </span><span id="__span-2-2"><a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="w"> </span>--text<span class="w"> </span><span class="s2">&quot;O texto que você deseja converter&quot;</span><span class="w"> </span><span class="se">\</span>
  410. </span><span id="__span-2-3"><a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="w"> </span>--prompt-text<span class="w"> </span><span class="s2">&quot;Seu texto de referência&quot;</span><span class="w"> </span><span class="se">\</span>
  411. </span><span id="__span-2-4"><a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a><span class="w"> </span>--prompt-tokens<span class="w"> </span><span class="s2">&quot;fake.npy&quot;</span><span class="w"> </span><span class="se">\</span>
  412. </span><span id="__span-2-5"><a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="w"> </span><span class="c1"># --compile</span>
  413. </span></code></pre></div>
  414. <p>Este comando criará um arquivo <code>codes_N.npy</code> no diretório de trabalho, onde N é um número inteiro começando em 0.</p>
  415. <div class="admonition note">
  416. <p class="admonition-title">Nota</p>
  417. <p>Você pode querer usar <code>--compile</code> para fundir kernels CUDA para uma inferência mais rápida. No entanto, recomendamos usar nossa otimização de aceleração de inferência sglang.
  418. Por outro lado, se você não planeja usar aceleração, mantenha o parâmetro <code>--compile</code> comentado, como no exemplo acima.</p>
  419. </div>
  420. <div class="admonition info">
  421. <p class="admonition-title">Informação</p>
  422. <p>Para GPUs que não suportam bf16, você pode precisar usar o parâmetro <code>--half</code>.</p>
  423. </div>
  424. <h3 id="3-gerar-vocais-a-partir-de-tokens-semanticos">3. Gerar vocais a partir de tokens semânticos:</h3>
  425. <div class="language-bash highlight"><pre><span></span><code><span id="__span-3-1"><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a>python<span class="w"> </span>fish_speech/models/dac/inference.py<span class="w"> </span><span class="se">\</span>
  426. </span><span id="__span-3-2"><a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="w"> </span>-i<span class="w"> </span><span class="s2">&quot;codes_0.npy&quot;</span>
  427. </span></code></pre></div>
  428. <p>Depois disso, você obterá um arquivo <code>fake.wav</code>.</p>
  429. <h2 id="inferencia-webui">Inferência WebUI</h2>
  430. <h3 id="1-gradio-webui">1. Gradio WebUI</h3>
  431. <p>Para manter a compatibilidade, mantemos a interface Gradio WebUI anterior.</p>
  432. <div class="language-bash highlight"><pre><span></span><code><span id="__span-4-1"><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a>python<span class="w"> </span>tools/run_webui.py<span class="w"> </span><span class="c1"># --compile se você precisar de aceleração</span>
  433. </span></code></pre></div>
  434. <h3 id="2-awesome-webui">2. Awesome WebUI</h3>
  435. <p>A Awesome WebUI é uma interface web moderna baseada em TypeScript, oferecendo funcionalidades mais ricas e uma melhor experiência do usuário.</p>
  436. <p><strong>Construir a WebUI:</strong></p>
  437. <p>Você precisa ter o Node.js e o npm instalados em seu computador local ou servidor.</p>
  438. <ol>
  439. <li>Entre no diretório <code>awesome_webui</code>:
  440. <div class="language-bash highlight"><pre><span></span><code><span id="__span-5-1"><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="nb">cd</span><span class="w"> </span>awesome_webui
  441. </span></code></pre></div></li>
  442. <li>Instale as dependências:
  443. <div class="language-bash highlight"><pre><span></span><code><span id="__span-6-1"><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a>npm<span class="w"> </span>install
  444. </span></code></pre></div></li>
  445. <li>Construa a WebUI:
  446. <div class="language-bash highlight"><pre><span></span><code><span id="__span-7-1"><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>npm<span class="w"> </span>run<span class="w"> </span>build
  447. </span></code></pre></div></li>
  448. </ol>
  449. <p><strong>Iniciar o Servidor Backend:</strong></p>
  450. <p>Após a construção da WebUI, retorne ao diretório raiz do projeto e inicie o servidor API:</p>
  451. <div class="language-bash highlight"><pre><span></span><code><span id="__span-8-1"><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a>python<span class="w"> </span>tools/api_server.py<span class="w"> </span>--listen<span class="w"> </span><span class="m">0</span>.0.0.0:8888<span class="w"> </span>--compile
  452. </span></code></pre></div>
  453. <p><strong>Acesso:</strong></p>
  454. <p>Após o servidor ser iniciado, você pode acessá-lo através do navegador no seguinte endereço:
  455. <code>http://localhost:8888/ui</code></p>
  456. </article>
  457. </div>
  458. <script>
       // Look up the element whose id matches the URL fragment (without the "#").
       var target = document.getElementById(location.hash.slice(1));
       // If it exists and has a name, set its checked state to whether that name
       // starts with "__tabbed_".
       if (target && target.name) {
         target.checked = target.name.startsWith("__tabbed_");
       }
       </script>
  459. </div>
  460. </main>
  461. <footer class="md-footer">
  462. <nav class="md-footer__inner md-grid" aria-label="Rodapé" >
  463. <a href="../finetune/" class="md-footer__link md-footer__link--prev" aria-label="Anterior: Ajuste Fino">
  464. <div class="md-footer__button md-icon">
  465. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
  466. </div>
  467. <div class="md-footer__title">
  468. <span class="md-footer__direction">
  469. Anterior
  470. </span>
  471. <div class="md-ellipsis">
  472. Ajuste Fino
  473. </div>
  474. </div>
  475. </a>
  476. </nav>
  477. <!-- Footer meta: copyright notice plus icon-only social links. Each link carries an aria-label for its accessible name and its SVG icon is hidden from assistive technology. --><div class="md-footer-meta md-typeset">
  478. <div class="md-footer-meta__inner md-grid">
  479. <div class="md-copyright">
  480. <div class="md-copyright__highlight">
  481. Copyright &copy; 2023-2025 by Fish Audio
  482. </div>
  483. Made with
  484. <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
  485. Material for MkDocs
  486. </a>
  487. </div>
  488. <div class="md-social">
  489. <a href="https://discord.gg/Es5qTB9BcN" target="_blank" rel="noopener" title="discord.gg" class="md-social__link" aria-label="Discord">
  490. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512" aria-hidden="true"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M492.5 69.8c-.2-.3-.4-.6-.8-.7-38.1-17.5-78.4-30-119.7-37.1-.4-.1-.8 0-1.1.1s-.6.4-.8.8c-5.5 9.9-10.5 20.2-14.9 30.6-44.6-6.8-89.9-6.8-134.4 0-4.5-10.5-9.5-20.7-15.1-30.6-.2-.3-.5-.6-.8-.8s-.7-.2-1.1-.2C162.5 39 122.2 51.5 84.1 69c-.3.1-.6.4-.8.7C7.1 183.5-13.8 294.6-3.6 404.2c0 .3.1.5.2.8s.3.4.5.6c44.4 32.9 94 58 146.8 74.2.4.1.8.1 1.1 0s.7-.4.9-.7c11.3-15.4 21.4-31.8 30-48.8.1-.2.2-.5.2-.8s0-.5-.1-.8-.2-.5-.4-.6-.4-.3-.7-.4c-15.8-6.1-31.2-13.4-45.9-21.9-.3-.2-.5-.4-.7-.6s-.3-.6-.3-.9 0-.6.2-.9.3-.5.6-.7c3.1-2.3 6.2-4.7 9.1-7.1.3-.2.6-.4.9-.4s.7 0 1 .1c96.2 43.9 200.4 43.9 295.5 0 .3-.1.7-.2 1-.2s.7.2.9.4c2.9 2.4 6 4.9 9.1 7.2.2.2.4.4.6.7s.2.6.2.9-.1.6-.3.9-.4.5-.6.6c-14.7 8.6-30 15.9-45.9 21.8-.2.1-.5.2-.7.4s-.3.4-.4.7-.1.5-.1.8.1.5.2.8c8.8 17 18.8 33.3 30 48.8.2.3.6.6.9.7s.8.1 1.1 0c52.9-16.2 102.6-41.3 147.1-74.2.2-.2.4-.4.5-.6s.2-.5.2-.8c12.3-126.8-20.5-236.9-86.9-334.5zm-302 267.7c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.4 59.2-52.8 59.2m195.4 0c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.2 59.2-52.8 59.2"/></svg>
  491. </a>
  492. <a href="https://hub.docker.com/r/fishaudio/fish-speech" target="_blank" rel="noopener" title="hub.docker.com" class="md-social__link" aria-label="Docker Hub">
  493. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512" aria-hidden="true"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M349.9 236.3h-66.1v-59.4h66.1zm0-204.3h-66.1v60.7h66.1zm78.2 144.8H362v59.4h66.1zm-156.3-72.1h-66.1v60.1h66.1zm78.1 0h-66.1v60.1h66.1zm276.8 100c-14.4-9.7-47.6-13.2-73.1-8.4-3.3-24-16.7-44.9-41.1-63.7l-14-9.3-9.3 14c-18.4 27.8-23.4 73.6-3.7 103.8-8.7 4.7-25.8 11.1-48.4 10.7H2.4c-8.7 50.8 5.8 116.8 44 162.1 37.1 43.9 92.7 66.2 165.4 66.2 157.4 0 273.9-72.5 328.4-204.2 21.4.4 67.6.1 91.3-45.2 1.5-2.5 6.6-13.2 8.5-17.1zm-511.1-27.9h-66v59.4h66.1v-59.4zm78.1 0h-66.1v59.4h66.1zm78.1 0h-66.1v59.4h66.1zm-78.1-72.1h-66.1v60.1h66.1z"/></svg>
  494. </a>
  495. <a href="https://qm.qq.com/cgi-bin/qm/qr?_wv=1027&k=jCKlUP7QgSm9kh95UlBoYv6s1I-Apl1M&authKey=xI5ttVAp3do68IpEYEalwXSYZFdfxZSkah%2BctF5FIMyN2NqAa003vFtLqJyAVRfF&noverify=0&group_code=593946093" target="_blank" rel="noopener" title="qm.qq.com" class="md-social__link" aria-label="QQ">
  496. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" aria-hidden="true"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M434.1 420.4c-11.5 1.4-44.9-52.7-44.9-52.7 0 31.3-16.1 72.2-51 101.8 16.8 5.2 54.8 19.2 45.8 34.4-7.3 12.3-125.5 7.9-159.6 4-34.1 3.8-152.3 8.3-159.6-4-9-15.2 28.9-29.2 45.8-34.4-34.9-29.5-51.1-70.4-51.1-101.8 0 0-33.3 54.1-44.9 52.7-5.4-.6-12.4-29.6 9.3-99.7 10.3-33 22-60.5 40.1-105.8C60.9 98 109.2-.1 224.3-.1 338-.1 387.5 96 384.6 214.9c18.1 45.2 29.9 72.9 40.1 105.8 21.8 70.1 14.7 99.1 9.3 99.7z"/></svg>
  497. </a>
  498. </div>
  499. </div>
  500. </div>
  501. </footer>
  502. </div>
  503. <!-- Dialog container; its inner element is empty in the static markup (presumably filled by script at runtime; TODO confirm). --><div class="md-dialog" data-md-component="dialog">
  504. <div class="md-dialog__inner md-typeset"></div>
  505. </div>
  506. <script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": ["content.action.edit", "content.action.view", "navigation.tracking", "navigation.footer", "search", "search.suggest", "search.highlight", "search.share", "content.code.copy"], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copiado para \u00e1rea de transfer\u00eancia", "clipboard.copy": "Copiar para \u00e1rea de transfer\u00eancia", "search.result.more.one": "Mais 1 nesta p\u00e1gina", "search.result.more.other": "Mais # nesta p\u00e1gina", "search.result.none": "Nenhum resultado encontrado", "search.result.one": "1 resultado encontrado", "search.result.other": "# resultados encontrados", "search.result.placeholder": "Digite para iniciar a busca", "search.result.term.missing": "Ausente", "select.version": "Selecione a vers\u00e3o"}, "version": null}</script>
  507. <!-- Bundled site JavaScript, loaded as the last element of the body via a path relative to this page. --><script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
  508. </body>
  509. </html>