index.html 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301
  1. <!doctype html>
  2. <html lang="ko" class="no-js">
  3. <head>
  4. <meta charset="utf-8">
  5. <meta name="viewport" content="width=device-width,initial-scale=1">
  6. <meta name="description" content="Targeting SOTA TTS solutions.">
  7. <link rel="canonical" href="https://speech.fish.audio/ko/">
  8. <link rel="next" href="install/">
  9. <link rel="alternate" href="/" hreflang="en">
  10. <link rel="alternate" href="/zh/" hreflang="zh">
  11. <link rel="alternate" href="/ja/" hreflang="ja">
  12. <link rel="alternate" href="/pt/" hreflang="pt">
  13. <link rel="alternate" href="/ko/" hreflang="ko">
  14. <link rel="alternate" href="/ar/" hreflang="ar">
  15. <link rel="icon" href="../assets/logo.svg">
  16. <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.1">
  17. <title>소개 - Fish Audio</title>
  18. <link rel="stylesheet" href="../assets/stylesheets/main.484c7ddc.min.css">
  19. <link rel="stylesheet" href="../assets/stylesheets/palette.ab4e12ef.min.css">
  20. <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  21. <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
  22. <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
  23. <link rel="stylesheet" href="../stylesheets/extra.css">
  24. <script>__md_scope=new URL("..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
  25. </head>
  26. <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo">
  27. <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
  28. <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
  29. <label class="md-overlay" for="__drawer"></label>
  30. <div data-md-component="skip">
  31. <a href="#_1" class="md-skip">
  32. 콘텐츠로 이동
  33. </a>
  34. </div>
  35. <div data-md-component="announce">
  36. </div>
  37. <header class="md-header md-header--shadow" data-md-component="header">
  38. <nav class="md-header__inner md-grid" aria-label="상단/헤더">
  39. <a href="https://speech.fish.audio" title="Fish Audio" class="md-header__button md-logo" aria-label="Fish Audio" data-md-component="logo">
  40. <img src="../assets/logo.svg" alt="logo">
  41. </a>
  42. <label class="md-header__button md-icon" for="__drawer">
  43. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
  44. </label>
  45. <div class="md-header__title" data-md-component="header-title">
  46. <div class="md-header__ellipsis">
  47. <div class="md-header__topic">
  48. <span class="md-ellipsis">
  49. Fish Audio
  50. </span>
  51. </div>
  52. <div class="md-header__topic" data-md-component="header-topic">
  53. <span class="md-ellipsis">
  54. 소개
  55. </span>
  56. </div>
  57. </div>
  58. </div>
  59. <form class="md-header__option" data-md-component="palette">
  60. <input class="md-option" data-md-color-media="(prefers-color-scheme)" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_0">
  61. <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
  62. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9zM20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12zm-9.15 3.96h2.3L12 9z"/></svg>
  63. </label>
  64. <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
  65. <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
  66. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
  67. </label>
  68. <input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_2">
  69. <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden>
  70. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
  71. </label>
  72. </form>
  73. <script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
  74. <div class="md-header__option">
  75. <div class="md-select">
  76. <button class="md-header__button md-icon" aria-label="언어설정">
  77. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m12.87 15.07-2.54-2.51.03-.03A17.5 17.5 0 0 0 14.07 6H17V4h-7V2H8v2H1v2h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2zm-2.62 7 1.62-4.33L19.12 17z"/></svg>
  78. </button>
  79. <div class="md-select__inner">
  80. <ul class="md-select__list">
  81. <li class="md-select__item">
  82. <a href="/" hreflang="en" class="md-select__link">
  83. English
  84. </a>
  85. </li>
  86. <li class="md-select__item">
  87. <a href="/zh/" hreflang="zh" class="md-select__link">
  88. 简体中文
  89. </a>
  90. </li>
  91. <li class="md-select__item">
  92. <a href="/ja/" hreflang="ja" class="md-select__link">
  93. 日本語
  94. </a>
  95. </li>
  96. <li class="md-select__item">
  97. <a href="/pt/" hreflang="pt" class="md-select__link">
  98. Português (Brasil)
  99. </a>
  100. </li>
  101. <li class="md-select__item">
  102. <a href="/ko/" hreflang="ko" class="md-select__link">
  103. 한국어
  104. </a>
  105. </li>
  106. <li class="md-select__item">
  107. <a href="/ar/" hreflang="ar" class="md-select__link">
  108. العربية
  109. </a>
  110. </li>
  111. </ul>
  112. </div>
  113. </div>
  114. </div>
  115. <label class="md-header__button md-icon" for="__search">
  116. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
  117. </label>
  118. <div class="md-search" data-md-component="search" role="dialog">
  119. <label class="md-search__overlay" for="__search"></label>
  120. <div class="md-search__inner" role="search">
  121. <form class="md-search__form" name="search">
  122. <input type="text" class="md-search__input" name="query" aria-label="검색" placeholder="검색" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
  123. <label class="md-search__icon md-icon" for="__search">
  124. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
  125. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
  126. </label>
  127. <nav class="md-search__options" aria-label="검색">
  128. <a href="javascript:void(0)" class="md-search__icon md-icon" title="공유" aria-label="공유" data-clipboard data-clipboard-text="" data-md-component="search-share" tabindex="-1">
  129. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M18 16.08c-.76 0-1.44.3-1.96.77L8.91 12.7c.05-.23.09-.46.09-.7s-.04-.47-.09-.7l7.05-4.11c.54.5 1.25.81 2.04.81a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3c0 .24.04.47.09.7L8.04 9.81C7.5 9.31 6.79 9 6 9a3 3 0 0 0-3 3 3 3 0 0 0 3 3c.79 0 1.5-.31 2.04-.81l7.12 4.15c-.05.21-.08.43-.08.66 0 1.61 1.31 2.91 2.92 2.91s2.92-1.3 2.92-2.91A2.92 2.92 0 0 0 18 16.08"/></svg>
  130. </a>
  131. <button type="reset" class="md-search__icon md-icon" title="지우기" aria-label="지우기" tabindex="-1">
  132. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
  133. </button>
  134. </nav>
  135. <div class="md-search__suggest" data-md-component="search-suggest"></div>
  136. </form>
  137. <div class="md-search__output">
  138. <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
  139. <div class="md-search-result" data-md-component="search-result">
  140. <div class="md-search-result__meta">
  141. 검색 초기화
  142. </div>
  143. <ol class="md-search-result__list" role="presentation"></ol>
  144. </div>
  145. </div>
  146. </div>
  147. </div>
  148. </div>
  149. <div class="md-header__source">
  150. <a href="https://github.com/fishaudio/fish-speech" title="저장소로 이동" class="md-source" data-md-component="source">
  151. <div class="md-source__icon md-icon">
  152. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
  153. </div>
  154. <div class="md-source__repository">
  155. fishaudio/fish-speech
  156. </div>
  157. </a>
  158. </div>
  159. </nav>
  160. </header>
  161. <div class="md-container" data-md-component="container">
  162. <main class="md-main" data-md-component="main">
  163. <div class="md-main__inner md-grid">
  164. <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
  165. <div class="md-sidebar__scrollwrap">
  166. <div class="md-sidebar__inner">
  167. <nav class="md-nav md-nav--primary" aria-label="네비게이션" data-md-level="0">
  168. <label class="md-nav__title" for="__drawer">
  169. <a href="https://speech.fish.audio" title="Fish Audio" class="md-nav__button md-logo" aria-label="Fish Audio" data-md-component="logo">
  170. <img src="../assets/logo.svg" alt="logo">
  171. </a>
  172. Fish Audio
  173. </label>
  174. <div class="md-nav__source">
  175. <a href="https://github.com/fishaudio/fish-speech" title="저장소로 이동" class="md-source" data-md-component="source">
  176. <div class="md-source__icon md-icon">
  177. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
  178. </div>
  179. <div class="md-source__repository">
  180. fishaudio/fish-speech
  181. </div>
  182. </a>
  183. </div>
  184. <ul class="md-nav__list" data-md-scrollfix>
  185. <li class="md-nav__item md-nav__item--active">
  186. <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
  187. <label class="md-nav__link md-nav__link--active" for="__toc">
  188. <span class="md-ellipsis">
  189. 소개
  190. </span>
  191. <span class="md-nav__icon md-icon"></span>
  192. </label>
  193. <a href="./" class="md-nav__link md-nav__link--active">
  194. <span class="md-ellipsis">
  195. 소개
  196. </span>
  197. </a>
  198. <nav class="md-nav md-nav--secondary" aria-label="목차">
  199. <label class="md-nav__title" for="__toc">
  200. <span class="md-nav__icon md-icon"></span>
  201. 목차
  202. </label>
  203. <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
  204. <li class="md-nav__item">
  205. <a href="#_1" class="md-nav__link">
  206. <span class="md-ellipsis">
  207. 빠른 시작
  208. </span>
  209. </a>
  210. <nav class="md-nav" aria-label="빠른 시작">
  211. <ul class="md-nav__list">
  212. <li class="md-nav__item">
  213. <a href="#_2" class="md-nav__link">
  214. <span class="md-ellipsis">
  215. 문서로 바로 시작하기
  216. </span>
  217. </a>
  218. </li>
  219. <li class="md-nav__item">
  220. <a href="#llm-agent" class="md-nav__link">
  221. <span class="md-ellipsis">
  222. LLM Agent 가이드
  223. </span>
  224. </a>
  225. </li>
  226. </ul>
  227. </nav>
  228. </li>
  229. <li class="md-nav__item">
  230. <a href="#fish-audio-s2" class="md-nav__link">
  231. <span class="md-ellipsis">
  232. Fish Audio S2
  233. </span>
  234. </a>
  235. <nav class="md-nav" aria-label="Fish Audio S2">
  236. <ul class="md-nav__list">
  237. <li class="md-nav__item">
  238. <a href="#_3" class="md-nav__link">
  239. <span class="md-ellipsis">
  240. 모델 변형
  241. </span>
  242. </a>
  243. </li>
  244. </ul>
  245. </nav>
  246. </li>
  247. <li class="md-nav__item">
  248. <a href="#_4" class="md-nav__link">
  249. <span class="md-ellipsis">
  250. 벤치마크 결과
  251. </span>
  252. </a>
  253. </li>
  254. <li class="md-nav__item">
  255. <a href="#_5" class="md-nav__link">
  256. <span class="md-ellipsis">
  257. 주요 특징
  258. </span>
  259. </a>
  260. <nav class="md-nav" aria-label="주요 특징">
  261. <ul class="md-nav__list">
  262. <li class="md-nav__item">
  263. <a href="#_6" class="md-nav__link">
  264. <span class="md-ellipsis">
  265. 자연어 기반 세밀한 인라인 제어
  266. </span>
  267. </a>
  268. </li>
  269. <li class="md-nav__item">
  270. <a href="#dual-autoregressive" class="md-nav__link">
  271. <span class="md-ellipsis">
  272. Dual-Autoregressive 아키텍처
  273. </span>
  274. </a>
  275. </li>
  276. <li class="md-nav__item">
  277. <a href="#_7" class="md-nav__link">
  278. <span class="md-ellipsis">
  279. 강화학습 정렬
  280. </span>
  281. </a>
  282. </li>
  283. <li class="md-nav__item">
  284. <a href="#sglang" class="md-nav__link">
  285. <span class="md-ellipsis">
  286. SGLang 기반 프로덕션 스트리밍
  287. </span>
  288. </a>
  289. </li>
  290. <li class="md-nav__item">
  291. <a href="#_8" class="md-nav__link">
  292. <span class="md-ellipsis">
  293. 다국어 지원
  294. </span>
  295. </a>
  296. </li>
  297. <li class="md-nav__item">
  298. <a href="#_9" class="md-nav__link">
  299. <span class="md-ellipsis">
  300. 네이티브 멀티 화자 생성
  301. </span>
  302. </a>
  303. </li>
  304. <li class="md-nav__item">
  305. <a href="#_10" class="md-nav__link">
  306. <span class="md-ellipsis">
  307. 멀티 턴 대화 생성
  308. </span>
  309. </a>
  310. </li>
  311. <li class="md-nav__item">
  312. <a href="#_11" class="md-nav__link">
  313. <span class="md-ellipsis">
  314. 빠른 음성 복제
  315. </span>
  316. </a>
  317. </li>
  318. </ul>
  319. </nav>
  320. </li>
  321. <li class="md-nav__item">
  322. <a href="#_12" class="md-nav__link">
  323. <span class="md-ellipsis">
  324. 크레딧
  325. </span>
  326. </a>
  327. </li>
  328. <li class="md-nav__item">
  329. <a href="#_13" class="md-nav__link">
  330. <span class="md-ellipsis">
  331. 기술 보고서
  332. </span>
  333. </a>
  334. </li>
  335. </ul>
  336. </nav>
  337. </li>
  338. <li class="md-nav__item">
  339. <a href="install/" class="md-nav__link">
  340. <span class="md-ellipsis">
  341. 설치
  342. </span>
  343. </a>
  344. </li>
  345. <li class="md-nav__item">
  346. <a href="finetune/" class="md-nav__link">
  347. <span class="md-ellipsis">
  348. 파인튜닝
  349. </span>
  350. </a>
  351. </li>
  352. <li class="md-nav__item">
  353. <a href="inference/" class="md-nav__link">
  354. <span class="md-ellipsis">
  355. 추론
  356. </span>
  357. </a>
  358. </li>
  359. <li class="md-nav__item">
  360. <!-- Directory-style URL, matching the sibling nav links (install/, finetune/, inference/). NOTE(review): confirm the samples page is actually built for this locale. --><a href="samples/" class="md-nav__link">
  361. <span class="md-ellipsis">
  362. 샘플
  363. </span>
  364. </a>
  365. </li>
  366. </ul>
  367. </nav>
  368. </div>
  369. </div>
  370. </div>
  371. <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
  372. <div class="md-sidebar__scrollwrap">
  373. <div class="md-sidebar__inner">
  374. <nav class="md-nav md-nav--secondary" aria-label="목차">
  375. <label class="md-nav__title" for="__toc">
  376. <span class="md-nav__icon md-icon"></span>
  377. 목차
  378. </label>
  379. <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
  380. <li class="md-nav__item">
  381. <a href="#_1" class="md-nav__link">
  382. <span class="md-ellipsis">
  383. 빠른 시작
  384. </span>
  385. </a>
  386. <nav class="md-nav" aria-label="빠른 시작">
  387. <ul class="md-nav__list">
  388. <li class="md-nav__item">
  389. <a href="#_2" class="md-nav__link">
  390. <span class="md-ellipsis">
  391. 문서로 바로 시작하기
  392. </span>
  393. </a>
  394. </li>
  395. <li class="md-nav__item">
  396. <a href="#llm-agent" class="md-nav__link">
  397. <span class="md-ellipsis">
  398. LLM Agent 가이드
  399. </span>
  400. </a>
  401. </li>
  402. </ul>
  403. </nav>
  404. </li>
  405. <li class="md-nav__item">
  406. <a href="#fish-audio-s2" class="md-nav__link">
  407. <span class="md-ellipsis">
  408. Fish Audio S2
  409. </span>
  410. </a>
  411. <nav class="md-nav" aria-label="Fish Audio S2">
  412. <ul class="md-nav__list">
  413. <li class="md-nav__item">
  414. <a href="#_3" class="md-nav__link">
  415. <span class="md-ellipsis">
  416. 모델 변형
  417. </span>
  418. </a>
  419. </li>
  420. </ul>
  421. </nav>
  422. </li>
  423. <li class="md-nav__item">
  424. <a href="#_4" class="md-nav__link">
  425. <span class="md-ellipsis">
  426. 벤치마크 결과
  427. </span>
  428. </a>
  429. </li>
  430. <li class="md-nav__item">
  431. <a href="#_5" class="md-nav__link">
  432. <span class="md-ellipsis">
  433. 주요 특징
  434. </span>
  435. </a>
  436. <nav class="md-nav" aria-label="주요 특징">
  437. <ul class="md-nav__list">
  438. <li class="md-nav__item">
  439. <a href="#_6" class="md-nav__link">
  440. <span class="md-ellipsis">
  441. 자연어 기반 세밀한 인라인 제어
  442. </span>
  443. </a>
  444. </li>
  445. <li class="md-nav__item">
  446. <a href="#dual-autoregressive" class="md-nav__link">
  447. <span class="md-ellipsis">
  448. Dual-Autoregressive 아키텍처
  449. </span>
  450. </a>
  451. </li>
  452. <li class="md-nav__item">
  453. <a href="#_7" class="md-nav__link">
  454. <span class="md-ellipsis">
  455. 강화학습 정렬
  456. </span>
  457. </a>
  458. </li>
  459. <li class="md-nav__item">
  460. <a href="#sglang" class="md-nav__link">
  461. <span class="md-ellipsis">
  462. SGLang 기반 프로덕션 스트리밍
  463. </span>
  464. </a>
  465. </li>
  466. <li class="md-nav__item">
  467. <a href="#_8" class="md-nav__link">
  468. <span class="md-ellipsis">
  469. 다국어 지원
  470. </span>
  471. </a>
  472. </li>
  473. <li class="md-nav__item">
  474. <a href="#_9" class="md-nav__link">
  475. <span class="md-ellipsis">
  476. 네이티브 멀티 화자 생성
  477. </span>
  478. </a>
  479. </li>
  480. <li class="md-nav__item">
  481. <a href="#_10" class="md-nav__link">
  482. <span class="md-ellipsis">
  483. 멀티 턴 대화 생성
  484. </span>
  485. </a>
  486. </li>
  487. <li class="md-nav__item">
  488. <a href="#_11" class="md-nav__link">
  489. <span class="md-ellipsis">
  490. 빠른 음성 복제
  491. </span>
  492. </a>
  493. </li>
  494. </ul>
  495. </nav>
  496. </li>
  497. <li class="md-nav__item">
  498. <a href="#_12" class="md-nav__link">
  499. <span class="md-ellipsis">
  500. 크레딧
  501. </span>
  502. </a>
  503. </li>
  504. <li class="md-nav__item">
  505. <a href="#_13" class="md-nav__link">
  506. <span class="md-ellipsis">
  507. 기술 보고서
  508. </span>
  509. </a>
  510. </li>
  511. </ul>
  512. </nav>
  513. </div>
  514. </div>
  515. </div>
  516. <div class="md-content" data-md-component="content">
  517. <article class="md-content__inner md-typeset">
  518. <a href="https://github.com/fishaudio/fish-speech/blob/main/docs/ko/index.md" title="이 페이지를 편집" class="md-content__button md-icon" rel="edit">
  519. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M10 20H6V4h7v5h5v3.1l2-2V8l-6-6H6c-1.1 0-2 .9-2 2v16c0 1.1.9 2 2 2h4zm10.2-7c.1 0 .3.1.4.2l1.3 1.3c.2.2.2.6 0 .8l-1 1-2.1-2.1 1-1c.1-.1.2-.2.4-.2m0 3.9L14.1 23H12v-2.1l6.1-6.1z"/></svg>
  520. </a>
  521. <a href="https://github.com/fishaudio/fish-speech/raw/main/docs/ko/index.md" title="페이지소스 보기" class="md-content__button md-icon">
  522. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M17 18c.56 0 1 .44 1 1s-.44 1-1 1-1-.44-1-1 .44-1 1-1m0-3c-2.73 0-5.06 1.66-6 4 .94 2.34 3.27 4 6 4s5.06-1.66 6-4c-.94-2.34-3.27-4-6-4m0 6.5a2.5 2.5 0 0 1-2.5-2.5 2.5 2.5 0 0 1 2.5-2.5 2.5 2.5 0 0 1 2.5 2.5 2.5 2.5 0 0 1-2.5 2.5M9.27 20H6V4h7v5h5v4.07c.7.08 1.36.25 2 .49V8l-6-6H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h4.5a8.2 8.2 0 0 1-1.23-2"/></svg>
  523. </a>
  524. <!-- Page hero: project title, in-page language switcher, and external ranking badges. --><div align="center">
  525. <h1>Fish Speech</h1>
  526. <!-- English is published at the site root (see the hreflang="en" alternate in the head), so it is linked as "../" rather than a language subdirectory. --><p><a href="../">English</a> | <a href="../zh/">简体中文</a> | <a href="../pt/">Portuguese</a> | <a href="../ja/">日本語</a> | <strong>한국어</strong> | <a href="../ar/">العربية</a> | <a href="../es/">Español</a></p>
  527. <!-- rel="noopener" keeps the newly opened tab from reaching back into this page through window.opener. --><a href="https://www.producthunt.com/products/fish-speech?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_source=badge-fish&#0045;audio&#0045;s1" target="_blank" rel="noopener"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=1023740&theme=light&period=daily&t=1761164814710" alt="Fish&#0032;Audio&#0032;S1 - Expressive&#0032;Voice&#0032;Cloning&#0032;and&#0032;Text&#0045;to&#0045;Speech | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
  528. <a href="https://trendshift.io/repositories/7014" target="_blank" rel="noopener">
  529. <img src="https://trendshift.io/api/badge/repositories/7014" alt="fishaudio%2Ffish-speech | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/>
  530. </a>
  531. </div>
  532. <p><br></p>
  533. <!-- Counter badge image; the empty alt marks it as decorative so screen readers skip it instead of announcing the image URL. --><div align="center">
  534. <img src="https://count.getloli.com/get/@fish-speech?theme=asoul" alt="" /><br>
  535. </div>
  536. <p><br></p>
  537. <!-- Community badges (Discord, Docker Hub, QQ channel). Each link opens in a new tab; rel="noopener" keeps that tab from reaching back through window.opener. --><div align="center">
  538. <a target="_blank" rel="noopener" href="https://discord.gg/Es5qTB9BcN">
  539. <img alt="Discord" src="https://img.shields.io/discord/1214047546020728892?color=%23738ADB&label=Discord&logo=discord&logoColor=white&style=flat-square"/>
  540. </a>
  541. <a target="_blank" rel="noopener" href="https://hub.docker.com/r/fishaudio/fish-speech">
  542. <img alt="Docker" src="https://img.shields.io/docker/pulls/fishaudio/fish-speech?style=flat-square&logo=docker"/>
  543. </a>
  544. <a target="_blank" rel="noopener" href="https://pd.qq.com/s/bwxia254o">
  545. <img alt="QQ Channel" src="https://img.shields.io/badge/QQ-blue?logo=tencentqq">
  546. </a>
  547. </div>
  548. <!-- Resource badges (Hugging Face model, blog post, technical report). Each link opens in a new tab; rel="noopener" keeps that tab from reaching back through window.opener. --><div align="center">
  549. <a target="_blank" rel="noopener" href="https://huggingface.co/fishaudio/s2">
  550. <img alt="HuggingFace Model" src="https://img.shields.io/badge/🤗%20-models-orange"/>
  551. </a>
  552. <a target="_blank" rel="noopener" href="https://fish.audio/blog/fish-audio-open-sources-s2/">
  553. <img alt="Fish Audio Blog" src="https://img.shields.io/badge/Blog-Fish_Audio_S2-1f7a8c?style=flat-square&logo=readme&logoColor=white"/>
  554. </a>
  555. <a target="_blank" rel="noopener" href="https://arxiv.org/abs/2603.08823">
  556. <img alt="Paper | Technical Report" src="https://img.shields.io/badge/Paper-Technical_Report-b31b1b?style=flat-square"/>
  557. </a>
  558. </div>
  559. <div class="admonition info">
  560. <p class="admonition-title">라이선스 공지</p>
  561. <p>이 코드베이스 및 관련 모델 가중치는 <strong>FISH AUDIO RESEARCH LICENSE</strong> 하에 릴리스되었습니다. 자세한 내용은 <a href="https://github.com/fishaudio/fish-speech/blob/main/LICENSE">LICENSE</a>를 참조하십시오.</p>
  562. </div>
  563. <div class="admonition warning">
  564. <p class="admonition-title">법적 면책 조항</p>
  565. <p>코드베이스의 불법적인 사용에 대해 당사는 어떠한 책임도 지지 않습니다. DMCA 및 기타 관련 법률에 관한 현지 규정을 참조하십시오.</p>
  566. </div>
  567. <h2 id="_1">빠른 시작</h2>
  568. <h3 id="_2">문서로 바로 시작하기</h3>
  569. <p>Fish Audio S2 공식 문서입니다. 아래 링크에서 바로 시작할 수 있습니다.</p>
  570. <ul>
  571. <li><a href="https://speech.fish.audio/ko/install/">설치</a></li>
  572. <li><a href="https://speech.fish.audio/ko/inference/">커맨드라인 추론</a></li>
  573. <li><a href="https://speech.fish.audio/ko/inference/">WebUI 추론</a></li>
  574. <li><a href="https://speech.fish.audio/ko/server/">서버 추론</a></li>
  575. <li><a href="https://speech.fish.audio/ko/install/">Docker 설정</a></li>
  576. </ul>
  577. <div class="admonition info">
  578. <p class="admonition-title">중요</p>
  579. <p><strong>SGLang 서버는 <a href="https://github.com/sgl-project/sglang-omni/blob/main/sglang_omni/models/fishaudio_s2_pro/README.md">SGLang-Omni README</a>를 참고하세요.</strong></p>
  580. </div>
  581. <h3 id="llm-agent">LLM Agent 가이드</h3>
  582. <div class="language-text highlight"><pre><span></span><code><span id="__span-0-1"><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>https://speech.fish.audio/ko/install/ 문서를 따라 Fish Audio S2를 설치하고 구성하세요.
  583. </span></code></pre></div>
  584. <h2 id="fish-audio-s2">Fish Audio S2</h2>
  585. <p><strong>오픈 소스와 클로즈드 소스 모두에서 가장 뛰어난 텍스트 음성 변환 시스템</strong></p>
  586. <p>Fish Audio S2는 <a href="https://fish.audio/">Fish Audio</a>가 개발한 최신 모델입니다. 약 50개 언어, 1,000만 시간 이상의 오디오 데이터로 학습되었고, 강화학습 정렬과 Dual-Autoregressive 아키텍처를 결합해 자연스럽고 사실적이며 감정 표현이 풍부한 음성을 생성합니다.</p>
  587. <p>S2는 <code>[laugh]</code>, <code>[whispers]</code>, <code>[super happy]</code> 같은 자연어 태그를 사용해 운율과 감정을 문장 내부에서 세밀하게 제어할 수 있으며, 멀티 화자/멀티 턴 생성도 네이티브로 지원합니다.</p>
  588. <p>실시간 데모는 <a href="https://fish.audio/">Fish Audio 웹사이트</a>에서, 자세한 내용은 <a href="https://fish.audio/blog/fish-audio-open-sources-s2/">블로그 글</a>과 <a href="https://arxiv.org/abs/2603.08823">기술 보고서</a>에서 확인할 수 있습니다.</p>
  589. <h3 id="_3">모델 변형</h3>
  590. <table>
  591. <!-- Column headers for the model variants table; scope="col" ties each header to its column for assistive technology. --><thead>
  592. <tr>
  593. <th scope="col">모델</th>
  594. <th scope="col">크기</th>
  595. <th scope="col">가용성</th>
  596. <th scope="col">설명</th>
  597. </tr>
  598. </thead>
  599. <tbody>
  600. <tr>
  601. <td>S2-Pro</td>
  602. <td>4B 매개변수</td>
  603. <td><a href="https://huggingface.co/fishaudio/s2-pro">HuggingFace</a></td>
  604. <td>최고 수준의 품질과 안정성을 제공하는 풀기능 플래그십 모델</td>
  605. </tr>
  606. </tbody>
  607. </table>
  608. <p>모델 상세는 <a href="https://arxiv.org/abs/2603.08823">기술 보고서</a>를 참고하세요.</p>
  609. <h2 id="_4">벤치마크 결과</h2>
  610. <table>
  611. <thead>
  612. <tr>
  613. <th>벤치마크</th>
  614. <th>Fish Audio S2</th>
  615. </tr>
  616. </thead>
  617. <tbody>
  618. <tr>
  619. <td>Seed-TTS Eval — WER (중국어)</td>
  620. <td><strong>0.54%</strong> (전체 최고)</td>
  621. </tr>
  622. <tr>
  623. <td>Seed-TTS Eval — WER (영어)</td>
  624. <td><strong>0.99%</strong> (전체 최고)</td>
  625. </tr>
  626. <tr>
  627. <td>Audio Turing Test (지시 포함)</td>
  628. <td><strong>0.515</strong> 사후 평균</td>
  629. </tr>
  630. <tr>
  631. <td>EmergentTTS-Eval — 승률</td>
  632. <td><strong>81.88%</strong> (전체 최고)</td>
  633. </tr>
  634. <tr>
  635. <td>Fish Instruction Benchmark — TAR</td>
  636. <td><strong>93.3%</strong></td>
  637. </tr>
  638. <tr>
  639. <td>Fish Instruction Benchmark — 품질</td>
  640. <td><strong>4.51 / 5.0</strong></td>
  641. </tr>
  642. <tr>
  643. <td>다국어 (MiniMax Testset) — 최고 WER</td>
  644. <td><strong>24개 언어 중 11개</strong></td>
  645. </tr>
  646. <tr>
  647. <td>다국어 (MiniMax Testset) — 최고 SIM</td>
  648. <td><strong>24개 언어 중 17개</strong></td>
  649. </tr>
  650. </tbody>
  651. </table>
  652. <p>Seed-TTS Eval에서 S2는 클로즈드 소스 시스템을 포함한 전체 비교 모델 중 가장 낮은 WER를 기록했습니다: Qwen3-TTS (0.77/1.24), MiniMax Speech-02 (0.99/1.90), Seed-TTS (1.12/2.25). Audio Turing Test에서는 0.515를 기록해 Seed-TTS (0.417) 대비 24%, MiniMax-Speech (0.387) 대비 33% 높았습니다. EmergentTTS-Eval에서는 파라언어 표현(91.61%), 의문문(84.41%), 구문 복잡도(83.39%)에서 특히 강한 성능을 보였습니다.</p>
  653. <h2 id="_5">주요 특징</h2>
  654. <p><img src="../assets/totalability.png" alt="Fish Audio S2 종합 능력 개요" width="200%"></p>
  655. <h3 id="_6">자연어 기반 세밀한 인라인 제어</h3>
  656. <p>Fish Audio S2는 텍스트의 특정 단어 또는 구문 위치에 자연어 지시를 직접 삽입해 음성 생성을 국소적으로 제어할 수 있습니다. 고정된 사전 정의 태그에 의존하는 대신, S2는 [whisper in small voice], [professional broadcast tone], [pitch up] 같은 자유 형식 텍스트 설명을 받아 단어 수준의 개방형 표현 제어를 지원합니다.</p>
  657. <h3 id="dual-autoregressive">Dual-Autoregressive 아키텍처</h3>
  658. <p>S2는 decoder-only Transformer와 RVQ 기반 오디오 코덱(10 codebooks, 약 21 Hz 프레임레이트)을 결합합니다. Dual-AR은 생성 과정을 두 단계로 나눕니다.</p>
  659. <ul>
  660. <li><strong>Slow AR</strong>: 시간축을 따라 동작하며 주 semantic codebook을 예측</li>
  661. <li><strong>Fast AR</strong>: 각 시점에서 나머지 9개 residual codebook을 생성해 세밀한 음향 디테일을 복원</li>
  662. </ul>
  663. <p>이 비대칭 설계(시간축 4B 파라미터, 깊이축 400M 파라미터)는 음질을 유지하면서 추론 효율을 높입니다.</p>
  664. <h3 id="_7">강화학습 정렬</h3>
  665. <p>S2는 후학습 정렬을 위해 Group Relative Policy Optimization(GRPO)을 사용합니다. 학습 데이터 필터링/라벨링에 쓰인 동일한 모델을 RL 보상 모델로 재사용해, 사전학습 데이터 분포와 후학습 목표 간의 분포 불일치를 줄였습니다. 보상 신호는 의미 정확도, 지시 준수도, 음향 선호 점수, 음색 유사도를 함께 반영합니다.</p>
  666. <h3 id="sglang">SGLang 기반 프로덕션 스트리밍</h3>
  667. <p>Dual-AR 구조는 표준 자기회귀 LLM과 구조적으로 동형이기 때문에, S2는 SGLang의 LLM 서빙 최적화를 그대로 활용합니다. 예: continuous batching, paged KV cache, CUDA graph replay, RadixAttention 기반 prefix caching.</p>
  668. <p>NVIDIA H200 단일 GPU 기준:</p>
  669. <ul>
  670. <li><strong>실시간 계수(RTF):</strong> 0.195</li>
  671. <li><strong>첫 오디오 출력까지 시간:</strong> 약 100 ms</li>
  672. <li><strong>처리량:</strong> RTF 0.5 미만 유지 시 3,000+ acoustic tokens/s</li>
  673. </ul>
  674. <h3 id="_8">다국어 지원</h3>
  675. <p>Fish Audio S2는 음소나 언어별 전처리 없이 고품질 다국어 텍스트 음성 변환을 지원합니다. 지원 언어는 다음과 같습니다:</p>
  676. <p><strong>영어, 중국어, 일본어, 한국어, 아랍어, 독일어, 프랑스어...</strong></p>
  677. <p><strong>그리고 더 많이!</strong></p>
  678. <p>목록은 계속 확장되고 있습니다. 최신 릴리스는 <a href="https://fish.audio/">Fish Audio</a>를 확인하세요.</p>
  679. <h3 id="_9">네이티브 멀티 화자 생성</h3>
  680. <p><img src="../assets/chattemplate.png" alt="멀티 화자 채팅 템플릿 구조" width="200%"></p>
  681. <p>Fish Audio S2는 사용자가 여러 화자가 포함된 참조 오디오를 업로드할 수 있도록 하며, 모델은 <code>&lt;|speaker:i|&gt;</code> 토큰을 통해 각 화자의 특징을 처리합니다. 그런 다음 화자 ID 토큰으로 모델의 성능을 제어하여 한 번의 생성으로 여러 화자를 포함할 수 있습니다. 이전처럼 각 화자마다 별도로 참조 오디오를 업로드하고 음성을 생성할 필요가 없습니다.</p>
  682. <h3 id="_10">멀티 턴 대화 생성</h3>
  683. <p>모델 컨텍스트의 확장 덕분에 이제 이전 정보를 활용하여 후속 생성 콘텐츠의 표현력을 높이고 콘텐츠의 자연스러움을 향상시킬 수 있습니다.</p>
  684. <h3 id="_11">빠른 음성 복제</h3>
  685. <p>Fish Audio S2는 짧은 참조 샘플(일반적으로 10-30초)을 사용하여 정확한 음성 복제를 지원합니다. 모델은 음색, 말하기 스타일 및 감정적 경향을 캡처하여 추가 미세 조정 없이 사실적이고 일관된 복제 음성을 생성합니다.
  686. SGLang 서버 사용은 <a href="https://github.com/sgl-project/sglang-omni/blob/main/sglang_omni/models/fishaudio_s2_pro/README.md">SGLang-Omni README</a>를 참고하세요.</p>
  687. <hr />
  688. <h2 id="_12">크레딧</h2>
  689. <ul>
  690. <li><a href="https://github.com/daniilrobnikov/vits2">VITS2 (daniilrobnikov)</a></li>
  691. <li><a href="https://github.com/fishaudio/Bert-VITS2">Bert-VITS2</a></li>
  692. <li><a href="https://github.com/innnky/gpt-vits">GPT VITS</a></li>
  693. <li><a href="https://github.com/b04901014/MQTTS">MQTTS</a></li>
  694. <li><a href="https://github.com/pytorch-labs/gpt-fast">GPT Fast</a></li>
  695. <li><a href="https://github.com/RVC-Boss/GPT-SoVITS">GPT-SoVITS</a></li>
  696. <li><a href="https://github.com/QwenLM/Qwen3">Qwen3</a></li>
  697. </ul>
  698. <h2 id="_13">기술 보고서</h2>
  699. <div class="language-bibtex highlight"><pre><span></span><code><span id="__span-1-1"><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="nc">@misc</span><span class="p">{</span><span class="nl">fish-speech-v1.4</span><span class="p">,</span>
  700. </span><span id="__span-1-2"><a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="w"> </span><span class="na">title</span><span class="p">=</span><span class="s">{Fish-Speech: Leveraging Large Language Models for Advanced Multilingual Text-to-Speech Synthesis}</span><span class="p">,</span>
  701. </span><span id="__span-1-3"><a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="w"> </span><span class="na">author</span><span class="p">=</span><span class="s">{Shijia Liao and Yuxuan Wang and Tianyu Li and Yifan Cheng and Ruoyi Zhang and Rongzhi Zhou and Yijin Xing}</span><span class="p">,</span>
  702. </span><span id="__span-1-4"><a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a><span class="w"> </span><span class="na">year</span><span class="p">=</span><span class="s">{2024}</span><span class="p">,</span>
  703. </span><span id="__span-1-5"><a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a><span class="w"> </span><span class="na">eprint</span><span class="p">=</span><span class="s">{2411.01156}</span><span class="p">,</span>
  704. </span><span id="__span-1-6"><a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a><span class="w"> </span><span class="na">archivePrefix</span><span class="p">=</span><span class="s">{arXiv}</span><span class="p">,</span>
  705. </span><span id="__span-1-7"><a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a><span class="w"> </span><span class="na">primaryClass</span><span class="p">=</span><span class="s">{cs.SD}</span><span class="p">,</span>
  706. </span><span id="__span-1-8"><a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a><span class="w"> </span><span class="na">url</span><span class="p">=</span><span class="s">{https://arxiv.org/abs/2411.01156}</span><span class="p">,</span>
  707. </span><span id="__span-1-9"><a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a><span class="p">}</span>
  708. </span><span id="__span-1-10"><a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a>
  709. </span><span id="__span-1-11"><a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a><span class="nc">@misc</span><span class="p">{</span><span class="nl">liao2026fishaudios2technical</span><span class="p">,</span>
  710. </span><span id="__span-1-12"><a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a><span class="w"> </span><span class="na">title</span><span class="p">=</span><span class="s">{Fish Audio S2 Technical Report}</span><span class="p">,</span><span class="w"> </span>
  711. </span><span id="__span-1-13"><a id="__codelineno-1-13" name="__codelineno-1-13" href="#__codelineno-1-13"></a><span class="w"> </span><span class="na">author</span><span class="p">=</span><span class="s">{Shijia Liao and Yuxuan Wang and Songting Liu and Yifan Cheng and Ruoyi Zhang and Tianyu Li and Shidong Li and Yisheng Zheng and Xingwei Liu and Qingzheng Wang and Zhizhuo Zhou and Jiahua Liu and Xin Chen and Dawei Han}</span><span class="p">,</span>
  712. </span><span id="__span-1-14"><a id="__codelineno-1-14" name="__codelineno-1-14" href="#__codelineno-1-14"></a><span class="w"> </span><span class="na">year</span><span class="p">=</span><span class="s">{2026}</span><span class="p">,</span>
  713. </span><span id="__span-1-15"><a id="__codelineno-1-15" name="__codelineno-1-15" href="#__codelineno-1-15"></a><span class="w"> </span><span class="na">eprint</span><span class="p">=</span><span class="s">{2603.08823}</span><span class="p">,</span>
  714. </span><span id="__span-1-16"><a id="__codelineno-1-16" name="__codelineno-1-16" href="#__codelineno-1-16"></a><span class="w"> </span><span class="na">archivePrefix</span><span class="p">=</span><span class="s">{arXiv}</span><span class="p">,</span>
  715. </span><span id="__span-1-17"><a id="__codelineno-1-17" name="__codelineno-1-17" href="#__codelineno-1-17"></a><span class="w"> </span><span class="na">primaryClass</span><span class="p">=</span><span class="s">{cs.SD}</span><span class="p">,</span>
  716. </span><span id="__span-1-18"><a id="__codelineno-1-18" name="__codelineno-1-18" href="#__codelineno-1-18"></a><span class="w"> </span><span class="na">url</span><span class="p">=</span><span class="s">{https://arxiv.org/abs/2603.08823}</span><span class="p">,</span><span class="w"> </span>
  717. </span><span id="__span-1-19"><a id="__codelineno-1-19" name="__codelineno-1-19" href="#__codelineno-1-19"></a><span class="p">}</span>
  718. </span></code></pre></div>
  719. </article>
  720. </div>
  721. <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
  722. </div>
  723. </main>
  724. <footer class="md-footer">
  725. <nav class="md-footer__inner md-grid" aria-label="하단/푸터" >
  726. <a href="install/" class="md-footer__link md-footer__link--next" aria-label="다음: 설치">
  727. <div class="md-footer__title">
  728. <span class="md-footer__direction">
  729. 다음
  730. </span>
  731. <div class="md-ellipsis">
  732. 설치
  733. </div>
  734. </div>
  735. <div class="md-footer__button md-icon">
  736. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
  737. </div>
  738. </a>
  739. </nav>
  740. <div class="md-footer-meta md-typeset">
  741. <div class="md-footer-meta__inner md-grid">
  742. <div class="md-copyright">
  743. <div class="md-copyright__highlight">
  744. Copyright &copy; 2023-2025 by Fish Audio
  745. </div>
  746. Made with
  747. <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
  748. Material for MkDocs
  749. </a>
  750. </div>
  751. <div class="md-social">
  752. <a href="https://discord.gg/Es5qTB9BcN" target="_blank" rel="noopener" title="discord.gg" class="md-social__link">
  753. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M492.5 69.8c-.2-.3-.4-.6-.8-.7-38.1-17.5-78.4-30-119.7-37.1-.4-.1-.8 0-1.1.1s-.6.4-.8.8c-5.5 9.9-10.5 20.2-14.9 30.6-44.6-6.8-89.9-6.8-134.4 0-4.5-10.5-9.5-20.7-15.1-30.6-.2-.3-.5-.6-.8-.8s-.7-.2-1.1-.2C162.5 39 122.2 51.5 84.1 69c-.3.1-.6.4-.8.7C7.1 183.5-13.8 294.6-3.6 404.2c0 .3.1.5.2.8s.3.4.5.6c44.4 32.9 94 58 146.8 74.2.4.1.8.1 1.1 0s.7-.4.9-.7c11.3-15.4 21.4-31.8 30-48.8.1-.2.2-.5.2-.8s0-.5-.1-.8-.2-.5-.4-.6-.4-.3-.7-.4c-15.8-6.1-31.2-13.4-45.9-21.9-.3-.2-.5-.4-.7-.6s-.3-.6-.3-.9 0-.6.2-.9.3-.5.6-.7c3.1-2.3 6.2-4.7 9.1-7.1.3-.2.6-.4.9-.4s.7 0 1 .1c96.2 43.9 200.4 43.9 295.5 0 .3-.1.7-.2 1-.2s.7.2.9.4c2.9 2.4 6 4.9 9.1 7.2.2.2.4.4.6.7s.2.6.2.9-.1.6-.3.9-.4.5-.6.6c-14.7 8.6-30 15.9-45.9 21.8-.2.1-.5.2-.7.4s-.3.4-.4.7-.1.5-.1.8.1.5.2.8c8.8 17 18.8 33.3 30 48.8.2.3.6.6.9.7s.8.1 1.1 0c52.9-16.2 102.6-41.3 147.1-74.2.2-.2.4-.4.5-.6s.2-.5.2-.8c12.3-126.8-20.5-236.9-86.9-334.5zm-302 267.7c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.4 59.2-52.8 59.2m195.4 0c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.2 59.2-52.8 59.2"/></svg>
  754. </a>
  755. <a href="https://hub.docker.com/r/fishaudio/fish-speech" target="_blank" rel="noopener" title="hub.docker.com" class="md-social__link">
  756. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 640 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M349.9 236.3h-66.1v-59.4h66.1zm0-204.3h-66.1v60.7h66.1zm78.2 144.8H362v59.4h66.1zm-156.3-72.1h-66.1v60.1h66.1zm78.1 0h-66.1v60.1h66.1zm276.8 100c-14.4-9.7-47.6-13.2-73.1-8.4-3.3-24-16.7-44.9-41.1-63.7l-14-9.3-9.3 14c-18.4 27.8-23.4 73.6-3.7 103.8-8.7 4.7-25.8 11.1-48.4 10.7H2.4c-8.7 50.8 5.8 116.8 44 162.1 37.1 43.9 92.7 66.2 165.4 66.2 157.4 0 273.9-72.5 328.4-204.2 21.4.4 67.6.1 91.3-45.2 1.5-2.5 6.6-13.2 8.5-17.1zm-511.1-27.9h-66v59.4h66.1v-59.4zm78.1 0h-66.1v59.4h66.1zm78.1 0h-66.1v59.4h66.1zm-78.1-72.1h-66.1v60.1h66.1z"/></svg>
  757. </a>
  758. <a href="http://qm.qq.com/cgi-bin/qm/qr?_wv=1027&amp;k=jCKlUP7QgSm9kh95UlBoYv6s1I-Apl1M&amp;authKey=xI5ttVAp3do68IpEYEalwXSYZFdfxZSkah%2BctF5FIMyN2NqAa003vFtLqJyAVRfF&amp;noverify=0&amp;group_code=593946093" target="_blank" rel="noopener" title="qm.qq.com" class="md-social__link">
  759. <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M434.1 420.4c-11.5 1.4-44.9-52.7-44.9-52.7 0 31.3-16.1 72.2-51 101.8 16.8 5.2 54.8 19.2 45.8 34.4-7.3 12.3-125.5 7.9-159.6 4-34.1 3.8-152.3 8.3-159.6-4-9-15.2 28.9-29.2 45.8-34.4-34.9-29.5-51.1-70.4-51.1-101.8 0 0-33.3 54.1-44.9 52.7-5.4-.6-12.4-29.6 9.3-99.7 10.3-33 22-60.5 40.1-105.8C60.9 98 109.2-.1 224.3-.1 338-.1 387.5 96 384.6 214.9c18.1 45.2 29.9 72.9 40.1 105.8 21.8 70.1 14.7 99.1 9.3 99.7z"/></svg>
  760. </a>
  761. </div>
  762. </div>
  763. </div>
  764. </footer>
  765. </div>
  766. <div class="md-dialog" data-md-component="dialog">
  767. <div class="md-dialog__inner md-typeset"></div>
  768. </div>
  769. <script id="__config" type="application/json">{"annotate": null, "base": "..", "features": ["content.action.edit", "content.action.view", "navigation.tracking", "navigation.footer", "search", "search.suggest", "search.highlight", "search.share", "content.code.copy"], "search": "../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "\ud074\ub9bd\ubcf4\ub4dc\uc5d0 \ubcf5\uc0ac\ub428", "clipboard.copy": "\ud074\ub9bd\ubcf4\ub4dc\ub85c \ubcf5\uc0ac", "search.result.more.one": "\uc774 \ubb38\uc11c\uc5d0\uc11c 1\uac1c\uc758 \uac80\uc0c9 \uacb0\uacfc \ub354 \ubcf4\uae30", "search.result.more.other": "\uc774 \ubb38\uc11c\uc5d0\uc11c #\uac1c\uc758 \uac80\uc0c9 \uacb0\uacfc \ub354 \ubcf4\uae30", "search.result.none": "\uac80\uc0c9\uc5b4\uc640 \uc77c\uce58\ud558\ub294 \ubb38\uc11c\uac00 \uc5c6\uc2b5\ub2c8\ub2e4", "search.result.one": "1\uac1c\uc758 \uc77c\uce58\ud558\ub294 \ubb38\uc11c", "search.result.other": "#\uac1c\uc758 \uc77c\uce58\ud558\ub294 \ubb38\uc11c", "search.result.placeholder": "\uac80\uc0c9\uc5b4\ub97c \uc785\ub825\ud558\uc138\uc694", "search.result.term.missing": "\ud3ec\ud568\ub418\uc9c0 \uc54a\uc740 \uac80\uc0c9\uc5b4", "select.version": "\ubc84\uc804 \uc120\ud0dd"}, "version": null}</script>
  770. <script src="../assets/javascripts/bundle.79ae519e.min.js"></script>
  771. </body>
  772. </html>