yin.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. # adapted from https://github.com/patriceguyot/Yin
  2. import numpy as np
  3. def differenceFunction(x, N, tau_max):
  4. """
  5. Compute difference function of data x. This corresponds to equation (6) in [1]
  6. This solution is implemented directly with Numpy fft.
  7. :param x: audio data
  8. :param N: length of data
  9. :param tau_max: integration window size
  10. :return: difference function
  11. :rtype: list
  12. """
  13. x = np.array(x, np.float64)
  14. w = x.size
  15. tau_max = min(tau_max, w)
  16. x_cumsum = np.concatenate((np.array([0.]), (x * x).cumsum()))
  17. size = w + tau_max
  18. p2 = (size // 32).bit_length()
  19. nice_numbers = (16, 18, 20, 24, 25, 27, 30, 32)
  20. size_pad = min(x * 2 ** p2 for x in nice_numbers if x * 2 ** p2 >= size)
  21. fc = np.fft.rfft(x, size_pad)
  22. conv = np.fft.irfft(fc * fc.conjugate())[:tau_max]
  23. return x_cumsum[w:w - tau_max:-1] + x_cumsum[w] - x_cumsum[:tau_max] - 2 * conv
  24. def cumulativeMeanNormalizedDifferenceFunction(df, N):
  25. """
  26. Compute cumulative mean normalized difference function (CMND).
  27. This corresponds to equation (8) in [1]
  28. :param df: Difference function
  29. :param N: length of data
  30. :return: cumulative mean normalized difference function
  31. :rtype: list
  32. """
  33. cmndf = df[1:] * range(1, N) / np.cumsum(df[1:]).astype(float) #scipy method
  34. return np.insert(cmndf, 0, 1)
  35. def getPitch(cmdf, tau_min, tau_max, harmo_th=0.1):
  36. """
  37. Return fundamental period of a frame based on CMND function.
  38. :param cmdf: Cumulative Mean Normalized Difference function
  39. :param tau_min: minimum period for speech
  40. :param tau_max: maximum period for speech
  41. :param harmo_th: harmonicity threshold to determine if it is necessary to compute pitch frequency
  42. :return: fundamental period if there is values under threshold, 0 otherwise
  43. :rtype: float
  44. """
  45. tau = tau_min
  46. while tau < tau_max:
  47. if cmdf[tau] < harmo_th:
  48. while tau + 1 < tau_max and cmdf[tau + 1] < cmdf[tau]:
  49. tau += 1
  50. return tau
  51. tau += 1
  52. return 0 # if unvoiced
  53. def compute_yin(sig, sr, w_len=512, w_step=256, f0_min=100, f0_max=500,
  54. harmo_thresh=0.1):
  55. """
  56. Compute the Yin Algorithm. Return fundamental frequency and harmonic rate.
  57. :param sig: Audio signal (list of float)
  58. :param sr: sampling rate (int)
  59. :param w_len: size of the analysis window (samples)
  60. :param w_step: size of the lag between two consecutives windows (samples)
  61. :param f0_min: Minimum fundamental frequency that can be detected (hertz)
  62. :param f0_max: Maximum fundamental frequency that can be detected (hertz)
  63. :param harmo_tresh: Threshold of detection. The yalgorithmù return the first minimum of the CMND function below this treshold.
  64. :returns:
  65. * pitches: list of fundamental frequencies,
  66. * harmonic_rates: list of harmonic rate values for each fundamental frequency value (= confidence value)
  67. * argmins: minimums of the Cumulative Mean Normalized DifferenceFunction
  68. * times: list of time of each estimation
  69. :rtype: tuple
  70. """
  71. tau_min = int(sr / f0_max)
  72. tau_max = int(sr / f0_min)
  73. timeScale = range(0, len(sig) - w_len, w_step) # time values for each analysis window
  74. times = [t/float(sr) for t in timeScale]
  75. frames = [sig[t:t + w_len] for t in timeScale]
  76. pitches = [0.0] * len(timeScale)
  77. harmonic_rates = [0.0] * len(timeScale)
  78. argmins = [0.0] * len(timeScale)
  79. for i, frame in enumerate(frames):
  80. # Compute YIN
  81. df = differenceFunction(frame, w_len, tau_max)
  82. cmdf = cumulativeMeanNormalizedDifferenceFunction(df, tau_max)
  83. p = getPitch(cmdf, tau_min, tau_max, harmo_thresh)
  84. # Get results
  85. if np.argmin(cmdf) > tau_min:
  86. argmins[i] = float(sr / np.argmin(cmdf))
  87. if p != 0: # A pitch was found
  88. pitches[i] = float(sr / p)
  89. harmonic_rates[i] = cmdf[p]
  90. else: # No pitch, but we compute a value of the harmonic rate
  91. harmonic_rates[i] = min(cmdf)
  92. return pitches, harmonic_rates, argmins, times