# process_feature.py
  1. features_name_list = [
  2. 'dt',
  3. 'videoid',
  4. 'day1viewcount', # 1/3/7/14/30/60日内曝光
  5. 'day3viewcount',
  6. 'day7viewcount',
  7. 'day14viewcount',
  8. # 'day30viewcount',
  9. # 'day60viewcount',
  10. 'day1playcount', # 1/3/7/14/30/60日内播放
  11. 'day3playcount',
  12. 'day7playcount',
  13. 'day14playcount',
  14. # 'day30playcount',
  15. # 'day60playcount',
  16. 'day1sharecount', # 1/3/7/14/30/60日内分享,一层回流
  17. 'day3sharecount',
  18. 'day7sharecount',
  19. 'day14sharecount',
  20. # 'day30sharecount',
  21. # 'day60sharecount',
  22. 'day1returncount', # 一层回流
  23. 'day3returncount',
  24. 'day7returncount',
  25. 'day14returncount',
  26. # 'day30returncount',
  27. # 'day60returncount',
  28. 'videocategory11',
  29. 'videocategory12',
  30. 'videocategory45',
  31. 'videocategory49',
  32. 'videocategory1',
  33. 'videocategory2',
  34. 'videocategory3',
  35. 'videocategory4',
  36. 'videocategory5',
  37. 'videocategory6',
  38. 'videocategory7',
  39. 'videocategory8',
  40. 'videocategory9',
  41. 'videocategory85',
  42. 'videocategory10',
  43. 'videocategory555',
  44. 'usercategory1',
  45. 'usercategory2',
  46. 'usercategory3',
  47. 'usercategory4',
  48. 'usercategory5',
  49. 'usercategory6',
  50. 'usercategory7',
  51. 'usercategory8',
  52. 'usercategory9',
  53. 'usercategory10',
  54. 'usercategory11',
  55. 'usercategory12',
  56. 'usercategory45',
  57. 'usercategory49',
  58. 'usercategory85',
  59. 'usercategory555',
  60. 'todyviewcount', # 5日曝光
  61. 'day5returncount_1_stage',
  62. 'day5returncount_2_stage',
  63. 'day5returncount_3_stage',
  64. 'day5returncount_4_stage',
  65. 'stage_one_retrn', # 首页一层回流
  66. 'stage_two_retrn',
  67. 'stage_three_retrn',
  68. 'stage_four_retrn']
  69. add_feature = [
  70. 'all_return_day1_return_count', # -- 1/3/7/14日内总回流 #12
  71. 'all_return_day3_return_count',
  72. 'all_return_day7_return_count',
  73. 'all_return_day14_return_count',
  74. 'three_return_day1_return_count', # -- 1/3/7/14日内前三层回流 #14
  75. 'three_return_day3_return_count',
  76. 'three_return_day7_return_count',
  77. 'three_return_day14_return_count',
  78. 'four_up_return_day1_return_count', # -- 1/3/7/14日内四+层回流 #15
  79. 'four_up_return_day3_return_count',
  80. 'four_up_return_day7_return_count',
  81. 'four_up_return_day14_return_count',
  82. 'one_return_day1_return_count', # -- 1/3/7/14日内一层回流 #13
  83. 'one_return_day3_return_count',
  84. 'one_return_day7_return_count',
  85. 'one_return_day14_return_count',
  86. 'four_up_return_div_three_return_day1', # -- 1/3/7/14日内四+层回流/前三层回流 #23
  87. 'four_up_return_div_three_return_day3',
  88. 'four_up_return_div_three_return_day7',
  89. 'four_up_return_div_three_return_day14',
  90. 'all_return_day1_view_day1_return_count', # -- 1/3/7/14日内曝光在1/3/7/14日内回流 #8
  91. 'all_return_day3_view_day3_return_count',
  92. 'all_return_day7_view_day7_return_count',
  93. 'all_return_day14_view_day14_return_count',
  94. 'three_return_day1_view_day1_return_count', # -- 1/3/7/14日内曝光在1/3/7/14日内前三层回流 #10
  95. 'three_return_day3_view_day3_return_count',
  96. 'three_return_day7_view_day7_return_count',
  97. 'three_return_day14_view_day14_return_count',
  98. 'four_up_return_day1_view_day1_return_count', # -- 1/3/7/14日内曝光在1/3/7/14日内四+层回流 # 11
  99. 'four_up_return_day3_view_day3_return_count',
  100. 'four_up_return_day7_view_day7_return_count',
  101. 'four_up_return_day14_view_day14_return_count',
  102. 'one_return_day1_view_day1_return_count', # -- 1/3/7/14日内曝光在1/3/7/14日内一层回流 #9
  103. 'one_return_day3_view_day3_return_count',
  104. 'one_return_day7_view_day7_return_count',
  105. 'one_return_day14_view_day14_return_count',
  106. 'all_return_day1_on_day1_return_count', # 前day1+1 / day1+3/day1+7/day1+14 到前 day1+1日内曝光在 day1的总回流 #16
  107. 'all_return_day3_on_day1_return_count',
  108. 'all_return_day7_on_day1_return_count',
  109. 'all_return_day14_on_day1_return_count',
  110. 'four_up_return_day1_view_day1_return_div_three_d1', # -- 1/3/7/14日内曝光在1/3/7/14日内四+层回流/前三层回流 #22
  111. 'four_up_return_day3_view_day3_return_div_three_d3',
  112. 'four_up_return_day7_view_day7_return_div_three_d7',
  113. 'four_up_return_day14_view_day14_return_div_three_d14',
  114. 'day1ctr', # -- 1/3/7/14/30/60日内播放/曝光 #17
  115. 'day3ctr',
  116. 'day7ctr',
  117. 'day14ctr',
  118. # 'day30ctr',
  119. # 'day60ctr',
  120. 'day1sov', # -- 1/3/7/14/30/60日内分享/曝光 #18
  121. 'day3sov',
  122. 'day7sov',
  123. 'day14sov',
  124. # 'day30sov',
  125. # 'day60sov',
  126. 'day1rov', # -- 1/3/7/14日内曝光的回流/曝光 #19
  127. 'day3rov',
  128. 'day7rov',
  129. 'day14rov',
  130. 'day1soc', # -- 1/3/7/14/30/60日内分享/播放 #20
  131. 'day3soc',
  132. 'day7soc',
  133. 'day14soc',
  134. # 'day30soc',
  135. # 'day60soc',
  136. 'day1roc', # -- 1/3/7/14日内曝光的回流/播放 #21
  137. 'day3roc',
  138. 'day7roc',
  139. 'day14roc',
  140. 'oneday_day1rov', # -- 1/3/7/14日内曝光在今日的回流/ 1/3/7/14日内曝光 #24
  141. 'oneday_day3rov',
  142. 'oneday_day7rov',
  143. 'oneday_day14rov',
  144. 'futre7dayreturn', # 预测目标,未来7天回流
  145. 'todyviewcount_rank',
  146. 'day1viewcount_rank'
  147. ]
  148. words = ['videotags', 'words_without_tags']
  149. features = features_name_list + add_feature + words