recycle_outside_account_articles.py 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856
  1. import time, json
  2. import traceback
  3. import urllib.parse
  4. from tqdm.asyncio import tqdm
  5. from .recycle_daily_publish_articles import UpdateRootSourceIdAndUpdateTimeTask
  6. from .recycle_daily_publish_articles import Const
  7. from applications.crawler.wechat import get_article_list_from_account
  8. from applications.crawler.wechat import get_article_detail
  9. from applications.pipeline import insert_outside_article_into_recycle_pool
  10. from applications.api import feishu_robot
# Names of the outside accounts this module is allowed to process.
# RecycleOutsideAccountArticlesTask.recycle_single_account returns early for any
# account whose name is not in this set, and
# UpdateOutsideRootSourceIdAndUpdateTimeTask.get_outside_article_list_v2 passes
# the set as the `account_name in %s` filter of its query.
account_name_set = {
    "念念私语",
    "发现趣论奇闻",
    "一晴方春",
    "生活技巧悦读",
    "妙招百科享生活",
    "精选问候祝福寄语",
    "生活实用妙招收录",
    "生活妙计通",
    "逸事趣闻说",
    "天天一起跳广场舞",
    "零点生活志",
    "居家生活实录",
    "生活妙招实录",
    "生活妙招点子库",
    "生活百科巧事通",
    "早晨送你暖心祝福",
    "生活情感感悟",
    "家庭百科大全",
    "清晨早安温馨问候",
    "经典祝福语大全集",
    "日常技巧全书",
    "无忧生活锦囊",
    "家庭妙方实录",
    "生活技巧宝藏库",
    "妙招集锦全书",
    "乐享技巧馆",
    "暖心问候语录",
    "乐享技巧汇",
    "巧手理想家",
    "品质生活有妙招",
    "生活妙招万家通",
    "生活妙招大赏",
    "温暖祝福语大全",
    "好愿祝福语录",
    "精选日常祝福语",
    "邻家生活点滴",
    "创新生活妙招百科",
    "小技巧生活录",
    "妙生活家手记",
    "生活窍门事事通",
    "巧手技巧百科",
    "生活技巧小支招",
    "趣生活百科",
    "生活妙用技巧合集",
    "日常祝福语大全",
    "妙招达人养成记",
    "妙生活锦囊",
    "生活百科一点知",
    "生活妙计科普馆",
    "巧妈生活妙招助手",
    "小窍门生活录",
    "好生活点点通",
    "实用技巧馆",
    "生活情感肆读",
    "生活技巧小奥秘",
    "送温暖祝福精选",
    "生活妙招选集",
    "每日祝福语问候",
    "日常生活点子库",
    "每日好友祝福集",
    "节气祝福问候录",
    "趣味生活铺子",
    "居家生活妙招大全",
    "落日情绪屋",
    "邻家生活技巧",
    "每日精选祝福语录",
    "邻家生活有妙招",
    "妙招集合录",
    "多学生活技巧",
    "祝福问候手册",
    "点滴生活坊",
    "早安心语合集",
    "微情话语录",
    "邻家妙招知识宝典",
    "生活妙招好能手",
    "节庆祝福语大全",
    "精选早安祝福合集",
    "节日问候语大全",
    "居家生活妙招技巧",
    "精选早安问候语合集",
    "治愈情感宝典",
    "生活小妙知",
    "生活妙想指南",
    "技巧达人站",
    "幸福语录精选",
    "技巧生活手册",
    "早安祝福语集",
    "家居生活小窍门全集",
    "祝福问候大集锦",
    "乐活研究社",
    "实用生活365",
    "无忧生活小支招",
    "家有妙计百变通",
    "生活技巧智慧库",
    "点滴乐活小妙招",
    "早安问候精选祝福",
    "生活妙招巧思汇",
    "热门广场舞大全",
    "中老年妙招大全",
    "技巧百变生活馆",
    "生活巧手指南",
    "有妙招享生活",
    "巧手生活录",
    "生活技巧我知道",
    "每日早晚安祝福语录",
    "情暖祝福语录",
    "有趣生活妙招屋",
    "家居生活趣用",
    "日常生活妙招百科",
    "灵巧生活一点通",
    "拾遗情感铺",
    "广场歌舞热榜",
    "邻里妙方百科",
    "妙招技巧帮",
    "经典广场舞热榜",
    "早安祝福集大全",
    "祝福语每日送",
    "情感情报库",
    "生活妙招技巧汇",
    "实用妙招宝",
    "巧思收集录",
    "日用妙招点点通",
    "家庭技巧生活录",
    "每日早安祝福集",
    "清晨祝福合集",
    "技巧生活集萃",
    "生活妙思巧手集",
    "晨间送祝语",
    "便捷小妙招",
    "趣招一点通",
    "妙招收集馆",
    "情感慢读",
    "安心祝福集",
    "生活技巧点子库",
    "懂点技巧吧",
    "技巧能手妙招库",
    "事事妙招集锦",
    "祝福语合集",
    "技巧生活录",
    "生活妙思小帮手",
    "暖心祝福寄语录",
    "生活妙计百宝库",
    "每日祝福语选集",
    "巧手常识集锦",
    "日常祝福问候语",
    "日常问候心愿语录",
    "小窍门宝典",
    "情感阅读舍",
    "精选日常祝福",
    "生活小机智",
    "生活乐享君",
    "早上好心情祝福",
    "幸福小窍门",
    "绸缪情感铺",
    "晨间暖心语录",
    "生活妙方宝典",
    "生活妙招点通铺",
    "美好微祝福语录",
    "生活最有招",
    "祝福语温暖问候集锦",
    "节日问候温馨祝福",
    "每日幸福语录集锦",
    "生活巧招点子库",
    "生活情感课堂",
    "情感避风湾",
    "实用生活小方法",
    "什锦生活录",
    "晨间祝福语精选",
    "巧居生活妙招站",
    "巧知生活集",
    "日常妙招收集录",
    "技巧生活百知",
    "居家小妙术",
    "时光说情感",
    "好愿祝福温馨问候",
    "最美祝愿问候",
    "生活妙招乐享",
    "情绪解忧屋",
    "生活妙招空间站",
    "乐活技巧馆",
    "如意祝福选集",
    "日常技巧大讲堂",
    "三分钟技巧集",
    "情感生活百态",
    "早安问候语精选大全",
    "招招妙招百事通",
    "经典热门广场舞曲",
    "日常生活技能大全",
    "一点儿窍门馆",
    "邻家生活技巧合集",
    "每日送祝福手册",
    "早上好常用祝福问候",
    "妙招巧生活杂货铺",
    "生活全知通",
    "友友刷刷看",
    "退休也乐呵",
    "奇闻怪异集",
    "退休养老攻略",
    "竹边生活记",
    "诡秘奇闻记",
    "趣味生活简记",
    "生活墨记",
    "企退老人心声",
    "生活静语",
    "生活趣时光",
    "生活向暖",
    "老年退休小贴士",
    "退休乐时光",
    "生活趣谈会",
    "生活百科闲谈",
    "生活志记",
    "退休生活报告",
    "奇闻趣世界",
    "日常饮食百科",
    "趣享生活时光",
    "趣读奇闻汇",
    "生活点滴小栈",
    "浮光生活记",
    "生活解忧坊",
    "生活多彩时光",
    "退休生活那些事",
    "生活茶话集",
    "美好生活闲谈",
    "民间奇闻集",
    "生活百味记",
    "悠享生活指南",
    "杂谈异闻社",
    "饮食智慧屋",
    "漫谈奇闻社",
    "退休生活驿站",
    "幸福养身大全",
    "生活念记",
    "生活栖风",
    "生活沐暖",
    "生活絮事记",
    "生活集韵",
    "轻享生活记",
    "幽巷奇闻谈",
    "饮食匠心录",
    "生活畅聊集",
    "鉴赏奇闻集",
    "生活拾碎光",
    "企业退休杂谈",
    "智慧退休计划",
    "老年退休指南",
    "饮食创意坊",
    "趣闻奇谈汇",
    "生活漫读集",
    "生活趣事乐园",
    "慢品生活味",
    "企退老年乐园",
    "生活悠闲记",
    "生活饮食百科",
    "畅享饮食集",
    "幸福生活乐园",
    "退休知识宝典",
    "生活流年记",
    "生活知百味",
    "生活感悟集",
    "精致饮食指南",
    "生活知识宝典",
    # NOTE(review): "\\N" is the literal two-character string \N, which looks
    # like a NULL marker from a database export rather than an account name —
    # confirm whether this entry is intentional.
    "\\N",
    "沧桑时光生活",
    "自得美好生活",
    "生活暖暖舒心",
    "每日技巧集",
    "温暖问候精选",
    "百惠优生活",
    "生活爱问百科",
    "技巧便生活",
    "生活一点知",
    "精选祝福问候大全",
    "巧手生活碎片",
    "巧思知识库",
    "温暖祝福寄语",
    "智巧日常百宝库",
    "招招巧手汇",
    "早安祝福精选手册",
    "每日祝福寄语",
    "生活技能百科全知",
    "生活妙招窍门指南",
    "美好生活智慧录",
    "生活妙招小工匠",
    "幸知情感书房",
    "便捷生活通",
    "心享好生活",
    "祝福语录问候精选",
    "解忧生活铺",
    "家庭妙招站",
    "智享生活巧招",
    "早上好祝福问候心语",
    "技巧达人生活馆",
    "暖心幸福语集",
    "养身百科常谈",
    "省时省力小技巧",
    "妙招干货合集",
    "万事生活通",
    "每日精选祝福寄语",
    "祝福语正能量问候大全",
    "生活妙手多",
    "妙生活集市录",
    "生活无忧大全",
    "巧手生活技巧指南",
    "窍门百事通",
    "生活锦囊妙集",
    "技巧生活大全",
    "知著书局",
    "生活妙招百家录",
    "邻里生活妙招宝典",
    "技巧百变馆",
    "趣招技巧生活通",
    "家庭窍门实录",
    "快乐祝福语录集",
    "合家欢乐祝福问候",
    "日常家居技能",
    "好友祝福正能量语录",
    "百科巧事万事通",
    "巧手生活百科",
    "巧手生活体验馆",
    "每日祝福语早安语录",
    "情感经典说",
    "纷云说",
    "家庭妙招优选",
    "生活能手妙招",
    "居家生活实用小妙招",
    "巧生活妙招馆",
    "妙招干货铺",
    "速学妙招录",
    "科普生活小帮手",
    "祝福语热门精选",
    "美满祝福语录",
    "畅想生活招",
    "技巧百科巧招",
    "经典情感祝福语录",
    "耀舟实用妙招汇",
    "祝福心语选集",
    "节日问候语选集",
    "居家技能全书",
    "祝福贺词精选",
    "一起跳个广场舞",
    "小窍门大帮手",
    "精选问候语送祝福",
    "生活妙招收录馆",
    "家用妙招技巧集",
    "邻家生活小妙招",
    "温馨问候语集",
    "真心祝福暖心问候",
    "技巧实用生活馆",
    "实用生活妙招全录",
    "百科妙招一点通",
    "妙招百科集",
    "花好月圆吉祥祝福",
    "妙招知识通",
    "创意生活技巧集",
    "颜夕漫读",
    "美好祝福日常集锦",
    "邻里技能宝典",
    "早上好暖心祝福语录",
    "妙享生活妙招社",
    "微看情感好文",
    "节日祝福常用问候语录",
    "暖心祝愿语录",
    "乐活妙招小帮手",
    "友友过来看",
    "静好生活社",
    "漫读生活指南",
    "中老年饮食杂谈",
    "生活之百科",
    "生活微光志",
    "生活图鉴",
    "生活漫记簿",
    "奇闻秘传",
    "生活轻描记",
    "退休生活百科",
    "生活拾光机",
    "日常慧窍门",
    "奇闻汐语",
    "家庭饮食宝典",
    "分享建康知识",
    "老年健康生活",
    "异闻奇谈录",
    "江湖奇闻记",
    "烟火慢生活",
    "奇闻逸事阁",
    "饮食养身秘诀",
    "慢享生活记",
    "生活山河集",
    "养老微光录",
    "甄选生活册",
    "居家生活指南",
    "老年退休手册",
    "生活知趣多",
    "流年新生活",
    "林下思忆",
    "忆往深情",
    "生活百科小常识",
    "春耕秋实录",
    "生活技巧锦囊大全",
    "趣享生活社",
    "每日圆满祝福",
    "微读情感驿站",
    "巧手来当家",
    "祝福问候早安手册",
    "日用窍门大全",
    "365天早安祝福问候",
    "趣事说奇闻",
    "技巧百科说",
    "趣闻奇谈录",
    "生活能手宝典",
    "日日送福语",
    "小妙招锦囊",
    "幸福生活事事通",
    "生活巧思屋",
    "温情祝福合集",
    "日常家居小智慧",
    "祝福语暖心语录精选",
    "生活妙招日志",
    "生活妙招因子",
    "早晚安正能量问候大全",
    "优选早安问候语",
    "巧手生活知识馆",
    "礼貌问候祝福语",
    "日常生活小工匠",
    "百科技巧生活屋",
    "美好祝福问候语集",
    "幸福问候祝福馆",
    "早安祝福温馨问候",
    "优享生活妙招社",
    "生活集事通",
    "邻家妙招集合录",
    "情感碎语",
    "一招妙生活",
    "日常祝福语录合集",
    "明月观尘",
    "技巧百科生活屋",
    "奇妙知识生活库",
    "祝福早安每日集锦",
    "暖心问候心语",
    "生活常识技能馆",
    "每日一祝福精选",
    "小技巧百科馆",
    "生活妙招物语",
    "妙招百科全录",
    "日常窍门百事通",
    "祝福节日问候馆",
    "巧手新生活",
    "退休时光宝典",
    "快乐开心最重要",
    "生活浅酌",
    "退休日常百科",
    "记录农村日常",
    "退休常识宝典",
    "闲谈养老生活",
    "退休老年乐园",
    "精选退休大全",
    "饮食趣发现",
    "精选退休录",
    "奇闻实记",
    "退休谈生活",
    "每日建康小妙招",
    "诡夜奇闻录",
    "每日饮食推荐",
    "生活闲语",
    "天天饮食搭配",
    "镜像奇闻录",
    "饮食慢品",
    "老年饮食合集",
    "妙趣生活集",
    "生活锦记",
    "生活行语",
    "退休好岁月",
    "生活智行记",
    "饮食新风尚",
    "经典家常食谱",
    "优选生活笔记",
    "趣谈奇闻集",
    "落月情绵",
    "安然若似",
    "看生活有妙招",
    "云露华浓",
    "悦读时光书房",
    "生活百科妙招通",
    "俏生活有妙招",
    "实用生活妙招指南",
    "妙招集锦百宝箱",
    "情感避风溏",
    "岁月乐活集",
    "奇闻妙趣谈",
    "小客精选团",
    "邻家妙招巧事通",
    "趣味生活指南",
    "生活技巧百招馆",
    "小妙招大智慧",
    "幸福语录祝语",
    "实用窍门馆",
    "优生活妙招学习馆",
    "邻家妙招实用技巧",
    "技能生活小贴士",
    "静听情感语录",
    "家居技能秘籍",
    "温馨祝福语早上好",
    "精选节日祝福语录",
    "美满祝福大全",
    "家庭日用妙招",
    "祝福贴心语录",
    "老年金色岁月",
    "退休都来看",
    "快乐金秋生活",
    "人老天地宽",
    "奇闻怪谈志",
    "异度奇闻录",
    "退休悦读汇",
    "养老生活感悟",
    "退休岁月杂谈",
    "生活小窍门推荐",
    "家庭饮食百科",
    "老年生活食谱",
    "奇闻幻彩集",
    "趣说奇闻汇",
    "退休铭记",
    "退休生活小贴士",
    "感受退休生活",
    "生活微光闲语",
    "智慧妙招生活",
    "甄选生活社",
    "生活识百味",
    "奇闻趣集",
    "每日妙招共享",
    "鉴赏生活日记",
    "生活窍门50条",
    "奇事异闻录",
    "奇闻好看",
    "秘探奇闻录",
    "记录退休秘籍",
    "家庭食谱合集",
    "与心浮沉",
    "唯美微情感",
    "春月不惜",
    "心怡趣论奇闻",
    "老年生活有依",
    "别样新生活",
    "居家幸福老人",
    "暖心情感屋",
    "点滴妙招乐生活",
    "日常祝福温馨问候",
    "生活必备技巧集",
    "小妙招大用途",
    "会点生活小妙招",
    "生活百科常识库",
    "巧思巧手汇聚集",
    "深情驿站",
    "早安祝福问候精选",
    "一招一生活",
    "温馨问候祝福语录",
    "每日问候暖心语录",
    "奇闻趣谈社",
    "生活妙招技巧通",
    "无忧巧生活",
    "妙招百事帮",
    "一招一巧生活馆",
    "老年生活愉快",
    "养老生活讲堂",
    "农村知识铺",
    "居家常用秘籍",
    "品质退休生活",
    "万象奇闻录",
    "退休养老之路",
    "名厨美食推荐",
    "记录养老生活",
    "中老年退休知识",
    "实用退休知识",
    "生活辰光记",
    "极光奇闻",
    "奇闻放大镜",
    "退休生活讲堂",
    "奇事百闻录",
    "退休生活新视角",
    "生活色彩斑斓",
    "迎风好生活",
    "闲庭信步生活",
    "生活其乐融融",
    "雾里云淡",
    "雨凉思情",
    "矜柔人生",
    "生活智慧妙招姐",
    "圆满祝福精选",
    "温馨问候语集锦",
    "实用生活技能合集",
    "生活趣事笔记",
    "生活解语",
    "奇谈趣闻社",
    "奇人见闻",
    "退休生活好帮手",
    "奇闻集锦",
    "退休老年社群",
    "奇闻故事栈",
    "企退生活日记",
    "分享生活小常识",
    "中老年美食圈",
    "退休生活常识",
    "精选退休美文",
    "休闲养老生活",
    "养老退休指南",
    "安享退休事",
    "悠闲退休时光",
    "夜谈奇闻志",
    "南山遗梦",
    "奇特人生",
    "奇闻集合",
}
  622. class RecycleOutsideAccountArticlesTask(Const):
  623. def __init__(self, pool, log_client, date_string):
  624. self.pool = pool
  625. self.log_client = log_client
  626. self.date_string = date_string
  627. async def get_outside_accounts(self):
  628. query = """
  629. select
  630. t2.group_source_name as account_source,
  631. t3.name as name,
  632. t3.gh_id as gh_id
  633. from wx_statistics_group_source t1
  634. join wx_statistics_group_source_account t2 on t2.group_source_name = t1.account_source_name
  635. join publish_account t3 on t3.id = t2.account_id
  636. where
  637. t1.mode_type = '代运营服务号' and
  638. (
  639. t2.group_source_name like '%云誉%'
  640. or t2.group_source_name like '%微小盟%'
  641. or t2.group_source_name like '%阿雅达%'
  642. or t2.group_source_name like '%创易%'
  643. )
  644. and t3.status = 1 and t3.name != '';
  645. """
  646. return await self.pool.async_fetch(query=query, db_name="aigc")
  647. async def recycle_single_account(self, account):
  648. """recycle single account"""
  649. if account["name"] not in account_name_set:
  650. return
  651. query = """
  652. select max(update_time) as publish_timestamp \
  653. from outside_account_articles
  654. where gh_id = %s;
  655. """
  656. response = await self.pool.async_fetch(query=query, params=(account["gh_id"],))
  657. if response:
  658. max_publish_timestamp = response[0]["publish_timestamp"]
  659. else:
  660. max_publish_timestamp = int(time.time()) - self.NEW_ACCOUNT_CRAWL_PERIOD
  661. cursor = None
  662. while True:
  663. response = await get_article_list_from_account(
  664. account_id=account["gh_id"], index=cursor
  665. )
  666. response_code = response["code"]
  667. match response_code:
  668. case self.ACCOUNT_FORBIDDEN_CODE:
  669. # await feishu_robot.bot(
  670. # title="发布账号封禁",
  671. # detail={
  672. # "账号名称": account["name"],
  673. # "账号id": account["gh_id"],
  674. # },
  675. # )
  676. return
  677. case self.ARTICLE_SUCCESS_CODE:
  678. msg_list = response.get("data", {}).get("data", [])
  679. if not msg_list:
  680. return
  681. await insert_outside_article_into_recycle_pool(
  682. self.pool, self.log_client, msg_list, account
  683. )
  684. # check last article
  685. last_article = msg_list[-1]
  686. last_publish_timestamp = last_article["AppMsg"]["BaseInfo"][
  687. "UpdateTime"
  688. ]
  689. if last_publish_timestamp <= max_publish_timestamp:
  690. return
  691. cursor = response["data"].get("next_cursor")
  692. if not cursor:
  693. return
  694. case self.CRAWL_CRASH_CODE:
  695. await self.log_client.log(
  696. contents={
  697. "task": "recycle_daily_publish_articles",
  698. "data": {
  699. "gh_id": account["gh_id"],
  700. },
  701. "message": "爬虫挂掉",
  702. "status": "fail",
  703. }
  704. )
  705. case _:
  706. return
  707. async def deal(self):
  708. subscription_accounts = await self.get_outside_accounts()
  709. for account in tqdm(subscription_accounts, desc="recycle each account"):
  710. try:
  711. await self.recycle_single_account(account)
  712. except Exception as e:
  713. print(
  714. f"{account['name']}\t{account['gh_id']}: recycle account error:", e
  715. )
  716. class UpdateOutsideRootSourceIdAndUpdateTimeTask(UpdateRootSourceIdAndUpdateTimeTask):
  717. def __init__(self, pool, log_client):
  718. super().__init__(pool, log_client)
  719. async def get_outside_article_list_v2(self) -> list[dict]:
  720. query = """
  721. select content_url, wx_sn
  722. from outside_account_articles where publish_timestamp in %s
  723. and account_name in %s
  724. order by update_time desc;
  725. """
  726. article_list = await self.pool.async_fetch(
  727. query=query, params=(tuple([0, -1, -3]), tuple(account_name_set))
  728. )
  729. return article_list
  730. async def check_each_article(self, article: dict):
  731. url = article["content_url"]
  732. wx_sn = article["wx_sn"]
  733. try:
  734. response = await get_article_detail(url)
  735. response_code = response["code"]
  736. if response_code == self.ARTICLE_DELETE_CODE:
  737. publish_timestamp_s = self.DELETE_STATUS
  738. root_source_id_list = []
  739. elif response_code == self.ARTICLE_ILLEGAL_CODE:
  740. publish_timestamp_s = self.ILLEGAL_STATUS
  741. root_source_id_list = []
  742. elif response_code == self.ARTICLE_SUCCESS_CODE:
  743. data = response["data"]["data"]
  744. publish_timestamp_ms = data["publish_timestamp"]
  745. publish_timestamp_s = int(publish_timestamp_ms / 1000)
  746. mini_program = data.get("mini_program", [])
  747. if mini_program:
  748. root_source_id_list = [
  749. urllib.parse.parse_qs(urllib.parse.unquote(i["path"])).get(
  750. "rootSourceId", [""]
  751. )[0]
  752. for i in mini_program
  753. ]
  754. else:
  755. root_source_id_list = []
  756. else:
  757. publish_timestamp_s = self.UNKNOWN_STATUS
  758. root_source_id_list = []
  759. except Exception as e:
  760. publish_timestamp_s = self.REQUEST_FAIL_STATUS
  761. root_source_id_list = None
  762. error_msg = traceback.format_exc()
  763. await self.log_client.log(
  764. contents={
  765. "task": "get_official_article_detail",
  766. "data": {
  767. "url": url,
  768. "wx_sn": wx_sn,
  769. "error_msg": error_msg,
  770. "error": str(e),
  771. },
  772. "function": "check_each_article",
  773. "status": "fail",
  774. }
  775. )
  776. query = """
  777. update outside_account_articles set publish_timestamp = %s, root_source_id_list = %s
  778. where wx_sn = %s;
  779. """
  780. await self.pool.async_save(
  781. query=query,
  782. params=(
  783. publish_timestamp_s,
  784. json.dumps(root_source_id_list, ensure_ascii=False),
  785. wx_sn,
  786. ),
  787. )
  788. if publish_timestamp_s == self.REQUEST_FAIL_STATUS:
  789. article["wx_sn"] = wx_sn
  790. return article
  791. else:
  792. return None
  793. async def fallback_mechanism(self):
  794. # 若还是无 publish_timestamp,用update_time当作 publish_timestamp
  795. update_sql_2 = f"""
  796. update outside_account_articles
  797. set publish_timestamp = update_time
  798. where publish_timestamp < %s;
  799. """
  800. affected_rows_2 = await self.pool.async_save(query=update_sql_2, params=(0,))
  801. if affected_rows_2:
  802. await feishu_robot.bot(
  803. title="执行兜底修改发布时间戳",
  804. detail={
  805. # "通过msgId修改": affected_rows_1,
  806. "通过create_timestamp修改": affected_rows_2,
  807. },
  808. mention=False,
  809. )
  810. async def deal(self):
  811. task_list = await self.get_outside_article_list_v2()
  812. for task in tqdm(task_list, desc="get article detail step1: "):
  813. try:
  814. await self.check_each_article(task)
  815. except Exception as e:
  816. try:
  817. await self.log_client.log(
  818. contents={
  819. "task": "get_official_article_detail_step1",
  820. "data": {
  821. "detail": {
  822. "url": task["ContentUrl"],
  823. "wx_sn": task["wx_sn"],
  824. },
  825. "error_msg": traceback.format_exc(),
  826. "error": str(e),
  827. },
  828. "function": "check_each_article",
  829. "status": "fail",
  830. }
  831. )
  832. except Exception as e:
  833. print(e)
  834. print(traceback.format_exc())