|
@@ -3,6 +3,7 @@ import math
|
|
|
|
|
|
|
|
from datetime import datetime
|
|
from datetime import datetime
|
|
|
from typing import Optional, List, Dict
|
|
from typing import Optional, List, Dict
|
|
|
|
|
+import pandas as pd
|
|
|
from pandas import DataFrame
|
|
from pandas import DataFrame
|
|
|
from scipy import stats
|
|
from scipy import stats
|
|
|
from tqdm.asyncio import tqdm
|
|
from tqdm.asyncio import tqdm
|
|
@@ -113,12 +114,12 @@ class WeixinAccountManager(CrawlerAccountManager):
|
|
|
ci_lower, ci_upper = mean - margin, mean + margin
|
|
ci_lower, ci_upper = mean - margin, mean + margin
|
|
|
|
|
|
|
|
# 计算发文频率
|
|
# 计算发文频率
|
|
|
- publish_times = dataframe["publish_time"].dropna()
|
|
|
|
|
|
|
+ publish_times = pd.to_numeric(dataframe["publish_time"], errors="coerce")
|
|
|
|
|
+ publish_times = publish_times.replace([float("inf"), float("-inf")], pd.NA).dropna()
|
|
|
if len(publish_times) >= 2:
|
|
if len(publish_times) >= 2:
|
|
|
- delta = publish_times.max() - publish_times.min()
|
|
|
|
|
- publish_frequency = (
|
|
|
|
|
- (len(publish_times) / delta * self.ONE_DAY_TIMESTAMP) if delta else 0.0
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ dates = pd.to_datetime(publish_times, unit="s").dt.normalize()
|
|
|
|
|
+ days_delta = max(int((dates.max() - dates.min()).days) + 1, 1)
|
|
|
|
|
+ publish_frequency = len(publish_times) / days_delta
|
|
|
else:
|
|
else:
|
|
|
publish_frequency = 0.0
|
|
publish_frequency = 0.0
|
|
|
|
|
|