zhaohaipeng 2 месяцев назад
Родитель
Сommit
35852b9dd2
2 измененных файлов с 247 добавлено и 161 удалено
  1. 1 0
      enums/automation_job.py
  2. 246 161
      udf/solar_to_lunar.py

+ 1 - 0
enums/automation_job.py

@@ -13,6 +13,7 @@ class AutomationJobCronInfo(Enum):
     video_decode_accurate_text_top = ("视频解构精准文本(每日Top)", "video_decode_accurate_text", "top", 9)
     video_decode_accurate_text_top = ("视频解构精准文本(每日Top)", "video_decode_accurate_text", "top", 9)
     keywords_top = ("视频解构关键词(每日Top)", "keywords", "top", 9)
     keywords_top = ("视频解构关键词(每日Top)", "keywords", "top", 9)
     account_tencent_huxuan = ("腾讯互选平台账号供给(其他)", "account_tencent_huxuan", "other", 9)
     account_tencent_huxuan = ("腾讯互选平台账号供给(其他)", "account_tencent_huxuan", "other", 9)
+    workflow_schedule = ("基于需求的供给workflow", "workflow_schedule", "other", 6)
 
 
     def __init__(self, task_name, crawler_mode, video_source, task_start_hour):
     def __init__(self, task_name, crawler_mode, video_source, task_start_hour):
         self.task_name = task_name
         self.task_name = task_name

+ 246 - 161
udf/solar_to_lunar.py

@@ -3,150 +3,53 @@ import datetime
 
 
 from odps.udf import annotate
 from odps.udf import annotate
 
 
-# 中文农历
-LUNAR_MONTH_NAME = [
-    "", "正月", "二月", "三月", "四月", "五月", "六月",
-    "七月", "八月", "九月", "十月", "冬月", "腊月"
-]
+FMT_STR_MAP = {
+    "yyyyMMdd": "%Y%m%d",
+    "yyyy-MM-dd": "%Y-%m-%d",
+    "yyyy年MM月dd日": "%Y年%m月%d日"
+}
 
 
-LUNAR_DAY_NAME = [
-    "", "初一", "初二", "初三", "初四", "初五", "初六", "初七", "初八", "初九", "初十",
-    "十一", "十二", "十三", "十四", "十五", "十六", "十七", "十八", "十九", "二十",
-    "廿一", "廿二", "廿三", "廿四", "廿五", "廿六", "廿七", "廿八", "廿九", "三十"
-]
+DEFAULT_OUTPUT_MODE = "default"
+CN_OUTPUT_MODE = "cn"
 
 
 
 
-def lunar_to_yyyy_mm_dd(ly, lm, ld):
-    return "{ly}{lm}{ld}".format(ly=ly, lm=lm, ld=ld)
+class LunarDate(object):
+    """Plain value holder for one lunar calendar date.
+
+    Stores the year, month and day exactly as given, plus a flag telling
+    whether the month is a leap month.
+    """
+
+    def __init__(self, year, month, day, is_leap=False):
+        self.year, self.month, self.day = year, month, day
+        self.is_leap = is_leap
 
 
 
 
-def lunar_to_yyyy_mm_dd_2(ly, lm, ld):
-    return "{ly}-{lm}-{ld}".format(ly=ly, lm=lm, ld=ld)
+class InputDateParser(object):
+    """第一部分: 解析输入日期。"""
 
 
+    def __init__(self, fmt_map=None):
+        self._fmt_map = fmt_map or FMT_STR_MAP
 
 
-def lunar_to_yyyy_mm_dd_cn(ly, lm, ld):
-    return "{ly}年{lm}{ld}日".format(ly=ly, lm=lm, ld=ld)
+    def parse(self, date_str, fmt_str):
+        py_fmt_str = self._fmt_map.get(fmt_str)
+        if py_fmt_str is None:
+            raise ValueError("unsupported input format {fmt_str}".format(fmt_str=fmt_str))
+        dt = datetime.datetime.strptime(date_str, py_fmt_str)
+        return dt.year, dt.month, dt.day
 
 
 
 
-FMT_STR_MAP = {
-    "yyyyMMdd": "%Y%m%d",
-    "yyyy-MM-dd": "%Y-%m-%d",
-    "yyyy年MM月dd日": "%Y年%m月%d日"
-}
+class LunarFormatterBase(object):
+    """第三部分: 根据 fmt_str 和 output 输出最终结果。"""
 
 
-OUTPUT_FUNC_MAP = {
-    "yyyyMMdd": lunar_to_yyyy_mm_dd,
-    "yyyy-MM-dd": lunar_to_yyyy_mm_dd_2,
-    "yyyy年MM月dd日": lunar_to_yyyy_mm_dd_cn
-}
+    LUNAR_MONTH_NAME = [
+        "", "正月", "二月", "三月", "四月", "五月", "六月",
+        "七月", "八月", "九月", "十月", "冬月", "腊月"
+    ]
+
+    LUNAR_DAY_NAME = [
+        "", "初一", "初二", "初三", "初四", "初五", "初六", "初七", "初八", "初九", "初十",
+        "十一", "十二", "十三", "十四", "十五", "十六", "十七", "十八", "十九", "二十",
+        "廿一", "廿二", "廿三", "廿四", "廿五", "廿六", "廿七", "廿八", "廿九", "三十"
+    ]
 
 
-# 农历数据(1900-2100)
-LUNAR_INFO = [
-    0x04bd8, 0x04ae0, 0x0a570, 0x054d5, 0x0d260, 0x0d950, 0x16554,
-    0x056a0, 0x09ad0, 0x055d2, 0x04ae0, 0x0a5b6, 0x0a4d0, 0x0d250,
-    0x1d255, 0x0b540, 0x0d6a0, 0x0ada2, 0x095b0, 0x14977, 0x04970,
-    0x0a4b0, 0x0b4b5, 0x06a50, 0x06d40, 0x1ab54, 0x02b60, 0x09570,
-    0x052f2, 0x04970, 0x06566, 0x0d4a0, 0x0ea50, 0x06e95, 0x05ad0,
-    0x02b60, 0x186e3, 0x092e0, 0x1c8d7, 0x0c950, 0x0d4a0, 0x1d8a6,
-    0x0b550, 0x056a0, 0x1a5b4, 0x025d0, 0x092d0, 0x0d2b2, 0x0a950,
-    0x0b557, 0x06ca0, 0x0b550, 0x15355, 0x04da0, 0x0a5b0, 0x14573,
-    0x052b0, 0x0a9a8, 0x0e950, 0x06aa0, 0x0aea6, 0x0ab50, 0x04b60,
-    0x0aae4, 0x0a570, 0x05260, 0x0f263, 0x0d950, 0x05b57, 0x056a0,
-    0x096d0, 0x04dd5, 0x04ad0, 0x0a4d0, 0x0d4d4, 0x0d250, 0x0d558,
-    0x0b540, 0x0b5a0, 0x195a6, 0x095b0, 0x049b0, 0x0a974, 0x0a4b0,
-    0x0b27a, 0x06a50, 0x06d40, 0x0af46, 0x0ab60, 0x09570, 0x04af5,
-    0x04970, 0x064b0, 0x074a3, 0x0ea50, 0x06b58, 0x05ac0, 0x0ab60,
-    0x096d5, 0x092e0, 0x0c960, 0x0d954, 0x0d4a0, 0x0da50, 0x07552,
-    0x056a0, 0x0abb7, 0x025d0, 0x092d0, 0x0cab5, 0x0a950, 0x0b4a0,
-    0x0baa4, 0x0ad50, 0x055d9, 0x04ba0, 0x0a5b0, 0x15176, 0x052b0,
-    0x0a930, 0x07954, 0x06aa0, 0x0ad50, 0x05b52, 0x04b60, 0x0a6e6,
-    0x0a4e0, 0x0d260, 0x0ea65, 0x0d530, 0x05aa0, 0x076a3, 0x096d0,
-    0x04bd7, 0x04ad0, 0x0a4d0, 0x1d0b6, 0x0d250, 0x0d520, 0x0dd45,
-    0x0b5a0, 0x056d0, 0x055b2, 0x049b0, 0x0a577, 0x0a4b0, 0x0aa50,
-    0x1b255, 0x06d20, 0x0ada0
-]
-
-
-def _leap_month(y):
-    return LUNAR_INFO[y - 1900] & 0xf
-
-
-def _leap_days(y):
-    if _leap_month(y):
-        return 30 if (LUNAR_INFO[y - 1900] & 0x10000) else 29
-    return 0
-
-
-def _month_days(y, m):
-    return 30 if (LUNAR_INFO[y - 1900] & (0x10000 >> m)) else 29
-
-
-def _year_days(y):
-    sum_days = 348
-    i = 0x8000
-    while i > 0x8:
-        if LUNAR_INFO[y - 1900] & i:
-            sum_days += 1
-        i >>= 1
-    return sum_days + _leap_days(y)
-
-
-def solar_to_lunar_core(y, m, d):
-    base = datetime.date(1900, 1, 31)
-    obj = datetime.date(y, m, d)
-    offset = (obj - base).days
-
-    year = 1900
-    while year < 2100 and offset > 0:
-        temp = _year_days(year)
-        offset -= temp
-        year += 1
-
-    if offset < 0:
-        offset += temp
-        year -= 1
-
-    leap = _leap_month(year)
-    is_leap = False
-
-    month = 1
-    while month <= 12 and offset > 0:
-        if leap > 0 and month == (leap + 1) and not is_leap:
-            month -= 1
-            is_leap = True
-            temp = _leap_days(year)
-        else:
-            temp = _month_days(year, month)
-
-        offset -= temp
-
-        if is_leap and month == (leap + 1):
-            is_leap = False
-
-        month += 1
-
-    if offset < 0:
-        offset += temp
-        month -= 1
-
-    day = offset + 1
-
-    return year, month, day, is_leap
-
-
-def _parse_input_date_str(date_str, fmt_str):
-    # 用 Python 标准库解析
-    py_fmt_str = FMT_STR_MAP.get(fmt_str, None)
-    if py_fmt_str is None:
-        return "ERROR: unsupported format {fmt_str}".format(fmt_str=fmt_str)
-
-    dt = datetime.datetime.strptime(date_str, py_fmt_str)
-    return dt.year, dt.month, dt.day
-
-
-def _year_to_cnt(year):
-    # 数字到汉字的映射(使用标准“〇”表示零)
-    digit_map = {
+    DIGIT_MAP = {
         '0': '〇',
         '0': '〇',
         '1': '一',
         '1': '一',
         '2': '二',
         '2': '二',
@@ -158,43 +61,225 @@ def _year_to_cnt(year):
         '8': '八',
         '8': '八',
         '9': '九'
         '9': '九'
     }
     }
-    # 将年份转为字符串,逐位转换并拼接
-    chinese_digits = ''.join(digit_map[ch] for ch in str(year))
-    return chinese_digits
 
 
+    @staticmethod
+    def _apply_leap_prefix(value, is_leap):
+        return "闰" + str(value) if is_leap else str(value)
+
+    def _year_to_cnt(self, year):
+        return ''.join(self.DIGIT_MAP[ch] for ch in str(year))
+
+    def _to_cn_number(self, value):
+        return self._year_to_cnt(value)
+
+    @staticmethod
+    def _to_arabic_number(value, width=None):
+        # width 为空时只做普通字符串转换;需要固定宽度时由 formatter 传入。
+        value_str = str(value)
+        if width is not None:
+            return value_str.zfill(width)
+        return value_str
+
+    def format(self, lunar_date):
+        raise NotImplementedError
+
+class YyyyMMddDefaultFormatter(LunarFormatterBase):
+    """Format a lunar date as Arabic digits with no separators."""
+
+    def format(self, lunar_date):
+        """Return year + month + day; month and day are zero-filled to width 2.
+
+        The leap-month prefix is applied to the month part when
+        ``lunar_date.is_leap`` is true.
+        """
+        year_text = self._to_arabic_number(lunar_date.year)
+        padded_month = self._to_arabic_number(lunar_date.month, 2)
+        month_text = self._apply_leap_prefix(padded_month, lunar_date.is_leap)
+        day_text = self._to_arabic_number(lunar_date.day, 2)
+        return "{ly}{lm}{ld}".format(ly=year_text, lm=month_text, ld=day_text)
+
+
+class YyyyMMddCnFormatter(LunarFormatterBase):
+    """Format a lunar date as Chinese digit characters with no separators."""
+
+    def format(self, lunar_date):
+        """Return year + month + day, each converted digit by digit.
+
+        The leap-month prefix is applied to the month part when
+        ``lunar_date.is_leap`` is true.
+        """
+        year_text = self._to_cn_number(lunar_date.year)
+        cn_month = self._to_cn_number(lunar_date.month)
+        month_text = self._apply_leap_prefix(cn_month, lunar_date.is_leap)
+        day_text = self._to_cn_number(lunar_date.day)
+        return "{ly}{lm}{ld}".format(ly=year_text, lm=month_text, ld=day_text)
+
+
+class YyyyDashMmDashDdDefaultFormatter(LunarFormatterBase):
+    """Format a lunar date as dash-separated Arabic digits."""
+
+    def format(self, lunar_date):
+        """Return ``year-month-day``; month and day are zero-filled to width 2.
+
+        The leap-month prefix is applied to the month part when
+        ``lunar_date.is_leap`` is true.
+        """
+        year_text = self._to_arabic_number(lunar_date.year)
+        padded_month = self._to_arabic_number(lunar_date.month, 2)
+        month_text = self._apply_leap_prefix(padded_month, lunar_date.is_leap)
+        day_text = self._to_arabic_number(lunar_date.day, 2)
+        return "{ly}-{lm}-{ld}".format(ly=year_text, lm=month_text, ld=day_text)
+
+
+class YyyyDashMmDashDdCnFormatter(LunarFormatterBase):
+    """Format a lunar date as dash-separated Chinese digit characters."""
+
+    def format(self, lunar_date):
+        """Return ``year-month-day`` with each part converted digit by digit.
+
+        The leap-month prefix is applied to the month part when
+        ``lunar_date.is_leap`` is true.
+        """
+        year_text = self._to_cn_number(lunar_date.year)
+        cn_month = self._to_cn_number(lunar_date.month)
+        month_text = self._apply_leap_prefix(cn_month, lunar_date.is_leap)
+        day_text = self._to_cn_number(lunar_date.day)
+        return "{ly}-{lm}-{ld}".format(ly=year_text, lm=month_text, ld=day_text)
+
+
+class YyyyCnFormatter(LunarFormatterBase):
+    """Format a lunar date using the traditional month and day names."""
+
+    def format(self, lunar_date):
+        """Return the year as Chinese digits followed by the named month and day.
+
+        Month and day names come from LUNAR_MONTH_NAME and LUNAR_DAY_NAME,
+        indexed by the numeric month and day; the leap-month prefix is put in
+        front of the month name when ``lunar_date.is_leap`` is true.
+        """
+        year_text = self._year_to_cnt(lunar_date.year)
+        month_name = self.LUNAR_MONTH_NAME[lunar_date.month]
+        day_name = self.LUNAR_DAY_NAME[lunar_date.day]
+        return "{ly}年{lm}{ld}日".format(
+            ly=year_text,
+            lm=self._apply_leap_prefix(month_name, lunar_date.is_leap),
+            ld=day_name
+        )
+
+
+class LunarFormatterRegistry(object):
+    """Lookup table from (fmt_str, output mode) to a shared formatter instance."""
+
+    # Outer key: format string; inner key: output mode.
+    _registry = {
+        "yyyyMMdd": {
+            DEFAULT_OUTPUT_MODE: YyyyMMddDefaultFormatter(),
+            CN_OUTPUT_MODE: YyyyMMddCnFormatter()
+        },
+        "yyyy-MM-dd": {
+            DEFAULT_OUTPUT_MODE: YyyyDashMmDashDdDefaultFormatter(),
+            CN_OUTPUT_MODE: YyyyDashMmDashDdCnFormatter()
+        },
+        "yyyy年MM月dd日": {
+            DEFAULT_OUTPUT_MODE: YyyyCnFormatter()
+        }
+    }
+
+    SUPPORTED_FMT_STRS = frozenset(_registry)
+
+    @classmethod
+    def get(cls, fmt_str, output_mode):
+        """Return the formatter for the pair, or None when either part is unsupported."""
+        if fmt_str in cls.SUPPORTED_FMT_STRS:
+            return cls._registry.get(fmt_str, {}).get(output_mode)
+        return None
+
+
+class SolarToLunarConverter(object):
+    # Lunar calendar data, one entry per year starting at 1900 (150 entries, i.e. 1900-2049)
+    LUNAR_INFO = [
+        0x04bd8, 0x04ae0, 0x0a570, 0x054d5, 0x0d260, 0x0d950, 0x16554,
+        0x056a0, 0x09ad0, 0x055d2, 0x04ae0, 0x0a5b6, 0x0a4d0, 0x0d250,
+        0x1d255, 0x0b540, 0x0d6a0, 0x0ada2, 0x095b0, 0x14977, 0x04970,
+        0x0a4b0, 0x0b4b5, 0x06a50, 0x06d40, 0x1ab54, 0x02b60, 0x09570,
+        0x052f2, 0x04970, 0x06566, 0x0d4a0, 0x0ea50, 0x06e95, 0x05ad0,
+        0x02b60, 0x186e3, 0x092e0, 0x1c8d7, 0x0c950, 0x0d4a0, 0x1d8a6,
+        0x0b550, 0x056a0, 0x1a5b4, 0x025d0, 0x092d0, 0x0d2b2, 0x0a950,
+        0x0b557, 0x06ca0, 0x0b550, 0x15355, 0x04da0, 0x0a5b0, 0x14573,
+        0x052b0, 0x0a9a8, 0x0e950, 0x06aa0, 0x0aea6, 0x0ab50, 0x04b60,
+        0x0aae4, 0x0a570, 0x05260, 0x0f263, 0x0d950, 0x05b57, 0x056a0,
+        0x096d0, 0x04dd5, 0x04ad0, 0x0a4d0, 0x0d4d4, 0x0d250, 0x0d558,
+        0x0b540, 0x0b5a0, 0x195a6, 0x095b0, 0x049b0, 0x0a974, 0x0a4b0,
+        0x0b27a, 0x06a50, 0x06d40, 0x0af46, 0x0ab60, 0x09570, 0x04af5,
+        0x04970, 0x064b0, 0x074a3, 0x0ea50, 0x06b58, 0x05ac0, 0x0ab60,
+        0x096d5, 0x092e0, 0x0c960, 0x0d954, 0x0d4a0, 0x0da50, 0x07552,
+        0x056a0, 0x0abb7, 0x025d0, 0x092d0, 0x0cab5, 0x0a950, 0x0b4a0,
+        0x0baa4, 0x0ad50, 0x055d9, 0x04ba0, 0x0a5b0, 0x15176, 0x052b0,
+        0x0a930, 0x07954, 0x06aa0, 0x0ad50, 0x05b52, 0x04b60, 0x0a6e6,
+        0x0a4e0, 0x0d260, 0x0ea65, 0x0d530, 0x05aa0, 0x076a3, 0x096d0,
+        0x04bd7, 0x04ad0, 0x0a4d0, 0x1d0b6, 0x0d250, 0x0d520, 0x0dd45,
+        0x0b5a0, 0x056d0, 0x055b2, 0x049b0, 0x0a577, 0x0a4b0, 0x0aa50,
+        0x1b255, 0x06d20, 0x0ada0
+    ]
+
+    def __init__(self):
+        """Create the converter together with the input-date parser it uses."""
+        self._input_parser = InputDateParser()
+
+    @staticmethod
+    def _leap_month(year):
+        """Return the low 4 bits of the year's table entry: the leap month, or 0 for none."""
+        year_info = SolarToLunarConverter.LUNAR_INFO[year - 1900]
+        return year_info & 0xf
+
+    @staticmethod
+    def _leap_days(year):
+        """Return the leap month's length (30 or 29), or 0 when the year has no leap month."""
+        if not SolarToLunarConverter._leap_month(year):
+            return 0
+        # Bit 0x10000 of the table entry marks a 30-day leap month.
+        long_leap = SolarToLunarConverter.LUNAR_INFO[year - 1900] & 0x10000
+        return 30 if long_leap else 29
+
+    @staticmethod
+    def _month_days(year, month):
+        """Return 30 when the table bit for the given regular month is set, else 29."""
+        month_bit = 0x10000 >> month
+        if SolarToLunarConverter.LUNAR_INFO[year - 1900] & month_bit:
+            return 30
+        return 29
+
+    @staticmethod
+    def _year_days(year):
+        """Return the total number of days in the given lunar year.
+
+        Starts from 348 (12 months of 29 days), adds one day for every set
+        bit in the 0x8000..0x10 range of the table entry, then adds the
+        leap month length.
+        """
+        year_info = SolarToLunarConverter.LUNAR_INFO[year - 1900]
+        long_months = sum(1 for shift in range(15, 3, -1) if year_info & (1 << shift))
+        return 348 + long_months + SolarToLunarConverter._leap_days(year)
+
+    @staticmethod
+    def _days_since_base_date(year, month, day):
+        """Return the signed number of days from 1900-01-31 to the given Gregorian date."""
+        delta = datetime.date(year, month, day) - datetime.date(1900, 1, 31)
+        return delta.days
+
+    def _find_lunar_year_and_offset(self, offset):
+        """Locate the lunar year that contains a day offset from the base date.
+
+        Walks the lunar years from 1900 upwards, subtracting each year's
+        length until the remaining offset falls inside a year.
+
+        Returns a (lunar_year, offset_within_year) tuple.
+        Raises ValueError when the offset lies beyond the last year in LUNAR_INFO.
+        """
+        # LUNAR_INFO has one entry per lunar year starting at 1900. Bound the
+        # scan by the table length so a date past the last entry raises
+        # ValueError rather than an IndexError from indexing past the table.
+        end_year = 1900 + len(self.LUNAR_INFO)
+        lunar_year = 1900
+        while lunar_year < end_year:
+            year_days = self._year_days(lunar_year)
+            if offset < year_days:
+                return lunar_year, offset
+            offset -= year_days
+            lunar_year += 1
+
+        raise ValueError("date is out of supported lunar range")
+
+    def _iter_lunar_month_schedule(self, year):
+        """Yield (month, is_leap, days) for each month of the lunar year, in order.
+
+        The leap month, when the year has one, is yielded right after the
+        regular month with the same number.
+        """
+        leap_month = self._leap_month(year)
+        month_no = 1
+        while month_no <= 12:
+            yield month_no, False, self._month_days(year, month_no)
+            if month_no == leap_month:
+                yield month_no, True, self._leap_days(year)
+            month_no += 1
+
+    def _find_lunar_month_day(self, year, offset):
+        """Locate the lunar month and day for a day offset inside the given lunar year.
+
+        Returns a (lunar_month, lunar_day, is_leap) tuple; the day is 1-based.
+        Raises ValueError when the offset is not covered by the year's months.
+        """
+        remaining = offset
+        for month_no, leap_flag, length in self._iter_lunar_month_schedule(year):
+            if remaining >= length:
+                remaining -= length
+                continue
+            return month_no, remaining + 1, leap_flag
+
+        raise ValueError("date is out of supported lunar range")
+
+    def solar_to_lunar_core(self, year, month, day):
+        """Convert a Gregorian date into its lunar calendar equivalent.
+
+        Returns a (lunar_year, lunar_month, lunar_day, is_leap) tuple.
+        Raises ValueError when the date precedes the 1900-01-31 base date;
+        errors raised by the lookup helpers propagate unchanged.
+        """
+        days = self._days_since_base_date(year, month, day)
+        if days < 0:
+            raise ValueError("date is out of supported lunar range")
+
+        lunar_year, remainder = self._find_lunar_year_and_offset(days)
+        lunar_month, lunar_day, leap_flag = self._find_lunar_month_day(lunar_year, remainder)
+        return lunar_year, lunar_month, lunar_day, leap_flag
+
+    def convert(self, date_str, fmt_str="yyyy-MM-dd", output=DEFAULT_OUTPUT_MODE):
+        if not date_str:
+            return None
 
 
-def _lunar_to_string(lunar_year, lunar_month, lunar_day, is_leap, fmt_str, output):
-    if output == 'cn':
-        lunar_year = _year_to_cnt(lunar_year)
-        lunar_month = LUNAR_MONTH_NAME[lunar_month]
-        lunar_day = LUNAR_DAY_NAME[lunar_day]
-    else:
-        if "MM" in fmt_str:
-            lunar_month = str(lunar_month).zfill(2)
-        if "dd" in fmt_str:
-            lunar_day = str(lunar_day).zfill(2)
+        output = DEFAULT_OUTPUT_MODE if output in (None, "") else output
+        y, m, d = self._input_parser.parse(date_str, fmt_str)
+        lunar_year, lunar_month, lunar_day, is_leap = self.solar_to_lunar_core(y, m, d)
+        lunar_date = LunarDate(lunar_year, lunar_month, lunar_day, is_leap)
 
 
-    ly = str(lunar_year)
-    lm = str(lunar_month) if not is_leap else "闰" + str(lunar_month)
-    ld = str(lunar_day)
+        formatter = LunarFormatterRegistry.get(fmt_str, output)
+        if formatter is None:
+            raise ValueError(
+                "unsupported output mode {output} for format {fmt_str}".format(
+                    output=output,
+                    fmt_str=fmt_str
+                )
+            )
 
 
-    return OUTPUT_FUNC_MAP.get(fmt_str, lunar_to_yyyy_mm_dd)(ly, lm, ld)
+        return formatter.format(lunar_date)
 
 
 
 
 @annotate("*->string")
 @annotate("*->string")
 class solar_to_lunar(object):
 class solar_to_lunar(object):
+    def __init__(self):
+        self._converter = SolarToLunarConverter()
 
 
-    def evaluate(self, date_str, fmt_str="yyyy-MM-dd", output=""):
-        if not date_str:
-            return None
+    def evaluate(self, date_str, fmt_str="yyyy-MM-dd", output=DEFAULT_OUTPUT_MODE):
         try:
         try:
-            # 解析输入的日期
-            y, m, d = _parse_input_date_str(date_str, fmt_str)
-
-            # 农历转换
-            ly, lm, ld, is_leap = solar_to_lunar_core(y, m, d)
-
-            return _lunar_to_string(ly, lm, ld, is_leap, fmt_str, output)
-
+            return self._converter.convert(date_str, fmt_str, output)
+        except ValueError as e:
+            return "ERROR: {type}: {e}".format(type=type(e).__name__, e=str(e))
         except Exception as e:
         except Exception as e:
             return "ERROR: {type}: {e}".format(type=type(e).__name__, e=str(e))
             return "ERROR: {type}: {e}".format(type=type(e).__name__, e=str(e))