baichongyang 1 year ago
parent
commit
a40879c784
59 changed files with 8711 additions and 0 deletions
  1. all_stopword.txt (+3885 -0)
  2. calCtr.py (+81 -0)
  3. calCtr1days.py (+82 -0)
  4. calCtr3days.py (+88 -0)
  5. calCtr7days.py (+78 -0)
  6. calCtr_Type.py (+82 -0)
  7. calHourCtr.py (+82 -0)
  8. calHourData.py (+82 -0)
  9. calHourTypeCtr.py (+84 -0)
  10. calI2I.py (+142 -0)
  11. calI2I2.py (+176 -0)
  12. calI2I3.py (+124 -0)
  13. clean.sh (+36 -0)
  14. compose_score.py (+62 -0)
  15. compose_score2.py (+83 -0)
  16. compose_score_3day.py (+97 -0)
  17. compose_score_type.py (+79 -0)
  18. config.py (+361 -0)
  19. cut_title.py (+47 -0)
  20. cut_title_top.py (+47 -0)
  21. db_help.py (+337 -0)
  22. export_3_day.py (+53 -0)
  23. export_7_day.py (+53 -0)
  24. export_good_vid.py (+53 -0)
  25. export_hour_vid.py (+53 -0)
  26. export_vid.py (+53 -0)
  27. export_vid_range.py (+53 -0)
  28. extract_cur_share_log.py (+54 -0)
  29. extract_share_log.py (+54 -0)
  30. extract_title_tag.py (+78 -0)
  31. extract_user_action.py (+54 -0)
  32. extract_video_info.py (+53 -0)
  33. filter_video.py (+46 -0)
  34. get3HotRecall.py (+95 -0)
  35. get7HotRecall.py (+94 -0)
  36. getGoodRecall.py (+54 -0)
  37. get_batch_sim_k.py (+83 -0)
  38. get_sim_k.py (+59 -0)
  39. getmax_return.py (+27 -0)
  40. import_redist.py (+30 -0)
  41. predict.py (+56 -0)
  42. process_video.py (+53 -0)
  43. run.sh (+68 -0)
  44. run_3day.sh (+95 -0)
  45. run_ctr.sh (+61 -0)
  46. run_extract_tag.sh (+32 -0)
  47. run_good_recall.sh (+28 -0)
  48. run_hour.sh (+65 -0)
  49. run_type_ctr.sh (+63 -0)
  50. sendmsg.sh (+36 -0)
  51. test.py (+30 -0)
  52. test.sh (+36 -0)
  53. test2.sh (+95 -0)
  54. test_3day.sh (+95 -0)
  55. test_ctr.sh (+63 -0)
  56. test_faiss.py (+11 -0)
  57. train_vec.sh (+12 -0)
  58. utils.py (+550 -0)
  59. word2vec.py (+28 -0)

+ 3885 - 0
all_stopword.txt

@@ -0,0 +1,3885 @@
+--
+?
+“
+”
+》
+--
+able
+about
+above
+according
+accordingly
+across
+actually
+after
+afterwards
+again
+against
+ain't
+all
+allow
+allows
+almost
+alone
+along
+already
+also
+although
+always
+am
+among
+amongst
+an
+and
+another
+any
+anybody
+anyhow
+anyone
+anything
+anyway
+anyways
+anywhere
+apart
+appear
+appreciate
+appropriate
+are
+aren't
+around
+as
+a's
+aside
+ask
+asking
+associated
+at
+available
+away
+awfully
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+behind
+being
+believe
+below
+beside
+besides
+best
+better
+between
+beyond
+both
+brief
+but
+by
+came
+can
+cannot
+cant
+can't
+cause
+causes
+certain
+certainly
+changes
+clearly
+c'mon
+co
+com
+come
+comes
+concerning
+consequently
+consider
+considering
+contain
+containing
+contains
+corresponding
+could
+couldn't
+course
+c's
+currently
+definitely
+described
+despite
+did
+didn't
+different
+do
+does
+doesn't
+doing
+done
+don't
+down
+downwards
+during
+each
+edu
+eg
+eight
+either
+else
+elsewhere
+enough
+entirely
+especially
+et
+etc
+even
+ever
+every
+everybody
+everyone
+everything
+everywhere
+ex
+exactly
+example
+except
+far
+few
+fifth
+first
+five
+followed
+following
+follows
+for
+former
+formerly
+forth
+four
+from
+further
+furthermore
+get
+gets
+getting
+given
+gives
+go
+goes
+going
+gone
+got
+gotten
+greetings
+had
+hadn't
+happens
+hardly
+has
+hasn't
+have
+haven't
+having
+he
+hello
+help
+hence
+her
+here
+hereafter
+hereby
+herein
+here's
+hereupon
+hers
+herself
+he's
+hi
+him
+himself
+his
+hither
+hopefully
+how
+howbeit
+however
+i'd
+ie
+if
+ignored
+i'll
+i'm
+immediate
+in
+inasmuch
+inc
+indeed
+indicate
+indicated
+indicates
+inner
+insofar
+instead
+into
+inward
+is
+isn't
+it
+it'd
+it'll
+its
+it's
+itself
+i've
+just
+keep
+keeps
+kept
+know
+known
+knows
+last
+lately
+later
+latter
+latterly
+least
+less
+lest
+let
+let's
+like
+liked
+likely
+little
+look
+looking
+looks
+ltd
+mainly
+many
+may
+maybe
+me
+mean
+meanwhile
+merely
+might
+more
+moreover
+most
+mostly
+much
+must
+my
+myself
+name
+namely
+nd
+near
+nearly
+necessary
+need
+needs
+neither
+never
+nevertheless
+new
+next
+nine
+no
+nobody
+non
+none
+noone
+nor
+normally
+not
+nothing
+novel
+now
+nowhere
+obviously
+of
+off
+often
+oh
+ok
+okay
+old
+on
+once
+one
+ones
+only
+onto
+or
+other
+others
+otherwise
+ought
+our
+ours
+ourselves
+out
+outside
+over
+overall
+own
+particular
+particularly
+per
+perhaps
+placed
+please
+plus
+possible
+presumably
+probably
+provides
+que
+quite
+qv
+rather
+rd
+re
+really
+reasonably
+regarding
+regardless
+regards
+relatively
+respectively
+right
+said
+same
+saw
+say
+saying
+says
+second
+secondly
+see
+seeing
+seem
+seemed
+seeming
+seems
+seen
+self
+selves
+sensible
+sent
+serious
+seriously
+seven
+several
+shall
+she
+should
+shouldn't
+since
+six
+so
+some
+somebody
+somehow
+someone
+something
+sometime
+sometimes
+somewhat
+somewhere
+soon
+sorry
+specified
+specify
+specifying
+still
+sub
+such
+sup
+sure
+take
+taken
+tell
+tends
+th
+than
+thank
+thanks
+thanx
+that
+thats
+that's
+the
+their
+theirs
+them
+themselves
+then
+thence
+there
+thereafter
+thereby
+therefore
+therein
+theres
+there's
+thereupon
+these
+they
+they'd
+they'll
+they're
+they've
+think
+third
+this
+thorough
+thoroughly
+those
+though
+three
+through
+throughout
+thru
+thus
+to
+together
+too
+took
+toward
+towards
+tried
+tries
+truly
+try
+trying
+t's
+twice
+two
+un
+under
+unfortunately
+unless
+unlikely
+until
+unto
+up
+upon
+us
+use
+used
+useful
+uses
+using
+usually
+value
+various
+very
+via
+viz
+vs
+want
+wants
+was
+wasn't
+way
+we
+we'd
+welcome
+well
+we'll
+went
+were
+we're
+weren't
+we've
+what
+whatever
+what's
+when
+whence
+whenever
+where
+whereafter
+whereas
+whereby
+wherein
+where's
+whereupon
+wherever
+whether
+which
+while
+whither
+who
+whoever
+whole
+whom
+who's
+whose
+why
+will
+willing
+wish
+with
+within
+without
+wonder
+won't
+would
+wouldn't
+yes
+yet
+you
+you'd
+you'll
+your
+you're
+yours
+yourself
+yourselves
+you've
+zero
+zt
+ZT
+zz
+ZZ
+一
+一下
+一些
+一切
+一则
+一天
+一定
+一方面
+一旦
+一时
+一来
+一样
+一次
+一片
+一直
+一致
+一般
+一起
+一边
+一面
+万一
+上下
+上升
+上去
+上来
+上述
+上面
+下列
+下去
+下来
+下面
+不一
+不久
+不仅
+不会
+不但
+不光
+不单
+不变
+不只
+不可
+不同
+不够
+不如
+不得
+不怕
+不惟
+不成
+不拘
+不敢
+不断
+不是
+不比
+不然
+不特
+不独
+不管
+不能
+不要
+不论
+不足
+不过
+不问
+与
+与其
+与否
+与此同时
+专门
+且
+两者
+严格
+严重
+个
+个人
+个别
+中小
+中间
+丰富
+临
+为
+为主
+为了
+为什么
+为什麽
+为何
+为着
+主张
+主要
+举行
+乃
+乃至
+么
+之
+之一
+之前
+之后
+之後
+之所以
+之类
+乌乎
+乎
+乘
+也
+也好
+也是
+也罢
+了
+了解
+争取
+于
+于是
+于是乎
+云云
+互相
+产生
+人们
+人家
+什么
+什么样
+什麽
+今后
+今天
+今年
+今後
+仍然
+从
+从事
+从而
+他
+他人
+他们
+他的
+代替
+以
+以上
+以下
+以为
+以便
+以免
+以前
+以及
+以后
+以外
+以後
+以来
+以至
+以至于
+以致
+们
+任
+任何
+任凭
+任务
+企图
+伟大
+似乎
+似的
+但
+但是
+何
+何况
+何处
+何时
+作为
+你
+你们
+你的
+使得
+使用
+例如
+依
+依照
+依靠
+促进
+保持
+俺
+俺们
+倘
+倘使
+倘或
+倘然
+倘若
+假使
+假如
+假若
+做到
+像
+允许
+充分
+先后
+先後
+先生
+全部
+全面
+兮
+共同
+关于
+其
+其一
+其中
+其二
+其他
+其余
+其它
+其实
+其次
+具体
+具体地说
+具体说来
+具有
+再者
+再说
+冒
+冲
+决定
+况且
+准备
+几
+几乎
+几时
+凭
+凭借
+出去
+出来
+出现
+分别
+则
+别
+别的
+别说
+到
+前后
+前者
+前进
+前面
+加之
+加以
+加入
+加强
+十分
+即
+即令
+即使
+即便
+即或
+即若
+却不
+原来
+又
+及
+及其
+及时
+及至
+双方
+反之
+反应
+反映
+反过来
+反过来说
+取得
+受到
+变成
+另
+另一方面
+另外
+只是
+只有
+只要
+只限
+叫
+叫做
+召开
+叮咚
+可
+可以
+可是
+可能
+可见
+各
+各个
+各人
+各位
+各地
+各种
+各级
+各自
+合理
+同
+同一
+同时
+同样
+后来
+后面
+向
+向着
+吓
+吗
+否则
+吧
+吧哒
+吱
+呀
+呃
+呕
+呗
+呜
+呜呼
+呢
+周围
+呵
+呸
+呼哧
+咋
+和
+咚
+咦
+咱
+咱们
+咳
+哇
+哈
+哈哈
+哉
+哎
+哎呀
+哎哟
+哗
+哟
+哦
+哩
+哪
+哪个
+哪些
+哪儿
+哪天
+哪年
+哪怕
+哪样
+哪边
+哪里
+哼
+哼唷
+唉
+啊
+啐
+啥
+啦
+啪达
+喂
+喏
+喔唷
+嗡嗡
+嗬
+嗯
+嗳
+嘎
+嘎登
+嘘
+嘛
+嘻
+嘿
+因
+因为
+因此
+因而
+固然
+在
+在下
+地
+坚决
+坚持
+基本
+处理
+复杂
+多
+多少
+多数
+多次
+大力
+大多数
+大大
+大家
+大批
+大约
+大量
+失去
+她
+她们
+她的
+好的
+好象
+如
+如上所述
+如下
+如何
+如其
+如果
+如此
+如若
+存在
+宁
+宁可
+宁愿
+宁肯
+它
+它们
+它们的
+它的
+安全
+完全
+完成
+实现
+实际
+宣布
+容易
+密切
+对
+对于
+对应
+将
+少数
+尔后
+尚且
+尤其
+就
+就是
+就是说
+尽
+尽管
+属于
+岂但
+左右
+巨大
+巩固
+己
+已经
+帮助
+常常
+并
+并不
+并不是
+并且
+并没有
+广大
+广泛
+应当
+应用
+应该
+开外
+开始
+开展
+引起
+强烈
+强调
+归
+当
+当前
+当时
+当然
+当着
+形成
+彻底
+彼
+彼此
+往
+往往
+待
+後来
+後面
+得
+得出
+得到
+心里
+必然
+必要
+必须
+怎
+怎么
+怎么办
+怎么样
+怎样
+怎麽
+总之
+总是
+总的来看
+总的来说
+总的说来
+总结
+总而言之
+恰恰相反
+您
+意思
+愿意
+慢说
+成为
+我
+我们
+我的
+或
+或是
+或者
+战斗
+所
+所以
+所有
+所谓
+打
+扩大
+把
+抑或
+拿
+按
+按照
+换句话说
+换言之
+据
+掌握
+接着
+接著
+故
+故此
+整个
+方便
+方面
+旁人
+无宁
+无法
+无论
+既
+既是
+既然
+时候
+明显
+明确
+是
+是否
+是的
+显然
+显著
+普通
+普遍
+更加
+曾经
+替
+最后
+最大
+最好
+最後
+最近
+最高
+有
+有些
+有关
+有利
+有力
+有所
+有效
+有时
+有点
+有的
+有着
+有著
+望
+朝
+朝着
+本
+本着
+来
+来着
+极了
+构成
+果然
+果真
+某
+某个
+某些
+根据
+根本
+欢迎
+正在
+正如
+正常
+此
+此外
+此时
+此间
+毋宁
+每
+每个
+每天
+每年
+每当
+比
+比如
+比方
+比较
+毫不
+没有
+沿
+沿着
+注意
+深入
+清楚
+满足
+漫说
+焉
+然则
+然后
+然後
+然而
+照
+照着
+特别是
+特殊
+特点
+现代
+现在
+甚么
+甚而
+甚至
+用
+由
+由于
+由此可见
+的
+的话
+目前
+直到
+直接
+相似
+相信
+相反
+相同
+相对
+相对而言
+相应
+相当
+相等
+省得
+看出
+看到
+看来
+看看
+看见
+真是
+真正
+着
+着呢
+矣
+知道
+确定
+离
+积极
+移动
+突出
+突然
+立即
+第
+等
+等等
+管
+紧接着
+纵
+纵令
+纵使
+纵然
+练习
+组成
+经
+经常
+经过
+结合
+结果
+给
+绝对
+继续
+继而
+维持
+综上所述
+罢了
+考虑
+者
+而
+而且
+而况
+而外
+而已
+而是
+而言
+联系
+能
+能否
+能够
+腾
+自
+自个儿
+自从
+自各儿
+自家
+自己
+自身
+至
+至于
+良好
+若
+若是
+若非
+范围
+莫若
+获得
+虽
+虽则
+虽然
+虽说
+行为
+行动
+表明
+表示
+被
+要
+要不
+要不是
+要不然
+要么
+要是
+要求
+规定
+觉得
+认为
+认真
+认识
+让
+许多
+论
+设使
+设若
+该
+说明
+诸位
+谁
+谁知
+赶
+起
+起来
+起见
+趁
+趁着
+越是
+跟
+转动
+转变
+转贴
+较
+较之
+边
+达到
+迅速
+过
+过去
+过来
+运用
+还是
+还有
+这
+这个
+这么
+这么些
+这么样
+这么点儿
+这些
+这会儿
+这儿
+这就是说
+这时
+这样
+这点
+这种
+这边
+这里
+这麽
+进入
+进步
+进而
+进行
+连
+连同
+适应
+适当
+适用
+逐步
+逐渐
+通常
+通过
+造成
+遇到
+遭到
+避免
+那
+那个
+那么
+那么些
+那么样
+那些
+那会儿
+那儿
+那时
+那样
+那边
+那里
+那麽
+部分
+鄙人
+采取
+里面
+重大
+重新
+重要
+鉴于
+问题
+防止
+阿
+附近
+限制
+除
+除了
+除此之外
+除非
+随
+随着
+随著
+集中
+需要
+非但
+非常
+非徒
+靠
+顺
+顺着
+首先
+高兴
+是不是
+说说
+ 
+$
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+?
+_
+“
+”
+、
+。
+《
+》
+一
+一些
+一何
+一切
+一则
+一方面
+一旦
+一来
+一样
+一般
+一转眼
+万一
+上
+上下
+下
+不
+不仅
+不但
+不光
+不单
+不只
+不外乎
+不如
+不妨
+不尽
+不尽然
+不得
+不怕
+不惟
+不成
+不拘
+不料
+不是
+不比
+不然
+不特
+不独
+不管
+不至于
+不若
+不论
+不过
+不问
+与
+与其
+与其说
+与否
+与此同时
+且
+且不说
+且说
+两者
+个
+个别
+临
+为
+为了
+为什么
+为何
+为止
+为此
+为着
+乃
+乃至
+乃至于
+么
+之
+之一
+之所以
+之类
+乌乎
+乎
+乘
+也
+也好
+也罢
+了
+二来
+于
+于是
+于是乎
+云云
+云尔
+些
+亦
+人
+人们
+人家
+什么
+什么样
+今
+介于
+仍
+仍旧
+从
+从此
+从而
+他
+他人
+他们
+以
+以上
+以为
+以便
+以免
+以及
+以故
+以期
+以来
+以至
+以至于
+以致
+们
+任
+任何
+任凭
+似的
+但
+但凡
+但是
+何
+何以
+何况
+何处
+何时
+余外
+作为
+你
+你们
+使
+使得
+例如
+依
+依据
+依照
+便于
+俺
+俺们
+倘
+倘使
+倘或
+倘然
+倘若
+借
+假使
+假如
+假若
+傥然
+像
+儿
+先不先
+光是
+全体
+全部
+兮
+关于
+其
+其一
+其中
+其二
+其他
+其余
+其它
+其次
+具体地说
+具体说来
+兼之
+内
+再
+再其次
+再则
+再有
+再者
+再者说
+再说
+冒
+冲
+况且
+几
+几时
+凡
+凡是
+凭
+凭借
+出于
+出来
+分别
+则
+则甚
+别
+别人
+别处
+别是
+别的
+别管
+别说
+到
+前后
+前此
+前者
+加之
+加以
+即
+即令
+即使
+即便
+即如
+即或
+即若
+却
+去
+又
+又及
+及
+及其
+及至
+反之
+反而
+反过来
+反过来说
+受到
+另
+另一方面
+另外
+另悉
+只
+只当
+只怕
+只是
+只有
+只消
+只要
+只限
+叫
+叮咚
+可
+可以
+可是
+可见
+各
+各个
+各位
+各种
+各自
+同
+同时
+后
+后者
+向
+向使
+向着
+吓
+吗
+否则
+吧
+吧哒
+吱
+呀
+呃
+呕
+呗
+呜
+呜呼
+呢
+呵
+呵呵
+呸
+呼哧
+咋
+和
+咚
+咦
+咧
+咱
+咱们
+咳
+哇
+哈
+哈哈
+哉
+哎
+哎呀
+哎哟
+哗
+哟
+哦
+哩
+哪
+哪个
+哪些
+哪儿
+哪天
+哪年
+哪怕
+哪样
+哪边
+哪里
+哼
+哼唷
+唉
+唯有
+啊
+啐
+啥
+啦
+啪达
+啷当
+喂
+喏
+喔唷
+喽
+嗡
+嗡嗡
+嗬
+嗯
+嗳
+嘎
+嘎登
+嘘
+嘛
+嘻
+嘿
+嘿嘿
+因
+因为
+因了
+因此
+因着
+因而
+固然
+在
+在下
+在于
+地
+基于
+处在
+多
+多么
+多少
+大
+大家
+她
+她们
+好
+如
+如上
+如上所述
+如下
+如何
+如其
+如同
+如是
+如果
+如此
+如若
+始而
+孰料
+孰知
+宁
+宁可
+宁愿
+宁肯
+它
+它们
+对
+对于
+对待
+对方
+对比
+将
+小
+尔
+尔后
+尔尔
+尚且
+就
+就是
+就是了
+就是说
+就算
+就要
+尽
+尽管
+尽管如此
+岂但
+己
+已
+已矣
+巴
+巴巴
+并
+并且
+并非
+庶乎
+庶几
+开外
+开始
+归
+归齐
+当
+当地
+当然
+当着
+彼
+彼时
+彼此
+往
+待
+很
+得
+得了
+怎
+怎么
+怎么办
+怎么样
+怎奈
+怎样
+总之
+总的来看
+总的来说
+总的说来
+总而言之
+恰恰相反
+您
+惟其
+慢说
+我
+我们
+或
+或则
+或是
+或曰
+或者
+截至
+所
+所以
+所在
+所幸
+所有
+才
+才能
+打
+打从
+把
+抑或
+拿
+按
+按照
+换句话说
+换言之
+据
+据此
+接着
+故
+故此
+故而
+旁人
+无
+无宁
+无论
+既
+既往
+既是
+既然
+时候
+是
+是以
+是的
+曾
+替
+替代
+最
+有
+有些
+有关
+有及
+有时
+有的
+望
+朝
+朝着
+本
+本人
+本地
+本着
+本身
+来
+来着
+来自
+来说
+极了
+果然
+果真
+某
+某个
+某些
+某某
+根据
+欤
+正值
+正如
+正巧
+正是
+此
+此地
+此处
+此外
+此时
+此次
+此间
+毋宁
+每
+每当
+比
+比及
+比如
+比方
+没奈何
+沿
+沿着
+漫说
+焉
+然则
+然后
+然而
+照
+照着
+犹且
+犹自
+甚且
+甚么
+甚或
+甚而
+甚至
+甚至于
+用
+用来
+由
+由于
+由是
+由此
+由此可见
+的
+的确
+的话
+直到
+相对而言
+省得
+看
+眨眼
+着
+着呢
+矣
+矣乎
+矣哉
+离
+竟而
+第
+等
+等到
+等等
+简言之
+管
+类如
+紧接着
+纵
+纵令
+纵使
+纵然
+经
+经过
+结果
+给
+继之
+继后
+继而
+综上所述
+罢了
+者
+而
+而且
+而况
+而后
+而外
+而已
+而是
+而言
+能
+能否
+腾
+自
+自个儿
+自从
+自各儿
+自后
+自家
+自己
+自打
+自身
+至
+至于
+至今
+至若
+致
+般的
+若
+若夫
+若是
+若果 
+若非
+莫不然
+莫如
+莫若
+虽
+虽则
+虽然
+虽说
+被
+要
+要不
+要不是
+要不然
+要么
+要是
+譬喻
+譬如
+让
+许多
+论
+设使
+设或
+设若
+诚如
+诚然
+该
+说来
+诸
+诸位
+诸如
+谁
+谁人
+谁料
+谁知
+贼死
+赖以
+赶
+起
+起见
+趁
+趁着
+越是
+距
+跟
+较
+较之
+边
+过
+还
+还是
+还有
+还要
+这
+这一来
+这个
+这么
+这么些
+这么样
+这么点儿
+这些
+这会儿
+这儿
+这就是说
+这时
+这样
+这次
+这般
+这边
+这里
+进而
+连
+连同
+逐步
+通过
+遵循
+遵照
+那
+那个
+那么
+那么些
+那么样
+那些
+那会儿
+那儿
+那时
+那样
+那般
+那边
+那里
+都
+鄙人
+鉴于
+针对
+阿
+除
+除了
+除外
+除开
+除此之外
+除非
+随
+随后
+随时
+随着
+难道说
+非但
+非徒
+非特
+非独
+靠
+顺
+顺着
+首先
+!
+,
+:
+;
+?
+———
+》),
+)÷(1-
+”,
+)、
+=(
+:
+→
+℃ 
+&
+*
+一一
+~~~~
+’
+. 
+『
+.一
+./
+-- 
+』
+=″
+【
+[*]
+}>
+[⑤]]
+[①D]
+c]
+ng昉
+*
+//
+[
+]
+[②e]
+[②g]
+={
+}
+,也 
+‘
+A
+[①⑥]
+[②B] 
+[①a]
+[④a]
+[①③]
+[③h]
+③]
+1. 
+-- 
+[②b]
+’‘ 
+××× 
+[①⑧]
+0:2 
+=[
+[⑤b]
+[②c] 
+[④b]
+[②③]
+[③a]
+[④c]
+[①⑤]
+[①⑦]
+[①g]
+∈[ 
+[①⑨]
+[①④]
+[①c]
+[②f]
+[②⑧]
+[②①]
+[①C]
+[③c]
+[③g]
+[②⑤]
+[②②]
+一.
+[①h]
+.数
+[]
+[①B]
+数/
+[①i]
+[③e]
+[①①]
+[④d]
+[④e]
+[③b]
+[⑤a]
+[①A]
+[②⑧]
+[②⑦]
+[①d]
+[②j]
+〕〔
+][
+://
+′∈
+[②④
+[⑤e]
+12%
+b]
+...
+...................
+…………………………………………………③
+ZXFITL
+[③F]
+」
+[①o]
+]∧′=[ 
+∪φ∈
+′|
+{-
+②c
+}
+[③①]
+R.L.
+[①E]
+-[*]-
+↑
+.日 
+[②d]
+[②
+[②⑦]
+[②②]
+[③e]
+[①i]
+[①B]
+[①h]
+[①d]
+[①g]
+[①②]
+[②a]
+f]
+[⑩]
+a]
+[①e]
+[②h]
+[②⑥]
+[③d]
+[②⑩]
+e]
+〉
+】
+元/吨
+[②⑩]
+2.3%
+5:0  
+[①]
+::
+[②]
+[③]
+[④]
+[⑤]
+[⑥]
+[⑦]
+[⑧]
+[⑨] 
+……
+——
+?
+、
+。
+“
+”
+《
+》
+!
+,
+:
+;
+?
+.
+,
+.
+'
+? 
+———
+──
+? 
+—
+<
+>
+(
+)
+〔
+〕
+[
+]
+(
+)
+-
++
+~
+/
+/
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+Ⅲ
+"
+;
+#
+@
+φ.
+■
+▲
+sub
+exp 
+sup
+sub
+Lex 
+#
+%
+&
+'
++
++ξ
+++
+-
+-β
+<
+<±
+<Δ
+<λ
+<φ
+<<
+=
+=
+=☆
+=-
+>
+>λ
+_
+~±
+~+
+[⑤f]
+[⑤d]
+[②i]
+≈ 
+[②G]
+[①f]
+LI
+㈧ 
+[-
+......
+〉
+[③⑩]
+第二
+一番
+一直
+一个
+一些
+许多
+种
+有的是
+也就是说
+末##末
+啊
+阿
+哎
+哎呀
+哎哟
+唉
+俺
+俺们
+按
+按照
+吧
+吧哒
+把
+罢了
+被
+本
+本着
+比
+比方
+比如
+鄙人
+彼
+彼此
+边
+别
+别的
+别说
+并
+并且
+不比
+不成
+不单
+不但
+不独
+不管
+不光
+不过
+不仅
+不拘
+不论
+不怕
+不然
+不如
+不特
+不惟
+不问
+不只
+朝
+朝着
+趁
+趁着
+乘
+冲
+除
+除此之外
+除非
+除了
+此
+此间
+此外
+从
+从而
+打
+待
+但
+但是
+当
+当着
+到
+得
+的
+的话
+等
+等等
+地
+第
+叮咚
+对
+对于
+多
+多少
+而
+而况
+而且
+而是
+而外
+而言
+而已
+尔后
+反过来
+反过来说
+反之
+非但
+非徒
+否则
+嘎
+嘎登
+该
+赶
+个
+各
+各个
+各位
+各种
+各自
+给
+根据
+跟
+故
+故此
+固然
+关于
+管
+归
+果然
+果真
+过
+哈
+哈哈
+呵
+和
+何
+何处
+何况
+何时
+嘿
+哼
+哼唷
+呼哧
+乎
+哗
+还是
+还有
+换句话说
+换言之
+或
+或是
+或者
+极了
+及
+及其
+及至
+即
+即便
+即或
+即令
+即若
+即使
+几
+几时
+己
+既
+既然
+既是
+继而
+加之
+假如
+假若
+假使
+鉴于
+将
+较
+较之
+叫
+接着
+结果
+借
+紧接着
+进而
+尽
+尽管
+经
+经过
+就
+就是
+就是说
+据
+具体地说
+具体说来
+开始
+开外
+靠
+咳
+可
+可见
+可是
+可以
+况且
+啦
+来
+来着
+离
+例如
+哩
+连
+连同
+两者
+了
+临
+另
+另外
+另一方面
+论
+嘛
+吗
+慢说
+漫说
+冒
+么
+每
+每当
+们
+莫若
+某
+某个
+某些
+拿
+哪
+哪边
+哪儿
+哪个
+哪里
+哪年
+哪怕
+哪天
+哪些
+哪样
+那
+那边
+那儿
+那个
+那会儿
+那里
+那么
+那么些
+那么样
+那时
+那些
+那样
+乃
+乃至
+呢
+能
+你
+你们
+您
+宁
+宁可
+宁肯
+宁愿
+哦
+呕
+啪达
+旁人
+呸
+凭
+凭借
+其
+其次
+其二
+其他
+其它
+其一
+其余
+其中
+起
+起见
+起见
+岂但
+恰恰相反
+前后
+前者
+且
+然而
+然后
+然则
+让
+人家
+任
+任何
+任凭
+如
+如此
+如果
+如何
+如其
+如若
+如上所述
+若
+若非
+若是
+啥
+上下
+尚且
+设若
+设使
+甚而
+甚么
+甚至
+省得
+时候
+什么
+什么样
+使得
+是
+是的
+首先
+谁
+谁知
+顺
+顺着
+似的
+虽
+虽然
+虽说
+虽则
+随
+随着
+所
+所以
+他
+他们
+他人
+它
+它们
+她
+她们
+倘
+倘或
+倘然
+倘若
+倘使
+腾
+替
+通过
+同
+同时
+哇
+万一
+往
+望
+为
+为何
+为了
+为什么
+为着
+喂
+嗡嗡
+我
+我们
+呜
+呜呼
+乌乎
+无论
+无宁
+毋宁
+嘻
+吓
+相对而言
+像
+向
+向着
+嘘
+呀
+焉
+沿
+沿着
+要
+要不
+要不然
+要不是
+要么
+要是
+也
+也罢
+也好
+一
+一般
+一旦
+一方面
+一来
+一切
+一样
+一则
+依
+依照
+矣
+以
+以便
+以及
+以免
+以至
+以至于
+以致
+抑或
+因
+因此
+因而
+因为
+哟
+用
+由
+由此可见
+由于
+有
+有的
+有关
+有些
+又
+于
+于是
+于是乎
+与
+与此同时
+与否
+与其
+越是
+云云
+哉
+再说
+再者
+在
+在下
+咱
+咱们
+则
+怎
+怎么
+怎么办
+怎么样
+怎样
+咋
+照
+照着
+者
+这
+这边
+这儿
+这个
+这会儿
+这就是说
+这里
+这么
+这么点儿
+这么些
+这么样
+这时
+这些
+这样
+正如
+吱
+之
+之类
+之所以
+之一
+只是
+只限
+只要
+只有
+至
+至于
+诸位
+着
+着呢
+自
+自从
+自个儿
+自各儿
+自己
+自家
+自身
+综上所述
+总的来看
+总的来说
+总的说来
+总而言之
+总之
+纵
+纵令
+纵然
+纵使
+遵照
+作为
+兮
+呃
+呗
+咚
+咦
+喏
+啐
+喔唷
+嗬
+嗯
+嗳
+打开天窗说亮话
+到目前为止
+赶早不赶晚
+常言说得好
+何乐而不为
+毫无保留地
+由此可见
+这就是说
+这么点儿
+综上所述
+总的来看
+总的来说
+总的说来
+总而言之
+相对而言
+除此之外
+反过来说
+恰恰相反
+如上所述
+换句话说
+具体地说
+具体说来
+另一方面
+与此同时
+一则通过
+毫无例外
+不然的话
+从此以后
+从古到今
+从古至今
+从今以后
+大张旗鼓
+从无到有
+从早到晚
+弹指之间
+不亦乐乎
+不知不觉
+不止一次
+不择手段
+不可开交
+不可抗拒
+不仅仅是
+不管怎样
+挨家挨户
+长此下去
+长话短说
+除此而外
+除此以外
+除此之外
+得天独厚
+川流不息
+长期以来
+挨门挨户
+挨门逐户
+多多少少
+多多益善
+二话不说
+更进一步
+二话没说
+分期分批
+风雨无阻
+归根到底
+归根结底
+反之亦然
+大面儿上
+倒不如说
+成年累月
+换句话说
+或多或少
+简而言之
+接连不断
+尽如人意
+尽心竭力
+尽心尽力
+尽管如此
+据我所知
+具体地说
+具体来说
+具体说来
+近几年来
+每时每刻
+屡次三番
+三番两次
+三番五次
+三天两头
+另一方面
+老老实实
+年复一年
+恰恰相反
+顷刻之间
+穷年累月
+千万千万
+日复一日
+如此等等
+如前所述
+如上所述
+一方面
+切不可
+顷刻间
+全身心
+另方面
+另一个
+猛然间
+默默地
+就是说
+近年来
+尽可能
+接下来
+简言之
+急匆匆
+即是说
+基本上
+换言之
+充其极
+充其量
+暗地里
+反之则
+比如说
+背地里
+背靠背
+并没有
+不得不
+不得了
+不得已
+不仅仅
+不经意
+不能不
+不外乎
+不由得
+不怎么
+不至于
+策略地
+差不多
+常言道
+常言说
+多年来
+多年前
+差一点
+敞开儿
+抽冷子
+大不了
+反倒是
+反过来
+大体上
+当口儿
+倒不如
+怪不得
+动不动
+看起来
+看上去
+看样子
+够瞧的
+到了儿
+呆呆地
+来不及
+来得及
+到头来
+连日来
+于是乎
+为什么
+这会儿
+换言之
+那会儿
+那么些
+那么样
+什么样
+反过来
+紧接着
+就是说
+要不然
+要不是
+一方面
+以至于
+自个儿
+自各儿
+之所以
+这么些
+这么样
+怎么办
+怎么样
+谁知
+顺着
+似的
+虽然
+虽说
+虽则
+随着
+所以
+他们
+他人
+它们
+她们
+倘或
+倘然
+倘若
+倘使
+要么
+要是
+也罢
+也好
+以便
+依照
+以及
+以免
+以至
+以致
+抑或
+因此
+因而
+因为
+由于
+有的
+有关
+有些
+于是
+与否
+与其
+越是
+云云
+一般
+一旦
+一来
+一切
+一样
+同时
+万一
+为何
+为了
+为着
+嗡嗡
+我们
+呜呼
+乌乎
+无论
+无宁
+沿着
+毋宁
+向着
+照着
+怎么
+咱们
+在下
+再说
+再者
+怎样
+这边
+这儿
+这个
+这里
+这么
+这时
+这些
+这样
+正如
+之类
+之一
+只是
+只限
+只要
+只有
+至于
+诸位
+着呢
+纵令
+纵然
+纵使
+遵照
+作为
+喔唷
+自从
+自己
+自家
+自身
+总之
+要不
+哎呀
+哎哟
+俺们
+按照
+吧哒
+罢了
+本着
+比方
+比如
+鄙人
+彼此
+别的
+别说
+并且
+不比
+不成
+不单
+不但
+不独
+不管
+不光
+不过
+不仅
+不拘
+不论
+不怕
+不然
+不如
+不特
+不惟
+不问
+不只
+朝着
+趁着
+除非
+除了
+此间
+此外
+从而
+但是
+当着
+的话
+等等
+叮咚
+对于
+多少
+而况
+而且
+而是
+而外
+而言
+而已
+尔后
+反之
+非但
+非徒
+否则
+嘎登
+各个
+各位
+各种
+各自
+根据
+故此
+固然
+关于
+果然
+果真
+哈哈
+何处
+何况
+何时
+哼唷
+呼哧
+还是
+还有
+或是
+或者
+极了
+及其
+及至
+即便
+即或
+即令
+即若
+即使
+既然
+既是
+继而
+加之
+假如
+假若
+假使
+鉴于
+几时
+较之
+接着
+结果
+进而
+尽管
+经过
+就是
+可见
+可是
+可以
+况且
+开始
+开外
+来着
+例如
+连同
+两者
+另外
+慢说
+漫说
+每当
+莫若
+某个
+某些
+哪边
+哪儿
+哪个
+哪里
+哪年
+哪怕
+哪天
+哪些
+哪样
+那边
+那儿
+那个
+那里
+那么
+那时
+那些
+那样
+乃至
+宁可
+宁肯
+宁愿
+你们
+啪达
+旁人
+凭借
+其次
+其二
+其他
+其它
+其一
+其余
+其中
+起见
+起见
+岂但
+前后
+前者
+然而
+然后
+然则
+人家
+任何
+任凭
+如此
+如果
+如何
+如其
+如若
+若非
+若是
+上下
+尚且
+设若
+设使
+甚而
+甚么
+甚至
+省得
+时候
+什么
+使得
+是的
+首先
+首先
+其次
+再次
+最后
+您们
+它们
+她们
+他们
+我们
+你是
+您是
+我是
+他是
+她是
+它是
+不是
+你们
+啊哈
+啊呀
+啊哟
+挨次
+挨个
+挨着
+哎呀
+哎哟
+俺们
+按理
+按期
+默然
+按时
+按说
+按照
+暗中
+暗自
+昂然
+八成
+倍感
+倍加
+本人
+本身
+本着
+并非
+别人
+必定
+比起
+比如
+比照
+鄙人
+毕竟
+必将
+必须
+并肩
+并没
+并排
+并且
+并无
+勃然
+不必
+不常
+不大
+不单
+不但
+而且
+不得
+不迭
+不定
+不独
+不对
+不妨
+不管
+不光
+不过
+不会
+不仅
+不拘
+不力
+不了
+不料
+不论
+不满
+不免
+不起
+不巧
+不然
+不日
+不少
+不胜
+不时
+不是
+不同
+不能
+不要
+不外
+不下
+不限
+不消
+不已
+不再
+不曾
+不止
+不只
+才能
+彻夜
+趁便
+趁机
+趁热
+趁势
+趁早
+趁着
+成心
+乘机
+乘势
+乘隙
+乘虚
+诚然
+迟早
+充分
+出来
+出去
+除此
+除非
+除开
+除了
+除去
+除却
+除外
+处处
+传说
+传闻
+纯粹
+此后
+此间
+此外
+此中
+次第
+匆匆
+从不
+从此
+从而
+从宽
+从来
+从轻
+从速
+从头
+从未
+从小
+从新
+从严
+从优
+从中
+从重
+凑巧
+存心
+达旦
+打从
+大大
+大抵
+大都
+大多
+大凡
+大概
+大家
+大举
+大略
+大约
+大致
+待到
+单纯
+单单
+但是
+但愿
+当场
+当儿
+当即
+当然
+当庭
+当头
+当下
+当真
+当中
+当着
+倒是
+到处
+到底
+到头
+得起
+的话
+的确
+等到
+等等
+顶多
+动辄
+陡然
+独自
+断然
+对于
+顿时
+多次
+多多
+多亏
+而后
+而论
+而且
+而是
+而外
+而言
+而已
+而又
+尔等
+反倒
+反而
+反手
+反之
+方才
+方能
+非常
+非但
+非得
+分头
+奋勇
+愤然
+更为
+更加
+根据
+个人
+各式
+刚才
+敢情
+该当
+嘎嘎
+否则
+赶快
+敢于
+刚好
+刚巧
+高低
+格外
+隔日
+隔夜
+公然
+过于
+果然
+果真
+光是
+关于
+共总
+姑且
+故此
+故而
+故意
+固然
+惯常
+毫不
+毫无
+很多
+何须
+好在
+何必
+何尝
+何妨
+何苦
+何况
+何止
+很少
+轰然
+后来
+呼啦
+哗啦
+互相
+忽地
+忽然
+话说
+或是
+伙同
+豁然
+恍然
+还是
+或许
+或者
+基本
+基于
+极大
+极度
+极端
+极力
+极其
+极为
+即便
+即将
+及其
+及至
+即刻
+即令
+即使
+几度
+几番
+几乎
+几经
+既然
+继而
+继之
+加上
+加以
+加之
+假如
+假若
+假使
+间或
+将才
+简直
+鉴于
+将近
+将要
+交口
+较比
+较为
+较之
+皆可
+截然
+截至
+藉以
+借此
+借以
+届时
+尽快
+近来
+进而
+进来
+进去
+尽管
+尽量
+尽然
+就算
+居然
+就此
+就地
+竟然
+究竟
+经常
+尽早
+精光
+经过
+就是
+局外
+举凡
+据称
+据此
+据实
+据说
+可好
+看来
+开外
+绝不
+决不
+据悉
+决非
+绝顶
+绝对
+绝非
+可见
+可能
+可是
+可以
+恐怕
+来讲
+来看
+快要
+况且
+拦腰
+牢牢
+老是
+累次
+累年
+理当
+理该
+理应
+例如
+立地
+立刻
+立马
+立时
+联袂
+连连
+连日
+路经
+临到
+连声
+连同
+连袂
+另外
+另行
+屡次
+屡屡
+缕缕
+率尔
+率然
+略加
+略微
+略为
+论说
+马上
+猛然
+没有
+每当
+每逢
+每每
+莫不
+莫非
+莫如
+莫若
+哪怕
+那么
+那末
+那些
+乃至
+难道
+难得
+难怪
+难说
+你们
+凝神
+宁可
+宁肯
+宁愿
+偶而
+偶尔
+碰巧
+譬如
+偏偏
+平素
+迫于
+扑通
+其次
+其后
+其实
+其它
+起初
+起来
+起首
+起头
+起先
+岂但
+岂非
+岂止
+恰逢
+恰好
+恰恰
+恰巧
+恰如
+恰似
+前后
+前者
+切莫
+切切
+切勿
+亲口
+亲身
+亲手
+亲眼
+亲自
+顷刻
+请勿
+取道
+权时
+全都
+全力
+全年
+全然
+然而
+然后
+人家
+人人
+仍旧
+仍然
+日见
+日渐
+日益
+日臻
+如常
+如次
+如果
+如今
+如期
+如若
+如上
+如下
+上来
+上去
+瑟瑟
+沙沙
+啊
+哎
+唉
+俺
+按
+吧
+把
+甭
+别
+嘿
+很
+乎
+会
+或
+既
+及
+啦
+了
+们
+你
+您
+哦
+砰
+啊
+你
+我
+他
+她
+它
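
The stopword list above mixes English function words, Chinese function words, and punctuation/markup fragments, one entry per line, and is presumably consumed by the title-processing scripts added in this commit (cut_title.py, extract_title_tag.py, word2vec.py). As a rough sketch of how such a list is typically applied, assuming a jieba-style tokenizer (the tokenizer choice and the function names below are illustrative, not taken from the commit):

import jieba

def load_stopwords(path="all_stopword.txt"):
    # One stopword per line; strip only the trailing newline so that
    # punctuation-style entries survive intact.
    with open(path, encoding="utf-8") as f:
        return {line.rstrip("\n") for line in f if line.rstrip("\n")}

def cut_and_filter(title, stopwords):
    # Segment a video title and drop stopwords and whitespace-only tokens.
    return [w for w in jieba.cut(title) if w.strip() and w not in stopwords]

if __name__ == "__main__":
    print(cut_and_filter("这是一个测试视频的标题", load_stopwords()))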

+ 81 - 0
calCtr.py

@@ -0,0 +1,81 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        if len(items)<11:
+            continue
+        vid = items[1]
+        view_users = int(items[4])
+        view_pv = int(items[5])
+        play_users = int(items[6])
+        play_pv = int(items[7])
+        share_users = int(items[8])
+        share_pv = int(items[9])
+        return_users = int(items[10])
+        #print(items)
+        if vid not in data_dict:
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv = item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users 
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    info_dict = {}
+    data_path = "./data/sorted_data_"+nowdate
+    f = open(data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+10)
+        k_score2 = float(return_users)/(float(view_pv)+10)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+5)
+        backrate = float(return_users)/(float(share_pv)+5)
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score2, share_score*backrate, share_score, backrate, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        k = "k_p:"+k
+        score_info = json.dumps(score_info)
+        info_dict[k] = score_info
+        f.write(k+"\t"+score_info+"\n")
+    redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     
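
calCtr.py aggregates the per-partition counters by vid, computes additively smoothed ratios, writes them to ./data/sorted_data_<date>, and pushes the same JSON payloads to Redis under k_p:<vid> with a 15-day TTL via update_batch_setnx_key. The variants that follow (calCtr1days.py, calCtr3days.py, calCtr7days.py, calCtr_Type.py and the hourly scripts) differ mainly in the input file, the smoothing constants, and whether Redis is written. Each script takes the date partition as its single argument, apparently a %Y%m%d date as in clean.sh. A condensed restatement of the daily scoring, with the constants copied from the diff above:

# Illustrative restatement of calCtr.py's scoring, not the script itself.
def daily_scores(view_pv, share_pv, return_users):
    k_score2 = return_users / (view_pv + 10.0)   # returning users per exposure
    share_score = share_pv / (view_pv + 5.0)     # shares per exposure
    backrate = return_users / (share_pv + 5.0)   # returning users per share
    # Stored (together with the raw counters) as JSON under "k_p:<vid>".
    return [k_score2, share_score * backrate, share_score, backrate]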

+ 82 - 0
calCtr1days.py

@@ -0,0 +1,82 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        if len(items)<11:
+            continue
+        vid = int(items[1])
+        view_users = int(items[4])
+        view_pv = int(items[5])
+        play_users = int(items[6])
+        play_pv = int(items[7])
+        share_users = int(items[8])
+        share_pv = int(items[9])
+        return_users = int(items[10])
+        #print(items)
+        if vid not in data_dict:
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv = item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users 
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    info_dict = {}
+    data_path = "./data/1_days_sorted_data_"+nowdate
+    f = open(data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+10)
+        k_score2 = float(return_users)/(float(view_pv)+10)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+50)
+        backrate = float(return_users)/(float(view_users)+10)
+        ctr_score = float(play_pv)/(float(view_pv)+50)
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score2, share_score*backrate, share_score, backrate, ctr_score, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        #k = "k_p3:"+k
+        score_info = json.dumps(score_info)
+        info_dict[k] = score_info
+        f.write(str(k)+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     

+ 88 - 0
calCtr3days.py

@@ -0,0 +1,88 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/3_days_video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        if len(items)<11:
+            continue
+        vid = -1
+        try:
+            vid = int(items[1])
+        except:
+            continue
+        if vid ==-1:
+            continue
+        view_users = int(items[4])
+        view_pv = int(items[5])
+        play_users = int(items[6])
+        play_pv = int(items[7])
+        share_users = int(items[8])
+        share_pv = int(items[9])
+        return_users = int(items[10])
+        #print(items)
+        if vid not in data_dict:
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv =  item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users 
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    info_dict = {}
+    data_path = "./data/3_days_sorted_data_"+nowdate
+    f = open(data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+30)
+        k_score2 = float(return_users)/(float(view_pv)+30)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+100)
+        backrate = float(return_users)/(float(view_users)+30)
+        ctr_score = float(play_pv)/(float(view_pv)+100)
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score2, share_score*backrate, share_score, backrate, ctr_score, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        #k = "k_p3:"+k
+        score_info = json.dumps(score_info)
+        info_dict[k] = score_info
+        f.write(str(k)+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     

+ 78 - 0
calCtr7days.py

@@ -0,0 +1,78 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/7_days_video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        if len(items)<11:
+            continue
+        vid = items[1]
+        view_users = int(items[4])
+        view_pv = int(items[5])
+        play_users = int(items[6])
+        play_pv = int(items[7])
+        share_users = int(items[8])
+        share_pv = int(items[9])
+        return_users = int(items[10])
+        #print(items)
+        if vid not in data_dict:
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv = item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users 
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    info_dict = {}
+    data_path = "./data/7_days_sorted_data_"+nowdate
+    f = open(data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+10)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+5)
+        backrate = float(return_users)/(float(view_users)+5)
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [share_score, share_score*backrate, share_score, backrate]
+        #k = "k_p4:"+k
+        score_info = json.dumps(score_info)
+        info_dict[k] = score_info
+        #f.write(k+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     

+ 82 - 0
calCtr_Type.py

@@ -0,0 +1,82 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        if len(items)<11:
+            continue
+        apptype = items[0]
+        vid = items[1]
+        view_users = int(items[4])
+        view_pv = int(items[5])
+        play_users = int(items[6])
+        play_pv = int(items[7])
+        share_users = int(items[8])
+        share_pv = int(items[9])
+        return_users = int(items[10])
+        #print(items)
+        key_vid = apptype+"\t"+vid
+        if key_vid not in data_dict:
+            data_dict[key_vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[key_vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv = item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users 
+            data_dict[key_vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    info_dict = {}
+    data_path = "./data/sorted_type_data_"+nowdate
+    f = open(data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+10)
+        k_score2 = float(return_users)/(float(view_pv)+10)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+5)
+        backrate = float(return_users)/(float(view_users)+5)
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score2, share_score*backrate, share_score, backrate, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        score_info = json.dumps(score_info)
+        #info_dict[k] = score_info
+        f.write(k+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     

+ 82 - 0
calHourCtr.py

@@ -0,0 +1,82 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+#from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/hour_video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    #redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        #print(items)
+        if len(items)<9:
+            continue
+        vid = items[1]
+        view_users = int(items[2])
+        view_pv = int(items[3])
+        play_users = int(items[4])
+        play_pv = int(items[5])
+        share_users = int(items[6])
+        share_pv = int(items[7])
+        return_users = int(items[8])
+        #print(items)
+        if vid not in data_dict:
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv = item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users 
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    f.close()
+    info_dict = {}
+    hour_data_path = "./data/sorted_hour_data_"+nowdate
+    f = open(hour_data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+5)
+        k_score2 = float(return_users)/(float(view_pv)+5)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+5)
+        backrate = float(return_users)/(float(share_pv)+5)
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score2, share_score*backrate, share_score, backrate, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        score_info = json.dumps(score_info)
+        info_dict[k] = score_info
+        f.write(k+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     

+ 82 - 0
calHourData.py

@@ -0,0 +1,82 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+#from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/hour_video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    #redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        #print(items)
+        if len(items)<9:
+            continue
+        vid = items[1]
+        view_users = int(items[2])
+        view_pv = int(items[3])
+        play_users = int(items[4])
+        play_pv = int(items[5])
+        share_users = int(items[6])
+        share_pv = int(items[7])
+        return_users = int(items[8])
+        #print(items)
+        if vid not in data_dict:
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv =  item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users 
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    f.close()
+    info_dict = {}
+    hour_data_path = "./data/sorted_hour_info_"+nowdate
+    f = open(hour_data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv  = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+5)
+        k_score2 = float(return_users)/(float(view_pv)+5)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+5)
+        backrate = float(return_users)/(float(view_users)+5)
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score2, share_score*backrate, share_score, backrate, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        score_info = json.dumps(score_info)
+        info_dict[k] = score_info
+        f.write(k+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     

+ 84 - 0
calHourTypeCtr.py

@@ -0,0 +1,84 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+#from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/hour_video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    #redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        #print(items)
+        if len(items)<9:
+            continue
+        apptype = items[0]
+        vid = items[1]
+        view_users = int(items[2])
+        view_pv = int(items[3])
+        play_users = int(items[4])
+        play_pv = int(items[5])
+        share_users = int(items[6])
+        share_pv = int(items[7])
+        return_users = int(items[8])
+        #print(items)
+        app_vid = apptype+"\t"+vid
+        if app_vid not in data_dict:
+            data_dict[app_vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[app_vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv = item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users 
+            data_dict[app_vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    f.close()
+    info_dict = {}
+    hour_data_path = "./data/sorted_type_hour_data_"+nowdate
+    f = open(hour_data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+5)
+        k_score2 = float(return_users)/(float(view_pv)+5)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+5)
+        backrate = float(return_users)/(float(view_users)+5)
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score2, share_score*backrate, share_score, backrate, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        score_info = json.dumps(score_info)
+        info_dict[k] = score_info
+        f.write(k+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     

+ 142 - 0
calI2I.py

@@ -0,0 +1,142 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    f = open("./data/user_item_share_"+nowdate)
+    user_item_dict={}
+    item_dict = {}  
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        key = (items[1],items[2])
+        #print(key)
+        if key not in user_item_dict:
+            user_item_dict[key] = 1
+        else:
+            user_item_dict[key] = user_item_dict[key]+1
+        if items[2] not in item_dict:
+            item_dict[items[2]] = 1
+        else:
+            item_dict[items[2]] = item_dict[items[2]]+1
+    f.close()
+    nowhour=sys.argv[2]
+    f1 = open("./data/user_cur_day_item_share_"+nowhour)
+    while True:
+        line = f1.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        key = (items[1],items[2])
+        #print(key)
+        if key not in user_item_dict:
+            user_item_dict[key] = 1
+        else:
+            user_item_dict[key] = user_item_dict[key]+1
+        if items[2] not in item_dict:
+            item_dict[items[2]] = 1
+        else:
+            item_dict[items[2]] = item_dict[items[2]]+1
+    f1.close()
+    #((user,item), score)
+    #print(user_item_dict)
+    #2. (uid, [(vid, score)....])
+    user_group_dict = {}
+    for k, v in user_item_dict.items():
+        uid = k[0]
+        vid = k[1]
+        score = v
+        vid_list = []
+        if uid not in user_group_dict:
+            vid_list.append((vid, score))
+            user_group_dict[uid] = vid_list
+        else:
+            vid_list = user_group_dict[uid]
+            vid_list.append((vid, score))
+            user_group_dict[uid] = vid_list
+    #print(user_group_dict)
+    item_pair_dict = {}
+    #3. expand item
+    for k, v_list in user_group_dict.items():
+         v_n = len(v_list)
+         if v_n<2:
+             continue
+         for i in range(v_n):
+             for j in range(1, v_n):
+                if v_list[i][0] == v_list[j][0]:
+                    continue
+                item_key = (v_list[i][0], v_list[j][0])
+                item_score = min(v_list[i][1], v_list[j][1])
+                if item_key not in item_pair_dict:
+                    item_pair_dict[item_key] = item_score
+                else:
+                    item_pair_dict[item_key] = item_pair_dict[item_key]+item_score
+    #print(item_pair_dict)
+    print(len(item_pair_dict))
+    print(len(item_dict))
+    left_pair_num = 0
+    rec_item_dict = {}
+    #4. rec item
+    for k, v in item_pair_dict.items():
+        if v<2:
+            continue
+        left_pair_num+=1
+        item1 = k[0]
+        item2 = k[1]
+        pair_score = v
+        if item1 in item_dict:
+             item_score1 = item_dict[item1]
+             i2i_pro = pair_score/(item_score1+5)
+             rec_list1 = []
+             if item2 not in rec_item_dict:
+                 rec_list1.append((item1, i2i_pro, pair_score, item_score1))
+                 rec_item_dict[item2] = rec_list1
+             else:
+                 rec_list1 = rec_item_dict[item2]
+                 rec_list1.append((item1, i2i_pro, pair_score, item_score1))
+                 rec_item_dict[item2] = rec_list1
+        if item2 in item_dict:
+             item_score2 = item_dict[item2]
+             i2i_pro = pair_score/(item_score2+5)
+             rec_list2 = []
+             if item1 not in rec_item_dict:
+                 rec_list2.append((item2, i2i_pro, pair_score, item_score2))
+                 rec_item_dict[item1] = rec_list2
+             else:
+                 rec_list2 = rec_item_dict[item1]
+                 rec_list2.append((item2, i2i_pro, pair_score, item_score2))
+                 rec_item_dict[item1] = rec_list2          
+     
+    #(item, share_count)
+    print(left_pair_num)
+    #print(rec_item_dict)
+    final_rec_list = []
+    #f = open("rec_result", "w")
+    #5. sorted item_list
+    
+    for k,v in rec_item_dict.items():
+        v_set = set('')
+        value_list = v
+        dup_list = []
+        for item in value_list:
+            if item[0] in v_set:
+                continue
+            v_set.add(item[0])
+            dup_list.append(item)
+        sorted_v = sorted(dup_list, key=itemgetter(1), reverse=True)
+        final_rec_list.append((k, sorted_v))
+    #print(final_rec_list[:1])
+    #json_str = json.dumps(final_rec_list)
+    with open("./data/rec_result_"+nowhour+".json", "w") as f :
+        json.dump(final_rec_list, f)
+    
+     
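
calI2I.py builds a share-based item-to-item recall: it merges the multi-day and current-day share logs into (uid, vid) share counts, forms vid pairs co-shared by the same user (pair weight = min of the two counts), drops pairs whose accumulated weight is below 2, scores each direction by pair_score / (item share count + 5), and dumps the sorted lists to ./data/rec_result_<hour>.json. calI2I2.py repeats the idea on the filtered log with integer vids, a unit pair weight, and a threshold of 3; calI2I3.py uses only the multi-day log. A condensed sketch of the core computation follows (it visits each unordered pair once, whereas the script's double loop can reach a pair from both sides):

# Illustrative sketch of the co-share i2i scoring, not the script itself.
from collections import defaultdict
from operator import itemgetter

def build_i2i(user_items, item_share):
    # user_items: {uid: [(vid, share_count), ...]}; item_share: {vid: total share count}
    pair = defaultdict(int)
    for vids in user_items.values():
        for i, (v1, s1) in enumerate(vids):
            for v2, s2 in vids[i + 1:]:
                if v1 != v2:
                    pair[(v1, v2)] += min(s1, s2)   # pair weight, as in the script
    rec = defaultdict(list)
    for (v1, v2), score in pair.items():
        if score < 2:                               # same frequency cutoff as the script
            continue
        rec[v2].append((v1, score / (item_share.get(v1, 0) + 5.0), score))
        rec[v1].append((v2, score / (item_share.get(v2, 0) + 5.0), score))
    return {k: sorted(v, key=itemgetter(1), reverse=True) for k, v in rec.items()}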

+ 176 - 0
calI2I2.py

@@ -0,0 +1,176 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    f = open("./data/user_item_share_filter_"+nowdate)
+    user_item_dict={}
+    item_dict = {}  
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        vid = -1
+        try:
+            vid = int(items[2])
+        except:
+            continue
+        if vid == -1:
+            continue
+        key = (items[1],vid)
+        #print(key)
+        if key not in user_item_dict:
+            user_item_dict[key] = 1
+        else:
+            user_item_dict[key] = user_item_dict[key]+1
+        if vid not in item_dict:
+            item_dict[vid] = 1
+        else:
+            item_dict[vid] = item_dict[vid]+1
+    f.close()
+    nowhour=sys.argv[2]
+    f1 = open("./data/user_cur_day_item_share_filter_"+nowhour)
+    while True:
+        line = f1.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        vid = -1
+        try:
+            vid = int(items[2])
+        except:
+            continue
+        if vid == -1:
+            continue
+
+        key = (items[1],vid)
+        #print(key)
+        if key not in user_item_dict:
+            user_item_dict[key] = 1
+        else:
+            user_item_dict[key] = user_item_dict[key]+1
+        if vid not in item_dict:
+            item_dict[vid] = 1
+        else:
+            item_dict[vid] = item_dict[vid]+1
+    f1.close()
+
+    #((user,item), score)
+    #print(user_item_dict)
+    #2. (uid, [(vid, score)....])
+    user_group_dict = {}
+    for k, v in user_item_dict.items():
+        uid = k[0]
+        vid = k[1]
+        score = v
+        #if score <3:
+        #    continue
+        vid_list = []
+        if uid not in user_group_dict:
+            vid_list.append((vid, score))
+            user_group_dict[uid] = vid_list
+        else:
+            vid_list = user_group_dict[uid]
+            vid_list.append((vid, score))
+            user_group_dict[uid] = vid_list
+    #print(user_group_dict)
+    item_pair_dict = {}
+    #3. expand item
+    for k, v_list in user_group_dict.items():
+         v_n = len(v_list)
+         if v_n<2:
+             continue
+         for i in range(v_n):
+             for j in range(1, v_n):
+                if v_list[i][0] == v_list[j][0]:
+                    continue
+                item_key = (v_list[i][0], v_list[j][0])
+                item_score = 1
+                if item_key not in item_pair_dict:
+                    item_pair_dict[item_key] = item_score
+                else:
+                    item_pair_dict[item_key] = item_pair_dict[item_key]+item_score
+    print(len(item_pair_dict))
+    #print(item_pair_dict)
+    #print(item_dict)
+    left_pair_num = 0
+    rec_item_dict = {}
+    #4. rec item
+    for k, v in item_pair_dict.items():
+        if v<3:
+            continue
+        left_pair_num+=1
+        #print(k[0])
+        #print(k[1])
+        
+        item1 = int(k[0])
+        item2 = int(k[1])
+        pair_score = v
+        if item1 in item_dict:
+            item_score1 = item_dict[item1]
+            #if item_score1<10:
+            #    continue
+            item_score1 = 1
+            i2i_pro = float(pair_score)/(float(item_score1)+5)
+            if i2i_pro<0.000001:
+                continue
+            rec_list1 = []
+            if item2 not in rec_item_dict:
+                rec_list1.append((item1, i2i_pro, pair_score, item_score1))
+                rec_item_dict[item2] = rec_list1
+            else:
+                rec_list1 = rec_item_dict[item2]
+                rec_list1.append((item1, i2i_pro, pair_score, item_score1))
+                rec_item_dict[item2] = rec_list1
+        if item2 in item_dict:
+            item_score2 = item_dict[item2]
+            #if item_score2<10:
+            #    continue
+            item_score2 = 1.0
+            i2i_pro = float(pair_score)/(float(item_score2)+5)
+            if i2i_pro<0.000001:
+                continue
+            rec_list2 = []
+            if item1 not in rec_item_dict:
+                rec_list2.append((item2, i2i_pro, pair_score, item_score2))
+                rec_item_dict[item1] = rec_list2
+            else:
+                rec_list2 = rec_item_dict[item1]
+                rec_list2.append((item2, i2i_pro, pair_score, item_score2))
+                rec_item_dict[item1] = rec_list2   
+     
+    #(item, share_count)
+    print(left_pair_num)
+    #print(rec_item_dict)
+    final_rec_list = []
+    #f = open("rec_result", "w")
+    #5. sorted item_list
+    
+    for k,v in rec_item_dict.items():
+        v_set = set('')
+        value_list = v
+        dup_list = []
+        for item in value_list:
+            if item[0] in v_set:
+                continue
+            v_set.add(item[0])
+            #print(item[1])
+            #if float(items[1])<0.000001:
+            #    continue
+            dup_list.append(item)
+        sorted_v = sorted(dup_list, key=itemgetter(1), reverse=True)
+        final_rec_list.append((k, sorted_v))
+    #print(final_rec_list[:1])
+    #json_str = json.dumps(final_rec_list)
+    with open("./data/rec_result3_"+nowhour+".json", "w") as f :
+        json.dump(final_rec_list, f)
+    
+     

+ 124 - 0
calI2I3.py

@@ -0,0 +1,124 @@
+#coding: utf-8
+import sys
+from operator import itemgetter
+import json
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    f = open("./data/user_item_share_"+nowdate)
+    user_item_dict={}
+    item_dict = {}  
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        key = (items[1],items[2])
+        #print(key)
+        if key not in user_item_dict:
+            user_item_dict[key] = 1
+        else:
+            user_item_dict[key] = user_item_dict[key]+1
+        if items[2] not in item_dict:
+            item_dict[items[2]] = 1
+        else:
+            item_dict[items[2]] = item_dict[items[2]]+1
+    f.close()
+    #((user,item), score)
+    #print(user_item_dict)
+    #2. (uid, [(vid, score)....])
+    user_group_dict = {}
+    for k, v in user_item_dict.items():
+        uid = k[0]
+        vid = k[1]
+        score = v
+        #if score <3:
+        #    continue
+        vid_list = []
+        if uid not in user_group_dict:
+            vid_list.append((vid, score))
+            user_group_dict[uid] = vid_list
+        else:
+            vid_list = user_group_dict[uid]
+            vid_list.append((vid, score))
+            user_group_dict[uid] = vid_list
+    #print(user_group_dict)
+    item_pair_dict = {}
+    #3. expand item
+    for k, v_list in user_group_dict.items():
+         v_n = len(v_list)
+         if v_n<2:
+             continue
+         for i in range(v_n):
+             for j in range(1, v_n):
+                if v_list[i][0] == v_list[j][0]:
+                    continue
+                item_key = (v_list[i][0], v_list[j][0])
+                item_score = 1
+                if item_key not in item_pair_dict:
+                    item_pair_dict[item_key] = item_score
+                else:
+                    item_pair_dict[item_key] = item_pair_dict[item_key]+item_score
+    #print(item_pair_dict)
+    print(item_pair_dict)
+    print(item_dict)
+    left_pair_num = 0
+    rec_item_dict = {}
+    #4. rec item
+    for k, v in item_pair_dict.items():
+        if v<2:
+            continue
+        left_pair_num+=1
+        item1 = k[0]
+        item2 = k[1]
+        pair_score = v
+        if item1 in item_dict:
+             item_score1 = item_dict[item1]
+             i2i_pro = pair_score/(item_score1+5)
+             rec_list1 = []
+             if item2 not in rec_item_dict:
+                 rec_list1.append((item1, i2i_pro, pair_score, item_score1))
+                 rec_item_dict[item2] = rec_list1
+             else:
+                 rec_list1 = rec_item_dict[item2]
+                 rec_list1.append((item1, i2i_pro, pair_score, item_score1))
+                 rec_item_dict[item2] = rec_list1
+        if item2 in item_dict:
+             item_score2 = item_dict[item2]
+             i2i_pro = pair_score/(item_score2+5)
+             rec_list2 = []
+             if item1 not in rec_item_dict:
+                 rec_list2.append((item2, i2i_pro, pair_score, item_score2))
+                 rec_item_dict[item1] = rec_list2
+             else:
+                 rec_list2 = rec_item_dict[item1]
+                 rec_list2.append((item2, i2i_pro, pair_score, item_score2))
+                 rec_item_dict[item1] = rec_list2          
+     
+    #(item, share_count)
+    print(left_pair_num)
+    #print(rec_item_dict)
+    final_rec_list = []
+    #f = open("rec_result", "w")
+    #5. sorted item_list
+    
+    for k,v in rec_item_dict.items():
+        v_set = set('')
+        value_list = v
+        dup_list = []
+        for item in value_list:
+            if item[0] in v_set:
+                continue
+            v_set.add(item[0])
+            dup_list.append(item)
+        sorted_v = sorted(dup_list, key=itemgetter(1), reverse=True)
+        final_rec_list.append((k, sorted_v))
+    #print(final_rec_list[:1])
+    #json_str = json.dumps(final_rec_list)
+    with open("./data/rec_result2_"+nowdate+".json", "w") as f :
+        json.dump(final_rec_list, f)
+    
+     
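For reference, the i2i score computed above is the pair's co-occurrence count divided by the source item's total share count plus a smoothing constant of 5. A minimal self-contained sketch of that step, with made-up counts:

    # toy illustration of the i2i scoring used above (all numbers are made up)
    pair_score = 4            # times item1 and item2 were shared by the same users
    item_score1 = 15          # total share count of item1
    i2i_pro = pair_score / (item_score1 + 5)
    print(i2i_pro)            # 4 / 20 = 0.2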

+ 36 - 0
clean.sh

@@ -0,0 +1,36 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+last3day=`date  +"%Y%m%d" -d -4days`
+rec_result_path=./data/rec_result3_${last3day}'*'
+rec_cur_day_item_path=./data/user_cur_day_item_share_filter_${last3day}'*'
+merge_path=./data/merge_score_${last3day}'*'
+user_item_share_filter_path=./data/user_item_share_filter_${last3day}'*'
+video_data_path=./data/video_data_${last3day}'*'
+sorted_path=./data/sorted_data_${last3day}'*'
+cls_path=./data/redis_cls_${last3day}'*'
+hour_video_path=./data/hour_video_data_${last3day}'*'
+sorted_hour_path=./data/sorted_hour_data_${last3day}'*'
+rec_path=./data/rec_result_'*'
+three_days_sorted_path=./data/3_days_sorted_data_${last3day}'*'
+merge_3_days_score_path=./data/merge_3_days_score_${last3day}'*'
+user_cur_day_path=./data/user_cur_day_item_share_${last3day}'*'
+#user_cur_d=./data/user_cur_day_item_share_${last3day}'*'
+echo ${merge_path}
+echo ${video_data_path}
+echo ${cls_path}
+
+rm -rf ${user_cur_day_path}
+rm -rf ${user_item_share_filter_path}
+rm -rf ${rec_cur_day_item_path}
+rm -rf ${rec_result_path}
+rm -rf ${merge_path}
+rm -rf ${video_data_path}
+rm -rf ${sorted_path}
+rm -rf ${cls_path}
+rm -rf ${hour_video_path}
+rm -rf ${sorted_hour_path}
+rm -rf ${rec_path}
+rm -rf ${three_days_sorted_path}
+rm -rf ${merge_3_days_score_path}
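clean.sh removes the intermediate files written four days earlier (note the `-d -4days` offset despite the `last3day` name). A rough Python equivalent of the same cleanup, assuming the ./data/<prefix>_<YYYYMMDD>* naming used above and listing only a subset of the prefixes:

    # sketch only; prefixes below are a subset of those removed by clean.sh
    import glob, os
    from datetime import datetime, timedelta

    cutoff = (datetime.now() - timedelta(days=4)).strftime("%Y%m%d")
    for prefix in ("rec_result3_", "merge_score_", "video_data_", "sorted_data_"):
        for path in glob.glob("./data/" + prefix + cutoff + "*"):
            os.remove(path)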

+ 62 - 0
compose_score.py

@@ -0,0 +1,62 @@
+#coding utf-8
+import sys
+import json
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    nowdate = sys.argv[1]
+    f1 = open("./data/sorted_hour_data_"+nowdate)
+    f2 = open("./data/sorted_data_"+nowdate)
+    data_dict = {}
+    while True:
+        line = f1.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<2:
+            continue
+        kid = items[0]
+        #print(items[1])
+        item_info = json.loads(items[1])
+        data_dict[kid] = item_info
+    f1.close()
+    f3 = open("./data/merge_score_"+nowdate, 'w')
+    info_dict = {}
+    while True:
+        line = f2.readline()
+        if not line:
+            break
+        items = line.split("\t")
+        if len(items)<2:
+            continue
+        kid = items[0].replace("k_p:", "")
+        kid2 = "k_p2:"+kid
+        #print(kid)
+        d_item_info = json.loads(items[1])
+        if kid in data_dict:
+            item_info = data_dict[kid]
+            #print("h:",item_info)
+            #print("d:",d_item_info)
+            total_info = []
+            for i in range(len(item_info)):
+                total_info.append(0.001*item_info[i]+d_item_info[i])
+            if len(total_info)>2:
+                total_info[0] = total_info[0]+0.1*total_info[3]
+            total_item_info = json.dumps(total_info)
+            f3.write(kid2+"\t"+total_item_info+"\n")
+            info_dict[kid2] = total_item_info
+            #print("m:",total_item_info)
+        else:
+            total_info = []
+            for i  in range(len(d_item_info)):
+                total_info.append(d_item_info[i])
+            if len(total_info)>2:
+                total_info[0] = total_info[1]+0.1*total_info[3]
+            total_item_info = json.dumps(total_info)
+            f3.write(kid2+"\t"+total_item_info+"\n")
+            info_dict[kid2] = total_item_info
+    print(info_dict)
+    if len(info_dict)>0:
+        redis_helper = RedisHelper()
+        redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15)
+    f2.close()
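The merge above down-weights the hourly counters by 0.001 before adding the daily ones, then boosts the first field by 0.1 times the fourth. A toy illustration with made-up numbers:

    # toy illustration of the hour/day merge in compose_score.py
    import json
    hour_info = [120.0, 80.0, 40.0, 10.0]     # made-up hourly stats
    day_info  = [900.0, 600.0, 300.0, 90.0]   # made-up daily stats
    total_info = [0.001 * h + d for h, d in zip(hour_info, day_info)]
    total_info[0] += 0.1 * total_info[3]
    print(json.dumps(total_info))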

+ 83 - 0
compose_score2.py

@@ -0,0 +1,83 @@
+#coding utf-8
+import sys
+import json
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    nowdate = sys.argv[1]
+    f1 = open("./data/sorted_hour_data_"+nowdate)
+    f2 = open("./data/sorted_data_"+nowdate)
+    data_dict = {}
+    while True:
+        line = f1.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<2:
+            continue
+        kid = items[0]
+        #print(items[1])
+        item_info = json.loads(items[1])
+        data_dict[kid] = item_info
+    f1.close()
+    f3 = open("./data/merge_score_"+nowdate, 'w')
+    info_dict = {}
+    info_dict2 = {}
+    info_dict3 = {}
+    info_dict4 = {}
+    while True:
+        line = f2.readline()
+        if not line:
+            break
+        items = line.split("\t")
+        if len(items)<2:
+            continue
+        kid = items[0].replace("k_p:", "")
+        kid2 = "k_p2:"+kid
+        kid3 = "k_p3:"+kid
+        kid4 = "k_p4:"+kid
+        kid5 = "k_p7:"+kid
+        #print(kid)
+        d_item_info = json.loads(items[1])
+        if kid in data_dict:
+            item_info = data_dict[kid]
+            #print("h:",item_info)
+            #print("d:",d_item_info)
+            total_info = []
+            for i in range(len(item_info[:5])):
+                total_info.append(item_info[i]+d_item_info[i])
+            #if len(total_info)>2:
+            #    total_info[0] = total_info[0]+0.1*total_info[3]
+            total_item_info = json.dumps(total_info)
+            f3.write(kid2+"\t"+total_item_info+"\n")
+            info_dict[kid2] = total_item_info
+            info_dict2[kid3] = total_item_info
+            info_dict3[kid4] = total_item_info
+            info_dict4[kid5] = total_item_info
+            #print("m:",total_item_info)
+        else:
+            total_info = []
+            for i  in range(len(d_item_info[:5])):
+                total_info.append(d_item_info[i])
+            #if len(total_info)>2:
+            #    total_info[0] = total_info[1]+0.1*total_info[3]
+            total_item_info = json.dumps(total_info)
+            f3.write(kid2+"\t"+total_item_info+"\n")
+            info_dict[kid2] = total_item_info
+            info_dict2[kid3] = total_item_info
+            info_dict3[kid4] = total_item_info
+            info_dict4[kid5] = total_item_info
+    print(info_dict4)
+    if len(info_dict)>0:
+        redis_helper = RedisHelper()
+        redis_helper.update_batch_setnx_key(info_dict, 60*60*24*3) 
+    if len(info_dict2)>0:
+        redis_helper = RedisHelper()
+        redis_helper.update_batch_setnx_key(info_dict2, 60*60*24*3)
+    if len(info_dict3)>0:
+        redis_helper = RedisHelper()
+        redis_helper.update_batch_setnx_key(info_dict3, 60*60*24*3) 
+    if len(info_dict4)>0:
+        redis_helper = RedisHelper()
+        redis_helper.update_batch_setnx_key(info_dict4, 60*60*24*3)
+    f2.close()
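compose_score2.py writes the same merged score under four redis key prefixes (k_p2/k_p3/k_p4/k_p7), each with a 3-day TTL. A short sketch of that fan-out; the vid "12345" and the score list are placeholders:

    # sketch of the redis key fan-out used above
    import json
    kid = "12345"
    score_json = json.dumps([1.0, 2.0, 3.0, 4.0, 5.0])
    batches = {p: {p + kid: score_json} for p in ("k_p2:", "k_p3:", "k_p4:", "k_p7:")}
    # each batch would then go to RedisHelper().update_batch_setnx_key(batch, 60*60*24*3)
    print(batches)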

+ 97 - 0
compose_score_3day.py

@@ -0,0 +1,97 @@
+#coding utf-8
+import sys
+import json
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    nowdate = sys.argv[1]
+    f1 = open("./data/sorted_hour_info_"+nowdate)
+    f2 = open("./data/1_days_sorted_data_"+nowdate)
+    f3 = open("./data/3_days_sorted_data_"+nowdate)
+    data_dict = {}
+    while True:
+        line = f1.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<2:
+            continue
+        kid = items[0]
+        #print(items[1])
+        item_info = json.loads(items[1])
+        data_dict[kid] = item_info
+    f1.close()
+    #f3 = open("./data/merge_score_"+nowdate, 'w')
+    info_dict = {}
+    while True:
+        line = f2.readline()
+        if not line:
+            break
+        items = line.split("\t")
+        if len(items)<2:
+            continue
+        kid = items[0]
+        d_item_info = json.loads(items[1])
+        if kid in data_dict:
+            item_info = data_dict[kid]
+            total_info = []
+            for i in range(len(item_info)):
+                total_info.append(float(item_info[i])+0.1*float(d_item_info[i]))
+            info_dict[kid] = total_info
+        else:
+            total_info = []
+            for i  in range(len(d_item_info)):
+                total_info.append(float(d_item_info[i]))
+            #if len(total_info)>2:
+            #    total_info[0] = total_info[1]+0.1*total_info[3]
+            #total_item_info = json.dumps(total_info)
+            #f3.write(kid2+"\t"+total_item_info+"\n")
+            info_dict[kid] = total_info
+    #print(info_dict)
+    print("info:", len(info_dict))
+    day3_dict = {}
+    while True:
+        line = f3.readline()
+        if not line:
+            break
+        items = line.split("\t")
+        if len(items)<2:
+            continue
+        kid = items[0]
+        d_item_info = json.loads(items[1])
+        if kid in info_dict:
+            item_info = info_dict[kid]
+            total_info = []
+            for i in range(len(item_info)):
+                total_info.append(float(item_info[i])+0.1*float(d_item_info[i]))
+            day3_dict[kid] = total_info
+        else:
+            total_info = []
+            for i  in range(len(d_item_info)):
+                total_info.append(float(d_item_info[i]))
+            day3_dict[kid] = total_info
+    print("info3:", len(day3_dict))
+    f3 = open("./data/merge_3_days_score_"+nowdate, 'w')
+    res_dict = {}
+    res_dict2 = {}
+    for k, v in day3_dict.items():
+        score = v[0]
+        new_arr = []
+        new_arr.append(score)
+        for i in range(4):
+            new_arr.append(v[i])
+        #print(v)
+        #print(new_arr)
+        total_item_info = json.dumps(new_arr)
+        kid2 = "kp_3:"+k
+        kid3 = "kp_4:"+k
+        f3.write(kid2+"\t"+total_item_info+"\n")
+        #res_dict[kid2] = total_item_info
+        res_dict2[kid3] = total_item_info
+    #if len(res_dict)>0:
+    #    redis_helper = RedisHelper()
+    #    redis_helper.update_batch_setnx_key(res_dict, 60*60*24*15)
+    if len(res_dict2)>0:
+        redis_helper = RedisHelper()
+        redis_helper.update_batch_setnx_key(res_dict2, 60*60*24*3)
+    f2.close()
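The script above folds the hourly list into the 1-day list with a 0.1 weight, then folds that result into the 3-day list with the same weight. A toy illustration with made-up values:

    # toy illustration of the hour -> 1-day -> 3-day cascade (numbers are made up)
    hour = [10.0, 5.0, 2.0, 1.0, 0.5]
    day1 = [100.0, 50.0, 20.0, 10.0, 5.0]
    day3 = [300.0, 150.0, 60.0, 30.0, 15.0]
    step1 = [h + 0.1 * d for h, d in zip(hour, day1)]   # vid present in hourly data
    final = [s + 0.1 * d for s, d in zip(step1, day3)]  # vid also present in 3-day data
    print(final)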

+ 79 - 0
compose_score_type.py

@@ -0,0 +1,79 @@
+#coding utf-8
+import sys
+import json
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    nowdate = sys.argv[1]
+    f1 = open("./data/sorted_type_hour_data_"+nowdate)
+    f2 = open("./data/sorted_type_data_"+nowdate)
+    data_dict = {}
+    while True:
+        line = f1.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        kid = items[0]+"\t"+items[1]
+        #print(items[1])
+        item_info = json.loads(items[2])
+        data_dict[kid] = item_info
+    f1.close()
+    f3 = open("./data/merge_type_score_"+nowdate, 'w')
+    info_dict = {}
+    info_dict2 = {}
+    while True:
+        line = f2.readline()
+        if not line:
+            break
+        items = line.split("\t")
+        if len(items)<3:
+            continue
+        kid = items[0]+"\t"+items[1]
+        #kid2 = "k_p2:"+kid
+        #kid3 = "k_p4:"+kid
+        #print(kid)
+        d_item_info = json.loads(items[2])
+        if kid in data_dict:
+            item_info = data_dict[kid]
+            total_info = []
+            for i in range(len(item_info)):
+                total_info.append(item_info[i]+d_item_info[i])
+            info_dict[kid] = total_info
+        else:
+            total_info = []
+            for i in range(len(d_item_info)):
+                total_info.append(d_item_info[i])
+            info_dict[kid] = total_info
+    #print(info_dict)
+    redis_dict = {}
+    for k, v in info_dict.items():
+        items = k.split("\t")
+        if len(items)<2:
+            continue
+        app_type = items[0]
+        vid = items[1]
+        rov = v[0]
+        if app_type!="4":
+            continue
+        v_log_k = "0"+"\t"+vid
+        #print(v_log_k)
+        v_log_rov = 0.0
+        if v_log_k in info_dict:
+            v_log_rov = info_dict[v_log_k][0]
+            print(v_log_rov)
+        compose_rov = float(rov)+ 0.1*float(v_log_rov)
+        redis_key="k_p5:"+vid
+        score_arr = [compose_rov, rov, v_log_rov]
+        #score_arr.append((compose_rov, rov, v_log_rov))
+        redis_dict[redis_key] = json.dumps(score_arr)
+        f3.write(str(vid)+"\t"+json.dumps(score_arr)+"\n")
+    print(redis_dict)     
+    if len(redis_dict)>0:
+        redis_helper = RedisHelper()
+        redis_helper.update_batch_setnx_key(redis_dict, 60*60*24*1) 
+    #if len(info_dict2)>0:
+    #    redis_helper = RedisHelper()
+    #    redis_helper.update_batch_setnx_key(info_dict2, 60*60*24*3)
+    f2.close()
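For apptype 4 the script composes the final ROV as the apptype-4 score plus 0.1 times the vlog (apptype 0) score for the same vid, then writes it under k_p5:<vid>. A toy illustration; the vid "888" and both ROV values are made up:

    # toy illustration of the per-apptype merge in compose_score_type.py
    import json
    info_dict = {"4\t888": [0.30], "0\t888": [0.90]}
    rov = info_dict["4\t888"][0]
    v_log_rov = info_dict.get("0\t888", [0.0])[0]
    compose_rov = float(rov) + 0.1 * float(v_log_rov)   # 0.30 + 0.09 = 0.39
    print("k_p5:888", json.dumps([compose_rov, rov, v_log_rov]))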

+ 361 - 0
config.py

@@ -0,0 +1,361 @@
+import os
+# from log import Log
+# log_ = Log()
+
+class BaseConfig(object):
+    # 产品标识
+    APP_TYPE = {
+        'VLOG': 0,  # vlog
+        'LOVE_LIVE': 4,  # 票圈视频
+        'LONG_VIDEO': 5,  # 内容精选
+        'SHORT_VIDEO': 6,  # 票圈短视频
+        'WAN_NENG_VIDEO': 17,  # 万能影视屋
+        'LAO_HAO_KAN_VIDEO': 18,  # 老好看视频
+        'ZUI_JING_QI': 19,  # 票圈最惊奇
+        'APP': 13,  # 票圈视频APP
+        'PIAO_QUAN_VIDEO_PLUS': 21,  # 票圈视频+
+        'JOURNEY': 22,  # 票圈足迹
+        'BLESSING_YEAR': 3,  # 票圈福年
+    }
+    # ODPS服务配置
+    ODPS_CONFIG = {
+        'ENDPOINT': 'http://service.cn.maxcompute.aliyun.com/api',
+        'ACCESSID': 'LTAIWYUujJAm7CbH',
+        'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
+    }
+    
+    # 日志服务配置
+    ALIYUN_LOG = {
+        'ENDPOINT': 'cn-hangzhou-intranet.log.aliyuncs.com',
+        'ACCESSID': 'LTAIWYUujJAm7CbH',
+        'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
+        'PROJECT': 'rov-server',
+    }
+  
+    REDIS_INFO = {
+        'host': 'r-bp1fogs2mflr1ybfot.redis.rds.aliyuncs.com',
+        'port': 6379,
+        'password': 'Wqsd@2019',
+    }
+ 
+    # 小年糕视频redis存储key
+    XNG_KEY_NAME = 'xng:videos'
+    # 特殊地区屏蔽危险视频redis存储key
+    SPECIAL_AREA_LIMIT_KEY_NAME = 'special:area:limit:videos'
+    #24 hour update 
+    PROJECT_REGION_APP_TYPE = 'loghubods'
+    TABLE_REGION_APP_TYPE = 'video_each_hour_update_province_apptype'
+    
+    CITY_CODE = {
+        '广州': '440100', '深圳': '440300', '成都': '510100', '长沙': '430100',
+    }
+    DATA_PARAMS = {
+        'data1': {APP_TYPE['VLOG']: 0},  # vlog
+        'data2': {APP_TYPE['VLOG']: 0.5, APP_TYPE['LONG_VIDEO']: 0.5},  # [vlog, 内容精选]
+        'data3': {APP_TYPE['VLOG']: 0.5, APP_TYPE['LOVE_LIVE']: 0.5},  # [vlog, 票圈视频]
+        'data4': {APP_TYPE['VLOG']: 0.5, APP_TYPE['SHORT_VIDEO']: 0.5},  # [vlog, 票圈短视频]
+        # 'data5': [APP_TYPE['VLOG'], APP_TYPE['ZUI_JING_QI']],  # [vlog, 最惊奇]
+        'data6': {APP_TYPE['VLOG']: 0.25, APP_TYPE['LOVE_LIVE']: 0.25, APP_TYPE['SHORT_VIDEO']: 0.25,
+                  APP_TYPE['LONG_VIDEO']: 0.25},
+        'data7': {APP_TYPE['VLOG']: 0.5, APP_TYPE['APP']: 0.5},  # [vlog, 票圈视频APP]
+        'data8': {APP_TYPE['VLOG']: 0.7, APP_TYPE['LONG_VIDEO']: 0.3},  # [vlog, 内容精选]
+        'data9': {APP_TYPE['VLOG']: 0.3, APP_TYPE['LONG_VIDEO']: 0.7},  # [vlog, 内容精选]
+        'data10': {APP_TYPE['VLOG']: 0.2, APP_TYPE['LOVE_LIVE']: 0.8},  # [vlog, 票圈视频]
+        'data11': {APP_TYPE['VLOG']: 0.3, APP_TYPE['LOVE_LIVE']: 0.7},  # [vlog, 票圈视频]
+        'data12': {APP_TYPE['VLOG']: 0.4, APP_TYPE['SHORT_VIDEO']: 0.6},  # [vlog, 票圈短视频]
+        'data13': {APP_TYPE['VLOG']: 0.3, APP_TYPE['SHORT_VIDEO']: 0.7},  # [vlog, 票圈短视频]
+        'data14': {APP_TYPE['VLOG']: 0.78, APP_TYPE['LOVE_LIVE']: 0.11, APP_TYPE['SHORT_VIDEO']: 0.08,
+                   APP_TYPE['LONG_VIDEO']: 0.03},
+
+    }
+
+    REGION_CODE = {
+        '北京': '110000', '天津': '120000', '河北省': '130000', '山西省': '140000', '内蒙古': '150000',
+        '辽宁省': '210000', '吉林省': '220000', '黑龙江省': '230000',
+        '上海': '310000', '江苏省': '320000', '浙江省': '330000', '安徽省': '340000', '福建省': '350000', '江西省': '360000', '山东省': '370000',
+        '河南省': '410000', '湖北省': '420000', '湖南省': '430000', '广东省': '440000', '广西': '450000', '海南省': '460000',
+        '重庆': '500000',  '四川省': '510000', '贵州省': '520000', '云南省': '530000', '西藏': '540000',
+        '陕西省': '610000', '甘肃省': '620000', '青海省': '630000', '宁夏': '640000', '新疆': '650000',
+        '台湾省': '710000', '香港': '810000', '澳门': '820000',
+        'None': '-1'
+    } 
+     # 屏蔽视频配置实验组
+    SHIELD_CONFIG2 = {
+        REGION_CODE['北京']: [XNG_KEY_NAME, SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        REGION_CODE['None']: [SPECIAL_AREA_LIMIT_KEY_NAME, XNG_KEY_NAME, ],
+        CITY_CODE['广州']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['深圳']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['成都']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+        CITY_CODE['长沙']: [SPECIAL_AREA_LIMIT_KEY_NAME, ],
+    }
+ 
+    RULE_PARAMS_REGION_APP_TYPE_48H = {
+        'rule_params': {
+            'rule5': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule2', '48h_rule_key': 'rule1'},
+        },
+        'data_params': {
+            'data1': [APP_TYPE['VLOG'], ],
+        },
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule5'},
+        ],
+    }
+    
+     # 地域分组小时级规则参数
+    RULE_PARAMS_REGION_APP_TYPE = {
+        'rule_params': {
+            # 'rule2': {'view_type': 'video-show', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2'},
+            # 'rule3': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #           'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule2'},
+            'rule4': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3'},
+            # 涉政视频过滤
+            'rule4-1': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                        'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', 'political_filter': True},
+            # 特殊地域屏蔽危险视频
+            'rule4-2': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                        'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', 'shield_config': SHIELD_CONFIG2},
+
+            # 'rule6': {'view_type': 'preview', 'platform_return_rate': 0.001,
+            #           'region_24h_rule_key': 'rule3', '24h_rule_key': 'rule2'},
+            'rule7': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2},
+            'rule7-1': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                        'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2,
+                        'political_filter': True},
+            'rule8': {'view_type': 'preview', 'platform_return_rate': 0.001,
+                      'region_24h_rule_key': 'rule5', '24h_rule_key': 'rule4', 'merge_func': 2},
+            # 'rule9': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #           'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', '30day_rule_key': 'rule1'},
+            # # 无回流人群
+            # 'rule10': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule6', '24h_rule_key': 'rule5', 'click_score_rate': 0.7},
+            # 'rule13': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule8', '24h_rule_key': 'rule7', 'click_score_rate': 0.8},
+            # # 有回流人群
+            # 'rule11': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule7', '24h_rule_key': 'rule6', 'back_score_rate': 0.7},
+            # 'rule14': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule9', '24h_rule_key': 'rule8', 'back_score_rate': 0.8},
+            # # 20点地域小时级列表中增加7点-19点地域小时级的优质视频
+            # 'rule12': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+            #            'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', 'add_videos_in_20h': True},
+
+            # 地域小时级列表中增加 前6小时 地域小时级的优质视频
+            'rule15': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 6},
+            # 地域小时级列表中增加 前2小时 地域小时级的优质视频,排序优化1:半小时级列表中有的视频以本小时的分数为准
+            'rule16': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 2, 'add_func': 'func2'},
+            # 地域小时级列表中增加 前47小时 地域小时级的优质视频
+            'rule17': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 47},
+            # 地域小时级列表中增加 前3小时 地域小时级的优质视频,排序优化1:半小时级列表中有的视频以本小时的分数为准
+            'rule18': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3',
+                       'add_videos_with_pre_h': True, 'hour_count': 3, 'add_func': 'func2'},
+
+        },
+        'data_params': DATA_PARAMS,
+        'params_list': [
+            {'data': 'data1', 'rule': 'rule4'},  # 095 vlog
+            {'data': 'data1', 'rule': 'rule4-1'},  # 095-1
+            {'data': 'data1', 'rule': 'rule4-2'},  # 262 特殊地域屏蔽危险视频
+            # {'data': 'data2', 'rule': 'rule4'},
+            {'data': 'data2', 'rule': 'rule7-1'},  # 121 内容精选
+            # {'data': 'data3', 'rule': 'rule7'},
+            # {'data': 'data4', 'rule': 'rule7'},
+            # {'data': 'data6', 'rule': 'rule7'},
+            {'data': 'data7', 'rule': 'rule8'},  # 票圈视频APP 10003.110156
+            # {'data': 'data1', 'rule': 'rule9'},
+            # {'data': 'data1', 'rule': 'rule10'},
+            # {'data': 'data1', 'rule': 'rule11'},
+            # {'data': 'data8', 'rule': 'rule7'},
+            # {'data': 'data9', 'rule': 'rule7'},
+            {'data': 'data10', 'rule': 'rule7'},  # 144 票圈视频
+            # {'data': 'data11', 'rule': 'rule7'},
+            # {'data': 'data12', 'rule': 'rule7'},
+            # {'data': 'data13', 'rule': 'rule7'},
+            # {'data': 'data1', 'rule': 'rule12'},
+            # {'data': 'data14', 'rule': 'rule7'},  # 159
+            # {'data': 'data1', 'rule': 'rule13'},  # 161
+            # {'data': 'data1', 'rule': 'rule14'},  # 162
+            # {'data': 'data1', 'rule': 'rule15'},  # 200 vlog
+            # {'data': 'data1', 'rule': 'rule16'},  # 214 vlog
+            # {'data': 'data1', 'rule': 'rule17'},  # 215 vlog
+            # {'data': 'data1', 'rule': 'rule18'},  # 224 vlog
+        ],
+    }
+
+class TestConfig(BaseConfig):
+    """测试环境配置"""
+    # 报警内容 环境区分
+    ENV_TEXT = "测试环境"
+    # 项目存放目录
+    PROJECT_PATH = '/data2/rov-offline'
+
+    # 测试环境redis地址
+    REDIS_INFO = {
+        'host': 'r-bp1ps6my7lzg8rdhwx682.redis.rds.aliyuncs.com',
+        'port': 6379,
+        'password': 'Wqsd@2019',
+    }
+
+    # Hologres连接参数,服务器使用
+    HOLOGRES_INFO = {
+        'host': 'hgprecn-cn-7pp28y18c00c-cn-hangzhou-vpc.hologres.aliyuncs.com',
+        'port': 80,
+        'dbname': 'dssm',
+        'user': 'LTAI5tMPqPy9yboQAf1mBCCN',
+        'password': '4BEcOgxREOPq7t3A7EWkjciVULeQGj'
+    }
+
+    # 测试环境mysql地址
+    MYSQL_INFO = {
+        'host': 'rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com',
+        'port': 3306,
+        'user': 'wx2016_longvideo',
+        'password': 'wx2016_longvideoP@assword1234',
+        'db': 'longvideo',
+        'charset': 'utf8'
+    }
+
+    # 测试环境 过滤用mysql地址
+    FILTER_MYSQL_INFO = {
+        'host': 'am-bp1g3ys9u00u483uc131930.ads.aliyuncs.com',
+        'port': 3306,
+        'user': 'lv_manager',
+        'password': 'lv_manager@2020',
+        'db': 'longvideo',
+        'charset': 'utf8'
+    }
+
+    # 日志服务配置
+    ALIYUN_LOG = {
+        'ENDPOINT': 'cn-hangzhou.log.aliyuncs.com',
+        'ACCESSID': 'LTAIWYUujJAm7CbH',
+        'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
+        'PROJECT': 'rov-server-test',
+    }
+
+    # Hologres视频状态存储表名
+    VIDEO_STATUS = 'longvideo_test.dwd_mdm_item_video_stat'
+
+    # 快速曝光流量池ID
+    QUICK_FLOW_POOL_ID = 3
+
+    # 获取流量池分发配置接口地址
+    GET_FLOW_POOL_RECOMMEND_CONFIG_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/getConfig'
+    # 从流量池获取视频接口地址
+    GET_VIDEOS_FROM_POOL_URL = 'http://testapi-internal.piaoquantv.com/flowpool/video/getAllVideo'
+    # 获取视频在流量池中的剩余可分发数接口地址
+    GET_REMAIN_VIEW_COUNT_URL = 'http://testapi-internal.piaoquantv.com/flowpool/video/remainViewCount'
+    # 计算完ROV通知后端接口地址
+    NOTIFY_BACKEND_UPDATE_ROV_SCORE_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/updateRovScore'
+    # 获取置顶视频列表接口地址
+    TOP_VIDEO_LIST_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/topVideoList'
+    # 获取首页兜底视频json接口地址
+    BOTTOM_JSON_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/video/distribute/structure/video/list'
+    # 通知后端更新兜底视频接口地址
+    NOTIFY_BACKEND_updateFallBackVideoList_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/updateFallBackVideoList'
+    # 获取限流视频接口地址
+    GET_VIDEO_LIMIT_LIST_URL = 'http://videotest-internal.yishihui.com/longvideoapi/openapi/recommend/getVideoLimitList'
+    # 获取管理后台设置的广告目标uv值接口地址
+    GET_AD_TARGET_UV_URL = 'https://testadmin.piaoquantv.com/manager/ad/algo/threshold/productUvTargetList'
+
+    # # logs 上传oss 目标Bucket指定目录
+    # OSS_FOLDER_LOGS = 'rov-offline/test/logs/'
+    # # data 上传oss 目标Bucket指定目录
+    # OSS_FOLDER_DATA = 'rov-offline/test/data/'
+
+class ProductionConfig(BaseConfig):
+    """生产环境配置"""
+    # 报警内容 环境区分
+    ENV_TEXT = "生产环境"
+    # 项目存放目录
+    PROJECT_PATH = '/data/rov-offline'
+
+    # 线上环境redis地址
+    REDIS_INFO = {
+        'host': 'r-bp1fogs2mflr1ybfot.redis.rds.aliyuncs.com',
+        'port': 6379,
+        'password': 'Wqsd@2019',
+    }
+
+    # Hologres连接参数,服务器使用
+    HOLOGRES_INFO = {
+        'host': 'hgprecn-cn-7pp28y18c00c-cn-hangzhou-vpc.hologres.aliyuncs.com',
+        'port': 80,
+        'dbname': 'dssm',
+        'user': 'LTAI5tMPqPy9yboQAf1mBCCN',
+        'password': '4BEcOgxREOPq7t3A7EWkjciVULeQGj'
+    }
+
+    # 生产环境mysql地址
+    MYSQL_INFO = {
+        'host': 'rr-bp1x9785e8h5452bi157.mysql.rds.aliyuncs.com',
+        'port': 3306,
+        'user': 'wx2016_longvideo',
+        'password': 'wx2016_longvideoP@assword1234',
+        'db': 'longvideo',
+        'charset': 'utf8'
+    }
+
+    # 生产环境 过滤用mysql地址
+    FILTER_MYSQL_INFO = {
+        'host': 'am-bp15tqt957i3b3sgi131950.ads.aliyuncs.com',
+        'port': 3306,
+        'user': 'lv_manager',
+        'password': 'lv_manager@2020',
+        'db': 'longvideo',
+        'charset': 'utf8'
+    }
+
+    # 日志服务配置
+    ALIYUN_LOG = {
+        'ENDPOINT': 'cn-hangzhou.log.aliyuncs.com',
+        'ACCESSID': 'LTAIWYUujJAm7CbH',
+        'ACCESSKEY': 'RfSjdiWwED1sGFlsjXv0DlfTnZTG1P',
+        'PROJECT': 'rov-server',
+    }
+
+    # Hologres视频状态存储表名
+    VIDEO_STATUS = 'longvideo.dwd_mdm_item_video_stat'
+
+    # 快速曝光流量池ID
+    QUICK_FLOW_POOL_ID = 3
+
+    # 获取流量池分发配置接口地址
+    GET_FLOW_POOL_RECOMMEND_CONFIG_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/getConfig'
+    # 从流量池获取视频接口地址
+    GET_VIDEOS_FROM_POOL_URL = 'http://api-internal.piaoquantv.com/flowpool/video/getAllVideo'
+    # 获取视频在流量池中的剩余可分发数接口地址
+    GET_REMAIN_VIEW_COUNT_URL = 'http://api-internal.piaoquantv.com/flowpool/video/remainViewCount'
+    # 计算完ROV通知后端接口地址
+    NOTIFY_BACKEND_UPDATE_ROV_SCORE_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/updateRovScore'
+    # 获取置顶视频列表接口地址
+    TOP_VIDEO_LIST_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/topVideoList'
+    # 获取首页兜底视频json接口地址
+    BOTTOM_JSON_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/video/distribute/structure/video/list'
+    # 通知后端更新兜底视频接口地址
+    NOTIFY_BACKEND_updateFallBackVideoList_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/updateFallBackVideoList'
+    # 获取限流视频接口地址
+    GET_VIDEO_LIMIT_LIST_URL = 'http://recommend-common-internal.piaoquantv.com/longvideoapi/openapi/recommend/getVideoLimitList'
+    # 获取管理后台设置的广告目标uv值接口地址
+    GET_AD_TARGET_UV_URL = 'https://admin.piaoquantv.com/manager/ad/algo/threshold/productUvTargetList'
+
+    # # logs 上传oss 目标Bucket指定目录
+    # OSS_FOLDER_LOGS = 'rov-offline/pro/logs/'
+    # # data 上传oss 目标Bucket指定目录
+    # OSS_FOLDER_DATA = 'rov-offline/pro/data/'
+
+def set_config():
+    # 获取环境变量 Base_ENV(当前未使用,固定返回生产环境配置)
+    env = os.environ.get('Base_ENV')
+    return ProductionConfig()
+
+
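A minimal usage sketch for config.py; the weights come from the DATA_PARAMS table above, and set_config currently always returns ProductionConfig regardless of the Base_ENV variable:

    # usage sketch for config.py
    from config import set_config

    config_ = set_config()
    weights = config_.DATA_PARAMS['data14']   # {0: 0.78, 4: 0.11, 6: 0.08, 5: 0.03}
    assert abs(sum(weights.values()) - 1.0) < 1e-6
    print(config_.APP_TYPE['VLOG'], weights)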

+ 47 - 0
cut_title.py

@@ -0,0 +1,47 @@
+#coding utf-8
+import sys
+import jieba 
+import os
+
+if __name__=="__main__":
+    #f1 = open(sys.argv[1])
+    stop_words = set('')
+    path = sys.argv[1]
+    files_dir = os.listdir(path)
+    #print(files_dir)
+    for file_name in files_dir:
+        if file_name.find('.txt')>-1:
+            f1 = open(path+"/"+file_name)
+            while True:
+                file_line = f1.readline()
+                if not file_line:
+                    break
+                file_line = file_line.strip()
+                stop_words.add(file_line)
+            f1.close()
+    #print(len(stop_words))
+    f = open(sys.argv[2])
+    f3 = open(sys.argv[3], 'w')
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        line = line.strip()
+        items = line.split("\t")
+        if len(items)<2:
+            continue
+        vid = items[0]
+        title = items[1] 
+        cut_info =  jieba.lcut(title)
+        cut_arr = []
+        for cut_item in cut_info:
+            #print("cut_item:", cut_item)
+            if cut_item==' ':
+                continue
+            if cut_item in stop_words:
+                continue
+            cut_arr.append(cut_item)
+        vid_info = vid+'\t'+" ".join(cut_arr)
+        f3.write(vid_info.strip()+"\n")
+    f3.close()
+       
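cut_title.py expects a directory of *.txt stopword files, a tab-separated vid/title file, and an output path (python cut_title.py <stopword_dir> <vid_title_file> <out_file>). A minimal sketch of the core segmentation step; the stopword set and sample title are made up:

    # sketch of the jieba segmentation step in cut_title.py
    import jieba

    stop_words = {"的", "了"}                    # placeholder stopword set
    title = "八十岁老人自驾游西藏"
    cut_arr = [w for w in jieba.lcut(title) if w != " " and w not in stop_words]
    print("vid123" + "\t" + " ".join(cut_arr))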

+ 47 - 0
cut_title_top.py

@@ -0,0 +1,47 @@
+#coding utf-8
+import sys
+import jieba 
+import os
+
+if __name__=="__main__":
+    #f1 = open(sys.argv[1])
+    stop_words = set('')
+    path = sys.argv[1]
+    files_dir = os.listdir(path)
+    #print(files_dir)
+    for file_name in files_dir:
+        if file_name.find('.txt')>-1:
+            f1 = open(path+"/"+file_name)
+            while True:
+                file_line = f1.readline()
+                if not file_line:
+                    break
+                file_line = file_line.strip()
+                stop_words.add(file_line)
+            f1.close()
+    #print(len(stop_words))
+    f = open(sys.argv[2])
+    f3 = open(sys.argv[3], 'w')
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        line = line.strip()
+        items = line.split("\t")
+        if len(items)<2:
+            continue
+        vid = items[0]
+        title = items[1] 
+        cut_info =  jieba.lcut(title)
+        cut_arr = []
+        for cut_item in cut_info:
+            #print("cut_item:", cut_item)
+            if cut_item==' ':
+                continue
+            if cut_item in stop_words:
+                continue
+            cut_arr.append(cut_item)
+        vid_info = vid+'\t'+" ".join(cut_arr)
+        f3.write(vid_info.strip()+"\n")
+    f3.close()
+       

+ 337 - 0
db_help.py

@@ -0,0 +1,337 @@
+# coding:utf-8
+import redis
+import pymysql
+from config import set_config
+config_ = set_config()
+conn_redis = None
+
+class RedisHelper(object):
+    def __init__(self):
+        """
+        初始化redis连接信息
+        redis_info: redis连接信息, 格式:dict, {'host': '', 'port': '', 'password': ''}
+        """
+        redis_info = config_.REDIS_INFO
+        self.host = redis_info['host']
+        self.port = redis_info['port']
+        self.password = redis_info['password']
+
+    def connect(self):
+        """
+        连接redis
+        :return: conn
+        """
+        global conn_redis
+        if conn_redis is None:
+            pool = redis.ConnectionPool(host=self.host,
+                                        port=self.port,
+                                        password=self.password,
+                                        decode_responses=True)
+            conn = redis.Redis(connection_pool=pool)
+            conn_redis = conn
+        return conn_redis
+
+    def key_exists(self, key_name):
+        """
+        判断key是否存在
+        :param key_name: key
+        :return: 存在-True, 不存在-False
+        """
+        conn = self.connect()
+        return conn.exists(key_name)
+
+    def del_keys(self, key_name):
+        """
+        删除key
+        :param key_name: key
+        :return: None
+        """
+        conn = self.connect()
+        conn.delete(key_name)
+
+    def get_data_from_redis(self, key_name):
+        """
+        读取redis中的数据
+        :param key_name: key
+        :return: data
+        """
+        conn = self.connect()
+        if not conn.exists(key_name):
+            # key不存在
+            return None
+        data = conn.get(key_name)
+        return data
+
+    def set_data_to_redis(self, key_name, value, expire_time=24*3600):
+        """
+        新增数据
+        :param key_name: key
+        :param value: 元素的值 videoId
+        :param expire_time: 过期时间,单位:s,默认1天
+        :return: None
+        """
+        conn = self.connect()
+        conn.set(key_name, value, ex=int(expire_time))
+
+    def add_data_with_zset(self, key_name, data, expire_time=7*24*3600):
+        """
+        新增数据,有序set
+        :param key_name: key
+        :param data: 元素的值及对应分数 type-dict  {value: score}
+        :param expire_time: 过期时间,单位:s,默认7天
+        :return: None
+        """
+        if not data:
+            return
+        conn = self.connect()
+        # 数据量大时一次性写入耗时长,分批次写入
+        keys_list = list(data.keys())
+        zadd_data = {}
+        for i, key in enumerate(keys_list):
+            if i % 100 == 0:
+                if zadd_data:
+                    conn.zadd(key_name, zadd_data)
+                zadd_data = {key: data.get(key)}
+            else:
+                zadd_data[key] = data.get(key)
+        if zadd_data:
+            conn.zadd(key_name, zadd_data)
+        # 设置过期时间
+        conn.expire(key_name, int(expire_time))
+
+    def get_data_zset_with_index(self, key_name, start, end, desc=True, with_scores=False):
+        """
+        根据索引位置获取元素的值
+        :param key_name: key
+        :param start: 索引起始点 闭区间,包含start
+        :param end: 索引结束点 闭区间,包含end
+        :param desc: 分数排序方式,默认从大到小
+        :param with_scores: 是否获取元素的分数,默认 False,只获取元素的值
+        :return: data 元素值列表(不包含分数),value(videoId)类型转换为int, 包含分数时不进行类型转换
+        """
+        conn = self.connect()
+        if not conn.exists(key_name):
+            return None
+        data = conn.zrange(key_name, start, end, desc, with_scores)
+        return data
+        # if with_scores:
+        #     return data
+        # else:
+        #     return [eval(value) for value in data]
+
+    def get_all_data_from_zset(self, key_name, desc=True, with_scores=False):
+        """
+        获取zset中所有元素的值
+        :param key_name: key
+        :param desc: 分数排序方式,默认从大到小
+        :param with_scores: 是否获取元素的分数,默认 False,只获取元素的值
+        :return: data 元素值列表(不包含分数),value(videoId)类型转换为int, 包含分数时不进行类型转换
+        """
+        conn = self.connect()
+        if not conn.exists(key_name):
+            return None
+        data = []
+        start = 0
+        step = 100
+        while True:
+            end = start + step - 1
+            temp = conn.zrange(key_name, start, end, desc, with_scores)
+            if not temp:
+                break
+            data.extend(temp)
+            start += step
+        return data
+
+    def get_score_with_value(self, key_name, value):
+        """
+        在zset中,根据元素的value获取对应的score
+        :param key_name: key
+        :param value: 元素的值
+        :return: score value对应的score
+        """
+        conn = self.connect()
+        return conn.zscore(key_name, value)
+
+    def update_score_with_value(self, key_name, value, score, expire_time=7*24*3600):
+        """
+        在zset中,修改元素value对应的score
+        :param key_name: key
+        :param value: 元素的值
+        :param score: value对应的score更新值
+        :param expire_time: 过期时间,单位:s,默认7天
+        """
+        conn = self.connect()
+        if conn.exists(key_name):
+            conn.zadd(key_name, {value: score})
+        else:
+            # key不存在时,需设置过期时间
+            conn.zadd(key_name, {value: score})
+            conn.expire(key_name, expire_time)
+
+    def remove_value_from_zset(self, key_name, value):
+        """
+        删除zset中的指定元素
+        :param key_name: key
+        :param value: 元素的值
+        :return: None
+        """
+        conn = self.connect()
+        conn.zrem(key_name, *value)
+
+    def remove_by_rank_from_zset(self, key_name, start, stop):
+        """
+        移除有序集中,指定排名(rank)区间内的所有成员
+        :param key_name: key
+        :param start: 开始位
+        :param stop: 结束位
+        :return: None
+        """
+        conn = self.connect()
+        conn.zremrangebyrank(name=key_name, min=start, max=stop)
+
+    def get_index_with_data(self, key_name, value):
+        """
+        根据元素的值获取在有序set中的位置,按照分数倒序(从大到小)
+        :param key_name: key
+        :param value: 元素的值
+        :return: idx 位置索引
+        """
+        conn = self.connect()
+        return conn.zrevrank(key_name, value)
+
+    def get_data_from_set(self, key_name):
+        """
+        获取set中的所有数据
+        :param key_name: key
+        :return: data
+        """
+        conn = self.connect()
+        if not conn.exists(key_name):
+            # key不存在
+            return None
+        data = []
+        cursor = 0
+        while True:
+            cur, temp = conn.sscan(key_name, cursor=cursor, count=2000)
+            data.extend(temp)
+            if cur == 0:
+                break
+            cursor = cur
+        return list(set(data))
+
+    def add_data_with_set(self, key_name, values, expire_time=30*60):
+        """
+        新增数据,set
+        :param key_name: key
+        :param values: 要添加的元素  类型-set
+        :param expire_time: 过期时间,单位:s,默认0.5小时
+        :return: None
+        """
+        conn = self.connect()
+        conn.sadd(key_name, *values)
+        # 设置过期时间
+        conn.expire(key_name, expire_time)
+
+    def data_exists_with_set(self, key_name, value):
+        """
+        判断元素value是否在集合key_name中
+        :param key_name: key
+        :param value: 需判断的元素
+        :return: 存在-True, 不存在-False
+        """
+        conn = self.connect()
+        return conn.sismember(key_name, value)
+
+    def remove_value_from_set(self, key_name, values):
+        """
+        删除set中的指定元素
+        :param key_name: key
+        :param values: 元素的值, 类型-set
+        :return: None
+        """
+        conn = self.connect()
+        conn.srem(key_name, *values)
+
+    def persist_key(self, key_name):
+        """
+        移除key的过期时间,将其转换为永久状态
+        :param key_name: key
+        :return:
+        """
+        conn = self.connect()
+        conn.persist(key_name)
+
+    def setnx_key(self, key_name, value, expire_time=5*60):
+        """
+        当key不存在时,将value塞入key中,key存在时不做操作
+        :param key_name: key
+        :param value: value
+        :return: 过期时间,单位:s,默认5分钟 type-int
+        """
+        conn = self.connect()
+        conn.setnx(name=key_name, value=value)
+        conn.expire(name=key_name, time=int(expire_time))
+
+    def update_expire_time(self, key_name, expire_time):
+        """
+        修改过期时间
+        :param key_name: key
+        :param expire_time: 过期时间
+        :return:
+        """
+        conn = self.connect()
+        conn.expire(name=key_name, time=int(expire_time))
+
+    def update_batch_setnx_key(self, data, expire_time=5*60):
+        conn = self.connect()
+        for key_name, v in data.items():
+            conn.set(name=key_name, value=v)
+            conn.expire(name=key_name, time=int(expire_time))
+        #self.disconnet()
+ 
+    #def disconnet(self):
+    #    conn_redis.disconnect()        
+class MysqlHelper(object):
+    def __init__(self, mysql_info):
+        """
+        初始化mysql连接信息
+        """
+        self.mysql_info = mysql_info
+
+    def get_data(self, sql):
+        """
+        查询数据
+        :param sql: sql语句
+        :return: data
+        """
+        # 连接数据库
+        conn = pymysql.connect(**self.mysql_info)
+        # 创建游标
+        cursor = conn.cursor()
+        try:
+            # 执行SQL语句
+            cursor.execute(sql)
+            # 获取查询的所有记录
+            data = cursor.fetchall()
+        except Exception as e:
+            data = None
+        finally:
+            # 关闭游标和数据库连接
+            cursor.close()
+            conn.close()
+        return data
+
+
+if __name__ == '__main__':
+    redis_helper = RedisHelper()
+    # key = 'com.weiqu.video.hot.recommend.item.score.20210901'
+    # res = redis_helper.get_score_with_value(key, 90797)
+    # print(res)
+    # redis_helper.remove_value_from_set(key_name=config_.RELEVANT_TOP_VIDEOS_KEY_NAME, values=(8633849,))
+    con = redis_helper.connect()
+    res = redis_helper.key_exists(key_name='eeew')
+    print(res)
+    redis_helper.setnx_key('eeew','1')
+    res = redis_helper.key_exists(key_name='eeew')
+    print(res)
+
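A minimal usage sketch for RedisHelper as called by the compose_score scripts above; the key and value below are placeholders:

    # usage sketch for db_help.RedisHelper
    from db_help import RedisHelper

    helper = RedisHelper()
    helper.update_batch_setnx_key({"k_p2:demo": "[1.0, 2.0]"}, 60 * 60)   # 1-hour TTL
    print(helper.get_data_from_redis("k_p2:demo"))
    helper.del_keys("k_p2:demo")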

+ 53 - 0
export_3_day.py

@@ -0,0 +1,53 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    now_date=sys.argv[1]
+    print("now date:", now_date)
+    table = 'loghubods.video_data_each_hour_dataset_3days_total_apptype'
+    sql = "select apptype, videoid, preview_users, preview_times, view_users, view_times, play_users, play_times, share_users, share_times, CASE WHEN return_users is NULL then 0 else return_users end  as return_users from loghubods.video_data_each_hour_dataset_3days_total_apptype where dt="+now_date
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/3_days_video_data_"+now_date, sep='\t', index=None) 

+ 53 - 0
export_7_day.py

@@ -0,0 +1,53 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    now_date=sys.argv[1]
+    print("now date:", now_date)
+    table = 'loghubods.video_data_each_hour_dataset_7days_total_apptype'
+    sql = "select apptype, videoid, preview_users, preview_times, view_users, view_times, play_users, play_times, share_users, share_times, return_users from loghubods.video_data_each_hour_dataset_7days_total_apptype where dt="+now_date
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/7_days_video_data_"+now_date, sep='\t', index=None) 

+ 53 - 0
export_good_vid.py

@@ -0,0 +1,53 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    now_date=sys.argv[1]
+    print("now date:", now_date)
+    table = 'top_return_videolist_allapptype'
+    sql = "select videoid,return_times, return_users, rov from loghubods.top_return_videolist_allapptype where return_users>2000000 and dt="+now_date
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/return_good_video_"+now_date, sep='\t', index=None) 

+ 53 - 0
export_hour_vid.py

@@ -0,0 +1,53 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    now_date=sys.argv[1]
+    print("now date:", now_date)
+    table = 'video_data_each_hour_dataset_24h_total_apptype'
+    sql = "select apptype, videoid, lastonehour_view, lastonehour_view_total, lastonehour_play, lastonehour_play_total,lastonehour_share, lastonehour_share_total, CASE WHEN lastonehour_return is NULL then 0 else lastonehour_return end  as lastonehour_return from loghubods.video_each_hour_update_province_apptype where dt="+now_date
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/hour_video_data_"+now_date, sep='\t', index=None) 

+ 53 - 0
export_vid.py

@@ -0,0 +1,53 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    now_date=sys.argv[1]
+    print("now date:", now_date)
+    table = 'video_data_each_hour_dataset_24h_total_apptype'
+    sql = "select apptype, videoid, preview人数, preview次数, view人数, view次数, play人数, play次数, share人数, share次数, CASE WHEN 回流人数 is NULL then 0 else 回流人数 end  as return_users from loghubods.video_data_each_hour_dataset_24h_total_apptype where dt="+now_date
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/video_data_"+now_date, sep='\t', index=None) 

+ 53 - 0
export_vid_range.py

@@ -0,0 +1,53 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    now_date=sys.argv[1]
+    print("now date:", now_date)
+    table = 'video_data_each_hour_dataset_24h_total_apptype'
+    sql = "select apptype, videoid, preview人数, preview次数, view人数, view次数, play人数, play次数, share人数, share次数, 回流人数 from loghubods.video_data_each_hour_dataset_24h_total_apptype where dt="+now_date
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/video_data_"+now_date, sep='\t', index=None) 

+ 54 - 0
extract_cur_share_log.py

@@ -0,0 +1,54 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    last7day=sys.argv[1]
+    now_date=sys.argv[2]
+    print("now date:", now_date)
+    table = 'user_share_log'
+    sql = "select machinecode, shareobjectid from loghubods.user_share_log_per5min where dt between '"+last7day+"' and '"+now_date+"' and topic='share';"
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/user_cur_day_item_share_"+now_date, sep='\t') 

+ 54 - 0
extract_share_log.py

@@ -0,0 +1,54 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    从odps获取数据
+    :param date: 日期 type-string '%Y%m%d'
+    :param project: type-string
+    :param table: 表名 type-string
+    :param connect_timeout: 连接超时设置
+    :param read_timeout: 读取超时设置
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # collection默认一个dict
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # 解析record中的每一个元组,存储方式为(k,v),以k作为key,存储每一列的内容;
+        data = pd.DataFrame.from_dict(d, orient='index').T  # 转换为数据框,并转置,不转置的话是横条数据
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    last7day=sys.argv[1]
+    now_date=sys.argv[2]
+    print("now date:", now_date)
+    table = 'user_share_log'
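+    # note: the SQL below names loghubods.user_share_log directly; this table variable is informational only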
+    sql = "select machinecode, shareobjectid from loghubods.user_share_log where dt between '"+last7day+"' and '"+now_date+"' and topic='share';"
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/user_item_share_"+now_date, sep='\t') 

+ 78 - 0
extract_title_tag.py

@@ -0,0 +1,78 @@
+#coding utf-8
+import sys
+import jieba
+from jieba import analyse
+import jieba.posseg as pseg
+import re
+ 
+import os
+
+if __name__=="__main__":
+    #f1 = open(sys.argv[1])
+    stop_words = set('')
+    '''path = sys.argv[1]
+    files_dir = os.listdir(path)
+    #print(files_dir)
+    for file_name in files_dir:
+        if file_name.find('.txt')>-1:
+            f1 = open(path+"/"+file_name)
+            while True:
+                file_line = f1.readline()
+                if not file_line:
+                    break
+                file_line = file_line.strip()
+                stop_words.add(file_line)
+            f1.close()
+    #print(len(stop_words))'''
+    analyse.set_stop_words("all_stopword.txt")
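+    # input lines are "vid<TAB>title"; output lines are "vid<TAB>tag1,tag2,..." built from jieba TF-IDF keywords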
+    f = open(sys.argv[1])
+    f3 = open(sys.argv[2], 'w')
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        line = line.strip()
+        items = line.split("\t")
+        if len(items)<2:
+            continue
+        vid = items[0]
+        title = items[1] 
+        #cut_info =  pseg.cut(title)
+        #tfif_top =jieba.analyse.extract_tags(title,topK=3, withWeight=True, allowPOS=("nr","ns","n","nt","nw","nz","vn","v","a", "d", "f", "s","t", "PER", "LOC", "ORG"))
+        tfif_top =jieba.analyse.extract_tags(title,topK=4, withWeight=True)
+        #text_rank_top =jieba.analyse.textrank(title,topK=3,withWeight=True, allowPOS=("nr","ns","n","nt","nw","nz","vn","v","a", "d", "f", "s","t", "PER", "LOC", "ORG"))
+        #print(title)
+        #print(tfif_top)
+        #print(text_rank_top)
+        tags = []
+        for word in tfif_top:
+            #pattern = re.compile(r'^[-+]?[-0-9]\d*\.\d*|[-+]?\.?[0-9]\d*$')
+            #result = pattern.match(word)
+            #if result:
+            #    continue
+            if word[0].isdigit():
+                continue
+            try:
+                float(word[0])  # numeric keyword: skip it without overwriting vid
+                continue
+            except:
+                tags.append(str(word[0]))
+                #print('%s %s' % (word[0], word[1]))
+            #print('%s %s' % (word[0], word[1]))
+        if len(tags)>0:
+            #print(tags)
+            vid_info=str(vid)+"\t"+",".join(tags)
+            f3.write(vid_info.strip()+"\n")
+            #print("--------------")
+        '''cut_arr = []
+        for cut_item in cut_info:
+            #print("cut_item:", cut_item)
+            if cut_item==' ':
+                continue
+            if cut_item in stop_words:
+                continue
+            cut_arr.append(cut_item)'''
+        #vid_info = vid+'\t'+" ".join(cut_arr)
+        #f3.write(vid_info.strip()+"\n")
+    f3.close()
+       

+ 54 - 0
extract_user_action.py

@@ -0,0 +1,54 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    Fetch data from ODPS
+    :param date: date, type-string '%Y%m%d'
+    :param project: type-string
+    :param table: table name, type-string
+    :param connect_timeout: connection timeout
+    :param read_timeout: read timeout
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # defaultdict of lists from collections
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # each record yields (column, value) pairs; collect the values per column name
+        data = pd.DataFrame.from_dict(d, orient='index').T  # build a DataFrame and transpose it so each column name becomes a proper column
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    last7day=sys.argv[1]
+    now_date=sys.argv[2]
+    print("now date:", now_date)
+    table = 'user_action_log_base'
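+    # keep only share actions (businesstype = 'videoShareFriend') inside the [last7day, now_date] window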
+    sql = "select  mid, videoid, businesstype, clienttimestamp, return from loghubods.user_action_log_base_addrealplay where dt between '"+last7day+"' and '"+now_date+"' and businesstype in ('videoShareFriend');"
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/user_action_"+now_date, sep='\t') 

+ 53 - 0
extract_video_info.py

@@ -0,0 +1,53 @@
+#coding utf-8
+from odps import ODPS
+from config import set_config
+import datetime
+import pandas as pd
+from collections import defaultdict
+import sys
+
+config_ = set_config()
+
+odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project="loghubods",
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'])
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    Fetch data from ODPS
+    :param date: date, type-string '%Y%m%d'
+    :param project: type-string
+    :param table: table name, type-string
+    :param connect_timeout: connection timeout
+    :param read_timeout: read timeout
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+def exe_sql(sql):    
+    data = []
+    with odps.execute_sql(sql).open_reader() as reader:
+        d = defaultdict(list)  # defaultdict of lists from collections
+        for record in reader:
+            for res in record:
+                d[res[0]].append(res[1])  # each record yields (column, value) pairs; collect the values per column name
+        data = pd.DataFrame.from_dict(d, orient='index').T  # build a DataFrame and transpose it so each column name becomes a proper column
+    return data
+
+
+if __name__=="__main__":
+    project = 'loghubods'
+    now_date=sys.argv[1]
+    print("now date:", now_date)
+    table = 'video_data_each_hour_dataset_24h_total_apptype'
+    sql = "select id, title, video_path, cover_img_path,self_cover_img_path,play_count, share_count, reported_count, favoriteds, total_time, tag_count,stage_recommend_examine_status, sensitive_status, new_share_image_path from videoods.wx_video_per1h where status=1 and examine_status=1 ";
+    print(sql)
+    data = exe_sql(sql)
+    data.to_csv("./data/video_data_info_"+now_date, sep='\t', index=None) 

+ 46 - 0
filter_video.py

@@ -0,0 +1,46 @@
+#coding utf-8
+import sys
+from utils import  filter_video_status
+
+if __name__=="__main__":
+    f = open(sys.argv[1])
+    video_set = set('')
+    video_rec = []
+    while True:
+        line = f.readline()
+        if not line:
+           break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        vid = -1
+        try:
+            vid = int(items[2])
+        except:
+            continue
+        if vid ==-1:
+            continue
+        video_set.add(vid)
+        video_rec.append(line)
+    f.close()
+    print(len(video_set))
+    video_list = list(video_set)
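+    # filter_video_status (utils.py) keeps only ids that pass the audit/open/payment/encryption/transcode checks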
+    left_video_list = filter_video_status(video_list)
+    left_video_set = set(left_video_list)
+    print(left_video_list)
+    f2 = open(sys.argv[2], 'w')
+    for line in video_rec:
+        items  = line.strip().split("\t")
+        if len(items)<3:
+           continue
+        vid = -1
+        try:
+            vid  = int(items[2])
+        except:
+            continue
+        if vid not in left_video_set:
+            continue
+        f2.write(line)  
+    f2.close()
+    #print(len(left_video_list))
+    

+ 95 - 0
get3HotRecall.py

@@ -0,0 +1,95 @@
+#coding utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/3_days_video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        if len(items)<11:
+            continue
+        vid = int(items[1])
+        view_users = int(items[4])
+        view_pv = int(items[5])
+        play_users = int(items[6])
+        play_pv = int(items[7])
+        share_users = int(items[8])
+        share_pv = int(items[9])
+        return_users = int(items[10])
+        #print(items)
+        if vid not in data_dict:
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv = item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    info_dict = {}
+    data_path = "./data/3_days_recall_hot_"+nowdate
+    f = open(data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        if view_pv<100 or view_users<10:
+            continue
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+30)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+100)
+        backrate = float(return_users)/(float(view_users)+30)
+        ctr_score = float(play_pv)/float(view_pv+100)
+        if ctr_score<=0.5:
+            continue
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score, share_score*backrate, share_score, backrate, ctr_score, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        info_dict[k] = score_info
+    sorted_v = sorted(info_dict.items(), key=lambda s:s[1][1], reverse=True) 
+    print("sorted_v:", sorted_v[:30])
+    recall_name = "hot_3day:"
+    hot_recall_res = []
+    for item in sorted_v[:5]:
+        hot_recall_res.append((item[0], item[1][1]))
+    if len(hot_recall_res)>0:
+        score_info = json.dumps(hot_recall_res)
+        print("score_info:", score_info)
+        redis_helper.set_data_to_redis(recall_name, score_info, 60*60*24*15)
+        f.write(recall_name+"\t"+score_info+"\n")
+    f.close()
+    #info_dict[k] = score_info
+    #f.write(k+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    #f.close()
+
+     

+ 94 - 0
get7HotRecall.py

@@ -0,0 +1,94 @@
+#coding utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/7_days_video_data_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        if len(items)<11:
+            continue
+        vid = int(items[1])
+        view_users = int(items[4])
+        view_pv = int(items[5])
+        play_users = int(items[6])
+        play_pv = int(items[7])
+        share_users = int(items[8])
+        share_pv = int(items[9])
+        return_users = int(items[10])
+        #print(items)
+        if vid not in data_dict:
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+        else:
+            item_info = data_dict[vid]
+            view_users = item_info[0]+view_users
+            view_pv = item_info[1]+view_pv
+            play_users = item_info[2]+play_users
+            play_pv = item_info[3]+play_pv
+            share_users = item_info[4]+share_users
+            share_pv = item_info[5]+share_pv
+            return_users = item_info[6]+return_users
+            data_dict[vid] = (view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users)
+    #print(data_dict.items())
+    info_dict = {}
+    data_path = "./data/7_days_recall_hot_"+nowdate
+    f = open(data_path, 'w')
+    for k, v in data_dict.items():
+        #print(v)
+        return_users = v[6]
+        #print(return_users)
+        view_users = v[0]
+        view_pv = v[1]
+        if view_pv<100 or view_users<10:
+            continue
+        share_pv = v[5]
+        share_users = v[4]
+        play_users = v[2]
+        play_pv = v[3]
+        #print("return_users:", return_users) 
+        k_score = float(return_users)/(float(view_users)+5)
+        #print(k_score)
+        share_score = float(share_pv)/(float(view_pv)+5)
+        backrate = float(return_users)/(float(view_users)+5)
+        ctr_score = float(play_pv)/float(view_pv+5)
+        if ctr_score<=0.5:
+            continue
+        #print(k, k_score, share_score*backrate, share_score, backrate) 
+        score_info = [k_score, share_score*backrate, share_score, backrate, ctr_score, view_users, view_pv, play_users, play_pv, share_users, share_pv, return_users]
+        info_dict[k] = score_info
+    sorted_v = sorted(info_dict.items(), key=lambda s:s[1][1], reverse=True) 
+    print("sorted_v:", sorted_v[:100])
+    recall_name = "hot_7day:"
+    hot_recall_res = []
+    for item in sorted_v[:100]:
+        hot_recall_res.append((item[0], item[1][1]))
+    if len(hot_recall_res)>10:
+        score_info = json.dumps(hot_recall_res)
+        print("score_info:", score_info)
+        redis_helper.set_data_to_redis(recall_name, score_info, 60*60*24*15)
+        f.write(recall_name+"\t"+score_info+"\n") 
+    #info_dict[k] = score_info
+    #f.write(k+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    f.close()
+
+     

+ 54 - 0
getGoodRecall.py

@@ -0,0 +1,54 @@
+#coding utf-8
+import sys
+from operator import itemgetter
+import json
+import pandas as pd
+from db_help import RedisHelper
+
+if __name__=="__main__":
+    #1.load data
+    nowdate=sys.argv[1]
+    vlog='0'
+    love_live = 4
+    data_path = "./data/return_good_video_"+nowdate
+    f = open(data_path)
+    #data = pd.read_csv(data_path, encoding="utf-8", sep='\t')
+    #print(data)
+    index = 0
+    data_dict = {}
+    video_list = []
+    redis_helper = RedisHelper()
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        if index==0:
+            index += 1
+            continue
+        index +=1
+        items = line.strip().split("\t")
+        if len(items)<4:
+            continue
+        vid = int(items[0])
+        return_times = int(items[1])
+        return_users = int(items[2])
+        rov = float(items[3])
+        video_list.append((vid,rov,return_times, return_users))
+        #print(items)
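+    # rank by the rov score (items[3]) in descending order and cache the top 50 under "hot_video:"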
+    video_list = sorted(video_list , key=lambda tup: tup[1], reverse=True)
+    f1 = open("./data/hot_good_recall_"+nowdate, 'w') 
+    #sorted_v = sorted(info_dict.items(), key=lambda s:s[1][1], reverse=True) 
+    print("sorted_v:", video_list[:50])
+    recall_name = "hot_video:"
+    if len(video_list)>0:
+        score_info = json.dumps(video_list[:50])
+        print("score_info:", score_info)
+        redis_helper.set_data_to_redis(recall_name, score_info, 60*60*24*7)
+        f1.write(recall_name+"\t"+score_info+"\n")
+    f1.close()
+    #info_dict[k] = score_info
+    #f.write(k+"\t"+score_info+"\n")
+    #redis_helper.update_batch_setnx_key(info_dict, 60*60*24*15) 
+    #f.close()
+
+     

+ 83 - 0
get_batch_sim_k.py

@@ -0,0 +1,83 @@
+#coding utf-8
+import sys
+import pandas as pd
+import numpy as np
+import faiss
+import time
+
+def gen_i2i(index_item, embeddings,i2i):
+    fw=open(i2i,"w")
+    #print(i2i)
+    start_time = time.time()
+    #xb = embeddings
+    xb=np.array(embeddings).astype('float32')
+    #print(xb)
+    #index.add(xb)
+    dim, measure = 64, faiss.METRIC_L2  
+    param =  'IVF100,PQ16'
+    index = faiss.index_factory(dim, param, measure) 
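+    # 'IVF100,PQ16': inverted file with 100 coarse centroids plus 16-byte product quantization; it must be trained before vectors are added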
+    #print(index.is_trained)                          # False at this point: the IVF index needs k-means training first
+    index.train(xb) 
+    end_time = time.time()
+    print("time:", (end_time-start_time))
+    #index=faiss.IndexFlatL2(100)
+    #index.add(embed_matrix)
+    #the candidate (indexed) matrix and the search matrix are the same
+    #if the search vector is in the candidate matrix, the first returned index is the vector itself
+    #if the search vector is not in the candidate matrix, the first returned index is its nearest candidate
+    batch = 10000
+    num = len(embeddings)
+    per_rounds = int(num/batch)+1
+    #index=faiss.IndexFlatL2(64)
+    index.add(xb)
+    print("cost time:", (end_time-start_time))
+    #distence_matrix,recall_list=index.search(xb, 20)
+    #print(distence_matrix)
+    #print(recall_list)
+    for i in range(per_rounds):
+        per_embedding = xb[i*batch:(i+1)*batch]
+        #print(per_embedding)
+        #print(len(per_embedding))
+        distence_matrix,recall_list=index.search(per_embedding, 20)
+        #print("distence_matrix:", distence_matrix)
+        #print("recall_list:", recall_list)
+        for idx,rec_arr in enumerate(recall_list):
+            #print("idx:", idx)
+            orgin_item=str(index_item[i*batch+idx])
+            #print("orgin_item:", orgin_item)
+            #print("rec_arr:", rec_arr)
+            recall_str=""
+            for re_id in rec_arr[1:]:
+                if re_id in index_item:
+                    recall_idstr=str(index_item[re_id])
+                    recall_str=recall_str+","+recall_idstr
+            fw.write(orgin_item+"\t"+recall_str[1:]+"\n")
+
+if __name__ == '__main__':
+    f = open(sys.argv[1])
+    index = 0
+    index_dict = {}
+    index_arr = []
+    while True:
+        line = f.readline()
+        if not line:
+           break
+        items = line.strip().split(" ")
+        try:
+            vid = int(items[0])
+            vid_vec = eval(" ".join(items[1:]))
+            vid_vec=np.array(vid_vec)
+            float_arr = vid_vec.astype(np.float64).tolist()
+            #print(float_arr)
+            index_arr.append(float_arr)
+            #index +=1
+            index_dict[index] = vid
+            index +=1
+            #break
+            #print(index_arr)
+        except:
+            #break
+            continue
+    f.close()
+    #print(index_arr)
+    gen_i2i(index_dict, index_arr, "i2i_result")

+ 59 - 0
get_sim_k.py

@@ -0,0 +1,59 @@
+#coding utf-8
+import sys
+import pandas as pd
+import numpy as np
+import faiss
+import time
+
+def gen_i2i(index_item, embeddings,i2i):
+    fw=open(i2i,"w")
+    #print(i2i)
+    embed_matrix=np.array(embeddings).astype('float32')
+    #print(embed_matrix)
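+    # exact (brute-force) L2 search; the 100 passed here is the expected embedding dimension and must match the input vectors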
+    index=faiss.IndexFlatL2(100)
+    index.add(embed_matrix)
+    #the candidate (indexed) matrix is embed_matrix, and the search matrix is the same
+    #if the search vector is in the candidate matrix, the first returned index is the vector itself
+    #if the search vector is not in the candidate matrix, the first returned index is its nearest candidate
+    distence_matrix,recall_list=index.search(embed_matrix, 20)
+    for idx,rec_arr in enumerate(recall_list):
+        #print("idx:", idx)
+        orgin_item=str(index_item[idx])
+        recall_str=""
+        #rec_arr=[0 6 3 8 7 1]
+        for re_id in rec_arr[1:]:
+            recall_idstr=str(index_item[re_id])
+            #print(recall_idstr)
+            recall_str=recall_str+","+recall_idstr
+        fw.write(orgin_item+"\t"+recall_str[1:]+"\n")
+
+if __name__ == '__main__':
+    f = open(sys.argv[1])
+    index = 0
+    start_time = time.time()
+    index_dict = {}
+    index_arr = []
+    while True:
+        line = f.readline()
+        if not line:
+           break
+        line = line.strip().replace("[","").replace("]","")
+        #print(eval(line))
+        items = line.split(" ")
+        if len(items)<2:
+            continue
+        try:
+           vid = int(items[0])
+           #vid_vec = items[1:]
+           print(line.split(" "))
+           vid_vec = eval(" ".join(items[1:]))
+           index_arr.append(vid_vec)
+           index_dict[index] = vid
+           index +=1
+        except:
+           continue
+    f.close()
+    print(len(index_arr))
+    end_time = time.time()
+    print("time:", (end_time-start_time))
+    #gen_i2i(index_dict, index_arr, "i2i_result")

+ 27 - 0
getmax_return.py

@@ -0,0 +1,27 @@
+#coding utf-8
+import sys
+
+
+if __name__=="__main__":
+    f = open(sys.argv[1])
+    max_num = 0
+    max_line = ""
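+    # scan a tab-separated stats file and keep the apptype "4" row with the largest value at arr[10]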
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        items = line.strip().split("\t")
+        if len(items)<3:
+            continue
+        app = items[0]
+        vid = items[1]
+        arr = eval(items[2])
+        if app!="4":
+            continue
+        if max_num <int(arr[10]):
+            max_num = int(arr[10])
+            max_line = line
+            #print(max_num)
+        #max_num = max(max_num, int(arr[10]))
+    print(max_num)
+    print(max_line)

+ 30 - 0
import_redist.py

@@ -0,0 +1,30 @@
+#coding utf-8
+import sys
+import json
+from db_help import RedisHelper
+
+if __name__=="__main__":
+     f2 = open(sys.argv[2], 'w')
+     with  open(sys.argv[1]) as f:
+         rec_json_list=json.load(f)
+         #print(rec_json_list)
+         import_data_dict = {}
+         for line in rec_json_list:
+             #print(line)
+             rec_list = line[1][:10]
+             rec_item_list = []
+             for rec_item in rec_list:
+                 rec_item_list.append((rec_item[0], round(rec_item[1],3)))
+             res_info = json.dumps(rec_item_list)
+             
+             f2.write(str(line[0])+"\t"+res_info+"\n")
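+             # Redis key pattern: sim_hot_<videoid>; value: JSON list of up to 10 (videoid, score) pairs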
+             key="sim_hot_"+str(line[0])
+             import_data_dict[key] = res_info
+         redis_helper = RedisHelper()
+         redis_helper.update_batch_setnx_key(import_data_dict, 60*60*24*7)
+         #con = redis_helper.connect()
+         res = redis_helper.get_data_from_redis("sim_hot_14330133")
+         print(res)
+         f2.close()
+     f.close()
+   

+ 56 - 0
predict.py

@@ -0,0 +1,56 @@
+#coding utf-8
+import sys
+from gensim import models
+import numpy as np
+
+if __name__=="__main__":
+    #model = models.word2vec.Word2Vec.load('word2vec.txt')
+    #print(model.wx)
+    f1 = open('word2vec.txt')
+    word_dict = {}
+    while True:
+        line = f1.readline()
+        if not line:
+            break
+        items = line.strip().split(" ")
+        if len(items)<65:  # a line is the token plus its 64-dim vector
+            continue
+        arr = []
+        for w in items[1:]:
+            arr.append(float(w))
+        word_dict[items[0]] = arr
+    #print(word_dict)
+    f  = open(sys.argv[1])
+    num = 0
+    while True:
+        line = f.readline()
+        if not line:
+            break
+        num = num+1
+        if num == 1:
+            continue
+        items = line.split("\t")
+        if len(items)<2:
+            continue
+        vid = items[0]
+        title_arr = items[1].split(" ")
+        title_info = np.zeros(64)
+        word_len = 0
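+        # title embedding = element-wise mean of the 64-dim word2vec vectors of the title tokens found in word_dict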
+        for word in title_arr:
+            if word in word_dict:
+                 #print(title_info)
+                 #print(word)
+                 word_vec = word_dict[word]
+                 #print(word_vec)
+                 title_info = np.add(title_info, word_vec)
+                 word_len +=1
+        #print(title_info)
+        title_info_list = []
+        if word_len<=0:
+            continue
+        for j in title_info:
+            title_info_list.append(j/word_len)
+        #print("title_info_list:", title_info_list)
+        print(vid,"\t",title_info_list)
+        
+    

+ 53 - 0
process_video.py

@@ -0,0 +1,53 @@
+#coding utf-8
+import sys
+import pandas as pd
+import numpy as np
+import faiss
+
+
+def gen_i2i(index_item, embeddings,i2i):
+    fw=open(i2i,"w")
+    #print(i2i)
+    embed_matrix=np.array(embeddings).astype('float32')
+    #print(embed_matrix)
+    index=faiss.IndexFlatL2(100)
+    index.add(embed_matrix)
+    #the candidate (indexed) matrix is embed_matrix, and the search matrix is the same
+    #if the search vector is in the candidate matrix, the first returned index is the vector itself
+    #if the search vector is not in the candidate matrix, the first returned index is its nearest candidate
+    distence_matrix,recall_list=index.search(embed_matrix, 20)
+    for idx,rec_arr in enumerate(recall_list):
+        #print("idx:", idx)
+        orgin_item=str(index_item[idx])
+        recall_str=""
+        #rec_arr=[0 6 3 8 7 1]
+        for re_id in rec_arr[1:]:
+            recall_idstr=str(index_item[re_id])
+            #print(recall_idstr)
+            recall_str=recall_str+","+recall_idstr
+        fw.write(orgin_item+"\t"+recall_str[1:]+"\n")
+
+if __name__ == '__main__':
+    f = open(sys.argv[1])
+    index = 0
+    index_dict = {}
+    index_arr = []
+    while True:
+        line = f.readline()
+        if not line:
+           break
+        line = line.strip()
+        #print(line)
+        items = line.split(" ")
+        #print(int(items[0]))
+        try:
+            vid = int(items[0])
+            print(line)
+            #print(str(vid)+"\t"+items[1:])
+            #print(index_arr)
+        except:
+            #print(int(items[0]))
+            continue
+    f.close()
+    #print(len(index_arr))
+    #gen_i2i(index_dict, index_arr, "i2i_result")

+ 68 - 0
run.sh

@@ -0,0 +1,68 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+conda activate tf2 
+
+cd /data/OffLineRec
+
+#1. download data
+nowday=`date  +"%Y%m%d" -d -0days`
+last7day=`date  +"%Y%m%d" -d -15days`
+echo ${nowday} 
+echo ${last7day}
+mkdir -p ./data/
+mkdir -p ./logs/
+#conda activate py36
+
+nowhour=`date  +"%Y%m%d%H" -d -0days`
+nowstart=$nowday'000000'
+nowhour=${nowhour}'0000'
+#nowhour='20230601140000'
+echo $nowhour
+echo ${nowstart}
+echo ${last7day}
+
+#python extract_cur_share_log.py ${nowstart} ${nowhour}
+#if [ $? -ne 0 ];
+#then
+   # msg = "[ERROR] simrecall extract_share_log"
+   # sh sendmsg.sh  $nowday  $msg
+   # echo "[ERROR] echo 'extract_share_log"
+   # exit 255
+#fi
+
+
+python extract_share_log.py ${last7day} ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] simrecall extract_share_log"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_share_log"
+    exit 255
+fi
+
+python filter_video.py ./data/user_item_share_${nowday} ./data/user_item_share_filter_${nowday}
+
+
+exit
+#nowday='20230505'
+#2. cal i2i result
+#python calI2I2.py ${nowday}  ${nowhour}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] simrecall calI2I.py"
+    #sh sendmsg.sh $nowday $msg
+    echo $msg
+    exit -1
+fi
+
+#3.import res
+#python import_redist.py "./data/rec_result3_"${nowhour}".json"  "./data/redis_cls_"${nowhour}".json"
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] simhot recall import_redist.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo $msg
+    exit -1
+fi
+echo 'finish sorted'

+ 95 - 0
run_3day.sh

@@ -0,0 +1,95 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+#conda activate python36 
+cd /data/rec_project/OffLineRec
+#cd /home/rec/project/git_project/OffLineRec 
+#cd /data/rec_project/OffLineRec
+#1. download data
+nowday=`date  +"%Y%m%d%H" -d -1hours`
+echo ${nowday} 
+#exit
+#nowday='2023052413'
+#3.import res
+mkdir -p ./data/
+
+python export_3_day.py  ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_3_day.py"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] export_3_day.py"
+    exit 255
+fi
+#python export_7_day.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_7_day.py"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+python export_hour_vid.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_7_day.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+
+python calHourData.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_hour_data.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+
+python calCtr1days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_1_day.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+python calCtr3days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal3ctr "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'cal3days"
+    exit 255
+fi
+
+#python calCtr7days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal 7 day ctr "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+python compose_score_3day.py ${nowday}
+#python get3HotRecall.py ${nowday} 
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal  3 hot "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+python get3HotRecall.py ${nowday}
+
+#python get7HotRecall.py ${nowday}
+
+
+echo "finish sorted"
+
+

+ 61 - 0
run_ctr.sh

@@ -0,0 +1,61 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+#conda activate python36 
+cd /data/rec_project/OffLineRec
+#cd /home/rec/project/git_project/OffLineRec 
+#cd /data/rec_project/OffLineRec
+#1. download data
+nowday=`date  +"%Y%m%d%H" -d -0days`
+echo ${nowday} 
+#3.import res
+mkdir -p ./data/
+
+python export_vid.py  ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] sorted extract_vid_log"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_vid.py"
+    exit 255
+fi
+python export_hour_vid.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] sorted extract_hour_log"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_hour_vid.py"
+    exit 255
+fi
+
+python calCtr.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal  ctr "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+
+python calHourCtr.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal hour ctr "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+python compose_score2.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal compose_score "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'compose_score.py"
+    exit 255
+fi
+
+sh clean.sh
+echo "finish sorted"
+
+

+ 32 - 0
run_extract_tag.sh

@@ -0,0 +1,32 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+conda activate python36 
+
+#1. download data
+nowday=`date  +"%Y%m%d" -d -0days`
+last7day=`date  +"%Y%m%d" -d -1days`
+echo ${nowday} 
+#3.import res
+mkdir -p ./data/
+
+python extract_user_action.py  ${last7day} ${nowday}
+#if [ $? -ne 0 ];
+#then
+#    msg = "[ERROR] sorted extract_vid_log"
+#    sh sendmsg.sh  $nowday  $msg
+#    echo "[ERROR] echo 'extract_vid.py"
+#    exit 255
+#fi
+
+#python extract_video_info.py ${nowday}
+#if [ $? -ne 0 ];
+#then
+#    msg = "[ERROR] cal ctr "
+#    sh sendmsg.sh  $nowday  $msg
+#    echo "[ERROR] echo 'calCtr.py"
+#    exit 255
+#fi
+#echo "finish sorted"
+

+ 28 - 0
run_good_recall.sh

@@ -0,0 +1,28 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+#conda activate python36 
+cd /data/rec_project/OffLineRec
+#cd /home/rec/project/git_project/OffLineRec 
+#cd /data/rec_project/OffLineRec
+#1. download data
+nowday=`date  +"%Y%m%d" -d -1days`
+echo ${nowday} 
+#3.import res
+mkdir -p ./data/
+
+python export_good_vid.py  ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] sorted extract_vid_log"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_vid.py"
+    exit 255
+fi
+
+python getGoodRecall.py ${nowday} 
+
+echo "finish sorted"
+
+

+ 65 - 0
run_hour.sh

@@ -0,0 +1,65 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+conda activate tf2
+
+cd /data/OffLineRec
+
+#1. download data
+nowday=`date  +"%Y%m%d" -d -0days`
+last7day=`date  +"%Y%m%d" -d -15days`
+echo ${nowday} 
+echo ${last7day}
+mkdir -p ./data/
+mkdir -p ./logs/
+#conda activate py36
+
+nowhour=`date  +"%Y%m%d%H" -d -0days`
+nowstart=$nowday'000000'
+nowhour=${nowhour}'0000'
+#nowhour='20230601140000'
+echo $nowhour
+echo ${nowstart}
+echo ${last7day}
+
+python extract_cur_share_log.py ${nowstart} ${nowhour}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] simrecall extract_share_log"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_share_log"
+    exit 255
+fi
+
+python filter_video.py ./data/user_cur_day_item_share_${nowhour} ./data/user_cur_day_item_share_filter_${nowhour}
+
+#python extract_share_log.py ${last7day} ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] simrecall extract_share_log"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_share_log"
+    exit 255
+fi
+
+#nowday='20230505'
+#2. cal i2i result
+python calI2I2.py ${nowday}  ${nowhour}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] simrecall calI2I.py"
+    #sh sendmsg.sh $nowday $msg
+    echo $msg
+    exit -1
+fi
+
+#3.import res
+python import_redist.py "./data/rec_result3_"${nowhour}".json"  "./data/redis_cls_"${nowhour}".json"
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] simhot recall import_redist.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo $msg
+    exit -1
+fi
+echo 'finish sorted'

+ 63 - 0
run_type_ctr.sh

@@ -0,0 +1,63 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+#conda activate python36 
+cd /data/rec_project/OffLineRec
+#cd /home/rec/project/git_project/OffLineRec 
+#cd /data/rec_project/OffLineRec
+#1. download data
+nowday=`date  +"%Y%m%d%H" -d -0days`
+echo ${nowday} 
+#3.import res
+mkdir -p ./data/
+#nowday=2023062720
+
+#python export_vid.py  ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] sorted extract_vid_log"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_vid.py"
+    exit 255
+fi
+#python export_hour_vid.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] sorted extract_hour_log"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_hour_vid.py"
+    exit 255
+fi
+
+python calCtr_Type.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal  ctr "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+
+python calHourTypeCtr.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal hour ctr "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+
+python compose_score_type.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal compose_score "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'compose_score.py"
+    exit 255
+fi
+
+sh clean.sh
+echo "finish sorted"
+
+

+ 36 - 0
sendmsg.sh

@@ -0,0 +1,36 @@
+#!/bin/bash
+nowdate=$1
+content=${@:2}
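+# strip <font color='red'> markup and embedded spaces from the caller-supplied message before posting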
+content=${content//\<font color=\'red\'\>/}
+content=${content//\<\/font\>\<\/br\>/}
+content=${content//\ /}
+echo "warn content is : $content"
+
+api=https://open.feishu.cn/open-apis/bot/v2/hook/00cf9bb4-ecea-4f0d-bf02-1a20592a916c # Feishu bot webhook URL
+
+
+curl -X POST \
+  $api \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "msg_type": "post",
+    "content": {
+        "post": {
+            "zh_cn": {
+                "title": "recall alert",
+                "content": [
+                    [
+                        {
+                            "tag": "text",
+                            "un_escape": true,
+                            "text": "'$content'"
+                        }
+                    ],
+                    [
+
+                    ]
+                ]
+            }
+        }
+    }
+}'

+ 30 - 0
test.py

@@ -0,0 +1,30 @@
+#coding utf-8
+import sys
+import json
+from db_help import RedisHelper
+
+if __name__=="__main__":
+     '''f2 = open(sys.argv[2], 'w')
+     with  open(sys.argv[1]) as f:
+         rec_json_list=json.load(f)
+         #print(rec_json_list)
+         import_data_dict = {}
+         for line in rec_json_list:
+             #print(line)
+             rec_list = line[1][:10]
+             rec_item_list = []
+             for rec_item in rec_list:
+                 rec_item_list.append((rec_item[0], round(rec_item[1],3)))
+             res_info = json.dumps(rec_item_list)
+             
+             f2.write(str(line[0])+"\t"+res_info+"\n")
+             key="sim_hot_"+line[0]
+             import_data_dict[key] = res_info'''
+     redis_helper = RedisHelper()
+         #redis_helper.update_batch_setnx_key(import_data_dict, 60*60*24*7)
+         #con = redis_helper.connect()
+     res = redis_helper.get_data_from_redis("k_p4:6399911")
+     print(res)
+     #f2.close()
+     #f.close()
+   

+ 36 - 0
test.sh

@@ -0,0 +1,36 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+conda activate base 
+cd /data/rec_project/OffLineRec
+#1. download data
+nowday=`date  +"%Y%m%d%H" -d -0days`
+echo ${nowday} 
+#3.import res
+#mkdir -p ./data/
+
+nowday='2023051814'
+#python export_hour_vid.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] sorted extract_hour_log"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_hour_vid.py"
+    exit 255
+fi
+#python calHourCtr.py ${nowday}
+
+python compose_score.py ${nowday} 
+
+python calCtr.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal ctr "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+echo "finish sorted"
+

+ 95 - 0
test2.sh

@@ -0,0 +1,95 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+#conda activate python36 
+cd /data/rec_project/OffLineRec
+#cd /home/rec/project/git_project/OffLineRec 
+#cd /data/rec_project/OffLineRec
+#1. download data
+nowday=`date  +"%Y%m%d%H" -d -1hours`
+echo ${nowday} 
+#exit
+#nowday='2023052413'
+#3.import res
+#mkdir -p ./data/
+
+#python export_3_day.py  ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_3_day.py"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] export_3_day.py"
+    exit 255
+fi
+#python export_7_day.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_7_day.py"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+#python export_hour_vid.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_7_day.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+
+python calHourData.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_hour_data.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+
+#python calCtr1days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_1_day.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+#python calCtr3days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal3ctr "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'cal3days"
+    exit 255
+fi
+
+#python calCtr7days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal 7 day ctr "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+python compose_score_3day.py ${nowday}
+#python get3HotRecall.py ${nowday} 
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal  3 hot "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+#python get3HotRecall.py ${nowday}
+
+#python get7HotRecall.py ${nowday}
+
+
+echo "finish sorted"
+
+

+ 95 - 0
test_3day.sh

@@ -0,0 +1,95 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+#conda activate python36 
+cd /data/rec_project/OffLineRec
+#cd /home/rec/project/git_project/OffLineRec 
+#cd /data/rec_project/OffLineRec
+#1. download data
+nowday=`date  +"%Y%m%d%H" -d -2hours`
+echo ${nowday} 
+#exit
+#nowday='2023052413'
+#3.import res
+mkdir -p ./data/
+
+#python export_3_day.py  ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_3_day.py"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] export_3_day.py"
+    exit 255
+fi
+#python export_7_day.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_7_day.py"
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+#python export_hour_vid.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_7_day.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+
+#python calHourData.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_hour_data.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+
+#python calCtr1days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] export_1_day.py"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo export_7_day.py"
+    exit 255
+fi
+
+python calCtr3days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal3ctr "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'cal3days"
+    exit 255
+fi
+
+#python calCtr7days.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal 7 day ctr "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+python compose_score_3day.py ${nowday}
+#python get3HotRecall.py ${nowday} 
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal  3 hot "
+    #sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+#python get3HotRecall.py ${nowday}
+
+#python get7HotRecall.py ${nowday}
+
+
+echo "finish sorted"
+
+

+ 63 - 0
test_ctr.sh

@@ -0,0 +1,63 @@
+#!/bin/bash
+source ~/.bash_profile
+source ~/.bashrc
+
+#conda activate python36 
+cd /data/rec_project/OffLineRec
+#cd /home/rec/project/git_project/OffLineRec 
+#cd /data/rec_project/OffLineRec
+#1. download data
+nowday=`date  +"%Y%m%d%H" -d -0days`
+echo ${nowday} 
+nowday='2023060410'
+#3.import res
+mkdir -p ./data/
+
+#python export_vid.py  ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] sorted extract_vid_log"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_vid.py"
+    exit 255
+fi
+#python export_hour_vid.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] sorted extract_hour_log"
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'extract_hour_vid.py"
+    exit 255
+fi
+
+
+python calCtr.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal  ctr "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+
+python calHourCtr.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal hour ctr "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'calCtr.py"
+    exit 255
+fi
+python compose_score2.py ${nowday}
+if [ $? -ne 0 ];
+then
+    msg = "[ERROR] cal compose_score "
+    sh sendmsg.sh  $nowday  $msg
+    echo "[ERROR] echo 'compose_score.py"
+    exit 255
+fi
+
+sh clean.sh
+echo "finish sorted"
+
+

+ 11 - 0
test_faiss.py

@@ -0,0 +1,11 @@
+import numpy as np
+d = 64                                           # vector dimension
+nb = 100000                                      # number of vectors in the index database
+nq = 10000                                       # number of query vectors to search
+np.random.seed(1234)             
+xb = np.random.random((nb, d)).astype('float32')
+#xb[:, 0] += np.arange(nb) / 1000.                # vectors for the index database
+xq = np.random.random((nq, d)).astype('float32')
+#xq[:, 0] += np.arange(nq) / 1000.
+
+print(xb)

+ 12 - 0
train_vec.sh

@@ -0,0 +1,12 @@
+cd /home/rec/project/git_project/OffLineRec
+
+#1.cut_title
+nowday=`date  +"%Y%m%d" -d -0days`
+nowday=20230512
+
+#python cut_title.py ./stopwords/ ./data/video_title_${nowday} ./data/video_cut_title_${nowday}
+
+#python word2vec.py 
+
+
+#python predict.py ./data/word2vec_cut_title  > ./data/video_title_embedding

+ 550 - 0
utils.py

@@ -0,0 +1,550 @@
+# coding:utf-8
+import pickle
+import os
+import requests
+import json
+import traceback
+import pandas as pd
+
+from odps import ODPS
+from config import set_config
+from db_help import  MysqlHelper, RedisHelper
+#from log import Log
+
+config_ = set_config()
+#log_ = Log()
+
+
+def execute_sql_from_odps(project, sql, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        connect_timeout=connect_timeout,
+        read_timeout=read_timeout,
+        pool_maxsize=pool_maxsize,
+        pool_connections=pool_connections
+    )
+    records = odps.execute_sql(sql=sql)
+    return records
+
+
+def get_data_from_odps(date, project, table, connect_timeout=3000, read_timeout=500000,
+                       pool_maxsize=1000, pool_connections=1000):
+    """
+    Fetch data from ODPS
+    :param date: date, type-string '%Y%m%d'
+    :param project: type-string
+    :param table: table name, type-string
+    :param connect_timeout: connection timeout
+    :param read_timeout: read timeout
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: records
+    """
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        connect_timeout=connect_timeout,
+        read_timeout=read_timeout,
+        pool_maxsize=pool_maxsize,
+        pool_connections=pool_connections
+    )
+    records = odps.read_table(name=table, partition='dt=%s' % date)
+    return records
+
+
+def check_table_partition_exits(date, project, table, connect_timeout=3000, read_timeout=500000,
+                                pool_maxsize=1000, pool_connections=1000):
+    """
+    Check whether the given partition exists in the table
+    :param date: date, type-string '%Y%m%d'
+    :param project: type-string
+    :param table: table name, type-string
+    :param connect_timeout: connection timeout
+    :param read_timeout: read timeout
+    :param pool_maxsize:
+    :param pool_connections:
+    :return: True if the partition dt=date exists, else False
+    """
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        connect_timeout=connect_timeout,
+        read_timeout=read_timeout,
+        pool_maxsize=pool_maxsize,
+        pool_connections=pool_connections
+    )
+    t = odps.get_table(name=table)
+    return t.exist_partition(partition_spec=f'dt={date}')
+
+
+'''def write_to_pickle(data, filename, filepath=config_.DATA_DIR_PATH):
+    """
+    Write data to a pickle file
+    :param data: data to write
+    :param filename: output file name
+    :param filepath: directory to write to, defaults to config_.DATA_DIR_PATH
+    :return: None
+    """
+    if not os.path.exists(filepath):
+        os.makedirs(filepath)
+    file = os.path.join(filepath, filename)
+    with open(file, 'wb') as wf:
+        pickle.dump(data, wf)
+
+
+def read_from_pickle(filename, filepath=config_.DATA_DIR_PATH):
+    """
+    Read data from a pickle file
+    :param filename: file name
+    :param filepath: directory to read from, defaults to config_.DATA_DIR_PATH
+    :return: data
+    """
+    file = os.path.join(filepath, filename)
+    if not os.path.exists(file):
+        return None
+    with open(file, 'rb') as rf:
+        data = pickle.load(rf)
+    return data '''
+
+
+def send_msg_to_feishu(webhook, key_word, msg_text):
+    """发送消息到飞书"""
+    headers = {'Content-Type': 'application/json'}
+    payload_message = {
+        "msg_type": "text",
+        "content": {
+            "text": '{}: {}'.format(key_word, msg_text)
+        }
+    }
+    response = requests.request('POST', url=webhook, headers=headers, data=json.dumps(payload_message))
+    print(response.text)
+
+
+def send_msg_to_feishu_new(webhook, key_word, title, msg_list):
+    """发送消息到飞书"""
+    headers = {'Content-Type': 'application/json'}
+    content_list = [
+        [
+            {
+                "tag": "text",
+                "text": msg
+            }
+        ]
+        for msg in msg_list
+    ]
+    payload_message = {
+        "msg_type": "post",
+        "content": {
+            "post": {
+                "zh_cn": {
+                    "title": f"{key_word}: {title}",
+                    "content": content_list,
+                }
+            }
+        }
+    }
+    response = requests.request('POST', url=webhook, headers=headers, data=json.dumps(payload_message))
+    print(response.text)
+
+
+def request_post(request_url, request_data=None, **kwargs):
+    """
+    POST request to an HTTP interface
+    :param request_url: interface URL
+    :param request_data: request parameters
+    :return: res_data as parsed JSON
+    """
+    try:
+        response = requests.post(url=request_url, json=request_data, **kwargs)
+        if response.status_code == 200:
+            res_data = json.loads(response.text)
+            return res_data
+        else:
+            #log_.info(f"response.status_code: {response.status_code}")
+            return None
+    except Exception as e:
+        #log_.error('url: {}, exception: {}, traceback: {}'.format(request_url, e, traceback.format_exc()))
+        send_msg_to_feishu(
+            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+            msg_text='rov-offline{} - 接口请求失败:{}, exception: {}'.format(config_.ENV_TEXT, request_url, e)
+        )
+        return None
+
+
+def request_get(request_url):
+    """
+    GET request to an HTTP interface
+    :param request_url: interface URL
+    :return: res_data as parsed JSON
+    """
+    try:
+        response = requests.get(url=request_url)
+        if response.status_code == 200:
+            res_data = json.loads(response.text)
+            return res_data
+        else:
+            #log_.info(f"response.status_code: {response.status_code}")
+            return None
+    except Exception as e:
+        #log_.error('url: {}, exception: {}, traceback: {}'.format(request_url, e, traceback.format_exc()))
+        send_msg_to_feishu(
+            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+            msg_text='rov-offline{} - 接口请求失败:{}, exception: {}'.format(config_.ENV_TEXT, request_url, e)
+        )
+        return None
+
+
+def data_normalization(data):
+    """
+    Min-Max normalization of the results, scaling scores into [0, 100]
+    :param data: type-list
+    :return: normal_data, type-list, the normalized data
+    """
+    x_max = max(data)
+    x_min = min(data)
+    normal_data = [(x-x_min)/(x_max-x_min)*100 for x in data]
+    return normal_data
+
+
+def filter_video_status(video_ids):
+    """
+    Filter videos by status
+    :param video_ids: list of video ids, type-list
+    :return: filtered_videos
+    """
+    i = 0
+    while i < 3:
+        try:
+            mysql_helper = MysqlHelper(mysql_info=config_.FILTER_MYSQL_INFO)
+            video_status_sql = "SELECT t1.id AS 'video_id', " \
+                               "t1.transcode_status AS 'transcoding_status', " \
+                               "t2.audit_status AS 'audit_status', " \
+                               "t2.video_status AS 'open_status', " \
+                               "t2.recommend_status AS 'applet_rec_status', " \
+                               "t2.app_recommend_status AS 'app_rec_status', " \
+                               "t3.charge AS 'payment_status', " \
+                               "case when t4.max_validate_count is null then 0 else t4.max_validate_count end AS 'encryption_status' " \
+                               "FROM longvideo.wx_video t1 " \
+                               "LEFT JOIN longvideo.wx_video_status t2 ON t1.id= t2.video_id " \
+                               "LEFT JOIN longvideo.wx_video_detail t3 ON t1.id= t3.video_id " \
+                               "LEFT JOIN longvideo.wx_video_pwd t4 ON t1.id= t4.video_id"
+            if len(video_ids) == 1:
+                sql = "SELECT video_id " \
+                      "FROM ({}) " \
+                      "WHERE audit_status = 5 " \
+                      "AND applet_rec_status IN (1, -6) " \
+                      "AND open_status = 1 " \
+                      "AND payment_status = 0 " \
+                      "AND encryption_status != 5 " \
+                      "AND transcoding_status = 3 " \
+                      "AND video_id IN ({});".format(video_status_sql, video_ids[0])
+                data = mysql_helper.get_data(sql=sql)
+
+            else:
+                data = []
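+                # query in chunks of 200 ids so the IN (...) clause stays bounded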
+                for i in range(len(video_ids) // 200 + 1):
+                    sql = "SELECT video_id " \
+                          "FROM ({}) " \
+                          "WHERE audit_status = 5 " \
+                          "AND applet_rec_status IN (1, -6) " \
+                          "AND open_status = 1 " \
+                          "AND payment_status = 0 " \
+                          "AND encryption_status != 5 " \
+                          "AND transcoding_status = 3 " \
+                          "AND video_id IN {};".format(video_status_sql, tuple(video_ids[i*200:(i+1)*200]))
+                    select_res = mysql_helper.get_data(sql=sql)
+                    if select_res is not None:
+                        data += select_res
+            filtered_videos = [int(temp[0]) for temp in data]
+            return filtered_videos
+
+        except Exception as e:
+            #log_.error(f"过滤失败, exception: {e}, traceback: {traceback.format_exc()}")
+            send_msg_to_feishu(
+                webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+                key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+                msg_text=f"rov-offline{config_.ENV_TEXT} - 过滤失败\n"
+                         f"retry count: {i}\n"
+                         f"exception: {e}\n"
+                         f"traceback: {traceback.format_exc()}"
+            )
+            i += 1
+            if i == 1:
+                return video_ids
+
+
+def filter_video_status_with_applet_rec(video_ids, applet_rec_status):
+    """
+    Filter videos by status
+    :param video_ids: list of video ids, type-list
+    :param applet_rec_status: mini-program recommendation status, -6: pending recommendation, 1: normal recommendation
+    :return: filtered_videos
+    """
+    i = 0
+    while i < 3:
+        try:
+            mysql_helper = MysqlHelper(mysql_info=config_.FILTER_MYSQL_INFO)
+            video_status_sql = "SELECT t1.id AS 'video_id', " \
+                               "t1.transcode_status AS 'transcoding_status', " \
+                               "t2.audit_status AS 'audit_status', " \
+                               "t2.video_status AS 'open_status', " \
+                               "t2.recommend_status AS 'applet_rec_status', " \
+                               "t2.app_recommend_status AS 'app_rec_status', " \
+                               "t3.charge AS 'payment_status', " \
+                               "case when t4.max_validate_count is null then 0 else t4.max_validate_count end AS 'encryption_status' " \
+                               "FROM longvideo.wx_video t1 " \
+                               "LEFT JOIN longvideo.wx_video_status t2 ON t1.id= t2.video_id " \
+                               "LEFT JOIN longvideo.wx_video_detail t3 ON t1.id= t3.video_id " \
+                               "LEFT JOIN longvideo.wx_video_pwd t4 ON t1.id= t4.video_id"
+            if len(video_ids) == 1:
+                sql = "SELECT video_id " \
+                      "FROM ({}) " \
+                      "WHERE audit_status = 5 " \
+                      "AND applet_rec_status = {} " \
+                      "AND open_status = 1 " \
+                      "AND payment_status = 0 " \
+                      "AND encryption_status != 5 " \
+                      "AND transcoding_status = 3 " \
+                      "AND video_id IN ({});".format(video_status_sql, applet_rec_status, video_ids[0])
+                data = mysql_helper.get_data(sql=sql)
+
+            else:
+                data = []
+                for idx in range(len(video_ids) // 200 + 1):
+                    sql = "SELECT video_id " \
+                          "FROM ({}) " \
+                          "WHERE audit_status = 5 " \
+                          "AND applet_rec_status = {} " \
+                          "AND open_status = 1 " \
+                          "AND payment_status = 0 " \
+                          "AND encryption_status != 5 " \
+                          "AND transcoding_status = 3 " \
+                          "AND video_id IN {};".format(video_status_sql, applet_rec_status,
+                                                       tuple(video_ids[i*200:(i+1)*200]))
+                    select_res = mysql_helper.get_data(sql=sql)
+                    if select_res is not None:
+                        data += select_res
+            filtered_videos = [int(temp[0]) for temp in data]
+            return filtered_videos
+
+        except Exception as e:
+            #log_.error(f"过滤失败, exception: {e}, traceback: {traceback.format_exc()}")
+            send_msg_to_feishu(
+                webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+                key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+                msg_text=f"rov-offline{config_.ENV_TEXT} - 过滤失败\n"
+                         f"retry count: {i}\n"
+                         f"exception: {e}\n"
+                         f"traceback: {traceback.format_exc()}"
+            )
+            i += 1
+            if i == 1:
+                return video_ids
+
+
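+# Hedged usage sketch for filter_video_status_with_applet_rec: applet_rec_status
+# selects between the pending (-6) and normal (1) recommendation pools. The ids
+# below are hypothetical placeholders.
+def _example_filter_with_applet_rec():
+    candidate_ids = [101, 102, 103]  # hypothetical video ids
+    pending_pool = filter_video_status_with_applet_rec(video_ids=candidate_ids, applet_rec_status=-6)
+    normal_pool = filter_video_status_with_applet_rec(video_ids=candidate_ids, applet_rec_status=1)
+    return pending_pool, normal_pool
+
+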
+def filter_video_status_app(video_ids):
+    """
+    Filter videos by status - app.
+    :param video_ids: list of video ids, type-list
+    :return: filtered_videos
+    """
+    i = 0
+    while i < 3:
+        try:
+            mysql_helper = MysqlHelper(mysql_info=config_.FILTER_MYSQL_INFO)
+            video_status_sql = "SELECT t1.id AS 'video_id', " \
+                               "t1.transcode_status AS 'transcoding_status', " \
+                               "t2.app_audit_status AS 'app_audit_status', " \
+                               "t2.original_status AS 'open_status', " \
+                               "t2.recommend_status AS 'applet_rec_status', " \
+                               "t2.app_recommend_status AS 'app_rec_status', " \
+                               "t3.charge AS 'payment_status', " \
+                               "case when t4.max_validate_count is null then 0 else t4.max_validate_count end AS 'encryption_status' " \
+                               "FROM longvideo.wx_video t1 " \
+                               "LEFT JOIN longvideo.wx_video_status t2 ON t1.id= t2.video_id " \
+                               "LEFT JOIN longvideo.wx_video_detail t3 ON t1.id= t3.video_id " \
+                               "LEFT JOIN longvideo.wx_video_pwd t4 ON t1.id= t4.video_id"
+
+            if len(video_ids) == 1:
+                sql = "SELECT video_id " \
+                      "FROM ({}) " \
+                      "WHERE app_audit_status = 5 " \
+                      "AND app_rec_status IN (1, -6, 10) " \
+                      "AND open_status = 1 " \
+                      "AND payment_status = 0 " \
+                      "AND encryption_status != 5 " \
+                      "AND transcoding_status = 3 " \
+                      "AND video_id IN ({});".format(video_status_sql, video_ids[0])
+                data = mysql_helper.get_data(sql=sql)
+
+            else:
+                data = []
+                for idx in range(len(video_ids) // 200 + 1):
+                    sql = "SELECT video_id " \
+                          "FROM ({}) " \
+                          "WHERE app_audit_status = 5 " \
+                          "AND app_rec_status IN (1, -6, 10) " \
+                          "AND open_status = 1 " \
+                          "AND payment_status = 0 " \
+                          "AND encryption_status != 5 " \
+                          "AND transcoding_status = 3 " \
+                          "AND video_id IN {};".format(video_status_sql, tuple(video_ids[i*200:(i+1)*200]))
+                    select_res = mysql_helper.get_data(sql=sql)
+                    if select_res is not None:
+                        data += select_res
+
+            filtered_videos = [int(temp[0]) for temp in data]
+            return filtered_videos
+
+        except Exception as e:
+            #log_.error(f"过滤失败, exception: {e}, traceback: {traceback.format_exc()}")
+            send_msg_to_feishu(
+                webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
+                key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
+                msg_text=f"rov-offline{config_.ENV_TEXT} - 过滤失败\n"
+                         f"retry count: {i}\n"
+                         f"exception: {e}\n"
+                         f"traceback: {traceback.format_exc()}"
+            )
+            i += 1
+            if i == 1:
+                return video_ids
+
+
+def filter_shield_video(video_ids, shield_key_name_list):
+    """
+    Filter out shielded videos.
+    :param video_ids: list of video ids to filter, type-list
+    :param shield_key_name_list: redis keys of the shielded-video sets
+    :return: filtered_videos  filtered list  type-list
+    """
+    if len(video_ids) == 0:
+        return video_ids
+    # Filter against the data cached in Redis
+    redis_helper = RedisHelper()
+    for shield_key_name in shield_key_name_list:
+        shield_videos_list = redis_helper.get_data_from_set(key_name=shield_key_name)
+        if not shield_videos_list:
+            continue
+        shield_videos = [int(video) for video in shield_videos_list]
+        video_ids = [int(video_id) for video_id in video_ids if int(video_id) not in shield_videos]
+
+    return video_ids
+
+
+def filter_political_videos(video_ids):
+    """
+    Filter out politically sensitive videos.
+    :param video_ids: list of video ids to filter, type-list
+    :return: filtered_video_ids  filtered list  type-list
+    """
+    if len(video_ids) == 0:
+        return video_ids
+    # Filter against the data cached in Redis
+    redis_helper = RedisHelper()
+    political_key_name = config_.POLITICAL_VIDEOS_KEY_NAME
+    political_videos_list = redis_helper.get_data_from_set(key_name=political_key_name)
+    if not political_videos_list:
+        return video_ids
+    political_videos = [int(video) for video in political_videos_list]
+    filtered_video_ids = [int(video_id) for video_id in video_ids if int(video_id) not in political_videos]
+
+    return filtered_video_ids
+
+
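+# Hedged end-to-end sketch of how the filters above can be combined: status
+# filter first, then shield and political filters. The video ids and the shield
+# redis key name are hypothetical placeholders.
+def _example_filter_chain():
+    candidate_ids = [101, 102, 103]  # hypothetical video ids
+    passed = filter_video_status(video_ids=candidate_ids)
+    passed = filter_shield_video(video_ids=passed, shield_key_name_list=['shield:videos'])  # hypothetical key
+    passed = filter_political_videos(video_ids=passed)
+    return passed
+
+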
+def update_video_w_h_rate(video_ids, key_name):
+    """
+    Get the width/height ratio of landscape videos and store it in Redis (width/height > 1)
+    :param video_ids: list of videoIds, type-list
+    :param key_name: redis key
+    :return: None
+    """
+    # Fetch the data
+    if len(video_ids) == 1:
+        sql = "SELECT id, width, height, rotate FROM longvideo.wx_video WHERE id = {};".format(video_ids[0])
+    else:
+        sql = "SELECT id, width, height, rotate FROM longvideo.wx_video WHERE id IN {};".format(tuple(video_ids))
+
+    mysql_helper = MysqlHelper(mysql_info=config_.MYSQL_INFO)
+    data = mysql_helper.get_data(sql=sql)
+
+    # Update Redis
+    info_data = {}
+    for video_id, width, height, rotate in data:
+        if int(width) == 0 or int(height) == 0:
+            continue
+        # When the rotate field is 90 or 270, width and height are swapped
+        if int(rotate) in (90, 270):
+            w_h_rate = int(height) / int(width)
+        else:
+            w_h_rate = int(width) / int(height)
+        if w_h_rate > 1:
+            info_data[int(video_id)] = w_h_rate
+    redis_helper = RedisHelper()
+    # Delete the old data
+    redis_helper.del_keys(key_name=key_name)
+    # Write the new data
+    if len(info_data) > 0:
+        redis_helper.add_data_with_zset(key_name=key_name, data=info_data)
+
+
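+# The rotate handling in update_video_w_h_rate, restated as a standalone sketch
+# for clarity: a rotate value of 90 or 270 means the frame is stored sideways,
+# so width and height swap roles when computing the landscape ratio.
+def _w_h_rate(width, height, rotate):
+    """Return the rotation-corrected width/height ratio, or None for zero sizes."""
+    if int(width) == 0 or int(height) == 0:
+        return None
+    if int(rotate) in (90, 270):
+        return int(height) / int(width)
+    return int(width) / int(height)
+
+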
+def data_check(project, table, dt):
+    """检查数据是否准备好"""
+    odps = ODPS(
+        access_id=config_.ODPS_CONFIG['ACCESSID'],
+        secret_access_key=config_.ODPS_CONFIG['ACCESSKEY'],
+        project=project,
+        endpoint=config_.ODPS_CONFIG['ENDPOINT'],
+        connect_timeout=3000,
+        read_timeout=500000,
+        pool_maxsize=1000,
+        pool_connections=1000
+    )
+
+    try:
+        check_res = check_table_partition_exits(date=dt, project=project, table=table)
+        if check_res:
+            sql = f'select * from {project}.{table} where dt = {dt}'
+            with odps.execute_sql(sql=sql).open_reader() as reader:
+                data_count = reader.count
+        else:
+            data_count = 0
+    except Exception:
+        # Treat any failure as "data not ready"
+        data_count = 0
+    return data_count
+
+
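+# Hedged usage sketch for data_check, reusing the project/table/dt values from
+# the __main__ block below; a non-zero count means the partition is ready.
+def _example_data_check():
+    count = data_check(project=config_.PROJECT_24H_APP_TYPE,
+                       table=config_.TABLE_24H_APP_TYPE,
+                       dt='2022080115')
+    return count > 0
+
+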
+def get_feature_data(project, table, features, dt):
+    """获取特征数据"""
+    records = get_data_from_odps(date=dt, project=project, table=table)
+    feature_data = []
+    for record in records:
+        item = {}
+        for feature_name in features:
+            item[feature_name] = record[feature_name]
+        feature_data.append(item)
+    feature_df = pd.DataFrame(feature_data)
+    return feature_df
+
+
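+# Hedged usage sketch for get_feature_data; the feature column names here are
+# hypothetical placeholders for whatever the ODPS table actually exposes.
+def _example_get_feature_data():
+    features = ['videoid', 'play_count']  # hypothetical column names
+    feature_df = get_feature_data(project=config_.PROJECT_24H_APP_TYPE,
+                                  table=config_.TABLE_24H_APP_TYPE,
+                                  features=features,
+                                  dt='2022080115')
+    return feature_df
+
+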
+if __name__ == '__main__':
+    # data_test = [9.20273281e+03, 7.00795065e+03, 5.54813112e+03, 9.97402494e-01, 9.96402495e-01, 9.96402494e-01]
+    # data_normalization(data_test)
+    # request_post(request_url=config_.NOTIFY_BACKEND_UPDATE_ROV_SCORE_URL, request_data={'videos': []})
+    # video_ids = [110, 112, 113, 115, 116, 117, 8289883]
+    # update_video_w_h_rate(video_ids=video_ids, key_name='')
+    project = config_.PROJECT_24H_APP_TYPE
+    table = config_.TABLE_24H_APP_TYPE
+    dt = '2022080115'
+    check_res = check_table_partition_exits(date=dt, project=project, table=table)
+    print(check_res)

+ 28 - 0
word2vec.py

@@ -0,0 +1,28 @@
+# coding: utf-8
+import sys
+
+from gensim.models import word2vec
+
+
+if __name__ == "__main__":
+    # Input: a tab-separated file whose first row is a header; the second column
+    # of each remaining row holds a space-separated token list.
+    sentences = []
+    with open(sys.argv[1]) as f:
+        for num, line in enumerate(f, start=1):
+            if num == 1:
+                # skip the header row
+                continue
+            items = line.strip().split("\t")
+            if len(items) < 2:
+                continue
+            sentences.append(items[1].split(" "))
+
+    # Train skip-gram (sg=1) word2vec with 64-dimensional vectors, dropping tokens
+    # that appear fewer than 2 times, then save the vectors in text format.
+    model = word2vec.Word2Vec(sentences, vector_size=64, min_count=2, sg=1, workers=10)
+    model.wv.save_word2vec_format('word2vec.txt', binary=False)
+    # model.save('word2vec.model')
+
+
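+
+# Hedged follow-up sketch (assumes gensim >= 4.0, matching the keyword arguments
+# used above): reload the vectors written to word2vec.txt and query the nearest
+# neighbours of a token, as a quick sanity check.
+def _load_trained_vectors(path='word2vec.txt'):
+    from gensim.models import KeyedVectors
+    wv = KeyedVectors.load_word2vec_format(path, binary=False)
+    # nearest neighbours of the first vocabulary token
+    return wv.most_similar(wv.index_to_key[0], topn=5)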