丁云鹏 7 ヶ月 前
コミット
6e168112fb

+ 36 - 23
src/main/java/com/tzld/piaoquan/recommend/similarity/word2vec/Demo.java

@@ -28,19 +28,42 @@ public class Demo {
             e.printStackTrace();
         }
 
+//        String[] s = new String[]{
+//                "🔴终于找到了这首歌,献给你!",
+//                "各位退休的同学,请听!",
+//                "这首歌太好听了,听醉了别怪我!",
+//                "老了真的很难!",
+//                "老同学在聚会上的演讲幽默是太实在了💢",
+//                "🔥🔥🔥一篇关于养老金问题的文章,请过来看看",
+//                "🔴老人考级的标准出台!符合6个条件的了不得🔥",
+//                "超级贵的景色,看过的彻底傻眼了📣",
+//                "她走了!泪目!留下了这段话,让人潸然泪下!",
+//                "🔴老同学❗️好久不见了,大家来看看吧!",
+//                "⭕谁写的?把人《一辈子》写明白了,给老友看看吧 ~!",
+//                "太美了,难得一见的美景~"
+//        };
+
         String[] s = new String[]{
-                "🔴终于找到了这首歌,献给你!",
-                "各位退休的同学,请听!",
-                "这首歌太好听了,听醉了别怪我!",
-                "老了真的很难!",
-                "老同学在聚会上的演讲幽默是太实在了💢",
-                "🔥🔥🔥一篇关于养老金问题的文章,请过来看看",
-                "🔴老人考级的标准出台!符合6个条件的了不得🔥",
-                "超级贵的景色,看过的彻底傻眼了📣",
-                "她走了!泪目!留下了这段话,让人潸然泪下!",
-                "🔴老同学❗️好久不见了,大家来看看吧!",
-                "⭕谁写的?把人《一辈子》写明白了,给老友看看吧 ~!",
-                "太美了,难得一见的美景~"
+                "九九重阳节快乐🔴",
+                "不结婚不生孩子,老了之后就是这个下场!",
+                "🔴瞠目结舌!朝鲜这个国家有多狠!",
+                "⭕九九重阳节,将出现“双月同天”的天文景象",
+                "🍑⭕《九九重阳节》",
+                "⭕这个建议实在是太及时了,忍不住为他鼓掌",
+                "⭕老外做梦也想不到,我国又出了一位像袁隆平一样的农业奇才",
+                "重阳节快乐🚩",
+                "🔴《知心朋友》一首好听的歌,说的好,",
+                "⚡为什么农历九月九为“重阳节”为什么又叫“老人节”",
+                "🔥📢🔴十分通透的一段话,写得太棒了,请朋友们收好!",
+                "🔴重阳将至,小宝贝送来最美最真挚的祝福,愿老友吉祥幸福",
+                "🔴老外惊掉下巴!中国竟在天上造机场,巫山机场太牛了",
+                "这就是人生,听完我哭了",
+                "⭕很多人都在找这首歌,太美了,太好听了!",
+                "⭕九九重阳节,将出现“双月同天”的珍贵天文景象",
+                "想念老同学",
+                "⭕今年的重阳节有什么讲究?看完你就明白了",
+                "⭕养老院的真实生活,看完让人泪目",
+                "早上好,有你相伴,幸运满满"
         };
 
 
@@ -50,23 +73,13 @@ public class Demo {
             words[i] = Segment.getWords(s[i]);
         }
 
-//快速句子相似度
-        System.out.println("快速句子相似度:");
         for (int i = 0; i < words.length - 1; i++) {
             for (int j = i + 1; j < words.length; j++) {
-                System.out.println(s[i] + "|||" + s[j] + ": " + vec.fastSentenceSimilarity(words[i], words[j]));
+                System.out.println(s[i] + "\t" + s[j] + "\t" + vec.fastSentenceSimilarity(words[i], words[j]) + "\t" + vec.sentenceSimilarity(words[i], words[j]));
             }
         }
 
 
-//句子相似度(所有词语权值设为1)
-        System.out.println("句子相似度:");
-        for (int i = 0; i < s.length - 1; i++) {
-            for (int j = i + 1; j < s.length; j++) {
-                System.out.println(s[i] + "|||" + s[j] + ": " + vec.sentenceSimilarity(words[i], words[j]));
-            }
-        }
-
 //句子相似度(名词、动词权值设为1,其他设为0.8)
 //        float[] weightArray1 = Segment.getPOSWeightArray(Segment.getPOS(s1));
 //        float[] weightArray2 = Segment.getPOSWeightArray(Segment.getPOS(s2));

+ 16 - 16
src/main/java/com/tzld/piaoquan/recommend/similarity/word2vec/Demo2.java

@@ -30,20 +30,19 @@ public class Demo2 {
         }
 
         String[] tags = {
-//                "风油精,妙用,学到,老歌,记住,教师节,建议,真情,人间,早安",
-//                "风油精,妙用,建议,学到,伟人,致敬,铜像,听的歌,送给,心情",
-//                "送给,学到,风油精,妙用,亲人,朋友,骗局,小心,注意,瓶子",
-//                "听的歌,送给,心情,建议,真情,人间,预防,可惜,知道,伟人",
-//                "风油精,妙用,建议,送给,学到,骗局,小心,注意,瓶子,作用",
-//                "知道,血管,水果,推荐,预防,可惜,真情,人间,小常识,老百姓",
-//                "按摩,心脏,老歌,记住,骗局,小心,注意,建议,教师节,风油精",
-//                "注意,骗局,小心,按摩,心脏,老歌,记住,家人,朋友,建议",
-//                "知道,小常识,老百姓,真情,人间,预防,可惜,血管,水果,推荐",
-//                "教师节,知道,老歌,记住,风油精,妙用,按摩,心脏,早安,创意",
-//                "教师节,知道,家人,朋友,妙用,好了吗,收下,骗局,小心,注意",
-//                "听的歌,送给,心情,建议,真情,人间,预防,可惜,知道,伟人",
-//                "送给,风油精,妙用,建议,亲人,骗局,小心,注意,瓶子,作用"
-                "教师节,知道,家人,朋友,妙用,好了吗,收下,骗局,小心,注意"
+                "风油精,妙用,学到,老歌,记住,教师节,建议,真情,人间,早安",
+                "风油精,妙用,建议,学到,伟人,致敬,铜像,听的歌,送给,心情",
+                "送给,学到,风油精,妙用,亲人,朋友,骗局,小心,注意,瓶子",
+                "听的歌,送给,心情,建议,真情,人间,预防,可惜,知道,伟人",
+                "风油精,妙用,建议,送给,学到,骗局,小心,注意,瓶子,作用",
+                "知道,血管,水果,推荐,预防,可惜,真情,人间,小常识,老百姓",
+                "按摩,心脏,老歌,记住,骗局,小心,注意,建议,教师节,风油精",
+                "注意,骗局,小心,按摩,心脏,老歌,记住,家人,朋友,建议",
+                "知道,小常识,老百姓,真情,人间,预防,可惜,血管,水果,推荐",
+                "教师节,知道,老歌,记住,风油精,妙用,按摩,心脏,早安,创意",
+                "教师节,知道,家人,朋友,妙用,好了吗,收下,骗局,小心,注意",
+                "听的歌,送给,心情,建议,真情,人间,预防,可惜,知道,伟人",
+                "送给,风油精,妙用,建议,亲人,骗局,小心,注意,瓶子,作用"
         };
         //String tags = "按摩,心脏,老歌,记住,骗局,小心,注意,建议,教师节,风油精";
         //String tags = "风油精";
@@ -55,7 +54,7 @@ public class Demo2 {
             Double[] score = funcC34567ForTags(tags[j], title);
             long end = System.currentTimeMillis();
             System.out.println(tags[j] + "||" + title + "||" + score[0] + "||" + score[1] + "||" + score[2] + "||" +
-                    "cost : "+(end - start));
+                    "cost : " + (end - start));
 
         }
     }
@@ -74,7 +73,8 @@ public class Demo2 {
                 d2.add(tag);
             }
             List<String> tagWords = Segment.getWords(tag);
-            double score = vec.fastSentenceSimilarity(tagWords, titleWords);
+            //double score = vec.fastSentenceSimilarity(tagWords, titleWords);
+            double score = vec.sentenceSimilarity(tagWords, titleWords);
             if (score > d3) {
                 d3 = score;
             }

+ 7 - 5
src/main/java/com/tzld/piaoquan/recommend/similarity/word2vec/Word2Vec.java

@@ -126,8 +126,8 @@ public class Word2Vec {
         } else if (!sentence1Words.isEmpty() && !sentence2Words.isEmpty()) {
             float[] sen1vector = new float[this.vec.getSize()];
             float[] sen2vector = new float[this.vec.getSize()];
-            double len1 = 0.0D;
-            double len2 = 0.0D;
+            float len1 = 0.0F;
+            float len2 = 0.0F;
 
             int i;
             float[] tmp;
@@ -146,14 +146,16 @@ public class Word2Vec {
             }
 
             for (i = 0; i < this.vec.getSize(); ++i) {
-                len1 += (double) (sen1vector[i] * sen1vector[i]);
-                len2 += (double) (sen2vector[i] * sen2vector[i]);
+                len1 += sen1vector[i] * sen1vector[i];
+                len2 += sen2vector[i] * sen2vector[i];
             }
 
             if (len1 * len2 == 0) {
                 return 0.0F;
             }
-            return (float) ((double) this.calDist(sen1vector, sen2vector) / Math.sqrt(len1 * len2));
+            float dist = this.calDist(sen1vector, sen2vector);
+            float sqrt = (float) Math.sqrt(len1 * len2);
+            return dist / sqrt;
         } else {
             return 0.0F;
         }