丁云鹏 7 ヶ月 前
コミット
8a68c13985

+ 10 - 11
src/main/java/com/tzld/piaoquan/recommend/similarity/word2vec/Demo2.java

@@ -1,7 +1,5 @@
 package com.tzld.piaoquan.recommend.similarity.word2vec;
 
-import org.nlpcn.commons.lang.util.FileFinder;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -31,7 +29,7 @@ public class Demo2 {
             e.printStackTrace();
         }
 
-//        List<String> tags = Lists.newArrayList(
+        String[] tags = {
 //                "风油精,妙用,学到,老歌,记住,教师节,建议,真情,人间,早安",
 //                "风油精,妙用,建议,学到,伟人,致敬,铜像,听的歌,送给,心情",
 //                "送给,学到,风油精,妙用,亲人,朋友,骗局,小心,注意,瓶子",
@@ -45,16 +43,20 @@ public class Demo2 {
 //                "教师节,知道,家人,朋友,妙用,好了吗,收下,骗局,小心,注意",
 //                "听的歌,送给,心情,建议,真情,人间,预防,可惜,知道,伟人",
 //                "送给,风油精,妙用,建议,亲人,骗局,小心,注意,瓶子,作用"
-//        );
+                "教师节,知道,家人,朋友,妙用,好了吗,收下,骗局,小心,注意"
+        };
         //String tags = "按摩,心脏,老歌,记住,骗局,小心,注意,建议,教师节,风油精";
-        String tags = "风油精";
+        //String tags = "风油精";
         String title = "\uD83D\uDD34少年夫妻老来伴,珍惜身边人";
+        Segment.getWords("init");
 
-        for (int j = 0; j < 1; j++) {
+        for (int j = 0; j < tags.length; j++) {
             long start = System.currentTimeMillis();
-            funcC34567ForTags(tags, title);
+            Double[] score = funcC34567ForTags(tags[j], title);
             long end = System.currentTimeMillis();
-            System.out.println("total cost : " + (end - start));
+            System.out.println(tags[j] + "||" + title + "||" + score[0] + "||" + score[1] + "||" + score[2] + "||" +
+                    "cost : "+(end - start));
+
         }
     }
 
@@ -67,7 +69,6 @@ public class Demo2 {
 
         List<String> titleWords = Segment.getWords(title);
         for (String tag : tagsList) {
-            long start = System.currentTimeMillis();
             if (title.contains(tag)) {
                 d1++;
                 d2.add(tag);
@@ -78,8 +79,6 @@ public class Demo2 {
                 d3 = score;
             }
             d4 += score;
-            long end = System.currentTimeMillis();
-            System.out.println(tag + " conceptSimilarity cost : " + (end - start));
         }
 
         d4 = (tagsList.length > 0) ? d4 / tagsList.length : d4;

+ 13 - 10
src/main/java/com/tzld/piaoquan/recommend/similarity/word2vec/Word2Vec.java

@@ -132,14 +132,14 @@ public class Word2Vec {
             int i;
             float[] tmp;
             for (i = 0; i < sentence1Words.size(); ++i) {
-                tmp = this.getWordVector((String) sentence1Words.get(i));
+                tmp = this.getWordVector(sentence1Words.get(i));
                 if (tmp != null) {
                     this.calSum(sen1vector, tmp);
                 }
             }
 
             for (i = 0; i < sentence2Words.size(); ++i) {
-                tmp = this.getWordVector((String) sentence2Words.get(i));
+                tmp = this.getWordVector(sentence2Words.get(i));
                 if (tmp != null) {
                     this.calSum(sen2vector, tmp);
                 }
@@ -150,6 +150,9 @@ public class Word2Vec {
                 len2 += (double) (sen2vector[i] * sen2vector[i]);
             }
 
+            if (len1 * len2 == 0) {
+                return 0.0F;
+            }
             return (float) ((double) this.calDist(sen1vector, sen2vector) / Math.sqrt(len1 * len2));
         } else {
             return 0.0F;
@@ -167,16 +170,16 @@ public class Word2Vec {
 
             int i;
             for (i = 0; i < sentence1Words.size(); ++i) {
-                if (this.getWordVector((String) sentence1Words.get(i)) != null) {
+                if (this.getWordVector(sentence1Words.get(i)) != null) {
                     ++count1;
-                    sum1 += this.calMaxSimilarity((String) sentence1Words.get(i), sentence2Words);
+                    sum1 += this.calMaxSimilarity(sentence1Words.get(i), sentence2Words);
                 }
             }
 
             for (i = 0; i < sentence2Words.size(); ++i) {
-                if (this.getWordVector((String) sentence2Words.get(i)) != null) {
+                if (this.getWordVector(sentence2Words.get(i)) != null) {
                     ++count2;
-                    sum2 += this.calMaxSimilarity((String) sentence2Words.get(i), sentence1Words);
+                    sum2 += this.calMaxSimilarity(sentence2Words.get(i), sentence1Words);
                 }
             }
 
@@ -199,16 +202,16 @@ public class Word2Vec {
                 int i;
                 float wordMaxSimi;
                 for (i = 0; i < sentence1Words.size(); ++i) {
-                    if (this.getWordVector((String) sentence1Words.get(i)) != null) {
-                        wordMaxSimi = this.calMaxSimilarity((String) sentence1Words.get(i), sentence2Words);
+                    if (this.getWordVector(sentence1Words.get(i)) != null) {
+                        wordMaxSimi = this.calMaxSimilarity(sentence1Words.get(i), sentence2Words);
                         sum1 += wordMaxSimi * weightVector1[i];
                         divide1 += weightVector1[i];
                     }
                 }
 
                 for (i = 0; i < sentence2Words.size(); ++i) {
-                    if (this.getWordVector((String) sentence2Words.get(i)) != null) {
-                        wordMaxSimi = this.calMaxSimilarity((String) sentence2Words.get(i), sentence1Words);
+                    if (this.getWordVector(sentence2Words.get(i)) != null) {
+                        wordMaxSimi = this.calMaxSimilarity(sentence2Words.get(i), sentence1Words);
                         sum2 += wordMaxSimi * weightVector2[i];
                         divide2 += weightVector2[i];
                     }