Bladeren bron

Revert "init"

This reverts commit 7a2d6165e085c7f3f730a1b3bc1c7d3892798b16.
丁云鹏 6 maanden geleden
bovenliggende
commit
53222b9a6d

File diff suppressed because it is too large
+ 104 - 104
similarity/corpus/tianlongbabu-WordFrequencyStatistics-Result.txt


+ 3 - 3
similarity/data/WordFrequencyStatistics-Result.txt

@@ -3,14 +3,14 @@
 和	2
 的	2
 基础吧	1
-下雨天	1
 去	1
+下雨天	1
 代码	1
 下雨	1
 关于	1
 数据	1
-建模	1
 听	1
+建模	1
 是	1
 不算	1
 课程	1
@@ -22,6 +22,6 @@
 有	1
 了	1
 分子	1
-什么	1
 要	1
+什么	1
 。	1

+ 0 - 71
similarity/src/main/java/org/xm/Demo.java

@@ -1,71 +0,0 @@
-package org.xm;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * @author dyp
- */
-public class Demo {
-
-    public static void main(String[] args) {
-
-        funcC34567ForTags("1", "1");
-
-//        List<String> tags = Lists.newArrayList(
-//                "风油精,妙用,学到,老歌,记住,教师节,建议,真情,人间,早安",
-//                "风油精,妙用,建议,学到,伟人,致敬,铜像,听的歌,送给,心情",
-//                "送给,学到,风油精,妙用,亲人,朋友,骗局,小心,注意,瓶子",
-//                "听的歌,送给,心情,建议,真情,人间,预防,可惜,知道,伟人",
-//                "风油精,妙用,建议,送给,学到,骗局,小心,注意,瓶子,作用",
-//                "知道,血管,水果,推荐,预防,可惜,真情,人间,小常识,老百姓",
-//                "按摩,心脏,老歌,记住,骗局,小心,注意,建议,教师节,风油精",
-//                "注意,骗局,小心,按摩,心脏,老歌,记住,家人,朋友,建议",
-//                "知道,小常识,老百姓,真情,人间,预防,可惜,血管,水果,推荐",
-//                "教师节,知道,老歌,记住,风油精,妙用,按摩,心脏,早安,创意",
-//                "教师节,知道,家人,朋友,妙用,好了吗,收下,骗局,小心,注意",
-//                "听的歌,送给,心情,建议,真情,人间,预防,可惜,知道,伟人",
-//                "送给,风油精,妙用,建议,亲人,骗局,小心,注意,瓶子,作用"
-//        );
-        //String tags = "按摩,心脏,老歌,记住,骗局,小心,注意,建议,教师节,风油精";
-        String tags = "风油精";
-        String title = "\uD83D\uDD34少年夫妻老来伴,珍惜身边人";
-
-        for (int j = 0; j < 1; j++) {
-            long start = System.currentTimeMillis();
-            funcC34567ForTags(tags, title);
-            long end = System.currentTimeMillis();
-            System.out.println("total cost : " + (end - start));
-        }
-    }
-
-    public static Double[] funcC34567ForTags(String tags, String title) {
-        String[] tagsList = tags.split(",");
-        int d1 = 0;
-        List<String> d2 = new ArrayList<>();
-        double d3 = 0.0;
-        double d4 = 0.0;
-
-        for (String tag : tagsList) {
-            long start = System.currentTimeMillis();
-            if (title.contains(tag)) {
-                d1++;
-                d2.add(tag);
-            }
-            double score = Similarity.conceptSimilarity(tag, title);
-            if (score > d3) {
-                d3 = score;
-            }
-            d4 += score;
-            long end = System.currentTimeMillis();
-            System.out.println(tag + " conceptSimilarity cost : " + (end - start));
-        }
-
-        d4 = (tagsList.length > 0) ? d4 / tagsList.length : d4;
-
-        // 使用数组来返回多个值
-        Double[] result = {(double) d1, d3, d4};
-        return result;
-    }
-}

+ 5 - 18
similarity/src/main/java/org/xm/similarity/word/hownet/concept/ConceptSimilarity.java

@@ -51,32 +51,21 @@ public class ConceptSimilarity extends ConceptParser {
         if (word1.equals(word2)) {
             return 1.0;
         }
-        long start = System.currentTimeMillis();
         Collection<Concept> concepts1 = getConcepts(word1);
-        long time1 = System.currentTimeMillis();
-        System.out.println("concepts1 cost : " + (time1 - start));
-
         Collection<Concept> concepts2 = getConcepts(word2);
-        long time2 = System.currentTimeMillis();
-        System.out.println("concepts2 cost : " + (time2 - time1));
         // 未登录词需要计算组合概念
-        long time3 = time2;
         if (StringUtil.isBlank(concepts1) && StringUtil.isNotBlank(concepts2)) {
             concepts1 = autoCombineConcepts(word1, concepts2);
-            time3 = System.currentTimeMillis();
-            System.out.println("autoCombineConcepts1 cost : " + (time3 - time2));
-        } else if (StringUtil.isBlank(concepts2) && StringUtil.isNotBlank(concepts1)) {
+        }
+        if (StringUtil.isBlank(concepts2) && StringUtil.isNotBlank(concepts1)) {
             concepts2 = autoCombineConcepts(word2, concepts1);
-            time3 = System.currentTimeMillis();
-            System.out.println("autoCombineConcepts2 cost : " + (time3 - time2));
-        } else if (StringUtil.isBlank(concepts1) && StringUtil.isBlank(concepts2)) {
+        }
+        if (StringUtil.isBlank(concepts1) && StringUtil.isBlank(concepts2)) {
             concepts1 = autoCombineConcepts(word1, concepts2);
             concepts2 = autoCombineConcepts(word2, concepts1);
             // 修正
             concepts1 = autoCombineConcepts(word1, concepts2);
             concepts2 = autoCombineConcepts(word2, concepts1);
-            time3 = System.currentTimeMillis();
-            System.out.println("autoCombineConcepts3 cost : " + (time3 - time2));
         }
 
         // 处理所有可能组合的相似度
@@ -91,8 +80,6 @@ public class ConceptSimilarity extends ConceptParser {
                 }
             }
         }
-        long time4 = System.currentTimeMillis();
-        System.out.println("loop cost : " + (time4 - time3));
         return similarity;
     }
 
@@ -160,7 +147,7 @@ public class ConceptSimilarity extends ConceptParser {
         }
         // 过滤删除最后的1/3
         if ((newConcepts.size() > MAX_COMBINED_COUNT)) {
-            newConcepts.removeLast(newConcepts.size() / 3);
+            newConcepts.removeLast(MAX_COMBINED_COUNT / 3);
         }
         return newConcepts;
     }

Some files were not shown because too many files changed in this diff