zhangbo 9 maanden geleden
bovenliggende
commit
6413be47eb

+ 0 - 0
.gitignore


BIN
MyFirstUDF/src/.DS_Store


BIN
MyFirstUDF/src/main/.DS_Store


BIN
MyFirstUDF/src/main/java/.DS_Store


BIN
MyFirstUDF/src/main/java/com/.DS_Store


BIN
MyFirstUDF/src/main/java/com/alidata/.DS_Store


BIN
MyFirstUDF/src/main/java/com/alidata/odps/.DS_Store


BIN
MyFirstUDF/src/main/java/com/alidata/odps/udtf/.DS_Store


+ 218 - 0
similarity/pom.xml

@@ -0,0 +1,218 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.xm</groupId>
+    <artifactId>similarity</artifactId>
+    <version>1.2</version>
+    <parent>
+        <groupId>com.tzld.commons</groupId>
+        <artifactId>supom</artifactId>
+        <version>1.0.9</version>
+    </parent>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <junit.version>4.11</junit.version>
+        <slf4j.version>1.7.7</slf4j.version>
+        <logback.version>1.1.2</logback.version>
+        <commons.lang3.version>3.3.1</commons.lang3.version>
+    </properties>
+
+    <!-- Libraries this module is built against. Versions written as ${...} come from the properties section of this POM. -->
+    <dependencies>
+        <!-- Logging: Logback (classic, core, access) behind the SLF4J API -->
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>${logback.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-core</artifactId>
+            <version>${logback.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-access</artifactId>
+            <version>${logback.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>${slf4j.version}</version>
+        </dependency>
+        <!-- Tokenizer (can be replaced with another Chinese word segmenter) -->
+        <!-- The xmnlp dependency below is disabled; HanLP is the tokenizer actually declared. -->
+        <!--<dependency>-->
+            <!--<groupId>org.xm</groupId>-->
+            <!--<artifactId>xmnlp</artifactId>-->
+            <!--<version>1.3</version>-->
+        <!--</dependency>-->
+        <dependency>
+            <groupId>com.hankcs</groupId>
+            <artifactId>hanlp</artifactId>
+            <version>portable-1.3.4</version>
+        </dependency>
+
+        <!-- NOTE(review): mockito-all and hamcrest-all declare no scope, so they resolve with Maven's default (compile) scope and are picked up by the shade plugin's "*:*" include. Confirm whether main code needs them; if not, they probably belong in test scope. -->
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-all</artifactId>
+            <version>1.9.5</version>
+        </dependency>
+        <dependency>
+            <groupId>org.hamcrest</groupId>
+            <artifactId>hamcrest-all</artifactId>
+            <version>1.3</version>
+        </dependency>
+        <dependency>
+            <groupId>args4j</groupId>
+            <artifactId>args4j</artifactId>
+            <version>2.0.16</version>
+        </dependency>
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <version>13.0.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>${commons.lang3.version}</version>
+        </dependency>
+        <!-- NOTE(review): google-collections is the predecessor of Guava, which is also declared above; having both may put duplicate com.google.common classes on the classpath. Confirm whether this artifact is still required. -->
+        <dependency>
+            <groupId>com.google.collections</groupId>
+            <artifactId>google-collections</artifactId>
+            <version>1.0</version>
+        </dependency>
+        <!-- Testing -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>${junit.version}</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <!-- Artifact repositories declared by this module, in the order Maven consults them. -->
+    <repositories>
+        <!--
+            Maven Central. The URL must use https: Central has answered plain
+            http requests with "501 HTTPS Required" since January 2020.
+            NOTE(review): the id looks like a typo for "central". It is left
+            unchanged because renaming it would override any repository with
+            id "central" inherited from the parent POM or matched by a mirror
+            in settings.xml. Confirm before renaming.
+        -->
+        <repository>
+            <id>cengtral</id>
+            <url>https://repo1.maven.org/maven2/</url>
+        </repository>
+        <!--
+            NOTE(review): the three remote repositories below are still declared
+            over plain http, which Maven 3.8.1 and later block by default.
+            Confirm that each host is still reachable and still needed before
+            switching its scheme or removing it.
+        -->
+        <!-- java.net Maven repository, for example for JavaMail -->
+        <repository>
+            <id>Java.Net</id>
+            <url>http://download.java.net/maven/2/</url>
+        </repository>
+        <repository>
+            <id>ansj-repo</id>
+            <url>http://maven.ansj.org/</url>
+        </repository>
+        <!-- Release artifacts only; snapshots are disabled for this repository. -->
+        <repository>
+            <id>info-bliki-repository</id>
+            <url>http://gwtwiki.googlecode.com/svn/maven-repository/</url>
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+        </repository>
+        <!-- File-based repository resolved from the lib directory next to this POM. -->
+        <repository>
+            <id>lib_id</id>
+            <url>file://${project.basedir}/lib</url>
+        </repository>
+    </repositories>
+
+    <!-- Build plugins: fat-jar shading, Scala compilation, and Java compiler level. -->
+    <build>
+        <plugins>
+            <!-- Builds a shaded (fat) jar during the package phase. -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.4</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <!-- Move protobuf classes under a "shaded." package prefix inside the fat jar. -->
+                            <relocations>
+                                <relocation>
+                                    <pattern>com.google.protobuf</pattern>
+                                    <shadedPattern>shaded.com.google.protobuf</shadedPattern>
+                                </relocation>
+                            </relocations>
+                            <!-- Keep every class; do not strip classes that look unused. -->
+                            <minimizeJar>false</minimizeJar>
+                            <!-- Attach the shaded jar as an additional artifact instead of replacing the main one. -->
+                            <shadedArtifactAttached>true</shadedArtifactAttached>
+                            <artifactSet>
+                                <includes>
+                                    <!-- Include here the dependencies you
+                                        want to be packed in your fat jar -->
+                                    <include>*:*</include>
+                                </includes>
+                            </artifactSet>
+                            <!-- For every bundled artifact, drop jar signature files and any log4j.properties. -->
+                            <filters>
+                                <filter>
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                        <exclude>**/log4j.properties</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                            <!-- Concatenate same-named resources from different jars instead of letting one overwrite the others. -->
+                            <transformers>
+                                <transformer
+                                        implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+                                    <resource>reference.conf</resource>
+                                </transformer>
+                                <transformer
+                                        implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+                                    <resource>
+                                        META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+                                    </resource>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <!-- Binds Scala compilation to the resource-processing phases, which run before the Java compile phases. -->
+            <plugin>
+                <groupId>net.alchim31.maven</groupId>
+                <artifactId>scala-maven-plugin</artifactId>
+                <version>3.3.2</version>
+                <executions>
+                    <execution>
+                        <id>scala-compile-first</id>
+                        <phase>process-resources</phase>
+                        <goals>
+                            <goal>compile</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>scala-test-compile-first</id>
+                        <phase>process-test-resources</phase>
+                        <goals>
+                            <goal>testCompile</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <!-- Compile Java sources at language level 1.8 and emit 1.8 bytecode. -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <source>1.8</source>
+                    <target>1.8</target>
+                    <!--<compilerId>scala</compilerId>-->
+                    <!-- <compilerVersion>2.12.10</compilerVersion>-->
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+
+</project>

+ 32 - 0
similarity/src/test/java/org/xm/classification/NaiveBayesClassifierTest.java

@@ -0,0 +1,32 @@
+package org.xm.classification;
+
+import org.junit.Test;
+
+import java.io.File;
+
+/**
+ * Placeholder test class for the naive Bayes classifier.
+ * <p>
+ * The single test method currently contains no executable statements; its
+ * intended body is kept below as commented-out code.
+ *
+ * @author xuming
+ */
+public class NaiveBayesClassifierTest {
+    /**
+     * Currently a no-op that always passes.
+     * <p>
+     * The disabled body trains a classifier on every file under
+     * {@code data/test/testcorpus} (one sub-directory per category), saves it
+     * to {@code result.dat}, loads it back, and prints the category predicted
+     * for {@code data/test/shanxiuniversity-part.txt}.
+     */
+    @Test
+    public void test() throws Exception {
+//        NaiveBayesClassifier classifier = new NaiveBayesClassifier();
+//
+//        File samplePath = new File("data/test/testcorpus");
+//        for (File categoryPath : samplePath.listFiles()) {
+//            String category = categoryPath.getName();
+//            for (File f : categoryPath.listFiles()) {
+//                classifier.training(new Instance(category, f, "UTF-8"));
+//            }
+//        }
+//        classifier.save(new File("result.dat"));
+//        System.out.println("Finished!");
+//
+//        classifier.load(new File("result.dat"));
+//
+//        Instance doc = new Instance(null, new File("data/test/shanxiuniversity-part.txt"), "UTF-8");
+//        System.out.println("get category:" + classifier.getCategory(doc));
+
+    }
+
+}

+ 27 - 0
similarity/src/test/java/org/xm/similarity/util/FileUtilTest.java

@@ -0,0 +1,27 @@
+package org.xm.similarity.util;
+
+import org.junit.Test;
+
+import java.io.File;
+
+/**
+ * @author xuming
+ */
+public class FileUtilTest {
+    @Test
+    public void fileTest() throws Exception {
+//        int count = 0;
+//        File dir = new File("C:/windows");
+//        for (File a : dir.listFiles()) {
+//            if (a != null && a.isFile())
+//                count++;
+//        }
+//        System.out.println(count);
+    }
+
+    @Test
+    public void saveStringToFile() throws Exception {
+        FileUtil.saveStringToFile("hi nihao .", "C:/temp/temp.txt");
+    }
+
+}