|
@@ -0,0 +1,192 @@
|
|
|
+package com.tzld.piaoquan.data.score.feature;
|
|
|
+
|
|
|
+
|
|
|
+import com.tzld.piaoquan.recommend.server.gen.recommend.BaseFeature;
|
|
|
+
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+
|
|
|
+/**
|
|
|
+ * Extract features from user, item & context info. Returns 64-bit murmurhash of feature string as results.
|
|
|
+ */
|
|
|
+public class BytesUtils {
|
|
|
+ private static final byte[] SEPARATOR = "_".getBytes();
|
|
|
+ private static final byte[] FEATURE_SEPARATOR = "#".getBytes();
|
|
|
+ private static final int MAX_FEATURE_BYTES_LENGTH = 512;
|
|
|
+ private static final long SEED = 11L;
|
|
|
+ private BytesGroup[] groups;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 一个种特殊的List,在尝试写入null的时候回默默地扔掉.
|
|
|
+ * @param <E> List的元素类型.
|
|
|
+ */
|
|
|
+ public static class NullRejectingArrayList<E> extends ArrayList<E> {
|
|
|
+ public NullRejectingArrayList(int capacity) {
|
|
|
+ super(capacity);
|
|
|
+ }
|
|
|
+
|
|
|
+ public NullRejectingArrayList() {
|
|
|
+ super();
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public boolean add(E e) {
|
|
|
+ return e != null && super.add(e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public BytesUtils(BytesGroup[] groups) {
|
|
|
+ this.groups = groups;
|
|
|
+ for (BytesGroup g : groups) {
|
|
|
+ byte[] buffer = prepareBuffer(g.getName(), g.getNameBytes());
|
|
|
+ groups[g.getId()].setBuffer(buffer);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ public byte[] prepareBuffer(String name, byte[] nameBytes) {
|
|
|
+
|
|
|
+ byte[] buffer = new byte[MAX_FEATURE_BYTES_LENGTH];
|
|
|
+ System.arraycopy(nameBytes, 0, buffer, 0, nameBytes.length);
|
|
|
+ System.arraycopy(FEATURE_SEPARATOR, 0, buffer, nameBytes.length, 1);
|
|
|
+ return buffer;
|
|
|
+ }
|
|
|
+
|
|
|
+ public BaseFeature baseFea(byte[] buffer, int length) {
|
|
|
+ long hash = FeatureHash.MurmurHash64(buffer, 0, length, SEED);
|
|
|
+
|
|
|
+ // debug中查看 String fea = new String(buffer, 0, length);
|
|
|
+ // 初始化protobuf并赋值
|
|
|
+ BaseFeature.Builder tmp = BaseFeature.newBuilder();
|
|
|
+ tmp.setIdentifier(hash);
|
|
|
+ return tmp.build();
|
|
|
+ }
|
|
|
+
|
|
|
+ public BaseFeature makeFea(int id, byte[] value) {
|
|
|
+ byte[] buffer = groups[id].getBuffer();
|
|
|
+ if (buffer == null || value == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ final int nameLength = groups[id].getNameBytes().length + 1;
|
|
|
+ final int length = nameLength + value.length;
|
|
|
+ System.arraycopy(value, 0, buffer, nameLength, value.length);
|
|
|
+ return baseFea(buffer, length);
|
|
|
+ }
|
|
|
+
|
|
|
+ public BaseFeature makeFea(int id, final byte[] p1, final byte[] p2) {
|
|
|
+ byte[] buffer = groups[id].getBuffer();
|
|
|
+ if (buffer == null || p1 == null || p2 == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ final int nameLength = groups[id].getNameBytes().length + 1;
|
|
|
+ final int length = nameLength + p1.length + 1 + p2.length;
|
|
|
+
|
|
|
+ System.arraycopy(p1, 0, buffer, nameLength, p1.length);
|
|
|
+ System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length, 1);
|
|
|
+ System.arraycopy(p2, 0, buffer, nameLength + p1.length + 1, p2.length);
|
|
|
+ return baseFea(buffer, length);
|
|
|
+ }
|
|
|
+
|
|
|
+ public BaseFeature makeFea(int id, final byte[] p1, final byte[] p2, final byte[] p3) {
|
|
|
+ byte[] buffer = groups[id].getBuffer();
|
|
|
+ if (buffer == null || p1 == null || p2 == null || p3 == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ final int nameLength = groups[id].getNameBytes().length + 1;
|
|
|
+ final int length = nameLength + p1.length + 1 + p2.length + 1 + p3.length;
|
|
|
+ System.arraycopy(p1, 0, buffer, nameLength, p1.length);
|
|
|
+ System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length, 1);
|
|
|
+ System.arraycopy(p2, 0, buffer, nameLength + p1.length + 1, p2.length);
|
|
|
+ System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length + 1 + p2.length, 1);
|
|
|
+ System.arraycopy(p3, 0, buffer, nameLength + p1.length + 1 + p2.length + 1, p3.length);
|
|
|
+
|
|
|
+ return baseFea(buffer, length);
|
|
|
+ }
|
|
|
+
|
|
|
+ public BaseFeature makeFea(int id, final byte[] p1, final byte[] p2, final byte[] p3, final byte[] p4) {
|
|
|
+ byte[] buffer = groups[id].getBuffer();
|
|
|
+ if (buffer == null || p1 == null || p2 == null || p3 == null || p4 == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ final int nameLength = groups[id].getNameBytes().length + 1;
|
|
|
+ final int length = nameLength + p1.length + 1 + p2.length + 1 + p3.length + 1 + p4.length;
|
|
|
+ System.arraycopy(p1, 0, buffer, nameLength, p1.length);
|
|
|
+ System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length, 1);
|
|
|
+ System.arraycopy(p2, 0, buffer, nameLength + p1.length + 1, p2.length);
|
|
|
+ System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length + 1 + p2.length, 1);
|
|
|
+ System.arraycopy(p3, 0, buffer, nameLength + p1.length + 1 + p2.length + 1, p3.length);
|
|
|
+ System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length + 1 + p2.length + 1 + p3.length, 1);
|
|
|
+ System.arraycopy(p4, 0, buffer, nameLength + p1.length + 1 + p2.length + 1 + p3.length + 1, p4.length);
|
|
|
+
|
|
|
+ return baseFea(buffer, length);
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<BaseFeature> makeFea(int id, byte[][] list) {
|
|
|
+ List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(list.length);
|
|
|
+ for (byte[] t: list) {
|
|
|
+ result.add(makeFea(id, t));
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<BaseFeature> makeFea(int id, byte[][] left, byte[] right) {
|
|
|
+ List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(left.length);
|
|
|
+ for (byte[] l: left) {
|
|
|
+ result.add(makeFea(id, l, right));
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<BaseFeature> makeFea(int id, byte[][] left, byte[] right1, byte[] right2) {
|
|
|
+ List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(left.length);
|
|
|
+ for (byte[] l: left) {
|
|
|
+ result.add(makeFea(id, l, right1, right2));
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<BaseFeature> makeFea(int id, byte[][] left, byte[] right1, byte[] right2, byte[] right3) {
|
|
|
+ List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(left.length);
|
|
|
+ for (byte[] l: left) {
|
|
|
+ result.add(makeFea(id, l, right1, right2, right3));
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<BaseFeature> makeFea(int id, byte[] left, byte[][] right) {
|
|
|
+ List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(right.length);
|
|
|
+ for (byte[] r : right) {
|
|
|
+ result.add(makeFea(id, left, r));
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<BaseFeature> makeFea(int id, byte[] left1, byte[] left2, byte[][] right) {
|
|
|
+ List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(right.length);
|
|
|
+ for (byte[] r : right) {
|
|
|
+ result.add(makeFea(id, left1, left2, r));
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<BaseFeature> makeFea(int id, byte[] left1, byte[] left2, byte[] left3, byte[][] right) {
|
|
|
+ List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(right.length);
|
|
|
+ for (byte[] r : right) {
|
|
|
+ result.add(makeFea(id, left1, left2, left3, r));
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<BaseFeature> makeFea(int id, byte[][] left, byte[][] right) {
|
|
|
+ List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(left.length * right.length);
|
|
|
+ for (byte[] l: left) {
|
|
|
+ for (byte[] r: right) {
|
|
|
+ result.add(makeFea(id, l, r));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+}
|