linear.proto
// Configuration of linear methods.
package PS.LM;

import "data/proto/data.proto";
import "learner/proto/bcd.proto";
import "filter/proto/filter.proto";
message Config {
  optional DataConfig training_data = 1;
  optional DataConfig validation_data = 2;
  optional DataConfig model_output = 4;
  optional DataConfig model_input = 5;

  optional LossConfig loss = 10;
  optional PenaltyConfig penalty = 11;
  optional LearningRateConfig learning_rate = 12;

  optional SGDConfig async_sgd = 17;
  optional BCDConfig darlin = 15;
}
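
// Example (a hedged sketch, not a config shipped with the project): a Config
// in protobuf text format that trains an l1-regularized logistic regression
// with asynchronous FTRL. The DataConfig fields are defined in
// data/proto/data.proto and elided here; all numbers are illustrative.
//
//   training_data { /* see data/proto/data.proto */ }
//   loss { type: LOGIT }
//   penalty { type: L1 lambda: 1 lambda: 0 }
//   async_sgd { algo: FTRL minibatch: 10000 }
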
extend BCDConfig {
  // Used by the trust region method. Every parameter update is bounded by
  // *delta*, which is adjusted according to convergence progress; its initial
  // value is *delta_init_value* and its maximal value is *delta_max_value*.
  optional double delta_init_value = 101 [default = 1];
  optional double delta_max_value = 102 [default = 5];

  // kkt_filter_threshold = max_gradient_violation / num_examples *
  // kkt_filter_threshold_ratio. Increasing this ratio weakens the effect of
  // the KKT filter.
  optional double kkt_filter_threshold_ratio = 103 [default = 10];
}
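
// Worked example (with assumed numbers): if max_gradient_violation = 2 and
// num_examples = 1e6, the default ratio of 10 gives
//   kkt_filter_threshold = 2 / 1e6 * 10 = 2e-5.
// In protobuf text format, extension fields use bracket syntax inside the
// extended message, e.g.
//   darlin {
//     [PS.LM.delta_init_value]: 1
//     [PS.LM.kkt_filter_threshold_ratio]: 10
//   }
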
message SGDConfig {
  enum Algo {
    STANDARD = 1;
    FTRL = 2;
  }
  required Algo algo = 1;

  // The minibatch size.
  optional int32 minibatch = 2 [default = 1000];
  // The data buffer size, in MB.
  optional int32 data_buf = 12 [default = 1000];
  // Use AdaGrad-style adaptive learning rates.
  optional bool ada_grad = 5 [default = true];
  // The maximal allowed delay for bounded-delay consistency.
  optional int32 max_delay = 4 [default = 0];
  // The number of passes over the data.
  optional int32 num_data_pass = 11 [default = 1];
  // The progress reporting interval, in seconds.
  optional int32 report_interval = 3 [default = 1];

  // Features occurring <= *tail_feature_freq* times are filtered out before
  // training, which saves both memory and bandwidth.
  optional int32 tail_feature_freq = 6 [default = 0];
  // Control the size of the count-min sketch used to filter tail features.
  // The sketch is more memory-efficient than a hash table, but is still the
  // memory bottleneck on servers. A smaller *countmin_n* reduces the memory
  // footprint, but may let more tail features pass the filter.
  optional float countmin_n = 8 [default = 1e8];
  optional int32 countmin_k = 7 [default = 2];

  repeated FilterConfig push_filter = 13;
  repeated FilterConfig pull_filter = 14;
}
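
// Example (a hedged sketch; the numbers are illustrative, not recommended
// settings): an asynchronous-SGD section in protobuf text format.
//   async_sgd {
//     algo: FTRL
//     minibatch: 10000
//     max_delay: 4
//     tail_feature_freq: 4
//   }
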
message LossConfig {
  enum Type {
    SQUARE = 1;
    LOGIT = 2;
    HINGE = 3;
    SQUARE_HINGE = 4;
  }
  required Type type = 1;
}
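
// For reference, the conventional definitions of these losses (the source
// does not spell them out here): for a label y in {-1, +1} and prediction
// p = <w, x>,
//   SQUARE:       (p - y)^2 / 2
//   LOGIT:        log(1 + exp(-y * p))
//   HINGE:        max(0, 1 - y * p)
//   SQUARE_HINGE: max(0, 1 - y * p)^2
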
message PenaltyConfig {
  enum Type {
    L1 = 1;  // lambda(0) * ||w||_1 + lambda(1) * ||w||_F^2
    L2 = 2;  // lambda(0) * ||w||_F^2
  }
  required Type type = 1;
  repeated double lambda = 2;
}
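
// Example (a hedged sketch): the elastic net 1 * ||w||_1 + 0.1 * ||w||_F^2
// in protobuf text format:
//   penalty { type: L1 lambda: 1 lambda: 0.1 }
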
message LearningRateConfig {
  enum Type {
    CONSTANT = 1;  // = alpha
    DECAY = 2;     // = alpha / (beta + x), where x is user-defined, e.g. sqrt(iter)
  }
  optional Type type = 1;
  optional double alpha = 2;
  optional double beta = 3;
}
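
// Example (a hedged sketch): a decaying rate alpha / (beta + x) with
// alpha = 1 and beta = 10, in protobuf text format:
//   learning_rate { type: DECAY alpha: 1 beta: 10 }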