00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include "common/fimcp_basic.hh"
00036
00037
00038 inline float nlogn ( float n ) {
00039
00040 if ( !n )
00041 return 0;
00042 return -log(n) * n;
00043 }
00044 inline int convex_function(int X, int pos_x, int Y, int pos_y, int precision) {
00045 float tot = X + Y;
00046 float base = nlogn(X/tot) + nlogn(Y/tot);
00047
00048 float pos = pos_x + pos_y;
00049 float neg = tot - pos;
00050 float calc = pos * ( nlogn( pos_x/pos) + nlogn( pos_y/pos) ) +
00051 neg * ( nlogn((X-pos_x)/neg) + nlogn((Y-pos_y)/neg) );
00052 if (isnan(calc)) calc = -1;
00053 return (int) (precision*(base - calc/tot));
00054 }
00055 inline bool convex_functionWrapper(int X,int l_x,int u_x,int Y,int l_y,int u_y,int precision, int tresholdLE) {
00056
00057 return (convex_function(X, 0, Y, u_x+u_y, precision) < tresholdLE
00058 && convex_function(X, u_x+u_y, Y, 0, precision) < tresholdLE);
00059 }
00060 #include "constraint_Fconvex_emulator.cpp"
00061
00062 #define PRECISION 100000
00063
00064
00065
00066
00067
00068
00069
00070 class Emulator_1support : public Fimcp_basic {
00071 protected:
00072
00073 IntVar score;
00074
00075 public:
00076 Emulator_1support(const Options_fimcp& opt) :
00077 Fimcp_basic(opt) {
00078 run(opt);
00079 }
00080
00081 Emulator_1support(bool share, Emulator_1support& s) :
00082 Fimcp_basic(share, s) {
00083 score.update(*this, share, s.score);
00084 }
00085
00086
00087 virtual Space* copy(bool share) {
00088 return new Emulator_1support(share,*this);
00089 }
00090
00091
00092 virtual void constrain(const Space& _best);
00093
00094 virtual void run(const Options_fimcp& opt);
00095
00096
00097 virtual void print(std::ostream&) const;
00098 };
00099
00100 void Emulator_1support::constrain(const Space& _best) {
00101 const Emulator_1support* best =
00102 dynamic_cast<const Emulator_1support*>(&_best);
00103 if (best == NULL)
00104 throw DynamicCastFailed("OptimizeSpace::constrain");
00105
00106 int posTot = 0; int pos = 0;
00107 int negTot = 0; int neg = 0;
00108 for (int t=0; t!=nr_t; t++) {
00109 posTot += classes[t];
00110 if (classes[t])
00111 pos += best->transactions[t].val();
00112 else
00113 neg += best->transactions[t].val();
00114 }
00115 negTot = nr_t - posTot;
00116 int treshold = convex_function(posTot, pos, negTot, neg, PRECISION);
00117
00118
00119 rel(*this, score, IRT_GR, treshold);
00120 }
00121
00122 void Emulator_1support::run(const Options_fimcp& opt) {
00123 const vector< vector<bool> > tdb = common_construction(opt);
00124 if (classes.size() == 0)
00125 throw Exception("Class label error", "no class labels found");
00126
00127
00128 if (opt.cclause()) {
00129
00130 coverage_clause(tdb);
00131 } else {
00132 IntArgs row_(nr_i);
00133 for (int t=0; t!=nr_t; t++) {
00134
00135 for (int i=0; i!=nr_i; i++)
00136 row_[i] = (1-tdb[t][i]);
00137
00138
00139
00140 linear(*this, row_, items, IRT_EQ, 0, transactions[t]);
00141 }
00142 }
00143
00144
00145 {
00146 IntArgs col_(nr_t);
00147 for (int i=0; i!=nr_i; i++) {
00148
00149 for (int t=0; t!=nr_t; t++)
00150 col_[t] = (1-tdb[t][i]);
00151
00152
00153
00154 linear(*this, col_, transactions, IRT_EQ, 0, items[i]);
00155 }
00156 }
00157
00158
00159 {
00160
00161 score = IntVar(*this, (int)(opt.delta()*PRECISION), Gecode::Int::Limits::max);
00162
00163
00164 for (int i=0; i!=nr_i; i++) {
00165
00166 int posTot = 0; int pos = 0;
00167 int negTot = 0; int neg = 0;
00168 for (int t=0; t!=nr_t; t++) {
00169 posTot += classes[t];
00170 if (classes[t])
00171 pos += tdb[t][i];
00172 else if (classes[t] == 0)
00173 neg += tdb[t][i];
00174 else {
00175 throw Exception("Class label error",
00176 "illegal class label found, only '1' (pos) or '0' (neg) allowed");
00177 }
00178 }
00179 negTot = nr_t - posTot;
00180
00181 BoolVarArgs col_pos(pos);
00182 BoolVarArgs col_neg(neg);
00183 for (int t=0; t!=nr_t; t++) {
00184 if (tdb[t][i] == 1) {
00185 if (classes[t] == 1)
00186 col_pos[--pos] = transactions[t];
00187 else
00188 col_neg[--neg] = transactions[t];
00189 }
00190 }
00191
00192
00193 FconvexBool_emulator::imply_convex(*this, items[i], posTot, col_pos, negTot, col_neg, IRT_GQ, score, PRECISION);
00194 }
00195 }
00196
00197
00198 linear(*this, items, IRT_GQ, 1);
00199
00200
00201 int freq = opt.getFreq(nr_t);
00202 linear(*this, transactions, IRT_GQ, freq);
00203
00204
00205 branch(*this, items, (IntVarBranch)opt.branching(), (IntValBranch)opt.branchval());
00206 }
00207
00208
00209 void Emulator_1support::print(std::ostream& os) const {
00210 if (print_itemsets == PRINT_NONE) {
00211 return;
00212 } else if (print_itemsets == PRINT_FIMI) {
00213
00214 for (int i=0; i!=nr_i; i++) {
00215 if (items[i].val() == 1)
00216 fprintf(solfile, "%i ", i);
00217 }
00218
00219 int posTot = 0; int pos = 0;
00220 int negTot = 0; int neg = 0;
00221 for (int t=0; t!=nr_t; t++) {
00222 posTot += classes[t];
00223 if (classes[t])
00224 pos += transactions[t].val();
00225 else
00226 neg += transactions[t].val();
00227 }
00228 negTot = nr_t - posTot;
00229 float val = convex_function(posTot, pos, negTot, neg, PRECISION)/(float)PRECISION;
00230 fprintf(solfile, "(%i) [%.5f]\n", pos+neg, val);
00231 } else if (print_itemsets == PRINT_CPVARS) {
00232
00233 os << "\tscore = " << score << std::endl;
00234 os << "\tI[] = " << items << std::endl;
00235 os << "\tT[] = " << transactions << std::endl;
00236 }
00237 }
00238
00239 int main(int argc, char* argv[]) {
00240 Options_fimcp opt(strchr(argv[0],'/')+1);
00241 opt.freq(1);
00242 opt.delta(0);
00243 opt.alpha(1);
00244 opt.description("This model finds discriminative patterns, using information gain as measure and the 1-support bound");
00245 opt.usage("-datafile example.txt -freq 1\n\
00246 -datafile example.txt\t itemset file where last item is the class: 0 or 1\n\
00247 -alpha 1\t use branch-and-bound search for top-1 itemset\n\
00248 -alpha 0\t to find all patternsets given tresholds:\n\
00249 \t -delta 0.50\t minimal measure value");
00250 opt.parse(argc, argv);
00251
00252 if (opt.alpha() == 0) {
00253 fprintf(stdout, "Running DF search for ");
00254 Script::run<Emulator_1support,DFS,Options_fimcp>(opt);
00255 } else {
00256 fprintf(stdout, "Running BAB search for ");
00257 Script::run<Emulator_1support,BAB,Options_fimcp>(opt);
00258 }
00259 return 0;
00260 }
00261
00262 void Fimcp_basic::run(const Options_fimcp&) {};