35 #define LR_CLASS2LABEL(class) \ 36 ((class) == LR_CLASS_HAM ? "ham" : \ 37 (class) == LR_CLASS_SPAM ? "spam" : "unknown") 39 #define LR_LABEL2CLASS(label) \ 40 (STRCASEEQUAL((label), "spam") ? LR_CLASS_SPAM : \ 41 STRCASEEQUAL((label), "ham") ? LR_CLASS_HAM : \ 58 lr_evt_fill(evt, cmd, date,
class, fname)
67 memset(evt, 0,
sizeof (*evt));
70 strlcpy(evt->fname, fname, sizeof (evt->fname));
76 #define LR_EVT_FILL(evt, cmd, date, class, fname) \ 78 if ((evt) != NULL) { \ 79 memset((evt), 0, sizeof(*(evt))); \ 80 (evt)->date = (date); \ 81 (evt)->class = (class); \ 82 strlcpy((evt)->fname, (fname), sizeof((evt)->fname)); \ 98 #define PILE_INIT(p) \ 101 memset(p, 0, sizeof(p)); \ 112 static bool pile_sort(
pile_T * pile);
121 #define CLI_CLASSIFY 1 145 #define CLI_OPT_INIT {FALSE, FALSE, 0.2, 0.05, NULL, -1., -1., FALSE} 149 static bool decode_lr_options(
lr_opts_T * lrOpts,
char *optarg);
/*
 * cli_lr_learn - forward declaration of the learning-mode driver.
 *
 * fileIn   : path of the command file; the definition opens it with
 *            fopen(fileIn, "r") and reads whitespace-separated triples
 *            (three %s tokens per record) from it.
 * dataFile : second path argument; main() passes its fileData variable here.
 *            NOTE(review): how it is used is not visible in this excerpt.
 * opt      : command-line options; the definition reads at least the
 *            pmiss field from it.
 *
 * The int result is returned unchanged by main().
 */
151 static int cli_lr_learn(
char *fileIn,
char *dataFile,
cli_opt_T * opt);
/*
 * cli_lr_classify - forward declaration of the classification-mode driver.
 *
 * fileIn   : path of the command file; the definition opens it with
 *            fopen(fileIn, "r") and loops while fscanf() yields three
 *            %s tokens per record.
 * dataFile : second path argument; main() passes its fileData variable here.
 *            NOTE(review): how it is used is not visible in this excerpt.
 *
 * For each record a line with judge/class/score/prob fields is logged
 * through ZE_MessageInfo. The int result is returned unchanged by main().
 */
152 static int cli_lr_classify(
char *fileIn,
char *dataFile);
/*
 * cli_lr_simul - forward declaration taking the same argument list as
 * cli_lr_learn (input file path, data file path, CLI options).
 *
 * NOTE(review): neither a definition nor a call is visible in this excerpt;
 * presumably this backs the "simulate" value of the -x option listed in the
 * usage text - confirm before relying on it.
 */
154 static int cli_lr_simul(
char *fileIn,
char *dataFile,
cli_opt_T * opt);
/*
 * File-scope learning rate, defaulting to 0.004. Option parsing in main()
 * overwrites it with atof(optarg).
 * NOTE(review): presumably this is the -R "asymptotic learning rate" option
 * from the usage text - confirm against the getopt switch.
 */
157 static double lrate = 0.004;
165 char *fileData =
"/tmp/lr.txt";
177 const char *args =
"hi:d:lat:rR:m:x:o:";
181 while ((c = getopt(argc, argv, args)) != -1)
211 cliopt.
pmiss = atof(optarg);
215 lrate = atof(optarg);
222 if (!decode_lr_options(&lrOpts, optarg))
233 while (io < argc && *argv[io] ==
'-')
256 return cli_lr_learn(fileIn, fileData, &cliopt);
263 return cli_lr_classify(fileIn, fileData);
271 return cli_lr_learn(fileIn, fileData, &cliopt);
273 return cli_lr_classify(fileIn, fileData);
285 cli_lr_classify(fileIn, dataFile)
289 char *
id =
"000000.000";
291 FILE *fdata = NULL, *fin = NULL;
301 fin = fopen(fileIn,
"r");
317 int nham = 0, nspam = 0;
324 memset(&cargs, 0,
sizeof (cargs));
325 memset(&margs, 0,
sizeof (margs));
326 memset(&mscore, 0,
sizeof (mscore));
328 memset(stime, 0,
sizeof (stime));
329 memset(sclass, 0,
sizeof (sclass));
330 memset(sfile, 0,
sizeof (sfile));
332 while (fin != NULL && fscanf(fin,
"%s %s %s", stime, sclass, sfile) == 3)
342 ZE_MessageInfo(10,
"%s judge=%-4s class=%-4s score=%.4f prob=%.4f",
344 spam ?
"spam" :
"ham",
345 mscore.
odds > 0.0 ?
"spam" :
"ham",
363 cli_lr_learn(fileIn, dataFile, cliopt)
368 char *
id =
"000000.000";
370 FILE *fdata = NULL, *fin = NULL;
373 double pmiss = cliopt->
pmiss;
390 fin = fopen(fileIn,
"r");
414 int nham = 0, nspam = 0;
415 int pham = 0, pspam = 0;
422 time_t t_last, t_now;
430 if ((env = getenv(
"LRRESAMPLEWIN")) != NULL)
440 memset(&mscore, 0,
sizeof (mscore));
441 memset(&cargs, 0,
sizeof (cargs));
442 memset(&margs, 0,
sizeof (margs));
444 memset(stime, 0,
sizeof (stime));
445 memset(sclass, 0,
sizeof (sclass));
446 memset(sfile, 0,
sizeof (sfile));
456 if (fscanf(fin,
"%s %s %s", stime, sclass, sfile) != 3)
472 ok =
lr_learn(
id, fname, &cargs, &margs, &mscore, spam);
476 "%s classification : %8.4f %.8f judge=%-4s class=%-4s" 477 " learn=%s query=%s features=%d" 478 " score=%g prob=%.6f",
489 memset(&evt, 0,
sizeof (evt));
495 pspam = nspam % nbMax;
510 if (nspam == 0 || nham == 0)
517 i = random() % nbMax;
519 i = random() % nspam;
520 fname = eSpam[i].
fname;
524 i = random() % nbMax;
527 fname = eHam[i].
fname;
531 lr_learn(
id, fname, &cargs, &margs, &mscore, spam);
558 learn_callback(i, cargs, margs)
566 static int srate = 2;
567 static double ri = 0.5, rf = 0.004, teta = -0.005;
573 if ((s = getenv(
"LRATEDEFS")) != NULL)
579 memset(argv, 0,
sizeof (argv));
580 strlcpy(ebuf, s,
sizeof (ebuf));
583 srate = atof(argv[0]);
592 teta = atof(argv[3]);
603 r = ri / sqrt(i + 1);
608 r = (ri - rf) * exp(teta * i) + rf;
632 if (pile->n >=
DPILE - 1)
636 evt->
serial = pile->serial;
637 pile->p[pile->n] = *evt;
654 *evt = pile->p[pile->n - 1];
661 pile_shift(pile, evt)
675 for (i = 0; i < pile->n - 1; i++)
676 pile->p[i] = pile->p[i + 1];
684 pile_check_top(pile, evt)
694 *evt = pile->p[pile->n - 1];
700 pile_check_bottom(pile, evt)
716 lrevtcmp(
const void *ea,
const void *eb)
737 qsort(pile->p, pile->n, sizeof (
learn_evt_T), lrevtcmp);
747 decode_lr_options(lrOpts, optarg)
758 strlcpy(buf, optarg,
sizeof buf);
815 printf(
"Usage : %s options\n" 816 " -h : this message\n" 817 " -l : toggle learn option\n" 818 " -i : input file with commands\n" 820 " -a : active learning\n" 821 " -t : active learning threshold (0.5 - margin)\n" 823 " -m : feedback miss probability ([0.0, 1.0])\n" 824 " -R : asymptotic learning rate\n" 825 " -x mode : where mode in learn, class, simulate\n", arg);
bool lr_data_open(char *fname)
#define LR_OPTS_INITIALIZER
bool lr_data_dump(char *fname)
bool lr_set_learn_callback(lr_callback_F)
int main(int argc, char **argv)
#define STRNCASEEQUAL(a, b, n)
bool lr_learn_options(bool active, double threshold)
bool lr_classify(char *id, char *fname, lr_cargs_T *cargs, lr_margs_T *margs, test_score_T *score)
void zeLog_SetOutput(bool, bool)
int zeStr2Tokens(char *, int, char **, char *)
bool lr_learn(char *id, char *fname, lr_cargs_T *cargs, lr_margs_T *margs, test_score_T *score, bool spam)
#define ZE_MessageInfo(level,...)
#define ZE_LogSysError(...)
int configure(char *, char *, bool)
#define STRCASEEQUAL(a, b)
time_t zeStr2time(char *s, int *error, time_t dval)