39 static bool bfilter_db2bf(
bfilter_T * bf);
52 #define BFILTER_LOCK() MUTEX_LOCK(&(bfilter.mutex)) 53 #define BFILTER_UNLOCK() MUTEX_UNLOCK(&(bfilter.mutex)) 85 memset(k, 0,
sizeof (k));
86 memset(v, 0,
sizeof (v));
88 snprintf(k,
sizeof (k),
"%s:%s",
"count",
"msgs");
97 n = sscanf(v,
"%d %d", &ns, &nh);
107 bf->kms = bf->kmh = 1.;
110 if (nh == 0 || nh > ns)
111 bf->kms = (double) nh / (
double) ns;
112 if (ns == 0 || nh < ns)
113 bf->kmh = (double) ns / (
double) nh;
117 snprintf(k,
sizeof (k),
"%s:%s",
"count",
"tokens");
126 n = sscanf(v,
"%d %d", &ns, &nh);
133 bf->nbTokensHam = nh;
134 bf->nbTokensSpam = ns;
135 bf->kts = bf->kth = 1.;
138 if (nh == 0 || nh > ns)
139 bf->kts = (double) nh / (
double) ns;
140 if (ns == 0 || nh < ns)
141 bf->kth = (double) ns / (
double) nh;
145 snprintf(k,
sizeof (k),
"%s:%s",
"count",
"features");
154 n = sscanf(v,
"%d %d", &ns, &nh);
161 bf->nbFeaturesHam = nh;
162 bf->nbFeaturesSpam = ns;
163 bf->kfs = bf->kfh = 1.;
166 if (nh == 0 || nh > ns)
167 bf->kfs = (double) nh / (
double) ns;
168 if (ns == 0 || nh < ns)
169 bf->kfh = (double) ns / (
double) nh;
173 memset(k, 0,
sizeof (k));
174 memset(v, 0,
sizeof (v));
175 snprintf(k,
sizeof (k),
"%s:%s",
"crypt",
"tokens");
215 if ((bf->
dbname = strdup(dbname)) == NULL)
226 res = bfilter_db2bf(bf);
231 env = getenv(
"CLASSIFIER");
244 for (i = 0; i < argc; i++)
290 env = getenv(
"SEGMENTER");
303 for (i = 0; i < argc; i++)
346 env = getenv(
"TOKENIZER");
359 for (i = 0; i < argc; i++)
373 for (j = 0; j < xargc; j++)
382 for (j = 0; j < xargc; j++)
408 bool old = bf->
logit;
637 res = bf->
ok = bfilter_db2bf(bf);
698 #define HW(x) ((x) * log((x)) + (1. - (x)) * log(1. - (x))) 706 p = nts / (nts + nth);
709 ig = ((nts + nth) * (1 -
HW(p))) / (nms + nmh);
711 ig += ((nts + nth) *
HW(p)) / (nms + nmh);
716 p = nts / (nts + nth);
717 ig += (nts + nth) *
HW(p) / (nms + nmh);
734 if (key == NULL || strlen(key) == 0)
748 memset(k, 0,
sizeof (k));
749 memset(v, 0,
sizeof (v));
758 memset(buf, 0,
sizeof (buf));
760 snprintf(k,
sizeof (k),
"%s:%s",
"token", buf);
764 snprintf(k,
sizeof (k),
"%s:%s",
"token", key);
779 if (sscanf(v,
"%d %d", &ns, &nh) < 2)
794 prob = (ks * dns + 0.5) / (kh * dnh + ks * dns + 1.);
796 prob = (ks * dns + 0.5) / (kh * dnh * bf->
rhs + ks * dns + 1.);
803 tnm = kh * dnh + ks * dns;
808 prob = (q != 0 ? p / q : 0.5);
809 prob = (1 *
UT_PROB + tnm * prob) / (1 + tnm);
820 token->
value = fabs(ig);
822 token->
value = fabs(prob - 0.5);
865 char *skey =
STRNULL(prefix,
"");
867 memset(k, 0,
sizeof (k));
868 memset(d, 0,
sizeof (d));
870 snprintf(k,
sizeof (k),
"%s", skey);
878 if (strncasecmp(k, skey, strlen(skey)) != 0)
int(* smodel_db_browse_F)(void *, void *, void *)
bool set_bfilter_nb_tokens(int nbt)
bool set_bfilter_max_sizes(size_t msg, size_t mime)
bfilter_T * bfilter_ptr()
uint32_t get_bfilter_flags()
double get_bfilter_ham_spam_ratio()
bool smodel_db_check_token(char *key, sfilter_token_T *token)
void set_tokconf_active(char *tag, bool active)
#define DB_BTREE_SEQ_START()
double information_gain(double nms, double nmh, double nts, double nth)
bool set_bfilter_unknown_token_prob(double prob)
#define ZE_LogMsgError(level,...)
bool zeDb_CursorGetFirst(ZEDB_T *h, char *, size_t, void *, size_t)
bool zeStrRegex(char *, char *, long *, long *, bool)
bool bfilter_init(char *dbname)
int zeStr2Tokens(char *, int, char **, char *)
size_t get_bfilter_max_sizes()
bool zeDb_Open(ZEDB_T *h, ZEDB_ENV_T *, char *, int, bool, bool, size_t)
bool str2hash2hex(int code, char *sout, char *sin, size_t szout)
int get_bfilter_nb_tokens()
bool zeDb_Close(ZEDB_T *h)
#define DB_BTREE_SEQ_END()
char * hash_code2label(int code)
#define ZE_MessageInfo(level,...)
bool zeDb_GetRec(ZEDB_T *h, char *, void *, size_t)
bool set_bfilter_ham_spam_ratio(double ratio)
double get_bfilter_unknown_token_prob()
#define BFILTER_INITIALIZER
#define ZE_LogSysError(...)
bool zeDb_CursorOpen(ZEDB_T *h, bool)
bool set_bfilter_logit(bool enable)
#define ZE_LogMsgWarning(level,...)
void smodel_db_info(char *prefix, smodel_db_browse_F func, void *arg)
#define DB_BTREE_SEQ_CHECK(key, dbname)
char * zeStr2Lower(char *)
bool zeDb_CursorClose(ZEDB_T *h)
#define STRCASEEQUAL(a, b)
bool set_bfilter_db_crypt(int crypt)
uint32_t set_bfilter_flags(uint32_t flags)
int hash_label2code(char *label)
bool zeDb_CursorGetNext(ZEDB_T *h, char *, size_t, void *, size_t)
int get_bfilter_db_crypt()