/*----------------------------------------------------------------------*\ | Distance routines for modern analog method | | | | The distance functions have three arguments, two sample pointers and | | a match table. Note that the match table must have been constructed | | through a call to create_match_table of the form | | m = create_match_table (p,q); | | where p == s1->data_base and q == s2->data_base, i.e. the order of | | arguments is critical, and the sample pointed to by the first | | argument passed to the distance function must belong tothe data base | | pointed to by the first argument passed to create_match_table. | | | | Peter N. Schweitzer (U.S. Geological Survey, Reston, VA 22092) | \*----------------------------------------------------------------------*/ #include #include #include "analog.h" static double manhattan (struct sample *s1, struct sample *s2, struct match *m) { double *d1,*d2; double d = 0.0; d1 = s1->data; d2 = s2->data; while (m->p >= 0) { double e,f; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) d += fabs(e - f); m++; } return (d); } static double squared_euclidean (struct sample *s1, struct sample *s2, struct match *m) { double *d1,*d2; double d = 0.0; d1 = s1->data; d2 = s2->data; while (m->p >= 0) { double e,f,g; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) { g = e - f; d += g*g; } m++; } return (d); } static double euclidean (struct sample *s1, struct sample *s2, struct match *m) { return (sqrt (squared_euclidean (s1,s2,m))); } static double canberra (struct sample *s1, struct sample *s2, struct match *m) { double *d1,*d2; double d = 0.0; d1 = s1->data; d2 = s2->data; while (m->p >= 0) { double e; double f; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) d += fabs ((e-f)/(e+f)); m++; } return (d); } static double squared_chord (struct sample *s1, struct sample *s2, struct match *m) { double *d1,*d2; double d = 0.0; d1 = s1->data; d2 = s2->data; while (m->p >= 0) { double e,f,g; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) { g = sqrt(e) - sqrt(f); d += g*g; } m++; } return (d); } static double squared_chisquared (struct sample *s1, struct sample *s2, struct match *m) { double *d1,*d2; double d = 0.0; d1 = s1->data; d2 = s2->data; while (m->p >= 0) { double e; double f; double g; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) { g = e - f; d += g*g/(e+f); } m++; } return (d); } static double sorensen (struct sample *s1, struct sample *s2, struct match *m) { int i,j; double *d1,*d2; int a,b,c; double d = 0.0; d1 = s1->data; d2 = s2->data; a = b = c = 0; while (m->p >= 0) { double e,f; e = d1[m->p]; f = d2[m->q]; i = j = 0; if (e != MISSING_VALUE && e != 0.0) i = 1; if (f != MISSING_VALUE && f != 0.0) j = 1; a += i; b += j; c += (i+j)/2; /* int arithmetic; zero unless both i and j are 1 */ m++; } d = ((double) 2*c)/(double)(a + b); return (d); } static double jaccard (struct sample *s1, struct sample *s2, struct match *m) { int i,j,match_count,mismatch_count; double *d1,*d2; double d = 0.0; d1 = s1->data; d2 = s2->data; match_count = 0; mismatch_count = 0; while (m->p >= 0) { double e,f; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) { i = (e != 0.0); j = (f != 0.0); switch (i+j) { case 2: match_count++; break; case 1: mismatch_count++; break; case 0: break; } } m++; } d = ((double) match_count)/(double)(match_count + mismatch_count); return (d); } static double bob (struct sample *s1, struct sample *s2, struct match *m) { int i,j,match_count,mismatch_count; int fossil_count,modern_count; double *d1,*d2; double d = 0.0; d1 = s1->data; d2 = s2->data; fossil_count = 0; modern_count = 0; match_count = 0; mismatch_count = 0; while (m->p >= 0) { double e,f; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) { i = (e != 0.0); j = (f != 0.0); fossil_count += i; modern_count += j; switch (i+j) { case 2: match_count++; break; case 1: mismatch_count++; break; case 0: break; } } m++; } d = ((double) match_count)/(double)(fossil_count); return (d); } static double simple_matching (struct sample *s1, struct sample *s2, struct match *m) { int i,j,match_count,mismatch_count; double *d1,*d2; double d = 0.0; int A,B,C,D; d1 = s1->data; d2 = s2->data; A = B = C = D = 0; match_count = 0; mismatch_count = 0; while (m->p >= 0) { double e,f; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) { i = (e != 0.0); j = (f != 0.0); if (i) if (j) A++; else B++; else if (j) C++; else D++; } m++; } d = ((double) A+D)/(double)(A+B+C+D); return (d); } static double dot_product (struct sample *s1, struct sample *s2, struct match *m) { double *d1,*d2; double d = 0.0; d1 = s1->data; d2 = s2->data; while (m->p >= 0) { double e,f; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) { d += e*f; } m++; } return (d); } static double correlation (struct sample *s1, struct sample *s2, struct match *m) { double *d1,*d2; double d = 0.0; double sx,sy,sxx,syy,sxy,rn,vx,vy; int k = 0; d1 = s1->data; d2 = s2->data; while (m->p >= 0) { double e,f; e = d1[m->p]; f = d2[m->q]; if (e != MISSING_VALUE && f != MISSING_VALUE) { sx += e; sy += f; sxx += e*e; syy += f*f; sxy += e*f; k++; } m++; } rn = (double)k; vx = (sxx - sx*sx/rn); vy = (syy - sy*sy/rn); if (k && vx > 0.0 && vy > 0.0) d = (sxy - sx*sy/rn)/sqrt(vx*vy); return (d); } /*----------------------------------------------------------------------*\ | set_distance_function returns a pointer to the distance function | | whose name is given as the argument. | \*----------------------------------------------------------------------*/ struct d_function_table { char *name; distance_function function; enum measure_type_t type; }; static struct d_function_table table[] = { {"manhattan", manhattan, DISTANCE}, {"euclidean", euclidean, DISTANCE}, {"squared_euclidean", squared_euclidean, DISTANCE}, {"canberra", canberra, DISTANCE}, {"squared_chord", squared_chord, DISTANCE}, {"squared_chisquared", squared_chisquared, DISTANCE}, {"jaccard", jaccard, SIMILARITY}, {"bob", bob, SIMILARITY}, {"sorensen", sorensen, SIMILARITY}, {"simple_matching", simple_matching, SIMILARITY}, {"dot_product", dot_product, SIMILARITY}, {"correlation", correlation, SIMILARITY}, {NULL, NULL, DISTANCE} }; distance_function set_distance_function (char *name) { int i; for (i=0; table[i].name; i++) if (stricmp (name,table[i].name) == 0) break; return (table[i].function); } enum measure_type_t get_measure_type (char *name) { int i; for (i=0; table[i].name; i++) if (stricmp (name,table[i].name) == 0) break; return (table[i].type); } /*----------------------------------------------------------------------*\ \*----------------------------------------------------------------------*/