/*----------------------------------------------------------------------*\ | Data input routines | | | | Peter N. Schweitzer (U.S. Geological Survey, Reston, VA 22092) | \*----------------------------------------------------------------------*/ #include #include #include #include #include #include "analog.h" #ifdef THINK_C extern FILE *Mac_fopen (char *name, char *mode); #define FOPEN Mac_fopen #else #define FOPEN fopen #endif #define MAX_DATA_FILE_LINE_LENGTH 8192 static char line [MAX_DATA_FILE_LINE_LENGTH]; /*----------------------------------------------------------------------*\ | Tab-delimited tables will have the following form and rules: | | | | The first row of the file will contain the names of the variables | | described by the columns. Every variable represented in the data | | must have a name, although the name may be blank. | | | | The first column (i.e. all characters up to the first tab) of each | | row will contain a sample identifier which will be stored as a | | character string, so it may contain letters, numbers, or both. | | | | The second and subsequent columns of each row contain numerical | | values that can be stored in double variables. If a cell contains | | both letters and numbers and the numbers come first, the program | | will interpret the numbers and skip everything that follows the | | first letter. | | | | Blank lines may occur anywhere in the file; they will be ignored. | \*----------------------------------------------------------------------*/ static void read_raw_data_from_tab_table (struct data_base *p) { int i,j,n; int line_count; FILE *in; char *s,*b,*e; char **ss; double *dd; if (in = FOPEN(p->raw.filespec,"r")) { line_count = 0; /*--------------------------------------------------------------*\ | Find the first nonblank line. Assume that it contains the | | names of the variables, separated by tabs. | \*--------------------------------------------------------------*/ *line = 0; while (*line == 0) { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) return; line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; for (s=line; *s && isspace (*s); s++); if (*s == 0) *line = 0; } if (p->raw.name_buffer = (char *) malloc (1 + strlen(line))) { strcpy (p->raw.name_buffer,line); } else { sprintf (message,"Error: unable to allocate raw name buffer"); error_exit (message); } /*--------------------------------------------------------------*\ | Count the tab-delimited cells, then allocate an array of | | pointers to the cell names. Parse the line by tabs, storing | | the cell names in dynamically allocated space. | | | | Note that the number of variables is the same as the number | | of tabs, since characters up to the first tab are used as | | the sample identifier and are not stored in the data array. | \*--------------------------------------------------------------*/ for (i=0,s=p->raw.name_buffer; *s; s++) if (*s == '\t') i++; p->raw.count = i; if (p->raw.name = (char **) malloc (p->raw.count * sizeof (char *))) { ss = p->raw.name; /*----------------------------------------------------------*\ | Skip the first name, since by convention, it refers to | | the sample identifier. | \*----------------------------------------------------------*/ b = e = p->raw.name_buffer; while (*e && *e != '\t') e++; s = b; b = e; if (*e) b++; *e = 0; if (verbose) printf ("id name is \"%s\"\n",s); /*----------------------------------------------------------*\ | Store all of the other names in the names array. | \*----------------------------------------------------------*/ while (*b) { e = b; while (*e && *e != '\t') e++; *ss++ = b; b = e; if (*e) b++; *e = 0; } } else { sprintf (message,"Error: could not allocate memory for variable names in %s",p->raw.filespec); error_exit (message); } /*--------------------------------------------------------------*\ | Strip trailing blanks from the cell names. | | Replace underscores with spaces. | \*--------------------------------------------------------------*/ for (i=0; i < p->raw.count; i++) { b = p->raw.name[i]; if (*b) { for (e=b; *e; e++) if (*e == '_') *e = ' '; e = b + strlen(b) - 1; while (e > b && isspace (*e)) *e-- = 0; } } /*--------------------------------------------------------------*\ | Optionally output the cell names. | \*--------------------------------------------------------------*/ if (verbose) { printf ("Cell names from input file %s\n",p->raw.filespec); for (i=0; i < p->raw.count; i++) printf (" %d: \"%s\"\n",i,p->raw.name[i]); } /*--------------------------------------------------------------*\ | Read the data from the rest of the rows in the file. | \*--------------------------------------------------------------*/ if (!p->sample) if (p->sample = (struct sample *) malloc (GRANULARITY * sizeof (struct sample))) p->limit = GRANULARITY; else { sprintf (message,"Error: could not allocate sample array for input file %s",p->raw.filespec); error_exit (message); } n = 0; while (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in)) { line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; if (s > line) { /*------------------------------------------------------*\ | Each row begins with a sample identifier, which is | | stored as a character string. | \*------------------------------------------------------*/ b = e = line; while (*e && *e != '\t') e++; if (e - b >= MAX_NAME_LENGTH) { memcpy (p->sample[n].id,b,MAX_NAME_LENGTH - 1); p->sample[n].id [MAX_NAME_LENGTH - 1] = 0; } else { memcpy (p->sample[n].id,b,e-b); p->sample[n].id [e-b] = 0; } b = e; if (*e) b++; *e = 0; /*------------------------------------------------------*\ | Subsequent cells either contain decimal numbers or | | are empty. | \*------------------------------------------------------*/ if (dd = (double *) malloc (p->raw.count * sizeof (double))) { memset (dd,0,p->raw.count * sizeof(double)); p->sample[n].raw = dd; p->sample[n].data = NULL; p->sample[n].meta = NULL; p->sample[n].meta_buffer = NULL; p->sample[n].data_base = p; j = 0; while (*b) { e = b; while (*e && *e != '\t') e++; s = b; b = e; if (*e) b++; *e = 0; /*----------------------------------------------*\ | There is no convention here for missing data | | although one could be coded. For example, | | we could say that if s contains no digits, | | then *dd++ = MISSING_VALUE. But some people | | prefer to use blank cells as zero values. | | Actually, MISSING_VALUE should be used only | | when the taxon was not counted. | \*----------------------------------------------*/ *dd++ = strtod (s,NULL); j++; if (j >= p->raw.count && *b) { sprintf (message,"Warning: extra data at end of line %d in file %s: \"%s\"", line_count, p->raw.filespec, b ); warning (message); break; } } /*------------------------------------------------------*\ | For debugging only, output the cell values. | \*------------------------------------------------------*/ if (verbose) { printf ("%s",p->sample[n].id); for (i=0; i < p->raw.count; i++) printf ("\t%5.2lf",p->sample[n].raw[i]); printf ("\n"); } /*------------------------------------------------------*\ | Increment the sample counter, and enlarge p->sample | | if necessary. | \*------------------------------------------------------*/ n++; if (n >= p->limit) { if (p->sample = (struct sample *) realloc (p->sample,(p->limit + GRANULARITY)*sizeof(struct sample))) p->limit += GRANULARITY; else { sprintf (message,"Error: could not enlarge sample array for input file %s",p->raw.filespec); error_exit (message); } } } else { sprintf (message,"Error: could not allocate raw data array for sample %s of input file %s", p->sample[n].id, p->raw.filespec ); error_exit (message); } } } p->count = n; fclose (in); } else { sprintf (message,"Warning: could not open raw data file \"%s\"",p->raw.filespec); warning (message); } } static void read_meta_data_from_tab_table (struct data_base *p) { int i,j,n; int line_count; FILE *in; char *s,*b,*e; char **ss; char meta_id[MAX_NAME_LENGTH]; if (in = FOPEN(p->meta.filespec,"r")) { line_count = 0; /*--------------------------------------------------------------*\ | Find the first nonblank line. Assume that it contains the | | names of the variables, separated by tabs. | \*--------------------------------------------------------------*/ *line = 0; while (*line == 0) { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) return; line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; for (s=line; *s && isspace (*s); s++); if (*s == 0) *line = 0; } if (p->meta.name_buffer = (char *) malloc (1 + strlen(line))) { strcpy (p->meta.name_buffer,line); } else { sprintf (message,"Error: unable to allocate meta name buffer"); error_exit (message); } /*--------------------------------------------------------------*\ | Count the tab-delimited cells, then allocate an array of | | pointers to the cell names. Parse the line by tabs, storing | | the cell names in dynamically allocated space. | | | | Note that the number of variables is the same as the number | | of tabs, since characters up to the first tab are used as | | the sample identifier and are not stored in the data array. | \*--------------------------------------------------------------*/ for (i=0,s=p->meta.name_buffer; *s; s++) if (*s == '\t') i++; p->meta.count = i+1; if (p->meta.name = (char **) malloc (p->meta.count * sizeof (char *))) { ss = p->meta.name; /*----------------------------------------------------------*\ | Store all of the names in the names array. | \*----------------------------------------------------------*/ b = e = p->meta.name_buffer; while (*b) { e = b; while (*e && *e != '\t') e++; *ss++ = b; b = e; if (*e) b++; *e = 0; } } else { sprintf (message,"Error: could not allocate memory for variable names in %s",p->meta.filespec); error_exit (message); } /*--------------------------------------------------------------*\ | Strip trailing blanks from the cell names. | | Replace underscores with spaces. | \*--------------------------------------------------------------*/ for (i=0; i < p->meta.count; i++) { b = p->meta.name[i]; if (*b) { for (e=b; *e; e++) if (*e == '_') *e = ' '; e = b + strlen(b) - 1; while (e > b && isspace (*e)) *e-- = 0; } } /*--------------------------------------------------------------*\ | Optionally output the cell names. | \*--------------------------------------------------------------*/ if (verbose) { printf ("Meta names from input file %s\n",p->meta.filespec); for (i=0; i < p->meta.count; i++) printf (" %d: \"%s\"\n",i,p->meta.name[i]); } /*--------------------------------------------------------------*\ | Read the data from the rest of the rows in the file. | \*--------------------------------------------------------------*/ if (!p->sample) if (p->sample = (struct sample *) malloc (GRANULARITY * sizeof (struct sample))) p->limit = GRANULARITY; else { sprintf (message,"Error: could not allocate sample array for input file %s",p->meta.filespec); error_exit (message); } while (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in)) { line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; if (s > line) { /*------------------------------------------------------*\ | Assume that the first cell contains the sample id. | \*------------------------------------------------------*/ b = e = line; while (*e && *e != '\t') e++; if (e - b >= MAX_NAME_LENGTH) { memcpy (meta_id,b,MAX_NAME_LENGTH - 1); meta_id [MAX_NAME_LENGTH - 1] = 0; } else { memcpy (meta_id,b,e-b); meta_id [e-b] = 0; } /*------------------------------------------------------*\ | Find n such that p->sample[n].id equals meta_id. | | If there is no such sample, discard this line. | | The samples are not sorted; this is a linear search. | \*------------------------------------------------------*/ for (n=0; n < p->count; n++) if (strcmp (p->sample[n].id,meta_id) == 0) break; if (n < p->count) { /*--------------------------------------------------*\ | Store the meta data as a single character buffer | \*--------------------------------------------------*/ if (p->sample[n].meta_buffer = (char *) malloc (1 + strlen (line))) { strcpy (p->sample[n].meta_buffer,line); } else { sprintf (message,"Error: could not allocate buffer for meta data"); error_exit (message); } /*--------------------------------------------------*\ | Create an array of char pointers which point at | | parts of the meta data buffer just allocated. | \*--------------------------------------------------*/ if (ss = (char **) malloc (p->meta.count * sizeof (char *))) { memset (ss,0,p->meta.count * sizeof (char *)); p->sample[n].meta = ss; j = 0; b = e = p->sample[n].meta_buffer; while (*b) { e = b; while (*e && *e != '\t') e++; *ss++ = b; b = e; if (*e) b++; *e = 0; j++; if (j >= p->meta.count && *b) { sprintf (message,"Warning: extra meta data at end of line %d in file %s: \"%s\"", line_count, p->meta.filespec, b ); warning (message); break; } } /*----------------------------------------------*\ | For debugging only, output the cell values. | \*----------------------------------------------*/ if (verbose) { printf ("%3d",n); for (i=0; i < p->meta.count; i++) printf ("\t%s",p->sample[n].meta[i]); printf ("\n"); } } else { sprintf (message,"Error: could not allocate meta data pointer array for sample %s of input file %s", p->sample[n].id, p->meta.filespec ); error_exit (message); } } else { sprintf (message,"Warning: found meta data but no numerical data for sample \"%s\" in %s", meta_id, p->meta.filespec ); warning (message); } } } for (i=0; i < p->count; i++) if (!p->sample[i].meta) { sprintf (message,"Warning: no meta data found for sample \"%s\" in %s", p->sample[i].id, p->meta.filespec ); warning (message); } fclose (in); } else if (*p->meta.filespec) { sprintf (message,"Warning: could not open meta data file \"%s\"",p->meta.filespec); warning (message); } } /*----------------------------------------------------------------------*\ | Specmap files have the format described in specmap.070. Meta data | | and raw data are mixed together in the same file, but the format is | | so prescribed that we can use names declared statically here. | \*----------------------------------------------------------------------*/ static char specmap_meta_names[] = "\ Ship code\t\ Cruise Number\t\ Core Number\t\ Latitude degrees\t\ Latitude minutes\t\ Latitude tenths\t\ Longitude degrees\t\ Longitude minutes\t\ Longitude tenths\t\ Marsden-10\t\ Marsden-1\t\ Water Depth\t\ Core Length\t\ Ocean Area\t\ Core Type\t\ Depth in Core\t\ SST August\t\ SST February"; static int master_card_meta_length[] = {2,3,3,-4,4,4,4,4,4,4,4,4,4,4,4,-17,1,0}; static char specmap_raw_names[] = "\ O. universa\t\ G. conglobatus\t\ G. ruber (pink)\t\ G. ruber (white)\t\ G. ruber (total)\t\ G. tenellus\t\ G. sacculifer (no sac)\t\ G. sacculifer (with sac)\t\ G. sacculifer (total)\t\ S. dehiscens\t\ G. adamsi\t\ G. aequilateralis\t\ G. calida\t\ G. bulloides\t\ G. falconesis\t\ G. digitata\t\ G. rubescens\t\ G. humilis\t\ G. quinqueloba\t\ G. pachyderma (L)\t\ G. pachyderma (R)\t\ G. dutertrei\t\ G. conglomerata\t\ G. hexagona\t\ P. obliquiloculata\t\ G. inflata\t\ G. truncatulinoides (L)\t\ G. truncatulinoides (R)\t\ G. crassaformis\t\ P-D intergrade\t\ G. hirsuta\t\ G. scitula\t\ G. anfracta\t\ G. menardii\t\ G. tumida\t\ G. m. flexuosa\t\ G. menardii complex (total)\t\ C. nitida\t\ G. glutinata\t\ G. iota\t\ G. bradyi\t\ G. pumilio\t\ H. pelagica\t\ H. digitata\t\ Other"; static void read_raw_data_from_specmap_file (struct data_base *p) { int i,j,m,n; char *b,*e,*s,*t; char **ss; double *dd; FILE *in; int line_count = 0; if (in = FOPEN(p->raw.filespec,"r")) { if (p->raw.name_buffer = (char *) malloc (1 + strlen(specmap_raw_names))) { strcpy (p->raw.name_buffer,specmap_raw_names); } else { sprintf (message,"Error: unable to allocate raw name buffer"); error_exit (message); } /*--------------------------------------------------------------*\ | Count the tab-delimited cells, then allocate an array of | | pointers to the cell names. Parse the line by tabs, storing | | the cell names in dynamically allocated space. | \*--------------------------------------------------------------*/ for (i=0,s=p->raw.name_buffer; *s; s++) if (*s == '\t') i++; p->raw.count = i+1; if (p->raw.name = (char **) malloc (p->raw.count * sizeof (char *))) { ss = p->raw.name; b = e = p->raw.name_buffer; while (*b) { e = b; while (*e && *e != '\t') e++; *ss++ = b; b = e; if (*e) b++; *e = 0; } } else { sprintf (message,"Error: could not allocate memory for variable names in %s",p->raw.filespec); error_exit (message); } /*--------------------------------------------------------------*\ | Optionally output the cell names. | \*--------------------------------------------------------------*/ if (verbose) { printf ("Cell names from input file %s\n",p->raw.filespec); for (i=0; i < p->raw.count; i++) printf (" %d: \"%s\"\n",i,p->raw.name[i]); } /*--------------------------------------------------------------*\ | Read the data from the rows in the file. | \*--------------------------------------------------------------*/ if (!p->sample) if (p->sample = (struct sample *) malloc (GRANULARITY * sizeof (struct sample))) p->limit = GRANULARITY; else { sprintf (message,"Error: could not allocate sample array for input file %s",p->raw.filespec); error_exit (message); } n = 0; while (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in)) { line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; if (s > line) { if (strlen(line) >= 79 && line[72] == 'F' && line[79] == '1') { /*--------------------------------------------------*\ | At data card 1, set up new sample data array and | | compose the sample id from the first 12 bytes. | \*--------------------------------------------------*/ if (dd = (double *) malloc (p->raw.count * sizeof (double))) { memset (dd,0,p->raw.count * sizeof(double)); p->sample[n].raw = dd; p->sample[n].data = NULL; p->sample[n].data_base = p; p->sample[n].meta = NULL; p->sample[n].meta_buffer = NULL; /*----------------------------------------------*\ | id = ship + cruise + core + depth | \*----------------------------------------------*/ memcpy (p->sample[n].id,line,12); p->sample[n].id[12] = 0; /*----------------------------------------------*\ | Read raw data values from card 1. | \*----------------------------------------------*/ t = line + 12; while (t < line + 72) { if (memcmp (t,"-999",4) == 0) { *dd++ = MISSING_VALUE; } else { int v = 0; if (isdigit (t[0])) v = 10*v + t[0] - '0'; if (isdigit (t[1])) v = 10*v + t[1] - '0'; if (isdigit (t[2])) v = 10*v + t[2] - '0'; if (isdigit (t[3])) v = 10*v + t[3] - '0'; *dd++ = (double) v; } t += 4; } /*----------------------------------------------*\ | Data cards 2 and 3 must follow 1 in sequence | \*----------------------------------------------*/ for (j=2; j < 4; j++) if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in)) { line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; /*--------------------------------------*\ | Three tests are made on the card: | | 1: 'F' must be in column 72 | | 2: number must be in column 80 | | 3: Core id must match master | \*--------------------------------------*/ if (strlen(line) < 72 || line[72] != 'F') { sprintf (message,"Error: unexpected data in line %d of file %s: \"%s\"", line_count, p->raw.filespec, line ); error_exit (message); } if (strlen(line) < 80 || line[79] != '0' + j) { sprintf (message,"Error: card out of order at line %d of file %s", line_count, p->raw.filespec ); error_exit (message); } if (memcmp (line,p->sample[n].id,12) != 0) { sprintf (message,"Error: data card %d refers to wrong core at line %d of file %s", line[79], line_count, p->raw.filespec ); error_exit (message); } /*--------------------------------------*\ | Read raw data values from card. | \*--------------------------------------*/ t = line + 12; while (t < line + 72) { if (memcmp (t,"-999",4) == 0) { *dd++ = MISSING_VALUE; } else { int v = 0; if (isdigit (t[0])) v = 10*v + t[0] - '0'; if (isdigit (t[1])) v = 10*v + t[1] - '0'; if (isdigit (t[2])) v = 10*v + t[2] - '0'; if (isdigit (t[3])) v = 10*v + t[3] - '0'; *dd++ = (double) v; } t += 4; } } else { sprintf (message,"Error: unexpected end of file at line %d in %s", line_count, p->raw.filespec ); error_exit (message); } } else { sprintf (message,"Error: could not allocate raw data array for sample %s of input file %s", p->sample[n].id, p->raw.filespec ); error_exit (message); } /*--------------------------------------------------*\ | Increment the sample counter, and enlarge | | p->sample if necessary. | \*--------------------------------------------------*/ n++; if (n >= p->limit) { if (p->sample = (struct sample *) realloc (p->sample,(p->limit + GRANULARITY)*sizeof(struct sample))) p->limit += GRANULARITY; else { sprintf (message,"Error: could not enlarge sample array for input file %s",p->raw.filespec); error_exit (message); } } } else { /* ignore master cards and SST cards */ } } else { /* ignore blank line */ } } p->count = n; fclose (in); /*--------------------------------------------------------------*\ | Output the raw data as tab-delimited text. | \*--------------------------------------------------------------*/ printf ("Raw data from file %s\n",p->raw.filespec); printf ("%s",p->raw.name[0]); for (i=1; i < p->raw.count; i++) printf ("\t%s",p->raw.name[i]); printf ("\n"); for (j=0; j < p->count; j++) { printf ("%s",p->sample[j].id); for (i=0; i < p->raw.count; i++) printf ("\t%.0lf",p->sample[j].raw[i]); printf ("\n"); } } else { sprintf (message,"Warning: could not open raw data file \"%s\"",p->raw.filespec); warning (message); } } static void read_meta_data_from_specmap_file (struct data_base *p) { int i,j,m,n; char *b,*e,*s,*t; char *meta_ptr; char meta_id [16]; char **ss; FILE *in; int line_count = 0; if (in = FOPEN(p->meta.filespec,"r")) { /*--------------------------------------------------------------*\ | Allocate space for the meta variable names. | \*--------------------------------------------------------------*/ if (p->meta.name_buffer = (char *) malloc (1 + strlen(specmap_meta_names))) { strcpy (p->meta.name_buffer,specmap_meta_names); } else { sprintf (message,"Error: unable to allocate meta name buffer"); error_exit (message); } /*--------------------------------------------------------------*\ | Count the tab-delimited cells, then allocate an array of | | pointers to the cell names. Parse the line by tabs, storing | | the cell names in dynamically allocated space. | \*--------------------------------------------------------------*/ for (i=0,s=p->meta.name_buffer; *s; s++) if (*s == '\t') i++; p->meta.count = i+1; if (p->meta.name = (char **) malloc (p->meta.count * sizeof (char *))) { ss = p->meta.name; b = e = p->meta.name_buffer; while (*b) { e = b; while (*e && *e != '\t') e++; *ss++ = b; b = e; if (*e) b++; *e = 0; } } else { sprintf (message,"Error: could not allocate memory for meta variable names in %s",p->raw.filespec); error_exit (message); } /*--------------------------------------------------------------*\ | Optionally output the meta names. | \*--------------------------------------------------------------*/ if (verbose) { printf ("Meta names from input file %s\n",p->raw.filespec); for (i=0; i < p->meta.count; i++) printf (" %d: \"%s\"\n",i,p->meta.name[i]); } /*--------------------------------------------------------------*\ | Read the meta data from the rows in the file. | \*--------------------------------------------------------------*/ while (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in)) { line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; if (s > line) { if (strlen(line) >= 72 && line[72] == 'M') { /*--------------------------------------------------*\ | Found a master card. Find the first data record | | from the same ship, cruise, and core. | \*--------------------------------------------------*/ memcpy (meta_id,line,8); meta_id[8] = 0; for (n=0; n < p->count; n++) if (memcmp (p->sample[n].id,meta_id,8) == 0) break; if (n < p->count) { /*----------------------------------------------*\ | Store the meta data as a single character | | buffer whose length of the buffer is found | | from the following table: | | | | Variable name c columns length+NUL | | Ship code 1 1:2 3 | | Cruise Number 1 3:5 4 | | Core Number 1 6:8 4 | | Latitude degrees 1 13:16 5 | | Latitude minutes 1 17:20 5 | | Latitude tenths 1 21:24 5 | | Longitude degrees 1 25:28 5 | | Longitude minutes 1 29:32 5 | | Longitude tenths 1 33:36 5 | | Marsden-10 1 37:44 5 | | Marsden-1 1 41:44 5 | | Water Depth 1 45:48 5 | | Core Length 1 49:52 5 | | Ocean Area 1 53:56 5 | | Core Type 1 74 2 | | Depth in Core 2 9:12 5 | | SST August 5 13:16 5 | | SST February 5 17:20 5 | | ------------------------------------ | | total 83 | \*----------------------------------------------*/ if (p->sample[n].meta_buffer = (char *) malloc (96)) { *p->sample[n].meta_buffer = 0; } else { sprintf (message,"Error: could not allocate buffer for meta data"); error_exit (message); } /*----------------------------------------------*\ | Create an array of char pointers which point | | at parts of the meta data buffer that was | | just allocated. | \*----------------------------------------------*/ if (ss = (char **) malloc (p->meta.count * sizeof (char *))) { memset (ss,0,p->meta.count * sizeof (char *)); p->sample[n].meta = ss; } else { sprintf (message,"Error: could not allocate meta data pointer array for sample %s of input file %s", p->sample[n].id, p->meta.filespec ); error_exit (message); } /*----------------------------------------------*\ | Decode the master card. | \*----------------------------------------------*/ meta_ptr = p->sample[n].meta_buffer; t = line; for (i=0; master_card_meta_length[i]; i++) if ((m = master_card_meta_length[i]) > 0) { memcpy (meta_ptr,t,m); *ss++ = meta_ptr; meta_ptr += m; t += m; *meta_ptr++ = 0; } else t += -m; /*----------------------------------------------*\ | "Depth in Core" is not listed on the master | | card; it is only listed on the data cards. | | But since we included this field in the id | | for the sample, we can extract it from the | | id and include it among the meta data. | \*----------------------------------------------*/ memcpy (meta_ptr,p->sample[n].id+8,4); *ss++ = meta_ptr; meta_ptr += 4; *meta_ptr++ = 0; } else { sprintf (message,"Warning: found meta data but no numerical data for sample \"%s\" in %s", meta_id, p->meta.filespec ); warning (message); } } if (strlen (line) < 72) { /*----------------------------------------------*\ | This may be card 5, containing temperature | | for a core top. | \*----------------------------------------------*/ memcpy (meta_id,line,8); meta_id[8] = 0; for (n=0; n < p->count; n++) if (memcmp (p->sample[n].id,meta_id,8) == 0) break; if (n < p->count) { /*----------------------------------------------*\ | Point meta_ptr at the end of the meta_buffer | | and ss to the first NULL value in meta. | \*----------------------------------------------*/ for (ss = p->sample[n].meta; *ss; ss++); meta_ptr = *(ss-1); meta_ptr += 1 + strlen (meta_ptr); /* SST (Aug) */ memcpy (meta_ptr,line+12,4); *ss++ = meta_ptr; meta_ptr += 4; *meta_ptr++ = 0; /* SST (Feb) */ memcpy (meta_ptr,line+16,4); *ss++ = meta_ptr; meta_ptr += 4; *meta_ptr++ = 0; } else { sprintf (message,"Warning: found temperature data but no numerical data for sample \"%s\" in %s", meta_id, p->meta.filespec ); warning (message); } } } else { /* ignore blank lines */ } } fclose (in); /*--------------------------------------------------------------*\ | Output the meta data as tab-delimited text. | \*--------------------------------------------------------------*/ printf ("Meta data from %s\n",p->meta.filespec); printf ("%s",p->meta.name[0]); for (i=1; i < p->meta.count; i++) printf ("\t%s",p->meta.name[i]); printf ("\n"); for (j=0; j < p->count; j++) if (p->sample[j].meta) { printf ("%s",p->sample[j].meta[0]); for (i=1; i < p->meta.count; i++) printf ("\t%s",p->sample[j].meta[i]); printf ("\n"); } } else if (*p->meta.filespec) { sprintf (message,"Warning: could not open meta data file \"%s\"",p->meta.filespec); warning (message); } } /*----------------------------------------------------------------------*\ \*----------------------------------------------------------------------*/ #define NAPD_ASCII_HEADER "# NAPD ASCII Format" static void read_raw_data_from_napd_table (struct data_base *p) { int i,j,n; int line_count; FILE *in; char *s,*t,*b,*e; char **ss; double *dd; char number[12]; if (in = FOPEN(p->raw.filespec,"r")) { line_count = 0; /*--------------------------------------------------------------*\ | Find the first nonblank line. It must contain the string | | "# NAPD ASCII Format" | \*--------------------------------------------------------------*/ *line = 0; while (*line == 0) { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) return; line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; for (s=line; *s && isspace (*s); s++); if (*s == 0) *line = 0; } if (memcmp (line,NAPD_ASCII_HEADER,strlen(NAPD_ASCII_HEADER)) != 0) { sprintf (message,"Error: expected NAPD ASCII header line, got \"%s\" in line %d of file %s", line, line_count, p->raw.filespec ); error_exit (message); } /*--------------------------------------------------------------*\ | Skip all subsequent lines beginning with '#' | \*--------------------------------------------------------------*/ while (*line == '#') { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) return; line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; for (s=line; *s && isspace (*s); s++); if (*s == 0) *line = 0; } /*--------------------------------------------------------------*\ | The next line contains two integers, the number of variables | | and the number of samples. | \*--------------------------------------------------------------*/ p->raw.count = p->count = -1; for (b=line; *b && !isdigit(*b); b++); for (e=b; *e && isdigit(*e); e++); if (e > b) { memcpy (number,b,e-b); number[e-b] = 0; p->raw.count = atoi (number); for (b=e; *b && !isdigit(*b); b++); for (e=b; *e && isdigit(*e); e++); if (e > b) { memcpy (number,b,e-b); number[e-b] = 0; p->count = atoi (number); } } if (p->raw.count < 0 || p->count < 0) { sprintf (message,"Error: expected two numbers, got \"%s\" in line %d of file %s", line, line_count, p->raw.filespec ); error_exit (message); } /*--------------------------------------------------------------*\ | The next p->raw.count lines contain the names of variables. | | Variables occur in short and long forms. | \*--------------------------------------------------------------*/ if (p->raw.name_buffer = (char *) malloc (MAX_NAME_LENGTH * p->raw.count)) { t = p->raw.name_buffer; *t = 0; if (p->raw.name = (char **) malloc (p->raw.count * sizeof (char *))) { ss = p->raw.name; for (i=0; i < p->raw.count; i++) { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) { sprintf (message,"Error: unexpected end of file at line %d of %s", line_count, p->raw.filespec ); error_exit (message); } line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; while (s > line && isspace (*s)) s--; if (s > line) s++; j = s - line + 17; memcpy (t,line+17,j); *ss++ = t; t += j; *t++ = 0; } /*------------------------------------------------------*\ | Here it should be possible to resize the array that | | contains the names themselves, but the pointers to | | the names must be correspondingly adjusted. | \*------------------------------------------------------*/ } else { sprintf (message,"Error: unable to allocate raw name pointers"); error_exit (message); } } else { sprintf (message,"Error: unable to allocate raw name buffer"); error_exit (message); } /*--------------------------------------------------------------*\ | Optionally output the cell names. | \*--------------------------------------------------------------*/ if (verbose) { printf ("Cell names from input file %s\n",p->raw.filespec); for (i=0; i < p->raw.count; i++) printf (" %d: \"%s\"\n",i,p->raw.name[i]); } /*--------------------------------------------------------------*\ | Allocate space for sample data. | \*--------------------------------------------------------------*/ if (!p->sample) if (p->sample = (struct sample *) malloc (p->count * sizeof (struct sample))) p->limit = p->count; else { sprintf (message,"Error: could not allocate sample array for input file %s",p->raw.filespec); error_exit (message); } /*--------------------------------------------------------------*\ | Read the data from the rest of the the file. | \*--------------------------------------------------------------*/ for (n=0; n < p->count; n++) { /* Skip any number of comment lines (typically only one) */ do { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) { sprintf (message,"Error: unexpected end of file at line %d of %s", line_count, p->raw.filespec ); error_exit (message); } line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; } while (*line == '#'); /* Use the entire next line as the sample id */ if (strlen (line) >= MAX_NAME_LENGTH) { memcpy (p->sample[n].id,line,MAX_NAME_LENGTH - 1); p->sample[n].id [MAX_NAME_LENGTH - 1] = 0; } else strcpy (p->sample[n].id,line); /* Allocate raw data array for sample */ if (dd = (double *) malloc (p->raw.count * sizeof (double))) { memset (dd,0,p->raw.count * sizeof(double)); p->sample[n].raw = dd; p->sample[n].data = NULL; p->sample[n].meta = NULL; p->sample[n].meta_buffer = NULL; p->sample[n].data_base = p; } else { sprintf (message,"Error: could not allocate raw data array for sample %s of input file %s", p->sample[n].id, p->raw.filespec ); error_exit (message); } /* Read raw data into data array */ i = 0; while (i < p->raw.count) { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) { sprintf (message,"Error: unexpected end of file at line %d of %s", line_count, p->raw.filespec ); error_exit (message); } line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; b = line; e = line + strlen (line); while (b < e && i < p->raw.count) { memcpy (number,b,8); number[8] = 0; *dd++ = strtod (number,NULL); i++; b += 8; } } /*----------------------------------------------------------*\ | For debugging only, output the cell values. | \*----------------------------------------------------------*/ if (verbose) { printf ("%s",p->sample[n].id); for (i=0; i < p->raw.count; i++) printf ("\t%5.2lf",p->sample[n].raw[i]); printf ("\n"); } } fclose (in); } else { sprintf (message,"Warning: could not open raw data file \"%s\"",p->raw.filespec); warning (message); } } static char *napd_meta_name[] = { "Lat", "Lon", "Country", "State or Province", "PubIndx", NULL }; static void read_meta_data_from_napd_table (struct data_base *p) { int i,j,n; int line_count; int variable_count,sample_count,raw_data_line_count; FILE *in; char *s,*t,*b,*e; char **ss; char number[12]; char *pLat,*pLon,*pUSA,*pState,*pCAN,*pProvince,*pPubIndx; char *meta_buffer; char sample_id [MAX_NAME_LENGTH]; if (in = FOPEN(p->meta.filespec,"r")) { line_count = 0; /*--------------------------------------------------------------*\ | Find the first nonblank line. It must contain the string | | "# NAPD ASCII Format" | \*--------------------------------------------------------------*/ *line = 0; while (*line == 0) { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) return; line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; for (s=line; *s && isspace (*s); s++); if (*s == 0) *line = 0; } if (memcmp (line,NAPD_ASCII_HEADER,strlen(NAPD_ASCII_HEADER)) != 0) { sprintf (message,"Error: expected NAPD ASCII header line, got \"%s\" in line %d of file %s", line, line_count, p->meta.filespec ); error_exit (message); } /*--------------------------------------------------------------*\ | Skip subsequent lines beginning with '#'. | \*--------------------------------------------------------------*/ while (*line == '#') { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) return; line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; for (s=line; *s && isspace (*s); s++); if (*s == 0) *line = 0; } /*--------------------------------------------------------------*\ | The next line contains two integers, the number of variables | | and the number of samples. | \*--------------------------------------------------------------*/ variable_count = sample_count = -1; for (b=line; *b && !isdigit(*b); b++); for (e=b; *e && isdigit(*e); e++); if (e > b) { memcpy (number,b,e-b); number[e-b] = 0; variable_count = atoi (number); raw_data_line_count = variable_count/10; if (variable_count % 10) raw_data_line_count++; for (b=e; *b && !isdigit(*b); b++); for (e=b; *e && isdigit(*e); e++); if (e > b) { memcpy (number,b,e-b); number[e-b] = 0; sample_count = atoi (number); } } if (variable_count < 0 || sample_count < 0) { sprintf (message,"Error: expected two numbers, got \"%s\" in line %d of file %s", line, line_count, p->meta.filespec ); error_exit (message); } /*--------------------------------------------------------------*\ | The next variable_count lines contain the names of the raw | | variables. Skip them. | \*--------------------------------------------------------------*/ for (i=0; i < variable_count; i++) { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) { sprintf (message,"Error: unexpected end of file at line %d of %s", line_count, p->meta.filespec ); error_exit (message); } line_count++; /*\ s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; \*/ } /*--------------------------------------------------------------*\ | Set up the data base meta buffer and name pointers. | \*--------------------------------------------------------------*/ n = 0; for (i=0; napd_meta_name[i]; i++) n += 1 + strlen (napd_meta_name[i]); p->meta.count = i; if (p->meta.name_buffer = (char *) malloc (n)) { if (p->meta.name = (char **) malloc (p->meta.count * sizeof(char *))) { ss = p->meta.name; s = p->meta.name_buffer; for (i=0; i < p->meta.count; i++) { strcpy (s,napd_meta_name[i]); *ss++ = s; s += 1 + strlen (napd_meta_name[i]); } } else { sprintf (message,"Error: could not allocate space for meta name pointers, file %s", p->meta.filespec ); error_exit (message); } } else { sprintf (message,"Error: could not allocate space for meta name buffer, file %s", p->meta.filespec ); error_exit (message); } /*--------------------------------------------------------------*\ | Optionally output the meta names. | \*--------------------------------------------------------------*/ if (verbose) { printf ("Meta names from input file %s\n",p->meta.filespec); for (i=0; i < p->meta.count; i++) printf (" %d: \"%s\"\n",i,p->meta.name[i]); } /*--------------------------------------------------------------*\ | Read the data from the rest of the the file. | \*--------------------------------------------------------------*/ for (n=0; n < sample_count; n++) { /*----------------------------------------------------------*\ | The first line for each sample begins with a '#' and | | contains meta variables and their values. | \*----------------------------------------------------------*/ if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) { sprintf (message,"Error: unexpected end of file at line %d of %s", line_count, p->raw.filespec ); error_exit (message); } line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; pLat = pLon = pUSA = pState = pCAN = pProvince = pPubIndx = NULL; if (*line == '#') { if (meta_buffer = (char *) malloc (1 + strlen (line))) { strcpy (meta_buffer,line); /*--------------------------------------------------*\ | Find the keywords in the line. | \*--------------------------------------------------*/ if (s = strstr (meta_buffer,"Lat:")) pLat = s; if (s = strstr (meta_buffer,"Lon:")) pLon = s; if (s = strstr (meta_buffer,"USA:")) pUSA = s; if (s = strstr (meta_buffer,"CAN:")) pCAN = s; if (s = strstr (meta_buffer,"PubIndx:")) pPubIndx = s; /*--------------------------------------------------*\ | Terminate the keyword values, Adjust the keyword | | pointers, and remove leading and trailing space. | \*--------------------------------------------------*/ if (pLat) { s = pLat - 1; while (s > meta_buffer && isspace(*s)) *s-- = 0; *(pLat - 1) = 0; pLat += strlen ("Lat:"); while (*pLat && isspace (*pLat)) pLat++; } if (pLon) { s = pLon - 1; while (s > meta_buffer && isspace(*s)) *s-- = 0; *(pLon - 1) = 0; pLon += strlen ("Lon:"); while (*pLon && isspace (*pLon)) pLon++; } if (pUSA) { s = pUSA - 1; while (s > meta_buffer && isspace(*s)) *s-- = 0; pState = pUSA + strlen ("USA:"); *(pState - 1) = 0; while (*pState && isspace (*pState)) pState++; } if (pCAN) { s = pCAN - 1; while (s > meta_buffer && isspace(*s)) *s-- = 0; pProvince = pCAN + strlen ("CAN:"); *(pProvince - 1) = 0; while (*pProvince && isspace (*pProvince)) pProvince++; } if (pPubIndx) { s = pPubIndx - 1; while (s > meta_buffer && isspace(*s)) *s-- = 0; pPubIndx += strlen ("PubIndx:"); while (*pPubIndx && isspace (*pPubIndx)) pPubIndx++; } /*--------------------------------------------------*\ | Warn the user if you didn't find any keywords. | \*--------------------------------------------------*/ if (pLat || pLon || pUSA || pState || pCAN || pProvince || pPubIndx) { } else { sprintf (message,"Warning: Expected sample meta data, got \"%s\" on line %d of meta file %s", line, line_count, p->meta.filespec ); warning (message); } } else { sprintf (message,"Error: could not allocate meta buffer for line %d of meta file %s", line_count, p->meta.filespec ); error_exit (message); } } /*----------------------------------------------------------*\ | The next line contains the sample id. | \*----------------------------------------------------------*/ if (*line == '#') { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) { sprintf (message,"Error: unexpected end of file at line %d of %s", line_count, p->raw.filespec ); error_exit (message); } line_count++; s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; } if (strlen (line) >= MAX_NAME_LENGTH) { memcpy (sample_id,line,MAX_NAME_LENGTH - 1); sample_id [MAX_NAME_LENGTH - 1] = 0; } else strcpy (sample_id,line); /*----------------------------------------------------------*\ | Find the master pointer for this sample using a linear | | search. The target is n such that p->sample[n].id is | | identical to sample_id. | \*----------------------------------------------------------*/ for (n=0; n < p->count; n++) if (strcmp (p->sample[n].id,sample_id) == 0) break; if (n >= p->count) { sprintf (message,"Warning: found meta data but no raw data for sample %s", sample_id ); warning (message); } /*----------------------------------------------------------*\ | Assign the meta data to p->sample[n] | \*----------------------------------------------------------*/ p->sample[n].meta_buffer = meta_buffer; if (!(p->sample[n].meta = (char **) malloc (p->meta.count * sizeof (char *)))) { sprintf (message,"Error: could not allocate meta pointer array for sample %s", p->sample[n].id ); error_exit (message); } p->sample[n].meta[0] = pLat; p->sample[n].meta[1] = pLon; if (pUSA) { p->sample[n].meta[2] = pUSA; p->sample[n].meta[3] = pState; } if (pCAN) { p->sample[n].meta[2] = pCAN; p->sample[n].meta[3] = pProvince; } p->sample[n].meta[4] = pPubIndx; /*----------------------------------------------------------*\ | For debugging only, output the meta values. | \*----------------------------------------------------------*/ if (verbose) { printf ("%3d",n); for (i=0; i < p->meta.count; i++) if (p->sample[n].meta[i]) printf ("\t%s",p->sample[n].meta[i]); else printf ("\t"); printf ("\n"); } /*----------------------------------------------------------*\ | Skip lines containing raw data | \*----------------------------------------------------------*/ for (i=0; i < raw_data_line_count; i++) { if (fgets (line,MAX_DATA_FILE_LINE_LENGTH,in) == NULL) { sprintf (message,"Error: unexpected end of file at line %d of %s", line_count, p->raw.filespec ); error_exit (message); } line_count++; /*\ s = line + strlen (line) - 1; if (*s == '\n') *s-- = 0; if (*s == '\r') *s-- = 0; \*/ } } fclose (in); } else if (*p->meta.filespec) { sprintf (message,"Warning: could not open meta data file \"%s\"",p->meta.filespec); warning (message); } } /*----------------------------------------------------------------------*\ \*----------------------------------------------------------------------*/ struct read_function_t { char *format; void (*raw)(struct data_base *p); void (*meta)(struct data_base *p); }; static struct read_function_t read_function[] = { {"tab", read_raw_data_from_tab_table, read_meta_data_from_tab_table }, {"specmap", read_raw_data_from_specmap_file, read_meta_data_from_specmap_file}, {"napd-ascii", read_raw_data_from_napd_table, read_meta_data_from_napd_table }, {NULL,NULL,NULL} }; static void read_raw_data (struct data_base *p) { int i; for (i=0; read_function[i].format; i++) if (stricmp (p->raw.format,read_function[i].format) == 0) { (*read_function[i].raw)(p); return; } if (read_function[i].format == NULL) read_raw_data_from_tab_table (p); } static void read_meta_data (struct data_base *p) { int i; for (i=0; read_function[i].format; i++) if (stricmp (p->meta.format,read_function[i].format) == 0) { (*read_function[i].meta)(p); return; } if (read_function[i].format == NULL) read_meta_data_from_tab_table (p); } void read_data_base (struct data_base *p) { read_raw_data (p); read_meta_data (p); read_rules (p); apply_rules (p); } /*----------------------------------------------------------------------*\ \*----------------------------------------------------------------------*/