csv.cpp

Go to the documentation of this file.
00001 /*
00002  * csv.cpp
00003  *
00004  * Copyright (C) 2007  Thomas A. Vaughan
00005  * All rights reserved.
00006  *
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions are met:
00010  *     * Redistributions of source code must retain the above copyright
00011  *       notice, this list of conditions and the following disclaimer.
00012  *     * Redistributions in binary form must reproduce the above copyright
00013  *       notice, this list of conditions and the following disclaimer in the
00014  *       documentation and/or other materials provided with the distribution.
00015  *     * Neither the name of the <organization> nor the
00016  *       names of its contributors may be used to endorse or promote products
00017  *       derived from this software without specific prior written permission.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY THOMAS A. VAUGHAN ''AS IS'' AND ANY
00020  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00021  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00022  * DISCLAIMED. IN NO EVENT SHALL THOMAS A. VAUGHAN BE LIABLE FOR ANY
00023  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00024  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00025  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00026  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00028  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029  *
00030  *
00031  * Implementation of the CSV class (see csv.h)
00032  */
00033 
00034 // includes --------------------------------------------------------------------
#include "csv.h"                        // always include our own header first!

#include <vector>

#include "common/wave_ex.h"
00038 
00039 
00040 
00041 ////////////////////////////////////////////////////////////////////////////////
00042 //
00043 //      static helper methods
00044 //
00045 ////////////////////////////////////////////////////////////////////////////////
00046 
00047 static bool
00048 getNextValue
00049 (
00050 IN std::istream& in,
00051 IN std::string& val,
00052 IN bool& eol
00053 )
00054 {
00055         ASSERT(in.good(), "bad?");
00056         val = "";
00057         eol = false;
00058 
00059         // keep reading until comma or end-of-line
00060         while (!in.eof()) {
00061                 char a;
00062                 in.read(&a, 1);
00063                 if ('\n' == a) {
00064                         eol = true;
00065                         return true;
00066                 } else if (',' == a) {
00067                         return true;
00068                 }
00069 
00070                 val += a;
00071         }
00072 
00073         // can only get here if file ends!
00074         return false;
00075 }
00076 
00077 
00078 
00079 ////////////////////////////////////////////////////////////////////////////////
00080 //
00081 //      public API
00082 //
00083 ////////////////////////////////////////////////////////////////////////////////
00084 
00085 void
00086 parseCsvStream
00087 (
00088 IN std::istream& in,
00089 IN const VecString& columns,
00090 IN csv_callback_fn callback,
00091 IN void * context
00092 )
00093 {
00094         ASSERT(in.good(), "bad?");
00095         ASSERT(columns.size(), "no columns requested?");
00096         ASSERT(callback, "null callback provided?");
00097         // ASSERT(context) -- we don't care if this is null
00098 
00099         // the mapOut array is the key to the whole algorithm: if the value of
00100         //  the ith element is a non-negative integer j, we need to map from
00101         //  the input column i to the output (col_data_t) at index j 
00102         static const int s_maxColumns           = 32;
00103         ASSERT((int) columns.size() <= s_maxColumns,
00104             "caller is looking for a lot of columns! (more than %d)",
00105             s_maxColumns);
00106         int mapOut[s_maxColumns];
00107         int N = columns.size();
00108 
00109         // set up output
00110         vec_col_t data;
00111         data.resize(N);
00112         for (int i = 0; i < N; ++i) {
00113                 col_data_t& cd = data[i];
00114                 cd.col_name = NULL;
00115                 cd.value = NULL;
00116         }
00117 
00118         // run through header and construct mapOut
00119         bool eol = false;       // not yet at end-of-line
00120         int idx = 0;
00121         for (; !eol; ++idx) {
00122 
00123                 // initialize
00124                 mapOut[idx] = -1;
00125 
00126                 // read column header
00127                 std::string val;
00128                 if (!getNextValue(in, val, eol)) {
00129                         WAVE_EX(wex);
00130                         wex << "Premature end of stream while parsing header";
00131                 }
00132                 // DPRINTF("Read column header: '%s'", val.c_str());
00133 
00134                 // does this match an input?
00135                 for (int i = 0; i < N; ++i) {
00136                         const char * col = columns[i].c_str();
00137                         if (!strcmp(col, val.c_str())) {
00138                                 // match!
00139                                 // DPRINTF("Input column '%s' at index %d found",
00140                                 //     col, i);
00141                                 mapOut[idx] = i;
00142                                 col_data_t& cd = data[i];
00143                                 cd.col_name = col;
00144                                 break;
00145                         }
00146                 }
00147         }
00148         int nCols = idx;
00149         // DPRINTF("Read %d columns total", nCols);
00150         ASSERT(nCols > 0, "no columns?");
00151         ASSERT(nCols >= N, "read fewer columns (%d) than requested (%d)?",
00152             nCols, N);
00153 
00154         // verify all requested columns were found
00155         for (int i = 0; i < N; ++i) {
00156                 col_data_t& cd = data[i];
00157                 if (!cd.col_name) {
00158                         WAVE_EX(wex);
00159                         wex << "Unable to find index " << i << ": '";
00160                         wex << columns[i] << "'";
00161                 }
00162         }
00163 
00164         // okay, have our output mapping.  Now parse all the rows
00165         VecString vals;
00166         vals.resize(N);
00167         std::string val;
00168         while (!in.eof()) {
00169                 // read entire row
00170                 for (int idx = 0; idx < nCols; ++idx) {
00171                         if (!getNextValue(in, val, eol)) {
00172                                 if (0 == idx) {
00173                                         // assume end of file!
00174                                         break;
00175                                 }
00176                                 else if (idx < nCols - 1) {
00177                                         WAVE_EX(wex);
00178                                         wex << "Premature end of stream while ";
00179                                         wex << "parsing data columns";
00180                                 }
00181                         }
00182                         if (eol && idx < nCols - 1) {
00183                                 WAVE_EX(wex);
00184                                 wex << "Row had only " << (idx + 1);
00185                                 wex << " columns?   Need " << nCols;
00186                         }
00187 
00188                         int j = mapOut[idx];
00189                         if (j < 0)
00190                                 continue;
00191 
00192                         vals[j] = val;
00193                         col_data_t& cd = data[j];
00194                         cd.value = vals[j].c_str();
00195                 }
00196 
00197                 // tell client about this row
00198                 callback(context, data);
00199         }
00200 }
00201