Lumiera  0.pre.03
»edit your freedom«
data.hpp
Go to the documentation of this file.
1 /*
2  DATA.hpp - read and write a table with CSV data
3 
4  Copyright (C) Lumiera.org
5  2022, Hermann Vosseler <Ichthyostega@web.de>
6 
7  This program is free software; you can redistribute it and/or
8  modify it under the terms of the GNU General Public License as
9  published by the Free Software Foundation; either version 2 of
10  the License, or (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to the Free Software
19  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21 */
22 
23 
87 #ifndef LIB_STAT_DATA_H
88 #define LIB_STAT_DATA_H
89 
90 
91 #include "lib/error.hpp"
92 #include "lib/nocopy.hpp"
93 #include "lib/stat/csv.hpp"
94 #include "lib/stat/file.hpp"
95 #include "lib/format-string.hpp"
96 #include "lib/util.hpp"
97 
98 #include <type_traits>
99 #include <utility>
100 #include <fstream>
101 #include <vector>
102 #include <string>
103 #include <limits>
104 #include <deque>
105 
106 
107 namespace lib {
108 namespace stat{
109 
110  namespace error = lumiera::error;
111 
112  using std::move;
113  using std::tuple;
114  using std::vector;
115  using std::string;
116  using util::isnil;
117  using util::unConst;
118  using util::_Fmt;
119  using util::min;
120 
121 
122 
123 
129  template<typename VAL>
130  struct Column
132  {
133  string header;
134  vector<VAL> data;
135 
136  using ValueType = VAL;
137 
138 
139  Column (string headerID)
140  : header{headerID}
141  , data{}
142  { }
143 
144 
145  VAL&
146  get()
147  {
148  if (isnil (data))
149  throw error::State{"No rows in DataTable yet"};
150  return data.back();
151  }
152 
153  operator VAL&()
154  {
155  return get();
156  }
157 
158  operator VAL const&() const
159  {
160  return unConst(this)->get();
161  }
162 
163  template<typename X>
164  VAL& operator= (X&& newVal)
165  {
166  return get() = std::forward<X> (newVal);
167  }
168  };
169 
170 
171 
172 
173 
174  /******************************************************************************************/
193  template<class TAB>
194  class DataTable
195  : public TAB
197  {
198  fs::path filename_;
199 
200  public:
201  DataTable(fs::path csvFile ="")
202  : filename_{fs::consolidated (csvFile)}
203  {
204  loadData();
205  }
206 
207  DataTable (CSVData const& csv)
208  : filename_{}
209  {
210  appendFrom (csv);
211  }
212 
213 
214  /* === Data Access === */
215 
216  static constexpr size_t columnCnt = std::tuple_size_v<decltype(std::declval<TAB>().allColumns())>;
217 
218  bool
219  empty() const
220  {
221  return 0 == this->size();
222  }
223 
224  size_t
225  size() const
226  {
227  if (0 == columnCnt) return 0;
228  size_t rowCnt = std::numeric_limits<size_t>::max();
229  forAllColumns(
230  [&](auto& col)
231  {
232  rowCnt = min (rowCnt, col.data.size());
233  }); // the smallest number of data points found in any column
234  return rowCnt;
235  }
236 
237  CSVData
238  renderCSV() const
239  {
240  CSVData csv{{}};
241  csv.reserve (size()+1);
242  auto header = generateHeaderSpec();
243  std::swap (csv[0], header);
244  for (uint i=0; i < size(); ++i)
245  csv.emplace_back (formatCSVRow(i));
246  return csv;
247  }
248 
249 
250 
251  /* === Manipulation === */
252 
253  void
254  newRow()
255  {
256  forAllColumns(
257  [siz = size()+1]
258  (auto& col)
259  {
260  col.data.resize (siz);
261  });
262  }
263 
264  void
265  dupRow()
266  {
267  if (empty())
268  newRow();
269  else
270  forAllColumns(
271  [](auto& col)
272  {
273  col.data.emplace_back (col.data.back());
274  });
275  }
276 
277  void
278  dropLastRow()
279  {
280  if (not empty())
281  forAllColumns(
282  [](auto& col)
283  {
284  size_t siz = col.data.size();
285  col.data.resize (siz>0? siz-1 : 0);
286  });
287  }
288 
289  void
290  reserve (size_t expectedCapacity)
291  {
292  forAllColumns(
293  [=](auto& col)
294  {
295  col.data.reserve(expectedCapacity);
296  });
297  }
298 
299  void
300  clear()
301  {
302  forAllColumns(
303  [](auto& col)
304  {
305  col.data.clear();
306  });
307  }
308 
309  void
310  appendFrom (CSVData const& csv)
311  {
312  if (isnil (csv)) return;
313  verifyHeaderSpec (csv[0]);
314  for (size_t row=1; row<csv.size(); ++row)
315  if (not isnil (csv[row]))
316  appendRowFromCSV (csv[row]);
317  }
318 
319 
320 
322  void
323  save (size_t lineLimit =std::numeric_limits<size_t>::max()
324  ,bool backupOld =false)
325  {
326  if (filename_.empty())
327  throw error::Logic{"Unable to save DataFile without filename given."};
328 
329  fs::path newFilename{filename_};
330  newFilename += ".tmp";
331 
332  std::ofstream csvFile{newFilename, std::ios_base::out | std::ios_base::trunc};
333  if (not csvFile.good())
334  throw error::State{_Fmt{"Unable to create CSV output file %s"}
335  % newFilename};
336  saveData (csvFile, lineLimit);
337 
338  if (backupOld)
339  {
340  fs::path oldFile{filename_};
341  oldFile += ".bak";
342  if (fs::exists (filename_))
343  fs::rename (filename_, oldFile);
344  }
345  fs::rename (newFilename, filename_);
346  filename_ = fs::consolidated(filename_);
347  } // lock onto absolute path
348 
349 
350  void
351  saveAs (fs::path newStorage
352  ,size_t lineLimit =std::numeric_limits<size_t>::max())
353  {
354  newStorage = fs::consolidated (newStorage);
355  if (fs::exists(newStorage))
356  throw error::Invalid{_Fmt{"Storing DataFile rejected: target %s exists already"}
357  % newStorage};
358  if (not (newStorage.parent_path().empty()
359  or fs::exists(newStorage.parent_path())))
360  throw error::Invalid{_Fmt{"DataFile(%s) placed into nonexistent directory %s"}
361  % newStorage.filename() % newStorage.parent_path()};
362  filename_ = newStorage;
363  save (lineLimit);
364  }
365 
366 
367  private: /* === Implementation === */
368 
370  template<class OP>
371  void
372  forAllColumns (OP&& doIt) const
373  {
374  lib::meta::forEach (unConst(this)->allColumns()
375  ,std::forward<OP> (doIt));
376  }
377 
378  void
379  loadData()
380  {
381  if (not (filename_.parent_path().empty()
382  or fs::exists(filename_.parent_path())))
383  throw error::Invalid{_Fmt{"DataFile(%s) placed into nonexistent directory %s"}
384  % filename_.filename() % filename_.parent_path()};
385  if (not fs::exists(filename_))
386  return; // leave the table empty
387 
388  std::ifstream csvFile{filename_};
389  if (not csvFile.good())
390  throw error::Config{_Fmt{"unable to read CSV data file %s"} % filename_};
391 
392  std::deque<string> rawLines;
393  for (string line; std::getline(csvFile, line); )
394  rawLines.emplace_back (move(line));
395 
396  if (rawLines.size() < 1) return;
397  verifyHeaderSpec (rawLines[0]);
398 
399  // we know the number of rows now...
400  reserve (rawLines.size() - 1);
401 
402  // storage in file is backwards, with newest data on top
403  for (size_t row = rawLines.size()-1; 0<row; --row)
404  if (not isnil(rawLines[row]))
405  appendRowFromCSV (rawLines[row]);
406  }
407 
408 
409  void
410  saveData (std::ofstream& csvFile, size_t lineLimit)
411  {
412  csvFile << generateHeaderSpec() << "\n";
413  if (empty())
414  return;
415  lineLimit = size() > lineLimit? size()-lineLimit : 0;
416  // store newest data first, possibly discard old data
417  for (size_t row = size(); lineLimit < row; --row)
418  csvFile << formatCSVRow(row-1) << "\n";
419  }
420 
421 
422  void
423  verifyHeaderSpec (string headerLine)
424  {
425  CsvParser header{headerLine};
426  forAllColumns(
427  [&](auto& col)
428  {
429  if (*header != col.header)
430  throw error::Invalid{_Fmt{"Header mismatch in CSV file %s. "
431  "Expecting column(%s) but found \"%s\""}
432  % filename_ % col.header % *header};
433  ++header;
434  });
435  }
436 
437  CSVLine
438  generateHeaderSpec() const
439  {
440  CSVLine csv;
441  forAllColumns(
442  [&](auto& col)
443  {
444  csv += col.header;
445  });
446  return csv;
447  }
448 
449 
450  void
451  appendRowFromCSV (string line)
452  {
453  newRow();
454  CsvParser csv(line);
455  forAllColumns(
456  [&](auto& col)
457  {
458  if (not csv)
459  {
460  if (csv.isParseFail())
461  csv.fail();
462  else
463  throw error::Invalid{_Fmt{"Insufficient data; only %d fields, %d expected. Line:%s"}
464  % csv.getParsedFieldCnt() % columnCnt % line};
465  }
466 
467  using Value = typename std::remove_reference<decltype(col)>::type::ValueType;
468  col.get() = parseAs<Value>(*csv);
469  ++csv;
470  });
471  if (csv)
472  throw error::Invalid{_Fmt{"Excess data fields in CSV. Expect %d fields. Line:%s"}
473  % columnCnt % line};
474  }
475 
476 
477  CSVLine
478  formatCSVRow (size_t rownum) const
479  {
480  if (this->empty())
481  throw error::Logic{"Attempt to access data from empty DataTable."};
482  if (rownum >= this->size())
483  throw error::Logic{_Fmt{"Attempt to access row #%d beyond range [0..%d]."}
484  % rownum % (size()-1)};
485 
486  CSVLine csvLine;
487  forAllColumns(
488  [&](auto& col)
489  {
490  csvLine += col.data.at(rownum);
491  });
492  return csvLine;
493  }
494  };
495 
496 }} // namespace lib::stat
497 #endif /*LIB_STAT_DATA_H*/
Encoding and decoding of data into CSV format.
Wrapper to simplify notation in tests.
Definition: csv.hpp:149
Includes the C++ Filesystem library and provides some convenience helpers.
A string with the ability to construct or append the CSV-rendering of data fields.
Definition: csv.hpp:114
Types marked with this mix-in may be moved but not copied.
Definition: nocopy.hpp:58
Front-end for printf-style string template interpolation.
A front-end for using printf-style formatting.
Implementation namespace for support and library code.
Derived specific exceptions within Lumiera's exception hierarchy.
Definition: error.hpp:199
Mix-Ins to allow or prohibit various degrees of copying and cloning.
void save(size_t lineLimit=std::numeric_limits< size_t >::max(), bool backupOld=false)
Definition: data.hpp:323
Tiny helper functions and shortcuts to be used everywhere. Consider this header to be effectively incl...
Table with data values, stored persistently as CSV file.
Definition: data.hpp:194
Descriptor and Accessor for a data column within a DataTable table.
Definition: data.hpp:130
Lumiera error handling (C++ interface).
Parser to split one line of CSV data into fields.
Definition: csv.hpp:250
void forAllColumns(OP &&doIt) const
apply a generic Lambda to all columns
Definition: data.hpp:372