Lumiera  0.pre.03
»edit your freedom«
data.hpp
Go to the documentation of this file.
1 /*
2  DATA.hpp - read and write a table with CSV data
3 
4  Copyright (C)
5  2022, Hermann Vosseler <Ichthyostega@web.de>
6 
7   **Lumiera** is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by the
9   Free Software Foundation; either version 2 of the License, or (at your
10   option) any later version. See the file COPYING for further details.
11 
12 */
13 
14 
78 #ifndef LIB_STAT_DATA_H
79 #define LIB_STAT_DATA_H
80 
81 
82 #include "lib/error.hpp"
83 #include "lib/nocopy.hpp"
84 #include "lib/stat/csv.hpp"
85 #include "lib/stat/file.hpp"
86 #include "lib/format-string.hpp"
87 #include "lib/util.hpp"
88 
89 #include <type_traits>
90 #include <utility>
91 #include <fstream>
92 #include <vector>
93 #include <string>
94 #include <limits>
95 #include <deque>
96 
97 
98 namespace lib {
99 namespace stat{
100 
101  namespace error = lumiera::error;
102 
103  using std::move;
104  using std::tuple;
105  using std::vector;
106  using std::string;
107  using util::isnil;
108  using util::unConst;
109  using util::_Fmt;
110  using util::min;
111 
112 
113 
114 
120  template<typename VAL>
121  struct Column
123  {
124  string header;
125  vector<VAL> data;
126 
127  using ValueType = VAL;
128 
129 
130  Column (string headerID)
131  : header{headerID}
132  , data{}
133  { }
134 
135 
136  VAL&
137  get()
138  {
139  if (isnil (data))
140  throw error::State{"No rows in DataTable yet"};
141  return data.back();
142  }
143 
144  operator VAL&()
145  {
146  return get();
147  }
148 
149  operator VAL const&() const
150  {
151  return unConst(this)->get();
152  }
153 
154  template<typename X>
155  VAL& operator= (X&& newVal)
156  {
157  return get() = std::forward<X> (newVal);
158  }
159  };
160 
161 
162 
163 
164 
165  /******************************************************************************************/
184  template<class TAB>
185  class DataTable
186  : public TAB
188  {
189  fs::path filename_;
190 
191  public:
192  DataTable(fs::path csvFile ="")
193  : filename_{fs::consolidated (csvFile)}
194  {
195  loadData();
196  }
197 
198  DataTable (CSVData const& csv)
199  : filename_{}
200  {
201  appendFrom (csv);
202  }
203 
204 
205  /* === Data Access === */
206 
207  static constexpr size_t columnCnt = std::tuple_size_v<decltype(std::declval<TAB>().allColumns())>;
208 
209  bool
210  empty() const
211  {
212  return 0 == this->size();
213  }
214 
215  size_t
216  size() const
217  {
218  if (0 == columnCnt) return 0;
219  size_t rowCnt = std::numeric_limits<size_t>::max();
220  forAllColumns(
221  [&](auto& col)
222  {
223  rowCnt = min (rowCnt, col.data.size());
224  }); // the smallest number of data points found in any column
225  return rowCnt;
226  }
227 
228  CSVData
229  renderCSV() const
230  {
231  CSVData csv{{}};
232  csv.reserve (size()+1);
233  auto header = generateHeaderSpec();
234  using std::swap;
235  swap (csv[0], header);
236  for (uint i=0; i < size(); ++i)
237  csv.emplace_back (formatCSVRow(i));
238  return csv;
239  }
240 
241 
242 
243  /* === Manipulation === */
244 
245  void
246  newRow()
247  {
248  forAllColumns(
249  [siz = size()+1]
250  (auto& col)
251  {
252  col.data.resize (siz);
253  });
254  }
255 
256  void
257  dupRow()
258  {
259  if (empty())
260  newRow();
261  else
262  forAllColumns(
263  [](auto& col)
264  {
265  col.data.emplace_back (col.data.back());
266  });
267  }
268 
269  void
270  dropLastRow()
271  {
272  if (not empty())
273  forAllColumns(
274  [](auto& col)
275  {
276  size_t siz = col.data.size();
277  col.data.resize (siz>0? siz-1 : 0);
278  });
279  }
280 
281  void
282  reserve (size_t expectedCapacity)
283  {
284  forAllColumns(
285  [=](auto& col)
286  {
287  col.data.reserve(expectedCapacity);
288  });
289  }
290 
291  void
292  clear()
293  {
294  forAllColumns(
295  [](auto& col)
296  {
297  col.data.clear();
298  });
299  }
300 
301  void
302  appendFrom (CSVData const& csv)
303  {
304  if (isnil (csv)) return;
305  verifyHeaderSpec (csv[0]);
306  for (size_t row=1; row<csv.size(); ++row)
307  if (not isnil (csv[row]))
308  appendRowFromCSV (csv[row]);
309  }
310 
311 
312 
314  void
315  save (size_t lineLimit =std::numeric_limits<size_t>::max()
316  ,bool backupOld =false)
317  {
318  if (filename_.empty())
319  throw error::Logic{"Unable to save DataFile without filename given."};
320 
321  fs::path newFilename{filename_};
322  newFilename += ".tmp";
323 
324  std::ofstream csvFile{newFilename, std::ios_base::out | std::ios_base::trunc};
325  if (not csvFile.good())
326  throw error::State{_Fmt{"Unable to create CSV output file %s"}
327  % newFilename};
328  saveData (csvFile, lineLimit);
329 
330  if (backupOld)
331  {
332  fs::path oldFile{filename_};
333  oldFile += ".bak";
334  if (fs::exists (filename_))
335  fs::rename (filename_, oldFile);
336  }
337  fs::rename (newFilename, filename_);
338  filename_ = fs::consolidated(filename_);
339  } // lock onto absolute path
340 
341 
342  void
343  saveAs (fs::path newStorage
344  ,size_t lineLimit =std::numeric_limits<size_t>::max())
345  {
346  newStorage = fs::consolidated (newStorage);
347  if (fs::exists(newStorage))
348  throw error::Invalid{_Fmt{"Storing DataFile rejected: target %s exists already"}
349  % newStorage};
350  if (not (newStorage.parent_path().empty()
351  or fs::exists(newStorage.parent_path())))
352  throw error::Invalid{_Fmt{"DataFile(%s) placed into nonexistent directory %s"}
353  % newStorage.filename() % newStorage.parent_path()};
354  filename_ = newStorage;
355  save (lineLimit);
356  }
357 
358 
359  private: /* === Implementation === */
360 
362  template<class OP>
363  void
364  forAllColumns (OP&& doIt) const
365  {
366  lib::meta::forEach (unConst(this)->allColumns()
367  ,std::forward<OP> (doIt));
368  }
369 
370  void
371  loadData()
372  {
373  if (not (filename_.parent_path().empty()
374  or fs::exists(filename_.parent_path())))
375  throw error::Invalid{_Fmt{"DataFile(%s) placed into nonexistent directory %s"}
376  % filename_.filename() % filename_.parent_path()};
377  if (not fs::exists(filename_))
378  return; // leave the table empty
379 
380  std::ifstream csvFile{filename_};
381  if (not csvFile.good())
382  throw error::Config{_Fmt{"unable to read CSV data file %s"} % filename_};
383 
384  std::deque<string> rawLines;
385  for (string line; std::getline(csvFile, line); )
386  rawLines.emplace_back (move(line));
387 
388  if (rawLines.size() < 1) return;
389  verifyHeaderSpec (rawLines[0]);
390 
391  // we know the number of rows now...
392  reserve (rawLines.size() - 1);
393 
394  // storage in file is backwards, with newest data on top
395  for (size_t row = rawLines.size()-1; 0<row; --row)
396  if (not isnil(rawLines[row]))
397  appendRowFromCSV (rawLines[row]);
398  }
399 
400 
401  void
402  saveData (std::ofstream& csvFile, size_t lineLimit)
403  {
404  csvFile << generateHeaderSpec() << "\n";
405  if (empty())
406  return;
407  lineLimit = size() > lineLimit? size()-lineLimit : 0;
408  // store newest data first, possibly discard old data
409  for (size_t row = size(); lineLimit < row; --row)
410  csvFile << formatCSVRow(row-1) << "\n";
411  }
412 
413 
414  void
415  verifyHeaderSpec (string headerLine)
416  {
417  CsvParser header{headerLine};
418  forAllColumns(
419  [&](auto& col)
420  {
421  if (*header != col.header)
422  throw error::Invalid{_Fmt{"Header mismatch in CSV file %s. "
423  "Expecting column(%s) but found \"%s\""}
424  % filename_ % col.header % *header};
425  ++header;
426  });
427  }
428 
429  CSVLine
430  generateHeaderSpec() const
431  {
432  CSVLine csv;
433  forAllColumns(
434  [&](auto& col)
435  {
436  csv += col.header;
437  });
438  return csv;
439  }
440 
441 
442  void
443  appendRowFromCSV (string line)
444  {
445  newRow();
446  CsvParser csv(line);
447  forAllColumns(
448  [&](auto& col)
449  {
450  if (not csv)
451  {
452  if (csv.isParseFail())
453  csv.fail();
454  else
455  throw error::Invalid{_Fmt{"Insufficient data; only %d fields, %d expected. Line:%s"}
456  % csv.getParsedFieldCnt() % columnCnt % line};
457  }
458 
459  using Value = typename std::remove_reference<decltype(col)>::type::ValueType;
460  col.get() = parseAs<Value>(*csv);
461  ++csv;
462  });
463  if (csv)
464  throw error::Invalid{_Fmt{"Excess data fields in CSV. Expect %d fields. Line:%s"}
465  % columnCnt % line};
466  }
467 
468 
469  CSVLine
470  formatCSVRow (size_t rownum) const
471  {
472  if (this->empty())
473  throw error::Logic{"Attempt to access data from empty DataTable."};
474  if (rownum >= this->size())
475  throw error::Logic{_Fmt{"Attempt to access row #%d beyond range [0..%d]."}
476  % rownum % (size()-1)};
477 
478  CSVLine csvLine;
479  forAllColumns(
480  [&](auto& col)
481  {
482  csvLine += col.data.at(rownum);
483  });
484  return csvLine;
485  }
486  };
487 
488 }} // namespace lib::stat
489 #endif /*LIB_STAT_DATA_H*/
Encoding and decoding of data into CSV format.
Wrapper to simplify notation in tests.
Definition: csv.hpp:140
Includes the C++ Filesystem library and provides some convenience helpers.
A string with the ability to construct or append the CSV-rendering of data fields.
Definition: csv.hpp:105
Types marked with this mix-in may be moved but not copied.
Definition: nocopy.hpp:49
Front-end for printf-style string template interpolation.
A front-end for using printf-style formatting.
Implementation namespace for support and library code.
Derived specific exceptions within Lumiera's exception hierarchy.
Definition: error.hpp:190
Mix-Ins to allow or prohibit various degrees of copying and cloning.
void save(size_t lineLimit=std::numeric_limits< size_t >::max(), bool backupOld=false)
Definition: data.hpp:315
Tiny helper functions and shortcuts to be used everywhere Consider this header to be effectively incl...
Table with data values, stored persistently as CSV file.
Definition: data.hpp:185
Descriptor and Accessor for a data column within a DataTable table.
Definition: data.hpp:121
Lumiera error handling (C++ interface).
Parser to split one line of CSV data into fields.
Definition: csv.hpp:241
void forAllColumns(OP &&doIt) const
apply a generic Lambda to all columns
Definition: data.hpp:364