Lumiera 0.pre.04
»edit your freedom«
Loading...
Searching...
No Matches
data.hpp
Go to the documentation of this file.
1/*
2 DATA.hpp - read and write a table with CSV data
3
4 Copyright (C)
5 2022, Hermann Vosseler <Ichthyostega@web.de>
6
7  **Lumiera** is free software; you can redistribute it and/or modify it
8  under the terms of the GNU General Public License as published by the
9  Free Software Foundation; either version 2 of the License, or (at your
10  option) any later version. See the file COPYING for further details.
11
12*/
13
14
78#ifndef LIB_STAT_DATA_H
79#define LIB_STAT_DATA_H
80
81
82#include "lib/error.hpp"
83#include "lib/nocopy.hpp"
84#include "lib/stat/csv.hpp"
85#include "lib/file.hpp"
86#include "lib/format-string.hpp"
87#include "lib/util.hpp"
88
89#include <type_traits>
90#include <utility>
91#include <fstream>
92#include <vector>
93#include <string>
94#include <limits>
95#include <deque>
96
97
98namespace lib {
99namespace stat{
100
101 namespace error = lumiera::error;
102
103 using std::move;
104 using std::tuple;
105 using std::vector;
106 using std::string;
107 using util::isnil;
108 using util::unConst;
109 using util::_Fmt;
110 using util::min;
111
112
113
114
120 template<typename VAL>
121 struct Column
123 {
124 string header;
125 vector<VAL> data;
126
127 using ValueType = VAL;
128
129
130 Column (string headerID)
131 : header{headerID}
132 , data{}
133 { }
134
135
136 VAL&
138 {
139 if (isnil (data))
140 throw error::State{"No rows in DataTable yet"};
141 return data.back();
142 }
143
/** Implicit conversion to a mutable value reference; simply forwards to get().
 *  NOTE(review): get() is presumably the accessor defined directly above
 *  (yielding `data.back()` or throwing when there are no rows) — its name line
 *  is missing from this listing, confirm against the original header. */
144 operator VAL&()
145 {
146 return get();
147 }
148
/** Read-only variant of the conversion: uses unConst() on `this`
 *  in order to reuse the non-const get(). */
149 operator VAL const&() const
150 {
151 return unConst(this)->get();
152 }
153
/** Assign a new value to the element returned by get().
 *  @param newVal value to store, perfect-forwarded into the assignment
 *  @return the result of the assignment expression, as `VAL&` */
154 template<typename X>
155 VAL& operator= (X&& newVal)
156 {
157 return get() = std::forward<X> (newVal);
158 }
159 };
160
161
162
163
164
165 /******************************************************************************************/
184 template<class TAB>
186 : public TAB
188 {
189 fs::path filename_;
190
191 public:
/** Create a table bound to a storage file: the given path is passed through
 *  fs::consolidated() and remembered as filename_, then loadData() is invoked.
 *  @param csvFile location of the CSV file; defaults to an empty path */
192 DataTable(fs::path csvFile ="")
193 : filename_{fs::consolidated (csvFile)}
194 {
195 loadData();
196 }
197
/** Create a table without storage file (filename_ stays empty)
 *  and populate it by handing the given CSV data to appendFrom().
 *  @param csv CSV data; see appendFrom() for the way it is interpreted */
198 DataTable (CSVData const& csv)
199 : filename_{}
200 {
201 appendFrom (csv);
202 }
203
204
205 /* === Data Access === */
206
207 static constexpr size_t columnCnt = std::tuple_size_v<decltype(std::declval<TAB>().allColumns())>;
208
209 bool
210 empty() const
211 {
212 return 0 == this->size();
213 }
214
215 size_t
216 size() const
217 {
218 if (0 == columnCnt) return 0;
219 size_t rowCnt = std::numeric_limits<size_t>::max();
221 [&](auto& col)
222 {
223 rowCnt = min (rowCnt, col.data.size());
224 }); // the smallest number of data points found in any column
225 return rowCnt;
226 }
227
228 CSVData
229 renderCSV() const
230 {
231 CSVData csv{{}};
232 csv.reserve (size()+1);
233 auto header = generateHeaderSpec();
234 using std::swap;
235 swap (csv[0], header);
236 for (uint i=0; i < size(); ++i)
237 csv.emplace_back (formatCSVRow(i));
238 return csv;
239 }
240
241
242
243 /* === Manipulation === */
244
245 void
247 {
249 [siz = size()+1]
250 (auto& col)
251 {
252 col.data.resize (siz);
253 });
254 }
255
256 void
258 {
259 if (empty())
260 newRow();
261 else
263 [](auto& col)
264 {
265 col.data.emplace_back (col.data.back());
266 });
267 }
268
269 void
271 {
272 if (not empty())
274 [](auto& col)
275 {
276 size_t siz = col.data.size();
277 col.data.resize (siz>0? siz-1 : 0);
278 });
279 }
280
281 void
282 reserve (size_t expectedCapacity)
283 {
285 [=](auto& col)
286 {
287 col.data.reserve(expectedCapacity);
288 });
289 }
290
291 void
293 {
295 [](auto& col)
296 {
297 col.data.clear();
298 });
299 }
300
301 void
302 appendFrom (CSVData const& csv)
303 {
304 if (isnil (csv)) return;
305 verifyHeaderSpec (csv[0]);
306 for (size_t row=1; row<csv.size(); ++row)
307 if (not isnil (csv[row]))
308 appendRowFromCSV (csv[row]);
309 }
310
311
312
314 void
315 save (size_t lineLimit =std::numeric_limits<size_t>::max()
316 ,bool backupOld =false)
317 {
318 if (filename_.empty())
319 throw error::Logic{"Unable to save DataFile without filename given."};
320
321 fs::path newFilename{filename_};
322 newFilename += ".tmp";
323
324 std::ofstream csvFile{newFilename, std::ios_base::out | std::ios_base::trunc};
325 if (not csvFile.good())
326 throw error::State{_Fmt{"Unable to create CSV output file %s"}
327 % newFilename};
328 saveData (csvFile, lineLimit);
329
330 if (backupOld)
331 {
332 fs::path oldFile{filename_};
333 oldFile += ".bak";
334 if (fs::exists (filename_))
335 fs::rename (filename_, oldFile);
336 }
337 fs::rename (newFilename, filename_);
339 } // lock onto absolute path
340
341
342 void
343 saveAs (fs::path newStorage
344 ,size_t lineLimit =std::numeric_limits<size_t>::max())
345 {
346 newStorage = fs::consolidated (newStorage);
347 if (fs::exists(newStorage))
348 throw error::Invalid{_Fmt{"Storing DataFile rejected: target %s exists already"}
349 % newStorage};
350 if (not (newStorage.parent_path().empty()
351 or fs::exists(newStorage.parent_path())))
352 throw error::Invalid{_Fmt{"DataFile(%s) placed into nonexistent directory %s"}
353 % newStorage.filename() % newStorage.parent_path()};
354 filename_ = newStorage;
355 save (lineLimit);
356 }
357
358
359 private: /* === Implementation === */
360
362 template<class OP>
363 void
364 forAllColumns (OP&& doIt) const
365 {
366 lib::meta::forEach (unConst(this)->allColumns()
367 ,std::forward<OP> (doIt));
368 }
369
370 void
372 {
373 if (not (filename_.parent_path().empty()
374 or fs::exists(filename_.parent_path())))
375 throw error::Invalid{_Fmt{"DataFile(%s) placed into nonexistent directory %s"}
376 % filename_.filename() % filename_.parent_path()};
377 if (not fs::exists(filename_))
378 return; // leave the table empty
379
380 std::ifstream csvFile{filename_};
381 if (not csvFile.good())
382 throw error::Config{_Fmt{"unable to read CSV data file %s"} % filename_};
383
384 std::deque<string> rawLines;
385 for (string line; std::getline(csvFile, line); )
386 rawLines.emplace_back (move(line));
387
388 if (rawLines.size() < 1) return;
389 verifyHeaderSpec (rawLines[0]);
390
391 // we know the number of rows now...
392 reserve (rawLines.size() - 1);
393
394 // storage in file is backwards, with newest data on top
395 for (size_t row = rawLines.size()-1; 0<row; --row)
396 if (not isnil(rawLines[row]))
397 appendRowFromCSV (rawLines[row]);
398 }
399
400
401 void
402 saveData (std::ofstream& csvFile, size_t lineLimit)
403 {
404 csvFile << generateHeaderSpec() << "\n";
405 if (empty())
406 return;
407 lineLimit = size() > lineLimit? size()-lineLimit : 0;
408 // store newest data first, possibly discard old data
409 for (size_t row = size(); lineLimit < row; --row)
410 csvFile << formatCSVRow(row-1) << "\n";
411 }
412
413
414 void
415 verifyHeaderSpec (string headerLine)
416 {
417 CsvParser header{headerLine};
419 [&](auto& col)
420 {
421 if (*header != col.header)
422 throw error::Invalid{_Fmt{"Header mismatch in CSV file %s. "
423 "Expecting column(%s) but found \"%s\""}
424 % filename_ % col.header % *header};
425 ++header;
426 });
427 }
428
429 CSVLine
431 {
432 CSVLine csv;
433 forAllColumns(
434 [&](auto& col)
435 {
436 csv += col.header;
437 });
438 return csv;
439 }
440
441
442 void
443 appendRowFromCSV (string line)
444 {
445 newRow();
446 CsvParser csv(line);
447 forAllColumns(
448 [&](auto& col)
449 {
450 if (not csv)
451 {
452 if (csv.isParseFail())
453 csv.fail();
454 else
455 throw error::Invalid{_Fmt{"Insufficient data; only %d fields, %d expected. Line:%s"}
456 % csv.getParsedFieldCnt() % columnCnt % line};
457 }
458
459 using Value = std::remove_reference<decltype(col)>::type::ValueType;
460 col.get() = parseAs<Value>(*csv);
461 ++csv;
462 });
463 if (csv)
464 throw error::Invalid{_Fmt{"Excess data fields in CSV. Expect %d fields. Line:%s"}
465 % columnCnt % line};
466 }
467
468
469 CSVLine
470 formatCSVRow (size_t rownum) const
471 {
472 if (this->empty())
473 throw error::Logic{"Attempt to access data from empty DataTable."};
474 if (rownum >= this->size())
475 throw error::Logic{_Fmt{"Attempt to access row #%d beyond range [0..%d]."}
476 % rownum % (size()-1)};
477
478 CSVLine csvLine;
479 forAllColumns(
480 [&](auto& col)
481 {
482 csvLine += col.data.at(rownum);
483 });
484 return csvLine;
485 }
486 };
487
488}} // namespace lib::stat
489#endif /*LIB_STAT_DATA_H*/
Parser to split one line of CSV data into fields.
Definition csv.hpp:243
Table with data values, stored persistently as CSV file.
Definition data.hpp:188
DataTable(CSVData const &csv)
Definition data.hpp:198
size_t size() const
Definition data.hpp:216
DataTable(fs::path csvFile="")
Definition data.hpp:192
void save(size_t lineLimit=std::numeric_limits< size_t >::max(), bool backupOld=false)
Definition data.hpp:315
CSVData renderCSV() const
Definition data.hpp:229
bool empty() const
Definition data.hpp:210
void verifyHeaderSpec(string headerLine)
Definition data.hpp:415
void appendFrom(CSVData const &csv)
Definition data.hpp:302
void saveAs(fs::path newStorage, size_t lineLimit=std::numeric_limits< size_t >::max())
Definition data.hpp:343
void reserve(size_t expectedCapacity)
Definition data.hpp:282
void saveData(std::ofstream &csvFile, size_t lineLimit)
Definition data.hpp:402
void forAllColumns(OP &&doIt) const
apply a generic Lambda to all columns
Definition data.hpp:364
static constexpr size_t columnCnt
Definition data.hpp:207
void appendRowFromCSV(string line)
Definition data.hpp:443
fs::path filename_
Definition data.hpp:189
CSVLine generateHeaderSpec() const
Definition data.hpp:430
CSVLine formatCSVRow(size_t rownum) const
Definition data.hpp:470
Derived specific exceptions within Lumiera's exception hierarchy.
Definition error.hpp:193
Types marked with this mix-in may be moved but not copied.
Definition nocopy.hpp:50
A front-end for using printf-style formatting.
Encoding and decoding of data into CSV format.
Lumiera error handling (C++ interface).
Includes the C++ Filesystem library and provides some convenience helpers.
Front-end for printf-style string template interpolation.
unsigned int uint
Definition integral.hpp:29
constexpr void forEach(TUP &&tuple, FUN fun)
Tuple iteration: perform some arbitrary operation on each element of a tuple.
Implementation namespace for support and library code.
LumieraError< LERR_(STATE)> State
Definition error.hpp:209
LumieraError< LERR_(LOGIC)> Logic
Definition error.hpp:207
LumieraError< LERR_(CONFIG), Invalid > Config
Definition error.hpp:212
fs::path consolidated(fs::path rawPath)
resolves symlinks, ~ (Unix home dir) and relative specs
Definition file.hpp:61
OBJ * unConst(const OBJ *)
shortcut to save some typing when having to define const and non-const variants of member functions
Definition util.hpp:358
auto min(IT &&elms)
bool isnil(lib::time::Duration const &dur)
Mix-Ins to allow or prohibit various degrees of copying and cloning.
Wrapper to simplify notation in tests.
Definition csv.hpp:142
A string with the ability to construct or append the CSV-rendering of data fields.
Definition csv.hpp:107
Descriptor and Accessor for a data column within a DataTable table.
Definition data.hpp:123
Column(string headerID)
Definition data.hpp:130
vector< VAL > data
Definition data.hpp:125
VAL & operator=(X &&newVal)
Definition data.hpp:155
Tiny helper functions and shortcuts to be used everywhere. Consider this header to be effectively incl...