Lumiera  0.pre.03
»edit your freedom«
csv.hpp
Go to the documentation of this file.
1 /*
2  CSV.hpp - Parser and Encoder for CSV data
3 
4  Copyright (C)
5  2022, Hermann Vosseler <Ichthyostega@web.de>
6 
7   **Lumiera** is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by the
9   Free Software Foundation; either version 2 of the License, or (at your
10   option) any later version. See the file COPYING for further details.
11 
12 */
13 
14 
38 #ifndef LIB_STAT_CSV_H
39 #define LIB_STAT_CSV_H
40 
#include "lib/error.hpp"
#include "lib/null-value.hpp"
#include "lib/format-string.hpp"
#include "lib/regex.hpp"

#include <initializer_list>
#include <limits>
#include <sstream>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
50 
51 namespace lib {
52 namespace stat {
53 
54  namespace error = lumiera::error;
55 
56  using util::_Fmt;
57  using util::toString;
58  using std::string;
59  using std::regex;
60 
61 
62  namespace { // Implementation details...
63 
/** unquoted field: captures a (possibly empty) run of characters which are
 *  neither comma, semicolon, double quote nor whitespace; trailing whitespace is consumed */
const string MATCH_SINGLE_TOKEN{ R"~(([^,;"\s]*)\s*)~" };

/** quoted field: captures everything between two double quotes (no embedded quote possible);
 *  trailing whitespace is consumed */
const string MATCH_QUOTED_TOKEN{ R"~("([^"]*)"\s*)~" };

/** field separator: start of input, a comma or a semicolon, followed by optional whitespace */
const string MATCH_DELIMITER{ R"~((?:^|,|;)\s*)~" };

/** detects the presence of any field separator (comma or semicolon) */
const regex FIND_DELIMITER_TOKEN{ "[,;]" };

/** one complete field: separator, then either a quoted or an unquoted token.
 *  @note capture group 1 holds the content of a quoted token,
 *        capture group 2 holds an unquoted token. */
const regex ACCEPT_FIELD{ MATCH_DELIMITER
                        + "(?:" + MATCH_QUOTED_TOKEN
                        + "|"   + MATCH_SINGLE_TOKEN
                        + ")"
                        , regex::optimize };
72 
73  template<typename VAL>
74  inline string
75  format4Csv (VAL const& val)
76  {
77  if constexpr (std::is_floating_point_v<VAL>)
78  return util::showDecimal (val);
79  // standard textual rendering
80  auto res = util::toString (val);
81  if constexpr (std::is_arithmetic_v<VAL>)
82  return res; // includes bool
83  else
84  return '"'+res+'"';
85  }
86  }//(End)Implementation
87 
88 
92  template<typename VAL>
93  inline void
94  appendCsvField (string& csv, VAL const& val)
95  {
96  csv += (0 == csv.length()? "":",")
97  + format4Csv(val);
98  }
99 
100 
105  struct CSVLine
106  : std::string
107  {
108  using value_type = string;
109 
110  template<typename...ELMS, typename = meta::disable_if_self<CSVLine,ELMS...>>
111  CSVLine (ELMS&& ...items)
112  {
113  meta::forEach (std::make_tuple (items...)
114  ,[this](auto const& it){ *this += it; }
115  );
116  }
117  // Standard copy acceptable
118 
119 
120  template<typename X>
121  CSVLine&
122  operator+= (X const& x)
123  {
124  stat::appendCsvField (*this, x);
125  return *this;
126  }
127  };
128 
140  struct CSVData
141  : std::vector<CSVLine>
142  {
143  using VecCSV = std::vector<CSVLine>;
144 
145  CSVData (std::initializer_list<string> lines)
146  : VecCSV(detectHeader(lines))
147  { }
148 
149  CSVData (std::initializer_list<string> header
150  ,std::initializer_list<CSVLine> data)
151  {
152  reserve (data.size()+1);
153  appendHeaderLine(*this, header);
154  for (CSVLine const& line : data)
155  emplace_back (line);
156  }
157 
158  // standard copy operations acceptable
159 
160 
161  operator string() const
162  {
163  std::ostringstream buffer;
164  for (string const& line : *this)
165  buffer << line << '\n';
166  return buffer.str();
167  }
168 
169 
170  private:
171  static bool
172  containsCSV (string const& line)
173  {
174  return std::regex_search (line, FIND_DELIMITER_TOKEN);
175  }
176 
177  static void
178  appendHeaderLine (VecCSV& data, std::initializer_list<string> const& input)
179  {
180  CSVLine header;
181  for (string const& s : input)
182  header += s;
183  data.emplace_back (move(header));
184  }
185 
186  static VecCSV
187  detectHeader (std::initializer_list<string> input)
188  {
189  VecCSV csv;
190  if (input.size() > 0 and containsCSV(*input.begin()))
191  {// the first line is a header => slurp in all as lines
192  csv.reserve (input.size());
193  for (string const& s : input)
194  csv.emplace_back (s);
195  }
196  else // combine all strings into a single header line
197  appendHeaderLine (csv, input);
198  return csv;
199  }
200  };
201 
202 
203 
205  template<typename TAR>
206  inline TAR
207  parseAs (string const& encodedVal)
208  {
209  std::istringstream converter{encodedVal};
210  TAR value;
211  converter >> value;
212  if (converter.fail())
213  throw error::Invalid{_Fmt{"unable to parse \"%s\""} % encodedVal};
214  return value;
215  }
216 
217  template<>
218  inline bool
219  parseAs (string const& encodedBool)
220  {
221  return util::boolVal(encodedBool);
222  }
223  template<>
224  inline string
225  parseAs (string const& string)
226  {
227  return string; // pass-through (even if empty)
228  }
229 
230 
231 
232 
241  class CsvParser
242  : public util::RegexSearchIter
243  {
244  string const& line_{};
245  size_t field_{0};
246  size_t pos_{0};
247 
248  util::RegexSearchIter const& curr() const { return *this; }
249  util::RegexSearchIter end() const { return util::RegexSearchIter{}; }
250 
251  public:
252  CsvParser()
253  : line_{lib::NullValue<string>::get()}
254  { }
255 
256  CsvParser (string& line) // NOTE: string and reg-exp must exist elsewhere
257  : RegexSearchIter(line, ACCEPT_FIELD)
258  , line_{line}
259  { }
260 
261  explicit operator bool() const
262  {
263  return isValid();
264  }
265 
267 
268 
269  string operator*() const
270  {
271  if (not isValid()) fail();
272  auto& mat = *curr();
273  return mat[2].matched? mat[2]
274  : mat[1];
275  }
276 
277  void
278  operator++()
279  {
280  if (not isValid())
281  fail();
282  pos_ = curr()->position() + curr()->length();
283  util::RegexSearchIter::operator ++();
284  if (pos_ < line_.length() and not isValid())
285  fail ();
286  ++field_;
287  }
288 
289  size_t
290  getParsedFieldCnt()
291  {
292  return field_;
293  }
294 
295  bool
296  isValid() const
297  {
298  return curr() != end()
299  and pos_ == size_t(curr()->position())
300  and not curr()->empty();
301  }
302 
303  bool
304  isParseFail() const
305  {
306  return curr() != end()
307  and not isValid();
308  }
309 
310  void
311  fail() const
312  {
313  if (curr() == end())
314  if (pos_ >= line_.length())
315  throw error::Invalid{_Fmt{"Only %d data fields. Line:%s"}
316  % field_ % line_};
317  else
318  throw error::Invalid{_Fmt{"Garbage after last field. Line:%s|↯|%s"}
319  % line_.substr(0,pos_) % line_.substr(pos_)};
320  else
321  if (pos_ != size_t(curr()->position()))
322  throw error::Invalid{_Fmt{"Garbage before field(%d):%s|↯|%s"}
323  % (field_+1)
324  % line_.substr(0,pos_) % line_.substr(pos_)};
325 
326  throw error::Invalid{_Fmt{"CSV parse floundered. Line:%s"} % line_};
327  }
328  };
329 
330 }} // namespace lib::stat
331 #endif /*LIB_STAT_CSV_H*/
Wrapper to simplify notation in tests.
Definition: csv.hpp:140
void appendCsvField(string &csv, VAL const &val)
Format and append a data value to a CSV string representation.
Definition: csv.hpp:94
A string with the ability to construct or append the CSV-rendering of data fields.
Definition: csv.hpp:105
Front-end for printf-style string template interpolation.
Singleton holder for NIL or default value objects.
Definition: null-value.hpp:62
#define ENABLE_USE_IN_STD_RANGE_FOR_LOOPS(ITER)
use a given Lumiera Forward Iterator in standard "range for loops"
Singleton-style holder for NIL or default values.
A front-end for using printf-style formatting.
Implementation namespace for support and library code.
Metaprogramming with tuples-of-types and the std::tuple record.
TAR parseAs(string const &encodedVal)
parse string representation into typed value
Definition: csv.hpp:207
Derived specific exceptions within Lumiera's exception hierarchy.
Definition: error.hpp:190
std::string toString(TY const &val) noexcept
get some string representation of any object, reliably.
Definition: format-obj.hpp:191
Lumiera error handling (C++ interface).
wrapped regex iterator to allow usage in foreach loops
Definition: regex.hpp:40
Convenience wrappers and helpers for dealing with regular expressions.
Parser to split one line of CSV data into fields.
Definition: csv.hpp:241
disable_if< std::is_same< std::remove_cv_t< std::remove_reference_t< extractFirst_t< ARGS... > >>, SELF > > disable_if_self
helper to prevent a template constructor from shadowing inherited copy ctors
Definition: meta/util.hpp:151