Lumiera  0.pre.03
»edit your freedom«
csv.hpp
Go to the documentation of this file.
1 /*
2  CSV.hpp - Parser and Encoder for CSV data
3 
4  Copyright (C) Lumiera.org
5  2022, Hermann Vosseler <Ichthyostega@web.de>
6 
7  This program is free software; you can redistribute it and/or
8  modify it under the terms of the GNU General Public License as
9  published by the Free Software Foundation; either version 2 of
10  the License, or (at your option) any later version.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to the Free Software
19  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21 */
22 
23 
47 #ifndef LIB_STAT_CSV_H
48 #define LIB_STAT_CSV_H
49 
50 #include "lib/error.hpp"
51 #include "lib/null-value.hpp"
53 #include "lib/format-string.hpp"
54 #include "lib/regex.hpp"
55 
56 #include <limits>
57 #include <string>
58 #include <vector>
59 
60 namespace lib {
61 namespace stat {
62 
63  namespace error = lumiera::error;
64 
65  using util::_Fmt;
66  using util::toString;
67  using std::string;
68  using std::regex;
69 
70 
71  namespace { // Implementation details...
72 
// Building blocks for the regular expression recognising a single CSV field.
// Each token pattern captures its payload in one group and swallows trailing whitespace.
const string MATCH_SINGLE_TOKEN = R"~(([^,;"\s]*)\s*)~";  // bare token: no delimiter, quote or whitespace
const string MATCH_QUOTED_TOKEN = R"~("([^"]*)"\s*)~";    // token enclosed in double quotes
const string MATCH_DELIMITER    = R"~((?:^|,|;)\s*)~";    // start of line, comma or semicolon

// any occurrence of a field delimiter
const regex FIND_DELIMITER_TOKEN ("[,;]");

// one complete field: delimiter, then either a quoted token (capture group 1)
// or a bare token (capture group 2)
const regex ACCEPT_FIELD (MATCH_DELIMITER
                           + "(?:"
                           +   MATCH_QUOTED_TOKEN
                           +   "|"
                           +   MATCH_SINGLE_TOKEN
                           + ")"
                         ,regex::optimize
                         );
81 
82  template<typename VAL>
83  inline string
84  format4Csv (VAL const& val)
85  {
86  if constexpr (std::is_floating_point_v<VAL>)
87  return util::showDecimal (val);
88  // standard textual rendering
89  auto res = util::toString (val);
90  if constexpr (std::is_arithmetic_v<VAL>)
91  return res; // includes bool
92  else
93  return '"'+res+'"';
94  }
95  }//(End)Implementation
96 
97 
/**
 * Format and append a data value to a CSV string representation.
 * A comma is inserted as separator, unless the given line is still empty.
 * @param csv the line under construction; extended in place
 * @param val the value to render (see format4Csv) and attach as next field
 */
template<typename VAL>
inline void
appendCsvField (string& csv, VAL const& val)
{
  // render first: the target line is only touched after formatting succeeded
  string field = format4Csv (val);
  if (not csv.empty())
    csv += ',';
  csv += field;
}
108 
109 
114  struct CSVLine
115  : std::string
116  {
117  using value_type = string;
118 
119  template<typename...ELMS, typename = meta::disable_if_self<CSVLine,ELMS...>>
120  CSVLine (ELMS&& ...items)
121  {
122  meta::forEach (std::make_tuple (items...)
123  ,[this](auto const& it){ *this += it; }
124  );
125  }
126  // Standard copy acceptable
127 
128 
129  template<typename X>
130  CSVLine&
131  operator+= (X const& x)
132  {
133  stat::appendCsvField (*this, x);
134  return *this;
135  }
136  };
137 
149  struct CSVData
150  : std::vector<CSVLine>
151  {
152  using VecCSV = std::vector<CSVLine>;
153 
154  CSVData (std::initializer_list<string> lines)
155  : VecCSV(detectHeader(lines))
156  { }
157 
158  CSVData (std::initializer_list<string> header
159  ,std::initializer_list<CSVLine> data)
160  {
161  reserve (data.size()+1);
162  appendHeaderLine(*this, header);
163  for (CSVLine const& line : data)
164  emplace_back (line);
165  }
166 
167  // standard copy operations acceptable
168 
169 
170  operator string() const
171  {
172  std::ostringstream buffer;
173  for (string const& line : *this)
174  buffer << line << '\n';
175  return buffer.str();
176  }
177 
178 
179  private:
180  static bool
181  containsCSV (string const& line)
182  {
183  return std::regex_search (line, FIND_DELIMITER_TOKEN);
184  }
185 
186  static void
187  appendHeaderLine (VecCSV& data, std::initializer_list<string> const& input)
188  {
189  CSVLine header;
190  for (string const& s : input)
191  header += s;
192  data.emplace_back (move(header));
193  }
194 
195  static VecCSV
196  detectHeader (std::initializer_list<string> input)
197  {
198  VecCSV csv;
199  if (input.size() > 0 and containsCSV(*input.begin()))
200  {// the first line is a header => slurp in all as lines
201  csv.reserve (input.size());
202  for (string const& s : input)
203  csv.emplace_back (s);
204  }
205  else // combine all strings into a single header line
206  appendHeaderLine (csv, input);
207  return csv;
208  }
209  };
210 
211 
212 
214  template<typename TAR>
215  inline TAR
216  parseAs (string const& encodedVal)
217  {
218  std::istringstream converter{encodedVal};
219  TAR value;
220  converter >> value;
221  if (converter.fail())
222  throw error::Invalid{_Fmt{"unable to parse \"%s\""} % encodedVal};
223  return value;
224  }
225 
226  template<>
227  inline bool
228  parseAs (string const& encodedBool)
229  {
230  return util::boolVal(encodedBool);
231  }
232  template<>
233  inline string
234  parseAs (string const& string)
235  {
236  return string; // pass-through (even if empty)
237  }
238 
239 
240 
241 
250  class CsvParser
251  : public util::RegexSearchIter
252  {
253  string const& line_{};
254  size_t field_{0};
255  size_t pos_{0};
256 
257  util::RegexSearchIter const& curr() const { return *this; }
258  util::RegexSearchIter end() const { return util::RegexSearchIter{}; }
259 
260  public:
261  CsvParser()
262  : line_{lib::NullValue<string>::get()}
263  { }
264 
265  CsvParser (string& line) // NOTE: string and reg-exp must exist elsewhere
266  : RegexSearchIter(line, ACCEPT_FIELD)
267  , line_{line}
268  { }
269 
270  explicit operator bool() const
271  {
272  return isValid();
273  }
274 
276 
277 
278  string operator*() const
279  {
280  if (not isValid()) fail();
281  auto& mat = *curr();
282  return mat[2].matched? mat[2]
283  : mat[1];
284  }
285 
286  void
287  operator++()
288  {
289  if (not isValid())
290  fail();
291  pos_ = curr()->position() + curr()->length();
292  util::RegexSearchIter::operator ++();
293  if (pos_ < line_.length() and not isValid())
294  fail ();
295  ++field_;
296  }
297 
298  size_t
299  getParsedFieldCnt()
300  {
301  return field_;
302  }
303 
304  bool
305  isValid() const
306  {
307  return curr() != end()
308  and pos_ == size_t(curr()->position())
309  and not curr()->empty();
310  }
311 
312  bool
313  isParseFail() const
314  {
315  return curr() != end()
316  and not isValid();
317  }
318 
319  void
320  fail() const
321  {
322  if (curr() == end())
323  if (pos_ >= line_.length())
324  throw error::Invalid{_Fmt{"Only %d data fields. Line:%s"}
325  % field_ % line_};
326  else
327  throw error::Invalid{_Fmt{"Garbage after last field. Line:%s|↯|%s"}
328  % line_.substr(0,pos_) % line_.substr(pos_)};
329  else
330  if (pos_ != size_t(curr()->position()))
331  throw error::Invalid{_Fmt{"Garbage before field(%d):%s|↯|%s"}
332  % (field_+1)
333  % line_.substr(0,pos_) % line_.substr(pos_)};
334 
335  throw error::Invalid{_Fmt{"CSV parse floundered. Line:%s"} % line_};
336  }
337  };
338 
339 }} // namespace lib::stat
340 #endif /*LIB_STAT_CSV_H*/
Wrapper to simplify notation in tests.
Definition: csv.hpp:149
void appendCsvField(string &csv, VAL const &val)
Format and append a data value to a CSV string representation.
Definition: csv.hpp:103
A string with the ability to construct or append the CSV-rendering of data fields.
Definition: csv.hpp:114
Front-end for printf-style string template interpolation.
Singleton holder for NIL or default value objects.
Definition: null-value.hpp:71
#define ENABLE_USE_IN_STD_RANGE_FOR_LOOPS(ITER)
use a given Lumiera Forward Iterator in standard "range for loops"
Singleton-style holder for NIL or default values.
A front-end for using printf-style formatting.
Implementation namespace for support and library code.
Metaprogramming with tuples-of-types and the std::tuple record.
TAR parseAs(string const &encodedVal)
parse string representation into typed value
Definition: csv.hpp:216
Derived specific exceptions within Lumiera's exception hierarchy.
Definition: error.hpp:199
std::string toString(TY const &val) noexcept
get some string representation of any object, reliably.
Definition: format-obj.hpp:200
Lumiera error handling (C++ interface).
wrapped regex iterator to allow usage in foreach loops
Definition: regex.hpp:49
Convenience wrappers and helpers for dealing with regular expressions.
Parser to split one line of CSV data into fields.
Definition: csv.hpp:250
disable_if< std::is_same< std::remove_cv_t< std::remove_reference_t< extractFirst_t< ARGS... > >>, SELF > > disable_if_self
helper to prevent a template constructor from shadowing inherited copy ctors
Definition: meta/util.hpp:160