Lumiera 0.pre.04
»edit your freedom«
Loading...
Searching...
No Matches
csv.hpp
Go to the documentation of this file.
1/*
2 CSV.hpp - Parser and Encoder for CSV data
3
4 Copyright (C)
5 2022, Hermann Vosseler <Ichthyostega@web.de>
6
7  **Lumiera** is free software; you can redistribute it and/or modify it
8  under the terms of the GNU General Public License as published by the
9  Free Software Foundation; either version 2 of the License, or (at your
10  option) any later version. See the file COPYING for further details.
11
12*/
13
14
38#ifndef LIB_STAT_CSV_H
39#define LIB_STAT_CSV_H
40
41#include "lib/error.hpp"
42#include "lib/null-value.hpp"
44#include "lib/format-string.hpp"
45#include "lib/regex.hpp"
46
47#include <limits>
48#include <string>
49#include <vector>
50
51namespace lib {
52namespace stat {
53
54 namespace error = lumiera::error;
55
56 using util::_Fmt;
57 using util::toString;
58 using std::string;
59 using std::regex;
60
61
62 namespace { // Implementation details...
63
/** an unquoted field: captures a (possibly empty) run of characters
 *  excluding comma, semicolon, double quote and whitespace; trailing
 *  whitespace is consumed but not captured */
const std::string MATCH_SINGLE_TOKEN { R"~(([^,;"\s]*)\s*)~"};

/** a quoted field: captures the text between two double quotes
 *  (embedded quotes are not supported); trailing whitespace is consumed */
const std::string MATCH_QUOTED_TOKEN { R"~("([^"]*)"\s*)~" };

/** a field starts at the begin of the line or after a comma or semicolon,
 *  optionally followed by whitespace */
const std::string MATCH_DELIMITER    { R"~((?:^|,|;)\s*)~" };

/** used to detect if a line of text contains any CSV delimiter at all */
const std::regex FIND_DELIMITER_TOKEN{"[,;]"};

/** matches one complete field including its leading delimiter.
 *  Capture group 1 holds the content of a quoted field, capture group 2
 *  the content of an unquoted field. The quoted alternative must be tried
 *  first, because the unquoted pattern also matches the empty string and
 *  would otherwise always win in front of an opening quote.
 *  @note the head of this definition was missing from the listing and
 *        has been restored from its usage by the CSV parser.
 */
const std::regex ACCEPT_FIELD{ MATCH_DELIMITER + "(?:"+ MATCH_QUOTED_TOKEN +"|"+ MATCH_SINGLE_TOKEN +")"
                             , std::regex::optimize};
72
73 template<typename VAL>
74 inline string
75 format4Csv (VAL const& val)
76 {
77 if constexpr (std::is_floating_point_v<VAL>)
78 return util::showDecimal (val);
79 // standard textual rendering
80 auto res = util::toString (val);
81 if constexpr (std::is_arithmetic_v<VAL>)
82 return res; // includes bool
83 else
84 return '"'+res+'"';
85 }
86 }//(End)Implementation
87
88
92 template<typename VAL>
93 inline void
94 appendCsvField (string& csv, VAL const& val)
95 {
96 csv += (0 == csv.length()? "":",")
97 + format4Csv(val);
98 }
99
100
105 struct CSVLine
106 : std::string
107 {
108 using value_type = string;
109
110 template<typename...ELMS, typename = meta::disable_if_self<CSVLine,ELMS...>>
111 CSVLine (ELMS&& ...items)
112 {
113 meta::forEach (std::make_tuple (items...)
114 ,[this](auto const& it){ *this += it; }
115 );
116 }
117 // Standard copy acceptable
118
119
120 template<typename X>
121 CSVLine&
122 operator+= (X const& x)
123 {
124 stat::appendCsvField (*this, x);
125 return *this;
126 }
127 };
128
140 struct CSVData
141 : std::vector<CSVLine>
142 {
143 using VecCSV = std::vector<CSVLine>;
144
145 CSVData (std::initializer_list<string> lines)
146 : VecCSV(detectHeader(lines))
147 { }
148
149 CSVData (std::initializer_list<string> header
150 ,std::initializer_list<CSVLine> data)
151 {
152 reserve (data.size()+1);
153 appendHeaderLine(*this, header);
154 for (CSVLine const& line : data)
155 emplace_back (line);
156 }
157
158 // standard copy operations acceptable
159
160
161 operator string() const
162 {
163 std::ostringstream buffer;
164 for (string const& line : *this)
165 buffer << line << '\n';
166 return buffer.str();
167 }
168
169
170 private:
171 static bool
172 containsCSV (string const& line)
173 {
174 return std::regex_search (line, FIND_DELIMITER_TOKEN);
175 }
176
177 static void
178 appendHeaderLine (VecCSV& data, std::initializer_list<string> const& input)
179 {
180 CSVLine header;
181 for (string const& s : input)
182 header += s;
183 data.emplace_back (move(header));
184 }
185
186 static VecCSV
187 detectHeader (std::initializer_list<string> input)
188 {
189 VecCSV csv;
190 if (input.size() > 0 and containsCSV(*input.begin()))
191 {// the first line is a header => slurp in all as lines
192 csv.reserve (input.size());
193 for (string const& s : input)
194 csv.emplace_back (s);
195 }
196 else // combine all strings into a single header line
197 appendHeaderLine (csv, input);
198 return csv;
199 }
200 };
201
202
203
205 template<typename TAR>
206 inline TAR
207 parseAs (string const& encodedVal)
208 {
209 std::istringstream converter{encodedVal};
210 TAR value;
211 converter >> value;
212 if (converter.fail())
213 throw error::Invalid{_Fmt{"unable to parse \"%s\""} % encodedVal};
214 return value;
215 }
216
217 template<>
218 inline bool
219 parseAs (string const& encodedBool)
220 {
221 return util::boolVal(encodedBool);
222 }
223 template<>
224 inline string
225 parseAs (string const& string)
226 {
227 return string; // pass-through (even if empty)
228 }
229
230
231
232
242 : public util::RegexSearchIter
243 {
244 string const& line_{};
245 size_t field_{0};
246 size_t pos_{0};
247
248 util::RegexSearchIter const& curr() const { return *this; }
250
251 public:
253 : line_{lib::NullValue<string>::get()}
254 { }
255
256 CsvParser (string& line) // NOTE: string and reg-exp must exist elsewhere
257 : RegexSearchIter(line, ACCEPT_FIELD)
258 , line_{line}
259 { }
260
261 explicit operator bool() const
262 {
263 return isValid();
264 }
265
267
268
269 string operator*() const
270 {
271 if (not isValid()) fail();
272 auto& mat = *curr();
273 return mat[2].matched? mat[2]
274 : mat[1];
275 }
276
277 void
279 {
280 if (not isValid())
281 fail();
282 pos_ = curr()->position() + curr()->length();
283 util::RegexSearchIter::operator ++();
284 if (pos_ < line_.length() and not isValid())
285 fail ();
286 ++field_;
287 }
288
289 size_t
291 {
292 return field_;
293 }
294
295 bool
296 isValid() const
297 {
298 return curr() != end()
299 and pos_ == size_t(curr()->position())
300 and not curr()->empty();
301 }
302
303 bool
305 {
306 return curr() != end()
307 and not isValid();
308 }
309
310 void
311 fail() const
312 {
313 if (curr() == end())
314 if (pos_ >= line_.length())
315 throw error::Invalid{_Fmt{"Only %d data fields. Line:%s"}
316 % field_ % line_};
317 else
318 throw error::Invalid{_Fmt{"Garbage after last field. Line:%s|↯|%s"}
319 % line_.substr(0,pos_) % line_.substr(pos_)};
320 else
321 if (pos_ != size_t(curr()->position()))
322 throw error::Invalid{_Fmt{"Garbage before field(%d):%s|↯|%s"}
323 % (field_+1)
324 % line_.substr(0,pos_) % line_.substr(pos_)};
325
326 throw error::Invalid{_Fmt{"CSV parse floundered. Line:%s"} % line_};
327 }
328 };
329
330}} // namespace lib::stat
331#endif /*LIB_STAT_CSV_H*/
Parser to split one line of CSV data into fields.
Definition csv.hpp:243
string const & line_
Definition csv.hpp:244
void fail() const
Definition csv.hpp:311
util::RegexSearchIter const & curr() const
Definition csv.hpp:248
string operator*() const
Definition csv.hpp:269
bool isParseFail() const
Definition csv.hpp:304
size_t getParsedFieldCnt()
Definition csv.hpp:290
bool isValid() const
Definition csv.hpp:296
CsvParser(string &line)
Definition csv.hpp:256
ENABLE_USE_IN_STD_RANGE_FOR_LOOPS(CsvParser)
util::RegexSearchIter end() const
Definition csv.hpp:249
Derived specific exceptions within Lumiera's exception hierarchy.
Definition error.hpp:193
A front-end for using printf-style formatting.
Lumiera error handling (C++ interface).
Front-end for printf-style string template interpolation.
disable_if< std::is_same< std::remove_cv_t< std::remove_reference_t< extractFirst_t< ARGS... > > >, SELF > > disable_if_self
helper to prevent a template constructor from shadowing inherited copy ctors
constexpr void forEach(TUP &&tuple, FUN fun)
Tuple iteration: perform some arbitrary operation on each element of a tuple.
string format4Csv(VAL const &val)
Definition csv.hpp:75
void appendCsvField(string &csv, VAL const &val)
Format and append a data value to a CSV string representation.
Definition csv.hpp:94
TAR parseAs(string const &encodedVal)
parse string representation into typed value
Definition csv.hpp:207
Implementation namespace for support and library code.
string showDecimal(double val) noexcept
show maximum reproducible decimal representation
std::string toString(TY const &val) noexcept
get some string representation of any object, reliably.
bool boolVal(string const &textForm)
interpret text representation of a boolean value.
Definition util.cpp:99
Singleton-style holder for NIL or default values.
Convenience wrappers and helpers for dealing with regular expressions.
Singleton holder for NIL or default value objects.
Wrapper to simplify notation in tests.
Definition csv.hpp:142
CSVData(std::initializer_list< string > header, std::initializer_list< CSVLine > data)
Definition csv.hpp:149
CSVData(std::initializer_list< string > lines)
Definition csv.hpp:145
static VecCSV detectHeader(std::initializer_list< string > input)
Definition csv.hpp:187
static bool containsCSV(string const &line)
Definition csv.hpp:172
std::vector< CSVLine > VecCSV
Definition csv.hpp:143
static void appendHeaderLine(VecCSV &data, std::initializer_list< string > const &input)
Definition csv.hpp:178
A string with the ability to construct or append the CSV-rendering of data fields.
Definition csv.hpp:107
CSVLine(ELMS &&...items)
Definition csv.hpp:111
string value_type
Definition csv.hpp:108
CSVLine & operator+=(X const &x)
Definition csv.hpp:122
wrapped regex iterator to allow usage in foreach loops
Definition regex.hpp:42
bool empty() const
Definition regex.hpp:56
Metaprogramming with tuples-of-types and the std::tuple record.