1 /* This file is part of the Amalthea library.
2  *
3  * Copyright (C) 2018-2021, 2024 Eugene 'Vindex' Stulin
4  *
5  * Distributed under the Boost Software License 1.0 or (at your option)
6  * the GNU Lesser General Public License 3.0 or later.
7  */
8 
9 module amalthea.csv;
10 
11 public import amalthea.libcore;
12 import amalthea.dataprocessing : getIndex;
13 
14 import std.algorithm, std.array, std.path, std.string;
15 import std.format : format;
16 
17 
18 /*******************************************************************************
19  * Structure for working with CSV-tables.
20  */
struct CSV {
    private string csvFilePath;
    private char delimiter = ',';

    // Row 0 is the header row; data rows start at index 1.
    private string[][] dataTable;
    string[] headersOfColumns;

    /***************************************************************************
     * The constructor takes a path to a CSV file.
     * If the file exists, its content is parsed into the table;
     * otherwise the table stays empty.
     * Throws: CsvException if rows of the file have different field counts.
     */
    this(string filepath) {
        this.csvFilePath = filepath;
        this.generateDataTable();
    }

    /***************************************************************************
     * The constructor takes a two-dimensional array as the initiating table.
     * The first row of the array is used as the header row.
     */
    this(string[][] table) {
        this.createTable(table);
    }

    /***************************************************************************
     * The constructor takes a path to a future CSV file and
     * a two-dimensional array as the initiating table.
     * Params:
     *     filepath = Path to file for saving CSV table. The existence of
     *                the file, as well as its contents, do not matter.
     *     table    = 2D array with content for the CSV file.
     */
    this(string filepath, string[][] table) {
        this.csvFilePath = filepath;
        this.createTable(table);
    }

    /***************************************************************************
     * The constructor takes an array of table column names
     * and a two-dimensional array as the initiating table.
     * Params:
     *     keys = Table column titles.
     *     table = 2D array with content for the CSV file (without titles).
     * Throws: CsvException if the first row of `table` and `keys`
     *         have different lengths.
     */
    this(string[] keys, string[][] table) {
        createTable(keys, table);
    }

    /***************************************************************************
     * The method returns content of the table of the CSV object
     * (header row included) as a two-dimensional array.
     * The rows are copied, so changing the returned array
     * does not affect the CSV object.
     */
    string[][] getTable() {
        return copyRows(this.dataTable);
    }

    /***************************************************************************
     * The method returns rows of the table as associative arrays,
     * where keys are names of the columns and values are content of the fields.
     * The header row itself is not included. A row that is shorter than
     * the header yields empty values for the missing fields.
     * Returns: null if the table is empty.
     */
    string[string][] getArrayOfRowMaps() {
        if (this.dataTable.length == 0) {
            return null;
        }
        string[string][] arrayOfMaps;
        foreach (row; this.dataTable[1 .. $]) {
            string[string] map;
            foreach (i, header; this.headersOfColumns) {
                // Guard against rows shorter than the header row.
                map[header] = i < row.length ? row[i] : null;
            }
            arrayOfMaps ~= map;
        }
        return arrayOfMaps;
    }

    /***************************************************************************
     * Finds the table field by another field in the same row (key1 and value)
     * and column name (key2). Returns first match only.
     * Throws: CsvException if `key1` or `key2` is not a column name
     *         or if no row has `value` in the column `key1`.
     */
    string getValueOfField(string key1, string value, string key2) {
        string[string] row = this.getRowMapByField(key1, value);
        if (key2 !in row) {
            throw new CsvException(format!"'%s': key not found."(key2));
        }
        return row[key2];
    }

    /***************************************************************************
     * The method finds table row by field (by column name and value),
     * returns associative array, where keys are names of columns and
     * values are content of fields of this row. Returns first match only.
     * Throws: CsvException if the key or the value is not found.
     */
    string[string] getRowMapByField(string key, string value) {
        auto rowMaps = this.getArrayOfRowMaps();
        return rowMaps[findRowIndex(rowMaps, key, value)];
    }

    /***************************************************************************
     * The method sets new value (value2) of the field in column `key2`
     * of the first row whose field in column `key1` equals `value1`.
     * Throws: CsvException if `key1` is not found, then if `value1`
     *         is not found, then if `key2` is not found.
     */
    void setValueOfField(
        string key1, string value1, string key2, string value2
    ) {
        // +1 because 0-element of dataTable is the header row
        immutable rowIndex
            = findRowIndex(getArrayOfRowMaps(), key1, value1) + 1;
        auto indexForKey2 = headersOfColumns.getIndex(key2);
        if (indexForKey2 == -1) {
            throw new CsvException(format!"'%s': key not found."(key2));
        }
        immutable column = cast(size_t) indexForKey2;
        // A row shorter than the header is extended instead of
        // triggering an out-of-range access.
        if (dataTable[rowIndex].length <= column) {
            dataTable[rowIndex].length = column + 1;
        }
        dataTable[rowIndex][column] = value2;
    }

    /***************************************************************************
     * The method adds a new table row.
     * If the table has no column names yet, the row becomes the header row.
     * Otherwise the row is truncated or padded with empty fields
     * to the number of columns. The passed array is copied.
     */
    void addRow(string[] row) {
        string[] stored = row.dup;
        if (this.headersOfColumns.length > 0) {
            stored.length = this.headersOfColumns.length;
        } else {
            this.headersOfColumns = stored.dup;
        }
        dataTable ~= stored;
    }

    /***************************************************************************
     * The method adds a new table row built from arbitrary arguments;
     * every argument is converted to a string.
     */
    void addRow(S...)(S args) {
        string[] row;
        foreach (arg; args) {
            row ~= to!string(arg);
        }
        addRow(row);
    }

    /***************************************************************************
     * The method deletes table row by field (by its key and value).
     * Only the first matching row is deleted.
     * Throws: CsvException if the key or the value is not found.
     */
    void deleteRowByKeyAndValue(string key, string value) {
        immutable index = findRowIndex(this.getArrayOfRowMaps(), key, value);
        // +1 because 0-element is header
        dataTable = dataTable.remove(index + 1);
    }
    alias deleteRowByField = deleteRowByKeyAndValue;

    /***************************************************************************
     * The method writes table content to the initiating file.
     */
    void rewrite() {
        writeTo(csvFilePath);
    }

    /***************************************************************************
     * The method writes table content to the specified file.
     * Missing parent directories are created. Every field is enclosed
     * in double quotes, and double quotes inside fields are doubled.
     */
    void writeTo(string filepath) {
        auto dir = std.path.dirName(filepath);
        if (dir != "" && !dir.exists) mkdirRecurse(dir);
        auto f = File(filepath, "w");
        string newContent;
        foreach (row; dataTable) {
            foreach (i, field; row) {
                if (i > 0) newContent ~= delimiter;
                newContent ~= `"` ~ field.replace(`"`, `""`) ~ `"`;
            }
            newContent ~= "\n";
        }
        f.write(newContent);
        f.flush();
    }

    /***************************************************************************
     * The two-dimensional array will be used as the content of the CSV table.
     * The first row, if any, provides the column names.
     * The array is copied together with its rows.
     */
    void createTable(string[][] table) {
        this.dataTable = copyRows(table);

        if (!this.dataTable.empty) {
            this.headersOfColumns = this.dataTable[0].dup;
        }
    }

    /***************************************************************************
     * The two-dimensional array will be used as the content of the CSV table.
     * Params:
     *     keys = Titles for columns.
     *     table = 2D array for CSV fields.
     * Throws: CsvException if the first row of `table` and `keys`
     *         have different lengths.
     */
    void createTable(string[] keys, string[][] table) {
        if (!table.empty && keys.length != table[0].length) {
            throw new CsvException("Dimensional mismatch.");
        }
        this.dataTable = [keys.dup] ~ copyRows(table);
        this.headersOfColumns = keys.dup;
    }

    /* Returns a copy of the table in which every row is copied as well. */
    private static string[][] copyRows(string[][] rows) {
        if (rows.empty) {
            return null;
        }
        auto copy = new string[][](rows.length);
        foreach (i, row; rows) {
            copy[i] = row.dup;
        }
        return copy;
    }

    /* Returns the index (within rowMaps) of the first row whose field `key`
     * equals `value`. Throws CsvException if rowMaps is not empty and has
     * no such key, or if no row matches the value.
     */
    private static size_t findRowIndex(
        string[string][] rowMaps, string key, string value
    ) {
        if (!rowMaps.empty && key !in rowMaps[0]) {
            throw new CsvException(format!"'%s': key not found."(key));
        }
        foreach (i, rowMap; rowMaps) {
            if (rowMap[key] == value) {
                return i;
            }
        }
        throw new CsvException(format!"'%s': value not found."(value));
    }

    /* Reads the file `csvFilePath` (if it exists) line by line and fills
     * the table and the column names. Throws CsvException if the rows
     * have different numbers of fields.
     */
    private void generateDataTable() {
        if (!csvFilePath.exists) {
            return;
        }
        auto f = File(csvFilePath, "r");
        string[][] rows;
        foreach (line; f.byLine) {
            // Drop the carriage return left over from CRLF line endings,
            // otherwise it ends up inside the last field of the row.
            if (line.length > 0 && line[$ - 1] == '\r') {
                line = line[0 .. $ - 1];
            }
            string[] fields = this.splitCSVLine(line.idup);
            rows ~= fields;
        }
        if (rows.empty) {
            return;
        }

        // validate: every row must have as many fields as the header row
        immutable ncol = rows[0].length;
        foreach (row; rows[1 .. $]) {
            if (row.length != ncol) {
                throw new CsvException("Dimensional mismatch.");
            }
        }

        // store
        this.dataTable = rows;
        this.headersOfColumns = rows[0].dup;
    }

    /* Splits one CSV line into fields.
     * A field that starts with a double quote is a quoted field: it may
     * contain the delimiter, and a doubled double quote inside it stands
     * for one literal double quote. A quoted field that is never closed
     * extends to the end of the line. Double quotes inside unquoted fields
     * are kept as they are. An empty line yields no fields.
     */
    private string[] splitCSVLine(string csvRow) {
        if (csvRow.empty) {
            return null;
        }
        string[] result;
        string current;
        bool inQuotes = false;
        bool atFieldStart = true;
        for (size_t i = 0; i < csvRow.length; i++) {
            immutable c = csvRow[i];
            if (inQuotes) {
                if (c != '"') {
                    current ~= c;
                } else if (i + 1 < csvRow.length && csvRow[i + 1] == '"') {
                    // escaped quote: consume both characters, emit one
                    current ~= '"';
                    i++;
                } else {
                    inQuotes = false; // closing quote
                }
            } else if (c == delimiter) {
                result ~= current;
                current = null;
                atFieldStart = true;
                continue;
            } else if (c == '"' && atFieldStart) {
                inQuotes = true; // opening quote
            } else {
                current ~= c;
            }
            atFieldStart = false;
        }
        result ~= current;
        return result;
    }

}
322 
323 
/*******************************************************************************
 * Exception thrown by the CSV structure: a key or a value was not found,
 * or rows of a table have mismatching dimensions.
 * The body comes from the RealizeException mixin, which is declared
 * outside this module (presumably it provides the usual constructors).
 */
class CsvException : Exception {
    mixin RealizeException;
}
325 
326 
unittest {
    // Runs the action and returns the message of the CsvException it throws,
    // or null if no CsvException is thrown.
    static string csvFailure(void delegate() action) {
        try {
            action();
        } catch (CsvException e) {
            return e.msg;
        }
        return null;
    }

    string[][] source = [
        ["name", "age"],
        ["Kristina", "23"],
        ["Yana", "23"],
        ["Xenia", "33"],
        ["Maria", "29"]
    ];
    auto csv = CSV(source);
    assert(equal(csv.getTable(), source));

    string[string][] rowMaps = [
        ["name": "Kristina", "age": "23"],
        ["name": "Yana",     "age": "23"],
        ["name": "Xenia",    "age": "33"],
        ["name": "Maria",    "age": "29"]
    ];
    assert(equal(csv.getArrayOfRowMaps(), rowMaps));

    // Successful lookups; only the first match is returned.
    assert(csv.getValueOfField("age", "33", "name") == "Xenia");
    assert(csv.getValueOfField("age", "23", "name") == "Kristina");
    assert(csv.getValueOfField("name", "Maria", "age") == "29");

    // Failing lookups: unknown value, unknown search key, unknown target key.
    assert(
        csvFailure(() { writeln(csv.getValueOfField("age", "17", "name")); })
        == "'17': value not found."
    );
    assert(
        csvFailure(() {
            writeln(csv.getValueOfField("city", "Antananarivo", "name"));
        })
        == "'city': key not found."
    );
    assert(
        csvFailure(() {
            writeln(csv.getValueOfField("name", "Yana", "surname"));
        })
        == "'surname': key not found."
    );

    // Updating a field.
    csv.setValueOfField("name", "Xenia", "age", "34");
    assert(csv.getValueOfField("name", "Xenia", "age") == "34");

    // Failing updates.
    assert(
        csvFailure(() { csv.setValueOfField("name", "Valery", "age", "28"); })
        == "'Valery': value not found."
    );
    assert(
        csvFailure(() {
            csv.setValueOfField("name", "Maria", "surname", "Larina");
        })
        == "'surname': key not found."
    );
    assert(
        csvFailure(() {
            csv.setValueOfField("id", "0x264", "name", "Secret");
        })
        == "'id': key not found."
    );

    // Appending a row and deleting it again.
    csv.addRow(["Kate", "24"]);
    assert(csv.getTable()[$ - 1] == ["Kate", "24"]);

    csv.deleteRowByKeyAndValue("name", "Kate");
    rowMaps = [
        ["age": "23", "name": "Kristina"],
        ["age": "23", "name": "Yana"],
        ["age": "34", "name": "Xenia"],
        ["age": "29", "name": "Maria"]
    ];
    assert(equal(csv.getArrayOfRowMaps(), rowMaps));

    // Deleting a row that does not exist.
    assert(
        csvFailure(() { csv.deleteRowByKeyAndValue("name", "Lucy"); })
        == "'Lucy': value not found."
    );
}
430 
431 
unittest {
    // A default-initialized table takes its first added row as the header.
    CSV csv;
    csv.addRow("name", "age");
    assert(csv.getArrayOfRowMaps().empty);

    // The next row is a data row keyed by that header.
    csv.addRow("Mary", "34");
    string[string][] expected = [["name": "Mary", "age": "34"]];
    assert(equal(csv.getArrayOfRowMaps(), expected));
}