// FIXME: remove this to generic factory or similar
import { PangaeaDatasetDescription } from './PangaeaDatasetDescription.js';
import { Statistics } from './Statistics.js';
/**
* A representation of a data frame with columns, rows and series and optional
* metadata.
*
* TODO: Including special metadata handling for PANGAEA metadata header. We
* should come up with a cleaner class structure.
*
* @author rkoppe <roland.koppe@awi.de>
*/
export class DataFrame {
    /**
     * Metadata of the data set. read() stores a PangaeaDatasetDescription
     * here when the content starts with a commented header.
     */
    description = {};

    /** Column names in table order. */
    columns = [];

    /** Maps a column name to its index in columns. */
    columnMap = {};

    /** Rows as arrays of cell values; numeric cells are stored as numbers. */
    rows = [];

    /**
     * Column values. Each column array is reachable by its column index and,
     * as an additional property, by its column name.
     */
    series = [];

    // Regular expression literals are used on purpose: inside a string
    // literal '\d' is just 'd', so new RegExp('\d+') would match the letter d.
    /** Matches (unanchored) an unsigned integer or decimal number. */
    static number = /\d+(\.\d+)?/;

    /** Matches (unanchored) a date time like 2020-01-31T12:00:00 or 2020-01-31 12:00:00. */
    static datetime = /\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/;

    constructor() {
    }

    /**
     * Reads the given content into columns, rows and series.
     *
     * The first non-empty line is the header, every further non-empty line is
     * a row. Cells that are numeric according to Number() are stored as
     * numbers, all other cells stay strings. Rows with fewer cells than
     * columns are filled up with empty strings.
     *
     * Columns, rows and series of a previous call are discarded.
     *
     * @param {string} content
     * @param {string} separator cell separator, tab by default
     * @returns {DataFrame} this data frame
     */
    read(content, separator = '\t') {
        // start with a clean table, otherwise rows and column mappings of a
        // previously read content would survive
        this.columns = [];
        this.columnMap = {};
        this.rows = [];
        this.series = [];

        // table start position in content
        let start = 0;

        // detect commented header, e.g. PANGAEA
        if (content.startsWith('/*')) {
            const pos = content.indexOf('*/');
            // without a closing marker there is no header to cut out
            if (pos !== -1) {
                const meta = content.substring(3, pos - 1);
                this.description = new PangaeaDatasetDescription(meta);
                start = pos + 3;
            }
        }

        // "stream" data into rows
        let time = Date.now();
        let count = 0;
        let end = 0;
        do {
            end = content.indexOf('\n', start);
            if (end === -1) end = content.length;
            let line = content.substring(start, end).trimEnd();
            start = end + 1;

            // Leading whitespace is only removed up to the first separator.
            // A line that starts with separators has empty leading cells;
            // trimming them away would shift all values to the wrong columns.
            const stripped = line.trimStart();
            const lead = line.length - stripped.length;
            const first = separator === '' ? -1 : line.indexOf(separator);
            line = (first !== -1 && first < lead) ? line.substring(first) : stripped;

            // skip empty lines
            if (line === '') continue;

            const parts = line.split(separator);
            if (count === 0) {
                // header
                this.columns = parts;
                for (let i = 0; i < this.columns.length; i++) {
                    this.series[i] = [];
                    this.series[this.columns[i]] = this.series[i];
                    this.columnMap[this.columns[i]] = i;
                }
            } else {
                // content
                const row = count - 1;
                for (let i = 0; i < this.columns.length; i++) {
                    let value = parts[i] ?? '';
                    // Number() is used for the check and for the conversion,
                    // so both always agree; blank cells stay strings.
                    if (value.trim() !== '') {
                        const parsed = Number(value);
                        if (!Number.isNaN(parsed)) value = parsed;
                    }
                    // always written back, which also pads short rows
                    parts[i] = value;
                    this.series[i][row] = value;
                }
                this.rows[row] = parts;
            }
            count++;
        } while (end < content.length);

        time = Date.now() - time;
        console.log('content parsed in ' + time + 'ms');
        return this;
    }

    /**
     * Calculates and returns simple Statistics.
     *
     * TODO: also for nominal values.
     *
     * @param {number[]|string} data values or the name of a column
     * @returns {Statistics}
     * @throws {Error} if data is a string that is not a column name
     */
    statistics(data) {
        if (typeof data === 'string') {
            // own properties only, inherited names like "toString" are no columns
            if (!Object.hasOwn(this.columnMap, data)) throw new Error('Column ' + data + ' does not exists.');
            data = this.series[this.columnMap[data]];
        }
        return new Statistics(data);
    }

    /**
     * Applies the given filter function to all rows in this data frame and
     * returns a new data frame containing all matched rows. The function is
     * called with the row index and the row.
     *
     * If the boolean *true* is provided, a copy of the original data frame
     * is returned. If the boolean *false* is provided, just the structure
     * of the original data frame is copied.
     *
     * Note: the returned data frame holds references to the rows and the
     * description of the original data frame.
     *
     * @param {Function|Boolean} filter
     * @returns {DataFrame}
     */
    filter(filter) {
        const df = new DataFrame();
        df.description = this.description;
        df.columns = this.columns.slice();
        df.columnMap = Object.assign({}, this.columnMap);
        df.rows = [];
        df.series = [];

        // prepare series
        for (let i = 0; i < df.columns.length; i++) {
            df.series[i] = [];
            df.series[df.columns[i]] = df.series[i];
        }
        if (!filter) return df;

        // filter data
        for (let i = 0; i < this.rows.length; i++) {
            const row = this.rows[i];
            if ((filter === true) || filter(i, row)) {
                df.rows.push(row);
                for (let j = 0; j < df.columns.length; j++) {
                    df.series[j].push(row[j]);
                }
            }
        }
        return df;
    }

    /**
     * Returns a string representation of this data frame: the header line
     * followed by the rows from begin (inclusive) to end (exclusive).
     *
     * A negative begin counts from the end and always runs up to the last
     * row. An end of 0 means "up to the last row". Ranges reaching beyond the
     * available rows are cut to the available rows.
     *
     * @param {string} separator
     * @param {number} begin optional first row
     * @param {number} end optional end row (exclusive)
     * @returns {string}
     */
    asText(separator = '\t', begin = 0, end = 0) {
        const total = this.rows.length;
        let text = this.columns.join(separator) + '\n';
        if (begin >= total) return text;
        if (begin < 0) {
            // more rows requested than available: start at the first row
            begin = Math.max(total + begin, 0);
            end = total;
        }
        // resolve the "all rows" marker before comparing end with begin
        if (end === 0) end = total;
        if (end < begin) return text;
        // never read behind the last row
        end = Math.min(end, total);
        for (let i = begin; i < end; i++) {
            text += this.rows[i].join(separator) + '\n';
        }
        return text;
    }

    /**
     * Returns at max count rows of the head, preceded by the header line.
     *
     * @param {number} count
     * @param {string} separator
     * @returns {string}
     */
    head(count, separator = '\t') {
        return this.asText(separator, 0, count);
    }

    /**
     * Returns at max count rows of the tail, preceded by the header line.
     *
     * @param {number} count
     * @param {string} separator
     * @returns {string}
     */
    tail(count, separator = '\t') {
        return this.asText(separator, -count);
    }

    /**
     * Groups the values of each column by the values of the given grouping
     * column or by the result of the given function and returns the
     * statistics per group.
     *
     * A function gets the current row as parameter and has to return a
     * value to group rows by; the value is used as string.
     *
     * @param {string|function} grouping column name or grouping function
     * @param {string[]} columns optional list of columns to calculate statistics for, all columns by default
     * @returns {Object<string, Statistics[]>} group key => one Statistics per
     * requested column, in the order of columns
     * @throws {Error} if grouping or one of the columns is invalid
     */
    groups(grouping, columns = []) {
        // validate column grouping function
        let fn = null;
        if (typeof grouping === 'string') {
            if (!Object.hasOwn(this.columnMap, grouping)) throw new Error('Column ' + grouping + ' does not exists.');
            const index = this.columnMap[grouping];
            fn = (row) => row[index];
        } else {
            if (typeof grouping !== 'function') throw new Error('Column parameter must be a string or function.');
            fn = grouping;
        }

        // validate statistic columns
        if (!columns || (columns.length === 0)) {
            columns = this.columns.slice();
        } else {
            columns = columns.slice();
        }
        for (let i = 0; i < columns.length; i++) {
            if (!Object.hasOwn(this.columnMap, columns[i])) throw new Error('Column for y ' + columns[i] + ' is not defined.');
        }
        const indexes = columns.map((name) => this.columnMap[name]);

        // collect by group and column
        // key => column => values
        // A Map is used because keys come from the data: on a plain object
        // keys like "constructor" or "__proto__" would hit inherited
        // properties instead of creating a group.
        const groups = new Map();
        for (const row of this.rows) {
            const key = String(fn(row));
            let collected = groups.get(key);
            if (!collected) {
                collected = columns.map(() => []);
                groups.set(key, collected);
            }
            for (let j = 0; j < indexes.length; j++) {
                collected[j].push(row[indexes[j]]);
            }
        }

        // calculate statistics for each group key and column;
        // Object.fromEntries defines own properties for any key
        return Object.fromEntries(
            Array.from(groups, ([key, collected]) => [
                key,
                collected.map((values) => new Statistics(values))
            ])
        );
    }
}