A type-safe data analytics and statistics framework for TypeScript, built for modern data science workflows with compile-time safety.
deno add jsr:@tidy-ts/dataframe
deno add jsr:@tidy-ts/dataframe
import { createDataFrame, stats as s } from "@tidy-ts/dataframe";
import { createDataFrame, stats as s } from "@tidy-ts/dataframe";
This example demonstrates creating a DataFrame, adding calculated columns with full type safety, grouping data, and performing aggregations. Notice how the `row` parameter gives typed access to column values without casting, and how the third callback parameter (`df`) exposes the entire DataFrame for calculations that need every row.
import { createDataFrame, stats as s } from "@tidy-ts/dataframe";
// import { createDataFrame, s } from "@tidy-ts/dataframe" works as well
// Example: build a small sales table, add derived per-row columns, then aggregate by region.
// Create a DataFrame from an array of row objects
const sales = createDataFrame([
{ region: "North", product: "Widget", quantity: 10, price: 100 },
{ region: "South", product: "Widget", quantity: 20, price: 100 },
{ region: "East", product: "Widget", quantity: 8, price: 100 },
]);
// Complete data analysis workflow: mutate -> groupBy -> summarize -> arrange
const analysis = sales
.mutate({
// Each key below becomes a new column; each function is called with (row, index, df).
// Use 'row' to access a typed row while defining new columns - no type casting needed
revenue: (row) => row.quantity * row.price,
// Use a block-bodied function for multi-step calculations. The DataFrame will keep track of the types.
totalTax: (row) => {
const taxRate = 0.08;
const taxPerItem = taxRate * row.price;
const totalTax = taxPerItem * row.quantity;
return totalTax
},
// Use 'index' to get the current row's position, sometimes helpful for indexing into external arrays
row_number: (_row, index) => index,
// Use 'df' to access the entire DataFrame; here each row's quantity is compared with the mean of the whole quantity column
moreQuantityThanAvg: (row, _index, df) => row.quantity > s.mean(df.quantity)
})
// Group by region, then compute the aggregate columns below for each group
.groupBy("region")
.summarize({
// 'group' exposes the group's columns, including ones added by mutate() above (e.g. revenue)
total_revenue: (group) => s.sum(group.revenue),
avg_quantity: (group) => s.mean(group.quantity),
product_count: (group) => group.nrows() // nrows() is a built-in helper for the number of rows
})
// Order the summarized rows by total_revenue, descending
.arrange("total_revenue", "desc");
// Pretty print the table with the .print() method
// NOTE(review): the string argument is presumably a title/label for the printed table - confirm
analysis.print("Sales Analysis");
import { createDataFrame, stats as s } from "@tidy-ts/dataframe";
// import { createDataFrame, s } from "@tidy-ts/dataframe" works as well
// Example: build a small sales table, add derived per-row columns, then aggregate by region.
// Create a DataFrame from an array of row objects
const sales = createDataFrame([
{ region: "North", product: "Widget", quantity: 10, price: 100 },
{ region: "South", product: "Widget", quantity: 20, price: 100 },
{ region: "East", product: "Widget", quantity: 8, price: 100 },
]);
// Complete data analysis workflow: mutate -> groupBy -> summarize -> arrange
const analysis = sales
.mutate({
// Each key below becomes a new column; each function is called with (row, index, df).
// Use 'row' to access a typed row while defining new columns - no type casting needed
revenue: (row) => row.quantity * row.price,
// Use a block-bodied function for multi-step calculations. The DataFrame will keep track of the types.
totalTax: (row) => {
const taxRate = 0.08;
const taxPerItem = taxRate * row.price;
const totalTax = taxPerItem * row.quantity;
return totalTax
},
// Use 'index' to get the current row's position, sometimes helpful for indexing into external arrays
row_number: (_row, index) => index,
// Use 'df' to access the entire DataFrame; here each row's quantity is compared with the mean of the whole quantity column
moreQuantityThanAvg: (row, _index, df) => row.quantity > s.mean(df.quantity)
})
// Group by region, then compute the aggregate columns below for each group
.groupBy("region")
.summarize({
// 'group' exposes the group's columns, including ones added by mutate() above (e.g. revenue)
total_revenue: (group) => s.sum(group.revenue),
avg_quantity: (group) => s.mean(group.quantity),
product_count: (group) => group.nrows() // nrows() is a built-in helper for the number of rows
})
// Order the summarized rows by total_revenue, descending
.arrange("total_revenue", "desc");
// Pretty print the table with the .print() method
// NOTE(review): the string argument is presumably a title/label for the printed table - confirm
analysis.print("Sales Analysis");
DataFrames can be created either from an array of row objects or from an object of column arrays (passed under a `columns` key). In both cases TypeScript automatically infers the column types and provides full type safety throughout your data pipeline.
import { createDataFrame } from "@tidy-ts/dataframe";
// Option 1: create a DataFrame from an array of row objects (one object per row)
const people = createDataFrame([
{ id: 1, name: "Luke", species: "Human", mass: 77, height: 172 },
{ id: 2, name: "C-3PO", species: "Droid", mass: 75, height: 167 },
{ id: 3, name: "R2-D2", species: "Droid", mass: 32, height: 96 },
{ id: 4, name: "Darth Vader", species: "Human", mass: 136, height: 202 },
{ id: 5, name: "Chewbacca", species: "Wookiee", mass: 112, height: 228 },
]);
// Option 2: create a DataFrame from columns by passing an object with a `columns` key,
// where each entry maps a column name to the array of that column's values
const salesFromColumns = createDataFrame({
columns: {
region: ["North", "South", "East"],
product: ["Widget", "Widget", "Widget"],
quantity: [10, 20, 8],
price: [100, 100, 100]
}
});
import { createDataFrame } from "@tidy-ts/dataframe";
// Option 1: create a DataFrame from an array of row objects (one object per row)
const people = createDataFrame([
{ id: 1, name: "Luke", species: "Human", mass: 77, height: 172 },
{ id: 2, name: "C-3PO", species: "Droid", mass: 75, height: 167 },
{ id: 3, name: "R2-D2", species: "Droid", mass: 32, height: 96 },
{ id: 4, name: "Darth Vader", species: "Human", mass: 136, height: 202 },
{ id: 5, name: "Chewbacca", species: "Wookiee", mass: 112, height: 228 },
]);
// Option 2: create a DataFrame from columns by passing an object with a `columns` key,
// where each entry maps a column name to the array of that column's values
const salesFromColumns = createDataFrame({
columns: {
region: ["North", "South", "East"],
product: ["Widget", "Widget", "Widget"],
quantity: [10, 20, 8],
price: [100, 100, 100]
}
});
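Each key in the object passed to `mutate()` becomes a new column name, and its function computes that column's value for every row. Functions receive `(row, index, dataframe)` parameters with full typing, so no casting is needed: use the typed row's properties, the current row index, or the entire DataFrame for calculations.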
// Add derived columns to `people`; `s` is the stats namespace (imported as `stats as s`).
const example = people
.mutate({
// Calculate BMI using the row's mass and height values (height is divided by 100 before squaring)
bmi: (r) => r.mass / Math.pow(r.height / 100, 2),
// Create boolean flags based on conditions
is_heavy: (r) => r.mass > 100,
// Use the index parameter to create 1-based row numbers (the index is 0-based, so add 1)
row_number: (_r, idx) => idx + 1,
// Access the entire DataFrame for calculations across all rows.
// s.sum(df.mass) is evaluated over the whole mass column, so it does not depend on the current row.
// NOTE(review): despite the column name, this looks like a total rather than a running sum - confirm intent
cumulative_mass: (_r, _idx, df) => {
return s.sum(df.mass);
},
// Return constant values for all rows
constant: () => "fixed_value",
});
// Add derived columns to `people`; `s` is the stats namespace (imported as `stats as s`).
const example = people
.mutate({
// Calculate BMI using the row's mass and height values (height is divided by 100 before squaring)
bmi: (r) => r.mass / Math.pow(r.height / 100, 2),
// Create boolean flags based on conditions
is_heavy: (r) => r.mass > 100,
// Use the index parameter to create 1-based row numbers (the index is 0-based, so add 1)
row_number: (_r, idx) => idx + 1,
// Access the entire DataFrame for calculations across all rows.
// s.sum(df.mass) is evaluated over the whole mass column, so it does not depend on the current row.
// NOTE(review): despite the column name, this looks like a total rather than a running sum - confirm intent
cumulative_mass: (_r, _idx, df) => {
return s.sum(df.mass);
},
// Return constant values for all rows
constant: () => "fixed_value",
});
// Get the number of rows with the nrows() method
console.log("Number of rows:", people.nrows());
// Access individual rows using array-style indexing (0-based)
console.log("First row:", people[0]);
// The last row sits at index nrows() - 1
console.log("Last row:", people[people.nrows() - 1]);
// Get the number of rows with the nrows() method
console.log("Number of rows:", people.nrows());
// Access individual rows using array-style indexing (0-based)
console.log("First row:", people[0]);
// The last row sits at index nrows() - 1
console.log("Last row:", people[people.nrows() - 1]);
// Access an entire column as an array by using the column name as a property
const names = people.name; // string[] - all names as an array
const masses = people.mass; // number[] - all masses as an array
const species = people.species; // string[] - all species as an array
console.log("All names:", names);
console.log("All masses:", masses);
// Column arrays can be passed straight to the stats helpers in `s`
console.log("Unique species:", s.unique(species));
// Access an entire column as an array by using the column name as a property
const names = people.name; // string[] - all names as an array
const masses = people.mass; // number[] - all masses as an array
const species = people.species; // string[] - all species as an array
console.log("All names:", names);
console.log("All masses:", masses);
// Column arrays can be passed straight to the stats helpers in `s`
console.log("Unique species:", s.unique(species));