06-18-2023, 04:09 PM
This code demonstrates Microsoft.Data.Analysis, a NuGet package. It is only slightly modified from the code in this blog post, and it relies on a couple of helper classes included in the project ZIP archive.
@MDA1.zip (Size: 2.91 KB / Downloads: 120)
Main script:
Output:
@MDA1.zip (Size: 2.91 KB / Downloads: 120)
Main script:
/*/ nuget MDA\Microsoft.Data.Analysis; /*/
using Microsoft.Data.Analysis;
using static PrettyPrinters;
using static MeanAndStandard;
// Demo of Microsoft.Data.Analysis: build a DataFrame from arrays, append a row,
// add columns, filter and sort it, and print the mean and standard deviation
// of every numeric column.

// Source data: one array per column, one entry per row.
string[] names = { "Oliver", "Charlotte", "Henry", "Amelia", "Owen" };
int[] ages = { 23, 19, 42, 64, 35 };
double[] heights = { 1.91, 1.62, 1.72, 1.57, 1.85 };
DataFrameColumn[] columns = {
    new StringDataFrameColumn("Name", names),
    new PrimitiveDataFrameColumn<int>("Age", ages),
    new PrimitiveDataFrameColumn<double>("Height", heights),
};
DataFrame df = new(columns);

// Append one row (values keyed by column name), so we'll have 6 rows.
List<KeyValuePair<string, object>> newRowData = new()
{
    new KeyValuePair<string, object>("Name", "Scott"),
    new KeyValuePair<string, object>("Age", 36),
    new KeyValuePair<string, object>("Height", 1.65),
};
df.Append(newRowData, inPlace: true);

// We now have six rows, so we need six weights to match.
// Mismatching the number of weights will throw an error.
int[] weights = { 123, 321, 111, 121, 131, 200 }; // one weight per row
PrimitiveDataFrameColumn<int> weightCol = new("Weight", weights);
df.Columns.Add(weightCol);
df.PrettyPrint();

// Filter first, then sort. The boolean mask is computed from df's current row
// order, so it has to be applied to df itself; applying it to an already
// re-ordered copy (OrderBy(...).Filter(...)) selects the wrong rows.
// Note that OrderBy does not sort df in place: it returns a new DataFrame,
// so its result must be captured (here in df2) to have any effect.
DataFrame df2 = df.Filter(df["Age"].ElementwiseGreaterThan(30)).OrderBy("Name");
print.it("**********************");
print.it("Filtering by age > 30");
print.it("**********************");
df2.PrettyPrint();

// Derive a new column from element-wise column arithmetic.
// Interesting method of calculating IQ ... :)
DataFrameColumn iqCol = df["Age"] * df["Height"] * 1.5;
// Copy the computed values into a fresh, explicitly named double column.
double[] iqs = Enumerable.Range(0, (int)iqCol.Length)
    .Select(x => (double)iqCol[x])
    .ToArray();
df.Columns.Add(new PrimitiveDataFrameColumn<double>("IQ", iqs));
print.it("************************");
print.it("Add bogus IQ calculation");
print.it("************************");
df.PrettyPrint();

// See class MeanAndStandard
print.it("***************************");
print.it("Mean and Std Deviation demo");
print.it("***************************");
df.PrettyPrint();
// Skip(1) leaves out the first column ("Name"), which is not numeric.
foreach (DataFrameColumn col in df.Columns.Skip(1))
{
    // Warning: additional care must be taken for datasets which contain nulls.
    // Convert.ToDouble turns a null cell into 0, which would silently skew
    // the mean and standard deviation instead of raising an error.
    double[] values = Enumerable.Range(0, (int)col.Length)
        .Select(x => Convert.ToDouble(col[x]))
        .ToArray();
    (double mean, double std) = MeanAndStd(values);
    print.it($"{col.Name} = {mean} +/- {std:N3} (n={values.Length})");
}
Name Age Height Weight
Oliver 23 1.91 123
Charlotte 19 1.62 321
Henry 42 1.72 111
Amelia 64 1.57 121
Owen 35 1.85 131
Scott 36 1.65 200
**********************
Filtering by age > 30
**********************
Name Age Height Weight
Amelia 64 1.57 121
Henry 42 1.72 111
Owen 35 1.85 131
Scott 36 1.65 200
************************
Add bogus IQ calculation
************************
Name Age Height Weight IQ
Oliver 23 1.91 123 65.895
Charlotte 19 1.62 321 46.17
Henry 42 1.72 111 108.35999999999999
Amelia 64 1.57 121 150.72
Owen 35 1.85 131 97.125
Scott 36 1.65 200 89.1
***************************
Mean and Std Deviation demo
***************************
Name Age Height Weight IQ
Oliver 23 1.91 123 65.895
Charlotte 19 1.62 321 46.17
Henry 42 1.72 111 108.35999999999999
Amelia 64 1.57 121 150.72
Owen 35 1.85 131 97.125
Scott 36 1.65 200 89.1
Age = 36.5 +/- 14.592 (n=6)
Height = 1.72 +/- 0.123 (n=6)
Weight = 167.83333333333334 +/- 74.481 (n=6)
IQ = 92.895 +/- 32.983 (n=6)