Login

burque505 · 06-18-2023, 04:09 PM

This code demonstrates Microsoft.Data.Analysis, a NuGet package. It is only slightly modified from the code in this blog post, and it relies on a couple of helper classes included in the project ZIP archive.

.zip

@MDA1.zip (Size: 2.91 KB / Downloads: 120)
Main script:

Code:

Copy Help

/*/ nuget MDA\Microsoft.Data.Analysis; /*/

using Microsoft.Data.Analysis;

using static PrettyPrinters;

using static MeanAndStandard;



string[] names = { "Oliver", "Charlotte", "Henry", "Amelia", "Owen" };

int[] ages = { 23, 19, 42, 64, 35 };

double[] heights = { 1.91, 1.62, 1.72, 1.57, 1.85 };



DataFrameColumn[] columns = {

    new StringDataFrameColumn("Name", names),

    new PrimitiveDataFrameColumn<int>("Age", ages),

    new PrimitiveDataFrameColumn<double>("Height", heights),

};



DataFrame df = new(columns);



// Append row data, so we'll have 6 columns

List<KeyValuePair<string, object>> newRowData = new()

{

    new KeyValuePair<string, object>("Name", "Scott"),

    new KeyValuePair<string, object>("Age", 36),

    new KeyValuePair<string, object>("Height", 1.65),

};



df.Append(newRowData, inPlace: true);



// We now have six rows, so we need six weights to match

// Mismatching the number of weights will throw an error.

int[] weights = { 123, 321, 111, 121, 131, 200}; // array of weights



PrimitiveDataFrameColumn<int> weightCol = new("Weight", weights);



df.Columns.Add(weightCol);



PrettyPrinters.PrettyPrint(df);



// Keep only the rows with Age > 30, then sort the survivors by name.
//
// The example code this was adapted from sorted first and filtered afterwards,
// which gave wrong results -- presumably because the Boolean mask below is built
// from the unsorted df and no longer lines up row-for-row with a sorted copy.
// So the filter is applied first, and the sorted result goes into a second frame.
//
// Note that OrderBy returns a new DataFrame and leaves df itself untouched
// (a bare `df.OrderBy("Name");` statement has no effect), which is why the
// result is captured in df2 and df keeps its original row order below.
DataFrame df2 = df.Filter(df["Age"].ElementwiseGreaterThan(30)).OrderBy("Name");

print.it("**********************");
print.it("Filtering by age > 30");
print.it("**********************");
df2.PrettyPrint();



// A deliberately silly "IQ" formula (Age * Height * 1.5), used only to show
// element-wise arithmetic between columns.
DataFrameColumn bogusIq = df["Age"] * df["Height"] * 1.5;

// Copy the computed values out into a plain double[] so they can be wrapped
// in a fresh, properly named column.
var iqValues = new double[(int)bogusIq.Length];
for (int i = 0; i < iqValues.Length; i++)
{
    iqValues[i] = (double)bogusIq[i];
}

PrimitiveDataFrameColumn<double> iqColumn = new("IQ", iqValues);
df.Columns.Add(iqColumn);

print.it("************************");
print.it("Add bogus IQ calculation");
print.it("************************");
df.PrettyPrint();



// Mean and standard deviation per column.
// MeanAndStd is provided by the MeanAndStandard class (brought in via `using static`).
print.it("***************************");
print.it("Mean and Std Deviation demo");
print.it("***************************");
df.PrettyPrint();

// Skip(1) leaves out the first column ("Name"), which holds text rather than numbers.
foreach (DataFrameColumn numericCol in df.Columns.Skip(1))
{
    // warning: additional care must be taken for datasets which contain null
    var samples = new double[(int)numericCol.Length];
    for (int row = 0; row < samples.Length; row++)
    {
        // Convert.ToDouble handles both the int and the double columns.
        samples[row] = Convert.ToDouble(numericCol[row]);
    }

    (double mean, double std) = MeanAndStd(samples);
    print.it($"{numericCol.Name} = {mean} +/- {std:N3} (n={samples.Length})");
}

Output:

Code:

Copy Help

Name       Age  Height  Weight  

Oliver     23   1.91    123     

Charlotte  19   1.62    321     

Henry      42   1.72    111     

Amelia     64   1.57    121     

Owen       35   1.85    131     

Scott      36   1.65    200     



**********************

Filtering by age > 30

**********************

Name    Age  Height  Weight  

Amelia  64   1.57    121     

Henry   42   1.72    111     

Owen    35   1.85    131     

Scott   36   1.65    200     



************************

Add bogus IQ calculation

************************

Name       Age  Height  Weight  IQ                  

Oliver     23   1.91    123     65.895              

Charlotte  19   1.62    321     46.17               

Henry      42   1.72    111     108.35999999999999  

Amelia     64   1.57    121     150.72              

Owen       35   1.85    131     97.125              

Scott      36   1.65    200     89.1                



***************************

Mean and Std Deviation demo

***************************

Name       Age  Height  Weight  IQ                  

Oliver     23   1.91    123     65.895              

Charlotte  19   1.62    321     46.17               

Henry      42   1.72    111     108.35999999999999  

Amelia     64   1.57    121     150.72              

Owen       35   1.85    131     97.125              

Scott      36   1.65    200     89.1                



Age = 36.5 +/- 14.592 (n=6)

Height = 1.72 +/- 0.123 (n=6)

Weight = 167.83333333333334 +/- 74.481 (n=6)

IQ = 92.895 +/- 32.983 (n=6)