The following warnings occurred:
Warning [2] count(): Parameter must be an array or an object that implements Countable - Line: 895 - File: showthread.php PHP 7.2.34 (Linux)
File Line Function
/showthread.php 895 errorHandler->error




Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
MS Data Analysis (dataframes)
#1
This script demonstrates Microsoft.Data.Analysis, a NuGet package. It is only slightly modified from the code in this blog post, and it relies on a couple of helper classes included in the attached project ZIP archive.

.zip   @MDA1.zip (Size: 2.91 KB / Downloads: 120)
Main script:
 
Code:
Copy      Help
/*/ nuget MDA\Microsoft.Data.Analysis; /*/
using Microsoft.Data.Analysis;
using static PrettyPrinters;
using static MeanAndStandard;

// Source data: one entry per person, same order in every array.
string[] personNames = { "Oliver", "Charlotte", "Henry", "Amelia", "Owen" };
int[] personAges = { 23, 19, 42, 64, 35 };
double[] personHeights = { 1.91, 1.62, 1.72, 1.57, 1.85 };

// Build the frame with three columns (Name, Age, Height) and five rows.
DataFrame df = new(
    new StringDataFrameColumn("Name", personNames),
    new PrimitiveDataFrameColumn<int>("Age", personAges),
    new PrimitiveDataFrameColumn<double>("Height", personHeights));

// Append one more person as a row of (column name, value) pairs,
// so the frame now has six rows.
List<KeyValuePair<string, object>> extraRow = new()
{
    new("Name", "Scott"),
    new("Age", 36),
    new("Height", 1.65),
};
df.Append(extraRow, inPlace: true);

// A new column needs one value per existing row: six rows, so six weights.
// Mismatching the number of weights will throw an error.
int[] personWeights = { 123, 321, 111, 121, 131, 200 };
df.Columns.Add(new PrimitiveDataFrameColumn<int>("Weight", personWeights));

// PrettyPrint comes from the PrettyPrinters helper class (using static above).
df.PrettyPrint();

// Filter first, then sort, and keep the result in a separate frame.
// Both Filter and OrderBy return a new DataFrame and leave df itself unchanged,
// so a bare df.OrderBy("Name") statement has no effect and is not used here.
// The filter mask is computed from df["Age"], i.e. in df's own row order, so it
// has to be applied to df before sorting; the blog example that sorted first
// and filtered afterwards gave wrong results.
var ageOver30 = df["Age"].ElementwiseGreaterThan(30);
DataFrame df2 = df.Filter(ageOver30).OrderBy("Name");

print.it("**********************");
print.it("Filtering by age > 30");
print.it("**********************");
df2.PrettyPrint();

// Tongue-in-cheek "IQ": Age * Height * 1.5, computed element-wise over the columns.
DataFrameColumn iqCol = df["Age"] * df["Height"] * 1.5;

// The arithmetic result is already a column, so there is no need to copy its
// values into a double[] and wrap them in a new column. Give it its final
// name first, then add it to the frame.
iqCol.SetName("IQ");
df.Columns.Add(iqCol);

print.it("************************");
print.it("Add bogus IQ calculation");
print.it("************************");
df.PrettyPrint();

// MeanAndStd is provided by the MeanAndStandard helper class (using static above).

print.it("***************************");
print.it("Mean and Std Deviation demo");
print.it("***************************");
df.PrettyPrint();

// Skip the first column ("Name"): it holds strings, not numbers.
foreach (DataFrameColumn column in df.Columns.Skip(1))
{
    int rowCount = (int)column.Length;
    double[] samples = new double[rowCount];

    // Warning: datasets containing nulls need additional care.
    // Convert.ToDouble turns a null cell into 0 instead of skipping it.
    for (int row = 0; row < rowCount; row++)
    {
        samples[row] = Convert.ToDouble(column[row]);
    }

    (double mean, double std) = MeanAndStd(samples);
    print.it($"{column.Name} = {mean} +/- {std:N3} (n={samples.Length})");
}
Output:
 
Code:
Copy      Help
Name       Age  Height  Weight  
Oliver     23   1.91    123    
Charlotte  19   1.62    321    
Henry      42   1.72    111    
Amelia     64   1.57    121    
Owen       35   1.85    131    
Scott      36   1.65    200    

**********************
Filtering by age > 30
**********************
Name    Age  Height  Weight  
Amelia  64   1.57    121    
Henry   42   1.72    111    
Owen    35   1.85    131    
Scott   36   1.65    200    

************************
Add bogus IQ calculation
************************
Name       Age  Height  Weight  IQ                  
Oliver     23   1.91    123     65.895              
Charlotte  19   1.62    321     46.17              
Henry      42   1.72    111     108.35999999999999  
Amelia     64   1.57    121     150.72              
Owen       35   1.85    131     97.125              
Scott      36   1.65    200     89.1                

***************************
Mean and Std Deviation demo
***************************
Name       Age  Height  Weight  IQ                  
Oliver     23   1.91    123     65.895              
Charlotte  19   1.62    321     46.17              
Henry      42   1.72    111     108.35999999999999  
Amelia     64   1.57    121     150.72              
Owen       35   1.85    131     97.125              
Scott      36   1.65    200     89.1                

Age = 36.5 +/- 14.592 (n=6)
Height = 1.72 +/- 0.123 (n=6)
Weight = 167.83333333333334 +/- 74.481 (n=6)
IQ = 92.895 +/- 32.983 (n=6)


Messages In This Thread
MS Data Analysis (dataframes) - by burque505 - 06-18-2023, 04:09 PM

Forum Jump:


Users browsing this thread: 1 Guest(s)