nokken/native/statistics/impact_analysis.cpp
2025-04-20 11:17:03 -04:00

361 lines
No EOL
12 KiB
C++

// SPDX-FileCopyrightText: © 2025 Nøkken.io <nokken.io@proton.me>
// SPDX-License-Identifier: AGPL-3.0
//
// impact_analysis.cpp
// Implementation of factor impact and medication analysis functions
//
#include "health_analytics_engine.h"
#include "utils.h"
/**
* @brief Rank factors by their impact on a target variable
*
* @param factors Array of factor time series
* @param target Target variable time series
* @param factorNames Array of factor names
* @param factorCount Number of factors
* @param dataLength Length of time series
* @return FactorImpactResult* Array of factor impact results
*/
FactorImpactResult* rank_factor_impacts(const double** factors,
const double* target,
const char** factorNames,
int factorCount,
int dataLength) {
if (factorCount <= 0 || dataLength <= 2) {
FactorImpactResult* dummy = new FactorImpactResult[1];
memset(dummy, 0, sizeof(FactorImpactResult));
dummy[0].factorIndex = -1; // Mark as invalid
return dummy;
}
// Allocate space for results (plus one for terminator)
FactorImpactResult* results = new FactorImpactResult[factorCount + 1];
// Calculate correlation matrix for all factors + target
std::vector<std::vector<double>> corrMatrix(factorCount + 1, std::vector<double>(factorCount + 1, 0));
for (int i = 0; i < factorCount; i++) {
// Correlation between factor i and target
corrMatrix[i][factorCount] = calculateCorrelation(factors[i], target, dataLength);
corrMatrix[factorCount][i] = corrMatrix[i][factorCount];
// Correlations between factors
for (int j = i + 1; j < factorCount; j++) {
corrMatrix[i][j] = calculateCorrelation(factors[i], factors[j], dataLength);
corrMatrix[j][i] = corrMatrix[i][j];
}
// Self-correlation is 1
corrMatrix[i][i] = 1.0;
}
corrMatrix[factorCount][factorCount] = 1.0;
// Calculate the impact of each factor
for (int i = 0; i < factorCount; i++) {
FactorImpactResult& impact = results[i];
// Direct effect is correlation with target
double directEffect = corrMatrix[i][factorCount];
// Calculate indirect effects through other factors
double indirectEffect = 0;
for (int j = 0; j < factorCount; j++) {
if (j != i) {
// Indirect effect through factor j
indirectEffect += corrMatrix[i][j] * corrMatrix[j][factorCount];
}
}
// Normalize indirect effect
indirectEffect /= std::max(1, factorCount - 1);
// Calculate partial correlation (direct effect controlling for other factors)
// This is a simplified approach - real implementation would use matrix operations
double partialCorr = directEffect;
if (factorCount > 1) {
double sumControlledVar = 0;
for (int j = 0; j < factorCount; j++) {
if (j != i) {
// Remove effect of factor j from both target and factor i
double controlEffect = corrMatrix[i][j] * corrMatrix[j][factorCount];
partialCorr -= controlEffect / (factorCount - 1);
sumControlledVar += corrMatrix[i][j] * corrMatrix[i][j];
}
}
sumControlledVar /= (factorCount - 1);
// Normalize partial correlation
if (sumControlledVar < 0.98) { // Avoid division by near-zero
partialCorr /= sqrt((1 - sumControlledVar));
}
}
// Calculate total impact score
impact.factorIndex = i;
impact.directEffect = directEffect;
impact.indirectEffect = indirectEffect;
// Total impact is weighted sum of direct and partial correlation
double partialWeight = 0.7; // Weight more toward direct unique contribution
impact.impactScore = partialWeight * std::abs(partialCorr) + (1 - partialWeight) * std::abs(directEffect);
// Calculate confidence based on correlation strength and sample size
double t_stat = std::abs(directEffect) * std::sqrt((dataLength - 2) / (1 - directEffect * directEffect));
double p_value = 2 * (1 - std::min(1.0, std::exp(-0.717 * t_stat - 0.416 * t_stat * t_stat)));
impact.confidence = std::min(0.95, (1.0 - p_value) * (1.0 - 1.0 / std::sqrt(dataLength)));
// Copy factor name
strncpy(impact.factorName, factorNames[i], MAX_STRING_SIZE - 1);
impact.factorName[MAX_STRING_SIZE - 1] = '\0';
// Generate mechanism description based on direct and indirect effects
const char* direction = (directEffect > 0) ? "positive" : "negative";
const char* strength =
(std::abs(directEffect) > 0.7) ? "strong" :
(std::abs(directEffect) > 0.4) ? "moderate" : "weak";
// Check for mediation effects
bool hasMediationEffect = std::abs(indirectEffect) > 0.2 &&
std::abs(indirectEffect) > std::abs(directEffect) * 0.5;
if (hasMediationEffect) {
snprintf(impact.mechanism, MAX_STRING_SIZE,
"%s %s impact: %s affects the target both directly (%.2f) and through other factors (%.2f)",
strength, direction, factorNames[i], directEffect, indirectEffect);
} else {
snprintf(impact.mechanism, MAX_STRING_SIZE,
"%s %s impact: changes in %s are %s associated with changes in the target (r=%.2f)",
strength, direction, factorNames[i], direction, directEffect);
}
}
// Sort by impact score (descending)
std::sort(results, results + factorCount,
[](const FactorImpactResult& a, const FactorImpactResult& b) {
return a.impactScore > b.impactScore;
});
// Mark the end of valid results
results[factorCount].factorIndex = -1;
return results;
}
/**
* @brief Free memory for factor impact results
*
* @param results Pointer to factor impact results array
*/
void free_factor_impact_results(FactorImpactResult* results) {
delete[] results;
}
/**
* @brief Analyze the impact of medication on a health metric
*
* @param before_data Values before medication
* @param before_length Length of before data
* @param after_data Values after medication
* @param after_length Length of after data
* @param medication_name Name of the medication
* @param factor_name Name of the health factor
* @return MedicationImpactAnalysis* Pointer to impact analysis result
*/
MedicationImpactAnalysis* analyze_medication_impact(
const double* before_data,
int before_length,
const double* after_data,
int after_length,
const char* medication_name,
const char* factor_name) {
MedicationImpactAnalysis* result = new MedicationImpactAnalysis();
memset(result, 0, sizeof(MedicationImpactAnalysis));
// Copy names
strncpy(result->medicationName, medication_name, MAX_STRING_SIZE-1);
strncpy(result->factorName, factor_name, MAX_STRING_SIZE-1);
// Return if insufficient data
if (before_length < 5 || after_length < 5) {
strncpy(result->description, "Insufficient data for analysis", MAX_STRING_SIZE-1);
return result;
}
// Calculate means
double beforeSum = 0, afterSum = 0;
for (int i = 0; i < before_length; i++) {
beforeSum += before_data[i];
}
for (int i = 0; i < after_length; i++) {
afterSum += after_data[i];
}
result->beforeMean = beforeSum / before_length;
result->afterMean = afterSum / after_length;
// Calculate change magnitude
result->changeMagnitude = result->afterMean - result->beforeMean;
// Calculate significance with simple t-test
double beforeVar = 0, afterVar = 0;
for (int i = 0; i < before_length; i++) {
double diff = before_data[i] - result->beforeMean;
beforeVar += diff * diff;
}
for (int i = 0; i < after_length; i++) {
double diff = after_data[i] - result->afterMean;
afterVar += diff * diff;
}
beforeVar /= (before_length - 1);
afterVar /= (after_length - 1);
double se = sqrt(beforeVar/before_length + afterVar/after_length);
double tStat = fabs(result->changeMagnitude) / (se + 0.0001);
// Simple significance estimation (0-1)
result->changeSignificance = std::min(1.0, tStat / 5.0);
// Overall impact combines magnitude and significance
result->overallImpact = fabs(result->changeMagnitude) * result->changeSignificance;
// Estimate days to effect (placeholder implementation)
result->daysToEffect = 7; // Assumed 1 week
// Generate description
const char* direction = (result->changeMagnitude > 0) ? "increased" : "decreased";
const char* significance = (result->changeSignificance > 0.7) ? "significant" :
(result->changeSignificance > 0.3) ? "moderate" : "slight";
snprintf(result->description, MAX_STRING_SIZE,
"%s shows a %s %s effect on %s (%.1f → %.1f)",
medication_name, significance, direction, factor_name,
result->beforeMean, result->afterMean);
return result;
}
/**
* @brief Free memory for medication impact analysis
*
* @param analysis Pointer to medication impact analysis
*/
void free_medication_impact_analysis(MedicationImpactAnalysis* analysis) {
delete analysis;
}
/**
* @brief Analyze hormone impact on health metrics
*
* @param hormone_levels Array of hormone levels
* @param data_length Length of hormone data
* @param factor_data Array of factor data arrays
* @param factor_names Array of factor names
* @param factor_count Number of factors
* @param hormone_name Name of the hormone
* @param min_optimal_level Lower bound of optimal range
* @param max_optimal_level Upper bound of optimal range
* @return HormoneImpactAnalysis* Pointer to impact analysis result
*/
HormoneImpactAnalysis* analyze_hormone_impact(
const double* hormone_levels,
int data_length,
const double** factor_data,
const char** factor_names,
int factor_count,
const char* hormone_name,
double min_optimal_level,
double max_optimal_level) {
HormoneImpactAnalysis* result = new HormoneImpactAnalysis();
memset(result, 0, sizeof(HormoneImpactAnalysis));
// Copy hormone name
strncpy(result->hormoneName, hormone_name, MAX_STRING_SIZE-1);
// Return if insufficient data
if (data_length < 3 || factor_count <= 0) {
strncpy(result->description, "Insufficient data for analysis", MAX_STRING_SIZE-1);
return result;
}
// Calculate current hormone level (average of recent readings)
double sum = 0;
for (int i = 0; i < data_length; i++) {
sum += hormone_levels[i];
}
result->currentLevel = sum / data_length;
// Set optimal levels
result->optimalRangeLower = min_optimal_level;
result->optimalRangeUpper = max_optimal_level;
result->optimalLevel = (min_optimal_level + max_optimal_level) / 2;
// Calculate deviation from optimal range
if (result->currentLevel < min_optimal_level) {
result->deviation = (result->currentLevel - min_optimal_level) / min_optimal_level;
} else if (result->currentLevel > max_optimal_level) {
result->deviation = (result->currentLevel - max_optimal_level) / max_optimal_level;
} else {
result->deviation = 0; // Within optimal range
}
// Calculate correlations with factors
for (int i = 0; i < factor_count && i < 50; i++) {
// Calculate correlation
double correlation = 0;
double sum_xy = 0, sum_x2 = 0, sum_y2 = 0;
double sum_x = 0, sum_y = 0;
for (int j = 0; j < data_length; j++) {
sum_x += hormone_levels[j];
sum_y += factor_data[i][j];
sum_xy += hormone_levels[j] * factor_data[i][j];
sum_x2 += hormone_levels[j] * hormone_levels[j];
sum_y2 += factor_data[i][j] * factor_data[i][j];
}
double n = data_length;
double denominator = sqrt((n * sum_x2 - sum_x * sum_x) * (n * sum_y2 - sum_y * sum_y));
if (denominator > 0) {
correlation = (n * sum_xy - sum_x * sum_y) / denominator;
}
// Store impact and factor name
result->impactOnOtherFactors[i] = correlation;
strncpy(result->factorNames[i], factor_names[i], MAX_STRING_SIZE-1);
// Set impact on mood and energy if found
if (strcmp(factor_names[i], "Mood") == 0) {
result->impactOnMood = correlation;
} else if (strcmp(factor_names[i], "Energy") == 0) {
result->impactOnEnergy = correlation;
}
}
// Generate description
const char* status;
if (fabs(result->deviation) < 0.1) {
status = "within optimal range";
} else if (result->deviation < 0) {
status = "below optimal range";
} else {
status = "above optimal range";
}
snprintf(result->description, MAX_STRING_SIZE,
"%s level is %s (%.1f, range: %.1f-%.1f)",
hormone_name, status, result->currentLevel,
min_optimal_level, max_optimal_level);
return result;
}
/**
* @brief Free memory for hormone impact analysis
*
* @param analysis Pointer to hormone impact analysis
*/
void free_hormone_impact_analysis(HormoneImpactAnalysis* analysis) {
delete analysis;
}