361 lines
No EOL
12 KiB
C++
361 lines
No EOL
12 KiB
C++
// SPDX-FileCopyrightText: © 2025 Nøkken.io <nokken.io@proton.me>
|
|
// SPDX-License-Identifier: AGPL-3.0
|
|
//
|
|
// impact_analysis.cpp
|
|
// Implementation of factor impact and medication analysis functions
|
|
//
|
|
#include "health_analytics_engine.h"
|
|
#include "utils.h"
|
|
/**
|
|
* @brief Rank factors by their impact on a target variable
|
|
*
|
|
* @param factors Array of factor time series
|
|
* @param target Target variable time series
|
|
* @param factorNames Array of factor names
|
|
* @param factorCount Number of factors
|
|
* @param dataLength Length of time series
|
|
* @return FactorImpactResult* Array of factor impact results
|
|
*/
|
|
FactorImpactResult* rank_factor_impacts(const double** factors,
|
|
const double* target,
|
|
const char** factorNames,
|
|
int factorCount,
|
|
int dataLength) {
|
|
if (factorCount <= 0 || dataLength <= 2) {
|
|
FactorImpactResult* dummy = new FactorImpactResult[1];
|
|
memset(dummy, 0, sizeof(FactorImpactResult));
|
|
dummy[0].factorIndex = -1; // Mark as invalid
|
|
return dummy;
|
|
}
|
|
|
|
// Allocate space for results (plus one for terminator)
|
|
FactorImpactResult* results = new FactorImpactResult[factorCount + 1];
|
|
|
|
// Calculate correlation matrix for all factors + target
|
|
std::vector<std::vector<double>> corrMatrix(factorCount + 1, std::vector<double>(factorCount + 1, 0));
|
|
|
|
for (int i = 0; i < factorCount; i++) {
|
|
// Correlation between factor i and target
|
|
corrMatrix[i][factorCount] = calculateCorrelation(factors[i], target, dataLength);
|
|
corrMatrix[factorCount][i] = corrMatrix[i][factorCount];
|
|
|
|
// Correlations between factors
|
|
for (int j = i + 1; j < factorCount; j++) {
|
|
corrMatrix[i][j] = calculateCorrelation(factors[i], factors[j], dataLength);
|
|
corrMatrix[j][i] = corrMatrix[i][j];
|
|
}
|
|
|
|
// Self-correlation is 1
|
|
corrMatrix[i][i] = 1.0;
|
|
}
|
|
corrMatrix[factorCount][factorCount] = 1.0;
|
|
|
|
// Calculate the impact of each factor
|
|
for (int i = 0; i < factorCount; i++) {
|
|
FactorImpactResult& impact = results[i];
|
|
|
|
// Direct effect is correlation with target
|
|
double directEffect = corrMatrix[i][factorCount];
|
|
|
|
// Calculate indirect effects through other factors
|
|
double indirectEffect = 0;
|
|
for (int j = 0; j < factorCount; j++) {
|
|
if (j != i) {
|
|
// Indirect effect through factor j
|
|
indirectEffect += corrMatrix[i][j] * corrMatrix[j][factorCount];
|
|
}
|
|
}
|
|
// Normalize indirect effect
|
|
indirectEffect /= std::max(1, factorCount - 1);
|
|
|
|
// Calculate partial correlation (direct effect controlling for other factors)
|
|
// This is a simplified approach - real implementation would use matrix operations
|
|
double partialCorr = directEffect;
|
|
if (factorCount > 1) {
|
|
double sumControlledVar = 0;
|
|
for (int j = 0; j < factorCount; j++) {
|
|
if (j != i) {
|
|
// Remove effect of factor j from both target and factor i
|
|
double controlEffect = corrMatrix[i][j] * corrMatrix[j][factorCount];
|
|
partialCorr -= controlEffect / (factorCount - 1);
|
|
sumControlledVar += corrMatrix[i][j] * corrMatrix[i][j];
|
|
}
|
|
}
|
|
sumControlledVar /= (factorCount - 1);
|
|
// Normalize partial correlation
|
|
if (sumControlledVar < 0.98) { // Avoid division by near-zero
|
|
partialCorr /= sqrt((1 - sumControlledVar));
|
|
}
|
|
}
|
|
|
|
// Calculate total impact score
|
|
impact.factorIndex = i;
|
|
impact.directEffect = directEffect;
|
|
impact.indirectEffect = indirectEffect;
|
|
|
|
// Total impact is weighted sum of direct and partial correlation
|
|
double partialWeight = 0.7; // Weight more toward direct unique contribution
|
|
impact.impactScore = partialWeight * std::abs(partialCorr) + (1 - partialWeight) * std::abs(directEffect);
|
|
|
|
// Calculate confidence based on correlation strength and sample size
|
|
double t_stat = std::abs(directEffect) * std::sqrt((dataLength - 2) / (1 - directEffect * directEffect));
|
|
double p_value = 2 * (1 - std::min(1.0, std::exp(-0.717 * t_stat - 0.416 * t_stat * t_stat)));
|
|
impact.confidence = std::min(0.95, (1.0 - p_value) * (1.0 - 1.0 / std::sqrt(dataLength)));
|
|
|
|
// Copy factor name
|
|
strncpy(impact.factorName, factorNames[i], MAX_STRING_SIZE - 1);
|
|
impact.factorName[MAX_STRING_SIZE - 1] = '\0';
|
|
|
|
// Generate mechanism description based on direct and indirect effects
|
|
const char* direction = (directEffect > 0) ? "positive" : "negative";
|
|
const char* strength =
|
|
(std::abs(directEffect) > 0.7) ? "strong" :
|
|
(std::abs(directEffect) > 0.4) ? "moderate" : "weak";
|
|
|
|
// Check for mediation effects
|
|
bool hasMediationEffect = std::abs(indirectEffect) > 0.2 &&
|
|
std::abs(indirectEffect) > std::abs(directEffect) * 0.5;
|
|
|
|
if (hasMediationEffect) {
|
|
snprintf(impact.mechanism, MAX_STRING_SIZE,
|
|
"%s %s impact: %s affects the target both directly (%.2f) and through other factors (%.2f)",
|
|
strength, direction, factorNames[i], directEffect, indirectEffect);
|
|
} else {
|
|
snprintf(impact.mechanism, MAX_STRING_SIZE,
|
|
"%s %s impact: changes in %s are %s associated with changes in the target (r=%.2f)",
|
|
strength, direction, factorNames[i], direction, directEffect);
|
|
}
|
|
}
|
|
|
|
// Sort by impact score (descending)
|
|
std::sort(results, results + factorCount,
|
|
[](const FactorImpactResult& a, const FactorImpactResult& b) {
|
|
return a.impactScore > b.impactScore;
|
|
});
|
|
|
|
// Mark the end of valid results
|
|
results[factorCount].factorIndex = -1;
|
|
|
|
return results;
|
|
}
|
|
|
|
/**
|
|
* @brief Free memory for factor impact results
|
|
*
|
|
* @param results Pointer to factor impact results array
|
|
*/
|
|
void free_factor_impact_results(FactorImpactResult* results) {
|
|
delete[] results;
|
|
}
|
|
|
|
/**
|
|
* @brief Analyze the impact of medication on a health metric
|
|
*
|
|
* @param before_data Values before medication
|
|
* @param before_length Length of before data
|
|
* @param after_data Values after medication
|
|
* @param after_length Length of after data
|
|
* @param medication_name Name of the medication
|
|
* @param factor_name Name of the health factor
|
|
* @return MedicationImpactAnalysis* Pointer to impact analysis result
|
|
*/
|
|
MedicationImpactAnalysis* analyze_medication_impact(
|
|
const double* before_data,
|
|
int before_length,
|
|
const double* after_data,
|
|
int after_length,
|
|
const char* medication_name,
|
|
const char* factor_name) {
|
|
|
|
MedicationImpactAnalysis* result = new MedicationImpactAnalysis();
|
|
memset(result, 0, sizeof(MedicationImpactAnalysis));
|
|
|
|
// Copy names
|
|
strncpy(result->medicationName, medication_name, MAX_STRING_SIZE-1);
|
|
strncpy(result->factorName, factor_name, MAX_STRING_SIZE-1);
|
|
|
|
// Return if insufficient data
|
|
if (before_length < 5 || after_length < 5) {
|
|
strncpy(result->description, "Insufficient data for analysis", MAX_STRING_SIZE-1);
|
|
return result;
|
|
}
|
|
|
|
// Calculate means
|
|
double beforeSum = 0, afterSum = 0;
|
|
for (int i = 0; i < before_length; i++) {
|
|
beforeSum += before_data[i];
|
|
}
|
|
for (int i = 0; i < after_length; i++) {
|
|
afterSum += after_data[i];
|
|
}
|
|
|
|
result->beforeMean = beforeSum / before_length;
|
|
result->afterMean = afterSum / after_length;
|
|
|
|
// Calculate change magnitude
|
|
result->changeMagnitude = result->afterMean - result->beforeMean;
|
|
|
|
// Calculate significance with simple t-test
|
|
double beforeVar = 0, afterVar = 0;
|
|
for (int i = 0; i < before_length; i++) {
|
|
double diff = before_data[i] - result->beforeMean;
|
|
beforeVar += diff * diff;
|
|
}
|
|
for (int i = 0; i < after_length; i++) {
|
|
double diff = after_data[i] - result->afterMean;
|
|
afterVar += diff * diff;
|
|
}
|
|
|
|
beforeVar /= (before_length - 1);
|
|
afterVar /= (after_length - 1);
|
|
|
|
double se = sqrt(beforeVar/before_length + afterVar/after_length);
|
|
double tStat = fabs(result->changeMagnitude) / (se + 0.0001);
|
|
|
|
// Simple significance estimation (0-1)
|
|
result->changeSignificance = std::min(1.0, tStat / 5.0);
|
|
|
|
// Overall impact combines magnitude and significance
|
|
result->overallImpact = fabs(result->changeMagnitude) * result->changeSignificance;
|
|
|
|
// Estimate days to effect (placeholder implementation)
|
|
result->daysToEffect = 7; // Assumed 1 week
|
|
|
|
// Generate description
|
|
const char* direction = (result->changeMagnitude > 0) ? "increased" : "decreased";
|
|
const char* significance = (result->changeSignificance > 0.7) ? "significant" :
|
|
(result->changeSignificance > 0.3) ? "moderate" : "slight";
|
|
|
|
snprintf(result->description, MAX_STRING_SIZE,
|
|
"%s shows a %s %s effect on %s (%.1f → %.1f)",
|
|
medication_name, significance, direction, factor_name,
|
|
result->beforeMean, result->afterMean);
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* @brief Free memory for medication impact analysis
|
|
*
|
|
* @param analysis Pointer to medication impact analysis
|
|
*/
|
|
void free_medication_impact_analysis(MedicationImpactAnalysis* analysis) {
|
|
delete analysis;
|
|
}
|
|
|
|
/**
|
|
* @brief Analyze hormone impact on health metrics
|
|
*
|
|
* @param hormone_levels Array of hormone levels
|
|
* @param data_length Length of hormone data
|
|
* @param factor_data Array of factor data arrays
|
|
* @param factor_names Array of factor names
|
|
* @param factor_count Number of factors
|
|
* @param hormone_name Name of the hormone
|
|
* @param min_optimal_level Lower bound of optimal range
|
|
* @param max_optimal_level Upper bound of optimal range
|
|
* @return HormoneImpactAnalysis* Pointer to impact analysis result
|
|
*/
|
|
HormoneImpactAnalysis* analyze_hormone_impact(
|
|
const double* hormone_levels,
|
|
int data_length,
|
|
const double** factor_data,
|
|
const char** factor_names,
|
|
int factor_count,
|
|
const char* hormone_name,
|
|
double min_optimal_level,
|
|
double max_optimal_level) {
|
|
|
|
HormoneImpactAnalysis* result = new HormoneImpactAnalysis();
|
|
memset(result, 0, sizeof(HormoneImpactAnalysis));
|
|
|
|
// Copy hormone name
|
|
strncpy(result->hormoneName, hormone_name, MAX_STRING_SIZE-1);
|
|
|
|
// Return if insufficient data
|
|
if (data_length < 3 || factor_count <= 0) {
|
|
strncpy(result->description, "Insufficient data for analysis", MAX_STRING_SIZE-1);
|
|
return result;
|
|
}
|
|
|
|
// Calculate current hormone level (average of recent readings)
|
|
double sum = 0;
|
|
for (int i = 0; i < data_length; i++) {
|
|
sum += hormone_levels[i];
|
|
}
|
|
result->currentLevel = sum / data_length;
|
|
|
|
// Set optimal levels
|
|
result->optimalRangeLower = min_optimal_level;
|
|
result->optimalRangeUpper = max_optimal_level;
|
|
result->optimalLevel = (min_optimal_level + max_optimal_level) / 2;
|
|
|
|
// Calculate deviation from optimal range
|
|
if (result->currentLevel < min_optimal_level) {
|
|
result->deviation = (result->currentLevel - min_optimal_level) / min_optimal_level;
|
|
} else if (result->currentLevel > max_optimal_level) {
|
|
result->deviation = (result->currentLevel - max_optimal_level) / max_optimal_level;
|
|
} else {
|
|
result->deviation = 0; // Within optimal range
|
|
}
|
|
|
|
// Calculate correlations with factors
|
|
for (int i = 0; i < factor_count && i < 50; i++) {
|
|
// Calculate correlation
|
|
double correlation = 0;
|
|
double sum_xy = 0, sum_x2 = 0, sum_y2 = 0;
|
|
double sum_x = 0, sum_y = 0;
|
|
|
|
for (int j = 0; j < data_length; j++) {
|
|
sum_x += hormone_levels[j];
|
|
sum_y += factor_data[i][j];
|
|
sum_xy += hormone_levels[j] * factor_data[i][j];
|
|
sum_x2 += hormone_levels[j] * hormone_levels[j];
|
|
sum_y2 += factor_data[i][j] * factor_data[i][j];
|
|
}
|
|
|
|
double n = data_length;
|
|
double denominator = sqrt((n * sum_x2 - sum_x * sum_x) * (n * sum_y2 - sum_y * sum_y));
|
|
|
|
if (denominator > 0) {
|
|
correlation = (n * sum_xy - sum_x * sum_y) / denominator;
|
|
}
|
|
|
|
// Store impact and factor name
|
|
result->impactOnOtherFactors[i] = correlation;
|
|
strncpy(result->factorNames[i], factor_names[i], MAX_STRING_SIZE-1);
|
|
|
|
// Set impact on mood and energy if found
|
|
if (strcmp(factor_names[i], "Mood") == 0) {
|
|
result->impactOnMood = correlation;
|
|
} else if (strcmp(factor_names[i], "Energy") == 0) {
|
|
result->impactOnEnergy = correlation;
|
|
}
|
|
}
|
|
|
|
// Generate description
|
|
const char* status;
|
|
if (fabs(result->deviation) < 0.1) {
|
|
status = "within optimal range";
|
|
} else if (result->deviation < 0) {
|
|
status = "below optimal range";
|
|
} else {
|
|
status = "above optimal range";
|
|
}
|
|
|
|
snprintf(result->description, MAX_STRING_SIZE,
|
|
"%s level is %s (%.1f, range: %.1f-%.1f)",
|
|
hormone_name, status, result->currentLevel,
|
|
min_optimal_level, max_optimal_level);
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* @brief Free memory for hormone impact analysis
|
|
*
|
|
* @param analysis Pointer to hormone impact analysis
|
|
*/
|
|
void free_hormone_impact_analysis(HormoneImpactAnalysis* analysis) {
|
|
delete analysis;
|
|
} |