first commit - migrated from codeberg
This commit is contained in: commit 5ead03e1f7
567 changed files with 102721 additions and 0 deletions
native/statistics/time_series.cpp (new file, 272 lines)
@@ -0,0 +1,272 @@
// SPDX-FileCopyrightText: © 2025 Nøkken.io <nokken.io@proton.me>
// SPDX-License-Identifier: AGPL-3.0
//
// time_series.cpp
// Implementation of time series analysis functions
//
#include "health_analytics_engine.h"
#include "utils.h"

// Standard headers used below; they may already be pulled in by the project
// headers above, but listing them keeps this file self-contained.
#include <algorithm>  // std::sort, std::min
#include <cmath>      // std::abs, sqrt, exp
#include <cstring>    // memset, strncpy
#include <vector>

/**
 * @brief Detect trends in time series data
 *
 * @param values Time series data
 * @param length Number of elements in the array
 * @param strength Output parameter for trend strength
 * @return TrendType Enum indicating trend direction and type
 */
TrendType detect_trend(const double* values, int length, double* strength) {
    if (length < 3) {
        *strength = 0;
        return TREND_NONE;
    }

    // Generate time vector (0, 1, 2, ...)
    std::vector<double> time(length);
    for (int i = 0; i < length; i++) {
        time[i] = i;
    }

    // Calculate linear regression
    double slope, intercept, r_squared;
    if (!calculateLinearRegression(time.data(), values, length, slope, intercept, r_squared)) {
        *strength = 0;
        return TREND_NONE;
    }
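
    // Note: calculateLinearRegression (from utils.h) is assumed to fit an
    // ordinary least-squares line y = intercept + slope * t and to report the
    // coefficient of determination in r_squared:
    //   slope = sum((t - mean(t)) * (y - mean(y))) / sum((t - mean(t))^2)
    //   R^2   = 1 - SS_residual / SS_total
    // r_squared is what this function later reports as the trend strength.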

    // Detrend the data for further analysis
    std::vector<double> detrended(length);
    for (int i = 0; i < length; i++) {
        detrended[i] = values[i] - (intercept + slope * i);
    }

    // Check for cyclical patterns using autocorrelation
    bool has_cycle = false;
    int cycle_length = 0;
    double max_autocorr = 0;

    // Check autocorrelation for various lags
    const int MIN_LAG = 2;
    const int MAX_LAG = length / 3;  // Look for cycles up to 1/3 of series length

    for (int lag = MIN_LAG; lag < MAX_LAG; lag++) {
        double autocorr = calculateAutocorrelation(detrended.data(), length, lag);

        // If strong positive autocorrelation found
        if (autocorr > 0.3 && autocorr > max_autocorr) {
            max_autocorr = autocorr;
            cycle_length = lag;
            has_cycle = true;
        }
    }
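
    // Assumption: calculateAutocorrelation returns the standard lag-k sample
    // autocorrelation of the detrended series x of length n,
    //   r_k = sum_{t=k..n-1} (x_t - mean(x)) * (x_{t-k} - mean(x))
    //         / sum_{t=0..n-1} (x_t - mean(x))^2
    // A value near 1 at some lag k therefore suggests a repeating cycle of
    // length k samples.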

    // Check if cycle pattern is stronger than linear trend
    if (has_cycle && max_autocorr > std::abs(r_squared)) {
        *strength = max_autocorr;
        return TREND_CYCLIC;
    }

    // Determine trend direction based on slope and strength
    *strength = std::abs(r_squared);

    // Require minimum strength to declare a trend
    if (*strength < 0.2) {
        return TREND_NONE;
    } else if (slope > 0) {
        return TREND_INCREASING;
    } else {
        return TREND_DECREASING;
    }
}
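
// Example (hypothetical caller, shown only to illustrate the contract; the
// TrendType enum is assumed to come from health_analytics_engine.h):
//
//   double strength = 0;
//   TrendType t = detect_trend(values.data(), (int)values.size(), &strength);
//   if (t == TREND_INCREASING && strength > 0.5) {
//       // strong upward trend
//   }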

/**
 * @brief Predict future values of a time series using ARIMA-like approach
 *
 * @param timeSeries Time series data
 * @param dataLength Length of time series
 * @param stepsAhead Number of future steps to predict
 * @param factorName Name of the factor being predicted
 * @return TimeSeriesForecast Structure containing predictions and confidence intervals
 */
TimeSeriesForecast predict_time_series(const double* timeSeries,
                                       int dataLength,
                                       int stepsAhead,
                                       const char* factorName) {
    TimeSeriesForecast forecast;
    memset(&forecast, 0, sizeof(TimeSeriesForecast));

    if (dataLength < 5 || stepsAhead <= 0) {
        forecast.overallConfidence = 0;
        return forecast;
    }

    // Copy factor name
    strncpy(forecast.factorName, factorName, MAX_STRING_SIZE - 1);
    forecast.factorName[MAX_STRING_SIZE - 1] = '\0';

    // First, check for seasonality
    int potentialSeasonality = 0;
    double maxAutocorr = 0;

    // Look for seasonality in range 2 to dataLength/3
    for (int lag = 2; lag <= dataLength/3; lag++) {
        double acf = calculateAutocorrelation(timeSeries, dataLength, lag);
        if (acf > 0.3 && acf > maxAutocorr) {
            maxAutocorr = acf;
            potentialSeasonality = lag;
        }
    }
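
    // Example: with daily samples and a weekly rhythm, the autocorrelation is
    // typically strongest at lag 7, so potentialSeasonality comes out as 7.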

    // Set seasonality period if detected
    forecast.seasonalityPeriod = potentialSeasonality;

    // Decompose time series if seasonality detected
    std::vector<double> trend(dataLength);
    std::vector<double> seasonal(dataLength);
    std::vector<double> residual(dataLength);

    bool hasSeasonality = potentialSeasonality > 0 && maxAutocorr > 0.3;

    if (hasSeasonality) {
        // Decompose the time series
        decomposeTimeSeries(timeSeries, dataLength, potentialSeasonality,
                            trend.data(), seasonal.data(), residual.data());
    } else {
        // No seasonality, just use simple moving average for trend
        calculateMovingAverage(timeSeries, dataLength, std::min(7, dataLength/3), trend.data());

        // No seasonal component
        for (int i = 0; i < dataLength; i++) {
            seasonal[i] = 0;
            residual[i] = timeSeries[i] - trend[i];
        }
    }
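
    // Assumption: decomposeTimeSeries performs an additive decomposition, so
    // for every index t: timeSeries[t] ~= trend[t] + seasonal[t] + residual[t].
    // The same identity holds by construction in the non-seasonal branch above.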

    // Fit AR model to residuals for short-term dynamics
    // Determine optimal AR order using PACF
    int maxLag = std::min(10, dataLength/5);
    std::vector<double> pacf(maxLag + 1);
    calculatePACF(residual.data(), dataLength, maxLag, pacf.data());

    // Find significant AR terms (PACF > 0.2)
    std::vector<int> significantLags;
    for (int i = 1; i <= maxLag; i++) {
        if (std::abs(pacf[i]) > 0.2) {
            significantLags.push_back(i);
        }
    }
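
    // The selected lags define a sparse autoregressive model for the residual
    // series r:
    //   r_t ~= sum_j arCoefficients[j] * r_{t - significantLags[j]} + noise
    // calculatePACF is assumed to return partial autocorrelations (e.g. via
    // Durbin-Levinson); the fixed 0.2 cut-off is a heuristic stand-in for the
    // conventional 1.96/sqrt(n) significance band.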

    // Limit to 3 most significant terms
    if (significantLags.size() > 3) {
        std::sort(significantLags.begin(), significantLags.end(),
                  [&pacf](int a, int b) {
                      return std::abs(pacf[a]) > std::abs(pacf[b]);
                  });
        significantLags.resize(3);
        // Restore ascending lag order: the code below relies on
        // significantLags.back() being the largest retained lag.
        std::sort(significantLags.begin(), significantLags.end());
    }

    // Fit AR coefficients using linear regression
    int arOrder = significantLags.size();
    std::vector<double> arCoefficients(arOrder, 0);

    if (arOrder > 0) {
        // Prepare training data for AR model
        int trainingSize = dataLength - significantLags.back();
        std::vector<std::vector<double>> X(trainingSize, std::vector<double>(arOrder));
        std::vector<double> y(trainingSize);

        for (int i = 0; i < trainingSize; i++) {
            int t = i + significantLags.back();
            y[i] = residual[t];

            for (int j = 0; j < arOrder; j++) {
                X[i][j] = residual[t - significantLags[j]];
            }
        }

        // Very simplified AR coefficient estimation
        // Real implementation would use matrix operations
        for (int j = 0; j < arOrder; j++) {
            double sumXY = 0, sumX2 = 0;
            for (int i = 0; i < trainingSize; i++) {
                sumXY += X[i][j] * y[i];
                sumX2 += X[i][j] * X[i][j];
            }
            if (sumX2 > 0) {
                arCoefficients[j] = sumXY / sumX2;
            }
        }
    }
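
    // Note: the loop above estimates each coefficient as an independent
    // univariate projection, phi_j = <x_j, y> / <x_j, x_j>, which ignores
    // correlation between the lagged regressors. A full OLS fit would solve
    // the normal equations (X^T X) * phi = X^T y (or apply Yule-Walker to the
    // residual autocovariances), e.g. with an Eigen or LAPACK solver.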

    // Set time unit (days by default)
    forecast.timeUnit = TIME_UNIT_DAYS;

    // Generate forecasts
    double trendGrowth = 0;
    if (dataLength > 10) {
        // Calculate average trend growth over last 10 points
        trendGrowth = (trend[dataLength-1] - trend[dataLength-11]) / 10.0;
    }

    // Last observed residuals, extended below with forecasted residuals
    std::vector<double> lastResiduals(dataLength);
    for (int i = 0; i < dataLength; i++) {
        lastResiduals[i] = residual[i];
    }

    // Generate predictions
    for (int i = 0; i < stepsAhead && i < 30; i++) {
        // Forecast trend component
        double trendForecast = trend[dataLength-1] + trendGrowth * (i + 1);

        // Forecast seasonal component (if any)
        double seasonalForecast = 0;
        if (hasSeasonality && potentialSeasonality > 0) {
            seasonalForecast = seasonal[dataLength - potentialSeasonality + (i % potentialSeasonality)];
        }

        // Forecast residual component using AR model
        double residualForecast = 0;
        for (int j = 0; j < arOrder; j++) {
            int lag = significantLags[j];
            if (i >= lag) {
                // Use previously forecasted residuals
                residualForecast += arCoefficients[j] * lastResiduals[dataLength + i - lag];
            } else {
                // Use observed residuals
                residualForecast += arCoefficients[j] * residual[dataLength - lag + i];
            }
        }

        // Store forecasted residual
        lastResiduals.push_back(residualForecast);

        // Combine components for final forecast
        forecast.predictions[i] = trendForecast + seasonalForecast + residualForecast;

        // Calculate confidence intervals (widen with forecast horizon)
        double stdError = 0;
        for (int j = 0; j < dataLength; j++) {
            stdError += residual[j] * residual[j];
        }
        stdError = sqrt(stdError / dataLength);

        // Wider intervals for longer forecasts
        double multiplier = 1.96 * sqrt(1.0 + 0.25 * i); // Roughly 95% CI with growing uncertainty

        forecast.confidenceIntervals[i][0] = forecast.predictions[i] - multiplier * stdError;
        forecast.confidenceIntervals[i][1] = forecast.predictions[i] + multiplier * stdError;
    }
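
    // Interval sketch: with residual standard error s, the bounds above are
    //   predictions[i] +/- 1.96 * sqrt(1 + 0.25 * i) * s
    // i.e. an approximate 95% normal interval (z = 1.96) whose width grows
    // with the horizon i as a simple substitute for full multi-step forecast
    // variance propagation.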

    // Set overall confidence based on model quality and forecast distance
    double modelAccuracy = 0.8; // Would be calculated from validation in real model
    if (hasSeasonality) modelAccuracy += 0.1;
    if (arOrder > 0) modelAccuracy += 0.1 * std::min(arOrder, 2);

    forecast.overallConfidence = modelAccuracy * exp(-0.05 * stepsAhead);
    if (forecast.overallConfidence > 0.95) forecast.overallConfidence = 0.95;
    if (forecast.overallConfidence < 0.2) forecast.overallConfidence = 0.2;

    return forecast;
}
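
// Example (hypothetical caller; loadDailySeries and the factor name are
// illustrative, not part of this codebase):
//
//   std::vector<double> series = loadDailySeries();
//   TimeSeriesForecast f = predict_time_series(series.data(),
//                                              (int)series.size(),
//                                              /*stepsAhead=*/7,
//                                              "resting_heart_rate");
//   // f.predictions[0..6]            point forecasts
//   // f.confidenceIntervals[i][0..1] lower/upper ~95% bounds
//   // f.overallConfidence            rough quality score in [0.2, 0.95]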