// File-viewer metadata: 177 lines, 5.5 KiB, JavaScript
/**
 * Pretty-breaks classification, similar to the "Pretty Breaks" mode in QGIS.
 *
 * Produces evenly spaced, easy-to-read break values (1, 2 or 5 times a power
 * of ten) that cover the full range of the data.
 *
 * Entries that do not convert to a finite number (NaN, +/-Infinity, undefined)
 * are ignored when the data range is determined.
 *
 * @param {number[]} data - Values to classify.
 * @param {number} n_classes - Maximum number of classes wanted; must be >= 1.
 * @returns {number[]} Ascending break values, at most `n_classes + 1` of them.
 *   Returns `[]` when there is no finite data or `n_classes` is invalid, and
 *   `[v, v]` when every value equals `v`.
 */
function prettyBreaksClassification(data, n_classes) {
  if (data.length === 0) return [];
  if (!Number.isFinite(n_classes) || n_classes < 1) return [];

  // Scan with a loop: Math.min(...data) / Math.max(...data) pass every element
  // as a call argument and overflow the stack on large arrays.
  let min_val = Infinity;
  let max_val = -Infinity;
  for (const item of data) {
    const value = Number(item);
    if (!Number.isFinite(value)) continue;
    if (value < min_val) min_val = value;
    if (value > max_val) max_val = value;
  }
  if (min_val > max_val) return []; // no finite value was found

  // A zero (or overflowing) range has no meaningful interval; log10(0) would
  // otherwise turn every break into NaN.
  const data_range = max_val - min_val;
  if (data_range === 0 || !Number.isFinite(data_range)) {
    return [min_val, max_val];
  }

  // Base interval, then its order of magnitude.
  const raw_interval = data_range / n_classes;
  let exponent = Math.floor(Math.log10(raw_interval));
  const base_magnitude = 10 ** exponent;
  if (!(base_magnitude > 0) || !Number.isFinite(base_magnitude)) {
    return [min_val, max_val];
  }
  const normalized = raw_interval / base_magnitude;

  // Pick the smallest "nice" multiplier (1, 2, 5, 10) that is >= normalized.
  // A multiplier of 10 is stored as 1 at the next power of ten.
  const NICE_STEPS = [1, 2, 5];
  let step_index;
  if (normalized <= 1) {
    step_index = 0;
  } else if (normalized <= 2) {
    step_index = 1;
  } else if (normalized <= 5) {
    step_index = 2;
  } else {
    step_index = 0;
    exponent += 1;
  }

  let nice_interval = NICE_STEPS[step_index] * 10 ** exponent;
  let low_index = Math.floor(min_val / nice_interval);
  let high_index = Math.ceil(max_val / nice_interval);

  // Aligning the ends to multiples of the interval can require one class more
  // than requested. Widen to the next nice interval instead of cutting off the
  // break that covers the maximum. Once the interval reaches the largest
  // absolute value the class count cannot shrink further, so the loop stops.
  const max_abs = Math.max(Math.abs(min_val), Math.abs(max_val));
  while (high_index - low_index > n_classes && nice_interval < max_abs) {
    step_index += 1;
    if (step_index === NICE_STEPS.length) {
      step_index = 0;
      exponent += 1;
    }
    nice_interval = NICE_STEPS[step_index] * 10 ** exponent;
    low_index = Math.floor(min_val / nice_interval);
    high_index = Math.ceil(max_val / nice_interval);
  }

  // Remove binary floating-point noise (e.g. 0.30000000000000004) and
  // normalise -0 to 0; integers are returned untouched.
  const tidy = (value) => {
    if (value === 0) return 0;
    return Number.isInteger(value) ? value : Number(value.toPrecision(15));
  };

  // Still too many classes: the data straddles zero and fewer than two classes
  // were requested. Return the single covering class.
  if (high_index - low_index > n_classes) {
    return [tidy(low_index * nice_interval), tidy(high_index * nice_interval)];
  }

  // Multiply by the index rather than accumulating, so rounding errors do not
  // build up from break to break.
  const breaks = [];
  for (let k = low_index; k <= high_index; k++) {
    breaks.push(tidy(k * nice_interval));
  }
  return breaks;
}

/**
 * Sum of squared deviations from the mean for `data[start..end]` (inclusive).
 *
 * Despite the name, the result is NOT divided by the element count: it is the
 * un-normalised within-class sum of squares.
 *
 * @param {number[]} data - Sorted data array.
 * @param {number} start - Index of the first element (expected to be >= 0).
 * @param {number} end - Index of the last element, inclusive. Values past the
 *   end of the array are clamped to the last valid index.
 * @returns {number} Sum of squared deviations; 0 when the range holds fewer
 *   than two elements.
 */
function variance(data, start, end) {
  // Clamp so that the mean is computed over the elements that actually exist.
  const last = Math.min(end, data.length - 1);
  if (start >= last) return 0;

  // Walk the range by index instead of slicing, which copied the sub-array
  // twice on every call.
  const count = last - start + 1;
  let total = 0;
  for (let i = start; i <= last; i++) {
    total += data[i];
  }
  const mean = total / count;

  let squaredDeviations = 0;
  for (let i = start; i <= last; i++) {
    squaredDeviations += (data[i] - mean) ** 2;
  }
  return squaredDeviations;
}

/**
 * Jenks natural-breaks classification.
 *
 * Sorts a copy of the data and uses dynamic programming to find the split into
 * `n_classes` contiguous classes that minimises the total within-class sum of
 * squared deviations.
 *
 * @param {number[]} data - Values to classify (not modified).
 * @param {number} n_classes - Number of classes; must be an integer >= 1.
 * @returns {number[]} `n_classes + 1` ascending breaks: the minimum, the upper
 *   bound of each class except the last, and the maximum. Returns `[]` for
 *   empty data or an invalid `n_classes`, and the sorted data itself when
 *   `n_classes >= data.length`.
 */
function jenks_breaks_jenkspy(data, n_classes) {
  if (data.length === 0) return [];
  if (!Number.isInteger(n_classes) || n_classes < 1) return [];

  const sortedData = data.slice().sort((a, b) => a - b);
  const n = sortedData.length;
  const k = n_classes;
  if (k >= n) return sortedData;

  // varianceCombinations[m][c]: minimal total cost of splitting the first m
  // sorted values into c classes.
  // lowerClassLimits[m][c]: number of values that precede the last class in
  // that optimal split (for c === 1 the value 1 is stored as a sentinel that
  // maps to the minimum during backtracking).
  const lowerClassLimits = Array.from({ length: n + 1 }, () => new Array(k + 1).fill(0));
  const varianceCombinations = Array.from({ length: n + 1 }, () =>
    new Array(k + 1).fill(Infinity)
  );

  // Single class: running sum of squared deviations of each prefix, updated
  // incrementally (Welford) instead of recomputed from scratch.
  let prefixMean = 0;
  let prefixCost = 0;
  for (let m = 1; m <= n; m++) {
    const value = sortedData[m - 1];
    const delta = value - prefixMean;
    prefixMean += delta / m;
    prefixCost += delta * (value - prefixMean);
    lowerClassLimits[m][1] = 1;
    varianceCombinations[m][1] = prefixCost;
  }

  // Dynamic programming over the class count. For a fixed end m the last class
  // grows one element at a time as its start index i moves left, so its cost
  // is updated in O(1) per step rather than O(n).
  for (let c = 2; c <= k; c++) {
    for (let m = c; m <= n; m++) {
      let classSize = 0;
      let classMean = 0;
      let classCost = 0;
      let bestCost = Infinity;
      let bestStart = 0;

      for (let i = m - 1; i >= c - 1; i--) {
        const value = sortedData[i];
        classSize += 1;
        const delta = value - classMean;
        classMean += delta / classSize;
        classCost += delta * (value - classMean);

        const cost = varianceCombinations[i][c - 1] + classCost;
        // "<=" while scanning right-to-left keeps the smallest start index on
        // ties, matching a left-to-right scan with a strict "<".
        if (cost <= bestCost) {
          bestCost = cost;
          bestStart = i;
        }
      }

      if (bestCost < Infinity) {
        varianceCombinations[m][c] = bestCost;
        lowerClassLimits[m][c] = bestStart;
      }
    }
  }

  // Backtrack from the full data set to recover the class boundaries.
  const breaks = [];
  let current = n;
  for (let c = k; c >= 1; c--) {
    const start = lowerClassLimits[current][c];
    // Decide on the index, not on the value's truthiness: a break value of 0
    // is legitimate and must not be replaced by the minimum.
    breaks.unshift(start > 0 ? sortedData[start - 1] : sortedData[0]);
    current = start;
  }
  breaks.push(sortedData[n - 1]);

  return breaks;
}

/**
 * Jenks natural breaks with down-sampling for large inputs.
 *
 * When the data holds more than `sample_size` values, it is sorted and sampled
 * at evenly spaced positions (systematic sampling over the sorted values), and
 * the breaks are computed on that sample. The smallest and largest values are
 * always part of the sample, so the outer breaks match the true data range.
 *
 * @param {number[]} data - Values to classify (not modified).
 * @param {number} n_classes - Number of classes.
 * @param {number} [sample_size=10000] - Maximum number of values handed to the
 *   Jenks algorithm. When sampling takes place at least two values (minimum
 *   and maximum) are taken.
 * @returns {number[]} Break values as returned by `jenks_breaks_jenkspy`.
 */
function jenks_with_stratified_sampling(data, n_classes, sample_size = 10000) {
  // Small enough: classify the data directly.
  if (data.length <= sample_size) {
    return jenks_breaks_jenkspy(data, n_classes);
  }

  const sorted_data = data.slice().sort((a, b) => a - b);
  const last_index = sorted_data.length - 1;

  // Spread the picks over [0, last_index] inclusive. Stepping by
  // length / sample_size never reaches the last element, which dropped the
  // data maximum from the sample.
  const target_size = Math.max(2, Math.floor(sample_size));
  const step = last_index / (target_size - 1);

  const sampled_data = [];
  for (let i = 0; i < target_size; i++) {
    const index = Math.min(last_index, Math.round(i * step));
    sampled_data.push(sorted_data[index]);
  }

  return jenks_breaks_jenkspy(sampled_data, n_classes);
}

/**
 * Computes classification break values for the data using the named method.
 *
 * @param {Array<number>} data - Numeric values to classify.
 * @param {number} segments - Number of segments (classes) to create.
 * @param {string} classificationMethod - Classification method to use.
 *   Supported values: "pretty_breaks" or "jenks_optimized".
 * @returns {Array<number>} Break values. An empty array is returned when the
 *   data is missing or empty, or when the method is not supported.
 */
function calculateClassification(data, segments, classificationMethod) {
  if (!data || data.length === 0) {
    return [];
  }

  switch (classificationMethod) {
    case "pretty_breaks":
      return prettyBreaksClassification(data, segments);
    case "jenks_optimized":
      return jenks_with_stratified_sampling(data, segments);
    default:
      // Keep the documented contract (always an array) instead of falling
      // through and returning undefined.
      return [];
  }
}

module.exports = { prettyBreaksClassification, jenks_breaks_jenkspy, jenks_with_stratified_sampling, calculateClassification }; |