#pragma once #include #include #include #include "../untwine/Common.hpp" namespace untwine { class Stats { public: enum EnumType { NoEnum, Enumerate, Count, Global }; using EnumMap = std::unordered_map; using DataVector = std::vector; public: Stats(std::string name, EnumType enumerate, bool advanced = true) : m_name(name), m_enumerate(enumerate), m_advanced(advanced) { reset(); } // Merge another summary with this one. 'name', 'enumerate' and 'advanced' must match // or false is returns and no merge occurs. bool merge(const Stats& s); double minimum() const { return m_min; } double maximum() const { return m_max; } double average() const { return M1; } double populationVariance() const { return M2 / m_cnt; } double sampleVariance() const { return M2 / (m_cnt - 1.0); } double variance() const { return sampleVariance(); } double populationStddev() const { return std::sqrt(populationVariance()); } double sampleStddev() const { return std::sqrt(sampleVariance()); } double stddev() const { return sampleStddev(); } double populationSkewness() const { if (!M2 || ! m_advanced) return 0; return std::sqrt(double(m_cnt)) * M3 / std::pow(M2, 1.5); } double sampleSkewness() const { if (M2 == 0 || m_cnt <= 2 || !m_advanced) return 0.0; double c((double)m_cnt); return populationSkewness() * std::sqrt(c) * std::sqrt(c - 1) / (c - 2); } double skewness() const { return sampleSkewness(); } double populationKurtosis() const { if (M2 == 0 || !m_advanced) return 0; return double(m_cnt) * M4 / (M2 * M2); } double populationExcessKurtosis() const { if (M2 == 0 || !m_advanced) return 0; return populationKurtosis() - 3; } double sampleKurtosis() const { if (M2 == 0 || m_cnt <= 3 || !m_advanced) return 0; double c((double)m_cnt); return populationKurtosis() * (c + 1) * (c - 1) / ((c - 2) * (c - 3)); } double sampleExcessKurtosis() const { if (M2 == 0 || m_cnt <= 3 || !m_advanced) return 0; double c((double)m_cnt); return sampleKurtosis() - 3 * (c - 1) * (c - 1) / ((c - 2) * (c - 3)); } double kurtosis() const { return sampleExcessKurtosis(); } double median() const { return m_median; } double mad() const { return m_mad; } PointCount count() const { return m_cnt; } std::string name() const { return m_name; } const EnumMap& values() const { return m_values; } void computeGlobalStats(); void reset() { m_max = (std::numeric_limits::lowest)(); m_min = (std::numeric_limits::max)(); m_cnt = 0; m_median = 0.0; m_mad = 0.0; M1 = M2 = M3 = M4 = 0.0; } void insert(double value) { m_cnt++; m_min = (std::min)(m_min, value); m_max = (std::max)(m_max, value); if (m_enumerate != NoEnum) m_values[value]++; if (m_enumerate == Global) { if (m_data.capacity() - m_data.size() < 10000) m_data.reserve(m_data.capacity() + m_cnt); m_data.push_back(value); } // stolen from http://www.johndcook.com/blog/skewness_kurtosis/ PointCount n(m_cnt); // Difference from the mean double delta = value - M1; // Portion that this point's difference from the mean contributes // to the mean. double delta_n = delta / n; double term1 = delta * delta_n * (n - 1); // First moment - average. M1 += delta_n; if (m_advanced) { double delta_n2 = pow(delta_n, 2.0); // Fourth moment - kurtosis (sum part) M4 += term1 * delta_n2 * (n*n - 3*n + 3) + (6 * delta_n2 * M2) - (4 * delta_n * M3); // Third moment - skewness (sum part) M3 += term1 * delta_n * (n - 2) - 3 * delta_n * M2; } // Second moment - variance (sum part) M2 += term1; } private: std::string m_name; EnumType m_enumerate; bool m_advanced; double m_max; double m_min; double m_mad; double m_median; EnumMap m_values; DataVector m_data; PointCount m_cnt; double M1, M2, M3, M4; }; using IndexedStats = std::vector>; using StatsMap = std::unordered_map; } // namespace untwine namespace std { template<> struct hash { std::size_t operator()(const pdal::Dimension::Id& id) const noexcept { return std::hash()((int)id); } }; }