C# を使用してクラスター分析アルゴリズムを作成する方法-C#.Net チュートリアル-php.cn

C# を使用してクラスター分析アルゴリズムを作成する方法

王林

リリース： 2023-09-19 14:40:54

オリジナル

742 人が閲覧しました

C# を使用してクラスター分析アルゴリズムを作成する方法

1. 概要
クラスター分析は、互いに類似したデータポイントを同じクラスターにまとめ、類似していないデータポイントを異なるクラスターに分けるデータ分析手法です。機械学習とデータマイニングの分野では、クラスター分析は、分類器を構築し、データの構造を調査し、隠れたパターンを明らかにするために一般的に使用されます。

この記事では、C# を使用してクラスター分析アルゴリズムを作成する方法を紹介します。 K 平均法アルゴリズムをアルゴリズム例として使用し、具体的なコード例を示します。

2. K 平均法アルゴリズムの概要
K 平均法アルゴリズムは、最も一般的に使用されるクラスター分析アルゴリズムの 1 つです。その基本的な考え方は、サンプル間の距離を計算し、「最も距離が近いものに割り当てる」という原則に従ってサンプルを K 個のクラスターに分割することです。具体的な手順は次のとおりです。

1. K 個の初期クラスター中心 (トレーニングデータ内の K 個のサンプル) をランダムに選択します。
2. トレーニングデータを走査し、各サンプルと各クラスター中心との距離を計算して、サンプルを最も近いクラスター中心に割り当てます。
3. 各クラスターについて、クラスター内のすべてのサンプルの平均を計算し、それを新しいクラスター中心として更新します。
4. クラスター中心が変化しなくなるか、最大反復回数に達するまで、ステップ 2 と 3 を繰り返します。

3. C# コードの例
次は、C# を使用して K 平均法アルゴリズムを作成するコード例です:

using System;
using System.Collections.Generic;
using System.Linq;

/// <summary>
/// Minimal K-means clustering over points represented as lists of coordinates.
/// </summary>
public class KMeans
{
    // Largest per-coordinate movement that still counts as "centroid unchanged".
    private const double ConvergenceTolerance = 1e-6;

    // One generator per instance: creating a new Random on every call is wasteful and,
    // on older runtimes, can yield identical sequences for calls made close together.
    private readonly Random _random = new Random();

    /// <summary>
    /// Partitions <paramref name="data"/> into <paramref name="k"/> clusters and returns the cluster centroids.
    /// </summary>
    /// <param name="data">Sample points; every point is expected to have the same number of coordinates. The list is not modified.</param>
    /// <param name="k">Number of clusters; must be between 1 and <c>data.Count</c>.</param>
    /// <param name="maxIterations">Upper bound on assignment/update rounds. With a value of 0 or less the randomly chosen seed centroids are returned as-is.</param>
    /// <returns>A list of <paramref name="k"/> centroids, each a newly allocated list of coordinates.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="k"/> is less than 1 or greater than the number of samples.</exception>
    public List<List<double>> Cluster(List<List<double>> data, int k, int maxIterations)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        if (k < 1 || k > data.Count)
        {
            throw new ArgumentOutOfRangeException(
                nameof(k), k, "k must be between 1 and the number of samples.");
        }

        // Seed the centroids with k distinct samples.
        List<List<double>> centroids = InitializeCentroids(data, k);

        for (int iteration = 0; iteration < maxIterations; iteration++)
        {
            // Fresh, empty buckets for this round's assignment.
            var clusters = new List<List<List<double>>>(k);
            for (int j = 0; j < k; j++)
            {
                clusters.Add(new List<List<double>>());
            }

            // Assign every sample to its nearest centroid.
            foreach (var point in data)
            {
                int nearestCentroidIndex = FindNearestCentroidIndex(point, centroids);
                clusters[nearestCentroidIndex].Add(point);
            }

            // Recompute each centroid as the mean of its members.
            var newCentroids = new List<List<double>>(k);
            for (int j = 0; j < k; j++)
            {
                // A cluster can end up empty (e.g. duplicate samples chosen as seeds);
                // keep its previous centroid instead of averaging over nothing.
                newCentroids.Add(clusters[j].Count > 0 ? UpdateCentroid(clusters[j]) : centroids[j]);
            }

            // Stop as soon as no centroid moved by more than the tolerance.
            bool converged = CentroidsNotChanged(centroids, newCentroids);
            centroids = newCentroids;
            if (converged)
            {
                break;
            }
        }

        return centroids;
    }

    /// <summary>
    /// Picks k distinct samples at random as the initial centroids without modifying <paramref name="data"/>.
    /// </summary>
    private List<List<double>> InitializeCentroids(List<List<double>> data, int k)
    {
        // Partial Fisher-Yates shuffle over indices: the first k slots end up holding
        // k distinct, uniformly chosen sample indices.
        int[] indices = new int[data.Count];
        for (int i = 0; i < indices.Length; i++)
        {
            indices[i] = i;
        }

        var centroids = new List<List<double>>(k);
        for (int i = 0; i < k; i++)
        {
            int pick = _random.Next(i, indices.Length);
            int chosen = indices[pick];
            indices[pick] = indices[i];
            indices[i] = chosen;

            // Copy the coordinates so the returned centroids never alias caller-owned points.
            centroids.Add(new List<double>(data[chosen]));
        }

        return centroids;
    }

    /// <summary>
    /// Returns the index of the centroid closest to <paramref name="point"/>; ties go to the lowest index.
    /// </summary>
    private int FindNearestCentroidIndex(List<double> point, List<List<double>> centroids)
    {
        int index = 0;
        double minDistance = double.MaxValue;

        for (int i = 0; i < centroids.Count; i++)
        {
            double distance = CalculateDistance(point, centroids[i]);
            if (distance < minDistance)
            {
                minDistance = distance;
                index = i;
            }
        }

        return index;
    }

    /// <summary>
    /// Euclidean distance between two points, iterating over the coordinates of <paramref name="pointA"/>.
    /// </summary>
    private double CalculateDistance(List<double> pointA, List<double> pointB)
    {
        double sumSquaredDifferences = 0;
        for (int i = 0; i < pointA.Count; i++)
        {
            double difference = pointA[i] - pointB[i];
            sumSquaredDifferences += difference * difference;
        }

        return Math.Sqrt(sumSquaredDifferences);
    }

    /// <summary>
    /// Computes the coordinate-wise mean of a non-empty cluster.
    /// </summary>
    private List<double> UpdateCentroid(List<List<double>> cluster)
    {
        int dimension = cluster[0].Count;
        var centroid = new List<double>(dimension);

        for (int i = 0; i < dimension; i++)
        {
            double sum = 0;
            foreach (var point in cluster)
            {
                sum += point[i];
            }
            centroid.Add(sum / cluster.Count);
        }

        return centroid;
    }

    /// <summary>
    /// Returns true when every coordinate of every centroid moved by at most the convergence tolerance.
    /// </summary>
    private bool CentroidsNotChanged(List<List<double>> oldCentroids, List<List<double>> newCentroids)
    {
        for (int i = 0; i < oldCentroids.Count; i++)
        {
            for (int j = 0; j < oldCentroids[i].Count; j++)
            {
                if (Math.Abs(oldCentroids[i][j] - newCentroids[i][j]) > ConvergenceTolerance)
                {
                    return false;
                }
            }
        }

        return true;
    }
}

/// <summary>
/// Console demo: clusters a small set of 2-D points into two groups and prints the centroids.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        // Sample points for the demo: four near (1..2, 1..2) and four near (5..7, 5..6).
        var samples = new List<List<double>>
        {
            new List<double> { 1, 1 },
            new List<double> { 1, 2 },
            new List<double> { 2, 1 },
            new List<double> { 2, 2 },
            new List<double> { 5, 6 },
            new List<double> { 6, 5 },
            new List<double> { 6, 6 },
            new List<double> { 7, 5 },
        };

        const int clusterCount = 2;
        const int iterationLimit = 100;

        var result = new KMeans().Cluster(samples, clusterCount, iterationLimit);

        // Header line, then one "x, y" line per centroid.
        Console.WriteLine("聚类中心：");
        for (int i = 0; i < result.Count; i++)
        {
            Console.WriteLine(string.Join(", ", result[i]));
        }
    }
}

ログイン後にコピー

上記のコードは、C# を使用して K 平均法アルゴリズムを実装し、単純なクラスタリング操作を実行する方法を示しています。ユーザーは、必要に応じてデータサンプルとクラスター中心の数を変更し、実際の状況に応じて最大反復回数を調整できます。

4. まとめ
この記事では、C# を使用してクラスター分析アルゴリズムを作成する方法を紹介し、K 平均法アルゴリズムの具体的なコード例を示します。この記事を通じて、読者が C# を使用してクラスター分析を実装する方法をすぐに理解して、独自のデータ分析とマイニングプロジェクトをより強力にサポートできることを願っています。

以上がC# を使用してクラスター分析アルゴリズムを作成する方法の詳細内容です。詳細については、PHP 中国語 Web サイトの他の関連記事を参照してください。