Dataset 'admission.csv'
1. K-means Clustering
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import pandas as pd
# Load the admission records; the two normalized score columns are the features.
my_data = pd.read_csv('admission.csv')
X = my_data[['NORMALIZED_GPA', 'NORMALIZED_SAT']]

# Fit a 2-cluster K-means model with a fixed seed, then assign each row to a cluster.
random_state = 17
cluster = KMeans(n_clusters=2, random_state=random_state)
cluster.fit(X)
y_pred = cluster.predict(X)

# Two side-by-side panels over the same points:
# the left one is coloured by predicted cluster, the right one by the ACCEPT_NUM column.
fig, axs = plt.subplots(1, 2)
panel_colors = (y_pred, my_data['ACCEPT_NUM'].tolist())
for ax, colors in zip(axs, panel_colors):
    ax.set_aspect('equal')
    ax.scatter(X["NORMALIZED_GPA"], X["NORMALIZED_SAT"], c=colors, s=30, cmap=plt.cm.Paired)
Lines 1-6 are the import and data-loading block.
Lines 8-10 are the clustering part.
In line 10, y_pred holds the clustering results: with 2 clusters, each point is labelled either 0 or 1. Lines 12-16 do the plotting; note that only two-dimensional X data can be plotted this way.
The left plot shows the clustering results. The right plot shows the true “Accept” values.
Output:
2. DBSCAN
import pandas as pd
from sklearn.feature_extraction import image
from sklearn.cluster import DBSCAN
import numpy as np
import matplotlib.pyplot as plt
# Load the admission records; the two normalized score columns are the features.
my_data = pd.read_csv('admission.csv')
X = my_data[['NORMALIZED_GPA', 'NORMALIZED_SAT']]

# Density-based clustering with a neighbourhood radius of 0.2.
cluster = DBSCAN(eps=0.2)
cluster.fit(X)

# Use the builtin int: the np.int alias was removed in NumPy 1.24 and now
# raises AttributeError. It was only ever an alias for int, so the result
# is unchanged on older NumPy versions.
Pred = cluster.labels_.astype(int)

# Left panel: points coloured by DBSCAN label. Right panel: coloured by ACCEPT_NUM.
fig, axs = plt.subplots(1, 2)
axs[0].set_aspect('equal')
axs[0].scatter(X["NORMALIZED_GPA"], X["NORMALIZED_SAT"], c=Pred, s=30, cmap=plt.cm.Paired)
axs[1].set_aspect('equal')
axs[1].scatter(X["NORMALIZED_GPA"], X["NORMALIZED_SAT"], c=my_data['ACCEPT_NUM'].tolist(), s=30, cmap=plt.cm.Paired)
3. Dendrogram
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, linkage
# Load the customer table and pick the numeric columns used for clustering.
my_data = pd.read_csv('customer.csv')
data = my_data[['Age', 'IncomeNum', 'GenderNum']]

# Build the hierarchical linkage matrix with scipy's default settings.
Z = linkage(data)

# Draw the tree, labelling each leaf with the matching value from the ID column
# (call dendrogram(Z) instead for unlabelled leaves).
leaf_labels = my_data['ID'].tolist()
dendrogram(Z, labels=leaf_labels)
4. GMM(GaussianMixture)
import pandas as pd
from sklearn.feature_extraction import image
from sklearn import mixture
import numpy as np
import matplotlib.pyplot as plt
# Load the admission records; the two normalized score columns are the features.
my_data = pd.read_csv('admission.csv')
X = my_data[['NORMALIZED_GPA', 'NORMALIZED_SAT']]

# Fit a two-component Gaussian mixture with full covariance matrices,
# then assign each row to a component.
cluster = mixture.GaussianMixture(covariance_type='full', n_components=2)
cluster.fit(X)
Pred = cluster.predict(X)

# Two side-by-side panels over the same points:
# the left one is coloured by predicted component, the right one by the ACCEPT_NUM column.
fig, axs = plt.subplots(1, 2)
panel_colors = (Pred, my_data['ACCEPT_NUM'].tolist())
for ax, colors in zip(axs, panel_colors):
    ax.set_aspect('equal')
    ax.scatter(X["NORMALIZED_GPA"], X["NORMALIZED_SAT"], c=colors, s=30, cmap=plt.cm.Paired)
5. AgglomerativeClustering
import pandas as pd
from sklearn.feature_extraction import image
from sklearn.cluster import AgglomerativeClustering
import numpy as np
import matplotlib.pyplot as plt
# Load the admission records; the two normalized score columns are the features.
my_data = pd.read_csv('admission.csv')
X = my_data[['NORMALIZED_GPA', 'NORMALIZED_SAT']]

# Average-linkage agglomerative clustering into 2 clusters using the
# city-block distance. scikit-learn renamed the `affinity` keyword to `metric`
# (deprecated in 1.2, removed in 1.4), so try the current name first and fall
# back to the old one for installations that predate `metric`.
try:
    cluster = AgglomerativeClustering(linkage="average", metric="cityblock", n_clusters=2)
except TypeError:
    cluster = AgglomerativeClustering(linkage="average", affinity="cityblock", n_clusters=2)
cluster.fit(X)

# Use the builtin int: the np.int alias was removed in NumPy 1.24 and now
# raises AttributeError. It was only ever an alias for int, so the result
# is unchanged on older NumPy versions.
Pred = cluster.labels_.astype(int)

# Left panel: points coloured by cluster label. Right panel: coloured by ACCEPT_NUM.
fig, axs = plt.subplots(1, 2)
axs[0].set_aspect('equal')
axs[0].scatter(X["NORMALIZED_GPA"], X["NORMALIZED_SAT"], c=Pred, s=30, cmap=plt.cm.Paired)
axs[1].set_aspect('equal')
axs[1].scatter(X["NORMALIZED_GPA"], X["NORMALIZED_SAT"], c=my_data['ACCEPT_NUM'].tolist(), s=30, cmap=plt.cm.Paired)
6. SpectralClustering
import pandas as pd
from sklearn.feature_extraction import image
from sklearn.cluster import SpectralClustering
import numpy as np
import matplotlib.pyplot as plt
# Load the admission records; the two normalized score columns are the features.
my_data = pd.read_csv('admission.csv')
X = my_data[['NORMALIZED_GPA', 'NORMALIZED_SAT']]

# Spectral clustering into 2 clusters on a nearest-neighbours affinity graph,
# using the ARPACK eigen-solver.
spectral = SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="nearest_neighbors")
spectral.fit(X)

# Use the builtin int: the np.int alias was removed in NumPy 1.24 and now
# raises AttributeError. It was only ever an alias for int, so the result
# is unchanged on older NumPy versions.
Pred = spectral.labels_.astype(int)

# Left panel: points coloured by cluster label. Right panel: coloured by ACCEPT_NUM.
fig, axs = plt.subplots(1, 2)
axs[0].set_aspect('equal')
axs[0].scatter(X["NORMALIZED_GPA"], X["NORMALIZED_SAT"], c=Pred, s=30, cmap=plt.cm.Paired)
axs[1].set_aspect('equal')
axs[1].scatter(X["NORMALIZED_GPA"], X["NORMALIZED_SAT"], c=my_data['ACCEPT_NUM'].tolist(), s=30, cmap=plt.cm.Paired)
For more details you can contact us or send your requirement details at:
realcode4you@gmail.com
Comments