{ "cells": [ { "cell_type": "markdown", "id": "bee22d09", "metadata": {}, "source": [ "# Evaluating Clustering" ] }, { "cell_type": "code", "execution_count": 1, "id": "fa9f1ed3", "metadata": {}, "outputs": [], "source": [ "import seaborn as sns\n", "import string\n", "from sklearn import datasets\n", "from sklearn.cluster import KMeans\n", "from sklearn import metrics\n", "import pandas as pd" ] }, { "cell_type": "markdown", "id": "f0c8b296", "metadata": {}, "source": [ "We'll continue with the iris dataset because it's visually clear." ] }, { "cell_type": "code", "execution_count": 2, "id": "a2d7cd4c", "metadata": {}, "outputs": [], "source": [ "measurement_cols = ['sepal_length','petal_length','sepal_width','petal_width']\n", "\n", "iris_df = sns.load_dataset('iris')\n", "iris_X = iris_df[measurement_cols]" ] }, { "cell_type": "markdown", "id": "0aa42edf", "metadata": {}, "source": [ "## Clustering with KMeans" ] }, { "cell_type": "code", "execution_count": 3, "id": "c3f9d5b6", "metadata": {}, "outputs": [], "source": [ "km3 = KMeans(n_clusters=3)" ] }, { "cell_type": "code", "execution_count": 4, "id": "aeabced1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
KMeans(n_clusters=3)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=3)
KMeans(n_clusters=4)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=4)