ALL LABS CSDA: October 2024

Program 1: Find S Algorithm Program

import pandas as pd

import numpy as np

# Read the training examples from disk.
data = pd.read_csv("data.csv")

# Convert to a NumPy array once, then split it column-wise:
# every column but the last holds attributes, the last holds the label.
table = np.array(data)
con, tar = table[:, :-1], table[:, -1]

def train(con, tar):
    """Run the Find-S algorithm and return the most specific hypothesis.

    Args:
        con: Sequence of training examples. Each example is an indexable
            sequence of attribute values that supports ``.copy()``
            (a list or a NumPy row).
        tar: Sequence of labels parallel to ``con``. ``'yes'`` marks a
            positive example; every other label is ignored.

    Returns:
        A copy of the first positive example in which every attribute that
        differs in some other positive example is replaced by ``'?'``.

    Raises:
        ValueError: If ``tar`` contains no ``'yes'`` label.
    """
    # Seed the hypothesis with the first positive example.
    sh = None
    for i, val in enumerate(tar):
        if val == 'yes':
            sh = con[i].copy()
            break
    if sh is None:
        raise ValueError("no positive ('yes') example to seed the hypothesis")

    for example, label in zip(con, tar):
        if label != 'yes':
            continue
        # Walk the attributes of the hypothesis (not the number of rows),
        # generalising every position where this positive example disagrees.
        for x in range(len(sh)):
            if example[x] != sh[x]:
                sh[x] = '?'
    return sh

# Run Find-S on the loaded examples and print the resulting hypothesis.
print(train(con,tar))

---------------------------------------------------------------------------------------------------------------------

Program 2 Candidate Elimination Algorithm python Program

# Candidate Elimination algorithm

import pandas as pd

import numpy as np

# Read the training examples from disk.
data = pd.read_csv("data.csv")

# One conversion to NumPy, then separate the attribute columns from the
# final (label) column.
table = np.array(data)
con, tar = table[:, :-1], table[:, -1]

def learn(con, tar):
    """Run a simplified Candidate Elimination pass over the training data.

    Progress is printed after every example.

    Args:
        con: Sequence of training examples. Each example is an indexable
            sequence of attribute values; the first one must support
            ``.copy()``.
        tar: Sequence of labels parallel to ``con``. Only ``'yes'`` and
            ``'no'`` are acted on.

    Returns:
        A ``(gh, sh)`` tuple: ``gh`` is the list of general-hypothesis rows
        that contain at least one constraint (rows made only of ``'?'`` are
        dropped), and ``sh`` is the final specific hypothesis.
    """
    # NOTE: the first example seeds the specific hypothesis, so the data is
    # expected to start with a positive example.
    sh = con[0].copy()
    print("Initializtion of Specific Hypothesis\t:", sh)

    n_attrs = len(sh)
    gh = [["?"] * n_attrs for _ in range(n_attrs)]
    # Show the general hypothesis here (the specific one was printed above).
    print("Initialization of General Hypothesis\t:", gh)

    for i, val in enumerate(con):
        if tar[i] == 'yes':
            # Positive example: generalise every mismatching attribute.
            for x in range(n_attrs):
                if val[x] != sh[x]:
                    sh[x] = '?'
                    gh[x][x] = "?"
        elif tar[i] == 'no':
            # Negative example: specialise the general boundary on every
            # attribute where the example differs from the specific one.
            for x in range(n_attrs):
                if val[x] != sh[x]:
                    gh[x][x] = sh[x]
                else:
                    gh[x][x] = "?"

        print("step:", i)
        print("\n")
        print("General Hypothesis:\n", gh)
        print("Specific Hypothesis:\n", sh)

    # Drop rows that carry no constraint, whatever the attribute count.
    gh = [row for row in gh if any(cell != '?' for cell in row)]
    return gh, sh

# Learn both hypothesis boundaries from the loaded examples, then print them.
g_final, s_final = learn(con, tar)

print("Final G:\n", g_final, sep = " ")

print("Final S:\n", s_final, sep = " ")

------------------------------------------------------------------------------------------------------------------------

Program 3

3.Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.

import pandas as pd

from sklearn import metrics

from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score

# Load the play-tennis data set and preview it.
data = pd.read_csv('/content/play_tennis_3.csv')
print("\nThe first 5 values of data are : \n", data.head())

# All columns except the last are features; the last column is the target.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Integer-encode every feature column (the encoder is refit for each column).
le = LabelEncoder()
X = X.apply(le.fit_transform)
print("\nNow the Train data is : \n", X.head())

# Integer-encode the target with its own encoder and print the full array.
le_y = LabelEncoder()
y = le_y.fit_transform(y)
print("\nNow the Train output is : ", y)

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

# Fit Gaussian naive Bayes on the training rows and predict the held-out rows.
classifier = GaussianNB().fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Report accuracy, then show the held-out rows and their true labels.
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy is : ", accuracy)
print("\nTest set : \n", X_test)
print("\nTrue labels : \n", y_test)

----------------------------------------------------------------------------------------------------------------------------

Lab Program 4

Assuming a set of documents that need to be classified, use the naïve Bayesian classifier model to perform this task. Built-in Java classes /API can be used to write the program. Calculate the accuracy, precision and recall for your data set.

import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.feature_extraction.text import CountVectorizer

from sklearn.naive_bayes import MultinomialNB

from sklearn import metrics

# Load the labelled messages; column names are supplied explicitly.
df = pd.read_csv("text_classification_4.csv", names=["message", "label"])
df["label_num"] = df["label"].map({"pos": 1, "neg": 0})

# Keep 40% of the messages for testing.
x, y = df['message'], df['label_num']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4)

# Build the vocabulary on the training text only, then reuse it for the test text.
cv = CountVectorizer()
x_train_cv = cv.fit_transform(x_train)
x_test_cv = cv.transform(x_test)

# Fit multinomial naive Bayes on the word counts and predict the test messages.
mnb = MultinomialNB().fit(x_train_cv, y_train)
y_pred = mnb.predict(x_test_cv)

# Score the predictions and print each metric on its own line.
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred)
recall = metrics.recall_score(y_test, y_pred)
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
):
    print(f"{metric_name}: {metric_value}")

----------------------------------------------------------------------------------------------------------------------------

Lab Program 5

Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard heart disease data set. You can use Java or Python ML Library classes /API

!pip install pgmpy

import pandas as pd

from pgmpy.estimators import MaximumLikelihoodEstimator

from pgmpy.models import BayesianModel

from pgmpy.inference import VariableElimination

# Load the heart disease dataset - heart
import numpy as np  # `pd.np` was removed in pandas 2.0; use NumPy directly

df = pd.read_csv("heart.csv")

# Treat '?' placeholders as missing values.
df = df.replace('?', np.nan)

# Define the structure of the Bayesian Network: five attributes point at
# `target`, and `target` in turn points at `restecg` and `chol`.
# NOTE(review): recent pgmpy releases renamed BayesianModel
# (BayesianNetwork / DiscreteBayesianNetwork) -- confirm against the
# installed pgmpy version.
model = BayesianModel([
    ('age', 'target'), ('sex', 'target'), ('cp', 'target'),
    ('fbs', 'target'), ('exang', 'target'), ('target', 'restecg'),
    ('target', 'chol'),
])

# Fit the model to the dataset using Maximum Likelihood Estimation
model.fit(df, estimator=MaximumLikelihoodEstimator)

# Perform inference
inference = VariableElimination(model)

# Query the distribution of `target` given a single piece of evidence.
q1 = inference.query(variables=['target'], evidence={'restecg': 1})
print(q1)

q2 = inference.query(variables=['target'], evidence={'cp': 2})
print(q2)

---------------------------------------------------------------------------------------------------------------------------

Program 6

Write a program to classify new data sample using decision tree classifier.

import pandas as pd

from sklearn.preprocessing import LabelEncoder

from sklearn.tree import DecisionTreeClassifier

# Load data from CSV - tennisdata
data = pd.read_csv('/content/tennisdata.csv')

# Display the first few rows of the dataset
print("The First 5 values of data is \n", data.head())

# Features are all columns except the last one; the last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Fit one LabelEncoder per feature column. A single shared encoder would be
# refit on every column and end up knowing only the last column's
# categories, so it could not encode user input for the other features.
feature_encoders = {column: LabelEncoder().fit(X[column]) for column in X.columns}
X_encoded = X.apply(lambda column: feature_encoders[column.name].transform(column))

# Numeric version of the target (kept for inspection; the model below is
# trained on the original labels so predictions are directly readable).
le_y = LabelEncoder()
y_encoded = le_y.fit_transform(y)

# Train the decision tree model
classifier = DecisionTreeClassifier()
classifier.fit(X_encoded, y)


def label_encoder_for_input(lst):
    """Encode one user-supplied sample with the per-feature encoders.

    Args:
        lst: Raw feature values, in the same order as ``X.columns``.

    Returns:
        A list with the integer code of each value, produced by the encoder
        fitted on the matching feature column.

    Raises:
        ValueError: If a value was not seen in that column during fitting
            (raised by ``LabelEncoder.transform``).
    """
    return [
        feature_encoders[column].transform([value])[0]
        for column, value in zip(X.columns, lst)
    ]


# User input for prediction: one value per feature column.
print("\nEnter the weather conditions for prediction :- ")
inp = [input(f"Enter {feature} : ") for feature in X.columns]

# Encode the input with the encoders used for training.
inp_encoded = label_encoder_for_input(inp)

# Predict with a one-row frame carrying the training column names.
y_pred = classifier.predict(pd.DataFrame([inp_encoded], columns=X.columns))

# The model was trained on raw labels, so no inverse transform is needed.
predicted_label = y_pred[0]
print("\nFor input {0}, the predicted output is {1}".format(inp,
                                                            predicted_label))

----------------------------------------------------------------------------------------------------------------------------

Lab Program 7

Build an Artificial Neural Network by implementing the Back propagation algorithm and test the same using appropriate datasets.

import numpy as np

# Training inputs (two values per sample) and one target value per sample.
x = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
y = np.array([[86], [92], [89]], dtype=float)

# Scale each input column by its own maximum and divide the targets by 100.
x /= x.max(axis=0)
y /= 100

# Define the sigmoid activation function and its derivative

def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), element-wise for arrays."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def derivative_sigmoid(x):
    """Return x * (1 - x).

    This equals the sigmoid's slope when ``x`` is already a sigmoid output,
    which is how the training loop in this program calls it.
    """
    complement = 1 - x
    return x * complement

# Define hyperparameters
epochs = 5
learning_rate = 0.1

# Neural network architecture
input_neurons = 2
hidden_neurons = 1
output_neurons = 1

# Initialize weights and biases with uniform random values
weights_hidden = np.random.uniform(size=(input_neurons, hidden_neurons))
bias_hidden = np.random.uniform(size=(1, hidden_neurons))
weights_output = np.random.uniform(size=(hidden_neurons, output_neurons))
bias_output = np.random.uniform(size=(1, output_neurons))

# Training the neural network
for epoch in range(epochs):
    # Forward propagation
    hidden_input = np.dot(x, weights_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)
    output_input = np.dot(hidden_output, weights_output) + bias_output
    output = sigmoid(output_input)

    # Backward propagation: error times activation slope at each layer.
    output_error = y - output
    output_gradient = derivative_sigmoid(output)
    d_output = output_error * output_gradient

    # The hidden error uses the output weights from before this epoch's update.
    hidden_error = np.dot(d_output, weights_output.T)
    hidden_gradient = derivative_sigmoid(hidden_output)
    d_hidden = hidden_error * hidden_gradient

    # Update weights and biases. Each bias receives the delta summed over
    # all samples, keeping its (1, n) shape.
    weights_output += np.dot(hidden_output.T, d_output) * learning_rate
    bias_output += np.sum(d_output, axis=0, keepdims=True) * learning_rate
    weights_hidden += np.dot(x.T, d_hidden) * learning_rate
    bias_hidden += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

    # Display epoch information
    print("Epoch:", epoch + 1)
    print("Input:", x)
    print("Actual output:", y)
    print("Predicted output:", output)

-----------------------------------------------------------------------------------------------------------------------------

Lab Program 8

Write a program to implement the k-Nearest Neighbor algorithm to classify the iris data set. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem.

from sklearn.datasets import load_iris

from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import train_test_split

# Load the Iris data set and split it with a fixed random_state.
dataset = load_iris()
features, labels = dataset["data"], dataset["target"]
X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                    random_state=0)

# Fit a 1-nearest-neighbour classifier on the training split.
kn = KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train, y_train)

# Predict each test sample on its own and print target vs. prediction.
class_names = dataset["target_names"]
for i, (x, target_label) in enumerate(zip(X_test, y_test)):
    prediction = kn.predict([x])[0]
    target_name = class_names[target_label]
    predicted_name = class_names[prediction]
    print("Sample {} :- TARGET = {} ({}), PREDICTED = {} ({})".format(
        i + 1, target_label, target_name, prediction, predicted_name))

# Accuracy over the whole test split.
accuracy = kn.score(X_test, y_test)
print(f"\nAccuracy: {accuracy}")

----------------------------------------------------------------------------------------------------------------------------

Program 9

9. Implement the non-parametric Locally Weighted Regression algorithm

in order to fit data points. Select an appropriate data set for your experiment

and draw graphs.

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

# Locally Weighted Regression function
def local_regression(xmat, ymat, k):
    """Predict every training point with locally weighted linear regression.

    For each row of ``xmat`` a separate weighted least-squares fit is solved,
    with Gaussian weights centred on that row.

    Args:
        xmat: Design matrix of shape (m, n); include a column of ones if an
            intercept is wanted.
        ymat: The m target values, as shape (m,) or (m, 1).
        k: Kernel bandwidth; smaller values make the fit more local.

    Returns:
        A 1-D array with the m fitted values, in row order.
    """
    xmat = np.asarray(xmat, dtype=float)
    # Flatten the targets so each prediction is a true scalar rather than a
    # one-element array being squeezed into ``ypred[i]``.
    targets = np.asarray(ymat, dtype=float).reshape(-1)

    m = xmat.shape[0]
    ypred = np.zeros(m)
    for i in range(m):
        weights = np.exp(np.sum((xmat - xmat[i]) ** 2, axis=1) / (-2 * k ** 2))
        # Same product as xmat.T @ diag(weights), without the m x m matrix.
        xtw = xmat.T * weights
        # The pseudo-inverse keeps the solve defined when far-away weights
        # underflow and the weighted normal matrix becomes singular.
        theta = np.linalg.pinv(xtw @ xmat) @ xtw @ targets
        ypred[i] = xmat[i] @ theta
    return ypred

# Load the bill/tip data set and pull out the two columns of interest.
df = pd.read_csv("10-dataset.csv")
cola = np.array(df["total_bill"])
colb = np.array(df["tip"])

# Design matrix with a leading column of ones; targets as a column vector.
x = np.column_stack((np.ones_like(cola), cola))
y = colb.reshape(-1, 1)

# Fit the locally weighted regression with bandwidth 0.8.
ypred = local_regression(x, y, 0.8)

# Scatter the raw points, then draw the fitted values ordered by total bill.
sorted_indices = cola.argsort()
plt.scatter(cola, colb, color='blue')
plt.plot(
    cola[sorted_indices],
    ypred[sorted_indices],
    color='green',
    linewidth=5,
)
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.title('Locally Weighted Regression')
plt.show()

-----------------------------------------------------------------------------------------------------------------------------

Program 10

10. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use

the same data set for clustering using the K-Means algorithm.

import pandas as pd

import matplotlib.pyplot as plt

import numpy as np

from sklearn import preprocessing

from sklearn import datasets

from sklearn.cluster import KMeans

from sklearn.mixture import GaussianMixture

# Load the Iris measurements into a DataFrame with readable column names.
iris = datasets.load_iris()
feature_names = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
x = pd.DataFrame(iris.data, columns=feature_names)

# K-Means clustering on the raw measurements.
km = KMeans(n_clusters=3)
km.fit(x)
km_labels = km.labels_

# Gaussian mixture (EM) clustering on standardised measurements.
scaler = preprocessing.StandardScaler()
xs = pd.DataFrame(scaler.fit_transform(x), columns=x.columns)
gm = GaussianMixture(n_components=3)
gm.fit(xs)
gmm_y = gm.predict(xs)

# One petal-length/petal-width scatter per labelling, side by side.
plt.figure(figsize=(14, 5))
panels = (
    ("Real Plot", iris.target),
    ("K-Means Plot", km_labels),
    ("GMM Plot", gmm_y),
)
for position, (panel_title, colours) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.scatter(x["petal_length"], x["petal_width"], c=colours,
                cmap='viridis', s=40)
    plt.title(panel_title)

plt.tight_layout()
plt.show()

----------------------------------------------END-------------------------------------------------------------------

ALL LABS CSDA

Friday, 18 October 2024

Cloud Computing CD 352

Thursday, 17 October 2024

Machine Learning Lab CD 351

BIG DATA CD 362