CC handwritten Record
Friday, 18 October 2024
Thursday, 17 October 2024
Machine Learning Lab CD 351
Program 1: Find S Algorithm Program
import pandas as pd
import numpy as np
# Read the training examples and convert the table to an array once.
# Every column except the last holds attribute values (con); the last
# column holds the label for each row (tar).
data = pd.read_csv("data.csv")
table = np.array(data)
con, tar = table[:, :-1], table[:, -1]
def train(con, tar):
    """Run the Find-S algorithm and return the most specific hypothesis.

    Args:
        con: Sequence of training examples. Each example is a sequence of
            attribute values supporting ``.copy()`` (list or NumPy row), and
            all examples have the same length.
        tar: Sequence of labels aligned with ``con``. Only rows labelled
            ``'yes'`` are used; every other label is skipped.

    Returns:
        A copy of the first positive example in which each attribute that
        differs in any later positive example has been replaced by ``'?'``.

    Raises:
        ValueError: If no row is labelled ``'yes'``, because there is no
            positive example to start from.
    """
    sh = None
    for example, label in zip(con, tar):
        if label != 'yes':
            continue
        if sh is None:
            # Seed the hypothesis with the first positive example; copy it so
            # the caller's training data is never modified.
            sh = example.copy()
            continue
        # Walk over the attributes of the example (not over the number of
        # rows) and generalise every position that disagrees.
        for x, value in enumerate(example):
            if value != sh[x]:
                sh[x] = '?'
    if sh is None:
        raise ValueError("no positive ('yes') example in the training data")
    return sh
# Learn the most specific hypothesis from the loaded examples and show it.
specific_hypothesis = train(con, tar)
print(specific_hypothesis)
---------------------------------------------------------------------------------------------------------------------
Program 2 Candidate Elimination Algorithm python Program
# Candidate Elimination algorithm
import pandas as pd
import numpy as np
# Load the examples and split the array column-wise: the attribute columns
# go to con, the final (label) column goes to tar.
data = pd.read_csv("data.csv")
examples = np.array(data)
con = examples[:, :-1]
tar = examples[:, -1]
def learn(con, tar):
    """Run the simplified Candidate Elimination algorithm used in this lab.

    The specific hypothesis starts as a copy of the first example, and the
    general boundary starts as one all-``'?'`` hypothesis per attribute.
    Rows labelled ``'yes'`` generalise the specific hypothesis; rows labelled
    ``'no'`` specialise the general boundary on the attributes where the row
    disagrees with the specific hypothesis. Progress is printed after every
    training row.

    Args:
        con: Sequence of training examples; each example is an equal-length
            sequence of attribute values supporting ``.copy()``.
        tar: Sequence of labels aligned with ``con`` (``'yes'`` / ``'no'``).

    Returns:
        A ``(gh, sh)`` tuple. ``gh`` is the list of general hypotheses with
        the rows that stayed entirely ``'?'`` removed; ``sh`` is the final
        specific hypothesis.
    """
    sh = con[0].copy()
    n_attrs = len(sh)
    print("Initializtion of Specific Hypothesis\t:", sh)
    gh = [["?"] * n_attrs for _ in range(n_attrs)]
    # Report the general boundary here (the specific one was printed above).
    print("Initialization of General Hypothesis\t:", gh)

    for i, val in enumerate(con):
        if tar[i] == 'yes':
            # Positive example: drop every constraint it violates.
            for x in range(n_attrs):
                if val[x] != sh[x]:
                    sh[x] = '?'
                    gh[x][x] = '?'
        elif tar[i] == 'no':
            # Negative example: constrain exactly the attributes on which it
            # differs from the specific hypothesis.
            for x in range(n_attrs):
                if val[x] != sh[x]:
                    gh[x][x] = sh[x]
                else:
                    gh[x][x] = '?'
        print("step:", i)
        print("\n")
        print("General Hypothesis:\n", gh)
        print("Specific Hypothesis:\n", sh)

    # Discard hypotheses that constrain nothing, whatever the attribute count
    # (the check is not tied to six-attribute data).
    gh = [hyp for hyp in gh if any(value != '?' for value in hyp)]
    return gh, sh
# Run the learner on the loaded examples, then report both boundaries.
g_final, s_final = learn(con, tar)
for heading, hypothesis in (("Final G:\n", g_final), ("Final S:\n", s_final)):
    print(heading, hypothesis, sep=" ")
------------------------------------------------------------------------------------------------------------------------
Program 3
3.Write a program to implement the naïve Bayesian classifier for a sample training data set stored as a .CSV file. Compute the accuracy of the classifier, considering few test data sets.
import pandas as pd
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Read the play-tennis table (play_tennis_3.csv) and preview it.
data = pd.read_csv('/content/play_tennis_3.csv')
print("\nThe first 5 values of data are : \n", data.head())

# All columns but the last are features; the last column is the class label.
X, y = data.iloc[:, :-1], data.iloc[:, -1]

# Integer-encode the features one column at a time. The single encoder is
# refit for each column, so afterwards it only remembers the last column.
le = LabelEncoder()
X = X.apply(le.fit_transform)
print("\nNow the Train data is : \n", X.head())

# The label column gets an encoder of its own.
le_y = LabelEncoder()
y = le_y.fit_transform(y)
print("\nNow the Train output is : ", y)

# Keep 20% of the rows aside for testing; the split differs on every run
# because no random_state is given.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

# Fit Gaussian Naive Bayes on the training part and score it on the rest.
classifier = GaussianNB().fit(X_train, y_train)
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy is : ", accuracy)

# Show which rows were held out and what their true labels are.
print("\nTest set : \n", X_test)
print("\nTrue labels : \n", y_test)
----------------------------------------------------------------------------------------------------------------------------
Lab Program 4
Assuming a set of documents that need to be classified, use the naïve Bayesian classifier model to perform this task. Built-in Java classes /API can be used to write the program. Calculate the accuracy, precision and recall for your data set.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
# Read the headerless two-column file (text_classification_4.csv) and add a
# numeric label column: "pos" becomes 1 and "neg" becomes 0.
df = pd.read_csv("text_classification_4.csv", names=["message", "label"])
df["label_num"] = df["label"].map({"pos": 1, "neg": 0})

# Hold out 40% of the messages for evaluation.
x, y = df['message'], df['label_num']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.4)

# Build the vocabulary from the training messages only, then reuse it to
# turn the test messages into word-count vectors.
cv = CountVectorizer()
x_train_cv = cv.fit_transform(x_train)
x_test_cv = cv.transform(x_test)

# Fit multinomial Naive Bayes on the word counts and predict the held-out set.
mnb = MultinomialNB().fit(x_train_cv, y_train)
y_pred = mnb.predict(x_test_cv)

# Report the three requested metrics.
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred)
recall = metrics.recall_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
Lab Program 5
Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard heart disease data set. You can use Java or Python ML Library classes /API
!pip install pgmpy
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
# Load the heart disease dataset - heart
df = pd.read_csv("heart.csv")
# Turn '?' placeholders into NaN (presumably they mark missing readings in
# the file - verify against the data). float("nan") is used instead of
# pd.np.nan because the pd.np alias no longer exists in pandas 2.x.
df = df.replace('?', float("nan"))

# Define the structure of the Bayesian Network: age, sex, cp, fbs and exang
# are parents of target, and target is the parent of restecg and chol.
# NOTE(review): newer pgmpy releases renamed BayesianModel (BayesianNetwork,
# later DiscreteBayesianNetwork) - confirm against the installed version and
# update the import if this name is no longer available.
model = BayesianModel([
    ('age', 'target'), ('sex', 'target'), ('cp', 'target'),
    ('fbs', 'target'), ('exang', 'target'), ('target', 'restecg'),
    ('target', 'chol'),
])

# Fit the model to the dataset using Maximum Likelihood Estimation
model.fit(df, estimator=MaximumLikelihoodEstimator)

# Perform inference by variable elimination on the fitted network
inference = VariableElimination(model)

# Query the distribution of target given restecg == 1
q1 = inference.query(variables=['target'], evidence={'restecg': 1})
print(q1)

# Query the distribution of target given cp == 2
q2 = inference.query(variables=['target'], evidence={'cp': 2})
print(q2)
Program 6
Write a program to classify new data sample using decision tree classifier.
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
# Load data from CSV - tennisdata
data = pd.read_csv('/content/tennisdata.csv')
print("The First 5 values of data is \n", data.head())

# Features are all columns except the last one; the last column is the target.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Fit ONE LabelEncoder PER feature column and keep them all. A single shared
# encoder is refit by every column and ends up knowing only the last column,
# so it cannot encode user input for the other columns. Values are cast to
# str so that text typed by the user matches what the encoder was fit on.
feature_encoders = {
    column: LabelEncoder().fit(X[column].astype(str)) for column in X.columns
}
X_encoded = pd.DataFrame(
    {
        column: encoder.transform(X[column].astype(str))
        for column, encoder in feature_encoders.items()
    },
    index=X.index,
)

# Train the decision tree on the encoded features. The target is left as the
# original labels so predictions come back directly as readable labels.
classifier = DecisionTreeClassifier()
classifier.fit(X_encoded, y)


def label_encoder_for_input(lst):
    """Encode one row of raw feature values with the training encoders.

    Args:
        lst: Raw values, one per feature column, in the same order as
            ``X.columns``.

    Returns:
        A list with the integer code of each value, produced by the encoder
        that was fit on the corresponding column.

    Raises:
        ValueError: Raised by ``LabelEncoder.transform`` when a value was
            never seen in that column during training.
    """
    return [
        feature_encoders[column].transform([str(value)])[0]
        for column, value in zip(X.columns, lst)
    ]


# User input for prediction: ask for one value per feature column.
print("\nEnter the weather conditions for prediction :- ")
inp = []
for feature in X.columns:
    val = input(f"Enter {feature} : ")
    inp.append(val)

# Encode the answers with the per-column encoders used for training.
inp_encoded = label_encoder_for_input(inp)

# Predict on a one-row frame carrying the training column names, so the
# input has the same layout the classifier was fit on.
y_pred = classifier.predict(pd.DataFrame([inp_encoded], columns=X.columns))

# The classifier was trained on the original labels, so no inverse
# transformation is needed.
predicted_label = y_pred[0]
print("\nFor input {0}, the predicted output is {1}".format(inp,
                                                            predicted_label))
----------------------------------------------------------------------------------------------------------------------------
Lab Program 7
Build an Artificial Neural Network by implementing the Back propagation algorithm and test the same using appropriate datasets.
import numpy as np
# Three training samples with two input features each, and one target value
# per sample.
x = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
y = np.array([[86], [92], [89]], dtype=float)

# Scale each input column by its own maximum and the targets by 100.
x /= x.max(axis=0)
y /= 100
# Define the sigmoid activation function and its derivative
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    Works element-wise on NumPy arrays as well as on plain numbers.
    """
    return 1 / (1 + np.exp(-x))
def derivative_sigmoid(x):
    """Return the sigmoid derivative expressed through the sigmoid's output.

    ``x`` must already be an activation value ``s = sigmoid(z)``, not the
    pre-activation ``z``; the derivative with respect to ``z`` is then
    ``s * (1 - s)``.
    """
    return x * (1 - x)
# Define hyperparameters
epochs = 5
learning_rate = 0.1

# Neural network architecture: 2 inputs -> 1 hidden unit -> 1 output
input_neurons = 2
hidden_neurons = 1
output_neurons = 1

# Initialize weights and biases with uniform random values in [0, 1)
weights_hidden = np.random.uniform(size=(input_neurons, hidden_neurons))
bias_hidden = np.random.uniform(size=(1, hidden_neurons))
weights_output = np.random.uniform(size=(hidden_neurons, output_neurons))
bias_output = np.random.uniform(size=(1, output_neurons))

# Training the neural network
for epoch in range(epochs):
    # Forward propagation
    hidden_input = np.dot(x, weights_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)
    output_input = np.dot(hidden_output, weights_output) + bias_output
    output = sigmoid(output_input)

    # Backward propagation. Both deltas are computed before any parameter is
    # changed, so the hidden error uses the pre-update output weights.
    output_error = y - output
    output_gradient = derivative_sigmoid(output)
    d_output = output_error * output_gradient
    hidden_error = np.dot(d_output, weights_output.T)
    hidden_gradient = derivative_sigmoid(hidden_output)
    d_hidden = hidden_error * hidden_gradient

    # Update weights AND biases. Each bias moves by the sum of its layer's
    # deltas over all samples; without this step the biases would stay at
    # their random initial values for the whole run.
    weights_output += np.dot(hidden_output.T, d_output) * learning_rate
    bias_output += np.sum(d_output, axis=0, keepdims=True) * learning_rate
    weights_hidden += np.dot(x.T, d_hidden) * learning_rate
    bias_hidden += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

    # Display epoch information
    print("Epoch:", epoch + 1)
    print("Input:", x)
    print("Actual output:", y)
    print("Predicted output:", output)
Lab Program 8
Write a program to implement the k-Nearest Neighbour algorithm to classify the iris data set. Print both correct and wrong predictions. Java/Python ML library classes can be used for this problem.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
# Load the Iris dataset and split it into train and test parts. The fixed
# random_state makes the split the same on every run.
dataset = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    dataset["data"], dataset["target"], random_state=0
)

# Initialize the k-Nearest Neighbors classifier (nearest single neighbour)
kn = KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train, y_train)

# Predict the whole test set in one call, then print one line per sample
# with the true and the predicted class (index and name).
predictions = kn.predict(X_test)
target_names = dataset["target_names"]
for i, (target_label, prediction) in enumerate(zip(y_test, predictions), start=1):
    target_name = target_names[target_label]
    predicted_name = target_names[prediction]
    print("Sample {} :- TARGET = {} ({}), PREDICTED = {} ({})".
          format(i, target_label, target_name, prediction, predicted_name))

# Calculate and print accuracy score
accuracy = kn.score(X_test, y_test)
print(f"\nAccuracy: {accuracy}")
Program 9
9. Implement the non-parametric Locally Weighted Regression algorithm
in order to fit data points. Select an appropriate data set for your experiment
and draw graphs.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Locally Weighted Regression function
def local_regression(xmat, ymat, k):
    """Fit a locally weighted linear regression and predict every input row.

    For each row ``xmat[i]`` a separate weighted least-squares line is fitted
    in which every training row is weighted by a Gaussian kernel of its
    squared distance to ``xmat[i]``.

    Args:
        xmat: Design matrix of shape ``(m, n)``; include a column of ones if
            an intercept is wanted.
        ymat: Targets with ``m`` values, given as shape ``(m,)`` or ``(m, 1)``.
        k: Kernel bandwidth; smaller values make the fit more local.

    Returns:
        One-dimensional array of ``m`` predictions, one per row of ``xmat``.

    Raises:
        numpy.linalg.LinAlgError: If a local normal-equation matrix is
            singular.
    """
    xmat = np.asarray(xmat, dtype=float)
    # Flatten the targets so each theta is 1-D and each prediction is a true
    # scalar; storing a size-1 array into ypred[i] is deprecated in NumPy.
    targets = np.asarray(ymat, dtype=float).reshape(-1)
    m = xmat.shape[0]
    ypred = np.zeros(m)
    for i in range(m):
        weights = np.exp(np.sum((xmat - xmat[i]) ** 2, axis=1) / (-2 * k ** 2))
        # Broadcasting scales column j of xmat.T by weights[j]; this equals
        # xmat.T @ diag(weights) without building an m-by-m matrix.
        weighted_xt = xmat.T * weights
        # Solve the weighted normal equations directly rather than forming
        # an explicit inverse.
        theta = np.linalg.solve(weighted_xt @ xmat, weighted_xt @ targets)
        ypred[i] = xmat[i] @ theta
    return ypred
# Read 10-dataset.csv and pull out the two columns used for the fit.
df = pd.read_csv("10-dataset.csv")
cola = np.array(df.total_bill)
colb = np.array(df.tip)

# Design matrix: a column of ones next to total_bill; targets as a column.
x = np.column_stack((np.ones_like(cola), cola))
y = colb[:, np.newaxis]

# Predict a tip for every bill with bandwidth 0.8.
ypred = local_regression(x, y, 0.8)

# Draw the raw points, then the fitted curve ordered by bill size so the
# line is drawn left to right.
sorted_indices = cola.argsort()
plt.scatter(cola, colb, color='blue')
plt.plot(
    cola[sorted_indices],
    ypred[sorted_indices],
    color='green',
    linewidth=5,
)
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.title('Locally Weighted Regression')
plt.show()
Program 10
10. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use
the same data set for clustering using the K-Means algorithm.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
# Put the Iris measurements into a DataFrame with readable column names.
iris = datasets.load_iris()
x = pd.DataFrame(
    iris.data,
    columns=["sepal_length", "sepal_width", "petal_length", "petal_width"],
)

# K-Means with three clusters, fitted on the raw measurements.
km = KMeans(n_clusters=3)
km.fit(x)
km_labels = km.labels_

# EM clustering: a three-component Gaussian mixture fitted on the
# standardised measurements.
scaler = preprocessing.StandardScaler()
xs = pd.DataFrame(scaler.fit_transform(x), columns=x.columns)
gm = GaussianMixture(n_components=3)
gm.fit(xs)
gmm_y = gm.predict(xs)

# Three side-by-side petal scatter plots, coloured by the true species, the
# K-Means cluster and the mixture component respectively.
plt.figure(figsize=(14, 5))
panels = (
    ("Real Plot", iris.target),
    ("K-Means Plot", km_labels),
    ("GMM Plot", gmm_y),
)
for position, (title, colours) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.scatter(x.petal_length, x.petal_width, c=colours, cmap='viridis', s=40)
    plt.title(title)
plt.tight_layout()
plt.show()
BIG DATA CD 362
Program 1: Implement the following Data structures in java a)lists b)Stacks c)Queues sol: a) List (i) ArrayList imp...
-
Program 1: Find S Algorithm Program import pandas as pd import numpy as np data = pd.read_csv("data.csv") con = np.array(data)[...
-
CC handwritten Record CC RECORD CLICK HERE 👇