
 

3. Perform Exploratory Data Analysis (EDA) with uni-variate, bi-variate, and multi-variate analysis on the Titanic dataset.

 

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

 

data=pd.read_csv('titanic.csv')

 

sns.heatmap(data.isna())

 

g=sns.histplot(x='Sex', data=data)

g=sns.countplot(x='Embarked', hue='Pclass', data=data)

 


def add_family(df):

    df['FamilySize']=df['SibSp'] + df['Parch'] +1

    return df

data=add_family(data)

data.head(10)

 

 

 

g=sns.countplot(x='FamilySize', hue='Survived', data=data)

 

 

g=sns.catplot(x="Embarked", hue="Sex", col="Survived",data=data, kind="count", height=4, aspect=.7)
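For the multi-variate part, a correlation heatmap and a pairwise plot are common additions. A minimal sketch, assuming the same data DataFrame with the added FamilySize column (the chosen columns are illustrative):

# Correlation heatmap of the numeric columns
numeric_cols = data.select_dtypes(include='number')
sns.heatmap(numeric_cols.corr(), annot=True, cmap='coolwarm')
plt.show()

# Pairwise relationships, coloured by survival (rows with missing Age dropped for plotting)
sns.pairplot(data.dropna(subset=['Age']), vars=['Age', 'Fare', 'FamilySize'], hue='Survived')
plt.show()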

 

 

4. Write a program to identify the attributes containing missing values and the number of missing values, and perform data cleaning by handling missing values using various techniques.

 

import pandas as pd

import seaborn as sns

 

df=pd.read_csv("C:/Users/Shilpa/Desktop/LAB_programs_dataset/Titanic_dataset.csv")

print(df.head())

 

#Checking missing values

print(df.isna().sum())

sns.heatmap(df.isna())

 

#Filling missing values through mean

df['Age'].fillna(df['Age'].mean(),inplace=True)

 

 

#Filling missing values through mode

df['Embarked'].fillna(df['Embarked'].mode()[0],inplace=True)

 

#Dropping column

df.drop(['Cabin'],axis=1,inplace=True)

 

#Dropping specific rows

df.drop(df[(df['Name']=="Braund, Mr. Owen Harris")].index,inplace=True)

df.drop(df[(df['PassengerId']==5)].index,inplace=True)

 

print(df.isna().sum())

sns.heatmap(df.isna())

 

Output:

 

   PassengerId  Survived  Pclass  ...     Fare Cabin  Embarked

0            1         0       3  ...   7.2500   NaN         S

1            2         1       1  ...  71.2833   C85         C

2            3         1       3  ...   7.9250   NaN         S

3            4         1       1  ...  53.1000  C123         S

4            5         0       3  ...   8.0500   NaN         S

 

[5 rows x 12 columns]

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked       0

dtype: int64
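Besides mean/mode imputation and dropping a column, a couple of other common cleaning techniques can be applied. A minimal sketch on a fresh copy of the file (column names as above):

df2 = pd.read_csv("C:/Users/Shilpa/Desktop/LAB_programs_dataset/Titanic_dataset.csv")

# Median imputation: more robust to outliers than the mean
df2['Age'] = df2['Age'].fillna(df2['Age'].median())

# Drop the rows where a specific column is still missing
df2 = df2.dropna(subset=['Embarked'])

# Drop columns that are mostly empty (this removes Cabin, which has 687 missing values)
df2 = df2.dropna(axis=1, thresh=len(df2) // 2)

print(df2.isna().sum())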

5. Write a program to demonstrate how to remove outliers from a dataset.

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

 

data=pd.read_csv("C:/Users/Shilpa/Desktop/LAB_programs_dataset/Athlete_events_outliers.csv")

 

print(data.head(10))

 

#Filling missing values in the Age, Height and Weight columns with their means

c=data['Age'].mean()

data['Age'].fillna(c,inplace=True)

 

a=data['Height'].mean()

data['Height'].fillna(a,inplace=True)

print(a)

 

b=data['Weight'].mean()

data['Weight'].fillna(b,inplace=True)

print(b)

 

data.info()

 

data['Weight'].skew()

sns.boxplot(data['Weight'])

 

q1=data['Weight'].quantile(0.25)

q3=data['Weight'].quantile(0.75)

IQR=q3-q1

 

lower=q1-(1.5*IQR)

upper=q3+(1.5*IQR)

data['Weight']=np.where(data['Weight']>upper,upper,np.where(data['Weight']<lower,lower,data['Weight']))

 

sns.boxplot(data['Weight'])

data['Weight'].skew()

 

 

 

 

Output:

 

175.33896987366376

 

70.70239290053351
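An alternative to IQR capping is to drop rows flagged by a z-score rule. A small sketch reusing the same data (it would be applied to the column before capping; the threshold of 3 is conventional):

# Flag and drop rows whose Weight is more than 3 standard deviations from the mean
z = (data['Weight'] - data['Weight'].mean()) / data['Weight'].std()
print("Rows flagged by the z-score rule:", (np.abs(z) > 3).sum())
data_z = data[np.abs(z) <= 3]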


6. Build a Simple Linear Regression machine learning model to analyse the relationship between CIE and SEE marks.

 

import pandas as pd

import numpy as np

 

df=pd.read_csv("CIE_SEE.csv")

print(df.info ())

 

x=df['CIE'].values.reshape(-1,1)

y=df['SEE'].values.reshape(-1,1)

 

from sklearn.model_selection import train_test_split

x_train, x_test,y_train,y_test=train_test_split(x,y,random_state=0)

 

from sklearn.linear_model import LinearRegression

lm=LinearRegression()

lm.fit(x_train,y_train)

y_pred=lm.predict(x_test)

 

from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

g=y_test.reshape(-1)   # flatten to 1-D
h=y_pred.reshape(-1)

 

print("MAE--->",mean_absolute_error(g,h))

print("MSE--->",mean_squared_error(g,h))

print("score--->",r2_score(g,h))

print (" RMSE--->",np.sqrt(mean_squared_error(g,h)))

 

import matplotlib.pyplot as plt

plt.scatter(x_train, y_train,color='g')

plt.plot(x_test, y_pred,color='k')

plt.show()

 

output:

 

RangeIndex: 83 entries, 0 to 82

Data columns (total 2 columns):

 #   Column  Non-Null Count  Dtype

---  ------  --------------  -----

 0   CIE     83 non-null     int64

 1   SEE     83 non-null     int64

dtypes: int64(2)

memory usage: 1.4 KB

None

MAE---> 29.367841250713415

MSE---> 1593.0385277231076

score---> 0.4491593937437446

 RMSE---> 39.912886737532624
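The fitted line itself can also be inspected. A small sketch reusing the trained lm object (the CIE value of 40 is only illustrative):

print("Intercept --->", lm.intercept_)
print("Slope     --->", lm.coef_)
# Predicted SEE for a CIE of 40
print(lm.predict([[40]]))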


7. Build a Multiple Linear Regression model for house price prediction.

 

import pandas as pd

import numpy as np

 

df=pd.read_csv("C:/Users/Shilpa/Desktop/LAB_programs_dataset/Housing_multilinear_reg.csv")

df.head()

 

df=pd.get_dummies(df)

 

df.drop(['mainroad_no','guestroom_no','basement_yes','hotwaterheating_yes','airconditioning_yes'],axis=1,inplace=True)

 

x=df.iloc[:,1:]

y=df.iloc[:,0]  #0 because target variable price is in zero column

 

 

from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

 

from sklearn.linear_model import LinearRegression

lm=LinearRegression()

lm.fit(x_train,y_train)

y_pred=lm.predict(x_test)

 

from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

 

print("MSE          ---- >" , mean_squared_error(y_test,y_pred))

print("RMSE       ----- > " , np.sqrt(mean_squared_error(y_test,y_pred)))

print ("MAE        ----- >", mean_absolute_error(y_test,y_pred))

print("r2 score “ ----- >",r2_score(y_test,y_pred))

 

Output:

 

MSE          ---- > 1080485739437.5288

RMSE       ----- > 1039464.1597657559

MAE        ----- > 797371.25393815

r2 score   ----- > 0.6598261620391519
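To see which features drive the predicted price, the fitted coefficients can be printed against the column names. A small sketch reusing the lm and x objects from above (output not shown):

coeffs = pd.Series(lm.coef_, index=x.columns).sort_values()
print(coeffs)
print("Intercept:", lm.intercept_)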

8. Build a predictive machine learning model for breast cancer detection using a Decision Tree classifier on the Wisconsin (Diagnostic) dataset.

 

import pandas as pd

data=pd.read_csv("C:/Users/Shilpa/Desktop/LAB_programs_dataset/breast_cancer_analysis_DecisionTrees.csv")

print (data.info ())

 

data=data.drop(['id'],axis=1)

x=data.drop(['diagnosis'],axis=1)

y=data['diagnosis']

 

from sklearn.model_selection  import train_test_split

x_train, x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

 

from sklearn.tree import DecisionTreeClassifier

model=DecisionTreeClassifier()

model.fit(x_train,y_train)

 

y_pred=model.predict(x_test)

from sklearn.metrics import accuracy_score,classification_report

print ("The accuracy of the model built is ", accuracy_score(y_pred,y_test)*100)

 

#Finding Best Hyperparameters for Decision Trees Using GridSearch

from sklearn.model_selection import GridSearchCV

param_dict={'criterion':['gini','entropy'],
           'max_depth':range(1,10),
           'min_samples_split':range(1,10),
           'min_samples_leaf':range(1,5)}
grid=GridSearchCV(model, param_grid=param_dict,cv=10,verbose=1,n_jobs=-1)

grid.fit(x_train,y_train)

print(grid.best_score_)

 

output:

 

The accuracy of the model built is 91.22807017543859

 

Fitting 10 folds for each of 648 candidates, totalling 6480 fits

0.9405797101449276
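To see which hyperparameters were chosen and how the tuned tree performs on the held-out split, the following can be added. A small sketch reusing grid, x_test and y_test from above:

print(grid.best_params_)
best_tree = grid.best_estimator_          # tree refit with the best parameters
print("Tuned test accuracy:", accuracy_score(y_test, best_tree.predict(x_test)) * 100)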

 

9. Build a predictive model for heart disease prediction using Logistic Regression.

 

import pandas as pd

import numpy as np

data=pd.read_csv("C:/Users/Shilpa/Desktop/LAB_programs_dataset/Heart_disease_logregression.csv")

data.head(10)

 

#converting String to Integer using label encoder

from sklearn.preprocessing import LabelEncoder

le=LabelEncoder()

data=data.apply(lambda x:le.fit_transform(x))

 

x = data.drop(['HeartDisease'],axis=1)

y = data['HeartDisease']

 

from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=0)

 

from sklearn.linear_model import LogisticRegression

log_regression = LogisticRegression()

log_regression.fit(x_train,y_train)

y_pred = log_regression.predict(x_test)

 

from sklearn import metrics

from sklearn.metrics import classification_report,confusion_matrix

 

print("confusion_matrix: ",confusion_matrix(y_test, y_pred))

print("classification_report:")

print(metrics.classification_report(y_test, y_pred))

 

Output:

confusion_matrix:  [[ 91  22]
 [ 24 139]]

 

 

 

 

classification_report:

              precision    recall  f1-score   support

 

           0       0.79      0.81      0.80       113
           1       0.86      0.85      0.86       163

    accuracy                           0.83       276
   macro avg       0.83      0.83      0.83       276
weighted avg       0.83      0.83      0.83       276
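Beyond the confusion matrix and report, the overall accuracy and ROC AUC are commonly reported for logistic regression. A small sketch reusing log_regression, x_test, y_test and y_pred from above:

from sklearn.metrics import accuracy_score, roc_auc_score
print("Accuracy:", accuracy_score(y_test, y_pred))
y_prob = log_regression.predict_proba(x_test)[:, 1]   # probability of the positive class
print("ROC AUC :", roc_auc_score(y_test, y_prob))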


10. Build a predictive machine learning model to detect lung cancer using a Support Vector Machine.

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

 

data=pd.read_csv("LUNG_CANCER.csv")

data.head()

 

x=data.drop(labels=['LUNG_CANCER'],axis=1)
y=data['LUNG_CANCER'].values.reshape(-1,1)

# Correlation heatmap of the numeric feature columns
corrmat=data.corr(numeric_only=True)
fig,ax=plt.subplots(figsize=(10,8))
sns.heatmap(corrmat,annot=True,fmt='.2f',cmap='RdYlGn',ax=ax)
plt.show()

 

from sklearn.model_selection import train_test_split

x_train, x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

from sklearn.svm import SVC

sv=SVC()

sv.fit(x_train,y_train)

y_pred=sv.predict(x_test)

 

from sklearn.metrics import classification_report,accuracy_score

print ("Classification_report\n”, classification_report(y_test,y_pred))

print('Accuracy',accuracy_score(y_test,y_pred))

 

output:

 

Classification_report
               precision    recall  f1-score   support

          NO       0.80      0.44      0.57         9
         YES       0.91      0.98      0.95        53

    accuracy                           0.90        62
   macro avg       0.86      0.71      0.76        62
weighted avg       0.90      0.90      0.89        62

 

 

Accuracy 0.9032258064516129
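SVMs are sensitive to feature scale, so standardising the inputs often improves results. An illustrative sketch reusing the train/test split from above (assuming, as in the code above, that all feature columns are numeric):

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_s = scaler.fit_transform(x_train)   # fit the scaler on training data only
x_test_s = scaler.transform(x_test)

sv_scaled = SVC()
sv_scaled.fit(x_train_s, y_train.ravel())
print("Accuracy with scaling:", accuracy_score(y_test, sv_scaled.predict(x_test_s)))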


11. Build a supervised machine learning program for credit card fraud detection using a Random Forest classifier.

 

import pandas as pd

df=pd.read_csv("creditcard.csv")

 

print(df.head())

print(df.isna().sum())

 

del df['nameOrig']

del df['nameDest']

df['isFraud'].value_counts().plot(kind='pie')

 

from sklearn.preprocessing import LabelEncoder

le=LabelEncoder()

df['type'] = le.fit_transform(df['type'])

df.head()

 

x=df.iloc[ : , : -1]

y=df.iloc[: , -1]

 

from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2)

 

from sklearn.ensemble import RandomForestClassifier

rm=RandomForestClassifier()

rm.fit(x_train,y_train)

y_pred=rm.predict(x_test)

 

from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

print(confusion_matrix(y_pred,y_test))

print(accuracy_score(y_pred,y_test))

print(classification_report(y_pred,y_test))

 

output:

 

   step      type    amount  ...  oldbalanceDest  newbalanceDest  isFraud
0     1   PAYMENT   9839.64  ...             0.0             0.0        0
1     1   PAYMENT   1864.28  ...             0.0             0.0        0
2     1  TRANSFER    181.00  ...             0.0             0.0        1
3     1  CASH_OUT    181.00  ...         21182.0             0.0        1
4     1   PAYMENT  11668.14  ...             0.0             0.0        0

[5 rows x 10 columns]
step              0
type              0
amount            0
nameOrig          0
oldbalanceOrig    0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
dtype: int64


[[209455     15]
 [   228 209276]]
Accuracy 0.9994200117429721
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    209470
           1       1.00      1.00      1.00    209504

    accuracy                           1.00    418974
   macro avg       1.00      1.00      1.00    418974
weighted avg       1.00      1.00      1.00    418974
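Random forests also expose how much each input contributed to the prediction. A small sketch reusing rm and x from above:

# Which features the forest relied on most
importances = pd.Series(rm.feature_importances_, index=x.columns)
print(importances.sort_values(ascending=False))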


12. Program to demonstrate the K-means unsupervised clustering algorithm (the mall customer dataset is used to group customers by annual income vs. spending score).

 

# Importing libraries    

import numpy as nm    

import matplotlib.pyplot as mtp    

import pandas as pd

 

dataset = pd.read_csv('Mall_Customers_data.csv')
x = dataset.iloc[:, [3, 4]].values   # Annual Income and Spending Score columns

 

#finding optimal number of clusters using the elbow method  

from sklearn.cluster import KMeans  

wcss_list= []  

  

#Using for loop for iterations from 1 to 10.  

for i in range(1, 11):  

    kmeans = KMeans(n_clusters=i, init='k-means++', random_state= 42)  

    kmeans.fit(x)  

    wcss_list.append(kmeans.inertia_)  

 

mtp.plot(range(1, 11), wcss_list)  

mtp.title('The Elbow Method Graph')  

mtp.xlabel('Number of clusters(k)')  

mtp.ylabel('wcss_list')  

mtp.show()  

 

#training the K-means model on a dataset  

kmeans = KMeans(n_clusters=5, init='k-means++', random_state= 42)  

y_predict = kmeans.fit_predict(x)

#visualizing the clusters

 

mtp.scatter(x[y_predict == 0, 0], x[y_predict == 0, 1], s = 100, c = 'blue',   label = 'Cluster 1') 

mtp.scatter(x[y_predict == 1, 0], x[y_predict == 1, 1], s = 100, c = 'green', label = 'Cluster 2') 

mtp.scatter(x[y_predict == 2, 0], x[y_predict == 2, 1], s = 100, c = 'red',     label = 'Cluster 3') 

mtp.scatter(x[y_predict == 3, 0], x[y_predict == 3, 1], s = 100, c = 'cyan',   label = 'Cluster 4') 

mtp.scatter(x[y_predict == 4, 0], x[y_predict == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5') 

 

mtp.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroid')   

 

mtp.title('Clusters of customers')  

mtp.xlabel('Annual Income (k$)')  

mtp.ylabel('Spending Score (1-100)')  

mtp.legend()  

mtp.show()  

 

 

 

The output image shows five distinct clusters in different colours. The clusters are formed over two attributes of the dataset: annual income and spending score. Colours and labels can be changed as required. From the pattern above we can observe the following points (a quick numeric check of the clustering follows this list):

o   Cluster 1 shows customers with average income and average spending, so they can be categorized as average.

o   Cluster 2 shows customers with high income but low spending, so they can be categorized as careful.

o   Cluster 3 shows customers with low income and low spending, so they can be categorized as sensible.

o   Cluster 4 shows customers with low income but very high spending, so they can be categorized as careless.

o   Cluster 5 shows customers with high income and high spending, so they can be categorized as target customers; these are likely the most profitable customers for the mall owner.
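A small sketch of that numeric check, reusing x and y_predict from above (the silhouette score is one common way to judge how well-separated the clusters are; values near 1 are better):

from sklearn.metrics import silhouette_score
print("Silhouette score for k=5:", silhouette_score(x, y_predict))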


13. Program to demonstrate dimensionality reduction using Principal Component Analysis (PCA) on the Iris dataset.

 

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

from sklearn import datasets

from sklearn.decomposition import PCA

 

iris=datasets.load_iris()

x=iris.data

y=iris.target

 

print(x.shape)

print(y.shape)

 

pca=PCA(n_components=2)

pca.fit(x)

print(pca.components_)

 

x=pca.transform(x)

print("shape of X after transformation:",x.shape)

plt.scatter(x[:,0],x[:,1],c=y)

 

from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

 

x_train, x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

res=DecisionTreeClassifier()

res.fit(x_train,y_train)

 

y_predict=res.predict(x_test)

print(accuracy_score(y_test,y_predict))

 

output: 

Shape before PCA : (150, 4)

 

(150,)

 

[[ 0.36138659 -0.08452251  0.85667061  0.3582892 ]

 [ 0.65658877  0.73016143 -0.17337266 -0.07548102]]

 

Shape after pca  (150, 2)

 

Accuracy --->  0.9666666666666667
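It is also worth checking how much of the original variance the two components keep. A small sketch reusing the fitted pca object from above:

print(pca.explained_variance_ratio_)                 # variance explained by each component
print("Total variance retained:", pca.explained_variance_ratio_.sum())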


14. Build a Convolutional Neural Network (CNN) model for the MNIST dataset with the following conditions.

 

·       One Flatten() layer.

·       One Dense layer with 512 neurons using ReLU as the activation function.

·       A Dropout layer with a dropout rate of 20%.

·       A final Dense layer that computes the probability scores via the softmax function for each of the 10 output labels.

·       Show the losses and the final architecture on TensorBoard.

 

import tensorflow as tf

m=tf.keras.datasets.mnist

 

(x_train,y_train),(x_test,y_test)=m.load_data()

x_train,x_test=x_train/255,x_test/255

 

model=tf.keras.models.Sequential([

tf.keras.layers.Flatten(input_shape=(28,28)),

tf.keras.layers.Dense(512,activation='relu'),

tf.keras.layers.Dropout(0.2),

tf.keras.layers.Dense(10,activation='softmax')])

 

model.compile(optimizer='sgd',

loss='sparse_categorical_crossentropy',

metrics=['accuracy'])

 

log="C:/Users/varsh/OneDrive/Desktop/log"

from tensorflow.keras.callbacks import TensorBoard

 

callbacks= [TensorBoard(

log_dir=log,

histogram_freq=1,

write_graph=True,

write_images=True,

update_freq='epoch',

profile_batch=2,

embeddings_freq=1)]

 

model.fit(x_train, y_train,epochs=5,validation_split=0.2,callbacks=callbacks)

model.save('m1.h5')

 

In CMD:
Type:
#Install TensorBoard (Python must already be installed)
C:\Users\varsh>pip3 install tensorboard
#Launch TensorBoard, pointing it at the log directory
C:\Users\varsh>python -m tensorboard.main --logdir="C:/Users/varsh/OneDrive/Desktop/log" --port=6006
#Copy the printed link and open it in the browser
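Besides TensorBoard, the architecture and test performance can also be checked directly in Python. A small sketch reusing model, x_test and y_test from above:

model.summary()                                   # prints the layer-by-layer architecture
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test accuracy:", test_acc)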


15. Program to build an NLP pipeline for text processing using NLTK.

 

import nltk

from nltk import sent_tokenize

from nltk import word_tokenize

from nltk.corpus import stopwords

 

 

text= "The first time you see The Second Renaissance it may look boring. Look at it at least twice and watch part 2. It will change your view of the matrix. Are the human people the ones who started the war? Is AI a bad thing?"

print(text)

 

#Tokenization

word_token = word_tokenize(text)

print(word_token)

 

#Normalization

#Punctuation Removal

elist= [ ]

for i in word_token:

    if i.isalpha():

        elist.append(i)

print(elist)

 

#Stop Words Removal

stopwords=stopwords.words("english")

print (stopwords)

 

 

elist1=[]

for i in elist:

    if i not in stopwords:

        elist1.append(i)

print(elist1)

 

 

#Parts of Speech (POS) Tagging

#Named Entity Recognition (NER)

 

from nltk import pos_tag

from nltk import ne_chunk

tag=nltk.pos_tag(elist1)

print(tag)

 

tree=nltk.ne_chunk(tag,binary=True)

print(tree)

tree.draw()

 

#Lemmatization

from nltk import WordNetLemmatizer

lemma= WordNetLemmatizer()

word_list=elist1

g=[]

for i in word_list:

    g.append(lemma.lemmatize(i))

print(g)

 

#Tf-IdfVectorizer

from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer()

x=vectorizer.fit_transform(g)

print(x.toarray())

 

 

output:

The first time you see The Second Renaissance it may look boring. Look at it at least twice and watch part 2. It will change your view of the matrix. Are the human people the ones who started the war? Is AI a bad thing?

['The', 'first', 'time', 'you', 'see', 'The', 'Second', 'Renaissance', 'it', 'may', 'look', 'boring', '.', 'Look', 'at', 'it', 'at', 'least', 'twice', 'and', 'watch', 'part', '2', '.', 'It', 'will', 'change', 'your', 'view', 'of', 'the', 'matrix', '.', 'Are', 'the', 'human', 'people', 'the', 'ones', 'who', 'started', 'the', 'war', '?', 'Is', 'AI', 'a', 'bad', 'thing', '?']

 

['The', 'first', 'time', 'you', 'see', 'The', 'Second', 'Renaissance', 'it', 'may', 'look', 'boring', 'Look', 'at', 'it', 'at', 'least', 'twice', 'and', 'watch', 'part', 'It', 'will', 'change', 'your', 'view', 'of', 'the', 'matrix', 'Are', 'the', 'human', 'people', 'the', 'ones', 'who', 'started', 'the', 'war', 'Is', 'AI', 'a', 'bad', 'thing']

 

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]

 

['The', 'first', 'time', 'see', 'The', 'Second', 'Renaissance', 'may', 'look', 'boring', 'Look', 'least', 'twice', 'watch', 'part', 'It', 'change', 'view', 'matrix', 'Are', 'human', 'people', 'ones', 'started', 'war', 'Is', 'AI', 'bad', 'thing']

[('The', 'DT'), ('first', 'JJ'), ('time', 'NN'), ('see', 'VB'), ('The', 'DT'), ('Second', 'NNP'), ('Renaissance', 'NNP'), ('may', 'MD'), ('look', 'VB'), ('boring', 'VBG'), ('Look', 'NNP'), ('least', 'JJS'), ('twice', 'RB'), ('watch', 'JJ'), ('part', 'NN'), ('It', 'PRP'), ('change', 'VBZ'), ('view', 'NN'), ('matrix', 'NN'), ('Are', 'NNP'), ('human', 'JJ'), ('people', 'NNS'), ('ones', 'NNS'), ('started', 'VBD'), ('war', 'NN'), ('Is', 'NNP'), ('AI', 'NNP'), ('bad', 'JJ'), ('thing', 'NN')]

(S

  The/DT

  first/JJ

  time/NN

  see/VB

  The/DT

  (NE Second/NNP Renaissance/NNP)

  may/MD

  look/VB

  boring/VBG

  Look/NNP

  least/JJS

  twice/RB

  watch/JJ

  part/NN

  It/PRP

  change/VBZ

  view/NN

  matrix/NN

  Are/NNP

  human/JJ

  people/NNS

  ones/NNS

  started/VBD

  war/NN

  Is/NNP

  AI/NNP

  bad/JJ

  thing/NN)
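TfidfVectorizer is more commonly fitted on whole sentences or documents than on single words. An illustrative sketch reusing the original text and the imports from above (get_feature_names_out assumes a recent scikit-learn):

sentences = sent_tokenize(text)
vec = TfidfVectorizer(stop_words='english')
tfidf = vec.fit_transform(sentences)
print(vec.get_feature_names_out())
print(tfidf.toarray().shape)     # one row per sentence, one column per term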

 

 

 

 

 

 

 

16. Write a program to perform sentiment analysis using NLTK.

from textblob import TextBlob

from textblob.classifiers import NaiveBayesClassifier

train = [

     ('I love this sandwich.', 'pos'),

     ('This is an amazing place!', 'pos'),

     ('I feel very good about these beers.', 'pos'),

     ('I do not like this restaurant', 'neg'),

     ('I am tired of this stuff.', 'neg'),

     ("I can't deal with this", 'neg'),

    ("My boss is horrible.", "neg")

    ]

cl = NaiveBayesClassifier(train)

 

print("The polarity of sentence I feel amazing is",cl.classify("I feel amazing!"))

blob = TextBlob("The beer is good. But the hangover is horrible. I can't drive", classifier=cl)

 

for s in blob.sentences:

    print(s)

    print(s.classify())

 

output:

The polarity of sentence I feel amazing is pos

The beer is good.

pos

But the hangover is horrible.

neg

I can't drive

neg
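TextBlob can also report an untrained numeric polarity in [-1, 1] for each sentence, which is a useful cross-check on the classifier. A small sketch reusing the blob object from above:

for s in blob.sentences:
    print(s, "->", s.sentiment.polarity)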

 

 

 

 

 

 

 

17. Build a neural network to predict diabetes using TensorFlow and Keras.

 

import pandas as pd

data = pd.read_csv("diabetes .csv")

 

x = data.drop("Outcome", axis=1)

y = data["Outcome"]

 

from keras.models import Sequential

from keras.layers import Dense

 

model = Sequential()

model.add(Dense(12, input_dim=8, activation="relu"))

model.add(Dense(12, activation="relu"))

model.add(Dense(1, activation="sigmoid"))

 

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

log="C:/Users/Shilpa/Desktop/logs"

 

from tensorflow.keras.callbacks import TensorBoard

callbacks= [TensorBoard(

log_dir=log,

histogram_freq=1,

write_graph=True,

write_images=True,

update_freq='epoch',

profile_batch=2,

embeddings_freq=1)]

 

model.fit(x,y, epochs=10, batch_size=10,callbacks=callbacks)

 

_, accuracy = model.evaluate(x, y)

print("Model accuracy: %.2f"% (accuracy*100))

 

 

 

 

 

 

 

Output:


18. Build a neural network to predict lung cancer using TensorFlow and Keras.

 

import pandas as pd

data = pd.read_csv("survey_lung_cancer_tensorflow.csv")

 

x = data.drop("LUNG_CANCER", axis=1)

y = data["LUNG_CANCER"]

 

from keras.models import Sequential

from keras.layers import Dense

 

model = Sequential()

model.add(Dense(512, input_dim=15, activation="relu"))

model.add(Dense(512, activation="relu"))

model.add(Dense(1, activation="sigmoid"))

 

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

 

log="C:/Users/Shilpa/Desktop/logs"

from tensorflow.keras.callbacks import TensorBoard

 

callbacks= [TensorBoard(

log_dir=log,

histogram_freq=1,

write_graph=True,

write_images=True,

update_freq='epoch',

profile_batch=2,

embeddings_freq=1)]

 

model.fit(x,y, epochs=5, batch_size=10,callbacks=callbacks)

 

_, accuracy = model.evaluate(x, y)

print("Model accuracy: %.2f"% (accuracy*100))

 

 

 

 

 

Output:
