Documente Academic
Documente Profesional
Documente Cultură
Lab Assessment -5
Malware Detection using ML in Python
Code:
# Malware detection with a Gaussian Naive Bayes classifier.
#
# Notebook-style script: bare expressions such as ``data.head()`` only display
# their value when the code is run cell-by-cell (Jupyter/IPython).
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# --- Load and inspect the raw data -----------------------------------------
data = pd.read_csv("../input/Malware dataset.csv")
data.head()
data.shape
data.isnull().sum()
data.columns

# --- Drop every row that has at least one missing value --------------------
# .copy() gives an independent frame, so the label encoding further down does
# not write into a view of ``data`` (avoids SettingWithCopyWarning).
data1 = data.dropna(how="any", axis=0).copy()
data1.head()
data1["classification"].value_counts()
data1.head()
data1.tail()

# --- Class balance ---------------------------------------------------------
# Keyword form works on both old and new seaborn releases.
sns.countplot(x="classification", data=data1)
plt.show()
data1["classification"].value_counts().plot(kind="pie", autopct="%1.1f%%")
plt.axis("equal")
plt.show()

# Per-class subsets, taken from the un-encoded frame so the string labels
# still match.
benign1 = data.loc[data["classification"] == "benign"]
benign1["classification"].head()
malware1 = data.loc[data["classification"] == "malware"]
malware1["classification"].head()

# --- Encode the target -----------------------------------------------------
# The correlation ranking below needs a numeric target.
# NOTE(review): benign -> 0 / malware -> 1 is assumed from the 0/1 values in
# the recorded output; confirm against the original notebook.
if not pd.api.types.is_numeric_dtype(data1["classification"]):
    data1["classification"] = data1["classification"].map(
        {"benign": 0, "malware": 1}
    )

# --- Correlation of every numeric feature with the target ------------------
# Restrict to numeric columns: the string ``hash`` column would make corr()
# raise on pandas >= 2.0.
corr = data1.select_dtypes(include="number").corr()
corr.nlargest(35, "classification")["classification"]

# --- Feature matrix / target vector ----------------------------------------
x = data1.drop(
    [
        "hash",
        "classification",
        "vm_truncate_count",
        "shared_vm",
        "exec_vm",
        "nvcsw",
        "maj_flt",
        "utime",
    ],
    axis=1,
)
x.head()
y = data1["classification"]
y

# --- Train / evaluate ------------------------------------------------------
# 70/30 split with a fixed seed so the run is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)
model = GaussianNB()
model.fit(x_train, y_train)
pred = model.predict(x_test)
pred
model.score(x_test, y_test)

# Side-by-side comparison of true and predicted labels for the test rows.
result = pd.DataFrame({
    "Actual_Value": y_test,
    "Predict_Value": pred,
})
result
       Actual_Value  Predict_Value
43660 0 1
87278 1 1
14317 0 1
81932 1 1
       Actual_Value  Predict_Value
95321 1 1
5405 1 1
33188 0 1
63421 1 1
72897 1 1
9507 0 0
88624 1 1
95115 1 1
99243 1 1
77045 1 1
31791 0 1
45417 1 1
71963 1 1
91216 1 1
31924 0 1
15134 0 1
16405 0 1
22718 0 1
       Actual_Value  Predict_Value
15522 0 0
24507 0 1
13979 0 0
71898 1 1
64290 0 0
27706 0 1
92621 1 1
66503 1 1
18845 0 1
64740 0 0
92316 1 1
84568 1 1
9284 0 0
31510 0 1
45911 1 1
7593 0 1
17393 0 1
       Actual_Value  Predict_Value
1407 0 1
30455 0 0
96375 1 1
97553 1 1
54718 0 1
96667 1 1
10506 1 1
37636 0 1
19884 0 0
22766 0 1
13499 0 1
90422 1 1
23841 0 0
24559 0 0
7599 0 1
56585 1 1
994 1 1
42287 0 1
       Actual_Value  Predict_Value
4967 0 1
47725 0 0
42348 0 1