Sunteți pe pagina 1 din 9

return umr_sum(a, axis, dtype, out, keepdims, initial)

TypeError: can only concatenate str (not "int") to str

cancer['bare_nuclei']=cancer.fillna(method=ffill())
cancer.isnull().sum()
Traceback (most recent call last):

File "<ipython-input-45-a4b0576b1038>", line 1, in <module>


cancer['bare_nuclei']=cancer.fillna(method=ffill())

NameError: name 'ffill' is not defined

cancer['bare_nuclei']=cancer.fillna(method="ffill")
cancer.isnull().sum()
Out[46]:
patient_id 0
clump_thickness 0
cell_size_uniformity 1
cell_shape_uniformity 0
marginal_adhesion 0
single_ep_cell_size 0
bare_nuclei 0
bland_chromatin 4
normal_nucleoli 1
mitoses 0
class 0
doctor_name 0
dtype: int64

# Deal with string/categorical values: one-hot encode the 'class' column.

x=pd.get_dummies(cancer['class'])

x
Out[49]:
benign malignant
0 1 0
1 1 0
2 1 0
3 1 0
4 1 0
5 0 1
6 1 0
7 1 0
8 1 0
9 1 0
10 1 0
11 1 0
12 0 1
13 1 0
14 0 1
15 0 1
16 1 0
17 1 0
18 0 1
19 1 0
20 0 1
21 0 1
22 1 0
23 0 1
24 1 0
25 0 1
26 1 0
27 1 0
28 1 0
29 1 0
.. ... ...
669 0 1
670 0 1
671 1 0
672 1 0
673 1 0
674 1 0
675 1 0
676 1 0
677 1 0
678 1 0
679 1 0
680 0 1
681 0 1
682 1 0
683 1 0
684 1 0
685 1 0
686 1 0
687 1 0
688 1 0
689 1 0
690 1 0
691 0 1
692 1 0
693 1 0
694 1 0
695 1 0
696 0 1
697 0 1
698 0 1

[699 rows x 2 columns]

# First of all, we have to drop the 'class' column.

# Then we need to add these benign/malignant dummy columns to the original dataset.

cnacer.head()
Traceback (most recent call last):

File "<ipython-input-52-7781995f2132>", line 1, in <module>


cnacer.head()

NameError: name 'cnacer' is not defined


cancer.head()
Out[53]:
patient_id clump_thickness ... class doctor_name
0 1000025 1000025 ... benign Dr. Doe
1 1002945 1002945 ... benign Dr. Smith
2 1015425 1015425 ... benign Dr. Lee
3 1016277 1016277 ... benign Dr. Smith
4 1017023 1017023 ... benign Dr. Wong

[5 rows x 12 columns]

cancer .tail()
Out[54]:
patient_id clump_thickness ... class doctor_name
694 776715 776715 ... benign Dr. Lee
695 841769 841769 ... benign Dr. Smith
696 888820 888820 ... malignant Dr. Lee
697 897471 897471 ... malignant Dr. Lee
698 897471 897471 ... malignant Dr. Wong

[5 rows x 12 columns]

cancer=cancer.drop(['class'],axis=1) # axis=1 drops a column; for rows use axis=0 (i.e. the default value).

cancer
Out[56]:
patient_id clump_thickness ... mitoses doctor_name
0 1000025 1000025 ... 1 Dr. Doe
1 1002945 1002945 ... 1 Dr. Smith
2 1015425 1015425 ... 1 Dr. Lee
3 1016277 1016277 ... 1 Dr. Smith
4 1017023 1017023 ... 1 Dr. Wong
5 1017122 1017122 ... 1 Dr. Smith
6 1018099 1018099 ... 1 Dr. Doe
7 1018561 1018561 ... 1 Dr. Smith
8 1033078 1033078 ... 5 Dr. Smith
9 1033078 1033078 ... 1 Dr. Doe
10 1035283 1035283 ... 1 Dr. Doe
11 1036172 1036172 ... 1 Dr. Smith
12 1041801 1041801 ... 1 Dr. Smith
13 1043999 1043999 ... 1 Dr. Wong
14 1044572 1044572 ... 4 Dr. Doe
15 1047630 1047630 ... 1 Dr. Lee
16 1048672 1048672 ... 1 Dr. Lee
17 1049815 1049815 ... 1 Dr. Smith
18 1050670 1050670 ... 2 Dr. Smith
19 1050718 1050718 ... 1 Dr. Wong
20 1054590 1054590 ... 4 Dr. Doe
21 1054593 1054593 ... 1 Dr. Smith
22 1056784 1056784 ... 1 Dr. Smith
23 1057013 1057013 ... 1 Dr. Smith
24 1059552 1059552 ... 1 Dr. Smith
25 1065726 1065726 ... 1 Dr. Doe
26 1066373 1066373 ... 1 Dr. Doe
27 1066979 1066979 ... 1 Dr. Lee
28 1067444 1067444 ... 1 Dr. Doe
29 1070935 1070935 ... 1 Dr. Doe
.. ... ... ... ... ...
669 1350423 1350423 ... 1 Dr. Smith
670 1352848 1352848 ... 1 Dr. Lee
671 1353092 1353092 ... 1 Dr. Lee
672 1354840 1354840 ... 1 Dr. Wong
673 1354840 1354840 ... 1 Dr. Lee
674 1355260 1355260 ... 1 Dr. Wong
675 1365075 1365075 ... 1 Dr. Doe
676 1365328 1365328 ... 1 Dr. Wong
677 1368267 1368267 ... 1 Dr. Lee
678 1368273 1368273 ... 1 Dr. Lee
679 1368882 1368882 ... 1 Dr. Doe
680 1369821 1369821 ... 7 Dr. Lee
681 1371026 1371026 ... 3 Dr. Wong
682 1371920 1371920 ... 1 Dr. Wong
683 466906 466906 ... 1 Dr. Lee
684 466906 466906 ... 1 Dr. Wong
685 534555 534555 ... 1 Dr. Doe
686 536708 536708 ... 1 Dr. Doe
687 566346 566346 ... 1 Dr. Lee
688 603148 603148 ... 1 Dr. Smith
689 654546 654546 ... 8 Dr. Lee
690 654546 654546 ... 1 Dr. Doe
691 695091 695091 ... 1 Dr. Wong
692 714039 714039 ... 1 Dr. Wong
693 763235 763235 ... 2 Dr. Lee
694 776715 776715 ... 1 Dr. Lee
695 841769 841769 ... 1 Dr. Smith
696 888820 888820 ... 2 Dr. Lee
697 897471 897471 ... 1 Dr. Lee
698 897471 897471 ... 1 Dr. Wong

[699 rows x 11 columns]

cancer= pd.concat([cancer,x],axis=1)

cancer.head()
Out[58]:
patient_id clump_thickness ... benign malignant
0 1000025 1000025 ... 1 0
1 1002945 1002945 ... 1 0
2 1015425 1015425 ... 1 0
3 1016277 1016277 ... 1 0
4 1017023 1017023 ... 1 0

[5 rows x 13 columns]

y=cancer.iloc[:,-3]

y
Out[60]:
0 Dr. Doe
1 Dr. Smith
2 Dr. Lee
3 Dr. Smith
4 Dr. Wong
5 Dr. Smith
6 Dr. Doe
7 Dr. Smith
8 Dr. Smith
9 Dr. Doe
10 Dr. Doe
11 Dr. Smith
12 Dr. Smith
13 Dr. Wong
14 Dr. Doe
15 Dr. Lee
16 Dr. Lee
17 Dr. Smith
18 Dr. Smith
19 Dr. Wong
20 Dr. Doe
21 Dr. Smith
22 Dr. Smith
23 Dr. Smith
24 Dr. Smith
25 Dr. Doe
26 Dr. Doe
27 Dr. Lee
28 Dr. Doe
29 Dr. Doe

669 Dr. Smith


670 Dr. Lee
671 Dr. Lee
672 Dr. Wong
673 Dr. Lee
674 Dr. Wong
675 Dr. Doe
676 Dr. Wong
677 Dr. Lee
678 Dr. Lee
679 Dr. Doe
680 Dr. Lee
681 Dr. Wong
682 Dr. Wong
683 Dr. Lee
684 Dr. Wong
685 Dr. Doe
686 Dr. Doe
687 Dr. Lee
688 Dr. Smith
689 Dr. Lee
690 Dr. Doe
691 Dr. Wong
692 Dr. Wong
693 Dr. Lee
694 Dr. Lee
695 Dr. Smith
696 Dr. Lee
697 Dr. Lee
698 Dr. Wong
Name: doctor_name, Length: 699, dtype: object

from sklearn.preprocessing import LabelEncoder


lc=LabelEncoder()

X=lc.fit_transform(y)# fit the encoder on y and transform y in a single call.

X
Out[64]:
array([0, 2, 1, 2, 3, 2, 0, 2, 2, 0, 0, 2, 2, 3, 0, 1, 1, 2, 2, 3, 0, 2,
2, 2, 2, 0, 0, 1, 0, 0, 2, 1, 0, 2, 1, 1, 0, 3, 3, 1, 3, 2, 2, 0,
0, 2, 3, 2, 1, 2, 2, 3, 0, 3, 3, 3, 2, 1, 0, 2, 3, 3, 0, 2, 3, 2,
3, 0, 3, 0, 2, 2, 3, 3, 3, 3, 0, 3, 0, 1, 3, 3, 3, 1, 1, 2, 2, 1,
3, 3, 0, 1, 1, 1, 2, 1, 0, 0, 1, 1, 0, 2, 1, 3, 3, 1, 2, 3, 3, 1,
0, 2, 2, 3, 1, 0, 3, 2, 0, 1, 0, 1, 1, 2, 2, 3, 1, 0, 0, 2, 1, 3,
0, 1, 0, 3, 1, 1, 3, 1, 0, 2, 0, 2, 3, 3, 3, 3, 1, 3, 1, 2, 0, 1,
2, 0, 1, 0, 0, 2, 2, 1, 1, 0, 2, 1, 0, 2, 1, 1, 1, 0, 0, 2, 3, 1,
0, 1, 1, 0, 1, 3, 2, 2, 1, 1, 0, 3, 1, 0, 3, 0, 3, 2, 3, 1, 0, 3,
2, 1, 1, 2, 3, 0, 2, 2, 0, 3, 0, 1, 0, 2, 1, 3, 2, 2, 0, 3, 1, 3,
1, 2, 0, 2, 1, 0, 1, 2, 1, 0, 1, 1, 0, 3, 0, 0, 3, 3, 3, 0, 2, 3,
2, 0, 2, 2, 2, 0, 3, 3, 0, 0, 3, 1, 3, 3, 3, 0, 1, 2, 1, 3, 1, 0,
3, 1, 2, 3, 0, 0, 2, 3, 2, 1, 2, 1, 0, 3, 0, 3, 3, 2, 1, 2, 3, 2,
0, 0, 3, 0, 2, 0, 3, 2, 1, 2, 1, 2, 2, 0, 2, 0, 3, 3, 1, 1, 3, 1,
1, 2, 1, 3, 0, 2, 0, 3, 1, 1, 2, 3, 1, 2, 0, 2, 1, 0, 3, 2, 3, 1,
1, 3, 2, 1, 1, 3, 0, 3, 3, 1, 0, 0, 1, 0, 2, 2, 0, 0, 1, 2, 3, 0,
0, 2, 2, 3, 1, 2, 0, 0, 1, 0, 1, 0, 0, 1, 3, 0, 3, 0, 2, 2, 3, 1,
2, 0, 0, 0, 2, 1, 0, 0, 1, 0, 2, 3, 2, 1, 1, 2, 3, 0, 3, 0, 1, 2,
0, 2, 0, 1, 3, 3, 0, 0, 3, 3, 0, 3, 3, 1, 3, 1, 2, 3, 0, 2, 0, 1,
2, 1, 1, 3, 3, 1, 2, 3, 3, 2, 2, 0, 3, 3, 2, 1, 3, 1, 0, 1, 2, 0,
2, 1, 1, 0, 0, 0, 0, 2, 1, 2, 3, 0, 1, 1, 1, 3, 2, 0, 2, 1, 1, 2,
1, 3, 2, 1, 2, 0, 2, 0, 3, 0, 3, 2, 0, 1, 0, 1, 1, 0, 1, 0, 2, 0,
3, 0, 3, 0, 1, 1, 0, 2, 1, 1, 0, 0, 0, 3, 1, 0, 1, 0, 2, 2, 1, 2,
1, 3, 2, 0, 1, 2, 3, 0, 3, 2, 0, 0, 1, 1, 0, 1, 0, 1, 2, 0, 2, 0,
3, 2, 1, 3, 0, 2, 2, 3, 3, 1, 1, 0, 0, 1, 2, 3, 1, 1, 2, 1, 1, 0,
1, 1, 1, 2, 2, 1, 0, 1, 2, 0, 0, 1, 0, 1, 2, 2, 2, 3, 2, 2, 2, 2,
3, 2, 2, 2, 2, 1, 2, 3, 0, 2, 1, 3, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0,
0, 3, 3, 3, 3, 3, 0, 1, 2, 0, 1, 1, 2, 2, 1, 1, 1, 2, 0, 0, 2, 0,
1, 2, 0, 3, 2, 2, 0, 1, 3, 0, 3, 2, 3, 3, 3, 1, 0, 3, 2, 2, 2, 1,
1, 0, 0, 2, 3, 3, 2, 0, 2, 1, 2, 0, 0, 0, 2, 0, 2, 0, 3, 2, 2, 2,
1, 2, 0, 0, 3, 1, 1, 3, 3, 2, 1, 1, 3, 1, 3, 0, 3, 1, 1, 0, 1, 3,
3, 1, 3, 0, 0, 1, 2, 1, 0, 3, 3, 1, 1, 2, 1, 1, 3])

# LabelEncoder returns a NumPy array, so convert it to a DataFrame before concatenating.

X=pd.DataFrame(X)

X
Out[67]:
0
0 0
1 2
2 1
3 2
4 3
5 2
6 0
7 2
8 2
9 0
10 0
11 2
12 2
13 3
14 0
15 1
16 1
17 2
18 2
19 3
20 0
21 2
22 2
23 2
24 2
25 0
26 0
27 1
28 0
29 0
.. ..
669 2
670 1
671 1
672 3
673 1
674 3
675 0
676 3
677 1
678 1
679 0
680 1
681 3
682 3
683 1
684 3
685 0
686 0
687 1
688 2
689 1
690 0
691 3
692 3
693 1
694 1
695 2
696 1
697 1
698 3

[699 rows x 1 columns]

cancer=cancer.drop(['doctor_name'],axis=1)

cancer
Out[69]:
patient_id clump_thickness ... benign malignant
0 1000025 1000025 ... 1 0
1 1002945 1002945 ... 1 0
2 1015425 1015425 ... 1 0
3 1016277 1016277 ... 1 0
4 1017023 1017023 ... 1 0
5 1017122 1017122 ... 0 1
6 1018099 1018099 ... 1 0
7 1018561 1018561 ... 1 0
8 1033078 1033078 ... 1 0
9 1033078 1033078 ... 1 0
10 1035283 1035283 ... 1 0
11 1036172 1036172 ... 1 0
12 1041801 1041801 ... 0 1
13 1043999 1043999 ... 1 0
14 1044572 1044572 ... 0 1
15 1047630 1047630 ... 0 1
16 1048672 1048672 ... 1 0
17 1049815 1049815 ... 1 0
18 1050670 1050670 ... 0 1
19 1050718 1050718 ... 1 0
20 1054590 1054590 ... 0 1
21 1054593 1054593 ... 0 1
22 1056784 1056784 ... 1 0
23 1057013 1057013 ... 0 1
24 1059552 1059552 ... 1 0
25 1065726 1065726 ... 0 1
26 1066373 1066373 ... 1 0
27 1066979 1066979 ... 1 0
28 1067444 1067444 ... 1 0
29 1070935 1070935 ... 1 0
.. ... ... ... ... ...
669 1350423 1350423 ... 0 1
670 1352848 1352848 ... 0 1
671 1353092 1353092 ... 1 0
672 1354840 1354840 ... 1 0
673 1354840 1354840 ... 1 0
674 1355260 1355260 ... 1 0
675 1365075 1365075 ... 1 0
676 1365328 1365328 ... 1 0
677 1368267 1368267 ... 1 0
678 1368273 1368273 ... 1 0
679 1368882 1368882 ... 1 0
680 1369821 1369821 ... 0 1
681 1371026 1371026 ... 0 1
682 1371920 1371920 ... 1 0
683 466906 466906 ... 1 0
684 466906 466906 ... 1 0
685 534555 534555 ... 1 0
686 536708 536708 ... 1 0
687 566346 566346 ... 1 0
688 603148 603148 ... 1 0
689 654546 654546 ... 1 0
690 654546 654546 ... 1 0
691 695091 695091 ... 0 1
692 714039 714039 ... 1 0
693 763235 763235 ... 1 0
694 776715 776715 ... 1 0
695 841769 841769 ... 1 0
696 888820 888820 ... 0 1
697 897471 897471 ... 0 1
698 897471 897471 ... 0 1
[699 rows x 12 columns]

cancer= pd.concat([cancer,X],axis=1)

cancer.head()
Out[71]:
patient_id clump_thickness cell_size_uniformity ... benign malignant 0
0 1000025 1000025 1.0 ... 1 0 0
1 1002945 1002945 4.0 ... 1 0 2
2 1015425 1015425 1.0 ... 1 0 1
3 1016277 1016277 8.0 ... 1 0 2
4 1017023 1017023 1.0 ... 1 0 3

[5 rows x 13 columns]

cancer.tail()
Out[72]:
patient_id clump_thickness cell_size_uniformity ... benign malignant 0
694 776715 776715 1.0 ... 1 0 1
695 841769 841769 1.0 ... 1 0 2
696 888820 888820 10.0 ... 0 1 1
697 897471 897471 8.0 ... 0 1 1
698 897471 897471 8.0 ... 0 1 3

[5 rows x 13 columns]

S-ar putea să vă placă și