Sunteți pe pagina 1din 18

In [59]:

import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
from nltk.corpus import wordnet as wn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import model_selection, naive_bayes, svm
from sklearn.metrics import accuracy_score
In [60]:

df=pd.read_csv(r"amazon_co-ecommerce_sample.csv")
df
Out[60]:

uniq_id product_name manufacturer price number_ava

Hornby 2014
0 eac7efa5dbd3d667f26eb3d3ab504464 Hornby £3.42
Catalogue

FunkyBuys®
Large
1 b17540ef7e86e461d37f3ae58b7b72ac Christmas FunkyBuys £16.99
Holiday
Express Fes...

CLASSIC TOY
TRAIN SET
2 348f344247b0c1a935b1223072ef9d8a TRACK ccf £9.99
CARRIAGES
LIGHT EN...

HORNBY
Coach R4410A
3 e12b92dbb8eaee78b22965d2a9bbbd9f BR Hornby £39.99
Hawksworth
Corridor 3rd

Hornby 00
Gauge 0-4-0
4 e33a9adeed5f36840ccc227db4682a36 Hornby £32.19
Gildenlow Salt
Co. Steam...

20pcs Model
Garden Light
5 cb34f0a84102c1ebc3ef6892d7444d36 Generic £6.99
Double Heads
Lamppost...

Hornby 00
Gauge 230mm
6 f74b562470571dfb689324adf236f82c BR Bogie Hornby £24.99
Passenger
Brake...

Hornby Santa's
7 87bbb472ef9d90dcef140a551665c929 Express Train Hornby £69.93
Set

Hornby Gauge
Western
8 7e2aa2b4596a39ba852449718413d7cc Hornby £235.58
Express Digital
Train Set...

Learning Curve
Chuggington
9 5afbaf65680c9f378af5b3a3ae22427e Chuggington NaN
Interactive
Chatsworth

Hornby Gauge
Railroad
10 5c76389a8c302c6d7d6e179393031b97 Mosley Hornby £27.49
Tarmacadam
Locomo...

Kato (USA)
176-1308 F3B
11 878048c41f3c249badb3704e160b4c6e Kato (USA) £273.60
Denver & Rio
Grande We...
uniq_id product_name manufacturer price number_ava

Bachmann 37-
662 14 Ton
12 f910c6542ededa5abf81787c0fd87c99 Bachmann £9.60
Tank Wagon
Pease & Part...

Hornby 00
Gauge 253mm
13 03a35de3f7af9814978e6de645cb8ffa Weathered Hornby £119.50
Paviland
Grang...

Kato 3060-2
EF65 500 (F
14 c68c3ae3b0ea3146beae99f3d4a6997c Kato NaN
Model) Electric
Locomo...

Glacier
Express of N
15 d27964f50577db8e46f2069b050c62c6 Kato NaN
gauge 10-1219
Alps [UNESC...

Power Trains
Freight
16 c3b2f6ec9cf6250c960c26ee8ad33509 Power Trains NaN
Industrial (Pack
of 4)

Chuggington
Interactive
17 e9ef14a0253f074343b5441540d8471f Chuggington NaN
Wash and Fuel
Set with...

Kumoyuni 74-0
18 0ca63377ca8015e585efa1d418f04756 Shonan Color Kato £17.08
(Model Train)

Bachmann 31-
588
19 42d20aa25e5902eff9f5d307bb38230e Freightliner Bachmann £96.05
Class 70 005
Powe...

Preiser 30495
Horse Drawn
20 4aaa27925929708a6b56fd7e46b35208 Wedding Preiser £27.55
Coach
(Closed)

Preiser 30414
Horse Drawn
21 81b3ad5c07a31fa67e4dc8643afa3275 Preiser £24.50
Liquid Manure
Wagon

Bachmann
Class A2
22 26de90be8191b92ea208c2111c7090cd 60534 'Irish Bachmann £149.92
Elegance' BR
Li...

Plarail - S-29
Steam
23 4eb5ea748ea030b14c3d1789f9578133 Locomotive Takara Tomy £12.87
Type C61-20
w/...

Roco 64723
OBB Railjet
24 1b3d6d9f9c79db8c54c0c2cbb50de1cf Roco £49.95
Economy
Coach V
uniq_id product_name manufacturer price number_ava

Learning Curve
Chuggington
25 79fd94b655dbcf797005e39e700eb3f1 Chuggington NaN
Interactive
Brewster

Plarail - AS-07
Shinkansen
26 6c4062e5b0da136365915b470ec9d4a4 Takara Tomy £14.44
Series 700
(Model T...

Thomas and
27 3b4be663a9878555100854f626202b17 Friends Take- Fisher-Price NaN
n-Play Molly

Hornby R2981
London 2012
28 8e8be9b29d3b1a794a906510fad8a75d Hornby NaN
1948 Games
00 Gauge L...

Gaugemaster
HBYS Hornby
29 8a9af1fc5f227191c00447b0c44f7700 Gaugemaster £5.95
Type Colour
Light Sign...

... ... ... ... ...

Star Trek Play


9970 3fb67f6e1d381c96db00bc4751628174 Arts Kai Spock Star Trek £49.88
Action Figure

Star Wars 30th


9971 747ccbe47d02b7efac8c247162343523 #04 R2-D2 Hasbro £22.69
Action Figure

Funko POP!
9972 c5dc2700783716cce0b0b9118bdd17c0 Harry Potter FunKo £13.90
Sword

Hot Toys -
Batman The
9973 9736294cd697539a88eeb851dfdd2771 Hot Toys £399.99
Dark Knight
Rises Movie ...

Lavender
Brown
Noble
9974 debdd5dc7adae2e92ef590ce4612dd5c Character £26.99
Collection
Wand. Harry
Potter No...

GAME OF
THRONES
9975 d8972b4f08f551d6b7d7ce23c5bf95ed Abystyle £15.63
Flag Stark
(70x120)

Batman The
Dark Knight
9976 569b2e8a3aef1831cd68fe1de287c809 Batman NaN
Batarang Prop
Replica W...

Star Wars
Costume, Kids
9977 c38c62b7eaf8de33f0addff73b17f807 Star Wars £26.99
Han Solo Outfit
Style ...
uniq_id product_name manufacturer price number_ava

Star Wars
9978 fca7110160af8a481fc94e9a9c8a07e3 Filled Desk Star Wars £2.89
Tidy

Albus
Dumbledore
9979 ccfd41e97f866080af3f17bc64120e3d (Harry Potter Mattel £63.73
Magical Minis
C...

Square Enix
Halo Reach
9980 1c0b975783a1c9f25ba226d4394bf46d Square-Enix £39.48
Play Arts Kai
Series 1 ...

HSDS030BO
foam tray for
9981 ab9fe33566df86b90c82ed78e38e08c7 Star Wars Feldherr £3.70
Armada Wave
...

Star Wars
Clone Wars -
9982 7fa63c36b92bc52acd490c5774ea4101 Obi-Wan Hasbro £58.18
Kenobi's
Starfi...

Dc Comics
Infinite Crisis
9983 32165eaeb83b3e5e772f7bd5b31ac13b DC Comics NaN
Pajama Party
Harley ...

Master
Replicas -
Master
9984 df6b6fa9e9d4d0994ac3fa67cd3ded71 Clone Trooper NaN
Replicas
Helmet Scaled
...

Playskool
Heroes Super
9985 5962c0a2623129d656aac1ee19239dcb Hero Marvel £11.95
Adventures
Action ...

Marauder's
9986 6b9c92678116a53b8d5a656b64cbcabb map wallscroll - GGS £31.14
Harry Potter

Thundercats
9987 0c99ecefabf3ec25d4fae53d0646fb3b 10cm Action Thundercats £11.44
Figure: Wilykat

Captain
America - The Captain
9988 8cc983a24c305a6dc91e3d8e6e421a72 £25.33
First Avenger - america
Movie Se...

Teen Titans
Shape-Shifting
9989 1fbd746051cc006738435a8d64d1e1d1 Ban Dai £29.99
Beast Boy 5"
inch F...

Iron Maiden 8-
Inch Eddie 2
9990 cf75a470360f08eaac9e4d9882999cee IronMan £29.79
Mintutes To
Midnigh...
uniq_id product_name manufacturer price number_ava

Power Rangers
Dino Charge Power
9991 57c638712b4ecb8dac1ec3004039f1f1 £9.75
30 cm Blue Rangers
Ranger Fi...

Star Wars The


Clone Wars
9992 fa13bf1bd4a3a98b990a4ee64dcf6eca Star Wars £32.99
CW01 Captain
Rex 3.75...

Playskool
Heroes Super
9993 3e64e4223988a85f6884c8c6a85a75cc Super Heroes £9.99
Hero Repulsor
Drill Veh...

Factory
Entertainment
9994 791719b23e393dc4a3384d4d7777c089 Green Hornet £9.50
Green Hornet
Movie: Kato...

Batman 1966
TV Series
9995 44d6967f083825a5de36ad4865a65bcd Mattel £22.95
Action Figures
- The Rid...

Star Wars
Costume, Kids
9996 08f0747b6fc6687215ffb994c3a6fb32 Star Wars £39.99
Stormtrooper
Costume S...

Defiance
Lawkeeper Olde Scotland
9997 bf6cc073f8f24e6e338190fa16f6ee9d £43.99
Metal Badge Yard Ltd.
Prop Replica

Justice League
of America
9998 cd783d0b8b44e631b9788b203eaaefae DC Comics £49.81
Series 3 Green
Lante...

Star Wars 1/72


9999 94d18e86e504bee1e392df7e5738b18a Y-Wing Bandai £21.20
Starfighter

10000 rows × 17 columns


In [61]:

df.isnull().sum()

Out[61]:

uniq_id 0
product_name 0
manufacturer 7
price 1435
number_available_in_stock 2500
number_of_reviews 18
number_of_answered_questions 765
average_review_rating 18
amazon_category_and_sub_category 690
customers_who_bought_this_item_also_bought 1062
description 651
product_information 58
product_description 651
items_customers_buy_after_viewing_this_item 3065
customer_questions_and_answers 9086
customer_reviews 21
sellers 3082
dtype: int64

In [62]:

df.drop('customer_questions_and_answers', axis=1 , inplace=True)


df.drop('items_customers_buy_after_viewing_this_item', axis=1 , inplace=True)
df.drop('number_available_in_stock', axis=1 , inplace=True)
df.drop('customers_who_bought_this_item_also_bought', axis=1 , inplace=True)
df.drop('sellers', axis=1 , inplace=True)
df.dropna(subset=['number_of_reviews'], inplace=True)
df.isnull().sum()

Out[62]:

uniq_id 0
product_name 0
manufacturer 7
price 1432
number_of_reviews 0
number_of_answered_questions 764
average_review_rating 0
amazon_category_and_sub_category 684
description 647
product_information 58
product_description 647
customer_reviews 3
dtype: int64
In [63]:

df.dtypes

Out[63]:

uniq_id object
product_name object
manufacturer object
price object
number_of_reviews object
number_of_answered_questions float64
average_review_rating object
amazon_category_and_sub_category object
description object
product_information object
product_description object
customer_reviews object
dtype: object

In [64]:

df['number_of_reviews'] = df['number_of_reviews'].str.replace(',','')

In [65]:

df['number_of_reviews'] = df['number_of_reviews'].astype(np.int64)

In [66]:

df.shape

Out[66]:

(9982, 12)

In [67]:

df['customer_reviews'][3]

Out[67]:

"I love it // 5.0 // 22 July 2013 // By\n \n Lilla Lukacs\n \n on 2


2 July 2013 // I love it. Perfect with the earlier ordered locomotive.Agai
n: I would recommend it to the masters of the topic. It's not just a toy."

In [68]:

df=df['customer_reviews'].str.split("//",n=4,expand=True)
In [69]:

df.head()

Out[69]:

0 1 2 3 4

Worth Buying For The 6 April By\n \n Copnovelist\n Part of the magic for me
0 4.0
Pictures Alone (As Ever) 2014 \n on 6 April 2014 growing up as a boy ...

18
By\n \n kenneth bell\n Very happy with the
1 Four Stars 4.0 Dec.
\n on 18 Dec. 2... communication with funkyb...
2015

26
By\n \n Simon.B :-)\n Simple & GREAT FUN for
2 **Highly Recommended!** 5.0 May
\n on 26 May 2015 5+My nephews face was ...
2015

22 July By\n \n Lilla Lukacs\n I love it. Perfect with the earlier
3 I love it 5.0
2013 \n on 22 July 2... ordered l...

14
By\n \n Love my Dog\n Bought this for my Grandson's
4 Birthday present 5.0 April
\n on 14 April 2... birthday. He i...
2014
In [70]:

df['review title']=df[0]
df['rating']=df[1]
df['review_date']=df[2]
df['customer_name']=df[3]
df['review']=df[4]
df.head()

Out[70]:

0 1 2 3 4 review title rating review_d

By\n \n Part of the


Worth Buying For 6 Worth Buying For
Copnovelist\n magic for me
0 The Pictures 4.0 April The Pictures 4.0 6 April 20
\n on 6 April growing up as
Alone (As Ever) 2014 Alone (As Ever)
2014 a boy ...

By\n \n Very happy


18
kenneth with the 18 D
1 Four Stars 4.0 Dec. Four Stars 4.0
bell\n \n on communication 20
2015
18 Dec. 2... with funkyb...

Simple &
By\n \n
26 GREAT FUN
**Highly Simon.B :-)\n **Highly 26 M
2 5.0 May for 5+My 5.0
Recommended!** \n on 26 May Recommended!** 20
2015 nephews face
2015
was ...

By\n \n Lilla I love it.


22
Lukacs\n \n Perfect with
3 I love it 5.0 July I love it 5.0 22 July 20
on 22 July the earlier
2013
2... ordered l...

By\n \n Love
14 Bought this for
my Dog\n \n 14 A
4 Birthday present 5.0 April my Grandson's Birthday present 5.0
on 14 April 20
2014 birthday. He i...
2...
In [71]:

df.drop(columns=0,inplace=True)
df.drop(columns=1,inplace=True)
df.drop(columns=2,inplace=True)
df.drop(columns=3,inplace=True)
df.drop(columns=4,inplace=True)
df.head()

Out[71]:

review title rating review_date customer_name review

By\n \n
Worth Buying For The Part of the magic for me
0 4.0 6 April 2014 Copnovelist\n \n on
Pictures Alone (As Ever) growing up as a boy ...
6 April 2014

By\n \n kenneth Very happy with the


18 Dec.
1 Four Stars 4.0 bell\n \n on 18 Dec. communication with
2015
2... funkyb...

By\n \n Simon.B Simple & GREAT FUN for


26 May
2 **Highly Recommended!** 5.0 :-)\n \n on 26 May 5+My nephews face was
2015
2015 ...

By\n \n Lilla
I love it. Perfect with the
3 I love it 5.0 22 July 2013 Lukacs\n \n on 22
earlier ordered l...
July 2...

By\n \n Love my Bought this for my


14 April
4 Birthday present 5.0 Dog\n \n on 14 Grandson's birthday. He
2014
April 2... i...

In [72]:

df['customer_name'][1]

Out[72]:

' By\n \n kenneth bell\n \n on 18 Dec. 2015 '


In [73]:

df['customer_name']=df['customer_name'].str.split("\n \n",n=1,expand=True)
df.head()

Out[73]:

review title rating review_date customer_name review

Worth Buying For The Pictures By\n \n Part of the magic for me
0 4.0 6 April 2014
Alone (As Ever) Copnovelist growing up as a boy ...

Very happy with the


18 Dec. By\n \n kenneth
1 Four Stars 4.0 communication with
2015 bell
funkyb...

Simple & GREAT FUN


26 May By\n \n Simon.B
2 **Highly Recommended!** 5.0 for 5+My nephews face
2015 :-)
was ...

By\n \n Lilla I love it. Perfect with the


3 I love it 5.0 22 July 2013
Lukacs earlier ordered l...

Bought this for my


14 April By\n \n Love my
4 Birthday present 5.0 Grandson's birthday. He
2014 Dog
i...
In [74]:

df['review']=df['review'].str.lower()
df['review']
Out[74]:

0 part of the magic for me growing up as a boy ...


1 very happy with the communication with funkyb...
2 simple & great fun for 5+my nephews face was ...
3 i love it. perfect with the earlier ordered l...
4 bought this for my grandson's birthday. he i...
5 the victorian style added to the character th...
6 high standard model, well worth the wait. re...
7 this has gone down very well this christmas. ...
8 xmas box for grandson,am sure it will be great
9 we bought this (and 2 others)for our grandson...
10 grandson loved it. hauls good load
11 very good product. thank you.
12 very good
13 beautiful model, if your a fan of the g.w.r, ...
14 great quality lots of detail run nic and smoo...
15 fantastic little set, beautifully made, and a...
16 the product is as expected. the only worry i...
17 my 2 year old loves chuggington so we decided...
18 lovely quality from kato.
19 i now have a second of these bachmann powerha...
20 this is an excellent ho scale diorama piece c...
21 this is an excellent ho scale diorama piece c...
22 beautiful little model, which i wholeheartedl...
23 a nice little train for our layout and a chan...
24 excellent model in perfect condition from rep...
25 got these for my 2 yr old - he's obsessed wit...
26 we love tomica!
27 it seems as though we are gradually purchasin...
28 have to say that this is a lovely item,if a t...
29 it was as advertised ,and delivered promptly ...
...
9970 excellent product, well presented and packaged
9971 hasbro's first wave of figures from star wars...
9972 good
9973 this is a quality figure, its literally a min...
9974 i bought this wand for my daughter who is a b...
9975 quick delivery. feels a bit plastics but that...
9976 bought this for my boyfriends 21st, honestly ...
9977 my grandson thought it was great
9978 my son loves it
9979 lovely collectable toy, great price for how o...
9980 i am a hgue fan of halo, and these figures ar...
9981 great tray, everything fits perfectly and a g...
9982 my 7yr old son has owned this toy for over a ...
9983 this figure is great, harley looks adorable.i...
9984 great attention to detail and a very affordab...
9985 bought for my friend's son and he loves it. g...
9986 got this for my niece who loves hp, she absol...
9987 i bought this for my brother who is a very se...
9988 this figure really does look like he's steppe...
9989 delighted with this item
9990 up the irons this is amazing figure to have a...
9991 we have a similar red ranger which comes with...
9992 this 3, 3/4" figure has a great range of arti...
9993 my son loved this substantial toy, it is real...
9994 came across this figure when i was looking fo...
9995 very true to the batman classic series of the...
9996 its a christmas present however from what i s...
9997 arrived in excellent condition
9998 okay, hal jordan figures come and go. there i...
9999 1st class kit top marks to bandai if your not...
Name: review, Length: 9982, dtype: object

In [75]:

import string

df['review']= df['review'].replace(string.punctuation,'')

In [76]:

df['review'].head()

Out[76]:

0 part of the magic for me growing up as a boy ...


1 very happy with the communication with funkyb...
2 simple & great fun for 5+my nephews face was ...
3 i love it. perfect with the earlier ordered l...
4 bought this for my grandson's birthday. he i...
Name: review, dtype: object

In [19]:

df.isnull().sum()

Out[19]:

review title 3
rating 3
review_date 3
customer_name 3
review 3
dtype: int64

In [20]:

df.dropna(subset=['review'], inplace=True)

In [21]:

df.isnull().sum()

Out[21]:

review title 0
rating 0
review_date 0
customer_name 0
review 0
dtype: int64

In [22]:

import nltk
df['review']=df['review'].apply(nltk.word_tokenize)
In [23]:

df['review'].head(5)

Out[23]:

0 [part, of, the, magic, for, me, growing, up, a...


1 [very, happy, with, the, communication, with, ...
2 [simple, &, great, fun, for, 5+my, nephews, fa...
3 [i, love, it, ., perfect, with, the, earlier, ...
4 [bought, this, for, my, grandson, 's, birthday...
Name: review, dtype: object

In [24]:

def remove_stopwords(text):
words=[w for w in text if w not in stopwords.words('english')]
return words

In [25]:

df['review']=df['review'].apply(lambda x:remove_stopwords(x))
df['review'].head(5)

Out[25]:

0 [part, magic, growing, boy, buy, (, given, ), ...


1 [happy, communication, funkybuys, |, five, sta...
2 [simple, &, great, fun, 5+my, nephews, face, a...
3 [love, ., perfect, earlier, ordered, locomotiv...
4 [bought, grandson, 's, birthday, ., currently,...
Name: review, dtype: object

In [27]:

lemmatizer= WordNetLemmatizer()
def word_lemmatizer(text):
lem_text=[lemmatizer.lemmatize(i) for i in text ]
return lem_text

In [29]:

import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to


[nltk_data] C:\Users\ccat\AppData\Roaming\nltk_data...
[nltk_data] Unzipping corpora\wordnet.zip.

Out[29]:

True

In [30]:

df['review']=df['review'].apply(lambda x: word_lemmatizer(x))
In [31]:

df['review'].head(5)

Out[31]:

0 [part, magic, growing, boy, buy, (, given, ), ...


1 [happy, communication, funkybuys, |, five, sta...
2 [simple, &, great, fun, 5+my, nephew, face, am...
3 [love, ., perfect, earlier, ordered, locomotiv...
4 [bought, grandson, 's, birthday, ., currently,...
Name: review, dtype: object

In [34]:

from nltk.stem import PorterStemmer


stemmer = PorterStemmer()
def word_stemmer(text):
stem_text="".join([stemmer.stem(i) for i in text])
return stem_text

In [35]:

df['review']=df['review'].apply(lambda x: word_stemmer(x))

In [36]:

df['review'].head(5)

Out[36]:

0 partmagicgrowboybuy(given)newhornbicatalogueve...
1 happicommunfunkybuy|fivestar//5.0//14jan.2016/...
2 simpl&greatfun5+minephewfaceamazopenbirthday!!...
3 love.perfectearlierorderlocomotive.again:would...
4 boughtgrandson'sbirthday.currentcollectbittrai...
Name: review, dtype: object

In [ ]:

S-ar putea să vă placă și