Documente Academic
Documente Profesional
Documente Cultură
Script:
import pymongo
import pandas as pd
from pprint import pprint
# Script 1: load selected client fields from `clienti_leasing`, sorted by the
# requested amount, adjust the loyalty score of clients whose request is
# smaller than their deposit, and export the result to CSV.
conn = pymongo.MongoClient("mongodb://37.120.249.57:27017/")
db = conn["daune_leasing"]
collection = db["clienti_leasing"]

# Return only the fields used below; "_id": 0 suppresses the Mongo ObjectId.
projection = {
    "_id": 0,
    "NUME_CLIENT": 1,
    "SUMA_SOLICITATA": 1,
    "SUMA_DEPOZIT": 1,
    "FIDELITATE": 1,
    "VARSTA": 1,
}
sort = [("SUMA_SOLICITATA", 1)]  # ascending by requested amount

# The original used `cursor` without ever creating it; `projection` and
# `sort` were defined but never passed to a query.
cursor = collection.find({}, projection=projection, sort=sort)
try:
    df = pd.DataFrame(list(cursor))
finally:
    # Release the server-side cursor even if building the frame fails.
    cursor.close()
pprint(df)

# Clients whose requested amount is smaller than their deposit.
covered_by_deposit = df['SUMA_SOLICITATA'] < df['SUMA_DEPOZIT']
# NOTE(review): the original right-hand side was
# `df.loc[mask, 'FIDELITATE' == 5]`, where `'FIDELITATE' == 5` evaluates to
# False and is then used as a column label. The assumed intent is to set the
# loyalty score to 5 for these clients -- confirm against the task statement.
df.loc[covered_by_deposit, 'FIDELITATE'] = 5
pprint(df)

df.to_csv('clienti_leasing.csv')
2. Încărcați într-un df profesia, venitul anual, suma din depozite și suma solicitată
pe fiecare profesie. În df adăugați o nouă coloană pentru a calcula gradul de
îndatorare pe fiecare profesie (suma_solicitata/(venit_anual+suma_depozit)*100).
Reprezentați grafic gradul de îndatorare pe fiecare profesie.
Script:
import pymongo
import pandas as pd
import matplotlib.pyplot as plt
from pprint import pprint
# Script 2: per profession, sum annual income, requested amount and deposits,
# derive the debt ratio
#     SUMA_SOLICITATA / (VENIT_ANUAL + SUMA_DEPOZIT) * 100
# and draw it as a bar chart (the chart is required by the task statement
# but was missing from the original script).
conn = pymongo.MongoClient("mongodb://37.120.249.57:27017/")
db = conn["daune_leasing"]
collection = db["clienti_leasing"]

pipeline = [
    {'$group': {
        "_id": "$PROFESIA",
        "VENIT_ANUAL": {'$sum': "$VENIT_ANUAL"},
        "SUMA_SOLICITATA": {'$sum': "$SUMA_SOLICITATA"},
        "SUMA_DEPOZIT": {'$sum': "$SUMA_DEPOZIT"},
    }},
    {'$sort': {"_id": 1}},
]

cursor = collection.aggregate(pipeline)
try:
    df = pd.DataFrame(list(cursor))
finally:
    # Release the server-side cursor even if building the frame fails.
    cursor.close()

# A zero denominator would produce +/-inf, which cannot be drawn; map it to
# NaN so the affected profession simply has no bar.
total_resources = (df["VENIT_ANUAL"] + df["SUMA_DEPOZIT"]).replace(0, float("nan"))
df['GRAD_INDATORARE'] = df["SUMA_SOLICITATA"] / total_resources * 100
pprint(df)

# After the $group stage `_id` holds the profession name.
df.plot.bar(x='_id', y='GRAD_INDATORARE', rot=0)
plt.xlabel('PROFESIA')
plt.ylabel('GRAD_INDATORARE')
plt.show()
Script:
import pymongo
import pandas as pd
import matplotlib.pyplot as plt
from pprint import pprint
# Script 3: count claims and sum claim value per (MARCA, MODEL) and draw a bar
# chart of the groups that have more than 100 claims.
conn = pymongo.MongoClient("mongodb://37.120.249.57:27017/")
db = conn["daune_leasing"]
collection = db["clienti_daune"]

pipeline = [
    {'$group': {
        "_id": {'MARCA': "$MARCA", 'MODEL': "$MODEL"},
        # The original `{'$count': "$DAUNA"}` is not valid: the `$count`
        # accumulator takes an empty document and needs MongoDB 5.0+.
        # `{'$sum': 1}` counts the documents of each group on every version.
        # NOTE(review): this counts all documents in the group, including
        # any without a DAUNA field -- confirm that is the intended count.
        "DAUNA": {'$sum': 1},
        "VALOARE_DAUNA": {'$sum': "$VALOARE_DAUNA"},
    }},
    {'$sort': {"_id": 1}},
]

# The original plotted `df` without ever running the pipeline.
cursor = collection.aggregate(pipeline)
try:
    df = pd.DataFrame(list(cursor))
finally:
    # Release the server-side cursor even if building the frame fails.
    cursor.close()

# Expand the compound `_id` document into MARCA / MODEL columns so the chart
# can be labelled with the model name instead of a raw dict.
df = pd.concat([df.drop(['_id'], axis=1), df['_id'].apply(pd.Series)], axis=1)

df_plot = df.loc[df['DAUNA'] > 100]
if df_plot.empty:
    # Plotting an empty frame raises inside pandas; report it instead.
    print('No (MARCA, MODEL) group has more than 100 claims.')
else:
    df_plot.plot.bar(x='MODEL', y='DAUNA', rot=0)
    plt.xlabel('MODEL')
    plt.ylabel('DAUNA')
    plt.show()
Script:
import pymongo
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import KMeans
from scipy import stats
# Script 4: aggregate claim value per vehicle description, standardise the
# features, split the rows into 6 KMeans clusters, then export and chart the
# result.
conn = pymongo.MongoClient("mongodb://37.120.249.57:27017/")
db = conn["daune_leasing"]
collection = db["clienti_daune"]

pipeline = [
    {'$group': {
        # Every non-_id field of $group must be an accumulator, so the
        # original `"PRODUCATOR": 1`-style entries are rejected by the
        # server. The descriptive fields are moved into the compound group
        # key, which is also what the `_id` expansion below expects (it
        # needs MARCA and PRODUCATOR columns).
        # NOTE(review): this groups by every distinct combination of these
        # fields -- confirm that is the intended granularity.
        "_id": {
            'MARCA': "$MARCA",
            'PRODUCATOR': "$PRODUCATOR",
            'AN_FABRICATIE': "$AN_FABRICATIE",
            'PRET_MANOPERA': "$PRET_MANOPERA",
            'PRET_TOTAL': "$PRET_TOTAL",
        },
        "VALOARE_DAUNA": {'$sum': "$VALOARE_DAUNA"},
    }},
    {'$sort': {"_id": 1}},
]

cursor = collection.aggregate(pipeline)
try:
    df = pd.DataFrame(list(cursor))
finally:
    # Release the server-side cursor even if building the frame fails.
    cursor.close()

# Expand the compound `_id` document into one column per key.
df = pd.concat([df.drop(['_id'], axis=1), df['_id'].apply(pd.Series)], axis=1)
# Rows with any missing value are dropped, so no later imputation is needed
# (the original `fillna(df.mean())` after this line could never fill anything).
df.dropna(inplace=True, axis=0)

# Turn the text columns into integer codes so they can be standardised.
label_encoder = LabelEncoder()
df['PRODUCATOR'] = label_encoder.fit_transform(df['PRODUCATOR'].astype(str))
df['MARCA'] = label_encoder.fit_transform(df['MARCA'].astype(str))

# Standardise every column. The result is wrapped back into a DataFrame
# because `stats.zscore` may hand back a plain ndarray, on which the
# `df['CLUSTER']` assignment and `groupby` below cannot work. A constant
# column standardises to NaN (0/0); it carries no information, so use 0.
df = pd.DataFrame(
    stats.zscore(df.to_numpy(dtype=float), axis=0),
    columns=df.columns,
    index=df.index,
).fillna(0.0)

# n_init and random_state are pinned so repeated runs give the same clusters.
kmeans = KMeans(n_clusters=6, n_init=10, random_state=0).fit(df)
y = kmeans.predict(df)
df['CLUSTER'] = y
print(df.groupby(['CLUSTER']).mean())
df.to_csv('CLIENTI_CLUSTERIZATI.csv', encoding='utf-8', index=False)

# `cmap` only takes effect when `c` is given; colour each point by cluster.
plt.scatter(df['VALOARE_DAUNA'], y, c=y, cmap='rainbow')
plt.show()

# Share of rows per cluster.
df.groupby(['CLUSTER']).size().plot(kind='pie', autopct='%.1f%%')
plt.axis('equal')
plt.show()
Script:
# Script 5: predict the requested amount (SUMA_SOLICITATA) per client from
# name, profession and age with a linear regression and a neural network,
# storing both predictions next to the actual value.
#
# This script had no imports of its own and used several names that were
# never defined (linear_model, StandardScaler, mlp); they are imported here
# so the script runs stand-alone.
import pymongo
import pandas as pd
from sklearn import linear_model
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler

conn = pymongo.MongoClient("mongodb://37.120.249.57:27017/")
db = conn["daune_leasing"]
collection = db["clienti_leasing"]

pipeline = [
    {'$group': {
        "_id": {
            'NUME_CLIENT': "$NUME_CLIENT",
            'PROFESIA': "$PROFESIA",
            'VARSTA': "$VARSTA",
        },
        "SUMA_SOLICITATA": {'$sum': "$SUMA_SOLICITATA"},
    }},
    {'$sort': {"_id": 1}},
]

cursor = collection.aggregate(pipeline)
try:
    df = pd.DataFrame(list(cursor))
finally:
    # Release the server-side cursor even if building the frame fails.
    cursor.close()

# Expand the compound `_id` document into one column per key.
df = pd.concat([df.drop(['_id'], axis=1), df['_id'].apply(pd.Series)], axis=1)

# Feature matrix: everything except the target. `drop` returns a new frame,
# so `df` itself is left untouched.
df_x = df.drop("SUMA_SOLICITATA", axis=1)
label_encoder = LabelEncoder()
df_x['NUME_CLIENT'] = label_encoder.fit_transform(df_x['NUME_CLIENT'].astype(str))
df_x['PROFESIA'] = label_encoder.fit_transform(df_x['PROFESIA'].astype(str))
# NOTE(review): VARSTA is label-encoded from its string form, as in the
# original, so the codes follow string order ("100" sorts before "25").
# Confirm whether a plain numeric age was intended.
df_x['VARSTA'] = label_encoder.fit_transform(df_x['VARSTA'].astype(str))
# Use the means of the encoded features. The original took `df.mean()`,
# i.e. the un-encoded frame that still holds text columns.
df_x.fillna(df_x.mean(), inplace=True)
df_y = df['SUMA_SOLICITATA']

# The original also built `SelectKBest(f_regression, k=4)` but never used it;
# with only 3 features k=4 would be rejected anyway, so it is left out.

# Standardise the features before fitting both models.
scaler = StandardScaler()
df_x = scaler.fit_transform(df_x)

# Linear regression.
lm = linear_model.LinearRegression()
lm.fit(df_x, df_y)
df['SUMA_SOLICITATA_LR'] = lm.predict(df_x)

# Neural network. The original called `mlp.predict` on a model that was
# never created or trained.
# NOTE(review): the hyper-parameters below are placeholders chosen for a
# reproducible, converging run -- replace with the intended configuration.
mlp = MLPRegressor(max_iter=1000, random_state=0)
mlp.fit(df_x, df_y)
df['SUMA_SOLICITATA_ANN'] = mlp.predict(df_x)