New Text Document

#import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the work-order export into a DataFrame.
data = pd.read_csv('WorkOrderData.csv')

# Print the column names, non-null counts and dtypes of the raw data.
data.info()
"""
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28922 entries, 0 to 28921
Data columns (total 14 columns):
site_id 28922 non-null int64
maintenance_typ 27879 non-null object
category_cd 27345 non-null object
status_cd 28922 non-null object
closed_time_id 28922 non-null int64
Closed_date 25148 non-null object
required_time_id 28922 non-null int64
Required_date 28922 non-null object
planned_end_time_id 28922 non-null int64
Planned_date 13666 non-null object
wo_count 28922 non-null int64
wo_ontime_count 28922 non-null int64
wo_late_count 28922 non-null int64
wo_upcoming_count 28922 non-null int64
dtypes: int64(8), object(6)
memory usage: 3.1+ MB
"""
# Discard every row that has at least one missing value.
data = data.dropna()

# Preview the first five remaining rows.
data.head()
"""
site_id maintenance_typ category_cd status_cd closed_time_id
Closed_date required_time_id Required_date planned_end_time_id
Planned_date wo_count wo_ontime_count wo_late_count
wo_upcoming_count
0 265 BREAKDOWN REPAIR Asset Maintenance Orders COMPLETED 20170206
00:00.0 20170207 00:00.0 20170206 00:00.0 1 1 0
0
1 265 BREAKDOWN REPAIR Asset Maintenance Orders COMPLETED 20170206
00:00.0 20170207 00:00.0 20170206 00:00.0 1 1 0
0
2 334 PREDICTIVE TASK Asset Maintenance Orders CANCELLED 20170406
00:00.0 20170413 00:00.0 20170406 00:00.0 1 0 1
0
3 334 PREDICTIVE TASK Asset Maintenance Orders COMPLETED 20170408
00:00.0 20170413 00:00.0 20170406 00:00.0 1 1 0
0
4 324 CORRECTIVE TASK Asset Maintenance Orders COMPLETED 20170408
00:00.0 20170413 00:00.0 20170408 00:00.0 1 1 0
0
"""
#convert str-dates to timestamp columns

def conv_dt(clm):
    """
    Convert date values laid out as 'yyyymmdd' into pandas timestamps.

    Parameters
    ----------
    clm : scalar or list-like (e.g. a DataFrame column)
        Date value(s) written as year, month, day, for example 20170206.

    Returns
    -------
    The result of ``pd.to_datetime`` for ``clm``, parsed with the fixed
    format '%Y%m%d' (timestamps display as 'yyyy-mm-dd').
    """
    return pd.to_datetime(clm, format='%Y%m%d')
#convert date columns into timestamp columns

data['closed_time_id'] = conv_dt(data['closed_time_id'])
data['required_time_id'] = conv_dt(data['required_time_id'])
data['planned_end_time_id'] = conv_dt(data['planned_end_time_id'])
data.shape #(12093, 14)
#perform predictions on tasks that were closed in year 2017

data_year = data[data['closed_time_id'].dt.year == 2017]
data_year.shape #(11944, 14)
#number of samples per status (computed on all years, not only 2017)

data['status_cd'].value_counts()
"""
COMPLETED 8727
CLOSED 3208
CANCELLED 158
Name: status_cd, dtype: int64
"""
# Keep only the samples whose maintenance is completed, so that the closing
# date is predicted only for completed work orders.
# .copy() makes data_completed an independent frame; adding columns to a
# filtered slice of data_year is what triggered SettingWithCopyWarning.
data_completed = data_year[data_year['status_cd'] == 'COMPLETED'].copy()
data_completed.shape #(8614, 14)
# Express each date as its day of the year, using the vectorised accessor
# instead of a per-element lambda.
data_completed['closed_day'] = data_completed['closed_time_id'].dt.dayofyear
data_completed['required_time(day)'] = data_completed['required_time_id'].dt.dayofyear
data_completed['planned_time(day)'] = data_completed['planned_end_time_id'].dt.dayofyear

# Assemble the modelling frame as an independent copy: identifiers, the
# categorical fields, the work-order counters and the day-of-year columns.
data_new = data_completed.loc[:, [
    'site_id', 'maintenance_typ', 'category_cd', 'status_cd',
    'wo_count', 'wo_ontime_count', 'wo_late_count', 'wo_upcoming_count',
    'closed_day', 'required_time(day)', 'planned_time(day)',
]].copy()
data_new.shape #(8614, 11)
# Number of distinct sites present in the modelling frame.
data_new['site_id'].nunique() #40
# Number of samples per work-order category.
data_new['category_cd'].value_counts()
"""
Asset Maintenance Orders 5161
Sanitation Work Orders 1609
Integrated Pest Management 802
Safety Task Work Orders 385
Final Product Zone Order 365
Autonomous Maintenance 147
Food Safety 130
Pest Management Work Order 5
Environmental Task Work Orders 4
Capital Work Orders 4
COR - Corrective Maintenance Work Orders 2
Name: category_cd, dtype: int64
"""
#import the preprocessing module and LabelEncoder

#to perform the preprocessing and label encoding of the string columns
from sklearn import preprocessing

from sklearn.preprocessing import LabelEncoder
#get the linear regression model

from sklearn.linear_model import LinearRegression
#encode the string columns as integer labels

# NOTE: despite their names, le_color encodes 'maintenance_typ' and le_make
# encodes 'category_cd'; the names are kept so later code can still use them.
le_color = LabelEncoder()
le_make = LabelEncoder()
data_new['maintenance_typ'] = le_color.fit_transform(data_new.maintenance_typ)
data_new['category_cd'] = le_make.fit_transform(data_new.category_cd)

# Features: site, encoded categoricals, work-order counters and the
# required/planned day of year. Target: the day of year the order was closed.
X = data_new[['site_id', 'maintenance_typ', 'category_cd',
              'wo_count', 'wo_ontime_count', 'wo_late_count',
              'wo_upcoming_count',
              'required_time(day)', 'planned_time(day)']]
y = data_new['closed_day']
lm = LinearRegression()
#split the data into training and testing sets

# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=101)
#fit the model on the training data

lm.fit(X_train, y_train)
#predict the closing date in the form of day-of-year
# (it can be converted back to yyyy-mm-dd format)

pred_train = lm.predict(X_train)
pred_test = lm.predict(X_test)
"""
pred_train
array([109.07451724, 43.69171545, 54.58952948, ..., 46.84330053,
52.12862088, 57.95741627])
pred_test
array([ 73.39881804, 95.87136349, 82.24623855, ..., 90.58016066,
102.27159995, 98.12409611])
"""

New Text Document

Încărcat de

Informații document

Drepturi de autor

Formate disponibile

Partajați acest document

Partajați sau inserați document

Opțiuni de partajare

Vi se pare util acest document?

Este necorespunzător acest conținut?

Drepturi de autor:

Formate disponibile

New Text Document

Încărcat de

Drepturi de autor:

Formate disponibile

#impor required librares

#read the datafile into dataframe

#get the basic info of DataFrame

#drop the empty rows

#get the first 5 samples of data

#conert str-dates to timestamp columns

#conert date columns into timestamp columns

data.shape #(12093, 14)

#perform predictions on tasks occured in year 2017

data_year.shape #(11944, 14)

#total no of samples whos status is 'completed'

data_completed.shape #(8614, 14)

#errors can be neglected

data_new = data_completed[['site_id', 'maintenance_typ', 'category_cd',

data_new.shape #(8614, 11)

#import the preprocessing and lableencoder

from sklearn import preprocessing

#get the linear regression model

#encode the string columns

X = data_new[['site_id', 'maintenance_typ', 'category_cd',

#split the dat into traing and testing sets

#fit the data for training the model

#predict the date in form of DayOfYear

S-ar putea să vă placă și