基于主成分分析和马氏距离的车辆状态异常检测(Python,jupyter notebook文件)

作品简介

部分代码如下(jupyter notebook)

Mahalanobis distance (multiple sensors) results on train & two test sets (normal & faulty vehicle) for smooth road surface

import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import os
import math
import csv

Import and read train (subset of Feb - normal vehicle) set

# Load the training CSV; the first CSV column becomes the DataFrame index.
data_vib1 = pd.read_csv("Input_Data_Files/SmoothDataTrain.csv", index_col=0)
# Bare expression: shown as cell output when it is the last statement of a notebook cell.
data_vib1

# Summary statistics of every numeric column.
data_vib1.describe()

plt.style.use('fivethirtyeight')
# One subplot per column of the raw data.
data_vib1.plot(figsize=(20,18), subplots=True)

Time domain feature extraction (7 features) function

def time_domain_features(test1,w,overlap):
    """Append sliding-window time-domain features for each column of ``test1``.

    For every column present when the function is called, a window of ``w``
    consecutive rows is moved over the data and seven statistics are computed
    per window: mean, variance, skewness, kurtosis, peak (maximum absolute
    value), RMS and crest factor (peak / RMS).  Each statistic is stored in
    the row holding the *last* sample of the window, in a column named
    ``<feature>_<c>`` where ``c`` is the positional index of the source
    column.  Rows that do not end a window keep NaN in the feature columns.

    Parameters
    ----------
    test1 : pandas.DataFrame
        Input signals, one column per signal.  The frame is modified in
        place: the feature columns are appended to it.
    w : int
        Window length in rows.
    overlap : int
        Step, in rows, between the starts of consecutive windows.  Despite
        its name this is the stride, not the number of shared samples.

    Returns
    -------
    pandas.DataFrame
        The same object as ``test1``, with the feature columns added.

    Notes
    -----
    Results are written with ``.loc`` using the index label of the window's
    last row, so index labels are assumed to be unique.
    """
    n_rows = test1.shape[0]
    # Snapshot the column count up front: the feature columns appended below
    # must not themselves be treated as input signals.
    n_signals = len(test1.columns)
    for c in range(n_signals):
        suffix = str(c)
        for i in range(0, n_rows - (w - 1), overlap):
            # Slice the window once and reuse it for all seven statistics.
            window = test1.iloc[i:i+w, c]
            row = test1.index[i+w-1]
            # Peak and RMS are needed on their own and for the crest factor.
            peak = np.max(np.abs(window))
            rms = np.sqrt(np.mean(np.square(window)))
            test1.loc[row, 'mean_'+suffix] = window.mean()
            test1.loc[row, 'var_'+suffix] = window.var()
            test1.loc[row, 'skew_'+suffix] = window.skew()
            test1.loc[row, 'kurtosis_'+suffix] = window.kurtosis()
            test1.loc[row, 'peak_'+suffix] = peak
            test1.loc[row, 'rms_'+suffix] = rms
            test1.loc[row, 'crest_'+suffix] = peak / rms
    return test1

Feature extraction (7 features for each sensor) of train set (total 35 features)

# Sliding-window features with window length 2000 rows and a step of 250 rows.
# Note: time_domain_features also adds the feature columns to data_vib1 itself.
data_vib2 = time_domain_features(data_vib1,2000,250)
# Keep only rows where every column has a value, i.e. rows that end a window.
data_vib2 = data_vib2.dropna()
data_vib2

plt.style.use('fivethirtyeight')
# Columns at positions 5..39 (35 columns).
# NOTE(review): presumably the first 5 columns are the raw sensor signals and
# these 35 are the extracted features — confirm against the CSV layout.
data_vib = data_vib2.iloc[:,5:40]
print(data_vib.head())
data_vib.plot(figsize=(20,15))
plt.show()

Feature selection of train set using PCA (reduced to 10 features)

plt.style.use('fivethirtyeight')
from sklearn.decomposition import PCA

# Fit a PCA with all components kept so the explained variance of every
# component can be inspected before choosing n_components.
pca = PCA()
fit = pca.fit(data_vib)
plt.figure(figsize=(15, 10))
plt.plot(pca.explained_variance_, linewidth=2)
plt.axis('tight')
plt.xlabel('N_Components')
plt.ylabel('Explained_Variance')
plt.title('PCA Explained Variance vs. N Componets')

# Line plot and per-column histograms of the feature frame.
data_vib.plot(figsize=(20,15))
data_vib.hist(bins=50, figsize=(18,16))
plt.show()

plt.style.use('fivethirtyeight')
# Plot all columns of `data` on a single set of axes (subplots=False).
# NOTE(review): `data` is not defined in the visible part of the notebook —
# confirm which frame it refers to.
ax = data.plot(figsize=(20,18), subplots=False)
ax.set_xlabel('Datapoints', fontsize=16)
ax.set_ylabel('Vibration Sensor Values', fontsize=16)
ax.set_title('Raw Vibration Signal for all Sensors', fontsize=16)
plt.show()

# Take the columns at positions 5..39 (35 columns) of the test frame.
data_new = data_test1_new.iloc[:,5:40]
# Prepend the 'DateTime' column, aligned on the index.
# NOTE(review): this line reads from `data_test_new` while the line above
# reads from `data_test1_new`; neither is defined in the visible part —
# confirm that two different frames are really intended.
data_new = pd.concat([data_test_new.loc[:,'DateTime'], data_new], axis=1 )
# Drop rows that have a missing value in any column.
data_new = data_new.dropna()
print(data_new.shape)
print(data_new.head())
ax=data_new.plot(figsize=(15,12))
ax.set_xlabel('Datapoints', fontsize=16)
ax.set_ylabel('Feature Values', fontsize=16)
ax.set_title('Features Extracted from Raw Vibration Signal for all Sensors', fontsize=16)
plt.show()

# Plot the feature frame with a title describing the PCA-transformed space.
# NOTE(review): this plots `data_new`; confirm whether the PCA-transformed
# frame was meant to be plotted here instead.
ax=data_new.plot(figsize=(15,12))
ax.set_xlabel('Datapoints', fontsize=16)
ax.set_ylabel('Feature Values', fontsize=16)
# The title literal was broken across several physical lines (a syntax
# error); it is rejoined here into a single string.
ax.set_title('Features Space after transformation with PCA', fontsize=16)
plt.show()

plt.style.use('fivethirtyeight')
# Plot the 'mahala' column of df_x, one marker per datapoint.
# NOTE(review): `df_x` is not defined in the visible part of the notebook.
ax = df_x['mahala'].plot(figsize=(20,15), marker='o', markersize=10, markerfacecolor='black')
ax.set_xlabel('Datapoints', fontsize=20)
ax.set_ylabel('Mahalanobis Distance', fontsize=20)
ax.set_title('Mahalanobis Distance for Multivariate Data with PCA (All Sensors)', fontsize=20)
ax.legend(fontsize=20)
# Dashed horizontal line at 45, labelled "Anomaly Threshold" in the legend.
plt.axhline(45, color='k', linestyle='--')
ax.legend(("Test1 (Feb)", "Anomaly Threshold"), fontsize=20)
plt.tight_layout()

# Histograms of every column of df_x.
df_x.hist(bins=50, figsize=(18,16))
plt.show()

plt.style.use('fivethirtyeight')
# Plot every column of Maha_both on one set of axes.
# NOTE(review): `Maha_both` is not defined in the visible part of the
# notebook; per the legend below it presumably holds the Test1 and Test2
# Mahalanobis distances — confirm.
ax = Maha_both.plot(figsize=(10,8), marker='o', markersize=10, markerfacecolor='black')
ax.set_xlabel('Datapoints', fontsize=20)
ax.set_ylabel('Mahalanobis Distance', fontsize=20)
ax.set_title('MD Values for Testing Data Over Smooth Surface (With PCA)', fontsize=20)
ax.legend(fontsize=20)
# Dashed horizontal line at 40, labelled "Anomaly Threshold" in the legend.
plt.axhline(40, color='blue', linestyle='--')
# Dashed vertical line at x = 46040, labelled "Onset of Failure" in the legend.
plt.axvline(46040, color='black', linestyle='--')
ax.legend(("Test1 (Feb)", "Test2 (March)", "Anomaly Threshold", "Onset of Failure"), fontsize=20)
plt.tight_layout()

注意:

1.所有代码均经过运行测试,没有问题。 

2.拍前请仔细阅读作品简介,这非常重要,因为涉及到不同的编程语言(Python或matlab)。

3.程序为特殊商品,一经售出,概不退换,有问题请及时联系。

4.建议有一定Python或Matlab基础的同学或工程师购买。

5.该代码不讲解哦。

创作时间: