Python Data Visualization Cookbook 2.9.2

时间:2023-03-08 22:08:51
 import numpy as np
import matplotlib.pyplot as plt def is_outlier(points, threshold=3.5):
if len(points.shape) == 1:
points = points[:, None] # Find the median number of points
median = np.median(points, axis=0) diff = np.sum((points - median)**2, axis=-1)
diff = np.sqrt(diff)
MAD = np.median(diff) MZS = 0.6745 * diff / MAD return MZS > threshold # Create 100 random numbers
x = np.random.random(100) # The number of the histogram buckets
buckets = 50 # Add in a few outliers
x = np.r_[x, -49, 95, 100, -100] # The function 'is_outlier()' return a array of boolean
# If True, get the element; else pass the element
# For example:
# x = [1,2,3,4]
# y = x[array([False,True,True,False])]
# y is [2,3]
filtered = x[~is_outlier(x)] # Create a new figure
plt.figure() # Define the width of the figure
plt.subplot(211)
# Drawing histogram
# histogram(arr,bins,normed,facecolor,edgecolor,alpha,histtype)
plt.hist(x, buckets)
plt.xlabel('Raw') plt.subplot(212)
plt.hist(filtered, buckets)
plt.xlabel('Cleaned') # Show the figure
plt.show()