import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
#sns.set_style("whitegrid")
sns.set_style("darkgrid")
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
n = 10000
figsize = 9, 8
np.random.seed(1234)
x = 1.25 ** np.random.standard_normal(n)
y = 2 ** np.random.standard_normal(n)
df = pd.DataFrame(dict(x=x, y=y))
ax = df.plot.scatter('x', 'y', figsize=figsize)
There's no KDE plot that also includes a scatter plot of the points, so I just create two plots and draw the KDE plot on top of the scatter plot. It looks OK, but it's not quite the same.
# Draw the joint plot of x against y in black and overlay a filled KDE
# contour on the joint axes (zorder=1, slightly transparent).
#
# Written against the current seaborn API:
#   * the data vectors are passed by keyword (positional x/y is no longer
#     accepted by jointplot),
#   * `size` was renamed to `height`,
#   * `n_levels` was renamed to `levels`,
#   * `shade` was renamed to `fill`,
#   * `shade_lowest=False` has no direct replacement; leaving the lowest
#     level unfilled is the default behaviour, controlled by `thresh`.
#
# NOTE: despite its name, `ax` holds whatever `plot_joint` returns (the joint
# grid object), not a matplotlib Axes.
ax = (sns.jointplot(x=x, y=y, color='k', height=9)
      .plot_joint(sns.kdeplot, zorder=1, levels=3, fill=True, alpha=.9))
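To get the quartiles, the `get_quantiles` function executes the following three steps: it builds a KD-tree over the points, queries the distances from every point to its `k` nearest neighbours, and averages those distances per point before binning the averages into `n` quantiles.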
from sklearn.neighbors import KDTree
def get_quantiles(X, k=25, n=4):
    """Bin every row of ``X`` by its mean distance to its nearest neighbours.

    A KD-tree (Euclidean metric, ``leaf_size=30``) is built over ``X`` and then
    queried with ``X`` itself for the ``k`` nearest neighbours of each row.
    The ``k`` distances of each row are averaged, and the averages are cut
    into ``n`` equal-frequency bins with ``pd.qcut``.

    Parameters
    ----------
    X : array-like
        The points to bin, one point per row.
    k : int, default 25
        Number of nearest neighbours requested from the tree for each point.
    n : int, default 4
        Number of quantile bins.

    Returns
    -------
    The result of ``pd.qcut`` with integer labels ``0 .. n-1``: one label per
    row of ``X``. Label 0 is the bin with the smallest mean distances, i.e.
    the most tightly packed points.

    Notes
    -----
    NOTE(review): because the tree is queried with its own points, each point
    is presumably returned as its own nearest neighbour at distance 0, so the
    mean covers ``k - 1`` real neighbours plus one zero -- confirm.
    """
    tree = KDTree(X, leaf_size=30, metric='euclidean')
    # query() returns (distances, indices); only the distances are needed.
    neighbour_dists, _ = tree.query(X, k=k, return_distance=True)
    # A small mean distance means a dense neighbourhood, so this is an
    # inverse measure of local density.
    mean_dists = neighbour_dists.mean(axis=1)
    return pd.qcut(mean_dists, q=n, labels=range(n))
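Two more steps are needed to create a similar plot: each quartile is assigned a colour, and the points are sorted by quartile in descending order so that quartile 0 (the points with the smallest mean neighbour distance) is plotted last.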
# Tag every point with the quantile bin (0..3) of its mean neighbour distance.
df['quartile'] = get_quantiles(df[['x', 'y']].values, n=4)

# One colour per quartile label; the integer label is used as the list index.
# NOTE(review): the hex codes look like samples from the viridis colormap,
# yellow for quartile 0 through dark purple for quartile 3 -- confirm.
colors = ['#FDE724', '#34B779', '#31688E', '#440154']
df['colors'] = df['quartile'].apply(lambda q: colors[q])

# Sort by quartile in descending order so that quartile 0 comes last in the
# frame and is therefore handed to the scatter call last.
df = df.sort_values('quartile', ascending=False)
ax = df.plot.scatter(x='x', y='y', c=df['colors'], figsize=figsize, s=40)