用R或Matlab进行双变量分布的三维图。

时间:2021-08-27 01:16:58

i would like to know if someone could tell me how you plot something similar to this 用R或Matlab进行双变量分布的三维图。 with histograms of the sample generates from the code below under the two curves. Using R or Matlab but preferably R.

我想知道是否有人能告诉我,你是如何画出类似于这个的图的,从下面两条曲线下的代码生成的样本的直方图。使用R或Matlab,但最好是R。

# bivariate normal with a gibbs sampler...

gibbs<-function (n, rho) 
{
  mat <- matrix(ncol = 2, nrow = n)
  x <- 0
  y <- 0
  mat[1, ] <- c(x, y)
  for (i in 2:n) {
    x <- rnorm(1, rho * y, (1 - rho^2))
    y <- rnorm(1, rho * x,(1 - rho^2))
    mat[i, ] <- c(x, y)
  }
  mat
}



bvn<-gibbs(10000,0.98)
par(mfrow=c(3,2))
plot(bvn,col=1:10000,main="bivariate normal distribution",xlab="X",ylab="Y")
plot(bvn,type="l",main="bivariate normal distribution",xlab="X",ylab="Y")

hist(bvn[,1],40,main="bivariate normal distribution",xlab="X",ylab="")
hist(bvn[,2],40,main="bivariate normal distribution",xlab="Y",ylab="")
par(mfrow=c(1,1))`

Thanks in advance

谢谢提前

Best regards,

最好的问候,

JC T.

JC T。

6 个解决方案

#1


10  

You could do it in Matlab programmatically.

你可以用Matlab编程。

This is the result:

这是由于:

用R或Matlab进行双变量分布的三维图。

Code:

代码:

% Generate some data.
data = randn(10000, 2);

% Scale and rotate the data (for demonstration purposes).
data(:,1) = data(:,1) * 2;
theta = deg2rad(130);
data = ([cos(theta) -sin(theta); sin(theta) cos(theta)] * data')';

% Get some info.
m = mean(data);
s = std(data);
axisMin = m - 4 * s;
axisMax = m + 4 * s;

% Plot data points on (X=data(x), Y=data(y), Z=0)
plot3(data(:,1), data(:,2), zeros(size(data,1),1), 'k.', 'MarkerSize', 1);

% Turn on hold to allow subsequent plots.
hold on

% Plot the ellipse using Eigenvectors and Eigenvalues.
data_zeroMean = bsxfun(@minus, data, m);
[V,D] = eig(data_zeroMean' * data_zeroMean / (size(data_zeroMean, 1)));
[D, order] = sort(diag(D), 'descend');
D = diag(D);
V = V(:, order);
V = V * sqrt(D);
t = linspace(0, 2 * pi);
e = bsxfun(@plus, 2*V * [cos(t); sin(t)], m');
plot3(...
    e(1,:), e(2,:), ...
    zeros(1, nPointsEllipse), 'g-', 'LineWidth', 2);

maxP = 0;
for side = 1:2
    % Calculate the histogram.
    p = [0 hist(data(:,side), 20) 0];
    p = p / sum(p);
    maxP = max([maxP p]);
    dx = (axisMax(side) - axisMin(side)) / numel(p) / 2.3;
    p2 = [zeros(1,numel(p)); p; p; zeros(1,numel(p))]; p2 = p2(:);
    x = linspace(axisMin(side), axisMax(side), numel(p));
    x2 = [x-dx; x-dx; x+dx; x+dx]; x2 = max(min(x2(:), axisMax(side)), axisMin(side));

    % Calculate the curve.
    nPtsCurve = numel(p) * 10;
    xx = linspace(axisMin(side), axisMax(side), nPtsCurve);

    % Plot the curve and the histogram.
    if side == 1
        plot3(xx, ones(1, nPtsCurve) * axisMax(3 - side), spline(x,p,xx), 'r-', 'LineWidth', 2);
        plot3(x2, ones(numel(p2), 1) * axisMax(3 - side), p2, 'k-', 'LineWidth', 1);
    else
        plot3(ones(1, nPtsCurve) * axisMax(3 - side), xx, spline(x,p,xx), 'b-', 'LineWidth', 2);
        plot3(ones(numel(p2), 1) * axisMax(3 - side), x2, p2, 'k-', 'LineWidth', 1);
    end

end

% Turn off hold.
hold off

% Axis labels.
xlabel('x');
ylabel('y');
zlabel('p(.)');

axis([axisMin(1) axisMax(1) axisMin(2) axisMax(2) 0 maxP * 1.05]);
grid on;

#2


8  

I must admit, I took this on as a challenge because I was looking for different ways to show other datasets. I have normally done something along the lines of the scatterhist 2D graphs shown in other answers, but I've wanted to try my hand at rgl for a while.

我必须承认,我认为这是一个挑战,因为我正在寻找不同的方法来展示其他数据集。我通常会做一些类似于其他答案的scatterhist 2D图,但我想尝试一下rgl。

I use your function to generate the data

我使用您的函数来生成数据。

gibbs<-function (n, rho) {
    mat <- matrix(ncol = 2, nrow = n)
    x <- 0
    y <- 0
    mat[1, ] <- c(x, y)
    for (i in 2:n) {
        x <- rnorm(1, rho * y, (1 - rho^2))
        y <- rnorm(1, rho * x, (1 - rho^2))
        mat[i, ] <- c(x, y)
    }
    mat
}
bvn <- gibbs(10000, 0.98)

Setup

I use rgl for the hard lifting, but I didn't know how to get the confidence ellipse without going to car. I'm guessing there are other ways to attack this.

我使用rgl来进行硬举,但我不知道如何得到信心椭圆而不去汽车。我猜还有其他的方法来攻击这个。

library(rgl) # plot3d, quads3d, lines3d, grid3d, par3d, axes3d, box3d, mtext3d
library(car) # dataEllipse

Process the data

Getting the histogram data without plotting it, I then extract the densities and normalize them into probabilities. The *max variables are to simplify future plotting.

得到直方图数据而不进行绘图,然后提取密度并将其规范化为概率。最大的变量是简化未来的绘图。

hx <- hist(bvn[,2], plot=FALSE)
hxs <- hx$density / sum(hx$density)
hy <- hist(bvn[,1], plot=FALSE)
hys <- hy$density / sum(hy$density)

## [xy]max: so that there's no overlap in the adjoining corner
xmax <- tail(hx$breaks, n=1) + diff(tail(hx$breaks, n=2))
ymax <- tail(hy$breaks, n=1) + diff(tail(hy$breaks, n=2))
zmax <- max(hxs, hys)

Basic scatterplot on the floor

The scale should be set to whatever is appropriate based on the distributions. Admittedly, the X and Y labels aren't placed beautifully, but that shouldn't be too hard to reposition based on the data.

应该根据发行版设置适当的规模。不可否认的是,X和Y的标签并不是很漂亮,但是根据这些数据重新定位并不难。

## the base scatterplot
plot3d(bvn[,2], bvn[,1], 0, zlim=c(0, zmax), pch='.',
       xlab='X', ylab='Y', zlab='', axes=FALSE)
par3d(scale=c(1,1,3))

Histograms on the back walls

I couldn't figure out how to get them automatically plotted on a plane in the overall 3D render, so I had to make each rect manually.

我不知道如何让它们在整个3D渲染的平面上自动绘制出来,所以我必须手工制作每一个矩形。

## manually create each histogram
for (ii in seq_along(hx$counts)) {
    quads3d(hx$breaks[ii]*c(.9,.9,.1,.1) + hx$breaks[ii+1]*c(.1,.1,.9,.9),
            rep(ymax, 4),
            hxs[ii]*c(0,1,1,0), color='gray80')
}
for (ii in seq_along(hy$counts)) {
    quads3d(rep(xmax, 4),
            hy$breaks[ii]*c(.9,.9,.1,.1) + hy$breaks[ii+1]*c(.1,.1,.9,.9),
            hys[ii]*c(0,1,1,0), color='gray80')
}

Summary Lines

## I use these to ensure the lines are plotted "in front of" the
## respective dot/hist
bb <- par3d('bbox')
inset <- 0.02 # percent off of the floor/wall for lines
x1 <- bb[1] + (1-inset)*diff(bb[1:2])
y1 <- bb[3] + (1-inset)*diff(bb[3:4])
z1 <- bb[5] + inset*diff(bb[5:6])

## even with draw=FALSE, dataEllipse still pops up a dev, so I create
## a dummy dev and destroy it ... better way to do this?
dev.new()
de <- dataEllipse(bvn[,1], bvn[,2], draw=FALSE, levels=0.95)
dev.off()

## the ellipse
lines3d(de[,2], de[,1], z1, color='green', lwd=3)

## the two density curves, probability-style
denx <- density(bvn[,2])
lines3d(denx$x, rep(y1, length(denx$x)), denx$y / sum(hx$density), col='red', lwd=3)
deny <- density(bvn[,1])
lines3d(rep(x1, length(deny$x)), deny$x, deny$y / sum(hy$density), col='blue', lwd=3)

Beautifications

grid3d(c('x+', 'y+', 'z-'), n=10)
box3d()
axes3d(edges=c('x-', 'y-', 'z+'))
outset <- 1.2 # place text outside of bbox *this* percentage
mtext3d('P(X)', edge='x+', pos=c(0, ymax, outset * zmax))
mtext3d('P(Y)', edge='y+', pos=c(xmax, 0, outset * zmax))

Final Product

One bonus of using rgl is that you can spin it around with your mouse and find the best perspective. Lacking making an animation for this SO page, doing all of the above should allow you the play-time. (If you spin it, you'll be able to see that the lines are slightly in front of the histograms and slightly above the scatterplot; otherwise I found intersections, so it looked noncontinuous at places.)

使用rgl的一个好处是,你可以用鼠标旋转它,找到最好的视角。如果没有为这个页面做动画,那么做以上所有的事情都可以让你在游戏的时间。(如果你旋转它,你就能看到这些线在直方图前面稍微有点,略高于散点图;否则,我发现了十字路口,所以它在一些地方看起来是不连续的。

用R或Matlab进行双变量分布的三维图。

In the end, I find this a bit distracting (the 2D variants sufficed): showing the z-axis implies that there is a third dimension to the data; Tufte specifically discourages this behavior (Tufte, "Envisioning Information," 1990). However, with higher dimensionality, this technique of using RGL will allow significant perspective on patterns.

最后,我发现这有点让人分心(2D变体已经足够了):显示z轴意味着数据有第三个维度;Tufte特别不鼓励这种行为(Tufte,“想象信息”,1990)。然而,随着维度的提高,这种使用RGL的技术将允许对模式进行重要的透视。

(For the record, Win7 x64, tested with R-3.0.3 in 32-bit and 64-bit, rgl v0.93.996, car v2.0-19.)

(对于记录,Win7 x64,在32位和64位的R-3.0.3测试,rgl v0.93.996, car v2.0-19。)

#3


6  

Create the dataframe with bvn <- as.data.frame(gibbs(10000,0.98)). Several 2d solutions in R:

使用bvn <- as.data.frame(gibbs(10000,0.98))创建dataframe。R中的几个二维解:


1: A quick & dirty solution with the psych package:

1:快速、肮脏的心理治疗方案:

library(psych)
scatter.hist(x=bvn$V1, y=bvn$V2, density=TRUE, ellipse=TRUE)

which results in:

结果:

用R或Matlab进行双变量分布的三维图。


2: A nice & pretty solution with ggplot2:

2:ggplot2的一个漂亮的解决方案:

library(ggplot2)
library(gridExtra)
library(devtools)
source_url("https://raw.github.com/low-decarie/FAAV/master/r/stat-ellipse.R") # needed to create the 95% confidence ellipse

htop <- ggplot(data=bvn, aes(x=V1)) + 
  geom_histogram(aes(y=..density..), fill = "white", color = "black", binwidth = 2) + 
  stat_density(colour = "blue", geom="line", size = 1.5, position="identity", show_guide=FALSE) +
  scale_x_continuous("V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) + 
  scale_y_continuous("Count", breaks=c(0.0,0.01,0.02,0.03,0.04), labels=c(0,100,200,300,400)) + 
  theme_bw() + theme(axis.title.x = element_blank())

blank <- ggplot() + geom_point(aes(1,1), colour="white") +
  theme(axis.ticks=element_blank(), panel.background=element_blank(), panel.grid=element_blank(),
        axis.text.x=element_blank(), axis.text.y=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank())

scatter <- ggplot(data=bvn, aes(x=V1, y=V2)) + 
  geom_point(size = 0.6) + stat_ellipse(level = 0.95, size = 1, color="green") +
  scale_x_continuous("label V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) + 
  scale_y_continuous("label V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) + 
  theme_bw()

hright <- ggplot(data=bvn, aes(x=V2)) + 
  geom_histogram(aes(y=..density..), fill = "white", color = "black", binwidth = 1) + 
  stat_density(colour = "red", geom="line", size = 1, position="identity", show_guide=FALSE) +
  scale_x_continuous("V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) + 
  scale_y_continuous("Count", breaks=c(0.0,0.02,0.04,0.06,0.08), labels=c(0,200,400,600,800)) + 
  coord_flip() + theme_bw() + theme(axis.title.y = element_blank())

grid.arrange(htop, blank, scatter, hright, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))

which results in:

结果:

用R或Matlab进行双变量分布的三维图。


3: A compact solution with ggplot2:

3:与ggplot2的紧凑解决方案:

library(ggplot2)
library(devtools)
source_url("https://raw.github.com/low-decarie/FAAV/master/r/stat-ellipse.R") # needed to create the 95% confidence ellipse

ggplot(data=bvn, aes(x=V1, y=V2)) + 
  geom_point(size = 0.6) + 
  geom_rug(sides="t", size=0.05, col=rgb(.8,0,0,alpha=.3)) + 
  geom_rug(sides="r", size=0.05, col=rgb(0,0,.8,alpha=.3)) + 
  stat_ellipse(level = 0.95, size = 1, color="green") +
  scale_x_continuous("label V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) + 
  scale_y_continuous("label V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) + 
  theme_bw()

which results in:

结果:

用R或Matlab进行双变量分布的三维图。

#4


2  

Matlab's implementation is called scatterhist and requires the Statistics Toolbox. Unfortunately it is not 3D, it is an extended 2D plot.

Matlab的实现被称为scatterhist,需要统计工具箱。不幸的是,它不是3D,而是一个扩展的2D图形。

% some example data
x = randn(1000,1);
y = randn(1000,1);

h = scatterhist(x,y,'Location','SouthEast',...
                'Direction','out',...
                'Color','k',...
                'Marker','o',...
                'MarkerSize',4);

legend('data')
legend boxoff
grid on

用R或Matlab进行双变量分布的三维图。

It also allows grouping of datasets:

它还允许对数据集进行分组:

load fisheriris.mat;
x = meas(:,1);        %// x-data
y = meas(:,2);        %// y-data
gnames = species;     %// assigning of names to certain elements of x and y


scatterhist(x,y,'Group',gnames,'Location','SouthEast',...
            'Direction','out',...
            'Color','kbr',...
            'LineStyle',{'-','-.',':'},...
            'LineWidth',[2,2,2],...
            'Marker','+od',...
            'MarkerSize',[4,5,6]);

用R或Matlab进行双变量分布的三维图。

#5


2  

R Implementation

R实现

Load library "car". We use only dataEllipse function to draw ellipse based on the percent of data (0.95 means 95% data falls within the ellipse).

装载库“汽车”。我们只使用dataEllipse函数来根据数据的百分比来绘制椭圆(0.95意味着95%的数据属于椭圆)。

library("car")

gibbs<-function (n, rho) 
 {
   mat <- matrix(ncol = 2, nrow = n)
   x <- 0
   y <- 0
   mat[1, ] <- c(x, y)
   for (i in 2:n) {
   x <- rnorm(1, rho * y, (1 - rho^2))
   y <- rnorm(1, rho * x,(1 - rho^2))
   mat[i, ] <- c(x, y)
   }
   mat
 }

bvn<-gibbs(10000,0.98)

Open a PDF Device:

打开一个PDF装置:

OUTFILE <- "bivar_dist.pdf"

pdf(OUTFILE)

Set up the layout first

首先设置布局。

layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), widths=c(3,1), heights=c(1,3), TRUE)

Make Scatterplot

做散点图

par(mar=c(5.1,4.1,0.1,0))

The commented lines can be used to plot a scatter diagram without "car" package from where we use dataEllipse function

注释行可以用来绘制没有“car”包的散点图,我们使用dataEllipse函数。

# plot(bvn[,2], bvn[,1], 
#      pch=".",cex = 1, col=1:length(bvn[,2]),
#      xlim=c(-0.6, 0.6),
#      ylim=c(-0.6,0.6),
#      xlab="X",
#      ylab="Y")
# 
# grid(NULL, NULL, lwd = 2)


dataEllipse(bvn[,2], bvn[,1],
        levels = c(0.95),
        pch=".",
        col=1:length(bvn[,2]),
        xlim=c(-0.6, 0.6),
        ylim=c(-0.6,0.6),
        xlab="X",
        ylab="Y",
        center.cex = 1
        )

Plot histogram of X variable in the top row

第一行的X变量的图直方图。

     par(mar=c(0,4.1,3,0))

     hist(bvn[,2],
          ann=FALSE,axes=FALSE,
          col="light blue",border="black",
          )
     title(main = "Bivariate Normal Distribution")

Plot histogram of Y variable to the right of the scatterplot

将Y变量的直方图绘制到scatterplot的右侧。

     yhist <- hist(bvn[,1],
                   plot=FALSE
                  )

     par(mar=c(5.1,0,0.1,1))

     barplot(yhist$density,
             horiz=TRUE,
             space=0,
             axes=FALSE,
             col="light blue",
             border="black"
             )

 dev.off(which = dev.cur())

用R或Matlab进行双变量分布的三维图。

用R或Matlab进行双变量分布的三维图。

      dataEllipse(bvn[,2], bvn[,1],
                  levels = c(0.5, 0.95),
                  pch=".",
                  col= 1:length(bvn[,2]),
                  xlim=c(-0.6, 0.6),
                  ylim=c(-0.6,0.6),
                  xlab="X",
                  ylab="Y",
                  center.cex = 1
                 )

#6


0  

I took @jaap's code above and turned it into a slightly more generalized function. The code can be sourced here. Note: I am not adding anything new to @jaap's code, just a few minor changes and wrapped it in a function. Hopefully it is helpful.

我把@jaap的代码放在上面,把它变成一个稍微一般化的函数。代码可以从这里获取。注意:我没有在@jaap的代码中添加任何新内容,只是进行了一些细微的修改,并将其封装在一个函数中。希望是有帮助的。

density.hist <- function(df, x=NULL, y=NULL) {

require(ggplot2)
require(gridExtra)
require(devtools)

htop <- ggplot(data=df, aes_string(x=x)) + 
  geom_histogram(aes(y=..density..), fill = "white", color = "black", bins=100) + 
  stat_density(colour = "blue", geom="line", size = 1, position="identity", show.legend=FALSE) +
  theme_bw() + theme(axis.title.x = element_blank())

blank <- ggplot() + geom_point(aes(1,1), colour="white") +
  theme(axis.ticks=element_blank(), panel.background=element_blank(), panel.grid=element_blank(),
  axis.text.x=element_blank(), axis.text.y=element_blank(), axis.title.x=element_blank(), 
  axis.title.y=element_blank())

scatter <- ggplot(data=df, aes_string(x=x, y=y)) + 
  geom_point(size = 0.6) + stat_ellipse(type = "norm", linetype = 2, color="green",size=1) +
  stat_ellipse(type = "t",color="green",size=1) +
  theme_bw() + labs(x=x, y=y)

hright <- ggplot(data=df, aes_string(x=x)) + 
  geom_histogram(aes(y=..density..), fill = "white", color = "black", bins=100) + 
  stat_density(colour = "red", geom="line", size = 1, position="identity", show.legend=FALSE) +
  coord_flip() + theme_bw() + theme(axis.title.y = element_blank())

grid.arrange(htop, blank, scatter, hright, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))

}

用R或Matlab进行双变量分布的三维图。

#1


10  

You could do it in Matlab programmatically.

你可以用Matlab编程。

This is the result:

这是由于:

用R或Matlab进行双变量分布的三维图。

Code:

代码:

% Generate some data.
data = randn(10000, 2);

% Scale and rotate the data (for demonstration purposes).
data(:,1) = data(:,1) * 2;
theta = deg2rad(130);
data = ([cos(theta) -sin(theta); sin(theta) cos(theta)] * data')';

% Get some info.
m = mean(data);
s = std(data);
axisMin = m - 4 * s;
axisMax = m + 4 * s;

% Plot data points on (X=data(x), Y=data(y), Z=0)
plot3(data(:,1), data(:,2), zeros(size(data,1),1), 'k.', 'MarkerSize', 1);

% Turn on hold to allow subsequent plots.
hold on

% Plot the ellipse using Eigenvectors and Eigenvalues.
data_zeroMean = bsxfun(@minus, data, m);
[V,D] = eig(data_zeroMean' * data_zeroMean / (size(data_zeroMean, 1)));
[D, order] = sort(diag(D), 'descend');
D = diag(D);
V = V(:, order);
V = V * sqrt(D);
t = linspace(0, 2 * pi);
e = bsxfun(@plus, 2*V * [cos(t); sin(t)], m');
plot3(...
    e(1,:), e(2,:), ...
    zeros(1, nPointsEllipse), 'g-', 'LineWidth', 2);

maxP = 0;
for side = 1:2
    % Calculate the histogram.
    p = [0 hist(data(:,side), 20) 0];
    p = p / sum(p);
    maxP = max([maxP p]);
    dx = (axisMax(side) - axisMin(side)) / numel(p) / 2.3;
    p2 = [zeros(1,numel(p)); p; p; zeros(1,numel(p))]; p2 = p2(:);
    x = linspace(axisMin(side), axisMax(side), numel(p));
    x2 = [x-dx; x-dx; x+dx; x+dx]; x2 = max(min(x2(:), axisMax(side)), axisMin(side));

    % Calculate the curve.
    nPtsCurve = numel(p) * 10;
    xx = linspace(axisMin(side), axisMax(side), nPtsCurve);

    % Plot the curve and the histogram.
    if side == 1
        plot3(xx, ones(1, nPtsCurve) * axisMax(3 - side), spline(x,p,xx), 'r-', 'LineWidth', 2);
        plot3(x2, ones(numel(p2), 1) * axisMax(3 - side), p2, 'k-', 'LineWidth', 1);
    else
        plot3(ones(1, nPtsCurve) * axisMax(3 - side), xx, spline(x,p,xx), 'b-', 'LineWidth', 2);
        plot3(ones(numel(p2), 1) * axisMax(3 - side), x2, p2, 'k-', 'LineWidth', 1);
    end

end

% Turn off hold.
hold off

% Axis labels.
xlabel('x');
ylabel('y');
zlabel('p(.)');

axis([axisMin(1) axisMax(1) axisMin(2) axisMax(2) 0 maxP * 1.05]);
grid on;

#2


8  

I must admit, I took this on as a challenge because I was looking for different ways to show other datasets. I have normally done something along the lines of the scatterhist 2D graphs shown in other answers, but I've wanted to try my hand at rgl for a while.

我必须承认,我认为这是一个挑战,因为我正在寻找不同的方法来展示其他数据集。我通常会做一些类似于其他答案的scatterhist 2D图,但我想尝试一下rgl。

I use your function to generate the data

我使用您的函数来生成数据。

gibbs<-function (n, rho) {
    mat <- matrix(ncol = 2, nrow = n)
    x <- 0
    y <- 0
    mat[1, ] <- c(x, y)
    for (i in 2:n) {
        x <- rnorm(1, rho * y, (1 - rho^2))
        y <- rnorm(1, rho * x, (1 - rho^2))
        mat[i, ] <- c(x, y)
    }
    mat
}
bvn <- gibbs(10000, 0.98)

Setup

I use rgl for the hard lifting, but I didn't know how to get the confidence ellipse without going to car. I'm guessing there are other ways to attack this.

我使用rgl来进行硬举,但我不知道如何得到信心椭圆而不去汽车。我猜还有其他的方法来攻击这个。

library(rgl) # plot3d, quads3d, lines3d, grid3d, par3d, axes3d, box3d, mtext3d
library(car) # dataEllipse

Process the data

Getting the histogram data without plotting it, I then extract the densities and normalize them into probabilities. The *max variables are to simplify future plotting.

得到直方图数据而不进行绘图,然后提取密度并将其规范化为概率。最大的变量是简化未来的绘图。

hx <- hist(bvn[,2], plot=FALSE)
hxs <- hx$density / sum(hx$density)
hy <- hist(bvn[,1], plot=FALSE)
hys <- hy$density / sum(hy$density)

## [xy]max: so that there's no overlap in the adjoining corner
xmax <- tail(hx$breaks, n=1) + diff(tail(hx$breaks, n=2))
ymax <- tail(hy$breaks, n=1) + diff(tail(hy$breaks, n=2))
zmax <- max(hxs, hys)

Basic scatterplot on the floor

The scale should be set to whatever is appropriate based on the distributions. Admittedly, the X and Y labels aren't placed beautifully, but that shouldn't be too hard to reposition based on the data.

应该根据发行版设置适当的规模。不可否认的是,X和Y的标签并不是很漂亮,但是根据这些数据重新定位并不难。

## the base scatterplot
plot3d(bvn[,2], bvn[,1], 0, zlim=c(0, zmax), pch='.',
       xlab='X', ylab='Y', zlab='', axes=FALSE)
par3d(scale=c(1,1,3))

Histograms on the back walls

I couldn't figure out how to get them automatically plotted on a plane in the overall 3D render, so I had to make each rect manually.

我不知道如何让它们在整个3D渲染的平面上自动绘制出来,所以我必须手工制作每一个矩形。

## manually create each histogram
for (ii in seq_along(hx$counts)) {
    quads3d(hx$breaks[ii]*c(.9,.9,.1,.1) + hx$breaks[ii+1]*c(.1,.1,.9,.9),
            rep(ymax, 4),
            hxs[ii]*c(0,1,1,0), color='gray80')
}
for (ii in seq_along(hy$counts)) {
    quads3d(rep(xmax, 4),
            hy$breaks[ii]*c(.9,.9,.1,.1) + hy$breaks[ii+1]*c(.1,.1,.9,.9),
            hys[ii]*c(0,1,1,0), color='gray80')
}

Summary Lines

## I use these to ensure the lines are plotted "in front of" the
## respective dot/hist
bb <- par3d('bbox')
inset <- 0.02 # percent off of the floor/wall for lines
x1 <- bb[1] + (1-inset)*diff(bb[1:2])
y1 <- bb[3] + (1-inset)*diff(bb[3:4])
z1 <- bb[5] + inset*diff(bb[5:6])

## even with draw=FALSE, dataEllipse still pops up a dev, so I create
## a dummy dev and destroy it ... better way to do this?
dev.new()
de <- dataEllipse(bvn[,1], bvn[,2], draw=FALSE, levels=0.95)
dev.off()

## the ellipse
lines3d(de[,2], de[,1], z1, color='green', lwd=3)

## the two density curves, probability-style
denx <- density(bvn[,2])
lines3d(denx$x, rep(y1, length(denx$x)), denx$y / sum(hx$density), col='red', lwd=3)
deny <- density(bvn[,1])
lines3d(rep(x1, length(deny$x)), deny$x, deny$y / sum(hy$density), col='blue', lwd=3)

Beautifications

grid3d(c('x+', 'y+', 'z-'), n=10)
box3d()
axes3d(edges=c('x-', 'y-', 'z+'))
outset <- 1.2 # place text outside of bbox *this* percentage
mtext3d('P(X)', edge='x+', pos=c(0, ymax, outset * zmax))
mtext3d('P(Y)', edge='y+', pos=c(xmax, 0, outset * zmax))

Final Product

One bonus of using rgl is that you can spin it around with your mouse and find the best perspective. Lacking making an animation for this SO page, doing all of the above should allow you the play-time. (If you spin it, you'll be able to see that the lines are slightly in front of the histograms and slightly above the scatterplot; otherwise I found intersections, so it looked noncontinuous at places.)

使用rgl的一个好处是,你可以用鼠标旋转它,找到最好的视角。如果没有为这个页面做动画,那么做以上所有的事情都可以让你在游戏的时间。(如果你旋转它,你就能看到这些线在直方图前面稍微有点,略高于散点图;否则,我发现了十字路口,所以它在一些地方看起来是不连续的。

用R或Matlab进行双变量分布的三维图。

In the end, I find this a bit distracting (the 2D variants sufficed): showing the z-axis implies that there is a third dimension to the data; Tufte specifically discourages this behavior (Tufte, "Envisioning Information," 1990). However, with higher dimensionality, this technique of using RGL will allow significant perspective on patterns.

最后,我发现这有点让人分心(2D变体已经足够了):显示z轴意味着数据有第三个维度;Tufte特别不鼓励这种行为(Tufte,“想象信息”,1990)。然而,随着维度的提高,这种使用RGL的技术将允许对模式进行重要的透视。

(For the record, Win7 x64, tested with R-3.0.3 in 32-bit and 64-bit, rgl v0.93.996, car v2.0-19.)

(对于记录,Win7 x64,在32位和64位的R-3.0.3测试,rgl v0.93.996, car v2.0-19。)

#3


6  

Create the dataframe with bvn <- as.data.frame(gibbs(10000,0.98)). Several 2d solutions in R:

使用bvn <- as.data.frame(gibbs(10000,0.98))创建dataframe。R中的几个二维解:


1: A quick & dirty solution with the psych package:

1:快速、肮脏的心理治疗方案:

library(psych)
scatter.hist(x=bvn$V1, y=bvn$V2, density=TRUE, ellipse=TRUE)

which results in:

结果:

用R或Matlab进行双变量分布的三维图。


2: A nice & pretty solution with ggplot2:

2:ggplot2的一个漂亮的解决方案:

library(ggplot2)
library(gridExtra)
library(devtools)
source_url("https://raw.github.com/low-decarie/FAAV/master/r/stat-ellipse.R") # needed to create the 95% confidence ellipse

htop <- ggplot(data=bvn, aes(x=V1)) + 
  geom_histogram(aes(y=..density..), fill = "white", color = "black", binwidth = 2) + 
  stat_density(colour = "blue", geom="line", size = 1.5, position="identity", show_guide=FALSE) +
  scale_x_continuous("V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) + 
  scale_y_continuous("Count", breaks=c(0.0,0.01,0.02,0.03,0.04), labels=c(0,100,200,300,400)) + 
  theme_bw() + theme(axis.title.x = element_blank())

blank <- ggplot() + geom_point(aes(1,1), colour="white") +
  theme(axis.ticks=element_blank(), panel.background=element_blank(), panel.grid=element_blank(),
        axis.text.x=element_blank(), axis.text.y=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank())

scatter <- ggplot(data=bvn, aes(x=V1, y=V2)) + 
  geom_point(size = 0.6) + stat_ellipse(level = 0.95, size = 1, color="green") +
  scale_x_continuous("label V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) + 
  scale_y_continuous("label V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) + 
  theme_bw()

hright <- ggplot(data=bvn, aes(x=V2)) + 
  geom_histogram(aes(y=..density..), fill = "white", color = "black", binwidth = 1) + 
  stat_density(colour = "red", geom="line", size = 1, position="identity", show_guide=FALSE) +
  scale_x_continuous("V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) + 
  scale_y_continuous("Count", breaks=c(0.0,0.02,0.04,0.06,0.08), labels=c(0,200,400,600,800)) + 
  coord_flip() + theme_bw() + theme(axis.title.y = element_blank())

grid.arrange(htop, blank, scatter, hright, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))

which results in:

结果:

用R或Matlab进行双变量分布的三维图。


3: A compact solution with ggplot2:

3:与ggplot2的紧凑解决方案:

library(ggplot2)
library(devtools)
source_url("https://raw.github.com/low-decarie/FAAV/master/r/stat-ellipse.R") # needed to create the 95% confidence ellipse

ggplot(data=bvn, aes(x=V1, y=V2)) + 
  geom_point(size = 0.6) + 
  geom_rug(sides="t", size=0.05, col=rgb(.8,0,0,alpha=.3)) + 
  geom_rug(sides="r", size=0.05, col=rgb(0,0,.8,alpha=.3)) + 
  stat_ellipse(level = 0.95, size = 1, color="green") +
  scale_x_continuous("label V1", limits = c(-40,40), breaks = c(-40,-20,0,20,40)) + 
  scale_y_continuous("label V2", limits = c(-20,20), breaks = c(-20,-10,0,10,20)) + 
  theme_bw()

which results in:

结果:

用R或Matlab进行双变量分布的三维图。

#4


2  

Matlab's implementation is called scatterhist and requires the Statistics Toolbox. Unfortunately it is not 3D, it is an extended 2D plot.

Matlab的实现被称为scatterhist,需要统计工具箱。不幸的是,它不是3D,而是一个扩展的2D图形。

% some example data
x = randn(1000,1);
y = randn(1000,1);

h = scatterhist(x,y,'Location','SouthEast',...
                'Direction','out',...
                'Color','k',...
                'Marker','o',...
                'MarkerSize',4);

legend('data')
legend boxoff
grid on

用R或Matlab进行双变量分布的三维图。

It also allows grouping of datasets:

它还允许对数据集进行分组:

load fisheriris.mat;
x = meas(:,1);        %// x-data
y = meas(:,2);        %// y-data
gnames = species;     %// assigning of names to certain elements of x and y


scatterhist(x,y,'Group',gnames,'Location','SouthEast',...
            'Direction','out',...
            'Color','kbr',...
            'LineStyle',{'-','-.',':'},...
            'LineWidth',[2,2,2],...
            'Marker','+od',...
            'MarkerSize',[4,5,6]);

用R或Matlab进行双变量分布的三维图。

#5


2  

R Implementation

R实现

Load library "car". We use only dataEllipse function to draw ellipse based on the percent of data (0.95 means 95% data falls within the ellipse).

装载库“汽车”。我们只使用dataEllipse函数来根据数据的百分比来绘制椭圆(0.95意味着95%的数据属于椭圆)。

library("car")

gibbs<-function (n, rho) 
 {
   mat <- matrix(ncol = 2, nrow = n)
   x <- 0
   y <- 0
   mat[1, ] <- c(x, y)
   for (i in 2:n) {
   x <- rnorm(1, rho * y, (1 - rho^2))
   y <- rnorm(1, rho * x,(1 - rho^2))
   mat[i, ] <- c(x, y)
   }
   mat
 }

bvn<-gibbs(10000,0.98)

Open a PDF Device:

打开一个PDF装置:

OUTFILE <- "bivar_dist.pdf"

pdf(OUTFILE)

Set up the layout first

首先设置布局。

layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), widths=c(3,1), heights=c(1,3), TRUE)

Make Scatterplot

做散点图

par(mar=c(5.1,4.1,0.1,0))

The commented lines can be used to plot a scatter diagram without "car" package from where we use dataEllipse function

注释行可以用来绘制没有“car”包的散点图,我们使用dataEllipse函数。

# plot(bvn[,2], bvn[,1], 
#      pch=".",cex = 1, col=1:length(bvn[,2]),
#      xlim=c(-0.6, 0.6),
#      ylim=c(-0.6,0.6),
#      xlab="X",
#      ylab="Y")
# 
# grid(NULL, NULL, lwd = 2)


dataEllipse(bvn[,2], bvn[,1],
        levels = c(0.95),
        pch=".",
        col=1:length(bvn[,2]),
        xlim=c(-0.6, 0.6),
        ylim=c(-0.6,0.6),
        xlab="X",
        ylab="Y",
        center.cex = 1
        )

Plot histogram of X variable in the top row

第一行的X变量的图直方图。

     par(mar=c(0,4.1,3,0))

     hist(bvn[,2],
          ann=FALSE,axes=FALSE,
          col="light blue",border="black",
          )
     title(main = "Bivariate Normal Distribution")

Plot histogram of Y variable to the right of the scatterplot

将Y变量的直方图绘制到scatterplot的右侧。

     yhist <- hist(bvn[,1],
                   plot=FALSE
                  )

     par(mar=c(5.1,0,0.1,1))

     barplot(yhist$density,
             horiz=TRUE,
             space=0,
             axes=FALSE,
             col="light blue",
             border="black"
             )

 dev.off(which = dev.cur())

用R或Matlab进行双变量分布的三维图。

用R或Matlab进行双变量分布的三维图。

      dataEllipse(bvn[,2], bvn[,1],
                  levels = c(0.5, 0.95),
                  pch=".",
                  col= 1:length(bvn[,2]),
                  xlim=c(-0.6, 0.6),
                  ylim=c(-0.6,0.6),
                  xlab="X",
                  ylab="Y",
                  center.cex = 1
                 )

#6


0  

I took @jaap's code above and turned it into a slightly more generalized function. The code can be sourced here. Note: I am not adding anything new to @jaap's code, just a few minor changes and wrapped it in a function. Hopefully it is helpful.

我把@jaap的代码放在上面,把它变成一个稍微一般化的函数。代码可以从这里获取。注意:我没有在@jaap的代码中添加任何新内容,只是进行了一些细微的修改,并将其封装在一个函数中。希望是有帮助的。

density.hist <- function(df, x=NULL, y=NULL) {

require(ggplot2)
require(gridExtra)
require(devtools)

htop <- ggplot(data=df, aes_string(x=x)) + 
  geom_histogram(aes(y=..density..), fill = "white", color = "black", bins=100) + 
  stat_density(colour = "blue", geom="line", size = 1, position="identity", show.legend=FALSE) +
  theme_bw() + theme(axis.title.x = element_blank())

blank <- ggplot() + geom_point(aes(1,1), colour="white") +
  theme(axis.ticks=element_blank(), panel.background=element_blank(), panel.grid=element_blank(),
  axis.text.x=element_blank(), axis.text.y=element_blank(), axis.title.x=element_blank(), 
  axis.title.y=element_blank())

scatter <- ggplot(data=df, aes_string(x=x, y=y)) + 
  geom_point(size = 0.6) + stat_ellipse(type = "norm", linetype = 2, color="green",size=1) +
  stat_ellipse(type = "t",color="green",size=1) +
  theme_bw() + labs(x=x, y=y)

hright <- ggplot(data=df, aes_string(x=x)) + 
  geom_histogram(aes(y=..density..), fill = "white", color = "black", bins=100) + 
  stat_density(colour = "red", geom="line", size = 1, position="identity", show.legend=FALSE) +
  coord_flip() + theme_bw() + theme(axis.title.y = element_blank())

grid.arrange(htop, blank, scatter, hright, ncol=2, nrow=2, widths=c(4, 1), heights=c(1, 4))

}

用R或Matlab进行双变量分布的三维图。