# 【2.6】直方图（Histograms）和频数多边形（frequency polygons）

geom_freqpoly(mapping = NULL, data = NULL, stat = "bin",
position = "identity", ..., na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE)

geom_histogram(mapping = NULL, data = NULL, stat = "bin",
position = "stack", ..., binwidth = NULL, bins = NULL, na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE)

stat_bin(mapping = NULL, data = NULL, geom = "bar", position = "stack",
..., binwidth = NULL, bins = NULL, center = NULL, boundary = NULL,
breaks = NULL, closed = c("right", "left"), pad = FALSE,
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE)


## 二、例子

ggplot(diamonds, aes(carat)) +
geom_histogram()
#> stat_bin() using bins = 30. Pick better value with binwidth.

ggplot(diamonds, aes(carat)) +
geom_histogram(binwidth = 0.01)

ggplot(diamonds, aes(carat)) +
geom_histogram(bins = 200)

# Rather than stacking histograms, it's easier to compare frequency
# polygons
ggplot(diamonds, aes(price, fill = cut)) +
geom_histogram(binwidth = 500)

ggplot(diamonds, aes(price, colour = cut)) +
geom_freqpoly(binwidth = 500)

# To make it easier to compare distributions with very different counts,
# put density on the y axis instead of the default count
ggplot(diamonds, aes(price, stat(density), colour = cut)) +
geom_freqpoly(binwidth = 500)

if (require("ggplot2movies")) {
# Often we don't want the height of the bar to represent the
# count of observations, but the sum of some other variable.
# For example, the following plot shows the number of movies
# in each rating.
m <- ggplot(movies, aes(rating))
m + geom_histogram(binwidth = 0.1)

# If, however, we want to see the number of votes cast in each
# category, we need to weight by the votes variable

# For transformed scales, binwidth applies to the transformed data.
# The bins have constant width on the transformed scale.
m + geom_histogram() + scale_x_log10()
m + geom_histogram(binwidth = 0.05) + scale_x_log10()

# For transformed coordinate systems, the binwidth applies to the
# raw data. The bins have constant width on the original scale.

# Using log scales does not work here, because the first
# bar is anchored at zero, and so when transformed becomes negative
# infinity. This is not a problem when transforming the scales, because
# no observations have 0 ratings.
m + geom_histogram(boundary = 0) + coord_trans(x = "log10")
# Use boundary = 0, to make sure we don't take sqrt of negative values
m + geom_histogram(boundary = 0) + coord_trans(x = "sqrt")

# You can also transform the y axis.  Remember that the base of the bars
# has value 0, so log transformations are not appropriate
m <- ggplot(movies, aes(x = rating))
m + geom_histogram(binwidth = 0.5) + scale_y_sqrt()
}

# You can specify a function for calculating binwidth,
# particularly useful when faceting along variables with
# different ranges
mtlong <- reshape2::melt(mtcars)
#> No id variables; using all as measure variables
ggplot(mtlong, aes(value)) + facet_wrap(~variable, scales = 'free_x') +
geom_histogram(binwidth = function(x) 2 * IQR(x) / (length(x)^(1/3)))


## 参考资料

https://ggplot2.tidyverse.org/reference/geom_histogram.html