Created
June 2, 2025 15:30
-
-
Save VincentTam/b6bd59f251c33bfa51e5ba55b32e683b to your computer and use it in GitHub Desktop.
Generalized Cauchy–Schwarz
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import "@preview/touying:0.6.1": * | |
#import themes.simple: * | |
#show: simple-theme.with(aspect-ratio: "16-9") | |
#import "@preview/theorion:0.3.3": * | |
#show: show-theorion | |
#import "@preview/cetz:0.3.4" | |
#import "@preview/mannot:0.3.0" | |
#set par(spacing: 0.55em) | |
#show link: underline | |
#let mainrefurl = "https://www.sec.ntnu.edu.tw/uploads/asset/data/625641aa381784d09345bff8/1998-214-02(16-22).pdf" | |
= Generalized Cauchy--Schwarz Inequality | |
Vincent Tam | |
Notes for #link(mainrefurl)[a journal article on this inequality] | |
== Review: AM--GM Inequality | |
#slide( | |
repeat: 6, | |
self => [ | |
#let (uncover, only, alternatives) = utils.methods(self) | |
#grid( | |
columns: (1fr, 1fr), | |
[ | |
For any positive real numbers $a_1, dots, a_n$, | |
$ | |
underbrace((product_(k=1)^n a_i)^(1\/n), "geometric mean" \ "of" a_1\, dots \, a_n) <= | |
underbrace(1/n (sum_(k=1)^n a_i), "arithmetic mean" \ "of" a_1\, dots \, a_n) | |
$ | |
Equality holds if and only if all $a_1 = dots.c = a_n$. | |
], | |
only("2-")[ | |
#proof[(by replacement) | |
#only("2-4")[ | |
If all $a_i$'s are equal, then it's trivial. | |
] | |
#only("3-")[ | |
If not, let | |
$ alpha = underbrace(op("AM")(a_1, dots, a_n), "arithmetic mean of" a_1\, dots \, a_n). $ | |
] | |
#only("4-")[ | |
Then there exists $underbrace(i "and" j, "indices")$ such that $a_i < alpha < a_j$. | |
] | |
#only("5")[ | |
('$alpha$' comes from "#underline("a")rithmetic mean") | |
] | |
#only("6-")[ | |
$underbrace(a_i <-- text(fill: #blue, alpha), a_i "replaced by" text(fill: #blue, alpha))$, | |
$a_j <-- text(fill: #blue, a_i + a_j - alpha)$, so after this replacement process, | |
$ | |
"old AM" &= text(fill: #blue, "new AM"), "but" \ | |
"old GM" &< text(fill: #blue, "new GM") "because" \ | |
a_i a_j &< text(fill: #blue, alpha (a_i + a_j - alpha)) \ | |
<=> (alpha-a_i)(alpha-a_j) &< 0. | |
$ | |
After this replacement, in the #text(fill: blue, "new AM") | |
we have at least one less number not equal to $alpha$, | |
This process can be repeated until all $text(fill: #blue, a_i)$'s are equal | |
(to $text(fill: #blue, alpha)$), then | |
$ | |
text(fill: #blue, "final GM") = alpha. | |
$ | |
Hence | |
$ | |
&text(fill: #blue, "initial AM") = alpha = text(fill: #blue, "final GM") \ | |
&>= text(fill: #blue, "new GM") > "initial GM". | |
$ | |
] | |
] | |
] | |
) | |
] | |
) | |
== Generalized Cauchy--Schwarz Inequality | |
#v(0.5em) | |
#cetz.canvas({ | |
import cetz.draw: * | |
set-style(padding: 0.5em) | |
// left matrix | |
content( | |
(), | |
[$lr([wide #box(text(bottom-edge: "bounds", white)[$a_(i j)$], fill: blue, inset: 1pt, baseline: 50%) wide], size: #300%)_(n times m)$], | |
name: "A" | |
) | |
// right matrix | |
content((rel: (17em, 0)), name: "B", | |
[$mannot.mark(lr([wide #box(text(bottom-edge: "bounds", white)[$bold(a)_j$], fill: blue, inset: 1pt, baseline: 50%) wide], size: #300%)_(n times m), tag: #<Rmat>)$] | |
) | |
// right matrix ℓ-norm arrow | |
on-layer(-1, { | |
line((rel: (-1em, 3em)), (rel: (0pt, -5em)), stroke: (paint: blue.lighten(30%), thickness: 2pt), mark: (end: "barbed")) | |
}) | |
// right matrix step one | |
content((), name: "B1", anchor: "north", | |
box(text(blue, 0.7em, bottom-edge: "bounds")[$mannot.markhl(norm(bold(a)_j)_m, tag: #<lm>, color: #blue)$], fill: white) | |
) | |
// right matrix steps 1 → 2 | |
on-layer(-1, { | |
bezier((rel: (-2em, -1.1em)), (rel: (5em, -0.7em)), (rel: (4em, 0em), update: false), (rel: (5em, -0.2em), update: false), stroke: (paint: red, thickness: 2pt), mark: (end: "straight")) | |
}) | |
// right matrix step two | |
content((), name: "B2", anchor: "north", | |
box( | |
text(red, 0.6em, bottom-edge: "bounds")[2. product of all column \ $ell^m$ norms $norm(bold(a)_j)_m$], | |
inset: 5pt, | |
stroke: (paint: red, thickness: 1pt) | |
) | |
) | |
// left matrix step one | |
content( | |
"A.east", | |
box([$mannot.mark(Pi^((i)), tag: #<Pin>, color: #blue)$], fill: white, inset: 3pt), | |
anchor: "west", name: "A1", | |
) | |
// left matrix steps 1 → 2 | |
on-layer(-1, { | |
line("A.west", "A.east", stroke: (paint: blue.lighten(30%), thickness: 2pt), mark: (end: "barbed")) | |
line((rel: (1em, 1em)), (rel: (0, -5em)), stroke: (paint: red, thickness: 2pt), mark: (end: "barbed"), name: "A1A2") | |
}) | |
// left matrix step two | |
content((), name: "A2", anchor: "north", | |
box( | |
text(red, 0.6em, bottom-edge: "bounds")[2. sum of all row \ products $product^((i))$'s], | |
inset: 5pt, | |
stroke: (paint: red, thickness: 1pt) | |
) | |
) | |
content((name: "A1A2", anchor: 75%), text(red, 0.7em)[$sum$], anchor: "west") | |
// '≤' sign | |
line("A2", "B2", stroke: none, name: "A2B2") | |
content("A2B2.mid", name: "C", anchor: "west", text(red, 1em)[$bold(lt.eq)$]) | |
on-layer(-2, { | |
rect(("A2.north-west", "|-", "B2.north"), "B2.south-east", stroke: none, fill: yellow.lighten(40%)) | |
}) | |
}) | |
#mannot.annot(<Pin>, pos: top, dy: -1.8em)[1. product of all \ entries in $i$-th row] | |
#mannot.annot-cetz((<lm>, <Rmat>), cetz, { | |
import cetz.draw: * | |
content("Rmat.east", name: "lm-annot", text(0.6em, blue)[1. $ell^m$-norm of $j$-th \ column, i.e. \ $ (sum_(i = 1)^n a_(i j)^m)^(1\/m) $], anchor: "south-west") | |
bezier-through("lm.east", (rel: (7em, 2.7em)), "lm-annot.south", mark: (start: "barbed"), stoke: blue) | |
}) | |
== Question: How to remember this inequality? | |
#pause | |
- '#text(blue)[$-->$]' linked with '#text(blue)[$product$]' as '#text(blue)[H]' in "#text(blue)[H]orizontal" looks like '#box(baseline: 25%, [#par(leading: 0.4em)[#text(0.7em)[$product.co$] \ #text(0.7em, blue)[$product$]]])' | |
- '#text(red)[$arrow.b$]' linked with '#text(red)[$sum$]' as two '#text(red)[V]'s ("#text(red)[V]ertical") looks like '#box([#rotate(90deg)[V#text(red)[V]]])' | |
Orders of arrows in each side: | |
#grid( | |
columns: (1fr, 1fr), | |
[ | |
- LHS: #text(0.7em)[natural reading order (*"#underline[less] strange"*)] | |
1. "left ⟶ right" first | |
2. then #h(1em) #box(baseline: 50%, [#set align(center); top \ ↓ \ bottom]) | |
], | |
[ | |
- RHS: #text(0.7em)["*#underline[more] strange*" reading order] | |
1. #box(baseline: 50%, [#set align(center); top \ ↓ \ bottom]) #h(1em) first | |
2. then "left ⟶ right" | |
] | |
) | |
== Proof step 1: homogeneity on each column <homo> | |
#only("1")[Overall strategy is similar to the proof of Hölder's inequality.] | |
Observe that in the target inequality | |
$ | |
sum_(i=1)^n product_(j=1)^m a_(i j) <= | |
product_(j=1)^m (sum_(i=1)^n a_(i j)^m)^(1\/m), | |
$ | |
#only("1")[if we multiply (the entries of) the $j$-th column by a positive constant $k$ (i.e. for each $i in {1, dots, n}$ and a particular fixed $j in {1, dots, n}$, $a_(i j) <-- k a_(i j)$), each side of the above inequality is also multiplied by $k$.] | |
#only("2-")[WLOG (without loss of generality), we can assume that the $j$-th column is normalized, i.e. $norm(bold(a_(j)))_m = 1$.] | |
#only("3-")[The same goes for the remaining columns.] | |
#only("4")[Then the RHS becomes $1$.] | |
== Proof step 2: #text(maroon)[AM]--#text(purple)[GM] on each row product | |
$ | |
mannot.markrect(product_(j=1)^m a_(i j), tag: #<pfGM>, color: #purple) <= | |
mannot.markrect(1/m sum_(j=1)^m a_(i j)^m, tag: #<pfAM>, color: #maroon) | |
$ <step2> | |
#mannot.annot(<pfGM>, pos: bottom + left, dy: 1em)[$op("GM")(a_(i 1)^m, dots, a_(i m)^m)$] | |
#mannot.annot(<pfAM>, pos: bottom + right, dy: 1em)[$op("AM")(a_(i 1)^m, dots, a_(i m)^m)$] | |
Guide: | |
1. '$sum_i mannot.markrect(product_j a_(i j), tag: #<pfGM1>, color: #purple)$' | |
in LHS of #link(<homo>)[previous slide] seems hard, so tackle each row product | |
$mannot.markrect(product_j a_(i j), tag: #<pfGM1>, color: #purple)$ | |
with #text(maroon)[AM]--#text(purple)[GM] first. A "product" reminds us of | |
"#text(purple)[geometric mean]". | |
2. The power '$m$' (in superscript) and the '$sum$' in RHS of #link(<homo>)[previous slide] | |
seems to be an #text(maroon)[arithmetic mean]. | |
== Proof step 3: make LHS appear | |
#slide(repeat: 4, self => [ | |
#let (uncover, only, alternatives) = utils.methods(self) | |
In target LHS, we have '$sum_i$' on the left of '$product_j$', so take '$sum_i$' | |
on both sides of the inequality in the #link(<step2>)[previous step]. | |
$ | |
sum_(i=1)^n product_(j=1)^m a_(i j) & <= | |
only("1", sum_(i=1)^n 1/m sum_(j=1)^m a_(i j)^m) | |
only("2-", 1/m sum_(j=1)^m sum_(i=1)^n a_(i j)^m) \ | |
uncover("3-", &= 1/m sum_(j=1)^m mannot.markhl(norm(bold(a)_j)_m^m, tag: #<pfLm>)) \ | |
uncover("4-", &= 1) | |
$ | |
#uncover("3", mannot.annot(<pfLm>, pos: bottom, dy: 1em)[each column is normalized]) | |
]) | |
== Equality case | |
#box[ | |
#set text(size: 0.83em) | |
We've applied the AM--GM inequality to each row product, so equality holds if and only if | |
all entries in each row are equal, \ | |
i.e. $a_(i 1) = dots.c = a_(i m)$ for all (row index) | |
$i in {1, dots, n}$. | |
#pause | |
In this case, each corresponding component in any two distinct column vectors is equal. \ | |
i.e. for all (row index) $i in {1, dots, n}$ and (column indices) $j, j' in {1, dots, m}, a_(i j) = a_(i j')$. | |
#pause | |
Two vectors are equal to each other if and only if each of their corresponding components are equal. | |
#pause | |
] | |
Since we have normalized each column in the first step of our proof, we have | |
*equality holds if and only if all column vectors in the matrix are parallel to each other*. | |
== Corollary: Carlson's inequality | |
$ | |
(sum_(i=1)^n root(m, product_(j=1)^m a_(i j))) / n <= | |
root(m, product_(j=1)^m (sum_(i=1)^n a_(i j)) / n) | |
$ | |
#pause | |
#proof[ | |
Apply the generalized Cauchy--Schwarz inequality to the matrix | |
$ | |
mat( | |
a_(1 1)^(1\/m)\/n, dots.c, a_(1 m)^(1\/m)\/n; | |
dots.v, dots.down, dots.v; | |
a_(n 1)^(1\/m)\/n, dots.c, a_(n m)^(1\/m)\/n; | |
delim: "[" | |
). | |
$ | |
] | |
== Application: avoid fractional powers | |
The figure in the slide for the generalized Cauchy--Schwarz inequality is often | |
too difficult to apply on questions. In practice, we often take the $m$-th power | |
on both sides to avoid fractional powers. | |
$ | |
"i.e." (sum_(i=1)^n product_(j=1)^m a_(i j))^m <= | |
product_(j=1)^m (sum_(i=1)^n a_(i j)^m). | |
$ | |
The following question is a good example to illustrate how arranging terms in the form of a matrix can help organizing thoughts. | |
== Example: optimization of sum of reciprocals <sincoseg> | |
#slide( | |
repeat: 5, | |
self => [ | |
#let (uncover, only, alternatives) = utils.methods(self) | |
#example[ | |
Let $0 < theta < pi \/ 2$. Find the minimum value of $2/(sin theta) + 3/(cos theta)$. | |
] | |
#pause | |
#grid( | |
columns: (1fr, 1fr), | |
[ | |
_Discussion_: | |
1. Constraint: Pythagorean identity $sin^2 theta + cos^2 theta = 1$. | |
2. Objective function should stay on RHS. | |
3. Tricky part: $ | |
mat(delim: "[", | |
2/(sin theta), uncover("3-", 2/(sin theta)), sin^2 theta; | |
3/(cos theta), uncover("3-", 3/(cos theta)), cos^2 theta; | |
). | |
$ | |
], | |
[ | |
#uncover("4-", [ | |
_Problem_: $"#col" = m = 3$, so rightmost column norm (on RHS) doesn't | |
match the (equality) constraint in point 1. | |
]) | |
#uncover("5-", [ | |
_Quickfix_: adjust the power of each term by taking the $m$-th root | |
#text(0.85em)[$ | |
"i.e." mat(delim: "[", | |
(2/(sin theta))^(1\/3), (2/(sin theta))^(1\/3), sin^(2\/3) theta; | |
(3/(cos theta))^(1\/3), (3/(cos theta))^(1\/3), cos^(2\/3) theta; | |
). | |
$ <finalmatrix>] | |
]) | |
], | |
inset: 0.3em | |
) | |
] | |
) | |
== Problem solving flow for minimization problems | |
#box[ | |
#set text(size: 0.7em) | |
It would be hard to get the #link(<finalmatrix>)[final matrix] at the first sight, | |
so I suggest the following steps (to "get the row product right" first). | |
0. Get rid of fractional powers (by writing a logically equivalent inequality with | |
only integer powers). #pause | |
1. Identify the relevant equality constraint (, which is independent of the choice | |
of matrix). #pause | |
2. Identify each term in the objective function, and place each of them in a row. | |
Now you have two columns. #pause | |
3. Make suitable amount of copies of columns, so that each row product are "balanced", | |
i.e. your row products are constants/terms on the RHS of the target inequality. #pause | |
4. Count the number of columns, and take this number as $m$. #pause | |
5. Take the $m$-th root of each term in the matrix (, so that the rightmost column norm | |
matches the equality constraint.) #pause | |
6. Apply the generalized Cauchy--Schwarz inequality to the matrix. #pause | |
7. State the equality case. | |
] | |
== Practice: generalization of #link(<sincoseg>)[previous example] | |
#exercise[ | |
If $a, b > 0$, $n in NN$, $0 < theta < pi \/ 2$, show that | |
$ | |
(a^(2\/(n+2)) + b^(2\/(n+2)))^((n+2)\2) <= | |
a / (sin^n theta) + b / (cos^n theta). | |
$ | |
] | |
#pause | |
_Hint_: | |
- two columns of $vec(a \/ sin^n theta, b \/ cos^n theta, delim: "[")$ | |
- $n$ columns of $vec(cos^2 theta, sin^2 theta, delim: "[")$ | |
== Practice: Power Mean Inequality for integer power | |
#box[ | |
#set text(size: 0.95em) | |
#exercise[ | |
For any positive real numbers $a_1, dots, a_n$ and positve integer $p > 0$, show that | |
$ | |
((sum_(i=1)^n a_i^p) / n)^(1\/p) >= | |
(sum_(i=1)^n a_i) / n. | |
$ | |
] | |
#pause | |
#solution[ | |
Apply the generalized Cauchy--Schwarz inequality to the matrix | |
$ | |
mat( | |
a_1, 1, dots.c, 1; | |
dots.v, dots.v, dots.down, dots.v; | |
a_n, 1, dots.c, 1; | |
delim: "[" | |
) | |
$ | |
with $p - 1$ columns of $1$'s. | |
] | |
] | |
== Variation: Generalized #link("https://en.wikipedia.org/wiki/Titu%27s_lemma")[Titu's Lemma] | |
#box[ | |
#set text(size: 0.95em) | |
For any real numbers $a_1, dots, a_n$, positive real numbers $b_1, dots, b_n$, positive integers $m, k in NN$ such that $k > m$, | |
$ | |
sum_(i=1)^n a_i^k / b_i^m >= | |
n^(1+m-k) (sum_(i=1)^n a_i)^k / (sum_(i=1)^n b_i)^m. | |
$ | |
#pause | |
#proof[ | |
Focus on the "draft matrix" | |
$ | |
mat( | |
a_1^k \/ b_1^m, b_1, dots.c, b_1, 1, dots.c, 1; | |
dots.v, dots.v, dots.down, dots.v, dots.v, dots.down, dots.v; | |
a_n^k \/ b_n^m, b_n, dots.c, b_n, 1, dots.c, 1; | |
delim: "[" | |
) | |
$ | |
with $m$ columns of $b$'s and $k - 1 - m$ columns of $1$'s. | |
] | |
] | |
== Last example | |
#box[ | |
#set text(size: 0.8em) | |
Most other questions are direct consequences of the previous lemma, including | |
the following: | |
#exercise[ | |
For any $a$, $b$, $c > 0$ satisfying $a b c = 1$, and positive $k >= 2$, show | |
that | |
$ | |
sum_("cyc") 1/(a^k (b + c)) >= 3/2. | |
$ | |
] | |
#pause | |
_Attempt_: Replace the numerator on LHS by $a b c$. Then | |
$ | |
"LHS" = sum_("cyc") (1/a)^(k-1) / (1/b + 1/c) | |
>= ((sum_("cyc") 1/a)^(k-1)) / (2 sum_("cyc") 1/a) | |
= 1/2 dot 3^(1+1-(k-1)) dot #text(blue)[$(sum_("cyc") 1/a)^(k-2)$] | |
mannot.mark(>=, tag: #<lasteg>, color: #blue) 1/2 dot 3^((3-k) + #text(blue)[$(k-2)$]) | |
$ | |
_Problem_: To apply the generalized Cauchy--Schwarz inequality, we need $k - 1 > 1$, | |
i.e. $k > 2$. The author of the #link(mainrefurl)[original article] doesn't address the case when $k = 2$, | |
#mannot.annot(<lasteg>, pos: bottom, dy: 0.8em)[AM--GM inequality] | |
] | |
== Last example (continued) | |
which turns out to be the | |
#link("https://en.wikipedia.org/wiki/Nesbitt%27s_inequality")[Nesbitt's inequality]: | |
For any positive real numbers $a, b, c$, we have | |
$ | |
sum_("cyc") a / (b + c) >= 3/2. | |
$ | |
Observe that the above inequality is homogeneous, so WLOG, we can assume $a + b + c = 1$. | |
Then it's equivalent to | |
$ | |
sum_("cyc") (a + b + c) / (b + c) >= 3/2 + 3. | |
$ | |
The numerator on LHS is $1 = 1^2$, so that #link("https://en.wikipedia.org/wiki/Titu%27s_lemma")[Titu's Lemma] can be used. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment