Wolfram Cloud Document

In[]:=

CompoundExpression[

]

deploy

Thu 18 Jan 2024 11:00:04

Di
https://math.stackexchange.com/questions/4843188/computing-e-log1-x2-for-chi-squared-x?noredirect=1#comment10326565_4843188

Symbolic integral

Forked from linear-estimation: Probabilistic stability of 1D Gaussian SGD

In[]:=

$Assumptions={x>0,a>0};dist[n_]=TransformedDistribution[x/n,xChiSquareDistribution[n]];pdf[n_,x_]=PDF[dist[n],x];expr[n_,x_]=Refine[Log[(1-ax)^2]pdf[2b,x]];mean[b_?IntegerQ]:=Inactive[Integrate][Refine[Log[(1-ax)^2]pdf[2b,x]],{x,0,1/a,∞}];bvals=Range[2];results=(mean[#]//Activate//FullSimplify)&/@bvals;Print["Integrating: ",Refine[Log[(1-ax)^2]pdf[2b,x]]//TraditionalForm]TableForm[{SF["b=``",#]&/@bvals,results}]

Integrating:

-b



1

b



-bx



b-1

x

log(

2

(1-ax)

)

Γ(b)

Out[]//TableForm=

b=1	-2 -1/a  ExpIntegralEi 1 a 
b=2	2- 2(2+a) -2/a  ExpIntegralEi 2 a  a

Out[]//TraditionalForm=

-b



1

b



-bx



b-1

x

log(

2

(1-ax)

)

Γ(b)

Get numerical cutoffs

Forked from linear-estimation: Probabilistic stability of 1D Gaussian SGD

In[]:=

findRoot[c0_,a0_,b0_,f_,df_]:=Module[{a,b,c,x,y,dy,maxIters,stopCriterion},c=c0;a=a0;b=b0;x=c;maxIters=100;stopCriterion=

-6

10

;For[i=1,i<=maxIters,i+=1,y=f[x];If[Abs[y]<stopCriterion,Break[]];dy=df[x];x=x-y/dy;(*replacewith(dy+eps)forill-conditoned*);Sow[y];(*checkifxgotoutofinterval*);Which[x<a,x=(a+b)/2;b=c,x>b,x=(a+b)/2;a=c,True,_];c=x;];c];adaptiveIntegrate[expr_]:=NIntegrate[expr,{x,0,∞},Method->"LocalAdaptive"];pvalueIntegrate[expr_,exclusion_]:=NIntegrate[expr,{x,0,∞},Method->"PrincipalValue",Exclusions->{exclusion},MinRecursion->4,MaxRecursion->20];SF=StringForm;Off[General::munfl];(*underflowwarnings*)ClearAll[n,x,a,b,getCritical,dist,pdf,meanLog,meanLogD];(*xisdomain,aisstepsize*)$Assumptions={x>0,a>0};(*Distributionofsampleaverageofnchi-squaredR.V.s*)dist[n_]=TransformedDistribution[x/n,xChiSquareDistribution[n]];pdf[n_,x_]=Refine@PDF[dist[n],x];(*refinetogetridofPiecewise*)(*Expectedvalueoflog*)expr[n_,a_,x_]=Log[

2

(1-a*x)

]*pdf[n,x];exprD[n_,a_,x_]=D[expr[n,a,x],a];meanLog[n_,a_?NumericQ]:=adaptiveIntegrate[expr[n,a,x]];meanLogD[n_,a_?NumericQ]:=pvalueIntegrate[exprD[n,a,x],1-ax==0];batchSizes={1,2,3,10,1000};legends=SF["b=``",#]&/@batchSizes;labeledPdfs=Table[Labeled[pdf[b,x],SF["b=``",b],{Top}],{b,batchSizes}];Plot@@{labeledPdfs,{x,0,3},PlotLegends->legends,PlotLabel->"Sample mean of b

2

χ

R.V.s",AxesLabel->{"x","density"}}labeledMeanLogs=Table[Labeled[meanLog[b,a],SF["b=``",b],{Top}],{b,batchSizes}];Plot@@{labeledMeanLogs,{a,0,3},PlotLegends->legends,PlotLabel->"E[log (1-ax

2

)\),

]",AxesLabel->{"x","density"}}(*Findcriticallearningrateforgivennumberofrandomvariables*)Off[NIntegrate::izero];getCritical[n_?NumericQ]:=findRoot[2.5,2,3,meanLog[n,#]&,meanLogD[n,#]&];critVals=getCritical/@batchSizes;pointsPlot=ListPlot[{batchSizes,critVals},PlotStyle->Directive[PointSize[Large],Red]];smoothPlot=Plot[Callout[getCritical[n],legends,{batchSizes,critVals}],{n,0.6,20},MaxRecursion->1,PlotLabel->"Critical step α",AxesLabel->{"b","α"}];Show[smoothPlot,pointsPlot]TableForm[{SF["b=``",#]&/@batchSizes,critVals}]

Visualize plot

Forked from:
mathoverflow-gaussian-convergence.nb

In[]:=

(*

https://math.stackexchange.com/questions/4519054/bounds-on-spectral-radius-of-2-textdiaghh-cdot-1t

*)stepL2SGDfast[h_]:=Module[{d=Length[h],normalize,step,evec},normalize[v_]:=v/Sqrt@Total[v*v];step[v_]:=2h*v+h*Total[v];evec=FixedPoint[normalize[step[#]]&,ConstantArray[1.,d],1000];2/Norm[step@evec]](*Gaussiansamplerwithdiagonalcovariance*)SeedRandom[1,Method->"MKL"];SF=StringForm;gaussianSampler[diag_]:=With{d=Length[diag]},Compile{{n,_Integer}},Module{vals,diagSqrt},diagSqrt=

diag

;vals=diagSqrt*#&/@RandomVariate[NormalDistribution[],{n,d}];tileRows[mat_,times_]:=ArrayFlatten[Table[mat,{times},{1}]];duplicateRows[mat_,times_]:=Flatten[Table[#,{times}]&/@mat,1];(*GeneratesplotforGaussianSGDwithcoveigenvalues

-p

1

,

-p

2

,...,

-p

d

,byusingstepsizeaandbatchsizeB1*)generatePlot[p_,d_,a_,B1_]:=Module[{},numSamples=1000;numSteps=100;ones=ConstantArray[1.,d];h=Table[

-p

i

,{i,1,d}];sampler=gaussianSampler[h];W0=RandomVariate[NormalDistribution[],{numSamples,Length[h]}];step[w_,x_]=w-axx.w;batchStep[W_]:=MapThread[step,{W,sampler[numSamples*B1]}];averagedStep[W_]:=Mean/@Partition[batchStep[duplicateRows[W,B1]],B1];traj=NestList[averagedStep,W0,numSteps];(*numStepsxnumSamplesxd*)errorNorms2=Total[traj*traj,{3}];(*numStepsxnumSamples*)numQuantiles=100;compress[l_]:=Quantile[l,#]&/@Range[0,1,1/numQuantiles];meanPlot=ListLinePlot[Total[errorNorms2,{2}]/numSamples,ScalingFunctions->"Log",PlotLegends->{"mean"}];filling=Table[i->{numQuantiles+2-i},{i,1,numQuantiles/2}];distPlot=ListLinePlot[Transpose[compress/@errorNorms2],ScalingFunctions->"Log",Filling->filling,FillingStyle->Directive[Opacity[.1],EdgeForm[]],PlotStyle->{Directive[Gray,Thin]}];nf[expr_]:=NumberForm[N@expr,{3,3}];Show[distPlot,meanPlot,AxesLabel->{"t","||e

2

||

"},PlotLabel->SF["B=``, α=``, final error=``",B1,nf@a,nf@Median@Last[errorNorms2]],ImageSize->Large]];p=1;d=1;(*1Ddivergencethreshold,seeDSP.se

post

*)a1=2.421249521036836042992780131428225933128842834067867847377838424345397835692066559296652232682365322;a2=2.6845103513837487`;a3=2.6988830381053224`;generatePlot[0,d,a2,1]generatePlot[0,d,a2,3]generatePlot[0,d,a2,10]