Skip to content

Commit c19a0a4

Browse files
juaristi22 and claude committed
Add L0 regularization paper draft (WIP)
Paper: "L0 regularization for subnational microsimulation calibration" targeting the International Journal of Microsimulation. Current state of the manuscript: - Full paper structure: abstract, introduction, background, data, methodology, results, discussion, conclusion, appendix - Formal survey calibration problem definition with GREG and IPF explained in depth, including benefits, drawbacks, and current practice in operational models (CBO, JCT, TPC, EUROMOD, TAXSIM) - Four-stage pipeline methodology (clone, matrix, L0 optimize, assemble) documented against the pipeline source code - Detailed appendix target tables populated from policy_data.db (37,758 targets: 33,572 district, 4,080 state, 106 national) - All writing in US English, citations linked via plainnat/natbib Still TODO: - [ ] Implement IPF and GREG baselines on the same calibration matrix to populate the comparison table (tables/comparison.tex) - [ ] Run calibration experiments and fill in all [TBC] placeholders in the results section (accuracy, sparsity, convergence, ESS) - [ ] Generate convergence curve figure from calibration_log.csv - [ ] Select and run a subnational policy application example (Section 5.5 — candidate: EITC expansion across CDs) - [ ] Review pipeline methodology section against latest code for accuracy (clone-and-assign, matrix builder, assembly steps) - [ ] Review and deepen background section: verify claims about GREG/IPF limitations, add any missing related work - [ ] Resolve pre-existing overfull hbox warnings (long URLs in conclusion, hyperparameters table width) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1168667 commit c19a0a4

20 files changed

+1167
-0
lines changed

paper-l0/.gitignore

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
## Core latex/pdflatex auxiliary files:
2+
*.aux
3+
*.lof
4+
*.log
5+
*.lot
6+
*.fls
7+
*.out
8+
*.toc
9+
*.fmt
10+
*.fot
11+
*.cb
12+
*.cb2
13+
.*.lb
14+
*.nav
15+
*.snm
16+
*.vrb
17+
18+
## Generated if empty string is given at "Please type another file name for output:"
19+
.pdf
20+
21+
## Bibliography auxiliary files (bibtex/biblatex/biber):
22+
*.bbl
23+
*.bcf
24+
*.blg
25+
*-blx.aux
26+
*-blx.bib
27+
*.run.xml
28+
29+
## Build tool auxiliary files:
30+
*.fdb_latexmk
31+
*.synctex
32+
*.synctex(busy)
33+
*.synctex.gz
34+
*.synctex.gz(busy)
35+
*.pdfsync
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
% === Core method references ===
2+
3+
% Math in the title is brace-protected so sentence-casing bibliography styles do not lowercase it.
@inproceedings{louizos2018,
4+
title = {Learning Sparse Neural Networks through {$L_0$} Regularization},
5+
author = {Louizos, Christos and Welling, Max and Kingma, Diederik P.},
6+
booktitle = {International Conference on Learning Representations},
7+
year = {2018},
8+
url = {https://openreview.net/forum?id=H1Y8hhg0b}
9+
}
10+
11+
@article{deville1992,
12+
title = {Calibration Estimators in Survey Sampling},
13+
author = {Deville, Jean-Claude and S{\"a}rndal, Carl-Erik},
14+
journal = {Journal of the American Statistical Association},
15+
volume = {87},
16+
number = {418},
17+
pages = {376--382},
18+
year = {1992}
19+
}
20+
21+
@article{sarndal2007,
22+
title = {The Calibration Approach in Survey Theory and Practice},
23+
author = {S{\"a}rndal, Carl-Erik},
24+
journal = {Survey Methodology},
25+
volume = {33},
26+
number = {2},
27+
pages = {99--119},
28+
year = {2007}
29+
}
30+
31+
@article{deming1940,
32+
title = {On a Least Squares Adjustment of a Sampled Frequency Table When the Expected Marginal Totals are Known},
33+
author = {Deming, W. Edwards and Stephan, Frederick F.},
34+
journal = {The Annals of Mathematical Statistics},
35+
volume = {11},
36+
number = {4},
37+
pages = {427--444},
38+
year = {1940}
39+
}
40+
41+
@article{ireland1968,
42+
title = {Contingency Tables with Given Marginals},
43+
author = {Ireland, C. T. and Kullback, S.},
44+
journal = {Biometrika},
45+
volume = {55},
46+
number = {1},
47+
pages = {179--188},
48+
year = {1968}
49+
}
50+
51+
@inproceedings{kingma2015,
52+
title = {Adam: A Method for Stochastic Optimization},
53+
author = {Kingma, Diederik P. and Ba, Jimmy},
54+
booktitle = {International Conference on Learning Representations},
55+
year = {2015},
56+
url = {https://arxiv.org/abs/1412.6980}
57+
}
58+
59+
% === Spatial microsimulation ===
60+
61+
% NOTE(review): title set to the article published at this volume/issue/page range; confirm against the journal record.
@article{williamson1998,
62+
title = {The Estimation of Population Microdata by Using Data from Small Area Statistics and Samples of Anonymised Records},
63+
author = {Williamson, Paul and Birkin, Mark and Rees, Phil},
64+
journal = {Environment and Planning A},
65+
volume = {30},
66+
number = {5},
67+
pages = {785--816},
68+
year = {1998}
69+
}
70+
71+
% NOTE(review): this title/author/venue combination could not be confirmed. The title appears to match
% Wu, Birkin and Rees (2008, Computers, Environment and Urban Systems), whereas Huang and Williamson (2001)
% is usually cited as a University of Liverpool working paper. TODO: verify which work is intended before submission.
@article{huang2001,
72+
title = {A Spatial Microsimulation Model with Student Agents},
73+
author = {Huang, Zhixin and Williamson, Paul},
74+
journal = {Computers, Environment and Urban Systems},
75+
volume = {25},
76+
number = {6},
77+
pages = {473--489},
78+
year = {2001}
79+
}
80+
81+
@article{tanton2011,
82+
title = {Small Area Estimation Using a Reweighting Algorithm},
83+
author = {Tanton, Robert and Vidyattama, Yogi and Nepal, Binod and McNamara, John},
84+
journal = {Journal of the Royal Statistical Society: Series A},
85+
volume = {174},
86+
number = {4},
87+
pages = {931--951},
88+
year = {2011}
89+
}
90+
91+
@book{lovelace2016,
92+
title = {Spatial Microsimulation with {R}},
93+
author = {Lovelace, Robin and Dumont, Morgane},
94+
publisher = {Chapman and Hall/CRC},
95+
year = {2016}
96+
}
97+
98+
@article{harland2012,
99+
title = {Creating Realistic Synthetic Populations at Varying Spatial Scales: A Comparative Critique of Population Synthesis Techniques},
100+
author = {Harland, Kirk and Heppenstall, Alison and Smith, Dianna and Birkin, Mark},
101+
journal = {Journal of Artificial Societies and Social Simulation},
102+
volume = {15},
103+
number = {1},
104+
pages = {1},
105+
year = {2012}
106+
}
107+
108+
@article{anderson2013,
109+
title = {Microsimulation for Local Impact Analysis: The Role of Small Area Estimation},
110+
author = {Anderson, Brent},
111+
journal = {International Journal of Microsimulation},
112+
volume = {6},
113+
number = {2},
114+
pages = {30--55},
115+
year = {2013}
116+
}
117+
118+
% === PolicyEngine and predecessor ===
119+
120+
@techreport{woodruff2024,
121+
title = {Enhancing Survey Microdata with Administrative Records: A Novel Approach to Microsimulation Dataset Construction},
122+
author = {Woodruff, Nikhil and Ghenis, Max},
123+
institution = {PolicyEngine},
124+
year = {2024},
125+
url = {https://github.com/PolicyEngine/policyengine-us-data}
126+
}
127+
128+
% === Data sources ===
129+
130+
% Survey and supplement names are brace-protected so sentence-casing styles keep their capitalisation.
@techreport{census2024,
131+
title = {{Current Population Survey}, 2024 {Annual Social and Economic} ({ASEC}) Supplement},
132+
author = {{U.S. Census Bureau}},
133+
institution = {U.S. Census Bureau},
134+
year = {2024},
135+
url = {https://www2.census.gov/programs-surveys/cps/datasets/2024/march/asec2024_ddl_pub_full.pdf}
136+
}
137+
138+
% month uses the predefined (unquoted) BibTeX macro so each style can format or abbreviate it;
% the file name in the title is brace-protected against sentence-casing.
@techreport{bryant2023a,
139+
title = {General Description Booklet for the 2015 {Public Use Tax File}},
140+
author = {Bryant, Victoria},
141+
institution = {Statistics of Income Division, Internal Revenue Service},
142+
year = {2023},
143+
month = feb,
144+
type = {Technical Documentation},
145+
url = {https://www.irs.gov/statistics/soi-tax-stats-individual-public-use-microdata-files}
146+
}
147+
148+
@techreport{cbo2018,
149+
title = {An Overview of {CBO}'s Microsimulation Tax Model},
150+
author = {{Congressional Budget Office}},
151+
institution = {Congressional Budget Office},
152+
year = {2018},
153+
url = {https://www.cbo.gov/publication/54096}
154+
}
155+
156+
@techreport{jct2023,
157+
title = {Overview of {JCT} Revenue Estimating Methods},
158+
author = {{Joint Committee on Taxation}},
159+
institution = {Joint Committee on Taxation},
160+
number = {JCX-48-23},
161+
year = {2023},
162+
url = {https://www.jct.gov/publications/2023/jcx-48-23/}
163+
}
164+
165+
@misc{tpc2024,
166+
title = {Brief Description of the Tax Model},
167+
author = {{Tax Policy Center}},
168+
year = {2024},
169+
url = {https://www.taxpolicycenter.org/resources/brief-description-tax-model}
170+
}
171+
172+
% === Machine learning / imputation ===
173+
174+
@article{meinshausen2006quantile,
175+
title = {Quantile Regression Forests},
176+
author = {Meinshausen, Nicolai},
177+
journal = {Journal of Machine Learning Research},
178+
volume = {7},
179+
pages = {983--999},
180+
year = {2006}
181+
}
182+
183+
% Conference proceedings paper: the venue belongs in booktitle (inproceedings), not journal.
@inproceedings{pytorch2019,
184+
title = {{PyTorch}: An Imperative Style, High-Performance Deep Learning Library},
185+
author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others},
186+
booktitle = {Advances in Neural Information Processing Systems},
187+
volume = {32},
188+
year = {2019}
189+
}
190+
191+
% === Income measurement / data quality ===
192+
193+
@article{burkhauser2012,
194+
title = {Recent Trends in Top Income Shares in the {United States}: Reconciling Estimates from {March CPS} and {IRS} Tax Return Data},
195+
author = {Burkhauser, Richard V. and Feng, Shuaizhang and Jenkins, Stephen P. and Larrimore, Jeff},
196+
journal = {Review of Economics and Statistics},
197+
volume = {94},
198+
number = {2},
199+
pages = {371--388},
200+
year = {2012}
201+
}
202+
203+
@article{rothbaum2021,
204+
title = {Has Income Underreporting Changed Over Time? Evidence from Linked Survey and Administrative Data},
205+
author = {Rothbaum, Jonathan and Bee, Adam},
206+
journal = {Journal of Business and Economic Statistics},
207+
volume = {39},
208+
number = {2},
209+
pages = {456--475},
210+
year = {2021}
211+
}
212+
213+
% Working paper in a report series: typed as a techreport, with the issuing body in institution
% and the series label in type (rendered as "Working Paper 28229").
% NOTE(review): the author list may be garbled ("Payne, Carla" looks like two authors merged);
% TODO confirm authors and year against the NBER record for this paper number.
@techreport{meyer2021,
214+
title = {The Accuracy of Tax Imputations: Estimating Tax Liabilities and Credits Using Linked Survey and Administrative Data},
215+
author = {Meyer, Bruce D. and Wu, Derek and Finley, Grace and Langetieg, Patrick and Payne, Carla and Plumley, Alan and Yu, Alexa},
216+
institution = {National Bureau of Economic Research},
type = {Working Paper},
217+
number = {28229},
218+
year = {2021}
219+
}

paper-l0/ijm.sty

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
% ijm.sty — Style shim for the International Journal of Microsimulation
2+
% Enforces IJM editorial policy formatting requirements.
3+
4+
\NeedsTeXFormat{LaTeX2e}
5+
\ProvidesPackage{ijm}[2025/04/01 IJM formatting shim]
6+
7+
% --- Font: Times New Roman equivalent ---
8+
\RequirePackage{mathptmx}
9+
10+
% --- Spacing: 1.5 throughout ---
11+
\RequirePackage{setspace}
12+
\onehalfspacing
13+
14+
% --- Margins: 1.5 inches on all sides ---
15+
\RequirePackage[margin=1.5in]{geometry}
16+
17+
% --- Section numbering: max 3 levels ---
18+
\setcounter{secnumdepth}{3}
19+
\setcounter{tocdepth}{3}
20+
21+
% --- Mathematics ---
22+
\RequirePackage{amsmath}
23+
\RequirePackage{amsfonts}
24+
25+
% --- Tables: no borders, Arial 10pt interior ---
26+
\RequirePackage{booktabs}
27+
\RequirePackage{array}
28+
\RequirePackage{helvet} % provides \sffamily for Arial-like interior text
29+
30+
% Table interior font switch
31+
\newcommand{\tablefont}{\fontsize{10}{12}\selectfont\sffamily}
32+
33+
% --- Figures ---
34+
\RequirePackage{graphicx}
35+
36+
% --- Citations: APA style via natbib ---
37+
\RequirePackage[round]{natbib}
38+
\bibpunct{(}{)}{;}{a}{,}{,}
39+
\setcitestyle{authoryear,round}
40+
41+
% --- Hyperlinks ---
42+
\RequirePackage{hyperref}
43+
\RequirePackage{xcolor}
44+
\hypersetup{
45+
colorlinks=true,
46+
linkcolor=blue,
47+
filecolor=magenta,
48+
urlcolor=blue,
49+
citecolor=blue,
50+
}
51+
52+
% --- Algorithms ---
53+
\RequirePackage{algorithm}
54+
\RequirePackage{algpseudocode}
55+
56+
% --- Footnotes: same font, size 10 ---
57+
\RequirePackage[hang,flushmargin]{footmisc}
58+
\renewcommand{\footnotesize}{\fontsize{10}{12}\selectfont}
59+
60+
% --- Float placement ---
61+
\RequirePackage{float}
62+
63+
% --- Source/note formatting below tables and figures ---
64+
\newcommand{\tablenote}[1]{%
65+
\par\vspace{2pt}%
66+
{\fontsize{10}{12}\selectfont\rmfamily #1}%
67+
}
68+
69+
\endinput

paper-l0/macros.tex

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
% Custom commands and mathematics macros
2+
3+
% --- Acronyms ---
4+
\newcommand{\policyengine}{\textsc{PolicyEngine}}
5+
\newcommand{\cps}{\textsc{CPS}}
6+
\newcommand{\puf}{\textsc{PUF}}
7+
\newcommand{\acs}{\textsc{ACS}}
8+
\newcommand{\sipp}{\textsc{SIPP}}
9+
\newcommand{\scf}{\textsc{SCF}}
10+
11+
% --- L0 / Hard Concrete notation (\ensuremath keeps \lzero and \ltwo valid in both text and math mode) ---
12+
\newcommand{\lzero}{\ensuremath{L_0}}
13+
\newcommand{\ltwo}{\ensuremath{L_2}}
14+
\newcommand{\hc}{Hard Concrete}
15+
16+
% --- Placeholder for empirical results ---
17+
\newcommand{\tbc}[1][]{%
18+
\textbf{[TBC%
19+
\def\temparg{#1}%
20+
\ifx\temparg\empty\else: #1\fi
21+
]}%
22+
}
23+
24+
% --- Math shortcuts ---
25+
\newcommand{\E}{\mathbb{E}}
26+
\DeclareMathOperator{\clip}{clip}
27+
\DeclareMathOperator*{\argmin}{arg\,min}
28+
\newcommand{\bw}{\mathbf{w}}
29+
\newcommand{\balpha}{\boldsymbol{\alpha}}
30+
\newcommand{\R}{\mathbb{R}}

paper-l0/main.pdf

238 KB
Binary file not shown.

0 commit comments

Comments
 (0)