@inproceedings{43c0e4df96c34895953d3dd9b27a1f63,
  title     = {Data-Driven Schemes for Resolving Misspecified {MDPs}: Asymptotics and Error Analysis},
  abstract  = {We consider the solution of a finite-state infinite horizon Markov Decision Process (MDP) in which both the transition matrix and the cost function are misspecified, the latter in a parametric sense. We consider a data-driven regime in which the learning problem is a stochastic convex optimization problem that resolves misspecification. Via such a framework, we make the following contributions: (1) We first show that a misspecified value iteration scheme converges almost surely to its true counterpart and the mean-squared error after $K$ iterations is $O(1/K^{1/2-\alpha})$ with $0 < \alpha < 1/2$; (2) An analogous asymptotic almost-sure convergence statement is provided for misspecified policy iteration; and (3) Finally, we present a constant steplength misspecified Q-learning scheme and show that a suitable error metric is $O(1/K^{1/2-\alpha}) + O(\sqrt{\delta})$ with $0 < \alpha < 1/2$ after $K$ iterations where $\delta$ is a bound on the steplength.},
  author    = {Jiang, Hao and Shanbhag, {Uday V.}},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; Winter Simulation Conference, WSC 2015 ; Conference date: 06-12-2015 Through 09-12-2015},
  year      = {2016},
  month     = feb,
  day       = {16},
  doi       = {10.1109/WSC.2015.7408537},
  language  = {English (US)},
  series    = {Proceedings - Winter Simulation Conference},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {3801--3812},
  booktitle = {2015 Winter Simulation Conference, WSC 2015},
}