@inproceedings{43c0e4df96c34895953d3dd9b27a1f63,
  title     = {Data-Driven Schemes for Resolving Misspecified {MDPs}: Asymptotics and Error Analysis},
  abstract  = {We consider the solution of a finite-state infinite horizon Markov Decision Process ({MDP}) in which both the transition matrix and the cost function are misspecified, the latter in a parametric sense. We consider a data-driven regime in which the learning problem is a stochastic convex optimization problem that resolves misspecification. Via such a framework, we make the following contributions: (1) We first show that a misspecified value iteration scheme converges almost surely to its true counterpart and the mean-squared error after $K$ iterations is $O(1/K^{1/2-\alpha})$ with $0 < \alpha < 1/2$; (2) An analogous asymptotic almost-sure convergence statement is provided for misspecified policy iteration; and (3) Finally, we present a constant steplength misspecified {Q}-learning scheme and show that a suitable error metric is $O(1/K^{1/2-\alpha}) + O(\sqrt{\delta})$ with $0 < \alpha < 1/2$ after $K$ iterations where $\delta$ is a bound on the steplength.},
  author    = {Jiang, Hao and Shanbhag, {Uday V.}},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; Winter Simulation Conference, WSC 2015 ; Conference date: 06-12-2015 Through 09-12-2015},
  year      = {2016},
  month     = feb,
  day       = {16},
  doi       = {10.1109/WSC.2015.7408537},
  language  = {English (US)},
  series    = {Proceedings - Winter Simulation Conference},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {3801--3812},
  booktitle = {2015 Winter Simulation Conference, WSC 2015},
  address   = {United States},
}