train.lua 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373
  1. require 'pl'
  2. local __FILE__ = (function() return string.gsub(debug.getinfo(2, 'S').source, "^@", "") end)()
  3. package.path = path.join(path.dirname(__FILE__), "lib", "?.lua;") .. package.path
  4. require 'optim'
  5. require 'xlua'
  6. require 'w2nn'
  7. local settings = require 'settings'
  8. local srcnn = require 'srcnn'
  9. local minibatch_adam = require 'minibatch_adam'
  10. local iproc = require 'iproc'
  11. local reconstruct = require 'reconstruct'
  12. local compression = require 'compression'
  13. local pairwise_transform = require 'pairwise_transform'
  14. local image_loader = require 'image_loader'
  -- Run `rgb` through reconstruct.scale with `model` and write the result to `file`.
  -- The scale factor, the size argument (settings.scale * settings.crop_size)
  -- and the upsampling filter are all taken from `settings`.
  local function save_test_scale(model, rgb, file)
    local scaled_crop = settings.scale * settings.crop_size
    local upscaled = reconstruct.scale(model, settings.scale, rgb,
                                       scaled_crop, settings.upsampling_filter)
    image.save(file, upscaled)
  end
  -- Run `rgb` through reconstruct.image with `model` and write the result to `file`.
  local function save_test_jpeg(model, rgb, file)
    -- reconstruct.image returns more than one value; only the first is needed here.
    local restored = reconstruct.image(model, rgb)
    image.save(file, restored)
  end
  -- Randomly partition the sequence `x` into a training part and a validation part.
  --
  --   x         : sequence of samples.
  --   test_size : number of samples to put in the validation part.
  --
  -- Returns train_x, valid_x: two new sequences holding #x - test_size and
  -- test_size elements respectively, drawn through one random permutation of x.
  local function split_data(x, test_size)
    local order = torch.randperm(#x)
    local train_size = #x - test_size
    local train_x, valid_x = {}, {}
    -- The first train_size permuted positions feed the training part ...
    for pos = 1, train_size do
      train_x[pos] = x[order[pos]]
    end
    -- ... and the following test_size positions feed the validation part.
    for pos = train_size + 1, train_size + test_size do
      valid_x[pos - train_size] = x[order[pos]]
    end
    return train_x, valid_x
  end
  -- Build a shuffled list of validation pairs from the images in `x`.
  --
  --   x           : sequence of images, each passed as-is to `transformer`.
  --   transformer : function(image, is_validation, patches) returning a sequence
  --                 of {input, target} pairs.
  --   n           : requested crops per image (defaults to 4).
  --   patches     : patch count handed to `transformer` on each call.
  --
  -- Every image is transformed math.max(n / patches, 1) times (the numeric
  -- `for` runs while its counter does not exceed that, possibly fractional,
  -- limit). Returns a sequence of {x = input, y = target} tables in random order.
  local function make_validation_set(x, transformer, n, patches)
    n = n or 4
    local pool = {}
    for img = 1, #x do
      local rounds = math.max(n / patches, 1)
      for _ = 1, rounds do
        local pairs_xy = transformer(x[img], true, patches)
        for j = 1, #pairs_xy do
          pool[#pool + 1] = {x = pairs_xy[j][1], y = pairs_xy[j][2]}
        end
      end
      xlua.progress(img, #x)
      collectgarbage()
    end
    -- Shuffle so consecutive entries do not all come from the same image.
    local order = torch.randperm(#pool)
    local shuffled = {}
    for pos = 1, order:size(1) do
      shuffled[pos] = pool[order[pos]]
    end
    return shuffled
  end
  -- Evaluate `model` on the validation pairs in `data`, one full mini-batch at a time.
  --
  --   model       : network, invoked through model:forward(inputs).
  --   criterion   : loss whose forward(output, targets) value is averaged into `loss`.
  --   eval_metric : metric whose forward(output, targets) value is averaged into
  --                 `MSE` and converted to `PSNR`.
  --   data        : sequence of {x = tensor, y = tensor} entries; the buffers are
  --                 sized from data[1] using size(1), size(2) and size(3).
  --   batch_size  : number of entries evaluated per forward pass.
  --
  -- Returns {loss = ..., MSE = ..., PSNR = ...}, each averaged over the batches
  -- that were run. Entries after the last complete batch are not evaluated.
  --
  -- Raises an error when `data` cannot fill even one batch: in that case no
  -- batch would run and every average would be 0/0 (NaN), which callers
  -- comparing `loss` against a best score would silently treat as "no improvement".
  local function validate(model, criterion, eval_metric, data, batch_size)
    if #data < batch_size then
      error("validate: " .. #data .. " validation samples cannot fill a batch of "
              .. batch_size .. "; lower the batch size or enlarge the validation set")
    end
    local loss = 0
    local mse = 0
    local loss_count = 0
    -- Host-side staging buffers, sized from the first sample.
    local inputs_tmp = torch.Tensor(batch_size,
                                    data[1].x:size(1),
                                    data[1].x:size(2),
                                    data[1].x:size(3)):zero()
    local targets_tmp = torch.Tensor(batch_size,
                                     data[1].y:size(1),
                                     data[1].y:size(2),
                                     data[1].y:size(3)):zero()
    -- CUDA copies that are refilled from the staging buffers for every batch.
    local inputs = inputs_tmp:clone():cuda()
    local targets = targets_tmp:clone():cuda()
    for t = 1, #data, batch_size do
      -- Stop at the first incomplete batch; the buffers have a fixed batch dimension.
      if t + batch_size - 1 > #data then
        break
      end
      for i = 1, batch_size do
        inputs_tmp[i]:copy(data[t + i - 1].x)
        targets_tmp[i]:copy(data[t + i - 1].y)
      end
      inputs:copy(inputs_tmp)
      targets:copy(targets_tmp)
      local z = model:forward(inputs)
      loss = loss + criterion:forward(z, targets)
      mse = mse + eval_metric:forward(z, targets)
      loss_count = loss_count + 1
      if loss_count % 10 == 0 then
        xlua.progress(t, #data)
        collectgarbage()
      end
    end
    xlua.progress(#data, #data)
    local mean_mse = mse / loss_count
    return {loss = loss / loss_count, MSE = mean_mse, PSNR = 10 * math.log10(1 / mean_mse)}
  end
  -- Build the CUDA training criterion for `model`.
  --
  -- The weight tensor has one row per output channel and output_w * output_w
  -- columns, where output_w = settings.crop_size - 2 * reconstruct.offset_size(model).
  -- RGB models get per-channel weights (which sum to 3, i.e. average 1);
  -- all other models get a single row of ones.
  local function create_criterion(model)
    local is_rgb = reconstruct.is_rgb(model)
    local offset = reconstruct.offset_size(model)
    local output_w = settings.crop_size - offset * 2
    local weight
    if is_rgb then
      weight = torch.Tensor(3, output_w * output_w)
      weight[1]:fill(0.29891 * 3) -- R
      weight[2]:fill(0.58661 * 3) -- G
      weight[3]:fill(0.11448 * 3) -- B
    else
      weight = torch.Tensor(1, output_w * output_w)
      weight[1]:fill(1.0)
    end
    return w2nn.ClippedWeightedHuberCriterion(weight, 0.1, {0.0, 1.0}):cuda()
  end
  -- Turn one compressed image into training/validation pairs for the
  -- configured method.
  --
  --   model         : network; only queried through reconstruct.has_resize.
  --   x             : compressed image, expanded with compression.decompress.
  --   is_validation : when truthy, random colour noise and random overlay are
  --                   disabled (their rates are forced to 0.0).
  --   n             : number of patches to request (defaults to settings.patches).
  --   offset        : offset forwarded unchanged to the pairwise transform.
  --
  -- Returns the result of pairwise_transform.scale (settings.method == "scale")
  -- or pairwise_transform.jpeg (settings.method == "noise"); for any other
  -- method nothing is returned.
  local function transformer(model, x, is_validation, n, offset)
    x = compression.decompress(x)
    n = n or settings.patches

    -- Active cropping is configured the same way for training and validation;
    -- only the two random augmentations below differ.
    local active_cropping_rate = settings.active_cropping_rate
    local active_cropping_tries = settings.active_cropping_tries
    local random_color_noise_rate = settings.random_color_noise_rate
    local random_overlay_rate = settings.random_overlay_rate
    if is_validation then
      random_color_noise_rate = 0.0
      random_overlay_rate = 0.0
    end

    local use_rgb = (settings.color == "rgb")
    local method = settings.method

    if method == "scale" then
      local options = {
        downsampling_filters = settings.downsampling_filters,
        upsampling_filter = settings.upsampling_filter,
        random_half_rate = settings.random_half_rate,
        random_color_noise_rate = random_color_noise_rate,
        random_overlay_rate = random_overlay_rate,
        random_unsharp_mask_rate = settings.random_unsharp_mask_rate,
        max_size = settings.max_size,
        active_cropping_rate = active_cropping_rate,
        active_cropping_tries = active_cropping_tries,
        rgb = use_rgb,
        gamma_correction = settings.gamma_correction,
        x_upsampling = not reconstruct.has_resize(model),
        resize_blur_min = settings.resize_blur_min,
        resize_blur_max = settings.resize_blur_max,
      }
      return pairwise_transform.scale(x, settings.scale,
                                      settings.crop_size, offset,
                                      n, options)
    end

    if method == "noise" then
      local options = {
        random_half_rate = settings.random_half_rate,
        random_color_noise_rate = random_color_noise_rate,
        random_overlay_rate = random_overlay_rate,
        random_unsharp_mask_rate = settings.random_unsharp_mask_rate,
        max_size = settings.max_size,
        jpeg_chroma_subsampling_rate = settings.jpeg_chroma_subsampling_rate,
        active_cropping_rate = active_cropping_rate,
        active_cropping_tries = active_cropping_tries,
        nr_rate = settings.nr_rate,
        rgb = use_rgb,
      }
      return pairwise_transform.jpeg(x, settings.style, settings.noise_level,
                                     settings.crop_size, offset,
                                     n, options)
    end
  end
  -- Refill the sample buffers `x` (inputs) and `y` (targets) in place with
  -- freshly transformed patches.
  --
  --   x, y        : tensors whose first dimension is the sample slot; rows are
  --                 overwritten starting from row 1.
  --   train_x     : sequence of training images, visited in random order.
  --   transformer : function(image, is_validation, patches) returning a
  --                 sequence of {input, target} pairs.
  --   input_size, target_size : accepted but not used by this function.
  --
  -- Stops as soon as every row of `x` has been written; rows that are never
  -- reached keep their previous contents.
  local function resampling(x, y, train_x, transformer, input_size, target_size)
    local row = 1
    local buffer_full = false
    local order = torch.randperm(#train_x)
    for t = 1, #train_x do
      xlua.progress(t, #train_x)
      local pairs_xy = transformer(train_x[order[t]], false, settings.patches)
      for j = 1, #pairs_xy do
        x[row]:copy(pairs_xy[j][1])
        y[row]:copy(pairs_xy[j][2])
        row = row + 1
        if row > x:size(1) then
          buffer_full = true
          break
        end
      end
      if buffer_full then
        break
      end
      if t % 50 == 0 then
        collectgarbage()
      end
    end
    xlua.progress(#train_x, #train_x)
  end
  -- Pick `samples` rows at random from the `k` rows selected by torch.topk on
  -- `instance_loss`.
  --
  --   x, y          : sample tensors indexed by row along dimension 1.
  --   instance_loss : 1-D tensor with one loss value per row of x / y.
  --   k             : size of the candidate pool passed to torch.topk
  --                   (called with dir = true).
  --   samples       : number of rows to copy out; callers are expected to keep
  --                   samples <= k, since rows are drawn via a permutation of 1..k.
  --
  -- Prints the mean of all losses and the mean of the selected losses.
  -- Returns oracle_x, oracle_y: new tensors shaped like x and y but with
  -- `samples` rows.
  local function get_oracle_data(x, y, instance_loss, k, samples)
    local n_loss = instance_loss:size(1)
    local top_index = torch.LongTensor(n_loss)
    local top_loss = torch.Tensor(n_loss)
    torch.topk(top_loss, top_index, instance_loss, k, 1, true)
    print("average loss: " ..instance_loss:mean() .. ", average oracle loss: " .. top_loss:mean())

    local order = torch.randperm(k)
    -- Same shapes as x / y, except for the number of rows.
    local x_shape = x:size()
    local y_shape = y:size()
    x_shape[1] = samples
    y_shape[1] = samples
    local oracle_x = torch.Tensor(table.unpack(torch.totable(x_shape)))
    local oracle_y = torch.Tensor(table.unpack(torch.totable(y_shape)))
    for row = 1, samples do
      local src = top_index[order[row]]
      oracle_x[row]:copy(x[src])
      oracle_y[row]:copy(y[src])
    end
    return oracle_x, oracle_y
  end
  -- Drop images that are too small to crop from.
  --
  -- An image is kept only when both compression.size(image)[2] and [3],
  -- divided by settings.scale, are strictly greater than
  -- settings.crop_size + 16. Prints how many images were removed and returns
  -- the kept images as a new sequence (original order preserved).
  local function remove_small_image(x)
    local kept = {}
    for i = 1, #x do
      local dims = compression.size(x[i])
      local min_side = settings.crop_size + 16
      local big_enough = dims[2] / settings.scale > min_side
        and dims[3] / settings.scale > min_side
      if big_enough then
        kept[#kept + 1] = x[i]
      end
      if i % 100 == 0 then
        collectgarbage()
      end
    end
    print(string.format("removed %d small images", #x - #kept))
    return kept
  end
  -- Draw the training and validation histories as two line plots with gnuplot.
  --   train, valid : sequences of numbers, converted with torch.Tensor.
  local function plot(train, valid)
    local train_curve = {'training', torch.Tensor(train), '-'}
    local valid_curve = {'validation', torch.Tensor(valid), '-'}
    gnuplot.plot({train_curve, valid_curve})
  end
  -- Full training loop.
  --
  -- Steps, in order:
  --   1. Create the model, criterion and evaluation metric.
  --   2. Load settings.images, drop small images, split into train / validation.
  --   3. Pre-compute the validation pairs once.
  --   4. For each epoch: refill the sample buffers (re-using the highest-loss
  --      samples of the previous epoch when oracle sampling is enabled), then run
  --      settings.inner_epoch passes of minibatch_adam, validating after each.
  --   5. Whenever the validation loss improves, save the model and a
  --      reconstructed test image; after more than two consecutive
  --      non-improving passes, multiply the learning rate by 0.874.
  --
  -- Reads everything from `settings`; returns nothing.
  local function train()
    local hist_train = {}
    local hist_valid = {}
    local model = srcnn.create(settings.model, settings.backend, settings.color)
    local offset = reconstruct.offset_size(model)
    local pairwise_func = function(x, is_validation, n)
      return transformer(model, x, is_validation, n, offset)
    end
    local criterion = create_criterion(model)
    local eval_metric = w2nn.ClippedMSECriterion(0, 1):cuda()

    local images = remove_small_image(torch.load(settings.images))
    local train_x, valid_x = split_data(images, math.max(math.floor(settings.validation_rate * #images), 1))
    -- Drop the combined list: while it is alive it still references every
    -- validation image, so clearing valid_x below would not free anything.
    images = nil

    local adam_config = {
      learningRate = settings.learning_rate,
      xBatchSize = settings.batch_size,
    }
    local lrd_count = 0

    -- Number of colour channels in the sample buffers.
    local ch
    if settings.color == "y" then
      ch = 1
    elseif settings.color == "rgb" then
      ch = 3
    else
      error("train: unsupported settings.color: " .. tostring(settings.color))
    end

    local best_score = 1000.0
    print("# make validation-set")
    local valid_xy = make_validation_set(valid_x, pairwise_func,
                                         settings.validation_crops,
                                         settings.patches)
    valid_x = nil
    collectgarbage()
    model:cuda()
    print("load .. " .. #train_x)

    -- Sample buffers: settings.patches rows per training image.
    local x
    local y = torch.Tensor(settings.patches * #train_x,
                           ch * (settings.crop_size - offset * 2) * (settings.crop_size - offset * 2)):zero()
    if reconstruct.has_resize(model) then
      x = torch.Tensor(settings.patches * #train_x,
                       ch, settings.crop_size / settings.scale, settings.crop_size / settings.scale)
    else
      x = torch.Tensor(settings.patches * #train_x,
                       ch, settings.crop_size, settings.crop_size)
    end

    local instance_loss = nil
    for epoch = 1, settings.epoch do
      model:training()
      print("# " .. epoch)
      print("## resampling")
      if instance_loss then
        -- active learning
        -- Both counts are floored: they are used as tensor sizes and loop
        -- bounds, and a fractional oracle_n below 1 must count as "no oracle
        -- samples" rather than pass the > 0 check with zero usable rows.
        local oracle_k = math.floor(math.min(x:size(1) * (settings.oracle_rate * (1 / (1 - settings.oracle_drop_rate))), x:size(1)))
        local oracle_n = math.floor(math.min(x:size(1) * settings.oracle_rate, x:size(1)))
        if oracle_n > 0 then
          -- Copy the oracle rows out first, because resampling overwrites x / y.
          local oracle_x, oracle_y = get_oracle_data(x, y, instance_loss, oracle_k, oracle_n)
          resampling(x, y, train_x, pairwise_func)
          x:narrow(1, 1, oracle_x:size(1)):copy(oracle_x)
          y:narrow(1, 1, oracle_y:size(1)):copy(oracle_y)
        else
          resampling(x, y, train_x, pairwise_func)
        end
      else
        resampling(x, y, train_x, pairwise_func)
      end
      collectgarbage()
      instance_loss = torch.Tensor(x:size(1)):zero()

      for i = 1, settings.inner_epoch do
        model:training()
        local train_score, il = minibatch_adam(model, criterion, eval_metric, x, y, adam_config)
        instance_loss:copy(il)
        print(train_score)
        model:evaluate()
        print("# validation")
        local score = validate(model, criterion, eval_metric, valid_xy, adam_config.xBatchSize)
        table.insert(hist_train, train_score.loss)
        table.insert(hist_valid, score.loss)
        if settings.plot then
          plot(hist_train, hist_valid)
        end
        if score.loss < best_score then
          local test_image = image_loader.load_float(settings.test) -- reload
          lrd_count = 0
          best_score = score.loss
          print("* update best model")
          if settings.save_history then
            -- History mode: one model file and one test image per improvement,
            -- tagged with the epoch and inner-epoch numbers.
            torch.save(string.format(settings.model_file, epoch, i), model:clearState(), "ascii")
            if settings.method == "noise" then
              local log = path.join(settings.model_dir,
                                    ("noise%d_best.%d-%d.png"):format(settings.noise_level,
                                                                      epoch, i))
              save_test_jpeg(model, test_image, log)
            elseif settings.method == "scale" then
              local log = path.join(settings.model_dir,
                                    ("scale%.1f_best.%d-%d.png"):format(settings.scale,
                                                                        epoch, i))
              save_test_scale(model, test_image, log)
            end
          else
            -- Default mode: overwrite the single best model / test image.
            torch.save(settings.model_file, model:clearState(), "ascii")
            if settings.method == "noise" then
              local log = path.join(settings.model_dir,
                                    ("noise%d_best.png"):format(settings.noise_level))
              save_test_jpeg(model, test_image, log)
            elseif settings.method == "scale" then
              local log = path.join(settings.model_dir,
                                    ("scale%.1f_best.png"):format(settings.scale))
              save_test_scale(model, test_image, log)
            end
          end
        else
          lrd_count = lrd_count + 1
          if lrd_count > 2 then
            adam_config.learningRate = adam_config.learningRate * 0.874
            print("* learning rate decay: " .. adam_config.learningRate)
            lrd_count = 0
          end
        end
        print("PSNR: " .. score.PSNR .. ", loss: " .. score.loss .. ", Minimum loss: " .. best_score)
        collectgarbage()
      end
    end
  end
  -- Script entry point: select the CUDA device when a positive GPU index is
  -- configured, seed both the CPU and CUDA random generators with
  -- settings.seed, print the effective settings, then start training.
  local gpu_index = settings.gpu
  local seed = settings.seed
  if gpu_index > 0 then
    cutorch.setDevice(gpu_index)
  end
  torch.manualSeed(seed)
  cutorch.manualSeed(seed)
  print(settings)
  train()