train.lua 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. require 'pl'
  2. local __FILE__ = (function() return string.gsub(debug.getinfo(2, 'S').source, "^@", "") end)()
  3. package.path = path.join(path.dirname(__FILE__), "lib", "?.lua;") .. package.path
  4. require 'optim'
  5. require 'xlua'
  6. require 'image'
  7. require 'w2nn'
  8. local threads = require 'threads'
  9. local settings = require 'settings'
  10. local srcnn = require 'srcnn'
  11. local minibatch_adam = require 'minibatch_adam'
  12. local iproc = require 'iproc'
  13. local reconstruct = require 'reconstruct'
  14. local image_loader = require 'image_loader'
  --- Upscale a test image and write it to disk.
  -- Runs `reconstruct.scale` with `settings.scale` and saves the first
  -- returned value through `image.save`.
  -- @param model network handed to `reconstruct.scale`
  -- @param rgb   input image
  -- @param file  destination path
  local function save_test_scale(model, rgb, file)
     -- Parentheses keep only the first return value, as the original local did.
     image.save(file, (reconstruct.scale(model, settings.scale, rgb)))
  end
  --- Run the model on a test image at its original size and write it to disk.
  -- Only the first value returned by `reconstruct.image` is used.
  -- @param model network handed to `reconstruct.image`
  -- @param rgb   input image
  -- @param file  destination path
  local function save_test_jpeg(model, rgb, file)
     local restored = reconstruct.image(model, rgb)
     image.save(file, restored)
  end
  --- Save a test image for the "user" method.
  -- Dispatches to `save_test_jpeg` when `settings.scale` is 1 and to
  -- `save_test_scale` otherwise.
  -- @param model network used for the reconstruction
  -- @param rgb   input image
  -- @param file  destination path
  local function save_test_user(model, rgb, file)
     local save = (settings.scale == 1) and save_test_jpeg or save_test_scale
     save(model, rgb, file)
  end
  --- Randomly partition a list into a training part and a validation part.
  -- A single `torch.randperm` permutation decides the order; the last
  -- `test_size` permuted entries become the validation part.
  -- @param x         list of samples
  -- @param test_size number of samples to hold out
  -- @return train_x, valid_x
  local function split_data(x, test_size)
     local order = torch.randperm(#x)
     local n_train = #x - test_size
     local train_x, valid_x = {}, {}
     for pos = 1, n_train do
        train_x[pos] = x[order[pos]]
     end
     for pos = n_train + 1, n_train + test_size do
        valid_x[pos - n_train] = x[order[pos]]
     end
     return train_x, valid_x
  end
  -- Worker pool that builds training pairs; created by transform_pool_init().
  local g_transform_pool = nil
  -- Mutex created on the main thread; workers re-open it through g_mutex_id.
  local g_mutex = nil
  local g_mutex_id = nil

  --- Create the worker pool and define the global `transformer` in each worker.
  -- The pool size is `settings.thread` when positive, otherwise
  -- `torch.getnumthreads()`. The function blocks on `synchronize()` until
  -- every worker has finished its init callback.
  -- @param has_resize whether the model resizes internally; its negation is
  --                   passed to the transforms as `x_upsampling`
  -- @param offset     offset forwarded to every `pairwise_transform` call
  local function transform_pool_init(has_resize, offset)
     local nthread = torch.getnumthreads()
     if (settings.thread > 0) then
        nthread = settings.thread
     end
     g_mutex = threads.Mutex()
     g_mutex_id = g_mutex:id()
     g_transform_pool = threads.Threads(
        nthread,
        threads.safe(
           function(threadid)
              require 'pl'
              local __FILE__ = (function() return string.gsub(debug.getinfo(2, 'S').source, "^@", "") end)()
              package.path = path.join(path.dirname(__FILE__), "lib", "?.lua;") .. package.path
              require 'torch'
              require 'nn'
              require 'cunn'
              torch.setnumthreads(1)
              torch.setdefaulttensortype("torch.FloatTensor")
              local threads = require 'threads'
              local compression = require 'compression'
              local pairwise_transform = require 'pairwise_transform'

              --- Turn one stored sample into training pairs.
              -- Deliberately global: jobs queued from the main thread call it
              -- by name inside the worker.
              -- @param x             a compressed image, or `{payload, meta}`
              --                      where payload is a compressed image or a
              --                      table with compressed `.x` and `.y`
              -- @param is_validation disables the random augmentations that
              --                      are zeroed below (default false)
              -- @param n             number of patches (default settings.patches)
              -- @return the result of the selected `pairwise_transform` function
              function transformer(x, is_validation, n)
                 local mutex = threads.Mutex(g_mutex_id)
                 local meta = {data = {}}
                 local y = nil
                 if type(x) == "table" and type(x[2]) == "table" then
                    meta = x[2]
                    if x[1].x and x[1].y then
                       y = compression.decompress(x[1].y)
                       x = compression.decompress(x[1].x)
                    else
                       x = compression.decompress(x[1])
                    end
                 else
                    x = compression.decompress(x)
                 end
                 n = n or settings.patches
                 if is_validation == nil then is_validation = false end

                 -- Active cropping is configured the same way for both modes;
                 -- only the colour-noise and overlay augmentations are
                 -- switched off for validation.
                 local active_cropping_rate = settings.active_cropping_rate
                 local active_cropping_tries = settings.active_cropping_tries
                 local random_color_noise_rate = settings.random_color_noise_rate
                 local random_overlay_rate = settings.random_overlay_rate
                 if is_validation then
                    random_color_noise_rate = 0.0
                    random_overlay_rate = 0.0
                 end

                 if settings.method == "scale" then
                    -- Values from `meta` override the defaults listed here.
                    local conf = tablex.update({
                          mutex = mutex,
                          downsampling_filters = settings.downsampling_filters,
                          random_half_rate = settings.random_half_rate,
                          random_color_noise_rate = random_color_noise_rate,
                          random_overlay_rate = random_overlay_rate,
                          random_unsharp_mask_rate = settings.random_unsharp_mask_rate,
                          random_blur_rate = settings.random_blur_rate,
                          random_blur_size = settings.random_blur_size,
                          random_blur_sigma_min = settings.random_blur_sigma_min,
                          random_blur_sigma_max = settings.random_blur_sigma_max,
                          max_size = settings.max_size,
                          active_cropping_rate = active_cropping_rate,
                          active_cropping_tries = active_cropping_tries,
                          rgb = (settings.color == "rgb"),
                          x_upsampling = not has_resize,
                          resize_blur_min = settings.resize_blur_min,
                          resize_blur_max = settings.resize_blur_max}, meta)
                    return pairwise_transform.scale(x,
                                                    settings.scale,
                                                    settings.crop_size, offset,
                                                    n, conf)
                 elseif settings.method == "noise" then
                    local conf = tablex.update({
                          mutex = mutex,
                          random_half_rate = settings.random_half_rate,
                          random_color_noise_rate = random_color_noise_rate,
                          random_overlay_rate = random_overlay_rate,
                          random_unsharp_mask_rate = settings.random_unsharp_mask_rate,
                          random_blur_rate = settings.random_blur_rate,
                          random_blur_size = settings.random_blur_size,
                          random_blur_sigma_min = settings.random_blur_sigma_min,
                          random_blur_sigma_max = settings.random_blur_sigma_max,
                          max_size = settings.max_size,
                          jpeg_chroma_subsampling_rate = settings.jpeg_chroma_subsampling_rate,
                          active_cropping_rate = active_cropping_rate,
                          active_cropping_tries = active_cropping_tries,
                          nr_rate = settings.nr_rate,
                          rgb = (settings.color == "rgb")}, meta)
                    return pairwise_transform.jpeg(x,
                                                   settings.style,
                                                   settings.noise_level,
                                                   settings.crop_size, offset,
                                                   n, conf)
                 elseif settings.method == "noise_scale" then
                    local conf = tablex.update({
                          mutex = mutex,
                          downsampling_filters = settings.downsampling_filters,
                          random_half_rate = settings.random_half_rate,
                          random_color_noise_rate = random_color_noise_rate,
                          random_overlay_rate = random_overlay_rate,
                          random_unsharp_mask_rate = settings.random_unsharp_mask_rate,
                          random_blur_rate = settings.random_blur_rate,
                          random_blur_size = settings.random_blur_size,
                          random_blur_sigma_min = settings.random_blur_sigma_min,
                          random_blur_sigma_max = settings.random_blur_sigma_max,
                          max_size = settings.max_size,
                          jpeg_chroma_subsampling_rate = settings.jpeg_chroma_subsampling_rate,
                          nr_rate = settings.nr_rate,
                          active_cropping_rate = active_cropping_rate,
                          active_cropping_tries = active_cropping_tries,
                          rgb = (settings.color == "rgb"),
                          x_upsampling = not has_resize,
                          resize_blur_min = settings.resize_blur_min,
                          resize_blur_max = settings.resize_blur_max}, meta)
                    return pairwise_transform.jpeg_scale(x,
                                                         settings.scale,
                                                         settings.style,
                                                         settings.noise_level,
                                                         settings.crop_size, offset,
                                                         n, conf)
                 elseif settings.method == "user" then
                    -- Pairwise augmentations are disabled for validation data.
                    local rotate_rate = 0
                    local scale_rate = 0
                    local negate_rate = 0
                    local negate_x_rate = 0
                    if not is_validation then
                       rotate_rate = settings.random_pairwise_rotate_rate
                       scale_rate = settings.random_pairwise_scale_rate
                       negate_rate = settings.random_pairwise_negate_rate
                       negate_x_rate = settings.random_pairwise_negate_x_rate
                    end
                    local conf = tablex.update({
                          max_size = settings.max_size,
                          active_cropping_rate = active_cropping_rate,
                          active_cropping_tries = active_cropping_tries,
                          random_pairwise_rotate_rate = rotate_rate,
                          random_pairwise_rotate_min = settings.random_pairwise_rotate_min,
                          random_pairwise_rotate_max = settings.random_pairwise_rotate_max,
                          random_pairwise_scale_rate = scale_rate,
                          random_pairwise_scale_min = settings.random_pairwise_scale_min,
                          random_pairwise_scale_max = settings.random_pairwise_scale_max,
                          random_pairwise_negate_rate = negate_rate,
                          random_pairwise_negate_x_rate = negate_x_rate,
                          pairwise_y_binary = settings.pairwise_y_binary,
                          pairwise_flip = settings.pairwise_flip,
                          rgb = (settings.color == "rgb")}, meta)
                    return pairwise_transform.user(x, y,
                                                   settings.crop_size, offset,
                                                   n, conf)
                 end
                 -- Previously an unknown method fell through and returned nil,
                 -- which only surfaced later as a confusing length-of-nil error.
                 error("unsupported settings.method: " .. tostring(settings.method))
              end
        end)
     )
     g_transform_pool:synchronize()
  end
  --- Build a shuffled list of validation pairs using the worker pool.
  -- Each image is submitted `math.max(n / validation_patches, 1)` times to the
  -- worker-side `transformer` in validation mode; every returned pair is
  -- stored as `{x = ..., y = ...}`. The collected list is shuffled with
  -- `torch.randperm` before it is returned.
  -- @param x       list of stored images
  -- @param n       requested crops per image (default 4)
  -- @param patches patches per transformer call; capped at 16 (default 16)
  -- @return list of `{x = input, y = target}` tables
  local function make_validation_set(x, n, patches)
     local restore_threads = torch.getnumthreads()
     if (settings.thread > 0) then
        restore_threads = settings.thread
     end
     n = n or 4
     local validation_patches = math.min(16, patches or 16)
     local repeats = math.max(n / validation_patches, 1)
     local collected = {}

     -- Runs on the main thread once a job has finished.
     local function collect(pairs_)
        for j = 1, #pairs_ do
           table.insert(collected, {x = pairs_[j][1], y = pairs_[j][2]})
        end
     end

     g_transform_pool:synchronize()
     torch.setnumthreads(1) -- single torch thread while the pool is busy
     for i = 1, #x do
        local input = x[i]
        for _ = 1, repeats do
           g_transform_pool:addjob(
              function()
                 -- Parentheses keep exactly one return value.
                 return (transformer(input, true, validation_patches))
              end,
              collect
           )
        end
        if i % 20 == 0 then
           collectgarbage()
           g_transform_pool:synchronize()
           xlua.progress(i, #x)
        end
     end
     g_transform_pool:synchronize()
     torch.setnumthreads(restore_threads) -- revert

     local shuffled = {}
     local perm = torch.randperm(#collected)
     for i = 1, perm:size(1) do
        shuffled[i] = collected[perm[i]]
     end
     return shuffled
  end
  --- Evaluate the model on the validation pairs, one full batch at a time.
  -- A trailing partial batch is skipped. Loss and metric values are averaged
  -- over the number of batches that were evaluated.
  -- @param model       network to evaluate
  -- @param criterion   training criterion (reported as `loss`)
  -- @param eval_metric metric criterion (reported as `MSE`, basis of `PSNR`)
  -- @param data        list of `{x = ..., y = ...}` entries
  -- @param batch_size  entries per batch
  -- @return table with fields `loss`, `MSE` and `PSNR`
  local function validate(model, criterion, eval_metric, data, batch_size)
     local first = data[1]
     -- Host-side staging buffers shaped after the first sample.
     local inputs_tmp = torch.Tensor(batch_size,
                                     first.x:size(1),
                                     first.x:size(2),
                                     first.x:size(3)):zero()
     local targets_tmp = torch.Tensor(batch_size,
                                      first.y:size(1),
                                      first.y:size(2),
                                      first.y:size(3)):zero()
     local inputs = inputs_tmp:clone():cuda()
     local targets = targets_tmp:clone():cuda()

     local loss_sum, mse_sum, batches = 0, 0, 0
     -- The upper bound admits only start positions with a complete batch.
     for t = 1, #data - batch_size + 1, batch_size do
        for i = 1, batch_size do
           local sample = data[t + i - 1]
           inputs_tmp[i]:copy(sample.x)
           targets_tmp[i]:copy(sample.y)
        end
        inputs:copy(inputs_tmp)
        targets:copy(targets_tmp)
        local z = model:forward(inputs)
        loss_sum = loss_sum + criterion:forward(z, targets)
        mse_sum = mse_sum + eval_metric:forward(z, targets)
        batches = batches + 1
        if batches % 10 == 0 then
           xlua.progress(t, #data)
           collectgarbage()
        end
     end
     xlua.progress(#data, #data)

     local mean_mse = mse_sum / batches
     return {
        loss = loss_sum / batches,
        MSE = mean_mse,
        PSNR = 10 * math.log10(1 / mean_mse),
     }
  end
  --- Build the CUDA training criterion for the model.
  -- The weight tensor has one row per output channel and one column per
  -- output pixel (`settings.crop_size - 2 * offset`, squared). RGB models get
  -- the three per-channel weights below; other models get a single row of 1.0.
  -- @param model network queried through `reconstruct`
  -- @return a `w2nn.ClippedWeightedHuberCriterion` moved to CUDA
  local function create_criterion(model)
     local channel_weights
     if reconstruct.is_rgb(model) then
        channel_weights = {
           0.29891 * 3, -- R
           0.58661 * 3, -- G
           0.11448 * 3, -- B
        }
     else
        channel_weights = {1.0}
     end
     local offset = reconstruct.offset_size(model)
     local output_w = settings.crop_size - offset * 2
     local weight = torch.Tensor(#channel_weights, output_w * output_w)
     for c = 1, #channel_weights do
        weight[c]:fill(channel_weights[c])
     end
     return w2nn.ClippedWeightedHuberCriterion(weight, 0.1, {0.0, 1.0}):cuda()
  end
  --- Refill the training buffers with freshly transformed patches.
  -- Images are visited in a random order and sent to the worker-side
  -- `transformer`; the resulting pairs are copied into consecutive rows of
  -- `x` and `y` until the buffers are full or every image was submitted.
  -- @param x       input buffer, filled row by row in place
  -- @param y       target buffer, filled row by row in place
  -- @param train_x list of stored training images
  local function resampling(x, y, train_x)
     local cursor = 1
     local order = torch.randperm(#train_x)
     local restore_threads = torch.getnumthreads()
     if (settings.thread > 0) then
        restore_threads = settings.thread
     end
     local capacity = x:size(1)

     -- Runs on the main thread: copy as many pairs as still fit.
     local function store(pairs_)
        for i = 1, #pairs_ do
           if cursor > capacity then
              break
           end
           x[cursor]:copy(pairs_[i][1])
           y[cursor]:copy(pairs_[i][2])
           cursor = cursor + 1
        end
     end

     torch.setnumthreads(1) -- single torch thread while the pool is busy
     for t = 1, #train_x do
        local input = train_x[order[t]]
        g_transform_pool:addjob(
           function()
              -- Parentheses keep exactly one return value.
              return (transformer(input, false, settings.patches))
           end,
           store
        )
        if t % 50 == 0 then
           collectgarbage()
           g_transform_pool:synchronize()
           xlua.progress(t, #train_x)
        end
        if cursor > capacity then
           break
        end
     end
     g_transform_pool:synchronize()
     xlua.progress(#train_x, #train_x)
     torch.setnumthreads(restore_threads) -- revert
  end
  --- Copy a random subset of the highest-loss samples into new tensors.
  -- `torch.topk` selects the `k` largest entries of `instance_loss`; `samples`
  -- of those are then picked via a random permutation of `1..k`.
  -- @param x             input buffer
  -- @param y             target buffer
  -- @param instance_loss per-sample loss, one entry per row of x/y
  -- @param k             size of the high-loss candidate pool
  -- @param samples       number of rows to return
  -- @return oracle_x, oracle_y new tensors with `samples` rows each
  local function get_oracle_data(x, y, instance_loss, k, samples)
     local total = instance_loss:size(1)
     local top_index = torch.LongTensor(total)
     local top_loss = torch.Tensor(total)
     torch.topk(top_loss, top_index, instance_loss, k, 1, true)

     local mean_all = instance_loss:mean()
     local mean_top = top_loss:mean()
     print("MSE of all data: " .. mean_all .. ", MSE of oracle data: " .. mean_top)

     local pick = torch.randperm(k)

     -- Allocate a tensor shaped like `t` but with `samples` rows.
     local function alloc_like(t)
        local dims = t:size()
        dims[1] = samples
        return torch.Tensor(table.unpack(torch.totable(dims)))
     end
     local oracle_x = alloc_like(x)
     local oracle_y = alloc_like(y)

     for i = 1, samples do
        local row = top_index[pick[i]]
        oracle_x[i]:copy(x[row])
        oracle_y[i]:copy(y[row])
     end
     return oracle_x, oracle_y
  end
  --- Drop images that are too small to crop training patches from.
  -- An image is kept when both dimensions 2 and 3 of its stored size, divided
  -- by `settings.scale`, exceed `settings.crop_size + 32`.
  -- Entries are inspected with the same shape test the worker-side
  -- `transformer` uses: `{payload, meta}` where payload is either a compressed
  -- image or a table with compressed `.x`/`.y`, or a bare compressed image.
  -- @param x list of stored images
  -- @return new list containing only the sufficiently large entries
  local function remove_small_image(x)
     local compression = require 'compression'
     local new_x = {}
     for i = 1, #x do
        local xe = x[i]
        local x_s
        -- Test the entry itself. The earlier code tested the whole list
        -- (`x` / `x[2]`), so the branch depended on the second image rather
        -- than on the current one.
        if type(xe) == "table" and type(xe[2]) == "table" then
           if xe[1].x and xe[1].y then
              x_s = compression.size(xe[1].y) -- y size
           else
              x_s = compression.size(xe[1])
           end
        else
           x_s = compression.size(xe)
        end
        if x_s[2] / settings.scale > settings.crop_size + 32 and
           x_s[3] / settings.scale > settings.crop_size + 32 then
           table.insert(new_x, xe)
        end
        if i % 100 == 0 then
           collectgarbage()
        end
     end
     print(string.format("%d small images are removed", #x - #new_x))
     return new_x
  end
  --- Plot the training and validation loss histories with gnuplot.
  -- @param train list of training loss values
  -- @param valid list of validation loss values
  local function plot(train, valid)
     local curves = {
        {'training', torch.Tensor(train), '-'},
        {'validation', torch.Tensor(valid), '-'},
     }
     gnuplot.plot(curves)
  end
  --- Write the preview image for the current best model.
  -- The file name and the save routine depend on `settings.method`; nothing
  -- is written for a method that is not listed.
  -- @param model      network to run on the test image
  -- @param test_image image loaded from `settings.test`
  -- @param suffix     text inserted between "_best" and ".png" ("" or
  --                   ".<epoch>-<inner epoch>")
  local function save_best_preview(model, test_image, suffix)
     local stem, save
     if settings.method == "noise" then
        stem = ("noise%d_best"):format(settings.noise_level)
        save = save_test_jpeg
     elseif settings.method == "scale" then
        stem = ("scale%.1f_best"):format(settings.scale)
        save = save_test_scale
     elseif settings.method == "noise_scale" then
        stem = ("noise%d_scale%.1f_best"):format(settings.noise_level, settings.scale)
        save = save_test_scale
     elseif settings.method == "user" then
        stem = ("%s_best"):format(settings.name)
        save = save_test_user
     else
        return
     end
     save(model, test_image, path.join(settings.model_dir, stem .. suffix .. ".png"))
  end

  --- Run the full training loop described by `settings`.
  -- Loads and filters the images, splits off a validation set, creates or
  -- resumes the model, then alternates resampling of training patches with
  -- `settings.inner_epoch` optimisation passes per epoch. Whenever the
  -- validation MSE improves, the model and a preview image are saved.
  local function train()
     local images = remove_small_image(torch.load(settings.images))
     local train_x, valid_x = split_data(images, math.max(math.floor(settings.validation_rate * #images), 1))
     -- Drop the combined list so the validation images can be collected once
     -- valid_x is released below.
     images = nil
     local hist_train = {}
     local hist_valid = {}
     local model
     if settings.resume:len() > 0 then
        model = torch.load(settings.resume, "ascii")
     else
        model = srcnn.create(settings.model, settings.backend, settings.color)
     end
     if model.w2nn_input_size then
        if settings.crop_size ~= model.w2nn_input_size then
           io.stderr:write(string.format("warning: crop_size is replaced with %d\n",
                                         model.w2nn_input_size))
           settings.crop_size = model.w2nn_input_size
        end
     end
     dir.makepath(settings.model_dir)
     local offset = reconstruct.offset_size(model)
     transform_pool_init(reconstruct.has_resize(model), offset)
     local criterion = create_criterion(model)
     local eval_metric = w2nn.ClippedMSECriterion(0, 1):cuda()
     local adam_config = {
        xLearningRate = settings.learning_rate,
        xBatchSize = settings.batch_size,
        xLearningRateDecay = settings.learning_rate_decay,
        xInstanceLoss = (settings.oracle_rate > 0)
     }
     local ch
     if settings.color == "y" then
        ch = 1
     elseif settings.color == "rgb" then
        ch = 3
     else
        -- Without this the buffer allocation below failed on a nil `ch`.
        error("unsupported settings.color: " .. tostring(settings.color))
     end
     local best_score = 1000.0
     print("# make validation-set")
     local valid_xy = make_validation_set(valid_x,
                                          settings.validation_crops,
                                          settings.patches)
     valid_x = nil
     collectgarbage()
     model:cuda()
     print("load .. " .. #train_x)

     -- Training buffers: one row per patch.
     local out_w = settings.crop_size - offset * 2
     local n_rows = settings.patches * #train_x
     local y = torch.Tensor(n_rows, ch * out_w * out_w):zero()
     local x
     if reconstruct.has_resize(model) then
        x = torch.Tensor(n_rows,
                         ch, settings.crop_size / settings.scale, settings.crop_size / settings.scale)
     else
        x = torch.Tensor(n_rows,
                         ch, settings.crop_size, settings.crop_size)
     end

     local instance_loss = nil
     for epoch = 1, settings.epoch do
        model:training()
        print("# " .. epoch)
        if adam_config.learningRate then
           print("learning rate: " .. adam_config.learningRate)
        end
        print("## resampling")
        if instance_loss then
           -- active learning
           local total = x:size(1)
           -- Floor both counts so a fractional product can neither yield a
           -- zero-row oracle set nor a non-integer k.
           local oracle_k = math.floor(math.min(total * (settings.oracle_rate * (1 / (1 - settings.oracle_drop_rate))), total))
           local oracle_n = math.floor(math.min(total * settings.oracle_rate, total))
           if oracle_n > 0 then
              local oracle_x, oracle_y = get_oracle_data(x, y, instance_loss, oracle_k, oracle_n)
              local kept = oracle_x:size(1)
              local rest = total - kept
              -- Skip resampling when the oracle rows fill the whole buffer;
              -- a zero-length narrow is rejected by Torch.
              if rest > 0 then
                 resampling(x:narrow(1, kept + 1, rest),
                            y:narrow(1, kept + 1, rest), train_x)
              end
              x:narrow(1, 1, kept):copy(oracle_x)
              y:narrow(1, 1, oracle_y:size(1)):copy(oracle_y)

              -- Save a square preview grid of at most 100 x 100 oracle patches.
              local draw_n = math.min(math.floor(math.sqrt(kept)), 100)
              image.save(path.join(settings.model_dir, "oracle_x.png"),
                         image.toDisplayTensor({
                               input = oracle_x:narrow(1, 1, draw_n * draw_n),
                               padding = 2,
                               nrow = draw_n,
                               min = 0,
                               max = 1}))
           else
              resampling(x, y, train_x)
           end
        else
           resampling(x, y, train_x)
        end
        collectgarbage()
        instance_loss = torch.Tensor(x:size(1)):zero()

        for i = 1, settings.inner_epoch do
           model:training()
           local train_score, il = minibatch_adam(model, criterion, eval_metric, x, y, adam_config)
           instance_loss:copy(il)
           print(train_score)
           model:evaluate()
           print("# validation")
           local score = validate(model, criterion, eval_metric, valid_xy, adam_config.xBatchSize)
           table.insert(hist_train, train_score.loss)
           table.insert(hist_valid, score.loss)
           if settings.plot then
              plot(hist_train, hist_valid)
           end
           if score.MSE < best_score then
              local test_image = image_loader.load_float(settings.test) -- reload
              best_score = score.MSE
              print("* model has updated")
              if settings.save_history then
                 -- Keep the running best plus one snapshot per improvement.
                 torch.save(settings.model_file_best, model:clearState(), "ascii")
                 torch.save(string.format(settings.model_file, epoch, i), model:clearState(), "ascii")
                 save_best_preview(model, test_image, (".%d-%d"):format(epoch, i))
              else
                 torch.save(settings.model_file, model:clearState(), "ascii")
                 save_best_preview(model, test_image, "")
              end
           end
           print("Batch-wise PSNR: " .. score.PSNR .. ", loss: " .. score.loss .. ", MSE: " .. score.MSE .. ", Minimum MSE: " .. best_score)
           collectgarbage()
        end
     end
  end
  -- Script entry: select the configured GPU (when `settings.gpu` is positive),
  -- seed torch and cutorch with the same configured value, print the settings
  -- and start training.
  local gpu_id = settings.gpu
  if gpu_id > 0 then
     cutorch.setDevice(gpu_id)
  end
  local seed = settings.seed
  torch.manualSeed(seed)
  cutorch.manualSeed(seed)
  print(settings)
  train()