From 84712a1740b154bfe467726f5778f5629c9724df Mon Sep 17 00:00:00 2001
From: Danila Fedorin <danila.fedorin@gmail.com>
Date: Sun, 22 Dec 2019 22:24:34 -0800
Subject: [PATCH] Initial model (broken)

---
 model.py | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 model.py

diff --git a/model.py b/model.py
new file mode 100644
index 0000000..79304a9
--- /dev/null
+++ b/model.py
@@ -0,0 +1,155 @@
+import datetime
+import csv
+import os
+import tensorflow as tf
+from keras import *
+from keras.layers import *
+from keras.backend import *
+from keras.optimizers import *
+from keras.preprocessing.image import *
+from keras.callbacks import TensorBoard
+
+class SimpleCNN(object):
+    def __init__(self):
+        self.image_size = 512
+        self.classes = 6
+        self.squares = 8
+        self.boxes = 2
+        self.build_model()
+
+    def iou(self, box_one, box_two):
+        def op_func(combine, compute):
+            return combine(compute(box_one), compute(box_two))
+
+        i_x1 = op_func(maximum, lambda b: b[0]-b[2]/2)
+        i_y1 = op_func(maximum, lambda b: b[1]-b[3]/2)
+        i_x2 = op_func(minimum, lambda b: b[0]+b[2]/2)
+        i_y2 = op_func(minimum, lambda b: b[1]+b[3]/2)
+
+        area_1 = box_one[2]*box_one[3]
+        area_2 = box_two[2]*box_one[3]
+        intersection_area = (i_x2-i_x1)*(i_y2-i_y1)
+
+        return intersection_area/(area_1+area_2-intersection_area+.01)
+
+    def cost(self, truth_tensor, output_tensor):
+        def input_output_tensor(f):
+            def per_output_tensor(output):
+                return tf.convert_to_tensor(
+                        [tf.map_fn(
+                                lambda truth: f(output[i*5:i*5+5],truth[0:5]),
+                                truth_tensor)
+                            for i in range(2)])
+
+            return tf.map_fn(per_output_tensor, output_tensor)
+
+        # Compute per object IOU values for each square, for each box.
+        ious = input_output_tensor(self.iou)
+
+        # Compute the minimum IOS per object.
+        min_class_ious = min(min(ious, axis=0), axis=0)
+
+        # Compute the minimum IOS per object.
+        max_class_ious = max(max(ious, axis=0), axis=0)
+
+        # Whether each box of each square is responsible for
+        # the minimum IOU. This is used for penalizing object absense.
+        eq_min_box = tf.map_fn(lambda iou:
+            tf.convert_to_tensor([equal(iou[j], min_class_ious) for j in range(2)]), ious, dtype='bool')
+        # Same as above, but per-square rather than per-box.
+        eq_min_square= any(eq_min_box, axis=1)
+
+        # Whether each box of each square is responsible
+        # for the maximum IOU. This is used for penalizing
+        # incorrect bounds and confidence.
+        eq_max_box = tf.map_fn(lambda iou:
+            tf.convert_to_tensor([equal(iou[j], max_class_ious) for j in range(2)]), ious, dtype='bool')
+        # Same as above, but per-square. Penalizes bad class guesses.
+        eq_max_square= any(eq_max_box, axis=1)
+
+        # The cost of incorrect coordinate guesses per box.
+        coord_cost = input_output_tensor(
+                lambda o,t: pow(o[0]-t[0], 2) + pow(o[1]-t[1], 2))
+        # The cost of incorrect size guesses per box.
+        dim_cost = input_output_tensor(
+                lambda o,t: pow(pow(o[0], 0.5)-pow(t[0], 0.5), 2) + pow(pow(o[1], 0.5)-pow(t[1], 0.5), 2))
+        # The cost of incorrect confidence guesses per box.
+        confidence_cost = input_output_tensor(
+                lambda o,t: pow(o[4]-t[4], 2))
+        # The cost of incorrect class guesses, per square.
+        class_cost = tf.map_fn(lambda output:
+                tf.map_fn(lambda truth:
+                    tf.norm(output[2*5:2*5+6]-truth[5:12]), truth_tensor), output_tensor)
+
+        # Weights from the YOLO paper.
+        coord_weight = 5
+        obj_weight = 1
+        noobj_weight = 0.5
+
+        # Cost, per box, penalized when an object is guessed.
+        obj_cost= coord_weight * (coord_cost + dim_cost) + obj_weight * confidence_cost
+        # Cost, per box, penalized when an object is not guessed.
+        noobj_cost= noobj_weight * confidence_cost
+        # Cost per box, selecting only "responsible" entries.
+        box_cost = (
+            obj_cost* cast(eq_max_box, 'float32') + 
+            noobj_cost* cast(eq_min_box, 'float32')
+        )
+
+        # Cost per square, penalizing only "responsible" squares.
+        square_cost= class_cost * cast(eq_max_square, 'float32')
+
+        # Total cost
+        cost = sum(sum(sum(box_cost))) + sum(sum(square_cost))
+        return cost
+        
+    def build_model(self):
+        self.model = Sequential()
+        self.convolutional_layer(
+                filters=16, kernel_size=7, strides=2, padding='same',
+                input_shape=(self.image_size,self.image_size,3)
+        )
+        self.maxpool_layer(pool_size=2, strides=2, padding='same')
+
+        for _ in range(4):
+            self.convolutional_layer(filters=8, kernel_size=3, padding='same')
+            self.maxpool_layer(pool_size=2, strides=2, padding='same')
+
+        self.model.add(Flatten())
+        self.model.add(Dense(units=self.squares*self.squares*(self.boxes*5+self.classes)))
+        self.model.add(Reshape((64,-1)))
+        self.model.compile(loss=self.loss, optimizer=Adam(lr=0.5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0))
+
+    def maxpool_layer(self, *args, **kwargs):
+        self.model.add(MaxPooling2D(*args, **kwargs))
+
+    def convolutional_layer(self, *args, **kwargs):
+        self.model.add(Conv2D(*args, **kwargs))
+        self.model.add(LeakyReLU())
+
+    def summary(self):
+        self.model.summary()
+
+def image_generator(source, n):
+    for i in range(n):
+        image_path = os.path.join(source, 'image' + str(i) + '.png')
+        truth_path = os.path.join(source, 'objects' + str(i) + '.csv')
+        data = img_to_array(load_img(image_path))
+        truth_file = open(truth_path, "r")
+        csv_reader = csv.reader(truth_file, delimiter=',')
+        truth = tf.convert_to_tensor([[float(x) for x in row] for row in csv_reader])
+        truth_file.close
+        yield data, truth
+
+inputs = []
+outputs = []
+for (inp, out) in image_generator('data/test', 100):
+    inputs.append(inp)
+    outputs.append(out)
+
+log_dir="logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)
+
+model = SimpleCNN()
+model.summary()
+model.model.fit(x=tf.convert_to_tensor(inputs), y=tf.convert_to_tensor(outputs), epochs=1, steps_per_epoch=1)