Commit b4c3d793 authored by Stavros Piperakis

Initial commit
# SignTrack
## Table of contents
* [General info](#general-info)
* [Dependencies](#dependencies)
* [Setup](#setup)
## General info
SignTrack is a real-time sign language transcriber that makes interacting with numerous applications possible: from turning signs into text to helping you learn sign language through interactive use.
## Dependencies
This project depends on:
* Python: 3.7
* Tensorflow: 2.5
* OpenCV: 4.1.2.30
* Scikit-Learn
* Matplotlib
* Mediapipe
## Setup
Unlike most projects involving Tensorflow, installing SignTrack is beginner friendly:
* Install [Python Poetry](https://python-poetry.org/docs/)
* Open a terminal in the directory where SignTrack is downloaded
* Run `poetry install`
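
# pyproject.toml (Poetry project configuration)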
[tool.poetry]
name = "SignTrack"
version = "0.1.0"
description = ""
authors = ["Stavros Piperakis <piperakis@protonmail.com>"]

[tool.poetry.dependencies]
python = "^3.7"
tensorflow = "2.5"
opencv-python = "4.1.2.30"
mediapipe = "^0.8.9"
scikit-learn = "^1.0"
matplotlib = "^3.5.1"

[tool.poetry.dev-dependencies]
pytest = "^5.2"
autopep8 = "^1.6.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
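
# --- Real-time recognition script (filename not shown in this diff) ---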
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import cv2
import numpy as np
import mediapipe as mp
from essentials import mediapipe_detection, display_styled_landmarks, extract_keypoints

# Signs the model can recognize
actions = np.array(['yes', 'no', 'thanks', 'hello', 'nothing'])

mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

# The architecture must match the one used in training:
# 24 frames per sequence, 126 keypoint features per frame
model = Sequential()
model.add(LSTM(64, return_sequences=True,
               activation='relu', input_shape=(24, 126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))
model.load_weights('SignTrack.h5')

# One bar color per action for the probability overlay
colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245),
          (16, 117, 245), (16, 117, 245)]


def prob_viz(res, actions, input_frame, colors):
    # Draw one horizontal bar per action, scaled by its predicted probability
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0, 60 + num * 40),
                      (int(prob * 100), 90 + num * 40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85 + num * 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame


sequence, sentence = [], []
threshold = 0.90

cap = cv2.VideoCapture(0)
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)

        # Draw landmarks
        display_styled_landmarks(image, results)

        # Prediction logic: keep the last 24 frames of keypoints,
        # matching the model's input_shape of (24, 126)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-24:]

        if len(sequence) == 24:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])

            # Viz logic: append the prediction only when it is confident
            # and differs from the previous word
            if res[np.argmax(res)] > threshold:
                if len(sentence) > 0:
                    if actions[np.argmax(res)] != sentence[-1]:
                        sentence.append(actions[np.argmax(res)])
                else:
                    sentence.append(actions[np.argmax(res)])

            # Keep only the five most recent words
            if len(sentence) > 5:
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)

        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Show to screen
        cv2.imshow('SignTrack', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()
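
# --- Dataset collection script (filename not shown in this diff) ---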
# Importing dependencies
import cv2
import numpy as np
import os
import mediapipe as mp
from pathlib import Path
from essentials import mediapipe_detection, extract_keypoints, display_styled_landmarks

# Dataset export location, changing requires changes in Signtrack_Train
data_path = os.path.join('Dataset')

# Actions that we try to detect, changing requires changes in Signtrack_Train
signs = np.array(['yes', 'no', 'thanks', 'hello', 'nothing'])

# Number of data packs to be collected for each sign
# (each recording is saved twice: as-is and mirrored)
no_datapacks = 3

# Frames per data pack, changing requires changes in Signtrack_Train
sequence_length = 24

cap = cv2.VideoCapture(0)  # Choose camera to be used
mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities


def existing_data(sign):
    # Count how many data packs have already been collected for this sign
    path = Path(data_path) / sign
    return len(os.listdir(path)) if path.exists() else 0
# Pre-create the directories for the new data packs; every recording is
# stored twice (original and mirrored), hence no_datapacks * 2 folders
for sign in signs:
    exdt = existing_data(sign)
    for sequence in range(no_datapacks * 2):
        os.makedirs(os.path.join(data_path, sign, str(sequence + exdt)),
                    exist_ok=True)
# Setting mediapipe parameters
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    # Loop through signs
    for sign in signs:
        # Index of the first directory created for this session
        exdt = existing_data(sign) - no_datapacks * 2
        # Loop through sequences aka videos
        for sequence in range(no_datapacks):
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):
                # Read feed
                ret, frame = cap.read()
                frame_flipped = cv2.flip(frame, 1)

                # Make detections on the original and the mirrored frame
                img, results = mediapipe_detection(frame, holistic)
                img_flipped, results_flipped = mediapipe_detection(
                    frame_flipped, holistic)

                # Draw landmarks
                display_styled_landmarks(img, results)

                # Apply wait logic: pause before each new sequence
                if frame_num == 0:
                    cv2.putText(img, 'STARTING COLLECTION', (120, 200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(img, 'Sign: {} Sequence: {}'.format(sign, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('SignTrack Training', img)
                    cv2.waitKey(2000)
                else:
                    cv2.putText(img, 'Collecting frames for {} Video Number {}'.format(sign, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('SignTrack Training', img)

                # Export keypoints for both variants of the frame
                keypoints = extract_keypoints(results)
                keypoints_flipped = extract_keypoints(results_flipped)
                npy_path = os.path.join(
                    data_path, sign, str(2 * sequence + exdt), str(frame_num))
                np.save(npy_path, keypoints)
                npy_path_flipped = os.path.join(
                    data_path, sign, str(2 * sequence + 1 + exdt), str(frame_num))
                np.save(npy_path_flipped, keypoints_flipped)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

cap.release()
cv2.destroyAllWindows()
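
# --- Signtrack_Train: model training script ---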
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Path for exported data, numpy arrays
DATA_PATH = os.path.join('Dataset')

# Signs that we try to detect
signs = np.array(['yes', 'no', 'thanks', 'hello', 'nothing'])

# Number of sequences (data packs) loaded per sign
no_sequences = 5

# Videos are going to be 24 frames in length
sequence_length = 24

# Map each sign label to an integer class index
label_map = {label: num for num, label in enumerate(signs)}

# Load the saved keypoint arrays into one (samples, 24, 126) dataset
sequences, labels = [], []
for sign in signs:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, sign, str(
                sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[sign])

X = np.array(sequences)
y = to_categorical(labels).astype(int)

# Log training metrics for TensorBoard
log_dir = os.path.join('Insights')
tb_callback = TensorBoard(log_dir=log_dir)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = Sequential()
model.add(LSTM(64, return_sequences=True,
               activation='relu', input_shape=(24, 126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(signs.shape[0], activation='softmax'))

model.compile(optimizer='Adam', loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])
model.fit(X_train, y_train, epochs=70, callbacks=[tb_callback])
model.save('SignTrack.h5')
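
# A minimal evaluation sketch (not part of the original commit): the split
# above reserves 20% of the sequences as a held-out test set that the script
# never otherwise uses, so the trained model can be scored on it.
loss, acc = model.evaluate(X_test, y_test)
print('test loss: {:.3f}, test categorical accuracy: {:.3f}'.format(loss, acc))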
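
# --- essentials.py: shared helpers imported by the scripts above ---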
import cv2
import numpy as np
import mediapipe as mp

mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities


def mediapipe_detection(img, model):
    # COLOR CONVERSION BGR 2 RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img.flags.writeable = False  # Image is no longer writeable
    results = model.process(img)  # Make prediction
    img.flags.writeable = True  # Image is now writeable
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # COLOR CONVERSION RGB 2 BGR
    return img, results


def draw_landmarks(img, results):
    mp_drawing.draw_landmarks(img, results.left_hand_landmarks,
                              mp_holistic.HAND_CONNECTIONS)  # Draw left hand connections
    mp_drawing.draw_landmarks(img, results.right_hand_landmarks,
                              mp_holistic.HAND_CONNECTIONS)  # Draw right hand connections


def display_styled_landmarks(img, results):
    # Draw left hand connections
    mp_drawing.draw_landmarks(img, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(
                                  color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(
                                  color=(121, 44, 250), thickness=2, circle_radius=2)
                              )
    # Draw right hand connections
    mp_drawing.draw_landmarks(img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(
                                  color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(
                                  color=(245, 66, 230), thickness=2, circle_radius=2)
                              )


def extract_keypoints(results):
    # Flatten each hand's 21 landmarks (x, y, z) into a 63-value vector,
    # substituting zeros when a hand is not detected
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten(
    ) if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten(
    ) if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])
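
# A minimal sanity check (a sketch, not part of the original file): the
# feature vector concatenates 21 left-hand and 21 right-hand landmarks with
# 3 coordinates each, so every frame yields 21 * 3 * 2 = 126 values, and 24
# frames form one (24, 126) sequence, matching the LSTM input_shape above.
class _EmptyResults:
    # Stand-in for a MediaPipe Holistic results object with no hands detected
    left_hand_landmarks = None
    right_hand_landmarks = None


if __name__ == '__main__':
    keypoints = extract_keypoints(_EmptyResults())
    assert keypoints.shape == (126,)
    sequence = np.stack([keypoints] * 24)  # one data pack worth of frames
    assert sequence.shape == (24, 126)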