Commit b4c3d793 authored by Stavros Piperakis

Initial commit
# SignTrack
## Table of contents
* [General info](#general-info)
* [Dependencies](#dependencies)
* [Setup](#setup)
## General info
SignTrack is a real-time sign language transcriber that makes interacting with numerous applications possible: from turning signs into text to helping you learn sign language through interactive use.
## Dependencies
This project depends on:
* Python: 3.7
* Tensorflow: 2.5
* OpenCV: 4.1.2.30
* Scikit-Learn
* Matplotlib
* Mediapipe
## Setup
Unlike most projects involving Tensorflow, installing SignTrack is beginner friendly:
* Install [Python Poetry](https://python-poetry.org/docs/)
* Open a terminal in the directory where SignTrack is downloaded
* Run `poetry install`
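
# pyproject.toml (Poetry project configuration)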
[tool.poetry]
name = "SignTrack"
version = "0.1.0"
description = ""
authors = ["Stavros Piperakis <piperakis@protonmail.com>"]

[tool.poetry.dependencies]
python = "^3.7"
tensorflow = "2.5"
opencv-python = "4.1.2.30"
mediapipe = "^0.8.9"
scikit-learn = "^1.0"
matplotlib = "^3.5.1"

[tool.poetry.dev-dependencies]
pytest = "^5.2"
autopep8 = "^1.6.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
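
# --- Real-time recognition script (filename not shown in this diff) ---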
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import cv2
import numpy as np
import mediapipe as mp
from essentials import mediapipe_detection, display_styled_landmarks, extract_keypoints

# Signs the model can recognize
actions = np.array(['yes', 'no', 'thanks', 'hello', 'nothing'])

mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

# The architecture must match the one used in training:
# 24 frames per sequence, 126 keypoint features per frame
model = Sequential()
model.add(LSTM(64, return_sequences=True,
               activation='relu', input_shape=(24, 126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))
model.load_weights('SignTrack.h5')

# One bar color per action for the probability overlay
colors = [(245, 117, 16), (117, 245, 16), (16, 117, 245),
          (16, 117, 245), (16, 117, 245)]


def prob_viz(res, actions, input_frame, colors):
    # Draw one horizontal bar per action, scaled by its predicted probability
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0, 60 + num * 40),
                      (int(prob * 100), 90 + num * 40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85 + num * 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame


sequence, sentence = [], []
threshold = 0.90

cap = cv2.VideoCapture(0)
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)

        # Draw landmarks
        display_styled_landmarks(image, results)

        # Prediction logic: keep the last 24 frames of keypoints,
        # matching the model's input_shape of (24, 126)
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-24:]

        if len(sequence) == 24:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])

            # Viz logic: append the prediction only when it is confident
            # and differs from the previous word
            if res[np.argmax(res)] > threshold:
                if len(sentence) > 0:
                    if actions[np.argmax(res)] != sentence[-1]:
                        sentence.append(actions[np.argmax(res)])
                else:
                    sentence.append(actions[np.argmax(res)])

            # Keep only the five most recent words
            if len(sentence) > 5:
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)

        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Show to screen
        cv2.imshow('SignTrack', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()
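
# --- Dataset collection script (filename not shown in this diff) ---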
# Importing dependencies
import cv2
import numpy as np
import os
import mediapipe as mp
from pathlib import Path
from essentials import mediapipe_detection, extract_keypoints, display_styled_landmarks

# Dataset export location, changing requires changes in Signtrack_Train
data_path = os.path.join('Dataset')

# Actions that we try to detect, changing requires changes in Signtrack_Train
signs = np.array(['yes', 'no', 'thanks', 'hello', 'nothing'])

# Number of data packs to be collected for each sign
# (each recording is saved twice: as-is and mirrored)
no_datapacks = 3

# Frames per data pack, changing requires changes in Signtrack_Train
sequence_length = 24

cap = cv2.VideoCapture(0)  # Choose camera to be used
mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities


def existing_data(sign):
    # Count how many data packs have already been collected for this sign
    path = Path(data_path) / sign
    return len(os.listdir(path)) if path.exists() else 0
# Pre-create the directories for the new data packs; every recording is
# stored twice (original and mirrored), hence no_datapacks * 2 folders
for sign in signs:
    exdt = existing_data(sign)
    for sequence in range(no_datapacks * 2):
        os.makedirs(os.path.join(data_path, sign, str(sequence + exdt)),
                    exist_ok=True)
# Setting mediapipe parameters
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    # Loop through signs
    for sign in signs:
        # Index of the first directory created for this session
        exdt = existing_data(sign) - no_datapacks * 2
        # Loop through sequences aka videos
        for sequence in range(no_datapacks):
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):
                # Read feed
                ret, frame = cap.read()
                frame_flipped = cv2.flip(frame, 1)

                # Make detections on the original and the mirrored frame
                img, results = mediapipe_detection(frame, holistic)
                img_flipped, results_flipped = mediapipe_detection(
                    frame_flipped, holistic)

                # Draw landmarks
                display_styled_landmarks(img, results)

                # Apply wait logic: pause before each new sequence
                if frame_num == 0:
                    cv2.putText(img, 'STARTING COLLECTION', (120, 200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(img, 'Sign: {} Sequence: {}'.format(sign, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('SignTrack Training', img)
                    cv2.waitKey(2000)
                else:
                    cv2.putText(img, 'Collecting frames for {} Video Number {}'.format(sign, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('SignTrack Training', img)

                # Export keypoints for both variants of the frame
                keypoints = extract_keypoints(results)
                keypoints_flipped = extract_keypoints(results_flipped)
                npy_path = os.path.join(
                    data_path, sign, str(2 * sequence + exdt), str(frame_num))
                np.save(npy_path, keypoints)
                npy_path_flipped = os.path.join(
                    data_path, sign, str(2 * sequence + 1 + exdt), str(frame_num))
                np.save(npy_path_flipped, keypoints_flipped)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

cap.release()
cv2.destroyAllWindows()
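
# --- Signtrack_Train: model training script ---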
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Path for exported data, numpy arrays
DATA_PATH = os.path.join('Dataset')

# Signs that we try to detect
signs = np.array(['yes', 'no', 'thanks', 'hello', 'nothing'])

# Number of sequences (data packs) loaded per sign
no_sequences = 5

# Videos are going to be 24 frames in length
sequence_length = 24

# Map each sign label to an integer class index
label_map = {label: num for num, label in enumerate(signs)}

# Load the saved keypoint arrays into one (samples, 24, 126) dataset
sequences, labels = [], []
for sign in signs:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, sign, str(
                sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[sign])

X = np.array(sequences)
y = to_categorical(labels).astype(int)

# Log training metrics for TensorBoard
log_dir = os.path.join('Insights')
tb_callback = TensorBoard(log_dir=log_dir)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = Sequential()
model.add(LSTM(64, return_sequences=True,
               activation='relu', input_shape=(24, 126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(signs.shape[0], activation='softmax'))

model.compile(optimizer='Adam', loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])
model.fit(X_train, y_train, epochs=70, callbacks=[tb_callback])
model.save('SignTrack.h5')
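
# A minimal evaluation sketch (not part of the original commit): the split
# above reserves 20% of the sequences as a held-out test set that the script
# never otherwise uses, so the trained model can be scored on it.
loss, acc = model.evaluate(X_test, y_test)
print('test loss: {:.3f}, test categorical accuracy: {:.3f}'.format(loss, acc))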
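
# --- essentials.py: shared helpers imported by the scripts above ---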
import cv2
import numpy as np
import mediapipe as mp

mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities


def mediapipe_detection(img, model):
    # COLOR CONVERSION BGR 2 RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img.flags.writeable = False  # Image is no longer writeable
    results = model.process(img)  # Make prediction
    img.flags.writeable = True  # Image is now writeable
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # COLOR CONVERSION RGB 2 BGR
    return img, results


def draw_landmarks(img, results):
    mp_drawing.draw_landmarks(img, results.left_hand_landmarks,
                              mp_holistic.HAND_CONNECTIONS)  # Draw left hand connections
    mp_drawing.draw_landmarks(img, results.right_hand_landmarks,
                              mp_holistic.HAND_CONNECTIONS)  # Draw right hand connections


def display_styled_landmarks(img, results):
    # Draw left hand connections
    mp_drawing.draw_landmarks(img, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(
                                  color=(121, 22, 76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(
                                  color=(121, 44, 250), thickness=2, circle_radius=2)
                              )
    # Draw right hand connections
    mp_drawing.draw_landmarks(img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(
                                  color=(245, 117, 66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(
                                  color=(245, 66, 230), thickness=2, circle_radius=2)
                              )


def extract_keypoints(results):
    # Flatten each hand's 21 landmarks (x, y, z) into a 63-value vector,
    # substituting zeros when a hand is not detected
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten(
    ) if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten(
    ) if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([lh, rh])
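
# A minimal sanity check (a sketch, not part of the original file): the
# feature vector concatenates 21 left-hand and 21 right-hand landmarks with
# 3 coordinates each, so every frame yields 21 * 3 * 2 = 126 values, and 24
# frames form one (24, 126) sequence, matching the LSTM input_shape above.
class _EmptyResults:
    # Stand-in for a MediaPipe Holistic results object with no hands detected
    left_hand_landmarks = None
    right_hand_landmarks = None


if __name__ == '__main__':
    keypoints = extract_keypoints(_EmptyResults())
    assert keypoints.shape == (126,)
    sequence = np.stack([keypoints] * 24)  # one data pack worth of frames
    assert sequence.shape == (24, 126)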