diff --git a/Insights/ModelID.npy b/Insights/ModelID.npy new file mode 100644 index 0000000000000000000000000000000000000000..4348d5429ad98ed44f686dcf79062442ca624e6f Binary files /dev/null and b/Insights/ModelID.npy differ diff --git a/Insights/SignTrack.h5 b/Insights/SignTrack.h5 new file mode 100644 index 0000000000000000000000000000000000000000..148ce6e19e18a8dd28c516fc6a5b589d94dea239 Binary files /dev/null and b/Insights/SignTrack.h5 differ diff --git a/README.md b/README.md index 1c0af859b9dd49294987a36d7d73129d41bda969..4fd6bf83ebc7948658e76569dd4f9694df4682cf 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ ## What is SignTrack -SignTrack is a sign language transcriber that analyzes, processes, and recognizes sign language in real-time with remarkable accuracy and efficiency. It takes a people-centered approach to computing, aiming to make computers more accessible and inclusive for everyone. By seamlessly converting sign language into written language, SignTrack breaks down communication barriers and enables meaningful connections. It empowers individuals from diverse backgrounds to express themselves and be understood. With SignTrack, the beauty of sign language comes to life, ensuring that everyone's voice is heard and valued. +SignTrack is a sign language transcriber that analyzes, processes, and recognizes sign language in real-time with remarkable accuracy and efficiency. It takes a people-centered approach to computing, aiming to make computers more accessible and inclusive for everyone. By seamlessly converting sign language into written language, SignTrack breaks down communication barriers and enables meaningful connections. <br /> @@ -36,7 +36,7 @@ We prioritize making data collection user-friendly, even for non-coders. Our int With its recent updates, SignTrack DataCollect has undergone significant improvements, resulting in a seamlessly streamlined experience. The enhanced reliability and smooth functionality of the platform empower developers, allowing them to effortlessly create high-quality training datasets and build custom models that precisely cater to their unique requirements. -Before saving a sequence in the dataset, SignTrack employs a verification process to enasure that hands are visible within the frames. This approach eliminates unnecessary data and enhances the overall quality and accuracy of the collected dataset. +Now, before saving a sequence in the dataset, SignTrack employs a verification process to ensure that hands are visible within the frames. This approach eliminates unnecessary data and enhances the overall quality and accuracy of the collected dataset. Another standout feature is image flipping, generating data that simulates signing with the opposite hand. This enables unparalleled accuracy and versatility, ensuring precise predictions for both hands across all signing variations. @@ -78,7 +78,7 @@ SignTrack offers an uninterrupted experience through innovative optimizations. W For instance, the SignTrack model is intelligently triggered to make predictions only when hands are visible within the scene. This intelligent activation conserves resources and optimizes performance, resulting in a smooth and responsive user experience. -But that's not all. We have taken SignTrack a step further. SignTrack can now understand when the user has completed a word, enabling it to immediately begin detection for the next one. 
This seamless transition between words ensures a fluid and natural interaction, making sign language recognition with SignTrack even more intuitive and efficient. +But that's not all. We have taken SignTrack a step further. SignTrack Revision 1 can now understand when the user has completed a word, enabling it to immediately begin detection for the next one. This seamless transition between words ensures a fluid and natural interaction, making sign language recognition with SignTrack even more intuitive and efficient. <p align="center"> <img src="Assets/readme/Detect1.gif"> diff --git a/TestPose.npy b/TestPose.npy new file mode 100644 index 0000000000000000000000000000000000000000..5633be8eb03cf03f0c446c2a39713bf0d78255a7 Binary files /dev/null and b/TestPose.npy differ diff --git a/experimental/SignTrack Bolt.py b/experimental/SignTrack Bolt.py index 6f85e9443cae2896c5f3b8ba0f9a35598ba511d0..8c2fcc19811f73d2cd59ea14c7fdabb617caa69e 100644 --- a/experimental/SignTrack Bolt.py +++ b/experimental/SignTrack Bolt.py @@ -19,11 +19,11 @@ model = AutoModelForTokenClassification.from_pretrained( pun = pipeline('ner', model=model, tokenizer=tokenizer) -# The number of frames per sequence that the model has been trained on -seq_length = 12 +# The number of frames per sequence that the model has been trained on (must be an even number) +seq_length = 24 # Choose camera input -cap = cv2.VideoCapture(0) +cap = cv2.VideoCapture(1) # Resize camera input cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) @@ -40,7 +40,7 @@ mp_drawing = mp.solutions.drawing_utils # Drawing utilities # Setting up model parameters model = Sequential() model.add(LSTM(64, return_sequences=True, - activation='relu', input_shape=(12, 258))) + activation='relu', input_shape=(int(seq_length/2), 258))) model.add(LSTM(128, return_sequences=True, activation='relu')) model.add(LSTM(64, return_sequences=False, activation='relu')) model.add(Dense(64, activation='relu')) @@ -189,7 +189,7 @@ while cap.isOpened(): if True in HandsOnPrevFrames[-5:]: if HandsOnScene(results): seq.append(keypoints) - seq = seq[-seq_length:] + seq = seq[-int(seq_length/2):] text = grammar_correct((' '.join(sentence))) # Else if hands are not in the scene for the last 5 frames clear sequence data else: @@ -218,18 +218,18 @@ while cap.isOpened(): sentence = [] # If there are 12 frames in seq then call the model to predict - if len(seq) == seq_length: + if len(seq) == int(seq_length/2): res = model.predict(np.expand_dims(seq, axis=0))[0] """ - In case there is more than 65% the nummber of needed data + In case there is more than 50% the nummber of needed data call the model to predict in a new version of seq with randomly duplicated frames """ - elif len(seq) >= seq_length * 0.65: - missing = seq_length - len(seq) + elif len(seq) >= int(seq_length/2) * 0.50: + missing = int(seq_length/2) - len(seq) seqpros = seq for i in range(missing): rand = random.randint(2, len(seq)-2) diff --git a/experimental/SignTrack_DataColect.py b/experimental/SignTrack_DataColect.py index 14f951911f31cfc30c8c6877cd3030a3d89030f2..0682961df44a319dc1ac2137f63a34db0963143d 100644 --- a/experimental/SignTrack_DataColect.py +++ b/experimental/SignTrack_DataColect.py @@ -12,12 +12,12 @@ from essentials import mediapipe_detection, extract_keypoints, display_styled_la data_path = os.path.join('test') # Actions that we try to detect, Changing requires changes in Signtrack_Train -signs = np.array(["see"]) +signs = np.array(["see", 'later']) # Number of sequences to be collected for each action no_datapacks = 
3 -# Frames per sequence, Changing requires changes in Signtrack_Train +# Frames per sequence collected, Changing requires changes in Signtrack_Train (must be an even number) seq_length = 24 #Time between sessions (in seconds) @@ -137,7 +137,7 @@ def Graphics(img, sign, sequence, collecting, error): ''' Mediapipe uses an LSTM model, just like SignTrack, that means that the results are made based on a sequence of data. Thus when trying to make predictions on the flipped image it is -important to utilize a different version of the Mediapipe model, to avoit the model's confusion. +important to utilize a different version of the Mediapipe model, to avoid the model's confusion. ''' holisticf = mp_holistic.Holistic( @@ -184,7 +184,8 @@ for sign in signs: # Draw landmarks display_styled_landmarks(img, results) - + + #Counting the frames in which the hands are visible if HandsOnScene(results): visframes +=1 @@ -208,6 +209,7 @@ for sign in signs: If the collected sequence is on its first frame, changes the apearence of the image accordingly. ''' + #If it is the first frame of the sequence take a break if frame_num == 0: start_time = time.time() @@ -224,12 +226,16 @@ for sign in signs: Graphics(img, sign, seq, False, FailedSave)) if cv2.waitKey(1) & 0xFF == ord('q'): break - + + #Checks time for each break elapsed_time = time.time() - start_time if elapsed_time >= breaktime: break + + #Check before saving the sequence check if frame_num == int(seq_length) - 1: - + + #If failed to pass delete data if int(visframes) < 0.8 * int(seq_length): for frame in range(seq_length): npy_path = os.path.join( @@ -243,6 +249,8 @@ for sign in signs: frame_num = 0 FailedSave = True + + #Else pass else: FailedSave = False break diff --git a/experimental/SignTrack_Train.py b/experimental/SignTrack_Train.py index 156d5963067eec2866e953e1eafbce92690d28bc..59c31bb34fcd37366fd4cdd61b3f8c2d4b7c73ff 100644 --- a/experimental/SignTrack_Train.py +++ b/experimental/SignTrack_Train.py @@ -17,12 +17,12 @@ DATA_PATH = os.path.join('Dataset') # Path to save model MODEL_PATH = 'Insights/SignTrack.h5' -# Signs that we try to detect -signs = np.array([ 'thank you', 'good', 'how', 'yes', +# Signs that the model will be trained on +signs = np.array([ 'thank you', 'good', 'how', 'yes','afternoon', 'morning', 'later', 'see', 'hello', "what's up", 'fine', 'you', 'no']) -# Videos are going to be 24 frames in length -sequence_length = 24 +# The length of the collected sequences (must be an even number) +seq_length = 24 # Creating a label map, where each sign is assigned to a specific numerical value @@ -97,11 +97,11 @@ for sign in signs: dirs = os.listdir('Dataset/' + sign) impframe= False - for i in range(230): + for i in range(20): window = [] window_aug = [] - for frame_num in range(sequence_length): + for frame_num in range(seq_length): res = np.load(os.path.join(DATA_PATH, sign, str( i), "{}.npy".format(frame_num))) @@ -118,8 +118,8 @@ for sign in signs: # Used for data augmentation randposs= random.randint(1, 2) if randposs==2: - for i in range(round(12 * random.randrange(4,6)* 0.1)): - rand = random.randint(1, 12-2) + for i in range(int((seq_length/2) * random.randrange(4,6)* 0.1)): + rand = random.randint(1, int(seq_length/2)-2) window_aug[rand] = np.divide(np.add(window[rand],window[rand+1]),2) for i in range(2): sequences.append(InSpaceR(window_aug)) @@ -128,7 +128,7 @@ for sign in signs: sequences.append(window_aug) labels.append(label_map[sign]) - for i in range(12): + for i in range(16): sequences.append(InSpaceR(window)) 
labels.append(label_map[sign]) @@ -152,7 +152,7 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # Setting up model parameters model = Sequential() model.add(LSTM(64, return_sequences=True, - activation='relu', input_shape=(12, 258))) + activation='relu', input_shape=(int(seq_length/2), 258))) model.add(LSTM(128, return_sequences=True, activation='relu')) model.add(LSTM(64, return_sequences=False, activation='relu')) model.add(Dense(64, activation='relu')) @@ -162,18 +162,20 @@ model.add(Dense(signs.shape[0], activation='softmax')) AutoTrain = model.compile(optimizer='Adam', loss='categorical_crossentropy') -ModelAvailable = False +LossList = [] AutoTrain = model.fit(X_train, y_train, epochs=5) for i in range(150): if AutoTrain.history['loss'][-1] >= 0.03: - AutoTrain = model.fit(X_train, y_train, epochs=1) - if AutoTrain.history['loss'][-1] == min(AutoTrain.history['loss']) : + AutoTrain = model.fit(X_train, y_train, epochs=5) + LossList.append(AutoTrain.history['loss'][-1]) + + if AutoTrain.history['loss'][-1] == min(LossList) : model.save(MODEL_PATH) print('model saved') - ModelAvailable = True - elif ModelAvailable: + + else: model.load_weights(MODEL_PATH) print('model loaded') diff --git a/experimental/__pycache__/Packr.cpython-38.pyc b/experimental/__pycache__/Packr.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b266c9d09374b3de4ab7a7628eb70b2cb79c4d1 Binary files /dev/null and b/experimental/__pycache__/Packr.cpython-38.pyc differ diff --git a/experimental/__pycache__/essentials.cpython-38.pyc b/experimental/__pycache__/essentials.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33997bb361dc257899c440a4768ec09de53a0d6b Binary files /dev/null and b/experimental/__pycache__/essentials.cpython-38.pyc differ diff --git a/signtrack/SignTrack Bolt.py b/signtrack/SignTrack Bolt.py index 6f85e9443cae2896c5f3b8ba0f9a35598ba511d0..8c2fcc19811f73d2cd59ea14c7fdabb617caa69e 100644 --- a/signtrack/SignTrack Bolt.py +++ b/signtrack/SignTrack Bolt.py @@ -19,11 +19,11 @@ model = AutoModelForTokenClassification.from_pretrained( pun = pipeline('ner', model=model, tokenizer=tokenizer) -# The number of frames per sequence that the model has been trained on -seq_length = 12 +# The number of frames per sequence that the model has been trained on (must be an even number) +seq_length = 24 # Choose camera input -cap = cv2.VideoCapture(0) +cap = cv2.VideoCapture(1) # Resize camera input cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) @@ -40,7 +40,7 @@ mp_drawing = mp.solutions.drawing_utils # Drawing utilities # Setting up model parameters model = Sequential() model.add(LSTM(64, return_sequences=True, - activation='relu', input_shape=(12, 258))) + activation='relu', input_shape=(int(seq_length/2), 258))) model.add(LSTM(128, return_sequences=True, activation='relu')) model.add(LSTM(64, return_sequences=False, activation='relu')) model.add(Dense(64, activation='relu')) @@ -189,7 +189,7 @@ while cap.isOpened(): if True in HandsOnPrevFrames[-5:]: if HandsOnScene(results): seq.append(keypoints) - seq = seq[-seq_length:] + seq = seq[-int(seq_length/2):] text = grammar_correct((' '.join(sentence))) # Else if hands are not in the scene for the last 5 frames clear sequence data else: @@ -218,18 +218,18 @@ while cap.isOpened(): sentence = [] # If there are 12 frames in seq then call the model to predict - if len(seq) == seq_length: + if len(seq) == int(seq_length/2): res = model.predict(np.expand_dims(seq, axis=0))[0] """ - In 
case there is more than 65% the nummber of needed data + In case there is more than 50% the nummber of needed data call the model to predict in a new version of seq with randomly duplicated frames """ - elif len(seq) >= seq_length * 0.65: - missing = seq_length - len(seq) + elif len(seq) >= int(seq_length/2) * 0.50: + missing = int(seq_length/2) - len(seq) seqpros = seq for i in range(missing): rand = random.randint(2, len(seq)-2) diff --git a/signtrack/SignTrack_DataColect.py b/signtrack/SignTrack_DataColect.py index 14f951911f31cfc30c8c6877cd3030a3d89030f2..0682961df44a319dc1ac2137f63a34db0963143d 100644 --- a/signtrack/SignTrack_DataColect.py +++ b/signtrack/SignTrack_DataColect.py @@ -12,12 +12,12 @@ from essentials import mediapipe_detection, extract_keypoints, display_styled_la data_path = os.path.join('test') # Actions that we try to detect, Changing requires changes in Signtrack_Train -signs = np.array(["see"]) +signs = np.array(["see", 'later']) # Number of sequences to be collected for each action no_datapacks = 3 -# Frames per sequence, Changing requires changes in Signtrack_Train +# Frames per sequence collected, Changing requires changes in Signtrack_Train (must be an even number) seq_length = 24 #Time between sessions (in seconds) @@ -137,7 +137,7 @@ def Graphics(img, sign, sequence, collecting, error): ''' Mediapipe uses an LSTM model, just like SignTrack, that means that the results are made based on a sequence of data. Thus when trying to make predictions on the flipped image it is -important to utilize a different version of the Mediapipe model, to avoit the model's confusion. +important to utilize a different version of the Mediapipe model, to avoid the model's confusion. ''' holisticf = mp_holistic.Holistic( @@ -184,7 +184,8 @@ for sign in signs: # Draw landmarks display_styled_landmarks(img, results) - + + #Counting the frames in which the hands are visible if HandsOnScene(results): visframes +=1 @@ -208,6 +209,7 @@ for sign in signs: If the collected sequence is on its first frame, changes the apearence of the image accordingly. 
''' + #If it is the first frame of the sequence take a break if frame_num == 0: start_time = time.time() @@ -224,12 +226,16 @@ for sign in signs: Graphics(img, sign, seq, False, FailedSave)) if cv2.waitKey(1) & 0xFF == ord('q'): break - + + #Checks time for each break elapsed_time = time.time() - start_time if elapsed_time >= breaktime: break + + #Check before saving the sequence check if frame_num == int(seq_length) - 1: - + + #If failed to pass delete data if int(visframes) < 0.8 * int(seq_length): for frame in range(seq_length): npy_path = os.path.join( @@ -243,6 +249,8 @@ for sign in signs: frame_num = 0 FailedSave = True + + #Else pass else: FailedSave = False break diff --git a/signtrack/SignTrack_Train.py b/signtrack/SignTrack_Train.py index 156d5963067eec2866e953e1eafbce92690d28bc..59c31bb34fcd37366fd4cdd61b3f8c2d4b7c73ff 100644 --- a/signtrack/SignTrack_Train.py +++ b/signtrack/SignTrack_Train.py @@ -17,12 +17,12 @@ DATA_PATH = os.path.join('Dataset') # Path to save model MODEL_PATH = 'Insights/SignTrack.h5' -# Signs that we try to detect -signs = np.array([ 'thank you', 'good', 'how', 'yes', +# Signs that the model will be trained on +signs = np.array([ 'thank you', 'good', 'how', 'yes','afternoon', 'morning', 'later', 'see', 'hello', "what's up", 'fine', 'you', 'no']) -# Videos are going to be 24 frames in length -sequence_length = 24 +# The length of the collected sequences (must be an even number) +seq_length = 24 # Creating a label map, where each sign is assigned to a specific numerical value @@ -97,11 +97,11 @@ for sign in signs: dirs = os.listdir('Dataset/' + sign) impframe= False - for i in range(230): + for i in range(20): window = [] window_aug = [] - for frame_num in range(sequence_length): + for frame_num in range(seq_length): res = np.load(os.path.join(DATA_PATH, sign, str( i), "{}.npy".format(frame_num))) @@ -118,8 +118,8 @@ for sign in signs: # Used for data augmentation randposs= random.randint(1, 2) if randposs==2: - for i in range(round(12 * random.randrange(4,6)* 0.1)): - rand = random.randint(1, 12-2) + for i in range(int((seq_length/2) * random.randrange(4,6)* 0.1)): + rand = random.randint(1, int(seq_length/2)-2) window_aug[rand] = np.divide(np.add(window[rand],window[rand+1]),2) for i in range(2): sequences.append(InSpaceR(window_aug)) @@ -128,7 +128,7 @@ for sign in signs: sequences.append(window_aug) labels.append(label_map[sign]) - for i in range(12): + for i in range(16): sequences.append(InSpaceR(window)) labels.append(label_map[sign]) @@ -152,7 +152,7 @@ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) # Setting up model parameters model = Sequential() model.add(LSTM(64, return_sequences=True, - activation='relu', input_shape=(12, 258))) + activation='relu', input_shape=(int(seq_length/2), 258))) model.add(LSTM(128, return_sequences=True, activation='relu')) model.add(LSTM(64, return_sequences=False, activation='relu')) model.add(Dense(64, activation='relu')) @@ -162,18 +162,20 @@ model.add(Dense(signs.shape[0], activation='softmax')) AutoTrain = model.compile(optimizer='Adam', loss='categorical_crossentropy') -ModelAvailable = False +LossList = [] AutoTrain = model.fit(X_train, y_train, epochs=5) for i in range(150): if AutoTrain.history['loss'][-1] >= 0.03: - AutoTrain = model.fit(X_train, y_train, epochs=1) - if AutoTrain.history['loss'][-1] == min(AutoTrain.history['loss']) : + AutoTrain = model.fit(X_train, y_train, epochs=5) + LossList.append(AutoTrain.history['loss'][-1]) + + if AutoTrain.history['loss'][-1] == 
min(LossList) : model.save(MODEL_PATH) print('model saved') - ModelAvailable = True - elif ModelAvailable: + + else: model.load_weights(MODEL_PATH) print('model loaded') diff --git a/signtrack/inSpace Engine.py b/signtrack/inSpace Engine.py deleted file mode 100644 index 57bebb4990f68374bb1ced3d7b36dfbb46546832..0000000000000000000000000000000000000000 --- a/signtrack/inSpace Engine.py +++ /dev/null @@ -1,56 +0,0 @@ -import numpy as np -import matplotlib.pyplot as plt - -landmarks = np.load('TestPose.npy') -hands = landmarks[:126] -pose = landmarks[-132:] - -def InSpace(point, angles): - x, y, z = point - rx, ry, rz = np.radians(angles) - - # Rotate around x-axis - y_prime = y * np.cos(rx) - z * np.sin(rx) - z_prime = y * np.sin(rx) + z * np.cos(rx) - - # Rotate around y-axis - x_prime = x * np.cos(ry) + z_prime * np.sin(ry) - z_prime = -x * np.sin(ry) + z_prime * np.cos(ry) - - # Rotate around z-axis - x_prime = x_prime * np.cos(rz) - y_prime * np.sin(rz) - y_prime = x_prime * np.sin(rz) + y_prime * np.cos(rz) - - return x_prime, y_prime, z_prime - -# Example usage -hpoints = np.array_split(hands, 42) -ppoints = np.array_split(pose, 33) -print(hpoints) -angle = (2, 0, 0) - - -plt.rcParams["figure.figsize"] = [10, 10] -plt.rcParams["figure.autolayout"] = True -fig = plt.figure() -ax = fig.add_subplot(projection="3d") -for i in range(0,20,20): - angle = (0, -90, 0) - for point in hpoints: - x, y, z = InSpace(point, angle) - ax.scatter(x, y, z, c='red', s=10) - for point in ppoints: - print(point[:4]) - x, y, z = InSpace(point[:3], angle) - ax.scatter(x, y, z, c='red', s=10) - -for point in hpoints: - x,y,z = point - ax.scatter(x, y, z, c='blue', s=10) -for point in ppoints: - x,y,z=(point[:3]) - ax.scatter(x, y, z, c='blue', s=10) - -x, y, z = (0,0,0) -ax.scatter(x, y, z, c='purple', s=10) -plt.show()
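
The README hunk above describes two runtime behaviours — predicting only while hands are visible, and starting the next word as soon as the previous one ends — that correspond to the buffer-handling hunks in `SignTrack Bolt.py`. Those hunks show only fragments of the logic, so the following is a condensed sketch of how the gate appears to work, with `update_buffer`, `hands_history` and `HOLD_FRAMES` as hypothetical names; `keypoints` stands for the flattened landmark vector extracted per frame.

```python
HOLD_FRAMES = 5  # how many hand-free frames are tolerated before the buffer resets

def update_buffer(seq, hands_history, hands_now, keypoints, seq_length):
    """Maintain the rolling keypoint buffer that feeds the sign classifier."""
    hands_history.append(hands_now)
    if True in hands_history[-HOLD_FRAMES:]:
        if hands_now:
            seq.append(keypoints)
            seq = seq[-(seq_length // 2):]  # keep only the newest half-length window
    else:
        seq = []                            # hands gone long enough: start the next word
    return seq

# Per-frame usage inside the capture loop, where hands_now = HandsOnScene(results):
# seq = update_buffer(seq, hands_history, hands_now, keypoints, seq_length=24)
```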
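
Both copies of `SignTrack Bolt.py` now keep only `seq_length // 2` frames for inference and lower the fallback threshold from 65% to 50%: when at least half of the required frames have been collected, the sequence is topped up by duplicating randomly chosen interior frames before `model.predict` is called. The duplication itself is cut off by the hunk, so this is one plausible reading of it, with `pad_by_duplication` as a hypothetical helper:

```python
import random
import numpy as np

def pad_by_duplication(seq, target_len):
    """Grow a partial keypoint sequence to target_len by re-inserting
    randomly chosen interior frames next to themselves."""
    padded = list(seq)
    while len(padded) < target_len:
        rand = random.randint(2, len(padded) - 2)  # keep the first/last frames unique
        padded.insert(rand, padded[rand])
    return np.array(padded)

# Example: 8 of the 12 required frames collected (>= 50%), so 4 are filled in
frames = [np.zeros(258) for _ in range(8)]
batch = np.expand_dims(pad_by_duplication(frames, 12), axis=0)  # shape (1, 12, 258)
```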
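
In `SignTrack_DataColect.py`, the new `visframes` counter records how many frames of a sequence actually contain hands; if fewer than 80% do, the frames already written to disk are deleted and the sequence is re-recorded, with `FailedSave` driving the on-screen warning. A minimal sketch of that acceptance check, assuming the `<data_path>/<sign>/<sequence>/<frame>.npy` layout implied by the collection and training scripts:

```python
import os

def sequence_passes(data_path, sign, sequence_idx, visframes, seq_length):
    """Keep a sequence only if hands were visible in at least 80% of its frames;
    otherwise remove its saved .npy frames so it can be re-recorded."""
    if visframes < 0.8 * seq_length:
        for frame_num in range(seq_length):
            npy_path = os.path.join(data_path, sign, str(sequence_idx), f"{frame_num}.npy")
            if os.path.exists(npy_path):
                os.remove(npy_path)
        return False  # caller sets FailedSave = True and restarts the sequence
    return True
```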
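
All three scripts now derive the LSTM input shape from `seq_length` rather than a hard-coded 12, which is why the new comments require an even `seq_length`; 258 is the length of the flattened keypoint vector (126 hand values plus 132 pose values, per the removed `inSpace Engine.py`). Assembled from the visible hunks — any layers between `Dense(64)` and the softmax head are elided by the diff — the network reads roughly as follows, with `tensorflow.keras` assumed:

```python
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

seq_length = 24   # frames collected per sequence (must be even)
signs = np.array(['thank you', 'good', 'how', 'yes', 'afternoon', 'morning',
                  'later', 'see', 'hello', "what's up", 'fine', 'you', 'no'])

model = Sequential()
model.add(LSTM(64, return_sequences=True,
               activation='relu', input_shape=(seq_length // 2, 258)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(signs.shape[0], activation='softmax'))
model.compile(optimizer='Adam', loss='categorical_crossentropy')
```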
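
The training scripts also scale augmentation with `seq_length`: for a random half of the windows, a handful of frames in the first `seq_length / 2` positions are replaced by the average of the frame and its successor, and the original and blended windows are then oversampled through `InSpaceR` (now 16 rotated copies of the original instead of 12). `InSpaceR` itself never appears in the diff; the blending step alone looks roughly like this, with `blend_random_frames` as a hypothetical helper:

```python
import random
import numpy as np

def blend_random_frames(window, seq_length):
    """Return a copy of the window where a few frames have been replaced by
    the mean of themselves and the following frame (temporal smoothing)."""
    window_aug = [np.asarray(frame, dtype=float).copy() for frame in window]
    n_blend = int((seq_length / 2) * random.randrange(4, 6) * 0.1)  # 4-6 frames for seq_length=24
    for _ in range(n_blend):
        rand = random.randint(1, int(seq_length / 2) - 2)
        window_aug[rand] = (np.asarray(window[rand]) + np.asarray(window[rand + 1])) / 2
    return window_aug

# Example with dummy keypoint frames
window = [np.random.rand(258) for _ in range(24)]
augmented = blend_random_frames(window, 24)
```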
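
The retraining loop was reworked as well: it now trains in 5-epoch bursts, appends each burst's final loss to `LossList`, saves the model whenever a burst sets a new minimum, and otherwise reloads the last saved weights, refining until the loss drops below 0.03. The same control flow, wrapped into a hypothetical `auto_train` helper for readability:

```python
def auto_train(model, X_train, y_train, model_path,
               max_rounds=150, target_loss=0.03, epochs_per_round=5):
    """Fit in short bursts, keeping only the weights with the lowest
    end-of-burst training loss seen so far."""
    loss_list = []
    history = model.fit(X_train, y_train, epochs=epochs_per_round)
    for _ in range(max_rounds):
        if history.history['loss'][-1] < target_loss:
            break                               # good enough, stop refining
        history = model.fit(X_train, y_train, epochs=epochs_per_round)
        loss_list.append(history.history['loss'][-1])
        if history.history['loss'][-1] == min(loss_list):
            model.save(model_path)              # new best burst: checkpoint it
        else:
            model.load_weights(model_path)      # worse than before: roll back
    return model

# Usage: auto_train(model, X_train, y_train, 'Insights/SignTrack.h5')
```

Note that the first appended loss is trivially the minimum of `loss_list`, so the first pass through the loop always produces a checkpoint before any `load_weights` rollback can occur — the same ordering the diff relies on.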
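
Finally, the commit deletes `signtrack/inSpace Engine.py`, a prototype that rotated the 126 hand values and 132 pose values of a saved `TestPose.npy` frame around the x, y and z axes and plotted the result with matplotlib. The same per-point rotation can be expressed with rotation matrices; the function below is a generic sketch of that idea, not the `InSpaceR` helper used in training, which the diff never shows:

```python
import numpy as np

def rotate_points(points, angles_deg):
    """Rotate an (N, 3) array of landmark coordinates around the x, y and z axes."""
    rx, ry, rz = np.radians(angles_deg)
    Rx = np.array([[1, 0, 0],
                   [0, np.cos(rx), -np.sin(rx)],
                   [0, np.sin(rx),  np.cos(rx)]])
    Ry = np.array([[np.cos(ry), 0, np.sin(ry)],
                   [0, 1, 0],
                   [-np.sin(ry), 0, np.cos(ry)]])
    Rz = np.array([[np.cos(rz), -np.sin(rz), 0],
                   [np.sin(rz),  np.cos(rz), 0],
                   [0, 0, 1]])
    return points @ (Rz @ Ry @ Rx).T

# Example: view 42 hand landmarks from the side, as the removed script did
hands = np.random.rand(42, 3)
side_view = rotate_points(hands, (0, -90, 0))
```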