"""Realtime webcam pose tracking with MediaPipe (macOS-friendly).

Opens the default camera (preferring the AVFoundation backend on macOS),
runs MediaPipe Pose on each mirrored frame, draws the detected skeleton,
prints a few upper-body keypoints every ~0.5 s, and overlays a smoothed
FPS estimate. Press 'q' in the preview window to quit.
"""
import time

import cv2
import mediapipe as mp


def print_selected_landmarks(landmarks, w: int, h: int) -> None:
    """Print pixel coordinates and visibility for selected upper-body joints.

    Args:
        landmarks: Indexable sequence of MediaPipe pose landmarks with
            normalized ``x``/``y`` in [0, 1] and a ``visibility`` score.
        w: Frame width in pixels (used to de-normalize ``x``).
        h: Frame height in pixels (used to de-normalize ``y``).
    """
    # MediaPipe Pose indices: shoulders (11,12), elbows (13,14), wrists (15,16)
    idxs = {
        "L_shoulder": 11, "R_shoulder": 12,
        "L_elbow": 13, "R_elbow": 14,
        "L_wrist": 15, "R_wrist": 16,
    }
    out = {
        name: (int(landmarks[i].x * w), int(landmarks[i].y * h), landmarks[i].visibility)
        for name, i in idxs.items()
    }
    print(out)


def main() -> None:
    """Run the capture/inference/display loop until 'q' is pressed."""
    # Use AVFoundation backend on macOS for better camera support
    cap = cv2.VideoCapture(0, cv2.CAP_AVFOUNDATION)
    if not cap.isOpened():
        # fallback to the default backend if AVFoundation is unavailable
        cap = cv2.VideoCapture(0)

    # (Optional) reduce resolution for speed/latency
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    mp_drawing = mp.solutions.drawing_utils
    mp_pose = mp.solutions.pose
    # Higher model_complexity is slower but more accurate (0,1,2)
    pose = mp_pose.Pose(model_complexity=1, enable_segmentation=False)

    last_print_t = time.time()   # throttles keypoint printing to ~2 Hz
    last_frame_t = time.time()   # per-frame timestamp for FPS estimation
    fps = 0.0                    # exponential moving average of frame rate

    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                print("Camera read failed.")
                break

            # Mirror for natural interaction
            frame = cv2.flip(frame, 1)
            h, w = frame.shape[:2]

            # MediaPipe expects RGB; OpenCV delivers BGR
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            res = pose.process(rgb)

            if res.pose_landmarks:
                # draw landmarks
                mp_drawing.draw_landmarks(
                    frame, res.pose_landmarks, mp_pose.POSE_CONNECTIONS)
                # print a few keypoints every ~0.5s
                now = time.time()
                if now - last_print_t > 0.5:
                    print_selected_landmarks(res.pose_landmarks.landmark, w, h)
                    last_print_t = now

            # FPS: EMA of the instantaneous frame rate measured from real
            # per-frame wall-clock deltas.  (The original subtracted
            # getTickCount() from itself, yielding a meaningless constant,
            # and never displayed the value.)
            now = time.time()
            dt = now - last_frame_t
            last_frame_t = now
            if dt > 0:
                fps = 0.9 * fps + 0.1 * (1.0 / dt)

            cv2.putText(frame, "Press 'q' to quit", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            cv2.imshow("MediaPipe Pose (macOS/M1)", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release model and camera even if the loop exits via an exception
        pose.close()
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()