YOLOv8 | Notion

YOLOv8은 YOLOv5의 구조를 개선한 모델로, YOLOv6처럼 앵커 프리 구조를 사용해 추론 속도를 향상시켰다. 또한 학습 전체를 모자이크 합성 이미지로 수행하면 성능이 저하되는 문제가 있었는데, YOLOv8은 학습의 마지막 10에폭 동안 모자이크 증강을 적용하지 않음으로써 과대적합을 방지하고 성능을 개선했다.

YOLOv8.ipynb

from ultralytics import YOLO

# Checkpoint file handed to YOLO(); the file name suggests the medium-size
# YOLOv8 pose variant (assumption from the name -- confirm against the file).
POSE_WEIGHTS_PATH = "../models/yolov8m-pose.pt"

# Module-level model instance; predict() reads this global.
model = YOLO(POSE_WEIGHTS_PATH)
import cv2

# Play the clip in a window, looping back to the start when the end is
# reached, until any key is pressed (cv2.waitKey returns a negative value
# while no key has been pressed).
capture = cv2.VideoCapture("../datasets/woman.mp4")
try:
    while cv2.waitKey(10) < 0:
        # Rewind once the read position has reached the reported frame count.
        if capture.get(cv2.CAP_PROP_POS_FRAMES) == capture.get(cv2.CAP_PROP_FRAME_COUNT):
            capture.set(cv2.CAP_PROP_POS_FRAMES, 0)

        ret, frame = capture.read()
        if not ret:
            # The file could not be opened or decoded; `frame` is None here and
            # passing it to imshow would raise, so stop instead.
            break
        cv2.imshow("VideoFrame", frame)
finally:
    # Always free the video handle and close the window, even on error.
    capture.release()
    cv2.destroyAllWindows()
import torch

def predict(frame, iou=0.7, conf=0.25):
    """Run the module-level ``model`` on one frame and return its first result.

    Args:
        frame: Image passed to the model as ``source`` (the callers in this
            file pass frames read with OpenCV).
        iou: IoU threshold forwarded to the model call.
        conf: Confidence threshold forwarded to the model call.

    Returns:
        The first element of the sequence returned by the model call.
    """
    results = model(
        source=frame,
        # Use the first CUDA device when one is available, otherwise the CPU.
        device="0" if torch.cuda.is_available() else "cpu",
        # Forward the caller's thresholds instead of hard-coded literals so
        # the function's parameters actually take effect.
        iou=iou,
        conf=conf,
        verbose=False,
    )
    return results[0]
def draw_boxes(result, frame):
    """Draw a 1-pixel rectangle on ``frame`` for every box in ``result``.

    Args:
        result: Prediction result whose ``boxes.xyxy`` holds one
            ``(x1, y1, x2, y2)`` row per detection.
        frame: Image to draw on; it is modified in place.

    Returns:
        The same ``frame`` object, with the rectangles drawn.
    """
    # Only the corner coordinates are needed. Reading `xyxy` (rather than
    # unpacking a fixed six columns from `data`) keeps this working when the
    # rows carry extra columns, and moves all boxes off the device in one go.
    corners = result.boxes.xyxy.cpu().numpy()
    for x1, y1, x2, y2 in corners:
        # (0, 0, 255) is red in OpenCV's BGR channel order.
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 1)
    return frame
import cv2

# Play the clip in a loop with detection boxes drawn on every frame, until
# any key is pressed (cv2.waitKey returns a negative value while no key has
# been pressed).
capture = cv2.VideoCapture("../datasets/woman.mp4")
try:
    while cv2.waitKey(10) < 0:
        # Rewind once the read position has reached the reported frame count.
        if capture.get(cv2.CAP_PROP_POS_FRAMES) == capture.get(cv2.CAP_PROP_FRAME_COUNT):
            capture.set(cv2.CAP_PROP_POS_FRAMES, 0)

        ret, frame = capture.read()
        if not ret:
            # The file could not be opened or decoded; `frame` is None here and
            # must not be handed to the model or to imshow.
            break
        result = predict(frame)
        frame = draw_boxes(result, frame)
        cv2.imshow("VideoFrame", frame)
finally:
    # Always free the video handle and close the window, even on error.
    capture.release()
    cv2.destroyAllWindows()
try:
    from ultralytics.utils.plotting import Annotator
except ImportError:
    # Older ultralytics releases keep the plotting helpers under the
    # ``ultralytics.yolo`` package instead.
    from ultralytics.yolo.utils.plotting import Annotator


def draw_keypoints(result, frame):
    """Draw pose keypoints from ``result`` onto ``frame``.

    For each entry in ``result.keypoints`` the keypoints are first passed to
    ``Annotator.kpts`` for rendering; then every keypoint whose score exceeds
    0.5 is marked with a filled red dot and labelled with its index.

    Args:
        result: Prediction result exposing ``keypoints``; each entry's
            ``data`` is expected to squeeze to rows of ``(x, y, score)``.
        frame: Image to draw on; it is modified in place.

    Returns:
        The same ``frame`` object, with the keypoints drawn.
    """
    annotator = Annotator(frame, line_width=1)
    for kps in result.keypoints:
        # Drop the singleton leading dimension so rows are (x, y, score).
        kps = kps.data.squeeze()
        annotator.kpts(kps)

        for idx, (x, y, score) in enumerate(kps.cpu().numpy()):
            if score > 0.5:
                center = (int(x), int(y))
                cv2.circle(frame, center, 3, (0, 0, 255), cv2.FILLED)
                cv2.putText(frame, str(idx), center, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 255), 1)

    return frame
import cv2

# Play the clip in a loop with detection boxes and pose keypoints drawn on
# every frame, until any key is pressed (cv2.waitKey returns a negative value
# while no key has been pressed).
capture = cv2.VideoCapture("../datasets/woman.mp4")
try:
    while cv2.waitKey(10) < 0:
        # Rewind once the read position has reached the reported frame count.
        if capture.get(cv2.CAP_PROP_POS_FRAMES) == capture.get(cv2.CAP_PROP_FRAME_COUNT):
            capture.set(cv2.CAP_PROP_POS_FRAMES, 0)

        ret, frame = capture.read()
        if not ret:
            # The file could not be opened or decoded; `frame` is None here and
            # must not be handed to the model or to imshow.
            break
        result = predict(frame)
        frame = draw_boxes(result, frame)
        frame = draw_keypoints(result, frame)
        cv2.imshow("VideoFrame", frame)
finally:
    # Always free the video handle and close the window, even on error.
    capture.release()
    cv2.destroyAllWindows()