1 (изменено: Malcev, 2023-04-29 12:34:06)

Тема: AHK: Mediapipe

smbape проделал большую и сложную работу и создал COM-библиотеку MediaPipe для AutoIt.
https://www.autoitscript.com/forum/topi … apipe-udf/
https://google.github.io/mediapipe/
На AutoHotkey её можно запустить примерно так (пример обнаружения рук):
https://github.com/smbape/node-autoit-m … -hands.au3

; Hand-landmark detection on a still image (AutoHotkey v1) using the
; autoit-opencv-com and autoit-mediapipe-com COM libraries.
ImagePath := "C:\Users\malcev\Desktop\mediapipe\examples\data\brooke-cagle-mt2fyrdXxzk-unsplash.jpg"

opencv_world_path := A_ScriptDir "\opencv-4.7.0-windows\opencv\build\x64\vc16\bin\opencv_world470.dll"
opencv_ffmpeg_path := A_ScriptDir "\opencv-4.7.0-windows\opencv\build\x64\vc16\bin\opencv_videoio_ffmpeg470_64.dll"
autoit_opencv_com_path := A_ScriptDir "\autoit-opencv-com\autoit_opencv_com470.dll"
autoit_mediapipe_com_path := A_ScriptDir "\autoit-mediapipe-com\autoit_mediapipe_com-0.9.3.0-470.dll"

; Fail early with a readable message instead of an opaque COM error later on.
if (!FileExist(ImagePath))
{
   msgbox % "Image file not found:`n" ImagePath
   exitapp
}

; Load the native DLLs by full path before any COM object is created.
hOpencv := DllCall("LoadLibrary", "str", opencv_world_path, "ptr")
hOpencvFfmpeg := DllCall("LoadLibrary", "str", opencv_ffmpeg_path, "ptr")
hOpencvCom := DllCall("LoadLibrary", "str", autoit_opencv_com_path, "ptr")
hMediapipeCom := DllCall("LoadLibrary", "str", autoit_mediapipe_com_path, "ptr")
; LoadLibrary returns 0 on failure.
; NOTE(review): hOpencvFfmpeg is deliberately not treated as fatal; it is assumed
; to be optional for still-image processing - confirm.
if (!hOpencv || !hOpencvCom || !hMediapipeCom)
{
   msgbox % "Failed to load a required DLL. Check that these files exist:`n" opencv_world_path "`n" autoit_opencv_com_path "`n" autoit_mediapipe_com_path
   exitapp
}

; Bound wrappers: ComObjCreate.Call(progId) and Mediapipe_Params.Call(assocArray).
ComObjCreate := Func("_ComObjCreate").Bind(autoit_opencv_com_path, autoit_mediapipe_com_path)
Mediapipe_Params := Func("_Mediapipe_Params").Bind(ComObjCreate)
resource_util := ComObjCreate.Call("Mediapipe.mediapipe.autoit._framework_bindings.resource_util")
; The regex strips the file name, leaving the directory that contains the mediapipe COM DLL.
resource_util.set_resource_dir(RegexReplace(autoit_mediapipe_com_path, "^(.+)\\.*$", "$1"))

; COM objects have to be created through the bound wrapper's .Call(), which
; runs DllActivateManifest / DllDeactivateActCtx around the built-in ComObjCreate().
cv := ComObjCreate.Call("OpenCV.cv")
mp := ComObjCreate.Call("Mediapipe.mediapipe")

image := cv.imread(ImagePath)
cv.imshow("Image", image)

mp_hands := mp.solutions.hands
mp_drawing := mp.solutions.drawing_utils
mp_drawing_styles := mp.solutions.drawing_styles

hands := mp_hands.Hands(Mediapipe_Params.Call({"static_image_mode": "True", "max_num_hands": 2, "min_detection_confidence": 0.7}))

; Convert the BGR image to RGB, flip the image around y-axis for correct handedness output and process it with MediaPipe Hands.
results := hands.process(cv.flip(cv.cvtColor(image, CV_COLOR_BGR2RGB := 4), 1))

; -2147352572 is 0x80020004 (DISP_E_PARAMNOTFOUND).
; NOTE(review): presumably what the COM wrapper yields for a missing result - confirm.
; Checked before any result list is dereferenced.
if (results.item("multi_hand_landmarks") = -2147352572)
{
   msgbox No hand detection
   exitapp
}

; Show the handedness entry of every detected hand (indices are 0-based).
loop % results.item("multi_handedness").MaxIndex() + 1
   msgbox % results.item("multi_handedness")[A_Index-1].__str__

; Draw hand landmarks of each hand.
image_width := image.width
image_height := image.height
annotated_image := cv.flip(image.copy(), 1)

; Print index finger tip coordinates
loop % results.item("multi_hand_landmarks").MaxIndex() + 1
{
   hand_landmarks := results.item("multi_hand_landmarks")[A_Index-1]
   ; Fetch the landmark once and reuse it for both coordinates.
   ; x and y are multiplied by the image size to give the displayed coordinates.
   tip := hand_landmarks.landmark(mp_hands.HandLandmark.INDEX_FINGER_TIP)
   msgbox % "Index finger tip coordinate:`n" tip.x * image_width "`n" tip.y * image_height
   mp_drawing.draw_landmarks(annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS, mp_drawing_styles.get_default_hand_landmarks_style(), mp_drawing_styles.get_default_hand_connections_style())
}

; The annotations were drawn on a flipped copy; flip back for display.
cv.imshow("hands", cv.flip(annotated_image, 1))
cv.waitKey()
cv.destroyAllWindows()
return


; Converts an AutoHotkey associative array into a Mediapipe "NamedParameters" COM object.
;
; Parameters:
;   ComObjCreate - callable object (here: the bound _ComObjCreate wrapper); used once
;                  to create the "Mediapipe.NamedParameters" COM object, which is then
;                  cached in a static variable.
;   paramArray   - associative array of parameter name -> value. String values that
;                  compare equal (with =) to "True", "False" or "Null" are replaced by
;                  a VT_BOOL true, VT_BOOL false or VT_NULL variant respectively;
;                  float-looking values are forced to numeric form.
; Returns:
;   The result of NamedParameters.create() for the array of [name, value] pairs.
_Mediapipe_Params(ComObjCreate, paramArray)
{
   static NamedParameters
   if !NamedParameters
      NamedParameters := ComObjCreate.Call("Mediapipe.NamedParameters")
   arr := ComObjArray(VT_VARIANT:=12, paramArray.Count())
   ; A plain local replaces the former dynamic variable arr%k%, which failed for keys
   ; that are not legal variable names and could overwrite a same-named global.
   ; The pairs are additionally held in this list so each one stays referenced until
   ; create() has returned, exactly as the per-key variables did.
   pairs := []
   for k, v in paramArray
   {
      pair := ComObjArray(VT_VARIANT:=12, 2)
      pair[0] := k
      if v is float
         v += 0                    ; force a numeric (not string) float
      else if (v = "True")
         v := ComObj(0xB, -1)      ; VT_BOOL, VARIANT_TRUE
      else if (v = "False")
         v := ComObj(0xB, 0)       ; VT_BOOL, VARIANT_FALSE
      else if (v = "Null")
         v := ComObj(1, 0)         ; VT_NULL
      pair[1] := v
      arr[A_Index-1] := pair       ; A_Index is 1-based, the SafeArray is 0-based
      pairs.Push(pair)
   }
   return NamedParameters.create(arr)
}

; Creates a COM object while the owning DLL's manifest is activated.
;
; Parameters:
;   opencvPath    - full path of the OpenCV COM DLL; used when the ProgID contains
;                   "opencv" (InStr is case-insensitive by default).
;   mediapipePath - full path of the Mediapipe COM DLL; used for every other ProgID.
;   comobject     - ProgID passed to the built-in ComObjCreate().
; Returns:
;   The created COM object.
;
; NOTE(review): DllActivateManifest / DllDeactivateActCtx are exports of the chosen
; DLL; presumably they push/pop an activation context for registration-free COM - confirm.
_ComObjCreate(opencvPath, mediapipePath, comobject)
{
   path := InStr(comobject, "opencv") ? opencvPath : mediapipePath
   DllCall(path "\DllActivateManifest")
   ; try/finally guarantees the deactivation call runs even when object creation
   ; fails; the error itself still propagates to the caller.
   try
   {
      obj := ComObjCreate(comobject)
   }
   finally
   {
      DllCall(path "\DllDeactivateActCtx")
   }
   return obj
}

Пример Face mesh:
https://github.com/smbape/node-autoit-m … e_mesh.au3

; Face-mesh detection on a still image (AutoHotkey v1) using the
; autoit-opencv-com and autoit-mediapipe-com COM libraries.
opencv_world_path := A_ScriptDir "\opencv-4.7.0-windows\opencv\build\x64\vc16\bin\opencv_world470.dll"
opencv_ffmpeg_path := A_ScriptDir "\opencv-4.7.0-windows\opencv\build\x64\vc16\bin\opencv_videoio_ffmpeg470_64.dll"
autoit_opencv_com_path := A_ScriptDir "\autoit-opencv-com\autoit_opencv_com470.dll"
autoit_mediapipe_com_path := A_ScriptDir "\autoit-mediapipe-com\autoit_mediapipe_com-0.9.3.0-470.dll"

; Load the native DLLs by full path before any COM object is created.
hOpencv := DllCall("LoadLibrary", "str", opencv_world_path, "ptr")
hOpencvFfmpeg := DllCall("LoadLibrary", "str", opencv_ffmpeg_path, "ptr")
hOpencvCom := DllCall("LoadLibrary", "str", autoit_opencv_com_path, "ptr")
hMediapipeCom := DllCall("LoadLibrary", "str", autoit_mediapipe_com_path, "ptr")
; LoadLibrary returns 0 on failure; stop with a readable message instead of an
; opaque COM error later on.
; NOTE(review): hOpencvFfmpeg is deliberately not treated as fatal; it is assumed
; to be optional for still-image processing - confirm.
if (!hOpencv || !hOpencvCom || !hMediapipeCom)
{
   msgbox % "Failed to load a required DLL. Check that these files exist:`n" opencv_world_path "`n" autoit_opencv_com_path "`n" autoit_mediapipe_com_path
   exitapp
}

; Bound wrappers: ComObjCreate.Call(progId) and Mediapipe_Params.Call(assocArray).
ComObjCreate := Func("_ComObjCreate").Bind(autoit_opencv_com_path, autoit_mediapipe_com_path)
Mediapipe_Params := Func("_Mediapipe_Params").Bind(ComObjCreate)
resource_util := ComObjCreate.Call("Mediapipe.mediapipe.autoit._framework_bindings.resource_util")
; The regex strips the file name, leaving the directory that contains the mediapipe COM DLL.
resource_util.set_resource_dir(RegexReplace(autoit_mediapipe_com_path, "^(.+)\\.*$", "$1"))

; COM objects have to be created through the bound wrapper's .Call(), which
; runs DllActivateManifest / DllDeactivateActCtx around the built-in ComObjCreate().
cv := ComObjCreate.Call("OpenCV.cv")
mp := ComObjCreate.Call("Mediapipe.mediapipe")
image_path := A_ScriptDir "/testdata/portrait.jpg"
; Hand the sample image URL and the local target path to the library's download helper.
download_utils := mp.solutions.download_utils
download_utils.download("https://github.com/tensorflow/tfjs-models/raw/master/face-detection/test_data/portrait.jpg", image_path)

mp_face_mesh := mp.solutions.face_mesh
mp_drawing := mp.solutions.drawing_utils
mp_drawing_styles := mp.solutions.drawing_styles

image := cv.imread(image_path)
cv.imshow("Image", image)

face_mesh := mp_face_mesh.FaceMesh(Mediapipe_Params.Call({"static_image_mode": "True", "refine_landmarks": "True", "max_num_faces": 2, "min_detection_confidence": 0.5}))

; Convert the BGR image to RGB, and process it with MediaPipe Face Mesh.
results := face_mesh.process(cv.cvtColor(image, CV_COLOR_BGR2RGB := 4))

; -2147352572 is 0x80020004 (DISP_E_PARAMNOTFOUND).
; NOTE(review): presumably what the COM wrapper yields for a missing result - confirm.
; Checked before the result list is dereferenced.
if (results.item("multi_face_landmarks") = -2147352572)
{
   msgbox No face detection
   exitapp
}

; Show the string form of every detected face's landmark list (indices are 0-based).
loop % results.item("multi_face_landmarks").MaxIndex() + 1
   msgbox % results.item("multi_face_landmarks")[A_Index-1].__str__

; Draw face detections of each face.
image_width := image.width
image_height := image.height
annotated_image := image.copy()

loop % results.item("multi_face_landmarks").MaxIndex() + 1
{
   face_landmarks := results.item("multi_face_landmarks")[A_Index-1]
   ; Three passes over the same landmarks: tesselation, contours, irises.
   ; "Null" is turned into a VT_NULL variant by _Mediapipe_Params.
   mp_drawing.draw_landmarks(Mediapipe_Params.Call({"image": annotated_image, "landmark_list": face_landmarks, "connections": mp_face_mesh.FACEMESH_TESSELATION, "landmark_drawing_spec": "Null", "connection_drawing_spec": mp_drawing_styles.get_default_face_mesh_tesselation_style(1)}))
   mp_drawing.draw_landmarks(Mediapipe_Params.Call({"image": annotated_image, "landmark_list": face_landmarks, "connections": mp_face_mesh.FACEMESH_CONTOURS, "landmark_drawing_spec": "Null", "connection_drawing_spec": mp_drawing_styles.get_default_face_mesh_contours_style(1)}))
   mp_drawing.draw_landmarks(Mediapipe_Params.Call({"image": annotated_image, "landmark_list": face_landmarks, "connections": mp_face_mesh.FACEMESH_IRISES, "landmark_drawing_spec": "Null", "connection_drawing_spec": mp_drawing_styles.get_default_face_mesh_iris_connections_style(1)}))
}

cv.imshow("face mesh", annotated_image)
cv.waitKey()
cv.destroyAllWindows()
return


; Converts an AutoHotkey associative array into a Mediapipe "NamedParameters" COM object.
;
; Parameters:
;   ComObjCreate - callable object (here: the bound _ComObjCreate wrapper); used once
;                  to create the "Mediapipe.NamedParameters" COM object, which is then
;                  cached in a static variable.
;   paramArray   - associative array of parameter name -> value. String values that
;                  compare equal (with =) to "True", "False" or "Null" are replaced by
;                  a VT_BOOL true, VT_BOOL false or VT_NULL variant respectively;
;                  float-looking values are forced to numeric form. Object values
;                  (e.g. COM objects) are passed through unchanged.
; Returns:
;   The result of NamedParameters.create() for the array of [name, value] pairs.
_Mediapipe_Params(ComObjCreate, paramArray)
{
   static NamedParameters
   if !NamedParameters
      NamedParameters := ComObjCreate.Call("Mediapipe.NamedParameters")
   arr := ComObjArray(VT_VARIANT:=12, paramArray.Count())
   ; A plain local replaces the former dynamic variable arr%k%, which failed for keys
   ; that are not legal variable names and could overwrite a same-named global.
   ; The pairs are additionally held in this list so each one stays referenced until
   ; create() has returned, exactly as the per-key variables did.
   pairs := []
   for k, v in paramArray
   {
      pair := ComObjArray(VT_VARIANT:=12, 2)
      pair[0] := k
      if v is float
         v += 0                    ; force a numeric (not string) float
      else if (v = "True")
         v := ComObj(0xB, -1)      ; VT_BOOL, VARIANT_TRUE
      else if (v = "False")
         v := ComObj(0xB, 0)       ; VT_BOOL, VARIANT_FALSE
      else if (v = "Null")
         v := ComObj(1, 0)         ; VT_NULL
      pair[1] := v
      arr[A_Index-1] := pair       ; A_Index is 1-based, the SafeArray is 0-based
      pairs.Push(pair)
   }
   return NamedParameters.create(arr)
}

; Creates a COM object while the owning DLL's manifest is activated.
;
; Parameters:
;   opencvPath    - full path of the OpenCV COM DLL; used when the ProgID contains
;                   "opencv" (InStr is case-insensitive by default).
;   mediapipePath - full path of the Mediapipe COM DLL; used for every other ProgID.
;   comobject     - ProgID passed to the built-in ComObjCreate().
; Returns:
;   The created COM object.
;
; NOTE(review): DllActivateManifest / DllDeactivateActCtx are exports of the chosen
; DLL; presumably they push/pop an activation context for registration-free COM - confirm.
_ComObjCreate(opencvPath, mediapipePath, comobject)
{
   path := InStr(comobject, "opencv") ? opencvPath : mediapipePath
   DllCall(path "\DllActivateManifest")
   ; try/finally guarantees the deactivation call runs even when object creation
   ; fails; the error itself still propagates to the caller.
   try
   {
      obj := ComObjCreate(comobject)
   }
   finally
   {
      DllCall(path "\DllDeactivateActCtx")
   }
   return obj
}

Пример классификации текста:
https://github.com/smbape/node-autoit-m … sifier.au3
Надо скачать готовую модель отсюда:
https://storage.googleapis.com/mediapip … ier.tflite
и сохранить в папке со скриптом.

; Text classification (AutoHotkey v1) with a pre-trained .tflite model through
; the autoit-mediapipe-com COM library.
input_text := "I'm looking forward to what will come next."

opencv_world_path := A_ScriptDir "\opencv-4.7.0-windows\opencv\build\x64\vc16\bin\opencv_world470.dll"
opencv_ffmpeg_path := A_ScriptDir "\opencv-4.7.0-windows\opencv\build\x64\vc16\bin\opencv_videoio_ffmpeg470_64.dll"
autoit_opencv_com_path := A_ScriptDir "\autoit-opencv-com\autoit_opencv_com470.dll"
autoit_mediapipe_com_path := A_ScriptDir "\autoit-mediapipe-com\autoit_mediapipe_com-0.9.3.0-470.dll"
model_file := A_ScriptDir "\bert_text_classifier.tflite"

; The model has to be downloaded manually; report a missing file up front
; instead of letting the classifier creation fail with an opaque COM error.
if (!FileExist(model_file))
{
   msgbox % "Model file not found:`n" model_file "`nDownload bert_text_classifier.tflite and save it next to the script."
   exitapp
}

; Load the native DLLs by full path before any COM object is created.
hOpencv := DllCall("LoadLibrary", "str", opencv_world_path, "ptr")
hOpencvFfmpeg := DllCall("LoadLibrary", "str", opencv_ffmpeg_path, "ptr")
hOpencvCom := DllCall("LoadLibrary", "str", autoit_opencv_com_path, "ptr")
hMediapipeCom := DllCall("LoadLibrary", "str", autoit_mediapipe_com_path, "ptr")
; LoadLibrary returns 0 on failure.
; NOTE(review): hOpencvFfmpeg is deliberately not treated as fatal; it is assumed
; to be optional for this text-only example - confirm.
if (!hOpencv || !hOpencvCom || !hMediapipeCom)
{
   msgbox % "Failed to load a required DLL. Check that these files exist:`n" opencv_world_path "`n" autoit_opencv_com_path "`n" autoit_mediapipe_com_path
   exitapp
}

; Bound wrappers: ComObjCreate.Call(progId) and Mediapipe_Params.Call(assocArray).
ComObjCreate := Func("_ComObjCreate").Bind(autoit_opencv_com_path, autoit_mediapipe_com_path)
Mediapipe_Params := Func("_Mediapipe_Params").Bind(ComObjCreate)
resource_util := ComObjCreate.Call("Mediapipe.mediapipe.autoit._framework_bindings.resource_util")
; The regex strips the file name, leaving the directory that contains the mediapipe COM DLL.
resource_util.set_resource_dir(RegexReplace(autoit_mediapipe_com_path, "^(.+)\\.*$", "$1"))

; COM objects have to be created through the bound wrapper's .Call(), which
; runs DllActivateManifest / DllDeactivateActCtx around the built-in ComObjCreate().
mp := ComObjCreate.Call("Mediapipe.mediapipe")
autoit := ComObjCreate.Call("Mediapipe.mediapipe.tasks.autoit")
text := ComObjCreate.Call("Mediapipe.mediapipe.tasks.autoit.text")

; Create a TextClassifier object.
base_options := autoit.BaseOptions(Mediapipe_Params.Call({"model_asset_path": model_file}))
options := text.TextClassifierOptions(Mediapipe_Params.Call({"base_options": base_options}))
classifier := text.TextClassifier.create_from_options(options)

; Classify the input text.
classification_result := classifier.classify(input_text)

; Process the classification result. In this case, print out the most likely category.
top_category := classification_result.classifications(0).categories(0)
msgbox % top_category.category_name " - " top_category.score


; Converts an AutoHotkey associative array into a Mediapipe "NamedParameters" COM object.
;
; Parameters:
;   ComObjCreate - callable object (here: the bound _ComObjCreate wrapper); used once
;                  to create the "Mediapipe.NamedParameters" COM object, which is then
;                  cached in a static variable.
;   paramArray   - associative array of parameter name -> value. String values that
;                  compare equal (with =) to "True", "False" or "Null" are replaced by
;                  a VT_BOOL true, VT_BOOL false or VT_NULL variant respectively;
;                  float-looking values are forced to numeric form. Object values
;                  (e.g. COM objects) are passed through unchanged.
; Returns:
;   The result of NamedParameters.create() for the array of [name, value] pairs.
_Mediapipe_Params(ComObjCreate, paramArray)
{
   static NamedParameters
   if !NamedParameters
      NamedParameters := ComObjCreate.Call("Mediapipe.NamedParameters")
   arr := ComObjArray(VT_VARIANT:=12, paramArray.Count())
   ; A plain local replaces the former dynamic variable arr%k%, which failed for keys
   ; that are not legal variable names and could overwrite a same-named global.
   ; The pairs are additionally held in this list so each one stays referenced until
   ; create() has returned, exactly as the per-key variables did.
   pairs := []
   for k, v in paramArray
   {
      pair := ComObjArray(VT_VARIANT:=12, 2)
      pair[0] := k
      if v is float
         v += 0                    ; force a numeric (not string) float
      else if (v = "True")
         v := ComObj(0xB, -1)      ; VT_BOOL, VARIANT_TRUE
      else if (v = "False")
         v := ComObj(0xB, 0)       ; VT_BOOL, VARIANT_FALSE
      else if (v = "Null")
         v := ComObj(1, 0)         ; VT_NULL
      pair[1] := v
      arr[A_Index-1] := pair       ; A_Index is 1-based, the SafeArray is 0-based
      pairs.Push(pair)
   }
   return NamedParameters.create(arr)
}

; Creates a COM object while the owning DLL's manifest is activated.
;
; Parameters:
;   opencvPath    - full path of the OpenCV COM DLL; used when the ProgID contains
;                   "opencv" (InStr is case-insensitive by default).
;   mediapipePath - full path of the Mediapipe COM DLL; used for every other ProgID.
;   comobject     - ProgID passed to the built-in ComObjCreate().
; Returns:
;   The created COM object.
;
; NOTE(review): DllActivateManifest / DllDeactivateActCtx are exports of the chosen
; DLL; presumably they push/pop an activation context for registration-free COM - confirm.
_ComObjCreate(opencvPath, mediapipePath, comobject)
{
   path := InStr(comobject, "opencv") ? opencvPath : mediapipePath
   DllCall(path "\DllActivateManifest")
   ; try/finally guarantees the deactivation call runs even when object creation
   ; fails; the error itself still propagates to the caller.
   try
   {
      obj := ComObjCreate(comobject)
   }
   finally
   {
      DllCall(path "\DllDeactivateActCtx")
   }
   return obj
}

Хорошие уроки для начала изучения opencv+mediapipe:
https://www.youtube.com/watch?v=01sAkU_NvOY
Тема для обсуждения