AIGC 之 function-calling

doramart 作者

2024-05-02

阅读 2240

DoraCMS

结合近期的学习，我先在本地尝试跑 Tabby，但由于系统原因没有成功（好像需要 M 系列的苹果芯片）；后来看到近期发布的 Llama 3 8B，结合 Ollama 在本地跑了起来：通过终端调用稍微有点卡，通过 Open WebUI 调用则卡得相当厉害，调用方式如下图：

正课方面，近期学习了 function calling。其指导思想是：在调用 OpenAI 接口的过程中，可以通过自定义函数来获取想要的数据；OpenAI 会根据自然语言分析是否需要调用 function，并结合函数返回的结果给出相应的回答。我做了两个例子：

① 通过 function calling 的方式调用接口。需要注意，模型并不是每次都会触发函数调用，需要不断调整提示词进行尝试

# Initialisation: imports, environment loading and the shared OpenAI client.
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
import json
# Load variables from the .env file located by find_dotenv()
# (presumably this is where the OpenAI API key is kept — confirm).
_ = load_dotenv(find_dotenv())
# Shared client used by get_completion below.
client = OpenAI()

def print_json(data):
    """Pretty-print *data* to stdout.

    Objects exposing ``model_dump_json`` are first converted to plain Python
    data through their JSON dump. Lists are printed element by element
    (recursively), dicts are printed as indented JSON with non-ASCII
    characters kept as-is, and anything else is printed unchanged.
    """
    if hasattr(data, "model_dump_json"):
        data = json.loads(data.model_dump_json())

    # Lists are never dumped as a whole: every element gets its own output.
    if isinstance(data, list):
        for element in data:
            print_json(element)
        return

    if isinstance(data, dict):
        data = json.dumps(data, indent=4, ensure_ascii=False)
    print(data)

def get_completion(messages, model="gpt-3.5-turbo"):
    """Send *messages* to the chat completions API with one tool attached.

    The request advertises a single function tool, ``get_content_list``, and
    lets the model decide whether to call it.

    Args:
        messages: Conversation history in chat-completions message format.
        model: Name of the chat model to use.

    Returns:
        The message of the first choice in the response; the caller inspects
        its ``tool_calls`` and ``content`` attributes.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        # Fixed seed: with an unchanged temperature and prompt the output is
        # expected to stay the same between runs.
        seed=1024,
        # "auto" is the default: the model decides on its own whether to
        # return a function call or a text reply. A specific function can
        # also be forced; see the official documentation.
        tool_choice="auto",
        tools=[
            {
                "type": "function",
                "function": {
                    "name": "get_content_list",
                    "description": "根据关键字，优先从查出包含该关键字的文章列表，关键字可以存在于文章标题，标签，或者文章内容等",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "searchkey": {
                                "type": "string",
                                "description": "文章关键字",
                            },
                        },
                        # The caller invokes get_content_list(**args), which
                        # needs this argument; declaring it required keeps the
                        # model from emitting a call without it.
                        "required": ["searchkey"],
                    },
                },
            },
        ],
    )
    return response.choices[0].message

import requests

def get_content_list(searchkey):
    """Look up the first article matching a keyword via the html-js.cn API.

    The product name "DoraCMS" and surrounding whitespace are removed from
    *searchkey*, and the remaining keyword is sent as the ``tagName`` query
    parameter.

    Args:
        searchkey: Keyword supplied by the model's function call.

    Returns:
        The first entry of ``data.docs`` in the JSON response, or ``None``
        when the response carries no documents.
    """
    # Local import so the block does not depend on a new file-level import.
    from urllib.parse import quote

    # Strip the product name and whitespace so only the actual topic is sent.
    keyword = searchkey.replace("DoraCMS", "").strip()

    # The previous URL used the JavaScript placeholder "${searchkey}", which
    # put a literal "$" in front of the raw, uncleaned and unescaped keyword.
    url = f"https://www.html-js.cn/api/content/getList?tagName={quote(keyword, safe='')}"
    print(url)

    # A timeout keeps the tool-call loop from hanging on an unresponsive host.
    r = requests.get(url, timeout=10)
    result = r.json()

    # Tolerate a missing or null "data"/"docs" instead of raising.
    data = result.get("data") if isinstance(result, dict) else None
    docs = data.get("docs") if isinstance(data, dict) else None
    if docs:
        return docs[0]
    return None

prompt = "DoraCMS的前端知识体系"
# Alternative prompt, noted by the author as producing two calls in one request:
# prompt = "我到北京出差，给我推荐三里屯的酒店，和五道口附近的咖啡"
messages = [
    {
        "role": "system",
        "content": "你是一个精通基于Nodejs的内容管理系统 DoraCMS 的全栈开发者。",
    },
    {"role": "user", "content": prompt},
]
response = get_completion(messages)
messages.append(response)  # keep the model's reply in the conversation history
print("=====GPT回复=====")
print_json(response)

# Keep answering tool calls until the model replies without requesting one.
while response.tool_calls is not None:
    # One reply may carry several function call requests; answer each of them.
    for tool_call in response.tool_calls:
        args = json.loads(tool_call.function.arguments)
        print("函数参数展开：")
        print_json(args)

        # Function routing.
        if tool_call.function.name == "get_content_list":
            print("Call: get_content_list")
            result = get_content_list(**args)
        else:
            # Without this branch `result` would be unbound (NameError) on the
            # first pass, or silently reuse the previous call's value. The
            # tool call still gets a reply so the history stays complete.
            result = f"Unknown function: {tool_call.function.name}"

        print("=====函数返回=====")
        print_json(result)
        messages.append(
            {
                "tool_call_id": tool_call.id,  # identifies which call this answers
                "role": "tool",
                "name": tool_call.function.name,
                "content": str(result),  # the result must be sent as a string
            }
        )
    response = get_completion(messages)
    messages.append(response)  # keep the model's reply in the conversation history

print("=====最终回复=====")
print(response.content)
print("=====对话历史=====")
print_json(messages)

对话历史

=====对话历史=====
{
    "role": "system",
    "content": "你是一个精通基于Nodejs的内容管理系统 DoraCMS 的全栈开发者。"
}
{
    "role": "user",
    "content": "DoraCMS的前端知识体系"
}
{
    "content": null,
    "role": "assistant",
    "function_call": null,
    "tool_calls": [
        {
            "id": "call_rzVGg6qvlYQhCZTcuW7ThGSw",
            "function": {
                "arguments": "{\"searchkey\":\"DoraCMS前端知识体系\"}",
                "name": "get_content_list"
            },
            "type": "function"
        }
    ]
}
{
    "tool_call_id": "call_rzVGg6qvlYQhCZTcuW7ThGSw",
    "role": "tool",
    "name": "get_content_list",
    "content": "{'type': '1', 'categories': [{'enable': True, 'defaultUrl': 'document', '_id': 'Ek7skiaw', 'name': 'DoraCMS', 'contentTemp': None, 'url': '/document___Ek7skiaw', 'id': 'Ek7skiaw'}, {'enable': True, 'defaultUrl': 'document/softUpdate', '_id': '4yzPes6w', 'name': '版本升级', 'contentTemp': None, 'url': '/document/softUpdate___4yzPes6w', 'id': '4yzPes6w'}], 'tags': [{'_id': 'IZHCVQcry', 'name': 'nodecms', 'url': '/tag/nodecms', 'id': 'IZHCVQcry'}, {'_id': 'WXN4r2Ct', 'name': 'eggcms', 'url': '/tag/eggcms', 'id': 'WXN4r2Ct'}], 'keywords': [], 'sImg': 'https://cdn.html-js.cn/cms/upload/images/20210925/1632580364803065432.png', 'videoImg': '', 'state': '2', 'isTop': 1, 'roofPlacement': '0', 'clickNum': 7345, '_id': 'bTtSErSoF', 'title': 'Nodejs 内容管理系统 DoraCMS 2.1.8 发布', 'stitle': 'DoraCMS 2.1.7-- 版本发布', 'author': {'logo': 'https://cdn.html-js.cn/cms/upload/smallimgs/img1448202744000.jpg', '_id': '4JiWCMhzg', 'userName': 'doramart'}, 'discription': '近期对DoraCMS做了一些优化，也修复了一些bug，为了让vscode不再出现令人讨厌的格式问题，加入了eslint和prettier进行格式化校验', 'uAuthor': {'logo': 'https://cdn.html-js.cn/cms/upload/smallimgs/img1447739082000.jpg', 'group': '0', '_id': '41oT6sQXl', 'userName': 'doramart', 'name': '生哥', 'id': '41oT6sQXl', 'had_followed': False}, 'date': '2021-09-25 22:32:48', 'updateDate': '2021-09-25 22:32:48', 'url': '/details/bTtSErSoF.html', 'id': 'bTtSErSoF', 'hasPraised': False, 'hasComment': False, 'hasFavorite': False, 'hasDespise': False, 'commentNum': 0, 'likeNum': 1, 'favoriteNum': 0, 'despiseNum': 0}"
}
{
    "content": "我找到了关于DoraCMS前端知识体系的文章：[Nodejs 内容管理系统 DoraCMS 2.1.8 发布](https://cdn.html-js.cn/cms/upload/images/20210925/1632580364803065432.png)。该文章介绍了DoraCMS 2.1.8 版本的发布内容，包括优化、bug修复以及格式化校验等方面的内容。你可以查看详细信息和了解更多内容。",
    "role": "assistant",
    "function_call": null,
    "tool_calls": null
}

② 通过 function calling 的方式读取数据库，并查询；结果比较满意

# Second example: let the model query a database.
import json
from openai import OpenAI
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv().
_ = load_dotenv(find_dotenv())

import pymysql
# MariaDB connection settings.
# NOTE(review): host, user and password are hard-coded here (the password is
# masked in this listing); consider reading them from the environment instead.
config = {
   "host": "192.168.31.69",  # e.g. 'your_remote_host.com'
   "user": "root",  # e.g. 'your_username'
   "password": "******",  # e.g. 'your_password'
   "database": "doracms",  # e.g. 'your_database'
   "charset": "utf8mb4",  # avoids garbled characters
   "cursorclass": pymysql.cursors.DictCursor,  # query results come back as dicts
   "port": 3309,
}
# Open the database connection (this happens as soon as the script is loaded).
connection = pymysql.connect(**config)

# Helper used only to make the demo output readable; not essential.
def print_json(data):
    """Pretty-print *data* to stdout.

    Objects exposing ``model_dump_json`` are first converted to plain Python
    data through their JSON dump. Lists and dicts are printed as indented
    JSON with non-ASCII characters kept as-is; any other value is printed
    unchanged.
    """
    if hasattr(data, "model_dump_json"):
        data = json.loads(data.model_dump_json())

    is_structured = isinstance(data, (list, dict))
    print(json.dumps(data, indent=4, ensure_ascii=False) if is_structured else data)

client = OpenAI()

# DDL of the `doracms_user` table. get_sql_completion embeds this text in the
# tool's parameter description so the model knows which columns it can query.
database_schema_string = """
CREATE TABLE `doracms_user` (
 `id` int(11) NOT NULL,
 `enable` tinyint(1) NOT NULL DEFAULT 1,
 `name` varchar(255) DEFAULT NULL,
 `user_name` varchar(255) DEFAULT NULL,
 `password` varchar(100) DEFAULT NULL,
 `email` varchar(255) DEFAULT NULL,
 `qq` int(11) DEFAULT NULL,
 `phone_num` varchar(255) DEFAULT NULL,
 `country_code` varchar(255) DEFAULT NULL,
 `id_no` int(11) DEFAULT NULL,
 `id_type` varchar(255) DEFAULT '1',
 `comments` varchar(255) DEFAULT '',
 `introduction` varchar(255) DEFAULT '',
 `position` varchar(255) DEFAULT NULL,
 `profession` varchar(255) DEFAULT NULL,
 `industry` varchar(255) DEFAULT NULL,
 `experience` varchar(255) DEFAULT NULL,
 `company` varchar(255) DEFAULT NULL,
 `website` varchar(255) DEFAULT NULL,
 `logo` varchar(500) DEFAULT '/static/upload/images/defaultlogo.png',
 `group` varchar(255) DEFAULT '0',
 `province` varchar(255) DEFAULT NULL,
 `city` varchar(255) DEFAULT NULL,
 `gender` varchar(255) DEFAULT '0',
 `state` varchar(255) DEFAULT '1',
 `retrieve_time` bigint(20) DEFAULT NULL,
 `login_active` tinyint(1) DEFAULT 0,
 `device_id` varchar(255) DEFAULT NULL,
 `created_at` datetime NOT NULL,
 `updated_at` datetime NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
"""

def get_sql_completion(messages, model="gpt-3.5-turbo"):
    """Send *messages* to the chat completions API with the SQL tool attached.

    The request advertises a single function tool, ``ask_database``, whose
    ``query`` parameter description embeds ``database_schema_string`` so the
    model can write SQL against the real table layout.

    Args:
        messages: Conversation history in chat-completions message format.
        model: Name of the chat model to use.

    Returns:
        The message of the first choice in the response; the caller inspects
        its ``tool_calls`` and ``content`` attributes.
    """
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        tools=[
            {  # adapted from the official OpenAI example https://github.com/openai/openai-cookbook/blob/main/examples/How_to_call_functions_with_chat_models.ipynb
                "type": "function",
                "function": {
                    "name": "ask_database",
                    "description": "Use this function to answer user questions about business. \
                           Output should be a fully formed SQL query.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "query": {
                                "type": "string",
                                # The engine name was misspelled "Mairadb" in
                                # this prompt; the model is now told the real
                                # dialect name.
                                "description": f"""
                           SQL query extracting info to answer the user's question.
                           SQL should be read or write using this database schema:
                           {database_schema_string}
                           The query should be returned in plain text, not in JSON.
                           The query should only contain grammars supported by MariaDB.
                           """,
                            }
                        },
                        "required": ["query"],
                    },
                },
            }
        ],
    )
    return response.choices[0].message

def ask_database(query):
    """Execute *query* on the shared database connection and return all rows.

    The connection is closed again after every call, so it is re-opened on
    demand; this makes the function safe to call more than once.

    NOTE(review): the SQL text is executed unmodified. In this file it comes
    straight from the model, so a read-only database account is advisable.

    Args:
        query: A complete SQL statement.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the statement.
    """
    # The `finally` below closes the module-level connection, which used to
    # make every call after the first one fail. ping(reconnect=True)
    # re-establishes the connection if it is closed and is a cheap liveness
    # check otherwise. It sits outside the `try` so that a failed reconnect
    # is not masked by an error from closing an already-closed connection.
    connection.ping(reconnect=True)
    try:
        with connection.cursor() as cursor:
            cursor.execute(query)
            return cursor.fetchall()
    finally:
        # Release the database connection once the rows have been fetched.
        connection.close()

prompt = "DoraCMS站点注册用户数是多少"
messages = [
    {
        "role": "system",
        "content": "你是全栈开发者，熟悉DoraCMS管理系统实现的基本原理和数据库结构",
    },
    {"role": "user", "content": prompt},
]

response = get_sql_completion(messages)
if response.content is None:
    # Replace a null content with an empty string before the message is sent
    # back as history (presumably the API rejects null here — confirm).
    response.content = ""
messages.append(response)

print("====Function Calling====")
print_json(response)

# Only the first requested tool call is looked at, and only when it targets
# ask_database; otherwise the script ends after the dump above.
tool_call = None if response.tool_calls is None else response.tool_calls[0]
if tool_call is not None and tool_call.function.name == "ask_database":
    args = json.loads(tool_call.function.arguments)
    print("====SQL====")
    sql = args["query"]
    print(sql)

    result = ask_database(sql)
    print("====DB Records====")
    print(result)

    # Hand the rows back to the model so it can phrase the final answer.
    tool_reply = {
        "tool_call_id": tool_call.id,
        "role": "tool",
        "name": "ask_database",
        "content": str(result),
    }
    messages.append(tool_reply)

    response = get_sql_completion(messages)
    print("====最终回复====")
    print(response.content)

执行结果

====Function Calling====
{
    "content": "",
    "role": "assistant",
    "function_call": null,
    "tool_calls": [
        {
            "id": "call_LRm7MZQKYEDh4b6JQhimuuL9",
            "function": {
                "arguments": "{\"query\":\"SELECT COUNT(id) AS total_users FROM doracms_user\"}",
                "name": "ask_database"
            },
            "type": "function"
        }
    ]
}
====SQL====
SELECT COUNT(id) AS total_users FROM doracms_user
====DB Records====
[{'total_users': 808}]
====最终回复====
DoraCMS站点注册用户数为808。

总结：function-calling 的指导思想是可以自定义回调函数：这个回调函数可以调用一个接口，也可以通过提示词把数据库的基础信息告诉 GPT，让 GPT 生成 SQL 语句并执行，再把执行结果作为入参继续调用 GPT，从而得到最终结果。function-calling 适用于在网上不容易查到的信息，原因是大语言模型的训练数据并不是最新的（虽然可以联网）。需要注意的是，把通过 function-calling 查询到的私有数据上送给 GPT 是有风险的，对数据敏感的场景最好使用私有部署的大模型来处理相关逻辑。

评论（32）

AIGC 之 function-calling

推荐阅读

Node.js API实例讲解——创建http服务器

Node.js API实例讲解——Buffer的静态方法

Node.js API实例讲解——https服务器与客户端

Node.js API实例讲解——FS文件系统概述

评论（32）