{"id":1242,"date":"2026-01-08T10:27:00","date_gmt":"2026-01-08T02:27:00","guid":{"rendered":"https:\/\/smartaiunion.com\/?p=1242"},"modified":"2026-01-07T13:29:56","modified_gmt":"2026-01-07T05:29:56","slug":"300-%e4%ba%bf%e5%8f%82%e6%95%b0-qwen-%e6%a8%a1%e5%9e%8b%e7%99%bb%e9%99%86%e6%a0%91%e8%8e%93%e6%b4%be-%e5%b9%b6%e5%ae%9e%e7%8e%b0%e5%ae%9e%e6%97%b6%e8%bf%90%e8%a1%8c","status":"publish","type":"post","link":"https:\/\/smartaiunion.com\/index.php\/2026\/01\/08\/1242\/","title":{"rendered":"300 \u4ebf\u53c2\u6570 Qwen \u6a21\u578b\u767b\u9646\u6811\u8393\u6d3e\u2026\u2026 \u5e76\u5b9e\u73b0\u5b9e\u65f6\u8fd0\u884c"},"content":{"rendered":"\n<p>\u672c\u6b21\u53d1\u5e03\uff0c\u6211\u4eec\u805a\u7126\u7528\u6237\u5b9e\u9645\u8fd0\u884c\u6a21\u578b\u65f6\u7684\u6838\u5fc3\u4f53\u9a8c\uff1a\u5728\u7279\u5b9a\u76ee\u6807\u8bbe\u5907\u4e0a\u5b9e\u73b0\u5feb\u901f\u3001\u9ad8\u8d28\u91cf\u7684\u54cd\u5e94\u3002<\/p>\n\n\n\n<p>\u6211\u4eec\u91c7\u7528\u81ea\u7814\u7684\u4f4d\u957f\u5b66\u4e60\u65b9\u6cd5 Shapelearn\uff0c\u4e3a Qwen3-30B-A3B-Instruct-2507 \u9009\u62e9\u6743\u91cd\u6570\u636e\u7c7b\u578b\uff0c\u6838\u5fc3\u76ee\u6807\u662f\u5728\u6bcf\u79d2\u4ee4\u724c\u6570\uff08TPS\uff09\u548c\u8f93\u51fa\u8d28\u91cf\u4e0a\u5b9e\u73b0\u6027\u80fd\u6700\u5927\u5316\uff0c\u540c\u65f6\u9075\u5faa\u4e00\u4e2a\u5b9e\u9645\u7ea6\u675f\uff1a\u6a21\u578b\u5fc5\u987b\u80fd\u8212\u9002\u9002\u914d\u8bbe\u5907\u7684\u53ef\u7528\u5185\u5b58\u3002\u4e00\u65e6\u6ee1\u8db3\u5185\u5b58\u9002\u914d\u9700\u6c42\uff0c\u5355\u7eaf\u7f29\u5c0f\u6587\u4ef6\u4f53\u79ef\u5e76\u975e\u76ee\u6807 \u2014\u2014 \u53ea\u6709\u5f53\u8fdb\u4e00\u6b65\u538b\u7f29\u80fd\u6539\u5584\u7528\u6237\u771f\u6b63\u5173\u5fc3\u7684\u6838\u5fc3\u6743\u8861\uff08\u901f\u5ea6\u4e0e\u8d28\u91cf\uff09\u65f6\uff0c\u6211\u4eec\u624d\u4f1a\u8fdb\u884c\u4f18\u5316\u3002<\/p>\n\n\n\n<p>\u8fd9\u79cd\u4f4d\u957f\u5b66\u4e60\u601d\u8def\u81f3\u5173\u91cd\u8981\uff0c\u56e0\u4e3a\u5728 llama.cpp 
\u4e2d\uff0c\u201c\u6bd4\u7279\u6570\u8d8a\u5c11\u201d \u5e76\u4e0d\u610f\u5473\u7740 \u201c\u901f\u5ea6\u8d8a\u5feb\u201d\u3002\u4e0d\u540c\u7684\u91cf\u5316\u683c\u5f0f\u4f1a\u89e6\u53d1\u4e0d\u540c\u7684\u5185\u6838\u548c\u5f00\u9500\uff0c\u5728\u90e8\u5206 GPU \u4e0a\uff0c\u5373\u4fbf\u5360\u7528\u5185\u5b58\u66f4\u5c11\uff0c\u66f4\u4f4e\u6bd4\u7279\u7684\u91cf\u5316\u53cd\u800c\u53ef\u80fd\u5bfc\u81f4\u901f\u5ea6\u53d8\u6162\u3002<\/p>\n\n\n\n<p>\u6838\u5fc3\u539f\u5219\uff1a\u5c06\u5185\u5b58\u89c6\u4e3a\u5fc5\u987b\u6ee1\u8db3\u7684\u9884\u7b97\uff0c\u518d\u4f18\u5316\u6700\u5173\u952e\u7684\u6307\u6807 \u2014\u2014TPS \u548c\u8d28\u91cf\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u6838\u5fc3\u6458\u8981<\/h2>\n\n\n\n<p>\u662f\u7684\uff0c\u8fd9\u6b3e 300 \u4ebf\u53c2\u6570\u7684 Qwen3 \u6a21\u578b\u53ef\u5728\u6811\u8393\u6d3e\u4e0a\u8fd0\u884c\u3002\u5728\u6811\u8393\u6d3e 5\uff0816GB \u7248\u672c\uff09\u4e0a\uff0c<code>Q3_K_S-2.70bpw [KQ-2]<\/code>&nbsp;\u53d8\u4f53\u4ee5 2.70 \u6bd4\u7279 \/ \u53c2\u6570\uff08BPW\uff09\u7684\u89c4\u683c\u5b9e\u73b0 8.03 TPS\uff0c\u540c\u65f6\u4fdd\u7559 94.18% \u7684 BF16 \u7cbe\u5ea6\uff0c\u771f\u6b63\u5e26\u6765\u5b9e\u65f6\u4ea4\u4e92\u611f\u3002\u66f4\u5e7f\u6cdb\u5730\u8bf4\uff0c\u8fd9\u4e00\u4f18\u52bf\u5728\u6240\u6709\u6d4b\u8bd5\u573a\u666f\u4e2d\u5747\u6210\u7acb\uff1aByteShape \u6a21\u578b\u7684 TPS \/ \u8d28\u91cf\u6743\u8861\u8868\u73b0\u4f18\u4e8e\u540c\u7c7b\u65b9\u6848\uff08\u672c\u6587\u5bf9\u6bd4\u4e86 Unsloth \u548c MagicQuant\uff09\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4e2d\u592e\u5904\u7406\u5668\uff08CPU\uff09\u6d4b\u8bd5<\/h2>\n\n\n\n<p>\u5728 CPU \u4e0a\uff0c\u901a\u8fc7\u7f29\u77ed\u4f4d\u957f\u51cf\u5c0f\u6a21\u578b\u4f53\u79ef\u5bf9 TPS \u548c\u7cbe\u5ea6\u6743\u8861\u7684\u5f71\u54cd\u7b26\u5408\u9884\u671f\uff1a\u4e00\u65e6\u6a21\u578b\u9002\u914d\u5185\u5b58\uff0c\u51cf\u5c0f\u4f53\u79ef\u901a\u5e38\u4f1a\u4ee5\u8fd1\u4e4e\u5355\u8c03\u7684\u65b9\u5f0f\u63d0\u5347 
TPS\u3002\u82e5\u6570\u636e\u7c7b\u578b\u9009\u62e9\u5f97\u5f53\uff0c\u53ef\u901a\u8fc7\u53ef\u9884\u6d4b\u7684\u5c11\u91cf\u8d28\u91cf\u635f\u8017\u6362\u53d6\u901f\u5ea6\u63d0\u5347\uff0c\u8ba9\u7528\u6237\u80fd\u8f7b\u677e\u627e\u5230\u7b26\u5408\u81ea\u8eab\u7ea6\u675f\u7684\u6027\u80fd\u5e73\u8861\u70b9\u3002<\/p>\n\n\n\n<p>\u6211\u4eec\u5148\u4ece\u5185\u5b58\u6700\u53d7\u9650\u7684 CPU \u573a\u666f\uff08\u6811\u8393\u6d3e 5 16GB\uff09\u5165\u624b\uff08\u201c\u9002\u914d\u5185\u5b58\u201d \u662f\u6838\u5fc3\u9650\u5236\u56e0\u7d20\uff09\uff0c\u518d\u6d4b\u8bd5\u6240\u6709\u6a21\u578b\u5747\u80fd\u9002\u914d\u7684 Intel i7\uff0864GB\uff09\u573a\u666f\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u6811\u8393\u6d3e 5<\/h3>\n\n\n\n<p>\u4e0b\u56fe\u5c55\u793a\u4e86\u9002\u914d\u6811\u8393\u6d3e 5 16GB \u5185\u5b58\u7684\u6a21\u578b\u7684 TPS \u4e0e\u6807\u51c6\u5316\u7cbe\u5ea6\u5173\u7cfb\uff08\u6c14\u6ce1\u5927\u5c0f = \u6a21\u578b\u4f53\u79ef BPW\uff0c\u6570\u503c\u8d8a\u5c0f\u8d8a\u4f18\uff0c\u8303\u56f4\uff1a2.1-3.9\uff09\uff1a<\/p>\n\n\n\n<p>\u6811\u8393\u6d3e 5\uff1a\u6bcf\u79d2\u4ee4\u724c\u6570\uff08TPS\uff09 vs \u8d28\u91cf\uff08\u6c14\u6ce1\u5927\u5c0f = \u6a21\u578b\u4f53\u79ef\uff09<\/p>\n\n\n\n<p>\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c300 \u4ebf\u53c2\u6570\u6a21\u578b\u5728\u6811\u8393\u6d3e\u4e0a\u80fd\u4ee5 92% \u4ee5\u4e0a\u7684\u57fa\u51c6\u7cbe\u5ea6\u7a33\u5b9a\u8fd0\u884c\u5728 8.5 TPS\uff0c\u5f7b\u5e95\u91cd\u5851\u4e86\u5bf9\u6811\u8393\u6d3e\u7ea7\u8bbe\u5907\u7684\u6027\u80fd\u9884\u671f\u3002\u6574\u4f53\u8d8b\u52bf\u663e\u793a\uff0cShapelearn \u6301\u7eed\u751f\u6210\u66f4\u4f18\u6a21\u578b \u2014\u2014ByteShape \u7684\u6027\u80fd\u70b9\u59cb\u7ec8\u4f4d\u4e8e Unsloth \u7684\u53f3\u4e0a\u65b9\uff0c\u610f\u5473\u7740\u5728\u76f8\u540c\u8d28\u91cf\u4e0b\u5b9e\u73b0\u66f4\u9ad8 
TPS\uff0c\u6216\u5728\u76f8\u540c\u541e\u5410\u91cf\u4e0b\u8fbe\u5230\u66f4\u9ad8\u8d28\u91cf\u3002<\/p>\n\n\n\n<p>\u6211\u4eec\u9488\u5bf9\u4e24\u5927\u6838\u5fc3\u9700\u6c42\u63d0\u4f9b\u4f18\u9009\u65b9\u6848\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u517c\u987e\u54cd\u5e94\u901f\u5ea6\u4e0e\u7cbe\u5ea6\uff1a\u5bf9\u4e8e\u4ea4\u4e92\u5f0f\u7aef\u4fa7\u5e94\u7528\uff0c\u7528\u6237\u611f\u77e5\u7684\u6d41\u7545\u5ea6\u53d6\u51b3\u4e8e\u6587\u672c\u751f\u6210\u901f\u5ea6\u800c\u975e\u5cf0\u503c\u541e\u5410\u91cf\u3002\u5b9e\u9645\u4e0a\uff0c\u5f53 TPS \u8fbe\u5230\u7ea6 8\uff08\u663e\u8457\u9ad8\u4e8e\u5e38\u89c4\u9605\u8bfb\u901f\u5ea6\uff09\u65f6\uff0c\u751f\u6210\u4f53\u9a8c\u5373\u53ef\u79f0\u4e3a \u201c\u5b9e\u65f6\u201d\u3002\u5728\u6811\u8393\u6d3e\u7684\u5b9e\u65f6\u8fd0\u884c\u573a\u666f\u4e2d\uff0c<code>Q3_K_S-2.70bpw [KQ-2]<\/code>\uff082.70 BPW\u30018.03 TPS\u300194.18% \u7cbe\u5ea6\uff09\u662f\u9996\u9009\u63a8\u8350\uff1a\u5b83\u65e2\u7a81\u7834\u5b9e\u65f6\u9608\u503c\uff0c\u53c8\u4fdd\u6301\u9ad8\u51c6\u786e\u6027\u3002\u4e0e\u540c\u7b49\u8d28\u91cf\u7684 Unsloth \u6a21\u578b\u76f8\u6bd4\uff0cByteShape \u4ee5\u66f4\u4f4e\u7684 BPW \u548c\u66f4\u9ad8\u7684 TPS \u5b9e\u73b0\u5b9e\u65f6\u6027\u80fd\uff0c\u662f\u4ea4\u4e92\u5f0f\u8fb9\u7f18\u90e8\u7f72\u7684\u66f4\u4f18\u9009\u62e9\u3002<\/li>\n\n\n\n<li>\u7cbe\u5ea6\u4f18\u5148\uff1a\u4e0b\u8868\u5217\u51fa\u4e86\u53ef\u5728\u6811\u8393\u6d3e\u4e0a\u8fd0\u884c\u7684\u6700\u9ad8\u7cbe\u5ea6\u6a21\u578b\u3002\u5176\u4e2d\uff0cByteShape \u6a21\u578b\u5145\u5206\u5229\u7528\u53ef\u7528\u8d44\u6e90\u6700\u5927\u5316\u7cbe\u5ea6\uff0c\u5360\u636e\u8bef\u5dee\u6700\u4f4e\u533a\u95f4\uff08\u76f8\u5bf9\u8bef\u5dee\u7ea6 1.1%-1.3%\uff0c\u7cbe\u5ea6\u7ea6 98.8%\uff09\uff0c\u800c Unsloth \u7684\u6700\u4f18\u6a21\u578b\u76f8\u5bf9\u8bef\u5dee\u4ecd\u5728 2.1%-2.2%\uff08\u7cbe\u5ea6\u7ea6 97.9%\uff09\u3002\u4e0e Unsloth \u7684<code>UD-Q3_K_XL [8]<\/code>\u76f8\u6bd4\uff0cByteShape 
\u7684\u8bef\u5dee\u7387\u6700\u9ad8\u53ef\u964d\u4f4e 1.87 \u500d\uff0c\u540c\u65f6\u4fdd\u6301 5-6 TPS \u7684\u8fd0\u884c\u901f\u5ea6\uff08\u5b8c\u5168\u7b26\u5408\u6811\u8393\u6d3e\u7684\u5e38\u89c4 TPS \u6807\u51c6\uff09\uff0c\u662f\u7cbe\u5ea6\u4f18\u5148\u573a\u666f\u7684\u66f4\u4f73\u9009\u62e9\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u5373\u4fbf\u5728\u4f18\u5148\u8ffd\u6c42\u6781\u81f4\u901f\u5ea6\u5e76\u5141\u8bb8\u5c11\u91cf\u7cbe\u5ea6\u635f\u8017\u7684\u573a\u666f\u4e0b\uff0c<code>Q3_K_S-3.25bpw [KQ-5]<\/code>\u4ecd\u5c55\u73b0\u66f4\u4f18\u6743\u8861\uff1a\u6bd4 Unsloth \u7684\u6700\u5feb\u6a21\u578b\u66f4\u7cbe\u51c6\u3001\u4f53\u79ef\u66f4\u5c0f\u4e14\u901f\u5ea6\u66f4\u5feb\u3002<\/p>\n\n\n\n<figure class=\"wp-block-table\"><table class=\"has-fixed-layout\"><thead><tr><th class=\"has-text-align-center\" data-align=\"center\">\u6a21\u578b<\/th><th class=\"has-text-align-center\" data-align=\"center\">\u76f8\u5bf9\u8bef\u5dee<\/th><th class=\"has-text-align-center\" data-align=\"center\">\u6bd4\u7279 \/ \u53c2\u6570\uff08BPW\uff09<\/th><th class=\"has-text-align-center\" data-align=\"center\">\u6bcf\u79d2\u4ee4\u724c\u6570\uff08TPS\uff09<\/th><\/tr><\/thead><tbody><tr><td><code>Q4_K_S-3.92bpw [KQ-7]<\/code><\/td><td>1.14%<\/td><td>3.92<\/td><td>5.30<\/td><\/tr><tr><td><code>Q4_K_S-3.61bpw [KQ-6]<\/code><\/td><td>1.25%<\/td><td>3.61<\/td><td>5.94<\/td><\/tr><tr><td><code>Q3_K_S-3.25bpw [KQ-5]<\/code><\/td><td>2.03%<\/td><td>3.25<\/td><td>6.68<\/td><\/tr><tr><td><code>UD-IQ3_XXS [6]<\/code><\/td><td>2.22%<\/td><td>3.38<\/td><td>5.03<\/td><\/tr><tr><td><code>UD-Q3_K_XL [8]<\/code><\/td><td>2.13%<\/td><td>3.62<\/td><td>6.28<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n<p>\u6ce8\uff1a\u8bb8\u591a\u5176\u4ed6 Unsloth \u548c MagicQuant \u6a21\u578b\uff08\u5305\u62ec\u90e8\u5206 ByteShape 
\u6a21\u578b\uff09\u672a\u5217\u5165\u672c\u8868\uff0c\u5b83\u4eec\u5728\u5176\u4ed6\u7ae0\u8282\u4e2d\u8fdb\u884c\u5bf9\u6bd4\uff0c\u4f46\u56e0\u65e0\u6cd5\u9002\u914d\u6811\u8393\u6d3e\u5185\u5b58\u800c\u4e0d\u9002\u7528\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">Intel i7<\/h3>\n\n\n\n<p>\u63a5\u4e0b\u6765\u6d4b\u8bd5 Intel i7\uff0864GB \u5185\u5b58\uff09\uff0c\u4e0b\u56fe\u5c55\u793a\u6240\u6709\u6a21\u578b\u7684 TPS \u4e0e\u6807\u51c6\u5316\u7cbe\u5ea6\u5173\u7cfb\uff08\u6c14\u6ce1\u5927\u5c0f = \u6a21\u578b\u4f53\u79ef BPW\uff0c\u6570\u503c\u8d8a\u5c0f\u8d8a\u4f18\uff0c\u8303\u56f4\uff1a2.1-9.4\uff09\uff1a<\/p>\n\n\n\n<p>Intel i7\uff1a\u6bcf\u79d2\u4ee4\u724c\u6570\uff08TPS\uff09 vs \u8d28\u91cf\uff08\u6c14\u6ce1\u5927\u5c0f = \u6a21\u578b\u4f53\u79ef\uff09<\/p>\n\n\n\n<p>\u6574\u4f53\u800c\u8a00\uff0cByteShape \u6a21\u578b\u8868\u73b0\u4f18\u4e8e Unsloth \u548c MagicQuant\uff0c\u4ee5\u66f4\u5c11\u7684\u6bd4\u7279 \/ \u53c2\u6570\u5b9e\u73b0\u76f8\u5f53\u541e\u5410\u91cf\u4e0b\u7684\u66f4\u9ad8\u8d28\u91cf\u3002\u552f\u6709 ByteShape \u63d0\u4f9b\u4e86 26+ TPS \u533a\u95f4\u7684\u6a21\u578b\uff0c\u6027\u80fd\u8fdc\u8d85\u5176\u4ed6\u65b9\u6848\u3002<\/p>\n\n\n\n<p>\u6838\u5fc3\u4eae\u70b9\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u7cbe\u5ea6\u4f18\u5148\uff1a\u5728\u9ad8\u7cbe\u5ea6\u533a\u95f4\uff0c<code>IQ4_XS-4.67bpw [KQ-9]<\/code>\u5b9e\u73b0\u6700\u4f4e\u76f8\u5bf9\u8bef\u5dee\uff080.25%\uff09\uff0c\u4f18\u4e8e Unsloth \u7684\u6700\u4f18\u6a21\u578b\uff08<code>Q6_K [20]<\/code>\u76f8\u5bf9\u8bef\u5dee 0.36%\u3001<code>Q5_K_M [18]<\/code>\u76f8\u5bf9\u8bef\u5dee 0.44%\uff09\u3002\u76f4\u63a5\u5bf9\u6bd4\u663e\u793a\uff0cByteShape \u6bd4<code>Q6_K [20]<\/code>\u8bef\u5dee\u7387\u6700\u9ad8\u964d\u4f4e 1.44 \u500d\u4e14\u541e\u5410\u91cf\u66f4\u9ad8\uff0c\u4e0e<code>Q5_K_M [18]<\/code>\u901f\u5ea6\u57fa\u672c\u6301\u5e73\u4f46\u8bef\u5dee\u7387\u964d\u4f4e 1.76 \u500d\u3002MagicQuant \u7684<code>mxfp4 
[3]<\/code>\u5728\u6b64\u533a\u95f4\u8868\u73b0\u843d\u540e\uff0c\u8bef\u5dee\u66f4\u9ad8\u4e14 TPS \u66f4\u4f4e\u3002<\/li>\n\n\n\n<li>\u5747\u8861\u9009\u62e9\uff1a\u5728\u4e2d\u7cbe\u5ea6\u3001\u9ad8\u541e\u5410\u91cf\u533a\u95f4\uff0c<code>Q3_K_S-3.25bpw [KQ-5]<\/code>\u4ee5 3.25 BPW \u7684\u89c4\u683c\u5b9e\u73b0\u7ea6 98% \u7cbe\u5ea6\u548c 23.1 TPS\uff0c\u662f\u6574\u4f53\u6743\u8861\u6700\u4f18\u7684\u65b9\u6848\u3002Unsloth \u9700\u4ee5\u66f4\u9ad8 BPW \u548c\u66f4\u4f4e TPS \u624d\u80fd\u8fbe\u5230\u540c\u7b49\u6216\u66f4\u9ad8\u7cbe\u5ea6\uff08\u5982<code>IQ4_XS [10]<\/code>\uff09\uff0c\u800c\u9009\u62e9\u901f\u5ea6\u63a5\u8fd1\u7684\u6a21\u578b\uff08\u5982<code>Q3_K_S [7]<\/code>\uff09\u5219\u4f1a\u5bfc\u81f4\u8bef\u5dee\u7387\u5347\u9ad8 1.73 \u500d\u3002MagicQuant \u5728\u8be5\u533a\u95f4\u65e0\u7ade\u4e89\u529b\u6a21\u578b\uff0c\u5176\u6700\u5feb\u65b9\u6848\uff08<code>IQ4_NL [2]<\/code>\uff09\u5728\u7cbe\u5ea6\u548c\u541e\u5410\u91cf\u4e0a\u5747\u843d\u540e\u4e8e ByteShape \u548c Unsloth\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u5173\u952e\u7ed3\u8bba\uff1a\u65e0\u8bba\u5728\u7cbe\u5ea6\u4f18\u5148\u8fd8\u662f\u5747\u8861\u573a\u666f\u4e0b\uff0cByteShape \u59cb\u7ec8\u80fd\u5c06\u53ef\u7528\u6bd4\u7279\u9884\u7b97\u8f6c\u5316\u4e3a\u66f4\u9ad8\u7cbe\u5ea6\u6216\u66f4\u9ad8 TPS\uff0c\u662f\u552f\u4e00\u540c\u65f6\u8986\u76d6\u9ad8\u8d28\u91cf\u533a\u95f4\u548c 26+ TPS \u5747\u8861\u6027\u80fd\u533a\u95f4\u7684\u65b9\u6848\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u56fe\u5f62\u5904\u7406\u5668\uff08GPU\uff09\u6d4b\u8bd5\uff1aRTX 5090\/32GB \u4e0e RTX 4080\/16GB<\/h2>\n\n\n\n<p>\u5728 GPU \u4e0a\uff0c\u6027\u80fd\u8868\u73b0\u65e2\u53d6\u51b3\u4e8e\u5185\u5b58\u5360\u7528\uff0c\u4e5f\u540c\u6837\u4f9d\u8d56\u5185\u6838\u9009\u62e9\u3002\u5bf9\u4e8e\u77e9\u9635\u4e58\u6cd5 \/ \u77e9\u9635\u5411\u91cf\u4e58\u6cd5\uff0cllama.cpp \u7684\u91cf\u5316\u4e13\u7528 GPU 
\u89e3\u7801\u8def\u5f84\u4f1a\u4ea7\u751f\u622a\u7136\u4e0d\u540c\u7684\u5f00\u9500\uff0c\u56e0\u6b64 \u201c\u6bd4\u7279\u6570\u8d8a\u5c11\u201d \u5e76\u975e\u53ef\u9760\u7684 \u201cTPS \u8d8a\u9ad8\u201d \u4fdd\u8bc1\u3002\u76f8\u53cd\uff0cTPS \u901a\u5e38\u5728\u7279\u5b9a\u91cf\u5316 \u201c\u751c\u70b9\u533a\u95f4\u201d \u8fbe\u5230\u5cf0\u503c\uff0c\u8fc7\u5ea6\u964d\u4f4e BPW \u751a\u81f3\u53ef\u80fd\u589e\u52a0\u663e\u5b58\uff08VRAM\uff09\u6d41\u91cf\u548c\u6307\u4ee4\u6570\uff0c\u53cd\u800c\u635f\u5bb3\u6027\u80fd\u3002\u6211\u4eec\u5c06\u5728 GPU \u6d4b\u8bd5\u7ed3\u679c\u540e\u8be6\u7ec6\u89e3\u6790\u8fd9\u4e00\u73b0\u8c61\uff0c\u5c4a\u65f6\u5185\u6838\u7ea7\u522b\u7684\u6743\u8861\u4f1a\u66f4\u52a0\u6e05\u6670\u3002<\/p>\n\n\n\n<p>\u6d4b\u8bd5\u8986\u76d6\u4e24\u6b3e GPU\uff1aRTX 5090\uff0832GB \u663e\u5b58\uff09\u2014\u2014 \u53ef\u8fd0\u884c 4 BPW \u4ee5\u4e0a\u6a21\u578b\u5e76\u8fbe\u5230\u6700\u5feb\u751c\u70b9\u533a\u95f4\uff1bRTX 4080\uff0816GB \u663e\u5b58\uff09\u2014\u2014 \u65e0\u6cd5\u9002\u914d 4 BPW \u4ee5\u4e0a\u6a21\u578b\uff0c\u9700\u8fdb\u884c\u5dee\u5f02\u5316\u6743\u8861\uff0c\u66f4\u6613\u4f53\u73b0\u8bbe\u5907\u4f18\u5316\u66f2\u7ebf\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">RTX 5090\uff0832GB \u663e\u5b58\uff09<\/h3>\n\n\n\n<p>RTX 5090 \u5177\u5907\u5145\u8db3\u663e\u5b58\u652f\u6301\u6240\u6709\u91cf\u5316\u6a21\u578b\uff0c\u4e0b\u56fe\u5c55\u793a\u5176 TPS \u4e0e\u6807\u51c6\u5316\u7cbe\u5ea6\u5173\u7cfb\uff08\u6c14\u6ce1\u5927\u5c0f = \u6a21\u578b\u4f53\u79ef BPW\uff0c\u6570\u503c\u8d8a\u5c0f\u8d8a\u4f18\uff0c\u8303\u56f4\uff1a2.1-8.5\uff09\uff1a<\/p>\n\n\n\n<p>RTX 5090\uff1a\u6bcf\u79d2\u4ee4\u724c\u6570\uff08TPS\uff09 vs \u8d28\u91cf\uff08\u6c14\u6ce1\u5927\u5c0f = \u6a21\u578b\u4f53\u79ef\uff09<\/p>\n\n\n\n<p>\u4e24\u5927\u5173\u952e\u53d1\u73b0\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u8be5 GPU \u5b58\u5728\u660e\u786e\u7684\uff5e4 \u6bd4\u7279\u751c\u70b9\u533a\u95f4\uff1a\u591a\u6b3e\uff5e4 
\u6bd4\u7279\u6a21\u578b\u805a\u96c6\u5728\u9ad8 TPS \u533a\u95f4\u4e14\u8d28\u91cf\u8fd1\u4e4e\u4e00\u81f4\uff0c\u4f8b\u5982<code>Unsloth Q4_0 [12]<\/code>\u3001<code>Unsloth IQ4_XS [10]<\/code>\u3001<code>IQ4_XS-3.87bpw [IQ-6]<\/code>\u548c MagicQuant \u7684<code>iq4_nl-EHQKOUD-IQ4NL [1]<\/code>\uff0c\u5747\u4ee5\uff5e302-303 TPS \u8fd0\u884c\uff0c\u7cbe\u5ea6\u8fbe\uff5e98.4%-98.9%\u3002\u5728\u8fd9\u4e00\u5bc6\u96c6\u533a\u95f4\u5185\uff0cUnsloth \u5728\u541e\u5410\u91cf\u548c\u8d28\u91cf\u4e0a\u7565\u6709\u4f18\u52bf\u3002<\/li>\n\n\n\n<li>\u751c\u70b9\u533a\u95f4\u4e4b\u5916\uff0c\u6743\u8861\u5173\u7cfb\u53d8\u5f97\u6781\u4e0d\u5747\u8861\uff1a<\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u591a\u6570\u5176\u4ed6 Unsloth \u548c MagicQuant \u6a21\u578b\u65e0\u8bba\u91cf\u5316\u7a0b\u5ea6\u5982\u4f55\uff0cTPS \u5747\u663e\u8457\u964d\u4f4e\uff1b<\/li>\n\n\n\n<li>\u8d85\u51fa\uff5e4 \u6bd4\u7279\u533a\u95f4\u540e\uff0c\u552f\u6709 ByteShape \u80fd\u5728\u7cbe\u5ea6\u53ef\u9884\u6d4b\u964d\u4f4e\u7684\u540c\u65f6\u6301\u7eed\u63d0\u5347 TPS\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u9ad8\u7cbe\u5ea6\u5173\u952e\u8d1f\u8f7d\uff1a\u5f53\u8f93\u51fa\u8d28\u91cf\u81f3\u5173\u91cd\u8981\u65f6\uff0cByteShape \u63d0\u4f9b RTX 5090 \u4e0a\u6700\u7cbe\u51c6\u7684\u6a21\u578b \u2014\u2014<code>IQ4_XS-4.67bpw [IQ-8]<\/code>\uff084.67 BPW\u3001272.98 TPS\u300199.75% \u7cbe\u5ea6\uff09\u3002\u5b83\u4f18\u4e8e<code>Unsloth Q6_K [20]<\/code>\uff086.57 BPW\u3001264.88 TPS\u300199.64% \u7cbe\u5ea6\uff09\uff0c\u4ee5\u66f4\u5c11\u6bd4\u7279\u5b9e\u73b0\u7565\u9ad8\u541e\u5410\u91cf\uff1b\u540c\u65f6\u5728\u7cbe\u5ea6\u548c\u901f\u5ea6\u4e0a\u660e\u663e\u8d85\u8d8a MagicQuant \u7684<code>mxfp4_moe-H-B16-EUR-IQ4NL-KO-Q5K-QD-Q6K [3]<\/code>\uff085.46 BPW\u3001240.42 TPS\u300199.32% \u7cbe\u5ea6\uff09\uff0c\u662f\u8d28\u91cf\u5173\u952e\u578b\u90e8\u7f72\u573a\u666f\u7684\u6700\u4f18\u9009\u62e9\u3002<\/p>\n\n\n\n<p>\u5b9e\u7528\u5efa\u8bae\uff1a\u82e5\u4f60\u7684 GPU 
\u663e\u5b58\u5145\u8db3\uff0c\u80fd\u8fd0\u884c\u6ee1\u8db3\u901f\u5ea6\u548c\u7cbe\u5ea6\u9700\u6c42\u7684\uff5e4 \u6bd4\u7279\u6a21\u578b\uff0c\u8be5\u533a\u95f4\u662f\u7406\u60f3\u9ed8\u8ba4\u9009\u62e9\u3002\u5f53\u4efb\u52a1\u7ea6\u675f\u8981\u6c42\u66f4\u9ad8\u7cbe\u5ea6\u6216\u66f4\u5c0f\u6a21\u578b\uff08\u5982\u663e\u5b58\u9884\u7b97\u7d27\u5f20\u6216\u53d7\u9650\u73af\u5883\uff0c\u8be6\u89c1 RTX 4080 \u6d4b\u8bd5\uff09\u65f6\uff0c\u6027\u80fd\u66f2\u7ebf\u7684\u5dee\u5f02\u5316\u4f1a\u66f4\u5177\u53c2\u8003\u4ef7\u503c\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">RTX 4080\uff0816GB \u663e\u5b58\uff09<\/h3>\n\n\n\n<p>RTX 4080 \u662f\u66f4\u6613\u83b7\u53d6\u7684 GPU\uff0c\u4f46\u5176 16GB \u663e\u5b58\u65e0\u6cd5\u9002\u914d 300 \u4ebf\u53c2\u6570\u6a21\u578b\u7684\uff5e4 \u6bd4\u7279\u91cf\u5316\uff08\u201c\u751c\u70b9\u533a\u95f4\u201d\uff09\uff0c\u6070\u597d\u5f62\u6210 \u201c\u771f\u5b9e\u573a\u666f\u201d \u4e0b\u7684\u4e25\u683c\u663e\u5b58\u9884\u7b97\u5bf9\u6bd4\u3002\u4e0b\u56fe\u5c55\u793a\u9002\u914d\u8be5 GPU \u7684\u6a21\u578b\u7684 TPS \u4e0e\u6807\u51c6\u5316\u7cbe\u5ea6\u5173\u7cfb\uff08\u6c14\u6ce1\u5927\u5c0f = \u6a21\u578b\u4f53\u79ef BPW\uff0c\u6570\u503c\u8d8a\u5c0f\u8d8a\u4f18\uff0c\u8303\u56f4\uff1a2.1-3.9\uff09\uff1a<\/p>\n\n\n\n<p>RTX 4080\uff1a\u6bcf\u79d2\u4ee4\u724c\u6570\uff08TPS\uff09 vs \u8d28\u91cf\uff08\u6c14\u6ce1\u5927\u5c0f = \u6a21\u578b\u4f53\u79ef\uff09<\/p>\n\n\n\n<p>\u5728 16GB \u663e\u5b58\u7ea6\u675f\u4e0b\uff0cByteShape \u6301\u7eed\u4f18\u4e8e Unsloth\uff0c\u5c55\u73b0\u66f4\u4f18\u7684 TPS &#8211; \u8d28\u91cf\u6743\u8861\u3002<\/p>\n\n\n\n<p>\u5176\u4e2d\uff0cByteShape \u9002\u914d\u8be5 GPU \u7684\u6700\u9ad8\u7cbe\u5ea6\u6a21\u578b<code>IQ4_XS-3.87bpw [IQ-6]<\/code>\uff083.87 BPW\u3001214.81 TPS\u300198.66% \u7cbe\u5ea6\uff09\u8868\u73b0\u5982\u4e0b\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u76f8\u8f83\u4e8e<code>Unsloth Q3_K_XL [8]<\/code>\uff083.62 BPW\u3001196.42 TPS\u300197.87% 
\u7cbe\u5ea6\uff09\uff1a\u8bef\u5dee\u7387\u964d\u4f4e 1.59 \u500d\uff0cTPS \u63d0\u5347 9.4%\uff1b<\/li>\n\n\n\n<li>\u76f8\u8f83\u4e8e<code>Unsloth IQ2_M [2]<\/code>\uff082.84 BPW\u3001214.79 TPS\u300196.59% \u7cbe\u5ea6\uff09\uff1a\u5728\u76f8\u540c TPS \u4e0b\u8bef\u5dee\u7387\u964d\u4f4e 2.54 \u500d\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u968f\u7740\u541e\u5410\u91cf\u63d0\u5347\uff0cByteShape \u80fd\u4fdd\u6301\u7cbe\u5ea6\u7a33\u5b9a\uff0c\u800c Unsloth \u7684\u8bef\u5dee\u7387\u4f1a\u51fa\u73b0\u65ad\u5d16\u5f0f\u4e0a\u5347\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u5173\u952e\u771f\u76f8\uff1a3 \u6bd4\u7279\u5e76\u975e\u5355\u7eaf\u7684 \u201c3 \u6bd4\u7279\u201d<\/h2>\n\n\n\n<p>\u6d4b\u8bd5\u7ed3\u679c\u63ed\u793a\u4e00\u4e2a\u4e0d\u5bb9\u5ffd\u89c6\u7684\u4e8b\u5b9e\uff1a\u5728\u591a\u4e2a\u8bbe\u5907\u914d\u7f6e\u4e2d\uff0c~4 BPW \u7684\u6a21\u578b\u5df2\u80fd\u5b9e\u73b0\u9ad8\u901f\u8fd0\u884c\uff0c\u8fdb\u4e00\u6b65\u5f3a\u5316\u91cf\u5316\u5e76\u4e0d\u80fd\u63d0\u5347\u901f\u5ea6\uff0c\u53cd\u800c\u4f1a\u5bfc\u81f4 \u201c\u4f53\u79ef\u66f4\u5c0f\u4f46\u901f\u5ea6\u66f4\u6162\u201d\u3002<\/p>\n\n\n\n<p>\u51cf\u5c0f\u6570\u636e\u4f53\u79ef\u5e76\u975e\u5fc5\u7136\u63d0\u5347\u901f\u5ea6\u3002\u867d\u7136\u7528\u66f4\u5c11\u6bd4\u7279\u5b58\u50a8\u6570\u636e\u770b\u4f3c\u80fd\u51cf\u5c11\u5185\u5b58\u6d41\u91cf\u3001\u52a0\u901f\u8ba1\u7b97\uff0c\u4f46 GPU \u7684\u5de5\u4f5c\u673a\u5236\u5e76\u975e\u5982\u6b64\u3002NVIDIA GPU \u4ee5 32 \u7ebf\u7a0b\u4e3a\u4e00\u7ec4\u7684 \u201c\u7ebf\u7a0b\u675f\u201d\uff08warps\uff09\u5904\u7406\u4efb\u52a1\uff0c\u7ebf\u7a0b\u675f\u8fd1\u4e4e\u540c\u6b65\u6267\u884c\u6307\u4ee4\u3002GPU \u786c\u4ef6\u9488\u5bf9\u7279\u5b9a\u6570\u636e\u683c\u5f0f\u3001\u5185\u5b58\u8bbf\u95ee\u6a21\u5f0f\u548c\u8fd0\u7b97\u8fdb\u884c\u4e86\u7269\u7406\u4f18\u5316\uff0c\u5f53\u8d1f\u8f7d\u5951\u5408\u8fd9\u4e9b \u201c\u9ec4\u91d1\u8def\u5f84\u201d 
\u65f6\uff0c\u624d\u80fd\u53d1\u6325\u5cf0\u503c\u6027\u80fd\uff1b\u4e00\u65e6\u504f\u79bb\uff0c\u5c31\u4f1a\u51fa\u73b0\u901f\u5ea6\u4e0b\u964d\u3002\u8fd9\u5e76\u975e\u8bbe\u8ba1\u7f3a\u9677\uff0c\u800c\u662f\u523b\u610f\u7684\u6743\u8861 \u2014\u2014 \u652f\u6301\u66f4\u591a\u7075\u6d3b\u6027\u9700\u8981\u989d\u5916\u7684\u7535\u8def\uff08\u66f4\u591a\u7ebf\u8def\u3001\u6676\u4f53\u7ba1\u548c\u590d\u6742\u5ea6\uff09\uff0c\u4f1a\u589e\u52a0\u6240\u6709\u8fd0\u7b97\u7684\u529f\u8017\u548c\u5ef6\u8fdf\uff0c\u65e0\u8bba\u7a0b\u5e8f\u662f\u5426\u9700\u8981\u8fd9\u4e9b\u7075\u6d3b\u6027\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u662f\u51e0\u4e2a\u5173\u952e\u786c\u4ef6 \u201c\u7279\u6027\u201d \u793a\u4f8b\uff1a<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u663e\u5b58\u6309 32 \u5b57\u8282\u5bf9\u9f50\u5757\u8bfb\u53d6\uff0c\u8bfb\u53d6 1 \u5b57\u8282\u6216 32 \u5b57\u8282\u6d88\u8017\u76f8\u540c\u5e26\u5bbd\uff1b<\/li>\n\n\n\n<li>\u7247\u4e0a\u548c\u7247\u5916\u5185\u5b58\u7684\u6027\u80fd\u4f1a\u53d7\u6570\u636e\u5e03\u5c40\u5f71\u54cd\u4ea7\u751f\u51b2\u7a81\uff0c\u7ebf\u7a0b\u675f\u7684\u8bbf\u95ee\u53ef\u80fd\u4e00\u6b65\u5b8c\u6210\uff0c\u6700\u574f\u60c5\u51b5\u4e0b\u9700\u4e32\u884c\u6267\u884c 32 \u6b65\uff1b<\/li>\n\n\n\n<li>\u8ba1\u7b97\u524d\u7684\u91cf\u5316\u503c\u89e3\u7801\u9700\u989d\u5916\u6307\u4ee4\uff0c\u5f00\u9500\u53d6\u51b3\u4e8e\u91cf\u5316\u65b9\u6848\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u8fd9\u89e3\u91ca\u4e86\u6211\u4eec\u89c2\u5bdf\u5230\u7684\u73b0\u8c61\uff1a4 \u6bd4\u7279\u5185\u6838\u6bd4 3 \u6bd4\u7279\u6216 2 \u6bd4\u7279\u5185\u6838\u66f4\u9ad8\u6548\u5229\u7528\u663e\u5b58\u5e26\u5bbd\uff0c\u4e14\u89e3\u7801\u6b65\u9aa4\u66f4\u5c11\uff1b\u540c\u65f6\uff0c4 
\u6bd4\u7279\u5185\u6838\u4e0e\u4f4e\u6bd4\u7279\u5185\u6838\u4e00\u6837\u80fd\u6709\u6548\u5229\u7528\u5b50\u5b57\u5e76\u884c\u6027\uff0c\u4e14\u5747\u4e3b\u8981\u4f9d\u8d56\u52a8\u6001\u7f13\u5b58\uff08\u800c\u975e\u5171\u4eab\u5185\u5b58\uff09\u5b9e\u73b0\u6570\u636e\u590d\u7528\u3002<\/p>\n\n\n\n<p>\u4e3a\u4f55 llama.cpp \u672a\u9488\u5bf9\u6240\u6709\u4f4d\u957f\u4f18\u5316\u5cf0\u503c\u901f\u5ea6\uff1f\u6211\u4eec\u8ba4\u4e3a\uff0cllama.cpp \u4f18\u5148\u8003\u8651\u53ef\u79fb\u690d\u6027\u548c\u7a7a\u95f4\u9ad8\u6548\u7684\u91cf\u5316\u65b9\u6848\uff0c\u4ee5\u9002\u914d\u5e7f\u6cdb\u786c\u4ef6\u3002\u8fd9\u4e00\u8bbe\u8ba1\u76ee\u6807\u9650\u5236\u4e86\u540e\u7aef\u5bf9\u6570\u636e\u5e03\u5c40\u7684\u91cd\u6784\u6216\u8ba1\u7b97\u987a\u5e8f\u7684\u8c03\u6574\u529b\u5ea6 \u2014\u2014 \u8fd9\u7c7b\u4f18\u5316\u53ef\u80fd\u4ec5\u5bf9\u7279\u5b9a GPU \u6216\u4f4d\u957f\u6709\u6548\u3002<\/p>\n\n\n\n<p>\u4e00\u4e2a\u5173\u952e\u793a\u4f8b\u662f llama.cpp \u5c06\u91cf\u5316\u6743\u91cd\u5b58\u50a8\u5728 256 \u4e2a\u503c\u7684\u56fa\u5b9a\u5757\u4e2d\uff1a\u6bcf\u4e2a\u5757\u72ec\u7acb\u5b8c\u6574\uff08\u5305\u542b\u89e3\u7801\u6240\u9700\u5168\u90e8\u4fe1\u606f\uff09\uff0c\u4e14\u5728\u5f20\u91cf\u4e2d\u4f4d\u7f6e\u53ef\u9884\u6d4b\uff0c\u4fbf\u4e8e\u5b9e\u73b0\u4e14\u5b9a\u4f4d\u5feb\u901f\u3002<\/p>\n\n\n\n<p>\u4f46\u6743\u8861\u4e4b\u5904\u5728\u4e8e\uff0cGPU \u9700\u5e76\u884c\u89e3\u7801\u591a\u4e2a\u5757\u624d\u80fd\u5145\u5206\u5229\u7528\u5176\u5bbd\u8ba1\u7b97\u5355\u5143\u3002\u5927\u91cf\u72ec\u7acb\u7684 256 \u503c\u5757\u4f1a\u5bfc\u81f4\u5e76\u884c\u89e3\u7801\u4ea7\u751f\u5206\u6563\u7684\u663e\u5b58\u8bfb\u53d6\u548c\u989d\u5916\u89e3\u7801\u5f00\u9500\uff0c\u964d\u4f4e\u5e26\u5bbd\u6548\u7387\uff08\u5c24\u5176\u5bf9\u90e8\u5206\u4f4e\u6bd4\u7279\u683c\u5f0f\uff09\u3002<\/p>\n\n\n\n<p>\u4ee5 RTX 5090 \u4e3a\u4f8b\uff1a<code>iq4_xs<\/code>\u6570\u636e\u7c7b\u578b\u6267\u884c [256, 768]\u00d7[768, 2048] \u77e9\u9635\u4e58\u6cd5\u9700\uff5e54 
\u5fae\u79d2\uff0c\u800c<code>iq3_xxs<\/code>\u9700\uff5e62 \u5fae\u79d2\uff08\u542b mul_mat_q() \u548c mul_mat_q_stream_k_fixup() \u64cd\u4f5c\uff09\u3002\u4e5f\u5c31\u662f\u8bf4\uff0c\u6bcf\u6743\u91cd\u51cf\u5c11\u8fd1 1.2 \u6bd4\u7279\uff08\u4f53\u79ef\u7f29\u51cf\u8d85 25%\uff09\uff0c\u53cd\u800c\u5bfc\u81f4\u901f\u5ea6\u4e0b\u964d\uff5e13%\uff0c\u76f4\u63a5\u5f71\u54cd\u7528\u6237\u4f53\u9a8c\u3002<\/p>\n\n\n\n<p>\u8fd9\u5145\u5206\u8bc1\u660e\u4f4d\u957f\u5b66\u4e60\u7684\u91cd\u8981\u6027\uff1a\u542f\u53d1\u5f0f\u65b9\u6cd5\u53ea\u80fd\u89e3\u51b3\u90e8\u5206\u95ee\u9898\uff0c\u800c Shapelearn \u901a\u8fc7\u9488\u5bf9\u6027\u7684\u9010\u5f20\u91cf\u6570\u636e\u7c7b\u578b\u9009\u62e9\uff0c\u5b9e\u73b0\u4e86\u901f\u5ea6\u63d0\u5347\u4e0e\u7cbe\u5ea6\u65e0\u635f\u7684\u517c\u987e\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u6d4b\u8bd5\u65b9\u6cd5\uff08\u7b80\u8981\u56de\u987e\uff09<\/h2>\n\n\n\n<p>\u82e5\u4f60\u60f3\u4e86\u89e3\u8bc4\u4f30\u6807\u51c6\uff0c\u5b8c\u6574\u65b9\u6cd5\u8be6\u89c1\u6211\u4eec\u4e0a\u4e00\u7bc7\u535a\u5ba2\u3002\u672c\u6587\u805a\u7126\u6027\u80fd\u66f2\u7ebf\u548c\u8bbe\u5907\u6743\u8861\uff0c\u4ee5\u4e0b\u662f\u7cbe\u7b80\u7248\u8bf4\u660e\uff1a<\/p>\n\n\n\n<p>\u9488\u5bf9\u6bcf\u4e2a\u91cf\u5316\u53d8\u4f53\uff0c\u6211\u4eec\u5728\u76ee\u6807\u8bbe\u5907\u4e0a\u6d4b\u8bd5\u541e\u5410\u91cf\uff08TPS\uff09\uff0c\u5e76\u57fa\u4e8e BF16 \u57fa\u51c6\u8ba1\u7b97\u6807\u51c6\u5316\u8d28\u91cf\u5f97\u5206\uff08\u91c7\u7528\u4e0e\u65b9\u6cd5\u5b66\u535a\u5ba2\u4e00\u81f4\u7684\u8bc4\u4f30\u6846\u67b6\u548c\u63d0\u793a\u8bcd\uff09\u3002\u8d28\u91cf\u5f97\u5206\u6574\u5408\u4e86\u6807\u51c6\u57fa\u51c6\u6d4b\u8bd5\uff08MMLU\u3001GSM8K\u3001IFEval\u3001LiveCodeBench 
V4\uff09\u7684\u7ed3\u679c\uff0c\u4fbf\u4e8e\u76f4\u63a5\u5bf9\u6bd4\u3002\u7b80\u8a00\u4e4b\uff0c\u56fe\u8868\u4e2d\u7684\u6bcf\u4e2a\u6570\u636e\u70b9\u5747\u56de\u7b54\u4e24\u4e2a\u95ee\u9898\uff1a\u5728\u8be5\u8bbe\u5907\u4e0a\u8fd0\u884c\u901f\u5ea6\u5982\u4f55\uff1f\u76f8\u8f83\u4e8e BF16 \u57fa\u51c6\u4fdd\u7559\u591a\u5c11\u8d28\u91cf\uff1f\uff08\u5185\u5b58\u9002\u914d\u4e3a\u9996\u8981\u7ea6\u675f\uff09<\/p>\n\n\n\n<p>\u611f\u8c22\u7528\u6237\u5728\u8fd1\u671f Reddit \u5e16\u5b50\u4e2d\u63d0\u51fa\u7684\u8bf8\u591a\u5b9d\u8d35\u5efa\u8bae\uff0c\u6211\u4eec\u6b63\u79ef\u6781\u843d\u5b9e\u3002\u76ee\u524d\uff0c\u8bc4\u4f30\u662f\u4e3b\u8981\u74f6\u9888\u800c\u975e\u4f4d\u957f\u5b66\u4e60 \/ \u91cf\u5316 \u2014\u2014 \u4e25\u8c28\u7684\u8bc4\u4f30\u662f\u6e05\u6670\u4f20\u8fbe\u5404\u6a21\u578b\u4f18\u52bf\u7684\u5173\u952e\u3002<\/p>\n\n\n\n<p>\u539f\u6587\u94fe\u63a5\uff1a<a href=\"https:\/\/byteshape.com\/blogs\/Qwen3-30B-A3B-Instruct-2507\/\">https:\/\/byteshape.com\/blogs\/Qwen3-30B-A3B-Instruct-2507\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u672c\u6b21\u53d1\u5e03\uff0c\u6211\u4eec\u805a\u7126\u7528\u6237\u5b9e\u9645\u8fd0\u884c\u6a21\u578b\u65f6\u7684\u6838\u5fc3\u4f53\u9a8c\uff1a\u5728\u7279\u5b9a\u76ee\u6807\u8bbe\u5907\u4e0a\u5b9e\u73b0&#46;&#46;&#46;<\/p>\n","protected":false},"author":2,"featured_media":813,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[18],"tags":[],"class_list":["post-1242","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-tools"],"_links":{"self":[{"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/posts\/1242","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/s
martaiunion.com\/index.php\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/comments?post=1242"}],"version-history":[{"count":1,"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/posts\/1242\/revisions"}],"predecessor-version":[{"id":1243,"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/posts\/1242\/revisions\/1243"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/media\/813"}],"wp:attachment":[{"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/media?parent=1242"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/categories?post=1242"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/smartaiunion.com\/index.php\/wp-json\/wp\/v2\/tags?post=1242"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}