{"id":5552,"date":"2025-06-21T14:45:30","date_gmt":"2025-06-21T06:45:30","guid":{"rendered":"http:\/\/xinblog.ltd\/?p=5552"},"modified":"2025-06-21T15:15:55","modified_gmt":"2025-06-21T07:15:55","slug":"%e5%9f%ba%e7%a1%80%e6%95%b0%e5%ad%a6%e8%af%be31-%e5%ae%9e%e7%8e%b0%e4%b8%80%e4%b8%aa%e6%8e%a8%e8%8d%90%e7%b3%bb%e7%bb%9f","status":"publish","type":"post","link":"http:\/\/xinblog.ltd\/?p=5552","title":{"rendered":"\u57fa\u7840\u6570\u5b66\u8bfe31-\u5b9e\u73b0\u4e00\u4e2a\u63a8\u8350\u7cfb\u7edf"},"content":{"rendered":"<p>\u57fa\u7840\u6570\u5b66\u8bfe31 \u5b9e\u73b0\u4e00\u4e2a\u63a8\u8350\u7cfb\u7edf<\/p>\n<p>\u8fd9\u91cc\u6211\u4eec\u5229\u7528\u4e00\u4e2a\u77e5\u540d\u6570\u636e\u96c6\u6765\u5b9e\u73b0\u4e00\u4e2a\u63a8\u8350\u7cfb\u7edf\u7684\u6838\u5fc3\u6a21\u5757\u7684\u8bbe\u8ba1\u548c\u5b9e\u73b0<\/p>\n<p>\u8fd9\u91cc\u7684\u77e5\u540d\u6570\u636e\u96c6\u5c31\u662fMovieLens\uff0c\u5176\u4e2d\u5305\u542b\u4e86\u56db\u4e2a\u6587\u4ef6\uff0c\u5206\u522b\u662fratings\uff0cmovies,tags.links\u3002\u5176\u4e2d\u7684\u6838\u5fc3\u5c31\u662fratings\uff0c\u5305\u542b\u4e86\u56db\u4e2a\u5b57\u6bb5\uff0c\u5305\u62ecuserId\uff0cmovieId\uff0crating\uff0ctimestamp\u8fd9\u51e0\u4e2a\u5b57\u6bb5\uff0cuserId\u4e0d\u5fc5\u8bf4\uff0c\u5bf9\u4e8emovieId\u5219\u662f\u7535\u5f71\u7684Id\uff0crating\u5219\u662f\u7528\u6237\u5bf9\u8fd9\u4e2a\u7535\u5f71\u7684\u8bc4\u5206\u3002<\/p>\n<p>\u90a3\u4e48\u6211\u4eec\u4e3b\u8981\u5c31\u662f\u5229\u7528\u534f\u540c\u8fc7\u6ee4\u63a8\u8350\u7b97\u6cd5\u8fdb\u884c\u7528\u6237\u548c\u7269\u54c1\u7684\u5339\u914d\u3002<\/p>\n<p>\u9996\u5148\u8981\u5bf9\u7528\u6237\u7684\u8bc4\u5206\u8fdb\u884c\u4e00\u4e2a\u6807\u51c6\u5316\u64cd\u4f5c\u3002<\/p>\n<p>\u7136\u540e\u8861\u91cf\u4e00\u4e2a\u7528\u6237\u548c\u5176\u4ed6\u7684\u7528\u6237\u6216\u8005\u7269\u54c1\u7684\u76f8\u4f3c\u5ea6\u3002<\/p>\n<p><img decoding=\"async\" loading=\"lazy\" width=\"1372\" height=\"638\" 
src=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-227.png\" class=\"wp-image-5553\" srcset=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-227.png 1372w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-227-300x140.png 300w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-227-1024x476.png 1024w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-227-768x357.png 768w\" sizes=\"(max-width: 1372px) 100vw, 1372px\" \/><\/p>\n<p>\u5bf9\u4e8eis\u7cfb\u5217\uff0c\u6211\u4eec\u53ef\u4ee5\u8fd9\u4e48\u7406\u89e3<\/p>\n<p>\u9996\u5148\u6211\u4eec\u6709\u4e00\u4e2a\u77e9\u9635X\uff0c\u6bcf\u4e00\u884c\u662f\u4e00\u4e2a\u7528\u6237\u7684\u559c\u597d\uff0c\u6bcf\u884c\u7684\u4e00\u4e2a\u5411\u91cf\u662f\u67d0\u4e2a\u7528\u6237\u7684\u6253\u5206\u3002<\/p>\n<p>\u90a3\u4e48\u6211\u4eec\u53ef\u4ee5\u5c06X\u8fdb\u884c\u8f6c\u7f6e\uff0c\u5f62\u6210X\u2018\uff0c\u5176\u4e2d\u4ee5\u5217\u533a\u5206\u7528\u6237\uff0c\u6bcf\u5217\u7684\u4e00\u4e2a\u5411\u91cf\u662f\u67d0\u4e2a\u7528\u6237\u7684\u6253\u5206\u3002<\/p>\n<p>\u8fd9\u91cc\u6211\u4eec\u5c06XX\u2018\u76f8\u4e58\u5f97\u5230\u7ed3\u679cY\uff0c\u90a3\u4e48Y\u4e2d\u7684\u6bcf\u4e00\u4e2a\u5143\u7d20\u5c31\u662f\u7528\u6237i\u548cj\u4e4b\u95f4\u7684\u559c\u597d\u91cf\u7ed3\u679c\uff0c\u5c31\u662f\u5206\u5b50\uff0c\u5982\u679ci=j\uff0c\u4e5f\u5c31\u662f\u5bf9\u89d2\u7ebf\u4e0a\u7684\u4e00\u4e2a\u5143\u7d20\uff0c\u53ef\u4ee5\u4f5c\u4e3a\u5206\u6bcd\u3002<\/p>\n<p>\u90a3\u4e48\u5206\u5b50\u5206\u6bcd\u5c31\u5b58\u5728\u4e86\uff0c\u6211\u4eec\u5c31\u53ef\u4ee5\u8ba1\u7b97\u4e0a\u8ff0\u7684\u516c\u5f0f<\/p>\n<p><img decoding=\"async\" loading=\"lazy\" width=\"1528\" height=\"1346\" src=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-228.png\" class=\"wp-image-5554\" srcset=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-228.png 1528w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-228-300x264.png 300w, 
http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-228-1024x902.png 1024w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-228-768x677.png 768w\" sizes=\"(max-width: 1528px) 100vw, 1528px\" \/><\/p>\n<p><img decoding=\"async\" loading=\"lazy\" width=\"1472\" height=\"848\" src=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-229.png\" class=\"wp-image-5555\" srcset=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-229.png 1472w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-229-300x173.png 300w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-229-1024x590.png 1024w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-229-768x442.png 768w\" sizes=\"(max-width: 1472px) 100vw, 1472px\" \/><\/p>\n<p>\u8fd9\u6837\u6211\u4eec\u5f97\u5230\u6700\u7ec8\u7684\u77e9\u9635\u540e\uff0c\u5229\u7528\u7528\u6237\u4e4b\u95f4\u7684\u76f8\u4f3c\u5ea6\uff0c\u5f97\u5230\u6700\u7ec8\u7684\u5f97\u5206P\u3002<\/p>\n<p><img decoding=\"async\" loading=\"lazy\" width=\"782\" height=\"954\" src=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-230.png\" class=\"wp-image-5556\" srcset=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-230.png 782w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-230-246x300.png 246w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-230-768x937.png 768w\" sizes=\"(max-width: 782px) 100vw, 782px\" \/><\/p>\n<p>\u6700\u7ec8\u5f97\u5230\u7ed3\u679cUSP<\/p>\n<p><img decoding=\"async\" loading=\"lazy\" width=\"1478\" height=\"562\" src=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-231.png\" class=\"wp-image-5557\" srcset=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-231.png 1478w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-231-300x114.png 300w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-231-1024x389.png 1024w, 
http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-231-768x292.png 768w\" sizes=\"(max-width: 1478px) 100vw, 1478px\" \/><\/p>\n<p>\u6309\u884c\u76f8\u52a0\u5f97\u5230USR<\/p>\n<p>USP\u548cUSR\u76f8\u9664\u3002\u5f97\u5230\u4e00\u4e2a\u6700\u7ec8\u77e9\u9635P<\/p>\n<p><img decoding=\"async\" loading=\"lazy\" width=\"1338\" height=\"476\" src=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-232.png\" class=\"wp-image-5558\" srcset=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-232.png 1338w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-232-300x107.png 300w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-232-1024x364.png 1024w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-232-768x273.png 768w\" sizes=\"(max-width: 1338px) 100vw, 1338px\" \/><\/p>\n<p><img decoding=\"async\" loading=\"lazy\" width=\"1358\" height=\"546\" src=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-233.png\" class=\"wp-image-5559\" srcset=\"http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-233.png 1358w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-233-300x121.png 300w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-233-1024x412.png 1024w, http:\/\/xinblog.ltd\/wp-content\/uploads\/2025\/01\/word-image-233-768x309.png 768w\" sizes=\"(max-width: 1358px) 100vw, 1358px\" \/><\/p>\n<p>\u90a3\u4e48\u6211\u4eec\u8fdb\u884c\u4ee3\u7801\u8f6c\u6362<\/p>\n<table>\n<tr>\n<td>\n  import pandas as pd<\/p>\n<p>from numpy import *<\/p>\n<p># \u52a0\u8f7d\u7528\u6237\u5bf9\u7535\u5f71\u7684\u8bc4\u5206\u6570\u636e<\/p>\n<p>df = pd.read_csv(&#8220;\/Users\/shenhuang\/Data\/ml-latest-small\/ratings.csv&#8221;)<\/p>\n<p># \u83b7\u53d6\u7528\u6237\u7684\u6570\u91cf\u548c\u7535\u5f71\u7684\u6570\u91cf<\/p>\n<p>user_num = df[&#8220;userId&#8221;].max()<\/p>\n<p>movie_num = df[&#8220;movieId&#8221;].max()<\/p>\n<p># 
\u6784\u9020\u7528\u6237\u5bf9\u7535\u5f71\u7684\u4e8c\u5143\u5173\u7cfb\u77e9\u9635<\/p>\n<p>user_rating = [[0.0] * movie_num for i in range(user_num)]<\/p>\n<p>i = 0<\/p>\n<p>for index, row in df.iterrows():   # \u83b7\u53d6\u6bcf\u884c\u7684index\u3001row<\/p>\n<p># \u7531\u4e8e\u7528\u6237\u548c\u7535\u5f71\u7684ID\u90fd\u662f\u4ece1\u5f00\u59cb\uff0c\u4e3a\u4e86\u548cPython\u7684\u7d22\u5f15\u4e00\u81f4\uff0c\u51cf\u53bb1<\/p>\n<p>userId = int(row[&#8220;userId&#8221;]) &#8211; 1<\/p>\n<p>movieId = int(row[&#8220;movieId&#8221;]) &#8211; 1<\/p>\n<p># \u8bbe\u7f6e\u7528\u6237\u5bf9\u7535\u5f71\u7684\u8bc4\u5206<\/p>\n<p>user_rating[userId][movieId] = row[&#8220;rating&#8221;]<\/p>\n<p># \u663e\u793a\u8fdb\u5ea6<\/p>\n<p>i += 1<\/p>\n<p>if i % 10000 == 0:<\/p>\n<p>print(i)<\/p>\n<p># \u628a\u4e8c\u7ef4\u6570\u7ec4\u8f6c\u5316\u4e3a\u77e9\u9635<\/p>\n<p>x = mat(user_rating)<\/p>\n<p>print(x)<\/td>\n<\/tr>\n<\/table>\n<p>\u4e4b\u540e\u8fdb\u884c\u77e9\u9635\u6570\u636e\u4ee5\u884c\u4e3a\u7ef4\u5ea6\u8fdb\u884c\u6807\u51c6\u5316<\/p>\n<table>\n<tr>\n<td>\n  # \u6807\u51c6\u5316\u6bcf\u4f4d\u7528\u6237\u7684\u8bc4\u5206\u6570\u636e<\/p>\n<p>from sklearn.preprocessing import scale<\/p>\n<p># \u5bf9\u6bcf\u4e00\u884c\u7684\u6570\u636e\uff0c\u8fdb\u884c\u6807\u51c6\u5316<\/p>\n<p>x_s = scale(x, with_mean=True, with_std=True, axis=1)<\/p>\n<p>print(&#8220;\u6807\u51c6\u5316\u540e\u7684\u77e9\u9635\uff1a&#8221;, x_s)<\/td>\n<\/tr>\n<\/table>\n<p>\u7136\u540e\u8fdb\u884c\u7528\u6237\u76f8\u4f3c\u5ea6\u77e9\u9635\u7684\u8ba1\u7b97<\/p>\n<table>\n<tr>\n<td>\n  # \u83b7\u53d6XX&#8217;<\/p>\n<p>y = x_s.dot(x_s.transpose())<\/p>\n<p>print(&#8220;XX&#8217;\u7684\u7ed3\u679c\u662f&#8217;\uff1a&#8221;, y)<\/p>\n<p># \u83b7\u5f97\u7528\u6237\u76f8\u4f3c\u5ea6\u77e9\u9635US<\/p>\n<p>us = [[0.0] * user_num for i in range(user_num)]<\/p>\n<p>for userId1 in range(user_num):<\/p>\n<p>for userId2 in range(user_num):<\/p>\n<p># 
\u901a\u8fc7\u77e9\u9635Y\u4e2d\u7684\u5143\u7d20\uff0c\u8ba1\u7b97\u5939\u89d2\u4f59\u5f26<\/p>\n<p>us[userId1][userId2] = y[userId1][userId2] \/ sqrt((y[userId1][userId1] * y[userId2][userId2]))<\/td>\n<\/tr>\n<\/table>\n<p>\u6700\u540e\u5c31\u53ef\u4ee5\u8fdb\u884c\u534f\u540c\u8fc7\u6ee4\u63a8\u8350\u4e86\u3002<\/p>\n<table>\n<tr>\n<td>\n  # \u901a\u8fc7\u7528\u6237\u4e4b\u95f4\u7684\u76f8\u4f3c\u5ea6\uff0c\u8ba1\u7b97USP\u77e9\u9635<\/p>\n<p>usp = mat(us).dot(x_s)<\/p>\n<p># \u6c42\u7528\u4e8e\u5f52\u4e00\u5316\u7684\u5206\u6bcd<\/p>\n<p>usr = [0.0] * user_num<\/p>\n<p>for userId in range(user_num):<\/p>\n<p>usr[userId] = sum(us[userId])<\/p>\n<p># \u8fdb\u884c\u5143\u7d20\u5bf9\u5e94\u7684\u9664\u6cd5\uff0c\u5b8c\u6210\u5f52\u4e00\u5316<\/p>\n<p>p = divide(usp, mat(usr).transpose())<\/td>\n<\/tr>\n<\/table>\n<p>\u4ece\u800c\u6700\u540e\u5f97\u5230\u4e86\u63a8\u8350\u6548\u679c\u77e9\u9635\u3002<\/p>\n<p>\u63a5\u4e0b\u6765\u6211\u4eec\u8fd8\u53ef\u4ee5\u4f7f\u7528SVD\u7b97\u6cd5\u6765\u5f3a\u5316\u63a8\u8350\u7b56\u7565<\/p>\n<p>\u8fd9\u91cc\u53ef\u4ee5\u4f7f\u7528SVD\u5206\u89e3\u6765\u5c06\u539f\u672c\u7684\u7528\u6237-\u7535\u5f71\u4e24\u4e2a\u7ea7\u522b\u8f6c\u6362\u4e3a 
\u7535\u5f71-\u4e3b\u9898-\u7528\u6237\u4e09\u4e2a\u7ea7\u522b\uff0c\u8fd9\u91cc\u6211\u4eec\u5148\u56de\u5fc6\u4e0bSVD\u7684\u5b9a\u4e49\u3002<\/p>\n<p>\u5982\u679c\u4e00\u4e2a\u77e9\u9635X\u662f\u5bf9\u79f0\u7684\u65b9\u9635\uff0c\u90a3\u4e48\u53ef\u4ee5\u6c42\u5f97\u8fd9\u4e2a\u77e9\u9635\u7684\u7279\u5f81\u503c\u548c\u7279\u5f81\u5411\u91cf\u3002<\/p>\n<p>\u5176\u4e2d\u5177\u6709n\u4e2a\u7279\u5f81\u503c\u4ee5\u53ca\u6bcf\u4e00\u4e2a\u7279\u5f81\u503c\u5bf9\u5e94\u7684\u7279\u5f81\u5411\u91cf\u3002<\/p>\n<p>\u6b64\u65f6X\u53ef\u4ee5\u8868\u793a\u4e3aX=V\u03a3V<sup>-1<\/sup><\/p>\n<p>\u5176\u4e2dV\u662fn\u4e2a\u7279\u5f81\u5411\u91cf\u7ec4\u6210\u7684n\u00d7n\u77e9\u9635\uff0c\u03a3\u5219\u662f\u4ee5n\u4e2a\u7279\u5f81\u503c\u4e3a\u4e3b\u5bf9\u89d2\u7ebf\u7684n\u00d7n\u5bf9\u89d2\u77e9\u9635\u3002<\/p>\n<p>\u4f46\u5982\u679c\u4e0d\u662f\u5bf9\u79f0\u7684\u77e9\u9635\uff0c\u90a3\u4e48\u5c31\u4e0d\u80fd\u76f4\u63a5\u5206\u89e3\uff0c\u8fd9\u65f6\u5019\u5c31\u5f97\u4f7f\u7528SVD\u5206\u89e3\u3002<\/p>\n<p>\u5c06X\u7684\u8f6c\u7f6eX\u2019 \u548c 
X\u505a\u77e9\u9635\u4e58\u6cd5\uff0c\u5f97\u5230n\u00d7n\u77e9\u9635X\u2019X\uff0c\u7136\u540e\u5bf9\u8fd9\u4e2a\u5f97\u5230\u7684\u77e9\u9635\u8fdb\u884c\u7279\u5f81\u5206\u89e3\uff0c\u5f97\u5230n\u4e2a\u7279\u5f81\u5411\u91cf\uff0c\u6784\u5efa\u4e3a\u4e00\u4e2a\u77e9\u9635V\u3002<\/p>\n<p>\u540c\u6837X\u548cX\u2019\u505a\u4e58\u6cd5\uff0c\u5f97\u5230m\u00d7m\u77e9\u9635XX\u2019\uff0c\u8fdb\u4e00\u6b65\u5f97\u5230m\u4e2a\u7279\u5f81\u5411\u91cf\uff0c\u6784\u5efa\u4e3a\u77e9\u9635U\u3002<\/p>\n<p>\u8fd9\u6837\u5f97\u5230\u4e86\u4e24\u4e2a\u5411\u91cf\u77e9\u9635\uff0c\u8fd8\u9700\u8981\u6c42\u5f97\u5947\u5f02\u503c\u77e9\u9635\u03a3\u3002<\/p>\n<p>\u8fd9\u91cc\u6211\u4eec\u5148\u7ed9\u51fa\u4e00\u4e2a\u516c\u5f0fX=U\u03a3V\u2019<\/p>\n<p>\u8fd9\u91cc\u7684\u5947\u5f02\u503c\u77e9\u9635\u4ee3\u8868\u7740\u4e0d\u540c\u6982\u5ff5\u5728\u6587\u6863\u96c6\u5408\u4e2d\u7684\u4e0d\u540c\u91cd\u8981\u7a0b\u5ea6\uff0cU\u4ee3\u8868\u4e86\u6587\u6863\u548c\u6982\u5ff5\u7684\u5173\u7cfb\u5f3a\u5f31\u3002V\u4ee3\u8868\u7740\u8bcd\u6761\u548c\u6982\u5ff5\u7684\u5173\u7cfb\u5f3a\u5f31\u3002<\/p>\n<p>\u5bf9\u5e94\u5230\u6211\u4eec\u7684\u7cfb\u7edf\u4e2d\uff0c\u5c31\u662f\u7535\u5f71\u5230\u7528\u6237\u4e2d\u533a\u5206\u7684 \u4e3b\u9898 \u6982\u5ff5<\/p>\n<p>\u5176\u4e2d\u4e3b\u9898\u5c31\u662f\u79d1\u5e7b\u7c7b\uff0c\u52a8\u4f5c\u7c7b\u8fd9\u6837\u7684\u6982\u5ff5\u3002<\/p>\n<p>\u7136\u540eU\u8868\u793a\u7528\u6237\u5bf9\u4e3b\u9898\u7684\u70ed\u7231\u7a0b\u5ea6\uff0cV\u8868\u793a\u4e86\u7535\u5f71\u548c\u4e3b\u9898\u7684\u5173\u7cfb\u3002<\/p>\n<p>\u8fd9\u6837\u5c31\u53ef\u4ee5\u8fdb\u884c\u534f\u540c\u8fc7\u6ee4\u3002\u90a3\u4e48\u63a5\u4e0b\u6765\u6211\u4eec\u5c31\u4f7f\u7528Python\u8bed\u8a00\u8fdb\u884c\u5b9e\u73b0\u3002<\/p>\n<p>\u9996\u5148\u662f\u6784\u5efa\u77e9\u9635<\/p>\n<table>\n<tr>\n<td>\n  import pandas as pd<\/p>\n<p>from numpy import *<\/p>\n<p># \u52a0\u8f7d\u7528\u6237\u5bf9\u7535\u5f71\u7684\u8bc4\u5206\u6570\u636e<\/p>\n<p>df_ratings = 
pd.read_csv(&#8220;\/Users\/shenhuang\/Data\/ml-latest-small\/ratings.csv&#8221;)<\/p>\n<p># \u83b7\u53d6\u7528\u6237\u7684\u6570\u91cf\u548c\u7535\u5f71\u7684\u6570\u91cf\uff0c\u8fd9\u91cc\u6211\u4eec\u53ea\u53d6\u524d1\/10\u6765\u51cf\u5c0f\u6570\u636e\u89c4\u6a21<\/p>\n<p>user_num = int(df_ratings[&#8220;userId&#8221;].max() \/ 10)<\/p>\n<p>movie_num = int(df_ratings[&#8220;movieId&#8221;].max() \/ 10)<\/p>\n<p># \u6784\u9020\u7528\u6237\u5bf9\u7535\u5f71\u7684\u4e8c\u5143\u5173\u7cfb\u77e9\u9635<\/p>\n<p>user_rating = [[0.0] * movie_num for i in range(user_num)]<\/p>\n<p>i = 0<\/p>\n<p>for index, row in df_ratings.iterrows():   # \u83b7\u53d6\u6bcf\u884c\u7684index\u3001row<\/p>\n<p># \u7531\u4e8e\u7528\u6237\u548c\u7535\u5f71\u7684ID\u90fd\u662f\u4ece1\u5f00\u59cb\uff0c\u4e3a\u4e86\u548cPython\u7684\u7d22\u5f15\u4e00\u81f4\uff0c\u51cf\u53bb1<\/p>\n<p>userId = int(row[&#8220;userId&#8221;]) &#8211; 1<\/p>\n<p>movieId = int(row[&#8220;movieId&#8221;]) &#8211; 1<\/p>\n<p># \u6211\u4eec\u53ea\u53d6\u524d1\/10\u6765\u51cf\u5c0f\u6570\u636e\u89c4\u6a21<\/p>\n<p>if (userId &gt;= user_num) or (movieId &gt;= movie_num):<\/p>\n<p>continue<\/p>\n<p># \u8bbe\u7f6e\u7528\u6237\u5bf9\u7535\u5f71\u7684\u8bc4\u5206<\/p>\n<p>user_rating[userId][movieId] = row[&#8220;rating&#8221;]<\/td>\n<\/tr>\n<\/table>\n<p>\u7136\u540e\u5c06\u5176\u6807\u51c6\u5316<\/p>\n<p>\u4e4b\u540e\u4f7f\u7528numpy\u7ed9\u51fa\u7684svd\u51fd\u6570<\/p>\n<table>\n<tr>\n<td>\n  # \u628a\u4e8c\u7ef4\u6570\u7ec4\u8f6c\u5316\u4e3a\u77e9\u9635<\/p>\n<p>x = mat(user_rating)<\/p>\n<p># \u6807\u51c6\u5316\u6bcf\u4f4d\u7528\u6237\u7684\u8bc4\u5206\u6570\u636e<\/p>\n<p>from sklearn.preprocessing import scale<\/p>\n<p># \u5bf9\u6bcf\u4e00\u884c\u7684\u6570\u636e\uff0c\u8fdb\u884c\u6807\u51c6\u5316<\/p>\n<p>x_s = scale(x, with_mean=True, with_std=True, axis=1)<\/p>\n<p>print(&#8220;\u6807\u51c6\u5316\u540e\u7684\u77e9\u9635\uff1a&#8221;, x_s)<\/p>\n<p># \u8fdb\u884cSVD\u5206\u89e3<\/p>\n<p>from numpy import linalg as 
LA<\/p>\n<p>u,sigma,vt = LA.svd(x_s, full_matrices=False, compute_uv=True)<\/p>\n<p>print(&#8220;U\u77e9\u9635\uff1a&#8221;, u)<\/p>\n<p>print(&#8220;Sigma\u5947\u5f02\u503c\uff1a&#8221;, sigma)<\/p>\n<p>print(&#8220;V\u77e9\u9635\uff1a&#8221;, vt)<\/td>\n<\/tr>\n<\/table>\n<p>\u4ece\u800c\u5f97\u5230\u5947\u5f02\u503c\u77e9\u9635\u3002<\/p>\n<p>\u7136\u540e\u6211\u4eec\u5229\u7528V\u6765\u770b\u4e0b\u662f\u4e0d\u662f\u5206\u7684\u5408\u7406\u3002<\/p>\n<table>\n<tr>\n<td>\n  # \u52a0\u8f7d\u7535\u5f71\u5143\u4fe1\u606f<\/p>\n<p>df_movies = pd.read_csv(&#8220;\/Users\/shenhuang\/Data\/ml-latest-small\/movies.csv&#8221;)<\/p>\n<p>dict_movies = {}<\/p>\n<p>for index, row in df_movies.iterrows():   # \u83b7\u53d6\u6bcf\u884c\u7684index\u3001row<\/p>\n<p>dict_movies[row[&#8220;movieId&#8221;]] = &#8220;{0},{1}&#8221;.format(row[&#8220;title&#8221;], row[&#8220;genres&#8221;])<\/p>\n<p>print(dict_movies)<\/p>\n<p>print(max(vt[1,:]))<\/p>\n<p>for i in range(movie_num):<\/p>\n<p>if (vt[1][i] &gt; 0.1):<\/p>\n<p>print(i + 1, vt[1][i], dict_movies[i + 1])<\/td>\n<\/tr>\n<\/table>\n<p>\u4ece\u800c\u67e5\u770b\u662f\u4e0d\u662f\u5206\u7ec4\u5408\u9002\uff0c\u8fd9\u91cc\u6211\u4eec\u770b\u4e0b\u76f8\u5173\u7684\u7535\u5f71\u3002<\/p>\n<table>\n<tr>\n<td>\n  260 0.14287410901699643 Star Wars: Episode IV &#8211; A New Hope (1977),Action|Adventure|Sci-Fi<\/p>\n<p>1196 0.1147295905497075 Star Wars: Episode V &#8211; The Empire Strikes Back (1980),Action|Adventure|Sci-Fi<\/p>\n<p>1198 0.15453176747222075 Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981),Action|Adventure<\/p>\n<p>1210 0.10411193224648774 Star Wars: Episode VI &#8211; Return of the Jedi (1983),Action|Adventure|Sci-Fi<\/p>\n<p>2571 0.17316444479201024 Matrix, The (1999),Action|Sci-Fi|Thriller<\/p>\n<p>3578 0.1268370902126096 Gladiator (2000),Action|Adventure|Drama<\/p>\n<p>4993 0.12445203514448012 Lord of the Rings: The Fellowship of the Ring, The (2001),Adventure|Fantasy<\/p>\n<p>5952 
0.12535012292041953 Lord of the Rings: The Two Towers, The (2002),Adventure|Fantasy<\/p>\n<p>7153 0.10972312192709989 Lord of the Rings: The Return of the King, The (2003),Action|Adventure|Drama|Fantasy<\/td>\n<\/tr>\n<\/table>\n<p>\u4e0a\u8ff0\u90fd\u662f\u79d1\u5e7b\u7c7b\u7684\u7535\u5f71\uff0c\u81ea\u7136\u53ef\u4ee5\u8ba4\u4e3a\u5206\u7c7b\u6210\u529f\u3002<\/p>\n<p>\u90a3\u4e48\u6211\u4eec\u5c31\u901a\u8fc7SVD\uff0c\u5b9e\u73b0\u4e86\u77e9\u9635\u5206\u89e3\u3002<\/p>\n<p>\u4ece\u800c\u5b9e\u73b0\u4e86\u57fa\u4e8e\u4e3b\u9898\u7684\u534f\u540c\u8fc7\u6ee4\u3002<\/p>\n<p>\u6700\u540e\u8d34\u4e0a\u5b8c\u6574\u4ee3\u7801<\/p>\n<table>\n<tr>\n<td>\n  import numpy<\/p>\n<p>import pandas as pd<\/p>\n<p>from numpy import *<\/p>\n<p>from sklearn.preprocessing import scale<\/p>\n<p># \u52a0\u8f7d\u7528\u6237\u5bf9\u7535\u5f71\u7684\u8bc4\u5206\u6570\u636e<\/p>\n<p>df_ratings = pd.read_csv(&#8220;.\/ratings.csv&#8221;)<\/p>\n<p># \u83b7\u53d6\u7528\u6237\u7684\u6570\u91cf\u548c\u7535\u5f71\u7684\u6570\u91cf\uff0c\u8fd9\u91cc\u6211\u4eec\u53ea\u53d6\u524d1\/10\u6765\u51cf\u5c0f\u6570\u636e\u89c4\u6a21<\/p>\n<p>user_num = int(df_ratings[&#8220;userId&#8221;].max() \/ 10)<\/p>\n<p>movie_num = int(df_ratings[&#8220;movieId&#8221;].max() \/ 10)<\/p>\n<p># \u6784\u9020\u7528\u6237\u5bf9\u7535\u5f71\u7684\u4e8c\u5143\u5173\u7cfb\u77e9\u9635<\/p>\n<p>user_rating = [[0.0] * movie_num for i in range(user_num)]<\/p>\n<p>i = 0<\/p>\n<p>for index, row in df_ratings.iterrows():  # \u83b7\u53d6\u6bcf\u884c\u7684index\u3001row<\/p>\n<p># \u7531\u4e8e\u7528\u6237\u548c\u7535\u5f71\u7684ID\u90fd\u662f\u4ece1\u5f00\u59cb\uff0c\u4e3a\u4e86\u548cPython\u7684\u7d22\u5f15\u4e00\u81f4\uff0c\u51cf\u53bb1<\/p>\n<p>userId = int(row[&#8220;userId&#8221;]) &#8211; 1<\/p>\n<p>movieId = int(row[&#8220;movieId&#8221;]) &#8211; 1<\/p>\n<p># \u6211\u4eec\u53ea\u53d6\u524d1\/10\u6765\u51cf\u5c0f\u6570\u636e\u89c4\u6a21<\/p>\n<p>if (userId &gt;= user_num) or (movieId &gt;= 
movie_num):<\/p>\n<p>continue<\/p>\n<p># \u8bbe\u7f6e\u7528\u6237\u5bf9\u7535\u5f71\u7684\u8bc4\u5206<\/p>\n<p>user_rating[userId][movieId] = row[&#8220;rating&#8221;]<\/p>\n<p># \u628a\u4e8c\u7ef4\u6570\u7ec4\u8f6c\u5316\u4e3a\u77e9\u9635<\/p>\n<p>x = numpy.asarray(user_rating)<\/p>\n<p># \u6807\u51c6\u5316\u6bcf\u4f4d\u7528\u6237\u7684\u8bc4\u5206\u6570\u636e<\/p>\n<p># \u5bf9\u6bcf\u4e00\u884c\u7684\u6570\u636e\uff0c\u8fdb\u884c\u6807\u51c6\u5316<\/p>\n<p>x_s = scale(x, with_mean=True, with_std=True, axis=1)<\/p>\n<p>print(&#8220;\u6807\u51c6\u5316\u540e\u7684\u77e9\u9635\uff1a&#8221;, x_s)<\/p>\n<p>from numpy import linalg as LA<\/p>\n<p>u, sigma, vt = LA.svd(x_s, full_matrices=False, compute_uv=True)<\/p>\n<p>print(&#8220;U\u77e9\u9635\uff1a&#8221;, u)<\/p>\n<p>print(&#8220;Sigma\u5947\u5f02\u503c\uff1a&#8221;, sigma)<\/p>\n<p>print(&#8220;V\u77e9\u9635\uff1a&#8221;, vt)<\/p>\n<p>df_movies = pd.read_csv(&#8220;.\/movies.csv&#8221;)<\/p>\n<p>dict_movies = {}<\/p>\n<p>for index, row in df_movies.iterrows():  # \u83b7\u53d6\u6bcf\u884c\u7684index\u3001row<\/p>\n<p>dict_movies[row[&#8220;movieId&#8221;]] = &#8220;{0},{1}&#8221;.format(row[&#8220;title&#8221;], row[&#8220;genres&#8221;])<\/p>\n<p>print(dict_movies)<\/p>\n<p>print(max(vt[1, :]))<\/p>\n<p>for i in range(movie_num):<\/p>\n<p>if (vt[1][i] &gt; 0.1):<\/p>\n<p>print(i + 1, vt[1][i], dict_movies[i + 1])<\/td>\n<\/tr>\n<\/table>\n","protected":false},"excerpt":{"rendered":"<p>\u57fa\u7840\u6570\u5b66\u8bfe31 \u5b9e\u73b0\u4e00\u4e2a\u63a8\u8350\u7cfb 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[46],"tags":[],"_links":{"self":[{"href":"http:\/\/xinblog.ltd\/index.php?rest_route=\/wp\/v2\/posts\/5552"}],"collection":[{"href":"http:\/\/xinblog.ltd\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/xinblog.ltd\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/xinblog.ltd\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/xinblog.ltd\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=5552"}],"version-history":[{"count":1,"href":"http:\/\/xinblog.ltd\/index.php?rest_route=\/wp\/v2\/posts\/5552\/revisions"}],"predecessor-version":[{"id":5560,"href":"http:\/\/xinblog.ltd\/index.php?rest_route=\/wp\/v2\/posts\/5552\/revisions\/5560"}],"wp:attachment":[{"href":"http:\/\/xinblog.ltd\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=5552"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/xinblog.ltd\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=5552"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/xinblog.ltd\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=5552"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}